json 如何在VB6中将字符串转换或解码为可读格式?

cl25kdpy  于 2023-10-21  发布在  其他
关注(0)|答案(1)|浏览(145)

下面的字符串是json文件请求的结果。

StrResult ="\u00D8\u00B3\u00D9\u0084\u00D8\u00A7\u00D9\u0085 
\u00D8\u00AF\u00D9\u0086\u00DB\u008C\u00D8\u00A7"

如何将这个字符串转换成可读的字符?
提示:解码后应该接收的字符串是سلام دنیا,它在英语中的等价物是“Hello World”。
有许多其他语言的示例代码,包括Python,.Net等,但我找不到任何VB6的代码。

2q5ifsrm

2q5ifsrm1#

您提供的字符串不会解码为سلام دنیا,而是سلام دنیا。您可以确认here
字符串实际包含的是单个UTF-8字节,而不是Unicode代码点。这使得您的任务更加困难,因为VB 6字符串通常在内存中编码为UTF-16。
我最近开发了一个包含VBA的扩展字符串功能的库,但我认为关于转义和取消转义Unicode文字的部分应该可以像VB 6代码一样工作。你可以在GitHub here上找到整个库,但我可以在这里直接包含应该解决你问题的部分。
使用我下面提供的库中的函数,你应该能够实现你想要的结果,像这样:

StrResult = DecodeUTF8(EncodeANSI(UnescapeUnicode(StrResult)))

以下是所需的功能:

Public Enum UnicodeEscapeFormat
    [_efNone] = 0
    efPython = 1 '\uXXXX \u00XXXXXX (4 or 8 hex digits, 8 for chars outside BMP)
    efRust = 2   '\u{X} \U{XXXXXX}  (1 to 6 hex digits)
    efUPlus = 4  'u+XXXX u+XXXXXX   (4 or 6 hex digits)
    efMarkup = 8 '&#ddddddd;        (1 to 7 decimal digits)
    efAll = 15
    [_efMin] = efPython
    [_efMax] = efAll
End Enum

Private Type EscapeSequence
    ueFormat As UnicodeEscapeFormat
    ueSignature As String
    letSngSurrogate As Boolean
    buffPosition As Long
    currPosition As Long
    sigSize As Long
    escSize As Long
    codepoint As Long
    unEscSize As Long
End Type
Private Type TwoCharTemplate
    s As String * 2
End Type
Private Type LongTemplate
    l As Long
End Type

'Replaces all occurences of unicode characters outside the codePoint range
'defined by maxNonEscapedCharCode with literals of the following formats
'specified by `escapeFormat`:
' efPython = 1 ... \uXXXX \u00XXXXXX   (4 or 8 hex digits, 8 for chars outside BMP)
' efRust   = 2 ... \u{XXXX} \U{XXXXXX} (1 to 6 hex digits)
' efUPlus  = 4 ... u+XXXX u+XXXXXX     (4 or 6 hex digits)
' efMarkup = 8 ... &#ddddddd;          (1 to 7 decimal digits)
'Where:
'   - prefixes \u is case insensitive
'   - Xes are the digits of the codepoint in hexadecimal. (X = 0-9 or A-F/a-f)
'Note:
'   - Avoid u+XXXX syntax if string contains literals without delimiters as it
'     can be misinterpreted if adjacent to text starting with 0-9 or a-f.
'   - This function accepts all combinations of UnicodeEscapeFormats:
'     If called with, e.g. `escapeFormat = efRust Or efPython`, every character
'     in the scope will be escaped with in either format, efRust or efPython,
'     chosen at random for each replacement.
'   - If `escapeFormat` is set to efAll, it will replace every character in the
'     scope with a randomly chosen format of all available fotrmats.
'   - To escape every character, set `maxNonEscapedCharCode = -1`
Public Function EscapeUnicode(ByRef str As String, _
                     Optional ByVal maxNonEscapedCharCode As Long = &HFF, _
                     Optional ByVal escapeFormat As UnicodeEscapeFormat _
                                                = efPython) As String
    Const methodName As String = "EscapeUnicode"
    If maxNonEscapedCharCode < -1 Then Err.Raise 5, methodName, _
        "`maxNonEscapedCharCode` must be greater or equal -1."
    If escapeFormat < [_efMin] Or escapeFormat > [_efMax] Then _
        Err.Raise 5, methodName, "Invalid escape type."
    If Len(str) = 0 Then Exit Function
    Dim i As Long
    Dim j As Long:                j = 1
    Dim result() As String:       ReDim result(1 To Len(str))
    Dim copyChunkSize As Long
    Dim rndEscapeFormat As Boolean
    rndEscapeFormat = ((escapeFormat And (escapeFormat - 1)) <> 0) 'eFmt <> 2^n
    Dim numescapeFormats As Long
    If rndEscapeFormat Then
        Dim escapeFormats() As Long
        For i = 0 To (Log(efAll + 1) / Log(2)) - 1
            If 2 ^ i And escapeFormat Then
                ReDim Preserve escapeFormats(0 To numescapeFormats)
                escapeFormats(numescapeFormats) = 2 ^ i
                numescapeFormats = numescapeFormats + 1
            End If
        Next i
    End If
    For i = 1 To Len(str)
        Dim codepoint As Long: codepoint = AscU(Mid$(str, i, 2))
        If codepoint > maxNonEscapedCharCode Then
            If copyChunkSize > 0 Then
                result(j) = Mid$(str, i - copyChunkSize, copyChunkSize)
                copyChunkSize = 0
                j = j + 1
            End If
            If rndEscapeFormat Then
                escapeFormat = escapeFormats(Int(numescapeFormats * Rnd))
            End If
            Select Case escapeFormat
                Case efPython
                    If codepoint > &HFFFF& Then 'Outside BMP
                        result(j) = "\u" & "00" & Right$("0" & Hex(codepoint), 6)
                    Else 'BMP
                        result(j) = "\u" & Right$("000" & Hex(codepoint), 4)
                    End If
                Case efRust
                    result(j) = "\u{" & Hex(codepoint) & "}"
                Case efUPlus
                    If codepoint < &H1000& Then
                        result(j) = "u+" & Right$("000" & Hex(codepoint), 4)
                    Else
                        result(j) = "u+" & Hex(codepoint)
                    End If
                Case efMarkup
                    result(j) = "&#" & codepoint & ";"
            End Select
            If rndEscapeFormat Then
                If Int(2 * Rnd) = 1 Then result(j) = UCase(result(j))
            End If
            j = j + 1
        Else
            If codepoint < &H10000 Then
                copyChunkSize = copyChunkSize + 1
            Else
                copyChunkSize = copyChunkSize + 2
            End If
        End If
        If codepoint > &HFFFF& Then i = i + 1
    Next i
    If copyChunkSize > 0 Then _
        result(j) = Mid$(str, i - copyChunkSize, copyChunkSize)
    EscapeUnicode = Join(result, "")
End Function

'Replaces all occurences of unicode literals
'Accepts the following formattings `escapeFormat`:
'   efPython = 1 ... \uXXXX \u000XXXXX    (4 or 8 hex digits, 8 for chars outside BMP)
'   efRust   = 2 ... \u{XXXX} \U{XXXXXXX} (1 to 6 hex digits)
'   efUPlus  = 4 ... u+XXXX u+XXXXXX      (4 or 6 hex digits)
'   efMarkup = 8 ... &#ddddddd;           (1 to 7 decimal digits)
'Where:
'   - prefixes \u is case insensitive
'   - Xes are the digits of the codepoint in hexadecimal. (X = 0-9 or A-F/a-f)
'Example:
'   - "abcd &#97;u+0062\U0063xy\u{64}", efAll returns "abcd abcxyd"
'Notes:
'   - Avoid u+XXXX syntax if string contains literals without delimiters as it
'     can be misinterpreted if adjacent to text starting with 0-9 or a-f.
'   - This function also accepts all combinations of UnicodeEscapeFormats:
'       E.g.:
'UnescapeUnicode("abcd &#97;u+0062\U0063xy\u{64}", efMarkup Or efRust)
'       will return:
'"abcd au+0062\U0063xyd"
'   - By default, this function will not invalidate UTF-16 strings if they are
'     currently valid, but this can happen if `allowSingleSurrogates = True`
'     E.g.: EscapeUnicode(ChrU(&HD801&, True)) returns "\uD801", but this string
'     can no longer be un-escaped with UnescapeUnicode because "\uD801"
'     represents a surrogate halve which is invalid unicode on its own.
'     So UnescapeUnicode("\uD801") returns "\uD801" again, unless called with
'     the optional parameter `allowSingleSurrogates = False` like this
'     `UnescapeUnicode("\uD801", , True)`. This will return invalid UTF-16.
Public Function UnescapeUnicode(ByRef str As String, _
                       Optional ByVal escapeFormat As UnicodeEscapeFormat = efAll, _
                       Optional ByVal allowSingleSurrogates As Boolean = False) _
                                As String
    If escapeFormat < [_efMin] Or escapeFormat > [_efMax] Then
        Err.Raise 5, "EscapeUnicode", "Invalid escape format"
    End If

    Dim escapes() As EscapeSequence: escapes = NewEscapes()
    Dim lb As Long: lb = LBound(escapes)
    Dim ub As Long: ub = UBound(escapes)
    Dim i As Long

    For i = lb To ub 'Find first signature for each wanted format
        With escapes(i)
            If escapeFormat And .ueFormat Then
                .buffPosition = InStr(1, str, .ueSignature, vbBinaryCompare)
                .letSngSurrogate = allowSingleSurrogates
            End If
        End With
    Next i
    UnescapeUnicode = str 'Allocate buffer

    Const posByte As Byte = &H80
    Const buffSize As Long = 1024
    Dim buffSignaturePos(1 To buffSize) As Byte
    Dim buffFormat(1 To buffSize) As UnicodeEscapeFormat
    Dim buffEscIndex(1 To buffSize) As Long
    Dim posOffset As Long
    Dim diff As Long
    Dim highSur As Long
    Dim lowSur As Long
    Dim remainingLen As Long: remainingLen = Len(str)
    Dim posChar As String:    posChar = ChrB$(posByte)
    Dim outPos As Long:       outPos = 1
    Dim inPos As Long:        inPos = 1

    Do
        Dim upperLimit As Long: upperLimit = posOffset + buffSize
        For i = lb To ub 'Find all signatures within buffer size
            With escapes(i)
                Do Until .buffPosition = 0 Or .buffPosition > upperLimit
                    .buffPosition = .buffPosition - posOffset
                    buffSignaturePos(.buffPosition) = posByte
                    buffFormat(.buffPosition) = .ueFormat
                    buffEscIndex(.buffPosition) = i
                    .buffPosition = .buffPosition + .sigSize + posOffset
                    .buffPosition = InStr(.buffPosition, str, .ueSignature)
                Loop
            End With
        Next i

        Dim temp As String:  temp = buffSignaturePos
        Dim nextPos As Long: nextPos = InStrB(1, temp, posChar)

        Do Until nextPos = 0 'Unescape all found signatures from buffer
            i = buffEscIndex(nextPos)
            escapes(i).currPosition = nextPos + posOffset
            Select Case buffFormat(nextPos)
                Case efPython: TryPythonEscape escapes(i), str
                Case efRust:   TryRustEscape escapes(i), str
                Case efUPlus:  TryUPlusEscape escapes(i), str
                Case efMarkup: TryMarkupEscape escapes(i), str
            End Select
            With escapes(i)
                If .unEscSize > 0 Then
                    diff = .currPosition - inPos
                    If outPos > 1 Then
                        Mid$(UnescapeUnicode, outPos) = Mid$(str, inPos, diff)
                    End If
                    outPos = outPos + diff
                    If .unEscSize = 1 Then
                        Mid$(UnescapeUnicode, outPos) = ChrW$(.codepoint)
                    Else
                        .codepoint = .codepoint - &H10000
                        highSur = &HD800& Or (.codepoint \ &H400&)
                        lowSur = &HDC00& Or (.codepoint And &H3FF&)
                        Mid$(UnescapeUnicode, outPos) = ChrW$(highSur)
                        Mid$(UnescapeUnicode, outPos + 1) = ChrW$(lowSur)
                    End If
                    outPos = outPos + .unEscSize
                    inPos = .currPosition + .escSize
                    nextPos = nextPos + .escSize - .sigSize
                End If
                nextPos = InStrB(nextPos + .sigSize, temp, posChar)
            End With
        Loop
        remainingLen = remainingLen - buffSize
        posOffset = posOffset + buffSize
        Erase buffSignaturePos
    Loop Until remainingLen < 1

    If outPos > 1 Then
        diff = Len(str) - inPos + 1
        If diff > 0 Then
            Mid$(UnescapeUnicode, outPos, diff) = Mid$(str, inPos, diff)
        End If
        UnescapeUnicode = Left$(UnescapeUnicode, outPos + diff - 1)
    End If
End Function
Private Function NewEscapes() As EscapeSequence()
    Static escapes(0 To 6) As EscapeSequence
    If escapes(0).ueFormat = [_efNone] Then
        InitEscape escapes(0), efPython, "\U"
        InitEscape escapes(1), efPython, "\u"
        InitEscape escapes(2), efRust, "\U{"
        InitEscape escapes(3), efRust, "\u{"
        InitEscape escapes(4), efUPlus, "U+"
        InitEscape escapes(5), efUPlus, "u+"
        InitEscape escapes(6), efMarkup, "&#"
    End If
    NewEscapes = escapes
End Function
Private Sub InitEscape(ByRef escape As EscapeSequence, _
                       ByVal ueFormat As UnicodeEscapeFormat, _
                       ByRef ueSignature As String)
    With escape
        .ueFormat = ueFormat
        .ueSignature = ueSignature
        .sigSize = Len(ueSignature)
    End With
End Sub

Private Sub TryPythonEscape(ByRef escape As EscapeSequence, ByRef str As String)
    Const H As String = "[0-9A-Fa-f]"
    Const PYTHON_ESCAPE_PATTERN_NOT_BMP = "00[01]" & H & H & H & H & H
    Const PYTHON_ESCAPE_PATTERN_BMP As String = H & H & H & H & "*"
    Dim potentialEscape As String

    With escape
        .unEscSize = 0
        potentialEscape = Mid$(str, .currPosition + 2, 8) 'Exclude leading \[Uu]
        If potentialEscape Like PYTHON_ESCAPE_PATTERN_NOT_BMP Then
            .escSize = 10 '\[Uu]00[01]HHHHH
            .codepoint = CLng("&H" & potentialEscape) 'No extra Mid$ needed
            If .codepoint < &H10000 Then
                If IsValidBMP(.codepoint, .letSngSurrogate) Then
                    .unEscSize = 1
                    Exit Sub
                End If
            ElseIf .codepoint < &H110000 Then
                .unEscSize = 2
                Exit Sub
            End If
        End If
        If potentialEscape Like PYTHON_ESCAPE_PATTERN_BMP Then
            .escSize = 6 '\[Uu]HHHH
            .codepoint = CLng("&H" & Left$(potentialEscape, 4))
            If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
        End If
    End With
End Sub
Private Function IsValidBMP(ByVal codepoint As Long, _
                            ByVal letSingleSurrogate As Boolean) As Boolean
    IsValidBMP = (codepoint < &HD800& Or codepoint >= &HE000& Or letSingleSurrogate)
End Function

Private Sub TryRustEscape(ByRef escape As EscapeSequence, ByRef str As String)
    Static rustEscPattern(1 To 6) As String
    Static isPatternInit As Boolean
    Dim potentialEscape As String
    Dim nextBrace As Long

    If Not isPatternInit Then
        Dim i As Long
        rustEscPattern(1) = "[0-9A-Fa-f]}*"
        For i = 2 To 6
            rustEscPattern(i) = "[0-9A-Fa-f]" & rustEscPattern(i - 1)
        Next i
        isPatternInit = True
    End If
    With escape
        .unEscSize = 0
        potentialEscape = Mid$(str, .currPosition + 3, 7) 'Exclude leading \[Uu]{
        nextBrace = InStr(2, potentialEscape, "}", vbBinaryCompare)

        If nextBrace = 0 Then Exit Sub
        If Not potentialEscape Like rustEscPattern(nextBrace - 1) Then Exit Sub

        .codepoint = CLng("&H" & Left$(potentialEscape, nextBrace - 1))
        .escSize = nextBrace + 3
        If .codepoint < &H10000 Then
            If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
        ElseIf .codepoint < &H110000 Then
            .unEscSize = 2
        End If
    End With
End Sub

Private Sub TryUPlusEscape(ByRef escape As EscapeSequence, _
                           ByRef str As String)
    Const H As String = "[0-9A-Fa-f]"
    Const UPLUS_ESCAPE_PATTERN_4_DIGITS = H & H & H & H & "*"
    Const UPLUS_ESCAPE_PATTERN_5_DIGITS = H & H & H & H & H & "*"
    Const UPLUS_ESCAPE_PATTERN_6_DIGITS = H & H & H & H & H & H
    Dim potentialEscape As String

    With escape
        .unEscSize = 0
        potentialEscape = Mid$(str, .currPosition + 2, 6) 'Exclude leading [Uu]+
        If potentialEscape Like UPLUS_ESCAPE_PATTERN_6_DIGITS Then
            .escSize = 8
            .codepoint = CLng("&H" & potentialEscape)
            If .codepoint < &H10000 Then
                If IsValidBMP(.codepoint, .letSngSurrogate) Then
                    .unEscSize = 1
                    Exit Sub
                End If
            ElseIf .codepoint < &H110000 Then
                .unEscSize = 2
                Exit Sub
            End If
        End If
        If potentialEscape Like UPLUS_ESCAPE_PATTERN_5_DIGITS Then
            .escSize = 7
            .codepoint = CLng("&H" & Left$(potentialEscape, 5))
            If .codepoint < &H10000 Then
                If IsValidBMP(.codepoint, .letSngSurrogate) Then
                    .unEscSize = 1
                    Exit Sub
                End If
            Else
                .unEscSize = 2
                Exit Sub
            End If
        End If
        If potentialEscape Like UPLUS_ESCAPE_PATTERN_4_DIGITS Then
            .escSize = 6
            .codepoint = CLng("&H" & Left$(potentialEscape, 4))
            If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
        End If
    End With
End Sub
Private Sub TryMarkupEscape(ByRef escape As EscapeSequence, _
                            ByRef str As String)
    Static mEscPattern(1 To 7) As String
    Static isPatternInit As Boolean
    Dim potentialEscape As String
    Dim nextSemicolon As Long

    If Not isPatternInit Then
        Dim i As Long
        For i = 1 To 6
            mEscPattern(i) = String$(i, "#") & ";*"
        Next i
        mEscPattern(7) = "1######;"
        isPatternInit = True
    End If
    With escape
        .unEscSize = 0
        potentialEscape = Mid$(str, .currPosition + 2, 8) 'Exclude leading &[#]
        nextSemicolon = InStr(2, potentialEscape, ";", vbBinaryCompare)

        If nextSemicolon = 0 Then Exit Sub
        If Not potentialEscape Like mEscPattern(nextSemicolon - 1) Then Exit Sub

        .codepoint = CLng(Left$(potentialEscape, nextSemicolon - 1))
        .escSize = nextSemicolon + 2
        If .codepoint < &H10000 Then
            If IsValidBMP(.codepoint, .letSngSurrogate) Then .unEscSize = 1
        ElseIf .codepoint < &H110000 Then
            .unEscSize = 2
        End If
    End With
End Sub

'Returns the given unicode codepoint as standard VBA UTF-16LE string
Public Function ChrU(ByVal codepoint As Long, _
             Optional ByVal allowSingleSurrogates As Boolean = False) As String
    Const methodName As String = "ChrU"
    Static st As TwoCharTemplate
    Static lt As LongTemplate

    If codepoint < &H8000 Then Err.Raise 5, methodName, "Codepoint < -32768"
    If codepoint < 0 Then codepoint = codepoint And &HFFFF& 'Incase of uInt input

    If codepoint < &HD800& Then
        ChrU = ChrW$(codepoint)
    ElseIf codepoint < &HE000& And Not allowSingleSurrogates Then
        Err.Raise 5, methodName, "Range reserved for surrogate pairs"
    ElseIf codepoint < &H10000 Then
        ChrU = ChrW$(codepoint)
    ElseIf codepoint < &H110000 Then
        lt.l = (&HD800& Or (codepoint \ &H400& - &H40&)) _
            Or (&HDC00 Or (codepoint And &H3FF&)) * &H10000 '&HDC00 with no &
        LSet st = lt
        ChrU = st.s
    Else
        Err.Raise 5, methodName, "Codepoint outside of valid Unicode range."
    End If
End Function

'Returns a given characters unicode codepoint as long.
'Note: One unicode character can consist of two VBA "characters", a so-called
'      "surrogate pair" (input string of length 2, so Len(char) = 2!)
Public Function AscU(ByRef char As String) As Long
    AscU = AscW(char) And &HFFFF&
    If Len(char) > 1 Then
        Dim lo As Long: lo = AscW(Mid$(char, 2, 1)) And &HFFFF&
        If &HDC00& > lo Or lo > &HDFFF& Then Exit Function
        AscU = (AscU - &HD800&) * &H400& + (lo - &HDC00&) + &H10000
    End If
End Function

'Function transcoding a VBA-native UTF-16LE encoded string to an ANSI string
'Note: Information will be lost for codepoints > 255!
Public Function EncodeANSI(ByRef utf16leStr As String) As String
    Dim i As Long
    Dim j As Long:         j = 0
    Dim utf16le() As Byte: utf16le = utf16leStr
    Dim ansi() As Byte

    ReDim ansi(1 To Len(utf16leStr))
    For i = LBound(ansi) To UBound(ansi)
        If utf16le(j + 1) = 0 Then
            ansi(i) = utf16le(j)
            j = j + 2
        Else
            ansi(i) = &H3F 'Chr(&H3F) = "?"
            j = j + 2
        End If
    Next i
    EncodeANSI = ansi
End Function

'Function transcoding an UTF-8 encoded string to the VBA-native UTF-16LE
'Function transcoding an VBA-native UTF-16LE encoded string to UTF-8
Public Function DecodeUTF8(ByRef utf8Str As String, _
                  Optional ByVal raiseErrors As Boolean = False) As String

    Const methodName As String = "DecodeUTF8native"
    Dim i As Long
    Dim numBytesOfCodePoint As Byte

    Static numBytesOfCodePoints(0 To 255) As Byte
    Static mask(2 To 4) As Long
    Static minCp(2 To 4) As Long

    If numBytesOfCodePoints(0) = 0 Then
        For i = &H0& To &H7F&: numBytesOfCodePoints(i) = 1: Next i '0xxxxxxx
        '110xxxxx - C0 and C1 are invalid (overlong encoding)
        For i = &HC2& To &HDF&: numBytesOfCodePoints(i) = 2: Next i
        For i = &HE0& To &HEF&: numBytesOfCodePoints(i) = 3: Next i '1110xxxx
       '11110xxx - 11110100, 11110101+ (= &HF5+) outside of valid Unicode range
        For i = &HF0& To &HF4&: numBytesOfCodePoints(i) = 4: Next i
        For i = 2 To 4: mask(i) = (2 ^ (7 - i) - 1): Next i
        minCp(2) = &H80&: minCp(3) = &H800&: minCp(4) = &H10000
    End If

    Dim codepoint As Long
    Dim currByte As Byte
    Dim utf8() As Byte:  utf8 = utf8Str
    Dim utf16() As Byte: ReDim utf16(0 To (UBound(utf8) - LBound(utf8) + 1) * 2)
    Dim j As Long:       j = 0
    Dim k As Long

    i = LBound(utf8)
    Do While i <= UBound(utf8)
        codepoint = utf8(i)
        numBytesOfCodePoint = numBytesOfCodePoints(codepoint)

        If numBytesOfCodePoint = 0 Then
            If raiseErrors Then Err.Raise 5, methodName, "Invalid byte"
            GoTo insertErrChar
        ElseIf numBytesOfCodePoint = 1 Then
            utf16(j) = codepoint
            j = j + 2
        ElseIf i + numBytesOfCodePoint - 1 > UBound(utf8) Then
            If raiseErrors Then Err.Raise 5, methodName, _
                    "Incomplete UTF-8 codepoint at end of string."
            GoTo insertErrChar
        Else
            codepoint = utf8(i) And mask(numBytesOfCodePoint)

            For k = 1 To numBytesOfCodePoint - 1
                currByte = utf8(i + k)

                If (currByte And &HC0&) = &H80& Then
                    codepoint = (codepoint * &H40&) + (currByte And &H3F)
                Else
                    If raiseErrors Then _
                        Err.Raise 5, methodName, "Invalid continuation byte"
                    GoTo insertErrChar
                End If
            Next k
            'Convert the Unicode codepoint to UTF-16LE bytes
            If codepoint < minCp(numBytesOfCodePoint) Then
                If raiseErrors Then Err.Raise 5, methodName, "Overlong encoding"
                GoTo insertErrChar
            ElseIf codepoint < &HD800& Then
                utf16(j) = CByte(codepoint And &HFF&)
                utf16(j + 1) = CByte(codepoint \ &H100&)
                j = j + 2
            ElseIf codepoint < &HE000& Then
                If raiseErrors Then Err.Raise 5, methodName, _
                "Invalid Unicode codepoint.(Range reserved for surrogate pairs)"
                GoTo insertErrChar
            ElseIf codepoint < &H10000 Then
                If codepoint = &HFEFF& Then GoTo nextCp '(BOM - will be ignored)
                utf16(j) = codepoint And &HFF&
                utf16(j + 1) = codepoint \ &H100&
                j = j + 2
            ElseIf codepoint < &H110000 Then 'Calculate surrogate pair
                Dim m As Long:           m = codepoint - &H10000
                Dim loSurrogate As Long: loSurrogate = &HDC00& Or (m And &H3FF)
                Dim hiSurrogate As Long: hiSurrogate = &HD800& Or (m \ &H400&)

                utf16(j) = hiSurrogate And &HFF&
                utf16(j + 1) = hiSurrogate \ &H100&
                utf16(j + 2) = loSurrogate And &HFF&
                utf16(j + 3) = loSurrogate \ &H100&
                j = j + 4
            Else
                If raiseErrors Then Err.Raise 5, methodName, _
                        "Codepoint outside of valid Unicode range"
insertErrChar:  utf16(j) = &HFD
                utf16(j + 1) = &HFF
                j = j + 2

                If numBytesOfCodePoint = 0 Then numBytesOfCodePoint = 1
            End If
        End If
nextCp: i = i + numBytesOfCodePoint 'Move to the next UTF-8 codepoint
    Loop
    DecodeUTF8 = MidB$(utf16, 1, j)
End Function

注意:EncodeANSI函数可以像这样被“滥用”,因为转义字符串中的UTF-8字节总是被解码为单字节UTF-16字符,因为它们根据定义是单字节。这意味着EncodeANSI函数只是用来从字符串中删除每隔一个字节(由于UTF-16表示单字节字符的方式,这些字节都是空的)。结果字符串是您想要的字符串的UTF-8表示,然后我们将其“解码”(转换为UTF-16),因为这是vb 6表示Unicode字符串的原生方式。
我还包含了EscapeUnicode函数,所以你可以看到你的字符串实际上应该是什么样子,作为转义的unicode代码点:

actualEscapeSequence = EscapeUnicode(DecodeUTF8(EncodeANSI(UnescapeUnicode(StrResult))))

actualEscapeSequence将等于“\u0633\u0644\u0627\u0645\u062F\u0646\u06CC\u0627”,您可以确认它是正确的unicode转义序列here

相关问题