diff options
author | qmuntal <quimmuntal@gmail.com> | 2023-05-05 18:17:18 +0200 |
---|---|---|
committer | Quim Muntal <quimmuntal@gmail.com> | 2023-05-15 09:26:16 +0000 |
commit | 974236bda9b9aad87b4b10ec9af2cc01b14e382f (patch) | |
tree | 321cb16cc9fc2eec78fc81060ff64db06115a544 /src/internal | |
parent | 91b8cc0dfaae12af1a89e2b7ad3da10728883ee1 (diff) | |
download | go-git-974236bda9b9aad87b4b10ec9af2cc01b14e382f.tar.gz |
os, syscall: support ill-formed UTF-16 strings on Windows
Windows UTF-16 strings can contain unpaired surrogates, which can't be
decoded into a valid UTF-8 string. This file defines a set of functions
that can be used to encode and decode potentially ill-formed UTF-16
strings by using
[the WTF-8 encoding](https://simonsapin.github.io/wtf-8/).
WTF-8 is a strict superset of UTF-8, i.e. any string that is
well-formed in UTF-8 is also well-formed in WTF-8 and the content
is unchanged. Also, the conversion never fails and is lossless.
The benefit of using WTF-8 instead of UTF-8 when decoding a UTF-16
string is that the conversion is lossless even for ill-formed
UTF-16 strings. This property makes it possible to read an ill-formed
UTF-16 string, convert it to a Go string, and convert it back to the same
original UTF-16 string.
Fixes #59971
Change-Id: Id6007f6e537844913402b233e73d698688cd5ba6
Reviewed-on: https://go-review.googlesource.com/c/go/+/493036
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
Run-TryBot: Quim Muntal <quimmuntal@gmail.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Paul Hampson <Paul.Hampson@Pobox.com>
Diffstat (limited to 'src/internal')
-rw-r--r-- | src/internal/syscall/execenv/execenv_windows.go | 3 | ||||
-rw-r--r-- | src/internal/syscall/windows/registry/value.go | 2 | ||||
-rw-r--r-- | src/internal/syscall/windows/syscall_windows.go | 7 |
3 files changed, 3 insertions, 9 deletions
diff --git a/src/internal/syscall/execenv/execenv_windows.go b/src/internal/syscall/execenv/execenv_windows.go index 46ba12efc5..2a89ed1f58 100644 --- a/src/internal/syscall/execenv/execenv_windows.go +++ b/src/internal/syscall/execenv/execenv_windows.go @@ -9,7 +9,6 @@ package execenv import ( "internal/syscall/windows" "syscall" - "unicode/utf16" "unsafe" ) @@ -41,7 +40,7 @@ func Default(sys *syscall.SysProcAttr) (env []string, err error) { } entry := unsafe.Slice(blockp, (uintptr(end)-uintptr(unsafe.Pointer(blockp)))/2) - env = append(env, string(utf16.Decode(entry))) + env = append(env, syscall.UTF16ToString(entry)) blockp = (*uint16)(unsafe.Add(end, size)) } return diff --git a/src/internal/syscall/windows/registry/value.go b/src/internal/syscall/windows/registry/value.go index 025574015f..7dfee0330f 100644 --- a/src/internal/syscall/windows/registry/value.go +++ b/src/internal/syscall/windows/registry/value.go @@ -217,7 +217,7 @@ func (k Key) GetStringsValue(name string) (val []string, valtype uint32, err err from := 0 for i, c := range p { if c == 0 { - val = append(val, string(utf16.Decode(p[from:i]))) + val = append(val, syscall.UTF16ToString(p[from:i])) from = i + 1 } } diff --git a/src/internal/syscall/windows/syscall_windows.go b/src/internal/syscall/windows/syscall_windows.go index cfe4695258..53d32a14a0 100644 --- a/src/internal/syscall/windows/syscall_windows.go +++ b/src/internal/syscall/windows/syscall_windows.go @@ -7,7 +7,6 @@ package windows import ( "sync" "syscall" - "unicode/utf16" "unsafe" ) @@ -17,17 +16,13 @@ func UTF16PtrToString(p *uint16) string { if p == nil { return "" } - // Find NUL terminator. end := unsafe.Pointer(p) n := 0 for *(*uint16)(end) != 0 { end = unsafe.Pointer(uintptr(end) + unsafe.Sizeof(*p)) n++ } - // Turn *uint16 into []uint16. - s := unsafe.Slice(p, n) - // Decode []uint16 into string. - return string(utf16.Decode(s)) + return syscall.UTF16ToString(unsafe.Slice(p, n)) } const ( |