summaryrefslogtreecommitdiff
path: root/src/internal
diff options
context:
space:
mode:
authorqmuntal <quimmuntal@gmail.com>2023-05-05 18:17:18 +0200
committerQuim Muntal <quimmuntal@gmail.com>2023-05-15 09:26:16 +0000
commit974236bda9b9aad87b4b10ec9af2cc01b14e382f (patch)
tree321cb16cc9fc2eec78fc81060ff64db06115a544 /src/internal
parent91b8cc0dfaae12af1a89e2b7ad3da10728883ee1 (diff)
downloadgo-git-974236bda9b9aad87b4b10ec9af2cc01b14e382f.tar.gz
os, syscall: support ill-formed UTF-16 strings on Windows
Windows UTF-16 strings can contain unpaired surrogates, which can't be decoded into a valid UTF-8 string. This file defines a set of functions that can be used to encode and decode potentially ill-formed UTF-16 strings by using [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/). WTF-8 is a strict superset of UTF-8, i.e. any string that is well-formed in UTF-8 is also well-formed in WTF-8 and the content is unchanged. Also, the conversion never fails and is lossless. The benefit of using WTF-8 instead of UTF-8 when decoding a UTF-16 string is that the conversion is lossless even for ill-formed UTF-16 strings. This property makes it possible to read an ill-formed UTF-16 string, convert it to a Go string, and convert it back to the same original UTF-16 string. Fixes #59971 Change-Id: Id6007f6e537844913402b233e73d698688cd5ba6 Reviewed-on: https://go-review.googlesource.com/c/go/+/493036 TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Bryan Mills <bcmills@google.com> Run-TryBot: Quim Muntal <quimmuntal@gmail.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Paul Hampson <Paul.Hampson@Pobox.com>
Diffstat (limited to 'src/internal')
-rw-r--r--src/internal/syscall/execenv/execenv_windows.go3
-rw-r--r--src/internal/syscall/windows/registry/value.go2
-rw-r--r--src/internal/syscall/windows/syscall_windows.go7
3 files changed, 3 insertions, 9 deletions
diff --git a/src/internal/syscall/execenv/execenv_windows.go b/src/internal/syscall/execenv/execenv_windows.go
index 46ba12efc5..2a89ed1f58 100644
--- a/src/internal/syscall/execenv/execenv_windows.go
+++ b/src/internal/syscall/execenv/execenv_windows.go
@@ -9,7 +9,6 @@ package execenv
import (
"internal/syscall/windows"
"syscall"
- "unicode/utf16"
"unsafe"
)
@@ -41,7 +40,7 @@ func Default(sys *syscall.SysProcAttr) (env []string, err error) {
}
entry := unsafe.Slice(blockp, (uintptr(end)-uintptr(unsafe.Pointer(blockp)))/2)
- env = append(env, string(utf16.Decode(entry)))
+ env = append(env, syscall.UTF16ToString(entry))
blockp = (*uint16)(unsafe.Add(end, size))
}
return
diff --git a/src/internal/syscall/windows/registry/value.go b/src/internal/syscall/windows/registry/value.go
index 025574015f..7dfee0330f 100644
--- a/src/internal/syscall/windows/registry/value.go
+++ b/src/internal/syscall/windows/registry/value.go
@@ -217,7 +217,7 @@ func (k Key) GetStringsValue(name string) (val []string, valtype uint32, err err
from := 0
for i, c := range p {
if c == 0 {
- val = append(val, string(utf16.Decode(p[from:i])))
+ val = append(val, syscall.UTF16ToString(p[from:i]))
from = i + 1
}
}
diff --git a/src/internal/syscall/windows/syscall_windows.go b/src/internal/syscall/windows/syscall_windows.go
index cfe4695258..53d32a14a0 100644
--- a/src/internal/syscall/windows/syscall_windows.go
+++ b/src/internal/syscall/windows/syscall_windows.go
@@ -7,7 +7,6 @@ package windows
import (
"sync"
"syscall"
- "unicode/utf16"
"unsafe"
)
@@ -17,17 +16,13 @@ func UTF16PtrToString(p *uint16) string {
if p == nil {
return ""
}
- // Find NUL terminator.
end := unsafe.Pointer(p)
n := 0
for *(*uint16)(end) != 0 {
end = unsafe.Pointer(uintptr(end) + unsafe.Sizeof(*p))
n++
}
- // Turn *uint16 into []uint16.
- s := unsafe.Slice(p, n)
- // Decode []uint16 into string.
- return string(utf16.Decode(s))
+ return syscall.UTF16ToString(unsafe.Slice(p, n))
}
const (