summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorKei Son <hey.calmdown@gmail.com>2009-12-11 10:37:48 -0800
committerKei Son <hey.calmdown@gmail.com>2009-12-11 10:37:48 -0800
commit42506b7ab20df820f52da6a1ab0edd97231feba2 (patch)
tree3016eecdbaea6a8f70831cb6a56929843bcceaa8 /src
parent2201430392d52a5f43d3808b5856deeb4a8e8914 (diff)
downloadgo-42506b7ab20df820f52da6a1ab0edd97231feba2.tar.gz
bytes, strings: allow -1 in Map to mean "drop this character".
xml: drop invalid characters in attribute names when constructing struct field names. R=rsc CC=r http://codereview.appspot.com/157104 Committer: Russ Cox <rsc@golang.org>
Diffstat (limited to 'src')
-rw-r--r--src/pkg/bytes/bytes.go21
-rw-r--r--src/pkg/bytes/bytes_test.go13
-rw-r--r--src/pkg/strings/strings.go29
-rw-r--r--src/pkg/strings/strings_test.go13
-rw-r--r--src/pkg/xml/read.go19
-rw-r--r--src/pkg/xml/read_test.go22
6 files changed, 91 insertions, 26 deletions
diff --git a/src/pkg/bytes/bytes.go b/src/pkg/bytes/bytes.go
index 9ab199ceb..85d4f9fd7 100644
--- a/src/pkg/bytes/bytes.go
+++ b/src/pkg/bytes/bytes.go
@@ -207,7 +207,8 @@ func HasSuffix(s, suffix []byte) bool {
}
// Map returns a copy of the byte array s with all its characters modified
-// according to the mapping function.
+// according to the mapping function. If mapping returns a negative value, the character is
+// dropped from the string with no replacement.
func Map(mapping func(rune int) int, s []byte) []byte {
// In the worst case, the array can grow when mapped, making
// things unpleasant. But it's so rare we barge in assuming it's
@@ -222,16 +223,18 @@ func Map(mapping func(rune int) int, s []byte) []byte {
rune, wid = utf8.DecodeRune(s[i:])
}
rune = mapping(rune);
- if nbytes+utf8.RuneLen(rune) > maxbytes {
- // Grow the buffer.
- maxbytes = maxbytes*2 + utf8.UTFMax;
- nb := make([]byte, maxbytes);
- for i, c := range b[0:nbytes] {
- nb[i] = c
+ if rune >= 0 {
+ if nbytes+utf8.RuneLen(rune) > maxbytes {
+ // Grow the buffer.
+ maxbytes = maxbytes*2 + utf8.UTFMax;
+ nb := make([]byte, maxbytes);
+ for i, c := range b[0:nbytes] {
+ nb[i] = c
+ }
+ b = nb;
}
- b = nb;
+ nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
}
- nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
i += wid;
}
return b[0:nbytes];
diff --git a/src/pkg/bytes/bytes_test.go b/src/pkg/bytes/bytes_test.go
index 553ceb7c5..3f77e6e9f 100644
--- a/src/pkg/bytes/bytes_test.go
+++ b/src/pkg/bytes/bytes_test.go
@@ -365,6 +365,19 @@ func TestMap(t *testing.T) {
if string(m) != expect {
t.Errorf("rot13: expected %q got %q", expect, m)
}
+
+ // 5. Drop
+ dropNotLatin := func(rune int) int {
+ if unicode.Is(unicode.Latin, rune) {
+ return rune
+ }
+ return -1;
+ };
+ m = Map(dropNotLatin, Bytes("Hello, 세계"));
+ expect = "Hello";
+ if string(m) != expect {
+ t.Errorf("drop: expected %q got %q", expect, m)
+ }
}
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
diff --git a/src/pkg/strings/strings.go b/src/pkg/strings/strings.go
index 7be98e6c1..4e375b4d5 100644
--- a/src/pkg/strings/strings.go
+++ b/src/pkg/strings/strings.go
@@ -178,7 +178,8 @@ func HasSuffix(s, suffix string) bool {
}
// Map returns a copy of the string s with all its characters modified
-// according to the mapping function.
+// according to the mapping function. If mapping returns a negative value, the character is
+// dropped from the string with no replacement.
func Map(mapping func(rune int) int, s string) string {
// In the worst case, the string can grow when mapped, making
// things unpleasant. But it's so rare we barge in assuming it's
@@ -188,20 +189,22 @@ func Map(mapping func(rune int) int, s string) string {
b := make([]byte, maxbytes);
for _, c := range s {
rune := mapping(c);
- wid := 1;
- if rune >= utf8.RuneSelf {
- wid = utf8.RuneLen(rune)
- }
- if nbytes+wid > maxbytes {
- // Grow the buffer.
- maxbytes = maxbytes*2 + utf8.UTFMax;
- nb := make([]byte, maxbytes);
- for i, c := range b[0:nbytes] {
- nb[i] = c
+ if rune >= 0 {
+ wid := 1;
+ if rune >= utf8.RuneSelf {
+ wid = utf8.RuneLen(rune)
+ }
+ if nbytes+wid > maxbytes {
+ // Grow the buffer.
+ maxbytes = maxbytes*2 + utf8.UTFMax;
+ nb := make([]byte, maxbytes);
+ for i, c := range b[0:nbytes] {
+ nb[i] = c
+ }
+ b = nb;
}
- b = nb;
+ nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
}
- nbytes += utf8.EncodeRune(rune, b[nbytes:maxbytes]);
}
return string(b[0:nbytes]);
}
diff --git a/src/pkg/strings/strings_test.go b/src/pkg/strings/strings_test.go
index ce77c5c2f..e3e7f38ae 100644
--- a/src/pkg/strings/strings_test.go
+++ b/src/pkg/strings/strings_test.go
@@ -279,6 +279,19 @@ func TestMap(t *testing.T) {
if m != expect {
t.Errorf("rot13: expected %q got %q", expect, m)
}
+
+ // 5. Drop
+ dropNotLatin := func(rune int) int {
+ if unicode.Is(unicode.Latin, rune) {
+ return rune
+ }
+ return -1;
+ };
+ m = Map(dropNotLatin, "Hello, 세계");
+ expect = "Hello";
+ if m != expect {
+ t.Errorf("drop: expected %q got %q", expect, m)
+ }
}
func TestToUpper(t *testing.T) { runStringTests(t, ToUpper, "ToUpper", upperTests) }
diff --git a/src/pkg/xml/read.go b/src/pkg/xml/read.go
index 568578723..d8ee78a12 100644
--- a/src/pkg/xml/read.go
+++ b/src/pkg/xml/read.go
@@ -10,6 +10,7 @@ import (
"os";
"reflect";
"strings";
+ "unicode";
)
// BUG(rsc): Mapping between XML elements and data structures is inherently flawed:
@@ -144,6 +145,20 @@ func (p *Parser) Unmarshal(val interface{}, start *StartElement) os.Error {
return p.unmarshal(v.Elem(), start);
}
+// fieldName strips invalid characters from an XML name
+// to create a valid Go struct name. It also converts the
+// name to lower case letters.
+func fieldName(original string) string {
+ return strings.Map(
+ func(x int) int {
+ if unicode.IsDigit(x) || unicode.IsLetter(x) {
+ return unicode.ToLower(x)
+ }
+ return -1;
+ },
+ original)
+}
+
// Unmarshal a single XML element into val.
func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error {
// Find start element if we need it.
@@ -269,7 +284,7 @@ func (p *Parser) unmarshal(val reflect.Value, start *StartElement) os.Error {
val := "";
k := strings.ToLower(f.Name);
for _, a := range start.Attr {
- if strings.ToLower(a.Name.Local) == k {
+ if fieldName(a.Name.Local) == k {
val = a.Value;
break;
}
@@ -303,7 +318,7 @@ Loop:
// Look up by tag name.
// If that fails, fall back to mop-up field named "Any".
if sv != nil {
- k := strings.ToLower(t.Name.Local);
+ k := fieldName(t.Name.Local);
any := -1;
for i, n := 0, styp.NumField(); i < n; i++ {
f := styp.Field(i);
diff --git a/src/pkg/xml/read_test.go b/src/pkg/xml/read_test.go
index 14ad11a31..ca9b30d9b 100644
--- a/src/pkg/xml/read_test.go
+++ b/src/pkg/xml/read_test.go
@@ -24,8 +24,8 @@ func TestUnmarshalFeed(t *testing.T) {
// hget http://codereview.appspot.com/rss/mine/rsc
const rssFeedString = `
<?xml version="1.0" encoding="utf-8"?>
-<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><link href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></link><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld</name></author><entry><title>rietveld: an attempt at pubsubhubbub
-</title><link href="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html">
+<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en-us"><title>Code Review - My issues</title><link href="http://codereview.appspot.com/" rel="alternate"></link><li-nk href="http://codereview.appspot.com/rss/mine/rsc" rel="self"></li-nk><id>http://codereview.appspot.com/</id><updated>2009-10-04T01:35:58+00:00</updated><author><name>rietveld</name></author><entry><title>rietveld: an attempt at pubsubhubbub
+</title><link hre-f="http://codereview.appspot.com/126085" rel="alternate"></link><updated>2009-10-04T01:35:58+00:00</updated><author><name>email-address-removed</name></author><id>urn:md5:134d9179c41f806be79b3a5f7877d19a</id><summary type="html">
An attempt at adding pubsubhubbub support to Rietveld.
http://code.google.com/p/pubsubhubbub
http://code.google.com/p/rietveld/issues/detail?id=155
@@ -208,3 +208,21 @@ not being used from outside intra_region_diff.py.
},
},
}
+
+type FieldNameTest struct {
+ in, out string;
+}
+
+var FieldNameTests = []FieldNameTest{
+ FieldNameTest{"Profile-Image", "profileimage"},
+ FieldNameTest{"_score", "score"},
+}
+
+func TestFieldName(t *testing.T) {
+ for _, tt := range FieldNameTests {
+ a := fieldName(tt.in);
+ if a != tt.out {
+ t.Fatalf("have %#v\nwant %#v\n\n", a, tt.out)
+ }
+ }
+}