diff options
Diffstat (limited to 'libgo/go/strings/replace.go')
-rw-r--r-- | libgo/go/strings/replace.go | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/libgo/go/strings/replace.go b/libgo/go/strings/replace.go new file mode 100644 index 00000000000..64a7f208b93 --- /dev/null +++ b/libgo/go/strings/replace.go @@ -0,0 +1,315 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package strings + +import ( + "io" + "os" +) + +// A Replacer replaces a list of strings with replacements. +type Replacer struct { + r replacer +} + +// replacer is the interface that a replacement algorithm needs to implement. +type replacer interface { + Replace(s string) string + WriteString(w io.Writer, s string) (n int, err os.Error) +} + +// byteBitmap represents bytes which are sought for replacement. +// byteBitmap is 256 bits wide, with a bit set for each old byte to be +// replaced. +type byteBitmap [256 / 32]uint32 + +func (m *byteBitmap) set(b byte) { + m[b>>5] |= uint32(1 << (b & 31)) +} + +// NewReplacer returns a new Replacer from a list of old, new string pairs. +// Replacements are performed in order, without overlapping matches. +func NewReplacer(oldnew ...string) *Replacer { + if len(oldnew)%2 == 1 { + panic("strings.NewReplacer: odd argument count") + } + + // Possible implementations. + var ( + bb byteReplacer + bs byteStringReplacer + gen genericReplacer + ) + + allOldBytes, allNewBytes := true, true + for len(oldnew) > 0 { + old, new := oldnew[0], oldnew[1] + oldnew = oldnew[2:] + if len(old) != 1 { + allOldBytes = false + } + if len(new) != 1 { + allNewBytes = false + } + + // generic + gen.p = append(gen.p, pair{old, new}) + + // byte -> string + if allOldBytes { + bs.old.set(old[0]) + bs.new[old[0]] = []byte(new) + } + + // byte -> byte + if allOldBytes && allNewBytes { + bb.old.set(old[0]) + bb.new[old[0]] = new[0] + } + } + + if allOldBytes && allNewBytes { + return &Replacer{r: &bb} + } + if allOldBytes { + return &Replacer{r: &bs} + } + return &Replacer{r: &gen} +} + +// Replace returns a copy of s with all replacements performed. +func (r *Replacer) Replace(s string) string { + return r.r.Replace(s) +} + +// WriteString writes s to w with all replacements performed. +func (r *Replacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + return r.r.WriteString(w, s) +} + +// genericReplacer is the fully generic (and least optimized) algorithm. +// It's used as a fallback when nothing faster can be used. +type genericReplacer struct { + p []pair +} + +type pair struct{ old, new string } + +type appendSliceWriter struct { + b []byte +} + +func (w *appendSliceWriter) Write(p []byte) (int, os.Error) { + w.b = append(w.b, p...) + return len(p), nil +} + +func (r *genericReplacer) Replace(s string) string { + // TODO(bradfitz): optimized version + n, _ := r.WriteString(discard, s) + w := appendSliceWriter{make([]byte, 0, n)} + r.WriteString(&w, s) + return string(w.b) +} + +func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + lastEmpty := false // the last replacement was of the empty string +Input: + // TODO(bradfitz): optimized version + for i := 0; i < len(s); { + for _, p := range r.p { + if p.old == "" && lastEmpty { + // Don't let old match twice in a row. + // (it doesn't advance the input and + // would otherwise loop forever) + continue + } + if HasPrefix(s[i:], p.old) { + if p.new != "" { + wn, err := w.Write([]byte(p.new)) + n += wn + if err != nil { + return n, err + } + } + i += len(p.old) + lastEmpty = p.old == "" + continue Input + } + } + wn, err := w.Write([]byte{s[i]}) + n += wn + if err != nil { + return n, err + } + i++ + } + + // Final empty match at end. + for _, p := range r.p { + if p.old == "" { + if p.new != "" { + wn, err := w.Write([]byte(p.new)) + n += wn + if err != nil { + return n, err + } + } + break + } + } + + return n, nil +} + +// byteReplacer is the implementation that's used when all the "old" +// and "new" values are single ASCII bytes. +type byteReplacer struct { + // old has a bit set for each old byte that should be replaced. + old byteBitmap + + // replacement byte, indexed by old byte. only valid if + // corresponding old bit is set. + new [256]byte +} + +func (r *byteReplacer) Replace(s string) string { + var buf []byte // lazily allocated + for i := 0; i < len(s); i++ { + b := s[i] + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + if buf == nil { + buf = []byte(s) + } + buf[i] = r.new[b] + } + } + if buf == nil { + return s + } + return string(buf) +} + +func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + // TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation. + bufsize := 32 << 10 + if len(s) < bufsize { + bufsize = len(s) + } + buf := make([]byte, bufsize) + + for len(s) > 0 { + ncopy := copy(buf, s[:]) + s = s[ncopy:] + for i, b := range buf[:ncopy] { + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + buf[i] = r.new[b] + } + } + wn, err := w.Write(buf[:ncopy]) + n += wn + if err != nil { + return n, err + } + } + return n, nil +} + +// byteStringReplacer is the implementation that's used when all the +// "old" values are single ASCII bytes but the "new" values vary in +// size. +type byteStringReplacer struct { + // old has a bit set for each old byte that should be replaced. + old byteBitmap + + // replacement string, indexed by old byte. only valid if + // corresponding old bit is set. + new [256][]byte +} + +func (r *byteStringReplacer) Replace(s string) string { + newSize := 0 + anyChanges := false + for i := 0; i < len(s); i++ { + b := s[i] + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + anyChanges = true + newSize += len(r.new[b]) + } else { + newSize++ + } + } + if !anyChanges { + return s + } + buf := make([]byte, newSize) + bi := buf + for i := 0; i < len(s); i++ { + b := s[i] + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + n := copy(bi[:], r.new[b]) + bi = bi[n:] + } else { + bi[0] = b + bi = bi[1:] + } + } + return string(buf) +} + +// WriteString maintains one buffer that's at most 32KB. The bytes in +// s are enumerated and the buffer is filled. If it reaches its +// capacity or a byte has a replacement, the buffer is flushed to w. +func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err os.Error) { + // TODO(bradfitz): use io.WriteString with slices of s instead. + bufsize := 32 << 10 + if len(s) < bufsize { + bufsize = len(s) + } + buf := make([]byte, bufsize) + bi := buf[:0] + + for i := 0; i < len(s); i++ { + b := s[i] + var new []byte + if r.old[b>>5]&uint32(1<<(b&31)) != 0 { + new = r.new[b] + } else { + bi = append(bi, b) + } + if len(bi) == cap(bi) || (len(bi) > 0 && len(new) > 0) { + nw, err := w.Write(bi) + n += nw + if err != nil { + return n, err + } + bi = buf[:0] + } + if len(new) > 0 { + nw, err := w.Write(new) + n += nw + if err != nil { + return n, err + } + } + } + if len(bi) > 0 { + nw, err := w.Write(bi) + n += nw + if err != nil { + return n, err + } + } + return n, nil +} + +// strings is too low-level to import io/ioutil +var discard io.Writer = devNull(0) + +type devNull int + +func (devNull) Write(p []byte) (int, os.Error) { + return len(p), nil +} |