Upgrade V8 to 2.3.4

author: Ryan Dahl <ry@tinyclouds.org> 2010-08-03 10:33:16 -0700
committer: Ryan Dahl <ry@tinyclouds.org> 2010-08-03 10:33:16 -0700
commit: a6bc68a83beda7609330e525a73b7b227a1b9207 (patch)
tree: 062a3379d6db01815268a2278c6f5ed8b9f6298e /deps/v8/src/jsregexp.cc
parent: 4b19bd28967101b7afed3819cf595d4f04a1b11e (diff)
download: node-a6bc68a83beda7609330e525a73b7b227a1b9207.tar.gz
1 files changed, 24 insertions, 57 deletions
diff --git a/deps/v8/src/jsregexp.cc b/deps/v8/src/jsregexp.cc
index 9f98782bb..c9afc62e1 100644
--- a/deps/v8/src/jsregexp.cc
+++ b/deps/v8/src/jsregexp.cc
@@ -1272,7 +1272,7 @@ static int GetCaseIndependentLetters(uc16 character,
                                      bool ascii_subject,
                                      unibrow::uchar* letters) {
   int length = uncanonicalize.get(character, '\0', letters);
-  // Unibrow returns 0 or 1 for characters where case independependence is
+  // Unibrow returns 0 or 1 for characters where case independence is
   // trivial.
   if (length == 0) {
     letters[0] = character;
@@ -4026,74 +4026,48 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
         ranges->Add(CharacterRange::Singleton(chars[i]));
       }
     }
-  } else if (bottom <= kRangeCanonicalizeMax &&
-             top <= kRangeCanonicalizeMax) {
+  } else {
     // If this is a range we expand the characters block by block,
     // expanding contiguous subranges (blocks) one at a time.
     // The approach is as follows.  For a given start character we
-    // look up the block that contains it, for instance 'a' if the
-    // start character is 'c'.  A block is characterized by the property
-    // that all characters uncanonicalize in the same way as the first
-    // element, except that each entry in the result is incremented
-    // by the distance from the first element.  So a-z is a block
-    // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter
-    // uncanonicalizes to ['a' + k, 'A' + k].
-    // Once we've found the start point we look up its uncanonicalization
+    // look up the remainder of the block that contains it (represented
+    // by the end point), for instance we find 'z' if the character
+    // is 'c'.  A block is characterized by the property
+    // that all characters uncanonicalize in the same way, except that
+    // each entry in the result is incremented by the distance from the first
+    // element.  So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and
+    // the k'th letter uncanonicalizes to ['a' + k, 'A' + k].
+    // Once we've found the end point we look up its uncanonicalization
     // and produce a range for each element.  For instance for [c-f]
-    // we look up ['a', 'A'] and produce [c-f] and [C-F].  We then only
+    // we look up ['z', 'Z'] and produce [c-f] and [C-F].  We then only
     // add a range if it is not already contained in the input, so [c-f]
     // will be skipped but [C-F] will be added.  If this range is not
     // completely contained in a block we do this for all the blocks
-    // covered by the range.
+    // covered by the range (handling characters that is not in a block
+    // as a "singleton block").
     unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-    // First, look up the block that contains the 'bottom' character.
-    int length = canonrange.get(bottom, '\0', range);
-    if (length == 0) {
-      range[0] = bottom;
-    } else {
-      ASSERT_EQ(1, length);
-    }
     int pos = bottom;
-    // The start of the current block.  Note that except for the first
-    // iteration 'start' is always equal to 'pos'.
-    int start;
-    // If it is not the start point of a block the entry contains the
-    // offset of the character from the start point.
-    if ((range[0] & kStartMarker) == 0) {
-      start = pos - range[0];
-    } else {
-      start = pos;
-    }
-    // Then we add the ranges one at a time, incrementing the current
-    // position to be after the last block each time.  The position
-    // always points to the start of a block.
     while (pos < top) {
-      length = canonrange.get(start, '\0', range);
+      int length = canonrange.get(pos, '\0', range);
+      uc16 block_end;
       if (length == 0) {
-        range[0] = start;
+        block_end = pos;
       } else {
         ASSERT_EQ(1, length);
+        block_end = range[0];
       }
-      ASSERT((range[0] & kStartMarker) != 0);
-      // The start point of a block contains the distance to the end
-      // of the range.
-      int block_end = start + (range[0] & kPayloadMask) - 1;
       int end = (block_end > top) ? top : block_end;
-      length = uncanonicalize.get(start, '\0', range);
+      length = uncanonicalize.get(block_end, '\0', range);
       for (int i = 0; i < length; i++) {
         uc32 c = range[i];
-        uc16 range_from = c + (pos - start);
-        uc16 range_to = c + (end - start);
+        uc16 range_from = c - (block_end - pos);
+        uc16 range_to = c - (block_end - end);
         if (!(bottom <= range_from && range_to <= top)) {
           ranges->Add(CharacterRange(range_from, range_to));
         }
       }
-      start = pos = block_end + 1;
+      pos = end + 1;
     }
-  } else {
-    // Unibrow ranges don't work for high characters due to the "2^11 bug".
-    // Therefore we do something dumber for these ranges.
-    AddUncanonicals(ranges, bottom, top);
   }
 }
 
@@ -4208,20 +4182,14 @@ static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
   // 0xa800 - 0xfaff
   // 0xfc00 - 0xfeff
   const int boundary_count = 18;
-  // The ASCII boundary and the kRangeCanonicalizeMax boundary are also in this
-  // array.  This is to split up big ranges and not because they actually denote
-  // a case-mapping-free-zone.
-  ASSERT(CharacterRange::kRangeCanonicalizeMax < 0x600);
-  const int kFirstRealCaselessZoneIndex = 2;
-  int boundaries[] = {0x80, CharacterRange::kRangeCanonicalizeMax,
+  int boundaries[] = {
       0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,
       0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};
 
   // Special ASCII rule from spec can save us some work here.
   if (bottom == 0x80 && top == 0xffff) return;
 
-  // We have optimized support for this range.
-  if (top <= CharacterRange::kRangeCanonicalizeMax) {
+  if (top <= boundaries[0]) {
     CharacterRange range(bottom, top);
     range.AddCaseEquivalents(ranges, false);
     return;
@@ -4238,8 +4206,7 @@ static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
   }
 
   // If we are completely in a zone with no case mappings then we are done.
-  // We start at 2 so as not to except the ASCII range from mappings.
-  for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) {
+  for (int i = 0; i < boundary_count; i += 2) {
     if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
 #ifdef DEBUG
       for (int j = bottom; j <= top; j++) {
author	Ryan Dahl <ry@tinyclouds.org>	2010-08-03 10:33:16 -0700
committer	Ryan Dahl <ry@tinyclouds.org>	2010-08-03 10:33:16 -0700
commit	a6bc68a83beda7609330e525a73b7b227a1b9207 (patch)
tree	062a3379d6db01815268a2278c6f5ed8b9f6298e /deps/v8/src/jsregexp.cc
parent	4b19bd28967101b7afed3819cf595d4f04a1b11e (diff)
download	node-a6bc68a83beda7609330e525a73b7b227a1b9207.tar.gz