diff options
author | Ryan Dahl <ry@tinyclouds.org> | 2010-08-03 10:33:16 -0700 |
---|---|---|
committer | Ryan Dahl <ry@tinyclouds.org> | 2010-08-03 10:33:16 -0700 |
commit | a6bc68a83beda7609330e525a73b7b227a1b9207 (patch) | |
tree | 062a3379d6db01815268a2278c6f5ed8b9f6298e /deps/v8/src/jsregexp.cc | |
parent | 4b19bd28967101b7afed3819cf595d4f04a1b11e (diff) | |
download | node-a6bc68a83beda7609330e525a73b7b227a1b9207.tar.gz |
Upgrade V8 to 2.3.4
Diffstat (limited to 'deps/v8/src/jsregexp.cc')
-rw-r--r-- | deps/v8/src/jsregexp.cc | 81 |
1 files changed, 24 insertions, 57 deletions
diff --git a/deps/v8/src/jsregexp.cc b/deps/v8/src/jsregexp.cc index 9f98782bb..c9afc62e1 100644 --- a/deps/v8/src/jsregexp.cc +++ b/deps/v8/src/jsregexp.cc @@ -1272,7 +1272,7 @@ static int GetCaseIndependentLetters(uc16 character, bool ascii_subject, unibrow::uchar* letters) { int length = uncanonicalize.get(character, '\0', letters); - // Unibrow returns 0 or 1 for characters where case independependence is + // Unibrow returns 0 or 1 for characters where case independence is // trivial. if (length == 0) { letters[0] = character; @@ -4026,74 +4026,48 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, ranges->Add(CharacterRange::Singleton(chars[i])); } } - } else if (bottom <= kRangeCanonicalizeMax && - top <= kRangeCanonicalizeMax) { + } else { // If this is a range we expand the characters block by block, // expanding contiguous subranges (blocks) one at a time. // The approach is as follows. For a given start character we - // look up the block that contains it, for instance 'a' if the - // start character is 'c'. A block is characterized by the property - // that all characters uncanonicalize in the same way as the first - // element, except that each entry in the result is incremented - // by the distance from the first element. So a-z is a block - // because 'a' uncanonicalizes to ['a', 'A'] and the k'th letter - // uncanonicalizes to ['a' + k, 'A' + k]. - // Once we've found the start point we look up its uncanonicalization + // look up the remainder of the block that contains it (represented + // by the end point), for instance we find 'z' if the character + // is 'c'. A block is characterized by the property + // that all characters uncanonicalize in the same way, except that + // each entry in the result is incremented by the distance from the first + // element. So a-z is a block because 'a' uncanonicalizes to ['a', 'A'] and + // the k'th letter uncanonicalizes to ['a' + k, 'A' + k]. 
+ // Once we've found the end point we look up its uncanonicalization // and produce a range for each element. For instance for [c-f] - // we look up ['a', 'A'] and produce [c-f] and [C-F]. We then only + // we look up ['z', 'Z'] and produce [c-f] and [C-F]. We then only // add a range if it is not already contained in the input, so [c-f] // will be skipped but [C-F] will be added. If this range is not // completely contained in a block we do this for all the blocks - // covered by the range. + // covered by the range (handling characters that are not in a block + // as a "singleton block"). unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth]; - // First, look up the block that contains the 'bottom' character. - int length = canonrange.get(bottom, '\0', range); - if (length == 0) { - range[0] = bottom; - } else { - ASSERT_EQ(1, length); - } int pos = bottom; - // The start of the current block. Note that except for the first - // iteration 'start' is always equal to 'pos'. - int start; - // If it is not the start point of a block the entry contains the - // offset of the character from the start point. - if ((range[0] & kStartMarker) == 0) { - start = pos - range[0]; - } else { - start = pos; - } - // Then we add the ranges one at a time, incrementing the current - // position to be after the last block each time. The position - // always points to the start of a block. while (pos < top) { - length = canonrange.get(start, '\0', range); + int length = canonrange.get(pos, '\0', range); + uc16 block_end; if (length == 0) { - range[0] = start; + block_end = pos; } else { ASSERT_EQ(1, length); + block_end = range[0]; } - ASSERT((range[0] & kStartMarker) != 0); - // The start point of a block contains the distance to the end - // of the range. - int block_end = start + (range[0] & kPayloadMask) - 1; int end = (block_end > top) ? 
top : block_end; - length = uncanonicalize.get(start, '\0', range); + length = uncanonicalize.get(block_end, '\0', range); for (int i = 0; i < length; i++) { uc32 c = range[i]; - uc16 range_from = c + (pos - start); - uc16 range_to = c + (end - start); + uc16 range_from = c - (block_end - pos); + uc16 range_to = c - (block_end - end); if (!(bottom <= range_from && range_to <= top)) { ranges->Add(CharacterRange(range_from, range_to)); } } - start = pos = block_end + 1; + pos = end + 1; } - } else { - // Unibrow ranges don't work for high characters due to the "2^11 bug". - // Therefore we do something dumber for these ranges. - AddUncanonicals(ranges, bottom, top); } } @@ -4208,20 +4182,14 @@ static void AddUncanonicals(ZoneList<CharacterRange>* ranges, // 0xa800 - 0xfaff // 0xfc00 - 0xfeff const int boundary_count = 18; - // The ASCII boundary and the kRangeCanonicalizeMax boundary are also in this - // array. This is to split up big ranges and not because they actually denote - // a case-mapping-free-zone. - ASSERT(CharacterRange::kRangeCanonicalizeMax < 0x600); - const int kFirstRealCaselessZoneIndex = 2; - int boundaries[] = {0x80, CharacterRange::kRangeCanonicalizeMax, + int boundaries[] = { 0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500, 0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00}; // Special ASCII rule from spec can save us some work here. if (bottom == 0x80 && top == 0xffff) return; - // We have optimized support for this range. - if (top <= CharacterRange::kRangeCanonicalizeMax) { + if (top <= boundaries[0]) { CharacterRange range(bottom, top); range.AddCaseEquivalents(ranges, false); return; @@ -4238,8 +4206,7 @@ static void AddUncanonicals(ZoneList<CharacterRange>* ranges, } // If we are completely in a zone with no case mappings then we are done. - // We start at 2 so as not to except the ASCII range from mappings. 
- for (int i = kFirstRealCaselessZoneIndex; i < boundary_count; i += 2) { + for (int i = 0; i < boundary_count; i += 2) { if (bottom >= boundaries[i] && top < boundaries[i + 1]) { #ifdef DEBUG for (int j = bottom; j <= top; j++) { |