diff options
Diffstat (limited to 'deps/v8/src/jsregexp.cc')
-rw-r--r-- | deps/v8/src/jsregexp.cc | 720 |
1 files changed, 671 insertions, 49 deletions
diff --git a/deps/v8/src/jsregexp.cc b/deps/v8/src/jsregexp.cc index 04d194419f..8af472d39e 100644 --- a/deps/v8/src/jsregexp.cc +++ b/deps/v8/src/jsregexp.cc @@ -112,37 +112,6 @@ static inline void ThrowRegExpException(Handle<JSRegExp> re, // Generic RegExp methods. Dispatches to implementation specific methods. -class OffsetsVector { - public: - inline OffsetsVector(int num_registers) - : offsets_vector_length_(num_registers) { - if (offsets_vector_length_ > kStaticOffsetsVectorSize) { - vector_ = NewArray<int>(offsets_vector_length_); - } else { - vector_ = static_offsets_vector_; - } - } - inline ~OffsetsVector() { - if (offsets_vector_length_ > kStaticOffsetsVectorSize) { - DeleteArray(vector_); - vector_ = NULL; - } - } - inline int* vector() { return vector_; } - inline int length() { return offsets_vector_length_; } - - private: - int* vector_; - int offsets_vector_length_; - static const int kStaticOffsetsVectorSize = 50; - static int static_offsets_vector_[kStaticOffsetsVectorSize]; -}; - - -int OffsetsVector::static_offsets_vector_[ - OffsetsVector::kStaticOffsetsVectorSize]; - - Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, Handle<String> pattern, Handle<String> flag_str) { @@ -448,6 +417,14 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp, ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); // The captures come in (start, end+1) pairs. for (int i = 0; i < number_of_capture_registers; i += 2) { + // Capture values are relative to start_offset only. + // Convert them to be relative to start of string. + if (captures_vector[i] >= 0) { + captures_vector[i] += previous_index; + } + if (captures_vector[i + 1] >= 0) { + captures_vector[i + 1] += previous_index; + } SetCapture(*array, i, captures_vector[i]); SetCapture(*array, i + 1, captures_vector[i + 1]); } @@ -1431,14 +1408,6 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler, int cp_offset, bool check_offset, bool preloaded) { - if (cc->is_standard() && - macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), - cp_offset, - check_offset, - on_failure)) { - return; - } - ZoneList<CharacterRange>* ranges = cc->ranges(); int max_char; if (ascii) { @@ -1489,6 +1458,12 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler, macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset); } + if (cc->is_standard() && + macro_assembler->CheckSpecialCharacterClass(cc->standard_type(), + on_failure)) { + return; + } + for (int i = 0; i < last_valid_range; i++) { CharacterRange& range = ranges->at(i); Label next_range; @@ -1626,8 +1601,8 @@ int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) { } -int NegativeLookaheadChoiceNode:: EatsAtLeast(int still_to_find, - int recursion_depth) { +int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, + int recursion_depth) { if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; // Alternative 0 is the negative lookahead, alternative 1 is what comes // afterwards. @@ -2049,6 +2024,12 @@ static void EmitWordCheck(RegExpMacroAssembler* assembler, Label* word, Label* non_word, bool fall_through_on_word) { + if (assembler->CheckSpecialCharacterClass( + fall_through_on_word ? 'w' : 'W', + fall_through_on_word ? non_word : word)) { + // Optimized implementation available. + return; + } assembler->CheckCharacterGT('z', non_word); assembler->CheckCharacterLT('0', non_word); assembler->CheckCharacterGT('a' - 1, word); @@ -2085,17 +2066,60 @@ static void EmitHat(RegExpCompiler* compiler, assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), false); - // Newline means \n, \r, 0x2028 or 0x2029. - if (!compiler->ascii()) { - assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); + if (!assembler->CheckSpecialCharacterClass('n', + new_trace.backtrack())) { + // Newline means \n, \r, 0x2028 or 0x2029. + if (!compiler->ascii()) { + assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok); + } + assembler->CheckCharacter('\n', &ok); + assembler->CheckNotCharacter('\r', new_trace.backtrack()); } - assembler->CheckCharacter('\n', &ok); - assembler->CheckNotCharacter('\r', new_trace.backtrack()); assembler->Bind(&ok); on_success->Emit(compiler, &new_trace); } +// Emit the code to handle \b and \B (word-boundary or non-word-boundary) +// when we know whether the next character must be a word character or not. +static void EmitHalfBoundaryCheck(AssertionNode::AssertionNodeType type, + RegExpCompiler* compiler, + RegExpNode* on_success, + Trace* trace) { + RegExpMacroAssembler* assembler = compiler->macro_assembler(); + Label done; + + Trace new_trace(*trace); + + bool expect_word_character = (type == AssertionNode::AFTER_WORD_CHARACTER); + Label* on_word = expect_word_character ? &done : new_trace.backtrack(); + Label* on_non_word = expect_word_character ? new_trace.backtrack() : &done; + + // Check whether previous character was a word character. + switch (trace->at_start()) { + case Trace::TRUE: + if (expect_word_character) { + assembler->GoTo(on_non_word); + } + break; + case Trace::UNKNOWN: + ASSERT_EQ(0, trace->cp_offset()); + assembler->CheckAtStart(on_non_word); + // Fall through. + case Trace::FALSE: + int prev_char_offset = trace->cp_offset() - 1; + assembler->LoadCurrentCharacter(prev_char_offset, NULL, false, 1); + EmitWordCheck(assembler, on_word, on_non_word, expect_word_character); + // We may or may not have loaded the previous character. + new_trace.InvalidateCurrentCharacter(); + } + + assembler->Bind(&done); + + on_success->Emit(compiler, &new_trace); +} + + // Emit the code to handle \b and \B (word-boundary or non-word-boundary). static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type, RegExpCompiler* compiler, @@ -2205,10 +2229,15 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) { case AFTER_NEWLINE: EmitHat(compiler, on_success(), trace); return; - case AT_NON_BOUNDARY: case AT_BOUNDARY: + case AT_NON_BOUNDARY: { EmitBoundaryCheck(type_, compiler, on_success(), trace); return; + } + case AFTER_WORD_CHARACTER: + case AFTER_NONWORD_CHARACTER: { + EmitHalfBoundaryCheck(type_, compiler, on_success(), trace); + } } on_success()->Emit(compiler, trace); } @@ -2791,7 +2820,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { // to generate probably can't use it. if (i != first_normal_choice) { alt_gen->expects_preload = false; - new_trace.set_characters_preloaded(0); + new_trace.InvalidateCurrentCharacter(); } if (i < choice_count - 1) { new_trace.set_backtrack(&alt_gen->after); @@ -3282,6 +3311,12 @@ void DotPrinter::VisitAssertion(AssertionNode* that) { case AssertionNode::AFTER_NEWLINE: stream()->Add("label=\"(?<=\\n)\", shape=septagon"); break; + case AssertionNode::AFTER_WORD_CHARACTER: + stream()->Add("label=\"(?<=\\w)\", shape=septagon"); + break; + case AssertionNode::AFTER_NONWORD_CHARACTER: + stream()->Add("label=\"(?<=\\W)\", shape=septagon"); + break; } stream()->Add("];\n"); PrintAttributes(that); @@ -3484,6 +3519,20 @@ bool RegExpCharacterClass::is_standard() { set_.set_standard_set_type('.'); return true; } + if (CompareRanges(set_.ranges(), + kLineTerminatorRanges, + kLineTerminatorRangeCount)) { + set_.set_standard_set_type('n'); + return true; + } + if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { + set_.set_standard_set_type('w'); + return true; + } + if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) { + set_.set_standard_set_type('W'); + return true; + } return false; } @@ -4010,6 +4059,101 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges, } +bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) { + ASSERT_NOT_NULL(ranges); + int n = ranges->length(); + if (n <= 1) return true; + int max = ranges->at(0).to(); + for (int i = 1; i < n; i++) { + CharacterRange next_range = ranges->at(i); + if (next_range.from() <= max + 1) return false; + max = next_range.to(); + } + return true; +} + +SetRelation CharacterRange::WordCharacterRelation( + ZoneList<CharacterRange>* range) { + ASSERT(IsCanonical(range)); + int i = 0; // Word character range index. + int j = 0; // Argument range index. + ASSERT_NE(0, kWordRangeCount); + SetRelation result; + if (range->length() == 0) { + result.SetElementsInSecondSet(); + return result; + } + CharacterRange argument_range = range->at(0); + CharacterRange word_range = CharacterRange(kWordRanges[0], kWordRanges[1]); + while (i < kWordRangeCount && j < range->length()) { + // Check the two ranges for the five cases: + // - no overlap. + // - partial overlap (there are elements in both ranges that isn't + // in the other, and there are also elements that are in both). + // - argument range entirely inside word range. + // - word range entirely inside argument range. + // - ranges are completely equal. + + // First check for no overlap. The earlier range is not in the other set. + if (argument_range.from() > word_range.to()) { + // Ranges are disjoint. The earlier word range contains elements that + // cannot be in the argument set. + result.SetElementsInSecondSet(); + } else if (word_range.from() > argument_range.to()) { + // Ranges are disjoint. The earlier argument range contains elements that + // cannot be in the word set. + result.SetElementsInFirstSet(); + } else if (word_range.from() <= argument_range.from() && + word_range.to() >= argument_range.from()) { + result.SetElementsInBothSets(); + // argument range completely inside word range. + if (word_range.from() < argument_range.from() || + word_range.to() > argument_range.from()) { + result.SetElementsInSecondSet(); + } + } else if (word_range.from() >= argument_range.from() && + word_range.to() <= argument_range.from()) { + result.SetElementsInBothSets(); + result.SetElementsInFirstSet(); + } else { + // There is overlap, and neither is a subrange of the other + result.SetElementsInFirstSet(); + result.SetElementsInSecondSet(); + result.SetElementsInBothSets(); + } + if (result.NonTrivialIntersection()) { + // The result is as (im)precise as we can possibly make it. + return result; + } + // Progress the range(s) with minimal to-character. + uc16 word_to = word_range.to(); + uc16 argument_to = argument_range.to(); + if (argument_to <= word_to) { + j++; + if (j < range->length()) { + argument_range = range->at(j); + } + } + if (word_to <= argument_to) { + i += 2; + if (i < kWordRangeCount) { + word_range = CharacterRange(kWordRanges[i], kWordRanges[i + 1]); + } + } + } + // Check if anything wasn't compared in the loop. + if (i < kWordRangeCount) { + // word range contains something not in argument range. + result.SetElementsInSecondSet(); + } else if (j < range->length()) { + // Argument range contains something not in word range. + result.SetElementsInFirstSet(); + } + + return result; +} + + static void AddUncanonicals(ZoneList<CharacterRange>* ranges, int bottom, int top) { @@ -4119,6 +4263,287 @@ ZoneList<CharacterRange>* CharacterSet::ranges() { } +// Move a number of elements in a zonelist to another position +// in the same list. Handles overlapping source and target areas. +static void MoveRanges(ZoneList<CharacterRange>* list, + int from, + int to, + int count) { + // Ranges are potentially overlapping. + if (from < to) { + for (int i = count - 1; i >= 0; i--) { + list->at(to + i) = list->at(from + i); + } + } else { + for (int i = 0; i < count; i++) { + list->at(to + i) = list->at(from + i); + } + } +} + + +static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, + int count, + CharacterRange insert) { + // Inserts a range into list[0..count[, which must be sorted + // by from value and non-overlapping and non-adjacent, using at most + // list[0..count] for the result. Returns the number of resulting + // canonicalized ranges. Inserting a range may collapse existing ranges into + // fewer ranges, so the return value can be anything in the range 1..count+1. + uc16 from = insert.from(); + uc16 to = insert.to(); + int start_pos = 0; + int end_pos = count; + for (int i = count - 1; i >= 0; i--) { + CharacterRange current = list->at(i); + if (current.from() > to + 1) { + end_pos = i; + } else if (current.to() + 1 < from) { + start_pos = i + 1; + break; + } + } + + // Inserted range overlaps, or is adjacent to, ranges at positions + // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are + // not affected by the insertion. + // If start_pos == end_pos, the range must be inserted before start_pos. + // if start_pos < end_pos, the entire range from start_pos to end_pos + // must be merged with the insert range. + + if (start_pos == end_pos) { + // Insert between existing ranges at position start_pos. + if (start_pos < count) { + MoveRanges(list, start_pos, start_pos + 1, count - start_pos); + } + list->at(start_pos) = insert; + return count + 1; + } + if (start_pos + 1 == end_pos) { + // Replace single existing range at position start_pos. + CharacterRange to_replace = list->at(start_pos); + int new_from = Min(to_replace.from(), from); + int new_to = Max(to_replace.to(), to); + list->at(start_pos) = CharacterRange(new_from, new_to); + return count; + } + // Replace a number of existing ranges from start_pos to end_pos - 1. + // Move the remaining ranges down. + + int new_from = Min(list->at(start_pos).from(), from); + int new_to = Max(list->at(end_pos - 1).to(), to); + if (end_pos < count) { + MoveRanges(list, end_pos, start_pos + 1, count - end_pos); + } + list->at(start_pos) = CharacterRange(new_from, new_to); + return count - (end_pos - start_pos) + 1; +} + + +void CharacterSet::Canonicalize() { + // Special/default classes are always considered canonical. The result + // of calling ranges() will be sorted. + if (ranges_ == NULL) return; + CharacterRange::Canonicalize(ranges_); +} + + +void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) { + if (character_ranges->length() <= 1) return; + // Check whether ranges are already canonical (increasing, non-overlapping, + // non-adjacent). + int n = character_ranges->length(); + int max = character_ranges->at(0).to(); + int i = 1; + while (i < n) { + CharacterRange current = character_ranges->at(i); + if (current.from() <= max + 1) { + break; + } + max = current.to(); + i++; + } + // Canonical until the i'th range. If that's all of them, we are done. + if (i == n) return; + + // The ranges at index i and forward are not canonicalized. Make them so by + // doing the equivalent of insertion sort (inserting each into the previous + // list, in order). + // Notice that inserting a range can reduce the number of ranges in the + // result due to combining of adjacent and overlapping ranges. + int read = i; // Range to insert. + int num_canonical = i; // Length of canonicalized part of list. + do { + num_canonical = InsertRangeInCanonicalList(character_ranges, + num_canonical, + character_ranges->at(read)); + read++; + } while (read < n); + character_ranges->Rewind(num_canonical); + + ASSERT(CharacterRange::IsCanonical(character_ranges)); +} + + +// Utility function for CharacterRange::Merge. Adds a range at the end of +// a canonicalized range list, if necessary merging the range with the last +// range of the list. +static void AddRangeToSet(ZoneList<CharacterRange>* set, CharacterRange range) { + if (set == NULL) return; + ASSERT(set->length() == 0 || set->at(set->length() - 1).to() < range.from()); + int n = set->length(); + if (n > 0) { + CharacterRange lastRange = set->at(n - 1); + if (lastRange.to() == range.from() - 1) { + set->at(n - 1) = CharacterRange(lastRange.from(), range.to()); + return; + } + } + set->Add(range); +} + + +static void AddRangeToSelectedSet(int selector, + ZoneList<CharacterRange>* first_set, + ZoneList<CharacterRange>* second_set, + ZoneList<CharacterRange>* intersection_set, + CharacterRange range) { + switch (selector) { + case kInsideFirst: + AddRangeToSet(first_set, range); + break; + case kInsideSecond: + AddRangeToSet(second_set, range); + break; + case kInsideBoth: + AddRangeToSet(intersection_set, range); + break; + } +} + + + +void CharacterRange::Merge(ZoneList<CharacterRange>* first_set, + ZoneList<CharacterRange>* second_set, + ZoneList<CharacterRange>* first_set_only_out, + ZoneList<CharacterRange>* second_set_only_out, + ZoneList<CharacterRange>* both_sets_out) { + // Inputs are canonicalized. + ASSERT(CharacterRange::IsCanonical(first_set)); + ASSERT(CharacterRange::IsCanonical(second_set)); + // Outputs are empty, if applicable. + ASSERT(first_set_only_out == NULL || first_set_only_out->length() == 0); + ASSERT(second_set_only_out == NULL || second_set_only_out->length() == 0); + ASSERT(both_sets_out == NULL || both_sets_out->length() == 0); + + // Merge sets by iterating through the lists in order of lowest "from" value, + // and putting intervals into one of three sets. + + if (first_set->length() == 0) { + second_set_only_out->AddAll(*second_set); + return; + } + if (second_set->length() == 0) { + first_set_only_out->AddAll(*first_set); + return; + } + // Indices into input lists. + int i1 = 0; + int i2 = 0; + // Cache length of input lists. + int n1 = first_set->length(); + int n2 = second_set->length(); + // Current range. May be invalid if state is kInsideNone. + int from = 0; + int to = -1; + // Where current range comes from. + int state = kInsideNone; + + while (i1 < n1 || i2 < n2) { + CharacterRange next_range; + int range_source; + if (i2 == n2 || first_set->at(i1).from() < second_set->at(i2).from()) { + next_range = first_set->at(i1++); + range_source = kInsideFirst; + } else { + next_range = second_set->at(i2++); + range_source = kInsideSecond; + } + if (to < next_range.from()) { + // Ranges disjoint: |current| |next| + AddRangeToSelectedSet(state, + first_set_only_out, + second_set_only_out, + both_sets_out, + CharacterRange(from, to)); + from = next_range.from(); + to = next_range.to(); + state = range_source; + } else { + if (from < next_range.from()) { + AddRangeToSelectedSet(state, + first_set_only_out, + second_set_only_out, + both_sets_out, + CharacterRange(from, next_range.from()-1)); + } + if (to < next_range.to()) { + // Ranges overlap: |current| + // |next| + AddRangeToSelectedSet(state | range_source, + first_set_only_out, + second_set_only_out, + both_sets_out, + CharacterRange(next_range.from(), to)); + from = to + 1; + to = next_range.to(); + state = range_source; + } else { + // Range included: |current| , possibly ending at same character. + // |next| + AddRangeToSelectedSet( + state | range_source, + first_set_only_out, + second_set_only_out, + both_sets_out, + CharacterRange(next_range.from(), next_range.to())); + from = next_range.to() + 1; + // If ranges end at same character, both ranges are consumed completely. + if (next_range.to() == to) state = kInsideNone; + } + } + } + AddRangeToSelectedSet(state, + first_set_only_out, + second_set_only_out, + both_sets_out, + CharacterRange(from, to)); +} + + +void CharacterRange::Negate(ZoneList<CharacterRange>* ranges, + ZoneList<CharacterRange>* negated_ranges) { + ASSERT(CharacterRange::IsCanonical(ranges)); + ASSERT_EQ(0, negated_ranges->length()); + int range_count = ranges->length(); + uc16 from = 0; + int i = 0; + if (range_count > 0 && ranges->at(0).from() == 0) { + from = ranges->at(0).to(); + i = 1; + } + while (i < range_count) { + CharacterRange range = ranges->at(i); + negated_ranges->Add(CharacterRange(from + 1, range.from() - 1)); + from = range.to(); + i++; + } + if (from < String::kMaxUC16CharCode) { + negated_ranges->Add(CharacterRange(from + 1, String::kMaxUC16CharCode)); + } +} + + // ------------------------------------------------------------------- // Interest propagation @@ -4410,9 +4835,203 @@ void Analysis::VisitBackReference(BackReferenceNode* that) { void Analysis::VisitAssertion(AssertionNode* that) { EnsureAnalyzed(that->on_success()); + AssertionNode::AssertionNodeType type = that->type(); + if (type == AssertionNode::AT_BOUNDARY || + type == AssertionNode::AT_NON_BOUNDARY) { + // Check if the following character is known to be a word character + // or known to not be a word character. + ZoneList<CharacterRange>* following_chars = that->FirstCharacterSet(); + + CharacterRange::Canonicalize(following_chars); + + SetRelation word_relation = + CharacterRange::WordCharacterRelation(following_chars); + if (word_relation.ContainedIn()) { + // Following character is definitely a word character. + type = (type == AssertionNode::AT_BOUNDARY) ? + AssertionNode::AFTER_NONWORD_CHARACTER : + AssertionNode::AFTER_WORD_CHARACTER; + that->set_type(type); + } else if (word_relation.Disjoint()) { + // Following character is definitely *not* a word character. + type = (type == AssertionNode::AT_BOUNDARY) ? + AssertionNode::AFTER_WORD_CHARACTER : + AssertionNode::AFTER_NONWORD_CHARACTER; + that->set_type(type); + } + } +} + + +ZoneList<CharacterRange>* RegExpNode::FirstCharacterSet() { + if (first_character_set_ == NULL) { + if (ComputeFirstCharacterSet(kFirstCharBudget) < 0) { + // If we can't find an exact solution within the budget, we + // set the value to the set of every character, i.e., all characters + // are possible. + ZoneList<CharacterRange>* all_set = new ZoneList<CharacterRange>(1); + all_set->Add(CharacterRange::Everything()); + first_character_set_ = all_set; + } + } + return first_character_set_; +} + + +int RegExpNode::ComputeFirstCharacterSet(int budget) { + // Default behavior is to not be able to determine the first character. + return kComputeFirstCharacterSetFail; } +int LoopChoiceNode::ComputeFirstCharacterSet(int budget) { + budget--; + if (budget >= 0) { + // Find loop min-iteration. It's the value of the guarded choice node + // with a GEQ guard, if any. + int min_repetition = 0; + + for (int i = 0; i <= 1; i++) { + GuardedAlternative alternative = alternatives()->at(i); + ZoneList<Guard*>* guards = alternative.guards(); + if (guards != NULL && guards->length() > 0) { + Guard* guard = guards->at(0); + if (guard->op() == Guard::GEQ) { + min_repetition = guard->value(); + break; + } + } + } + + budget = loop_node()->ComputeFirstCharacterSet(budget); + if (budget >= 0) { + ZoneList<CharacterRange>* character_set = + loop_node()->first_character_set(); + if (body_can_be_zero_length() || min_repetition == 0) { + budget = continue_node()->ComputeFirstCharacterSet(budget); + if (budget < 0) return budget; + ZoneList<CharacterRange>* body_set = + continue_node()->first_character_set(); + ZoneList<CharacterRange>* union_set = + new ZoneList<CharacterRange>(Max(character_set->length(), + body_set->length())); + CharacterRange::Merge(character_set, + body_set, + union_set, + union_set, + union_set); + character_set = union_set; + } + set_first_character_set(character_set); + } + } + return budget; +} + + +int NegativeLookaheadChoiceNode::ComputeFirstCharacterSet(int budget) { + budget--; + if (budget >= 0) { + GuardedAlternative successor = this->alternatives()->at(1); + RegExpNode* successor_node = successor.node(); + budget = successor_node->ComputeFirstCharacterSet(budget); + if (budget >= 0) { + set_first_character_set(successor_node->first_character_set()); + } + } + return budget; +} + + +// The first character set of an EndNode is unknowable. Just use the +// default implementation that fails and returns all characters as possible. + + +int AssertionNode::ComputeFirstCharacterSet(int budget) { + budget -= 1; + if (budget >= 0) { + switch (type_) { + case AT_END: { + set_first_character_set(new ZoneList<CharacterRange>(0)); + break; + } + case AT_START: + case AT_BOUNDARY: + case AT_NON_BOUNDARY: + case AFTER_NEWLINE: + case AFTER_NONWORD_CHARACTER: + case AFTER_WORD_CHARACTER: { + ASSERT_NOT_NULL(on_success()); + budget = on_success()->ComputeFirstCharacterSet(budget); + set_first_character_set(on_success()->first_character_set()); + break; + } + } + } + return budget; +} + + +int ActionNode::ComputeFirstCharacterSet(int budget) { + if (type_ == POSITIVE_SUBMATCH_SUCCESS) return kComputeFirstCharacterSetFail; + budget--; + if (budget >= 0) { + ASSERT_NOT_NULL(on_success()); + budget = on_success()->ComputeFirstCharacterSet(budget); + if (budget >= 0) { + set_first_character_set(on_success()->first_character_set()); + } + } + return budget; +} + + +int BackReferenceNode::ComputeFirstCharacterSet(int budget) { + // We don't know anything about the first character of a backreference + // at this point. + return kComputeFirstCharacterSetFail; +} + + +int TextNode::ComputeFirstCharacterSet(int budget) { + budget--; + if (budget >= 0) { + ASSERT_NE(0, elements()->length()); + TextElement text = elements()->at(0); + if (text.type == TextElement::ATOM) { + RegExpAtom* atom = text.data.u_atom; + ASSERT_NE(0, atom->length()); + uc16 first_char = atom->data()[0]; + ZoneList<CharacterRange>* range = new ZoneList<CharacterRange>(1); + range->Add(CharacterRange(first_char, first_char)); + set_first_character_set(range); + } else { + ASSERT(text.type == TextElement::CHAR_CLASS); + RegExpCharacterClass* char_class = text.data.u_char_class; + if (char_class->is_negated()) { + ZoneList<CharacterRange>* ranges = char_class->ranges(); + int length = ranges->length(); + int new_length = length + 1; + if (length > 0) { + if (ranges->at(0).from() == 0) new_length--; + if (ranges->at(length - 1).to() == String::kMaxUC16CharCode) { + new_length--; + } + } + ZoneList<CharacterRange>* negated_ranges = + new ZoneList<CharacterRange>(new_length); + CharacterRange::Negate(ranges, negated_ranges); + set_first_character_set(negated_ranges); + } else { + set_first_character_set(char_class->ranges()); + } + } + } + return budget; +} + + + // ------------------------------------------------------------------- // Dispatch table construction @@ -4471,7 +5090,6 @@ void DispatchTableConstructor::VisitAssertion(AssertionNode* that) { } - static int CompareRangeByFrom(const CharacterRange* a, const CharacterRange* b) { return Compare<uc16>(a->from(), b->from()); @@ -4606,4 +5224,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, pattern); } + +int OffsetsVector::static_offsets_vector_[ + OffsetsVector::kStaticOffsetsVectorSize]; + }} // namespace v8::internal |