1 files changed, 671 insertions, 49 deletions
diff --git a/deps/v8/src/jsregexp.cc b/deps/v8/src/jsregexp.cc
index 04d194419f..8af472d39e 100644
--- a/deps/v8/src/jsregexp.cc
+++ b/deps/v8/src/jsregexp.cc
@@ -112,37 +112,6 @@ static inline void ThrowRegExpException(Handle<JSRegExp> re,
 // Generic RegExp methods. Dispatches to implementation specific methods.
 
 
-class OffsetsVector {
- public:
-  inline OffsetsVector(int num_registers)
-      : offsets_vector_length_(num_registers) {
-    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
-      vector_ = NewArray<int>(offsets_vector_length_);
-    } else {
-      vector_ = static_offsets_vector_;
-    }
-  }
-  inline ~OffsetsVector() {
-    if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
-      DeleteArray(vector_);
-      vector_ = NULL;
-    }
-  }
-  inline int* vector() { return vector_; }
-  inline int length() { return offsets_vector_length_; }
-
- private:
-  int* vector_;
-  int offsets_vector_length_;
-  static const int kStaticOffsetsVectorSize = 50;
-  static int static_offsets_vector_[kStaticOffsetsVectorSize];
-};
-
-
-int OffsetsVector::static_offsets_vector_[
-    OffsetsVector::kStaticOffsetsVectorSize];
-
-
 Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
                                    Handle<String> pattern,
                                    Handle<String> flag_str) {
@@ -448,6 +417,14 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
   ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
   // The captures come in (start, end+1) pairs.
   for (int i = 0; i < number_of_capture_registers; i += 2) {
+    // Capture values are relative to start_offset only.
+    // Convert them to be relative to start of string.
+    if (captures_vector[i] >= 0) {
+      captures_vector[i] += previous_index;
+    }
+    if (captures_vector[i + 1] >= 0) {
+      captures_vector[i + 1] += previous_index;
+    }
     SetCapture(*array, i, captures_vector[i]);
     SetCapture(*array, i + 1, captures_vector[i + 1]);
   }
@@ -1431,14 +1408,6 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
                           int cp_offset,
                           bool check_offset,
                           bool preloaded) {
-  if (cc->is_standard() &&
-      macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
-                                                  cp_offset,
-                                                  check_offset,
-                                                  on_failure)) {
-    return;
-  }
-
   ZoneList<CharacterRange>* ranges = cc->ranges();
   int max_char;
   if (ascii) {
@@ -1489,6 +1458,12 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
     macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
   }
 
+  if (cc->is_standard() &&
+        macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
+                                                    on_failure)) {
+      return;
+  }
+
   for (int i = 0; i < last_valid_range; i++) {
     CharacterRange& range = ranges->at(i);
     Label next_range;
@@ -1626,8 +1601,8 @@ int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) {
 }
 
 
-int NegativeLookaheadChoiceNode:: EatsAtLeast(int still_to_find,
-                                              int recursion_depth) {
+int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,
+                                             int recursion_depth) {
   if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
   // Alternative 0 is the negative lookahead, alternative 1 is what comes
   // afterwards.
@@ -2049,6 +2024,12 @@ static void EmitWordCheck(RegExpMacroAssembler* assembler,
                           Label* word,
                           Label* non_word,
                           bool fall_through_on_word) {
+  if (assembler->CheckSpecialCharacterClass(
+          fall_through_on_word ? 'w' : 'W',
+          fall_through_on_word ? non_word : word)) {
+    // Optimized implementation available.
+    return;
+  }
   assembler->CheckCharacterGT('z', non_word);
   assembler->CheckCharacterLT('0', non_word);
   assembler->CheckCharacterGT('a' - 1, word);
@@ -2085,17 +2066,60 @@ static void EmitHat(RegExpCompiler* compiler,
   assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
                                   new_trace.backtrack(),
                                   false);
-  // Newline means \n, \r, 0x2028 or 0x2029.
-  if (!compiler->ascii()) {
-    assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
+  if (!assembler->CheckSpecialCharacterClass('n',
+                                             new_trace.backtrack())) {
+    // Newline means \n, \r, 0x2028 or 0x2029.
+    if (!compiler->ascii()) {
+      assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
+    }
+    assembler->CheckCharacter('\n', &ok);
+    assembler->CheckNotCharacter('\r', new_trace.backtrack());
   }
-  assembler->CheckCharacter('\n', &ok);
-  assembler->CheckNotCharacter('\r', new_trace.backtrack());
   assembler->Bind(&ok);
   on_success->Emit(compiler, &new_trace);
 }
 
 
+// Emit the code to handle \b and \B (word-boundary or non-word-boundary)
+// when we know whether the next character must be a word character or not.
+static void EmitHalfBoundaryCheck(AssertionNode::AssertionNodeType type,
+                                  RegExpCompiler* compiler,
+                                  RegExpNode* on_success,
+                                  Trace* trace) {
+  RegExpMacroAssembler* assembler = compiler->macro_assembler();
+  Label done;
+
+  Trace new_trace(*trace);
+
+  bool expect_word_character = (type == AssertionNode::AFTER_WORD_CHARACTER);
+  Label* on_word = expect_word_character ? &done : new_trace.backtrack();
+  Label* on_non_word = expect_word_character ? new_trace.backtrack() : &done;
+
+  // Check whether previous character was a word character.
+  switch (trace->at_start()) {
+    case Trace::TRUE:
+      if (expect_word_character) {
+        assembler->GoTo(on_non_word);
+      }
+      break;
+    case Trace::UNKNOWN:
+      ASSERT_EQ(0, trace->cp_offset());
+      assembler->CheckAtStart(on_non_word);
+      // Fall through.
+    case Trace::FALSE:
+      int prev_char_offset = trace->cp_offset() - 1;
+      assembler->LoadCurrentCharacter(prev_char_offset, NULL, false, 1);
+      EmitWordCheck(assembler, on_word, on_non_word, expect_word_character);
+      // We may or may not have loaded the previous character.
+      new_trace.InvalidateCurrentCharacter();
+  }
+
+  assembler->Bind(&done);
+
+  on_success->Emit(compiler, &new_trace);
+}
+
+
 // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
 static void EmitBoundaryCheck(AssertionNode::AssertionNodeType type,
                               RegExpCompiler* compiler,
@@ -2205,10 +2229,15 @@ void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
     case AFTER_NEWLINE:
       EmitHat(compiler, on_success(), trace);
       return;
-    case AT_NON_BOUNDARY:
     case AT_BOUNDARY:
+    case AT_NON_BOUNDARY: {
       EmitBoundaryCheck(type_, compiler, on_success(), trace);
       return;
+    }
+    case AFTER_WORD_CHARACTER:
+    case AFTER_NONWORD_CHARACTER: {
+      EmitHalfBoundaryCheck(type_, compiler, on_success(), trace);
+    }
   }
   on_success()->Emit(compiler, trace);
 }
@@ -2791,7 +2820,7 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
       // to generate probably can't use it.
       if (i != first_normal_choice) {
         alt_gen->expects_preload = false;
-        new_trace.set_characters_preloaded(0);
+        new_trace.InvalidateCurrentCharacter();
       }
       if (i < choice_count - 1) {
         new_trace.set_backtrack(&alt_gen->after);
@@ -3282,6 +3311,12 @@ void DotPrinter::VisitAssertion(AssertionNode* that) {
     case AssertionNode::AFTER_NEWLINE:
       stream()->Add("label=\"(?<=\\n)\", shape=septagon");
       break;
+    case AssertionNode::AFTER_WORD_CHARACTER:
+      stream()->Add("label=\"(?<=\\w)\", shape=septagon");
+      break;
+    case AssertionNode::AFTER_NONWORD_CHARACTER:
+      stream()->Add("label=\"(?<=\\W)\", shape=septagon");
+      break;
   }
   stream()->Add("];\n");
   PrintAttributes(that);
@@ -3484,6 +3519,20 @@ bool RegExpCharacterClass::is_standard() {
     set_.set_standard_set_type('.');
     return true;
   }
+  if (CompareRanges(set_.ranges(),
+                    kLineTerminatorRanges,
+                    kLineTerminatorRangeCount)) {
+    set_.set_standard_set_type('n');
+    return true;
+  }
+  if (CompareRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {
+    set_.set_standard_set_type('w');
+    return true;
+  }
+  if (CompareInverseRanges(set_.ranges(), kWordRanges, kWordRangeCount)) {
+    set_.set_standard_set_type('W');
+    return true;
+  }
   return false;
 }
 
@@ -4010,6 +4059,101 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
 }
 
 
+bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
+  ASSERT_NOT_NULL(ranges);
+  int n = ranges->length();
+  if (n <= 1) return true;
+  int max = ranges->at(0).to();
+  for (int i = 1; i < n; i++) {
+    CharacterRange next_range = ranges->at(i);
+    if (next_range.from() <= max + 1) return false;
+    max = next_range.to();
+  }
+  return true;
+}
+
+SetRelation CharacterRange::WordCharacterRelation(
+    ZoneList<CharacterRange>* range) {
+  ASSERT(IsCanonical(range));
+  int i = 0;  // Word character range index.
+  int j = 0;  // Argument range index.
+  ASSERT_NE(0, kWordRangeCount);
+  SetRelation result;
+  if (range->length() == 0) {
+    result.SetElementsInSecondSet();
+    return result;
+  }
+  CharacterRange argument_range = range->at(0);
+  CharacterRange word_range = CharacterRange(kWordRanges[0], kWordRanges[1]);
+  while (i < kWordRangeCount && j < range->length()) {
+    // Check the two ranges for the five cases:
+    // - no overlap.
+    // - partial overlap (there are elements in both ranges that isn't
+    //   in the other, and there are also elements that are in both).
+    // - argument range entirely inside word range.
+    // - word range entirely inside argument range.
+    // - ranges are completely equal.
+
+    // First check for no overlap. The earlier range is not in the other set.
+    if (argument_range.from() > word_range.to()) {
+      // Ranges are disjoint. The earlier word range contains elements that
+      // cannot be in the argument set.
+      result.SetElementsInSecondSet();
+    } else if (word_range.from() > argument_range.to()) {
+      // Ranges are disjoint. The earlier argument range contains elements that
+      // cannot be in the word set.
+      result.SetElementsInFirstSet();
+    } else if (word_range.from() <= argument_range.from() &&
+               word_range.to() >= argument_range.from()) {
+      result.SetElementsInBothSets();
+      // argument range completely inside word range.
+      if (word_range.from() < argument_range.from() ||
+          word_range.to() > argument_range.from()) {
+        result.SetElementsInSecondSet();
+      }
+    } else if (word_range.from() >= argument_range.from() &&
+               word_range.to() <= argument_range.from()) {
+      result.SetElementsInBothSets();
+      result.SetElementsInFirstSet();
+    } else {
+      // There is overlap, and neither is a subrange of the other
+      result.SetElementsInFirstSet();
+      result.SetElementsInSecondSet();
+      result.SetElementsInBothSets();
+    }
+    if (result.NonTrivialIntersection()) {
+      // The result is as (im)precise as we can possibly make it.
+      return result;
+    }
+    // Progress the range(s) with minimal to-character.
+    uc16 word_to = word_range.to();
+    uc16 argument_to = argument_range.to();
+    if (argument_to <= word_to) {
+      j++;
+      if (j < range->length()) {
+        argument_range = range->at(j);
+      }
+    }
+    if (word_to <= argument_to) {
+      i += 2;
+      if (i < kWordRangeCount) {
+        word_range = CharacterRange(kWordRanges[i], kWordRanges[i + 1]);
+      }
+    }
+  }
+  // Check if anything wasn't compared in the loop.
+  if (i < kWordRangeCount) {
+    // word range contains something not in argument range.
+    result.SetElementsInSecondSet();
+  } else if (j < range->length()) {
+    // Argument range contains something not in word range.
+    result.SetElementsInFirstSet();
+  }
+
+  return result;
+}
+
+
 static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
                             int bottom,
                             int top) {
@@ -4119,6 +4263,287 @@ ZoneList<CharacterRange>* CharacterSet::ranges() {
 }
 
 
+// Move a number of elements in a zonelist to another position
+// in the same list. Handles overlapping source and target areas.
+static void MoveRanges(ZoneList<CharacterRange>* list,
+                       int from,
+                       int to,
+                       int count) {
+  // Ranges are potentially overlapping.
+  if (from < to) {
+    for (int i = count - 1; i >= 0; i--) {
+      list->at(to + i) = list->at(from + i);
+    }
+  } else {
+    for (int i = 0; i < count; i++) {
+      list->at(to + i) = list->at(from + i);
+    }
+  }
+}
+
+
+static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
+                                      int count,
+                                      CharacterRange insert) {
+  // Inserts a range into list[0..count[, which must be sorted
+  // by from value and non-overlapping and non-adjacent, using at most
+  // list[0..count] for the result. Returns the number of resulting
+  // canonicalized ranges. Inserting a range may collapse existing ranges into
+  // fewer ranges, so the return value can be anything in the range 1..count+1.
+  uc16 from = insert.from();
+  uc16 to = insert.to();
+  int start_pos = 0;
+  int end_pos = count;
+  for (int i = count - 1; i >= 0; i--) {
+    CharacterRange current = list->at(i);
+    if (current.from() > to + 1) {
+      end_pos = i;
+    } else if (current.to() + 1 < from) {
+      start_pos = i + 1;
+      break;
+    }
+  }
+
+  // Inserted range overlaps, or is adjacent to, ranges at positions
+  // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
+  // not affected by the insertion.
+  // If start_pos == end_pos, the range must be inserted before start_pos.
+  // if start_pos < end_pos, the entire range from start_pos to end_pos
+  // must be merged with the insert range.
+
+  if (start_pos == end_pos) {
+    // Insert between existing ranges at position start_pos.
+    if (start_pos < count) {
+      MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
+    }
+    list->at(start_pos) = insert;
+    return count + 1;
+  }
+  if (start_pos + 1 == end_pos) {
+    // Replace single existing range at position start_pos.
+    CharacterRange to_replace = list->at(start_pos);
+    int new_from = Min(to_replace.from(), from);
+    int new_to = Max(to_replace.to(), to);
+    list->at(start_pos) = CharacterRange(new_from, new_to);
+    return count;
+  }
+  // Replace a number of existing ranges from start_pos to end_pos - 1.
+  // Move the remaining ranges down.
+
+  int new_from = Min(list->at(start_pos).from(), from);
+  int new_to = Max(list->at(end_pos - 1).to(), to);
+  if (end_pos < count) {
+    MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
+  }
+  list->at(start_pos) = CharacterRange(new_from, new_to);
+  return count - (end_pos - start_pos) + 1;
+}
+
+
+void CharacterSet::Canonicalize() {
+  // Special/default classes are always considered canonical. The result
+  // of calling ranges() will be sorted.
+  if (ranges_ == NULL) return;
+  CharacterRange::Canonicalize(ranges_);
+}
+
+
+void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
+  if (character_ranges->length() <= 1) return;
+  // Check whether ranges are already canonical (increasing, non-overlapping,
+  // non-adjacent).
+  int n = character_ranges->length();
+  int max = character_ranges->at(0).to();
+  int i = 1;
+  while (i < n) {
+    CharacterRange current = character_ranges->at(i);
+    if (current.from() <= max + 1) {
+      break;
+    }
+    max = current.to();
+    i++;
+  }
+  // Canonical until the i'th range. If that's all of them, we are done.
+  if (i == n) return;
+
+  // The ranges at index i and forward are not canonicalized. Make them so by
+  // doing the equivalent of insertion sort (inserting each into the previous
+  // list, in order).
+  // Notice that inserting a range can reduce the number of ranges in the
+  // result due to combining of adjacent and overlapping ranges.
+  int read = i;  // Range to insert.
+  int num_canonical = i;  // Length of canonicalized part of list.
+  do {
+    num_canonical = InsertRangeInCanonicalList(character_ranges,
+                                               num_canonical,
+                                               character_ranges->at(read));
+    read++;
+  } while (read < n);
+  character_ranges->Rewind(num_canonical);
+
+  ASSERT(CharacterRange::IsCanonical(character_ranges));
+}
+
+
+// Utility function for CharacterRange::Merge. Adds a range at the end of
+// a canonicalized range list, if necessary merging the range with the last
+// range of the list.
+static void AddRangeToSet(ZoneList<CharacterRange>* set, CharacterRange range) {
+  if (set == NULL) return;
+  ASSERT(set->length() == 0 || set->at(set->length() - 1).to() < range.from());
+  int n = set->length();
+  if (n > 0) {
+    CharacterRange lastRange = set->at(n - 1);
+    if (lastRange.to() == range.from() - 1) {
+      set->at(n - 1) = CharacterRange(lastRange.from(), range.to());
+      return;
+    }
+  }
+  set->Add(range);
+}
+
+
+static void AddRangeToSelectedSet(int selector,
+                                  ZoneList<CharacterRange>* first_set,
+                                  ZoneList<CharacterRange>* second_set,
+                                  ZoneList<CharacterRange>* intersection_set,
+                                  CharacterRange range) {
+  switch (selector) {
+    case kInsideFirst:
+      AddRangeToSet(first_set, range);
+      break;
+    case kInsideSecond:
+      AddRangeToSet(second_set, range);
+      break;
+    case kInsideBoth:
+      AddRangeToSet(intersection_set, range);
+      break;
+  }
+}
+
+
+
+void CharacterRange::Merge(ZoneList<CharacterRange>* first_set,
+                           ZoneList<CharacterRange>* second_set,
+                           ZoneList<CharacterRange>* first_set_only_out,
+                           ZoneList<CharacterRange>* second_set_only_out,
+                           ZoneList<CharacterRange>* both_sets_out) {
+  // Inputs are canonicalized.
+  ASSERT(CharacterRange::IsCanonical(first_set));
+  ASSERT(CharacterRange::IsCanonical(second_set));
+  // Outputs are empty, if applicable.
+  ASSERT(first_set_only_out == NULL || first_set_only_out->length() == 0);
+  ASSERT(second_set_only_out == NULL || second_set_only_out->length() == 0);
+  ASSERT(both_sets_out == NULL || both_sets_out->length() == 0);
+
+  // Merge sets by iterating through the lists in order of lowest "from" value,
+  // and putting intervals into one of three sets.
+
+  if (first_set->length() == 0) {
+    second_set_only_out->AddAll(*second_set);
+    return;
+  }
+  if (second_set->length() == 0) {
+    first_set_only_out->AddAll(*first_set);
+    return;
+  }
+  // Indices into input lists.
+  int i1 = 0;
+  int i2 = 0;
+  // Cache length of input lists.
+  int n1 = first_set->length();
+  int n2 = second_set->length();
+  // Current range. May be invalid if state is kInsideNone.
+  int from = 0;
+  int to = -1;
+  // Where current range comes from.
+  int state = kInsideNone;
+
+  while (i1 < n1 || i2 < n2) {
+    CharacterRange next_range;
+    int range_source;
+    if (i2 == n2 || first_set->at(i1).from() < second_set->at(i2).from()) {
+      next_range = first_set->at(i1++);
+      range_source = kInsideFirst;
+    } else {
+      next_range = second_set->at(i2++);
+      range_source = kInsideSecond;
+    }
+    if (to < next_range.from()) {
+      // Ranges disjoint: |current|  |next|
+      AddRangeToSelectedSet(state,
+                            first_set_only_out,
+                            second_set_only_out,
+                            both_sets_out,
+                            CharacterRange(from, to));
+      from = next_range.from();
+      to = next_range.to();
+      state = range_source;
+    } else {
+      if (from < next_range.from()) {
+        AddRangeToSelectedSet(state,
+                              first_set_only_out,
+                              second_set_only_out,
+                              both_sets_out,
+                              CharacterRange(from, next_range.from()-1));
+      }
+      if (to < next_range.to()) {
+        // Ranges overlap:  |current|
+        //                       |next|
+        AddRangeToSelectedSet(state | range_source,
+                              first_set_only_out,
+                              second_set_only_out,
+                              both_sets_out,
+                              CharacterRange(next_range.from(), to));
+        from = to + 1;
+        to = next_range.to();
+        state = range_source;
+      } else {
+        // Range included:    |current| , possibly ending at same character.
+        //                      |next|
+        AddRangeToSelectedSet(
+            state | range_source,
+            first_set_only_out,
+            second_set_only_out,
+            both_sets_out,
+            CharacterRange(next_range.from(), next_range.to()));
+        from = next_range.to() + 1;
+        // If ranges end at same character, both ranges are consumed completely.
+        if (next_range.to() == to) state = kInsideNone;
+      }
+    }
+  }
+  AddRangeToSelectedSet(state,
+                        first_set_only_out,
+                        second_set_only_out,
+                        both_sets_out,
+                        CharacterRange(from, to));
+}
+
+
+void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
+                            ZoneList<CharacterRange>* negated_ranges) {
+  ASSERT(CharacterRange::IsCanonical(ranges));
+  ASSERT_EQ(0, negated_ranges->length());
+  int range_count = ranges->length();
+  uc16 from = 0;
+  int i = 0;
+  if (range_count > 0 && ranges->at(0).from() == 0) {
+    from = ranges->at(0).to();
+    i = 1;
+  }
+  while (i < range_count) {
+    CharacterRange range = ranges->at(i);
+    negated_ranges->Add(CharacterRange(from + 1, range.from() - 1));
+    from = range.to();
+    i++;
+  }
+  if (from < String::kMaxUC16CharCode) {
+    negated_ranges->Add(CharacterRange(from + 1, String::kMaxUC16CharCode));
+  }
+}
+
+
 
 // -------------------------------------------------------------------
 // Interest propagation
@@ -4410,9 +4835,203 @@ void Analysis::VisitBackReference(BackReferenceNode* that) {
 
 void Analysis::VisitAssertion(AssertionNode* that) {
   EnsureAnalyzed(that->on_success());
+  AssertionNode::AssertionNodeType type = that->type();
+  if (type == AssertionNode::AT_BOUNDARY ||
+      type == AssertionNode::AT_NON_BOUNDARY) {
+    // Check if the following character is known to be a word character
+    // or known to not be a word character.
+    ZoneList<CharacterRange>* following_chars = that->FirstCharacterSet();
+
+    CharacterRange::Canonicalize(following_chars);
+
+    SetRelation word_relation =
+        CharacterRange::WordCharacterRelation(following_chars);
+    if (word_relation.ContainedIn()) {
+      // Following character is definitely a word character.
+      type = (type == AssertionNode::AT_BOUNDARY) ?
+                  AssertionNode::AFTER_NONWORD_CHARACTER :
+                  AssertionNode::AFTER_WORD_CHARACTER;
+      that->set_type(type);
+    } else if (word_relation.Disjoint()) {
+      // Following character is definitely *not* a word character.
+      type = (type == AssertionNode::AT_BOUNDARY) ?
+                  AssertionNode::AFTER_WORD_CHARACTER :
+                  AssertionNode::AFTER_NONWORD_CHARACTER;
+      that->set_type(type);
+    }
+  }
+}
+
+
+ZoneList<CharacterRange>* RegExpNode::FirstCharacterSet() {
+  if (first_character_set_ == NULL) {
+    if (ComputeFirstCharacterSet(kFirstCharBudget) < 0) {
+      // If we can't find an exact solution within the budget, we
+      // set the value to the set of every character, i.e., all characters
+      // are possible.
+      ZoneList<CharacterRange>* all_set = new ZoneList<CharacterRange>(1);
+      all_set->Add(CharacterRange::Everything());
+      first_character_set_ = all_set;
+    }
+  }
+  return first_character_set_;
+}
+
+
+int RegExpNode::ComputeFirstCharacterSet(int budget) {
+  // Default behavior is to not be able to determine the first character.
+  return kComputeFirstCharacterSetFail;
 }
 
 
+int LoopChoiceNode::ComputeFirstCharacterSet(int budget) {
+  budget--;
+  if (budget >= 0) {
+    // Find loop min-iteration. It's the value of the guarded choice node
+    // with a GEQ guard, if any.
+    int min_repetition = 0;
+
+    for (int i = 0; i <= 1; i++) {
+      GuardedAlternative alternative = alternatives()->at(i);
+      ZoneList<Guard*>* guards = alternative.guards();
+      if (guards != NULL && guards->length() > 0) {
+        Guard* guard = guards->at(0);
+        if (guard->op() == Guard::GEQ) {
+          min_repetition = guard->value();
+          break;
+        }
+      }
+    }
+
+    budget = loop_node()->ComputeFirstCharacterSet(budget);
+    if (budget >= 0) {
+      ZoneList<CharacterRange>* character_set =
+          loop_node()->first_character_set();
+      if (body_can_be_zero_length() || min_repetition == 0) {
+        budget = continue_node()->ComputeFirstCharacterSet(budget);
+        if (budget < 0) return budget;
+        ZoneList<CharacterRange>* body_set =
+            continue_node()->first_character_set();
+        ZoneList<CharacterRange>* union_set =
+          new ZoneList<CharacterRange>(Max(character_set->length(),
+                                           body_set->length()));
+        CharacterRange::Merge(character_set,
+                              body_set,
+                              union_set,
+                              union_set,
+                              union_set);
+        character_set = union_set;
+      }
+      set_first_character_set(character_set);
+    }
+  }
+  return budget;
+}
+
+
+int NegativeLookaheadChoiceNode::ComputeFirstCharacterSet(int budget) {
+  budget--;
+  if (budget >= 0) {
+    GuardedAlternative successor = this->alternatives()->at(1);
+    RegExpNode* successor_node = successor.node();
+    budget = successor_node->ComputeFirstCharacterSet(budget);
+    if (budget >= 0) {
+      set_first_character_set(successor_node->first_character_set());
+    }
+  }
+  return budget;
+}
+
+
+// The first character set of an EndNode is unknowable. Just use the
+// default implementation that fails and returns all characters as possible.
+
+
+int AssertionNode::ComputeFirstCharacterSet(int budget) {
+  budget -= 1;
+  if (budget >= 0) {
+    switch (type_) {
+      case AT_END: {
+        set_first_character_set(new ZoneList<CharacterRange>(0));
+        break;
+      }
+      case AT_START:
+      case AT_BOUNDARY:
+      case AT_NON_BOUNDARY:
+      case AFTER_NEWLINE:
+      case AFTER_NONWORD_CHARACTER:
+      case AFTER_WORD_CHARACTER: {
+        ASSERT_NOT_NULL(on_success());
+        budget = on_success()->ComputeFirstCharacterSet(budget);
+        set_first_character_set(on_success()->first_character_set());
+        break;
+      }
+    }
+  }
+  return budget;
+}
+
+
+int ActionNode::ComputeFirstCharacterSet(int budget) {
+  if (type_ == POSITIVE_SUBMATCH_SUCCESS) return kComputeFirstCharacterSetFail;
+  budget--;
+  if (budget >= 0) {
+    ASSERT_NOT_NULL(on_success());
+    budget = on_success()->ComputeFirstCharacterSet(budget);
+    if (budget >= 0) {
+      set_first_character_set(on_success()->first_character_set());
+    }
+  }
+  return budget;
+}
+
+
+int BackReferenceNode::ComputeFirstCharacterSet(int budget) {
+  // We don't know anything about the first character of a backreference
+  // at this point.
+  return kComputeFirstCharacterSetFail;
+}
+
+
+int TextNode::ComputeFirstCharacterSet(int budget) {
+  budget--;
+  if (budget >= 0) {
+    ASSERT_NE(0, elements()->length());
+    TextElement text = elements()->at(0);
+    if (text.type == TextElement::ATOM) {
+      RegExpAtom* atom = text.data.u_atom;
+      ASSERT_NE(0, atom->length());
+      uc16 first_char = atom->data()[0];
+      ZoneList<CharacterRange>* range = new ZoneList<CharacterRange>(1);
+      range->Add(CharacterRange(first_char, first_char));
+      set_first_character_set(range);
+    } else {
+      ASSERT(text.type == TextElement::CHAR_CLASS);
+      RegExpCharacterClass* char_class = text.data.u_char_class;
+      if (char_class->is_negated()) {
+        ZoneList<CharacterRange>* ranges = char_class->ranges();
+        int length = ranges->length();
+        int new_length = length + 1;
+        if (length > 0) {
+          if (ranges->at(0).from() == 0) new_length--;
+          if (ranges->at(length - 1).to() == String::kMaxUC16CharCode) {
+            new_length--;
+          }
+        }
+        ZoneList<CharacterRange>* negated_ranges =
+            new ZoneList<CharacterRange>(new_length);
+        CharacterRange::Negate(ranges, negated_ranges);
+        set_first_character_set(negated_ranges);
+      } else {
+        set_first_character_set(char_class->ranges());
+      }
+    }
+  }
+  return budget;
+}
+
+
+
 // -------------------------------------------------------------------
 // Dispatch table construction
 
@@ -4471,7 +5090,6 @@ void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
 }
 
 
-
 static int CompareRangeByFrom(const CharacterRange* a,
                               const CharacterRange* b) {
   return Compare<uc16>(a->from(), b->from());
@@ -4606,4 +5224,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
                            pattern);
 }
 
+
+int OffsetsVector::static_offsets_vector_[
+    OffsetsVector::kStaticOffsetsVectorSize];
+
 }}  // namespace v8::internal