summaryrefslogtreecommitdiff
path: root/libjava/classpath/gnu/regexp/RETokenRepeated.java
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/classpath/gnu/regexp/RETokenRepeated.java')
-rw-r--r--libjava/classpath/gnu/regexp/RETokenRepeated.java310
1 files changed, 192 insertions, 118 deletions
diff --git a/libjava/classpath/gnu/regexp/RETokenRepeated.java b/libjava/classpath/gnu/regexp/RETokenRepeated.java
index 6291a3c3960..2d019c53cbd 100644
--- a/libjava/classpath/gnu/regexp/RETokenRepeated.java
+++ b/libjava/classpath/gnu/regexp/RETokenRepeated.java
@@ -1,5 +1,5 @@
/* gnu/regexp/RETokenRepeated.java
- Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
@@ -39,6 +39,7 @@ exception statement from your version. */
package gnu.regexp;
import java.util.Vector;
+import java.util.Arrays;
final class RETokenRepeated extends REToken {
private REToken token;
@@ -82,6 +83,38 @@ final class RETokenRepeated extends REToken {
return (min * token.getMinimumLength());
}
+ int getMaximumLength() {
+ if (max == Integer.MAX_VALUE) return Integer.MAX_VALUE;
+ int tmax = token.getMaximumLength();
+ if (tmax == Integer.MAX_VALUE) return tmax;
+ return (max * tmax);
+ }
+
+ private static REMatch findDoables(REToken tk,
+ CharIndexed input, REMatch mymatch) {
+
+ REMatch.REMatchList doables = new REMatch.REMatchList();
+
+ // try next repeat at all possible positions
+ for (REMatch current = mymatch;
+ current != null; current = current.next) {
+ REMatch recurrent = (REMatch) current.clone();
+ int origin = recurrent.index;
+ tk = (REToken) tk.clone();
+ tk.next = tk.uncle = null;
+ recurrent.matchFlags |= REMatch.MF_FIND_ALL;
+ if (tk.match(input, recurrent)) {
+ for (REMatch m = recurrent; m != null; m = m.next) {
+ m.matchFlags &= ~REMatch.MF_FIND_ALL;
+ }
+ if (recurrent.index == origin) recurrent.empty = true;
+ // add all items in current to doables array
+ doables.addTail(recurrent);
+ }
+ }
+ return doables.head;
+ }
+
// We do need to save every possible point, but the number of clone()
// invocations here is really a killer for performance on non-stingy
// repeat operators. I'm open to suggestions...
@@ -91,59 +124,167 @@ final class RETokenRepeated extends REToken {
// the subexpression back-reference operator allow that?
boolean match(CharIndexed input, REMatch mymatch) {
- // number of times we've matched so far
- int numRepeats = 0;
-
- // Possible positions for the next repeat to match at
- REMatch newMatch = mymatch;
- REMatch last = null;
- REMatch current;
- // Add the '0-repeats' index
- // positions.elementAt(z) == position [] in input after <<z>> matches
- Vector positions = new Vector();
- positions.addElement(newMatch);
-
- // Declare variables used in loop
- REMatch doables;
- REMatch doablesLast;
- REMatch recurrent;
- int lastIndex = mymatch.index;
-
- do {
- // Check for stingy match for each possibility.
- if (stingy && (numRepeats >= min)) {
- REMatch result = matchRest(input, newMatch);
- if (result != null) {
- mymatch.assignFrom(result);
- return true;
- }
+ boolean stopMatchingIfSatisfied =
+ (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
+
+ REMatch newMatch = matchMinimum(input, mymatch);
+ if (newMatch == null) return false;
+
+ // Array of positions we have already visited
+ int[] visited = initVisited();
+ for (REMatch m = newMatch; m != null; m = m.next) {
+ visited = addVisited(m.index, visited);
+ }
+
+ int max1 = decreaseMax(max, min);
+
+ newMatch = _match(input, newMatch, max1,
+ stopMatchingIfSatisfied, visited);
+ if (newMatch != null) {
+ mymatch.assignFrom(newMatch);
+ return true;
+ }
+ return false;
+ }
+
+ private static int decreaseMax(int m, int n) {
+ if (m == Integer.MAX_VALUE) return m;
+ return m - n;
+ }
+
+ // Array visited is an array of character positions we have already
+ // visited. visited[0] is used to store the effective length of the
+ // array.
+ private static int[] initVisited() {
+ int[] visited = new int[32];
+ visited[0] = 0;
+ return visited;
+ }
+
+ private static boolean visitedContains(int n, int[] visited) {
+ // Experience tells that for a small array like this,
+ // simple linear search is faster than binary search.
+ for (int i = 1; i < visited[0]; i++) {
+ if (n == visited[i]) return true;
+ }
+ return false;
+ }
+
+ private static int[] addVisited(int n, int[] visited) {
+ if (visitedContains(n, visited)) return visited;
+ if (visited[0] >= visited.length - 1) {
+ int[] newvisited = new int[visited.length + 32];
+ System.arraycopy(visited, 0, newvisited, 0, visited.length);
+ visited = newvisited;
+ }
+ visited[0]++;
+ visited[visited[0]] = n;
+ return visited;
+ }
+
+ private REMatch _match(CharIndexed input, REMatch mymatch,
+ int max1, boolean stopMatchingIfSatisfied,
+ int[] visited) {
+
+ if (max1 == 0) {
+ return matchRest(input, mymatch);
+ }
+ max1 = decreaseMax(max1, 1);
+
+ REMatch.REMatchList allResults = new REMatch.REMatchList();
+
+ // Depth-first search
+
+ for (REMatch cur = mymatch; cur != null; cur = cur.next) {
+
+ REMatch cur1 = (REMatch) cur.clone();
+
+ if (stingy) {
+ REMatch results = matchRest(input, cur1);
+ if (results != null) {
+ if (stopMatchingIfSatisfied) {
+ return results;
+ }
+ allResults.addTail(results);
+ }
}
- doables = null;
- doablesLast = null;
+ DO_THIS:
+ do {
- // try next repeat at all possible positions
- for (current = newMatch; current != null; current = current.next) {
- recurrent = (REMatch) current.clone();
- if (token.match(input, recurrent)) {
- // add all items in current to doables array
- if (doables == null) {
- doables = recurrent;
- doablesLast = recurrent;
- } else {
- // Order these from longest to shortest
- // Start by assuming longest (more repeats)
- doablesLast.next = recurrent;
+ boolean emptyMatchFound = false;
+ REMatch doables = findDoables(token, input, cur1);
+ if (doables == null) break DO_THIS;
+ if (doables.empty) emptyMatchFound = true;
+
+ if (!emptyMatchFound) {
+ REMatch.REMatchList list = new REMatch.REMatchList();
+ for (REMatch m = doables; m != null; m = m.next) {
+ REMatch m1 = (REMatch) m.clone();
+ int n = m1.index;
+ if (! visitedContains(n, visited)) {
+ visited = addVisited(n, visited);
+ list.addTail(m1);
}
- // Find new doablesLast
- while (doablesLast.next != null) {
- doablesLast = doablesLast.next;
+ }
+ if (list.head == null) break DO_THIS;
+ doables = list.head;
+ }
+
+ for (REMatch m = doables; m != null; m = m.next) {
+ if (! emptyMatchFound) {
+ REMatch m1 = _match(input, m, max1,
+ stopMatchingIfSatisfied, visited);
+ if (possessive) return m1;
+ if (m1 != null) {
+ if (stopMatchingIfSatisfied) {
+ return m1;
+ }
+ allResults.addTail(m1);
+ }
+ }
+ else {
+ REMatch m1 = matchRest(input, m);
+ if (m1 != null) {
+ if (stopMatchingIfSatisfied) {
+ return m1;
+ }
+ allResults.addTail(m1);
}
}
}
- // if none of the possibilities worked out, break out of do/while
- if (doables == null) break;
+
+ } while (false); // DO_THIS only once;
+
+ // This point itself is a candidate.
+ if (!stingy) {
+ REMatch m2 = matchRest(input, cur1);
+ if (m2 != null) {
+ if (stopMatchingIfSatisfied) {
+ return m2;
+ }
+ allResults.addTail(m2);
+ }
+ }
+ }
+
+ return allResults.head;
+ }
+
+ private REMatch matchMinimum(CharIndexed input, final REMatch mymatch) {
+ // Possible positions for the next repeat to match at
+ REMatch newMatch = mymatch;
+
+ // number of times we've matched so far
+ int numRepeats = 0;
+
+ while (numRepeats < min) {
+ REMatch doables = findDoables(token, input, newMatch);
+
+ // if none of the possibilities worked out,
+ // it means that minimum number of repeats could not be found.
+ if (doables == null) return null;
// reassign where the next repeat can match
newMatch = doables;
@@ -151,91 +292,24 @@ final class RETokenRepeated extends REToken {
// increment how many repeats we've successfully found
++numRepeats;
- positions.addElement(newMatch);
-
- // doables.index == lastIndex means an empty string
- // was the longest that matched this token.
- // We break here, otherwise we will fall into an endless loop.
- if (doables.index == lastIndex) {
- if (numRepeats < min) numRepeats = min;
- break;
- }
- lastIndex = doables.index;
- } while (numRepeats < max);
-
- // If there aren't enough repeats, then fail
- if (numRepeats < min) return false;
-
- // We're greedy, but ease off until a true match is found
- int posIndex = positions.size();
-
- // At this point we've either got too many or just the right amount.
- // See if this numRepeats works with the rest of the regexp.
- REMatch allResults = null;
- REMatch allResultsLast = null;
-
- REMatch results = null;
- int indexCount = posIndex - min;
- if (indexCount <= 0) {
- // This case occurs when we exited the previous do loop before
- // numRepeats >= min because an empty string matched the token.
- // In this case, an empty string can match as many times as
- // desired.
- indexCount = 1;
- }
- while (indexCount-- > 0) {
- --posIndex;
- newMatch = (REMatch) positions.elementAt(posIndex);
- results = matchRest(input, newMatch);
- if (results != null) {
- if (allResults == null) {
- allResults = results;
- allResultsLast = results;
- } else {
- // Order these from longest to shortest
- // Start by assuming longest (more repeats)
- allResultsLast.next = results;
- }
- // Find new doablesLast
- while (allResultsLast.next != null) {
- allResultsLast = allResultsLast.next;
- }
- }
- // else did not match rest of the tokens, try again on smaller sample
- // or break out when performing possessive matching
- if (possessive) break;
+ if (newMatch.empty) break;
}
- if (allResults != null) {
- mymatch.assignFrom(allResults); // does this get all?
- return true;
- }
- // If we fall out, no matches.
- return false;
+ return newMatch;
}
private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
REMatch current, single;
- REMatch doneIndex = null;
- REMatch doneIndexLast = null;
+ REMatch.REMatchList doneIndex = new REMatch.REMatchList();
// Test all possible matches for this number of repeats
for (current = newMatch; current != null; current = current.next) {
// clone() separates a single match from the chain
single = (REMatch) current.clone();
if (next(input, single)) {
// chain results to doneIndex
- if (doneIndex == null) {
- doneIndex = single;
- doneIndexLast = single;
- } else {
- doneIndexLast.next = single;
- }
- // Find new doneIndexLast
- while (doneIndexLast.next != null) {
- doneIndexLast = doneIndexLast.next;
- }
+ doneIndex.addTail(single);
}
}
- return doneIndex;
+ return doneIndex.head;
}
void dump(StringBuffer os) {