diff options
Diffstat (limited to 'libjava/classpath/gnu/regexp/RETokenRepeated.java')
-rw-r--r-- | libjava/classpath/gnu/regexp/RETokenRepeated.java | 310 |
1 files changed, 192 insertions, 118 deletions
diff --git a/libjava/classpath/gnu/regexp/RETokenRepeated.java b/libjava/classpath/gnu/regexp/RETokenRepeated.java index 6291a3c3960..2d019c53cbd 100644 --- a/libjava/classpath/gnu/regexp/RETokenRepeated.java +++ b/libjava/classpath/gnu/regexp/RETokenRepeated.java @@ -1,5 +1,5 @@ /* gnu/regexp/RETokenRepeated.java - Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc. + Copyright (C) 2006 Free Software Foundation, Inc. This file is part of GNU Classpath. @@ -39,6 +39,7 @@ exception statement from your version. */ package gnu.regexp; import java.util.Vector; +import java.util.Arrays; final class RETokenRepeated extends REToken { private REToken token; @@ -82,6 +83,38 @@ final class RETokenRepeated extends REToken { return (min * token.getMinimumLength()); } + int getMaximumLength() { + if (max == Integer.MAX_VALUE) return Integer.MAX_VALUE; + int tmax = token.getMaximumLength(); + if (tmax == Integer.MAX_VALUE) return tmax; + return (max * tmax); + } + + private static REMatch findDoables(REToken tk, + CharIndexed input, REMatch mymatch) { + + REMatch.REMatchList doables = new REMatch.REMatchList(); + + // try next repeat at all possible positions + for (REMatch current = mymatch; + current != null; current = current.next) { + REMatch recurrent = (REMatch) current.clone(); + int origin = recurrent.index; + tk = (REToken) tk.clone(); + tk.next = tk.uncle = null; + recurrent.matchFlags |= REMatch.MF_FIND_ALL; + if (tk.match(input, recurrent)) { + for (REMatch m = recurrent; m != null; m = m.next) { + m.matchFlags &= ~REMatch.MF_FIND_ALL; + } + if (recurrent.index == origin) recurrent.empty = true; + // add all items in current to doables array + doables.addTail(recurrent); + } + } + return doables.head; + } + // We do need to save every possible point, but the number of clone() // invocations here is really a killer for performance on non-stingy // repeat operators. I'm open to suggestions... @@ -91,59 +124,167 @@ final class RETokenRepeated extends REToken { // the subexpression back-reference operator allow that? boolean match(CharIndexed input, REMatch mymatch) { - // number of times we've matched so far - int numRepeats = 0; - - // Possible positions for the next repeat to match at - REMatch newMatch = mymatch; - REMatch last = null; - REMatch current; - // Add the '0-repeats' index - // positions.elementAt(z) == position [] in input after <<z>> matches - Vector positions = new Vector(); - positions.addElement(newMatch); - - // Declare variables used in loop - REMatch doables; - REMatch doablesLast; - REMatch recurrent; - int lastIndex = mymatch.index; - - do { - // Check for stingy match for each possibility. - if (stingy && (numRepeats >= min)) { - REMatch result = matchRest(input, newMatch); - if (result != null) { - mymatch.assignFrom(result); - return true; - } + boolean stopMatchingIfSatisfied = + (mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0; + + REMatch newMatch = matchMinimum(input, mymatch); + if (newMatch == null) return false; + + // Array of positions we have already visited + int[] visited = initVisited(); + for (REMatch m = newMatch; m != null; m = m.next) { + visited = addVisited(m.index, visited); + } + + int max1 = decreaseMax(max, min); + + newMatch = _match(input, newMatch, max1, + stopMatchingIfSatisfied, visited); + if (newMatch != null) { + mymatch.assignFrom(newMatch); + return true; + } + return false; + } + + private static int decreaseMax(int m, int n) { + if (m == Integer.MAX_VALUE) return m; + return m - n; + } + + // Array visited is an array of character positions we have already + // visited. visited[0] is used to store the effective length of the + // array. + private static int[] initVisited() { + int[] visited = new int[32]; + visited[0] = 0; + return visited; + } + + private static boolean visitedContains(int n, int[] visited) { + // Experience tells that for a small array like this, + // simple linear search is faster than binary search. + for (int i = 1; i < visited[0]; i++) { + if (n == visited[i]) return true; + } + return false; + } + + private static int[] addVisited(int n, int[] visited) { + if (visitedContains(n, visited)) return visited; + if (visited[0] >= visited.length - 1) { + int[] newvisited = new int[visited.length + 32]; + System.arraycopy(visited, 0, newvisited, 0, visited.length); + visited = newvisited; + } + visited[0]++; + visited[visited[0]] = n; + return visited; + } + + private REMatch _match(CharIndexed input, REMatch mymatch, + int max1, boolean stopMatchingIfSatisfied, + int[] visited) { + + if (max1 == 0) { + return matchRest(input, mymatch); + } + max1 = decreaseMax(max1, 1); + + REMatch.REMatchList allResults = new REMatch.REMatchList(); + + // Depth-first search + + for (REMatch cur = mymatch; cur != null; cur = cur.next) { + + REMatch cur1 = (REMatch) cur.clone(); + + if (stingy) { + REMatch results = matchRest(input, cur1); + if (results != null) { + if (stopMatchingIfSatisfied) { + return results; + } + allResults.addTail(results); + } } - doables = null; - doablesLast = null; + DO_THIS: + do { - // try next repeat at all possible positions - for (current = newMatch; current != null; current = current.next) { - recurrent = (REMatch) current.clone(); - if (token.match(input, recurrent)) { - // add all items in current to doables array - if (doables == null) { - doables = recurrent; - doablesLast = recurrent; - } else { - // Order these from longest to shortest - // Start by assuming longest (more repeats) - doablesLast.next = recurrent; + boolean emptyMatchFound = false; + REMatch doables = findDoables(token, input, cur1); + if (doables == null) break DO_THIS; + if (doables.empty) emptyMatchFound = true; + + if (!emptyMatchFound) { + REMatch.REMatchList list = new REMatch.REMatchList(); + for (REMatch m = doables; m != null; m = m.next) { + REMatch m1 = (REMatch) m.clone(); + int n = m1.index; + if (! visitedContains(n, visited)) { + visited = addVisited(n, visited); + list.addTail(m1); } - // Find new doablesLast - while (doablesLast.next != null) { - doablesLast = doablesLast.next; + } + if (list.head == null) break DO_THIS; + doables = list.head; + } + + for (REMatch m = doables; m != null; m = m.next) { + if (! emptyMatchFound) { + REMatch m1 = _match(input, m, max1, + stopMatchingIfSatisfied, visited); + if (possessive) return m1; + if (m1 != null) { + if (stopMatchingIfSatisfied) { + return m1; + } + allResults.addTail(m1); + } + } + else { + REMatch m1 = matchRest(input, m); + if (m1 != null) { + if (stopMatchingIfSatisfied) { + return m1; + } + allResults.addTail(m1); } } } - // if none of the possibilities worked out, break out of do/while - if (doables == null) break; + + } while (false); // DO_THIS only once; + + // This point itself is a candidate. + if (!stingy) { + REMatch m2 = matchRest(input, cur1); + if (m2 != null) { + if (stopMatchingIfSatisfied) { + return m2; + } + allResults.addTail(m2); + } + } + } + + return allResults.head; + } + + private REMatch matchMinimum(CharIndexed input, final REMatch mymatch) { + // Possible positions for the next repeat to match at + REMatch newMatch = mymatch; + + // number of times we've matched so far + int numRepeats = 0; + + while (numRepeats < min) { + REMatch doables = findDoables(token, input, newMatch); + + // if none of the possibilities worked out, + // it means that minimum number of repeats could not be found. + if (doables == null) return null; // reassign where the next repeat can match newMatch = doables; @@ -151,91 +292,24 @@ final class RETokenRepeated extends REToken { // increment how many repeats we've successfully found ++numRepeats; - positions.addElement(newMatch); - - // doables.index == lastIndex means an empty string - // was the longest that matched this token. - // We break here, otherwise we will fall into an endless loop. - if (doables.index == lastIndex) { - if (numRepeats < min) numRepeats = min; - break; - } - lastIndex = doables.index; - } while (numRepeats < max); - - // If there aren't enough repeats, then fail - if (numRepeats < min) return false; - - // We're greedy, but ease off until a true match is found - int posIndex = positions.size(); - - // At this point we've either got too many or just the right amount. - // See if this numRepeats works with the rest of the regexp. - REMatch allResults = null; - REMatch allResultsLast = null; - - REMatch results = null; - int indexCount = posIndex - min; - if (indexCount <= 0) { - // This case occurs when we exited the previous do loop before - // numRepeats >= min because an empty string matched the token. - // In this case, an empty string can match as many times as - // desired. - indexCount = 1; - } - while (indexCount-- > 0) { - --posIndex; - newMatch = (REMatch) positions.elementAt(posIndex); - results = matchRest(input, newMatch); - if (results != null) { - if (allResults == null) { - allResults = results; - allResultsLast = results; - } else { - // Order these from longest to shortest - // Start by assuming longest (more repeats) - allResultsLast.next = results; - } - // Find new doablesLast - while (allResultsLast.next != null) { - allResultsLast = allResultsLast.next; - } - } - // else did not match rest of the tokens, try again on smaller sample - // or break out when performing possessive matching - if (possessive) break; + if (newMatch.empty) break; } - if (allResults != null) { - mymatch.assignFrom(allResults); // does this get all? - return true; - } - // If we fall out, no matches. - return false; + return newMatch; } private REMatch matchRest(CharIndexed input, final REMatch newMatch) { REMatch current, single; - REMatch doneIndex = null; - REMatch doneIndexLast = null; + REMatch.REMatchList doneIndex = new REMatch.REMatchList(); // Test all possible matches for this number of repeats for (current = newMatch; current != null; current = current.next) { // clone() separates a single match from the chain single = (REMatch) current.clone(); if (next(input, single)) { // chain results to doneIndex - if (doneIndex == null) { - doneIndex = single; - doneIndexLast = single; - } else { - doneIndexLast.next = single; - } - // Find new doneIndexLast - while (doneIndexLast.next != null) { - doneIndexLast = doneIndexLast.next; - } + doneIndex.addTail(single); } } - return doneIndex; + return doneIndex.head; } void dump(StringBuffer os) { |