summaryrefslogtreecommitdiff
path: root/libjava/classpath/gnu/regexp/RETokenRepeated.java
diff options
context:
space:
mode:
Diffstat (limited to 'libjava/classpath/gnu/regexp/RETokenRepeated.java')
-rw-r--r--libjava/classpath/gnu/regexp/RETokenRepeated.java240
1 files changed, 240 insertions, 0 deletions
diff --git a/libjava/classpath/gnu/regexp/RETokenRepeated.java b/libjava/classpath/gnu/regexp/RETokenRepeated.java
new file mode 100644
index 00000000000..3165a6f1654
--- /dev/null
+++ b/libjava/classpath/gnu/regexp/RETokenRepeated.java
@@ -0,0 +1,240 @@
+/* gnu/regexp/RETokenRepeated.java
+ Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library. Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module. An independent module is a module which is not derived from
+or based on this library. If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so. If you do not wish to do so, delete this
+exception statement from your version. */
+
+
+package gnu.regexp;
+
+import java.util.Vector;
+
+final class RETokenRepeated extends REToken {
+ private REToken token;
+ private int min,max;
+ private boolean stingy;
+ private boolean possessive;
+
+ RETokenRepeated(int subIndex, REToken token, int min, int max) {
+ super(subIndex);
+ this.token = token;
+ this.min = min;
+ this.max = max;
+ }
+
+ /** Sets the minimal matching mode to true. */
+ void makeStingy() {
+ stingy = true;
+ }
+
+ /** Queries if this token has minimal matching enabled. */
+ boolean isStingy() {
+ return stingy;
+ }
+
+ /** Sets possessive matching mode to true. */
+ void makePossessive() {
+ possessive = true;
+ }
+
+ /** Queries if this token has possessive matching enabled. */
+ boolean isPossessive() {
+ return possessive;
+ }
+
+ /**
+ * The minimum length of a repeated token is the minimum length
+ * of the token multiplied by the minimum number of times it must
+ * match.
+ */
+ int getMinimumLength() {
+ return (min * token.getMinimumLength());
+ }
+
+ // We do need to save every possible point, but the number of clone()
+ // invocations here is really a killer for performance on non-stingy
+ // repeat operators. I'm open to suggestions...
+
+ // Hypothetical question: can you have a RE that matches 1 times,
+ // 3 times, 5 times, but not 2 times or 4 times? Does having
+ // the subexpression back-reference operator allow that?
+
+ boolean match(CharIndexed input, REMatch mymatch) {
+ // number of times we've matched so far
+ int numRepeats = 0;
+
+ // Possible positions for the next repeat to match at
+ REMatch newMatch = mymatch;
+ REMatch last = null;
+ REMatch current;
+
+ // Add the '0-repeats' index
+ // positions.elementAt(z) == position [] in input after <<z>> matches
+ Vector positions = new Vector();
+ positions.addElement(newMatch);
+
+ // Declare variables used in loop
+ REMatch doables;
+ REMatch doablesLast;
+ REMatch recurrent;
+
+ do {
+ // Check for stingy match for each possibility.
+ if (stingy && (numRepeats >= min)) {
+ REMatch result = matchRest(input, newMatch);
+ if (result != null) {
+ mymatch.assignFrom(result);
+ return true;
+ }
+ }
+
+ doables = null;
+ doablesLast = null;
+
+ // try next repeat at all possible positions
+ for (current = newMatch; current != null; current = current.next) {
+ recurrent = (REMatch) current.clone();
+ if (token.match(input, recurrent)) {
+ // add all items in current to doables array
+ if (doables == null) {
+ doables = recurrent;
+ doablesLast = recurrent;
+ } else {
+ // Order these from longest to shortest
+ // Start by assuming longest (more repeats)
+ doablesLast.next = recurrent;
+ }
+ // Find new doablesLast
+ while (doablesLast.next != null) {
+ doablesLast = doablesLast.next;
+ }
+ }
+ }
+ // if none of the possibilities worked out, break out of do/while
+ if (doables == null) break;
+
+ // reassign where the next repeat can match
+ newMatch = doables;
+
+ // increment how many repeats we've successfully found
+ ++numRepeats;
+
+ positions.addElement(newMatch);
+ } while (numRepeats < max);
+
+ // If there aren't enough repeats, then fail
+ if (numRepeats < min) return false;
+
+ // We're greedy, but ease off until a true match is found
+ int posIndex = positions.size();
+
+ // At this point we've either got too many or just the right amount.
+ // See if this numRepeats works with the rest of the regexp.
+ REMatch allResults = null;
+ REMatch allResultsLast = null;
+
+ REMatch results = null;
+ while (--posIndex >= min) {
+ newMatch = (REMatch) positions.elementAt(posIndex);
+ results = matchRest(input, newMatch);
+ if (results != null) {
+ if (allResults == null) {
+ allResults = results;
+ allResultsLast = results;
+ } else {
+ // Order these from longest to shortest
+ // Start by assuming longest (more repeats)
+ allResultsLast.next = results;
+ }
+ // Find new doablesLast
+ while (allResultsLast.next != null) {
+ allResultsLast = allResultsLast.next;
+ }
+ }
+ // else did not match rest of the tokens, try again on smaller sample
+ // or break out when performing possessive matching
+ if (possessive) break;
+ }
+ if (allResults != null) {
+ mymatch.assignFrom(allResults); // does this get all?
+ return true;
+ }
+ // If we fall out, no matches.
+ return false;
+ }
+
+ private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
+ REMatch current, single;
+ REMatch doneIndex = null;
+ REMatch doneIndexLast = null;
+ // Test all possible matches for this number of repeats
+ for (current = newMatch; current != null; current = current.next) {
+ // clone() separates a single match from the chain
+ single = (REMatch) current.clone();
+ if (next(input, single)) {
+ // chain results to doneIndex
+ if (doneIndex == null) {
+ doneIndex = single;
+ doneIndexLast = single;
+ } else {
+ doneIndexLast.next = single;
+ }
+ // Find new doneIndexLast
+ while (doneIndexLast.next != null) {
+ doneIndexLast = doneIndexLast.next;
+ }
+ }
+ }
+ return doneIndex;
+ }
+
+ void dump(StringBuffer os) {
+ os.append("(?:");
+ token.dumpAll(os);
+ os.append(')');
+ if ((max == Integer.MAX_VALUE) && (min <= 1))
+ os.append( (min == 0) ? '*' : '+' );
+ else if ((min == 0) && (max == 1))
+ os.append('?');
+ else {
+ os.append('{').append(min);
+ if (max > min) {
+ os.append(',');
+ if (max != Integer.MAX_VALUE) os.append(max);
+ }
+ os.append('}');
+ }
+ if (stingy) os.append('?');
+ }
+}