summaryrefslogtreecommitdiff
path: root/src/dfa.h
diff options
context:
space:
mode:
authorAlain Magloire <alainm@rcsm.ee.mcgill.ca>2001-03-07 04:11:26 +0000
committerAlain Magloire <alainm@rcsm.ee.mcgill.ca>2001-03-07 04:11:26 +0000
commit519059dbf2c9dfe4e60621f84a6c4490a941b4e7 (patch)
tree1ff381805f8f229579a9818966915c81e8c18a2f /src/dfa.h
parent2b9c2eb1c5f396c5ba1727278caa6ba04b2f415f (diff)
downloadgrep-519059dbf2c9dfe4e60621f84a6c4490a941b4e7.tar.gz
Fix the search bugs
Only the Regex patterns should be split into an array, patterns[]. The dfa and KWset compiled patterns should remain global and the patterns compiled all at once. * src/search.c : include "error.h" and "xalloc.h" to get prototypes of x*alloc() and error(). (kwsinit) : Revert to the previous behaviour; takes no argument. (kwsmusts) : Likewise. (Gcompile) : For the regex patterns, split them so that each pattern is put in a different compiled structure in patterns[]. The patterns are given to dfacomp() and kwsmusts() as is. (Ecompile) : Likewise. (Fcompile) : Revert to the old behaviour of compiling the entire set of patterns in one shot. (EGexecute) : If falling back to GNU regex for the matching, loop over the array of compiled patterns[] to find a match. (error) : Many error () calls were made with arguments in the wrong order. * tests/file.sh : Simple test to check for patterns in files. Reaction to a bug report filed by Greg Louis <glouis@dynamicro.on.ca>. In multibyte environments, handle multibyte characters as single characters in bracket expressions. * src/dfa.h (mb_char_classes) : new structure. (mbcsets) : new variable. (nmbcsets) : new variable. (mbcsets_alloc) : new variable. * src/dfa.c (prtok) : handle MBCSET. (fetch_wc) : new function to fetch a wide character. (parse_bracket_exp_mb) : new function to handle multibyte characters in lex(). (lex) : invoke parse_bracket_exp_mb() for multibyte bracket expressions. (atom) : handle MBCSET. (epsclosure) : likewise. (dfaanalyze) : likewise. (dfastate) : likewise. (match_mb_charset) : new function to judge whether a bracket expression matches a multibyte character. (check_matching_with_multibyte_ops) : handle MBCSET. (dfainit) : initialize new variables. (dfafree) : free new variables.
Diffstat (limited to 'src/dfa.h')
-rw-r--r--src/dfa.h30
1 files changed, 30 insertions, 0 deletions
diff --git a/src/dfa.h b/src/dfa.h
index 0c6cdd72..bbc0457a 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -152,6 +152,9 @@ typedef enum
ANYCHAR, /* ANYCHAR is a terminal symbol that matches
any multibyte(or singlebyte) characters.
It is used only if MB_CUR_MAX > 1. */
+
+ MBCSET, /* MBCSET is similar to CSET, but for
+ multibyte characters. */
#endif /* MBS_SUPPORT */
CSET /* CSET and (and any value greater) is a
@@ -258,6 +261,26 @@ struct dfamust
struct dfamust *next;
};
+#ifdef MBS_SUPPORT
+/* A bracket expression, e.g. [a-c], [[:alpha:]], etc.
+ Each kind of member is stored in its own array below. */
+struct mb_char_classes
+{
+ int invert; /* NOTE(review): presumably nonzero for a negated set ([^...]) -- confirm in dfa.c. */
+ wchar_t *chars; /* Normal characters. */
+ int nchars; /* Presumably the number of entries in chars. */
+ wctype_t *ch_classes; /* Character classes. */
+ int nch_classes; /* Presumably the number of entries in ch_classes. */
+ wchar_t *range_sts; /* Range characters (start of the range). */
+ wchar_t *range_ends; /* Range characters (end of the range). */
+ int nranges; /* Presumably the number of (range_sts, range_ends) pairs. */
+ char **equivs; /* Equivalence classes. */
+ int nequivs; /* Presumably the number of entries in equivs. */
+ char **coll_elems; /* Collating elements. */
+ int ncoll_elems; /* Presumably the number of entries in coll_elems. */
+};
+#endif
+
/* A compiled regular expression. */
struct dfa
{
@@ -286,6 +309,8 @@ struct dfa
a multibyte character.
bit 0 : tokens[i] is a singlebyte character, or the 1st-byte of
a multibyte character.
+ if tokens[i] = MBCSET
+ ("the index of mbcsets corresponding to this operator" << 2) + 3
e.g.
tokens
@@ -294,6 +319,11 @@ struct dfa
multibyte_prop
= 3 , 1 , 0 , 2 , 3
*/
+
+ /* Array of the bracket expressions in the DFA. */
+ struct mb_char_classes *mbcsets;
+ int nmbcsets;
+ int mbcsets_alloc;
#endif
/* Stuff owned by the state builder. */