diff options
author | Jim Meyering <meyering@fb.com> | 2013-09-16 10:06:14 -0700 |
---|---|---|
committer | Jim Meyering <meyering@fb.com> | 2013-09-22 22:01:42 -0700 |
commit | 7e6682c62b92e15e4554ed715b946366275e9b94 (patch) | |
tree | 84d76b19f7abf99abf979ef02f4ba85e33f18a7e /src | |
parent | f1e1fb2c5c1538c313f8488ef687b9a96684f54e (diff) | |
download | grep-7e6682c62b92e15e4554ed715b946366275e9b94.tar.gz |
maint: dfa: improve comments and formatting
* src/dfa.c (add_utf8_anychar): Correct wording/alignment of a comment.
(dfaexec): Add curly braces around multi-line while statement within
a "then" block.
(ANYCHAR): Clarify comment: "." does not match an invalid UTF8 character.
(parse_bracket_exp): Improve comment.
Diffstat (limited to 'src')
-rw-r--r-- | src/dfa.c | 65 |
1 files changed, 33 insertions, 32 deletions
@@ -243,7 +243,7 @@ enum RPAREN, /* RPAREN never appears in the parse tree. */ ANYCHAR, /* ANYCHAR is a terminal symbol that matches - any multibyte (or single byte) characters. + a valid multibyte (or single byte) character. It is used only if MB_CUR_MAX > 1. */ MBCSET, /* MBCSET is similar to CSET, but for @@ -909,8 +909,7 @@ find_pred (const char *str) } /* Multibyte character handling sub-routine for lex. - This function parse a bracket expression and build a struct - mb_char_classes. */ + Parse a bracket expression and build a struct mb_char_classes. */ static token parse_bracket_exp (void) { @@ -1633,7 +1632,7 @@ add_utf8_anychar (void) { #if MBS_SUPPORT static const charclass utf8_classes[5] = { - {0, 0, 0, 0, ~0, ~0, 0, 0}, /* 80-bf: non-lead bytes */ + {0, 0, 0, 0, ~0, ~0, 0, 0}, /* 80-bf: non-leading bytes */ {~0, ~0, ~0, ~0, 0, 0, 0, 0}, /* 00-7f: 1-byte sequence */ {0, 0, 0, 0, 0, 0, ~3, 0}, /* c2-df: 2-byte sequence */ {0, 0, 0, 0, 0, 0, 0, 0xffff}, /* e0-ef: 3-byte sequence */ @@ -3322,37 +3321,39 @@ dfaexec (struct dfa *d, char const *begin, char *end, for (;;) { if (d->mb_cur_max > 1) - while ((t = trans[s]) != NULL) - { - if (p > buf_end) - break; - s1 = s; - SKIP_REMAINS_MB_IF_INITIAL_STATE (s, p); + { + while ((t = trans[s]) != NULL) + { + if (p > buf_end) + break; + s1 = s; + SKIP_REMAINS_MB_IF_INITIAL_STATE (s, p); - if (d->states[s].mbps.nelem == 0) - { - s = t[*p++]; - continue; - } + if (d->states[s].mbps.nelem == 0) + { + s = t[*p++]; + continue; + } - /* Falling back to the glibc matcher in this case gives - better performance (up to 25% better on [a-z], for - example) and enables support for collating symbols and - equivalence classes. 
*/ - if (backref) - { - *backref = 1; - free (mblen_buf); - free (inputwcs); - *end = saved_end; - return (char *) p; - } + /* Falling back to the glibc matcher in this case gives + better performance (up to 25% better on [a-z], for + example) and enables support for collating symbols and + equivalence classes. */ + if (backref) + { + *backref = 1; + free (mblen_buf); + free (inputwcs); + *end = saved_end; + return (char *) p; + } - /* Can match with a multibyte character (and multi character - collating element). Transition table might be updated. */ - s = transit_state (d, s, &p); - trans = d->trans; - } + /* Can match with a multibyte character (and multi character + collating element). Transition table might be updated. */ + s = transit_state (d, s, &p); + trans = d->trans; + } + } else { while ((t = trans[s]) != NULL) |