summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Jim Meyering <meyering@fb.com> 2013-09-16 10:06:14 -0700
committer Jim Meyering <meyering@fb.com> 2013-09-22 22:01:42 -0700
commit 7e6682c62b92e15e4554ed715b946366275e9b94 (patch)
tree 84d76b19f7abf99abf979ef02f4ba85e33f18a7e /src
parent f1e1fb2c5c1538c313f8488ef687b9a96684f54e (diff)
download grep-7e6682c62b92e15e4554ed715b946366275e9b94.tar.gz
maint: dfa: improve comments and formatting
* src/dfa.c (add_utf8_anychar): Correct wording/alignment of a comment.
(dfaexec): Add curly braces around multi-line while statement within a "then" block.
(ANYCHAR): Clarify comment: "." does not match an invalid UTF-8 character.
(parse_bracket_exp): Improve comment.
Diffstat (limited to 'src')
-rw-r--r-- src/dfa.c 65
1 files changed, 33 insertions, 32 deletions
diff --git a/src/dfa.c b/src/dfa.c
index b447a8a0..45e3c407 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -243,7 +243,7 @@ enum
RPAREN, /* RPAREN never appears in the parse tree. */
ANYCHAR, /* ANYCHAR is a terminal symbol that matches
- any multibyte (or single byte) characters.
+ a valid multibyte (or single byte) character.
It is used only if MB_CUR_MAX > 1. */
MBCSET, /* MBCSET is similar to CSET, but for
@@ -909,8 +909,7 @@ find_pred (const char *str)
}
/* Multibyte character handling sub-routine for lex.
- This function parse a bracket expression and build a struct
- mb_char_classes. */
+ Parse a bracket expression and build a struct mb_char_classes. */
static token
parse_bracket_exp (void)
{
@@ -1633,7 +1632,7 @@ add_utf8_anychar (void)
{
#if MBS_SUPPORT
static const charclass utf8_classes[5] = {
- {0, 0, 0, 0, ~0, ~0, 0, 0}, /* 80-bf: non-lead bytes */
+ {0, 0, 0, 0, ~0, ~0, 0, 0}, /* 80-bf: non-leading bytes */
{~0, ~0, ~0, ~0, 0, 0, 0, 0}, /* 00-7f: 1-byte sequence */
{0, 0, 0, 0, 0, 0, ~3, 0}, /* c2-df: 2-byte sequence */
{0, 0, 0, 0, 0, 0, 0, 0xffff}, /* e0-ef: 3-byte sequence */
@@ -3322,37 +3321,39 @@ dfaexec (struct dfa *d, char const *begin, char *end,
for (;;)
{
if (d->mb_cur_max > 1)
- while ((t = trans[s]) != NULL)
- {
- if (p > buf_end)
- break;
- s1 = s;
- SKIP_REMAINS_MB_IF_INITIAL_STATE (s, p);
+ {
+ while ((t = trans[s]) != NULL)
+ {
+ if (p > buf_end)
+ break;
+ s1 = s;
+ SKIP_REMAINS_MB_IF_INITIAL_STATE (s, p);
- if (d->states[s].mbps.nelem == 0)
- {
- s = t[*p++];
- continue;
- }
+ if (d->states[s].mbps.nelem == 0)
+ {
+ s = t[*p++];
+ continue;
+ }
- /* Falling back to the glibc matcher in this case gives
- better performance (up to 25% better on [a-z], for
- example) and enables support for collating symbols and
- equivalence classes. */
- if (backref)
- {
- *backref = 1;
- free (mblen_buf);
- free (inputwcs);
- *end = saved_end;
- return (char *) p;
- }
+ /* Falling back to the glibc matcher in this case gives
+ better performance (up to 25% better on [a-z], for
+ example) and enables support for collating symbols and
+ equivalence classes. */
+ if (backref)
+ {
+ *backref = 1;
+ free (mblen_buf);
+ free (inputwcs);
+ *end = saved_end;
+ return (char *) p;
+ }
- /* Can match with a multibyte character (and multi character
- collating element). Transition table might be updated. */
- s = transit_state (d, s, &p);
- trans = d->trans;
- }
+ /* Can match with a multibyte character (and multi character
+ collating element). Transition table might be updated. */
+ s = transit_state (d, s, &p);
+ trans = d->trans;
+ }
+ }
else
{
while ((t = trans[s]) != NULL)