summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
author: Father Chrysostomos <sprout@cpan.org> 2013-11-18 21:53:43 -0800
committer: Father Chrysostomos <sprout@cpan.org> 2013-11-19 13:06:24 -0800
commit: 311cc1adfb2eac3d98a549ed5f912313fc528cea (patch)
tree: c674210ca07cfb9d1c920f3a4e49f5490280f240 /regcomp.c
parent: 9f57786ad809c9db4556a0b1b57e6fcde8b8ae0b (diff)
download: perl-311cc1adfb2eac3d98a549ed5f912313fc528cea.tar.gz
Move <-- HERE arrow for ‘Switch condition not recognized’
$ ./perl -Ilib -e '/(?(1(?#...)))/'
Switch condition not recognized in regex; marked by <-- HERE in m/(?(1( <-- HERE ?#...)))/ at -e line 1.
$ ./perl -Ilib -e '/(?(1x(?#...)))/'
Switch condition not recognized in regex; marked by <-- HERE in m/(?(1x(?#...) <-- HERE ))/ at -e line 1.

With the first one-liner, the arrow in the error message is pointing to the first offending character.

With the second one-liner, the arrow points to the comment following the offending character.

The logic for positioning the arrow is a little odd. The idea is supposed to be something like:

    if current_character++ is not ')'
        croak with the arrow right before current_character

But nextchar() is used instead of ++, and nextchar() skips trailing whitespace and comments after incrementing the current parse position.

We already have code right here to revert back to the previous parse position and then increment it by one character, for the sake of UTF8. Indeed, it behaves differently if you add a non-ASCII character under ‘use utf8’:

$ ./perl -Ilib -e 'use utf8; /é(?(1x(?#...)))/'
Switch condition not recognized in regex; marked by <-- HERE in m/?(?(1x <-- HERE (?#...)))/ at -e line 1.

So what this commit does is extend that backtrack logic to happen all the time, not just with UTF8.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c | 9
1 files changed, 3 insertions, 6 deletions
diff --git a/regcomp.c b/regcomp.c
index c9464effe5..e78d2fc0a5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9573,14 +9573,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
insert_if_check_paren:
if (*(tmp = nextchar(pRExC_state)) != ')') {
- if ( UTF ) {
- /* Like the name implies, nextchar deals in chars,
- * not characters, so if under UTF, undo its work
+ /* nextchar also skips comments, so undo its work
* and skip over the next character.
*/
- RExC_parse = tmp;
- RExC_parse += UTF8SKIP(RExC_parse);
- }
+ RExC_parse = tmp;
+ RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
vFAIL("Switch condition not recognized");
}
insert_if: