summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dquote_static.c29
-rw-r--r--embed.fnc2
-rw-r--r--embed.h1
-rw-r--r--pod/perldelta.pod5
-rw-r--r--pod/perldiag.pod9
-rw-r--r--proto.h7
-rw-r--r--regcomp.c34
-rw-r--r--t/re/reg_mesg.t4
-rw-r--r--toke.c8
9 files changed, 96 insertions, 3 deletions
diff --git a/dquote_static.c b/dquote_static.c
index 61845ccc92..5a22993ac0 100644
--- a/dquote_static.c
+++ b/dquote_static.c
@@ -297,6 +297,35 @@ S_grok_bslash_x(pTHX_ char **s, UV *uv, const char** error_msg,
return TRUE;
}
+STATIC char*
+S_form_short_octal_warning(pTHX_
+ const char * const s, /* Points to first non-octal */
+ const STRLEN len /* Length of octals string, so
+ (s-len) points to first
+ octal */
+) {
+ /* Return a character string consisting of a warning message for when a
+ * string constant in octal is weird, like "\078". */
+
+ const char * sans_leading_zeros = s - len;
+
+ PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING;
+
+ assert(*s == '8' || *s == '9');
+
+ /* Remove the leading zeros, retaining one zero so won't be zero length */
+ while (*sans_leading_zeros == '0') sans_leading_zeros++;
+ if (sans_leading_zeros == s) {
+ sans_leading_zeros--;
+ }
+
+ return Perl_form(aTHX_
+ "'%.*s' resolved to '\\o{%.*s}%c'",
+ (int) (len + 2), s - len - 1,
+ (int) (s - sans_leading_zeros), sans_leading_zeros,
+ *s);
+}
+
/*
* Local variables:
* c-indentation-style: bsd
diff --git a/embed.fnc b/embed.fnc
index 730691a8fa..b3b931cf0c 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -751,6 +751,8 @@ EMiR |bool |grok_bslash_x |NN char** s|NN UV* uv \
|const bool strict \
|const bool silence_non_portable \
|const bool utf8
+EMsPR |char*|form_short_octal_warning|NN const char * const s \
+ |const STRLEN len
#endif
Apd |UV |grok_hex |NN const char* start|NN STRLEN* len_p|NN I32* flags|NULLOK NV *result
Apd |int |grok_number |NN const char *pv|STRLEN len|NULLOK UV *valuep
diff --git a/embed.h b/embed.h
index 8289cecf0e..8e8279fc9a 100644
--- a/embed.h
+++ b/embed.h
@@ -961,6 +961,7 @@
#define _core_swash_init(a,b,c,d,e,f,g) Perl__core_swash_init(aTHX_ a,b,c,d,e,f,g)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
+#define form_short_octal_warning(a,b) S_form_short_octal_warning(aTHX_ a,b)
#define grok_bslash_c(a,b,c) S_grok_bslash_c(aTHX_ a,b,c)
#define grok_bslash_o(a,b,c,d,e,f,g) S_grok_bslash_o(aTHX_ a,b,c,d,e,f,g)
#define grok_bslash_x(a,b,c,d,e,f,g) S_grok_bslash_x(aTHX_ a,b,c,d,e,f,g)
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index ea4db738fb..a43cf72071 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -210,8 +210,13 @@ XXX L<message|perldiag/"message">
=item *
+L<'%s' resolved to '\o{%s}%d'|perldiag/"'%s' resolved to '\o{%s}%d'">
+
+=item *
+
XXX L<message|perldiag/"message">
+
=back
=head2 Changes to Existing Diagnostics
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 9e6ee34ece..2be0f791ae 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -4313,6 +4313,15 @@ terminates. You might use ^# instead. See L<perlform>.
search list. So the additional elements in the replacement list
are meaningless.
+=item '%s' resolved to '\o{%s}%d'
+
+(W misc, regexp) You wrote something like C<\08>, or C<\179> in a
+double-quotish string. All but the last digit is treated as a single
+character, specified in octal. The last digit is the next character in
+the string. To tell Perl that this is indeed what you want, you can use
+the C<\o{ }> syntax, or use exactly three digits to specify the octal
+for the character.
+
=item Reversed %s= operator
(W syntax) You wrote your assignment operator backwards. The = must
diff --git a/proto.h b/proto.h
index b4d81d6128..7428380769 100644
--- a/proto.h
+++ b/proto.h
@@ -6788,6 +6788,13 @@ PERL_CALLCONV SV* Perl__core_swash_init(pTHX_ const char* pkg, const char* name,
#endif
#if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C)
+STATIC char* S_form_short_octal_warning(pTHX_ const char * const s, const STRLEN len)
+ __attribute__warn_unused_result__
+ __attribute__pure__
+ __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING \
+ assert(s)
+
STATIC char S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
__attribute__warn_unused_result__;
diff --git a/regcomp.c b/regcomp.c
index c0a37b265b..05e9fe55f6 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -535,6 +535,13 @@ static const scan_data_t zero_scan_data =
Simple_vFAIL4(m, a1, a2, a3); \
} STMT_END
+/* m is not necessarily a "literal string", in this macro */
+#define reg_warn_non_literal_string(loc, m) STMT_START { \
+ const IV offset = loc - RExC_precomp; \
+ Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s" REPORT_LOCATION, \
+ m, (int)offset, RExC_precomp, RExC_precomp + offset); \
+} STMT_END
+
#define ckWARNreg(loc,m) STMT_START { \
const IV offset = loc - RExC_precomp; \
Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
@@ -10700,6 +10707,15 @@ tryagain:
REQUIRE_UTF8;
}
p += numlen;
+ if (SIZE_ONLY /* like \08, \178 */
+ && numlen < 3
+ && p < RExC_end
+ && isDIGIT(*p) && ckWARN(WARN_REGEXP))
+ {
+ reg_warn_non_literal_string(
+ p + 1,
+ form_short_octal_warning(p, numlen));
+ }
}
else { /* Not to be treated as an octal constant, go
find backref */
@@ -12166,11 +12182,25 @@ parseit:
numlen = (strict) ? 4 : 3;
value = grok_oct(--RExC_parse, &numlen, &flags, NULL);
RExC_parse += numlen;
- if (strict) {
- if (numlen != 3) {
+ if (numlen != 3) {
+ SAVEFREESV(listsv); /* In case warnings are fatalized */
+ if (strict) {
RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
vFAIL("Need exactly 3 octal digits");
}
+ else if (! SIZE_ONLY /* like \08, \178 */
+ && numlen < 3
+ && RExC_parse < RExC_end
+ && isDIGIT(*RExC_parse)
+ && ckWARN(WARN_REGEXP))
+ {
+ SAVEFREESV(RExC_rx_sv);
+ reg_warn_non_literal_string(
+ RExC_parse + 1,
+ form_short_octal_warning(RExC_parse, numlen));
+ (void)ReREFCNT_inc(RExC_rx_sv);
+ }
+ SvREFCNT_inc_simple_void_NN(listsv);
}
if (PL_encoding && value < 0x100)
goto recode_encoding;
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index 7487421e28..14e9aceee2 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -165,6 +165,10 @@ my @warning = (
'm/[\pM-x]/' => 'False [] range "\pM-" in regex; marked by {#} in m/[\pM-{#}x]/',
"m'\\y'" => 'Unrecognized escape \y passed through in regex; marked by {#} in m/\y{#}/',
'/x{3,1}/' => 'Quantifier {n,m} with n > m can\'t match in regex; marked by {#} in m/x{3,1}{#}/',
+ '/\08/' => '\'\08\' resolved to \'\o{0}8\' in regex; marked by {#} in m/\08{#}/',
+ '/\018/' => '\'\018\' resolved to \'\o{1}8\' in regex; marked by {#} in m/\018{#}/',
+ '/[\08]/' => '\'\08\' resolved to \'\o{0}8\' in regex; marked by {#} in m/[\08{#}]/',
+ '/[\018]/' => '\'\018\' resolved to \'\o{1}8\' in regex; marked by {#} in m/[\018{#}]/',
'/(?[ \t ])/' => 'The regex_sets feature is experimental in regex; marked by {#} in m/(?[{#} \t ])/',
);
diff --git a/toke.c b/toke.c
index 8c53580887..987a68d0f5 100644
--- a/toke.c
+++ b/toke.c
@@ -3277,10 +3277,16 @@ S_scan_const(pTHX_ char *start)
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
{
- I32 flags = 0;
+ I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
STRLEN len = 3;
uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
s += len;
+ if (len < 3 && s < send && isDIGIT(*s)
+ && ckWARN(WARN_MISC))
+ {
+ Perl_warner(aTHX_ packWARN(WARN_MISC),
+ "%s", form_short_octal_warning(s, len));
+ }
}
goto NUM_ESCAPE_INSERT;