summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- lib/charnames.pm 4
-rw-r--r-- lib/feature.pm 6
-rw-r--r-- pod/perldebguts.pod 28
-rw-r--r-- pod/perlfunc.pod 12
-rw-r--r-- pod/perlop.pod 2
-rw-r--r-- pod/perlre.pod 6
-rw-r--r-- pod/perlreapi.pod 2
-rw-r--r-- pod/perlunicode.pod 4
-rw-r--r-- pod/perlunifaq.pod 2
-rw-r--r-- regcomp.sym 24
-rwxr-xr-x regen/feature.pl 6
-rw-r--r-- regen/mk_PL_charclass.pl 2
-rw-r--r-- regnodes.h 24
-rw-r--r-- t/re/re_tests 2
14 files changed, 62 insertions, 62 deletions
diff --git a/lib/charnames.pm b/lib/charnames.pm
index 52f8cee9c4..97cafed73e 100644
--- a/lib/charnames.pm
+++ b/lib/charnames.pm
@@ -1,7 +1,7 @@
package charnames;
use strict;
use warnings;
-our $VERSION = '1.39';
+our $VERSION = '1.40';
use unicore::Name; # mktables-generated algorithmically-defined names
use _charnames (); # The submodule for this where most of the work gets done
@@ -228,7 +228,7 @@ input name is that of a character that won't fit into a byte (i.e., whose
ordinal is above 255).
Otherwise, any string that includes a C<\N{I<charname>}> or
-C<S<\N{U+I<code point>}>> will automatically have Unicode semantics (see
+C<S<\N{U+I<code point>}>> will automatically have Unicode rules (see
L<perlunicode/Byte and Character Semantics>).
=head1 LOOSE MATCHES
diff --git a/lib/feature.pm b/lib/feature.pm
index 32dc9300e3..222cdfd8b8 100644
--- a/lib/feature.pm
+++ b/lib/feature.pm
@@ -5,7 +5,7 @@
package feature;
-our $VERSION = '1.35';
+our $VERSION = '1.36';
our %feature = (
fc => 'feature_fc',
@@ -141,7 +141,7 @@ This feature is available starting with Perl 5.10.
=head2 The 'unicode_strings' feature
-C<use feature 'unicode_strings'> tells the compiler to use Unicode semantics
+C<use feature 'unicode_strings'> tells the compiler to use Unicode rules
in all string operations executed within its scope (unless they are also
within the scope of either C<use locale> or C<use bytes>). The same applies
to all regular expressions compiled within the scope, even if executed outside
@@ -149,7 +149,7 @@ it. It does not change the internal representation of strings, but only how
they are interpreted.
C<no feature 'unicode_strings'> tells the compiler to use the traditional
-Perl semantics wherein the native character set semantics is used unless it is
+Perl rules wherein the native character set rules are used unless it is
clear to Perl that Unicode is desired. This can lead to some surprises
when the behavior suddenly changes. (See
L<perlunicode/The "Unicode Bug"> for details.) For this reason, if you are
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index 526124e96b..0554b96e71 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -569,19 +569,19 @@ will be lost.
MEOL no Same, assuming multiline.
SEOL no Same, assuming singleline.
BOUND no Match "" at any word boundary using native
- charset semantics for non-utf8
+ charset rules for non-utf8
BOUNDL no Match "" at any locale word boundary
BOUNDU no Match "" at any word boundary using Unicode
- semantics
+ rules
BOUNDA no Match "" at any word boundary using ASCII
- semantics
+ rules
NBOUND no Match "" at any word non-boundary using
- native charset semantics for non-utf8
+ native charset rules for non-utf8
NBOUNDL no Match "" at any locale word non-boundary
NBOUNDU no Match "" at any word non-boundary using
- Unicode semantics
+ Unicode rules
NBOUNDA no Match "" at any word non-boundary using
- ASCII semantics
+ ASCII rules
GPOS no Matches where last m//g left off.
# [Special] alternatives:
@@ -680,25 +680,25 @@ will be lost.
REF num 1 Match some already matched string
REFF num 1 Match already matched string, folded using
- native charset semantics for non-utf8
+ native charset rules for non-utf8
REFFL num 1 Match already matched string, folded in loc.
REFFU num 1 Match already matched string, folded using
- unicode semantics for non-utf8
+ unicode rules for non-utf8
REFFA num 1 Match already matched string, folded using
- unicode semantics for non-utf8, no mixing
- ASCII, non-ASCII
+ unicode rules for non-utf8, no mixing ASCII,
+ non-ASCII
# Named references. Code in regcomp.c assumes that these all are after
# the numbered references
NREF no-sv 1 Match some already matched string
NREFF no-sv 1 Match already matched string, folded using
- native charset semantics for non-utf8
+ native charset rules for non-utf8
NREFFL no-sv 1 Match already matched string, folded in loc.
NREFFU num 1 Match already matched string, folded using
- unicode semantics for non-utf8
+ unicode rules for non-utf8
NREFFA num 1 Match already matched string, folded using
- unicode semantics for non-utf8, no mixing
- ASCII, non-ASCII
+ unicode rules for non-utf8, no mixing ASCII,
+ non-ASCII
IFMATCH off 1 2 Succeeds if the following matches.
UNLESSM off 1 2 Fails if the following matches.
diff --git a/pod/perlfunc.pod b/pod/perlfunc.pod
index f0ffd0679d..14c5171ae5 100644
--- a/pod/perlfunc.pod
+++ b/pod/perlfunc.pod
@@ -3327,20 +3327,20 @@ What gets returned depends on several factors:
=item If C<use bytes> is in effect:
-The results follow ASCII semantics. Only the characters C<A-Z> change,
+The results follow ASCII rules. Only the characters C<A-Z> change,
to C<a-z> respectively.
=item Otherwise, if C<use locale> (but not C<use locale ':not_characters'>) is in effect:
Respects current LC_CTYPE locale for code points < 256; and uses Unicode
-semantics for the remaining code points (this last can only happen if
+rules for the remaining code points (this last can only happen if
the UTF8 flag is also set). See L<perllocale>.
Starting in v5.20, Perl will use full Unicode rules if the locale is
UTF-8. Otherwise, there is a deficiency in this scheme, which is that
case changes that cross the 255/256
boundary are not well-defined. For example, the lower case of LATIN CAPITAL
-LETTER SHARP S (U+1E9E) in Unicode semantics is U+00DF (on ASCII
+LETTER SHARP S (U+1E9E) in Unicode rules is U+00DF (on ASCII
platforms). But under C<use locale> (prior to v5.20 or not a UTF-8
locale), the lower case of U+1E9E is
itself, because 0xDF may not be LATIN SMALL LETTER SHARP S in the
@@ -3351,15 +3351,15 @@ many) where the 255/256 boundary would otherwise be crossed.
=item Otherwise, if EXPR has the UTF8 flag set:
-Unicode semantics are used for the case change.
+Unicode rules are used for the case change.
=item Otherwise, if C<use feature 'unicode_strings'> or C<use locale ':not_characters'> is in effect:
-Unicode semantics are used for the case change.
+Unicode rules are used for the case change.
=item Otherwise:
-ASCII semantics are used for the case change. The lowercase of any character
+ASCII rules are used for the case change. The lowercase of any character
outside the ASCII range is the character itself.
=back
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 3e1553a42f..4b719895f3 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -1694,7 +1694,7 @@ modifier has is not propagated, being restricted to those patterns
explicitly using it.
The last four modifiers listed above, added in Perl 5.14,
-control the character set semantics, but C</a> is the only one you are likely
+control the character set rules, but C</a> is the only one you are likely
to want to specify explicitly; the other three are selected
automatically by various pragmas.
diff --git a/pod/perlre.pod b/pod/perlre.pod
index a67a99cf4c..c2026d801b 100644
--- a/pod/perlre.pod
+++ b/pod/perlre.pod
@@ -98,7 +98,7 @@ after the match regardless of the modifier.
=item a, d, l and u
X</a> X</d> X</l> X</u>
-These modifiers, all new in 5.14, affect which character-set semantics
+These modifiers, all new in 5.14, affect which character-set rules
(Unicode, etc.) are used, as described below in
L</Character set modifiers>.
@@ -182,7 +182,7 @@ X</x>
=head3 Character set modifiers
C</d>, C</u>, C</a>, and C</l>, available starting in 5.14, are called
-the character set modifiers; they affect the character set semantics
+the character set modifiers; they affect the character set rules
used for the regular expression.
The C</d>, C</u>, and C</l> modifiers are not likely to be of much use
@@ -410,7 +410,7 @@ between C<\w> and C<\W>, using the C</a> definitions of them (similarly
for C<\B>).
Otherwise, C</a> behaves like the C</u> modifier, in that
-case-insensitive matching uses Unicode semantics; for example, "k" will
+case-insensitive matching uses Unicode rules; for example, "k" will
match the Unicode C<\N{KELVIN SIGN}> under C</i> matching, and code
points in the Latin1 range, above ASCII, will have Unicode rules when it
comes to case-insensitive matching.
diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod
index 9030e0de22..3e25626cc5 100644
--- a/pod/perlreapi.pod
+++ b/pod/perlreapi.pod
@@ -143,7 +143,7 @@ TODO: Document this
=item Character set
-The character set semantics are determined by an enum that is contained
+The character set rules are determined by an enum that is contained
in this field. This is still experimental and subject to change, but
the current interface returns the rules by use of the in-line function
C<get_regex_charset(const U32 flags)>. The only currently documented
diff --git a/pod/perlunicode.pod b/pod/perlunicode.pod
index 72b65de42b..95db7ac355 100644
--- a/pod/perlunicode.pod
+++ b/pod/perlunicode.pod
@@ -92,13 +92,13 @@ without additional information from the user, Perl decides in favor of
compatibility and chooses to use byte semantics.
When C<use locale> (but not C<use locale ':not_characters'>) is in
-effect, Perl uses the semantics associated with the current locale.
+effect, Perl uses the rules associated with the current locale.
(C<use locale> overrides C<use feature 'unicode_strings'> in the same scope;
while C<use locale ':not_characters'> effectively also selects
C<use feature 'unicode_strings'> in its scope; see L<perllocale>.)
Otherwise, Perl uses the platform's native
byte semantics for characters whose code points are less than 256, and
-Unicode semantics for those greater than 255. That means that non-ASCII
+Unicode rules for those greater than 255. That means that non-ASCII
characters are undefined except for their
ordinal numbers. This means that none have case (upper and lower), nor are any
a member of character classes, like C<[:alpha:]> or C<\w>. (But all do belong
diff --git a/pod/perlunifaq.pod b/pod/perlunifaq.pod
index 93997683c4..19eadd4cab 100644
--- a/pod/perlunifaq.pod
+++ b/pod/perlunifaq.pod
@@ -155,7 +155,7 @@ Unicode, whether the string is encoded in UTF-8 or not, thus avoiding
the problem.
However, on earlier Perls, or if you pass strings to subroutines outside
-the feature's scope, you can force Unicode semantics by changing the
+the feature's scope, you can force Unicode rules by changing the
encoding to UTF-8 by doing C<utf8::upgrade($string)>. This can be used
safely on any string, as it checks and does not change strings that have
already been upgraded.
diff --git a/regcomp.sym b/regcomp.sym
index a1981862cc..bea2a8e716 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -38,15 +38,15 @@ SEOL EOL, no ; Same, assuming singleline.
# in regcomp.c uses the enum value of the modifier as an offset from the /d
# version. The complements must come after the non-complements.
# BOUND, POSIX and their complements are affected, as well as EXACTF.
-BOUND BOUND, no ; Match "" at any word boundary using native charset semantics for non-utf8
+BOUND BOUND, no ; Match "" at any word boundary using native charset rules for non-utf8
BOUNDL BOUND, no ; Match "" at any locale word boundary
-BOUNDU BOUND, no ; Match "" at any word boundary using Unicode semantics
-BOUNDA BOUND, no ; Match "" at any word boundary using ASCII semantics
+BOUNDU BOUND, no ; Match "" at any word boundary using Unicode rules
+BOUNDA BOUND, no ; Match "" at any word boundary using ASCII rules
# All NBOUND nodes are required by code in regexec.c to be greater than all BOUND ones
-NBOUND NBOUND, no ; Match "" at any word non-boundary using native charset semantics for non-utf8
+NBOUND NBOUND, no ; Match "" at any word non-boundary using native charset rules for non-utf8
NBOUNDL NBOUND, no ; Match "" at any locale word non-boundary
-NBOUNDU NBOUND, no ; Match "" at any word non-boundary using Unicode semantics
-NBOUNDA NBOUND, no ; Match "" at any word non-boundary using ASCII semantics
+NBOUNDU NBOUND, no ; Match "" at any word non-boundary using Unicode rules
+NBOUNDA NBOUND, no ; Match "" at any word non-boundary using ASCII rules
GPOS GPOS, no ; Matches where last m//g left off.
#* [Special] alternatives:
@@ -131,21 +131,21 @@ OPEN OPEN, num 1 ; Mark this point in input as start of #n.
CLOSE CLOSE, num 1 ; Analogous to OPEN.
REF REF, num 1 V ; Match some already matched string
-REFF REF, num 1 V ; Match already matched string, folded using native charset semantics for non-utf8
+REFF REF, num 1 V ; Match already matched string, folded using native charset rules for non-utf8
REFFL REF, num 1 V ; Match already matched string, folded in loc.
# N?REFF[AU] could have been implemented using the FLAGS field of the
# regnode, but by having a separate node type, we can use the existing switch
# statement to avoid some tests
-REFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8
-REFFA REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII
+REFFU REF, num 1 V ; Match already matched string, folded using unicode rules for non-utf8
+REFFA REF, num 1 V ; Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII
#*Named references. Code in regcomp.c assumes that these all are after
#*the numbered references
NREF REF, no-sv 1 V ; Match some already matched string
-NREFF REF, no-sv 1 V ; Match already matched string, folded using native charset semantics for non-utf8
+NREFF REF, no-sv 1 V ; Match already matched string, folded using native charset rules for non-utf8
NREFFL REF, no-sv 1 V ; Match already matched string, folded in loc.
-NREFFU REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8
-NREFFA REF, num 1 V ; Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII
+NREFFU REF, num 1 V ; Match already matched string, folded using unicode rules for non-utf8
+NREFFA REF, num 1 V ; Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII
IFMATCH BRANCHJ, off 1 . 2 ; Succeeds if the following matches.
UNLESSM BRANCHJ, off 1 . 2 ; Fails if the following matches.
diff --git a/regen/feature.pl b/regen/feature.pl
index a46ebbcc91..fccfc2da16 100755
--- a/regen/feature.pl
+++ b/regen/feature.pl
@@ -361,7 +361,7 @@ read_only_bottom_close_and_rename($h);
__END__
package feature;
-our $VERSION = '1.35';
+our $VERSION = '1.36';
FEATURES
@@ -456,7 +456,7 @@ This feature is available starting with Perl 5.10.
=head2 The 'unicode_strings' feature
-C<use feature 'unicode_strings'> tells the compiler to use Unicode semantics
+C<use feature 'unicode_strings'> tells the compiler to use Unicode rules
in all string operations executed within its scope (unless they are also
within the scope of either C<use locale> or C<use bytes>). The same applies
to all regular expressions compiled within the scope, even if executed outside
@@ -464,7 +464,7 @@ it. It does not change the internal representation of strings, but only how
they are interpreted.
C<no feature 'unicode_strings'> tells the compiler to use the traditional
-Perl semantics wherein the native character set semantics is used unless it is
+Perl rules wherein the native character set rules are used unless it is
clear to Perl that Unicode is desired. This can lead to some surprises
when the behavior suddenly changes. (See
L<perlunicode/The "Unicode Bug"> for details.) For this reason, if you are
diff --git a/regen/mk_PL_charclass.pl b/regen/mk_PL_charclass.pl
index 63c06bc9f6..918bb4de58 100644
--- a/regen/mk_PL_charclass.pl
+++ b/regen/mk_PL_charclass.pl
@@ -161,7 +161,7 @@ my @bits; # Bit map for each code point
# For each character, calculate which properties it matches.
for my $ord (0..255) {
my $char = chr($ord);
- utf8::upgrade($char); # Important to use Unicode semantics!
+ utf8::upgrade($char); # Important to use Unicode rules!
# Look at all the properties we care about here.
for my $property (@properties) {
diff --git a/regnodes.h b/regnodes.h
index f9d4fc05dd..4f4ff9e192 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -18,14 +18,14 @@
#define EOL 6 /* 0x06 Match "" at end of line. */
#define MEOL 7 /* 0x07 Same, assuming multiline. */
#define SEOL 8 /* 0x08 Same, assuming singleline. */
-#define BOUND 9 /* 0x09 Match "" at any word boundary using native charset semantics for non-utf8 */
+#define BOUND 9 /* 0x09 Match "" at any word boundary using native charset rules for non-utf8 */
#define BOUNDL 10 /* 0x0a Match "" at any locale word boundary */
-#define BOUNDU 11 /* 0x0b Match "" at any word boundary using Unicode semantics */
-#define BOUNDA 12 /* 0x0c Match "" at any word boundary using ASCII semantics */
-#define NBOUND 13 /* 0x0d Match "" at any word non-boundary using native charset semantics for non-utf8 */
+#define BOUNDU 11 /* 0x0b Match "" at any word boundary using Unicode rules */
+#define BOUNDA 12 /* 0x0c Match "" at any word boundary using ASCII rules */
+#define NBOUND 13 /* 0x0d Match "" at any word non-boundary using native charset rules for non-utf8 */
#define NBOUNDL 14 /* 0x0e Match "" at any locale word non-boundary */
-#define NBOUNDU 15 /* 0x0f Match "" at any word non-boundary using Unicode semantics */
-#define NBOUNDA 16 /* 0x10 Match "" at any word non-boundary using ASCII semantics */
+#define NBOUNDU 15 /* 0x0f Match "" at any word non-boundary using Unicode rules */
+#define NBOUNDA 16 /* 0x10 Match "" at any word non-boundary using ASCII rules */
#define GPOS 17 /* 0x11 Matches where last m//g left off. */
#define REG_ANY 18 /* 0x12 Match any one character (except newline). */
#define SANY 19 /* 0x13 Match any one character. */
@@ -61,15 +61,15 @@
#define OPEN 49 /* 0x31 Mark this point in input as start of #n. */
#define CLOSE 50 /* 0x32 Analogous to OPEN. */
#define REF 51 /* 0x33 Match some already matched string */
-#define REFF 52 /* 0x34 Match already matched string, folded using native charset semantics for non-utf8 */
+#define REFF 52 /* 0x34 Match already matched string, folded using native charset rules for non-utf8 */
#define REFFL 53 /* 0x35 Match already matched string, folded in loc. */
-#define REFFU 54 /* 0x36 Match already matched string, folded using unicode semantics for non-utf8 */
-#define REFFA 55 /* 0x37 Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
+#define REFFU 54 /* 0x36 Match already matched string, folded using unicode rules for non-utf8 */
+#define REFFA 55 /* 0x37 Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
#define NREF 56 /* 0x38 Match some already matched string */
-#define NREFF 57 /* 0x39 Match already matched string, folded using native charset semantics for non-utf8 */
+#define NREFF 57 /* 0x39 Match already matched string, folded using native charset rules for non-utf8 */
#define NREFFL 58 /* 0x3a Match already matched string, folded in loc. */
-#define NREFFU 59 /* 0x3b Match already matched string, folded using unicode semantics for non-utf8 */
-#define NREFFA 60 /* 0x3c Match already matched string, folded using unicode semantics for non-utf8, no mixing ASCII, non-ASCII */
+#define NREFFU 59 /* 0x3b Match already matched string, folded using unicode rules for non-utf8 */
+#define NREFFA 60 /* 0x3c Match already matched string, folded using unicode rules for non-utf8, no mixing ASCII, non-ASCII */
#define IFMATCH 61 /* 0x3d Succeeds if the following matches. */
#define UNLESSM 62 /* 0x3e Fails if the following matches. */
#define SUSPEND 63 /* 0x3f "Independent" sub-RE. */
diff --git a/t/re/re_tests b/t/re/re_tests
index 3ef6503405..22c0c74550 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1423,7 +1423,7 @@ foo(\h)bar foo\tbar y $1 \t
/[a\N{U+0100}]/ \x{100} y $& \x{100}
/[a\N{U+0100}]/ a y $& a
-# Verify that \N{U+...} forces Unicode semantics
+# Verify that \N{U+...} forces Unicode rules
/\N{U+41}\x{c1}/i a\x{e1} y $& a\x{e1}
/[\N{U+41}\x{c1}]/i \x{e1} y $& \x{e1}