summaryrefslogtreecommitdiff
path: root/op.c
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2018-01-09 10:05:33 +0000
committerDavid Mitchell <davem@iabyn.com>2018-01-19 13:45:19 +0000
commitea088e559c9bca8e7337d3d6236f06deb5afda32 (patch)
treed2b45636f53f4a63d07230bb6ca0d74818c4ee6a /op.c
parentddcffd84df3b0cf7b2325bed1db38b89bf19fa53 (diff)
downloadperl-ea088e559c9bca8e7337d3d6236f06deb5afda32.tar.gz
OP_TRANS: change extended table format
For non-utf8, OP_TRANS(R) ops have a translation table consisting of an array of 256 shorts attached. For tr///c, this table is extended to hold information about chars in the replacement list which aren't paired with chars in the search list. For example,

    tr/\x00-AE-\xff/bcdefg/c

is equivalent to

    tr/BCD\x{100}-\x{7fffffff}/bcdefg/

which is equivalent to

    tr/BCD\x{100}-\x{7fffffff}/bcdefggggggggg..../

Only the BCD => bcd mappings can be stored in the basic 256-slot table, so potentially the following extra information needs recording in an extended table to handle codepoints > 0xff in the string being modified:

    1) the extra replacement chars ("efg");
    2) the number of extra replacement chars (3);
    3) the "repeat" char ('g').

Currently 2) and 3) are combined: the repeat char is found as the last extra char, and if there are no extra chars, the repeat char is treated as an extra char list of length 1. Similarly, an 'extra chars' length value of 1 can imply either one extra char, or no extra chars with the repeat char being faked as an extra char. An 'extra chars' length of 0 implies an empty replacement list, i.e. tr/....//c.

This commit changes it so that the repeat char is *always* stored (in slot 0x101), with the extra chars stored beginning at slot 0x102. The 'extra chars' length value (located at slot 0x100) has changed its meaning slightly: now

    -1  implies tr/....//c
     0  implies no more replacement chars than search chars
     1+ the number of excess replacement chars.

This (should) make no functional difference, but the extra information stored will make it easier to fix some bugs shortly.
Diffstat (limited to 'op.c')
-rw-r--r--op.c58
1 files changed, 39 insertions, 19 deletions
diff --git a/op.c b/op.c
index 5652858a1b..23fa59496a 100644
--- a/op.c
+++ b/op.c
@@ -6631,7 +6631,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
tbl = (short*)PerlMemShared_calloc(
/* one slot for 'extra len' count and one slot
- * for possible storing of last replacement char */
+ * for storing of last replacement char */
(complement && !del) ? 258 : 256,
sizeof(short));
cPVOPo->op_pv = (char*)tbl;
@@ -6661,28 +6661,48 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
}
assert(j <= (I32)rlen);
+
if (!del) {
- if (!rlen) {
- /* empty replacement list */
- j = rlen;
- if (!squash)
- o->op_private |= OPpTRANS_IDENTICAL;
+ /* the repeat char: it may be used to fill the 0x100+
+ * range. For example,
+ * tr/\x00-AE-\xff/bcd/c
+ * is equivalent to
+ * tr/BCD\x{100}-\x{7fffffff}/bcd/
+ * which is equivalent to
+ * tr/BCD\x{100}-\x{7fffffff}/bcddddddddd..../
+ * So remember the 'd'.
+ */
+ short repeat_char;
+ SSize_t excess = rlen - (SSize_t)j;
+
+ if (excess) {
+ /* More replacement chars than search chars:
+ * store excess replacement chars at end of main table.
+ */
+
+ tbl = (short *) PerlMemShared_realloc(tbl,
+ (0x102+excess) * sizeof(short));
+ cPVOPo->op_pv = (char*)tbl;
+ for (i = 0; i < (I32)excess; i++)
+ tbl[0x102+i] = r[j+i];
+ repeat_char = r[rlen-1];
}
- else if (j == (I32)rlen)
- /* no more replacement chars than search chars */
- j = rlen - 1;
else {
- /* more replacement chars than search chars */
- tbl =
- (short *)
- PerlMemShared_realloc(tbl,
- (0x101+rlen-j) * sizeof(short));
- cPVOPo->op_pv = (char*)tbl;
+ /* no more replacement chars than search chars */
+
+ if (rlen)
+ repeat_char = r[rlen - 1];
+ else {
+ /* empty replacement list */
+ repeat_char = 0; /* this value isn't used at runtime */
+ /* -1 excess count indicates empty replacement charlist */
+ excess = -1;
+ if (!squash)
+ o->op_private |= OPpTRANS_IDENTICAL;
+ }
}
- tbl[0x100] = (short)(rlen - j);
- /* store any excess replacement chars at end of main table */
- for (i=0; i < (I32)rlen - j; i++)
- tbl[0x101+i] = r[j+i];
+ tbl[0x100] = (short)excess; /* excess char count */
+ tbl[0x101] = (short)repeat_char; /* repeated replace char */
}
}
else {