summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2011-01-19 20:34:06 +0200
committer Arnold D. Robbins <arnold@skeeve.com> 2011-01-19 20:34:06 +0200
commit 9c9c911c9974c6a50116b4ea9c4a047bc94fc9d5 (patch)
tree 61282c73836b855b081a4d883f5bd0d28a75e54c
parent b4a1aa90519d34c87b3a6699b77a24f39b1b22c1 (diff)
download gawk-9c9c911c9974c6a50116b4ea9c4a047bc94fc9d5.tar.gz
Make single byte caching more elegant.
-rw-r--r-- ChangeLog 11
-rw-r--r-- awk.h 4
-rw-r--r-- builtin.c 10
-rw-r--r-- io.c 2
-rw-r--r-- node.c 4
5 files changed, 21 insertions, 10 deletions
diff --git a/ChangeLog b/ChangeLog
index 18067fae..666a435f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+Wed Jan 19 20:31:17 2011 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awk.h (is_valid_character): Add `& 0XFF' and remove casts to
+ unsigned chars in other files. Remove definition of this macro
+ in not MBS_SUPPORT case, since it wasn't being used.
+ (btowc_cache): New macro to index into the array and use the
+ same trick. Relies on ANSI C preprocessor semantics.
+ Fix all uses.
+ * builtin.c, node.c, io.c: Fix uses of these macros.
+
Wed Jan 19 20:19:29 2011 Arnold D. Robbins <arnold@skeeve.com>
* node.c (wstr2str): New function.
@@ -7,6 +17,7 @@ Wed Jan 19 20:19:29 2011 Arnold D. Robbins <arnold@skeeve.com>
simplify wide character case conversions.
(do_tolower, do_toupper): Use wide_tolower, wide_toupper in multibyte
case.
+ (do_substr): Simplify code a little bit.
Mon Jan 17 22:48:48 2011 Arnold D. Robbins <arnold@skeeve.com>
diff --git a/awk.h b/awk.h
index 664b4f7c..13674448 100644
--- a/awk.h
+++ b/awk.h
@@ -1329,11 +1329,11 @@ extern const wchar_t *wcasestrstr(const wchar_t *haystack, size_t hs_len,
const wchar_t *needle, size_t needle_len);
extern void free_wstr(NODE *n);
extern wint_t btowc_cache[];
+#define btowc_cache(x) btowc_cache[(x)&0xFF]
extern void init_btowc_cache();
-#define is_valid_character(b) (btowc_cache[b] != WEOF)
+#define is_valid_character(b) (btowc_cache[(b)&0xFF] != WEOF)
#else
#define free_wstr(NODE) /* empty */
-#define is_valid_character(c) (TRUE)
#endif
/* re.c */
extern Regexp *make_regexp(const char *s, size_t len, int ignorecase, int dfa, int canfatal);
diff --git a/builtin.c b/builtin.c
index a9ece11a..8bb10486 100644
--- a/builtin.c
+++ b/builtin.c
@@ -231,26 +231,26 @@ strncasecmpmbs(const unsigned char *s1, const unsigned char *s2, size_t n)
for (i1 = i2 = 0 ; i1 < n && i2 < n ;i1 += mbclen1, i2 += mbclen2) {
if (is_valid_character(s1[i1])) {
mbclen1 = 1;
- wc1 = btowc_cache[s1[i1]];
+ wc1 = btowc_cache(s1[i1]);
} else {
mbclen1 = mbrtowc(& wc1, (const char *)s1 + i1,
n - i1, & mbs1);
if (mbclen1 == (size_t) -1 || mbclen1 == (size_t) -2 || mbclen1 == 0) {
/* We treat it as a singlebyte character. */
mbclen1 = 1;
- wc1 = btowc_cache[s1[i1]];
+ wc1 = btowc_cache(s1[i1]);
}
}
if (is_valid_character(s2[i2])) {
mbclen2 = 1;
- wc2 = btowc_cache[s2[i2]];
+ wc2 = btowc_cache(s2[i2]);
} else {
mbclen2 = mbrtowc(& wc2, (const char *)s2 + i2,
n - i2, & mbs2);
if (mbclen2 == (size_t) -1 || mbclen2 == (size_t) -2 || mbclen2 == 0) {
/* We treat it as a singlebyte character. */
mbclen2 = 1;
- wc2 = btowc_cache[s2[i2]];
+ wc2 = btowc_cache(s2[i2]);
}
}
if ((gap = towlower(wc1) - towlower(wc2)) != 0)
@@ -313,8 +313,8 @@ do_index(int nargs)
const char *p1, *p2;
size_t l1, l2;
long ret;
- int do_single_byte = FALSE;
#ifdef MBS_SUPPORT
+ int do_single_byte = FALSE;
mbstate_t mbs1, mbs2;
if (gawk_mb_cur_max > 1) {
diff --git a/io.c b/io.c
index af15aa02..7cf48911 100644
--- a/io.c
+++ b/io.c
@@ -2689,7 +2689,7 @@ rs1scan(IOBUF *iop, struct recmatch *recm, SCANSTATE *state)
do {
if (*bp == rs)
found = 1;
- if (is_valid_character((unsigned char)*bp))
+ if (is_valid_character(*bp))
mbclen = 1;
else
mbclen = mbrlen(bp, len, &mbs);
diff --git a/node.c b/node.c
index cf16f794..a8805176 100644
--- a/node.c
+++ b/node.c
@@ -708,9 +708,9 @@ str2wstr(NODE *n, size_t **ptr)
* big speed up. Thanks to Ulrich Drepper for the tip.
* 11/2010: Thanks to Paolo Bonzini for some even faster code.
*/
- if (is_valid_character((unsigned char)*sp)) {
+ if (is_valid_character(*sp)) {
count = 1;
- wc = btowc_cache[(unsigned char)*sp];
+ wc = btowc_cache(*sp);
} else
count = mbrtowc(& wc, sp, src_count, & mbs);
switch (count) {