summary refs log tree commit diff
diff options
context:
space:
mode:
author Arnold D. Robbins <arnold@skeeve.com> 2014-09-14 18:58:30 +0300
committer Arnold D. Robbins <arnold@skeeve.com> 2014-09-14 18:58:30 +0300
commit 1684dfc1f5abd1e0093feb586ae428ce42d1a827 (patch)
tree f9405c255a06b3aacc8e3202f27003390316b6fe
parent 9da96e570a835d6a0427c9182585af307d393f45 (diff)
download gawk-1684dfc1f5abd1e0093feb586ae428ce42d1a827.tar.gz
Force only ASCII letters to be allowed in identifiers.
-rw-r--r-- ChangeLog 6
-rw-r--r-- awkgram.c 35
-rw-r--r-- awkgram.y 35
3 files changed, 74 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index 32582ae1..0d2c8743 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2014-09-14 Arnold D. Robbins <arnold@skeeve.com>
+
+ * awkgram.y (is_identchar): Change from simple macro to function
+ since use of isalnum() let non-ASCII letters slip through into
+ identifiers.
+
2014-09-07 Arnold D. Robbins <arnold@skeeve.com>
* awk.h: Move libsigsegv stuff to ...
diff --git a/awkgram.c b/awkgram.c
index 0bf2e5f3..2737762c 100644
--- a/awkgram.c
+++ b/awkgram.c
@@ -195,7 +195,7 @@ extern double fmod(double x, double y);
#define YYSTYPE INSTRUCTION *
-#define is_identchar(c) (isalnum(c) || (c) == '_')
+static bool is_identchar(int c); /* true if c may appear in an identifier; defined later in this file */
#line 201 "awkgram.c" /* yacc.c:339 */
@@ -8077,3 +8077,36 @@ install_builtins(void)
}
}
}
+
+/* is_identchar --- return true if c can be in an identifier */
+
+/*
+ * This can't be:
+ *
+ * #define is_identchar(c) (isalnum(c) || (c) == '_')
+ *
+ * because in non-C locales, character codes outside the set of
+ * ASCII letters and digits pass the test. BLEAH.
+ *
+ * Instead, every accepted character is listed explicitly as a case
+ * label, so the answer does not depend on the current locale:
+ * true for the letters 'a'-'z' and 'A'-'Z', the digits '0'-'9',
+ * and '_'; false for any other value of c.
+ */
+
+static bool
+is_identchar(int c)
+{
+ switch (c) {
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z': /* lowercase letters */
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z': /* uppercase letters */
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9': /* decimal digits */
+ case '_': /* underscore; all labels above share this return */
+ return true;
+ }
+ return false; /* no label matched: not an identifier character */
+}
diff --git a/awkgram.y b/awkgram.y
index 6500d1f5..8d9a938c 100644
--- a/awkgram.y
+++ b/awkgram.y
@@ -155,7 +155,7 @@ extern double fmod(double x, double y);
#define YYSTYPE INSTRUCTION *
-#define is_identchar(c) (isalnum(c) || (c) == '_')
+static bool is_identchar(int c); /* true if c may appear in an identifier; defined later in this file */
%}
%token FUNC_CALL NAME REGEXP FILENAME
@@ -5738,3 +5738,36 @@ install_builtins(void)
}
}
}
+
+/* is_identchar --- return true if c can be in an identifier */
+
+/*
+ * 9/2014: This can't be:
+ *
+ * #define is_identchar(c) (isalnum(c) || (c) == '_')
+ *
+ * because in non-C locales, character codes outside the set of
+ * ASCII letters and digits pass the test. BLEAH.
+ *
+ * Instead, every accepted character is listed explicitly as a case
+ * label, so the answer does not depend on the current locale:
+ * true for the letters 'a'-'z' and 'A'-'Z', the digits '0'-'9',
+ * and '_'; false for any other value of c.
+ */
+
+static bool
+is_identchar(int c)
+{
+ switch (c) {
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
+ case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
+ case 's': case 't': case 'u': case 'v': case 'w': case 'x':
+ case 'y': case 'z': /* lowercase letters */
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z': /* uppercase letters */
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9': /* decimal digits */
+ case '_': /* underscore; all labels above share this return */
+ return true;
+ }
+ return false; /* no label matched: not an identifier character */
+}