summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Bartek Fabiszewski <bfabiszewski@users.noreply.github.com>, 2017-11-27 12:19:26 +0100
committer: phajdan <phajdan.jr@gmail.com>, 2017-11-27 12:19:26 +0100
commit: 73dd2967c8e1e4f6d7334ee9e539a323d6e66cbd (patch)
tree: b70d1cd362b07aaf1694195ded0d2179dfa69135
parent: a8d50da0cc93a28fa05bd892f49bf074e11280e6 (diff)
download: hyphen-master.tar.gz
Discard too long lines in dictionary file (#14) (HEAD, master)
* Discard too long lines in dictionary file
* Discard too long lines in dictionary file: add warning and test
-rw-r--r-- hyphen.c 22
-rw-r--r-- tests/Makefile.am 5
-rw-r--r-- tests/longlines.hyph 1
-rw-r--r-- tests/longlines.pat 10
-rwxr-xr-x tests/longlines.test 4
-rw-r--r-- tests/longlines.word 1
6 files changed, 39 insertions, 4 deletions
diff --git a/hyphen.c b/hyphen.c
index 9a132d0..9f2b711 100644
--- a/hyphen.c
+++ b/hyphen.c
@@ -438,11 +438,25 @@ for (k = 0; k < 2; k++) {
}
if (k == 0 || nextlevel) {
- while (fgets (buf, sizeof(buf), f) != NULL) {
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+
+ /* discard lines that don't fit in buffer */
+ if (!feof(f) && strchr(buf, '\n') == NULL) {
+ int c;
+ while ((c = fgetc(f)) != '\n' && c != EOF);
+ /* issue warning if not a comment */
+ if (buf[0] != '%') {
+ fprintf(stderr, "Warning: skipping too long pattern (more than %lu chars)\n", sizeof(buf));
+ }
+ continue;
+ }
+
if (strncmp(buf, "NEXTLEVEL", 9) == 0) {
- nextlevel = 1;
- break;
- } else if (buf[0] != '%') hnj_hyphen_load_line(buf, dict[k], hashtab);
+ nextlevel = 1;
+ break;
+ } else if (buf[0] != '%') {
+ hnj_hyphen_load_line(buf, dict[k], hashtab);
+ }
}
} else if (k == 1) {
/* default first level: hyphen and ASCII apostrophe */
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 2b08c67..e72863c 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -11,6 +11,7 @@ alt4.test \
alt5.test \
alt6.test \
alt7.test \
+longlines.test \
compound.test \
compound2.test \
compound3.test \
@@ -50,6 +51,9 @@ alt6.word \
alt7.hyph \
alt7.pat \
alt7.word \
+longlines.hyph \
+longlines.pat \
+longlines.word \
alt.hyph \
alt.pat \
alt.word \
@@ -105,6 +109,7 @@ alt4.test \
alt5.test \
alt6.test \
alt7.test \
+longlines.test \
alt.test \
basealt2.test \
basealt.test \
diff --git a/tests/longlines.hyph b/tests/longlines.hyph
new file mode 100644
index 0000000..8b0ee7f
--- /dev/null
+++ b/tests/longlines.hyph
@@ -0,0 +1 @@
+a=bc=d
diff --git a/tests/longlines.pat b/tests/longlines.pat
new file mode 100644
index 0000000..290aef4
--- /dev/null
+++ b/tests/longlines.pat
@@ -0,0 +1,10 @@
+ISO8859-1
+LEFTHYPHENMIN 1
+RIGHTHYPHENMIN 1
+% Check that characters beyond MAX_CHARS on a line are not treated as a new line
+% This test is valid as long as MAX_CHARS is 100
+%
+% Following pattern should result in a=bc=d hyphenation
+a1b2c1d
+% and should not be overridden by pattern from too long comment (over MAX_CHARS characters)
+%|------------------------------ this part is 100 characters long --------------------------------|a8b9c8d
diff --git a/tests/longlines.test b/tests/longlines.test
new file mode 100755
index 0000000..f144145
--- /dev/null
+++ b/tests/longlines.test
@@ -0,0 +1,4 @@
+#!/bin/sh
+DIR="`dirname $0`"
+NAME="`basename $0 .test`"
+$DIR/test.sh $NAME.pat $NAME.word $NAME.hyph
diff --git a/tests/longlines.word b/tests/longlines.word
new file mode 100644
index 0000000..acbe86c
--- /dev/null
+++ b/tests/longlines.word
@@ -0,0 +1 @@
+abcd