Added to Usage note.

Added flag -L to output line numbers. Changed line tokenization to use g_unichar_type() to discriminate various kinds of Unicode characters. git-svn-id: svn+ssh://svn.abisource.com/svnroot/enchant/trunk@20859 bcba8976-2d24-0410-9c9c-aab3bd5fdfd6
author: Raphael Finkel <raphael@cs.uky.edu> 2003-09-24 13:31:23 +0000
committer: Raphael Finkel <raphael@cs.uky.edu> 2003-09-24 13:31:23 +0000
commit: b2e06c67c998b05a61b7af7a6355967f3eb8daee (patch)
tree: 909963795298fc496e812fe90c3a5241dd162746 /tests/enchant-ispell.c
parent: 8af7231a8a1a1b3991820f1d041f1e8ba2b8385d (diff)
download: enchant-b2e06c67c998b05a61b7af7a6355967f3eb8daee.tar.gz
1 files changed, 58 insertions, 33 deletions
diff --git a/tests/enchant-ispell.c b/tests/enchant-ispell.c
index e12aa0e..4af85dd 100644
--- a/tests/enchant-ispell.c
+++ b/tests/enchant-ispell.c
@@ -52,6 +52,8 @@ typedef enum
 		MODE_FILE
 	} IspellMode_t;
 
+static int countLines = 0, lineCount = 0;
+
 static void 
 print_version (FILE * to)
 {
@@ -61,7 +63,11 @@ print_version (FILE * to)
 static void
 print_help (FILE * to, const char * prog)
 {
-	fprintf (to, "Usage: %s [options] -a|-l|-v[v]|<file>\n", prog);
+	fprintf (to, "Usage: %s [options] -a|-l|-L|-v[v]|<file>\n", prog);
+	fprintf (to, "\t-a lists alternatives.\n", prog);
+	fprintf (to, "\t-l lists misspelings.\n", prog);
+	fprintf (to, "\t-L displays line numbers.\n", prog);
+	fprintf (to, "\t-v displays program version.\n", prog);
 }
 
 static gboolean
@@ -72,6 +78,7 @@ consume_line (FILE * in, GString * str)
 	gchar * utf;
 	gboolean ret = TRUE;
 
+	lineCount++;
 	g_string_truncate (str, 0);
 
 	while (ret && (ch = fgetc (in)) != EOF) {
@@ -85,14 +92,6 @@ consume_line (FILE * in, GString * str)
 		}
 	}
 
-	if (str->len) {
-		utf = g_locale_to_utf8 (str->str, str->len, &bytes_read, &bytes_written, NULL);
-		g_free (str->str);
-		
-		str->str = utf;
-		str->len = bytes_written;
-	}
-
 	return ret;
 }
 
@@ -102,6 +101,9 @@ print_utf (FILE * out, const char * str)
 	gsize bytes_read, bytes_written;
 	gchar * native;
 
+	// fprintf(out, "[%s]", str);
+	fprintf(out, "%s", str);
+	return;
 	native = g_locale_from_utf8 (str, -1, &bytes_read, &bytes_written, NULL);
 	fwrite (native, 1, bytes_written, out);
 	g_free (native);
@@ -114,12 +116,19 @@ do_mode_a (FILE * out, EnchantDict * dict, GString * word, size_t start_pos)
 	char ** suggs;
 
 	if (enchant_dict_check (dict, word->str, word->len) == 0)
-		fwrite ("*\n", 1, 2, out);
+		if (countLines) {
+			fprintf (out, "* %d\n", lineCount);
+		} else {
+			fwrite ("*\n", 1, 2, out);
+		}
 	else {
 		suggs = enchant_dict_suggest (dict, word->str, 
 					      word->len, &n_suggs);
 		if (!n_suggs || !suggs) {
 			fwrite ("# ", 1, 2, out);
+			if (countLines) {
+				fprintf (out, "%d ", lineCount);
+			}
 			print_utf (out, word->str);
 			fprintf (out, " %ld\n", start_pos+1);
 		}
@@ -127,6 +136,9 @@ do_mode_a (FILE * out, EnchantDict * dict, GString * word, size_t start_pos)
 			size_t i = 0;
 			
 			fwrite ("& ", 1, 2, out);
+			if (countLines) {
+				fprintf (out, "%d ", lineCount);
+			}
 			print_utf (out, word->str);
 			fprintf (out, " %ld %ld:", n_suggs, start_pos);
 			
@@ -147,6 +159,9 @@ static void
 do_mode_l (FILE * out, EnchantDict * dict, GString * word)
 {
 	if (enchant_dict_check (dict, word->str, word->len) != 0) {
+		if (countLines) {
+			fprintf (out, "%d ", lineCount);
+		}
 		print_utf (out, word->str);
 		fwrite ("\n", 1, 1, out);
 	}
@@ -170,35 +185,42 @@ tokenize_line (GString * line)
 	while (cur_pos < line->len && *utf) {
 		uc = g_utf8_get_char (utf); 
 		
-		if (g_unichar_isalpha (uc)) {
-			g_string_append_unichar (word, uc);
-			cur_pos++;
-		}
-		else if (g_unichar_ispunct (uc)) {
-			if (uc == '\'') {
+		// fprintf(stdout, "type of %s is %d\n", utf, g_unichar_type(uc));
+		switch (g_unichar_type(uc)) {
+			case G_UNICODE_MODIFIER_LETTER:
+			case G_UNICODE_LOWERCASE_LETTER:
+			case G_UNICODE_TITLECASE_LETTER:
+			case G_UNICODE_UPPERCASE_LETTER:
+			case G_UNICODE_OTHER_LETTER:
+			case G_UNICODE_COMBINING_MARK:
+			case G_UNICODE_ENCLOSING_MARK:
+			case G_UNICODE_NON_SPACING_MARK:
+			case G_UNICODE_DECIMAL_NUMBER:
+			case G_UNICODE_LETTER_NUMBER:
+			case G_UNICODE_OTHER_NUMBER:
+			case G_UNICODE_CONNECT_PUNCTUATION:
 				g_string_append_unichar (word, uc);
 				cur_pos++;
-			} else {
+				break;
+			case G_UNICODE_OTHER_PUNCTUATION:
+				if (uc == '\'') {
+					g_string_append_unichar (word, uc);
+					cur_pos++;
+					break;
+				}
+				// else fall through
+			default: // some sort of non-word character
 				if (word->len) {
-					tokens = g_slist_append (tokens, g_string_new_len (word->str, word->len));
-					tokens = g_slist_append (tokens, GINT_TO_POINTER(start_pos));
+					tokens = g_slist_append (tokens,
+						g_string_new_len (word->str, word->len));
+					tokens = g_slist_append (tokens,
+						GINT_TO_POINTER(start_pos));
 					g_string_truncate (word, 0);
 				}
-
-				start_pos = ++cur_pos;
-			}
-		} else {
-			if (word->len) {
-				tokens = g_slist_append (tokens, g_string_new_len (word->str, word->len));
-				tokens = g_slist_append (tokens, GINT_TO_POINTER(start_pos));
-				g_string_truncate (word, 0);
-			}
-
-			start_pos = ++cur_pos;
-		}
-
+		} // switch
+		start_pos = ++cur_pos;
 		utf = g_utf8_next_char (utf);
-	}
+	} // while
 
 	g_string_free (word, TRUE);
 
@@ -236,6 +258,7 @@ parse_file (FILE * in, FILE * out, IspellMode_t mode)
 	str = g_string_new (NULL);
 	
 	while (!was_last_line) {
+		str = g_string_new (NULL); // wasteful, but avoids segfault
 		was_last_line = consume_line (in, str);
 
 		if (str->len) {
@@ -299,6 +322,8 @@ int main (int argc, char ** argv)
 					mode = MODE_L;
 				else if (arg[1] == 'v')
 					mode = MODE_VERSION;
+				else if (arg[1] == 'L')
+					countLines++;
 			} 
 			else if (strlen (arg) > 2) {
 				fprintf (stderr, "-%c does not take any parameters.\n", arg[1]);
author	Raphael Finkel <raphael@cs.uky.edu>	2003-09-24 13:31:23 +0000
committer	Raphael Finkel <raphael@cs.uky.edu>	2003-09-24 13:31:23 +0000
commit	b2e06c67c998b05a61b7af7a6355967f3eb8daee (patch)
tree	909963795298fc496e812fe90c3a5241dd162746 /tests/enchant-ispell.c
parent	8af7231a8a1a1b3991820f1d041f1e8ba2b8385d (diff)
download	enchant-b2e06c67c998b05a61b7af7a6355967f3eb8daee.tar.gz