summaryrefslogtreecommitdiff
path: root/gcc/spellcheck.c
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/spellcheck.c')
-rw-r--r--gcc/spellcheck.c136
1 files changed, 136 insertions, 0 deletions
diff --git a/gcc/spellcheck.c b/gcc/spellcheck.c
new file mode 100644
index 00000000000..31ce3224546
--- /dev/null
+++ b/gcc/spellcheck.c
@@ -0,0 +1,136 @@
+/* Find near-matches for strings and identifiers.
+ Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "spellcheck.h"
+
+/* The Levenshtein distance is an "edit-distance": the minimal
+ number of one-character insertions, removals or substitutions
+ that are needed to change one string into another.
+
+ This implementation uses the Wagner-Fischer algorithm. */
+
+static edit_distance_t
+levenshtein_distance (const char *s, int len_s,
+ const char *t, int len_t)
+{
+ const bool debug = false;
+
+ if (debug)
+ {
+ printf ("s: \"%s\" (len_s=%i)\n", s, len_s);
+ printf ("t: \"%s\" (len_t=%i)\n", t, len_t);
+ }
+
+ if (len_s == 0)
+ return len_t;
+ if (len_t == 0)
+ return len_s;
+
+ /* We effectively build a matrix where each (i, j) contains the
+ Levenshtein distance between the prefix strings s[0:j]
+ and t[0:i].
+ Rather than actually build an (len_t + 1) * (len_s + 1) matrix,
+ we simply keep track of the last row, v0 and a new row, v1,
+ which avoids an (len_t + 1) * (len_s + 1) allocation and memory accesses
+ in favor of two (len_s + 1) allocations. These could potentially be
+ statically-allocated if we impose a maximum length on the
+ strings of interest. */
+ edit_distance_t *v0 = new edit_distance_t[len_s + 1];
+ edit_distance_t *v1 = new edit_distance_t[len_s + 1];
+
+ /* The first row is for the case of an empty target string, which
+ we can reach by deleting every character in the source string. */
+ for (int i = 0; i < len_s + 1; i++)
+ v0[i] = i;
+
+ /* Build successive rows. */
+ for (int i = 0; i < len_t; i++)
+ {
+ if (debug)
+ {
+ printf ("i:%i v0 = ", i);
+ for (int j = 0; j < len_s + 1; j++)
+ printf ("%i ", v0[j]);
+ printf ("\n");
+ }
+
+ /* The initial column is for the case of an empty source string; we
+ can reach prefixes of the target string of length i
+ by inserting i characters. */
+ v1[0] = i + 1;
+
+ /* Build the rest of the row by considering neighbours to
+ the north, west and northwest. */
+ for (int j = 0; j < len_s; j++)
+ {
+ edit_distance_t cost = (s[j] == t[i] ? 0 : 1);
+ edit_distance_t deletion = v1[j] + 1;
+ edit_distance_t insertion = v0[j + 1] + 1;
+ edit_distance_t substitution = v0[j] + cost;
+ edit_distance_t cheapest = MIN (deletion, insertion);
+ cheapest = MIN (cheapest, substitution);
+ v1[j + 1] = cheapest;
+ }
+
+ /* Prepare to move on to next row. */
+ for (int j = 0; j < len_s + 1; j++)
+ v0[j] = v1[j];
+ }
+
+ if (debug)
+ {
+ printf ("final v1 = ");
+ for (int j = 0; j < len_s + 1; j++)
+ printf ("%i ", v1[j]);
+ printf ("\n");
+ }
+
+ edit_distance_t result = v1[len_s];
+ delete[] v0;
+ delete[] v1;
+ return result;
+}
+
+/* Calculate Levenshtein distance between two nil-terminated strings.
+ This exists purely for the unit tests. */
+
+edit_distance_t
+levenshtein_distance (const char *s, const char *t)
+{
+ return levenshtein_distance (s, strlen (s), t, strlen (t));
+}
+
+/* Calculate Levenshtein distance between two identifiers. */
+
+edit_distance_t
+levenshtein_distance (tree ident_s, tree ident_t)
+{
+ gcc_assert (TREE_CODE (ident_s) == IDENTIFIER_NODE);
+ gcc_assert (TREE_CODE (ident_t) == IDENTIFIER_NODE);
+
+ return levenshtein_distance (IDENTIFIER_POINTER (ident_s),
+ IDENTIFIER_LENGTH (ident_s),
+ IDENTIFIER_POINTER (ident_t),
+ IDENTIFIER_LENGTH (ident_t));
+}