summaryrefslogtreecommitdiff
path: root/levenshtein.c
diff options
context:
space:
mode:
authorJohannes Schindelin <Johannes.Schindelin@gmx.de>2008-08-31 15:50:23 +0200
committerJunio C Hamano <gitster@pobox.com>2008-08-31 10:14:58 -0700
commit8af84dadb142f7321ff0ce8690385e99da8ede2f (patch)
treee47023679099de35b18da39c3b1c89efc0dd2c3a /levenshtein.c
parenta1184d85e8752658f02746982822f43f32316803 (diff)
downloadgit-8af84dadb142f7321ff0ce8690385e99da8ede2f.tar.gz
git wrapper: DWIM mistyped commands
This patch introduces a modified Damerau-Levenshtein algorithm into Git's code base, and uses it with the following penalties to show some similar commands when an unknown command was encountered: swap = 0, insertion = 1, substitution = 2, deletion = 4 A typical output would now look like this: $ git sm git: 'sm' is not a git-command. See 'git --help'. Did you mean one of these? am rm The cut-off is at similarity rating 6, which was empirically determined to give sensible results. As a convenience, if there is only one candidate, Git continues under the assumption that the user mistyped it. Example: $ git reabse WARNING: You called a Git program named 'reabse', which does not exist. Continuing under the assumption that you meant 'rebase' [...] Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de> Signed-off-by: Alex Riesen <raa.lkml@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'levenshtein.c')
-rw-r--r--levenshtein.c47
1 files changed, 47 insertions, 0 deletions
diff --git a/levenshtein.c b/levenshtein.c
new file mode 100644
index 0000000000..db52f2c205
--- /dev/null
+++ b/levenshtein.c
@@ -0,0 +1,47 @@
+#include "cache.h"
+#include "levenshtein.h"
+
+int levenshtein(const char *string1, const char *string2,
+ int w, int s, int a, int d)
+{
+ int len1 = strlen(string1), len2 = strlen(string2);
+ int *row0 = xmalloc(sizeof(int) * (len2 + 1));
+ int *row1 = xmalloc(sizeof(int) * (len2 + 1));
+ int *row2 = xmalloc(sizeof(int) * (len2 + 1));
+ int i, j;
+
+ for (j = 0; j <= len2; j++)
+ row1[j] = j * a;
+ for (i = 0; i < len1; i++) {
+ int *dummy;
+
+ row2[0] = (i + 1) * d;
+ for (j = 0; j < len2; j++) {
+ /* substitution */
+ row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+ /* swap */
+ if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+ string1[i] == string2[j - 1] &&
+ row2[j + 1] > row0[j - 1] + w)
+ row2[j + 1] = row0[j - 1] + w;
+ /* deletion */
+ if (j + 1 < len2 && row2[j + 1] > row1[j + 1] + d)
+ row2[j + 1] = row1[j + 1] + d;
+ /* insertion */
+ if (row2[j + 1] > row2[j] + a)
+ row2[j + 1] = row2[j] + a;
+ }
+
+ dummy = row0;
+ row0 = row1;
+ row1 = row2;
+ row2 = dummy;
+ }
+
+ i = row1[len2];
+ free(row0);
+ free(row1);
+ free(row2);
+
+ return i;
+}