1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
|
/* Find near-matches for strings and identifiers.
Copyright (C) 2015-2016 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#ifndef GCC_SPELLCHECK_H
#define GCC_SPELLCHECK_H
typedef unsigned int edit_distance_t;
const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
/* spellcheck.c */
extern edit_distance_t
levenshtein_distance (const char *s, int len_s,
const char *t, int len_t);
extern edit_distance_t
levenshtein_distance (const char *s, const char *t);
extern const char *
find_closest_string (const char *target,
const auto_vec<const char *> *candidates);
/* A traits class for describing a string-like type usable by
class best_match.
Specializations should provide the implementations of the following:
static size_t get_length (TYPE);
static const char *get_string (TYPE);
get_string should return a non-NULL ptr, which does not need to be
0-terminated. */
template <typename TYPE>
struct edit_distance_traits {};
/* Specialization of edit_distance_traits for C-style strings. */
template <>
struct edit_distance_traits<const char *>
{
static size_t get_length (const char *str)
{
gcc_assert (str);
return strlen (str);
}
static const char *get_string (const char *str)
{
gcc_assert (str);
return str;
}
};
/* A type for use when determining the best match against a string,
expressed as a template so that we can match against various
string-like types (const char *, frontend identifiers, and preprocessor
macros).
This type accumulates the best possible match against GOAL_TYPE for
a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
number of calls to levenshtein_distance and to
edit_distance_traits<T>::get_length. */
template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
class best_match
{
public:
typedef GOAL_TYPE goal_t;
typedef CANDIDATE_TYPE candidate_t;
typedef edit_distance_traits<goal_t> goal_traits;
typedef edit_distance_traits<candidate_t> candidate_traits;
/* Constructor. */
best_match (GOAL_TYPE goal,
edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
: m_goal (goal_traits::get_string (goal)),
m_goal_len (goal_traits::get_length (goal)),
m_best_candidate (NULL),
m_best_distance (best_distance_so_far)
{}
/* Compare the edit distance between CANDIDATE and m_goal,
and if it's the best so far, record it. */
void consider (candidate_t candidate)
{
size_t candidate_len = candidate_traits::get_length (candidate);
/* Calculate a lower bound on the candidate's distance to the goal,
based on the difference in lengths; it will require at least
this many insertions/deletions. */
edit_distance_t min_candidate_distance
= abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
/* If the candidate's length is sufficiently different to that
of the goal string, then the number of insertions/deletions
may be >= the best distance so far. If so, we can reject
the candidate immediately without needing to compute
the exact distance, since it won't be an improvement. */
if (min_candidate_distance >= m_best_distance)
return;
/* If the candidate will be unable to beat the criterion in
get_best_meaningful_candidate, reject it without computing
the exact distance. */
unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
if (min_candidate_distance > cutoff)
return;
/* Otherwise, compute the distance and see if the candidate
has beaten the previous best value. */
edit_distance_t dist
= levenshtein_distance (m_goal, m_goal_len,
candidate_traits::get_string (candidate),
candidate_len);
if (dist < m_best_distance)
{
m_best_distance = dist;
m_best_candidate = candidate;
m_best_candidate_len = candidate_len;
}
}
/* Assuming that BEST_CANDIDATE is known to be better than
m_best_candidate, update (without recomputing the edit distance to
the goal). */
void set_best_so_far (CANDIDATE_TYPE best_candidate,
edit_distance_t best_distance,
size_t best_candidate_len)
{
gcc_assert (best_distance < m_best_distance);
m_best_candidate = best_candidate;
m_best_distance = best_distance;
m_best_candidate_len = best_candidate_len;
}
/* Get the best candidate so far, but applying a filter to ensure
that we return NULL if none of the candidates are close to the goal,
to avoid offering nonsensical suggestions to the user. */
candidate_t get_best_meaningful_candidate () const
{
/* If more than half of the letters were misspelled, the suggestion is
likely to be meaningless. */
if (m_best_candidate)
{
unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
if (m_best_distance > cutoff)
return NULL;
}
return m_best_candidate;
}
edit_distance_t get_best_distance () const { return m_best_distance; }
size_t get_best_candidate_length () const { return m_best_candidate_len; }
private:
const char *m_goal;
size_t m_goal_len;
candidate_t m_best_candidate;
edit_distance_t m_best_distance;
size_t m_best_candidate_len;
};
#endif /* GCC_SPELLCHECK_H */
|