summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2023-05-16 10:53:42 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2023-05-16 10:53:42 -0400
commit: 0966291a4ade47f7c14313ab7868d1de4e4f6688 (patch)
tree: 9c4e818440f34d755c89bf31316a2f2f6cf7de4c
parent: 3282e071bc594923f5d5f09c6f5b548e425a558a (diff)
download: postgresql-REL_12_STABLE.tar.gz
Ensure Soundex difference() function handles empty input sanely. [REL_12_STABLE]
fuzzystrmatch's difference() function assumes that _soundex() always initializes its output buffer fully. This was not so for the case of a string containing no alphabetic characters, resulting in unstable output and Valgrind complaints. Fix by using memset() to fill the whole buffer in the early-exit case.

Also make some cosmetic improvements (I didn't care for the random switches between "instr[0]" and "*instr" notation).

Report and diagnosis by Alexander Lakhin (bug #17935). Back-patch to all supported branches.

Discussion: https://postgr.es/m/17935-b99316aa79c18513@postgresql.org
-rw-r--r-- contrib/fuzzystrmatch/expected/fuzzystrmatch.out | 6
-rw-r--r-- contrib/fuzzystrmatch/fuzzystrmatch.c | 15
-rw-r--r-- contrib/fuzzystrmatch/sql/fuzzystrmatch.sql | 1
3 files changed, 15 insertions, 7 deletions
diff --git a/contrib/fuzzystrmatch/expected/fuzzystrmatch.out b/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
index 493c95cdfa..2827e81eb4 100644
--- a/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
+++ b/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
@@ -23,6 +23,12 @@ SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
A500 | M626 | 0
(1 row)
+SELECT soundex(''), difference('', '');
+ soundex | difference
+---------+------------
+ | 4
+(1 row)
+
SELECT levenshtein('GUMBO', 'GAMBOL');
levenshtein
-------------
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index b8992f7c3c..8d3420f674 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -729,16 +729,14 @@ _soundex(const char *instr, char *outstr)
AssertArg(instr);
AssertArg(outstr);
- outstr[SOUNDEX_LEN] = '\0';
-
/* Skip leading non-alphabetic characters */
- while (!isalpha((unsigned char) instr[0]) && instr[0])
+ while (*instr && !isalpha((unsigned char) *instr))
++instr;
- /* No string left */
- if (!instr[0])
+ /* If no string left, return all-zeroes buffer */
+ if (!*instr)
{
- outstr[0] = (char) 0;
+ memset(outstr, '\0', SOUNDEX_LEN + 1);
return;
}
@@ -751,7 +749,7 @@ _soundex(const char *instr, char *outstr)
if (isalpha((unsigned char) *instr) &&
soundex_code(*instr) != soundex_code(*(instr - 1)))
{
- *outstr = soundex_code(instr[0]);
+ *outstr = soundex_code(*instr);
if (*outstr != '0')
{
++outstr;
@@ -768,6 +766,9 @@ _soundex(const char *instr, char *outstr)
++outstr;
++count;
}
+
+ /* And null-terminate */
+ *outstr = '\0';
}
PG_FUNCTION_INFO_V1(difference);
diff --git a/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql b/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
index f05dc28ffb..1d0e2197fb 100644
--- a/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
+++ b/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
@@ -6,6 +6,7 @@ SELECT soundex('hello world!');
SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
+SELECT soundex(''), difference('', '');
SELECT levenshtein('GUMBO', 'GAMBOL');