summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2023-05-16 10:53:42 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2023-05-16 10:53:42 -0400
commit: 1d369c9e90f311ec98b07a259cac48c404c773d5 (patch)
tree: f4acf82756eee58cb7e1f7d4d1e14d6f80d59acc
parent: 27debd05dcbdfe3db487ba82568f7f9ae25ddde2 (diff)
download: postgresql-1d369c9e90f311ec98b07a259cac48c404c773d5.tar.gz
Ensure Soundex difference() function handles empty input sanely.

fuzzystrmatch's difference() function assumes that _soundex() always initializes its output buffer fully. This was not so for the case of a string containing no alphabetic characters, resulting in unstable output and Valgrind complaints.

Fix by using memset() to fill the whole buffer in the early-exit case. Also make some cosmetic improvements (I didn't care for the random switches between "instr[0]" and "*instr" notation).

Report and diagnosis by Alexander Lakhin (bug #17935).
Back-patch to all supported branches.

Discussion: https://postgr.es/m/17935-b99316aa79c18513@postgresql.org
-rw-r--r-- contrib/fuzzystrmatch/expected/fuzzystrmatch.out | 6
-rw-r--r-- contrib/fuzzystrmatch/fuzzystrmatch.c | 15
-rw-r--r-- contrib/fuzzystrmatch/sql/fuzzystrmatch.sql | 1
3 files changed, 15 insertions, 7 deletions
diff --git a/contrib/fuzzystrmatch/expected/fuzzystrmatch.out b/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
index bcb837fd6b..3195e1ec3c 100644
--- a/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
+++ b/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
@@ -23,6 +23,12 @@ SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
A500 | M626 | 0
(1 row)
+SELECT soundex(''), difference('', '');
+ soundex | difference
+---------+------------
+ | 4
+(1 row)
+
SELECT levenshtein('GUMBO', 'GAMBOL');
levenshtein
-------------
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index e1222714e4..5686497983 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -728,16 +728,14 @@ _soundex(const char *instr, char *outstr)
Assert(instr);
Assert(outstr);
- outstr[SOUNDEX_LEN] = '\0';
-
/* Skip leading non-alphabetic characters */
- while (!isalpha((unsigned char) instr[0]) && instr[0])
+ while (*instr && !isalpha((unsigned char) *instr))
++instr;
- /* No string left */
- if (!instr[0])
+ /* If no string left, return all-zeroes buffer */
+ if (!*instr)
{
- outstr[0] = (char) 0;
+ memset(outstr, '\0', SOUNDEX_LEN + 1);
return;
}
@@ -750,7 +748,7 @@ _soundex(const char *instr, char *outstr)
if (isalpha((unsigned char) *instr) &&
soundex_code(*instr) != soundex_code(*(instr - 1)))
{
- *outstr = soundex_code(instr[0]);
+ *outstr = soundex_code(*instr);
if (*outstr != '0')
{
++outstr;
@@ -767,6 +765,9 @@ _soundex(const char *instr, char *outstr)
++outstr;
++count;
}
+
+ /* And null-terminate */
+ *outstr = '\0';
}
PG_FUNCTION_INFO_V1(difference);
diff --git a/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql b/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
index db05c7d6b6..0b4bb9be57 100644
--- a/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
+++ b/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
@@ -6,6 +6,7 @@ SELECT soundex('hello world!');
SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
+SELECT soundex(''), difference('', '');
SELECT levenshtein('GUMBO', 'GAMBOL');