summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2023-05-16 10:53:42 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2023-05-16 10:53:42 -0400
commit: eaf99e4c4ae4084b8261e5e1f9c2c93004a98638 (patch)
tree: 4432e741afe2b22cc98fa1b9ada1e4572b432173
parent: f06156da18f67bc2c904c0a76b70dafcb14ca7c2 (diff)
download: postgresql-REL_15_STABLE.tar.gz
Ensure Soundex difference() function handles empty input sanely. [branch: REL_15_STABLE]
fuzzystrmatch's difference() function assumes that _soundex() always initializes its output buffer fully. This was not so for the case of a string containing no alphabetic characters, resulting in unstable output and Valgrind complaints. Fix by using memset() to fill the whole buffer in the early-exit case. Also make some cosmetic improvements (I didn't care for the random switches between "instr[0]" and "*instr" notation). Report and diagnosis by Alexander Lakhin (bug #17935). Back-patch to all supported branches. Discussion: https://postgr.es/m/17935-b99316aa79c18513@postgresql.org
-rw-r--r-- contrib/fuzzystrmatch/expected/fuzzystrmatch.out | 6
-rw-r--r-- contrib/fuzzystrmatch/fuzzystrmatch.c | 15
-rw-r--r-- contrib/fuzzystrmatch/sql/fuzzystrmatch.sql | 1
3 files changed, 15 insertions, 7 deletions
diff --git a/contrib/fuzzystrmatch/expected/fuzzystrmatch.out b/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
index 493c95cdfa..2827e81eb4 100644
--- a/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
+++ b/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
@@ -23,6 +23,12 @@ SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
A500 | M626 | 0
(1 row)
+SELECT soundex(''), difference('', '');
+ soundex | difference
+---------+------------
+ | 4
+(1 row)
+
SELECT levenshtein('GUMBO', 'GAMBOL');
levenshtein
-------------
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index a04251ace6..18177d14db 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -727,16 +727,14 @@ _soundex(const char *instr, char *outstr)
AssertArg(instr);
AssertArg(outstr);
- outstr[SOUNDEX_LEN] = '\0';
-
/* Skip leading non-alphabetic characters */
- while (!isalpha((unsigned char) instr[0]) && instr[0])
+ while (*instr && !isalpha((unsigned char) *instr))
++instr;
- /* No string left */
- if (!instr[0])
+ /* If no string left, return all-zeroes buffer */
+ if (!*instr)
{
- outstr[0] = (char) 0;
+ memset(outstr, '\0', SOUNDEX_LEN + 1);
return;
}
@@ -749,7 +747,7 @@ _soundex(const char *instr, char *outstr)
if (isalpha((unsigned char) *instr) &&
soundex_code(*instr) != soundex_code(*(instr - 1)))
{
- *outstr = soundex_code(instr[0]);
+ *outstr = soundex_code(*instr);
if (*outstr != '0')
{
++outstr;
@@ -766,6 +764,9 @@ _soundex(const char *instr, char *outstr)
++outstr;
++count;
}
+
+ /* And null-terminate */
+ *outstr = '\0';
}
PG_FUNCTION_INFO_V1(difference);
diff --git a/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql b/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
index f05dc28ffb..1d0e2197fb 100644
--- a/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
+++ b/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
@@ -6,6 +6,7 @@ SELECT soundex('hello world!');
SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
+SELECT soundex(''), difference('', '');
SELECT levenshtein('GUMBO', 'GAMBOL');