From 1d369c9e90f311ec98b07a259cac48c404c773d5 Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Tue, 16 May 2023 10:53:42 -0400
Subject: Ensure Soundex difference() function handles empty input sanely.

fuzzystrmatch's difference() function assumes that _soundex()
always initializes its output buffer fully. This was not so for
the case of a string containing no alphabetic characters, resulting
in unstable output and Valgrind complaints.

Fix by using memset() to fill the whole buffer in the early-exit
case. Also make some cosmetic improvements (I didn't care for the
random switches between "instr[0]" and "*instr" notation).

Report and diagnosis by Alexander Lakhin (bug #17935).
Back-patch to all supported branches.

Discussion: https://postgr.es/m/17935-b99316aa79c18513@postgresql.org
---
 contrib/fuzzystrmatch/expected/fuzzystrmatch.out |  6 ++++++
 contrib/fuzzystrmatch/fuzzystrmatch.c            | 15 ++++++++-------
 contrib/fuzzystrmatch/sql/fuzzystrmatch.sql      |  1 +
 3 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/contrib/fuzzystrmatch/expected/fuzzystrmatch.out b/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
index bcb837fd6b..3195e1ec3c 100644
--- a/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
+++ b/contrib/fuzzystrmatch/expected/fuzzystrmatch.out
@@ -23,6 +23,12 @@ SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
  A500    | M626    |          0
 (1 row)
 
+SELECT soundex(''), difference('', '');
+ soundex | difference 
+---------+------------
+         |          4
+(1 row)
+
 SELECT levenshtein('GUMBO', 'GAMBOL');
  levenshtein 
 -------------
diff --git a/contrib/fuzzystrmatch/fuzzystrmatch.c b/contrib/fuzzystrmatch/fuzzystrmatch.c
index e1222714e4..5686497983 100644
--- a/contrib/fuzzystrmatch/fuzzystrmatch.c
+++ b/contrib/fuzzystrmatch/fuzzystrmatch.c
@@ -728,16 +728,14 @@ _soundex(const char *instr, char *outstr)
 	Assert(instr);
 	Assert(outstr);
 
-	outstr[SOUNDEX_LEN] = '\0';
-
 	/* Skip leading non-alphabetic characters */
-	while (!isalpha((unsigned char) instr[0]) && instr[0])
+	while (*instr && !isalpha((unsigned char) *instr))
 		++instr;
 
-	/* No string left */
-	if (!instr[0])
+	/* If no string left, return all-zeroes buffer */
+	if (!*instr)
 	{
-		outstr[0] = (char) 0;
+		memset(outstr, '\0', SOUNDEX_LEN + 1);
 		return;
 	}
 
@@ -750,7 +748,7 @@ _soundex(const char *instr, char *outstr)
 		if (isalpha((unsigned char) *instr) &&
 			soundex_code(*instr) != soundex_code(*(instr - 1)))
 		{
-			*outstr = soundex_code(instr[0]);
+			*outstr = soundex_code(*instr);
 			if (*outstr != '0')
 			{
 				++outstr;
@@ -767,6 +765,9 @@ _soundex(const char *instr, char *outstr)
 		++outstr;
 		++count;
 	}
+
+	/* And null-terminate */
+	*outstr = '\0';
 }
 
 PG_FUNCTION_INFO_V1(difference);
diff --git a/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql b/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
index db05c7d6b6..0b4bb9be57 100644
--- a/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
+++ b/contrib/fuzzystrmatch/sql/fuzzystrmatch.sql
@@ -6,6 +6,7 @@ SELECT soundex('hello world!');
 SELECT soundex('Anne'), soundex('Ann'), difference('Anne', 'Ann');
 SELECT soundex('Anne'), soundex('Andrew'), difference('Anne', 'Andrew');
 SELECT soundex('Anne'), soundex('Margaret'), difference('Anne', 'Margaret');
+SELECT soundex(''), difference('', '');
 
 
 SELECT levenshtein('GUMBO', 'GAMBOL');
-- 
cgit v1.2.1