Fix inadequately-sized output buffer in contrib/unaccent.

The output buffer size in unaccent_lexize() was calculated as input string length times pg_database_encoding_max_length(), which effectively assumes that replacement strings aren't more than one character. While that was all that we previously documented it to support, the code actually has always allowed replacement strings of arbitrary length; so if you tried to make use of longer strings, you were at risk of buffer overrun. To fix, use an expansible StringInfo buffer instead of trying to determine the maximum space needed a priori. This would be a security issue if unaccent rules files could be installed by unprivileged users; but fortunately they can't, so in the back branches the problem can be labeled as improper configuration by a superuser. Nonetheless, a memory stomp isn't a nice way of reacting to improper configuration, so let's back-patch the fix.
author: Tom Lane <tgl@sss.pgh.pa.us> 2014-07-01 11:22:43 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2014-07-01 11:23:21 -0400
commit: 5a421a47eb1fc4398f42678c09e35aa72dc7cf18 (patch)
tree: 2489ad9cbc06ff0e4e1ba1ded69135030b54859d /contrib/unaccent
parent: 9f03ca915196dfc871804a1f8aad26207f601fd6 (diff)
download: postgresql-5a421a47eb1fc4398f42678c09e35aa72dc7cf18.tar.gz
1 file changed, 24 insertions, 21 deletions
diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c
index c7d54b205e..7c4072ed93 100644
--- a/contrib/unaccent/unaccent.c
+++ b/contrib/unaccent/unaccent.c
@@ -15,6 +15,7 @@
 
 #include "catalog/namespace.h"
 #include "commands/defrem.h"
+#include "lib/stringinfo.h"
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_public.h"
@@ -309,9 +310,12 @@ unaccent_lexize(PG_FUNCTION_ARGS)
 	TrieChar   *rootTrie = (TrieChar *) PG_GETARG_POINTER(0);
 	char	   *srcchar = (char *) PG_GETARG_POINTER(1);
 	int32		len = PG_GETARG_INT32(2);
-	char	   *srcstart = srcchar,
-			   *trgchar = NULL;
-	TSLexeme   *res = NULL;
+	char	   *srcstart = srcchar;
+	TSLexeme   *res;
+	StringInfoData buf;
+
+	/* we allocate storage for the buffer only if needed */
+	buf.data = NULL;
 
 	while (len > 0)
 	{
@@ -322,37 +326,36 @@ unaccent_lexize(PG_FUNCTION_ARGS)
 							 &matchlen);
 		if (node && node->replaceTo)
 		{
-			if (!res)
+			if (buf.data == NULL)
 			{
-				/* allocate res only if it's needed */
-				res = palloc0(sizeof(TSLexeme) * 2);
-				res->lexeme = trgchar = palloc(len * pg_database_encoding_max_length() + 1 /* \0 */ );
-				res->flags = TSL_FILTER;
+				/* initialize buffer */
+				initStringInfo(&buf);
+				/* insert any data we already skipped over */
 				if (srcchar != srcstart)
-				{
-					memcpy(trgchar, srcstart, srcchar - srcstart);
-					trgchar += (srcchar - srcstart);
-				}
+					appendBinaryStringInfo(&buf, srcstart, srcchar - srcstart);
 			}
-			memcpy(trgchar, node->replaceTo, node->replacelen);
-			trgchar += node->replacelen;
+			appendBinaryStringInfo(&buf, node->replaceTo, node->replacelen);
 		}
 		else
 		{
 			matchlen = pg_mblen(srcchar);
-			if (res)
-			{
-				memcpy(trgchar, srcchar, matchlen);
-				trgchar += matchlen;
-			}
+			if (buf.data != NULL)
+				appendBinaryStringInfo(&buf, srcchar, matchlen);
 		}
 
 		srcchar += matchlen;
 		len -= matchlen;
 	}
 
-	if (res)
-		*trgchar = '\0';
+	/* return a result only if we made at least one substitution */
+	if (buf.data != NULL)
+	{
+		res = (TSLexeme *) palloc0(sizeof(TSLexeme) * 2);
+		res->lexeme = buf.data;
+		res->flags = TSL_FILTER;
+	}
+	else
+		res = NULL;
 
 	PG_RETURN_POINTER(res);
 }
author	Tom Lane <tgl@sss.pgh.pa.us>	2014-07-01 11:22:43 -0400
committer	Tom Lane <tgl@sss.pgh.pa.us>	2014-07-01 11:23:21 -0400
commit	5a421a47eb1fc4398f42678c09e35aa72dc7cf18 (patch)
tree	2489ad9cbc06ff0e4e1ba1ded69135030b54859d /contrib/unaccent
parent	9f03ca915196dfc871804a1f8aad26207f601fd6 (diff)
download	postgresql-5a421a47eb1fc4398f42678c09e35aa72dc7cf18.tar.gz