summaryrefslogtreecommitdiff
path: root/contrib/dict_int
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2007-10-15 21:36:50 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2007-10-15 21:36:50 +0000
commit5fcb079858bb392e87067b5526e9df950db38024 (patch)
tree4ffb764af092be94fbe0e033dce2f492e6c937f7 /contrib/dict_int
parentfb631dba2a3c2c183bb99f2098491ecf96fb6664 (diff)
downloadpostgresql-5fcb079858bb392e87067b5526e9df950db38024.tar.gz
Add sample text search dictionary templates and parsers, to replace the
hard-to-maintain textual examples currently in the SGML docs. From Sergey Karpov.
Diffstat (limited to 'contrib/dict_int')
-rw-r--r--contrib/dict_int/Makefile19
-rw-r--r--contrib/dict_int/README.dict_int41
-rw-r--r--contrib/dict_int/dict_int.c99
-rw-r--r--contrib/dict_int/dict_int.sql.in29
-rw-r--r--contrib/dict_int/expected/dict_int.out308
-rw-r--r--contrib/dict_int/sql/dict_int.sql61
-rw-r--r--contrib/dict_int/uninstall_dict_int.sql9
7 files changed, 566 insertions, 0 deletions
diff --git a/contrib/dict_int/Makefile b/contrib/dict_int/Makefile
new file mode 100644
index 0000000000..4e03a69a6e
--- /dev/null
+++ b/contrib/dict_int/Makefile
@@ -0,0 +1,19 @@
+# $PostgreSQL: pgsql/contrib/dict_int/Makefile,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+# What this module consists of: one object file, a generated install script,
+# a static uninstall script, a README, and one regression test.
+MODULE_big = dict_int
+OBJS = dict_int.o
+DATA_built = dict_int.sql
+DATA = uninstall_dict_int.sql
+DOCS = README.dict_int
+REGRESS = dict_int
+
+# With USE_PGXS defined, the build rules come from the makefile that
+# "pg_config --pgxs" points to; otherwise they come from the PostgreSQL
+# source tree, with this directory located at contrib/dict_int.
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/dict_int
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/dict_int/README.dict_int b/contrib/dict_int/README.dict_int
new file mode 100644
index 0000000000..5883c1c2f5
--- /dev/null
+++ b/contrib/dict_int/README.dict_int
@@ -0,0 +1,41 @@
+Dictionary for integers
+=======================
+
+The motivation for this example dictionary is to control the indexing of
+integers (signed and unsigned) and, consequently, to minimize the number of
+unique words, which greatly affects search performance.
+
+* Configuration
+
+The dictionary accepts two options:
+
+ - The MAXLEN parameter specifies the maximum length (number of digits)
+ allowed in an integer word. The default value is 6.
+
+ - The REJECTLONG parameter specifies if an overlength integer should be
+ truncated or ignored. If REJECTLONG=FALSE (default), the dictionary returns
+ the first MAXLEN digits of the integer. If REJECTLONG=TRUE, the
+ dictionary treats an overlength integer as a stop word, so that it will
+ not be indexed.
+
+* Usage
+
+1. Compile and install
+
+2. Load dictionary
+
+ psql mydb < dict_int.sql
+
+3. Test it
+
+ mydb# select ts_lexize('intdict', '12345678');
+ ts_lexize
+ -----------
+ {123456}
+
+4. Change its options as you wish
+
+ mydb# ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 4, REJECTLONG = true);
+ ALTER TEXT SEARCH DICTIONARY
+
+That's all.
diff --git a/contrib/dict_int/dict_int.c b/contrib/dict_int/dict_int.c
new file mode 100644
index 0000000000..85d45491cc
--- /dev/null
+++ b/contrib/dict_int/dict_int.c
@@ -0,0 +1,99 @@
+/*-------------------------------------------------------------------------
+ *
+ * dict_int.c
+ * Text search dictionary for integers
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/contrib/dict_int/dict_int.c,v 1.1 2007/10/15 21:36:50 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "commands/defrem.h"
+#include "fmgr.h"
+#include "tsearch/ts_public.h"
+
+PG_MODULE_MAGIC;
+
+
+/*
+ * Per-dictionary settings: filled in by dintdict_init, read by
+ * dintdict_lexize.
+ */
+typedef struct
+{
+    int         maxlen;         /* longest token length passed through unchanged */
+    bool        rejectlong;     /* true: drop overlength tokens; false: truncate them to maxlen */
+} DictInt;
+
+
+PG_FUNCTION_INFO_V1(dintdict_init); /* version-1 fmgr registration for the template's INIT function */
+Datum dintdict_init(PG_FUNCTION_ARGS);
+
+PG_FUNCTION_INFO_V1(dintdict_lexize); /* version-1 fmgr registration for the template's LEXIZE function */
+Datum dintdict_lexize(PG_FUNCTION_ARGS);
+
+/*
+ * dintdict_init
+ *      Build the dictionary's private state from its option list.
+ *
+ * Argument 0 is a List of DefElem nodes.  Option names are matched
+ * case-insensitively:
+ *      MAXLEN      maximum token length kept by dintdict_lexize (default 6);
+ *                  must be at least 1
+ *      REJECTLONG  boolean; when true, overlength tokens are rejected
+ *                  instead of being truncated (default false)
+ *
+ * Returns a pointer to a palloc'd DictInt.  Raises ERROR for an
+ * unrecognized option name or for a MAXLEN value below 1.
+ */
+Datum
+dintdict_init(PG_FUNCTION_ARGS)
+{
+    List       *dictoptions = (List *) PG_GETARG_POINTER(0);
+    DictInt    *d;
+    ListCell   *l;
+
+    d = (DictInt *) palloc0(sizeof(DictInt));
+    d->maxlen = 6;
+    d->rejectlong = false;
+
+    foreach(l, dictoptions)
+    {
+        DefElem    *defel = (DefElem *) lfirst(l);
+
+        if (pg_strcasecmp(defel->defname, "MAXLEN") == 0)
+        {
+            d->maxlen = atoi(defGetString(defel));
+
+            /*
+             * dintdict_lexize uses maxlen as the index at which it cuts an
+             * overlength token: zero would turn every such token into an
+             * empty lexeme, and a negative value would write before the
+             * start of the buffer.  atoi() yields 0 for non-numeric input,
+             * so that case is rejected here too.
+             */
+            if (d->maxlen < 1)
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                         errmsg("maxlen value has to be >= 1")));
+        }
+        else if (pg_strcasecmp(defel->defname, "REJECTLONG") == 0)
+        {
+            d->rejectlong = defGetBoolean(defel);
+        }
+        else
+        {
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                     errmsg("unrecognized intdict parameter: \"%s\"",
+                            defel->defname)));
+        }
+    }
+
+    PG_RETURN_POINTER(d);
+}
+
+/*
+ * dintdict_lexize
+ *      Normalize one token according to the dictionary's MAXLEN and
+ *      REJECTLONG settings.
+ *
+ * Argument 0 is the DictInt built by dintdict_init, argument 1 points to
+ * the token text, and argument 2 is the token's length in bytes.
+ *
+ * Returns a palloc'd TSLexeme array terminated by an entry whose lexeme is
+ * NULL:
+ *  - token no longer than maxlen: one entry holding a copy of the token;
+ *  - overlength token, rejectlong false: one entry holding the first
+ *    maxlen bytes of the token;
+ *  - overlength token, rejectlong true: an empty array (terminator only).
+ */
+Datum
+dintdict_lexize(PG_FUNCTION_ARGS)
+{
+    DictInt    *d = (DictInt *) PG_GETARG_POINTER(0);
+    char       *in = (char *) PG_GETARG_POINTER(1);
+    int         len = PG_GETARG_INT32(2);
+    TSLexeme   *res;
+
+    /*
+     * Zero the whole array: plain palloc would leave every TSLexeme field
+     * other than lexeme uninitialized in the entry we return.  Zeroing
+     * also makes res[1] the NULL-lexeme terminator.
+     */
+    res = (TSLexeme *) palloc0(sizeof(TSLexeme) * 2);
+
+    if (len > d->maxlen && d->rejectlong)
+    {
+        /* reject by returning an empty array; res[0].lexeme is already NULL */
+        PG_RETURN_POINTER(res);
+    }
+
+    res[0].lexeme = pnstrdup(in, len);
+
+    if (len > d->maxlen)
+    {
+        /* trim the copy to its first maxlen bytes */
+        res[0].lexeme[d->maxlen] = '\0';
+    }
+
+    PG_RETURN_POINTER(res);
+}
diff --git a/contrib/dict_int/dict_int.sql.in b/contrib/dict_int/dict_int.sql.in
new file mode 100644
index 0000000000..0bd97a83e2
--- /dev/null
+++ b/contrib/dict_int/dict_int.sql.in
@@ -0,0 +1,29 @@
+-- $PostgreSQL: pgsql/contrib/dict_int/dict_int.sql.in,v 1.1 2007/10/15 21:36:50 tgl Exp $
+
+-- Adjust this setting to control where the objects get created.
+SET search_path = public;
+
+BEGIN; -- create all of the objects below in a single transaction
+
+CREATE FUNCTION dintdict_init(internal) -- C function used as the template's INIT entry point
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+CREATE FUNCTION dintdict_lexize(internal, internal, internal, internal) -- C function used as the template's LEXIZE entry point
+ RETURNS internal
+ AS 'MODULE_PATHNAME'
+ LANGUAGE C STRICT;
+
+CREATE TEXT SEARCH TEMPLATE intdict_template ( -- ties the two C functions together
+ LEXIZE = dintdict_lexize,
+ INIT = dintdict_init
+);
+
+CREATE TEXT SEARCH DICTIONARY intdict ( -- no options given here, so the defaults set in dintdict_init apply
+ TEMPLATE = intdict_template
+);
+
+COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'dictionary for integers';
+
+END; -- closes the transaction opened by BEGIN above
diff --git a/contrib/dict_int/expected/dict_int.out b/contrib/dict_int/expected/dict_int.out
new file mode 100644
index 0000000000..7feb493e15
--- /dev/null
+++ b/contrib/dict_int/expected/dict_int.out
@@ -0,0 +1,308 @@
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+RESET client_min_messages;
+--lexize
+select ts_lexize('intdict', '511673');
+ ts_lexize
+-----------
+ {511673}
+(1 row)
+
+select ts_lexize('intdict', '129');
+ ts_lexize
+-----------
+ {129}
+(1 row)
+
+select ts_lexize('intdict', '40865854');
+ ts_lexize
+-----------
+ {408658}
+(1 row)
+
+select ts_lexize('intdict', '952');
+ ts_lexize
+-----------
+ {952}
+(1 row)
+
+select ts_lexize('intdict', '654980341');
+ ts_lexize
+-----------
+ {654980}
+(1 row)
+
+select ts_lexize('intdict', '09810106');
+ ts_lexize
+-----------
+ {098101}
+(1 row)
+
+select ts_lexize('intdict', '14262713');
+ ts_lexize
+-----------
+ {142627}
+(1 row)
+
+select ts_lexize('intdict', '6532082986');
+ ts_lexize
+-----------
+ {653208}
+(1 row)
+
+select ts_lexize('intdict', '0150061');
+ ts_lexize
+-----------
+ {015006}
+(1 row)
+
+select ts_lexize('intdict', '7778');
+ ts_lexize
+-----------
+ {7778}
+(1 row)
+
+select ts_lexize('intdict', '9547');
+ ts_lexize
+-----------
+ {9547}
+(1 row)
+
+select ts_lexize('intdict', '753395478');
+ ts_lexize
+-----------
+ {753395}
+(1 row)
+
+select ts_lexize('intdict', '647652');
+ ts_lexize
+-----------
+ {647652}
+(1 row)
+
+select ts_lexize('intdict', '6988655574');
+ ts_lexize
+-----------
+ {698865}
+(1 row)
+
+select ts_lexize('intdict', '1279');
+ ts_lexize
+-----------
+ {1279}
+(1 row)
+
+select ts_lexize('intdict', '1266645909');
+ ts_lexize
+-----------
+ {126664}
+(1 row)
+
+select ts_lexize('intdict', '7594193969');
+ ts_lexize
+-----------
+ {759419}
+(1 row)
+
+select ts_lexize('intdict', '16928207');
+ ts_lexize
+-----------
+ {169282}
+(1 row)
+
+select ts_lexize('intdict', '196850350328');
+ ts_lexize
+-----------
+ {196850}
+(1 row)
+
+select ts_lexize('intdict', '22026985592');
+ ts_lexize
+-----------
+ {220269}
+(1 row)
+
+select ts_lexize('intdict', '2063765');
+ ts_lexize
+-----------
+ {206376}
+(1 row)
+
+select ts_lexize('intdict', '242387310');
+ ts_lexize
+-----------
+ {242387}
+(1 row)
+
+select ts_lexize('intdict', '93595');
+ ts_lexize
+-----------
+ {93595}
+(1 row)
+
+select ts_lexize('intdict', '9374');
+ ts_lexize
+-----------
+ {9374}
+(1 row)
+
+select ts_lexize('intdict', '996969');
+ ts_lexize
+-----------
+ {996969}
+(1 row)
+
+select ts_lexize('intdict', '353595982');
+ ts_lexize
+-----------
+ {353595}
+(1 row)
+
+select ts_lexize('intdict', '925860');
+ ts_lexize
+-----------
+ {925860}
+(1 row)
+
+select ts_lexize('intdict', '11848378337');
+ ts_lexize
+-----------
+ {118483}
+(1 row)
+
+select ts_lexize('intdict', '333');
+ ts_lexize
+-----------
+ {333}
+(1 row)
+
+select ts_lexize('intdict', '799287416765');
+ ts_lexize
+-----------
+ {799287}
+(1 row)
+
+select ts_lexize('intdict', '745939');
+ ts_lexize
+-----------
+ {745939}
+(1 row)
+
+select ts_lexize('intdict', '67601305734');
+ ts_lexize
+-----------
+ {676013}
+(1 row)
+
+select ts_lexize('intdict', '3361113');
+ ts_lexize
+-----------
+ {336111}
+(1 row)
+
+select ts_lexize('intdict', '9033778607');
+ ts_lexize
+-----------
+ {903377}
+(1 row)
+
+select ts_lexize('intdict', '7507648');
+ ts_lexize
+-----------
+ {750764}
+(1 row)
+
+select ts_lexize('intdict', '1166');
+ ts_lexize
+-----------
+ {1166}
+(1 row)
+
+select ts_lexize('intdict', '9360498');
+ ts_lexize
+-----------
+ {936049}
+(1 row)
+
+select ts_lexize('intdict', '917795');
+ ts_lexize
+-----------
+ {917795}
+(1 row)
+
+select ts_lexize('intdict', '9387894');
+ ts_lexize
+-----------
+ {938789}
+(1 row)
+
+select ts_lexize('intdict', '42764329');
+ ts_lexize
+-----------
+ {427643}
+(1 row)
+
+select ts_lexize('intdict', '564062');
+ ts_lexize
+-----------
+ {564062}
+(1 row)
+
+select ts_lexize('intdict', '5413377');
+ ts_lexize
+-----------
+ {541337}
+(1 row)
+
+select ts_lexize('intdict', '060965');
+ ts_lexize
+-----------
+ {060965}
+(1 row)
+
+select ts_lexize('intdict', '08273593');
+ ts_lexize
+-----------
+ {082735}
+(1 row)
+
+select ts_lexize('intdict', '593556010144');
+ ts_lexize
+-----------
+ {593556}
+(1 row)
+
+select ts_lexize('intdict', '17988843352');
+ ts_lexize
+-----------
+ {179888}
+(1 row)
+
+select ts_lexize('intdict', '252281774');
+ ts_lexize
+-----------
+ {252281}
+(1 row)
+
+select ts_lexize('intdict', '313425');
+ ts_lexize
+-----------
+ {313425}
+(1 row)
+
+select ts_lexize('intdict', '641439323669');
+ ts_lexize
+-----------
+ {641439}
+(1 row)
+
+select ts_lexize('intdict', '314532610153');
+ ts_lexize
+-----------
+ {314532}
+(1 row)
+
diff --git a/contrib/dict_int/sql/dict_int.sql b/contrib/dict_int/sql/dict_int.sql
new file mode 100644
index 0000000000..3a335f8f3d
--- /dev/null
+++ b/contrib/dict_int/sql/dict_int.sql
@@ -0,0 +1,61 @@
+--
+-- first, define the datatype. Turn off echoing so that expected file
+-- does not depend on contents of this file.
+--
+SET client_min_messages = warning;
+\set ECHO none
+\i dict_int.sql
+\set ECHO all
+RESET client_min_messages;
+
+--lexize
+select ts_lexize('intdict', '511673');
+select ts_lexize('intdict', '129');
+select ts_lexize('intdict', '40865854');
+select ts_lexize('intdict', '952');
+select ts_lexize('intdict', '654980341');
+select ts_lexize('intdict', '09810106');
+select ts_lexize('intdict', '14262713');
+select ts_lexize('intdict', '6532082986');
+select ts_lexize('intdict', '0150061');
+select ts_lexize('intdict', '7778');
+select ts_lexize('intdict', '9547');
+select ts_lexize('intdict', '753395478');
+select ts_lexize('intdict', '647652');
+select ts_lexize('intdict', '6988655574');
+select ts_lexize('intdict', '1279');
+select ts_lexize('intdict', '1266645909');
+select ts_lexize('intdict', '7594193969');
+select ts_lexize('intdict', '16928207');
+select ts_lexize('intdict', '196850350328');
+select ts_lexize('intdict', '22026985592');
+select ts_lexize('intdict', '2063765');
+select ts_lexize('intdict', '242387310');
+select ts_lexize('intdict', '93595');
+select ts_lexize('intdict', '9374');
+select ts_lexize('intdict', '996969');
+select ts_lexize('intdict', '353595982');
+select ts_lexize('intdict', '925860');
+select ts_lexize('intdict', '11848378337');
+select ts_lexize('intdict', '333');
+select ts_lexize('intdict', '799287416765');
+select ts_lexize('intdict', '745939');
+select ts_lexize('intdict', '67601305734');
+select ts_lexize('intdict', '3361113');
+select ts_lexize('intdict', '9033778607');
+select ts_lexize('intdict', '7507648');
+select ts_lexize('intdict', '1166');
+select ts_lexize('intdict', '9360498');
+select ts_lexize('intdict', '917795');
+select ts_lexize('intdict', '9387894');
+select ts_lexize('intdict', '42764329');
+select ts_lexize('intdict', '564062');
+select ts_lexize('intdict', '5413377');
+select ts_lexize('intdict', '060965');
+select ts_lexize('intdict', '08273593');
+select ts_lexize('intdict', '593556010144');
+select ts_lexize('intdict', '17988843352');
+select ts_lexize('intdict', '252281774');
+select ts_lexize('intdict', '313425');
+select ts_lexize('intdict', '641439323669');
+select ts_lexize('intdict', '314532610153');
diff --git a/contrib/dict_int/uninstall_dict_int.sql b/contrib/dict_int/uninstall_dict_int.sql
new file mode 100644
index 0000000000..0323ab298e
--- /dev/null
+++ b/contrib/dict_int/uninstall_dict_int.sql
@@ -0,0 +1,9 @@
+SET search_path = public; -- same schema the install script creates the objects in
+
+DROP TEXT SEARCH DICTIONARY intdict; -- dropped first: it is built on intdict_template
+
+DROP TEXT SEARCH TEMPLATE intdict_template; -- dropped before the functions it references
+
+DROP FUNCTION dintdict_init(internal);
+
+DROP FUNCTION dintdict_lexize(internal,internal,internal,internal);