diff options
Diffstat (limited to 'contrib/tsearch2/tsvector.c')
-rw-r--r-- | contrib/tsearch2/tsvector.c | 1106 |
1 files changed, 0 insertions, 1106 deletions
diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c deleted file mode 100644 index dfbdacd345..0000000000 --- a/contrib/tsearch2/tsvector.c +++ /dev/null @@ -1,1106 +0,0 @@ -/* - * In/Out definitions for tsvector type - * Internal structure: - * string of values, array of position lexeme in string and it's length - * Teodor Sigaev <teodor@sigaev.ru> - */ -#include "postgres.h" - - -#include "access/gist.h" -#include "access/itup.h" -#include "catalog/namespace.h" -#include "commands/trigger.h" -#include "executor/spi.h" -#include "nodes/pg_list.h" -#include "storage/bufpage.h" -#include "utils/builtins.h" -#include "utils/pg_locale.h" -#include "mb/pg_wchar.h" - -#include <ctype.h> -#include "tsvector.h" -#include "query.h" -#include "ts_cfg.h" -#include "common.h" - -PG_FUNCTION_INFO_V1(tsvector_in); -Datum tsvector_in(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(tsvector_out); -Datum tsvector_out(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(to_tsvector); -Datum to_tsvector(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(to_tsvector_current); -Datum to_tsvector_current(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(to_tsvector_name); -Datum to_tsvector_name(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(tsearch2); -Datum tsearch2(PG_FUNCTION_ARGS); - -PG_FUNCTION_INFO_V1(tsvector_length); -Datum tsvector_length(PG_FUNCTION_ARGS); - -/* - * in/out text index type - */ -static int -comparePos(const void *a, const void *b) -{ - if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b)) - return 0; - return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1; -} - -static int -uniquePos(WordEntryPos * a, int4 l) -{ - WordEntryPos *ptr, - *res; - - res = a; - if (l == 1) - return l; - - qsort((void *) a, l, sizeof(WordEntryPos), comparePos); - - ptr = a + 1; - while (ptr - a < l) - { - if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res)) - { - res++; - *res = *ptr; - if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS(*res) == MAXENTRYPOS - 1) - break; - } - else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res)) - WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr)); - ptr++; - } - return res + 1 - a; -} - -static int -compareentry(const void *a, const void *b, void *arg) -{ - char *BufferStr = (char *) arg; - - if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len) - { - return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos], - &BufferStr[((WordEntryIN *) b)->entry.pos], - ((WordEntryIN *) a)->entry.len); - } - return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1; -} - -static int -uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen) -{ - WordEntryIN *ptr, - *res; - - res = a; - if (l == 1) - { - if (a->entry.haspos) - { - *(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos)); - *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos); - } - return l; - } - - ptr = a + 1; - qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf); - - while (ptr - a < l) - { - if (!(ptr->entry.len == res->entry.len && - strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0)) - { - if (res->entry.haspos) - { - *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos)); - *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos); - } - *outbuflen += SHORTALIGN(res->entry.len); - res++; - memcpy(res, ptr, sizeof(WordEntryIN)); - } - else if (ptr->entry.haspos) - { - if (res->entry.haspos) - { - int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos); - - res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos)); - memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]), - &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos)); - *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos); - pfree(ptr->pos); - } - else - { - res->entry.haspos = 1; - res->pos = ptr->pos; - } - } - ptr++; - } - if (res->entry.haspos) - { - *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos)); - *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos); - } - *outbuflen += SHORTALIGN(res->entry.len); - - return res + 1 - a; -} - -#define WAITWORD 1 -#define WAITENDWORD 2 -#define WAITNEXTCHAR 3 -#define WAITENDCMPLX 4 -#define WAITPOSINFO 5 -#define INPOSINFO 6 -#define WAITPOSDELIM 7 -#define WAITCHARCMPLX 8 - -#define RESIZEPRSBUF \ -do { \ - if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \ - { \ - int4 clen = state->curpos - state->word; \ - state->len *= 2; \ - state->word = (char*)repalloc( (void*)state->word, state->len ); \ - state->curpos = state->word + clen; \ - } \ -} while (0) - - -int4 -gettoken_tsvector(TI_IN_STATE * state) -{ - int4 oldstate = 0; - - state->curpos = state->word; - state->state = WAITWORD; - state->alen = 0; - - while (1) - { - if (state->state == WAITWORD) - { - if (*(state->prsbuf) == '\0') - return 0; - else if (t_iseq(state->prsbuf, '\'')) - state->state = WAITENDCMPLX; - else if (t_iseq(state->prsbuf, '\\')) - { - state->state = WAITNEXTCHAR; - oldstate = WAITENDWORD; - } - else if (state->oprisdelim && ISOPERATOR(state->prsbuf)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - else if (!t_isspace(state->prsbuf)) - { - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - state->state = WAITENDWORD; - } - } - else if (state->state == WAITNEXTCHAR) - { - if (*(state->prsbuf) == '\0') - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("there is no escaped character"))); - else - { - RESIZEPRSBUF; - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - state->state = oldstate; - } - } - else if (state->state == WAITENDWORD) - { - if (t_iseq(state->prsbuf, '\\')) - { - state->state = WAITNEXTCHAR; - oldstate = WAITENDWORD; - } - else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || - (state->oprisdelim && ISOPERATOR(state->prsbuf))) - { - RESIZEPRSBUF; - if (state->curpos == state->word) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - *(state->curpos) = '\0'; - return 1; - } - else if (t_iseq(state->prsbuf, ':')) - { - if (state->curpos == state->word) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - *(state->curpos) = '\0'; - if (state->oprisdelim) - return 1; - else - state->state = INPOSINFO; - } - else - { - RESIZEPRSBUF; - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - } - } - else if (state->state == WAITENDCMPLX) - { - if (t_iseq(state->prsbuf, '\'')) - { - state->state = WAITCHARCMPLX; - } - else if (t_iseq(state->prsbuf, '\\')) - { - state->state = WAITNEXTCHAR; - oldstate = WAITENDCMPLX; - } - else if (*(state->prsbuf) == '\0') - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - else - { - RESIZEPRSBUF; - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - } - } - else if (state->state == WAITCHARCMPLX) - { - if (t_iseq(state->prsbuf, '\'')) - { - RESIZEPRSBUF; - COPYCHAR(state->curpos, state->prsbuf); - state->curpos += pg_mblen(state->prsbuf); - state->state = WAITENDCMPLX; - } - else - { - RESIZEPRSBUF; - *(state->curpos) = '\0'; - if (state->curpos == state->word) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - if (state->oprisdelim) - { - /* state->prsbuf+=pg_mblen(state->prsbuf); */ - return 1; - } - else - state->state = WAITPOSINFO; - continue; /* recheck current character */ - } - } - else if (state->state == WAITPOSINFO) - { - if (t_iseq(state->prsbuf, ':')) - state->state = INPOSINFO; - else - return 1; - } - else if (state->state == INPOSINFO) - { - if (t_isdigit(state->prsbuf)) - { - if (state->alen == 0) - { - state->alen = 4; - state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen); - *(uint16 *) (state->pos) = 0; - } - else if (*(uint16 *) (state->pos) + 1 >= state->alen) - { - state->alen *= 2; - state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen); - } - (*(uint16 *) (state->pos))++; - WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf))); - if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("wrong position info"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0); - state->state = WAITPOSDELIM; - } - else - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - } - else if (state->state == WAITPOSDELIM) - { - if (t_iseq(state->prsbuf, ',')) - state->state = INPOSINFO; - else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*')) - { - if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3); - } - else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B')) - { - if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2); - } - else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C')) - { - if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1); - } - else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D')) - { - if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)])) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0); - } - else if (t_isspace(state->prsbuf) || - *(state->prsbuf) == '\0') - return 1; - else if (!t_isdigit(state->prsbuf)) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error"))); - } - else - /* internal error */ - elog(ERROR, "internal error"); - - /* get next char */ - state->prsbuf += pg_mblen(state->prsbuf); - } - - return 0; -} - -Datum -tsvector_in(PG_FUNCTION_ARGS) -{ - char *buf = PG_GETARG_CSTRING(0); - TI_IN_STATE state; - WordEntryIN *arr; - WordEntry *inarr; - int4 len = 0, - totallen = 64; - tsvector *in; - char *tmpbuf, - *cur; - int4 i, - buflen = 256; - - SET_FUNCOID(); - - pg_verifymbstr(buf, strlen(buf), false); - state.prsbuf = buf; - state.len = 32; - state.word = (char *) palloc(state.len); - state.oprisdelim = false; - - arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen); - cur = tmpbuf = (char *) palloc(buflen); - while (gettoken_tsvector(&state)) - { - if (len >= totallen) - { - totallen *= 2; - arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen); - } - while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen) - { - int4 dist = cur - tmpbuf; - - buflen *= 2; - tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); - cur = tmpbuf + dist; - } - if (state.curpos - state.word >= MAXSTRLEN) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("word is too long"))); - arr[len].entry.len = state.curpos - state.word; - if (cur - tmpbuf > MAXSTRPOS) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("too long value"))); - arr[len].entry.pos = cur - tmpbuf; - memcpy((void *) cur, (void *) state.word, arr[len].entry.len); - cur += arr[len].entry.len; - if (state.alen) - { - arr[len].entry.haspos = 1; - arr[len].pos = state.pos; - } - else - arr[len].entry.haspos = 0; - len++; - } - pfree(state.word); - - if (len > 0) - len = uniqueentry(arr, len, tmpbuf, &buflen); - else - buflen = 0; - totallen = CALCDATASIZE(len, buflen); - in = (tsvector *) palloc0(totallen); - SET_VARSIZE(in, totallen); - in->size = len; - cur = STRPTR(in); - inarr = ARRPTR(in); - for (i = 0; i < len; i++) - { - memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len); - arr[i].entry.pos = cur - STRPTR(in); - cur += SHORTALIGN(arr[i].entry.len); - if (arr[i].entry.haspos) - { - memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos)); - cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos); - pfree(arr[i].pos); - } - memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry)); - } - pfree(tmpbuf); - pfree(arr); - PG_RETURN_POINTER(in); -} - -Datum -tsvector_length(PG_FUNCTION_ARGS) -{ - tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - int4 ret = in->size; - - PG_FREE_IF_COPY(in, 0); - PG_RETURN_INT32(ret); -} - -Datum -tsvector_out(PG_FUNCTION_ARGS) -{ - tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); - char *outbuf; - int4 i, - lenbuf = 0, - pp; - WordEntry *ptr = ARRPTR(out); - char *curbegin, - *curin, - *curout; - - lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ; - for (i = 0; i < out->size; i++) - { - lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ; - if (ptr[i].haspos) - lenbuf += 7 * POSDATALEN(out, &(ptr[i])); - } - - curout = outbuf = (char *) palloc(lenbuf); - for (i = 0; i < out->size; i++) - { - curbegin = curin = STRPTR(out) + ptr->pos; - if (i != 0) - *curout++ = ' '; - *curout++ = '\''; - while (curin - curbegin < ptr->len) - { - int len = pg_mblen(curin); - - if (t_iseq(curin, '\'')) - { - int4 pos = curout - outbuf; - - outbuf = (char *) repalloc((void *) outbuf, ++lenbuf); - curout = outbuf + pos; - *curout++ = '\''; - } - while (len--) - *curout++ = *curin++; - } - *curout++ = '\''; - if ((pp = POSDATALEN(out, ptr)) != 0) - { - WordEntryPos *wptr; - - *curout++ = ':'; - wptr = POSDATAPTR(out, ptr); - while (pp) - { - sprintf(curout, "%d", WEP_GETPOS(*wptr)); - curout = strchr(curout, '\0'); - switch (WEP_GETWEIGHT(*wptr)) - { - case 3: - *curout++ = 'A'; - break; - case 2: - *curout++ = 'B'; - break; - case 1: - *curout++ = 'C'; - break; - case 0: - default: - break; - } - if (pp > 1) - *curout++ = ','; - pp--; - wptr++; - } - } - ptr++; - } - *curout = '\0'; - outbuf[lenbuf - 1] = '\0'; - PG_FREE_IF_COPY(out, 0); - PG_RETURN_POINTER(outbuf); -} - -static int -compareWORD(const void *a, const void *b) -{ - if (((TSWORD *) a)->len == ((TSWORD *) b)->len) - { - int res = strncmp( - ((TSWORD *) a)->word, - ((TSWORD *) b)->word, - ((TSWORD *) b)->len); - - if (res == 0) - return (((TSWORD *) a)->pos.pos > ((TSWORD *) b)->pos.pos) ? 1 : -1; - return res; - } - return (((TSWORD *) a)->len > ((TSWORD *) b)->len) ? 1 : -1; -} - -static int -uniqueWORD(TSWORD * a, int4 l) -{ - TSWORD *ptr, - *res; - int tmppos; - - if (l == 1) - { - tmppos = LIMITPOS(a->pos.pos); - a->alen = 2; - a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen); - a->pos.apos[0] = 1; - a->pos.apos[1] = tmppos; - return l; - } - - res = a; - ptr = a + 1; - - qsort((void *) a, l, sizeof(TSWORD), compareWORD); - tmppos = LIMITPOS(a->pos.pos); - a->alen = 2; - a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen); - a->pos.apos[0] = 1; - a->pos.apos[1] = tmppos; - - while (ptr - a < l) - { - if (!(ptr->len == res->len && - strncmp(ptr->word, res->word, res->len) == 0)) - { - res++; - res->len = ptr->len; - res->word = ptr->word; - tmppos = LIMITPOS(ptr->pos.pos); - res->alen = 2; - res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen); - res->pos.apos[0] = 1; - res->pos.apos[1] = tmppos; - } - else - { - pfree(ptr->word); - if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1) - { - if (res->pos.apos[0] + 1 >= res->alen) - { - res->alen *= 2; - res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen); - } - if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos)) - { - res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos); - res->pos.apos[0]++; - } - } - } - ptr++; - } - - return res + 1 - a; -} - -/* - * make value of tsvector - */ -static tsvector * -makevalue(PRSTEXT * prs) -{ - int4 i, - j, - lenstr = 0, - totallen; - tsvector *in; - WordEntry *ptr; - char *str, - *cur; - - prs->curwords = uniqueWORD(prs->words, prs->curwords); - for (i = 0; i < prs->curwords; i++) - { - lenstr += SHORTALIGN(prs->words[i].len); - - if (prs->words[i].alen) - lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); - } - - totallen = CALCDATASIZE(prs->curwords, lenstr); - in = (tsvector *) palloc0(totallen); - SET_VARSIZE(in, totallen); - in->size = prs->curwords; - - ptr = ARRPTR(in); - cur = str = STRPTR(in); - for (i = 0; i < prs->curwords; i++) - { - ptr->len = prs->words[i].len; - if (cur - str > MAXSTRPOS) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("value is too big"))); - ptr->pos = cur - str; - memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len); - pfree(prs->words[i].word); - cur += SHORTALIGN(prs->words[i].len); - if (prs->words[i].alen) - { - WordEntryPos *wptr; - - ptr->haspos = 1; - *(uint16 *) cur = prs->words[i].pos.apos[0]; - wptr = POSDATAPTR(in, ptr); - for (j = 0; j < *(uint16 *) cur; j++) - { - WEP_SETWEIGHT(wptr[j], 0); - WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]); - } - cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); - pfree(prs->words[i].pos.apos); - } - else - ptr->haspos = 0; - ptr++; - } - pfree(prs->words); - return in; -} - - -Datum -to_tsvector(PG_FUNCTION_ARGS) -{ - text *in = PG_GETARG_TEXT_P(1); - PRSTEXT prs; - tsvector *out; - TSCfgInfo *cfg; - - SET_FUNCOID(); - cfg = findcfg(PG_GETARG_INT32(0)); - - prs.lenwords = 32; - prs.curwords = 0; - prs.pos = 0; - prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords); - - parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ); - PG_FREE_IF_COPY(in, 1); - - if (prs.curwords) - out = makevalue(&prs); - else - { - pfree(prs.words); - out = palloc(CALCDATASIZE(0, 0)); - SET_VARSIZE(out, CALCDATASIZE(0, 0)); - out->size = 0; - } - PG_RETURN_POINTER(out); -} - -Datum -to_tsvector_name(PG_FUNCTION_ARGS) -{ - text *cfg = PG_GETARG_TEXT_P(0); - Datum res; - - SET_FUNCOID(); - res = DirectFunctionCall3( - to_tsvector, - Int32GetDatum(name2id_cfg(cfg)), - PG_GETARG_DATUM(1), - (Datum) 0 - ); - - PG_FREE_IF_COPY(cfg, 0); - PG_RETURN_DATUM(res); -} - -Datum -to_tsvector_current(PG_FUNCTION_ARGS) -{ - Datum res; - - SET_FUNCOID(); - res = DirectFunctionCall3( - to_tsvector, - Int32GetDatum(get_currcfg()), - PG_GETARG_DATUM(0), - (Datum) 0 - ); - - PG_RETURN_DATUM(res); -} - -static Oid -findFunc(char *fname) -{ - FuncCandidateList clist, - ptr; - Oid funcid = InvalidOid; - List *names = list_make1(makeString(fname)); - - ptr = clist = FuncnameGetCandidates(names, 1); - list_free(names); - - if (!ptr) - return funcid; - - while (ptr) - { - if (ptr->args[0] == TEXTOID && funcid == InvalidOid) - funcid = ptr->oid; - clist = ptr->next; - pfree(ptr); - ptr = clist; - } - - return funcid; -} - -/* - * Trigger - */ -Datum -tsearch2(PG_FUNCTION_ARGS) -{ - TriggerData *trigdata; - Trigger *trigger; - Relation rel; - HeapTuple rettuple = NULL; - int numidxattr, - i; - PRSTEXT prs; - Datum datum = (Datum) 0; - Oid funcoid = InvalidOid; - TSCfgInfo *cfg; - - SET_FUNCOID(); - cfg = findcfg(get_currcfg()); - - if (!CALLED_AS_TRIGGER(fcinfo)) - /* internal error */ - elog(ERROR, "TSearch: Not fired by trigger manager"); - - trigdata = (TriggerData *) fcinfo->context; - if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event)) - /* internal error */ - elog(ERROR, "TSearch: Cannot process STATEMENT events"); - if (TRIGGER_FIRED_AFTER(trigdata->tg_event)) - /* internal error */ - elog(ERROR, "TSearch: Must be fired BEFORE event"); - - if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) - rettuple = trigdata->tg_trigtuple; - else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) - rettuple = trigdata->tg_newtuple; - else - /* internal error */ - elog(ERROR, "TSearch: Unknown event"); - - trigger = trigdata->tg_trigger; - rel = trigdata->tg_relation; - - if (trigger->tgnargs < 2) - /* internal error */ - elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)"); - - numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]); - if (numidxattr == SPI_ERROR_NOATTRIBUTE) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("tsvector column \"%s\" does not exist", - trigger->tgargs[0]))); - - prs.lenwords = 32; - prs.curwords = 0; - prs.pos = 0; - prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords); - - /* find all words in indexable column */ - for (i = 1; i < trigger->tgnargs; i++) - { - int numattr; - Oid oidtype; - Datum txt_toasted; - bool isnull; - text *txt; - - numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]); - if (numattr == SPI_ERROR_NOATTRIBUTE) - { - funcoid = findFunc(trigger->tgargs[i]); - if (funcoid == InvalidOid) - ereport(ERROR, - (errcode(ERRCODE_UNDEFINED_COLUMN), - errmsg("could not find function or field \"%s\"", - trigger->tgargs[i]))); - - continue; - } - oidtype = SPI_gettypeid(rel->rd_att, numattr); - /* We assume char() and varchar() are binary-equivalent to text */ - if (!(oidtype == TEXTOID || - oidtype == VARCHAROID || - oidtype == BPCHAROID)) - { - elog(WARNING, "TSearch: '%s' is not of character type", - trigger->tgargs[i]); - continue; - } - txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull); - if (isnull) - continue; - - if (funcoid != InvalidOid) - { - text *txttmp = (text *) DatumGetPointer(OidFunctionCall1( - funcoid, - PointerGetDatum(txt_toasted) - )); - - txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp))); - if (txt == txttmp) - txt_toasted = PointerGetDatum(txt); - } - else - txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted))); - - parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ); - if (txt != (text *) DatumGetPointer(txt_toasted)) - pfree(txt); - } - - /* make tsvector value */ - if (prs.curwords) - { - datum = PointerGetDatum(makevalue(&prs)); - rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, - &datum, NULL); - pfree(DatumGetPointer(datum)); - } - else - { - tsvector *out = palloc(CALCDATASIZE(0, 0)); - - SET_VARSIZE(out, CALCDATASIZE(0, 0)); - out->size = 0; - datum = PointerGetDatum(out); - pfree(prs.words); - rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, - &datum, NULL); - } - - if (rettuple == NULL) - /* internal error */ - elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result); - - return PointerGetDatum(rettuple); -} - -static int -silly_cmp_tsvector(const tsvector * a, const tsvector * b) -{ - if (VARSIZE(a) < VARSIZE(b)) - return -1; - else if (VARSIZE(a) > VARSIZE(b)) - return 1; - else if (a->size < b->size) - return -1; - else if (a->size > b->size) - return 1; - else - { - WordEntry *aptr = ARRPTR(a); - WordEntry *bptr = ARRPTR(b); - int i = 0; - int res; - - - for (i = 0; i < a->size; i++) - { - if (aptr->haspos != bptr->haspos) - { - return (aptr->haspos > bptr->haspos) ? -1 : 1; - } - else if (aptr->len != bptr->len) - { - return (aptr->len > bptr->len) ? -1 : 1; - } - else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0) - { - return res; - } - else if (aptr->haspos) - { - WordEntryPos *ap = POSDATAPTR(a, aptr); - WordEntryPos *bp = POSDATAPTR(b, bptr); - int j; - - if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr)) - return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1; - - for (j = 0; j < POSDATALEN(a, aptr); j++) - { - if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp)) - { - return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1; - } - else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp)) - { - return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1; - } - ap++, bp++; - } - } - - aptr++; - bptr++; - } - } - - return 0; -} - -PG_FUNCTION_INFO_V1(tsvector_cmp); -PG_FUNCTION_INFO_V1(tsvector_lt); -PG_FUNCTION_INFO_V1(tsvector_le); -PG_FUNCTION_INFO_V1(tsvector_eq); -PG_FUNCTION_INFO_V1(tsvector_ne); -PG_FUNCTION_INFO_V1(tsvector_ge); -PG_FUNCTION_INFO_V1(tsvector_gt); -Datum tsvector_cmp(PG_FUNCTION_ARGS); -Datum tsvector_lt(PG_FUNCTION_ARGS); -Datum tsvector_le(PG_FUNCTION_ARGS); -Datum tsvector_eq(PG_FUNCTION_ARGS); -Datum tsvector_ne(PG_FUNCTION_ARGS); -Datum tsvector_ge(PG_FUNCTION_ARGS); -Datum tsvector_gt(PG_FUNCTION_ARGS); - -#define RUNCMP \ -tsvector *a = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));\ -tsvector *b = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));\ -int res = silly_cmp_tsvector(a,b); \ -PG_FREE_IF_COPY(a,0); \ -PG_FREE_IF_COPY(b,1); \ - -Datum -tsvector_cmp(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_INT32(res); -} - -Datum -tsvector_lt(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res < 0) ? true : false); -} - -Datum -tsvector_le(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res <= 0) ? true : false); -} - -Datum -tsvector_eq(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res == 0) ? true : false); -} - -Datum -tsvector_ge(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res >= 0) ? true : false); -} - -Datum -tsvector_gt(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res > 0) ? true : false); -} - -Datum -tsvector_ne(PG_FUNCTION_ARGS) -{ - RUNCMP - PG_RETURN_BOOL((res != 0) ? true : false); -} |