diff options
| author | Lorry Tar Creator <lorry-tar-importer@baserock.org> | 2015-02-17 17:25:57 +0000 |
|---|---|---|
| committer | <> | 2015-03-17 16:26:24 +0000 |
| commit | 780b92ada9afcf1d58085a83a0b9e6bc982203d1 (patch) | |
| tree | 598f8b9fa431b228d29897e798de4ac0c1d3d970 /lang/sql/sqlite/ext/fts3/fts3.c | |
| parent | 7a2660ba9cc2dc03a69ddfcfd95369395cc87444 (diff) | |
| download | berkeleydb-master.tar.gz | |
Diffstat (limited to 'lang/sql/sqlite/ext/fts3/fts3.c')
| -rw-r--r-- | lang/sql/sqlite/ext/fts3/fts3.c | 4747 |
1 files changed, 3412 insertions, 1335 deletions
diff --git a/lang/sql/sqlite/ext/fts3/fts3.c b/lang/sql/sqlite/ext/fts3/fts3.c index 20da0516..44b7f431 100644 --- a/lang/sql/sqlite/ext/fts3/fts3.c +++ b/lang/sql/sqlite/ext/fts3/fts3.c @@ -70,7 +70,7 @@ ** A doclist is stored like this: ** ** array { -** varint docid; +** varint docid; (delta from previous doclist) ** array { (position list for column 0) ** varint position; (2 more than the delta from previous position) ** } @@ -101,8 +101,8 @@ ** at D signals the start of a new column; the 1 at E indicates that the ** new column is column number 1. There are two positions at 12 and 45 ** (14-2 and 35-2+12). The 0 at H indicate the end-of-document. The -** 234 at I is the next docid. It has one position 72 (72-2) and then -** terminates with the 0 at K. +** 234 at I is the delta to next docid (357). It has one position 70 +** (72-2) and then terminates with the 0 at K. ** ** A "position-list" is the list of positions for multiple columns for ** a single docid. A "column-list" is the set of positions for a single @@ -286,20 +286,15 @@ ** will eventually overtake the earlier data and knock it out. The ** query logic likewise merges doclists so that newer data knocks out ** older data. -** -** TODO(shess) Provide a VACUUM type operation to clear out all -** deletions and duplications. This would basically be a forced merge -** into a single segment. */ +#include "fts3Int.h" #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) #if defined(SQLITE_ENABLE_FTS3) && !defined(SQLITE_CORE) # define SQLITE_CORE 1 #endif -#include "fts3Int.h" - #include <assert.h> #include <stdlib.h> #include <stddef.h> @@ -313,6 +308,11 @@ SQLITE_EXTENSION_INIT1 #endif +static int fts3EvalNext(Fts3Cursor *pCsr); +static int fts3EvalStart(Fts3Cursor *pCsr); +static int fts3TermSegReaderCursor( + Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **); + /* ** Write a 64-bit variable-length integer to memory starting at p[0]. ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes. @@ -330,21 +330,37 @@ int sqlite3Fts3PutVarint(char *p, sqlite_int64 v){ return (int) (q - (unsigned char *)p); } +#define GETVARINT_STEP(v, ptr, shift, mask1, mask2, var, ret) \ + v = (v & mask1) | ( (*ptr++) << shift ); \ + if( (v & mask2)==0 ){ var = v; return ret; } +#define GETVARINT_INIT(v, ptr, shift, mask1, mask2, var, ret) \ + v = (*ptr++); \ + if( (v & mask2)==0 ){ var = v; return ret; } + /* ** Read a 64-bit variable-length integer from memory starting at p[0]. ** Return the number of bytes read, or 0 on error. ** The value is stored in *v. */ int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ - const unsigned char *q = (const unsigned char *) p; - sqlite_uint64 x = 0, y = 1; - while( (*q&0x80)==0x80 && q-(unsigned char *)p<FTS3_VARINT_MAX ){ - x += y * (*q++ & 0x7f); - y <<= 7; - } - x += y * (*q++); - *v = (sqlite_int64) x; - return (int) (q - (unsigned char *)p); + const char *pStart = p; + u32 a; + u64 b; + int shift; + + GETVARINT_INIT(a, p, 0, 0x00, 0x80, *v, 1); + GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *v, 2); + GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *v, 3); + GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *v, 4); + b = (a & 0x0FFFFFFF ); + + for(shift=28; shift<=63; shift+=7){ + u64 c = *p++; + b += (c&0x7F) << shift; + if( (c & 0x80)==0 ) break; + } + *v = b; + return (int)(p - pStart); } /* @@ -352,10 +368,21 @@ int sqlite3Fts3GetVarint(const char *p, sqlite_int64 *v){ ** 32-bit integer before it is returned. */ int sqlite3Fts3GetVarint32(const char *p, int *pi){ - sqlite_int64 i; - int ret = sqlite3Fts3GetVarint(p, &i); - *pi = (int) i; - return ret; + u32 a; + +#ifndef fts3GetVarint32 + GETVARINT_INIT(a, p, 0, 0x00, 0x80, *pi, 1); +#else + a = (*p++); + assert( a & 0x80 ); +#endif + + GETVARINT_STEP(a, p, 7, 0x7F, 0x4000, *pi, 2); + GETVARINT_STEP(a, p, 14, 0x3FFF, 0x200000, *pi, 3); + GETVARINT_STEP(a, p, 21, 0x1FFFFF, 0x10000000, *pi, 4); + a = (a & 0x0FFFFFFF ); + *pi = (int)(a | ((u32)(*p & 0x0F) << 28)); + return 5; } /* @@ -420,17 +447,31 @@ static void fts3GetDeltaVarint(char **pp, sqlite3_int64 *pVal){ } /* -** As long as *pp has not reached its end (pEnd), then do the same -** as fts3GetDeltaVarint(): read a single varint and add it to *pVal. -** But if we have reached the end of the varint, just set *pp=0 and -** leave *pVal unchanged. +** When this function is called, *pp points to the first byte following a +** varint that is part of a doclist (or position-list, or any other list +** of varints). This function moves *pp to point to the start of that varint, +** and sets *pVal by the varint value. +** +** Argument pStart points to the first byte of the doclist that the +** varint is part of. */ -static void fts3GetDeltaVarint2(char **pp, char *pEnd, sqlite3_int64 *pVal){ - if( *pp>=pEnd ){ - *pp = 0; - }else{ - fts3GetDeltaVarint(pp, pVal); - } +static void fts3GetReverseVarint( + char **pp, + char *pStart, + sqlite3_int64 *pVal +){ + sqlite3_int64 iVal; + char *p; + + /* Pointer p now points at the first byte past the varint we are + ** interested in. So, unless the doclist is corrupt, the 0x80 bit is + ** clear on character p[-1]. */ + for(p = (*pp)-2; p>=pStart && *p&0x80; p--); + p++; + *pp = p; + + sqlite3Fts3GetVarint(p, &iVal); + *pVal = iVal; } /* @@ -450,6 +491,8 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){ sqlite3_free(p->zSegmentsTbl); sqlite3_free(p->zReadExprlist); sqlite3_free(p->zWriteExprlist); + sqlite3_free(p->zContentTbl); + sqlite3_free(p->zLanguageid); /* Invoke the tokenizer destructor to free the tokenizer. */ p->pTokenizer->pModule->xDestroy(p->pTokenizer); @@ -489,16 +532,19 @@ static void fts3DbExec( ** The xDestroy() virtual table method. */ static int fts3DestroyMethod(sqlite3_vtab *pVtab){ - int rc = SQLITE_OK; /* Return code */ Fts3Table *p = (Fts3Table *)pVtab; - sqlite3 *db = p->db; + int rc = SQLITE_OK; /* Return code */ + const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */ + sqlite3 *db = p->db; /* Database handle */ /* Drop the shadow tables */ - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", p->zDb, p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", p->zDb,p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", p->zDb, p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", p->zDb, p->zName); - fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", p->zDb, p->zName); + if( p->zContentTbl==0 ){ + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName); + } + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName); + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName); + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName); + fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName); /* If everything has worked, invoke fts3DisconnectMethod() to free the ** memory associated with the Fts3Table structure and return SQLITE_OK. @@ -523,6 +569,10 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){ int rc; /* Return code */ char *zSql; /* SQL statement passed to declare_vtab() */ char *zCols; /* List of user defined columns */ + const char *zLanguageid; + + zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid"); + sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); /* Create a list of user columns for the virtual table */ zCols = sqlite3_mprintf("%Q, ", p->azColumn[0]); @@ -532,7 +582,8 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){ /* Create the whole "CREATE TABLE" statement to pass to SQLite */ zSql = sqlite3_mprintf( - "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName + "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)", + zCols, p->zName, zLanguageid ); if( !zCols || !zSql ){ rc = SQLITE_NOMEM; @@ -547,6 +598,18 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){ } /* +** Create the %_stat table if it does not already exist. +*/ +void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){ + fts3DbExec(pRc, p->db, + "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'" + "(id INTEGER PRIMARY KEY, value BLOB);", + p->zDb, p->zName + ); + if( (*pRc)==SQLITE_OK ) p->bHasStat = 1; +} + +/* ** Create the backing store tables (%_content, %_segments and %_segdir) ** required by the FTS3 table passed as the only argument. This is done ** as part of the vtab xCreate() method. @@ -558,23 +621,31 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){ static int fts3CreateTables(Fts3Table *p){ int rc = SQLITE_OK; /* Return code */ int i; /* Iterator variable */ - char *zContentCols; /* Columns of %_content table */ sqlite3 *db = p->db; /* The database connection */ - /* Create a list of user columns for the content table */ - zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY"); - for(i=0; zContentCols && i<p->nColumn; i++){ - char *z = p->azColumn[i]; - zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z); + if( p->zContentTbl==0 ){ + const char *zLanguageid = p->zLanguageid; + char *zContentCols; /* Columns of %_content table */ + + /* Create a list of user columns for the content table */ + zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY"); + for(i=0; zContentCols && i<p->nColumn; i++){ + char *z = p->azColumn[i]; + zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z); + } + if( zLanguageid && zContentCols ){ + zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid); + } + if( zContentCols==0 ) rc = SQLITE_NOMEM; + + /* Create the content table */ + fts3DbExec(&rc, db, + "CREATE TABLE %Q.'%q_content'(%s)", + p->zDb, p->zName, zContentCols + ); + sqlite3_free(zContentCols); } - if( zContentCols==0 ) rc = SQLITE_NOMEM; - /* Create the content table */ - fts3DbExec(&rc, db, - "CREATE TABLE %Q.'%q_content'(%s)", - p->zDb, p->zName, zContentCols - ); - sqlite3_free(zContentCols); /* Create other tables */ fts3DbExec(&rc, db, "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);", @@ -598,11 +669,9 @@ static int fts3CreateTables(Fts3Table *p){ p->zDb, p->zName ); } + assert( p->bHasStat==p->bFts4 ); if( p->bHasStat ){ - fts3DbExec(&rc, db, - "CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);", - p->zDb, p->zName - ); + sqlite3Fts3CreateStatTable(&rc, p); } return rc; } @@ -629,6 +698,9 @@ static void fts3DatabasePageSize(int *pRc, Fts3Table *p){ sqlite3_step(pStmt); p->nPgsz = sqlite3_column_int(pStmt, 0); rc = sqlite3_finalize(pStmt); + }else if( rc==SQLITE_AUTH ){ + p->nPgsz = 1024; + rc = SQLITE_OK; } } assert( p->nPgsz>0 || rc!=SQLITE_OK ); @@ -681,6 +753,7 @@ static void fts3Appendf( char *z; va_start(ap, zFormat); z = sqlite3_vmprintf(zFormat, ap); + va_end(ap); if( z && *pz ){ char *z2 = sqlite3_mprintf("%s%s", *pz, z); sqlite3_free(z); @@ -705,7 +778,7 @@ static void fts3Appendf( static char *fts3QuoteId(char const *zInput){ int nRet; char *zRet; - nRet = 2 + strlen(zInput)*2 + 1; + nRet = 2 + (int)strlen(zInput)*2 + 1; zRet = sqlite3_malloc(nRet); if( zRet ){ int i; @@ -722,8 +795,8 @@ static char *fts3QuoteId(char const *zInput){ } /* -** Return a list of comma separated SQL expressions that could be used -** in a SELECT statement such as the following: +** Return a list of comma separated SQL expressions and a FROM clause that +** could be used in a SELECT statement such as the following: ** ** SELECT <list of expressions> FROM %_content AS x ... ** @@ -734,7 +807,7 @@ static char *fts3QuoteId(char const *zInput){ ** table has the three user-defined columns "a", "b", and "c", the following ** string is returned: ** -** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c')" +** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x" ** ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It ** is the responsibility of the caller to eventually free it. @@ -750,16 +823,34 @@ static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){ char *zFunction; int i; - if( !zFunc ){ - zFunction = ""; + if( p->zContentTbl==0 ){ + if( !zFunc ){ + zFunction = ""; + }else{ + zFree = zFunction = fts3QuoteId(zFunc); + } + fts3Appendf(pRc, &zRet, "docid"); + for(i=0; i<p->nColumn; i++){ + fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); + } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", x.%Q", "langid"); + } + sqlite3_free(zFree); }else{ - zFree = zFunction = fts3QuoteId(zFunc); - } - fts3Appendf(pRc, &zRet, "docid"); - for(i=0; i<p->nColumn; i++){ - fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]); + fts3Appendf(pRc, &zRet, "rowid"); + for(i=0; i<p->nColumn; i++){ + fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]); + } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid); + } } - sqlite3_free(zFree); + fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", + p->zDb, + (p->zContentTbl ? p->zContentTbl : p->zName), + (p->zContentTbl ? "" : "_content") + ); return zRet; } @@ -798,11 +889,180 @@ static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){ for(i=0; i<p->nColumn; i++){ fts3Appendf(pRc, &zRet, ",%s(?)", zFunction); } + if( p->zLanguageid ){ + fts3Appendf(pRc, &zRet, ", ?"); + } sqlite3_free(zFree); return zRet; } /* +** This function interprets the string at (*pp) as a non-negative integer +** value. It reads the integer and sets *pnOut to the value read, then +** sets *pp to point to the byte immediately following the last byte of +** the integer value. +** +** Only decimal digits ('0'..'9') may be part of an integer value. +** +** If *pp does not being with a decimal digit SQLITE_ERROR is returned and +** the output value undefined. Otherwise SQLITE_OK is returned. +** +** This function is used when parsing the "prefix=" FTS4 parameter. +*/ +static int fts3GobbleInt(const char **pp, int *pnOut){ + const char *p; /* Iterator pointer */ + int nInt = 0; /* Output value */ + + for(p=*pp; p[0]>='0' && p[0]<='9'; p++){ + nInt = nInt * 10 + (p[0] - '0'); + } + if( p==*pp ) return SQLITE_ERROR; + *pnOut = nInt; + *pp = p; + return SQLITE_OK; +} + +/* +** This function is called to allocate an array of Fts3Index structures +** representing the indexes maintained by the current FTS table. FTS tables +** always maintain the main "terms" index, but may also maintain one or +** more "prefix" indexes, depending on the value of the "prefix=" parameter +** (if any) specified as part of the CREATE VIRTUAL TABLE statement. +** +** Argument zParam is passed the value of the "prefix=" option if one was +** specified, or NULL otherwise. +** +** If no error occurs, SQLITE_OK is returned and *apIndex set to point to +** the allocated array. *pnIndex is set to the number of elements in the +** array. If an error does occur, an SQLite error code is returned. +** +** Regardless of whether or not an error is returned, it is the responsibility +** of the caller to call sqlite3_free() on the output array to free it. +*/ +static int fts3PrefixParameter( + const char *zParam, /* ABC in prefix=ABC parameter to parse */ + int *pnIndex, /* OUT: size of *apIndex[] array */ + struct Fts3Index **apIndex /* OUT: Array of indexes for this table */ +){ + struct Fts3Index *aIndex; /* Allocated array */ + int nIndex = 1; /* Number of entries in array */ + + if( zParam && zParam[0] ){ + const char *p; + nIndex++; + for(p=zParam; *p; p++){ + if( *p==',' ) nIndex++; + } + } + + aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex); + *apIndex = aIndex; + *pnIndex = nIndex; + if( !aIndex ){ + return SQLITE_NOMEM; + } + + memset(aIndex, 0, sizeof(struct Fts3Index) * nIndex); + if( zParam ){ + const char *p = zParam; + int i; + for(i=1; i<nIndex; i++){ + int nPrefix; + if( fts3GobbleInt(&p, &nPrefix) ) return SQLITE_ERROR; + aIndex[i].nPrefix = nPrefix; + p++; + } + } + + return SQLITE_OK; +} + +/* +** This function is called when initializing an FTS4 table that uses the +** content=xxx option. It determines the number of and names of the columns +** of the new FTS4 table. +** +** The third argument passed to this function is the value passed to the +** config=xxx option (i.e. "xxx"). This function queries the database for +** a table of that name. If found, the output variables are populated +** as follows: +** +** *pnCol: Set to the number of columns table xxx has, +** +** *pnStr: Set to the total amount of space required to store a copy +** of each columns name, including the nul-terminator. +** +** *pazCol: Set to point to an array of *pnCol strings. Each string is +** the name of the corresponding column in table xxx. The array +** and its contents are allocated using a single allocation. It +** is the responsibility of the caller to free this allocation +** by eventually passing the *pazCol value to sqlite3_free(). +** +** If the table cannot be found, an error code is returned and the output +** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is +** returned (and the output variables are undefined). +*/ +static int fts3ContentColumns( + sqlite3 *db, /* Database handle */ + const char *zDb, /* Name of db (i.e. "main", "temp" etc.) */ + const char *zTbl, /* Name of content table */ + const char ***pazCol, /* OUT: Malloc'd array of column names */ + int *pnCol, /* OUT: Size of array *pazCol */ + int *pnStr /* OUT: Bytes of string content */ +){ + int rc = SQLITE_OK; /* Return code */ + char *zSql; /* "SELECT *" statement on zTbl */ + sqlite3_stmt *pStmt = 0; /* Compiled version of zSql */ + + zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); + } + sqlite3_free(zSql); + + if( rc==SQLITE_OK ){ + const char **azCol; /* Output array */ + int nStr = 0; /* Size of all column names (incl. 0x00) */ + int nCol; /* Number of table columns */ + int i; /* Used to iterate through columns */ + + /* Loop through the returned columns. Set nStr to the number of bytes of + ** space required to store a copy of each column name, including the + ** nul-terminator byte. */ + nCol = sqlite3_column_count(pStmt); + for(i=0; i<nCol; i++){ + const char *zCol = sqlite3_column_name(pStmt, i); + nStr += (int)strlen(zCol) + 1; + } + + /* Allocate and populate the array to return. */ + azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr); + if( azCol==0 ){ + rc = SQLITE_NOMEM; + }else{ + char *p = (char *)&azCol[nCol]; + for(i=0; i<nCol; i++){ + const char *zCol = sqlite3_column_name(pStmt, i); + int n = (int)strlen(zCol)+1; + memcpy(p, zCol, n); + azCol[i] = p; + p += n; + } + } + sqlite3_finalize(pStmt); + + /* Set the output variables. */ + *pnCol = nCol; + *pnStr = nStr; + *pazCol = azCol; + } + + return rc; +} + +/* ** This function is the implementation of both the xConnect and xCreate ** methods of the FTS3 virtual table. ** @@ -834,12 +1094,22 @@ static int fts3InitVtab( int nDb; /* Bytes required to hold database name */ int nName; /* Bytes required to hold table name */ int isFts4 = (argv[0][3]=='4'); /* True for FTS4, false for FTS3 */ - int bNoDocsize = 0; /* True to omit %_docsize table */ const char **aCol; /* Array of column names */ sqlite3_tokenizer *pTokenizer = 0; /* Tokenizer for this table */ - char *zCompress = 0; - char *zUncompress = 0; + int nIndex; /* Size of aIndex[] array */ + struct Fts3Index *aIndex = 0; /* Array of indexes for this table */ + + /* The results of parsing supported FTS4 key=value options: */ + int bNoDocsize = 0; /* True to omit %_docsize table */ + int bDescIdx = 0; /* True to store descending indexes */ + char *zPrefix = 0; /* Prefix parameter value (or NULL) */ + char *zCompress = 0; /* compress=? parameter (or NULL) */ + char *zUncompress = 0; /* uncompress=? parameter (or NULL) */ + char *zContent = 0; /* content=? parameter (or NULL) */ + char *zLanguageid = 0; /* languageid=? parameter (or NULL) */ + char **azNotindexed = 0; /* The set of notindexed= columns */ + int nNotindexed = 0; /* Size of azNotindexed[] array */ assert( strlen(argv[0])==4 ); assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4) @@ -849,9 +1119,19 @@ static int fts3InitVtab( nDb = (int)strlen(argv[1]) + 1; nName = (int)strlen(argv[2]) + 1; - aCol = (const char **)sqlite3_malloc(sizeof(const char *) * (argc-2) ); - if( !aCol ) return SQLITE_NOMEM; - memset((void *)aCol, 0, sizeof(const char *) * (argc-2)); + nByte = sizeof(const char *) * (argc-2); + aCol = (const char **)sqlite3_malloc(nByte); + if( aCol ){ + memset((void*)aCol, 0, nByte); + azNotindexed = (char **)sqlite3_malloc(nByte); + } + if( azNotindexed ){ + memset(azNotindexed, 0, nByte); + } + if( !aCol || !azNotindexed ){ + rc = SQLITE_NOMEM; + goto fts3_init_out; + } /* Loop through all of the arguments passed by the user to the FTS3/4 ** module (i.e. all the column names and special arguments). This loop @@ -880,28 +1160,92 @@ static int fts3InitVtab( /* Check if it is an FTS4 special argument. */ else if( isFts4 && fts3IsSpecialColumn(z, &nKey, &zVal) ){ + struct Fts4Option { + const char *zOpt; + int nOpt; + } aFts4Opt[] = { + { "matchinfo", 9 }, /* 0 -> MATCHINFO */ + { "prefix", 6 }, /* 1 -> PREFIX */ + { "compress", 8 }, /* 2 -> COMPRESS */ + { "uncompress", 10 }, /* 3 -> UNCOMPRESS */ + { "order", 5 }, /* 4 -> ORDER */ + { "content", 7 }, /* 5 -> CONTENT */ + { "languageid", 10 }, /* 6 -> LANGUAGEID */ + { "notindexed", 10 } /* 7 -> NOTINDEXED */ + }; + + int iOpt; if( !zVal ){ rc = SQLITE_NOMEM; - goto fts3_init_out; - } - if( nKey==9 && 0==sqlite3_strnicmp(z, "matchinfo", 9) ){ - if( strlen(zVal)==4 && 0==sqlite3_strnicmp(zVal, "fts3", 4) ){ - bNoDocsize = 1; - }else{ - *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal); + }else{ + for(iOpt=0; iOpt<SizeofArray(aFts4Opt); iOpt++){ + struct Fts4Option *pOp = &aFts4Opt[iOpt]; + if( nKey==pOp->nOpt && !sqlite3_strnicmp(z, pOp->zOpt, pOp->nOpt) ){ + break; + } + } + if( iOpt==SizeofArray(aFts4Opt) ){ + *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z); rc = SQLITE_ERROR; + }else{ + switch( iOpt ){ + case 0: /* MATCHINFO */ + if( strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "fts3", 4) ){ + *pzErr = sqlite3_mprintf("unrecognized matchinfo: %s", zVal); + rc = SQLITE_ERROR; + } + bNoDocsize = 1; + break; + + case 1: /* PREFIX */ + sqlite3_free(zPrefix); + zPrefix = zVal; + zVal = 0; + break; + + case 2: /* COMPRESS */ + sqlite3_free(zCompress); + zCompress = zVal; + zVal = 0; + break; + + case 3: /* UNCOMPRESS */ + sqlite3_free(zUncompress); + zUncompress = zVal; + zVal = 0; + break; + + case 4: /* ORDER */ + if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) + && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4)) + ){ + *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal); + rc = SQLITE_ERROR; + } + bDescIdx = (zVal[0]=='d' || zVal[0]=='D'); + break; + + case 5: /* CONTENT */ + sqlite3_free(zContent); + zContent = zVal; + zVal = 0; + break; + + case 6: /* LANGUAGEID */ + assert( iOpt==6 ); + sqlite3_free(zLanguageid); + zLanguageid = zVal; + zVal = 0; + break; + + case 7: /* NOTINDEXED */ + azNotindexed[nNotindexed++] = zVal; + zVal = 0; + break; + } } - }else if( nKey==8 && 0==sqlite3_strnicmp(z, "compress", 8) ){ - zCompress = zVal; - zVal = 0; - }else if( nKey==10 && 0==sqlite3_strnicmp(z, "uncompress", 10) ){ - zUncompress = zVal; - zVal = 0; - }else{ - *pzErr = sqlite3_mprintf("unrecognized parameter: %s", z); - rc = SQLITE_ERROR; + sqlite3_free(zVal); } - sqlite3_free(zVal); } /* Otherwise, the argument is a column name. */ @@ -910,6 +1254,39 @@ static int fts3InitVtab( aCol[nCol++] = z; } } + + /* If a content=xxx option was specified, the following: + ** + ** 1. Ignore any compress= and uncompress= options. + ** + ** 2. If no column names were specified as part of the CREATE VIRTUAL + ** TABLE statement, use all columns from the content table. + */ + if( rc==SQLITE_OK && zContent ){ + sqlite3_free(zCompress); + sqlite3_free(zUncompress); + zCompress = 0; + zUncompress = 0; + if( nCol==0 ){ + sqlite3_free((void*)aCol); + aCol = 0; + rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString); + + /* If a languageid= option was specified, remove the language id + ** column from the aCol[] array. */ + if( rc==SQLITE_OK && zLanguageid ){ + int j; + for(j=0; j<nCol; j++){ + if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){ + int k; + for(k=j; k<nCol; k++) aCol[k] = aCol[k+1]; + nCol--; + break; + } + } + } + } + } if( rc!=SQLITE_OK ) goto fts3_init_out; if( nCol==0 ){ @@ -925,10 +1302,18 @@ static int fts3InitVtab( } assert( pTokenizer ); + rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex); + if( rc==SQLITE_ERROR ){ + assert( zPrefix ); + *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix); + } + if( rc!=SQLITE_OK ) goto fts3_init_out; /* Allocate and populate the Fts3Table structure. */ - nByte = sizeof(Fts3Table) + /* Fts3Table */ + nByte = sizeof(Fts3Table) + /* Fts3Table */ nCol * sizeof(char *) + /* azColumn */ + nIndex * sizeof(struct Fts3Index) + /* aIndex */ + nCol * sizeof(u8) + /* abNotindexed */ nName + /* zName */ nDb + /* zDb */ nString; /* Space for azColumn strings */ @@ -943,14 +1328,29 @@ static int fts3InitVtab( p->nPendingData = 0; p->azColumn = (char **)&p[1]; p->pTokenizer = pTokenizer; - p->nNodeSize = 1000; p->nMaxPendingData = FTS3_MAX_PENDING_DATA; p->bHasDocsize = (isFts4 && bNoDocsize==0); p->bHasStat = isFts4; - fts3HashInit(&p->pendingTerms, FTS3_HASH_STRING, 1); + p->bFts4 = isFts4; + p->bDescIdx = bDescIdx; + p->bAutoincrmerge = 0xff; /* 0xff means setting unknown */ + p->zContentTbl = zContent; + p->zLanguageid = zLanguageid; + zContent = 0; + zLanguageid = 0; + TESTONLY( p->inTransaction = -1 ); + TESTONLY( p->mxSavepoint = -1 ); + + p->aIndex = (struct Fts3Index *)&p->azColumn[nCol]; + memcpy(p->aIndex, aIndex, sizeof(struct Fts3Index) * nIndex); + p->nIndex = nIndex; + for(i=0; i<nIndex; i++){ + fts3HashInit(&p->aIndex[i].hPending, FTS3_HASH_STRING, 1); + } + p->abNotindexed = (u8 *)&p->aIndex[nIndex]; /* Fill in the zName and zDb fields of the vtab structure. */ - zCsr = (char *)&p->azColumn[nCol]; + zCsr = (char *)&p->abNotindexed[nCol]; p->zName = zCsr; memcpy(zCsr, argv[2], nName); zCsr += nName; @@ -961,7 +1361,7 @@ static int fts3InitVtab( /* Fill in the azColumn array */ for(iCol=0; iCol<nCol; iCol++){ char *z; - int n; + int n = 0; z = (char *)sqlite3Fts3NextToken(aCol[iCol], &n); memcpy(zCsr, z, n); zCsr[n] = '\0'; @@ -971,7 +1371,26 @@ static int fts3InitVtab( assert( zCsr <= &((char *)p)[nByte] ); } - if( (zCompress==0)!=(zUncompress==0) ){ + /* Fill in the abNotindexed array */ + for(iCol=0; iCol<nCol; iCol++){ + int n = (int)strlen(p->azColumn[iCol]); + for(i=0; i<nNotindexed; i++){ + char *zNot = azNotindexed[i]; + if( zNot && 0==sqlite3_strnicmp(p->azColumn[iCol], zNot, n) ){ + p->abNotindexed[iCol] = 1; + sqlite3_free(zNot); + azNotindexed[i] = 0; + } + } + } + for(i=0; i<nNotindexed; i++){ + if( azNotindexed[i] ){ + *pzErr = sqlite3_mprintf("no such column: %s", azNotindexed[i]); + rc = SQLITE_ERROR; + } + } + + if( rc==SQLITE_OK && (zCompress==0)!=(zUncompress==0) ){ char const *zMiss = (zCompress==0 ? "compress" : "uncompress"); rc = SQLITE_ERROR; *pzErr = sqlite3_mprintf("missing %s parameter in fts4 constructor", zMiss); @@ -987,19 +1406,34 @@ static int fts3InitVtab( rc = fts3CreateTables(p); } - /* Figure out the page-size for the database. This is required in order to - ** estimate the cost of loading large doclists from the database (see - ** function sqlite3Fts3SegReaderCost() for details). + /* Check to see if a legacy fts3 table has been "upgraded" by the + ** addition of a %_stat table so that it can use incremental merge. */ + if( !isFts4 && !isCreate ){ + int rc2 = SQLITE_OK; + fts3DbExec(&rc2, db, "SELECT 1 FROM %Q.'%q_stat' WHERE id=2", + p->zDb, p->zName); + if( rc2==SQLITE_OK ) p->bHasStat = 1; + } + + /* Figure out the page-size for the database. This is required in order to + ** estimate the cost of loading large doclists from the database. */ fts3DatabasePageSize(&rc, p); + p->nNodeSize = p->nPgsz-35; /* Declare the table schema to SQLite. */ fts3DeclareVtab(&rc, p); fts3_init_out: + sqlite3_free(zPrefix); + sqlite3_free(aIndex); sqlite3_free(zCompress); sqlite3_free(zUncompress); + sqlite3_free(zContent); + sqlite3_free(zLanguageid); + for(i=0; i<nNotindexed; i++) sqlite3_free(azNotindexed[i]); sqlite3_free((void *)aCol); + sqlite3_free((void *)azNotindexed); if( rc!=SQLITE_OK ){ if( p ){ fts3DisconnectMethod((sqlite3_vtab *)p); @@ -1007,6 +1441,7 @@ fts3_init_out: pTokenizer->pModule->xDestroy(pTokenizer); } }else{ + assert( p->pSegments==0 ); *ppVTab = &p->base; } return rc; @@ -1037,6 +1472,19 @@ static int fts3CreateMethod( return fts3InitVtab(1, db, pAux, argc, argv, ppVtab, pzErr); } +/* +** Set the pIdxInfo->estimatedRows variable to nRow. Unless this +** extension is currently being used by a version of SQLite too old to +** support estimatedRows. In that case this function is a no-op. +*/ +static void fts3SetEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ +#if SQLITE_VERSION_NUMBER>=3008002 + if( sqlite3_libversion_number()>=3008002 ){ + pIdxInfo->estimatedRows = nRow; + } +#endif +} + /* ** Implementation of the xBestIndex method for FTS3 tables. There ** are three possible strategies, in order of preference: @@ -1050,20 +1498,39 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ int i; /* Iterator variable */ int iCons = -1; /* Index of constraint to use */ + int iLangidCons = -1; /* Index of langid=x constraint, if present */ + int iDocidGe = -1; /* Index of docid>=x constraint, if present */ + int iDocidLe = -1; /* Index of docid<=x constraint, if present */ + int iIdx; + /* By default use a full table scan. This is an expensive option, ** so search through the constraints to see if a more efficient ** strategy is possible. */ pInfo->idxNum = FTS3_FULLSCAN_SEARCH; - pInfo->estimatedCost = 500000; + pInfo->estimatedCost = 5000000; for(i=0; i<pInfo->nConstraint; i++){ + int bDocid; /* True if this constraint is on docid */ struct sqlite3_index_constraint *pCons = &pInfo->aConstraint[i]; - if( pCons->usable==0 ) continue; + if( pCons->usable==0 ){ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_MATCH ){ + /* There exists an unusable MATCH constraint. This means that if + ** the planner does elect to use the results of this call as part + ** of the overall query plan the user will see an "unable to use + ** function MATCH in the requested context" error. To discourage + ** this, return a very high cost here. */ + pInfo->idxNum = FTS3_FULLSCAN_SEARCH; + pInfo->estimatedCost = 1e50; + fts3SetEstimatedRows(pInfo, ((sqlite3_int64)1) << 50); + return SQLITE_OK; + } + continue; + } + + bDocid = (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1); /* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */ - if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ - && (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 ) - ){ + if( iCons<0 && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ && bDocid ){ pInfo->idxNum = FTS3_DOCID_SEARCH; pInfo->estimatedCost = 1.0; iCons = i; @@ -1084,14 +1551,64 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){ pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn; pInfo->estimatedCost = 2.0; iCons = i; - break; + } + + /* Equality constraint on the langid column */ + if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ + && pCons->iColumn==p->nColumn + 2 + ){ + iLangidCons = i; + } + + if( bDocid ){ + switch( pCons->op ){ + case SQLITE_INDEX_CONSTRAINT_GE: + case SQLITE_INDEX_CONSTRAINT_GT: + iDocidGe = i; + break; + + case SQLITE_INDEX_CONSTRAINT_LE: + case SQLITE_INDEX_CONSTRAINT_LT: + iDocidLe = i; + break; + } } } + iIdx = 1; if( iCons>=0 ){ - pInfo->aConstraintUsage[iCons].argvIndex = 1; + pInfo->aConstraintUsage[iCons].argvIndex = iIdx++; pInfo->aConstraintUsage[iCons].omit = 1; } + if( iLangidCons>=0 ){ + pInfo->idxNum |= FTS3_HAVE_LANGID; + pInfo->aConstraintUsage[iLangidCons].argvIndex = iIdx++; + } + if( iDocidGe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_GE; + pInfo->aConstraintUsage[iDocidGe].argvIndex = iIdx++; + } + if( iDocidLe>=0 ){ + pInfo->idxNum |= FTS3_HAVE_DOCID_LE; + pInfo->aConstraintUsage[iDocidLe].argvIndex = iIdx++; + } + + /* Regardless of the strategy selected, FTS can deliver rows in rowid (or + ** docid) order. Both ascending and descending are possible. + */ + if( pInfo->nOrderBy==1 ){ + struct sqlite3_index_orderby *pOrder = &pInfo->aOrderBy[0]; + if( pOrder->iColumn<0 || pOrder->iColumn==p->nColumn+1 ){ + if( pOrder->desc ){ + pInfo->idxStr = "DESC"; + }else{ + pInfo->idxStr = "ASC"; + } + pInfo->orderByConsumed = 1; + } + } + + assert( p->pSegments==0 ); return SQLITE_OK; } @@ -1127,39 +1644,69 @@ static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){ sqlite3Fts3FreeDeferredTokens(pCsr); sqlite3_free(pCsr->aDoclist); sqlite3_free(pCsr->aMatchinfo); + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); sqlite3_free(pCsr); return SQLITE_OK; } /* +** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then +** compose and prepare an SQL statement of the form: +** +** "SELECT <columns> FROM %_content WHERE rowid = ?" +** +** (or the equivalent for a content=xxx table) and set pCsr->pStmt to +** it. If an error occurs, return an SQLite error code. +** +** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK. +*/ +static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){ + int rc = SQLITE_OK; + if( pCsr->pStmt==0 ){ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + char *zSql; + zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist); + if( !zSql ) return SQLITE_NOMEM; + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); + sqlite3_free(zSql); + } + *ppStmt = pCsr->pStmt; + return rc; +} + +/* ** Position the pCsr->pStmt statement so that it is on the row ** of the %_content table that contains the last match. Return ** SQLITE_OK on success. */ static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){ + int rc = SQLITE_OK; if( pCsr->isRequireSeek ){ - pCsr->isRequireSeek = 0; - sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); - if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ - return SQLITE_OK; - }else{ - int rc = sqlite3_reset(pCsr->pStmt); - if( rc==SQLITE_OK ){ - /* If no row was found and no error has occured, then the %_content - ** table is missing a row that is present in the full-text index. - ** The data structures are corrupt. - */ - rc = SQLITE_CORRUPT; - } - pCsr->isEof = 1; - if( pContext ){ - sqlite3_result_error_code(pContext, rc); + sqlite3_stmt *pStmt = 0; + + rc = fts3CursorSeekStmt(pCsr, &pStmt); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId); + pCsr->isRequireSeek = 0; + if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){ + return SQLITE_OK; + }else{ + rc = sqlite3_reset(pCsr->pStmt); + if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){ + /* If no row was found and no error has occurred, then the %_content + ** table is missing a row that is present in the full-text index. + ** The data structures are corrupt. */ + rc = FTS_CORRUPT_VTAB; + pCsr->isEof = 1; + } } - return rc; } - }else{ - return SQLITE_OK; } + + if( rc!=SQLITE_OK && pContext ){ + sqlite3_result_error_code(pContext, rc); + } + return rc; } /* @@ -1209,7 +1756,7 @@ static int fts3ScanInteriorNode( zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); zCsr += sqlite3Fts3GetVarint(zCsr, &iChild); if( zCsr>zEnd ){ - return SQLITE_CORRUPT; + return FTS_CORRUPT_VTAB; } while( zCsr<zEnd && (piFirst || piLast) ){ @@ -1221,13 +1768,13 @@ static int fts3ScanInteriorNode( /* Load the next term on the node into zBuffer. Use realloc() to expand ** the size of zBuffer if required. */ if( !isFirstTerm ){ - zCsr += sqlite3Fts3GetVarint32(zCsr, &nPrefix); + zCsr += fts3GetVarint32(zCsr, &nPrefix); } isFirstTerm = 0; - zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix); + zCsr += fts3GetVarint32(zCsr, &nSuffix); if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){ - rc = SQLITE_CORRUPT; + rc = FTS_CORRUPT_VTAB; goto finish_scan; } if( nPrefix+nSuffix>nAlloc ){ @@ -1240,6 +1787,7 @@ static int fts3ScanInteriorNode( } zBuffer = zNew; } + assert( zBuffer ); memcpy(&zBuffer[nPrefix], zCsr, nSuffix); nBuffer = nPrefix + nSuffix; zCsr += nSuffix; @@ -1311,7 +1859,7 @@ static int fts3SelectLeaf( assert( piLeaf || piLeaf2 ); - sqlite3Fts3GetVarint32(zNode, &iHeight); + fts3GetVarint32(zNode, &iHeight); rc = fts3ScanInteriorNode(zTerm, nTerm, zNode, nNode, piLeaf, piLeaf2); assert( !piLeaf2 || !piLeaf || rc!=SQLITE_OK || (*piLeaf<=*piLeaf2) ); @@ -1320,7 +1868,7 @@ static int fts3SelectLeaf( int nBlob; /* Size of zBlob in bytes */ if( piLeaf && piLeaf2 && (*piLeaf!=*piLeaf2) ){ - rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob); + rc = sqlite3Fts3ReadBlock(p, *piLeaf, &zBlob, &nBlob, 0); if( rc==SQLITE_OK ){ rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, 0); } @@ -1330,7 +1878,7 @@ static int fts3SelectLeaf( } if( rc==SQLITE_OK ){ - rc = sqlite3Fts3ReadBlock(p, piLeaf ? *piLeaf : *piLeaf2, &zBlob, &nBlob); + rc = sqlite3Fts3ReadBlock(p, piLeaf?*piLeaf:*piLeaf2, &zBlob, &nBlob, 0); } if( rc==SQLITE_OK ){ rc = fts3SelectLeaf(p, zTerm, nTerm, zBlob, nBlob, piLeaf, piLeaf2); @@ -1513,11 +2061,11 @@ static void fts3PoslistMerge( int iCol1; /* The current column index in pp1 */ int iCol2; /* The current column index in pp2 */ - if( *p1==POS_COLUMN ) sqlite3Fts3GetVarint32(&p1[1], &iCol1); + if( *p1==POS_COLUMN ) fts3GetVarint32(&p1[1], &iCol1); else if( *p1==POS_END ) iCol1 = POSITION_LIST_END; else iCol1 = 0; - if( *p2==POS_COLUMN ) sqlite3Fts3GetVarint32(&p2[1], &iCol2); + if( *p2==POS_COLUMN ) fts3GetVarint32(&p2[1], &iCol2); else if( *p2==POS_END ) iCol2 = POSITION_LIST_END; else iCol2 = 0; @@ -1568,8 +2116,6 @@ static void fts3PoslistMerge( } /* -** nToken==1 searches for adjacent positions. -** ** This function is used to merge two position lists into one. When it is ** called, *pp1 and *pp2 must both point to position lists. A position-list is ** the part of a doclist that follows each document id. For example, if a row @@ -1589,6 +2135,8 @@ static void fts3PoslistMerge( ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e. ** when the *pp1 token appears before the *pp2 token, but not more than nToken ** slots before it. +** +** e.g. nToken==1 searches for adjacent positions. */ static int fts3PoslistPhraseMerge( char **pp, /* IN/OUT: Preallocated output buffer */ @@ -1598,7 +2146,7 @@ static int fts3PoslistPhraseMerge( char **pp1, /* IN/OUT: Left input list */ char **pp2 /* IN/OUT: Right input list */ ){ - char *p = (pp ? *pp : 0); + char *p = *pp; char *p1 = *pp1; char *p2 = *pp2; int iCol1 = 0; @@ -1607,14 +2155,14 @@ static int fts3PoslistPhraseMerge( /* Never set both isSaveLeft and isExact for the same invocation. */ assert( isSaveLeft==0 || isExact==0 ); - assert( *p1!=0 && *p2!=0 ); + assert( p!=0 && *p1!=0 && *p2!=0 ); if( *p1==POS_COLUMN ){ p1++; - p1 += sqlite3Fts3GetVarint32(p1, &iCol1); + p1 += fts3GetVarint32(p1, &iCol1); } if( *p2==POS_COLUMN ){ p2++; - p2 += sqlite3Fts3GetVarint32(p2, &iCol2); + p2 += fts3GetVarint32(p2, &iCol2); } while( 1 ){ @@ -1624,7 +2172,7 @@ static int fts3PoslistPhraseMerge( sqlite3_int64 iPos1 = 0; sqlite3_int64 iPos2 = 0; - if( pp && iCol1 ){ + if( iCol1 ){ *p++ = POS_COLUMN; p += sqlite3Fts3PutVarint(p, iCol1); } @@ -1639,16 +2187,10 @@ static int fts3PoslistPhraseMerge( || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) ){ sqlite3_int64 iSave; - if( !pp ){ - fts3PoslistCopy(0, &p2); - fts3PoslistCopy(0, &p1); - *pp1 = p1; - *pp2 = p2; - return 1; - } iSave = isSaveLeft ? iPos1 : iPos2; fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2; pSave = 0; + assert( p ); } if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){ if( (*p2&0xFE)==0 ) break; @@ -1670,9 +2212,9 @@ static int fts3PoslistPhraseMerge( if( 0==*p1 || 0==*p2 ) break; p1++; - p1 += sqlite3Fts3GetVarint32(p1, &iCol1); + p1 += fts3GetVarint32(p1, &iCol1); p2++; - p2 += sqlite3Fts3GetVarint32(p2, &iCol2); + p2 += fts3GetVarint32(p2, &iCol2); } /* Advance pointer p1 or p2 (whichever corresponds to the smaller of @@ -1684,12 +2226,12 @@ static int fts3PoslistPhraseMerge( fts3ColumnlistCopy(0, &p1); if( 0==*p1 ) break; p1++; - p1 += sqlite3Fts3GetVarint32(p1, &iCol1); + p1 += fts3GetVarint32(p1, &iCol1); }else{ fts3ColumnlistCopy(0, &p2); if( 0==*p2 ) break; p2++; - p2 += sqlite3Fts3GetVarint32(p2, &iCol2); + p2 += fts3GetVarint32(p2, &iCol2); } } @@ -1697,7 +2239,7 @@ static int fts3PoslistPhraseMerge( fts3PoslistCopy(0, &p1); *pp1 = p1; *pp2 = p2; - if( !pp || *pp==p ){ + if( *pp==p ){ return 0; } *p++ = 0x00; @@ -1706,7 +2248,19 @@ static int fts3PoslistPhraseMerge( } /* -** Merge two position-lists as required by the NEAR operator. +** Merge two position-lists as required by the NEAR operator. The argument +** position lists correspond to the left and right phrases of an expression +** like: +** +** "phrase 1" NEAR "phrase number 2" +** +** Position list *pp1 corresponds to the left-hand side of the NEAR +** expression and *pp2 to the right. As usual, the indexes in the position +** lists are the offsets of the last token in each phrase (tokens "1" and "2" +** in the example above). +** +** The output position list - written to *pp - is a copy of *pp2 with those +** entries that are not sufficiently NEAR entries in *pp1 removed. */ static int fts3PoslistNearMerge( char **pp, /* Output buffer */ @@ -1719,226 +2273,325 @@ static int fts3PoslistNearMerge( char *p1 = *pp1; char *p2 = *pp2; - if( !pp ){ - if( fts3PoslistPhraseMerge(0, nRight, 0, 0, pp1, pp2) ) return 1; - *pp1 = p1; - *pp2 = p2; - return fts3PoslistPhraseMerge(0, nLeft, 0, 0, pp2, pp1); + char *pTmp1 = aTmp; + char *pTmp2; + char *aTmp2; + int res = 1; + + fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2); + aTmp2 = pTmp2 = pTmp1; + *pp1 = p1; + *pp2 = p2; + fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1); + if( pTmp1!=aTmp && pTmp2!=aTmp2 ){ + fts3PoslistMerge(pp, &aTmp, &aTmp2); + }else if( pTmp1!=aTmp ){ + fts3PoslistCopy(pp, &aTmp); + }else if( pTmp2!=aTmp2 ){ + fts3PoslistCopy(pp, &aTmp2); + }else{ + res = 0; + } + + return res; +} + +/* +** An instance of this function is used to merge together the (potentially +** large number of) doclists for each term that matches a prefix query. +** See function fts3TermSelectMerge() for details. +*/ +typedef struct TermSelect TermSelect; +struct TermSelect { + char *aaOutput[16]; /* Malloc'd output buffers */ + int anOutput[16]; /* Size each output buffer in bytes */ +}; + +/* +** This function is used to read a single varint from a buffer. Parameter +** pEnd points 1 byte past the end of the buffer. When this function is +** called, if *pp points to pEnd or greater, then the end of the buffer +** has been reached. In this case *pp is set to 0 and the function returns. +** +** If *pp does not point to or past pEnd, then a single varint is read +** from *pp. *pp is then set to point 1 byte past the end of the read varint. +** +** If bDescIdx is false, the value read is added to *pVal before returning. +** If it is true, the value read is subtracted from *pVal before this +** function returns. +*/ +static void fts3GetDeltaVarint3( + char **pp, /* IN/OUT: Point to read varint from */ + char *pEnd, /* End of buffer */ + int bDescIdx, /* True if docids are descending */ + sqlite3_int64 *pVal /* IN/OUT: Integer value */ +){ + if( *pp>=pEnd ){ + *pp = 0; }else{ - char *pTmp1 = aTmp; - char *pTmp2; - char *aTmp2; - int res = 1; - - fts3PoslistPhraseMerge(&pTmp1, nRight, 0, 0, pp1, pp2); - aTmp2 = pTmp2 = pTmp1; - *pp1 = p1; - *pp2 = p2; - fts3PoslistPhraseMerge(&pTmp2, nLeft, 1, 0, pp2, pp1); - if( pTmp1!=aTmp && pTmp2!=aTmp2 ){ - fts3PoslistMerge(pp, &aTmp, &aTmp2); - }else if( pTmp1!=aTmp ){ - fts3PoslistCopy(pp, &aTmp); - }else if( pTmp2!=aTmp2 ){ - fts3PoslistCopy(pp, &aTmp2); + sqlite3_int64 iVal; + *pp += sqlite3Fts3GetVarint(*pp, &iVal); + if( bDescIdx ){ + *pVal -= iVal; }else{ - res = 0; + *pVal += iVal; } + } +} - return res; +/* +** This function is used to write a single varint to a buffer. The varint +** is written to *pp. Before returning, *pp is set to point 1 byte past the +** end of the value written. +** +** If *pbFirst is zero when this function is called, the value written to +** the buffer is that of parameter iVal. +** +** If *pbFirst is non-zero when this function is called, then the value +** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal) +** (if bDescIdx is non-zero). +** +** Before returning, this function always sets *pbFirst to 1 and *piPrev +** to the value of parameter iVal. +*/ +static void fts3PutDeltaVarint3( + char **pp, /* IN/OUT: Output pointer */ + int bDescIdx, /* True for descending docids */ + sqlite3_int64 *piPrev, /* IN/OUT: Previous value written to list */ + int *pbFirst, /* IN/OUT: True after first int written */ + sqlite3_int64 iVal /* Write this value to the list */ +){ + sqlite3_int64 iWrite; + if( bDescIdx==0 || *pbFirst==0 ){ + iWrite = iVal - *piPrev; + }else{ + iWrite = *piPrev - iVal; } + assert( *pbFirst || *piPrev==0 ); + assert( *pbFirst==0 || iWrite>0 ); + *pp += sqlite3Fts3PutVarint(*pp, iWrite); + *piPrev = iVal; + *pbFirst = 1; } + /* -** Values that may be used as the first parameter to fts3DoclistMerge(). +** This macro is used by various functions that merge doclists. The two +** arguments are 64-bit docid values. If the value of the stack variable +** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). +** Otherwise, (i2-i1). +** +** Using this makes it easier to write code that can merge doclists that are +** sorted in either ascending or descending order. */ -#define MERGE_NOT 2 /* D + D -> D */ -#define MERGE_AND 3 /* D + D -> D */ -#define MERGE_OR 4 /* D + D -> D */ -#define MERGE_POS_OR 5 /* P + P -> P */ -#define MERGE_PHRASE 6 /* P + P -> D */ -#define MERGE_POS_PHRASE 7 /* P + P -> P */ -#define MERGE_NEAR 8 /* P + P -> D */ -#define MERGE_POS_NEAR 9 /* P + P -> P */ +#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2)) /* -** Merge the two doclists passed in buffer a1 (size n1 bytes) and a2 -** (size n2 bytes). The output is written to pre-allocated buffer aBuffer, -** which is guaranteed to be large enough to hold the results. The number -** of bytes written to aBuffer is stored in *pnBuffer before returning. +** This function does an "OR" merge of two doclists (output contains all +** positions contained in either argument doclist). If the docids in the +** input doclists are sorted in ascending order, parameter bDescDoclist +** should be false. If they are sorted in ascending order, it should be +** passed a non-zero value. +** +** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer +** containing the output doclist and SQLITE_OK is returned. In this case +** *pnOut is set to the number of bytes in the output doclist. ** -** If successful, SQLITE_OK is returned. Otherwise, if a malloc error -** occurs while allocating a temporary buffer as part of the merge operation, -** SQLITE_NOMEM is returned. +** If an error occurs, an SQLite error code is returned. The output values +** are undefined in this case. */ -static int fts3DoclistMerge( - int mergetype, /* One of the MERGE_XXX constants */ - int nParam1, /* Used by MERGE_NEAR and MERGE_POS_NEAR */ - int nParam2, /* Used by MERGE_NEAR and MERGE_POS_NEAR */ - char *aBuffer, /* Pre-allocated output buffer */ - int *pnBuffer, /* OUT: Bytes written to aBuffer */ - char *a1, /* Buffer containing first doclist */ - int n1, /* Size of buffer a1 */ - char *a2, /* Buffer containing second doclist */ - int n2, /* Size of buffer a2 */ - int *pnDoc /* OUT: Number of docids in output */ +static int fts3DoclistOrMerge( + int bDescDoclist, /* True if arguments are desc */ + char *a1, int n1, /* First doclist */ + char *a2, int n2, /* Second doclist */ + char **paOut, int *pnOut /* OUT: Malloc'd doclist */ ){ sqlite3_int64 i1 = 0; sqlite3_int64 i2 = 0; sqlite3_int64 iPrev = 0; - - char *p = aBuffer; - char *p1 = a1; - char *p2 = a2; char *pEnd1 = &a1[n1]; char *pEnd2 = &a2[n2]; - int nDoc = 0; + char *p1 = a1; + char *p2 = a2; + char *p; + char *aOut; + int bFirstOut = 0; - assert( mergetype==MERGE_OR || mergetype==MERGE_POS_OR - || mergetype==MERGE_AND || mergetype==MERGE_NOT - || mergetype==MERGE_PHRASE || mergetype==MERGE_POS_PHRASE - || mergetype==MERGE_NEAR || mergetype==MERGE_POS_NEAR - ); + *paOut = 0; + *pnOut = 0; - if( !aBuffer ){ - *pnBuffer = 0; - return SQLITE_NOMEM; + /* Allocate space for the output. Both the input and output doclists + ** are delta encoded. If they are in ascending order (bDescDoclist==0), + ** then the first docid in each list is simply encoded as a varint. For + ** each subsequent docid, the varint stored is the difference between the + ** current and previous docid (a positive number - since the list is in + ** ascending order). + ** + ** The first docid written to the output is therefore encoded using the + ** same number of bytes as it is in whichever of the input lists it is + ** read from. And each subsequent docid read from the same input list + ** consumes either the same or less bytes as it did in the input (since + ** the difference between it and the previous value in the output must + ** be a positive value less than or equal to the delta value read from + ** the input list). The same argument applies to all but the first docid + ** read from the 'other' list. And to the contents of all position lists + ** that will be copied and merged from the input to the output. + ** + ** However, if the first docid copied to the output is a negative number, + ** then the encoding of the first docid from the 'other' input list may + ** be larger in the output than it was in the input (since the delta value + ** may be a larger positive integer than the actual docid). + ** + ** The space required to store the output is therefore the sum of the + ** sizes of the two inputs, plus enough space for exactly one of the input + ** docids to grow. + ** + ** A symetric argument may be made if the doclists are in descending + ** order. + */ + aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1); + if( !aOut ) return SQLITE_NOMEM; + + p = aOut; + fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); + while( p1 || p2 ){ + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); + + if( p2 && p1 && iDiff==0 ){ + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); + fts3PoslistMerge(&p, &p1, &p2); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + }else if( !p2 || (p1 && iDiff<0) ){ + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); + fts3PoslistCopy(&p, &p1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + }else{ + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2); + fts3PoslistCopy(&p, &p2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + } } - /* Read the first docid from each doclist */ - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - - switch( mergetype ){ - case MERGE_OR: - case MERGE_POS_OR: - while( p1 || p2 ){ - if( p2 && p1 && i1==i2 ){ - fts3PutDeltaVarint(&p, &iPrev, i1); - if( mergetype==MERGE_POS_OR ) fts3PoslistMerge(&p, &p1, &p2); - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - }else if( !p2 || (p1 && i1<i2) ){ - fts3PutDeltaVarint(&p, &iPrev, i1); - if( mergetype==MERGE_POS_OR ) fts3PoslistCopy(&p, &p1); - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - }else{ - fts3PutDeltaVarint(&p, &iPrev, i2); - if( mergetype==MERGE_POS_OR ) fts3PoslistCopy(&p, &p2); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - } - } - break; - - case MERGE_AND: - while( p1 && p2 ){ - if( i1==i2 ){ - fts3PutDeltaVarint(&p, &iPrev, i1); - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - nDoc++; - }else if( i1<i2 ){ - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - }else{ - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - } - } - break; + *paOut = aOut; + *pnOut = (int)(p-aOut); + assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 ); + return SQLITE_OK; +} - case MERGE_NOT: - while( p1 ){ - if( p2 && i1==i2 ){ - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - }else if( !p2 || i1<i2 ){ - fts3PutDeltaVarint(&p, &iPrev, i1); - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - }else{ - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - } - } - break; +/* +** This function does a "phrase" merge of two doclists. In a phrase merge, +** the output contains a copy of each position from the right-hand input +** doclist for which there is a position in the left-hand input doclist +** exactly nDist tokens before it. +** +** If the docids in the input doclists are sorted in ascending order, +** parameter bDescDoclist should be false. If they are sorted in ascending +** order, it should be passed a non-zero value. +** +** The right-hand input doclist is overwritten by this function. +*/ +static void fts3DoclistPhraseMerge( + int bDescDoclist, /* True if arguments are desc */ + int nDist, /* Distance from left to right (1=adjacent) */ + char *aLeft, int nLeft, /* Left doclist */ + char *aRight, int *pnRight /* IN/OUT: Right/output doclist */ +){ + sqlite3_int64 i1 = 0; + sqlite3_int64 i2 = 0; + sqlite3_int64 iPrev = 0; + char *pEnd1 = &aLeft[nLeft]; + char *pEnd2 = &aRight[*pnRight]; + char *p1 = aLeft; + char *p2 = aRight; + char *p; + int bFirstOut = 0; + char *aOut = aRight; + + assert( nDist>0 ); + + p = aOut; + fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2); + + while( p1 && p2 ){ + sqlite3_int64 iDiff = DOCID_CMP(i1, i2); + if( iDiff==0 ){ + char *pSave = p; + sqlite3_int64 iPrevSave = iPrev; + int bFirstOutSave = bFirstOut; - case MERGE_POS_PHRASE: - case MERGE_PHRASE: { - char **ppPos = (mergetype==MERGE_PHRASE ? 0 : &p); - while( p1 && p2 ){ - if( i1==i2 ){ - char *pSave = p; - sqlite3_int64 iPrevSave = iPrev; - fts3PutDeltaVarint(&p, &iPrev, i1); - if( 0==fts3PoslistPhraseMerge(ppPos, nParam1, 0, 1, &p1, &p2) ){ - p = pSave; - iPrev = iPrevSave; - }else{ - nDoc++; - } - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - }else if( i1<i2 ){ - fts3PoslistCopy(0, &p1); - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - }else{ - fts3PoslistCopy(0, &p2); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - } + fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1); + if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){ + p = pSave; + iPrev = iPrevSave; + bFirstOut = bFirstOutSave; } - break; + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); + }else if( iDiff<0 ){ + fts3PoslistCopy(0, &p1); + fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1); + }else{ + fts3PoslistCopy(0, &p2); + fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2); } + } - default: assert( mergetype==MERGE_POS_NEAR || mergetype==MERGE_NEAR ); { - char *aTmp = 0; - char **ppPos = 0; - - if( mergetype==MERGE_POS_NEAR ){ - ppPos = &p; - aTmp = sqlite3_malloc(2*(n1+n2+1)); - if( !aTmp ){ - return SQLITE_NOMEM; - } - } - - while( p1 && p2 ){ - if( i1==i2 ){ - char *pSave = p; - sqlite3_int64 iPrevSave = iPrev; - fts3PutDeltaVarint(&p, &iPrev, i1); + *pnRight = (int)(p - aOut); +} - if( !fts3PoslistNearMerge(ppPos, aTmp, nParam1, nParam2, &p1, &p2) ){ - iPrev = iPrevSave; - p = pSave; - } +/* +** Argument pList points to a position list nList bytes in size. This +** function checks to see if the position list contains any entries for +** a token in position 0 (of any column). If so, it writes argument iDelta +** to the output buffer pOut, followed by a position list consisting only +** of the entries from pList at position 0, and terminated by an 0x00 byte. +** The value returned is the number of bytes written to pOut (if any). +*/ +int sqlite3Fts3FirstFilter( + sqlite3_int64 iDelta, /* Varint that may be written to pOut */ + char *pList, /* Position list (no 0x00 term) */ + int nList, /* Size of pList in bytes */ + char *pOut /* Write output here */ +){ + int nOut = 0; + int bWritten = 0; /* True once iDelta has been written */ + char *p = pList; + char *pEnd = &pList[nList]; + + if( *p!=0x01 ){ + if( *p==0x02 ){ + nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); + pOut[nOut++] = 0x02; + bWritten = 1; + } + fts3ColumnlistCopy(0, &p); + } - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - }else if( i1<i2 ){ - fts3PoslistCopy(0, &p1); - fts3GetDeltaVarint2(&p1, pEnd1, &i1); - }else{ - fts3PoslistCopy(0, &p2); - fts3GetDeltaVarint2(&p2, pEnd2, &i2); - } + while( p<pEnd && *p==0x01 ){ + sqlite3_int64 iCol; + p++; + p += sqlite3Fts3GetVarint(p, &iCol); + if( *p==0x02 ){ + if( bWritten==0 ){ + nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta); + bWritten = 1; } - sqlite3_free(aTmp); - break; + pOut[nOut++] = 0x01; + nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol); + pOut[nOut++] = 0x02; } + fts3ColumnlistCopy(0, &p); + } + if( bWritten ){ + pOut[nOut++] = 0x00; } - if( pnDoc ) *pnDoc = nDoc; - *pnBuffer = (int)(p-aBuffer); - return SQLITE_OK; + return nOut; } -/* -** A pointer to an instance of this structure is used as the context -** argument to sqlite3Fts3SegReaderIterate() -*/ -typedef struct TermSelect TermSelect; -struct TermSelect { - int isReqPos; - char *aaOutput[16]; /* Malloc'd output buffer */ - int anOutput[16]; /* Size of output in bytes */ -}; /* ** Merge all doclists in the TermSelect.aaOutput[] array into a single @@ -1949,8 +2602,7 @@ struct TermSelect { ** the responsibility of the caller to free any doclists left in the ** TermSelect.aaOutput[] array. */ -static int fts3TermSelectMerge(TermSelect *pTS){ - int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR); +static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){ char *aOut = 0; int nOut = 0; int i; @@ -1965,15 +2617,17 @@ static int fts3TermSelectMerge(TermSelect *pTS){ nOut = pTS->anOutput[i]; pTS->aaOutput[i] = 0; }else{ - int nNew = nOut + pTS->anOutput[i]; - char *aNew = sqlite3_malloc(nNew); - if( !aNew ){ + int nNew; + char *aNew; + + int rc = fts3DoclistOrMerge(p->bDescIdx, + pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, &aNew, &nNew + ); + if( rc!=SQLITE_OK ){ sqlite3_free(aOut); - return SQLITE_NOMEM; + return rc; } - fts3DoclistMerge(mergetype, 0, 0, - aNew, &nNew, pTS->aaOutput[i], pTS->anOutput[i], aOut, nOut, 0 - ); + sqlite3_free(pTS->aaOutput[i]); sqlite3_free(aOut); pTS->aaOutput[i] = 0; @@ -1989,29 +2643,28 @@ static int fts3TermSelectMerge(TermSelect *pTS){ } /* -** This function is used as the sqlite3Fts3SegReaderIterate() callback when -** querying the full-text index for a doclist associated with a term or -** term-prefix. +** Merge the doclist aDoclist/nDoclist into the TermSelect object passed +** as the first argument. The merge is an "OR" merge (see function +** fts3DoclistOrMerge() for details). +** +** This function is called with the doclist for each term that matches +** a queried prefix. It merges all these doclists into one, the doclist +** for the specified prefix. Since there can be a very large number of +** doclists to merge, the merging is done pair-wise using the TermSelect +** object. +** +** This function returns SQLITE_OK if the merge is successful, or an +** SQLite error code (SQLITE_NOMEM) if an error occurs. */ -static int fts3TermSelectCb( - Fts3Table *p, /* Virtual table object */ - void *pContext, /* Pointer to TermSelect structure */ - char *zTerm, - int nTerm, - char *aDoclist, - int nDoclist +static int fts3TermSelectMerge( + Fts3Table *p, /* FTS table handle */ + TermSelect *pTS, /* TermSelect object to merge into */ + char *aDoclist, /* Pointer to doclist */ + int nDoclist /* Size of aDoclist in bytes */ ){ - TermSelect *pTS = (TermSelect *)pContext; - - UNUSED_PARAMETER(p); - UNUSED_PARAMETER(zTerm); - UNUSED_PARAMETER(nTerm); - if( pTS->aaOutput[0]==0 ){ /* If this is the first term selected, copy the doclist to the output - ** buffer using memcpy(). TODO: Add a way to transfer control of the - ** aDoclist buffer from the caller so as to avoid the memcpy(). - */ + ** buffer using memcpy(). */ pTS->aaOutput[0] = sqlite3_malloc(nDoclist); pTS->anOutput[0] = nDoclist; if( pTS->aaOutput[0] ){ @@ -2020,126 +2673,108 @@ static int fts3TermSelectCb( return SQLITE_NOMEM; } }else{ - int mergetype = (pTS->isReqPos ? MERGE_POS_OR : MERGE_OR); char *aMerge = aDoclist; int nMerge = nDoclist; int iOut; for(iOut=0; iOut<SizeofArray(pTS->aaOutput); iOut++){ - char *aNew; - int nNew; if( pTS->aaOutput[iOut]==0 ){ assert( iOut>0 ); pTS->aaOutput[iOut] = aMerge; pTS->anOutput[iOut] = nMerge; break; - } + }else{ + char *aNew; + int nNew; - nNew = nMerge + pTS->anOutput[iOut]; - aNew = sqlite3_malloc(nNew); - if( !aNew ){ - if( aMerge!=aDoclist ){ - sqlite3_free(aMerge); + int rc = fts3DoclistOrMerge(p->bDescIdx, aMerge, nMerge, + pTS->aaOutput[iOut], pTS->anOutput[iOut], &aNew, &nNew + ); + if( rc!=SQLITE_OK ){ + if( aMerge!=aDoclist ) sqlite3_free(aMerge); + return rc; } - return SQLITE_NOMEM; - } - fts3DoclistMerge(mergetype, 0, 0, aNew, &nNew, - pTS->aaOutput[iOut], pTS->anOutput[iOut], aMerge, nMerge, 0 - ); - - if( iOut>0 ) sqlite3_free(aMerge); - sqlite3_free(pTS->aaOutput[iOut]); - pTS->aaOutput[iOut] = 0; - aMerge = aNew; - nMerge = nNew; - if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ - pTS->aaOutput[iOut] = aMerge; - pTS->anOutput[iOut] = nMerge; + if( aMerge!=aDoclist ) sqlite3_free(aMerge); + sqlite3_free(pTS->aaOutput[iOut]); + pTS->aaOutput[iOut] = 0; + + aMerge = aNew; + nMerge = nNew; + if( (iOut+1)==SizeofArray(pTS->aaOutput) ){ + pTS->aaOutput[iOut] = aMerge; + pTS->anOutput[iOut] = nMerge; + } } } } return SQLITE_OK; } -static int fts3DeferredTermSelect( - Fts3DeferredToken *pToken, /* Phrase token */ - int isTermPos, /* True to include positions */ - int *pnOut, /* OUT: Size of list */ - char **ppOut /* OUT: Body of list */ +/* +** Append SegReader object pNew to the end of the pCsr->apSegment[] array. +*/ +static int fts3SegReaderCursorAppend( + Fts3MultiSegReader *pCsr, + Fts3SegReader *pNew ){ - char *aSource; - int nSource; - - aSource = sqlite3Fts3DeferredDoclist(pToken, &nSource); - if( !aSource ){ - *pnOut = 0; - *ppOut = 0; - }else if( isTermPos ){ - *ppOut = sqlite3_malloc(nSource); - if( !*ppOut ) return SQLITE_NOMEM; - memcpy(*ppOut, aSource, nSource); - *pnOut = nSource; - }else{ - sqlite3_int64 docid; - *pnOut = sqlite3Fts3GetVarint(aSource, &docid); - *ppOut = sqlite3_malloc(*pnOut); - if( !*ppOut ) return SQLITE_NOMEM; - sqlite3Fts3PutVarint(*ppOut, docid); + if( (pCsr->nSegment%16)==0 ){ + Fts3SegReader **apNew; + int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); + apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); + if( !apNew ){ + sqlite3Fts3SegReaderFree(pNew); + return SQLITE_NOMEM; + } + pCsr->apSegment = apNew; } - + pCsr->apSegment[pCsr->nSegment++] = pNew; return SQLITE_OK; } -int sqlite3Fts3SegReaderCursor( +/* +** Add seg-reader objects to the Fts3MultiSegReader object passed as the +** 8th argument. +** +** This function returns SQLITE_OK if successful, or an SQLite error code +** otherwise. +*/ +static int fts3SegReaderCursor( Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language id */ + int iIndex, /* Index to search (from 0 to p->nIndex-1) */ int iLevel, /* Level of segments to scan */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ int isScan, /* True to scan from zTerm to EOF */ - Fts3SegReaderCursor *pCsr /* Cursor object to populate */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ ){ - int rc = SQLITE_OK; - int rc2; - int iAge = 0; - sqlite3_stmt *pStmt = 0; - Fts3SegReader *pPending = 0; - - assert( iLevel==FTS3_SEGCURSOR_ALL - || iLevel==FTS3_SEGCURSOR_PENDING - || iLevel>=0 - ); - assert( FTS3_SEGCURSOR_PENDING<0 ); - assert( FTS3_SEGCURSOR_ALL<0 ); - assert( iLevel==FTS3_SEGCURSOR_ALL || (zTerm==0 && isPrefix==1) ); - assert( isPrefix==0 || isScan==0 ); - - - memset(pCsr, 0, sizeof(Fts3SegReaderCursor)); - - /* If iLevel is less than 0, include a seg-reader for the pending-terms. */ - assert( isScan==0 || fts3HashCount(&p->pendingTerms)==0 ); - if( iLevel<0 && isScan==0 ){ - rc = sqlite3Fts3SegReaderPending(p, zTerm, nTerm, isPrefix, &pPending); - if( rc==SQLITE_OK && pPending ){ - int nByte = (sizeof(Fts3SegReader *) * 16); - pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); - if( pCsr->apSegment==0 ){ - rc = SQLITE_NOMEM; - }else{ - pCsr->apSegment[0] = pPending; - pCsr->nSegment = 1; - pPending = 0; - } + int rc = SQLITE_OK; /* Error code */ + sqlite3_stmt *pStmt = 0; /* Statement to iterate through segments */ + int rc2; /* Result of sqlite3_reset() */ + + /* If iLevel is less than 0 and this is not a scan, include a seg-reader + ** for the pending-terms. If this is a scan, then this call must be being + ** made by an fts4aux module, not an FTS table. In this case calling + ** Fts3SegReaderPending might segfault, as the data structures used by + ** fts4aux are not completely populated. So it's easiest to filter these + ** calls out here. */ + if( iLevel<0 && p->aIndex ){ + Fts3SegReader *pSeg = 0; + rc = sqlite3Fts3SegReaderPending(p, iIndex, zTerm, nTerm, isPrefix, &pSeg); + if( rc==SQLITE_OK && pSeg ){ + rc = fts3SegReaderCursorAppend(pCsr, pSeg); } } if( iLevel!=FTS3_SEGCURSOR_PENDING ){ if( rc==SQLITE_OK ){ - rc = sqlite3Fts3AllSegdirs(p, iLevel, &pStmt); + rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt); } + while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){ + Fts3SegReader *pSeg = 0; /* Read the values returned by the SELECT into local variables. */ sqlite3_int64 iStartBlock = sqlite3_column_int64(pStmt, 1); @@ -2148,18 +2783,6 @@ int sqlite3Fts3SegReaderCursor( int nRoot = sqlite3_column_bytes(pStmt, 4); char const *zRoot = sqlite3_column_blob(pStmt, 4); - /* If nSegment is a multiple of 16 the array needs to be extended. */ - if( (pCsr->nSegment%16)==0 ){ - Fts3SegReader **apNew; - int nByte = (pCsr->nSegment + 16)*sizeof(Fts3SegReader*); - apNew = (Fts3SegReader **)sqlite3_realloc(pCsr->apSegment, nByte); - if( !apNew ){ - rc = SQLITE_NOMEM; - goto finished; - } - pCsr->apSegment = apNew; - } - /* If zTerm is not NULL, and this segment is not stored entirely on its ** root node, the range of leaves scanned can be reduced. Do this. */ if( iStartBlock && zTerm ){ @@ -2169,88 +2792,168 @@ int sqlite3Fts3SegReaderCursor( if( isPrefix==0 && isScan==0 ) iLeavesEndBlock = iStartBlock; } - rc = sqlite3Fts3SegReaderNew(iAge, iStartBlock, iLeavesEndBlock, - iEndBlock, zRoot, nRoot, &pCsr->apSegment[pCsr->nSegment] + rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1, + (isPrefix==0 && isScan==0), + iStartBlock, iLeavesEndBlock, + iEndBlock, zRoot, nRoot, &pSeg ); if( rc!=SQLITE_OK ) goto finished; - pCsr->nSegment++; - iAge++; + rc = fts3SegReaderCursorAppend(pCsr, pSeg); } } finished: rc2 = sqlite3_reset(pStmt); if( rc==SQLITE_DONE ) rc = rc2; - sqlite3Fts3SegReaderFree(pPending); return rc; } +/* +** Set up a cursor object for iterating through a full-text index or a +** single level therein. +*/ +int sqlite3Fts3SegReaderCursor( + Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language-id to search */ + int iIndex, /* Index to search (from 0 to p->nIndex-1) */ + int iLevel, /* Level of segments to scan */ + const char *zTerm, /* Term to query for */ + int nTerm, /* Size of zTerm in bytes */ + int isPrefix, /* True for a prefix search */ + int isScan, /* True to scan from zTerm to EOF */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ +){ + assert( iIndex>=0 && iIndex<p->nIndex ); + assert( iLevel==FTS3_SEGCURSOR_ALL + || iLevel==FTS3_SEGCURSOR_PENDING + || iLevel>=0 + ); + assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); + assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 ); + assert( isPrefix==0 || isScan==0 ); + + memset(pCsr, 0, sizeof(Fts3MultiSegReader)); + return fts3SegReaderCursor( + p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr + ); +} + +/* +** In addition to its current configuration, have the Fts3MultiSegReader +** passed as the 4th argument also scan the doclist for term zTerm/nTerm. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ +static int fts3SegReaderCursorAddZero( + Fts3Table *p, /* FTS virtual table handle */ + int iLangid, + const char *zTerm, /* Term to scan doclist of */ + int nTerm, /* Number of bytes in zTerm */ + Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */ +){ + return fts3SegReaderCursor(p, + iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr + ); +} +/* +** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or, +** if isPrefix is true, to scan the doclist for all terms for which +** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write +** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return +** an SQLite error code. +** +** It is the responsibility of the caller to free this object by eventually +** passing it to fts3SegReaderCursorFree() +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +** Output parameter *ppSegcsr is set to 0 if an error occurs. +*/ static int fts3TermSegReaderCursor( Fts3Cursor *pCsr, /* Virtual table cursor handle */ const char *zTerm, /* Term to query for */ int nTerm, /* Size of zTerm in bytes */ int isPrefix, /* True for a prefix search */ - Fts3SegReaderCursor **ppSegcsr /* OUT: Allocated seg-reader cursor */ + Fts3MultiSegReader **ppSegcsr /* OUT: Allocated seg-reader cursor */ ){ - Fts3SegReaderCursor *pSegcsr; /* Object to allocate and return */ + Fts3MultiSegReader *pSegcsr; /* Object to allocate and return */ int rc = SQLITE_NOMEM; /* Return code */ - pSegcsr = sqlite3_malloc(sizeof(Fts3SegReaderCursor)); + pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader)); if( pSegcsr ){ - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; int i; - int nCost = 0; - rc = sqlite3Fts3SegReaderCursor( - p, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr); - - for(i=0; rc==SQLITE_OK && i<pSegcsr->nSegment; i++){ - rc = sqlite3Fts3SegReaderCost(pCsr, pSegcsr->apSegment[i], &nCost); + int bFound = 0; /* True once an index has been found */ + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + + if( isPrefix ){ + for(i=1; bFound==0 && i<p->nIndex; i++){ + if( p->aIndex[i].nPrefix==nTerm ){ + bFound = 1; + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr + ); + pSegcsr->bLookup = 1; + } + } + + for(i=1; bFound==0 && i<p->nIndex; i++){ + if( p->aIndex[i].nPrefix==nTerm+1 ){ + bFound = 1; + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr + ); + if( rc==SQLITE_OK ){ + rc = fts3SegReaderCursorAddZero( + p, pCsr->iLangid, zTerm, nTerm, pSegcsr + ); + } + } + } + } + + if( bFound==0 ){ + rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid, + 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr + ); + pSegcsr->bLookup = !isPrefix; } - pSegcsr->nCost = nCost; } *ppSegcsr = pSegcsr; return rc; } -static void fts3SegReaderCursorFree(Fts3SegReaderCursor *pSegcsr){ +/* +** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor(). +*/ +static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){ sqlite3Fts3SegReaderFinish(pSegcsr); sqlite3_free(pSegcsr); } /* -** This function retreives the doclist for the specified term (or term -** prefix) from the database. -** -** The returned doclist may be in one of two formats, depending on the -** value of parameter isReqPos. If isReqPos is zero, then the doclist is -** a sorted list of delta-compressed docids (a bare doclist). If isReqPos -** is non-zero, then the returned list is in the same format as is stored -** in the database without the found length specifier at the start of on-disk -** doclists. +** This function retrieves the doclist for the specified term (or term +** prefix) from the database. */ static int fts3TermSelect( Fts3Table *p, /* Virtual table handle */ Fts3PhraseToken *pTok, /* Token to query for */ int iColumn, /* Column to query (or -ve for all columns) */ - int isReqPos, /* True to include position lists in output */ int *pnOut, /* OUT: Size of buffer at *ppOut */ char **ppOut /* OUT: Malloced result buffer */ ){ int rc; /* Return code */ - Fts3SegReaderCursor *pSegcsr; /* Seg-reader cursor for this term */ - TermSelect tsc; /* Context object for fts3TermSelectCb() */ + Fts3MultiSegReader *pSegcsr; /* Seg-reader cursor for this term */ + TermSelect tsc; /* Object for pair-wise doclist merging */ Fts3SegFilter filter; /* Segment term filter configuration */ pSegcsr = pTok->pSegcsr; memset(&tsc, 0, sizeof(TermSelect)); - tsc.isReqPos = isReqPos; - filter.flags = FTS3_SEGMENT_IGNORE_EMPTY + filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0) - | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0) + | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0) | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0); filter.iCol = iColumn; filter.zTerm = pTok->z; @@ -2260,13 +2963,11 @@ static int fts3TermSelect( while( SQLITE_OK==rc && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) ){ - rc = fts3TermSelectCb(p, (void *)&tsc, - pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist - ); + rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist); } if( rc==SQLITE_OK ){ - rc = fts3TermSelectMerge(&tsc); + rc = fts3TermSelectFinishMerge(p, &tsc); } if( rc==SQLITE_OK ){ *ppOut = tsc.aaOutput[0]; @@ -2292,24 +2993,15 @@ static int fts3TermSelect( ** that the doclist is simply a list of docids stored as delta encoded ** varints. */ -static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){ +static int fts3DoclistCountDocids(char *aList, int nList){ int nDoc = 0; /* Return value */ if( aList ){ char *aEnd = &aList[nList]; /* Pointer to one byte after EOF */ char *p = aList; /* Cursor */ - if( !isPoslist ){ - /* The number of docids in the list is the same as the number of - ** varints. In FTS3 a varint consists of a single byte with the 0x80 - ** bit cleared and zero or more bytes with the 0x80 bit set. So to - ** count the varints in the buffer, just count the number of bytes - ** with the 0x80 bit clear. */ - while( p<aEnd ) nDoc += (((*p++)&0x80)==0); - }else{ - while( p<aEnd ){ - nDoc++; - while( (*p++)&0x80 ); /* Skip docid varint */ - fts3PoslistCopy(0, &p); /* Skip over position list */ - } + while( p<aEnd ){ + nDoc++; + while( (*p++)&0x80 ); /* Skip docid varint */ + fts3PoslistCopy(0, &p); /* Skip over position list */ } } @@ -2317,697 +3009,59 @@ static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){ } /* -** Call sqlite3Fts3DeferToken() for each token in the expression pExpr. -*/ -static int fts3DeferExpression(Fts3Cursor *pCsr, Fts3Expr *pExpr){ - int rc = SQLITE_OK; - if( pExpr ){ - rc = fts3DeferExpression(pCsr, pExpr->pLeft); - if( rc==SQLITE_OK ){ - rc = fts3DeferExpression(pCsr, pExpr->pRight); - } - if( pExpr->eType==FTSQUERY_PHRASE ){ - int iCol = pExpr->pPhrase->iColumn; - int i; - for(i=0; rc==SQLITE_OK && i<pExpr->pPhrase->nToken; i++){ - Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; - if( pToken->pDeferred==0 ){ - rc = sqlite3Fts3DeferToken(pCsr, pToken, iCol); - } - } - } - } - return rc; -} - -/* -** This function removes the position information from a doclist. When -** called, buffer aList (size *pnList bytes) contains a doclist that includes -** position information. This function removes the position information so -** that aList contains only docids, and adjusts *pnList to reflect the new -** (possibly reduced) size of the doclist. -*/ -static void fts3DoclistStripPositions( - char *aList, /* IN/OUT: Buffer containing doclist */ - int *pnList /* IN/OUT: Size of doclist in bytes */ -){ - if( aList ){ - char *aEnd = &aList[*pnList]; /* Pointer to one byte after EOF */ - char *p = aList; /* Input cursor */ - char *pOut = aList; /* Output cursor */ - - while( p<aEnd ){ - sqlite3_int64 delta; - p += sqlite3Fts3GetVarint(p, &delta); - fts3PoslistCopy(0, &p); - pOut += sqlite3Fts3PutVarint(pOut, delta); - } - - *pnList = (int)(pOut - aList); - } -} - -/* -** Return a DocList corresponding to the phrase *pPhrase. +** Advance the cursor to the next row in the %_content table that +** matches the search criteria. For a MATCH search, this will be +** the next row that matches. For a full-table scan, this will be +** simply the next row in the %_content table. For a docid lookup, +** this routine simply sets the EOF flag. ** -** If this function returns SQLITE_OK, but *pnOut is set to a negative value, -** then no tokens in the phrase were looked up in the full-text index. This -** is only possible when this function is called from within xFilter(). The -** caller should assume that all documents match the phrase. The actual -** filtering will take place in xNext(). +** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned +** even if we reach end-of-file. The fts3EofMethod() will be called +** subsequently to determine whether or not an EOF was hit. */ -static int fts3PhraseSelect( - Fts3Cursor *pCsr, /* Virtual table cursor handle */ - Fts3Phrase *pPhrase, /* Phrase to return a doclist for */ - int isReqPos, /* True if output should contain positions */ - char **paOut, /* OUT: Pointer to malloc'd result buffer */ - int *pnOut /* OUT: Size of buffer at *paOut */ -){ - char *pOut = 0; - int nOut = 0; - int rc = SQLITE_OK; - int ii; - int iCol = pPhrase->iColumn; - int isTermPos = (pPhrase->nToken>1 || isReqPos); - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; - int isFirst = 1; - - int iPrevTok = 0; - int nDoc = 0; - - /* If this is an xFilter() evaluation, create a segment-reader for each - ** phrase token. Or, if this is an xNext() or snippet/offsets/matchinfo - ** evaluation, only create segment-readers if there are no Fts3DeferredToken - ** objects attached to the phrase-tokens. - */ - for(ii=0; ii<pPhrase->nToken; ii++){ - Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; - if( pTok->pSegcsr==0 ){ - if( (pCsr->eEvalmode==FTS3_EVAL_FILTER) - || (pCsr->eEvalmode==FTS3_EVAL_NEXT && pCsr->pDeferred==0) - || (pCsr->eEvalmode==FTS3_EVAL_MATCHINFO && pTok->bFulltext) - ){ - rc = fts3TermSegReaderCursor( - pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr - ); - if( rc!=SQLITE_OK ) return rc; - } - } - } - - for(ii=0; ii<pPhrase->nToken; ii++){ - Fts3PhraseToken *pTok; /* Token to find doclist for */ - int iTok = 0; /* The token being queried this iteration */ - char *pList = 0; /* Pointer to token doclist */ - int nList = 0; /* Size of buffer at pList */ - - /* Select a token to process. If this is an xFilter() call, then tokens - ** are processed in order from least to most costly. Otherwise, tokens - ** are processed in the order in which they occur in the phrase. - */ - if( pCsr->eEvalmode==FTS3_EVAL_MATCHINFO ){ - assert( isReqPos ); - iTok = ii; - pTok = &pPhrase->aToken[iTok]; - if( pTok->bFulltext==0 ) continue; - }else if( pCsr->eEvalmode==FTS3_EVAL_NEXT || isReqPos ){ - iTok = ii; - pTok = &pPhrase->aToken[iTok]; - }else{ - int nMinCost = 0x7FFFFFFF; - int jj; - - /* Find the remaining token with the lowest cost. */ - for(jj=0; jj<pPhrase->nToken; jj++){ - Fts3SegReaderCursor *pSegcsr = pPhrase->aToken[jj].pSegcsr; - if( pSegcsr && pSegcsr->nCost<nMinCost ){ - iTok = jj; - nMinCost = pSegcsr->nCost; - } - } - pTok = &pPhrase->aToken[iTok]; - - /* This branch is taken if it is determined that loading the doclist - ** for the next token would require more IO than loading all documents - ** currently identified by doclist pOut/nOut. No further doclists will - ** be loaded from the full-text index for this phrase. - */ - if( nMinCost>nDoc && ii>0 ){ - rc = fts3DeferExpression(pCsr, pCsr->pExpr); - break; - } - } - - if( pCsr->eEvalmode==FTS3_EVAL_NEXT && pTok->pDeferred ){ - rc = fts3DeferredTermSelect(pTok->pDeferred, isTermPos, &nList, &pList); - }else{ - if( pTok->pSegcsr ){ - rc = fts3TermSelect(p, pTok, iCol, isTermPos, &nList, &pList); - } - pTok->bFulltext = 1; - } - assert( rc!=SQLITE_OK || pCsr->eEvalmode || pTok->pSegcsr==0 ); - if( rc!=SQLITE_OK ) break; - - if( isFirst ){ - pOut = pList; - nOut = nList; - if( pCsr->eEvalmode==FTS3_EVAL_FILTER && pPhrase->nToken>1 ){ - nDoc = fts3DoclistCountDocids(1, pOut, nOut); - } - isFirst = 0; - iPrevTok = iTok; +static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ + int rc; + Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + if( pCsr->eSearch==FTS3_DOCID_SEARCH || pCsr->eSearch==FTS3_FULLSCAN_SEARCH ){ + if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ + pCsr->isEof = 1; + rc = sqlite3_reset(pCsr->pStmt); }else{ - /* Merge the new term list and the current output. */ - char *aLeft, *aRight; - int nLeft, nRight; - int nDist; - int mt; - - /* If this is the final token of the phrase, and positions were not - ** requested by the caller, use MERGE_PHRASE instead of POS_PHRASE. - ** This drops the position information from the output list. - */ - mt = MERGE_POS_PHRASE; - if( ii==pPhrase->nToken-1 && !isReqPos ) mt = MERGE_PHRASE; - - assert( iPrevTok!=iTok ); - if( iPrevTok<iTok ){ - aLeft = pOut; - nLeft = nOut; - aRight = pList; - nRight = nList; - nDist = iTok-iPrevTok; - iPrevTok = iTok; - }else{ - aRight = pOut; - nRight = nOut; - aLeft = pList; - nLeft = nList; - nDist = iPrevTok-iTok; - } - pOut = aRight; - fts3DoclistMerge( - mt, nDist, 0, pOut, &nOut, aLeft, nLeft, aRight, nRight, &nDoc - ); - sqlite3_free(aLeft); - } - assert( nOut==0 || pOut!=0 ); - } - - if( rc==SQLITE_OK ){ - if( ii!=pPhrase->nToken ){ - assert( pCsr->eEvalmode==FTS3_EVAL_FILTER && isReqPos==0 ); - fts3DoclistStripPositions(pOut, &nOut); + pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); + rc = SQLITE_OK; } - *paOut = pOut; - *pnOut = nOut; }else{ - sqlite3_free(pOut); + rc = fts3EvalNext((Fts3Cursor *)pCursor); } + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); return rc; } /* -** This function merges two doclists according to the requirements of a -** NEAR operator. -** -** Both input doclists must include position information. The output doclist -** includes position information if the first argument to this function -** is MERGE_POS_NEAR, or does not if it is MERGE_NEAR. -*/ -static int fts3NearMerge( - int mergetype, /* MERGE_POS_NEAR or MERGE_NEAR */ - int nNear, /* Parameter to NEAR operator */ - int nTokenLeft, /* Number of tokens in LHS phrase arg */ - char *aLeft, /* Doclist for LHS (incl. positions) */ - int nLeft, /* Size of LHS doclist in bytes */ - int nTokenRight, /* As nTokenLeft */ - char *aRight, /* As aLeft */ - int nRight, /* As nRight */ - char **paOut, /* OUT: Results of merge (malloced) */ - int *pnOut /* OUT: Sized of output buffer */ -){ - char *aOut; /* Buffer to write output doclist to */ - int rc; /* Return code */ - - assert( mergetype==MERGE_POS_NEAR || MERGE_NEAR ); - - aOut = sqlite3_malloc(nLeft+nRight+1); - if( aOut==0 ){ - rc = SQLITE_NOMEM; - }else{ - rc = fts3DoclistMerge(mergetype, nNear+nTokenRight, nNear+nTokenLeft, - aOut, pnOut, aLeft, nLeft, aRight, nRight, 0 - ); - if( rc!=SQLITE_OK ){ - sqlite3_free(aOut); - aOut = 0; - } - } - - *paOut = aOut; - return rc; -} - -/* -** This function is used as part of the processing for the snippet() and -** offsets() functions. +** The following are copied from sqliteInt.h. ** -** Both pLeft and pRight are expression nodes of type FTSQUERY_PHRASE. Both -** have their respective doclists (including position information) loaded -** in Fts3Expr.aDoclist/nDoclist. This function removes all entries from -** each doclist that are not within nNear tokens of a corresponding entry -** in the other doclist. -*/ -int sqlite3Fts3ExprNearTrim(Fts3Expr *pLeft, Fts3Expr *pRight, int nNear){ - int rc; /* Return code */ - - assert( pLeft->eType==FTSQUERY_PHRASE ); - assert( pRight->eType==FTSQUERY_PHRASE ); - assert( pLeft->isLoaded && pRight->isLoaded ); - - if( pLeft->aDoclist==0 || pRight->aDoclist==0 ){ - sqlite3_free(pLeft->aDoclist); - sqlite3_free(pRight->aDoclist); - pRight->aDoclist = 0; - pLeft->aDoclist = 0; - rc = SQLITE_OK; - }else{ - char *aOut; /* Buffer in which to assemble new doclist */ - int nOut; /* Size of buffer aOut in bytes */ - - rc = fts3NearMerge(MERGE_POS_NEAR, nNear, - pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist, - pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist, - &aOut, &nOut - ); - if( rc!=SQLITE_OK ) return rc; - sqlite3_free(pRight->aDoclist); - pRight->aDoclist = aOut; - pRight->nDoclist = nOut; - - rc = fts3NearMerge(MERGE_POS_NEAR, nNear, - pRight->pPhrase->nToken, pRight->aDoclist, pRight->nDoclist, - pLeft->pPhrase->nToken, pLeft->aDoclist, pLeft->nDoclist, - &aOut, &nOut - ); - sqlite3_free(pLeft->aDoclist); - pLeft->aDoclist = aOut; - pLeft->nDoclist = nOut; - } - return rc; -} - - -/* -** Allocate an Fts3SegReaderArray for each token in the expression pExpr. -** The allocated objects are stored in the Fts3PhraseToken.pArray member -** variables of each token structure. -*/ -static int fts3ExprAllocateSegReaders( - Fts3Cursor *pCsr, /* FTS3 table */ - Fts3Expr *pExpr, /* Expression to create seg-readers for */ - int *pnExpr /* OUT: Number of AND'd expressions */ -){ - int rc = SQLITE_OK; /* Return code */ - - assert( pCsr->eEvalmode==FTS3_EVAL_FILTER ); - if( pnExpr && pExpr->eType!=FTSQUERY_AND ){ - (*pnExpr)++; - pnExpr = 0; - } - - if( pExpr->eType==FTSQUERY_PHRASE ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - int ii; - - for(ii=0; rc==SQLITE_OK && ii<pPhrase->nToken; ii++){ - Fts3PhraseToken *pTok = &pPhrase->aToken[ii]; - if( pTok->pSegcsr==0 ){ - rc = fts3TermSegReaderCursor( - pCsr, pTok->z, pTok->n, pTok->isPrefix, &pTok->pSegcsr - ); - } - } - }else{ - rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pLeft, pnExpr); - if( rc==SQLITE_OK ){ - rc = fts3ExprAllocateSegReaders(pCsr, pExpr->pRight, pnExpr); - } - } - return rc; -} - -/* -** Free the Fts3SegReaderArray objects associated with each token in the -** expression pExpr. In other words, this function frees the resources -** allocated by fts3ExprAllocateSegReaders(). +** Constants for the largest and smallest possible 64-bit signed integers. +** These macros are designed to work correctly on both 32-bit and 64-bit +** compilers. */ -static void fts3ExprFreeSegReaders(Fts3Expr *pExpr){ - if( pExpr ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - if( pPhrase ){ - int kk; - for(kk=0; kk<pPhrase->nToken; kk++){ - fts3SegReaderCursorFree(pPhrase->aToken[kk].pSegcsr); - pPhrase->aToken[kk].pSegcsr = 0; - } - } - fts3ExprFreeSegReaders(pExpr->pLeft); - fts3ExprFreeSegReaders(pExpr->pRight); - } -} - -/* -** Return the sum of the costs of all tokens in the expression pExpr. This -** function must be called after Fts3SegReaderArrays have been allocated -** for all tokens using fts3ExprAllocateSegReaders(). -*/ -static int fts3ExprCost(Fts3Expr *pExpr){ - int nCost; /* Return value */ - if( pExpr->eType==FTSQUERY_PHRASE ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - int ii; - nCost = 0; - for(ii=0; ii<pPhrase->nToken; ii++){ - Fts3SegReaderCursor *pSegcsr = pPhrase->aToken[ii].pSegcsr; - if( pSegcsr ) nCost += pSegcsr->nCost; - } - }else{ - nCost = fts3ExprCost(pExpr->pLeft) + fts3ExprCost(pExpr->pRight); - } - return nCost; -} - -/* -** The following is a helper function (and type) for fts3EvalExpr(). It -** must be called after Fts3SegReaders have been allocated for every token -** in the expression. See the context it is called from in fts3EvalExpr() -** for further explanation. -*/ -typedef struct ExprAndCost ExprAndCost; -struct ExprAndCost { - Fts3Expr *pExpr; - int nCost; -}; -static void fts3ExprAssignCosts( - Fts3Expr *pExpr, /* Expression to create seg-readers for */ - ExprAndCost **ppExprCost /* OUT: Write to *ppExprCost */ -){ - if( pExpr->eType==FTSQUERY_AND ){ - fts3ExprAssignCosts(pExpr->pLeft, ppExprCost); - fts3ExprAssignCosts(pExpr->pRight, ppExprCost); - }else{ - (*ppExprCost)->pExpr = pExpr; - (*ppExprCost)->nCost = fts3ExprCost(pExpr); - (*ppExprCost)++; - } -} - -/* -** Evaluate the full-text expression pExpr against FTS3 table pTab. Store -** the resulting doclist in *paOut and *pnOut. This routine mallocs for -** the space needed to store the output. The caller is responsible for -** freeing the space when it has finished. -** -** This function is called in two distinct contexts: -** -** * From within the virtual table xFilter() method. In this case, the -** output doclist contains entries for all rows in the table, based on -** data read from the full-text index. -** -** In this case, if the query expression contains one or more tokens that -** are very common, then the returned doclist may contain a superset of -** the documents that actually match the expression. -** -** * From within the virtual table xNext() method. This call is only made -** if the call from within xFilter() found that there were very common -** tokens in the query expression and did return a superset of the -** matching documents. In this case the returned doclist contains only -** entries that correspond to the current row of the table. Instead of -** reading the data for each token from the full-text index, the data is -** already available in-memory in the Fts3PhraseToken.pDeferred structures. -** See fts3EvalDeferred() for how it gets there. -** -** In the first case above, Fts3Cursor.doDeferred==0. In the second (if it is -** required) Fts3Cursor.doDeferred==1. -** -** If the SQLite invokes the snippet(), offsets() or matchinfo() function -** as part of a SELECT on an FTS3 table, this function is called on each -** individual phrase expression in the query. If there were very common tokens -** found in the xFilter() call, then this function is called once for phrase -** for each row visited, and the returned doclist contains entries for the -** current row only. Otherwise, if there were no very common tokens, then this -** function is called once only for each phrase in the query and the returned -** doclist contains entries for all rows of the table. -** -** Fts3Cursor.doDeferred==1 when this function is called on phrases as a -** result of a snippet(), offsets() or matchinfo() invocation. -*/ -static int fts3EvalExpr( - Fts3Cursor *p, /* Virtual table cursor handle */ - Fts3Expr *pExpr, /* Parsed fts3 expression */ - char **paOut, /* OUT: Pointer to malloc'd result buffer */ - int *pnOut, /* OUT: Size of buffer at *paOut */ - int isReqPos /* Require positions in output buffer */ -){ - int rc = SQLITE_OK; /* Return code */ - - /* Zero the output parameters. */ - *paOut = 0; - *pnOut = 0; - - if( pExpr ){ - assert( pExpr->eType==FTSQUERY_NEAR || pExpr->eType==FTSQUERY_OR - || pExpr->eType==FTSQUERY_AND || pExpr->eType==FTSQUERY_NOT - || pExpr->eType==FTSQUERY_PHRASE - ); - assert( pExpr->eType==FTSQUERY_PHRASE || isReqPos==0 ); - - if( pExpr->eType==FTSQUERY_PHRASE ){ - rc = fts3PhraseSelect(p, pExpr->pPhrase, - isReqPos || (pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR), - paOut, pnOut - ); - fts3ExprFreeSegReaders(pExpr); - }else if( p->eEvalmode==FTS3_EVAL_FILTER && pExpr->eType==FTSQUERY_AND ){ - ExprAndCost *aExpr = 0; /* Array of AND'd expressions and costs */ - int nExpr = 0; /* Size of aExpr[] */ - char *aRet = 0; /* Doclist to return to caller */ - int nRet = 0; /* Length of aRet[] in bytes */ - int nDoc = 0x7FFFFFFF; - - assert( !isReqPos ); - - rc = fts3ExprAllocateSegReaders(p, pExpr, &nExpr); - if( rc==SQLITE_OK ){ - assert( nExpr>1 ); - aExpr = sqlite3_malloc(sizeof(ExprAndCost) * nExpr); - if( !aExpr ) rc = SQLITE_NOMEM; - } - if( rc==SQLITE_OK ){ - int ii; /* Used to iterate through expressions */ - - fts3ExprAssignCosts(pExpr, &aExpr); - aExpr -= nExpr; - for(ii=0; ii<nExpr; ii++){ - char *aNew; - int nNew; - int jj; - ExprAndCost *pBest = 0; - - for(jj=0; jj<nExpr; jj++){ - ExprAndCost *pCand = &aExpr[jj]; - if( pCand->pExpr && (pBest==0 || pCand->nCost<pBest->nCost) ){ - pBest = pCand; - } - } - - if( pBest->nCost>nDoc ){ - rc = fts3DeferExpression(p, p->pExpr); - break; - }else{ - rc = fts3EvalExpr(p, pBest->pExpr, &aNew, &nNew, 0); - if( rc!=SQLITE_OK ) break; - pBest->pExpr = 0; - if( ii==0 ){ - aRet = aNew; - nRet = nNew; - nDoc = fts3DoclistCountDocids(0, aRet, nRet); - }else{ - fts3DoclistMerge( - MERGE_AND, 0, 0, aRet, &nRet, aRet, nRet, aNew, nNew, &nDoc - ); - sqlite3_free(aNew); - } - } - } - } - - if( rc==SQLITE_OK ){ - *paOut = aRet; - *pnOut = nRet; - }else{ - assert( *paOut==0 ); - sqlite3_free(aRet); - } - sqlite3_free(aExpr); - fts3ExprFreeSegReaders(pExpr); - - }else{ - char *aLeft; - char *aRight; - int nLeft; - int nRight; - - assert( pExpr->eType==FTSQUERY_NEAR - || pExpr->eType==FTSQUERY_OR - || pExpr->eType==FTSQUERY_NOT - || (pExpr->eType==FTSQUERY_AND && p->eEvalmode==FTS3_EVAL_NEXT) - ); - - if( 0==(rc = fts3EvalExpr(p, pExpr->pRight, &aRight, &nRight, isReqPos)) - && 0==(rc = fts3EvalExpr(p, pExpr->pLeft, &aLeft, &nLeft, isReqPos)) - ){ - switch( pExpr->eType ){ - case FTSQUERY_NEAR: { - Fts3Expr *pLeft; - Fts3Expr *pRight; - int mergetype = MERGE_NEAR; - if( pExpr->pParent && pExpr->pParent->eType==FTSQUERY_NEAR ){ - mergetype = MERGE_POS_NEAR; - } - pLeft = pExpr->pLeft; - while( pLeft->eType==FTSQUERY_NEAR ){ - pLeft=pLeft->pRight; - } - pRight = pExpr->pRight; - assert( pRight->eType==FTSQUERY_PHRASE ); - assert( pLeft->eType==FTSQUERY_PHRASE ); - - rc = fts3NearMerge(mergetype, pExpr->nNear, - pLeft->pPhrase->nToken, aLeft, nLeft, - pRight->pPhrase->nToken, aRight, nRight, - paOut, pnOut - ); - sqlite3_free(aLeft); - break; - } - - case FTSQUERY_OR: { - /* Allocate a buffer for the output. The maximum size is the - ** sum of the sizes of the two input buffers. The +1 term is - ** so that a buffer of zero bytes is never allocated - this can - ** cause fts3DoclistMerge() to incorrectly return SQLITE_NOMEM. - */ - char *aBuffer = sqlite3_malloc(nRight+nLeft+1); - rc = fts3DoclistMerge(MERGE_OR, 0, 0, aBuffer, pnOut, - aLeft, nLeft, aRight, nRight, 0 - ); - *paOut = aBuffer; - sqlite3_free(aLeft); - break; - } - - default: { - assert( FTSQUERY_NOT==MERGE_NOT && FTSQUERY_AND==MERGE_AND ); - fts3DoclistMerge(pExpr->eType, 0, 0, aLeft, pnOut, - aLeft, nLeft, aRight, nRight, 0 - ); - *paOut = aLeft; - break; - } - } - } - sqlite3_free(aRight); - } - } - - assert( rc==SQLITE_OK || *paOut==0 ); - return rc; -} +#ifndef SQLITE_AMALGAMATION +# define LARGEST_INT64 (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32)) +# define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64) +#endif /* -** This function is called from within xNext() for each row visited by -** an FTS3 query. If evaluating the FTS3 query expression within xFilter() -** was able to determine the exact set of matching rows, this function sets -** *pbRes to true and returns SQLITE_IO immediately. -** -** Otherwise, if evaluating the query expression within xFilter() returned a -** superset of the matching documents instead of an exact set (this happens -** when the query includes very common tokens and it is deemed too expensive to -** load their doclists from disk), this function tests if the current row -** really does match the FTS3 query. -** -** If an error occurs, an SQLite error code is returned. Otherwise, SQLITE_OK -** is returned and *pbRes is set to true if the current row matches the -** FTS3 query (and should be included in the results returned to SQLite), or -** false otherwise. +** If the numeric type of argument pVal is "integer", then return it +** converted to a 64-bit signed integer. Otherwise, return a copy of +** the second parameter, iDefault. */ -static int fts3EvalDeferred( - Fts3Cursor *pCsr, /* FTS3 cursor pointing at row to test */ - int *pbRes /* OUT: Set to true if row is a match */ -){ - int rc = SQLITE_OK; - if( pCsr->pDeferred==0 ){ - *pbRes = 1; - }else{ - rc = fts3CursorSeek(0, pCsr); - if( rc==SQLITE_OK ){ - sqlite3Fts3FreeDeferredDoclists(pCsr); - rc = sqlite3Fts3CacheDeferredDoclists(pCsr); - } - if( rc==SQLITE_OK ){ - char *a = 0; - int n = 0; - rc = fts3EvalExpr(pCsr, pCsr->pExpr, &a, &n, 0); - assert( n>=0 ); - *pbRes = (n>0); - sqlite3_free(a); +static sqlite3_int64 fts3DocidRange(sqlite3_value *pVal, i64 iDefault){ + if( pVal ){ + int eType = sqlite3_value_numeric_type(pVal); + if( eType==SQLITE_INTEGER ){ + return sqlite3_value_int64(pVal); } } - return rc; -} - -/* -** Advance the cursor to the next row in the %_content table that -** matches the search criteria. For a MATCH search, this will be -** the next row that matches. For a full-table scan, this will be -** simply the next row in the %_content table. For a docid lookup, -** this routine simply sets the EOF flag. -** -** Return SQLITE_OK if nothing goes wrong. SQLITE_OK is returned -** even if we reach end-of-file. The fts3EofMethod() will be called -** subsequently to determine whether or not an EOF was hit. -*/ -static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){ - int res; - int rc = SQLITE_OK; /* Return code */ - Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; - - pCsr->eEvalmode = FTS3_EVAL_NEXT; - do { - if( pCsr->aDoclist==0 ){ - if( SQLITE_ROW!=sqlite3_step(pCsr->pStmt) ){ - pCsr->isEof = 1; - rc = sqlite3_reset(pCsr->pStmt); - break; - } - pCsr->iPrevId = sqlite3_column_int64(pCsr->pStmt, 0); - }else{ - if( pCsr->pNextId>=&pCsr->aDoclist[pCsr->nDoclist] ){ - pCsr->isEof = 1; - break; - } - sqlite3_reset(pCsr->pStmt); - fts3GetDeltaVarint(&pCsr->pNextId, &pCsr->iPrevId); - pCsr->isRequireSeek = 1; - pCsr->isMatchinfoNeeded = 1; - } - }while( SQLITE_OK==(rc = fts3EvalDeferred(pCsr, &res)) && res==0 ); - - return rc; + return iDefault; } /* @@ -3033,52 +3087,71 @@ static int fts3FilterMethod( int nVal, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ - const char *azSql[] = { - "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?", /* non-full-scan */ - "SELECT %s FROM %Q.'%q_content' AS x ", /* full-scan */ - }; - int rc; /* Return code */ + int rc; char *zSql; /* SQL statement used to access %_content */ + int eSearch; Fts3Table *p = (Fts3Table *)pCursor->pVtab; Fts3Cursor *pCsr = (Fts3Cursor *)pCursor; + sqlite3_value *pCons = 0; /* The MATCH or rowid constraint, if any */ + sqlite3_value *pLangid = 0; /* The "langid = ?" constraint, if any */ + sqlite3_value *pDocidGe = 0; /* The "docid >= ?" constraint, if any */ + sqlite3_value *pDocidLe = 0; /* The "docid <= ?" constraint, if any */ + int iIdx; + UNUSED_PARAMETER(idxStr); UNUSED_PARAMETER(nVal); - assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); - assert( nVal==0 || nVal==1 ); - assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) ); + eSearch = (idxNum & 0x0000FFFF); + assert( eSearch>=0 && eSearch<=(FTS3_FULLTEXT_SEARCH+p->nColumn) ); assert( p->pSegments==0 ); + /* Collect arguments into local variables */ + iIdx = 0; + if( eSearch!=FTS3_FULLSCAN_SEARCH ) pCons = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_LANGID ) pLangid = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_GE ) pDocidGe = apVal[iIdx++]; + if( idxNum & FTS3_HAVE_DOCID_LE ) pDocidLe = apVal[iIdx++]; + assert( iIdx==nVal ); + /* In case the cursor has been used before, clear it now. */ sqlite3_finalize(pCsr->pStmt); sqlite3_free(pCsr->aDoclist); sqlite3Fts3ExprFree(pCsr->pExpr); memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); - if( idxNum!=FTS3_DOCID_SEARCH && idxNum!=FTS3_FULLSCAN_SEARCH ){ - int iCol = idxNum-FTS3_FULLTEXT_SEARCH; - const char *zQuery = (const char *)sqlite3_value_text(apVal[0]); + /* Set the lower and upper bounds on docids to return */ + pCsr->iMinDocid = fts3DocidRange(pDocidGe, SMALLEST_INT64); + pCsr->iMaxDocid = fts3DocidRange(pDocidLe, LARGEST_INT64); + + if( idxStr ){ + pCsr->bDesc = (idxStr[0]=='D'); + }else{ + pCsr->bDesc = p->bDescIdx; + } + pCsr->eSearch = (i16)eSearch; + + if( eSearch!=FTS3_DOCID_SEARCH && eSearch!=FTS3_FULLSCAN_SEARCH ){ + int iCol = eSearch-FTS3_FULLTEXT_SEARCH; + const char *zQuery = (const char *)sqlite3_value_text(pCons); - if( zQuery==0 && sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ + if( zQuery==0 && sqlite3_value_type(pCons)!=SQLITE_NULL ){ return SQLITE_NOMEM; } - rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn, - iCol, zQuery, -1, &pCsr->pExpr + pCsr->iLangid = 0; + if( pLangid ) pCsr->iLangid = sqlite3_value_int(pLangid); + + assert( p->base.zErrMsg==0 ); + rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid, + p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr, + &p->base.zErrMsg ); if( rc!=SQLITE_OK ){ - if( rc==SQLITE_ERROR ){ - p->base.zErrMsg = sqlite3_mprintf("malformed MATCH expression: [%s]", - zQuery); - } return rc; } - rc = sqlite3Fts3ReadLock(p); - if( rc!=SQLITE_OK ) return rc; - - rc = fts3EvalExpr(pCsr, pCsr->pExpr, &pCsr->aDoclist, &pCsr->nDoclist, 0); + rc = fts3EvalStart(pCsr); sqlite3Fts3SegmentsClose(p); if( rc!=SQLITE_OK ) return rc; pCsr->pNextId = pCsr->aDoclist; @@ -3090,20 +3163,25 @@ static int fts3FilterMethod( ** full-text query or docid lookup, the statement retrieves a single ** row by docid. */ - zSql = (char *)azSql[idxNum==FTS3_FULLSCAN_SEARCH]; - zSql = sqlite3_mprintf(zSql, p->zReadExprlist, p->zDb, p->zName); - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); - sqlite3_free(zSql); - } - if( rc==SQLITE_OK && idxNum==FTS3_DOCID_SEARCH ){ - rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]); + if( eSearch==FTS3_FULLSCAN_SEARCH ){ + zSql = sqlite3_mprintf( + "SELECT %s ORDER BY rowid %s", + p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC") + ); + if( zSql ){ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0); + sqlite3_free(zSql); + }else{ + rc = SQLITE_NOMEM; + } + }else if( eSearch==FTS3_DOCID_SEARCH ){ + rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt); + if( rc==SQLITE_OK ){ + rc = sqlite3_bind_value(pCsr->pStmt, 1, pCons); + } } - pCsr->eSearch = (i16)idxNum; - if( rc!=SQLITE_OK ) return rc; + return fts3NextMethod(pCursor); } @@ -3123,54 +3201,63 @@ static int fts3EofMethod(sqlite3_vtab_cursor *pCursor){ */ static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; - if( pCsr->aDoclist ){ - *pRowid = pCsr->iPrevId; - }else{ - /* This branch runs if the query is implemented using a full-table scan - ** (not using the full-text index). In this case grab the rowid from the - ** SELECT statement. - */ - assert( pCsr->isRequireSeek==0 ); - *pRowid = sqlite3_column_int64(pCsr->pStmt, 0); - } + *pRowid = pCsr->iPrevId; return SQLITE_OK; } /* ** This is the xColumn method, called by SQLite to request a value from ** the row that the supplied cursor currently points to. +** +** If: +** +** (iCol < p->nColumn) -> The value of the iCol'th user column. +** (iCol == p->nColumn) -> Magic column with the same name as the table. +** (iCol == p->nColumn+1) -> Docid column +** (iCol == p->nColumn+2) -> Langid column */ static int fts3ColumnMethod( sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ int iCol /* Index of column to read value from */ ){ - int rc; /* Return Code */ + int rc = SQLITE_OK; /* Return Code */ Fts3Cursor *pCsr = (Fts3Cursor *) pCursor; Fts3Table *p = (Fts3Table *)pCursor->pVtab; /* The column value supplied by SQLite must be in range. */ - assert( iCol>=0 && iCol<=p->nColumn+1 ); + assert( iCol>=0 && iCol<=p->nColumn+2 ); if( iCol==p->nColumn+1 ){ /* This call is a request for the "docid" column. Since "docid" is an ** alias for "rowid", use the xRowid() method to obtain the value. */ - sqlite3_int64 iRowid; - rc = fts3RowidMethod(pCursor, &iRowid); - sqlite3_result_int64(pContext, iRowid); + sqlite3_result_int64(pCtx, pCsr->iPrevId); }else if( iCol==p->nColumn ){ /* The extra column whose name is the same as the table. - ** Return a blob which is a pointer to the cursor. - */ - sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); - rc = SQLITE_OK; + ** Return a blob which is a pointer to the cursor. */ + sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT); + }else if( iCol==p->nColumn+2 && pCsr->pExpr ){ + sqlite3_result_int64(pCtx, pCsr->iLangid); }else{ + /* The requested column is either a user column (one that contains + ** indexed data), or the language-id column. */ rc = fts3CursorSeek(0, pCsr); + if( rc==SQLITE_OK ){ - sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1)); + if( iCol==p->nColumn+2 ){ + int iLangid = 0; + if( p->zLanguageid ){ + iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1); + } + sqlite3_result_int(pCtx, iLangid); + }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){ + sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1)); + } } } + + assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); return rc; } @@ -3193,8 +3280,42 @@ static int fts3UpdateMethod( ** hash-table to the database. */ static int fts3SyncMethod(sqlite3_vtab *pVtab){ - int rc = sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab); - sqlite3Fts3SegmentsClose((Fts3Table *)pVtab); + + /* Following an incremental-merge operation, assuming that the input + ** segments are not completely consumed (the usual case), they are updated + ** in place to remove the entries that have already been merged. This + ** involves updating the leaf block that contains the smallest unmerged + ** entry and each block (if any) between the leaf and the root node. So + ** if the height of the input segment b-trees is N, and input segments + ** are merged eight at a time, updating the input segments at the end + ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually + ** small - often between 0 and 2. So the overhead of the incremental + ** merge is somewhere between 8 and 24 blocks. To avoid this overhead + ** dwarfing the actual productive work accomplished, the incremental merge + ** is only attempted if it will write at least 64 leaf blocks. Hence + ** nMinMerge. + ** + ** Of course, updating the input segments also involves deleting a bunch + ** of blocks from the segments table. But this is not considered overhead + ** as it would also be required by a crisis-merge that used the same input + ** segments. + */ + const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */ + + Fts3Table *p = (Fts3Table*)pVtab; + int rc = sqlite3Fts3PendingTermsFlush(p); + + if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){ + int mxLevel = 0; /* Maximum relative level value in db */ + int A; /* Incr-merge parameter A */ + + rc = sqlite3Fts3MaxLevel(p, &mxLevel); + assert( rc==SQLITE_OK || mxLevel==0 ); + A = p->nLeafAdd * mxLevel; + A += (A/2); + if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8); + } + sqlite3Fts3SegmentsClose(p); return rc; } @@ -3202,8 +3323,14 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){ ** Implementation of xBegin() method. This is a no-op. */ static int fts3BeginMethod(sqlite3_vtab *pVtab){ + Fts3Table *p = (Fts3Table*)pVtab; UNUSED_PARAMETER(pVtab); - assert( ((Fts3Table *)pVtab)->nPendingData==0 ); + assert( p->pSegments==0 ); + assert( p->nPendingData==0 ); + assert( p->inTransaction!=1 ); + TESTONLY( p->inTransaction = 1 ); + TESTONLY( p->mxSavepoint = -1; ); + p->nLeafAdd = 0; return SQLITE_OK; } @@ -3213,8 +3340,13 @@ static int fts3BeginMethod(sqlite3_vtab *pVtab){ ** by fts3SyncMethod(). */ static int fts3CommitMethod(sqlite3_vtab *pVtab){ + TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); UNUSED_PARAMETER(pVtab); - assert( ((Fts3Table *)pVtab)->nPendingData==0 ); + assert( p->nPendingData==0 ); + assert( p->inTransaction!=0 ); + assert( p->pSegments==0 ); + TESTONLY( p->inTransaction = 0 ); + TESTONLY( p->mxSavepoint = -1; ); return SQLITE_OK; } @@ -3223,93 +3355,31 @@ static int fts3CommitMethod(sqlite3_vtab *pVtab){ ** hash-table. Any changes made to the database are reverted by SQLite. */ static int fts3RollbackMethod(sqlite3_vtab *pVtab){ - sqlite3Fts3PendingTermsClear((Fts3Table *)pVtab); + Fts3Table *p = (Fts3Table*)pVtab; + sqlite3Fts3PendingTermsClear(p); + assert( p->inTransaction!=0 ); + TESTONLY( p->inTransaction = 0 ); + TESTONLY( p->mxSavepoint = -1; ); return SQLITE_OK; } /* -** Load the doclist associated with expression pExpr to pExpr->aDoclist. -** The loaded doclist contains positions as well as the document ids. -** This is used by the matchinfo(), snippet() and offsets() auxillary -** functions. +** When called, *ppPoslist must point to the byte immediately following the +** end of a position-list. i.e. ( (*ppPoslist)[-1]==POS_END ). This function +** moves *ppPoslist so that it instead points to the first byte of the +** same position list. */ -int sqlite3Fts3ExprLoadDoclist(Fts3Cursor *pCsr, Fts3Expr *pExpr){ - int rc; - assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase ); - assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); - rc = fts3EvalExpr(pCsr, pExpr, &pExpr->aDoclist, &pExpr->nDoclist, 1); - return rc; -} - -int sqlite3Fts3ExprLoadFtDoclist( - Fts3Cursor *pCsr, - Fts3Expr *pExpr, - char **paDoclist, - int *pnDoclist -){ - int rc; - assert( pCsr->eEvalmode==FTS3_EVAL_NEXT ); - assert( pExpr->eType==FTSQUERY_PHRASE && pExpr->pPhrase ); - pCsr->eEvalmode = FTS3_EVAL_MATCHINFO; - rc = fts3EvalExpr(pCsr, pExpr, paDoclist, pnDoclist, 1); - pCsr->eEvalmode = FTS3_EVAL_NEXT; - return rc; -} +static void fts3ReversePoslist(char *pStart, char **ppPoslist){ + char *p = &(*ppPoslist)[-2]; + char c = 0; -/* -** After ExprLoadDoclist() (see above) has been called, this function is -** used to iterate/search through the position lists that make up the doclist -** stored in pExpr->aDoclist. -*/ -char *sqlite3Fts3FindPositions( - Fts3Expr *pExpr, /* Access this expressions doclist */ - sqlite3_int64 iDocid, /* Docid associated with requested pos-list */ - int iCol /* Column of requested pos-list */ -){ - assert( pExpr->isLoaded ); - if( pExpr->aDoclist ){ - char *pEnd = &pExpr->aDoclist[pExpr->nDoclist]; - char *pCsr; - - if( pExpr->pCurrent==0 ){ - pExpr->pCurrent = pExpr->aDoclist; - pExpr->iCurrent = 0; - pExpr->pCurrent += sqlite3Fts3GetVarint(pExpr->pCurrent,&pExpr->iCurrent); - } - pCsr = pExpr->pCurrent; - assert( pCsr ); - - while( pCsr<pEnd ){ - if( pExpr->iCurrent<iDocid ){ - fts3PoslistCopy(0, &pCsr); - if( pCsr<pEnd ){ - fts3GetDeltaVarint(&pCsr, &pExpr->iCurrent); - } - pExpr->pCurrent = pCsr; - }else{ - if( pExpr->iCurrent==iDocid ){ - int iThis = 0; - if( iCol<0 ){ - /* If iCol is negative, return a pointer to the start of the - ** position-list (instead of a pointer to the start of a list - ** of offsets associated with a specific column). - */ - return pCsr; - } - while( iThis<iCol ){ - fts3ColumnlistCopy(0, &pCsr); - if( *pCsr==0x00 ) return 0; - pCsr++; - pCsr += sqlite3Fts3GetVarint32(pCsr, &iThis); - } - if( iCol==iThis && (*pCsr&0xFE) ) return pCsr; - } - return 0; - } - } + while( p>pStart && (c=*p--)==0 ); + while( p>pStart && (*p & 0x80) | c ){ + c = *p--; } - - return 0; + if( p>pStart ){ p = &p[2]; } + while( *p++&0x80 ); + *ppPoslist = p; } /* @@ -3510,15 +3580,22 @@ static int fts3RenameMethod( sqlite3 *db = p->db; /* Database connection */ int rc; /* Return Code */ + /* As it happens, the pending terms table is always empty here. This is + ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction + ** always opens a savepoint transaction. And the xSavepoint() method + ** flushes the pending terms table. But leave the (no-op) call to + ** PendingTermsFlush() in in case that changes. + */ + assert( p->nPendingData==0 ); rc = sqlite3Fts3PendingTermsFlush(p); - if( rc!=SQLITE_OK ){ - return rc; + + if( p->zContentTbl==0 ){ + fts3DbExec(&rc, db, + "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';", + p->zDb, p->zName, zName + ); } - fts3DbExec(&rc, db, - "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';", - p->zDb, p->zName, zName - ); if( p->bHasDocsize ){ fts3DbExec(&rc, db, "ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';", @@ -3542,8 +3619,55 @@ static int fts3RenameMethod( return rc; } +/* +** The xSavepoint() method. +** +** Flush the contents of the pending-terms table to disk. +*/ +static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){ + int rc = SQLITE_OK; + UNUSED_PARAMETER(iSavepoint); + assert( ((Fts3Table *)pVtab)->inTransaction ); + assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint ); + TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint ); + if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){ + rc = fts3SyncMethod(pVtab); + } + return rc; +} + +/* +** The xRelease() method. +** +** This is a no-op. +*/ +static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){ + TESTONLY( Fts3Table *p = (Fts3Table*)pVtab ); + UNUSED_PARAMETER(iSavepoint); + UNUSED_PARAMETER(pVtab); + assert( p->inTransaction ); + assert( p->mxSavepoint >= iSavepoint ); + TESTONLY( p->mxSavepoint = iSavepoint-1 ); + return SQLITE_OK; +} + +/* +** The xRollbackTo() method. +** +** Discard the contents of the pending terms table. +*/ +static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){ + Fts3Table *p = (Fts3Table*)pVtab; + UNUSED_PARAMETER(iSavepoint); + assert( p->inTransaction ); + assert( p->mxSavepoint >= iSavepoint ); + TESTONLY( p->mxSavepoint = iSavepoint ); + sqlite3Fts3PendingTermsClear(p); + return SQLITE_OK; +} + static const sqlite3_module fts3Module = { - /* iVersion */ 0, + /* iVersion */ 2, /* xCreate */ fts3CreateMethod, /* xConnect */ fts3ConnectMethod, /* xBestIndex */ fts3BestIndexMethod, @@ -3563,6 +3687,9 @@ static const sqlite3_module fts3Module = { /* xRollback */ fts3RollbackMethod, /* xFindFunction */ fts3FindFunctionMethod, /* xRename */ fts3RenameMethod, + /* xSavepoint */ fts3SavepointMethod, + /* xRelease */ fts3ReleaseMethod, + /* xRollbackTo */ fts3RollbackToMethod, }; /* @@ -3588,12 +3715,15 @@ static void hashDestroy(void *p){ */ void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule); +#ifdef SQLITE_ENABLE_FTS4_UNICODE61 +void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule); +#endif #ifdef SQLITE_ENABLE_ICU void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule); #endif /* -** Initialise the fts3 extension. If this extension is built as part +** Initialize the fts3 extension. If this extension is built as part ** of the sqlite library, then this function is called directly by ** SQLite. If fts3 is built as a dynamically loadable extension, this ** function is called by the sqlite3_extension_init() entry point. @@ -3603,19 +3733,31 @@ int sqlite3Fts3Init(sqlite3 *db){ Fts3Hash *pHash = 0; const sqlite3_tokenizer_module *pSimple = 0; const sqlite3_tokenizer_module *pPorter = 0; +#ifdef SQLITE_ENABLE_FTS4_UNICODE61 + const sqlite3_tokenizer_module *pUnicode = 0; +#endif #ifdef SQLITE_ENABLE_ICU const sqlite3_tokenizer_module *pIcu = 0; sqlite3Fts3IcuTokenizerModule(&pIcu); #endif +#ifdef SQLITE_ENABLE_FTS4_UNICODE61 + sqlite3Fts3UnicodeTokenizer(&pUnicode); +#endif + +#ifdef SQLITE_TEST + rc = sqlite3Fts3InitTerm(db); + if( rc!=SQLITE_OK ) return rc; +#endif + rc = sqlite3Fts3InitAux(db); if( rc!=SQLITE_OK ) return rc; sqlite3Fts3SimpleTokenizerModule(&pSimple); sqlite3Fts3PorterTokenizerModule(&pPorter); - /* Allocate and initialise the hash-table used to store tokenizers. */ + /* Allocate and initialize the hash-table used to store tokenizers. */ pHash = sqlite3_malloc(sizeof(Fts3Hash)); if( !pHash ){ rc = SQLITE_NOMEM; @@ -3627,6 +3769,10 @@ int sqlite3Fts3Init(sqlite3 *db){ if( rc==SQLITE_OK ){ if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple) || sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter) + +#ifdef SQLITE_ENABLE_FTS4_UNICODE61 + || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode) +#endif #ifdef SQLITE_ENABLE_ICU || (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu)) #endif @@ -3661,9 +3807,13 @@ int sqlite3Fts3Init(sqlite3 *db){ db, "fts4", &fts3Module, (void *)pHash, 0 ); } + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3InitTok(db, (void *)pHash); + } return rc; } + /* An error has occurred. Delete the hash table and return the error code. */ assert( rc!=SQLITE_OK ); if( pHash ){ @@ -3673,8 +3823,1935 @@ int sqlite3Fts3Init(sqlite3 *db){ return rc; } +/* +** Allocate an Fts3MultiSegReader for each token in the expression headed +** by pExpr. +** +** An Fts3SegReader object is a cursor that can seek or scan a range of +** entries within a single segment b-tree. An Fts3MultiSegReader uses multiple +** Fts3SegReader objects internally to provide an interface to seek or scan +** within the union of all segments of a b-tree. Hence the name. +** +** If the allocated Fts3MultiSegReader just seeks to a single entry in a +** segment b-tree (if the term is not a prefix or it is a prefix for which +** there exists prefix b-tree of the right length) then it may be traversed +** and merged incrementally. Otherwise, it has to be merged into an in-memory +** doclist and then traversed. +*/ +static void fts3EvalAllocateReaders( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Allocate readers for this expression */ + int *pnToken, /* OUT: Total number of tokens in phrase. */ + int *pnOr, /* OUT: Total number of OR nodes in expr. */ + int *pRc /* IN/OUT: Error code */ +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int i; + int nToken = pExpr->pPhrase->nToken; + *pnToken += nToken; + for(i=0; i<nToken; i++){ + Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i]; + int rc = fts3TermSegReaderCursor(pCsr, + pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr + ); + if( rc!=SQLITE_OK ){ + *pRc = rc; + return; + } + } + assert( pExpr->pPhrase->iDoclistToken==0 ); + pExpr->pPhrase->iDoclistToken = -1; + }else{ + *pnOr += (pExpr->eType==FTSQUERY_OR); + fts3EvalAllocateReaders(pCsr, pExpr->pLeft, pnToken, pnOr, pRc); + fts3EvalAllocateReaders(pCsr, pExpr->pRight, pnToken, pnOr, pRc); + } + } +} + +/* +** Arguments pList/nList contain the doclist for token iToken of phrase p. +** It is merged into the main doclist stored in p->doclist.aAll/nAll. +** +** This function assumes that pList points to a buffer allocated using +** sqlite3_malloc(). This function takes responsibility for eventually +** freeing the buffer. +*/ +static void fts3EvalPhraseMergeToken( + Fts3Table *pTab, /* FTS Table pointer */ + Fts3Phrase *p, /* Phrase to merge pList/nList into */ + int iToken, /* Token pList/nList corresponds to */ + char *pList, /* Pointer to doclist */ + int nList /* Number of bytes in pList */ +){ + assert( iToken!=p->iDoclistToken ); + + if( pList==0 ){ + sqlite3_free(p->doclist.aAll); + p->doclist.aAll = 0; + p->doclist.nAll = 0; + } + + else if( p->iDoclistToken<0 ){ + p->doclist.aAll = pList; + p->doclist.nAll = nList; + } + + else if( p->doclist.aAll==0 ){ + sqlite3_free(pList); + } + + else { + char *pLeft; + char *pRight; + int nLeft; + int nRight; + int nDiff; + + if( p->iDoclistToken<iToken ){ + pLeft = p->doclist.aAll; + nLeft = p->doclist.nAll; + pRight = pList; + nRight = nList; + nDiff = iToken - p->iDoclistToken; + }else{ + pRight = p->doclist.aAll; + nRight = p->doclist.nAll; + pLeft = pList; + nLeft = nList; + nDiff = p->iDoclistToken - iToken; + } + + fts3DoclistPhraseMerge(pTab->bDescIdx, nDiff, pLeft, nLeft, pRight,&nRight); + sqlite3_free(pLeft); + p->doclist.aAll = pRight; + p->doclist.nAll = nRight; + } + + if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken; +} + +/* +** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist +** does not take deferred tokens into account. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ +static int fts3EvalPhraseLoad( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p /* Phrase object */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int iToken; + int rc = SQLITE_OK; + + for(iToken=0; rc==SQLITE_OK && iToken<p->nToken; iToken++){ + Fts3PhraseToken *pToken = &p->aToken[iToken]; + assert( pToken->pDeferred==0 || pToken->pSegcsr==0 ); + + if( pToken->pSegcsr ){ + int nThis = 0; + char *pThis = 0; + rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis); + if( rc==SQLITE_OK ){ + fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis); + } + } + assert( pToken->pSegcsr==0 ); + } + + return rc; +} + +/* +** This function is called on each phrase after the position lists for +** any deferred tokens have been loaded into memory. It updates the phrases +** current position list to include only those positions that are really +** instances of the phrase (after considering deferred tokens). If this +** means that the phrase does not appear in the current row, doclist.pList +** and doclist.nList are both zeroed. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ +static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){ + int iToken; /* Used to iterate through phrase tokens */ + char *aPoslist = 0; /* Position list for deferred tokens */ + int nPoslist = 0; /* Number of bytes in aPoslist */ + int iPrev = -1; /* Token number of previous deferred token */ + + assert( pPhrase->doclist.bFreeList==0 ); + + for(iToken=0; iToken<pPhrase->nToken; iToken++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + Fts3DeferredToken *pDeferred = pToken->pDeferred; + + if( pDeferred ){ + char *pList; + int nList; + int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList); + if( rc!=SQLITE_OK ) return rc; + + if( pList==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; + + }else if( aPoslist==0 ){ + aPoslist = pList; + nPoslist = nList; + + }else{ + char *aOut = pList; + char *p1 = aPoslist; + char *p2 = aOut; + + assert( iPrev>=0 ); + fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2); + sqlite3_free(aPoslist); + aPoslist = pList; + nPoslist = (int)(aOut - aPoslist); + if( nPoslist==0 ){ + sqlite3_free(aPoslist); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + return SQLITE_OK; + } + } + iPrev = iToken; + } + } + + if( iPrev>=0 ){ + int nMaxUndeferred = pPhrase->iDoclistToken; + if( nMaxUndeferred<0 ){ + pPhrase->doclist.pList = aPoslist; + pPhrase->doclist.nList = nPoslist; + pPhrase->doclist.iDocid = pCsr->iPrevId; + pPhrase->doclist.bFreeList = 1; + }else{ + int nDistance; + char *p1; + char *p2; + char *aOut; + + if( nMaxUndeferred>iPrev ){ + p1 = aPoslist; + p2 = pPhrase->doclist.pList; + nDistance = nMaxUndeferred - iPrev; + }else{ + p1 = pPhrase->doclist.pList; + p2 = aPoslist; + nDistance = iPrev - nMaxUndeferred; + } + + aOut = (char *)sqlite3_malloc(nPoslist+8); + if( !aOut ){ + sqlite3_free(aPoslist); + return SQLITE_NOMEM; + } + + pPhrase->doclist.pList = aOut; + if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){ + pPhrase->doclist.bFreeList = 1; + pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList); + }else{ + sqlite3_free(aOut); + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + } + sqlite3_free(aPoslist); + } + } + + return SQLITE_OK; +} + +/* +** Maximum number of tokens a phrase may have to be considered for the +** incremental doclists strategy. +*/ +#define MAX_INCR_PHRASE_TOKENS 4 + +/* +** This function is called for each Fts3Phrase in a full-text query +** expression to initialize the mechanism for returning rows. Once this +** function has been called successfully on an Fts3Phrase, it may be +** used with fts3EvalPhraseNext() to iterate through the matching docids. +** +** If parameter bOptOk is true, then the phrase may (or may not) use the +** incremental loading strategy. Otherwise, the entire doclist is loaded into +** memory within this call. +** +** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code. +*/ +static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; /* Error code */ + int i; + + /* Determine if doclists may be loaded from disk incrementally. This is + ** possible if the bOptOk argument is true, the FTS doclists will be + ** scanned in forward order, and the phrase consists of + ** MAX_INCR_PHRASE_TOKENS or fewer tokens, none of which are are "^first" + ** tokens or prefix tokens that cannot use a prefix-index. */ + int bHaveIncr = 0; + int bIncrOk = (bOptOk + && pCsr->bDesc==pTab->bDescIdx + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 + && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 +#ifdef SQLITE_TEST + && pTab->bNoIncrDoclist==0 +#endif + ); + for(i=0; bIncrOk==1 && i<p->nToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + if( pToken->bFirst || (pToken->pSegcsr!=0 && !pToken->pSegcsr->bLookup) ){ + bIncrOk = 0; + } + if( pToken->pSegcsr ) bHaveIncr = 1; + } + + if( bIncrOk && bHaveIncr ){ + /* Use the incremental approach. */ + int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn); + for(i=0; rc==SQLITE_OK && i<p->nToken; i++){ + Fts3PhraseToken *pToken = &p->aToken[i]; + Fts3MultiSegReader *pSegcsr = pToken->pSegcsr; + if( pSegcsr ){ + rc = sqlite3Fts3MsrIncrStart(pTab, pSegcsr, iCol, pToken->z, pToken->n); + } + } + p->bIncr = 1; + }else{ + /* Load the full doclist for the phrase into memory. */ + rc = fts3EvalPhraseLoad(pCsr, p); + p->bIncr = 0; + } + + assert( rc!=SQLITE_OK || p->nToken<1 || p->aToken[0].pSegcsr==0 || p->bIncr ); + return rc; +} + +/* +** This function is used to iterate backwards (from the end to start) +** through doclists. It is used by this module to iterate through phrase +** doclists in reverse and by the fts3_write.c module to iterate through +** pending-terms lists when writing to databases with "order=desc". +** +** The doclist may be sorted in ascending (parameter bDescIdx==0) or +** descending (parameter bDescIdx==1) order of docid. Regardless, this +** function iterates from the end of the doclist to the beginning. +*/ +void sqlite3Fts3DoclistPrev( + int bDescIdx, /* True if the doclist is desc */ + char *aDoclist, /* Pointer to entire doclist */ + int nDoclist, /* Length of aDoclist in bytes */ + char **ppIter, /* IN/OUT: Iterator pointer */ + sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ + int *pnList, /* OUT: List length pointer */ + u8 *pbEof /* OUT: End-of-file flag */ +){ + char *p = *ppIter; + + assert( nDoclist>0 ); + assert( *pbEof==0 ); + assert( p || *piDocid==0 ); + assert( !p || (p>aDoclist && p<&aDoclist[nDoclist]) ); + + if( p==0 ){ + sqlite3_int64 iDocid = 0; + char *pNext = 0; + char *pDocid = aDoclist; + char *pEnd = &aDoclist[nDoclist]; + int iMul = 1; + + while( pDocid<pEnd ){ + sqlite3_int64 iDelta; + pDocid += sqlite3Fts3GetVarint(pDocid, &iDelta); + iDocid += (iMul * iDelta); + pNext = pDocid; + fts3PoslistCopy(0, &pDocid); + while( pDocid<pEnd && *pDocid==0 ) pDocid++; + iMul = (bDescIdx ? -1 : 1); + } + + *pnList = (int)(pEnd - pNext); + *ppIter = pNext; + *piDocid = iDocid; + }else{ + int iMul = (bDescIdx ? -1 : 1); + sqlite3_int64 iDelta; + fts3GetReverseVarint(&p, aDoclist, &iDelta); + *piDocid -= (iMul * iDelta); + + if( p==aDoclist ){ + *pbEof = 1; + }else{ + char *pSave = p; + fts3ReversePoslist(aDoclist, &p); + *pnList = (int)(pSave - p); + } + *ppIter = p; + } +} + +/* +** Iterate forwards through a doclist. +*/ +void sqlite3Fts3DoclistNext( + int bDescIdx, /* True if the doclist is desc */ + char *aDoclist, /* Pointer to entire doclist */ + int nDoclist, /* Length of aDoclist in bytes */ + char **ppIter, /* IN/OUT: Iterator pointer */ + sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */ + u8 *pbEof /* OUT: End-of-file flag */ +){ + char *p = *ppIter; + + assert( nDoclist>0 ); + assert( *pbEof==0 ); + assert( p || *piDocid==0 ); + assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) ); + + if( p==0 ){ + p = aDoclist; + p += sqlite3Fts3GetVarint(p, piDocid); + }else{ + fts3PoslistCopy(0, &p); + if( p>=&aDoclist[nDoclist] ){ + *pbEof = 1; + }else{ + sqlite3_int64 iVar; + p += sqlite3Fts3GetVarint(p, &iVar); + *piDocid += ((bDescIdx ? -1 : 1) * iVar); + } + } + + *ppIter = p; +} + +/* +** Advance the iterator pDL to the next entry in pDL->aAll/nAll. Set *pbEof +** to true if EOF is reached. +*/ +static void fts3EvalDlPhraseNext( + Fts3Table *pTab, + Fts3Doclist *pDL, + u8 *pbEof +){ + char *pIter; /* Used to iterate through aAll */ + char *pEnd = &pDL->aAll[pDL->nAll]; /* 1 byte past end of aAll */ + + if( pDL->pNextDocid ){ + pIter = pDL->pNextDocid; + }else{ + pIter = pDL->aAll; + } + + if( pIter>=pEnd ){ + /* We have already reached the end of this doclist. EOF. */ + *pbEof = 1; + }else{ + sqlite3_int64 iDelta; + pIter += sqlite3Fts3GetVarint(pIter, &iDelta); + if( pTab->bDescIdx==0 || pDL->pNextDocid==0 ){ + pDL->iDocid += iDelta; + }else{ + pDL->iDocid -= iDelta; + } + pDL->pList = pIter; + fts3PoslistCopy(0, &pIter); + pDL->nList = (int)(pIter - pDL->pList); + + /* pIter now points just past the 0x00 that terminates the position- + ** list for document pDL->iDocid. However, if this position-list was + ** edited in place by fts3EvalNearTrim(), then pIter may not actually + ** point to the start of the next docid value. The following line deals + ** with this case by advancing pIter past the zero-padding added by + ** fts3EvalNearTrim(). */ + while( pIter<pEnd && *pIter==0 ) pIter++; + + pDL->pNextDocid = pIter; + assert( pIter>=&pDL->aAll[pDL->nAll] || *pIter ); + *pbEof = 0; + } +} + +/* +** Helper type used by fts3EvalIncrPhraseNext() and incrPhraseTokenNext(). +*/ +typedef struct TokenDoclist TokenDoclist; +struct TokenDoclist { + int bIgnore; + sqlite3_int64 iDocid; + char *pList; + int nList; +}; + +/* +** Token pToken is an incrementally loaded token that is part of a +** multi-token phrase. Advance it to the next matching document in the +** database and populate output variable *p with the details of the new +** entry. Or, if the iterator has reached EOF, set *pbEof to true. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. +*/ +static int incrPhraseTokenNext( + Fts3Table *pTab, /* Virtual table handle */ + Fts3Phrase *pPhrase, /* Phrase to advance token of */ + int iToken, /* Specific token to advance */ + TokenDoclist *p, /* OUT: Docid and doclist for new entry */ + u8 *pbEof /* OUT: True if iterator is at EOF */ +){ + int rc = SQLITE_OK; + + if( pPhrase->iDoclistToken==iToken ){ + assert( p->bIgnore==0 ); + assert( pPhrase->aToken[iToken].pSegcsr==0 ); + fts3EvalDlPhraseNext(pTab, &pPhrase->doclist, pbEof); + p->pList = pPhrase->doclist.pList; + p->nList = pPhrase->doclist.nList; + p->iDocid = pPhrase->doclist.iDocid; + }else{ + Fts3PhraseToken *pToken = &pPhrase->aToken[iToken]; + assert( pToken->pDeferred==0 ); + assert( pToken->pSegcsr || pPhrase->iDoclistToken>=0 ); + if( pToken->pSegcsr ){ + assert( p->bIgnore==0 ); + rc = sqlite3Fts3MsrIncrNext( + pTab, pToken->pSegcsr, &p->iDocid, &p->pList, &p->nList + ); + if( p->pList==0 ) *pbEof = 1; + }else{ + p->bIgnore = 1; + } + } + + return rc; +} + + +/* +** The phrase iterator passed as the second argument: +** +** * features at least one token that uses an incremental doclist, and +** +** * does not contain any deferred tokens. +** +** Advance it to the next matching documnent in the database and populate +** the Fts3Doclist.pList and nList fields. +** +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. +** +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. +*/ +static int fts3EvalIncrPhraseNext( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ +){ + int rc = SQLITE_OK; + Fts3Doclist *pDL = &p->doclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + u8 bEof = 0; + + /* This is only called if it is guaranteed that the phrase has at least + ** one incremental token. In which case the bIncr flag is set. */ + assert( p->bIncr==1 ); + + if( p->nToken==1 && p->bIncr ){ + rc = sqlite3Fts3MsrIncrNext(pTab, p->aToken[0].pSegcsr, + &pDL->iDocid, &pDL->pList, &pDL->nList + ); + if( pDL->pList==0 ) bEof = 1; + }else{ + int bDescDoclist = pCsr->bDesc; + struct TokenDoclist a[MAX_INCR_PHRASE_TOKENS]; + + memset(a, 0, sizeof(a)); + assert( p->nToken<=MAX_INCR_PHRASE_TOKENS ); + assert( p->iDoclistToken<MAX_INCR_PHRASE_TOKENS ); + + while( bEof==0 ){ + int bMaxSet = 0; + sqlite3_int64 iMax = 0; /* Largest docid for all iterators */ + int i; /* Used to iterate through tokens */ + + /* Advance the iterator for each token in the phrase once. */ + for(i=0; rc==SQLITE_OK && i<p->nToken && bEof==0; i++){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( a[i].bIgnore==0 && (bMaxSet==0 || DOCID_CMP(iMax, a[i].iDocid)<0) ){ + iMax = a[i].iDocid; + bMaxSet = 1; + } + } + assert( rc!=SQLITE_OK || a[p->nToken-1].bIgnore==0 ); + assert( rc!=SQLITE_OK || bMaxSet ); + + /* Keep advancing iterators until they all point to the same document */ + for(i=0; i<p->nToken; i++){ + while( rc==SQLITE_OK && bEof==0 + && a[i].bIgnore==0 && DOCID_CMP(a[i].iDocid, iMax)<0 + ){ + rc = incrPhraseTokenNext(pTab, p, i, &a[i], &bEof); + if( DOCID_CMP(a[i].iDocid, iMax)>0 ){ + iMax = a[i].iDocid; + i = 0; + } + } + } + + /* Check if the current entries really are a phrase match */ + if( bEof==0 ){ + int nList = 0; + int nByte = a[p->nToken-1].nList; + char *aDoclist = sqlite3_malloc(nByte+1); + if( !aDoclist ) return SQLITE_NOMEM; + memcpy(aDoclist, a[p->nToken-1].pList, nByte+1); + + for(i=0; i<(p->nToken-1); i++){ + if( a[i].bIgnore==0 ){ + char *pL = a[i].pList; + char *pR = aDoclist; + char *pOut = aDoclist; + int nDist = p->nToken-1-i; + int res = fts3PoslistPhraseMerge(&pOut, nDist, 0, 1, &pL, &pR); + if( res==0 ) break; + nList = (int)(pOut - aDoclist); + } + } + if( i==(p->nToken-1) ){ + pDL->iDocid = iMax; + pDL->pList = aDoclist; + pDL->nList = nList; + pDL->bFreeList = 1; + break; + } + sqlite3_free(aDoclist); + } + } + } + + *pbEof = bEof; + return rc; +} + +/* +** Attempt to move the phrase iterator to point to the next matching docid. +** If an error occurs, return an SQLite error code. Otherwise, return +** SQLITE_OK. +** +** If there is no "next" entry and no error occurs, then *pbEof is set to +** 1 before returning. Otherwise, if no error occurs and the iterator is +** successfully advanced, *pbEof is set to 0. +*/ +static int fts3EvalPhraseNext( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Phrase *p, /* Phrase object to advance to next docid */ + u8 *pbEof /* OUT: Set to 1 if EOF */ +){ + int rc = SQLITE_OK; + Fts3Doclist *pDL = &p->doclist; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + + if( p->bIncr ){ + rc = fts3EvalIncrPhraseNext(pCsr, p, pbEof); + }else if( pCsr->bDesc!=pTab->bDescIdx && pDL->nAll ){ + sqlite3Fts3DoclistPrev(pTab->bDescIdx, pDL->aAll, pDL->nAll, + &pDL->pNextDocid, &pDL->iDocid, &pDL->nList, pbEof + ); + pDL->pList = pDL->pNextDocid; + }else{ + fts3EvalDlPhraseNext(pTab, pDL, pbEof); + } + + return rc; +} + +/* +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, fts3EvalPhraseStart() is called on all phrases within the +** expression. Also the Fts3Expr.bDeferred variable is set to true for any +** expressions for which all descendent tokens are deferred. +** +** If parameter bOptOk is zero, then it is guaranteed that the +** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for +** each phrase in the expression (subject to deferred token processing). +** Or, if bOptOk is non-zero, then one or more tokens within the expression +** may be loaded incrementally, meaning doclist.aAll/nAll is not available. +** +** If an error occurs within this function, *pRc is set to an SQLite error +** code before returning. +*/ +static void fts3EvalStartReaders( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expression to initialize phrases in */ + int *pRc /* IN/OUT: Error code */ +){ + if( pExpr && SQLITE_OK==*pRc ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + int i; + int nToken = pExpr->pPhrase->nToken; + for(i=0; i<nToken; i++){ + if( pExpr->pPhrase->aToken[i].pDeferred==0 ) break; + } + pExpr->bDeferred = (i==nToken); + *pRc = fts3EvalPhraseStart(pCsr, 1, pExpr->pPhrase); + }else{ + fts3EvalStartReaders(pCsr, pExpr->pLeft, pRc); + fts3EvalStartReaders(pCsr, pExpr->pRight, pRc); + pExpr->bDeferred = (pExpr->pLeft->bDeferred && pExpr->pRight->bDeferred); + } + } +} + +/* +** An array of the following structures is assembled as part of the process +** of selecting tokens to defer before the query starts executing (as part +** of the xFilter() method). There is one element in the array for each +** token in the FTS expression. +** +** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong +** to phrases that are connected only by AND and NEAR operators (not OR or +** NOT). When determining tokens to defer, each AND/NEAR cluster is considered +** separately. The root of a tokens AND/NEAR cluster is stored in +** Fts3TokenAndCost.pRoot. +*/ +typedef struct Fts3TokenAndCost Fts3TokenAndCost; +struct Fts3TokenAndCost { + Fts3Phrase *pPhrase; /* The phrase the token belongs to */ + int iToken; /* Position of token in phrase */ + Fts3PhraseToken *pToken; /* The token itself */ + Fts3Expr *pRoot; /* Root of NEAR/AND cluster */ + int nOvfl; /* Number of overflow pages to load doclist */ + int iCol; /* The column the token must match */ +}; + +/* +** This function is used to populate an allocated Fts3TokenAndCost array. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, if an error occurs during execution, *pRc is set to an +** SQLite error code. +*/ +static void fts3EvalTokenCosts( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Root of current AND/NEAR cluster */ + Fts3Expr *pExpr, /* Expression to consider */ + Fts3TokenAndCost **ppTC, /* Write new entries to *(*ppTC)++ */ + Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */ + int *pRc /* IN/OUT: Error code */ +){ + if( *pRc==SQLITE_OK ){ + if( pExpr->eType==FTSQUERY_PHRASE ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + int i; + for(i=0; *pRc==SQLITE_OK && i<pPhrase->nToken; i++){ + Fts3TokenAndCost *pTC = (*ppTC)++; + pTC->pPhrase = pPhrase; + pTC->iToken = i; + pTC->pRoot = pRoot; + pTC->pToken = &pPhrase->aToken[i]; + pTC->iCol = pPhrase->iColumn; + *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl); + } + }else if( pExpr->eType!=FTSQUERY_NOT ){ + assert( pExpr->eType==FTSQUERY_OR + || pExpr->eType==FTSQUERY_AND + || pExpr->eType==FTSQUERY_NEAR + ); + assert( pExpr->pLeft && pExpr->pRight ); + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pLeft; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pLeft, ppTC, ppOr, pRc); + if( pExpr->eType==FTSQUERY_OR ){ + pRoot = pExpr->pRight; + **ppOr = pRoot; + (*ppOr)++; + } + fts3EvalTokenCosts(pCsr, pRoot, pExpr->pRight, ppTC, ppOr, pRc); + } + } +} + +/* +** Determine the average document (row) size in pages. If successful, +** write this value to *pnPage and return SQLITE_OK. Otherwise, return +** an SQLite error code. +** +** The average document size in pages is calculated by first calculating +** determining the average size in bytes, B. If B is less than the amount +** of data that will fit on a single leaf page of an intkey table in +** this database, then the average docsize is 1. Otherwise, it is 1 plus +** the number of overflow pages consumed by a record B bytes in size. +*/ +static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){ + if( pCsr->nRowAvg==0 ){ + /* The average document size, which is required to calculate the cost + ** of each doclist, has not yet been determined. Read the required + ** data from the %_stat table to calculate it. + ** + ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 + ** varints, where nCol is the number of columns in the FTS3 table. + ** The first varint is the number of documents currently stored in + ** the table. The following nCol varints contain the total amount of + ** data stored in all rows of each column of the table, from left + ** to right. + */ + int rc; + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + sqlite3_stmt *pStmt; + sqlite3_int64 nDoc = 0; + sqlite3_int64 nByte = 0; + const char *pEnd; + const char *a; + + rc = sqlite3Fts3SelectDoctotal(p, &pStmt); + if( rc!=SQLITE_OK ) return rc; + a = sqlite3_column_blob(pStmt, 0); + assert( a ); + + pEnd = &a[sqlite3_column_bytes(pStmt, 0)]; + a += sqlite3Fts3GetVarint(a, &nDoc); + while( a<pEnd ){ + a += sqlite3Fts3GetVarint(a, &nByte); + } + if( nDoc==0 || nByte==0 ){ + sqlite3_reset(pStmt); + return FTS_CORRUPT_VTAB; + } + + pCsr->nDoc = nDoc; + pCsr->nRowAvg = (int)(((nByte / nDoc) + p->nPgsz) / p->nPgsz); + assert( pCsr->nRowAvg>0 ); + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ) return rc; + } + + *pnPage = pCsr->nRowAvg; + return SQLITE_OK; +} + +/* +** This function is called to select the tokens (if any) that will be +** deferred. The array aTC[] has already been populated when this is +** called. +** +** This function is called once for each AND/NEAR cluster in the +** expression. Each invocation determines which tokens to defer within +** the cluster with root node pRoot. See comments above the definition +** of struct Fts3TokenAndCost for more details. +** +** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken() +** called on each token to defer. Otherwise, an SQLite error code is +** returned. +*/ +static int fts3EvalSelectDeferred( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pRoot, /* Consider tokens with this root node */ + Fts3TokenAndCost *aTC, /* Array of expression tokens and costs */ + int nTC /* Number of entries in aTC[] */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int nDocSize = 0; /* Number of pages per doc loaded */ + int rc = SQLITE_OK; /* Return code */ + int ii; /* Iterator variable for various purposes */ + int nOvfl = 0; /* Total overflow pages used by doclists */ + int nToken = 0; /* Total number of tokens in cluster */ + + int nMinEst = 0; /* The minimum count for any phrase so far. */ + int nLoad4 = 1; /* (Phrases that will be loaded)^4. */ + + /* Tokens are never deferred for FTS tables created using the content=xxx + ** option. The reason being that it is not guaranteed that the content + ** table actually contains the same data as the index. To prevent this from + ** causing any problems, the deferred token optimization is completely + ** disabled for content=xxx tables. */ + if( pTab->zContentTbl ){ + return SQLITE_OK; + } + + /* Count the tokens in this AND/NEAR cluster. If none of the doclists + ** associated with the tokens spill onto overflow pages, or if there is + ** only 1 token, exit early. No tokens to defer in this case. */ + for(ii=0; ii<nTC; ii++){ + if( aTC[ii].pRoot==pRoot ){ + nOvfl += aTC[ii].nOvfl; + nToken++; + } + } + if( nOvfl==0 || nToken<2 ) return SQLITE_OK; + + /* Obtain the average docsize (in pages). */ + rc = fts3EvalAverageDocsize(pCsr, &nDocSize); + assert( rc!=SQLITE_OK || nDocSize>0 ); + + + /* Iterate through all tokens in this AND/NEAR cluster, in ascending order + ** of the number of overflow pages that will be loaded by the pager layer + ** to retrieve the entire doclist for the token from the full-text index. + ** Load the doclists for tokens that are either: + ** + ** a. The cheapest token in the entire query (i.e. the one visited by the + ** first iteration of this loop), or + ** + ** b. Part of a multi-token phrase. + ** + ** After each token doclist is loaded, merge it with the others from the + ** same phrase and count the number of documents that the merged doclist + ** contains. Set variable "nMinEst" to the smallest number of documents in + ** any phrase doclist for which 1 or more token doclists have been loaded. + ** Let nOther be the number of other phrases for which it is certain that + ** one or more tokens will not be deferred. + ** + ** Then, for each token, defer it if loading the doclist would result in + ** loading N or more overflow pages into memory, where N is computed as: + ** + ** (nMinEst + 4^nOther - 1) / (4^nOther) + */ + for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){ + int iTC; /* Used to iterate through aTC[] array. */ + Fts3TokenAndCost *pTC = 0; /* Set to cheapest remaining token. */ + + /* Set pTC to point to the cheapest remaining token. */ + for(iTC=0; iTC<nTC; iTC++){ + if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot + && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl) + ){ + pTC = &aTC[iTC]; + } + } + assert( pTC ); + + if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){ + /* The number of overflow pages to load for this (and therefore all + ** subsequent) tokens is greater than the estimated number of pages + ** that will be loaded if all subsequent tokens are deferred. + */ + Fts3PhraseToken *pToken = pTC->pToken; + rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol); + fts3SegReaderCursorFree(pToken->pSegcsr); + pToken->pSegcsr = 0; + }else{ + /* Set nLoad4 to the value of (4^nOther) for the next iteration of the + ** for-loop. Except, limit the value to 2^24 to prevent it from + ** overflowing the 32-bit integer it is stored in. */ + if( ii<12 ) nLoad4 = nLoad4*4; + + if( ii==0 || (pTC->pPhrase->nToken>1 && ii!=nToken-1) ){ + /* Either this is the cheapest token in the entire query, or it is + ** part of a multi-token phrase. Either way, the entire doclist will + ** (eventually) be loaded into memory. It may as well be now. */ + Fts3PhraseToken *pToken = pTC->pToken; + int nList = 0; + char *pList = 0; + rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList); + assert( rc==SQLITE_OK || pList==0 ); + if( rc==SQLITE_OK ){ + int nCount; + fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList); + nCount = fts3DoclistCountDocids( + pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll + ); + if( ii==0 || nCount<nMinEst ) nMinEst = nCount; + } + } + } + pTC->pToken = 0; + } + + return rc; +} + +/* +** This function is called from within the xFilter method. It initializes +** the full-text query currently stored in pCsr->pExpr. To iterate through +** the results of a query, the caller does: +** +** fts3EvalStart(pCsr); +** while( 1 ){ +** fts3EvalNext(pCsr); +** if( pCsr->bEof ) break; +** ... return row pCsr->iPrevId to the caller ... +** } +*/ +static int fts3EvalStart(Fts3Cursor *pCsr){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int nToken = 0; + int nOr = 0; + + /* Allocate a MultiSegReader for each token in the expression. */ + fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc); + + /* Determine which, if any, tokens in the expression should be deferred. */ +#ifndef SQLITE_DISABLE_FTS4_DEFERRED + if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){ + Fts3TokenAndCost *aTC; + Fts3Expr **apOr; + aTC = (Fts3TokenAndCost *)sqlite3_malloc( + sizeof(Fts3TokenAndCost) * nToken + + sizeof(Fts3Expr *) * nOr * 2 + ); + apOr = (Fts3Expr **)&aTC[nToken]; + + if( !aTC ){ + rc = SQLITE_NOMEM; + }else{ + int ii; + Fts3TokenAndCost *pTC = aTC; + Fts3Expr **ppOr = apOr; + + fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc); + nToken = (int)(pTC-aTC); + nOr = (int)(ppOr-apOr); + + if( rc==SQLITE_OK ){ + rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken); + for(ii=0; rc==SQLITE_OK && ii<nOr; ii++){ + rc = fts3EvalSelectDeferred(pCsr, apOr[ii], aTC, nToken); + } + } + + sqlite3_free(aTC); + } + } +#endif + + fts3EvalStartReaders(pCsr, pCsr->pExpr, &rc); + return rc; +} + +/* +** Invalidate the current position list for phrase pPhrase. +*/ +static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){ + if( pPhrase->doclist.bFreeList ){ + sqlite3_free(pPhrase->doclist.pList); + } + pPhrase->doclist.pList = 0; + pPhrase->doclist.nList = 0; + pPhrase->doclist.bFreeList = 0; +} + +/* +** This function is called to edit the position list associated with +** the phrase object passed as the fifth argument according to a NEAR +** condition. For example: +** +** abc NEAR/5 "def ghi" +** +** Parameter nNear is passed the NEAR distance of the expression (5 in +** the example above). When this function is called, *paPoslist points to +** the position list, and *pnToken is the number of phrase tokens in, the +** phrase on the other side of the NEAR operator to pPhrase. For example, +** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to +** the position list associated with phrase "abc". +** +** All positions in the pPhrase position list that are not sufficiently +** close to a position in the *paPoslist position list are removed. If this +** leaves 0 positions, zero is returned. Otherwise, non-zero. +** +** Before returning, *paPoslist is set to point to the position lsit +** associated with pPhrase. And *pnToken is set to the number of tokens in +** pPhrase. +*/ +static int fts3EvalNearTrim( + int nNear, /* NEAR distance. As in "NEAR/nNear". */ + char *aTmp, /* Temporary space to use */ + char **paPoslist, /* IN/OUT: Position list */ + int *pnToken, /* IN/OUT: Tokens in phrase of *paPoslist */ + Fts3Phrase *pPhrase /* The phrase object to trim the doclist of */ +){ + int nParam1 = nNear + pPhrase->nToken; + int nParam2 = nNear + *pnToken; + int nNew; + char *p2; + char *pOut; + int res; + + assert( pPhrase->doclist.pList ); + + p2 = pOut = pPhrase->doclist.pList; + res = fts3PoslistNearMerge( + &pOut, aTmp, nParam1, nParam2, paPoslist, &p2 + ); + if( res ){ + nNew = (int)(pOut - pPhrase->doclist.pList) - 1; + assert( pPhrase->doclist.pList[nNew]=='\0' ); + assert( nNew<=pPhrase->doclist.nList && nNew>0 ); + memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew); + pPhrase->doclist.nList = nNew; + *paPoslist = pPhrase->doclist.pList; + *pnToken = pPhrase->nToken; + } + + return res; +} + +/* +** This function is a no-op if *pRc is other than SQLITE_OK when it is called. +** Otherwise, it advances the expression passed as the second argument to +** point to the next matching row in the database. Expressions iterate through +** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero, +** or descending if it is non-zero. +** +** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if +** successful, the following variables in pExpr are set: +** +** Fts3Expr.bEof (non-zero if EOF - there is no next row) +** Fts3Expr.iDocid (valid if bEof==0. The docid of the next row) +** +** If the expression is of type FTSQUERY_PHRASE, and the expression is not +** at EOF, then the following variables are populated with the position list +** for the phrase for the visited row: +** +** FTs3Expr.pPhrase->doclist.nList (length of pList in bytes) +** FTs3Expr.pPhrase->doclist.pList (pointer to position list) +** +** It says above that this function advances the expression to the next +** matching row. This is usually true, but there are the following exceptions: +** +** 1. Deferred tokens are not taken into account. If a phrase consists +** entirely of deferred tokens, it is assumed to match every row in +** the db. In this case the position-list is not populated at all. +** +** Or, if a phrase contains one or more deferred tokens and one or +** more non-deferred tokens, then the expression is advanced to the +** next possible match, considering only non-deferred tokens. In other +** words, if the phrase is "A B C", and "B" is deferred, the expression +** is advanced to the next row that contains an instance of "A * C", +** where "*" may match any single token. The position list in this case +** is populated as for "A * C" before returning. +** +** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is +** advanced to point to the next row that matches "x AND y". +** +** See fts3EvalTestDeferredAndNear() for details on testing if a row is +** really a match, taking into account deferred tokens and NEAR operators. +*/ +static void fts3EvalNextRow( + Fts3Cursor *pCsr, /* FTS Cursor handle */ + Fts3Expr *pExpr, /* Expr. to advance to next matching row */ + int *pRc /* IN/OUT: Error code */ +){ + if( *pRc==SQLITE_OK ){ + int bDescDoclist = pCsr->bDesc; /* Used by DOCID_CMP() macro */ + assert( pExpr->bEof==0 ); + pExpr->bStart = 1; + + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + assert( !pLeft->bDeferred || !pRight->bDeferred ); + + if( pLeft->bDeferred ){ + /* LHS is entirely deferred. So we assume it matches every row. + ** Advance the RHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pRight, pRc); + pExpr->iDocid = pRight->iDocid; + pExpr->bEof = pRight->bEof; + }else if( pRight->bDeferred ){ + /* RHS is entirely deferred. So we assume it matches every row. + ** Advance the LHS iterator to find the next row visited. */ + fts3EvalNextRow(pCsr, pLeft, pRc); + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + }else{ + /* Neither the RHS or LHS are deferred. */ + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); + while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){ + sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( iDiff==0 ) break; + if( iDiff<0 ){ + fts3EvalNextRow(pCsr, pLeft, pRc); + }else{ + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = (pLeft->bEof || pRight->bEof); + } + break; + } + + case FTSQUERY_OR: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + sqlite3_int64 iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + + assert( pLeft->bStart || pLeft->iDocid==pRight->iDocid ); + assert( pRight->bStart || pLeft->iDocid==pRight->iDocid ); + + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + fts3EvalNextRow(pCsr, pLeft, pRc); + }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){ + fts3EvalNextRow(pCsr, pRight, pRc); + }else{ + fts3EvalNextRow(pCsr, pLeft, pRc); + fts3EvalNextRow(pCsr, pRight, pRc); + } + + pExpr->bEof = (pLeft->bEof && pRight->bEof); + iCmp = DOCID_CMP(pLeft->iDocid, pRight->iDocid); + if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){ + pExpr->iDocid = pLeft->iDocid; + }else{ + pExpr->iDocid = pRight->iDocid; + } + + break; + } + + case FTSQUERY_NOT: { + Fts3Expr *pLeft = pExpr->pLeft; + Fts3Expr *pRight = pExpr->pRight; + + if( pRight->bStart==0 ){ + fts3EvalNextRow(pCsr, pRight, pRc); + assert( *pRc!=SQLITE_OK || pRight->bStart ); + } + + fts3EvalNextRow(pCsr, pLeft, pRc); + if( pLeft->bEof==0 ){ + while( !*pRc + && !pRight->bEof + && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 + ){ + fts3EvalNextRow(pCsr, pRight, pRc); + } + } + pExpr->iDocid = pLeft->iDocid; + pExpr->bEof = pLeft->bEof; + break; + } + + default: { + Fts3Phrase *pPhrase = pExpr->pPhrase; + fts3EvalInvalidatePoslist(pPhrase); + *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof); + pExpr->iDocid = pPhrase->doclist.iDocid; + break; + } + } + } +} + +/* +** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR +** cluster, then this function returns 1 immediately. +** +** Otherwise, it checks if the current row really does match the NEAR +** expression, using the data currently stored in the position lists +** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. +** +** If the current row is a match, the position list associated with each +** phrase in the NEAR expression is edited in place to contain only those +** phrase instances sufficiently close to their peers to satisfy all NEAR +** constraints. In this case it returns 1. If the NEAR expression does not +** match the current row, 0 is returned. The position lists may or may not +** be edited if 0 is returned. +*/ +static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ + int res = 1; + + /* The following block runs if pExpr is the root of a NEAR query. + ** For example, the query: + ** + ** "w" NEAR "x" NEAR "y" NEAR "z" + ** + ** which is represented in tree form as: + ** + ** | + ** +--NEAR--+ <-- root of NEAR query + ** | | + ** +--NEAR--+ "z" + ** | | + ** +--NEAR--+ "y" + ** | | + ** "w" "x" + ** + ** The right-hand child of a NEAR node is always a phrase. The + ** left-hand child may be either a phrase or a NEAR node. There are + ** no exceptions to this - it's the way the parser in fts3_expr.c works. + */ + if( *pRc==SQLITE_OK + && pExpr->eType==FTSQUERY_NEAR + && pExpr->bEof==0 + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + int nTmp = 0; /* Bytes of temp space */ + char *aTmp; /* Temp space for PoslistNearMerge() */ + + /* Allocate temporary working space. */ + for(p=pExpr; p->pLeft; p=p->pLeft){ + nTmp += p->pRight->pPhrase->doclist.nList; + } + nTmp += p->pPhrase->doclist.nList; + if( nTmp==0 ){ + res = 0; + }else{ + aTmp = sqlite3_malloc(nTmp*2); + if( !aTmp ){ + *pRc = SQLITE_NOMEM; + res = 0; + }else{ + char *aPoslist = p->pPhrase->doclist.pList; + int nToken = p->pPhrase->nToken; + + for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){ + Fts3Phrase *pPhrase = p->pRight->pPhrase; + int nNear = p->nNear; + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + + aPoslist = pExpr->pRight->pPhrase->doclist.pList; + nToken = pExpr->pRight->pPhrase->nToken; + for(p=pExpr->pLeft; p && res; p=p->pLeft){ + int nNear; + Fts3Phrase *pPhrase; + assert( p->pParent && p->pParent->pLeft==p ); + nNear = p->pParent->nNear; + pPhrase = ( + p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase + ); + res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase); + } + } + + sqlite3_free(aTmp); + } + } + + return res; +} + +/* +** This function is a helper function for fts3EvalTestDeferredAndNear(). +** Assuming no error occurs or has occurred, It returns non-zero if the +** expression passed as the second argument matches the row that pCsr +** currently points to, or zero if it does not. +** +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** If an error occurs during execution of this function, *pRc is set to +** the appropriate SQLite error code. In this case the returned value is +** undefined. +*/ +static int fts3EvalTestExpr( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Expr to test. May or may not be root. */ + int *pRc /* IN/OUT: Error code */ +){ + int bHit = 1; /* Return value */ + if( *pRc==SQLITE_OK ){ + switch( pExpr->eType ){ + case FTSQUERY_NEAR: + case FTSQUERY_AND: + bHit = ( + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) + && fts3EvalNearTest(pExpr, pRc) + ); + + /* If the NEAR expression does not match any rows, zero the doclist for + ** all phrases involved in the NEAR. This is because the snippet(), + ** offsets() and matchinfo() functions are not supposed to recognize + ** any instances of phrases that are part of unmatched NEAR queries. + ** For example if this expression: + ** + ** ... MATCH 'a OR (b NEAR c)' + ** + ** is matched against a row containing: + ** + ** 'a b d e' + ** + ** then any snippet() should ony highlight the "a" term, not the "b" + ** (as "b" is part of a non-matching NEAR clause). + */ + if( bHit==0 + && pExpr->eType==FTSQUERY_NEAR + && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR) + ){ + Fts3Expr *p; + for(p=pExpr; p->pPhrase==0; p=p->pLeft){ + if( p->pRight->iDocid==pCsr->iPrevId ){ + fts3EvalInvalidatePoslist(p->pRight->pPhrase); + } + } + if( p->iDocid==pCsr->iPrevId ){ + fts3EvalInvalidatePoslist(p->pPhrase); + } + } + + break; + + case FTSQUERY_OR: { + int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc); + int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc); + bHit = bHit1 || bHit2; + break; + } + + case FTSQUERY_NOT: + bHit = ( + fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc) + && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc) + ); + break; + + default: { +#ifndef SQLITE_DISABLE_FTS4_DEFERRED + if( pCsr->pDeferred + && (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred) + ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 ); + if( pExpr->bDeferred ){ + fts3EvalInvalidatePoslist(pPhrase); + } + *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase); + bHit = (pPhrase->doclist.pList!=0); + pExpr->iDocid = pCsr->iPrevId; + }else +#endif + { + bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId); + } + break; + } + } + } + return bHit; +} + +/* +** This function is called as the second part of each xNext operation when +** iterating through the results of a full-text query. At this point the +** cursor points to a row that matches the query expression, with the +** following caveats: +** +** * Up until this point, "NEAR" operators in the expression have been +** treated as "AND". +** +** * Deferred tokens have not yet been considered. +** +** If *pRc is not SQLITE_OK when this function is called, it immediately +** returns 0. Otherwise, it tests whether or not after considering NEAR +** operators and deferred tokens the current row is still a match for the +** expression. It returns 1 if both of the following are true: +** +** 1. *pRc is SQLITE_OK when this function returns, and +** +** 2. After scanning the current FTS table row for the deferred tokens, +** it is determined that the row does *not* match the query. +** +** Or, if no error occurs and it seems the current row does match the FTS +** query, return 0. +*/ +static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){ + int rc = *pRc; + int bMiss = 0; + if( rc==SQLITE_OK ){ + + /* If there are one or more deferred tokens, load the current row into + ** memory and scan it to determine the position list for each deferred + ** token. Then, see if this row is really a match, considering deferred + ** tokens and NEAR operators (neither of which were taken into account + ** earlier, by fts3EvalNextRow()). + */ + if( pCsr->pDeferred ){ + rc = fts3CursorSeek(0, pCsr); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3CacheDeferredDoclists(pCsr); + } + } + bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc)); + + /* Free the position-lists accumulated for each deferred token above. */ + sqlite3Fts3FreeDeferredDoclists(pCsr); + *pRc = rc; + } + return (rc==SQLITE_OK && bMiss); +} + +/* +** Advance to the next document that matches the FTS expression in +** Fts3Cursor.pExpr. +*/ +static int fts3EvalNext(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return Code */ + Fts3Expr *pExpr = pCsr->pExpr; + assert( pCsr->isEof==0 ); + if( pExpr==0 ){ + pCsr->isEof = 1; + }else{ + do { + if( pCsr->isRequireSeek==0 ){ + sqlite3_reset(pCsr->pStmt); + } + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + fts3EvalNextRow(pCsr, pExpr, &rc); + pCsr->isEof = pExpr->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pExpr->iDocid; + }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) ); + } + + /* Check if the cursor is past the end of the docid range specified + ** by Fts3Cursor.iMinDocid/iMaxDocid. If so, set the EOF flag. */ + if( rc==SQLITE_OK && ( + (pCsr->bDesc==0 && pCsr->iPrevId>pCsr->iMaxDocid) + || (pCsr->bDesc!=0 && pCsr->iPrevId<pCsr->iMinDocid) + )){ + pCsr->isEof = 1; + } + + return rc; +} + +/* +** Restart interation for expression pExpr so that the next call to +** fts3EvalNext() visits the first row. Do not allow incremental +** loading or merging of phrase doclists for this iteration. +** +** If *pRc is other than SQLITE_OK when this function is called, it is +** a no-op. If an error occurs within this function, *pRc is set to an +** SQLite error code before returning. +*/ +static void fts3EvalRestart( + Fts3Cursor *pCsr, + Fts3Expr *pExpr, + int *pRc +){ + if( pExpr && *pRc==SQLITE_OK ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + + if( pPhrase ){ + fts3EvalInvalidatePoslist(pPhrase); + if( pPhrase->bIncr ){ + int i; + for(i=0; i<pPhrase->nToken; i++){ + Fts3PhraseToken *pToken = &pPhrase->aToken[i]; + assert( pToken->pDeferred==0 ); + if( pToken->pSegcsr ){ + sqlite3Fts3MsrIncrRestart(pToken->pSegcsr); + } + } + *pRc = fts3EvalPhraseStart(pCsr, 0, pPhrase); + } + pPhrase->doclist.pNextDocid = 0; + pPhrase->doclist.iDocid = 0; + } + + pExpr->iDocid = 0; + pExpr->bEof = 0; + pExpr->bStart = 0; + + fts3EvalRestart(pCsr, pExpr->pLeft, pRc); + fts3EvalRestart(pCsr, pExpr->pRight, pRc); + } +} + +/* +** After allocating the Fts3Expr.aMI[] array for each phrase in the +** expression rooted at pExpr, the cursor iterates through all rows matched +** by pExpr, calling this function for each row. This function increments +** the values in Fts3Expr.aMI[] according to the position-list currently +** found in Fts3Expr.pPhrase->doclist.pList for each of the phrase +** expression nodes. +*/ +static void fts3EvalUpdateCounts(Fts3Expr *pExpr){ + if( pExpr ){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + if( pPhrase && pPhrase->doclist.pList ){ + int iCol = 0; + char *p = pPhrase->doclist.pList; + + assert( *p ); + while( 1 ){ + u8 c = 0; + int iCnt = 0; + while( 0xFE & (*p | c) ){ + if( (c&0x80)==0 ) iCnt++; + c = *p++ & 0x80; + } + + /* aMI[iCol*3 + 1] = Number of occurrences + ** aMI[iCol*3 + 2] = Number of rows containing at least one instance + */ + pExpr->aMI[iCol*3 + 1] += iCnt; + pExpr->aMI[iCol*3 + 2] += (iCnt>0); + if( *p==0x00 ) break; + p++; + p += fts3GetVarint32(p, &iCol); + } + } + + fts3EvalUpdateCounts(pExpr->pLeft); + fts3EvalUpdateCounts(pExpr->pRight); + } +} + +/* +** Expression pExpr must be of type FTSQUERY_PHRASE. +** +** If it is not already allocated and populated, this function allocates and +** populates the Fts3Expr.aMI[] array for expression pExpr. If pExpr is part +** of a NEAR expression, then it also allocates and populates the same array +** for all other phrases that are part of the NEAR expression. +** +** SQLITE_OK is returned if the aMI[] array is successfully allocated and +** populated. Otherwise, if an error occurs, an SQLite error code is returned. +*/ +static int fts3EvalGatherStats( + Fts3Cursor *pCsr, /* Cursor object */ + Fts3Expr *pExpr /* FTSQUERY_PHRASE expression */ +){ + int rc = SQLITE_OK; /* Return code */ + + assert( pExpr->eType==FTSQUERY_PHRASE ); + if( pExpr->aMI==0 ){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + Fts3Expr *pRoot; /* Root of NEAR expression */ + Fts3Expr *p; /* Iterator used for several purposes */ + + sqlite3_int64 iPrevId = pCsr->iPrevId; + sqlite3_int64 iDocid; + u8 bEof; + + /* Find the root of the NEAR expression */ + pRoot = pExpr; + while( pRoot->pParent && pRoot->pParent->eType==FTSQUERY_NEAR ){ + pRoot = pRoot->pParent; + } + iDocid = pRoot->iDocid; + bEof = pRoot->bEof; + assert( pRoot->bStart ); + + /* Allocate space for the aMSI[] array of each FTSQUERY_PHRASE node */ + for(p=pRoot; p; p=p->pLeft){ + Fts3Expr *pE = (p->eType==FTSQUERY_PHRASE?p:p->pRight); + assert( pE->aMI==0 ); + pE->aMI = (u32 *)sqlite3_malloc(pTab->nColumn * 3 * sizeof(u32)); + if( !pE->aMI ) return SQLITE_NOMEM; + memset(pE->aMI, 0, pTab->nColumn * 3 * sizeof(u32)); + } + + fts3EvalRestart(pCsr, pRoot, &rc); + + while( pCsr->isEof==0 && rc==SQLITE_OK ){ + + do { + /* Ensure the %_content statement is reset. */ + if( pCsr->isRequireSeek==0 ) sqlite3_reset(pCsr->pStmt); + assert( sqlite3_data_count(pCsr->pStmt)==0 ); + + /* Advance to the next document */ + fts3EvalNextRow(pCsr, pRoot, &rc); + pCsr->isEof = pRoot->bEof; + pCsr->isRequireSeek = 1; + pCsr->isMatchinfoNeeded = 1; + pCsr->iPrevId = pRoot->iDocid; + }while( pCsr->isEof==0 + && pRoot->eType==FTSQUERY_NEAR + && fts3EvalTestDeferredAndNear(pCsr, &rc) + ); + + if( rc==SQLITE_OK && pCsr->isEof==0 ){ + fts3EvalUpdateCounts(pRoot); + } + } + + pCsr->isEof = 0; + pCsr->iPrevId = iPrevId; + + if( bEof ){ + pRoot->bEof = bEof; + }else{ + /* Caution: pRoot may iterate through docids in ascending or descending + ** order. For this reason, even though it seems more defensive, the + ** do loop can not be written: + ** + ** do {...} while( pRoot->iDocid<iDocid && rc==SQLITE_OK ); + */ + fts3EvalRestart(pCsr, pRoot, &rc); + do { + fts3EvalNextRow(pCsr, pRoot, &rc); + assert( pRoot->bEof==0 ); + }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); + fts3EvalTestDeferredAndNear(pCsr, &rc); + } + } + return rc; +} + +/* +** This function is used by the matchinfo() module to query a phrase +** expression node for the following information: +** +** 1. The total number of occurrences of the phrase in each column of +** the FTS table (considering all rows), and +** +** 2. For each column, the number of rows in the table for which the +** column contains at least one instance of the phrase. +** +** If no error occurs, SQLITE_OK is returned and the values for each column +** written into the array aiOut as follows: +** +** aiOut[iCol*3 + 1] = Number of occurrences +** aiOut[iCol*3 + 2] = Number of rows containing at least one instance +** +** Caveats: +** +** * If a phrase consists entirely of deferred tokens, then all output +** values are set to the number of documents in the table. In other +** words we assume that very common tokens occur exactly once in each +** column of each row of the table. +** +** * If a phrase contains some deferred tokens (and some non-deferred +** tokens), count the potential occurrence identified by considering +** the non-deferred tokens instead of actual phrase occurrences. +** +** * If the phrase is part of a NEAR expression, then only phrase instances +** that meet the NEAR constraint are included in the counts. +*/ +int sqlite3Fts3EvalPhraseStats( + Fts3Cursor *pCsr, /* FTS cursor handle */ + Fts3Expr *pExpr, /* Phrase expression */ + u32 *aiOut /* Array to write results into (see above) */ +){ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int iCol; + + if( pExpr->bDeferred && pExpr->pParent->eType!=FTSQUERY_NEAR ){ + assert( pCsr->nDoc>0 ); + for(iCol=0; iCol<pTab->nColumn; iCol++){ + aiOut[iCol*3 + 1] = (u32)pCsr->nDoc; + aiOut[iCol*3 + 2] = (u32)pCsr->nDoc; + } + }else{ + rc = fts3EvalGatherStats(pCsr, pExpr); + if( rc==SQLITE_OK ){ + assert( pExpr->aMI ); + for(iCol=0; iCol<pTab->nColumn; iCol++){ + aiOut[iCol*3 + 1] = pExpr->aMI[iCol*3 + 1]; + aiOut[iCol*3 + 2] = pExpr->aMI[iCol*3 + 2]; + } + } + } + + return rc; +} + +/* +** The expression pExpr passed as the second argument to this function +** must be of type FTSQUERY_PHRASE. +** +** The returned value is either NULL or a pointer to a buffer containing +** a position-list indicating the occurrences of the phrase in column iCol +** of the current row. +** +** More specifically, the returned buffer contains 1 varint for each +** occurrence of the phrase in the column, stored using the normal (delta+2) +** compression and is terminated by either an 0x01 or 0x00 byte. For example, +** if the requested column contains "a b X c d X X" and the position-list +** for 'X' is requested, the buffer returned may contain: +** +** 0x04 0x05 0x03 0x01 or 0x04 0x05 0x03 0x00 +** +** This function works regardless of whether or not the phrase is deferred, +** incremental, or neither. +*/ +int sqlite3Fts3EvalPhrasePoslist( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + Fts3Expr *pExpr, /* Phrase to return doclist for */ + int iCol, /* Column to return position list for */ + char **ppOut /* OUT: Pointer to position list */ +){ + Fts3Phrase *pPhrase = pExpr->pPhrase; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + char *pIter; + int iThis; + sqlite3_int64 iDocid; + + /* If this phrase is applies specifically to some column other than + ** column iCol, return a NULL pointer. */ + *ppOut = 0; + assert( iCol>=0 && iCol<pTab->nColumn ); + if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){ + return SQLITE_OK; + } + + iDocid = pExpr->iDocid; + pIter = pPhrase->doclist.pList; + if( iDocid!=pCsr->iPrevId || pExpr->bEof ){ + int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ + int iMul; /* +1 if csr dir matches index dir, else -1 */ + int bOr = 0; + u8 bEof = 0; + u8 bTreeEof = 0; + Fts3Expr *p; /* Used to iterate from pExpr to root */ + Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ + + /* Check if this phrase descends from an OR expression node. If not, + ** return NULL. Otherwise, the entry that corresponds to docid + ** pCsr->iPrevId may lie earlier in the doclist buffer. Or, if the + ** tree that the node is part of has been marked as EOF, but the node + ** itself is not EOF, then it may point to an earlier entry. */ + pNear = pExpr; + for(p=pExpr->pParent; p; p=p->pParent){ + if( p->eType==FTSQUERY_OR ) bOr = 1; + if( p->eType==FTSQUERY_NEAR ) pNear = p; + if( p->bEof ) bTreeEof = 1; + } + if( bOr==0 ) return SQLITE_OK; + + /* This is the descendent of an OR node. In this case we cannot use + ** an incremental phrase. Load the entire doclist for the phrase + ** into memory in this case. */ + if( pPhrase->bIncr ){ + int rc = SQLITE_OK; + int bEofSave = pExpr->bEof; + fts3EvalRestart(pCsr, pExpr, &rc); + while( rc==SQLITE_OK && !pExpr->bEof ){ + fts3EvalNextRow(pCsr, pExpr, &rc); + if( bEofSave==0 && pExpr->iDocid==iDocid ) break; + } + pIter = pPhrase->doclist.pList; + assert( rc!=SQLITE_OK || pPhrase->bIncr==0 ); + if( rc!=SQLITE_OK ) return rc; + } + + iMul = ((pCsr->bDesc==bDescDoclist) ? 1 : -1); + while( bTreeEof==1 + && pNear->bEof==0 + && (DOCID_CMP(pNear->iDocid, pCsr->iPrevId) * iMul)<0 + ){ + int rc = SQLITE_OK; + fts3EvalNextRow(pCsr, pExpr, &rc); + if( rc!=SQLITE_OK ) return rc; + iDocid = pExpr->iDocid; + pIter = pPhrase->doclist.pList; + } + + bEof = (pPhrase->doclist.nAll==0); + assert( bDescDoclist==0 || bDescDoclist==1 ); + assert( pCsr->bDesc==0 || pCsr->bDesc==1 ); + + if( bEof==0 ){ + if( pCsr->bDesc==bDescDoclist ){ + int dummy; + if( pNear->bEof ){ + /* This expression is already at EOF. So position it to point to the + ** last entry in the doclist at pPhrase->doclist.aAll[]. Variable + ** iDocid is already set for this entry, so all that is required is + ** to set pIter to point to the first byte of the last position-list + ** in the doclist. + ** + ** It would also be correct to set pIter and iDocid to zero. In + ** this case, the first call to sqltie3Fts4DoclistPrev() below + ** would also move the iterator to point to the last entry in the + ** doclist. However, this is expensive, as to do so it has to + ** iterate through the entire doclist from start to finish (since + ** it does not know the docid for the last entry). */ + pIter = &pPhrase->doclist.aAll[pPhrase->doclist.nAll-1]; + fts3ReversePoslist(pPhrase->doclist.aAll, &pIter); + } + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ + sqlite3Fts3DoclistPrev( + bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, + &pIter, &iDocid, &dummy, &bEof + ); + } + }else{ + if( pNear->bEof ){ + pIter = 0; + iDocid = 0; + } + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ + sqlite3Fts3DoclistNext( + bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, + &pIter, &iDocid, &bEof + ); + } + } + } + + if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0; + } + if( pIter==0 ) return SQLITE_OK; + + if( *pIter==0x01 ){ + pIter++; + pIter += fts3GetVarint32(pIter, &iThis); + }else{ + iThis = 0; + } + while( iThis<iCol ){ + fts3ColumnlistCopy(0, &pIter); + if( *pIter==0x00 ) return 0; + pIter++; + pIter += fts3GetVarint32(pIter, &iThis); + } + + *ppOut = ((iCol==iThis)?pIter:0); + return SQLITE_OK; +} + +/* +** Free all components of the Fts3Phrase structure that were allocated by +** the eval module. Specifically, this means to free: +** +** * the contents of pPhrase->doclist, and +** * any Fts3MultiSegReader objects held by phrase tokens. +*/ +void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){ + if( pPhrase ){ + int i; + sqlite3_free(pPhrase->doclist.aAll); + fts3EvalInvalidatePoslist(pPhrase); + memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist)); + for(i=0; i<pPhrase->nToken; i++){ + fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr); + pPhrase->aToken[i].pSegcsr = 0; + } + } +} + + +/* +** Return SQLITE_CORRUPT_VTAB. +*/ +#ifdef SQLITE_DEBUG +int sqlite3Fts3Corrupt(){ + return SQLITE_CORRUPT_VTAB; +} +#endif + #if !SQLITE_CORE -int sqlite3_extension_init( +/* +** Initialize API pointer table, if required. +*/ +#ifdef _WIN32 +__declspec(dllexport) +#endif +int sqlite3_fts3_init( sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi |
