summaryrefslogtreecommitdiff
path: root/src/libtracker-fts
diff options
context:
space:
mode:
authorCarlos Garnacho <carlos@lanedo.com>2013-01-21 16:55:33 +0100
committerMartyn Russell <martyn@lanedo.com>2013-02-04 15:43:13 +0000
commitee253d914609656ddcf3c4b9c3f0a0ffc07c00c8 (patch)
tree7b22d3bbcc669eaa805911d4ae63e073689faa9d /src/libtracker-fts
parent5f30d16fcdf1d07802c0fb02edefce51c8bbcfaf (diff)
downloadtracker-ee253d914609656ddcf3c4b9c3f0a0ffc07c00c8.tar.gz
Update FTS code
Diffstat (limited to 'src/libtracker-fts')
-rw-r--r--src/libtracker-fts/Makefile.am2
-rw-r--r--src/libtracker-fts/fts3.c889
-rw-r--r--src/libtracker-fts/fts3.h9
-rw-r--r--src/libtracker-fts/fts3Int.h91
-rw-r--r--src/libtracker-fts/fts3_aux.c6
-rw-r--r--src/libtracker-fts/fts3_expr.c75
-rw-r--r--src/libtracker-fts/fts3_icu.c11
-rw-r--r--src/libtracker-fts/fts3_porter.c3
-rw-r--r--src/libtracker-fts/fts3_snippet.c91
-rw-r--r--src/libtracker-fts/fts3_term.c16
-rw-r--r--src/libtracker-fts/fts3_test.c213
-rw-r--r--src/libtracker-fts/fts3_tokenizer.c39
-rw-r--r--src/libtracker-fts/fts3_tokenizer.h11
-rw-r--r--src/libtracker-fts/fts3_tokenizer1.c1
-rw-r--r--src/libtracker-fts/fts3_unicode.c393
-rw-r--r--src/libtracker-fts/fts3_unicode2.c366
-rw-r--r--src/libtracker-fts/fts3_write.c2490
17 files changed, 4228 insertions, 478 deletions
diff --git a/src/libtracker-fts/Makefile.am b/src/libtracker-fts/Makefile.am
index 1780f0623..6e4b7f0ba 100644
--- a/src/libtracker-fts/Makefile.am
+++ b/src/libtracker-fts/Makefile.am
@@ -23,6 +23,8 @@ fts4_sources = \
fts3_tokenizer1.c \
fts3_tokenizer.c \
fts3_tokenizer.h \
+ fts3_unicode.c \
+ fts3_unicode2.c \
fts3_write.c
libtracker_fts_la_SOURCES = \
diff --git a/src/libtracker-fts/fts3.c b/src/libtracker-fts/fts3.c
index f29970dd2..3dc62ba8d 100644
--- a/src/libtracker-fts/fts3.c
+++ b/src/libtracker-fts/fts3.c
@@ -70,7 +70,7 @@
** A doclist is stored like this:
**
** array {
-** varint docid;
+** varint docid; (delta from previous doclist)
** array { (position list for column 0)
** varint position; (2 more than the delta from previous position)
** }
@@ -101,8 +101,8 @@
** at D signals the start of a new column; the 1 at E indicates that the
** new column is column number 1. There are two positions at 12 and 45
** (14-2 and 35-2+12). The 0 at H indicate the end-of-document. The
-** 234 at I is the next docid. It has one position 72 (72-2) and then
-** terminates with the 0 at K.
+** 234 at I is the delta to next docid (357). It has one position 70
+** (72-2) and then terminates with the 0 at K.
**
** A "position-list" is the list of positions for multiple columns for
** a single docid. A "column-list" is the set of positions for a single
@@ -286,10 +286,6 @@
** will eventually overtake the earlier data and knock it out. The
** query logic likewise merges doclists so that newer data knocks out
** older data.
-**
-** TODO(shess) Provide a VACUUM type operation to clear out all
-** deletions and duplications. This would basically be a forced merge
-** into a single segment.
*/
#include "fts3Int.h"
@@ -308,8 +304,8 @@
#include "fts3.h"
#ifndef SQLITE_CORE
- # include "sqlite3ext.h"
- SQLITE_EXTENSION_INIT1
+# include "sqlite3ext.h"
+ SQLITE_EXTENSION_INIT1
#endif
static int fts3EvalNext(Fts3Cursor *pCsr);
@@ -438,7 +434,7 @@ static void fts3GetReverseVarint(
sqlite3_int64 *pVal
){
sqlite3_int64 iVal;
- char *p = *pp;
+ char *p;
/* Pointer p now points at the first byte past the varint we are
** interested in. So, unless the doclist is corrupt, the 0x80 bit is
@@ -468,6 +464,8 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
sqlite3_free(p->zSegmentsTbl);
sqlite3_free(p->zReadExprlist);
sqlite3_free(p->zWriteExprlist);
+ sqlite3_free(p->zContentTbl);
+ sqlite3_free(p->zLanguageid);
/* Invoke the tokenizer destructor to free the tokenizer. */
p->pTokenizer->pModule->xDestroy(p->pTokenizer);
@@ -507,16 +505,19 @@ static void fts3DbExec(
** The xDestroy() virtual table method.
*/
static int fts3DestroyMethod(sqlite3_vtab *pVtab){
- int rc = SQLITE_OK; /* Return code */
Fts3Table *p = (Fts3Table *)pVtab;
- sqlite3 *db = p->db;
+ int rc = SQLITE_OK; /* Return code */
+ const char *zDb = p->zDb; /* Name of database (e.g. "main", "temp") */
+ sqlite3 *db = p->db; /* Database handle */
/* Drop the shadow tables */
- fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", p->zDb, p->zName);
- fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", p->zDb,p->zName);
- fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", p->zDb, p->zName);
- fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", p->zDb, p->zName);
- fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", p->zDb, p->zName);
+ if( p->zContentTbl==0 ){
+ fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName);
+ }
+ fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName);
+ fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName);
+ fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName);
+ fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName);
/* If everything has worked, invoke fts3DisconnectMethod() to free the
** memory associated with the Fts3Table structure and return SQLITE_OK.
@@ -541,7 +542,9 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){
int rc; /* Return code */
char *zSql; /* SQL statement passed to declare_vtab() */
char *zCols; /* List of user defined columns */
+ const char *zLanguageid;
+ zLanguageid = (p->zLanguageid ? p->zLanguageid : "__langid");
sqlite3_vtab_config(p->db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1);
/* Create a list of user columns for the virtual table */
@@ -552,7 +555,8 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){
/* Create the whole "CREATE TABLE" statement to pass to SQLite */
zSql = sqlite3_mprintf(
- "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN)", zCols, p->zName
+ "CREATE TABLE x(%s %Q HIDDEN, docid HIDDEN, %Q HIDDEN)",
+ zCols, p->zName, zLanguageid
);
if( !zCols || !zSql ){
rc = SQLITE_NOMEM;
@@ -567,6 +571,18 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){
}
/*
+** Create the %_stat table if it does not already exist.
+*/
+void sqlite3Fts3CreateStatTable(int *pRc, Fts3Table *p){
+ fts3DbExec(pRc, p->db,
+ "CREATE TABLE IF NOT EXISTS %Q.'%q_stat'"
+ "(id INTEGER PRIMARY KEY, value BLOB);",
+ p->zDb, p->zName
+ );
+ if( (*pRc)==SQLITE_OK ) p->bHasStat = 1;
+}
+
+/*
** Create the backing store tables (%_content, %_segments and %_segdir)
** required by the FTS3 table passed as the only argument. This is done
** as part of the vtab xCreate() method.
@@ -578,23 +594,31 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){
static int fts3CreateTables(Fts3Table *p){
int rc = SQLITE_OK; /* Return code */
int i; /* Iterator variable */
- char *zContentCols; /* Columns of %_content table */
sqlite3 *db = p->db; /* The database connection */
- /* Create a list of user columns for the content table */
- zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
- for(i=0; zContentCols && i<p->nColumn; i++){
- char *z = p->azColumn[i];
- zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
+ if( p->zContentTbl==0 ){
+ const char *zLanguageid = p->zLanguageid;
+ char *zContentCols; /* Columns of %_content table */
+
+ /* Create a list of user columns for the content table */
+ zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
+ for(i=0; zContentCols && i<p->nColumn; i++){
+ char *z = p->azColumn[i];
+ zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
+ }
+ if( zLanguageid && zContentCols ){
+ zContentCols = sqlite3_mprintf("%z, langid", zContentCols, zLanguageid);
+ }
+ if( zContentCols==0 ) rc = SQLITE_NOMEM;
+
+ /* Create the content table */
+ fts3DbExec(&rc, db,
+ "CREATE TABLE %Q.'%q_content'(%s)",
+ p->zDb, p->zName, zContentCols
+ );
+ sqlite3_free(zContentCols);
}
- if( zContentCols==0 ) rc = SQLITE_NOMEM;
- /* Create the content table */
- fts3DbExec(&rc, db,
- "CREATE TABLE %Q.'%q_content'(%s)",
- p->zDb, p->zName, zContentCols
- );
- sqlite3_free(zContentCols);
/* Create other tables */
fts3DbExec(&rc, db,
"CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);",
@@ -618,11 +642,9 @@ static int fts3CreateTables(Fts3Table *p){
p->zDb, p->zName
);
}
+ assert( p->bHasStat==p->bFts4 );
if( p->bHasStat ){
- fts3DbExec(&rc, db,
- "CREATE TABLE %Q.'%q_stat'(id INTEGER PRIMARY KEY, value BLOB);",
- p->zDb, p->zName
- );
+ sqlite3Fts3CreateStatTable(&rc, p);
}
return rc;
}
@@ -704,6 +726,7 @@ static void fts3Appendf(
char *z;
va_start(ap, zFormat);
z = sqlite3_vmprintf(zFormat, ap);
+ va_end(ap);
if( z && *pz ){
char *z2 = sqlite3_mprintf("%s%s", *pz, z);
sqlite3_free(z);
@@ -728,7 +751,7 @@ static void fts3Appendf(
static char *fts3QuoteId(char const *zInput){
int nRet;
char *zRet;
- nRet = 2 + strlen(zInput)*2 + 1;
+ nRet = 2 + (int)strlen(zInput)*2 + 1;
zRet = sqlite3_malloc(nRet);
if( zRet ){
int i;
@@ -745,8 +768,8 @@ static char *fts3QuoteId(char const *zInput){
}
/*
-** Return a list of comma separated SQL expressions that could be used
-** in a SELECT statement such as the following:
+** Return a list of comma separated SQL expressions and a FROM clause that
+** could be used in a SELECT statement such as the following:
**
** SELECT <list of expressions> FROM %_content AS x ...
**
@@ -757,7 +780,7 @@ static char *fts3QuoteId(char const *zInput){
** table has the three user-defined columns "a", "b", and "c", the following
** string is returned:
**
-** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c')"
+** "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x"
**
** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
** is the responsibility of the caller to eventually free it.
@@ -773,16 +796,34 @@ static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
char *zFunction;
int i;
- if( !zFunc ){
- zFunction = "";
+ if( p->zContentTbl==0 ){
+ if( !zFunc ){
+ zFunction = "";
+ }else{
+ zFree = zFunction = fts3QuoteId(zFunc);
+ }
+ fts3Appendf(pRc, &zRet, "docid");
+ for(i=0; i<p->nColumn; i++){
+ fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
+ }
+ if( p->zLanguageid ){
+ fts3Appendf(pRc, &zRet, ", x.%Q", "langid");
+ }
+ sqlite3_free(zFree);
}else{
- zFree = zFunction = fts3QuoteId(zFunc);
- }
- fts3Appendf(pRc, &zRet, "docid");
- for(i=0; i<p->nColumn; i++){
- fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
+ fts3Appendf(pRc, &zRet, "rowid");
+ for(i=0; i<p->nColumn; i++){
+ fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
+ }
+ if( p->zLanguageid ){
+ fts3Appendf(pRc, &zRet, ", x.%Q", p->zLanguageid);
+ }
}
- sqlite3_free(zFree);
+ fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x",
+ p->zDb,
+ (p->zContentTbl ? p->zContentTbl : p->zName),
+ (p->zContentTbl ? "" : "_content")
+ );
return zRet;
}
@@ -821,6 +862,9 @@ static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
for(i=0; i<p->nColumn; i++){
fts3Appendf(pRc, &zRet, ",%s(?)", zFunction);
}
+ if( p->zLanguageid ){
+ fts3Appendf(pRc, &zRet, ", ?");
+ }
sqlite3_free(zFree);
return zRet;
}
@@ -839,7 +883,7 @@ static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
** This function is used when parsing the "prefix=" FTS4 parameter.
*/
static int fts3GobbleInt(const char **pp, int *pnOut){
- const char *p = *pp; /* Iterator pointer */
+ const char *p; /* Iterator pointer */
int nInt = 0; /* Output value */
for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
@@ -907,6 +951,91 @@ static int fts3PrefixParameter(
}
/*
+** This function is called when initializing an FTS4 table that uses the
+** content=xxx option. It determines the number of and names of the columns
+** of the new FTS4 table.
+**
+** The third argument passed to this function is the value passed to the
+** config=xxx option (i.e. "xxx"). This function queries the database for
+** a table of that name. If found, the output variables are populated
+** as follows:
+**
+** *pnCol: Set to the number of columns table xxx has,
+**
+** *pnStr: Set to the total amount of space required to store a copy
+** of each columns name, including the nul-terminator.
+**
+** *pazCol: Set to point to an array of *pnCol strings. Each string is
+** the name of the corresponding column in table xxx. The array
+** and its contents are allocated using a single allocation. It
+** is the responsibility of the caller to free this allocation
+** by eventually passing the *pazCol value to sqlite3_free().
+**
+** If the table cannot be found, an error code is returned and the output
+** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is
+** returned (and the output variables are undefined).
+*/
+static int fts3ContentColumns(
+ sqlite3 *db, /* Database handle */
+ const char *zDb, /* Name of db (i.e. "main", "temp" etc.) */
+ const char *zTbl, /* Name of content table */
+ const char ***pazCol, /* OUT: Malloc'd array of column names */
+ int *pnCol, /* OUT: Size of array *pazCol */
+ int *pnStr /* OUT: Bytes of string content */
+){
+ int rc = SQLITE_OK; /* Return code */
+ char *zSql; /* "SELECT *" statement on zTbl */
+ sqlite3_stmt *pStmt = 0; /* Compiled version of zSql */
+
+ zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl);
+ if( !zSql ){
+ rc = SQLITE_NOMEM;
+ }else{
+ rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+ }
+ sqlite3_free(zSql);
+
+ if( rc==SQLITE_OK ){
+ const char **azCol; /* Output array */
+ int nStr = 0; /* Size of all column names (incl. 0x00) */
+ int nCol; /* Number of table columns */
+ int i; /* Used to iterate through columns */
+
+ /* Loop through the returned columns. Set nStr to the number of bytes of
+ ** space required to store a copy of each column name, including the
+ ** nul-terminator byte. */
+ nCol = sqlite3_column_count(pStmt);
+ for(i=0; i<nCol; i++){
+ const char *zCol = sqlite3_column_name(pStmt, i);
+ nStr += (int)strlen(zCol) + 1;
+ }
+
+ /* Allocate and populate the array to return. */
+ azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr);
+ if( azCol==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ char *p = (char *)&azCol[nCol];
+ for(i=0; i<nCol; i++){
+ const char *zCol = sqlite3_column_name(pStmt, i);
+ int n = (int)strlen(zCol)+1;
+ memcpy(p, zCol, n);
+ azCol[i] = p;
+ p += n;
+ }
+ }
+ sqlite3_finalize(pStmt);
+
+ /* Set the output variables. */
+ *pnCol = nCol;
+ *pnStr = nStr;
+ *pazCol = azCol;
+ }
+
+ return rc;
+}
+
+/*
** This function is the implementation of both the xConnect and xCreate
** methods of the FTS3 virtual table.
**
@@ -950,6 +1079,8 @@ static int fts3InitVtab(
char *zPrefix = 0; /* Prefix parameter value (or NULL) */
char *zCompress = 0; /* compress=? parameter (or NULL) */
char *zUncompress = 0; /* uncompress=? parameter (or NULL) */
+ char *zContent = 0; /* content=? parameter (or NULL) */
+ char *zLanguageid = 0; /* languageid=? parameter (or NULL) */
assert( strlen(argv[0])==4 );
assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
@@ -993,13 +1124,14 @@ static int fts3InitVtab(
struct Fts4Option {
const char *zOpt;
int nOpt;
- char **pzVar;
} aFts4Opt[] = {
- { "matchinfo", 9, 0 }, /* 0 -> MATCHINFO */
- { "prefix", 6, 0 }, /* 1 -> PREFIX */
- { "compress", 8, 0 }, /* 2 -> COMPRESS */
- { "uncompress", 10, 0 }, /* 3 -> UNCOMPRESS */
- { "order", 5, 0 } /* 4 -> ORDER */
+ { "matchinfo", 9 }, /* 0 -> MATCHINFO */
+ { "prefix", 6 }, /* 1 -> PREFIX */
+ { "compress", 8 }, /* 2 -> COMPRESS */
+ { "uncompress", 10 }, /* 3 -> UNCOMPRESS */
+ { "order", 5 }, /* 4 -> ORDER */
+ { "content", 7 }, /* 5 -> CONTENT */
+ { "languageid", 10 } /* 6 -> LANGUAGEID */
};
int iOpt;
@@ -1045,13 +1177,26 @@ static int fts3InitVtab(
case 4: /* ORDER */
if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3))
- && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 3))
+ && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4))
){
*pzErr = sqlite3_mprintf("unrecognized order: %s", zVal);
rc = SQLITE_ERROR;
}
bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
break;
+
+ case 5: /* CONTENT */
+ sqlite3_free(zContent);
+ zContent = zVal;
+ zVal = 0;
+ break;
+
+ case 6: /* LANGUAGEID */
+ assert( iOpt==6 );
+ sqlite3_free(zLanguageid);
+ zLanguageid = zVal;
+ zVal = 0;
+ break;
}
}
sqlite3_free(zVal);
@@ -1064,6 +1209,39 @@ static int fts3InitVtab(
aCol[nCol++] = z;
}
}
+
+ /* If a content=xxx option was specified, the following:
+ **
+ ** 1. Ignore any compress= and uncompress= options.
+ **
+ ** 2. If no column names were specified as part of the CREATE VIRTUAL
+ ** TABLE statement, use all columns from the content table.
+ */
+ if( rc==SQLITE_OK && zContent ){
+ sqlite3_free(zCompress);
+ sqlite3_free(zUncompress);
+ zCompress = 0;
+ zUncompress = 0;
+ if( nCol==0 ){
+ sqlite3_free((void*)aCol);
+ aCol = 0;
+ rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
+
+ /* If a languageid= option was specified, remove the language id
+ ** column from the aCol[] array. */
+ if( rc==SQLITE_OK && zLanguageid ){
+ int j;
+ for(j=0; j<nCol; j++){
+ if( sqlite3_stricmp(zLanguageid, aCol[j])==0 ){
+ int k;
+ for(k=j; k<nCol; k++) aCol[k] = aCol[k+1];
+ nCol--;
+ break;
+ }
+ }
+ }
+ }
+ }
if( rc!=SQLITE_OK ) goto fts3_init_out;
if( nCol==0 ){
@@ -1107,7 +1285,13 @@ static int fts3InitVtab(
p->nMaxPendingData = FTS3_MAX_PENDING_DATA;
p->bHasDocsize = (isFts4 && bNoDocsize==0);
p->bHasStat = isFts4;
+ p->bFts4 = isFts4;
p->bDescIdx = bDescIdx;
+ p->bAutoincrmerge = 0xff; /* 0xff means setting unknown */
+ p->zContentTbl = zContent;
+ p->zLanguageid = zLanguageid;
+ zContent = 0;
+ zLanguageid = 0;
TESTONLY( p->inTransaction = -1 );
TESTONLY( p->mxSavepoint = -1 );
@@ -1156,6 +1340,16 @@ static int fts3InitVtab(
rc = fts3CreateTables(p);
}
+ /* Check to see if a legacy fts3 table has been "upgraded" by the
+ ** addition of a %_stat table so that it can use incremental merge.
+ */
+ if( !isFts4 && !isCreate ){
+ int rc2 = SQLITE_OK;
+ fts3DbExec(&rc2, db, "SELECT 1 FROM %Q.'%q_stat' WHERE id=2",
+ p->zDb, p->zName);
+ if( rc2==SQLITE_OK ) p->bHasStat = 1;
+ }
+
/* Figure out the page-size for the database. This is required in order to
** estimate the cost of loading large doclists from the database. */
fts3DatabasePageSize(&rc, p);
@@ -1169,6 +1363,8 @@ fts3_init_out:
sqlite3_free(aIndex);
sqlite3_free(zCompress);
sqlite3_free(zUncompress);
+ sqlite3_free(zContent);
+ sqlite3_free(zLanguageid);
sqlite3_free((void *)aCol);
if( rc!=SQLITE_OK ){
if( p ){
@@ -1220,6 +1416,7 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
Fts3Table *p = (Fts3Table *)pVTab;
int i; /* Iterator variable */
int iCons = -1; /* Index of constraint to use */
+ int iLangidCons = -1; /* Index of langid=x constraint, if present */
/* By default use a full table scan. This is an expensive option,
** so search through the constraints to see if a more efficient
@@ -1232,7 +1429,8 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
if( pCons->usable==0 ) continue;
/* A direct lookup on the rowid or docid column. Assign a cost of 1.0. */
- if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
+ if( iCons<0
+ && pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
&& (pCons->iColumn<0 || pCons->iColumn==p->nColumn+1 )
){
pInfo->idxNum = FTS3_DOCID_SEARCH;
@@ -1255,7 +1453,13 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
pInfo->idxNum = FTS3_FULLTEXT_SEARCH + pCons->iColumn;
pInfo->estimatedCost = 2.0;
iCons = i;
- break;
+ }
+
+ /* Equality constraint on the langid column */
+ if( pCons->op==SQLITE_INDEX_CONSTRAINT_EQ
+ && pCons->iColumn==p->nColumn + 2
+ ){
+ iLangidCons = i;
}
}
@@ -1263,6 +1467,9 @@ static int fts3BestIndexMethod(sqlite3_vtab *pVTab, sqlite3_index_info *pInfo){
pInfo->aConstraintUsage[iCons].argvIndex = 1;
pInfo->aConstraintUsage[iCons].omit = 1;
}
+ if( iLangidCons>=0 ){
+ pInfo->aConstraintUsage[iLangidCons].argvIndex = 2;
+ }
/* Regardless of the strategy selected, FTS can deliver rows in rowid (or
** docid) order. Both ascending and descending are possible.
@@ -1321,34 +1528,63 @@ static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){
}
/*
+** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then
+** compose and prepare an SQL statement of the form:
+**
+** "SELECT <columns> FROM %_content WHERE rowid = ?"
+**
+** (or the equivalent for a content=xxx table) and set pCsr->pStmt to
+** it. If an error occurs, return an SQLite error code.
+**
+** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK.
+*/
+static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){
+ int rc = SQLITE_OK;
+ if( pCsr->pStmt==0 ){
+ Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
+ char *zSql;
+ zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist);
+ if( !zSql ) return SQLITE_NOMEM;
+ rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
+ sqlite3_free(zSql);
+ }
+ *ppStmt = pCsr->pStmt;
+ return rc;
+}
+
+/*
** Position the pCsr->pStmt statement so that it is on the row
** of the %_content table that contains the last match. Return
** SQLITE_OK on success.
*/
static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
+ int rc = SQLITE_OK;
if( pCsr->isRequireSeek ){
- sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
- pCsr->isRequireSeek = 0;
- if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
- return SQLITE_OK;
- }else{
- int rc = sqlite3_reset(pCsr->pStmt);
- if( rc==SQLITE_OK ){
- /* If no row was found and no error has occured, then the %_content
- ** table is missing a row that is present in the full-text index.
- ** The data structures are corrupt.
- */
- rc = SQLITE_CORRUPT_VTAB;
- }
- pCsr->isEof = 1;
- if( pContext ){
- sqlite3_result_error_code(pContext, rc);
+ sqlite3_stmt *pStmt = 0;
+
+ rc = fts3CursorSeekStmt(pCsr, &pStmt);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
+ pCsr->isRequireSeek = 0;
+ if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
+ return SQLITE_OK;
+ }else{
+ rc = sqlite3_reset(pCsr->pStmt);
+ if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){
+ /* If no row was found and no error has occured, then the %_content
+ ** table is missing a row that is present in the full-text index.
+ ** The data structures are corrupt. */
+ rc = FTS_CORRUPT_VTAB;
+ pCsr->isEof = 1;
+ }
}
- return rc;
}
- }else{
- return SQLITE_OK;
}
+
+ if( rc!=SQLITE_OK && pContext ){
+ sqlite3_result_error_code(pContext, rc);
+ }
+ return rc;
}
/*
@@ -1398,7 +1634,7 @@ static int fts3ScanInteriorNode(
zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
if( zCsr>zEnd ){
- return SQLITE_CORRUPT_VTAB;
+ return FTS_CORRUPT_VTAB;
}
while( zCsr<zEnd && (piFirst || piLast) ){
@@ -1416,7 +1652,7 @@ static int fts3ScanInteriorNode(
zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){
- rc = SQLITE_CORRUPT_VTAB;
+ rc = FTS_CORRUPT_VTAB;
goto finish_scan;
}
if( nPrefix+nSuffix>nAlloc ){
@@ -1429,6 +1665,7 @@ static int fts3ScanInteriorNode(
}
zBuffer = zNew;
}
+ assert( zBuffer );
memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
nBuffer = nPrefix + nSuffix;
zCsr += nSuffix;
@@ -1787,7 +2024,7 @@ static int fts3PoslistPhraseMerge(
char **pp1, /* IN/OUT: Left input list */
char **pp2 /* IN/OUT: Right input list */
){
- char *p = (pp ? *pp : 0);
+ char *p = *pp;
char *p1 = *pp1;
char *p2 = *pp2;
int iCol1 = 0;
@@ -1796,7 +2033,7 @@ static int fts3PoslistPhraseMerge(
/* Never set both isSaveLeft and isExact for the same invocation. */
assert( isSaveLeft==0 || isExact==0 );
- assert( *p1!=0 && *p2!=0 );
+ assert( p!=0 && *p1!=0 && *p2!=0 );
if( *p1==POS_COLUMN ){
p1++;
p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
@@ -1813,7 +2050,7 @@ static int fts3PoslistPhraseMerge(
sqlite3_int64 iPos1 = 0;
sqlite3_int64 iPos2 = 0;
- if( pp && iCol1 ){
+ if( iCol1 ){
*p++ = POS_COLUMN;
p += sqlite3Fts3PutVarint(p, iCol1);
}
@@ -1828,16 +2065,10 @@ static int fts3PoslistPhraseMerge(
|| (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken)
){
sqlite3_int64 iSave;
- if( !pp ){
- fts3PoslistCopy(0, &p2);
- fts3PoslistCopy(0, &p1);
- *pp1 = p1;
- *pp2 = p2;
- return 1;
- }
iSave = isSaveLeft ? iPos1 : iPos2;
fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
pSave = 0;
+ assert( p );
}
if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){
if( (*p2&0xFE)==0 ) break;
@@ -1886,7 +2117,7 @@ static int fts3PoslistPhraseMerge(
fts3PoslistCopy(0, &p1);
*pp1 = p1;
*pp2 = p2;
- if( !pp || *pp==p ){
+ if( *pp==p ){
return 0;
}
*p++ = 0x00;
@@ -2122,7 +2353,7 @@ static int fts3DoclistOrMerge(
}
*paOut = aOut;
- *pnOut = (p-aOut);
+ *pnOut = (int)(p-aOut);
assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 );
return SQLITE_OK;
}
@@ -2186,7 +2417,57 @@ static void fts3DoclistPhraseMerge(
}
}
- *pnRight = p - aOut;
+ *pnRight = (int)(p - aOut);
+}
+
+/*
+** Argument pList points to a position list nList bytes in size. This
+** function checks to see if the position list contains any entries for
+** a token in position 0 (of any column). If so, it writes argument iDelta
+** to the output buffer pOut, followed by a position list consisting only
+** of the entries from pList at position 0, and terminated by an 0x00 byte.
+** The value returned is the number of bytes written to pOut (if any).
+*/
+int sqlite3Fts3FirstFilter(
+ sqlite3_int64 iDelta, /* Varint that may be written to pOut */
+ char *pList, /* Position list (no 0x00 term) */
+ int nList, /* Size of pList in bytes */
+ char *pOut /* Write output here */
+){
+ int nOut = 0;
+ int bWritten = 0; /* True once iDelta has been written */
+ char *p = pList;
+ char *pEnd = &pList[nList];
+
+ if( *p!=0x01 ){
+ if( *p==0x02 ){
+ nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
+ pOut[nOut++] = 0x02;
+ bWritten = 1;
+ }
+ fts3ColumnlistCopy(0, &p);
+ }
+
+ while( p<pEnd && *p==0x01 ){
+ sqlite3_int64 iCol;
+ p++;
+ p += sqlite3Fts3GetVarint(p, &iCol);
+ if( *p==0x02 ){
+ if( bWritten==0 ){
+ nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
+ bWritten = 1;
+ }
+ pOut[nOut++] = 0x01;
+ nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol);
+ pOut[nOut++] = 0x02;
+ }
+ fts3ColumnlistCopy(0, &p);
+ }
+ if( bWritten ){
+ pOut[nOut++] = 0x00;
+ }
+
+ return nOut;
}
@@ -2338,6 +2619,7 @@ static int fts3SegReaderCursorAppend(
*/
static int fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
+ int iLangid, /* Language id */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
@@ -2366,7 +2648,7 @@ static int fts3SegReaderCursor(
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
if( rc==SQLITE_OK ){
- rc = sqlite3Fts3AllSegdirs(p, iIndex, iLevel, &pStmt);
+ rc = sqlite3Fts3AllSegdirs(p, iLangid, iIndex, iLevel, &pStmt);
}
while( rc==SQLITE_OK && SQLITE_ROW==(rc = sqlite3_step(pStmt)) ){
@@ -2389,7 +2671,9 @@ static int fts3SegReaderCursor(
}
rc = sqlite3Fts3SegReaderNew(pCsr->nSegment+1,
- iStartBlock, iLeavesEndBlock, iEndBlock, zRoot, nRoot, &pSeg
+ (isPrefix==0 && isScan==0),
+ iStartBlock, iLeavesEndBlock,
+ iEndBlock, zRoot, nRoot, &pSeg
);
if( rc!=SQLITE_OK ) goto finished;
rc = fts3SegReaderCursorAppend(pCsr, pSeg);
@@ -2409,6 +2693,7 @@ static int fts3SegReaderCursor(
*/
int sqlite3Fts3SegReaderCursor(
Fts3Table *p, /* FTS3 table handle */
+ int iLangid, /* Language-id to search */
int iIndex, /* Index to search (from 0 to p->nIndex-1) */
int iLevel, /* Level of segments to scan */
const char *zTerm, /* Term to query for */
@@ -2426,14 +2711,9 @@ int sqlite3Fts3SegReaderCursor(
assert( FTS3_SEGCURSOR_ALL<0 && FTS3_SEGCURSOR_PENDING<0 );
assert( isPrefix==0 || isScan==0 );
- /* "isScan" is only set to true by the ft4aux module, an ordinary
- ** full-text tables. */
- assert( isScan==0 || p->aIndex==0 );
-
memset(pCsr, 0, sizeof(Fts3MultiSegReader));
-
return fts3SegReaderCursor(
- p, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
+ p, iLangid, iIndex, iLevel, zTerm, nTerm, isPrefix, isScan, pCsr
);
}
@@ -2445,11 +2725,14 @@ int sqlite3Fts3SegReaderCursor(
*/
static int fts3SegReaderCursorAddZero(
Fts3Table *p, /* FTS virtual table handle */
+ int iLangid,
const char *zTerm, /* Term to scan doclist of */
int nTerm, /* Number of bytes in zTerm */
Fts3MultiSegReader *pCsr /* Fts3MultiSegReader to modify */
){
- return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
+ return fts3SegReaderCursor(p,
+ iLangid, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr
+ );
}
/*
@@ -2485,8 +2768,9 @@ static int fts3TermSegReaderCursor(
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm ){
bFound = 1;
- rc = sqlite3Fts3SegReaderCursor(
- p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr);
+ rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
+ i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0, pSegcsr
+ );
pSegcsr->bLookup = 1;
}
}
@@ -2494,19 +2778,21 @@ static int fts3TermSegReaderCursor(
for(i=1; bFound==0 && i<p->nIndex; i++){
if( p->aIndex[i].nPrefix==nTerm+1 ){
bFound = 1;
- rc = sqlite3Fts3SegReaderCursor(
- p, i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
+ rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
+ i, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 1, 0, pSegcsr
);
if( rc==SQLITE_OK ){
- rc = fts3SegReaderCursorAddZero(p, zTerm, nTerm, pSegcsr);
+ rc = fts3SegReaderCursorAddZero(
+ p, pCsr->iLangid, zTerm, nTerm, pSegcsr
+ );
}
}
}
}
if( bFound==0 ){
- rc = sqlite3Fts3SegReaderCursor(
- p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
+ rc = sqlite3Fts3SegReaderCursor(p, pCsr->iLangid,
+ 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, isPrefix, 0, pSegcsr
);
pSegcsr->bLookup = !isPrefix;
}
@@ -2545,6 +2831,7 @@ static int fts3TermSelect(
filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
| (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
+ | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0)
| (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
filter.iCol = iColumn;
filter.zTerm = pTok->z;
@@ -2660,7 +2947,7 @@ static int fts3FilterMethod(
UNUSED_PARAMETER(nVal);
assert( idxNum>=0 && idxNum<=(FTS3_FULLTEXT_SEARCH+p->nColumn) );
- assert( nVal==0 || nVal==1 );
+ assert( nVal==0 || nVal==1 || nVal==2 );
assert( (nVal==0)==(idxNum==FTS3_FULLSCAN_SEARCH) );
assert( p->pSegments==0 );
@@ -2685,8 +2972,11 @@ static int fts3FilterMethod(
return SQLITE_NOMEM;
}
- rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn,
- iCol, zQuery, -1, &pCsr->pExpr
+ pCsr->iLangid = 0;
+ if( nVal==2 ) pCsr->iLangid = sqlite3_value_int(apVal[1]);
+
+ rc = sqlite3Fts3ExprParse(p->pTokenizer, pCsr->iLangid,
+ p->azColumn, p->bFts4, p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_ERROR ){
@@ -2713,23 +3003,24 @@ static int fts3FilterMethod(
** row by docid.
*/
if( idxNum==FTS3_FULLSCAN_SEARCH ){
- const char *zSort = (pCsr->bDesc ? "DESC" : "ASC");
- const char *zTmpl = "SELECT %s FROM %Q.'%q_content' AS x ORDER BY docid %s";
- zSql = sqlite3_mprintf(zTmpl, p->zReadExprlist, p->zDb, p->zName, zSort);
- }else{
- const char *zTmpl = "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?";
- zSql = sqlite3_mprintf(zTmpl, p->zReadExprlist, p->zDb, p->zName);
+ zSql = sqlite3_mprintf(
+ "SELECT %s ORDER BY rowid %s",
+ p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
+ );
+ if( zSql ){
+ rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
+ sqlite3_free(zSql);
+ }else{
+ rc = SQLITE_NOMEM;
+ }
+ }else if( idxNum==FTS3_DOCID_SEARCH ){
+ rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);
+ if( rc==SQLITE_OK ){
+ rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
+ }
}
- if( !zSql ) return SQLITE_NOMEM;
- rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
- sqlite3_free(zSql);
if( rc!=SQLITE_OK ) return rc;
- if( idxNum==FTS3_DOCID_SEARCH ){
- rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
- if( rc!=SQLITE_OK ) return rc;
- }
-
return fts3NextMethod(pCursor);
}
@@ -2756,10 +3047,17 @@ static int fts3RowidMethod(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
/*
** This is the xColumn method, called by SQLite to request a value from
** the row that the supplied cursor currently points to.
+**
+** If:
+**
+** (iCol < p->nColumn) -> The value of the iCol'th user column.
+** (iCol == p->nColumn) -> Magic column with the same name as the table.
+** (iCol == p->nColumn+1) -> Docid column
+** (iCol == p->nColumn+2) -> Langid column
*/
static int fts3ColumnMethod(
sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */
- sqlite3_context *pContext, /* Context for sqlite3_result_xxx() calls */
+ sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */
int iCol /* Index of column to read value from */
){
int rc = SQLITE_OK; /* Return Code */
@@ -2767,22 +3065,34 @@ static int fts3ColumnMethod(
Fts3Table *p = (Fts3Table *)pCursor->pVtab;
/* The column value supplied by SQLite must be in range. */
- assert( iCol>=0 && iCol<=p->nColumn+1 );
+ assert( iCol>=0 && iCol<=p->nColumn+2 );
if( iCol==p->nColumn+1 ){
/* This call is a request for the "docid" column. Since "docid" is an
** alias for "rowid", use the xRowid() method to obtain the value.
*/
- sqlite3_result_int64(pContext, pCsr->iPrevId);
+ sqlite3_result_int64(pCtx, pCsr->iPrevId);
}else if( iCol==p->nColumn ){
/* The extra column whose name is the same as the table.
- ** Return a blob which is a pointer to the cursor.
- */
- sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
+ ** Return a blob which is a pointer to the cursor. */
+ sqlite3_result_blob(pCtx, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
+ }else if( iCol==p->nColumn+2 && pCsr->pExpr ){
+ sqlite3_result_int64(pCtx, pCsr->iLangid);
}else{
+ /* The requested column is either a user column (one that contains
+ ** indexed data), or the language-id column. */
rc = fts3CursorSeek(0, pCsr);
+
if( rc==SQLITE_OK ){
- sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));
+ if( iCol==p->nColumn+2 ){
+ int iLangid = 0;
+ if( p->zLanguageid ){
+ iLangid = sqlite3_column_int(pCsr->pStmt, p->nColumn+1);
+ }
+ sqlite3_result_int(pCtx, iLangid);
+ }else if( sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
+ sqlite3_result_value(pCtx, sqlite3_column_value(pCsr->pStmt, iCol+1));
+ }
}
}
@@ -2809,8 +3119,42 @@ static int fts3UpdateMethod(
** hash-table to the database.
*/
static int fts3SyncMethod(sqlite3_vtab *pVtab){
- int rc = sqlite3Fts3PendingTermsFlush((Fts3Table *)pVtab);
- sqlite3Fts3SegmentsClose((Fts3Table *)pVtab);
+
+ /* Following an incremental-merge operation, assuming that the input
+ ** segments are not completely consumed (the usual case), they are updated
+ ** in place to remove the entries that have already been merged. This
+ ** involves updating the leaf block that contains the smallest unmerged
+ ** entry and each block (if any) between the leaf and the root node. So
+ ** if the height of the input segment b-trees is N, and input segments
+ ** are merged eight at a time, updating the input segments at the end
+ ** of an incremental-merge requires writing (8*(1+N)) blocks. N is usually
+ ** small - often between 0 and 2. So the overhead of the incremental
+ ** merge is somewhere between 8 and 24 blocks. To avoid this overhead
+ ** dwarfing the actual productive work accomplished, the incremental merge
+ ** is only attempted if it will write at least 64 leaf blocks. Hence
+ ** nMinMerge.
+ **
+ ** Of course, updating the input segments also involves deleting a bunch
+ ** of blocks from the segments table. But this is not considered overhead
+ ** as it would also be required by a crisis-merge that used the same input
+ ** segments.
+ */
+ const u32 nMinMerge = 64; /* Minimum amount of incr-merge work to do */
+
+ Fts3Table *p = (Fts3Table*)pVtab;
+ int rc = sqlite3Fts3PendingTermsFlush(p);
+
+ if( rc==SQLITE_OK && p->bAutoincrmerge==1 && p->nLeafAdd>(nMinMerge/16) ){
+ int mxLevel = 0; /* Maximum relative level value in db */
+ int A; /* Incr-merge parameter A */
+
+ rc = sqlite3Fts3MaxLevel(p, &mxLevel);
+ assert( rc==SQLITE_OK || mxLevel==0 );
+ A = p->nLeafAdd * mxLevel;
+ A += (A/2);
+ if( A>(int)nMinMerge ) rc = sqlite3Fts3Incrmerge(p, A, 8);
+ }
+ sqlite3Fts3SegmentsClose(p);
return rc;
}
@@ -2818,13 +3162,14 @@ static int fts3SyncMethod(sqlite3_vtab *pVtab){
** Implementation of xBegin() method. This is a no-op.
*/
static int fts3BeginMethod(sqlite3_vtab *pVtab){
- TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
+ Fts3Table *p = (Fts3Table*)pVtab;
UNUSED_PARAMETER(pVtab);
assert( p->pSegments==0 );
assert( p->nPendingData==0 );
assert( p->inTransaction!=1 );
TESTONLY( p->inTransaction = 1 );
TESTONLY( p->mxSavepoint = -1; );
+ p->nLeafAdd = 0;
return SQLITE_OK;
}
@@ -2865,7 +3210,7 @@ static int fts3RollbackMethod(sqlite3_vtab *pVtab){
*/
static void fts3ReversePoslist(char *pStart, char **ppPoslist){
char *p = &(*ppPoslist)[-2];
- char c;
+ char c = 0;
while( p>pStart && (c=*p--)==0 );
while( p>pStart && (*p & 0x80) | c ){
@@ -3074,15 +3419,22 @@ static int fts3RenameMethod(
sqlite3 *db = p->db; /* Database connection */
int rc; /* Return Code */
+ /* As it happens, the pending terms table is always empty here. This is
+ ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction
+ ** always opens a savepoint transaction. And the xSavepoint() method
+ ** flushes the pending terms table. But leave the (no-op) call to
+ ** PendingTermsFlush() in in case that changes.
+ */
+ assert( p->nPendingData==0 );
rc = sqlite3Fts3PendingTermsFlush(p);
- if( rc!=SQLITE_OK ){
- return rc;
+
+ if( p->zContentTbl==0 ){
+ fts3DbExec(&rc, db,
+ "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';",
+ p->zDb, p->zName, zName
+ );
}
- fts3DbExec(&rc, db,
- "ALTER TABLE %Q.'%q_content' RENAME TO '%q_content';",
- p->zDb, p->zName, zName
- );
if( p->bHasDocsize ){
fts3DbExec(&rc, db,
"ALTER TABLE %Q.'%q_docsize' RENAME TO '%q_docsize';",
@@ -3112,11 +3464,15 @@ static int fts3RenameMethod(
** Flush the contents of the pending-terms table to disk.
*/
static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
+ int rc = SQLITE_OK;
UNUSED_PARAMETER(iSavepoint);
assert( ((Fts3Table *)pVtab)->inTransaction );
assert( ((Fts3Table *)pVtab)->mxSavepoint < iSavepoint );
TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
- return fts3SyncMethod(pVtab);
+ if( ((Fts3Table *)pVtab)->bIgnoreSavepoint==0 ){
+ rc = fts3SyncMethod(pVtab);
+ }
+ return rc;
}
/*
@@ -3198,6 +3554,9 @@ static void hashDestroy(void *p){
*/
void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
+#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
+#endif
#ifdef SQLITE_ENABLE_ICU
void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif
@@ -3213,12 +3572,19 @@ int sqlite3Fts3Init(sqlite3 *db){
Fts3Hash *pHash = 0;
const sqlite3_tokenizer_module *pSimple = 0;
const sqlite3_tokenizer_module *pPorter = 0;
+#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+ const sqlite3_tokenizer_module *pUnicode = 0;
+#endif
#ifdef SQLITE_ENABLE_ICU
const sqlite3_tokenizer_module *pIcu = 0;
sqlite3Fts3IcuTokenizerModule(&pIcu);
#endif
+#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+ sqlite3Fts3UnicodeTokenizer(&pUnicode);
+#endif
+
#ifdef SQLITE_TEST
rc = sqlite3Fts3InitTerm(db);
if( rc!=SQLITE_OK ) return rc;
@@ -3242,6 +3608,10 @@ int sqlite3Fts3Init(sqlite3 *db){
if( rc==SQLITE_OK ){
if( sqlite3Fts3HashInsert(pHash, "simple", 7, (void *)pSimple)
|| sqlite3Fts3HashInsert(pHash, "porter", 7, (void *)pPorter)
+
+#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+ || sqlite3Fts3HashInsert(pHash, "unicode61", 10, (void *)pUnicode)
+#endif
#ifdef SQLITE_ENABLE_ICU
|| (pIcu && sqlite3Fts3HashInsert(pHash, "icu", 4, (void *)pIcu))
#endif
@@ -3441,21 +3811,20 @@ static int fts3EvalPhraseLoad(
*/
static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
int iToken; /* Used to iterate through phrase tokens */
- int rc = SQLITE_OK; /* Return code */
char *aPoslist = 0; /* Position list for deferred tokens */
int nPoslist = 0; /* Number of bytes in aPoslist */
int iPrev = -1; /* Token number of previous deferred token */
assert( pPhrase->doclist.bFreeList==0 );
- for(iToken=0; rc==SQLITE_OK && iToken<pPhrase->nToken; iToken++){
+ for(iToken=0; iToken<pPhrase->nToken; iToken++){
Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
Fts3DeferredToken *pDeferred = pToken->pDeferred;
if( pDeferred ){
char *pList;
int nList;
- rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
+ int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
if( rc!=SQLITE_OK ) return rc;
if( pList==0 ){
@@ -3477,7 +3846,7 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
fts3PoslistPhraseMerge(&aOut, iToken-iPrev, 0, 1, &p1, &p2);
sqlite3_free(aPoslist);
aPoslist = pList;
- nPoslist = aOut - aPoslist;
+ nPoslist = (int)(aOut - aPoslist);
if( nPoslist==0 ){
sqlite3_free(aPoslist);
pPhrase->doclist.pList = 0;
@@ -3521,7 +3890,7 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
pPhrase->doclist.pList = aOut;
if( fts3PoslistPhraseMerge(&aOut, nDistance, 0, 1, &p1, &p2) ){
pPhrase->doclist.bFreeList = 1;
- pPhrase->doclist.nList = (aOut - pPhrase->doclist.pList);
+ pPhrase->doclist.nList = (int)(aOut - pPhrase->doclist.pList);
}else{
sqlite3_free(aOut);
pPhrase->doclist.pList = 0;
@@ -3556,6 +3925,7 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
&& p->nToken==1
&& pFirst->pSegcsr
&& pFirst->pSegcsr->bLookup
+ && pFirst->bFirst==0
){
/* Use the incremental approach. */
int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
@@ -3589,7 +3959,7 @@ void sqlite3Fts3DoclistPrev(
int nDoclist, /* Length of aDoclist in bytes */
char **ppIter, /* IN/OUT: Iterator pointer */
sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
- int *pnList, /* IN/OUT: List length pointer */
+ int *pnList, /* OUT: List length pointer */
u8 *pbEof /* OUT: End-of-file flag */
){
char *p = *ppIter;
@@ -3616,7 +3986,7 @@ void sqlite3Fts3DoclistPrev(
iMul = (bDescIdx ? -1 : 1);
}
- *pnList = pEnd - pNext;
+ *pnList = (int)(pEnd - pNext);
*ppIter = pNext;
*piDocid = iDocid;
}else{
@@ -3630,13 +4000,48 @@ void sqlite3Fts3DoclistPrev(
}else{
char *pSave = p;
fts3ReversePoslist(aDoclist, &p);
- *pnList = (pSave - p);
+ *pnList = (int)(pSave - p);
}
*ppIter = p;
}
}
/*
+** Iterate forwards through a doclist.
+*/
+void sqlite3Fts3DoclistNext(
+ int bDescIdx, /* True if the doclist is desc */
+ char *aDoclist, /* Pointer to entire doclist */
+ int nDoclist, /* Length of aDoclist in bytes */
+ char **ppIter, /* IN/OUT: Iterator pointer */
+ sqlite3_int64 *piDocid, /* IN/OUT: Docid pointer */
+ u8 *pbEof /* OUT: End-of-file flag */
+){
+ char *p = *ppIter;
+
+ assert( nDoclist>0 );
+ assert( *pbEof==0 );
+ assert( p || *piDocid==0 );
+ assert( !p || (p>=aDoclist && p<=&aDoclist[nDoclist]) );
+
+ if( p==0 ){
+ p = aDoclist;
+ p += sqlite3Fts3GetVarint(p, piDocid);
+ }else{
+ fts3PoslistCopy(0, &p);
+ if( p>=&aDoclist[nDoclist] ){
+ *pbEof = 1;
+ }else{
+ sqlite3_int64 iVar;
+ p += sqlite3Fts3GetVarint(p, &iVar);
+ *piDocid += ((bDescIdx ? -1 : 1) * iVar);
+ }
+ }
+
+ *ppIter = p;
+}
+
+/*
** Attempt to move the phrase iterator to point to the next matching docid.
** If an error occurs, return an SQLite error code. Otherwise, return
** SQLITE_OK.
@@ -3690,7 +4095,7 @@ static int fts3EvalPhraseNext(
}
pDL->pList = pIter;
fts3PoslistCopy(0, &pIter);
- pDL->nList = (pIter - pDL->pList);
+ pDL->nList = (int)(pIter - pDL->pList);
/* pIter now points just past the 0x00 that terminates the position-
** list for document pDL->iDocid. However, if this position-list was
@@ -3785,7 +4190,7 @@ static void fts3EvalTokenCosts(
Fts3Expr ***ppOr, /* Write new OR root to *(*ppOr)++ */
int *pRc /* IN/OUT: Error code */
){
- if( *pRc==SQLITE_OK && pExpr ){
+ if( *pRc==SQLITE_OK ){
if( pExpr->eType==FTSQUERY_PHRASE ){
Fts3Phrase *pPhrase = pExpr->pPhrase;
int i;
@@ -3799,6 +4204,11 @@ static void fts3EvalTokenCosts(
*pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
}
}else if( pExpr->eType!=FTSQUERY_NOT ){
+ assert( pExpr->eType==FTSQUERY_OR
+ || pExpr->eType==FTSQUERY_AND
+ || pExpr->eType==FTSQUERY_NEAR
+ );
+ assert( pExpr->pLeft && pExpr->pRight );
if( pExpr->eType==FTSQUERY_OR ){
pRoot = pExpr->pLeft;
**ppOr = pRoot;
@@ -3859,7 +4269,7 @@ static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
}
if( nDoc==0 || nByte==0 ){
sqlite3_reset(pStmt);
- return SQLITE_CORRUPT_VTAB;
+ return FTS_CORRUPT_VTAB;
}
pCsr->nDoc = nDoc;
@@ -3903,6 +4313,15 @@ static int fts3EvalSelectDeferred(
int nMinEst = 0; /* The minimum count for any phrase so far. */
int nLoad4 = 1; /* (Phrases that will be loaded)^4. */
+ /* Tokens are never deferred for FTS tables created using the content=xxx
+ ** option. The reason being that it is not guaranteed that the content
+ ** table actually contains the same data as the index. To prevent this from
+ ** causing any problems, the deferred token optimization is completely
+ ** disabled for content=xxx tables. */
+ if( pTab->zContentTbl ){
+ return SQLITE_OK;
+ }
+
/* Count the tokens in this AND/NEAR cluster. If none of the doclists
** associated with the tokens spill onto overflow pages, or if there is
** only 1 token, exit early. No tokens to defer in this case. */
@@ -3965,7 +4384,11 @@ static int fts3EvalSelectDeferred(
fts3SegReaderCursorFree(pToken->pSegcsr);
pToken->pSegcsr = 0;
}else{
- nLoad4 = nLoad4*4;
+ /* Set nLoad4 to the value of (4^nOther) for the next iteration of the
+ ** for-loop. Except, limit the value to 2^24 to prevent it from
+ ** overflowing the 32-bit integer it is stored in. */
+ if( ii<12 ) nLoad4 = nLoad4*4;
+
if( ii==0 || pTC->pPhrase->nToken>1 ){
/* Either this is the cheapest token in the entire query, or it is
** part of a multi-token phrase. Either way, the entire doclist will
@@ -4013,7 +4436,8 @@ static int fts3EvalStart(Fts3Cursor *pCsr){
fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
/* Determine which, if any, tokens in the expression should be deferred. */
- if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){
+#ifndef SQLITE_DISABLE_FTS4_DEFERRED
+ if( rc==SQLITE_OK && nToken>1 && pTab->bFts4 ){
Fts3TokenAndCost *aTC;
Fts3Expr **apOr;
aTC = (Fts3TokenAndCost *)sqlite3_malloc(
@@ -4030,8 +4454,8 @@ static int fts3EvalStart(Fts3Cursor *pCsr){
Fts3Expr **ppOr = apOr;
fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
- nToken = pTC-aTC;
- nOr = ppOr-apOr;
+ nToken = (int)(pTC-aTC);
+ nOr = (int)(ppOr-apOr);
if( rc==SQLITE_OK ){
rc = fts3EvalSelectDeferred(pCsr, 0, aTC, nToken);
@@ -4043,6 +4467,7 @@ static int fts3EvalStart(Fts3Cursor *pCsr){
sqlite3_free(aTC);
}
}
+#endif
fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
return rc;
@@ -4103,7 +4528,7 @@ static int fts3EvalNearTrim(
&pOut, aTmp, nParam1, nParam2, paPoslist, &p2
);
if( res ){
- nNew = (pOut - pPhrase->doclist.pList) - 1;
+ nNew = (int)(pOut - pPhrase->doclist.pList) - 1;
assert( pPhrase->doclist.pList[nNew]=='\0' );
assert( nNew<=pPhrase->doclist.nList && nNew>0 );
memset(&pPhrase->doclist.pList[nNew], 0, pPhrase->doclist.nList - nNew);
@@ -4318,32 +4743,39 @@ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
nTmp += p->pRight->pPhrase->doclist.nList;
}
nTmp += p->pPhrase->doclist.nList;
- aTmp = sqlite3_malloc(nTmp*2);
- if( !aTmp ){
- *pRc = SQLITE_NOMEM;
+ if( nTmp==0 ){
res = 0;
}else{
- char *aPoslist = p->pPhrase->doclist.pList;
- int nToken = p->pPhrase->nToken;
+ aTmp = sqlite3_malloc(nTmp*2);
+ if( !aTmp ){
+ *pRc = SQLITE_NOMEM;
+ res = 0;
+ }else{
+ char *aPoslist = p->pPhrase->doclist.pList;
+ int nToken = p->pPhrase->nToken;
- for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
- Fts3Phrase *pPhrase = p->pRight->pPhrase;
- int nNear = p->nNear;
- res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
- }
-
- aPoslist = pExpr->pRight->pPhrase->doclist.pList;
- nToken = pExpr->pRight->pPhrase->nToken;
- for(p=pExpr->pLeft; p && res; p=p->pLeft){
- int nNear = p->pParent->nNear;
- Fts3Phrase *pPhrase = (
- p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
- );
- res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+ for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
+ Fts3Phrase *pPhrase = p->pRight->pPhrase;
+ int nNear = p->nNear;
+ res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+ }
+
+ aPoslist = pExpr->pRight->pPhrase->doclist.pList;
+ nToken = pExpr->pRight->pPhrase->nToken;
+ for(p=pExpr->pLeft; p && res; p=p->pLeft){
+ int nNear;
+ Fts3Phrase *pPhrase;
+ assert( p->pParent && p->pParent->pLeft==p );
+ nNear = p->pParent->nNear;
+ pPhrase = (
+ p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
+ );
+ res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+ }
}
- }
- sqlite3_free(aTmp);
+ sqlite3_free(aTmp);
+ }
}
return res;
@@ -4423,6 +4855,7 @@ static int fts3EvalTestExpr(
break;
default: {
+#ifndef SQLITE_DISABLE_FTS4_DEFERRED
if( pCsr->pDeferred
&& (pExpr->iDocid==pCsr->iPrevId || pExpr->bDeferred)
){
@@ -4434,7 +4867,9 @@ static int fts3EvalTestExpr(
*pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
bHit = (pPhrase->doclist.pList!=0);
pExpr->iDocid = pCsr->iPrevId;
- }else{
+ }else
+#endif
+ {
bHit = (pExpr->bEof==0 && pExpr->iDocid==pCsr->iPrevId);
}
break;
@@ -4770,26 +5205,87 @@ int sqlite3Fts3EvalPhraseStats(
** This function works regardless of whether or not the phrase is deferred,
** incremental, or neither.
*/
-char *sqlite3Fts3EvalPhrasePoslist(
+int sqlite3Fts3EvalPhrasePoslist(
Fts3Cursor *pCsr, /* FTS3 cursor object */
Fts3Expr *pExpr, /* Phrase to return doclist for */
- int iCol /* Column to return position list for */
+ int iCol, /* Column to return position list for */
+ char **ppOut /* OUT: Pointer to position list */
){
Fts3Phrase *pPhrase = pExpr->pPhrase;
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
- char *pIter = pPhrase->doclist.pList;
+ char *pIter;
int iThis;
+ sqlite3_int64 iDocid;
+ /* If this phrase is applies specifically to some column other than
+ ** column iCol, return a NULL pointer. */
+ *ppOut = 0;
assert( iCol>=0 && iCol<pTab->nColumn );
- if( !pIter
- || pExpr->bEof
- || pExpr->iDocid!=pCsr->iPrevId
- || (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol)
- ){
- return 0;
+ if( (pPhrase->iColumn<pTab->nColumn && pPhrase->iColumn!=iCol) ){
+ return SQLITE_OK;
+ }
+
+ iDocid = pExpr->iDocid;
+ pIter = pPhrase->doclist.pList;
+ if( iDocid!=pCsr->iPrevId || pExpr->bEof ){
+ int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */
+ int bOr = 0;
+ u8 bEof = 0;
+ Fts3Expr *p;
+
+ /* Check if this phrase descends from an OR expression node. If not,
+ ** return NULL. Otherwise, the entry that corresponds to docid
+ ** pCsr->iPrevId may lie earlier in the doclist buffer. */
+ for(p=pExpr->pParent; p; p=p->pParent){
+ if( p->eType==FTSQUERY_OR ) bOr = 1;
+ }
+ if( bOr==0 ) return SQLITE_OK;
+
+ /* This is the descendent of an OR node. In this case we cannot use
+ ** an incremental phrase. Load the entire doclist for the phrase
+ ** into memory in this case. */
+ if( pPhrase->bIncr ){
+ int rc = SQLITE_OK;
+ int bEofSave = pExpr->bEof;
+ fts3EvalRestart(pCsr, pExpr, &rc);
+ while( rc==SQLITE_OK && !pExpr->bEof ){
+ fts3EvalNextRow(pCsr, pExpr, &rc);
+ if( bEofSave==0 && pExpr->iDocid==iDocid ) break;
+ }
+ pIter = pPhrase->doclist.pList;
+ assert( rc!=SQLITE_OK || pPhrase->bIncr==0 );
+ if( rc!=SQLITE_OK ) return rc;
+ }
+
+ if( pExpr->bEof ){
+ pIter = 0;
+ iDocid = 0;
+ }
+ bEof = (pPhrase->doclist.nAll==0);
+ assert( bDescDoclist==0 || bDescDoclist==1 );
+ assert( pCsr->bDesc==0 || pCsr->bDesc==1 );
+
+ if( pCsr->bDesc==bDescDoclist ){
+ int dummy;
+ while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){
+ sqlite3Fts3DoclistPrev(
+ bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
+ &pIter, &iDocid, &dummy, &bEof
+ );
+ }
+ }else{
+ while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){
+ sqlite3Fts3DoclistNext(
+ bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll,
+ &pIter, &iDocid, &bEof
+ );
+ }
+ }
+
+ if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0;
}
+ if( pIter==0 ) return SQLITE_OK;
- assert( pPhrase->doclist.nList>0 );
if( *pIter==0x01 ){
pIter++;
pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
@@ -4803,7 +5299,8 @@ char *sqlite3Fts3EvalPhrasePoslist(
pIter += sqlite3Fts3GetVarint32(pIter, &iThis);
}
- return ((iCol==iThis)?pIter:0);
+ *ppOut = ((iCol==iThis)?pIter:0);
+ return SQLITE_OK;
}
/*
@@ -4826,6 +5323,16 @@ void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
}
}
+
+/*
+** Return SQLITE_CORRUPT_VTAB.
+*/
+#ifdef SQLITE_DEBUG
+int sqlite3Fts3Corrupt(){
+ return SQLITE_CORRUPT_VTAB;
+}
+#endif
+
#if !SQLITE_CORE
/*
** Initialize API pointer table, if required.
diff --git a/src/libtracker-fts/fts3.h b/src/libtracker-fts/fts3.h
index 4a06f637f..b0346826d 100644
--- a/src/libtracker-fts/fts3.h
+++ b/src/libtracker-fts/fts3.h
@@ -20,9 +20,12 @@ extern "C" {
#endif /* __cplusplus */
int sqlite3Fts3Init(sqlite3 *db);
-int fts4_extension_init(sqlite3 *db,
- char **pzErrMsg,
- void *pApi);
+
+int fts4_extension_init(
+ sqlite3 *db,
+ char **pzErrMsg,
+ void *pApi
+);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/src/libtracker-fts/fts3Int.h b/src/libtracker-fts/fts3Int.h
index ed8043adf..77ca4704e 100644
--- a/src/libtracker-fts/fts3Int.h
+++ b/src/libtracker-fts/fts3Int.h
@@ -67,6 +67,9 @@ extern const sqlite3_api_routines *sqlite3_api;
#ifndef MIN
# define MIN(x,y) ((x)<(y)?(x):(y))
#endif
+#ifndef MAX
+# define MAX(x,y) ((x)>(y)?(x):(y))
+#endif
/*
** Maximum length of a varint encoded integer. The varint format is different
@@ -121,7 +124,7 @@ extern const sqlite3_api_routines *sqlite3_api;
# define NEVER(X) (0)
#else
# define ALWAYS(x) (x)
-# define NEVER(X) (x)
+# define NEVER(x) (x)
#endif
/*
@@ -131,6 +134,7 @@ typedef unsigned char u8; /* 1-byte (or larger) unsigned integer */
typedef short int i16; /* 2-byte (or larger) signed integer */
typedef unsigned int u32; /* 4-byte unsigned integer */
typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */
+typedef sqlite3_int64 i64; /* 8-byte signed integer */
/*
** Macro used to suppress compiler warnings for unused parameters.
@@ -157,6 +161,13 @@ typedef sqlite3_uint64 u64; /* 8-byte unsigned integer */
#endif /* SQLITE_AMALGAMATION */
+#ifdef SQLITE_DEBUG
+int sqlite3Fts3Corrupt(void);
+# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt()
+#else
+# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
+#endif
+
typedef struct Fts3Table Fts3Table;
typedef struct Fts3Cursor Fts3Cursor;
typedef struct Fts3Expr Fts3Expr;
@@ -184,36 +195,45 @@ struct Fts3Table {
int nColumn; /* number of named columns in virtual table */
char **azColumn; /* column names. malloced */
sqlite3_tokenizer *pTokenizer; /* tokenizer for inserts and queries */
+ char *zContentTbl; /* content=xxx option, or NULL */
+ char *zLanguageid; /* languageid=xxx option, or NULL */
+ u8 bAutoincrmerge; /* True if automerge=1 */
+ u32 nLeafAdd; /* Number of leaf blocks added this trans */
/* Precompiled statements used by the implementation. Each of these
** statements is run and reset within a single virtual table API call.
*/
- sqlite3_stmt *aStmt[27];
+ sqlite3_stmt *aStmt[37];
char *zReadExprlist;
char *zWriteExprlist;
int nNodeSize; /* Soft limit for node size */
+ u8 bFts4; /* True for FTS4, false for FTS3 */
u8 bHasStat; /* True if %_stat table exists */
u8 bHasDocsize; /* True if %_docsize table exists */
u8 bDescIdx; /* True if doclists are in reverse order */
+ u8 bIgnoreSavepoint; /* True to ignore xSavepoint invocations */
int nPgsz; /* Page size for host database */
char *zSegmentsTbl; /* Name of %_segments table */
sqlite3_blob *pSegments; /* Blob handle open on %_segments table */
- /* TODO: Fix the first paragraph of this comment.
- **
- ** The following hash table is used to buffer pending index updates during
- ** transactions. Variable nPendingData estimates the memory size of the
- ** pending data, including hash table overhead, but not malloc overhead.
- ** When nPendingData exceeds nMaxPendingData, the buffer is flushed
- ** automatically. Variable iPrevDocid is the docid of the most recently
- ** inserted record.
+ /*
+ ** The following array of hash tables is used to buffer pending index
+ ** updates during transactions. All pending updates buffered at any one
+ ** time must share a common language-id (see the FTS4 langid= feature).
+ ** The current language id is stored in variable iPrevLangid.
**
** A single FTS4 table may have multiple full-text indexes. For each index
** there is an entry in the aIndex[] array. Index 0 is an index of all the
** terms that appear in the document set. Each subsequent index in aIndex[]
** is an index of prefixes of a specific length.
+ **
+ ** Variable nPendingData contains an estimate the memory consumed by the
+ ** pending data structures, including hash table overhead, but not including
+ ** malloc overhead. When nPendingData exceeds nMaxPendingData, all hash
+ ** tables are flushed to disk. Variable iPrevDocid is the docid of the most
+ ** recently inserted record.
*/
int nIndex; /* Size of aIndex[] */
struct Fts3Index {
@@ -223,12 +243,13 @@ struct Fts3Table {
int nMaxPendingData; /* Max pending data before flush to disk */
int nPendingData; /* Current bytes of pending data */
sqlite_int64 iPrevDocid; /* Docid of most recently inserted document */
+ int iPrevLangid; /* Langid of recently inserted document */
-#if defined(SQLITE_DEBUG)
+#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
/* State variables used for validating that the transaction control
** methods of the virtual table are called at appropriate times. These
- ** values do not contribution to the FTS computation; they are used for
- ** verifying the SQLite core.
+ ** values do not contribute to FTS functionality; they are used for
+ ** verifying the operation of the SQLite core.
*/
int inTransaction; /* True after xBegin but before xCommit/xRollback */
int mxSavepoint; /* Largest valid xSavepoint integer */
@@ -247,6 +268,7 @@ struct Fts3Cursor {
u8 isRequireSeek; /* True if must seek pStmt to %_content row */
sqlite3_stmt *pStmt; /* Prepared statement in use by the cursor */
Fts3Expr *pExpr; /* Parsed MATCH query string */
+ int iLangid; /* Language being queried for */
int nPhrase; /* Number of matchable phrases in query */
Fts3DeferredToken *pDeferred; /* Deferred search tokens, if any */
sqlite3_int64 iPrevId; /* Previous id read from aDoclist */
@@ -309,6 +331,7 @@ struct Fts3PhraseToken {
char *z; /* Text of the token */
int n; /* Number of bytes in buffer z */
int isPrefix; /* True if token ends with a "*" character */
+ int bFirst; /* True if token must appear at position 0 */
/* Variables above this point are populated when the expression is
** parsed (by code in fts3_expr.c). Below this point the variables are
@@ -392,23 +415,34 @@ int sqlite3Fts3UpdateMethod(sqlite3_vtab*,int,sqlite3_value**,sqlite3_int64*);
int sqlite3Fts3PendingTermsFlush(Fts3Table *);
void sqlite3Fts3PendingTermsClear(Fts3Table *);
int sqlite3Fts3Optimize(Fts3Table *);
-int sqlite3Fts3SegReaderNew(int, sqlite3_int64,
+int sqlite3Fts3SegReaderNew(int, int, sqlite3_int64,
sqlite3_int64, sqlite3_int64, const char *, int, Fts3SegReader**);
int sqlite3Fts3SegReaderPending(
Fts3Table*,int,const char*,int,int,Fts3SegReader**);
void sqlite3Fts3SegReaderFree(Fts3SegReader *);
-int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, sqlite3_stmt **);
+int sqlite3Fts3AllSegdirs(Fts3Table*, int, int, int, sqlite3_stmt **);
int sqlite3Fts3ReadLock(Fts3Table *);
int sqlite3Fts3ReadBlock(Fts3Table*, sqlite3_int64, char **, int*, int*);
int sqlite3Fts3SelectDoctotal(Fts3Table *, sqlite3_stmt **);
int sqlite3Fts3SelectDocsize(Fts3Table *, sqlite3_int64, sqlite3_stmt **);
+#ifndef SQLITE_DISABLE_FTS4_DEFERRED
void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *);
int sqlite3Fts3DeferToken(Fts3Cursor *, Fts3PhraseToken *, int);
int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *);
void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *);
+int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
+#else
+# define sqlite3Fts3FreeDeferredTokens(x)
+# define sqlite3Fts3DeferToken(x,y,z) SQLITE_OK
+# define sqlite3Fts3CacheDeferredDoclists(x) SQLITE_OK
+# define sqlite3Fts3FreeDeferredDoclists(x)
+# define sqlite3Fts3DeferredTokenList(x,y,z) SQLITE_OK
+#endif
+
void sqlite3Fts3SegmentsClose(Fts3Table *);
+int sqlite3Fts3MaxLevel(Fts3Table *, int *);
/* Special values interpreted by sqlite3SegReaderCursor() */
#define FTS3_SEGCURSOR_PENDING -1
@@ -418,8 +452,8 @@ int sqlite3Fts3SegReaderStart(Fts3Table*, Fts3MultiSegReader*, Fts3SegFilter*);
int sqlite3Fts3SegReaderStep(Fts3Table *, Fts3MultiSegReader *);
void sqlite3Fts3SegReaderFinish(Fts3MultiSegReader *);
-int sqlite3Fts3SegReaderCursor(
- Fts3Table *, int, int, const char *, int, int, int, Fts3MultiSegReader *);
+int sqlite3Fts3SegReaderCursor(Fts3Table *,
+ int, int, int, const char *, int, int, int, Fts3MultiSegReader *);
/* Flags allowed as part of the 4th argument to SegmentReaderIterate() */
#define FTS3_SEGMENT_REQUIRE_POS 0x00000001
@@ -427,6 +461,7 @@ int sqlite3Fts3SegReaderCursor(
#define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
#define FTS3_SEGMENT_PREFIX 0x00000008
#define FTS3_SEGMENT_SCAN 0x00000010
+#define FTS3_SEGMENT_FIRST 0x00000020
/* Type passed as 4th argument to SegmentReaderIterate() */
struct Fts3SegFilter {
@@ -459,6 +494,8 @@ struct Fts3MultiSegReader {
int nDoclist; /* Size of aDoclist[] in bytes */
};
+int sqlite3Fts3Incrmerge(Fts3Table*,int,int);
+
/* fts3.c */
int sqlite3Fts3PutVarint(char *, sqlite3_int64);
int sqlite3Fts3GetVarint(const char *, sqlite_int64 *);
@@ -466,8 +503,9 @@ int sqlite3Fts3GetVarint32(const char *, int *);
int sqlite3Fts3VarintLen(sqlite3_uint64);
void sqlite3Fts3Dequote(char *);
void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
-
int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
+int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
+void sqlite3Fts3CreateStatTable(int*, Fts3Table*);
/* fts3_tokenizer.c */
const char *sqlite3Fts3NextToken(const char *, int *);
@@ -485,8 +523,8 @@ void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const char *,
void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *);
/* fts3_expr.c */
-int sqlite3Fts3ExprParse(sqlite3_tokenizer *,
- char **, int, int, const char *, int, Fts3Expr **
+int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int,
+ char **, int, int, int, const char *, int, Fts3Expr **
);
void sqlite3Fts3ExprFree(Fts3Expr *);
#ifdef SQLITE_TEST
@@ -494,6 +532,10 @@ int sqlite3Fts3ExprInitTestInterface(sqlite3 *db);
int sqlite3Fts3InitTerm(sqlite3 *db);
#endif
+int sqlite3Fts3OpenTokenizer(sqlite3_tokenizer *, int, const char *, int,
+ sqlite3_tokenizer_cursor **
+);
+
/* fts3_aux.c */
int sqlite3Fts3InitAux(sqlite3 *db);
@@ -503,11 +545,16 @@ int sqlite3Fts3MsrIncrStart(
Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
int sqlite3Fts3MsrIncrNext(
Fts3Table *, Fts3MultiSegReader *, sqlite3_int64 *, char **, int *);
-char *sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol);
+int sqlite3Fts3EvalPhrasePoslist(Fts3Cursor *, Fts3Expr *, int iCol, char **);
int sqlite3Fts3MsrOvfl(Fts3Cursor *, Fts3MultiSegReader *, int *);
int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
-int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
+/* fts3_unicode2.c (functions generated by parsing unicode text files) */
+#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+int sqlite3FtsUnicodeFold(int, int);
+int sqlite3FtsUnicodeIsalnum(int);
+int sqlite3FtsUnicodeIsdiacritic(int);
+#endif
#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
#endif /* _FTSINT_H */
diff --git a/src/libtracker-fts/fts3_aux.c b/src/libtracker-fts/fts3_aux.c
index ada85d796..a2bff2e1d 100644
--- a/src/libtracker-fts/fts3_aux.c
+++ b/src/libtracker-fts/fts3_aux.c
@@ -79,9 +79,9 @@ static int fts3auxConnectMethod(
}
zDb = argv[1];
- nDb = strlen(zDb);
+ nDb = (int)strlen(zDb);
zFts3 = argv[3];
- nFts3 = strlen(zFts3);
+ nFts3 = (int)strlen(zFts3);
rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
if( rc!=SQLITE_OK ) return rc;
@@ -376,7 +376,7 @@ static int fts3auxFilterMethod(
if( pCsr->zStop==0 ) return SQLITE_NOMEM;
}
- rc = sqlite3Fts3SegReaderCursor(pFts3, 0, FTS3_SEGCURSOR_ALL,
+ rc = sqlite3Fts3SegReaderCursor(pFts3, 0, 0, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, isScan, &pCsr->csr
);
if( rc==SQLITE_OK ){
diff --git a/src/libtracker-fts/fts3_expr.c b/src/libtracker-fts/fts3_expr.c
index 7eb2962d4..7612789de 100644
--- a/src/libtracker-fts/fts3_expr.c
+++ b/src/libtracker-fts/fts3_expr.c
@@ -92,7 +92,9 @@ int sqlite3_fts3_enable_parentheses = 0;
typedef struct ParseContext ParseContext;
struct ParseContext {
sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
+ int iLangid; /* Language id used with tokenizer */
const char **azCol; /* Array of column names for fts3 table */
+ int bFts4; /* True to allow FTS4-only syntax */
int nCol; /* Number of entries in azCol[] */
int iDefaultCol; /* Default column to query */
int isNot; /* True if getNextNode() sees a unary - */
@@ -126,6 +128,33 @@ static void *fts3MallocZero(int nByte){
return pRet;
}
+int sqlite3Fts3OpenTokenizer(
+ sqlite3_tokenizer *pTokenizer,
+ int iLangid,
+ const char *z,
+ int n,
+ sqlite3_tokenizer_cursor **ppCsr
+){
+ sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
+ sqlite3_tokenizer_cursor *pCsr = 0;
+ int rc;
+
+ rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
+ assert( rc==SQLITE_OK || pCsr==0 );
+ if( rc==SQLITE_OK ){
+ pCsr->pTokenizer = pTokenizer;
+ if( pModule->iVersion>=1 ){
+ rc = pModule->xLanguageid(pCsr, iLangid);
+ if( rc!=SQLITE_OK ){
+ pModule->xClose(pCsr);
+ pCsr = 0;
+ }
+ }
+ }
+ *ppCsr = pCsr;
+ return rc;
+}
+
/*
** Extract the next token from buffer z (length n) using the tokenizer
@@ -153,15 +182,13 @@ static int getNextToken(
Fts3Expr *pRet = 0;
int nConsumed = 0;
- rc = pModule->xOpen(pTokenizer, z, n, &pCursor);
+ rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, n, &pCursor);
if( rc==SQLITE_OK ){
const char *zToken;
- int nToken, iStart, iEnd, iPosition;
+ int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
int nByte; /* total space to allocate */
- pCursor->pTokenizer = pTokenizer;
rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
-
if( rc==SQLITE_OK ){
nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
pRet = (Fts3Expr *)fts3MallocZero(nByte);
@@ -180,9 +207,21 @@ static int getNextToken(
pRet->pPhrase->aToken[0].isPrefix = 1;
iEnd++;
}
- if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
- pParse->isNot = 1;
+
+ while( 1 ){
+ if( !sqlite3_fts3_enable_parentheses
+ && iStart>0 && z[iStart-1]=='-'
+ ){
+ pParse->isNot = 1;
+ iStart--;
+ }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
+ pRet->pPhrase->aToken[0].bFirst = 1;
+ iStart--;
+ }else{
+ break;
+ }
}
+
}
nConsumed = iEnd;
}
@@ -255,13 +294,13 @@ static int getNextString(
** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
** structures.
*/
- rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor);
+ rc = sqlite3Fts3OpenTokenizer(
+ pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
if( rc==SQLITE_OK ){
int ii;
- pCursor->pTokenizer = pTokenizer;
for(ii=0; rc==SQLITE_OK; ii++){
const char *zByte;
- int nByte, iBegin, iEnd, iPos;
+ int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
if( rc==SQLITE_OK ){
Fts3PhraseToken *pToken;
@@ -281,6 +320,7 @@ static int getNextString(
pToken->n = nByte;
pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
+ pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
nToken = ii+1;
}
}
@@ -302,8 +342,12 @@ static int getNextString(
p->pPhrase->nToken = nToken;
zBuf = (char *)&p->pPhrase->aToken[nToken];
- memcpy(zBuf, zTemp, nTemp);
- sqlite3_free(zTemp);
+ if( zTemp ){
+ memcpy(zBuf, zTemp, nTemp);
+ sqlite3_free(zTemp);
+ }else{
+ assert( nTemp==0 );
+ }
for(jj=0; jj<p->pPhrase->nToken; jj++){
p->pPhrase->aToken[jj].z = zBuf;
@@ -727,7 +771,9 @@ exprparse_out:
*/
int sqlite3Fts3ExprParse(
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
+ int iLangid, /* Language id for tokenizer */
char **azCol, /* Array of column names for fts3 table */
+ int bFts4, /* True to allow FTS4-only syntax */
int nCol, /* Number of entries in azCol[] */
int iDefaultCol, /* Default column to query */
const char *z, int n, /* Text of MATCH query */
@@ -736,11 +782,14 @@ int sqlite3Fts3ExprParse(
int nParsed;
int rc;
ParseContext sParse;
+
+ memset(&sParse, 0, sizeof(ParseContext));
sParse.pTokenizer = pTokenizer;
+ sParse.iLangid = iLangid;
sParse.azCol = (const char **)azCol;
sParse.nCol = nCol;
sParse.iDefaultCol = iDefaultCol;
- sParse.nNest = 0;
+ sParse.bFts4 = bFts4;
if( z==0 ){
*ppExpr = 0;
return SQLITE_OK;
@@ -930,7 +979,7 @@ static void fts3ExprTest(
}
rc = sqlite3Fts3ExprParse(
- pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr
+ pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
);
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
sqlite3_result_error(context, "Error parsing expression", -1);
diff --git a/src/libtracker-fts/fts3_icu.c b/src/libtracker-fts/fts3_icu.c
index a10a55d67..52df8c7d8 100644
--- a/src/libtracker-fts/fts3_icu.c
+++ b/src/libtracker-fts/fts3_icu.c
@@ -110,13 +110,16 @@ static int icuOpen(
*ppCursor = 0;
- if( nInput<0 ){
+ if( zInput==0 ){
+ nInput = 0;
+ zInput = "";
+ }else if( nInput<0 ){
nInput = strlen(zInput);
}
nChar = nInput+1;
pCsr = (IcuCursor *)sqlite3_malloc(
sizeof(IcuCursor) + /* IcuCursor */
- nChar * sizeof(UChar) + /* IcuCursor.aChar[] */
+ ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */
(nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */
);
if( !pCsr ){
@@ -124,7 +127,7 @@ static int icuOpen(
}
memset(pCsr, 0, sizeof(IcuCursor));
pCsr->aChar = (UChar *)&pCsr[1];
- pCsr->aOffset = (int *)&pCsr->aChar[nChar];
+ pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3];
pCsr->aOffset[iOut] = iInput;
U8_NEXT(zInput, iInput, nInput, c);
@@ -196,7 +199,7 @@ static int icuNext(
while( iStart<iEnd ){
int iWhite = iStart;
- U8_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
+ U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c);
if( u_isspace(c) ){
iStart = iWhite;
}else{
diff --git a/src/libtracker-fts/fts3_porter.c b/src/libtracker-fts/fts3_porter.c
index 148c57008..579745b85 100644
--- a/src/libtracker-fts/fts3_porter.c
+++ b/src/libtracker-fts/fts3_porter.c
@@ -40,7 +40,7 @@ typedef struct porter_tokenizer {
} porter_tokenizer;
/*
-** Class derived from sqlit3_tokenizer_cursor
+** Class derived from sqlite3_tokenizer_cursor
*/
typedef struct porter_tokenizer_cursor {
sqlite3_tokenizer_cursor base;
@@ -630,6 +630,7 @@ static const sqlite3_tokenizer_module porterTokenizerModule = {
porterOpen,
porterClose,
porterNext,
+ 0
};
/*
diff --git a/src/libtracker-fts/fts3_snippet.c b/src/libtracker-fts/fts3_snippet.c
index b569eb131..4bee014dc 100644
--- a/src/libtracker-fts/fts3_snippet.c
+++ b/src/libtracker-fts/fts3_snippet.c
@@ -360,23 +360,27 @@ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
SnippetIter *p = (SnippetIter *)ctx;
SnippetPhrase *pPhrase = &p->aPhrase[iPhrase];
char *pCsr;
+ int rc;
pPhrase->nToken = pExpr->pPhrase->nToken;
-
- pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol);
+ rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr);
+ assert( rc==SQLITE_OK || pCsr==0 );
if( pCsr ){
int iFirst = 0;
pPhrase->pList = pCsr;
fts3GetDeltaPosition(&pCsr, &iFirst);
+ assert( iFirst>=0 );
pPhrase->pHead = pCsr;
pPhrase->pTail = pCsr;
pPhrase->iHead = iFirst;
pPhrase->iTail = iFirst;
}else{
- assert( pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 );
+ assert( rc!=SQLITE_OK || (
+ pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0
+ ));
}
- return SQLITE_OK;
+ return rc;
}
/*
@@ -531,6 +535,7 @@ static int fts3StringAppend(
*/
static int fts3SnippetShift(
Fts3Table *pTab, /* FTS3 table snippet comes from */
+ int iLangid, /* Language id to use in tokenizing */
int nSnippet, /* Number of tokens desired for snippet */
const char *zDoc, /* Document text to extract snippet from */
int nDoc, /* Size of buffer zDoc in bytes */
@@ -566,13 +571,12 @@ static int fts3SnippetShift(
/* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired)
** or more tokens in zDoc/nDoc.
*/
- rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+ rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC);
if( rc!=SQLITE_OK ){
return rc;
}
- pC->pTokenizer = pTab->pTokenizer;
while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){
- const char *ZDUMMY; int DUMMY1, DUMMY2, DUMMY3;
+ const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0;
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent);
}
pMod->xClose(pC);
@@ -616,8 +620,6 @@ static int fts3SnippetText(
int iCol = pFragment->iCol+1; /* Query column to extract text from */
sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */
- const char *ZDUMMY; /* Dummy argument used with tokenizer */
- int DUMMY1; /* Dummy argument used with tokenizer */
zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol);
if( zDoc==0 ){
@@ -630,17 +632,29 @@ static int fts3SnippetText(
/* Open a token cursor on the document. */
pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule;
- rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+ rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC);
if( rc!=SQLITE_OK ){
return rc;
}
- pC->pTokenizer = pTab->pTokenizer;
while( rc==SQLITE_OK ){
- int iBegin; /* Offset in zDoc of start of token */
- int iFin; /* Offset in zDoc of end of token */
- int isHighlight; /* True for highlighted terms */
-
+ const char *ZDUMMY; /* Dummy argument used with tokenizer */
+ int DUMMY1 = -1; /* Dummy argument used with tokenizer */
+ int iBegin = 0; /* Offset in zDoc of start of token */
+ int iFin = 0; /* Offset in zDoc of end of token */
+ int isHighlight = 0; /* True for highlighted terms */
+
+ /* Variable DUMMY1 is initialized to a negative value above. Elsewhere
+ ** in the FTS code the variable that the third argument to xNext points to
+ ** is initialized to zero before the first (*but not necessarily
+ ** subsequent*) call to xNext(). This is done for a particular application
+ ** that needs to know whether or not the tokenizer is being used for
+ ** snippet generation or for some other purpose.
+ **
+ ** Extreme care is required when writing code to depend on this
+ ** initialization. It is not a documented part of the tokenizer interface.
+ ** If a tokenizer is used directly by any code outside of FTS, this
+ ** convention might not be respected. */
rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent);
if( rc!=SQLITE_OK ){
if( rc==SQLITE_DONE ){
@@ -656,7 +670,9 @@ static int fts3SnippetText(
if( !isShiftDone ){
int n = nDoc - iBegin;
- rc = fts3SnippetShift(pTab, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask);
+ rc = fts3SnippetShift(
+ pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask
+ );
isShiftDone = 1;
/* Now that the shift has been done, check if the initial "..." are
@@ -768,13 +784,14 @@ static int fts3ExprLocalHitsCb(
int iPhrase, /* Phrase number */
void *pCtx /* Pointer to MatchInfo structure */
){
+ int rc = SQLITE_OK;
MatchInfo *p = (MatchInfo *)pCtx;
int iStart = iPhrase * p->nCol * 3;
int i;
- for(i=0; i<p->nCol; i++){
+ for(i=0; i<p->nCol && rc==SQLITE_OK; i++){
char *pCsr;
- pCsr = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i);
+ rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr);
if( pCsr ){
p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr);
}else{
@@ -782,7 +799,7 @@ static int fts3ExprLocalHitsCb(
}
}
- return SQLITE_OK;
+ return rc;
}
static int fts3MatchinfoCheck(
@@ -792,8 +809,8 @@ static int fts3MatchinfoCheck(
){
if( (cArg==FTS3_MATCHINFO_NPHRASE)
|| (cArg==FTS3_MATCHINFO_NCOL)
- || (cArg==FTS3_MATCHINFO_NDOC && pTab->bHasStat)
- || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bHasStat)
+ || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4)
+ || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4)
|| (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize)
|| (cArg==FTS3_MATCHINFO_LCS)
|| (cArg==FTS3_MATCHINFO_HITS)
@@ -848,7 +865,7 @@ static int fts3MatchinfoSelectDoctotal(
a = sqlite3_column_blob(pStmt, 0);
a += sqlite3Fts3GetVarint(a, &nDoc);
- if( nDoc==0 ) return SQLITE_CORRUPT_VTAB;
+ if( nDoc==0 ) return FTS_CORRUPT_VTAB;
*pnDoc = (u32)nDoc;
if( paLen ) *paLen = a;
@@ -943,8 +960,10 @@ static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){
int nLive = 0; /* Number of iterators in aIter not at EOF */
for(i=0; i<pInfo->nPhrase; i++){
+ int rc;
LcsIterator *pIt = &aIter[i];
- pIt->pRead = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol);
+ rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead);
+ if( rc!=SQLITE_OK ) return rc;
if( pIt->pRead ){
pIt->iPos = pIt->iPosOffset;
fts3LcsIteratorAdvance(&aIter[i]);
@@ -1296,9 +1315,10 @@ static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
int iTerm; /* For looping through nTerm phrase terms */
char *pList; /* Pointer to position list for phrase */
int iPos = 0; /* First position in position-list */
+ int rc;
UNUSED_PARAMETER(iPhrase);
- pList = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol);
+ rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList);
nTerm = pExpr->pPhrase->nToken;
if( pList ){
fts3GetDeltaPosition(&pList, &iPos);
@@ -1312,7 +1332,7 @@ static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){
pT->iPos = iPos;
}
- return SQLITE_OK;
+ return rc;
}
/*
@@ -1324,8 +1344,6 @@ void sqlite3Fts3Offsets(
){
Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule;
- const char *ZDUMMY; /* Dummy argument used with xNext() */
- int NDUMMY; /* Dummy argument used with xNext() */
int rc; /* Return Code */
int nToken; /* Number of tokens in query */
int iCol; /* Column currently being processed */
@@ -1358,9 +1376,11 @@ void sqlite3Fts3Offsets(
*/
for(iCol=0; iCol<pTab->nColumn; iCol++){
sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */
- int iStart;
- int iEnd;
- int iCurrent;
+ const char *ZDUMMY; /* Dummy argument used with xNext() */
+ int NDUMMY = 0; /* Dummy argument used with xNext() */
+ int iStart = 0;
+ int iEnd = 0;
+ int iCurrent = 0;
const char *zDoc;
int nDoc;
@@ -1389,9 +1409,10 @@ void sqlite3Fts3Offsets(
}
/* Initialize a tokenizer iterator to iterate through column iCol. */
- rc = pMod->xOpen(pTab->pTokenizer, zDoc, nDoc, &pC);
+ rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid,
+ zDoc, nDoc, &pC
+ );
if( rc!=SQLITE_OK ) goto offsets_out;
- pC->pTokenizer = pTab->pTokenizer;
rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent);
while( rc==SQLITE_OK ){
@@ -1409,7 +1430,7 @@ void sqlite3Fts3Offsets(
if( !pTerm ){
/* All offsets for this column have been gathered. */
- break;
+ rc = SQLITE_DONE;
}else{
assert( iCurrent<=iMinPos );
if( 0==(0xFE&*pTerm->pList) ){
@@ -1426,8 +1447,8 @@ void sqlite3Fts3Offsets(
"%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
);
rc = fts3StringAppend(&res, aBuffer, -1);
- }else if( rc==SQLITE_DONE ){
- rc = SQLITE_CORRUPT_VTAB;
+ }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
+ rc = FTS_CORRUPT_VTAB;
}
}
}
diff --git a/src/libtracker-fts/fts3_term.c b/src/libtracker-fts/fts3_term.c
index d3eb690bd..c49d5cb65 100644
--- a/src/libtracker-fts/fts3_term.c
+++ b/src/libtracker-fts/fts3_term.c
@@ -73,6 +73,7 @@ static int fts3termConnectMethod(
Fts3termTable *p; /* Virtual table object to return */
int iIndex = 0;
+ UNUSED_PARAMETER(pCtx);
if( argc==5 ){
iIndex = atoi(argv[4]);
argc--;
@@ -87,9 +88,9 @@ static int fts3termConnectMethod(
}
zDb = argv[1];
- nDb = strlen(zDb);
+ nDb = (int)strlen(zDb);
zFts3 = argv[3];
- nFts3 = strlen(zFts3);
+ nFts3 = (int)strlen(zFts3);
rc = sqlite3_declare_vtab(db, FTS3_TERMS_SCHEMA);
if( rc!=SQLITE_OK ) return rc;
@@ -231,12 +232,12 @@ static int fts3termNextMethod(sqlite3_vtab_cursor *pCursor){
if( v==1 ){
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
- pCsr->iCol += v;
+ pCsr->iCol += (int)v;
pCsr->iPos = 0;
pCsr->pNext += sqlite3Fts3GetVarint(pCsr->pNext, &v);
}
- pCsr->iPos += (v - 2);
+ pCsr->iPos += (int)(v - 2);
return SQLITE_OK;
}
@@ -271,7 +272,7 @@ static int fts3termFilterMethod(
pCsr->filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
pCsr->filter.flags |= FTS3_SEGMENT_SCAN;
- rc = sqlite3Fts3SegReaderCursor(pFts3, p->iIndex, FTS3_SEGCURSOR_ALL,
+ rc = sqlite3Fts3SegReaderCursor(pFts3, 0, p->iIndex, FTS3_SEGCURSOR_ALL,
pCsr->filter.zTerm, pCsr->filter.nTerm, 0, 1, &pCsr->csr
);
if( rc==SQLITE_OK ){
@@ -357,7 +358,10 @@ int sqlite3Fts3InitTerm(sqlite3 *db){
0, /* xCommit */
0, /* xRollback */
0, /* xFindFunction */
- 0 /* xRename */
+ 0, /* xRename */
+ 0, /* xSavepoint */
+ 0, /* xRelease */
+ 0 /* xRollbackTo */
};
int rc; /* Return code */
diff --git a/src/libtracker-fts/fts3_test.c b/src/libtracker-fts/fts3_test.c
index 72735f3d1..4da0b8f13 100644
--- a/src/libtracker-fts/fts3_test.c
+++ b/src/libtracker-fts/fts3_test.c
@@ -13,13 +13,17 @@
** This file is not part of the production FTS code. It is only used for
** testing. It contains a Tcl command that can be used to test if a document
** matches an FTS NEAR expression.
+**
+** As of March 2012, it also contains a version 1 tokenizer used for testing
+** that the sqlite3_tokenizer_module.xLanguage() method is invoked correctly.
*/
#include <tcl.h>
#include <string.h>
#include <assert.h>
-#ifdef SQLITE_TEST
+#if defined(SQLITE_TEST)
+#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
/* Required so that the "ifdef SQLITE_ENABLE_FTS3" below works */
#include "fts3Int.h"
@@ -158,6 +162,8 @@ static int fts3_near_match_cmd(
Tcl_Obj **apExprToken;
int nExprToken;
+ UNUSED_PARAMETER(clientData);
+
/* Must have 3 or more arguments. */
if( objc<3 || (objc%2)==0 ){
Tcl_WrongNumArgs(interp, 1, objv, "DOCUMENT EXPR ?OPTION VALUE?...");
@@ -311,14 +317,219 @@ static int fts3_configure_incr_load_cmd(
Tcl_SetObjResult(interp, pRet);
Tcl_DecrRefCount(pRet);
#endif
+ UNUSED_PARAMETER(clientData);
+ return TCL_OK;
+}
+
+#ifdef SQLITE_ENABLE_FTS3
+/**************************************************************************
+** Beginning of test tokenizer code.
+**
+** For language 0, this tokenizer is similar to the default 'simple'
+** tokenizer. For other languages L, the following:
+**
+** * Odd numbered languages are case-sensitive. Even numbered
+** languages are not.
+**
+** * Language ids 100 or greater are considered an error.
+**
+** The implementation assumes that the input contains only ASCII characters
+** (i.e. those that may be encoded in UTF-8 using a single byte).
+*/
+typedef struct test_tokenizer {
+ sqlite3_tokenizer base;
+} test_tokenizer;
+
+typedef struct test_tokenizer_cursor {
+ sqlite3_tokenizer_cursor base;
+ const char *aInput; /* Input being tokenized */
+ int nInput; /* Size of the input in bytes */
+ int iInput; /* Current offset in aInput */
+ int iToken; /* Index of next token to be returned */
+ char *aBuffer; /* Buffer containing current token */
+ int nBuffer; /* Number of bytes allocated at pToken */
+ int iLangid; /* Configured language id */
+} test_tokenizer_cursor;
+
+static int testTokenizerCreate(
+ int argc, const char * const *argv,
+ sqlite3_tokenizer **ppTokenizer
+){
+ test_tokenizer *pNew;
+ UNUSED_PARAMETER(argc);
+ UNUSED_PARAMETER(argv);
+
+ pNew = sqlite3_malloc(sizeof(test_tokenizer));
+ if( !pNew ) return SQLITE_NOMEM;
+ memset(pNew, 0, sizeof(test_tokenizer));
+
+ *ppTokenizer = (sqlite3_tokenizer *)pNew;
+ return SQLITE_OK;
+}
+
+static int testTokenizerDestroy(sqlite3_tokenizer *pTokenizer){
+ test_tokenizer *p = (test_tokenizer *)pTokenizer;
+ sqlite3_free(p);
+ return SQLITE_OK;
+}
+
+static int testTokenizerOpen(
+ sqlite3_tokenizer *pTokenizer, /* The tokenizer */
+ const char *pInput, int nBytes, /* String to be tokenized */
+ sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */
+){
+ int rc = SQLITE_OK; /* Return code */
+ test_tokenizer_cursor *pCsr; /* New cursor object */
+
+ UNUSED_PARAMETER(pTokenizer);
+
+ pCsr = (test_tokenizer_cursor *)sqlite3_malloc(sizeof(test_tokenizer_cursor));
+ if( pCsr==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ memset(pCsr, 0, sizeof(test_tokenizer_cursor));
+ pCsr->aInput = pInput;
+ if( nBytes<0 ){
+ pCsr->nInput = (int)strlen(pInput);
+ }else{
+ pCsr->nInput = nBytes;
+ }
+ }
+
+ *ppCursor = (sqlite3_tokenizer_cursor *)pCsr;
+ return rc;
+}
+
+static int testTokenizerClose(sqlite3_tokenizer_cursor *pCursor){
+ test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+ sqlite3_free(pCsr->aBuffer);
+ sqlite3_free(pCsr);
+ return SQLITE_OK;
+}
+
+static int testIsTokenChar(char c){
+ return (c>='a' && c<='z') || (c>='A' && c<='Z');
+}
+static int testTolower(char c){
+ char ret = c;
+ if( ret>='A' && ret<='Z') ret = ret - ('A'-'a');
+ return ret;
+}
+
+static int testTokenizerNext(
+ sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by testTokenizerOpen */
+ const char **ppToken, /* OUT: *ppToken is the token text */
+ int *pnBytes, /* OUT: Number of bytes in token */
+ int *piStartOffset, /* OUT: Starting offset of token */
+ int *piEndOffset, /* OUT: Ending offset of token */
+ int *piPosition /* OUT: Position integer of token */
+){
+ test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+ int rc = SQLITE_OK;
+ const char *p;
+ const char *pEnd;
+
+ p = &pCsr->aInput[pCsr->iInput];
+ pEnd = &pCsr->aInput[pCsr->nInput];
+
+ /* Skip past any white-space */
+ assert( p<=pEnd );
+ while( p<pEnd && testIsTokenChar(*p)==0 ) p++;
+
+ if( p==pEnd ){
+ rc = SQLITE_DONE;
+ }else{
+ /* Advance to the end of the token */
+ const char *pToken = p;
+ int nToken;
+ while( p<pEnd && testIsTokenChar(*p) ) p++;
+ nToken = (int)(p-pToken);
+
+ /* Copy the token into the buffer */
+ if( nToken>pCsr->nBuffer ){
+ sqlite3_free(pCsr->aBuffer);
+ pCsr->aBuffer = sqlite3_malloc(nToken);
+ }
+ if( pCsr->aBuffer==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ int i;
+
+ if( pCsr->iLangid & 0x00000001 ){
+ for(i=0; i<nToken; i++) pCsr->aBuffer[i] = pToken[i];
+ }else{
+ for(i=0; i<nToken; i++) pCsr->aBuffer[i] = testTolower(pToken[i]);
+ }
+ pCsr->iToken++;
+ pCsr->iInput = (int)(p - pCsr->aInput);
+
+ *ppToken = pCsr->aBuffer;
+ *pnBytes = nToken;
+ *piStartOffset = (int)(pToken - pCsr->aInput);
+ *piEndOffset = (int)(p - pCsr->aInput);
+ *piPosition = pCsr->iToken;
+ }
+ }
+
+ return rc;
+}
+
+static int testTokenizerLanguage(
+ sqlite3_tokenizer_cursor *pCursor,
+ int iLangid
+){
+ int rc = SQLITE_OK;
+ test_tokenizer_cursor *pCsr = (test_tokenizer_cursor *)pCursor;
+ pCsr->iLangid = iLangid;
+ if( pCsr->iLangid>=100 ){
+ rc = SQLITE_ERROR;
+ }
+ return rc;
+}
+#endif
+
+static int fts3_test_tokenizer_cmd(
+ ClientData clientData,
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *CONST objv[]
+){
+#ifdef SQLITE_ENABLE_FTS3
+ static const sqlite3_tokenizer_module testTokenizerModule = {
+ 1,
+ testTokenizerCreate,
+ testTokenizerDestroy,
+ testTokenizerOpen,
+ testTokenizerClose,
+ testTokenizerNext,
+ testTokenizerLanguage
+ };
+ const sqlite3_tokenizer_module *pPtr = &testTokenizerModule;
+ if( objc!=1 ){
+ Tcl_WrongNumArgs(interp, 1, objv, "");
+ return TCL_ERROR;
+ }
+ Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(
+ (const unsigned char *)&pPtr, sizeof(sqlite3_tokenizer_module *)
+ ));
+#endif
+ UNUSED_PARAMETER(clientData);
return TCL_OK;
}
+/*
+** End of tokenizer code.
+**************************************************************************/
+
int Sqlitetestfts3_Init(Tcl_Interp *interp){
Tcl_CreateObjCommand(interp, "fts3_near_match", fts3_near_match_cmd, 0, 0);
Tcl_CreateObjCommand(interp,
"fts3_configure_incr_load", fts3_configure_incr_load_cmd, 0, 0
);
+ Tcl_CreateObjCommand(
+ interp, "fts3_test_tokenizer", fts3_test_tokenizer_cmd, 0, 0
+ );
return TCL_OK;
}
+#endif /* SQLITE_ENABLE_FTS3 || SQLITE_ENABLE_FTS4 */
#endif /* ifdef SQLITE_TEST */
diff --git a/src/libtracker-fts/fts3_tokenizer.c b/src/libtracker-fts/fts3_tokenizer.c
index 6494bb96d..8241be81f 100644
--- a/src/libtracker-fts/fts3_tokenizer.c
+++ b/src/libtracker-fts/fts3_tokenizer.c
@@ -209,10 +209,9 @@ int sqlite3Fts3InitTokenizer(
/*
** Implementation of a special SQL scalar function for testing tokenizers
** designed to be used in concert with the Tcl testing framework. This
-** function must be called with two arguments:
+** function must be called with two or more arguments:
**
-** SELECT <function-name>(<key-name>, <input-string>);
-** SELECT <function-name>(<key-name>, <pointer>);
+** SELECT <function-name>(<key-name>, ..., <input-string>);
**
** where <function-name> is the name passed as the second argument
** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer')
@@ -249,27 +248,27 @@ static void testFunc(
const char *zInput;
int nInput;
- const char *zArg = 0;
+ const char *azArg[64];
const char *zToken;
- int nToken;
- int iStart;
- int iEnd;
- int iPos;
+ int nToken = 0;
+ int iStart = 0;
+ int iEnd = 0;
+ int iPos = 0;
+ int i;
Tcl_Obj *pRet;
- assert( argc==2 || argc==3 );
+ if( argc<2 ){
+ sqlite3_result_error(context, "insufficient arguments", -1);
+ return;
+ }
nName = sqlite3_value_bytes(argv[0]);
zName = (const char *)sqlite3_value_text(argv[0]);
nInput = sqlite3_value_bytes(argv[argc-1]);
zInput = (const char *)sqlite3_value_text(argv[argc-1]);
- if( argc==3 ){
- zArg = (const char *)sqlite3_value_text(argv[1]);
- }
-
pHash = (Fts3Hash *)sqlite3_user_data(context);
p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
@@ -283,16 +282,19 @@ static void testFunc(
pRet = Tcl_NewObj();
Tcl_IncrRefCount(pRet);
- if( SQLITE_OK!=p->xCreate(zArg ? 1 : 0, &zArg, &pTokenizer) ){
+ for(i=1; i<argc-1; i++){
+ azArg[i-1] = (const char *)sqlite3_value_text(argv[i]);
+ }
+
+ if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){
zErr = "error in xCreate()";
goto finish;
}
pTokenizer->pModule = p;
- if( SQLITE_OK!=p->xOpen(pTokenizer, zInput, nInput, &pCsr) ){
+ if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
zErr = "error in xOpen()";
goto finish;
}
- pCsr->pTokenizer = pTokenizer;
while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
@@ -468,10 +470,7 @@ int sqlite3Fts3InitHashTable(
}
#ifdef SQLITE_TEST
if( SQLITE_OK==rc ){
- rc = sqlite3_create_function(db, zTest, 2, any, p, testFunc, 0, 0);
- }
- if( SQLITE_OK==rc ){
- rc = sqlite3_create_function(db, zTest, 3, any, p, testFunc, 0, 0);
+ rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0);
}
if( SQLITE_OK==rc ){
rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0);
diff --git a/src/libtracker-fts/fts3_tokenizer.h b/src/libtracker-fts/fts3_tokenizer.h
index 615644506..c91c7ed79 100644
--- a/src/libtracker-fts/fts3_tokenizer.h
+++ b/src/libtracker-fts/fts3_tokenizer.h
@@ -52,7 +52,7 @@ typedef struct sqlite3_tokenizer_cursor sqlite3_tokenizer_cursor;
struct sqlite3_tokenizer_module {
/*
- ** Structure version. Should always be set to 0.
+ ** Structure version. Should always be set to 0 or 1.
*/
int iVersion;
@@ -133,6 +133,15 @@ struct sqlite3_tokenizer_module {
int *piEndOffset, /* OUT: Byte offset of end of token in input buffer */
int *piPosition /* OUT: Number of tokens returned before this one */
);
+
+ /***********************************************************************
+ ** Methods below this point are only available if iVersion>=1.
+ */
+
+ /*
+ ** Configure the language id of a tokenizer cursor.
+ */
+ int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
};
struct sqlite3_tokenizer {
diff --git a/src/libtracker-fts/fts3_tokenizer1.c b/src/libtracker-fts/fts3_tokenizer1.c
index d11a49976..deea06d92 100644
--- a/src/libtracker-fts/fts3_tokenizer1.c
+++ b/src/libtracker-fts/fts3_tokenizer1.c
@@ -218,6 +218,7 @@ static const sqlite3_tokenizer_module simpleTokenizerModule = {
simpleOpen,
simpleClose,
simpleNext,
+ 0,
};
/*
diff --git a/src/libtracker-fts/fts3_unicode.c b/src/libtracker-fts/fts3_unicode.c
new file mode 100644
index 000000000..79941edbb
--- /dev/null
+++ b/src/libtracker-fts/fts3_unicode.c
@@ -0,0 +1,393 @@
+/*
+** 2012 May 24
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+**
+** Implementation of the "unicode" full-text-search tokenizer.
+*/
+
+#ifdef SQLITE_ENABLE_FTS4_UNICODE61
+
+#include "fts3Int.h"
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "fts3_tokenizer.h"
+
+/*
+** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied
+** from the sqlite3 source file utf.c. If this file is compiled as part
+** of the amalgamation, they are not required.
+*/
+#ifndef SQLITE_AMALGAMATION
+
+static const unsigned char sqlite3Utf8Trans1[] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
+};
+
+#define READ_UTF8(zIn, zTerm, c) \
+ c = *(zIn++); \
+ if( c>=0xc0 ){ \
+ c = sqlite3Utf8Trans1[c-0xc0]; \
+ while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \
+ c = (c<<6) + (0x3f & *(zIn++)); \
+ } \
+ if( c<0x80 \
+ || (c&0xFFFFF800)==0xD800 \
+ || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \
+ }
+
+#define WRITE_UTF8(zOut, c) { \
+ if( c<0x00080 ){ \
+ *zOut++ = (u8)(c&0xFF); \
+ } \
+ else if( c<0x00800 ){ \
+ *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \
+ *zOut++ = 0x80 + (u8)(c & 0x3F); \
+ } \
+ else if( c<0x10000 ){ \
+ *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \
+ *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (u8)(c & 0x3F); \
+ }else{ \
+ *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \
+ *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \
+ *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \
+ *zOut++ = 0x80 + (u8)(c & 0x3F); \
+ } \
+}
+
+#endif /* ifndef SQLITE_AMALGAMATION */
+
+typedef struct unicode_tokenizer unicode_tokenizer;
+typedef struct unicode_cursor unicode_cursor;
+
+struct unicode_tokenizer {
+ sqlite3_tokenizer base;
+ int bRemoveDiacritic;
+ int nException;
+ int *aiException;
+};
+
+struct unicode_cursor {
+ sqlite3_tokenizer_cursor base;
+ const unsigned char *aInput; /* Input text being tokenized */
+ int nInput; /* Size of aInput[] in bytes */
+ int iOff; /* Current offset within aInput[] */
+ int iToken; /* Index of next token to be returned */
+ char *zToken; /* storage for current token */
+ int nAlloc; /* space allocated at zToken */
+};
+
+
+/*
+** Destroy a tokenizer allocated by unicodeCreate().
+*/
+static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){
+ if( pTokenizer ){
+ unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer;
+ sqlite3_free(p->aiException);
+ sqlite3_free(p);
+ }
+ return SQLITE_OK;
+}
+
+/*
+** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE
+** statement has specified that the tokenizer for this table shall consider
+** all characters in string zIn/nIn to be separators (if bAlnum==0) or
+** token characters (if bAlnum==1).
+**
+** For each codepoint in the zIn/nIn string, this function checks if the
+** sqlite3FtsUnicodeIsalnum() function already returns the desired result.
+** If so, no action is taken. Otherwise, the codepoint is added to the
+** unicode_tokenizer.aiException[] array. For the purposes of tokenization,
+** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all
+** codepoints in the aiException[] array.
+**
+** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic()
+** identifies as a diacritic) occurs in the zIn/nIn string it is ignored.
+** It is not possible to change the behaviour of the tokenizer with respect
+** to these codepoints.
+*/
+static int unicodeAddExceptions(
+ unicode_tokenizer *p, /* Tokenizer to add exceptions to */
+ int bAlnum, /* Replace Isalnum() return value with this */
+ const char *zIn, /* Array of characters to make exceptions */
+ int nIn /* Length of z in bytes */
+){
+ const unsigned char *z = (const unsigned char *)zIn;
+ const unsigned char *zTerm = &z[nIn];
+ int iCode;
+ int nEntry = 0;
+
+ assert( bAlnum==0 || bAlnum==1 );
+
+ while( z<zTerm ){
+ READ_UTF8(z, zTerm, iCode);
+ assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
+ if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum
+ && sqlite3FtsUnicodeIsdiacritic(iCode)==0
+ ){
+ nEntry++;
+ }
+ }
+
+ if( nEntry ){
+ int *aNew; /* New aiException[] array */
+ int nNew; /* Number of valid entries in array aNew[] */
+
+ aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int));
+ if( aNew==0 ) return SQLITE_NOMEM;
+ nNew = p->nException;
+
+ z = (const unsigned char *)zIn;
+ while( z<zTerm ){
+ READ_UTF8(z, zTerm, iCode);
+ if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum
+ && sqlite3FtsUnicodeIsdiacritic(iCode)==0
+ ){
+ int i, j;
+ for(i=0; i<nNew && aNew[i]<iCode; i++);
+ for(j=nNew; j>i; j--) aNew[j] = aNew[j-1];
+ aNew[i] = iCode;
+ nNew++;
+ }
+ }
+ p->aiException = aNew;
+ p->nException = nNew;
+ }
+
+ return SQLITE_OK;
+}
+
+/*
+** Return true if the p->aiException[] array contains the value iCode.
+*/
+static int unicodeIsException(unicode_tokenizer *p, int iCode){
+ if( p->nException>0 ){
+ int *a = p->aiException;
+ int iLo = 0;
+ int iHi = p->nException-1;
+
+ while( iHi>=iLo ){
+ int iTest = (iHi + iLo) / 2;
+ if( iCode==a[iTest] ){
+ return 1;
+ }else if( iCode>a[iTest] ){
+ iLo = iTest+1;
+ }else{
+ iHi = iTest-1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*
+** Return true if, for the purposes of tokenization, codepoint iCode is
+** considered a token character (not a separator).
+*/
+static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){
+ assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 );
+ return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode);
+}
+
+/*
+** Create a new tokenizer instance.
+*/
+static int unicodeCreate(
+ int nArg, /* Size of array argv[] */
+ const char * const *azArg, /* Tokenizer creation arguments */
+ sqlite3_tokenizer **pp /* OUT: New tokenizer handle */
+){
+ unicode_tokenizer *pNew; /* New tokenizer object */
+ int i;
+ int rc = SQLITE_OK;
+
+ pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer));
+ if( pNew==NULL ) return SQLITE_NOMEM;
+ memset(pNew, 0, sizeof(unicode_tokenizer));
+ pNew->bRemoveDiacritic = 1;
+
+ for(i=0; rc==SQLITE_OK && i<nArg; i++){
+ const char *z = azArg[i];
+ int n = strlen(z);
+
+ if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){
+ pNew->bRemoveDiacritic = 1;
+ }
+ else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){
+ pNew->bRemoveDiacritic = 0;
+ }
+ else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){
+ rc = unicodeAddExceptions(pNew, 1, &z[11], n-11);
+ }
+ else if( n>=11 && memcmp("separators=", z, 11)==0 ){
+ rc = unicodeAddExceptions(pNew, 0, &z[11], n-11);
+ }
+ else{
+ /* Unrecognized argument */
+ rc = SQLITE_ERROR;
+ }
+ }
+
+ if( rc!=SQLITE_OK ){
+ unicodeDestroy((sqlite3_tokenizer *)pNew);
+ pNew = 0;
+ }
+ *pp = (sqlite3_tokenizer *)pNew;
+ return rc;
+}
+
+/*
+** Prepare to begin tokenizing a particular string. The input
+** string to be tokenized is pInput[0..nBytes-1]. A cursor
+** used to incrementally tokenize this string is returned in
+** *ppCursor.
+*/
+static int unicodeOpen(
+ sqlite3_tokenizer *p, /* The tokenizer */
+ const char *aInput, /* Input string */
+ int nInput, /* Size of string aInput in bytes */
+ sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */
+){
+ unicode_cursor *pCsr;
+
+ pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor));
+ if( pCsr==0 ){
+ return SQLITE_NOMEM;
+ }
+ memset(pCsr, 0, sizeof(unicode_cursor));
+
+ pCsr->aInput = (const unsigned char *)aInput;
+ if( aInput==0 ){
+ pCsr->nInput = 0;
+ }else if( nInput<0 ){
+ pCsr->nInput = (int)strlen(aInput);
+ }else{
+ pCsr->nInput = nInput;
+ }
+
+ *pp = &pCsr->base;
+ UNUSED_PARAMETER(p);
+ return SQLITE_OK;
+}
+
+/*
+** Close a tokenization cursor previously opened by a call to
+** simpleOpen() above.
+*/
+static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){
+ unicode_cursor *pCsr = (unicode_cursor *) pCursor;
+ sqlite3_free(pCsr->zToken);
+ sqlite3_free(pCsr);
+ return SQLITE_OK;
+}
+
+/*
+** Extract the next token from a tokenization cursor. The cursor must
+** have been opened by a prior call to simpleOpen().
+*/
+static int unicodeNext(
+ sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */
+ const char **paToken, /* OUT: Token text */
+ int *pnToken, /* OUT: Number of bytes at *paToken */
+ int *piStart, /* OUT: Starting offset of token */
+ int *piEnd, /* OUT: Ending offset of token */
+ int *piPos /* OUT: Position integer of token */
+){
+ unicode_cursor *pCsr = (unicode_cursor *)pC;
+ unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer);
+ int iCode;
+ char *zOut;
+ const unsigned char *z = &pCsr->aInput[pCsr->iOff];
+ const unsigned char *zStart = z;
+ const unsigned char *zEnd;
+ const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput];
+
+ /* Scan past any delimiter characters before the start of the next token.
+ ** Return SQLITE_DONE early if this takes us all the way to the end of
+ ** the input. */
+ while( z<zTerm ){
+ READ_UTF8(z, zTerm, iCode);
+ if( unicodeIsAlnum(p, iCode) ) break;
+ zStart = z;
+ }
+ if( zStart>=zTerm ) return SQLITE_DONE;
+
+ zOut = pCsr->zToken;
+ do {
+ int iOut;
+
+ /* Grow the output buffer if required. */
+ if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){
+ char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64);
+ if( !zNew ) return SQLITE_NOMEM;
+ zOut = &zNew[zOut - pCsr->zToken];
+ pCsr->zToken = zNew;
+ pCsr->nAlloc += 64;
+ }
+
+ /* Write the folded case of the last character read to the output */
+ zEnd = z;
+ iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic);
+ if( iOut ){
+ WRITE_UTF8(zOut, iOut);
+ }
+
+ /* If the cursor is not at EOF, read the next character */
+ if( z>=zTerm ) break;
+ READ_UTF8(z, zTerm, iCode);
+ }while( unicodeIsAlnum(p, iCode)
+ || sqlite3FtsUnicodeIsdiacritic(iCode)
+ );
+
+ /* Set the output variables and return. */
+ pCsr->iOff = (z - pCsr->aInput);
+ *paToken = pCsr->zToken;
+ *pnToken = zOut - pCsr->zToken;
+ *piStart = (zStart - pCsr->aInput);
+ *piEnd = (zEnd - pCsr->aInput);
+ *piPos = pCsr->iToken++;
+ return SQLITE_OK;
+}
+
+/*
+** Set *ppModule to a pointer to the sqlite3_tokenizer_module
+** structure for the unicode tokenizer.
+*/
+void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){
+ static const sqlite3_tokenizer_module module = {
+ 0,
+ unicodeCreate,
+ unicodeDestroy,
+ unicodeOpen,
+ unicodeClose,
+ unicodeNext,
+ 0,
+ };
+ *ppModule = &module;
+}
+
+#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
+#endif /* ifndef SQLITE_ENABLE_FTS4_UNICODE61 */
diff --git a/src/libtracker-fts/fts3_unicode2.c b/src/libtracker-fts/fts3_unicode2.c
new file mode 100644
index 000000000..3c2456902
--- /dev/null
+++ b/src/libtracker-fts/fts3_unicode2.c
@@ -0,0 +1,366 @@
+/*
+** 2012 May 25
+**
+** The author disclaims copyright to this source code. In place of
+** a legal notice, here is a blessing:
+**
+** May you do good and not evil.
+** May you find forgiveness for yourself and forgive others.
+** May you share freely, never taking more than you give.
+**
+******************************************************************************
+*/
+
+/*
+** DO NOT EDIT THIS MACHINE GENERATED FILE.
+*/
+
+#if defined(SQLITE_ENABLE_FTS4_UNICODE61)
+#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4)
+
+#include <assert.h>
+
+/*
+** Return true if the argument corresponds to a unicode codepoint
+** classified as either a letter or a number. Otherwise false.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+int sqlite3FtsUnicodeIsalnum(int c){
+ /* Each unsigned integer in the following array corresponds to a contiguous
+ ** range of unicode codepoints that are not either letters or numbers (i.e.
+ ** codepoints for which this function should return 0).
+ **
+ ** The most significant 22 bits in each 32-bit value contain the first
+ ** codepoint in the range. The least significant 10 bits are used to store
+ ** the size of the range (always at least 1). In other words, the value
+ ** ((C<<22) + N) represents a range of N codepoints starting with codepoint
+ ** C. It is not possible to represent a range larger than 1023 codepoints
+ ** using this format.
+ */
+ const static unsigned int aEntry[] = {
+ 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07,
+ 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01,
+ 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401,
+ 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01,
+ 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01,
+ 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802,
+ 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F,
+ 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401,
+ 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804,
+ 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403,
+ 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812,
+ 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001,
+ 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802,
+ 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805,
+ 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401,
+ 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03,
+ 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807,
+ 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001,
+ 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01,
+ 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804,
+ 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001,
+ 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802,
+ 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01,
+ 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06,
+ 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007,
+ 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006,
+ 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417,
+ 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14,
+ 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07,
+ 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01,
+ 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001,
+ 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802,
+ 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F,
+ 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002,
+ 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802,
+ 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006,
+ 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D,
+ 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802,
+ 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027,
+ 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403,
+ 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805,
+ 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04,
+ 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401,
+ 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005,
+ 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B,
+ 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A,
+ 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001,
+ 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59,
+ 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807,
+ 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01,
+ 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E,
+ 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100,
+ 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10,
+ 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402,
+ 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804,
+ 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012,
+ 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004,
+ 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002,
+ 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803,
+ 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07,
+ 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02,
+ 0x037FFC02, 0x03E3FC01, 0x03EC7801, 0x03ECA401, 0x03EEC810,
+ 0x03F4F802, 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023,
+ 0x03F95013, 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807,
+ 0x03FCEC06, 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405,
+ 0x04040003, 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E,
+ 0x040E7C01, 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01,
+ 0x04280403, 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01,
+ 0x04294009, 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016,
+ 0x04420003, 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004,
+ 0x04460003, 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004,
+ 0x05BD442E, 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5,
+ 0x07480046, 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01,
+ 0x075C5401, 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401,
+ 0x075EA401, 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064,
+ 0x07C2800F, 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F,
+ 0x07C4C03C, 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009,
+ 0x07C94002, 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014,
+ 0x07CE8025, 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001,
+ 0x07D108B6, 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018,
+ 0x07D7EC46, 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401,
+ 0x38008060, 0x380400F0, 0x3C000001, 0x3FFFF401, 0x40000001,
+ 0x43FFF401,
+ };
+ static const unsigned int aAscii[4] = {
+ 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001,
+ };
+
+ if( c<128 ){
+ return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 );
+ }else if( c<(1<<22) ){
+ unsigned int key = (((unsigned int)c)<<10) | 0x000003FF;
+ int iRes;
+ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+ int iLo = 0;
+ while( iHi>=iLo ){
+ int iTest = (iHi + iLo) / 2;
+ if( key >= aEntry[iTest] ){
+ iRes = iTest;
+ iLo = iTest+1;
+ }else{
+ iHi = iTest-1;
+ }
+ }
+ assert( aEntry[0]<key );
+ assert( key>=aEntry[iRes] );
+ return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF)));
+ }
+ return 1;
+}
+
+
+/*
+** If the argument is a codepoint corresponding to a lowercase letter
+** in the ASCII range with a diacritic added, return the codepoint
+** of the ASCII letter only. For example, if passed 235 - "LATIN
+** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER
+** E"). The resuls of passing a codepoint that corresponds to an
+** uppercase letter are undefined.
+*/
+static int remove_diacritic(int c){
+ unsigned short aDia[] = {
+ 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995,
+ 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286,
+ 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732,
+ 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336,
+ 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928,
+ 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234,
+ 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504,
+ 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529,
+ 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726,
+ 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122,
+ 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536,
+ 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730,
+ 62924, 63050, 63082, 63274, 63390,
+ };
+ char aChar[] = {
+ '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c',
+ 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r',
+ 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o',
+ 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r',
+ 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0',
+ '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h',
+ 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't',
+ 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a',
+ 'e', 'i', 'o', 'u', 'y',
+ };
+
+ unsigned int key = (((unsigned int)c)<<3) | 0x00000007;
+ int iRes = 0;
+ int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1;
+ int iLo = 0;
+ while( iHi>=iLo ){
+ int iTest = (iHi + iLo) / 2;
+ if( key >= aDia[iTest] ){
+ iRes = iTest;
+ iLo = iTest+1;
+ }else{
+ iHi = iTest-1;
+ }
+ }
+ assert( key>=aDia[iRes] );
+ return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]);
+};
+
+
+/*
+** Return true if the argument interpreted as a unicode codepoint
+** is a diacritical modifier character.
+*/
+int sqlite3FtsUnicodeIsdiacritic(int c){
+ unsigned int mask0 = 0x08029FDF;
+ unsigned int mask1 = 0x000361F8;
+ if( c<768 || c>817 ) return 0;
+ return (c < 768+32) ?
+ (mask0 & (1 << (c-768))) :
+ (mask1 & (1 << (c-768-32)));
+}
+
+
+/*
+** Interpret the argument as a unicode codepoint. If the codepoint
+** is an upper case character that has a lower case equivalent,
+** return the codepoint corresponding to the lower case version.
+** Otherwise, return a copy of the argument.
+**
+** The results are undefined if the value passed to this function
+** is less than zero.
+*/
+int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){
+ /* Each entry in the following array defines a rule for folding a range
+ ** of codepoints to lower case. The rule applies to a range of nRange
+ ** codepoints starting at codepoint iCode.
+ **
+ ** If the least significant bit in flags is clear, then the rule applies
+ ** to all nRange codepoints (i.e. all nRange codepoints are upper case and
+ ** need to be folded). Or, if it is set, then the rule only applies to
+ ** every second codepoint in the range, starting with codepoint C.
+ **
+ ** The 7 most significant bits in flags are an index into the aiOff[]
+ ** array. If a specific codepoint C does require folding, then its lower
+ ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF).
+ **
+ ** The contents of this array are generated by parsing the CaseFolding.txt
+ ** file distributed as part of the "Unicode Character Database". See
+ ** http://www.unicode.org for details.
+ */
+ static const struct TableEntry {
+ unsigned short iCode;
+ unsigned char flags;
+ unsigned char nRange;
+ } aEntry[] = {
+ {65, 14, 26}, {181, 64, 1}, {192, 14, 23},
+ {216, 14, 7}, {256, 1, 48}, {306, 1, 6},
+ {313, 1, 16}, {330, 1, 46}, {376, 116, 1},
+ {377, 1, 6}, {383, 104, 1}, {385, 50, 1},
+ {386, 1, 4}, {390, 44, 1}, {391, 0, 1},
+ {393, 42, 2}, {395, 0, 1}, {398, 32, 1},
+ {399, 38, 1}, {400, 40, 1}, {401, 0, 1},
+ {403, 42, 1}, {404, 46, 1}, {406, 52, 1},
+ {407, 48, 1}, {408, 0, 1}, {412, 52, 1},
+ {413, 54, 1}, {415, 56, 1}, {416, 1, 6},
+ {422, 60, 1}, {423, 0, 1}, {425, 60, 1},
+ {428, 0, 1}, {430, 60, 1}, {431, 0, 1},
+ {433, 58, 2}, {435, 1, 4}, {439, 62, 1},
+ {440, 0, 1}, {444, 0, 1}, {452, 2, 1},
+ {453, 0, 1}, {455, 2, 1}, {456, 0, 1},
+ {458, 2, 1}, {459, 1, 18}, {478, 1, 18},
+ {497, 2, 1}, {498, 1, 4}, {502, 122, 1},
+ {503, 134, 1}, {504, 1, 40}, {544, 110, 1},
+ {546, 1, 18}, {570, 70, 1}, {571, 0, 1},
+ {573, 108, 1}, {574, 68, 1}, {577, 0, 1},
+ {579, 106, 1}, {580, 28, 1}, {581, 30, 1},
+ {582, 1, 10}, {837, 36, 1}, {880, 1, 4},
+ {886, 0, 1}, {902, 18, 1}, {904, 16, 3},
+ {908, 26, 1}, {910, 24, 2}, {913, 14, 17},
+ {931, 14, 9}, {962, 0, 1}, {975, 4, 1},
+ {976, 140, 1}, {977, 142, 1}, {981, 146, 1},
+ {982, 144, 1}, {984, 1, 24}, {1008, 136, 1},
+ {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1},
+ {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1},
+ {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32},
+ {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1},
+ {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38},
+ {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1},
+ {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1},
+ {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6},
+ {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6},
+ {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8},
+ {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2},
+ {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1},
+ {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2},
+ {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2},
+ {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2},
+ {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1},
+ {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16},
+ {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47},
+ {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1},
+ {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1},
+ {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1},
+ {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2},
+ {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1},
+ {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14},
+ {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1},
+ {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1},
+ {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1},
+ {65313, 14, 26},
+ };
+ static const unsigned short aiOff[] = {
+ 1, 2, 8, 15, 16, 26, 28, 32,
+ 37, 38, 40, 48, 63, 64, 69, 71,
+ 79, 80, 116, 202, 203, 205, 206, 207,
+ 209, 210, 211, 213, 214, 217, 218, 219,
+ 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721,
+ 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274,
+ 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406,
+ 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462,
+ 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511,
+ 65514, 65521, 65527, 65528, 65529,
+ };
+
+ int ret = c;
+
+ assert( c>=0 );
+ assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 );
+
+ if( c<128 ){
+ if( c>='A' && c<='Z' ) ret = c + ('a' - 'A');
+ }else if( c<65536 ){
+ int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1;
+ int iLo = 0;
+ int iRes = -1;
+
+ while( iHi>=iLo ){
+ int iTest = (iHi + iLo) / 2;
+ int cmp = (c - aEntry[iTest].iCode);
+ if( cmp>=0 ){
+ iRes = iTest;
+ iLo = iTest+1;
+ }else{
+ iHi = iTest-1;
+ }
+ }
+ assert( iRes<0 || c>=aEntry[iRes].iCode );
+
+ if( iRes>=0 ){
+ const struct TableEntry *p = &aEntry[iRes];
+ if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){
+ ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF;
+ assert( ret>0 );
+ }
+ }
+
+ if( bRemoveDiacritic ) ret = remove_diacritic(ret);
+ }
+
+ else if( c>=66560 && c<66600 ){
+ ret = c + 40;
+ }
+
+ return ret;
+}
+#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */
+#endif /* !defined(SQLITE_ENABLE_FTS4_UNICODE61) */
diff --git a/src/libtracker-fts/fts3_write.c b/src/libtracker-fts/fts3_write.c
index 36f2249e1..bda7fbbe1 100644
--- a/src/libtracker-fts/fts3_write.c
+++ b/src/libtracker-fts/fts3_write.c
@@ -24,6 +24,9 @@
#include <assert.h>
#include <stdlib.h>
+
+#define FTS_MAX_APPENDABLE_HEIGHT 16
+
/*
** When full-text index nodes are loaded from disk, the buffer that they
** are loaded into has the following number of bytes of padding at the end
@@ -63,6 +66,29 @@ int test_fts3_node_chunk_threshold = (4*1024)*4;
# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4)
#endif
+/*
+** The two values that may be meaningfully bound to the :1 parameter in
+** statements SQL_REPLACE_STAT and SQL_SELECT_STAT.
+*/
+#define FTS_STAT_DOCTOTAL 0
+#define FTS_STAT_INCRMERGEHINT 1
+#define FTS_STAT_AUTOINCRMERGE 2
+
+/*
+** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic
+** and incremental merge operation that takes place. This is used for
+** debugging FTS only, it should not usually be turned on in production
+** systems.
+*/
+#ifdef FTS3_LOG_MERGES
+static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){
+ sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel);
+}
+#else
+#define fts3LogMerge(x, y)
+#endif
+
+
typedef struct PendingList PendingList;
typedef struct SegmentNode SegmentNode;
typedef struct SegmentWriter SegmentWriter;
@@ -110,6 +136,8 @@ struct Fts3DeferredToken {
*/
struct Fts3SegReader {
int iIdx; /* Index within level, or 0x7FFFFFFF for PT */
+ u8 bLookup; /* True for a lookup only */
+ u8 rootOnly; /* True for a root-only reader */
sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */
sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */
@@ -143,7 +171,7 @@ struct Fts3SegReader {
};
#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0)
-#define fts3SegReaderIsRootOnly(p) ((p)->aNode==(char *)&(p)[1])
+#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0)
/*
** An instance of this structure is used to create a segment b-tree in the
@@ -223,13 +251,22 @@ struct SegmentNode {
#define SQL_DELETE_DOCSIZE 19
#define SQL_REPLACE_DOCSIZE 20
#define SQL_SELECT_DOCSIZE 21
-#define SQL_SELECT_DOCTOTAL 22
-#define SQL_REPLACE_DOCTOTAL 23
+#define SQL_SELECT_STAT 22
+#define SQL_REPLACE_STAT 23
#define SQL_SELECT_ALL_PREFIX_LEVEL 24
#define SQL_DELETE_ALL_TERMS_SEGDIR 25
-
#define SQL_DELETE_SEGDIR_RANGE 26
+#define SQL_SELECT_ALL_LANGID 27
+#define SQL_FIND_MERGE_LEVEL 28
+#define SQL_MAX_LEAF_NODE_ESTIMATE 29
+#define SQL_DELETE_SEGDIR_ENTRY 30
+#define SQL_SHIFT_SEGDIR_ENTRY 31
+#define SQL_SELECT_SEGDIR 32
+#define SQL_CHOMP_SEGDIR 33
+#define SQL_SEGMENT_IS_APPENDABLE 34
+#define SQL_SELECT_INDEXES 35
+#define SQL_SELECT_MXLEVEL 36
/*
** This function is used to obtain an SQLite prepared statement handle
@@ -256,11 +293,11 @@ static int fts3SqlStmt(
/* 4 */ "DELETE FROM %Q.'%q_segdir'",
/* 5 */ "DELETE FROM %Q.'%q_docsize'",
/* 6 */ "DELETE FROM %Q.'%q_stat'",
-/* 7 */ "SELECT %s FROM %Q.'%q_content' AS x WHERE rowid=?",
+/* 7 */ "SELECT %s WHERE rowid=?",
/* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1",
-/* 9 */ "INSERT INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)",
+/* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)",
/* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)",
-/* 11 */ "INSERT INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)",
+/* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)",
/* Return segments in order from oldest to newest.*/
/* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
@@ -278,13 +315,61 @@ static int fts3SqlStmt(
/* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?",
/* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)",
/* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?",
-/* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=0",
-/* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(0,?)",
+/* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?",
+/* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)",
/* 24 */ "",
/* 25 */ "",
/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?",
-
+/* 27 */ "SELECT DISTINCT level / (1024 * ?) FROM %Q.'%q_segdir'",
+
+/* This statement is used to determine which level to read the input from
+** when performing an incremental merge. It returns the absolute level number
+** of the oldest level in the db that contains at least ? segments. Or,
+** if no level in the FTS index contains more than ? segments, the statement
+** returns zero rows. */
+/* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?"
+ " ORDER BY (level %% 1024) ASC LIMIT 1",
+
+/* Estimate the upper limit on the number of leaf nodes in a new segment
+** created by merging the oldest :2 segments from absolute level :1. See
+** function sqlite3Fts3Incrmerge() for details. */
+/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) "
+ " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?",
+
+/* SQL_DELETE_SEGDIR_ENTRY
+** Delete the %_segdir entry on absolute level :1 with index :2. */
+/* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?",
+
+/* SQL_SHIFT_SEGDIR_ENTRY
+** Modify the idx value for the segment with idx=:3 on absolute level :2
+** to :1. */
+/* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?",
+
+/* SQL_SELECT_SEGDIR
+** Read a single entry from the %_segdir table. The entry from absolute
+** level :1 with index value :2. */
+/* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root "
+ "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?",
+
+/* SQL_CHOMP_SEGDIR
+** Update the start_block (:1) and root (:2) fields of the %_segdir
+** entry located on absolute level :3 with index :4. */
+/* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?"
+ "WHERE level = ? AND idx = ?",
+
+/* SQL_SEGMENT_IS_APPENDABLE
+** Return a single row if the segment with end_block=? is appendable. Or
+** no rows otherwise. */
+/* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL",
+
+/* SQL_SELECT_INDEXES
+** Return the list of valid segment indexes for absolute level ? */
+/* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC",
+
+/* SQL_SELECT_MXLEVEL
+** Return the largest relative level in the FTS index or indexes. */
+/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'"
};
int rc = SQLITE_OK;
sqlite3_stmt *pStmt;
@@ -298,7 +383,7 @@ static int fts3SqlStmt(
if( eStmt==SQL_CONTENT_INSERT ){
zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist);
}else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){
- zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist, p->zDb, p->zName);
+ zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist);
}else{
zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName);
}
@@ -322,26 +407,22 @@ static int fts3SqlStmt(
return rc;
}
+
static int fts3SelectDocsize(
Fts3Table *pTab, /* FTS3 table handle */
- int eStmt, /* Either SQL_SELECT_DOCSIZE or DOCTOTAL */
sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */
sqlite3_stmt **ppStmt /* OUT: Statement handle */
){
sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */
int rc; /* Return code */
- assert( eStmt==SQL_SELECT_DOCSIZE || eStmt==SQL_SELECT_DOCTOTAL );
-
- rc = fts3SqlStmt(pTab, eStmt, &pStmt, 0);
+ rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0);
if( rc==SQLITE_OK ){
- if( eStmt==SQL_SELECT_DOCSIZE ){
- sqlite3_bind_int64(pStmt, 1, iDocid);
- }
+ sqlite3_bind_int64(pStmt, 1, iDocid);
rc = sqlite3_step(pStmt);
if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){
rc = sqlite3_reset(pStmt);
- if( rc==SQLITE_OK ) rc = SQLITE_CORRUPT_VTAB;
+ if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB;
pStmt = 0;
}else{
rc = SQLITE_OK;
@@ -356,7 +437,21 @@ int sqlite3Fts3SelectDoctotal(
Fts3Table *pTab, /* Fts3 table handle */
sqlite3_stmt **ppStmt /* OUT: Statement handle */
){
- return fts3SelectDocsize(pTab, SQL_SELECT_DOCTOTAL, 0, ppStmt);
+ sqlite3_stmt *pStmt = 0;
+ int rc;
+ rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
+ if( sqlite3_step(pStmt)!=SQLITE_ROW
+ || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB
+ ){
+ rc = sqlite3_reset(pStmt);
+ if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB;
+ pStmt = 0;
+ }
+ }
+ *ppStmt = pStmt;
+ return rc;
}
int sqlite3Fts3SelectDocsize(
@@ -364,7 +459,7 @@ int sqlite3Fts3SelectDocsize(
sqlite3_int64 iDocid, /* Docid to read size data for */
sqlite3_stmt **ppStmt /* OUT: Statement handle */
){
- return fts3SelectDocsize(pTab, SQL_SELECT_DOCSIZE, iDocid, ppStmt);
+ return fts3SelectDocsize(pTab, iDocid, ppStmt);
}
/*
@@ -409,21 +504,66 @@ static void fts3SqlExec(
** not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. It can
** still happen if the user reads data directly from the %_segments or
** %_segdir tables instead of going through FTS3 though.
+**
+** This reasoning does not apply to a content=xxx table.
*/
int sqlite3Fts3ReadLock(Fts3Table *p){
int rc; /* Return code */
sqlite3_stmt *pStmt; /* Statement used to obtain lock */
- rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0);
- if( rc==SQLITE_OK ){
- sqlite3_bind_null(pStmt, 1);
- sqlite3_step(pStmt);
- rc = sqlite3_reset(pStmt);
+ if( p->zContentTbl==0 ){
+ rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_null(pStmt, 1);
+ sqlite3_step(pStmt);
+ rc = sqlite3_reset(pStmt);
+ }
+ }else{
+ rc = SQLITE_OK;
}
+
return rc;
}
/*
+** FTS maintains a separate indexes for each language-id (a 32-bit integer).
+** Within each language id, a separate index is maintained to store the
+** document terms, and each configured prefix size (configured the FTS
+** "prefix=" option). And each index consists of multiple levels ("relative
+** levels").
+**
+** All three of these values (the language id, the specific index and the
+** level within the index) are encoded in 64-bit integer values stored
+** in the %_segdir table on disk. This function is used to convert three
+** separate component values into the single 64-bit integer value that
+** can be used to query the %_segdir table.
+**
+** Specifically, each language-id/index combination is allocated 1024
+** 64-bit integer level values ("absolute levels"). The main terms index
+** for language-id 0 is allocate values 0-1023. The first prefix index
+** (if any) for language-id 0 is allocated values 1024-2047. And so on.
+** Language 1 indexes are allocated immediately following language 0.
+**
+** So, for a system with nPrefix prefix indexes configured, the block of
+** absolute levels that corresponds to language-id iLangid and index
+** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024).
+*/
+static sqlite3_int64 getAbsoluteLevel(
+ Fts3Table *p, /* FTS3 table handle */
+ int iLangid, /* Language id */
+ int iIndex, /* Index in p->aIndex[] */
+ int iLevel /* Level of segments */
+){
+ sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */
+ assert( iLangid>=0 );
+ assert( p->nIndex>0 );
+ assert( iIndex>=0 && iIndex<p->nIndex );
+
+ iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL;
+ return iBase + iLevel;
+}
+
+/*
** Set *ppStmt to a statement handle that may be used to iterate through
** all rows in the %_segdir table, from oldest to newest. If successful,
** return SQLITE_OK. If an error occurs while preparing the statement,
@@ -442,8 +582,9 @@ int sqlite3Fts3ReadLock(Fts3Table *p){
*/
int sqlite3Fts3AllSegdirs(
Fts3Table *p, /* FTS3 table */
+ int iLangid, /* Language being queried */
int iIndex, /* Index for p->aIndex[] */
- int iLevel, /* Level to select */
+ int iLevel, /* Level to select (relative level) */
sqlite3_stmt **ppStmt /* OUT: Compiled statement */
){
int rc;
@@ -457,14 +598,16 @@ int sqlite3Fts3AllSegdirs(
/* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
- sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL-1);
+ sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+ sqlite3_bind_int64(pStmt, 2,
+ getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+ );
}
}else{
/* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */
rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pStmt, 1, iLevel+iIndex*FTS3_SEGDIR_MAXLEVEL);
+ sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel));
}
}
*ppStmt = pStmt;
@@ -630,18 +773,19 @@ static int fts3PendingTermsAddOne(
*/
static int fts3PendingTermsAdd(
Fts3Table *p, /* Table into which text will be inserted */
+ int iLangid, /* Language id to use */
const char *zText, /* Text of document to be inserted */
int iCol, /* Column into which text is being inserted */
- u32 *pnWord /* OUT: Number of tokens inserted */
+ u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */
){
int rc;
- int iStart;
- int iEnd;
- int iPos;
+ int iStart = 0;
+ int iEnd = 0;
+ int iPos = 0;
int nWord = 0;
char const *zToken;
- int nToken;
+ int nToken = 0;
sqlite3_tokenizer *pTokenizer = p->pTokenizer;
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
@@ -659,11 +803,10 @@ static int fts3PendingTermsAdd(
return SQLITE_OK;
}
- rc = pModule->xOpen(pTokenizer, zText, -1, &pCsr);
+ rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr);
if( rc!=SQLITE_OK ){
return rc;
}
- pCsr->pTokenizer = pTokenizer;
xNext = pModule->xNext;
while( SQLITE_OK==rc
@@ -697,7 +840,7 @@ static int fts3PendingTermsAdd(
}
pModule->xClose(pCsr);
- *pnWord = nWord;
+ *pnWord += nWord;
return (rc==SQLITE_DONE ? SQLITE_OK : rc);
}
@@ -706,18 +849,28 @@ static int fts3PendingTermsAdd(
** fts3PendingTermsAdd() are to add term/position-list pairs for the
** contents of the document with docid iDocid.
*/
-static int fts3PendingTermsDocid(Fts3Table *p, sqlite_int64 iDocid){
+static int fts3PendingTermsDocid(
+ Fts3Table *p, /* Full-text table handle */
+ int iLangid, /* Language id of row being written */
+ sqlite_int64 iDocid /* Docid of row being written */
+){
+ assert( iLangid>=0 );
+
/* TODO(shess) Explore whether partially flushing the buffer on
** forced-flush would provide better performance. I suspect that if
** we ordered the doclists by size and flushed the largest until the
** buffer was half empty, that would let the less frequent terms
** generate longer doclists.
*/
- if( iDocid<=p->iPrevDocid || p->nPendingData>p->nMaxPendingData ){
+ if( iDocid<=p->iPrevDocid
+ || p->iPrevLangid!=iLangid
+ || p->nPendingData>p->nMaxPendingData
+ ){
int rc = sqlite3Fts3PendingTermsFlush(p);
if( rc!=SQLITE_OK ) return rc;
}
p->iPrevDocid = iDocid;
+ p->iPrevLangid = iLangid;
return SQLITE_OK;
}
@@ -746,11 +899,16 @@ void sqlite3Fts3PendingTermsClear(Fts3Table *p){
** Argument apVal is the same as the similarly named argument passed to
** fts3InsertData(). Parameter iDocid is the docid of the new row.
*/
-static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){
+static int fts3InsertTerms(
+ Fts3Table *p,
+ int iLangid,
+ sqlite3_value **apVal,
+ u32 *aSz
+){
int i; /* Iterator variable */
for(i=2; i<p->nColumn+2; i++){
const char *zText = (const char *)sqlite3_value_text(apVal[i]);
- int rc = fts3PendingTermsAdd(p, zText, i-2, &aSz[i-2]);
+ int rc = fts3PendingTermsAdd(p, iLangid, zText, i-2, &aSz[i-2]);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -771,6 +929,7 @@ static int fts3InsertTerms(Fts3Table *p, sqlite3_value **apVal, u32 *aSz){
** apVal[p->nColumn+1] Right-most user-defined column
** apVal[p->nColumn+2] Hidden column with same name as table
** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid)
+** apVal[p->nColumn+4] Hidden languageid column
*/
static int fts3InsertData(
Fts3Table *p, /* Full-text table */
@@ -780,6 +939,18 @@ static int fts3InsertData(
int rc; /* Return code */
sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */
+ if( p->zContentTbl ){
+ sqlite3_value *pRowid = apVal[p->nColumn+3];
+ if( sqlite3_value_type(pRowid)==SQLITE_NULL ){
+ pRowid = apVal[1];
+ }
+ if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){
+ return SQLITE_CONSTRAINT;
+ }
+ *piDocid = sqlite3_value_int64(pRowid);
+ return SQLITE_OK;
+ }
+
/* Locate the statement handle used to insert data into the %_content
** table. The SQL for this statement is:
**
@@ -789,9 +960,13 @@ static int fts3InsertData(
** defined columns in the FTS3 table, plus one for the docid field.
*/
rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]);
- if( rc!=SQLITE_OK ){
- return rc;
+ if( rc==SQLITE_OK && p->zLanguageid ){
+ rc = sqlite3_bind_int(
+ pContentInsert, p->nColumn+2,
+ sqlite3_value_int(apVal[p->nColumn+4])
+ );
}
+ if( rc!=SQLITE_OK ) return rc;
/* There is a quirk here. The users INSERT statement may have specified
** a value for the "rowid" field, for the "docid" field, or for both.
@@ -830,14 +1005,16 @@ static int fts3InsertData(
** Remove all data from the FTS3 table. Clear the hash table containing
** pending terms.
*/
-static int fts3DeleteAll(Fts3Table *p){
+static int fts3DeleteAll(Fts3Table *p, int bContent){
int rc = SQLITE_OK; /* Return code */
/* Discard the contents of the pending-terms hash table. */
sqlite3Fts3PendingTermsClear(p);
- /* Delete everything from the %_content, %_segments and %_segdir tables. */
- fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
+ /* Delete everything from the shadow tables. Except, leave %_content as
+ ** is if bContent is false. */
+ assert( p->zContentTbl==0 || bContent==0 );
+ if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0);
fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
if( p->bHasDocsize ){
@@ -850,6 +1027,15 @@ static int fts3DeleteAll(Fts3Table *p){
}
/*
+**
+*/
+static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){
+ int iLangid = 0;
+ if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1);
+ return iLangid;
+}
+
+/*
** The first element in the apVal[] array is assumed to contain the docid
** (an integer) of a row about to be deleted. Remove all terms from the
** full-text index.
@@ -858,26 +1044,31 @@ static void fts3DeleteTerms(
int *pRC, /* Result code */
Fts3Table *p, /* The FTS table to delete from */
sqlite3_value *pRowid, /* The docid to be deleted */
- u32 *aSz /* Sizes of deleted document written here */
+ u32 *aSz, /* Sizes of deleted document written here */
+ int *pbFound /* OUT: Set to true if row really does exist */
){
int rc;
sqlite3_stmt *pSelect;
+ assert( *pbFound==0 );
if( *pRC ) return;
rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid);
if( rc==SQLITE_OK ){
if( SQLITE_ROW==sqlite3_step(pSelect) ){
int i;
- for(i=1; i<=p->nColumn; i++){
+ int iLangid = langidFromSelect(p, pSelect);
+ rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0));
+ for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){
const char *zText = (const char *)sqlite3_column_text(pSelect, i);
- rc = fts3PendingTermsAdd(p, zText, -1, &aSz[i-1]);
- if( rc!=SQLITE_OK ){
- sqlite3_reset(pSelect);
- *pRC = rc;
- return;
- }
+ rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[i-1]);
aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i);
}
+ if( rc!=SQLITE_OK ){
+ sqlite3_reset(pSelect);
+ *pRC = rc;
+ return;
+ }
+ *pbFound = 1;
}
rc = sqlite3_reset(pSelect);
}else{
@@ -890,7 +1081,7 @@ static void fts3DeleteTerms(
** Forward declaration to account for the circular dependency between
** functions fts3SegmentMerge() and fts3AllocateSegdirIdx().
*/
-static int fts3SegmentMerge(Fts3Table *, int, int);
+static int fts3SegmentMerge(Fts3Table *, int, int, int);
/*
** This function allocates a new level iLevel index in the segdir table.
@@ -909,6 +1100,7 @@ static int fts3SegmentMerge(Fts3Table *, int, int);
*/
static int fts3AllocateSegdirIdx(
Fts3Table *p,
+ int iLangid, /* Language id */
int iIndex, /* Index for p->aIndex */
int iLevel,
int *piIdx
@@ -917,10 +1109,15 @@ static int fts3AllocateSegdirIdx(
sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */
int iNext = 0; /* Result of query pNextIdx */
+ assert( iLangid>=0 );
+ assert( p->nIndex>=1 );
+
/* Set variable iNext to the next available segdir index at level iLevel. */
rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pNextIdx, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);
+ sqlite3_bind_int64(
+ pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
+ );
if( SQLITE_ROW==sqlite3_step(pNextIdx) ){
iNext = sqlite3_column_int(pNextIdx, 0);
}
@@ -934,7 +1131,8 @@ static int fts3AllocateSegdirIdx(
** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext.
*/
if( iNext>=FTS3_MERGE_COUNT ){
- rc = fts3SegmentMerge(p, iIndex, iLevel);
+ fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel));
+ rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel);
*piIdx = 0;
}else{
*piIdx = iNext;
@@ -981,7 +1179,7 @@ int sqlite3Fts3ReadBlock(
int rc; /* Return code */
/* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */
- assert( pnBlob);
+ assert( pnBlob );
if( p->pSegments ){
rc = sqlite3_blob_reopen(p->pSegments, iBlockid);
@@ -1068,6 +1266,18 @@ static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){
}
/*
+** Set an Fts3SegReader cursor to point at EOF.
+*/
+static void fts3SegReaderSetEof(Fts3SegReader *pSeg){
+ if( !fts3SegReaderIsRootOnly(pSeg) ){
+ sqlite3_free(pSeg->aNode);
+ sqlite3_blob_close(pSeg->pBlob);
+ pSeg->pBlob = 0;
+ }
+ pSeg->aNode = 0;
+}
+
+/*
** Move the iterator passed as the first argument to the next term in the
** segment. If successful, SQLITE_OK is returned. If there is no next term,
** SQLITE_DONE. Otherwise, an SQLite error code.
@@ -1106,12 +1316,7 @@ static int fts3SegReaderNext(
return SQLITE_OK;
}
- if( !fts3SegReaderIsRootOnly(pReader) ){
- sqlite3_free(pReader->aNode);
- sqlite3_blob_close(pReader->pBlob);
- pReader->pBlob = 0;
- }
- pReader->aNode = 0;
+ fts3SegReaderSetEof(pReader);
/* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf
** blocks have already been traversed. */
@@ -1145,7 +1350,7 @@ static int fts3SegReaderNext(
if( nPrefix<0 || nSuffix<=0
|| &pNext[nSuffix]>&pReader->aNode[pReader->nNode]
){
- return SQLITE_CORRUPT_VTAB;
+ return FTS_CORRUPT_VTAB;
}
if( nPrefix+nSuffix>pReader->nTermAlloc ){
@@ -1175,7 +1380,7 @@ static int fts3SegReaderNext(
if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode]
|| (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1])
){
- return SQLITE_CORRUPT_VTAB;
+ return FTS_CORRUPT_VTAB;
}
return SQLITE_OK;
}
@@ -1315,7 +1520,7 @@ int sqlite3Fts3MsrOvfl(
int rc = SQLITE_OK;
int pgsz = p->nPgsz;
- assert( p->bHasStat );
+ assert( p->bFts4 );
assert( pgsz>0 );
for(ii=0; rc==SQLITE_OK && ii<pMsr->nSegment; ii++){
@@ -1358,6 +1563,7 @@ void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){
*/
int sqlite3Fts3SegReaderNew(
int iAge, /* Segment "age". */
+ int bLookup, /* True for a lookup only */
sqlite3_int64 iStartLeaf, /* First leaf to traverse */
sqlite3_int64 iEndLeaf, /* Final leaf to traverse */
sqlite3_int64 iEndBlock, /* Final block of segment */
@@ -1365,7 +1571,6 @@ int sqlite3Fts3SegReaderNew(
int nRoot, /* Size of buffer containing root node */
Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */
){
- int rc = SQLITE_OK; /* Return code */
Fts3SegReader *pReader; /* Newly allocated SegReader object */
int nExtra = 0; /* Bytes to allocate segment root node */
@@ -1380,6 +1585,7 @@ int sqlite3Fts3SegReaderNew(
}
memset(pReader, 0, sizeof(Fts3SegReader));
pReader->iIdx = iAge;
+ pReader->bLookup = bLookup!=0;
pReader->iStartBlock = iStartLeaf;
pReader->iLeafEndBlock = iEndLeaf;
pReader->iEndBlock = iEndBlock;
@@ -1387,19 +1593,15 @@ int sqlite3Fts3SegReaderNew(
if( nExtra ){
/* The entire segment is stored in the root node. */
pReader->aNode = (char *)&pReader[1];
+ pReader->rootOnly = 1;
pReader->nNode = nRoot;
memcpy(pReader->aNode, zRoot, nRoot);
memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING);
}else{
pReader->iCurrentBlock = iStartLeaf-1;
}
-
- if( rc==SQLITE_OK ){
- *ppReader = pReader;
- }else{
- sqlite3Fts3SegReaderFree(pReader);
- }
- return rc;
+ *ppReader = pReader;
+ return SQLITE_OK;
}
/*
@@ -1449,6 +1651,7 @@ int sqlite3Fts3SegReaderPending(
Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */
){
Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */
+ Fts3HashElem *pE; /* Iterator variable */
Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */
int nElem = 0; /* Size of array at aElem */
int rc = SQLITE_OK; /* Return Code */
@@ -1457,7 +1660,6 @@ int sqlite3Fts3SegReaderPending(
pHash = &p->aIndex[iIndex].hPending;
if( bPrefix ){
int nAlloc = 0; /* Size of allocated array at aElem */
- Fts3HashElem *pE = 0; /* Iterator variable */
for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){
char *zKey = (char *)fts3HashKey(pE);
@@ -1491,8 +1693,13 @@ int sqlite3Fts3SegReaderPending(
}else{
/* The query is a simple term lookup that matches at most one term in
- ** the index. All that is required is a straight hash-lookup. */
- Fts3HashElem *pE = fts3HashFindElem(pHash, zTerm, nTerm);
+ ** the index. All that is required is a straight hash-lookup.
+ **
+ ** Because the stack address of pE may be accessed via the aElem pointer
+ ** below, the "Fts3HashElem *pE" must be declared so that it is valid
+ ** within this entire function, not just this "else{...}" block.
+ */
+ pE = fts3HashFindElem(pHash, zTerm, nTerm);
if( pE ){
aElem = &pE;
nElem = 1;
@@ -1672,12 +1879,33 @@ static int fts3WriteSegment(
return rc;
}
+/*
+** Find the largest relative level number in the table. If successful, set
+** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs,
+** set *pnMax to zero and return an SQLite error code.
+*/
+int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){
+ int rc;
+ int mxLevel = 0;
+ sqlite3_stmt *pStmt = 0;
+
+ rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0);
+ if( rc==SQLITE_OK ){
+ if( SQLITE_ROW==sqlite3_step(pStmt) ){
+ mxLevel = sqlite3_column_int(pStmt, 0);
+ }
+ rc = sqlite3_reset(pStmt);
+ }
+ *pnMax = mxLevel;
+ return rc;
+}
+
/*
** Insert a record into the %_segdir table.
*/
static int fts3WriteSegdir(
Fts3Table *p, /* Virtual table handle */
- int iLevel, /* Value for "level" field */
+ sqlite3_int64 iLevel, /* Value for "level" field (absolute level) */
int iIdx, /* Value for "idx" field */
sqlite3_int64 iStartBlock, /* Value for "start_block" field */
sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */
@@ -1688,7 +1916,7 @@ static int fts3WriteSegdir(
sqlite3_stmt *pStmt;
int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pStmt, 1, iLevel);
+ sqlite3_bind_int64(pStmt, 1, iLevel);
sqlite3_bind_int(pStmt, 2, iIdx);
sqlite3_bind_int64(pStmt, 3, iStartBlock);
sqlite3_bind_int64(pStmt, 4, iLeafEndBlock);
@@ -1988,6 +2216,7 @@ static int fts3SegWriterAdd(
/* The current leaf node is full. Write it out to the database. */
rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData);
if( rc!=SQLITE_OK ) return rc;
+ p->nLeafAdd++;
/* Add the current term to the interior node tree. The term added to
** the interior tree must:
@@ -2071,7 +2300,7 @@ static int fts3SegWriterAdd(
static int fts3SegWriterFlush(
Fts3Table *p, /* Virtual table handle */
SegmentWriter *pWriter, /* SegmentWriter to flush to the db */
- int iLevel, /* Value for 'level' column of %_segdir */
+ sqlite3_int64 iLevel, /* Value for 'level' column of %_segdir */
int iIdx /* Value for 'idx' column of %_segdir */
){
int rc; /* Return code */
@@ -2096,6 +2325,7 @@ static int fts3SegWriterFlush(
rc = fts3WriteSegdir(
p, iLevel, iIdx, 0, 0, 0, pWriter->aData, pWriter->nData);
}
+ p->nLeafAdd++;
return rc;
}
@@ -2125,12 +2355,18 @@ static void fts3SegWriterFree(SegmentWriter *pWriter){
static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){
sqlite3_stmt *pStmt;
int rc;
- rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid);
- if( rc==SQLITE_OK ){
- if( SQLITE_ROW==sqlite3_step(pStmt) ){
- *pisEmpty = sqlite3_column_int(pStmt, 0);
+ if( p->zContentTbl ){
+ /* If using the content=xxx option, assume the table is never empty */
+ *pisEmpty = 0;
+ rc = SQLITE_OK;
+ }else{
+ rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid);
+ if( rc==SQLITE_OK ){
+ if( SQLITE_ROW==sqlite3_step(pStmt) ){
+ *pisEmpty = sqlite3_column_int(pStmt, 0);
+ }
+ rc = sqlite3_reset(pStmt);
}
- rc = sqlite3_reset(pStmt);
}
return rc;
}
@@ -2143,7 +2379,12 @@ static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){
**
** Return SQLITE_OK if successful, or an SQLite error code if not.
*/
-static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){
+static int fts3SegmentMaxLevel(
+ Fts3Table *p,
+ int iLangid,
+ int iIndex,
+ sqlite3_int64 *pnMax
+){
sqlite3_stmt *pStmt;
int rc;
assert( iIndex>=0 && iIndex<p->nIndex );
@@ -2156,15 +2397,40 @@ static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){
*/
rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0);
if( rc!=SQLITE_OK ) return rc;
- sqlite3_bind_int(pStmt, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
- sqlite3_bind_int(pStmt, 2, (iIndex+1)*FTS3_SEGDIR_MAXLEVEL - 1);
+ sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+ sqlite3_bind_int64(pStmt, 2,
+ getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+ );
if( SQLITE_ROW==sqlite3_step(pStmt) ){
- *pnMax = sqlite3_column_int(pStmt, 0);
+ *pnMax = sqlite3_column_int64(pStmt, 0);
}
return sqlite3_reset(pStmt);
}
/*
+** Delete all entries in the %_segments table associated with the segment
+** opened with seg-reader pSeg. This function does not affect the contents
+** of the %_segdir table.
+*/
+static int fts3DeleteSegment(
+ Fts3Table *p, /* FTS table handle */
+ Fts3SegReader *pSeg /* Segment to delete */
+){
+ int rc = SQLITE_OK; /* Return code */
+ if( pSeg->iStartBlock ){
+ sqlite3_stmt *pDelete; /* SQL statement to delete rows */
+ rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock);
+ sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock);
+ sqlite3_step(pDelete);
+ rc = sqlite3_reset(pDelete);
+ }
+ }
+ return rc;
+}
+
+/*
** This function is used after merging multiple segments into a single large
** segment to delete the old, now redundant, segment b-trees. Specifically,
** it:
@@ -2180,24 +2446,18 @@ static int fts3SegmentMaxLevel(Fts3Table *p, int iIndex, int *pnMax){
*/
static int fts3DeleteSegdir(
Fts3Table *p, /* Virtual table handle */
+ int iLangid, /* Language id */
int iIndex, /* Index for p->aIndex */
int iLevel, /* Level of %_segdir entries to delete */
Fts3SegReader **apSegment, /* Array of SegReader objects */
int nReader /* Size of array apSegment */
){
- int rc; /* Return Code */
+ int rc = SQLITE_OK; /* Return Code */
int i; /* Iterator variable */
- sqlite3_stmt *pDelete; /* SQL statement to delete rows */
+ sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */
- rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0);
for(i=0; rc==SQLITE_OK && i<nReader; i++){
- Fts3SegReader *pSegment = apSegment[i];
- if( pSegment->iStartBlock ){
- sqlite3_bind_int64(pDelete, 1, pSegment->iStartBlock);
- sqlite3_bind_int64(pDelete, 2, pSegment->iEndBlock);
- sqlite3_step(pDelete);
- rc = sqlite3_reset(pDelete);
- }
+ rc = fts3DeleteSegment(p, apSegment[i]);
}
if( rc!=SQLITE_OK ){
return rc;
@@ -2207,13 +2467,17 @@ static int fts3DeleteSegdir(
if( iLevel==FTS3_SEGCURSOR_ALL ){
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL);
- sqlite3_bind_int(pDelete, 2, (iIndex+1) * FTS3_SEGDIR_MAXLEVEL - 1);
+ sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0));
+ sqlite3_bind_int64(pDelete, 2,
+ getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1)
+ );
}
}else{
rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0);
if( rc==SQLITE_OK ){
- sqlite3_bind_int(pDelete, 1, iIndex*FTS3_SEGDIR_MAXLEVEL + iLevel);
+ sqlite3_bind_int64(
+ pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel)
+ );
}
}
@@ -2376,11 +2640,16 @@ static int fts3SegReaderStart(
** b-tree leaf nodes contain more than one term.
*/
for(i=0; pCsr->bRestart==0 && i<pCsr->nSegment; i++){
+ int res = 0;
Fts3SegReader *pSeg = pCsr->apSegment[i];
do {
int rc = fts3SegReaderNext(p, pSeg, 0);
if( rc!=SQLITE_OK ) return rc;
- }while( zTerm && fts3SegReaderTermCmp(pSeg, zTerm, nTerm)<0 );
+ }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 );
+
+ if( pSeg->bLookup && res!=0 ){
+ fts3SegReaderSetEof(pSeg);
+ }
}
fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp);
@@ -2482,6 +2751,7 @@ int sqlite3Fts3SegReaderStep(
int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX);
int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN);
+ int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST);
Fts3SegReader **apSegment = pCsr->apSegment;
int nSegment = pCsr->nSegment;
@@ -2500,7 +2770,12 @@ int sqlite3Fts3SegReaderStep(
** forward. Then sort the list in order of current term again.
*/
for(i=0; i<pCsr->nAdvance; i++){
- rc = fts3SegReaderNext(p, apSegment[i], 0);
+ Fts3SegReader *pSeg = apSegment[i];
+ if( pSeg->bLookup ){
+ fts3SegReaderSetEof(pSeg);
+ }else{
+ rc = fts3SegReaderNext(p, pSeg, 0);
+ }
if( rc!=SQLITE_OK ) return rc;
}
fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp);
@@ -2541,6 +2816,7 @@ int sqlite3Fts3SegReaderStep(
assert( isIgnoreEmpty || (isRequirePos && !isColFilter) );
if( nMerge==1
&& !isIgnoreEmpty
+ && !isFirst
&& (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0)
){
pCsr->nDoclist = apSegment[0]->nDoclist;
@@ -2606,12 +2882,24 @@ int sqlite3Fts3SegReaderStep(
}
pCsr->aBuffer = aNew;
}
- nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta);
- iPrev = iDocid;
- if( isRequirePos ){
- memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
- nDoclist += nList;
- pCsr->aBuffer[nDoclist++] = '\0';
+
+ if( isFirst ){
+ char *a = &pCsr->aBuffer[nDoclist];
+ int nWrite;
+
+ nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a);
+ if( nWrite ){
+ iPrev = iDocid;
+ nDoclist += nWrite;
+ }
+ }else{
+ nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta);
+ iPrev = iDocid;
+ if( isRequirePos ){
+ memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
+ nDoclist += nList;
+ pCsr->aBuffer[nDoclist++] = '\0';
+ }
}
}
@@ -2658,13 +2946,18 @@ void sqlite3Fts3SegReaderFinish(
** Otherwise, if successful, SQLITE_OK is returned. If an error occurs,
** an SQLite error code is returned.
*/
-static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
+static int fts3SegmentMerge(
+ Fts3Table *p,
+ int iLangid, /* Language id to merge */
+ int iIndex, /* Index in p->aIndex[] to merge */
+ int iLevel /* Level to merge */
+){
int rc; /* Return code */
int iIdx = 0; /* Index of new segment */
- int iNewLevel = 0; /* Level/index to create new segment at */
+ sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */
SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */
Fts3SegFilter filter; /* Segment term filter condition */
- Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
+ Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */
int bIgnoreEmpty = 0; /* True to ignore empty segments */
assert( iLevel==FTS3_SEGCURSOR_ALL
@@ -2674,36 +2967,36 @@ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
assert( iLevel<FTS3_SEGDIR_MAXLEVEL );
assert( iIndex>=0 && iIndex<p->nIndex );
- rc = sqlite3Fts3SegReaderCursor(p, iIndex, iLevel, 0, 0, 1, 0, &csr);
+ rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr);
if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished;
if( iLevel==FTS3_SEGCURSOR_ALL ){
/* This call is to merge all segments in the database to a single
- ** segment. The level of the new segment is equal to the the numerically
+ ** segment. The level of the new segment is equal to the numerically
** greatest segment level currently present in the database for this
** index. The idx of the new segment is always 0. */
if( csr.nSegment==1 ){
rc = SQLITE_DONE;
goto finished;
}
- rc = fts3SegmentMaxLevel(p, iIndex, &iNewLevel);
+ rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iNewLevel);
bIgnoreEmpty = 1;
}else if( iLevel==FTS3_SEGCURSOR_PENDING ){
- iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL;
- rc = fts3AllocateSegdirIdx(p, iIndex, 0, &iIdx);
+ iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, 0);
+ rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, 0, &iIdx);
}else{
/* This call is to merge all segments at level iLevel. find the next
** available segment index at level iLevel+1. The call to
** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to
** a single iLevel+2 segment if necessary. */
- rc = fts3AllocateSegdirIdx(p, iIndex, iLevel+1, &iIdx);
- iNewLevel = iIndex * FTS3_SEGDIR_MAXLEVEL + iLevel+1;
+ rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx);
+ iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1);
}
if( rc!=SQLITE_OK ) goto finished;
assert( csr.nSegment>0 );
- assert( iNewLevel>=(iIndex*FTS3_SEGDIR_MAXLEVEL) );
- assert( iNewLevel<((iIndex+1)*FTS3_SEGDIR_MAXLEVEL) );
+ assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) );
+ assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) );
memset(&filter, 0, sizeof(Fts3SegFilter));
filter.flags = FTS3_SEGMENT_REQUIRE_POS;
@@ -2720,7 +3013,9 @@ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
assert( pWriter );
if( iLevel!=FTS3_SEGCURSOR_PENDING ){
- rc = fts3DeleteSegdir(p, iIndex, iLevel, csr.apSegment, csr.nSegment);
+ rc = fts3DeleteSegdir(
+ p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment
+ );
if( rc!=SQLITE_OK ) goto finished;
}
rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx);
@@ -2738,11 +3033,28 @@ static int fts3SegmentMerge(Fts3Table *p, int iIndex, int iLevel){
int sqlite3Fts3PendingTermsFlush(Fts3Table *p){
int rc = SQLITE_OK;
int i;
+
for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
- rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_PENDING);
+ rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING);
if( rc==SQLITE_DONE ) rc = SQLITE_OK;
}
sqlite3Fts3PendingTermsClear(p);
+
+ /* Determine the auto-incr-merge setting if unknown. If enabled,
+ ** estimate the number of leaf blocks of content to be written
+ */
+ if( rc==SQLITE_OK && p->bHasStat
+ && p->bAutoincrmerge==0xff && p->nLeafAdd>0
+ ){
+ sqlite3_stmt *pStmt = 0;
+ rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
+ rc = sqlite3_step(pStmt);
+ p->bAutoincrmerge = (rc==SQLITE_ROW && sqlite3_column_int(pStmt, 0));
+ rc = sqlite3_reset(pStmt);
+ }
+ }
return rc;
}
@@ -2787,9 +3099,9 @@ static void fts3DecodeIntArray(
** a blob of varints.
*/
static void fts3InsertDocsize(
- int *pRC, /* Result code */
- Fts3Table *p, /* Table into which to insert */
- u32 *aSz /* Sizes of each column */
+ int *pRC, /* Result code */
+ Fts3Table *p, /* Table into which to insert */
+ u32 *aSz /* Sizes of each column, in tokens */
){
char *pBlob; /* The BLOB encoding of the document size */
int nBlob; /* Number of bytes in the BLOB */
@@ -2853,12 +3165,13 @@ static void fts3UpdateDocTotals(
return;
}
pBlob = (char*)&a[nStat];
- rc = fts3SqlStmt(p, SQL_SELECT_DOCTOTAL, &pStmt, 0);
+ rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0);
if( rc ){
sqlite3_free(a);
*pRC = rc;
return;
}
+ sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
if( sqlite3_step(pStmt)==SQLITE_ROW ){
fts3DecodeIntArray(nStat, a,
sqlite3_column_blob(pStmt, 0),
@@ -2866,7 +3179,12 @@ static void fts3UpdateDocTotals(
}else{
memset(a, 0, sizeof(u32)*(nStat) );
}
- sqlite3_reset(pStmt);
+ rc = sqlite3_reset(pStmt);
+ if( rc!=SQLITE_OK ){
+ sqlite3_free(a);
+ *pRC = rc;
+ return;
+ }
if( nChng<0 && a[0]<(u32)(-nChng) ){
a[0] = 0;
}else{
@@ -2882,29 +3200,47 @@ static void fts3UpdateDocTotals(
a[i+1] = x;
}
fts3EncodeIntArray(nStat, a, pBlob, &nBlob);
- rc = fts3SqlStmt(p, SQL_REPLACE_DOCTOTAL, &pStmt, 0);
+ rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
if( rc ){
sqlite3_free(a);
*pRC = rc;
return;
}
- sqlite3_bind_blob(pStmt, 1, pBlob, nBlob, SQLITE_STATIC);
+ sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL);
+ sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC);
sqlite3_step(pStmt);
*pRC = sqlite3_reset(pStmt);
sqlite3_free(a);
}
+/*
+** Merge the entire database so that there is one segment for each
+** iIndex/iLangid combination.
+*/
static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
- int i;
int bSeenDone = 0;
- int rc = SQLITE_OK;
- for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
- rc = fts3SegmentMerge(p, i, FTS3_SEGCURSOR_ALL);
- if( rc==SQLITE_DONE ){
- bSeenDone = 1;
- rc = SQLITE_OK;
+ int rc;
+ sqlite3_stmt *pAllLangid = 0;
+
+ rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
+ if( rc==SQLITE_OK ){
+ int rc2;
+ sqlite3_bind_int(pAllLangid, 1, p->nIndex);
+ while( sqlite3_step(pAllLangid)==SQLITE_ROW ){
+ int i;
+ int iLangid = sqlite3_column_int(pAllLangid, 0);
+ for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){
+ rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL);
+ if( rc==SQLITE_DONE ){
+ bSeenDone = 1;
+ rc = SQLITE_OK;
+ }
+ }
}
+ rc2 = sqlite3_reset(pAllLangid);
+ if( rc==SQLITE_OK ) rc = rc2;
}
+
sqlite3Fts3SegmentsClose(p);
sqlite3Fts3PendingTermsClear(p);
@@ -2912,6 +3248,1768 @@ static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
}
/*
+** This function is called when the user executes the following statement:
+**
+** INSERT INTO <tbl>(<tbl>) VALUES('rebuild');
+**
+** The entire FTS index is discarded and rebuilt. If the table is one
+** created using the content=xxx option, then the new index is based on
+** the current contents of the xxx table. Otherwise, it is rebuilt based
+** on the contents of the %_content table.
+*/
+static int fts3DoRebuild(Fts3Table *p){
+ int rc; /* Return Code */
+
+ rc = fts3DeleteAll(p, 0);
+ if( rc==SQLITE_OK ){
+ u32 *aSz = 0;
+ u32 *aSzIns = 0;
+ u32 *aSzDel = 0;
+ sqlite3_stmt *pStmt = 0;
+ int nEntry = 0;
+
+ /* Compose and prepare an SQL statement to loop through the content table */
+ char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
+ if( !zSql ){
+ rc = SQLITE_NOMEM;
+ }else{
+ rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
+ sqlite3_free(zSql);
+ }
+
+ if( rc==SQLITE_OK ){
+ int nByte = sizeof(u32) * (p->nColumn+1)*3;
+ aSz = (u32 *)sqlite3_malloc(nByte);
+ if( aSz==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ memset(aSz, 0, nByte);
+ aSzIns = &aSz[p->nColumn+1];
+ aSzDel = &aSzIns[p->nColumn+1];
+ }
+ }
+
+ while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
+ int iCol;
+ int iLangid = langidFromSelect(p, pStmt);
+ rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0));
+ memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1));
+ for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
+ const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);
+ rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]);
+ aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
+ }
+ if( p->bHasDocsize ){
+ fts3InsertDocsize(&rc, p, aSz);
+ }
+ if( rc!=SQLITE_OK ){
+ sqlite3_finalize(pStmt);
+ pStmt = 0;
+ }else{
+ nEntry++;
+ for(iCol=0; iCol<=p->nColumn; iCol++){
+ aSzIns[iCol] += aSz[iCol];
+ }
+ }
+ }
+ if( p->bFts4 ){
+ fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry);
+ }
+ sqlite3_free(aSz);
+
+ if( pStmt ){
+ int rc2 = sqlite3_finalize(pStmt);
+ if( rc==SQLITE_OK ){
+ rc = rc2;
+ }
+ }
+ }
+
+ return rc;
+}
+
+
+/*
+** This function opens a cursor used to read the input data for an
+** incremental merge operation. Specifically, it opens a cursor to scan
+** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute
+** level iAbsLevel.
+*/
+static int fts3IncrmergeCsr(
+ Fts3Table *p, /* FTS3 table handle */
+ sqlite3_int64 iAbsLevel, /* Absolute level to open */
+ int nSeg, /* Number of segments to merge */
+ Fts3MultiSegReader *pCsr /* Cursor object to populate */
+){
+ int rc; /* Return Code */
+ sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */
+ int nByte; /* Bytes allocated at pCsr->apSegment[] */
+
+ /* Allocate space for the Fts3MultiSegReader.aCsr[] array */
+ memset(pCsr, 0, sizeof(*pCsr));
+ nByte = sizeof(Fts3SegReader *) * nSeg;
+ pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte);
+
+ if( pCsr->apSegment==0 ){
+ rc = SQLITE_NOMEM;
+ }else{
+ memset(pCsr->apSegment, 0, nByte);
+ rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0);
+ }
+ if( rc==SQLITE_OK ){
+ int i;
+ int rc2;
+ sqlite3_bind_int64(pStmt, 1, iAbsLevel);
+ assert( pCsr->nSegment==0 );
+ for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && i<nSeg; i++){
+ rc = sqlite3Fts3SegReaderNew(i, 0,
+ sqlite3_column_int64(pStmt, 1), /* segdir.start_block */
+ sqlite3_column_int64(pStmt, 2), /* segdir.leaves_end_block */
+ sqlite3_column_int64(pStmt, 3), /* segdir.end_block */
+ sqlite3_column_blob(pStmt, 4), /* segdir.root */
+ sqlite3_column_bytes(pStmt, 4), /* segdir.root */
+ &pCsr->apSegment[i]
+ );
+ pCsr->nSegment++;
+ }
+ rc2 = sqlite3_reset(pStmt);
+ if( rc==SQLITE_OK ) rc = rc2;
+ }
+
+ return rc;
+}
+
+typedef struct IncrmergeWriter IncrmergeWriter;
+typedef struct NodeWriter NodeWriter;
+typedef struct Blob Blob;
+typedef struct NodeReader NodeReader;
+
+/*
+** An instance of the following structure is used as a dynamic buffer
+** to build up nodes or other blobs of data in.
+**
+** The function blobGrowBuffer() is used to extend the allocation.
+*/
+struct Blob {
+ char *a; /* Pointer to allocation */
+ int n; /* Number of valid bytes of data in a[] */
+ int nAlloc; /* Allocated size of a[] (nAlloc>=n) */
+};
+
+/*
+** This structure is used to build up buffers containing segment b-tree
+** nodes (blocks).
+*/
+struct NodeWriter {
+ sqlite3_int64 iBlock; /* Current block id */
+ Blob key; /* Last key written to the current block */
+ Blob block; /* Current block image */
+};
+
+/*
+** An object of this type contains the state required to create or append
+** to an appendable b-tree segment.
+*/
+struct IncrmergeWriter {
+ int nLeafEst; /* Space allocated for leaf blocks */
+ int nWork; /* Number of leaf pages flushed */
+ sqlite3_int64 iAbsLevel; /* Absolute level of input segments */
+ int iIdx; /* Index of *output* segment in iAbsLevel+1 */
+ sqlite3_int64 iStart; /* Block number of first allocated block */
+ sqlite3_int64 iEnd; /* Block number of last allocated block */
+ NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT];
+};
+
+/*
+** An object of the following type is used to read data from a single
+** FTS segment node. See the following functions:
+**
+** nodeReaderInit()
+** nodeReaderNext()
+** nodeReaderRelease()
+*/
+struct NodeReader {
+ const char *aNode;
+ int nNode;
+ int iOff; /* Current offset within aNode[] */
+
+ /* Output variables. Containing the current node entry. */
+ sqlite3_int64 iChild; /* Pointer to child node */
+ Blob term; /* Current term */
+ const char *aDoclist; /* Pointer to doclist */
+ int nDoclist; /* Size of doclist in bytes */
+};
+
+/*
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, if the allocation at pBlob->a is not already at least nMin
+** bytes in size, extend (realloc) it to be so.
+**
+** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a
+** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc
+** to reflect the new size of the pBlob->a[] buffer.
+*/
+static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){
+ if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){
+ int nAlloc = nMin;
+ char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc);
+ if( a ){
+ pBlob->nAlloc = nAlloc;
+ pBlob->a = a;
+ }else{
+ *pRc = SQLITE_NOMEM;
+ }
+ }
+}
+
+/*
+** Attempt to advance the node-reader object passed as the first argument to
+** the next entry on the node.
+**
+** Return an error code if an error occurs (SQLITE_NOMEM is possible).
+** Otherwise return SQLITE_OK. If there is no next entry on the node
+** (e.g. because the current entry is the last) set NodeReader->aNode to
+** NULL to indicate EOF. Otherwise, populate the NodeReader structure output
+** variables for the new entry.
+*/
+static int nodeReaderNext(NodeReader *p){
+ int bFirst = (p->term.n==0); /* True for first term on the node */
+ int nPrefix = 0; /* Bytes to copy from previous term */
+ int nSuffix = 0; /* Bytes to append to the prefix */
+ int rc = SQLITE_OK; /* Return code */
+
+ assert( p->aNode );
+ if( p->iChild && bFirst==0 ) p->iChild++;
+ if( p->iOff>=p->nNode ){
+ /* EOF */
+ p->aNode = 0;
+ }else{
+ if( bFirst==0 ){
+ p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nPrefix);
+ }
+ p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &nSuffix);
+
+ blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc);
+ if( rc==SQLITE_OK ){
+ memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix);
+ p->term.n = nPrefix+nSuffix;
+ p->iOff += nSuffix;
+ if( p->iChild==0 ){
+ p->iOff += sqlite3Fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist);
+ p->aDoclist = &p->aNode[p->iOff];
+ p->iOff += p->nDoclist;
+ }
+ }
+ }
+
+ assert( p->iOff<=p->nNode );
+
+ return rc;
+}
+
+/*
+** Release all dynamic resources held by node-reader object *p.
+*/
+static void nodeReaderRelease(NodeReader *p){
+ sqlite3_free(p->term.a);
+}
+
+/*
+** Initialize a node-reader object to read the node in buffer aNode/nNode.
+**
+** If successful, SQLITE_OK is returned and the NodeReader object set to
+** point to the first entry on the node (if any). Otherwise, an SQLite
+** error code is returned.
+*/
+static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){
+ memset(p, 0, sizeof(NodeReader));
+ p->aNode = aNode;
+ p->nNode = nNode;
+
+ /* Figure out if this is a leaf or an internal node. */
+ if( p->aNode[0] ){
+ /* An internal node. */
+ p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild);
+ }else{
+ p->iOff = 1;
+ }
+
+ return nodeReaderNext(p);
+}
+
+/*
+** This function is called while writing an FTS segment each time a leaf o
+** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed
+** to be greater than the largest key on the node just written, but smaller
+** than or equal to the first key that will be written to the next leaf
+** node.
+**
+** The block id of the leaf node just written to disk may be found in
+** (pWriter->aNodeWriter[0].iBlock) when this function is called.
+*/
+static int fts3IncrmergePush(
+ Fts3Table *p, /* Fts3 table handle */
+ IncrmergeWriter *pWriter, /* Writer object */
+ const char *zTerm, /* Term to write to internal node */
+ int nTerm /* Bytes at zTerm */
+){
+ sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock;
+ int iLayer;
+
+ assert( nTerm>0 );
+ for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){
+ sqlite3_int64 iNextPtr = 0;
+ NodeWriter *pNode = &pWriter->aNodeWriter[iLayer];
+ int rc = SQLITE_OK;
+ int nPrefix;
+ int nSuffix;
+ int nSpace;
+
+ /* Figure out how much space the key will consume if it is written to
+ ** the current node of layer iLayer. Due to the prefix compression,
+ ** the space required changes depending on which node the key is to
+ ** be added to. */
+ nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm);
+ nSuffix = nTerm - nPrefix;
+ nSpace = sqlite3Fts3VarintLen(nPrefix);
+ nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
+
+ if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){
+ /* If the current node of layer iLayer contains zero keys, or if adding
+ ** the key to it will not cause it to grow to larger than nNodeSize
+ ** bytes in size, write the key here. */
+
+ Blob *pBlk = &pNode->block;
+ if( pBlk->n==0 ){
+ blobGrowBuffer(pBlk, p->nNodeSize, &rc);
+ if( rc==SQLITE_OK ){
+ pBlk->a[0] = (char)iLayer;
+ pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr);
+ }
+ }
+ blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc);
+ blobGrowBuffer(&pNode->key, nTerm, &rc);
+
+ if( rc==SQLITE_OK ){
+ if( pNode->key.n ){
+ pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix);
+ }
+ pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix);
+ memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix);
+ pBlk->n += nSuffix;
+
+ memcpy(pNode->key.a, zTerm, nTerm);
+ pNode->key.n = nTerm;
+ }
+ }else{
+ /* Otherwise, flush the current node of layer iLayer to disk.
+ ** Then allocate a new, empty sibling node. The key will be written
+ ** into the parent of this node. */
+ rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);
+
+ assert( pNode->block.nAlloc>=p->nNodeSize );
+ pNode->block.a[0] = (char)iLayer;
+ pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1);
+
+ iNextPtr = pNode->iBlock;
+ pNode->iBlock++;
+ pNode->key.n = 0;
+ }
+
+ if( rc!=SQLITE_OK || iNextPtr==0 ) return rc;
+ iPtr = iNextPtr;
+ }
+
+ assert( 0 );
+ return 0;
+}
+
+/*
+** Append a term and (optionally) doclist to the FTS segment node currently
+** stored in blob *pNode. The node need not contain any terms, but the
+** header must be written before this function is called.
+**
+** A node header is a single 0x00 byte for a leaf node, or a height varint
+** followed by the left-hand-child varint for an internal node.
+**
+** The term to be appended is passed via arguments zTerm/nTerm. For a
+** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal
+** node, both aDoclist and nDoclist must be passed 0.
+**
+** If the size of the value in blob pPrev is zero, then this is the first
+** term written to the node. Otherwise, pPrev contains a copy of the
+** previous term. Before this function returns, it is updated to contain a
+** copy of zTerm/nTerm.
+**
+** It is assumed that the buffer associated with pNode is already large
+** enough to accommodate the new entry. The buffer associated with pPrev
+** is extended by this function if requrired.
+**
+** If an error (i.e. OOM condition) occurs, an SQLite error code is
+** returned. Otherwise, SQLITE_OK.
+*/
+static int fts3AppendToNode(
+ Blob *pNode, /* Current node image to append to */
+ Blob *pPrev, /* Buffer containing previous term written */
+ const char *zTerm, /* New term to write */
+ int nTerm, /* Size of zTerm in bytes */
+ const char *aDoclist, /* Doclist (or NULL) to write */
+ int nDoclist /* Size of aDoclist in bytes */
+){
+ int rc = SQLITE_OK; /* Return code */
+ int bFirst = (pPrev->n==0); /* True if this is the first term written */
+ int nPrefix; /* Size of term prefix in bytes */
+ int nSuffix; /* Size of term suffix in bytes */
+
+ /* Node must have already been started. There must be a doclist for a
+ ** leaf node, and there must not be a doclist for an internal node. */
+ assert( pNode->n>0 );
+ assert( (pNode->a[0]=='\0')==(aDoclist!=0) );
+
+ blobGrowBuffer(pPrev, nTerm, &rc);
+ if( rc!=SQLITE_OK ) return rc;
+
+ nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm);
+ nSuffix = nTerm - nPrefix;
+ memcpy(pPrev->a, zTerm, nTerm);
+ pPrev->n = nTerm;
+
+ if( bFirst==0 ){
+ pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix);
+ }
+ pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix);
+ memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix);
+ pNode->n += nSuffix;
+
+ if( aDoclist ){
+ pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist);
+ memcpy(&pNode->a[pNode->n], aDoclist, nDoclist);
+ pNode->n += nDoclist;
+ }
+
+ assert( pNode->n<=pNode->nAlloc );
+
+ return SQLITE_OK;
+}
+
+/*
+** Append the current term and doclist pointed to by cursor pCsr to the
+** appendable b-tree segment opened for writing by pWriter.
+**
+** Return SQLITE_OK if successful, or an SQLite error code otherwise.
+*/
+static int fts3IncrmergeAppend(
+ Fts3Table *p, /* Fts3 table handle */
+ IncrmergeWriter *pWriter, /* Writer object */
+ Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */
+){
+ const char *zTerm = pCsr->zTerm;
+ int nTerm = pCsr->nTerm;
+ const char *aDoclist = pCsr->aDoclist;
+ int nDoclist = pCsr->nDoclist;
+ int rc = SQLITE_OK; /* Return code */
+ int nSpace; /* Total space in bytes required on leaf */
+ int nPrefix; /* Size of prefix shared with previous term */
+ int nSuffix; /* Size of suffix (nTerm - nPrefix) */
+ NodeWriter *pLeaf; /* Object used to write leaf nodes */
+
+ pLeaf = &pWriter->aNodeWriter[0];
+ nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm);
+ nSuffix = nTerm - nPrefix;
+
+ nSpace = sqlite3Fts3VarintLen(nPrefix);
+ nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
+ nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
+
+ /* If the current block is not empty, and if adding this term/doclist
+ ** to the current block would make it larger than Fts3Table.nNodeSize
+ ** bytes, write this block out to the database. */
+ if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){
+ rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n);
+ pWriter->nWork++;
+
+ /* Add the current term to the parent node. The term added to the
+ ** parent must:
+ **
+ ** a) be greater than the largest term on the leaf node just written
+ ** to the database (still available in pLeaf->key), and
+ **
+ ** b) be less than or equal to the term about to be added to the new
+ ** leaf node (zTerm/nTerm).
+ **
+ ** In other words, it must be the prefix of zTerm 1 byte longer than
+ ** the common prefix (if any) of zTerm and pWriter->zTerm.
+ */
+ if( rc==SQLITE_OK ){
+ rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1);
+ }
+
+ /* Advance to the next output block */
+ pLeaf->iBlock++;
+ pLeaf->key.n = 0;
+ pLeaf->block.n = 0;
+
+ nSuffix = nTerm;
+ nSpace = 1;
+ nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix;
+ nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist;
+ }
+
+ blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc);
+
+ if( rc==SQLITE_OK ){
+ if( pLeaf->block.n==0 ){
+ pLeaf->block.n = 1;
+ pLeaf->block.a[0] = '\0';
+ }
+ rc = fts3AppendToNode(
+ &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist
+ );
+ }
+
+ return rc;
+}
+
+/*
+** This function is called to release all dynamic resources held by the
+** merge-writer object pWriter, and if no error has occurred, to flush
+** all outstanding node buffers held by pWriter to disk.
+**
+** If *pRc is not SQLITE_OK when this function is called, then no attempt
+** is made to write any data to disk. Instead, this function serves only
+** to release outstanding resources.
+**
+** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while
+** flushing buffers to disk, *pRc is set to an SQLite error code before
+** returning.
+*/
+static void fts3IncrmergeRelease(
+ Fts3Table *p, /* FTS3 table handle */
+ IncrmergeWriter *pWriter, /* Merge-writer object */
+ int *pRc /* IN/OUT: Error code */
+){
+ int i; /* Used to iterate through non-root layers */
+ int iRoot; /* Index of root in pWriter->aNodeWriter */
+ NodeWriter *pRoot; /* NodeWriter for root node */
+ int rc = *pRc; /* Error code */
+
+ /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment
+ ** root node. If the segment fits entirely on a single leaf node, iRoot
+ ** will be set to 0. If the root node is the parent of the leaves, iRoot
+ ** will be 1. And so on. */
+ for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){
+ NodeWriter *pNode = &pWriter->aNodeWriter[iRoot];
+ if( pNode->block.n>0 ) break;
+ assert( *pRc || pNode->block.nAlloc==0 );
+ assert( *pRc || pNode->key.nAlloc==0 );
+ sqlite3_free(pNode->block.a);
+ sqlite3_free(pNode->key.a);
+ }
+
+ /* Empty output segment. This is a no-op. */
+ if( iRoot<0 ) return;
+
+ /* The entire output segment fits on a single node. Normally, this means
+ ** the node would be stored as a blob in the "root" column of the %_segdir
+ ** table. However, this is not permitted in this case. The problem is that
+ ** space has already been reserved in the %_segments table, and so the
+ ** start_block and end_block fields of the %_segdir table must be populated.
+ ** And, by design or by accident, released versions of FTS cannot handle
+ ** segments that fit entirely on the root node with start_block!=0.
+ **
+ ** Instead, create a synthetic root node that contains nothing but a
+ ** pointer to the single content node. So that the segment consists of a
+ ** single leaf and a single interior (root) node.
+ **
+ ** Todo: Better might be to defer allocating space in the %_segments
+ ** table until we are sure it is needed.
+ */
+ if( iRoot==0 ){
+ Blob *pBlock = &pWriter->aNodeWriter[1].block;
+ blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc);
+ if( rc==SQLITE_OK ){
+ pBlock->a[0] = 0x01;
+ pBlock->n = 1 + sqlite3Fts3PutVarint(
+ &pBlock->a[1], pWriter->aNodeWriter[0].iBlock
+ );
+ }
+ iRoot = 1;
+ }
+ pRoot = &pWriter->aNodeWriter[iRoot];
+
+ /* Flush all currently outstanding nodes to disk. */
+ for(i=0; i<iRoot; i++){
+ NodeWriter *pNode = &pWriter->aNodeWriter[i];
+ if( pNode->block.n>0 && rc==SQLITE_OK ){
+ rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n);
+ }
+ sqlite3_free(pNode->block.a);
+ sqlite3_free(pNode->key.a);
+ }
+
+ /* Write the %_segdir record. */
+ if( rc==SQLITE_OK ){
+ rc = fts3WriteSegdir(p,
+ pWriter->iAbsLevel+1, /* level */
+ pWriter->iIdx, /* idx */
+ pWriter->iStart, /* start_block */
+ pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */
+ pWriter->iEnd, /* end_block */
+ pRoot->block.a, pRoot->block.n /* root */
+ );
+ }
+ sqlite3_free(pRoot->block.a);
+ sqlite3_free(pRoot->key.a);
+
+ *pRc = rc;
+}
+
+/*
+** Compare the term in buffer zLhs (size in bytes nLhs) with that in
+** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of
+** the other, it is considered to be smaller than the other.
+**
+** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve
+** if it is greater.
+*/
+static int fts3TermCmp(
+ const char *zLhs, int nLhs, /* LHS of comparison */
+ const char *zRhs, int nRhs /* RHS of comparison */
+){
+ int nCmp = MIN(nLhs, nRhs);
+ int res;
+
+ res = memcmp(zLhs, zRhs, nCmp);
+ if( res==0 ) res = nLhs - nRhs;
+
+ return res;
+}
+
+
+/*
+** Query to see if the entry in the %_segments table with blockid iEnd is
+** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before
+** returning. Otherwise, set *pbRes to 0.
+**
+** Or, if an error occurs while querying the database, return an SQLite
+** error code. The final value of *pbRes is undefined in this case.
+**
+** This is used to test if a segment is an "appendable" segment. If it
+** is, then a NULL entry has been inserted into the %_segments table
+** with blockid %_segdir.end_block.
+*/
+static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){
+ int bRes = 0; /* Result to set *pbRes to */
+ sqlite3_stmt *pCheck = 0; /* Statement to query database with */
+ int rc; /* Return code */
+
+ rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pCheck, 1, iEnd);
+ if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1;
+ rc = sqlite3_reset(pCheck);
+ }
+
+ *pbRes = bRes;
+ return rc;
+}
+
+/*
+** This function is called when initializing an incremental-merge operation.
+** It checks if the existing segment with index value iIdx at absolute level
+** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the
+** merge-writer object *pWriter is initialized to write to it.
+**
+** An existing segment can be appended to by an incremental merge if:
+**
+** * It was initially created as an appendable segment (with all required
+** space pre-allocated), and
+**
+** * The first key read from the input (arguments zKey and nKey) is
+** greater than the largest key currently stored in the potential
+** output segment.
+*/
+static int fts3IncrmergeLoad(
+ Fts3Table *p, /* Fts3 table handle */
+ sqlite3_int64 iAbsLevel, /* Absolute level of input segments */
+ int iIdx, /* Index of candidate output segment */
+ const char *zKey, /* First key to write */
+ int nKey, /* Number of bytes in nKey */
+ IncrmergeWriter *pWriter /* Populate this object */
+){
+ int rc; /* Return code */
+ sqlite3_stmt *pSelect = 0; /* SELECT to read %_segdir entry */
+
+ rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */
+ sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */
+ sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */
+ const char *aRoot = 0; /* Pointer to %_segdir.root buffer */
+ int nRoot = 0; /* Size of aRoot[] in bytes */
+ int rc2; /* Return code from sqlite3_reset() */
+ int bAppendable = 0; /* Set to true if segment is appendable */
+
+ /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */
+ sqlite3_bind_int64(pSelect, 1, iAbsLevel+1);
+ sqlite3_bind_int(pSelect, 2, iIdx);
+ if( sqlite3_step(pSelect)==SQLITE_ROW ){
+ iStart = sqlite3_column_int64(pSelect, 1);
+ iLeafEnd = sqlite3_column_int64(pSelect, 2);
+ iEnd = sqlite3_column_int64(pSelect, 3);
+ nRoot = sqlite3_column_bytes(pSelect, 4);
+ aRoot = sqlite3_column_blob(pSelect, 4);
+ }else{
+ return sqlite3_reset(pSelect);
+ }
+
+ /* Check for the zero-length marker in the %_segments table */
+ rc = fts3IsAppendable(p, iEnd, &bAppendable);
+
+ /* Check that zKey/nKey is larger than the largest key the candidate */
+ if( rc==SQLITE_OK && bAppendable ){
+ char *aLeaf = 0;
+ int nLeaf = 0;
+
+ rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0);
+ if( rc==SQLITE_OK ){
+ NodeReader reader;
+ for(rc = nodeReaderInit(&reader, aLeaf, nLeaf);
+ rc==SQLITE_OK && reader.aNode;
+ rc = nodeReaderNext(&reader)
+ ){
+ assert( reader.aNode );
+ }
+ if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){
+ bAppendable = 0;
+ }
+ nodeReaderRelease(&reader);
+ }
+ sqlite3_free(aLeaf);
+ }
+
+ if( rc==SQLITE_OK && bAppendable ){
+ /* It is possible to append to this segment. Set up the IncrmergeWriter
+ ** object to do so. */
+ int i;
+ int nHeight = (int)aRoot[0];
+ NodeWriter *pNode;
+
+ pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT;
+ pWriter->iStart = iStart;
+ pWriter->iEnd = iEnd;
+ pWriter->iAbsLevel = iAbsLevel;
+ pWriter->iIdx = iIdx;
+
+ for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
+ pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
+ }
+
+ pNode = &pWriter->aNodeWriter[nHeight];
+ pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight;
+ blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc);
+ if( rc==SQLITE_OK ){
+ memcpy(pNode->block.a, aRoot, nRoot);
+ pNode->block.n = nRoot;
+ }
+
+ for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){
+ NodeReader reader;
+ pNode = &pWriter->aNodeWriter[i];
+
+ rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n);
+ while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader);
+ blobGrowBuffer(&pNode->key, reader.term.n, &rc);
+ if( rc==SQLITE_OK ){
+ memcpy(pNode->key.a, reader.term.a, reader.term.n);
+ pNode->key.n = reader.term.n;
+ if( i>0 ){
+ char *aBlock = 0;
+ int nBlock = 0;
+ pNode = &pWriter->aNodeWriter[i-1];
+ pNode->iBlock = reader.iChild;
+ rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0);
+ blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc);
+ if( rc==SQLITE_OK ){
+ memcpy(pNode->block.a, aBlock, nBlock);
+ pNode->block.n = nBlock;
+ }
+ sqlite3_free(aBlock);
+ }
+ }
+ nodeReaderRelease(&reader);
+ }
+ }
+
+ rc2 = sqlite3_reset(pSelect);
+ if( rc==SQLITE_OK ) rc = rc2;
+ }
+
+ return rc;
+}
+
+/*
+** Determine the largest segment index value that exists within absolute
+** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus
+** one before returning SQLITE_OK. Or, if there are no segments at all
+** within level iAbsLevel, set *piIdx to zero.
+**
+** If an error occurs, return an SQLite error code. The final value of
+** *piIdx is undefined in this case.
+*/
+static int fts3IncrmergeOutputIdx(
+ Fts3Table *p, /* FTS Table handle */
+ sqlite3_int64 iAbsLevel, /* Absolute index of input segments */
+ int *piIdx /* OUT: Next free index at iAbsLevel+1 */
+){
+ int rc;
+ sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */
+
+ rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1);
+ sqlite3_step(pOutputIdx);
+ *piIdx = sqlite3_column_int(pOutputIdx, 0);
+ rc = sqlite3_reset(pOutputIdx);
+ }
+
+ return rc;
+}
+
+/*
+** Allocate an appendable output segment on absolute level iAbsLevel+1
+** with idx value iIdx.
+**
+** In the %_segdir table, a segment is defined by the values in three
+** columns:
+**
+** start_block
+** leaves_end_block
+** end_block
+**
+** When an appendable segment is allocated, it is estimated that the
+** maximum number of leaf blocks that may be required is the sum of the
+** number of leaf blocks consumed by the input segments, plus the number
+** of input segments, multiplied by two. This value is stored in stack
+** variable nLeafEst.
+**
+** A total of 16*nLeafEst blocks are allocated when an appendable segment
+** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous
+** array of leaf nodes starts at the first block allocated. The array
+** of interior nodes that are parents of the leaf nodes start at block
+** (start_block + (1 + end_block - start_block) / 16). And so on.
+**
+** In the actual code below, the value "16" is replaced with the
+** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT.
+*/
+static int fts3IncrmergeWriter(
+ Fts3Table *p, /* Fts3 table handle */
+ sqlite3_int64 iAbsLevel, /* Absolute level of input segments */
+ int iIdx, /* Index of new output segment */
+ Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */
+ IncrmergeWriter *pWriter /* Populate this object */
+){
+ int rc; /* Return Code */
+ int i; /* Iterator variable */
+ int nLeafEst = 0; /* Blocks allocated for leaf nodes */
+ sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */
+ sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */
+
+ /* Calculate nLeafEst. */
+ rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pLeafEst, 1, iAbsLevel);
+ sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment);
+ if( SQLITE_ROW==sqlite3_step(pLeafEst) ){
+ nLeafEst = sqlite3_column_int(pLeafEst, 0);
+ }
+ rc = sqlite3_reset(pLeafEst);
+ }
+ if( rc!=SQLITE_OK ) return rc;
+
+ /* Calculate the first block to use in the output segment */
+ rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0);
+ if( rc==SQLITE_OK ){
+ if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){
+ pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0);
+ pWriter->iEnd = pWriter->iStart - 1;
+ pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT;
+ }
+ rc = sqlite3_reset(pFirstBlock);
+ }
+ if( rc!=SQLITE_OK ) return rc;
+
+ /* Insert the marker in the %_segments table to make sure nobody tries
+ ** to steal the space just allocated. This is also used to identify
+ ** appendable segments. */
+ rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0);
+ if( rc!=SQLITE_OK ) return rc;
+
+ pWriter->iAbsLevel = iAbsLevel;
+ pWriter->nLeafEst = nLeafEst;
+ pWriter->iIdx = iIdx;
+
+ /* Set up the array of NodeWriter objects */
+ for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){
+ pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst;
+ }
+ return SQLITE_OK;
+}
+
+/*
+** Remove an entry from the %_segdir table. This involves running the
+** following two statements:
+**
+** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx
+** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx
+**
+** The DELETE statement removes the specific %_segdir level. The UPDATE
+** statement ensures that the remaining segments have contiguously allocated
+** idx values.
+*/
+static int fts3RemoveSegdirEntry(
+ Fts3Table *p, /* FTS3 table handle */
+ sqlite3_int64 iAbsLevel, /* Absolute level to delete from */
+ int iIdx /* Index of %_segdir entry to delete */
+){
+ int rc; /* Return code */
+ sqlite3_stmt *pDelete = 0; /* DELETE statement */
+
+ rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pDelete, 1, iAbsLevel);
+ sqlite3_bind_int(pDelete, 2, iIdx);
+ sqlite3_step(pDelete);
+ rc = sqlite3_reset(pDelete);
+ }
+
+ return rc;
+}
+
+/*
+** One or more segments have just been removed from absolute level iAbsLevel.
+** Update the 'idx' values of the remaining segments in the level so that
+** the idx values are a contiguous sequence starting from 0.
+*/
+static int fts3RepackSegdirLevel(
+ Fts3Table *p, /* FTS3 table handle */
+ sqlite3_int64 iAbsLevel /* Absolute level to repack */
+){
+ int rc; /* Return code */
+ int *aIdx = 0; /* Array of remaining idx values */
+ int nIdx = 0; /* Valid entries in aIdx[] */
+ int nAlloc = 0; /* Allocated size of aIdx[] */
+ int i; /* Iterator variable */
+ sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */
+ sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */
+
+ rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0);
+ if( rc==SQLITE_OK ){
+ int rc2;
+ sqlite3_bind_int64(pSelect, 1, iAbsLevel);
+ while( SQLITE_ROW==sqlite3_step(pSelect) ){
+ if( nIdx>=nAlloc ){
+ int *aNew;
+ nAlloc += 16;
+ aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int));
+ if( !aNew ){
+ rc = SQLITE_NOMEM;
+ break;
+ }
+ aIdx = aNew;
+ }
+ aIdx[nIdx++] = sqlite3_column_int(pSelect, 0);
+ }
+ rc2 = sqlite3_reset(pSelect);
+ if( rc==SQLITE_OK ) rc = rc2;
+ }
+
+ if( rc==SQLITE_OK ){
+ rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0);
+ }
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pUpdate, 2, iAbsLevel);
+ }
+
+ assert( p->bIgnoreSavepoint==0 );
+ p->bIgnoreSavepoint = 1;
+ for(i=0; rc==SQLITE_OK && i<nIdx; i++){
+ if( aIdx[i]!=i ){
+ sqlite3_bind_int(pUpdate, 3, aIdx[i]);
+ sqlite3_bind_int(pUpdate, 1, i);
+ sqlite3_step(pUpdate);
+ rc = sqlite3_reset(pUpdate);
+ }
+ }
+ p->bIgnoreSavepoint = 0;
+
+ sqlite3_free(aIdx);
+ return rc;
+}
+
+static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){
+ pNode->a[0] = (char)iHeight;
+ if( iChild ){
+ assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) );
+ pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild);
+ }else{
+ assert( pNode->nAlloc>=1 );
+ pNode->n = 1;
+ }
+}
+
+/*
+** The first two arguments are a pointer to and the size of a segment b-tree
+** node. The node may be a leaf or an internal node.
+**
+** This function creates a new node image in blob object *pNew by copying
+** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes)
+** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode.
+*/
+static int fts3TruncateNode(
+ const char *aNode, /* Current node image */
+ int nNode, /* Size of aNode in bytes */
+ Blob *pNew, /* OUT: Write new node image here */
+ const char *zTerm, /* Omit all terms smaller than this */
+ int nTerm, /* Size of zTerm in bytes */
+ sqlite3_int64 *piBlock /* OUT: Block number in next layer down */
+){
+ NodeReader reader; /* Reader object */
+ Blob prev = {0, 0, 0}; /* Previous term written to new node */
+ int rc = SQLITE_OK; /* Return code */
+ int bLeaf = aNode[0]=='\0'; /* True for a leaf node */
+
+ /* Allocate required output space */
+ blobGrowBuffer(pNew, nNode, &rc);
+ if( rc!=SQLITE_OK ) return rc;
+ pNew->n = 0;
+
+ /* Populate new node buffer */
+ for(rc = nodeReaderInit(&reader, aNode, nNode);
+ rc==SQLITE_OK && reader.aNode;
+ rc = nodeReaderNext(&reader)
+ ){
+ if( pNew->n==0 ){
+ int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm);
+ if( res<0 || (bLeaf==0 && res==0) ) continue;
+ fts3StartNode(pNew, (int)aNode[0], reader.iChild);
+ *piBlock = reader.iChild;
+ }
+ rc = fts3AppendToNode(
+ pNew, &prev, reader.term.a, reader.term.n,
+ reader.aDoclist, reader.nDoclist
+ );
+ if( rc!=SQLITE_OK ) break;
+ }
+ if( pNew->n==0 ){
+ fts3StartNode(pNew, (int)aNode[0], reader.iChild);
+ *piBlock = reader.iChild;
+ }
+ assert( pNew->n<=pNew->nAlloc );
+
+ nodeReaderRelease(&reader);
+ sqlite3_free(prev.a);
+ return rc;
+}
+
+/*
+** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute
+** level iAbsLevel. This may involve deleting entries from the %_segments
+** table, and modifying existing entries in both the %_segments and %_segdir
+** tables.
+**
+** SQLITE_OK is returned if the segment is updated successfully. Or an
+** SQLite error code otherwise.
+*/
+static int fts3TruncateSegment(
+ Fts3Table *p, /* FTS3 table handle */
+ sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */
+ int iIdx, /* Index within level of segment to modify */
+ const char *zTerm, /* Remove terms smaller than this */
+ int nTerm /* Number of bytes in buffer zTerm */
+){
+ int rc = SQLITE_OK; /* Return code */
+ Blob root = {0,0,0}; /* New root page image */
+ Blob block = {0,0,0}; /* Buffer used for any other block */
+ sqlite3_int64 iBlock = 0; /* Block id */
+ sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */
+ sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */
+ sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */
+
+ rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0);
+ if( rc==SQLITE_OK ){
+ int rc2; /* sqlite3_reset() return code */
+ sqlite3_bind_int64(pFetch, 1, iAbsLevel);
+ sqlite3_bind_int(pFetch, 2, iIdx);
+ if( SQLITE_ROW==sqlite3_step(pFetch) ){
+ const char *aRoot = sqlite3_column_blob(pFetch, 4);
+ int nRoot = sqlite3_column_bytes(pFetch, 4);
+ iOldStart = sqlite3_column_int64(pFetch, 1);
+ rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock);
+ }
+ rc2 = sqlite3_reset(pFetch);
+ if( rc==SQLITE_OK ) rc = rc2;
+ }
+
+ while( rc==SQLITE_OK && iBlock ){
+ char *aBlock = 0;
+ int nBlock = 0;
+ iNewStart = iBlock;
+
+ rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0);
+ if( rc==SQLITE_OK ){
+ rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock);
+ }
+ if( rc==SQLITE_OK ){
+ rc = fts3WriteSegment(p, iNewStart, block.a, block.n);
+ }
+ sqlite3_free(aBlock);
+ }
+
+ /* Variable iNewStart now contains the first valid leaf node. */
+ if( rc==SQLITE_OK && iNewStart ){
+ sqlite3_stmt *pDel = 0;
+ rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pDel, 1, iOldStart);
+ sqlite3_bind_int64(pDel, 2, iNewStart-1);
+ sqlite3_step(pDel);
+ rc = sqlite3_reset(pDel);
+ }
+ }
+
+ if( rc==SQLITE_OK ){
+ sqlite3_stmt *pChomp = 0;
+ rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int64(pChomp, 1, iNewStart);
+ sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC);
+ sqlite3_bind_int64(pChomp, 3, iAbsLevel);
+ sqlite3_bind_int(pChomp, 4, iIdx);
+ sqlite3_step(pChomp);
+ rc = sqlite3_reset(pChomp);
+ }
+ }
+
+ sqlite3_free(root.a);
+ sqlite3_free(block.a);
+ return rc;
+}
+
+/*
+** This function is called after an incrmental-merge operation has run to
+** merge (or partially merge) two or more segments from absolute level
+** iAbsLevel.
+**
+** Each input segment is either removed from the db completely (if all of
+** its data was copied to the output segment by the incrmerge operation)
+** or modified in place so that it no longer contains those entries that
+** have been duplicated in the output segment.
+*/
+static int fts3IncrmergeChomp(
+ Fts3Table *p, /* FTS table handle */
+ sqlite3_int64 iAbsLevel, /* Absolute level containing segments */
+ Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */
+ int *pnRem /* Number of segments not deleted */
+){
+ int i;
+ int nRem = 0;
+ int rc = SQLITE_OK;
+
+ for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){
+ Fts3SegReader *pSeg = 0;
+ int j;
+
+ /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding
+ ** somewhere in the pCsr->apSegment[] array. */
+ for(j=0; ALWAYS(j<pCsr->nSegment); j++){
+ pSeg = pCsr->apSegment[j];
+ if( pSeg->iIdx==i ) break;
+ }
+ assert( j<pCsr->nSegment && pSeg->iIdx==i );
+
+ if( pSeg->aNode==0 ){
+ /* Seg-reader is at EOF. Remove the entire input segment. */
+ rc = fts3DeleteSegment(p, pSeg);
+ if( rc==SQLITE_OK ){
+ rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx);
+ }
+ *pnRem = 0;
+ }else{
+ /* The incremental merge did not copy all the data from this
+ ** segment to the upper level. The segment is modified in place
+ ** so that it contains no keys smaller than zTerm/nTerm. */
+ const char *zTerm = pSeg->zTerm;
+ int nTerm = pSeg->nTerm;
+ rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm);
+ nRem++;
+ }
+ }
+
+ if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){
+ rc = fts3RepackSegdirLevel(p, iAbsLevel);
+ }
+
+ *pnRem = nRem;
+ return rc;
+}
+
+/*
+** Store an incr-merge hint in the database.
+*/
+static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){
+ sqlite3_stmt *pReplace = 0;
+ int rc; /* Return code */
+
+ rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT);
+ sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC);
+ sqlite3_step(pReplace);
+ rc = sqlite3_reset(pReplace);
+ }
+
+ return rc;
+}
+
+/*
+** Load an incr-merge hint from the database. The incr-merge hint, if one
+** exists, is stored in the rowid==1 row of the %_stat table.
+**
+** If successful, populate blob *pHint with the value read from the %_stat
+** table and return SQLITE_OK. Otherwise, if an error occurs, return an
+** SQLite error code.
+*/
+static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){
+ sqlite3_stmt *pSelect = 0;
+ int rc;
+
+ pHint->n = 0;
+ rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0);
+ if( rc==SQLITE_OK ){
+ int rc2;
+ sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT);
+ if( SQLITE_ROW==sqlite3_step(pSelect) ){
+ const char *aHint = sqlite3_column_blob(pSelect, 0);
+ int nHint = sqlite3_column_bytes(pSelect, 0);
+ if( aHint ){
+ blobGrowBuffer(pHint, nHint, &rc);
+ if( rc==SQLITE_OK ){
+ memcpy(pHint->a, aHint, nHint);
+ pHint->n = nHint;
+ }
+ }
+ }
+ rc2 = sqlite3_reset(pSelect);
+ if( rc==SQLITE_OK ) rc = rc2;
+ }
+
+ return rc;
+}
+
+/*
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, append an entry to the hint stored in blob *pHint. Each entry
+** consists of two varints, the absolute level number of the input segments
+** and the number of input segments.
+**
+** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs,
+** set *pRc to an SQLite error code before returning.
+*/
+static void fts3IncrmergeHintPush(
+ Blob *pHint, /* Hint blob to append to */
+ i64 iAbsLevel, /* First varint to store in hint */
+ int nInput, /* Second varint to store in hint */
+ int *pRc /* IN/OUT: Error code */
+){
+ blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc);
+ if( *pRc==SQLITE_OK ){
+ pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel);
+ pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput);
+ }
+}
+
+/*
+** Read the last entry (most recently pushed) from the hint blob *pHint
+** and then remove the entry. Write the two values read to *piAbsLevel and
+** *pnInput before returning.
+**
+** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does
+** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB.
+*/
+static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){
+ const int nHint = pHint->n;
+ int i;
+
+ i = pHint->n-2;
+ while( i>0 && (pHint->a[i-1] & 0x80) ) i--;
+ while( i>0 && (pHint->a[i-1] & 0x80) ) i--;
+
+ pHint->n = i;
+ i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel);
+ i += sqlite3Fts3GetVarint32(&pHint->a[i], pnInput);
+ if( i!=nHint ) return SQLITE_CORRUPT_VTAB;
+
+ return SQLITE_OK;
+}
+
+
+/*
+** Attempt an incremental merge that writes nMerge leaf blocks.
+**
+** Incremental merges happen nMin segments at a time. The two
+** segments to be merged are the nMin oldest segments (the ones with
+** the smallest indexes) in the highest level that contains at least
+** nMin segments. Multiple merges might occur in an attempt to write the
+** quota of nMerge leaf blocks.
+*/
+int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){
+ int rc; /* Return code */
+ int nRem = nMerge; /* Number of leaf pages yet to be written */
+ Fts3MultiSegReader *pCsr; /* Cursor used to read input data */
+ Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */
+ IncrmergeWriter *pWriter; /* Writer object */
+ int nSeg = 0; /* Number of input segments */
+ sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */
+ Blob hint = {0, 0, 0}; /* Hint read from %_stat table */
+ int bDirtyHint = 0; /* True if blob 'hint' has been modified */
+
+ /* Allocate space for the cursor, filter and writer objects */
+ const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter);
+ pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc);
+ if( !pWriter ) return SQLITE_NOMEM;
+ pFilter = (Fts3SegFilter *)&pWriter[1];
+ pCsr = (Fts3MultiSegReader *)&pFilter[1];
+
+ rc = fts3IncrmergeHintLoad(p, &hint);
+ while( rc==SQLITE_OK && nRem>0 ){
+ const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex;
+ sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */
+ int bUseHint = 0; /* True if attempting to append */
+
+ /* Search the %_segdir table for the absolute level with the smallest
+ ** relative level number that contains at least nMin segments, if any.
+ ** If one is found, set iAbsLevel to the absolute level number and
+ ** nSeg to nMin. If no level with at least nMin segments can be found,
+ ** set nSeg to -1.
+ */
+ rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0);
+ sqlite3_bind_int(pFindLevel, 1, nMin);
+ if( sqlite3_step(pFindLevel)==SQLITE_ROW ){
+ iAbsLevel = sqlite3_column_int64(pFindLevel, 0);
+ nSeg = nMin;
+ }else{
+ nSeg = -1;
+ }
+ rc = sqlite3_reset(pFindLevel);
+
+ /* If the hint read from the %_stat table is not empty, check if the
+ ** last entry in it specifies a relative level smaller than or equal
+ ** to the level identified by the block above (if any). If so, this
+ ** iteration of the loop will work on merging at the hinted level.
+ */
+ if( rc==SQLITE_OK && hint.n ){
+ int nHint = hint.n;
+ sqlite3_int64 iHintAbsLevel = 0; /* Hint level */
+ int nHintSeg = 0; /* Hint number of segments */
+
+ rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg);
+ if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){
+ iAbsLevel = iHintAbsLevel;
+ nSeg = nHintSeg;
+ bUseHint = 1;
+ bDirtyHint = 1;
+ }else{
+ /* This undoes the effect of the HintPop() above - so that no entry
+ ** is removed from the hint blob. */
+ hint.n = nHint;
+ }
+ }
+
+ /* If nSeg is less that zero, then there is no level with at least
+ ** nMin segments and no hint in the %_stat table. No work to do.
+ ** Exit early in this case. */
+ if( nSeg<0 ) break;
+
+ /* Open a cursor to iterate through the contents of the oldest nSeg
+ ** indexes of absolute level iAbsLevel. If this cursor is opened using
+ ** the 'hint' parameters, it is possible that there are less than nSeg
+ ** segments available in level iAbsLevel. In this case, no work is
+ ** done on iAbsLevel - fall through to the next iteration of the loop
+ ** to start work on some other level. */
+ memset(pWriter, 0, nAlloc);
+ pFilter->flags = FTS3_SEGMENT_REQUIRE_POS;
+ if( rc==SQLITE_OK ){
+ rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr);
+ }
+ if( SQLITE_OK==rc && pCsr->nSegment==nSeg
+ && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter))
+ && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr))
+ ){
+ int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */
+ rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx);
+ if( rc==SQLITE_OK ){
+ if( bUseHint && iIdx>0 ){
+ const char *zKey = pCsr->zTerm;
+ int nKey = pCsr->nTerm;
+ rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter);
+ }else{
+ rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter);
+ }
+ }
+
+ if( rc==SQLITE_OK && pWriter->nLeafEst ){
+ fts3LogMerge(nSeg, iAbsLevel);
+ do {
+ rc = fts3IncrmergeAppend(p, pWriter, pCsr);
+ if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr);
+ if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK;
+ }while( rc==SQLITE_ROW );
+
+ /* Update or delete the input segments */
+ if( rc==SQLITE_OK ){
+ nRem -= (1 + pWriter->nWork);
+ rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg);
+ if( nSeg!=0 ){
+ bDirtyHint = 1;
+ fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc);
+ }
+ }
+ }
+
+ fts3IncrmergeRelease(p, pWriter, &rc);
+ }
+
+ sqlite3Fts3SegReaderFinish(pCsr);
+ }
+
+ /* Write the hint values into the %_stat table for the next incr-merger */
+ if( bDirtyHint && rc==SQLITE_OK ){
+ rc = fts3IncrmergeHintStore(p, &hint);
+ }
+
+ sqlite3_free(pWriter);
+ sqlite3_free(hint.a);
+ return rc;
+}
+
+/*
+** Convert the text beginning at *pz into an integer and return
+** its value. Advance *pz to point to the first character past
+** the integer.
+*/
+static int fts3Getint(const char **pz){
+ const char *z = *pz;
+ int i = 0;
+ while( (*z)>='0' && (*z)<='9' ) i = 10*i + *(z++) - '0';
+ *pz = z;
+ return i;
+}
+
+/*
+** Process statements of the form:
+**
+** INSERT INTO table(table) VALUES('merge=A,B');
+**
+** A and B are integers that decode to be the number of leaf pages
+** written for the merge, and the minimum number of segments on a level
+** before it will be selected for a merge, respectively.
+*/
+static int fts3DoIncrmerge(
+ Fts3Table *p, /* FTS3 table handle */
+ const char *zParam /* Nul-terminated string containing "A,B" */
+){
+ int rc;
+ int nMin = (FTS3_MERGE_COUNT / 2);
+ int nMerge = 0;
+ const char *z = zParam;
+
+ /* Read the first integer value */
+ nMerge = fts3Getint(&z);
+
+ /* If the first integer value is followed by a ',', read the second
+ ** integer value. */
+ if( z[0]==',' && z[1]!='\0' ){
+ z++;
+ nMin = fts3Getint(&z);
+ }
+
+ if( z[0]!='\0' || nMin<2 ){
+ rc = SQLITE_ERROR;
+ }else{
+ rc = SQLITE_OK;
+ if( !p->bHasStat ){
+ assert( p->bFts4==0 );
+ sqlite3Fts3CreateStatTable(&rc, p);
+ }
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts3Incrmerge(p, nMerge, nMin);
+ }
+ sqlite3Fts3SegmentsClose(p);
+ }
+ return rc;
+}
+
+/*
+** Process statements of the form:
+**
+** INSERT INTO table(table) VALUES('automerge=X');
+**
+** where X is an integer. X==0 means to turn automerge off. X!=0 means
+** turn it on. The setting is persistent.
+*/
+static int fts3DoAutoincrmerge(
+ Fts3Table *p, /* FTS3 table handle */
+ const char *zParam /* Nul-terminated string containing boolean */
+){
+ int rc = SQLITE_OK;
+ sqlite3_stmt *pStmt = 0;
+ p->bAutoincrmerge = fts3Getint(&zParam)!=0;
+ if( !p->bHasStat ){
+ assert( p->bFts4==0 );
+ sqlite3Fts3CreateStatTable(&rc, p);
+ if( rc ) return rc;
+ }
+ rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0);
+ if( rc ) return rc;;
+ sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE);
+ sqlite3_bind_int(pStmt, 2, p->bAutoincrmerge);
+ sqlite3_step(pStmt);
+ rc = sqlite3_reset(pStmt);
+ return rc;
+}
+
+/*
+** Return a 64-bit checksum for the FTS index entry specified by the
+** arguments to this function.
+*/
+static u64 fts3ChecksumEntry(
+ const char *zTerm, /* Pointer to buffer containing term */
+ int nTerm, /* Size of zTerm in bytes */
+ int iLangid, /* Language id for current row */
+ int iIndex, /* Index (0..Fts3Table.nIndex-1) */
+ i64 iDocid, /* Docid for current row. */
+ int iCol, /* Column number */
+ int iPos /* Position */
+){
+ int i;
+ u64 ret = (u64)iDocid;
+
+ ret += (ret<<3) + iLangid;
+ ret += (ret<<3) + iIndex;
+ ret += (ret<<3) + iCol;
+ ret += (ret<<3) + iPos;
+ for(i=0; i<nTerm; i++) ret += (ret<<3) + zTerm[i];
+
+ return ret;
+}
+
+/*
+** Return a checksum of all entries in the FTS index that correspond to
+** language id iLangid. The checksum is calculated by XORing the checksums
+** of each individual entry (see fts3ChecksumEntry()) together.
+**
+** If successful, the checksum value is returned and *pRc set to SQLITE_OK.
+** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The
+** return value is undefined in this case.
+*/
+static u64 fts3ChecksumIndex(
+ Fts3Table *p, /* FTS3 table handle */
+ int iLangid, /* Language id to return cksum for */
+ int iIndex, /* Index to cksum (0..p->nIndex-1) */
+ int *pRc /* OUT: Return code */
+){
+ Fts3SegFilter filter;
+ Fts3MultiSegReader csr;
+ int rc;
+ u64 cksum = 0;
+
+ assert( *pRc==SQLITE_OK );
+
+ memset(&filter, 0, sizeof(filter));
+ memset(&csr, 0, sizeof(csr));
+ filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY;
+ filter.flags |= FTS3_SEGMENT_SCAN;
+
+ rc = sqlite3Fts3SegReaderCursor(
+ p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr
+ );
+ if( rc==SQLITE_OK ){
+ rc = sqlite3Fts3SegReaderStart(p, &csr, &filter);
+ }
+
+ if( rc==SQLITE_OK ){
+ while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){
+ char *pCsr = csr.aDoclist;
+ char *pEnd = &pCsr[csr.nDoclist];
+
+ i64 iDocid = 0;
+ i64 iCol = 0;
+ i64 iPos = 0;
+
+ pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid);
+ while( pCsr<pEnd ){
+ i64 iVal = 0;
+ pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
+ if( pCsr<pEnd ){
+ if( iVal==0 || iVal==1 ){
+ iCol = 0;
+ iPos = 0;
+ if( iVal ){
+ pCsr += sqlite3Fts3GetVarint(pCsr, &iCol);
+ }else{
+ pCsr += sqlite3Fts3GetVarint(pCsr, &iVal);
+ iDocid += iVal;
+ }
+ }else{
+ iPos += (iVal - 2);
+ cksum = cksum ^ fts3ChecksumEntry(
+ csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid,
+ (int)iCol, (int)iPos
+ );
+ }
+ }
+ }
+ }
+ }
+ sqlite3Fts3SegReaderFinish(&csr);
+
+ *pRc = rc;
+ return cksum;
+}
+
+/*
+** Check if the contents of the FTS index match the current contents of the
+** content table. If no error occurs and the contents do match, set *pbOk
+** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk
+** to false before returning.
+**
+** If an error occurs (e.g. an OOM or IO error), return an SQLite error
+** code. The final value of *pbOk is undefined in this case.
+*/
+static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){
+ int rc = SQLITE_OK; /* Return code */
+ u64 cksum1 = 0; /* Checksum based on FTS index contents */
+ u64 cksum2 = 0; /* Checksum based on %_content contents */
+ sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */
+
+ /* This block calculates the checksum according to the FTS index. */
+ rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
+ if( rc==SQLITE_OK ){
+ int rc2;
+ sqlite3_bind_int(pAllLangid, 1, p->nIndex);
+ while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){
+ int iLangid = sqlite3_column_int(pAllLangid, 0);
+ int i;
+ for(i=0; i<p->nIndex; i++){
+ cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc);
+ }
+ }
+ rc2 = sqlite3_reset(pAllLangid);
+ if( rc==SQLITE_OK ) rc = rc2;
+ }
+
+ /* This block calculates the checksum according to the %_content table */
+ rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0);
+ if( rc==SQLITE_OK ){
+ sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule;
+ sqlite3_stmt *pStmt = 0;
+ char *zSql;
+
+ zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);
+ if( !zSql ){
+ rc = SQLITE_NOMEM;
+ }else{
+ rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
+ sqlite3_free(zSql);
+ }
+
+ while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
+ i64 iDocid = sqlite3_column_int64(pStmt, 0);
+ int iLang = langidFromSelect(p, pStmt);
+ int iCol;
+
+ for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
+ const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1);
+ int nText = sqlite3_column_bytes(pStmt, iCol+1);
+ sqlite3_tokenizer_cursor *pT = 0;
+
+ rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText, &pT);
+ while( rc==SQLITE_OK ){
+ char const *zToken; /* Buffer containing token */
+ int nToken = 0; /* Number of bytes in token */
+ int iDum1 = 0, iDum2 = 0; /* Dummy variables */
+ int iPos = 0; /* Position of token in zText */
+
+ rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos);
+ if( rc==SQLITE_OK ){
+ int i;
+ cksum2 = cksum2 ^ fts3ChecksumEntry(
+ zToken, nToken, iLang, 0, iDocid, iCol, iPos
+ );
+ for(i=1; i<p->nIndex; i++){
+ if( p->aIndex[i].nPrefix<=nToken ){
+ cksum2 = cksum2 ^ fts3ChecksumEntry(
+ zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos
+ );
+ }
+ }
+ }
+ }
+ if( pT ) pModule->xClose(pT);
+ if( rc==SQLITE_DONE ) rc = SQLITE_OK;
+ }
+ }
+
+ sqlite3_finalize(pStmt);
+ }
+
+ *pbOk = (cksum1==cksum2);
+ return rc;
+}
+
+/*
+** Run the integrity-check. If no error occurs and the current contents of
+** the FTS index are correct, return SQLITE_OK. Or, if the contents of the
+** FTS index are incorrect, return SQLITE_CORRUPT_VTAB.
+**
+** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite
+** error code.
+**
+** The integrity-check works as follows. For each token and indexed token
+** prefix in the document set, a 64-bit checksum is calculated (by code
+** in fts3ChecksumEntry()) based on the following:
+**
+** + The index number (0 for the main index, 1 for the first prefix
+** index etc.),
+** + The token (or token prefix) text itself,
+** + The language-id of the row it appears in,
+** + The docid of the row it appears in,
+** + The column it appears in, and
+** + The tokens position within that column.
+**
+** The checksums for all entries in the index are XORed together to create
+** a single checksum for the entire index.
+**
+** The integrity-check code calculates the same checksum in two ways:
+**
+** 1. By scanning the contents of the FTS index, and
+** 2. By scanning and tokenizing the content table.
+**
+** If the two checksums are identical, the integrity-check is deemed to have
+** passed.
+*/
+static int fts3DoIntegrityCheck(
+ Fts3Table *p /* FTS3 table handle */
+){
+ int rc;
+ int bOk = 0;
+ rc = fts3IntegrityCheck(p, &bOk);
+ if( rc==SQLITE_OK && bOk==0 ) rc = SQLITE_CORRUPT_VTAB;
+ return rc;
+}
+
+/*
** Handle a 'special' INSERT of the form:
**
** "INSERT INTO tbl(tbl) VALUES(<expr>)"
@@ -2928,6 +5026,14 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
return SQLITE_NOMEM;
}else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
rc = fts3DoOptimize(p, 0);
+ }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){
+ rc = fts3DoRebuild(p);
+ }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){
+ rc = fts3DoIntegrityCheck(p);
+ }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){
+ rc = fts3DoIncrmerge(p, &zVal[6]);
+ }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){
+ rc = fts3DoAutoincrmerge(p, &zVal[10]);
#ifdef SQLITE_TEST
}else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
p->nNodeSize = atoi(&zVal[9]);
@@ -2943,6 +5049,7 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
return rc;
}
+#ifndef SQLITE_DISABLE_FTS4_DEFERRED
/*
** Delete all cached deferred doclists. Deferred doclists are cached
** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function.
@@ -2996,18 +5103,18 @@ int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1);
sqlite3_tokenizer_cursor *pTC = 0;
- rc = pModule->xOpen(pT, zText, -1, &pTC);
+ rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC);
while( rc==SQLITE_OK ){
char const *zToken; /* Buffer containing token */
- int nToken; /* Number of bytes in token */
- int iDum1, iDum2; /* Dummy variables */
- int iPos; /* Position of token in zText */
+ int nToken = 0; /* Number of bytes in token */
+ int iDum1 = 0, iDum2 = 0; /* Dummy variables */
+ int iPos = 0; /* Position of token in zText */
- pTC->pTokenizer = pT;
rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos);
for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
Fts3PhraseToken *pPT = pDef->pToken;
if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
+ && (pPT->bFirst==0 || iPos==0)
&& (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
&& (0==memcmp(zToken, pPT->z, pPT->n))
){
@@ -3080,6 +5187,7 @@ int sqlite3Fts3DeferToken(
return SQLITE_OK;
}
+#endif
/*
** SQLite value pRowid contains the rowid of a row that may or may not be
@@ -3089,26 +5197,32 @@ int sqlite3Fts3DeferToken(
static int fts3DeleteByRowid(
Fts3Table *p,
sqlite3_value *pRowid,
- int *pnDoc,
+ int *pnChng, /* IN/OUT: Decrement if row is deleted */
u32 *aSzDel
){
- int isEmpty = 0;
- int rc = fts3IsEmpty(p, pRowid, &isEmpty);
- if( rc==SQLITE_OK ){
- if( isEmpty ){
- /* Deleting this row means the whole table is empty. In this case
- ** delete the contents of all three tables and throw away any
- ** data in the pendingTerms hash table. */
- rc = fts3DeleteAll(p);
- *pnDoc = *pnDoc - 1;
- }else{
- sqlite3_int64 iRemove = sqlite3_value_int64(pRowid);
- rc = fts3PendingTermsDocid(p, iRemove);
- fts3DeleteTerms(&rc, p, pRowid, aSzDel);
- fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
- if( sqlite3_changes(p->db) ) *pnDoc = *pnDoc - 1;
- if( p->bHasDocsize ){
- fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
+ int rc = SQLITE_OK; /* Return code */
+ int bFound = 0; /* True if *pRowid really is in the table */
+
+ fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound);
+ if( bFound && rc==SQLITE_OK ){
+ int isEmpty = 0; /* Deleting *pRowid leaves the table empty */
+ rc = fts3IsEmpty(p, pRowid, &isEmpty);
+ if( rc==SQLITE_OK ){
+ if( isEmpty ){
+ /* Deleting this row means the whole table is empty. In this case
+ ** delete the contents of all three tables and throw away any
+ ** data in the pendingTerms hash table. */
+ rc = fts3DeleteAll(p, 1);
+ *pnChng = 0;
+ memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2);
+ }else{
+ *pnChng = *pnChng - 1;
+ if( p->zContentTbl==0 ){
+ fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
+ }
+ if( p->bHasDocsize ){
+ fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
+ }
}
}
}
@@ -3118,7 +5232,16 @@ static int fts3DeleteByRowid(
/*
** This function does the work for the xUpdate method of FTS3 virtual
-** tables.
+** tables. The schema of the virtual table being:
+**
+** CREATE TABLE <table name>(
+** <user columns>,
+** <table name> HIDDEN,
+** docid HIDDEN,
+** <langid> HIDDEN
+** );
+**
+**
*/
int sqlite3Fts3UpdateMethod(
sqlite3_vtab *pVtab, /* FTS3 vtab object */
@@ -3129,13 +5252,16 @@ int sqlite3Fts3UpdateMethod(
Fts3Table *p = (Fts3Table *)pVtab;
int rc = SQLITE_OK; /* Return Code */
int isRemove = 0; /* True for an UPDATE or DELETE */
- sqlite3_int64 iRemove = 0; /* Rowid removed by UPDATE or DELETE */
u32 *aSzIns = 0; /* Sizes of inserted documents */
- u32 *aSzDel; /* Sizes of deleted documents */
+ u32 *aSzDel = 0; /* Sizes of deleted documents */
int nChng = 0; /* Net change in number of documents */
int bInsertDone = 0;
assert( p->pSegments==0 );
+ assert(
+ nArg==1 /* DELETE operations */
+ || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */
+ );
/* Check for a "special" INSERT operation. One of the form:
**
@@ -3149,14 +5275,19 @@ int sqlite3Fts3UpdateMethod(
goto update_out;
}
+ if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){
+ rc = SQLITE_CONSTRAINT;
+ goto update_out;
+ }
+
/* Allocate space to hold the change in document sizes */
- aSzIns = sqlite3_malloc( sizeof(aSzIns[0])*(p->nColumn+1)*2 );
- if( aSzIns==0 ){
+ aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 );
+ if( aSzDel==0 ){
rc = SQLITE_NOMEM;
goto update_out;
}
- aSzDel = &aSzIns[p->nColumn+1];
- memset(aSzIns, 0, sizeof(aSzIns[0])*(p->nColumn+1)*2);
+ aSzIns = &aSzDel[p->nColumn+1];
+ memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2);
/* If this is an INSERT operation, or an UPDATE that modifies the rowid
** value, then this operation requires constraint handling.
@@ -3167,7 +5298,7 @@ int sqlite3Fts3UpdateMethod(
** detect the conflict and return SQLITE_CONSTRAINT before beginning to
** modify the database file.
*/
- if( nArg>1 ){
+ if( nArg>1 && p->zContentTbl==0 ){
/* Find the value object that holds the new rowid value. */
sqlite3_value *pNewRowid = apVal[3+p->nColumn];
if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){
@@ -3212,20 +5343,23 @@ int sqlite3Fts3UpdateMethod(
assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
isRemove = 1;
- iRemove = sqlite3_value_int64(apVal[0]);
}
/* If this is an INSERT or UPDATE operation, insert the new record. */
if( nArg>1 && rc==SQLITE_OK ){
+ int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]);
if( bInsertDone==0 ){
rc = fts3InsertData(p, apVal, pRowid);
- if( rc==SQLITE_CONSTRAINT ) rc = SQLITE_CORRUPT_VTAB;
+ if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
+ rc = FTS_CORRUPT_VTAB;
+ }
}
- if( rc==SQLITE_OK && (!isRemove || *pRowid!=iRemove) ){
- rc = fts3PendingTermsDocid(p, *pRowid);
+ if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
+ rc = fts3PendingTermsDocid(p, iLangid, *pRowid);
}
if( rc==SQLITE_OK ){
- rc = fts3InsertTerms(p, apVal, aSzIns);
+ assert( p->iPrevDocid==*pRowid );
+ rc = fts3InsertTerms(p, iLangid, apVal, aSzIns);
}
if( p->bHasDocsize ){
fts3InsertDocsize(&rc, p, aSzIns);
@@ -3233,12 +5367,12 @@ int sqlite3Fts3UpdateMethod(
nChng++;
}
- if( p->bHasStat ){
+ if( p->bFts4 ){
fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng);
}
update_out:
- sqlite3_free(aSzIns);
+ sqlite3_free(aSzDel);
sqlite3Fts3SegmentsClose(p);
return rc;
}