1 files changed, 395 insertions, 176 deletions
diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc
index 58b429a8406..5c757b4f176 100644
--- a/storage/innobase/fts/fts0que.cc
+++ b/storage/innobase/fts/fts0que.cc
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -24,6 +24,7 @@ Created 2007/03/27 Sunny Bains
 Completed 2011/7/10 Sunny and Jimmy Yang
 *******************************************************/
 
+#include "dict0dict.h" /* dict_table_get_n_rows() */
 #include "ut0rbt.h"
 #include "row0sel.h"
 #include "fts0fts.h"
@@ -57,15 +58,10 @@ static const double FTS_NORMALIZE_COEFF = 0.0115F;
 /* For parsing the search phrase */
 static const char* FTS_PHRASE_DELIMITER = "\t ";
 
-typedef struct fts_match_struct fts_match_t;
-typedef	struct fts_query_struct fts_query_t;
-typedef struct fts_phrase_struct fts_phrase_t;
-typedef struct fts_select_struct fts_select_t;
-typedef struct fts_doc_freq_struct fts_doc_freq_t;
-typedef struct fts_word_freq_struct fts_word_freq_t;
+struct fts_word_freq_t;
 
 /** State of an FTS query. */
-struct fts_query_struct {
+struct fts_query_t {
 	mem_heap_t*	heap;		/*!< Heap to use for allocations */
 
 	trx_t*		trx;		/*!< The query transaction */
@@ -126,11 +122,11 @@ struct fts_query_struct {
 					position info for each matched word
 					in the word list */
 
-	ulint		total_docs;	/*!< The total number of documents */
+	ib_uint64_t	total_docs;	/*!< The total number of documents */
 
 	ulint		total_words;	/*!< The total number of words */
 
-	ulint		error;		/*!< Error code if any, that is
+	dberr_t		error;		/*!< Error code if any, that is
 					encountered during query processing */
 
 	ib_rbt_t*	word_freqs;	/*!< RB tree of word frequencies per
@@ -144,7 +140,7 @@ struct fts_query_struct {
 
 /** For phrase matching, first we collect the documents and the positions
 then we match. */
-struct fts_match_struct {
+struct fts_match_t {
 	doc_id_t	doc_id;		/*!< Document id */
 
 	ulint		start;		/*!< Start the phrase match from
@@ -158,7 +154,7 @@ struct fts_match_struct {
 /** For matching tokens in a phrase search. We use this data structure in
 the callback that determines whether a document should be accepted or
 rejected for a phrase search. */
-struct fts_select_struct {
+struct fts_select_t {
 	doc_id_t	doc_id;		/*!< The document id to match */
 
 	ulint		min_pos;	/*!< For found to be TRUE at least
@@ -173,8 +169,23 @@ struct fts_select_struct {
 					the FTS index */
 };
 
+/** structure defines a set of ranges for original documents, each of which
+has a minimum position and maximum position. Text in such range should
+contain all words in the proximity search. We will need to count the
+words in such range to make sure it is less than the specified distance
+of the proximity search */
+struct fts_proximity_t {
+	ulint		n_pos;		/*!< number of position set, defines
+					a range (min to max) containing all
+					matching words */
+	ulint*		min_pos;	/*!< the minimum position (in bytes)
+					of the range */
+	ulint*		max_pos;	/*!< the maximum position (in bytes)
+					of the range */
+};
+
 /** The match positions and tokesn to match */
-struct fts_phrase_struct {
+struct fts_phrase_t {
 	ibool		found;		/*!< Match result */
 
 	const fts_match_t*
@@ -188,23 +199,26 @@ struct fts_phrase_struct {
 	CHARSET_INFO*	charset;	/*!< Phrase match charset */
 	mem_heap_t*     heap;		/*!< Heap for word processing */
 	ulint		zip_size;	/*!< row zip size */
+	fts_proximity_t*proximity_pos;	/*!< position info for proximity
+					search verification. Records the min
+					and max position of words matched */
 };
 
 /** For storing the frequncy of a word/term in a document */
-struct fts_doc_freq_struct {
+struct fts_doc_freq_t {
 	doc_id_t	doc_id;		/*!< Document id */
 	ulint		freq;		/*!< Frequency of a word in a document */
 };
 
 /** To determine the word frequency per document. */
-struct fts_word_freq_struct {
+struct fts_word_freq_t {
 	byte*		word;		/*!< Word for which we need the freq,
 					it's allocated on the query heap */
 
 	ib_rbt_t*	doc_freqs;	/*!< RB Tree for storing per document
 					word frequencies. The elements are
 					of type fts_doc_freq_t */
-	ulint		doc_count;	/*!< Total number of documents that
+	ib_uint64_t	doc_count;	/*!< Total number of documents that
 					contain this word */
 	double		idf;		/*!< Inverse document frequency */
 };
@@ -257,37 +271,46 @@ search arguments to search the document again, thus "expand"
 the search result set.
 @return DB_SUCCESS if success, otherwise the error code */
 static
-ulint
+dberr_t
 fts_expand_query(
 /*=============*/
 	dict_index_t*	index,		/*!< in: FTS index to search */
-	fts_query_t*	query);		/*!< in: query result, to be freed
+	fts_query_t*	query)		/*!< in: query result, to be freed
 					by the client */
+	__attribute__((nonnull, warn_unused_result));
 /*************************************************************//**
 This function finds documents that contain all words in a
 phrase or proximity search. And if proximity search, verify
-the words are close to each other enough, as in specified distance.
+the words are close enough to each other, as in specified distance.
 This function is called for phrase and proximity search.
 @return TRUE if documents are found, FALSE if otherwise */
 static
 ibool
-fts_check_phrase_proximity(
-/*=======================*/
-	fts_query_t*	query,		/*!< in:  query instance */
+fts_phrase_or_proximity_search(
+/*===========================*/
+	fts_query_t*	query,		/*!< in/out:  query instance
+					query->doc_ids might be instantiated
+					with qualified doc IDs */
 	ib_vector_t*	tokens);	/*!< in: Tokens contain words */
 /*************************************************************//**
-This function check the words in result document are close to each
-other enough (within proximity rnage). This is used for proximity search.
-@return TRUE if words are close to each other, FALSE if otherwise */
+This function checks whether words in result documents are close to
+each other (within proximity range as specified by "distance").
+If "distance" is MAX_ULINT, then it will find all combinations of
+positions of matching words and store min and max positions
+in the "qualified_pos" for later verification.
+@return true if words are close to each other, false if otherwise */
 static
-ulint
-fts_proximity_check_position(
-/*=========================*/
-	fts_match_t**	match,		/*!< in: query instance */
-	ulint		num_match,	/*!< in: number of matching
-					items */
-	ulint		distance);	/*!< in: distance value
-					for proximity search */
+bool
+fts_proximity_get_positions(
+/*========================*/
+	fts_match_t**		match,		/*!< in: query instance */
+	ulint			num_match,	/*!< in: number of matching
+						items */
+	ulint			distance,	/*!< in: distance value
+						for proximity search */
+	fts_proximity_t*	qualified_pos);	/*!< out: the position info
+						records ranges containing
+						all matching words. */
 #if 0
 /********************************************************************
 Get the total number of words in a documents. */
@@ -954,8 +977,8 @@ cont_search:
 /*****************************************************************//**
 Set difference.
 @return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_difference(
 /*=================*/
 	fts_query_t*		query,	/*!< in: query instance */
@@ -993,15 +1016,21 @@ fts_query_difference(
 		ut_a(index_cache != NULL);
 
 		/* Search the cache for a matching word first. */
-		nodes = fts_cache_find_word(index_cache, token);
+		if (query->cur_node->term.wildcard
+		    && query->flags != FTS_PROXIMITY
+		    && query->flags != FTS_PHRASE) {
+			fts_cache_find_wildcard(query, index_cache, token);
+		} else {
+			nodes = fts_cache_find_word(index_cache, token);
 
-		for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
-			const fts_node_t*	node;
+			for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
+				const fts_node_t*	node;
 
-			node = static_cast<const fts_node_t*>(
-				ib_vector_get_const(nodes, i));
+				node = static_cast<const fts_node_t*>(
+					ib_vector_get_const(nodes, i));
 
-			fts_query_check_node(query, token, node);
+				fts_query_check_node(query, token, node);
+			}
 		}
 
 		rw_lock_x_unlock(&cache->lock);
@@ -1026,8 +1055,8 @@ fts_query_difference(
 /*****************************************************************//**
 Intersect the token doc ids with the current set.
 @return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_intersect(
 /*================*/
 	fts_query_t*		query,	/*!< in: query instance */
@@ -1216,8 +1245,8 @@ fts_query_cache(
 /*****************************************************************//**
 Set union.
 @return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_union(
 /*============*/
 	fts_query_t*		query,	/*!< in: query instance */
@@ -1248,13 +1277,7 @@ fts_query_union(
 
 	/* Single '%' would confuse parser in pars_like_rebind(). In addition,
 	our wildcard search only supports prefix search */
-	if (*token->f_str == '%') {
-		if (token->f_len == 1) {
-			return(query->error);
-		}
-		token->f_str++;
-		token->f_len--;
-	}
+	ut_ad(*token->f_str != '%');
 
 	fts_query_cache(query, token);
 
@@ -1485,6 +1508,67 @@ fts_query_match_phrase_terms(
 }
 
 /*****************************************************************//**
+Callback function to count the number of words in position ranges,
+and see whether the word count is in specified "phrase->distance"
+@return true if the number of characters is less than the "distance" */
+static
+bool
+fts_proximity_is_word_in_range(
+/*===========================*/
+	const fts_phrase_t*
+			phrase,		/*!< in: phrase with the search info */
+	byte*		start,		/*!< in: text to search */
+	ulint		total_len)	/*!< in: length of text */
+{
+	fts_proximity_t*	proximity_pos = phrase->proximity_pos;
+
+	/* Search each matched position pair (with min and max positions)
+	and count the number of words in the range */
+	for (ulint i = 0; i < proximity_pos->n_pos; i++) {
+		ulint		cur_pos = proximity_pos->min_pos[i];
+		ulint		n_word = 0;
+
+		ut_ad(proximity_pos->max_pos[i] <= total_len);
+
+		/* Walk through words in the range and count them */
+		while (cur_pos <= proximity_pos->max_pos[i]) {
+			ulint		len;
+			fts_string_t	str;
+			ulint           offset = 0;
+
+			len = innobase_mysql_fts_get_token(
+				phrase->charset,
+				start + cur_pos,
+				start + total_len, &str, &offset);
+
+			if (len == 0) {
+				break;
+			}
+
+			/* Advances position with "len" bytes */
+			cur_pos += len;
+
+			/* Record the number of words */
+			if (str.f_n_char > 0) {
+				n_word++;
+			}
+
+			if (n_word > phrase->distance) {
+				break;
+			}
+		}
+
+		/* Check if the number of words is less than specified
+		"distance" */
+		if (n_word && n_word <= phrase->distance) {
+			return(true);
+		}
+	}
+
+	return(false);
+}
+
+/*****************************************************************//**
 Callback function to fetch and search the document.
 @return TRUE if matched else FALSE */
 static
@@ -1594,31 +1678,77 @@ fts_query_fetch_document(
 	sel_node_t*	node = static_cast<sel_node_t*>(row);
 	fts_phrase_t*	phrase = static_cast<fts_phrase_t*>(user_arg);
 	ulint		prev_len = 0;
+	ulint		total_len = 0;
+	byte*		document_text = NULL;
 
 	exp = node->select_list;
 
 	phrase->found = FALSE;
 
+	/* For proximity search, we will need to get the whole document
+	from all fields, so first count the total length of the document
+	from all the fields */
+	if (phrase->proximity_pos) {
+		 while (exp) {
+			ulint		field_len;
+			dfield_t*	dfield = que_node_get_val(exp);
+			byte*		data = static_cast<byte*>(
+						dfield_get_data(dfield));
+
+			if (dfield_is_ext(dfield)) {
+				ulint	local_len = dfield_get_len(dfield);
+
+				local_len -= BTR_EXTERN_FIELD_REF_SIZE;
+
+				field_len = mach_read_from_4(
+					data + local_len + BTR_EXTERN_LEN + 4);
+			} else {
+				field_len = dfield_get_len(dfield);
+			}
+
+			if (field_len != UNIV_SQL_NULL) {
+				total_len += field_len + 1;
+			}
+
+			exp = que_node_get_next(exp);
+		}
+
+		document_text = static_cast<byte*>(mem_heap_zalloc(
+					phrase->heap, total_len));
+
+		if (!document_text) {
+			return(FALSE);
+		}
+	}
+
+	exp = node->select_list;
+
 	while (exp) {
 		dfield_t*	dfield = que_node_get_val(exp);
-		void*		data = NULL;
+		byte*		data = static_cast<byte*>(
+					dfield_get_data(dfield));
 		ulint		cur_len;
 
 		if (dfield_is_ext(dfield)) {
 			data = btr_copy_externally_stored_field(
-				&cur_len, static_cast<const byte*>(data),
-				phrase->zip_size,
+				&cur_len, data, phrase->zip_size,
 				dfield_get_len(dfield), phrase->heap);
 		} else {
-			data = dfield_get_data(dfield);
 			cur_len = dfield_get_len(dfield);
 		}
 
 		if (cur_len != UNIV_SQL_NULL && cur_len != 0) {
-			phrase->found =
-				fts_query_match_phrase(
-					phrase, static_cast<byte*>(data),
-					cur_len, prev_len, phrase->heap);
+			if (phrase->proximity_pos) {
+				memcpy(document_text + prev_len, data, cur_len);
+			} else {
+				/* For phrase search */
+				phrase->found =
+					fts_query_match_phrase(
+						phrase,
+						static_cast<byte*>(data),
+						cur_len, prev_len,
+						phrase->heap);
+			}
 		}
 
 		if (phrase->found) {
@@ -1633,6 +1763,13 @@ fts_query_fetch_document(
 		exp = que_node_get_next(exp);
 	}
 
+	if (phrase->proximity_pos) {
+		ut_ad(prev_len <= total_len);
+
+		phrase->found = fts_proximity_is_word_in_range(
+			phrase, document_text, total_len);
+	}
+
 	return(phrase->found);
 }
 
@@ -1689,13 +1826,12 @@ fts_query_select(
 
 /********************************************************************
 Read the rows from the FTS index, that match word and where the
-doc id is between first and last doc id. */
-static
-ulint
+doc id is between first and last doc id.
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_find_term(
 /*================*/
-					/*!< out: DB_SUCCESS if all went well
-					else error code */
 	fts_query_t*		query,	/*!< in: FTS query state */
 	que_t**			graph,	/*!< in: prepared statement */
 	const fts_string_t*	word,	/*!< in: the word to fetch */
@@ -1705,7 +1841,7 @@ fts_query_find_term(
 	ibool*			found)	/*!< out: TRUE if found else FALSE */
 {
 	pars_info_t*		info;
-	ulint			error;
+	dberr_t			error;
 	fts_select_t		select;
 	doc_id_t		match_doc_id;
 	trx_t*			trx = query->trx;
@@ -1830,19 +1966,18 @@ fts_query_sum(
 }
 
 /********************************************************************
-Calculate the total documents that contain a particular word (term). */
-static
-ulint
+Calculate the total documents that contain a particular word (term).
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_total_docs_containing_term(
 /*=================================*/
-					/*!< out: DB_SUCCESS if all went well
-					else error code */
 	fts_query_t*		query,	/*!< in: FTS query state */
 	const fts_string_t*	word,	/*!< in: the word to check */
 	ulint*			total)	/*!< out: documents containing word */
 {
 	pars_info_t*		info;
-	ulint			error;
+	dberr_t			error;
 	que_t*			graph;
 	ulint			selected;
 	trx_t*			trx = query->trx;
@@ -1910,19 +2045,18 @@ fts_query_total_docs_containing_term(
 }
 
 /********************************************************************
-Get the total number of words in a documents. */
-static
-ulint
+Get the total number of words in a documents.
+@return DB_SUCCESS if all went well else error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_terms_in_document(
 /*========================*/
-					/*!< out: DB_SUCCESS if all went well
-					else error code */
 	fts_query_t*	query,		/*!< in: FTS query state */
 	doc_id_t	doc_id,		/*!< in: the word to check */
 	ulint*		total)		/*!< out: total words in document */
 {
 	pars_info_t*	info;
-	ulint		error;
+	dberr_t		error;
 	que_t*		graph;
 	doc_id_t	read_doc_id;
 	trx_t*		trx = query->trx;
@@ -1993,9 +2127,9 @@ fts_query_terms_in_document(
 
 /*****************************************************************//**
 Retrieve the document and match the phrase tokens.
-@return TRUE if matches else FALSE */
-static
-ulint
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_match_document(
 /*=====================*/
 	ib_vector_t*	tokens,		/*!< in: phrase tokens */
@@ -2004,7 +2138,7 @@ fts_query_match_document(
 	ulint		distance,	/*!< in: proximity distance */
 	ibool*		found)		/*!< out: TRUE if phrase found */
 {
-	ulint		error;
+	dberr_t		error;
 	fts_phrase_t	phrase;
 
 	memset(&phrase, 0x0, sizeof(phrase));
@@ -2025,8 +2159,8 @@ fts_query_match_document(
 
 	if (error != DB_SUCCESS) {
 		ut_print_timestamp(stderr);
-		fprintf(stderr, "InnoDB: Error: (%lu) matching document.\n",
-			error);
+		fprintf(stderr, "InnoDB: Error: (%s) matching document.\n",
+			ut_strerr(error));
 	} else {
 		*found = phrase.found;
 	}
@@ -2037,11 +2171,66 @@ fts_query_match_document(
 }
 
 /*****************************************************************//**
+This function fetches the original documents and count the
+words in between matching words to see that is in specified distance
+@return DB_SUCCESS if all OK */
+static __attribute__((nonnull, warn_unused_result))
+bool
+fts_query_is_in_proximity_range(
+/*============================*/
+	const fts_query_t*	query,		/*!< in:  query instance */
+	fts_match_t**		match,		/*!< in: query instance */
+	fts_proximity_t*	qualified_pos)	/*!< in: position info for
+						qualified ranges */
+{
+	fts_get_doc_t		get_doc;
+	fts_cache_t*		cache = query->index->table->fts->cache;
+	dberr_t			err;
+	fts_phrase_t		phrase;
+
+	memset(&get_doc, 0x0, sizeof(get_doc));
+	memset(&phrase, 0x0, sizeof(phrase));
+
+	rw_lock_x_lock(&cache->lock);
+	get_doc.index_cache = fts_find_index_cache(cache, query->index);
+	rw_lock_x_unlock(&cache->lock);
+	ut_a(get_doc.index_cache != NULL);
+
+	phrase.distance = query->distance;
+	phrase.charset = get_doc.index_cache->charset;
+	phrase.zip_size = dict_table_zip_size(
+		get_doc.index_cache->index->table);
+	phrase.heap = mem_heap_create(512);
+	phrase.proximity_pos = qualified_pos;
+	phrase.found = FALSE;
+
+	err = fts_doc_fetch_by_doc_id(
+		&get_doc, match[0]->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL,
+		fts_query_fetch_document, &phrase);
+
+	if (err != DB_SUCCESS) {
+		ib_logf(IB_LOG_LEVEL_ERROR,
+			"Error: (%s) in verification phase of proximity "
+			"search", ut_strerr(err));
+	}
+
+	/* Free the prepared statement. */
+	if (get_doc.get_document_graph) {
+		fts_que_graph_free(get_doc.get_document_graph);
+		get_doc.get_document_graph = NULL;
+	}
+
+	mem_heap_free(phrase.heap);
+
+	return(err == DB_SUCCESS && phrase.found);
+}
+
+/*****************************************************************//**
 Iterate over the matched document ids and search the for the
 actual phrase in the text.
 @return DB_SUCCESS if all OK */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_search_phrase(
 /*====================*/
 	fts_query_t*		query,	/*!< in: query instance */
@@ -2050,8 +2239,6 @@ fts_query_search_phrase(
 	ulint			i;
 	fts_get_doc_t		get_doc;
 	ulint			n_matched;
-	// FIXME: Debug code
-	ulint			searched = 0;
 	fts_cache_t*		cache = query->index->table->fts->cache;
 
 	n_matched = ib_vector_size(query->matched);
@@ -2061,9 +2248,7 @@ fts_query_search_phrase(
 
 	rw_lock_x_lock(&cache->lock);
 
-	// FIXME: We shouldn't have to cast here.
-	get_doc.index_cache = (fts_index_cache_t*)
-	fts_find_index_cache(cache, query->index);
+	get_doc.index_cache = fts_find_index_cache(cache, query->index);
 
 	/* Must find the index cache */
 	ut_a(get_doc.index_cache != NULL);
@@ -2089,9 +2274,6 @@ fts_query_search_phrase(
 		an earlier pass. */
 		if (match->doc_id != 0) {
 
-			// FIXME: Debug code
-			++searched;
-
 			query->error = fts_query_match_document(
 				tokens, &get_doc,
 				match, query->distance, &found);
@@ -2119,18 +2301,14 @@ fts_query_search_phrase(
 		get_doc.get_document_graph = NULL;
 	}
 
-	// FIXME: Debug code
-	ut_print_timestamp(stderr);
-	printf(" End: %lu, %lu\n", searched, ib_vector_size(query->matched));
-
 	return(query->error);
 }
 
 /*****************************************************************//**
 Text/Phrase search.
-@return count of doc ids added */
-static
-ulint
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_phrase_search(
 /*====================*/
 	fts_query_t*		query,	/*!< in: query instance */
@@ -2290,7 +2468,7 @@ fts_query_phrase_search(
 		/* If we are doing proximity search, verify the distance
 		between all words, and check they are in specified distance. */
 		if (query->flags & FTS_PROXIMITY) {
-			fts_check_phrase_proximity(query, tokens);
+			fts_phrase_or_proximity_search(query, tokens);
 		} else {
 			ibool	matched;
 
@@ -2301,7 +2479,7 @@ fts_query_phrase_search(
 			and then doing a search through the text. Isolated
 			testing shows this also helps in mitigating disruption
 			of the buffer cache. */
-			matched = fts_check_phrase_proximity(query, tokens);
+			matched = fts_phrase_or_proximity_search(query, tokens);
 			query->matched = query->match_array[0];
 
 			/* Read the actual text in and search for the phrase. */
@@ -2329,8 +2507,8 @@ func_exit:
 /*****************************************************************//**
 Find the word and evaluate.
 @return DB_SUCCESS if all went well */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_query_execute(
 /*==============*/
 	fts_query_t*		query,	/*!< in: query instance */
@@ -2477,13 +2655,12 @@ fts_query_visitor(
 /*****************************************************************//**
 Process (nested) sub-expression, create a new result set to store the
 sub-expression result by processing nodes under current sub-expression
-list. Merge the sub-expression result with that of parent expression list. */
-
-ulint
+list. Merge the sub-expression result with that of parent expression list.
+@return DB_SUCCESS if all went well */
+UNIV_INTERN
+dberr_t
 fts_ast_visit_sub_exp(
 /*==================*/
-						/*!< out: DB_SUCCESS if all
-						went well */
 	fts_ast_node_t*		node,		/*!< in,out: current root node */
 	fts_ast_callback	visitor,	/*!< in: callback function */
 	void*			arg)		/*!< in,out: arg for callback */
@@ -2492,8 +2669,9 @@ fts_ast_visit_sub_exp(
 	fts_query_t*		query = static_cast<fts_query_t*>(arg);
 	ib_rbt_t*		parent_doc_ids;
 	ib_rbt_t*		subexpr_doc_ids;
-	ulint			error = DB_SUCCESS;
+	dberr_t			error = DB_SUCCESS;
 	ibool			inited = query->inited;
+	bool			will_be_ignored = false;
 
 	ut_a(node->type == FTS_AST_SUBEXP_LIST);
 
@@ -2521,7 +2699,8 @@ fts_ast_visit_sub_exp(
 
 	/* Process nodes in current sub-expression and store its
 	result set in query->doc_ids we created above. */
-	error = fts_ast_visit(FTS_NONE, node->next, visitor, arg);
+	error = fts_ast_visit(FTS_NONE, node->next, visitor,
+			      arg, &will_be_ignored);
 
 	/* Reinstate parent node state and prepare for merge. */
 	query->inited = inited;
@@ -2757,6 +2936,8 @@ fts_query_read_node(
 	ut_a(query->cur_node->type == FTS_AST_TERM ||
 	     query->cur_node->type == FTS_AST_TEXT);
 
+	memset(&node, 0, sizeof(node));
+
 	/* Need to consider the wildcard search case, the word frequency
 	is created on the search string not the actual word. So we need
 	to assign the frequency on search string behalf. */
@@ -2879,8 +3060,8 @@ fts_query_calculate_idf(
 /*====================*/
 	fts_query_t*	query)	/*!< in: Query state */
 {
-	const ib_rbt_node_t* node;
-	double		total_docs = query->total_docs;
+	const ib_rbt_node_t*	node;
+	ib_uint64_t		total_docs = query->total_docs;
 
 	/* We need to free any instances of fts_doc_freq_t that we
 	may have allocated. */
@@ -2893,7 +3074,7 @@ fts_query_calculate_idf(
 		word_freq = rbt_value(fts_word_freq_t, node);
 
 		if (word_freq->doc_count > 0) {
-			if (total_docs == (double) word_freq->doc_count) {
+			if (total_docs == word_freq->doc_count) {
 				/* QP assume ranking > 0 if we find
 				a match. Since Log10(1) = 0, we cannot
 				make IDF a zero value if do find a
@@ -2907,10 +3088,13 @@ fts_query_calculate_idf(
 			}
 		}
 
-		fprintf(stderr,"'%s' -> %lu/%lu %6.5lf\n",
-		       word_freq->word,
-		       query->total_docs, word_freq->doc_count,
-		       word_freq->idf);
+		if (fts_enable_diag_print) {
+			fprintf(stderr,"'%s' -> " UINT64PF "/" UINT64PF
+				" %6.5lf\n",
+			        word_freq->word,
+			        query->total_docs, word_freq->doc_count,
+			        word_freq->idf);
+		}
 	}
 }
 
@@ -3017,7 +3201,7 @@ fts_retrieve_ranking(
 
 		ranking = rbt_value(fts_ranking_t, parent.last);
 
-		return (ranking->rank);
+		return(ranking->rank);
 	}
 
 	return(0);
@@ -3184,7 +3368,7 @@ fts_query_parse(
 FTS Query entry point.
 @return DB_SUCCESS if successful otherwise error code */
 UNIV_INTERN
-ulint
+dberr_t
 fts_query(
 /*======*/
 	trx_t*		trx,		/*!< in: transaction */
@@ -3196,7 +3380,7 @@ fts_query(
 	fts_result_t**	result)		/*!< in/out: result doc ids */
 {
 	fts_query_t	query;
-	ulint		error;
+	dberr_t		error = DB_SUCCESS;
 	byte*		lc_query_str;
 	ulint		lc_query_str_len;
 	ulint		result_len;
@@ -3204,6 +3388,7 @@ fts_query(
 	trx_t*		query_trx;
 	CHARSET_INFO*	charset;
 	ulint		start_time_ms;
+	bool		will_be_ignored = false;
 
 	boolean_mode = flags & FTS_BOOL;
 
@@ -3237,20 +3422,24 @@ fts_query(
 	/* Setup the RB tree that will be used to collect per term
 	statistics. */
 	query.word_freqs = rbt_create_arg_cmp(
-		sizeof(fts_word_freq_t), innobase_fts_string_cmp, charset);
+		sizeof(fts_word_freq_t), innobase_fts_string_cmp, 
+                (void*) charset);
 
-	query.total_docs = fts_get_total_document_count(index->table);
+	query.total_docs = dict_table_get_n_rows(index->table);
 
-	error = fts_get_total_word_count(trx, query.index, &query.total_words);
+#ifdef FTS_DOC_STATS_DEBUG
+	if (ft_enable_diag_print) {
+		error = fts_get_total_word_count(
+			trx, query.index, &query.total_words);
 
-	if (error != DB_SUCCESS) {
-		goto func_exit;
-	}
+		if (error != DB_SUCCESS) {
+			goto func_exit;
+		}
 
-#ifdef	FTS_INTERNAL_DIAG_PRINT
-	fprintf(stderr, "Total docs: %lu Total words: %lu\n",
-		query.total_docs, query.total_words);
-#endif
+		fprintf(stderr, "Total docs: " UINT64PF " Total words: %lu\n",
+			query.total_docs, query.total_words);
+	}
+#endif /* FTS_DOC_STATS_DEBUG */
 
 	query.fts_common_table.suffix = "DELETED";
 
@@ -3299,13 +3488,14 @@ fts_query(
 		sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
 
 	/* Parse the input query string. */
-	if (fts_query_parse(&query, lc_query_str, query_len)) {
+	if (fts_query_parse(&query, lc_query_str, result_len)) {
 		fts_ast_node_t*	ast = query.root;
 
 		/* Traverse the Abstract Syntax Tree (AST) and execute
 		the query. */
 		query.error = fts_ast_visit(
-			FTS_NONE, ast, fts_query_visitor, &query);
+			FTS_NONE, ast, fts_query_visitor,
+			&query, &will_be_ignored);
 
 		/* If query expansion is requested, extend the search
 		with first search pass result */
@@ -3453,8 +3643,8 @@ words in documents found in the first search pass will be used as
 search arguments to search the document again, thus "expand"
 the search result set.
 @return DB_SUCCESS if success, otherwise the error code */
-static
-ulint
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
 fts_expand_query(
 /*=============*/
 	dict_index_t*	index,		/*!< in: FTS index to search */
@@ -3463,7 +3653,7 @@ fts_expand_query(
 	const ib_rbt_node_t*	node;
 	const ib_rbt_node_t*	token_node;
 	fts_doc_t		result_doc;
-	ulint			error = DB_SUCCESS;
+	dberr_t			error = DB_SUCCESS;
 	const fts_index_cache_t*index_cache;
 
 	/* If no doc is found in first search pass, return */
@@ -3482,7 +3672,7 @@ fts_expand_query(
 
 	result_doc.tokens = rbt_create_arg_cmp(
 		sizeof(fts_token_t), innobase_fts_text_cmp,
-		index_cache->charset);
+		(void*) index_cache->charset);
 
 	result_doc.charset = index_cache->charset;
 
@@ -3557,14 +3747,16 @@ fts_expand_query(
 /*************************************************************//**
 This function finds documents that contain all words in a
 phrase or proximity search. And if proximity search, verify
-the words are close to each other enough, as in specified distance.
+the words are close enough to each other, as in specified distance.
 This function is called for phrase and proximity search.
 @return TRUE if documents are found, FALSE if otherwise */
 static
 ibool
-fts_check_phrase_proximity(
-/*=======================*/
-	fts_query_t*	query,		/*!< in:  query instance */
+fts_phrase_or_proximity_search(
+/*===========================*/
+	fts_query_t*	query,		/*!< in/out:  query instance.
+					query->doc_ids might be instantiated
+					with qualified doc IDs */
 	ib_vector_t*	tokens)		/*!< in: Tokens contain words */
 {
 	ulint		n_matched;
@@ -3581,8 +3773,13 @@ fts_check_phrase_proximity(
 	walk through the list and find common documents that
 	contain all the matching words. */
 	for (i = 0; i < n_matched; i++) {
-		ulint	j;
-		ulint	k = 0;
+		ulint		j;
+		ulint		k = 0;
+		fts_proximity_t	qualified_pos;
+		ulint		qualified_pos_buf[MAX_PROXIMITY_ITEM * 2];
+
+		qualified_pos.min_pos = &qualified_pos_buf[0];
+		qualified_pos.max_pos = &qualified_pos_buf[MAX_PROXIMITY_ITEM];
 
 		match[0] = static_cast<fts_match_t*>(
 			ib_vector_get(query->match_array[0], i));
@@ -3647,24 +3844,31 @@ fts_check_phrase_proximity(
 
 		/* For this matching doc, we need to further
 		verify whether the words in the doc are close
-		to each other, and with in distance specified
+		to each other, and within the distance specified
 		in the proximity search */
 		if (query->flags & FTS_PHRASE) {
 			matched = TRUE;
-		} else if (fts_proximity_check_position(
-			match, num_token, query->distance)) {
-			ulint	z;
-			/* If so, mark we find a matching doc */
-			fts_query_process_doc_id(query, match[0]->doc_id, 0);
+		} else if (fts_proximity_get_positions(
+			match, num_token, ULINT_MAX, &qualified_pos)) {
+
+			/* Fetch the original documents and count the
+			words in between matching words to see that is in
+			specified distance */
+			if (fts_query_is_in_proximity_range(
+				query, match, &qualified_pos)) {
+				/* If so, mark we find a matching doc */
+				fts_query_process_doc_id(
+					query, match[0]->doc_id, 0);
 
-			matched = TRUE;
-			for (z = 0; z < num_token; z++) {
-				fts_string_t*	token;
-				token = static_cast<fts_string_t*>(
-					ib_vector_get(tokens, z));
-				fts_query_add_word_to_document(
-					query, match[0]->doc_id,
-					token->f_str);
+				matched = TRUE;
+				for (ulint z = 0; z < num_token; z++) {
+					fts_string_t*	token;
+					token = static_cast<fts_string_t*>(
+						ib_vector_get(tokens, z));
+					fts_query_add_word_to_document(
+						query, match[0]->doc_id,
+						token->f_str);
+				}
 			}
 		}
 
@@ -3678,24 +3882,32 @@ func_exit:
 }
 
 /*************************************************************//**
-This function check the words in result document are close to each
-other (within proximity range). This is used for proximity search.
-@return TRUE if words are close to each other, FALSE if otherwise */
+This function checks whether words in result documents are close to
+each other (within proximity range as specified by "distance").
+If "distance" is MAX_ULINT, then it will find all combinations of
+positions of matching words and store min and max positions
+in the "qualified_pos" for later verification.
+@return true if words are close to each other, false if otherwise */
 static
-ulint
-fts_proximity_check_position(
-/*=========================*/
-	fts_match_t**	match,		/*!< in: query instance */
-	ulint		num_match,	/*!< in: number of matching
-					items */
-	ulint		distance)	/*!< in: distance value
-					for proximity search */
+bool
+fts_proximity_get_positions(
+/*========================*/
+	fts_match_t**		match,		/*!< in: query instance */
+	ulint			num_match,	/*!< in: number of matching
+						items */
+	ulint			distance,	/*!< in: distance value
+						for proximity search */
+	fts_proximity_t*	qualified_pos)	/*!< out: the position info
+						records ranges containing
+						all matching words. */
 {
 	ulint	i;
 	ulint	idx[MAX_PROXIMITY_ITEM];
 	ulint	num_pos[MAX_PROXIMITY_ITEM];
 	ulint	min_idx;
 
+	qualified_pos->n_pos = 0;
+
 	ut_a(num_match < MAX_PROXIMITY_ITEM);
 
 	/* Each word could appear multiple times in a doc. So
@@ -3747,14 +3959,21 @@ fts_proximity_check_position(
 		find a good match */
 		if (max_pos - min_pos <= distance
 		    && (i >= num_match || position[i] != ULINT_UNDEFINED)) {
-			return(TRUE);
-		} else {
-			/* Otherwise, move to the next position is the
-			list for the word with the smallest position */
-			idx[min_idx]++;
+			/* The charset has variable character
+			length encoding, record the min_pos and
+			max_pos, we will need to verify the actual
+			number of characters */
+			qualified_pos->min_pos[qualified_pos->n_pos] = min_pos;
+			qualified_pos->max_pos[qualified_pos->n_pos] = max_pos;
+			qualified_pos->n_pos++;
 		}
+
+		/* Otherwise, move to the next position is the
+		list for the word with the smallest position */
+		idx[min_idx]++;
 	}
 
-	/* Failed to find all words within the range for the doc */
-	return(FALSE);
+	ut_ad(qualified_pos->n_pos <= MAX_PROXIMITY_ITEM);
+
+	return(qualified_pos->n_pos != 0);
 }