summaryrefslogtreecommitdiff
path: root/storage/innobase/include/fts0types.ic
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/include/fts0types.ic')
-rw-r--r--storage/innobase/include/fts0types.ic427
1 files changed, 427 insertions, 0 deletions
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
new file mode 100644
index 00000000000..2734a331a86
--- /dev/null
+++ b/storage/innobase/include/fts0types.ic
@@ -0,0 +1,427 @@
+/*****************************************************************************
+
+Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/fts0types.ic
+Full text search types.
+
+Created 2007-03-27 Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_FTS0TYPES_IC
+#define INNOBASE_FTS0TYPES_IC
+
+#include <ctype.h>
+#include <string.h>
+
+#include "rem0cmp.h"
+#include "ha_prototypes.h"
+
+/* Value returned by fts_utf8_decode() when the input is not accepted as
+a UTF-8 sequence. Only declared here; the definition lives elsewhere. */
+extern const ulint UTF8_ERROR;
+
+/* Determine if a UTF-8 continuation byte is valid, i.e. has the form
+10xxxxxx: bit 7 set and bit 6 clear. The argument is evaluated once. */
+#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80)
+
+/******************************************************************//**
+Order two fts_trx_table_t instances by the id of the table each one
+refers to. Both arguments point to a fts_trx_table_t pointer.
+@return 1 if the first table id is greater, 0 if both are equal,
+-1 if the first table id is smaller */
+UNIV_INLINE
+int
+fts_trx_table_cmp(
+/*==============*/
+	const void*	p1,	/*!< in: pointer to fts_trx_table_t* */
+	const void*	p2)	/*!< in: pointer to fts_trx_table_t* */
+{
+	const fts_trx_table_t*	lhs = *(const fts_trx_table_t**) p1;
+	const fts_trx_table_t*	rhs = *(const fts_trx_table_t**) p2;
+
+	if (lhs->table->id > rhs->table->id) {
+		return(1);
+	}
+
+	if (lhs->table->id == rhs->table->id) {
+		return(0);
+	}
+
+	return(-1);
+}
+
+/******************************************************************//**
+Compare a bare table id against the id of the table referenced by a
+fts_trx_table_t. The first argument points to the id itself, the second
+points to a fts_trx_table_t pointer.
+@return 1 if the id is greater than the table's id, 0 if equal,
+-1 if smaller */
+UNIV_INLINE
+int
+fts_trx_table_id_cmp(
+/*=================*/
+	const void*	p1,	/*!< in: pointer to the table id (ullint) */
+	const void*	p2)	/*!< in: pointer to fts_trx_table_t* */
+{
+	const ullint		wanted = *(const ullint*) p1;
+	const fts_trx_table_t*	ftt = *(const fts_trx_table_t**) p2;
+
+	if (wanted > ftt->table->id) {
+		return(1);
+	}
+
+	if (wanted == ftt->table->id) {
+		return(0);
+	}
+
+	return(-1);
+}
+
+/******************************************************************//**
+Duplicate an UTF-8 string into memory taken from the given heap.
+Exactly src->f_len bytes are read from the source, so the source does
+not need to be NUL terminated; the copy always is. f_len and f_n_char
+are copied unchanged. */
+UNIV_INLINE
+void
+fts_utf8_string_dup(
+/*================*/
+	fts_string_t*		dst,	/*!< out: dup to here */
+	const fts_string_t*	src,	/*!< in: src string */
+	mem_heap_t*		heap)	/*!< in: heap to use */
+{
+	/* Reserve one extra byte for the terminator, but copy only the
+	f_len payload bytes: nothing here guarantees that the byte at
+	src->f_str[src->f_len] is readable, and it would be overwritten
+	with 0 below anyway. */
+	dst->f_str = (byte*) mem_heap_alloc(heap, src->f_len + 1);
+
+	memcpy(dst->f_str, src->f_str, src->f_len);
+
+	dst->f_len = src->f_len;
+	dst->f_str[src->f_len] = 0;
+	dst->f_n_char = src->f_n_char;
+}
+
+/******************************************************************//**
+Compare two fts_trx_row_t instances by doc_id.
+@return -1 if the first doc_id is smaller, 0 if both are equal,
+1 if the first doc_id is greater */
+UNIV_INLINE
+int
+fts_trx_row_doc_id_cmp(
+/*===================*/
+	const void*	p1,	/*!< in: pointer to fts_trx_row_t */
+	const void*	p2)	/*!< in: pointer to fts_trx_row_t */
+{
+	const fts_trx_row_t*	tr1 = (const fts_trx_row_t*) p1;
+	const fts_trx_row_t*	tr2 = (const fts_trx_row_t*) p2;
+
+	/* Compare explicitly rather than returning the difference cast
+	to int: a doc id difference that does not fit in an int would be
+	truncated, yielding the wrong sign or 0 for unequal ids. */
+	if (tr1->doc_id < tr2->doc_id) {
+		return(-1);
+	}
+
+	return(tr1->doc_id > tr2->doc_id ? 1 : 0);
+}
+
+/******************************************************************//**
+Compare two fts_ranking_t instances by doc_id.
+@return -1 if the first doc_id is smaller, 0 if both are equal,
+1 if the first doc_id is greater */
+UNIV_INLINE
+int
+fts_ranking_doc_id_cmp(
+/*===================*/
+	const void*	p1,	/*!< in: pointer to fts_ranking_t */
+	const void*	p2)	/*!< in: pointer to fts_ranking_t */
+{
+	const fts_ranking_t*	rk1 = (const fts_ranking_t*) p1;
+	const fts_ranking_t*	rk2 = (const fts_ranking_t*) p2;
+
+	/* Compare explicitly rather than returning the difference cast
+	to int: a doc id difference that does not fit in an int would be
+	truncated, yielding the wrong sign or 0 for unequal ids. */
+	if (rk1->doc_id < rk2->doc_id) {
+		return(-1);
+	}
+
+	return(rk1->doc_id > rk2->doc_id ? 1 : 0);
+}
+
+/******************************************************************//**
+Compare two fts_update_t instances by doc_id.
+@return -1 if the first doc_id is smaller, 0 if both are equal,
+1 if the first doc_id is greater */
+UNIV_INLINE
+int
+fts_update_doc_id_cmp(
+/*==================*/
+	const void*	p1,	/*!< in: pointer to fts_update_t */
+	const void*	p2)	/*!< in: pointer to fts_update_t */
+{
+	const fts_update_t*	up1 = (const fts_update_t*) p1;
+	const fts_update_t*	up2 = (const fts_update_t*) p2;
+
+	/* Compare explicitly rather than returning the difference cast
+	to int: a doc id difference that does not fit in an int would be
+	truncated, yielding the wrong sign or 0 for unequal ids. */
+	if (up1->doc_id < up2->doc_id) {
+		return(-1);
+	}
+
+	return(up1->doc_id > up2->doc_id ? 1 : 0);
+}
+
+
+/******************************************************************//**
+Lowercase an UTF-8 string by handing its byte buffer to
+innobase_casedn_str(). The f_len and f_n_char members are not touched
+by this function. */
+UNIV_INLINE
+void
+fts_utf8_tolower(
+/*=============*/
+	fts_string_t*	str)	/*!< in/out: string to lowercase */
+{
+	char*	text = (char*) str->f_str;
+
+	innobase_casedn_str(text);
+}
+
+/******************************************************************//**
+Compare two UTF-8 strings over their full lengths using
+cmp_data_data_slow_varchar().
+@return the result of cmp_data_data_slow_varchar(): < 0 if key < node,
+0 if key == node, > 0 if key > node */
+UNIV_INLINE
+int
+fts_utf8_string_cmp(
+/*================*/
+	const void*	p1,	/*!< in: key (fts_string_t) */
+	const void*	p2)	/*!< in: node (fts_string_t) */
+{
+	const fts_string_t*	key = (const fts_string_t*) p1;
+	const fts_string_t*	node = (const fts_string_t*) p2;
+	int			diff;
+
+	diff = cmp_data_data_slow_varchar(
+		key->f_str, key->f_len, node->f_str, node->f_len);
+
+	return(diff);
+}
+
+/******************************************************************//**
+Prefix-aware comparison of two UTF-8 strings. Only the first
+min(key length, node length) bytes of each string are compared.
+@return the non-zero comparison result if the common-length parts
+differ; otherwise 1 if the key is longer than the node, and 0 if the
+key equals the node or is a prefix of it */
+UNIV_INLINE
+int
+fts_utf8_string_cmp_prefix(
+/*=======================*/
+	const void*	p1,	/*!< in: key (fts_string_t) */
+	const void*	p2)	/*!< in: node (fts_string_t) */
+{
+	const fts_string_t*	key = (const fts_string_t*) p1;
+	const fts_string_t*	node = (const fts_string_t*) p2;
+	const ulint		common = ut_min(key->f_len, node->f_len);
+	const int		diff = cmp_data_data_slow_varchar(
+		key->f_str, common, node->f_str, common);
+
+	if (diff != 0) {
+		return(diff);
+	}
+
+	/* The shared part matches: a key that is longer than the node
+	cannot be a prefix of it. */
+	return(key->f_len > node->f_len ? 1 : 0);
+}
+
+/******************************************************************//**
+Decode a UTF-8 character.
+
+http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
+
+ Scalar Value                 1st Byte  2nd Byte  3rd Byte  4th Byte
+ 00000000 0xxxxxxx            0xxxxxxx
+ 00000yyy yyxxxxxx            110yyyyy  10xxxxxx
+ zzzzyyyy yyxxxxxx            1110zzzz  10yyyyyy  10xxxxxx
+ 000uuuzz zzzzyyyy yyxxxxxx   11110uuu  10zzzzzz  10yyyyyy  10xxxxxx
+
+This function decodes UTF-8 sequences up to 6 bytes (31 bits).
+
+The function takes no length argument: it keeps reading continuation
+bytes until the sequence announced by the lead byte is complete or a
+byte that is not of the form 10xxxxxx is seen.
+NOTE(review): this presumably relies on the caller terminating the
+input with a non-continuation byte (e.g. NUL) - confirm.
+
+Sequences that decode to U+D800..U+DFFF, U+FFFE or U+FFFF are rejected.
+Overlong encodings (a code point below the minimum for the sequence
+length) are rejected only when UNIV_DEBUG is defined.
+
+On error *ptr will point to the first byte that was not correctly
+decoded. This will hopefully help in resyncing the input. For an
+invalid lead byte that is the byte following the lead byte.
+@return UTF8_ERROR if *ptr did not point to a valid
+UTF-8 sequence, or the Unicode code point. */
+UNIV_INLINE
+ulint
+fts_utf8_decode(
+/*============*/
+ const byte** ptr) /*!< in/out: pointer to
+ UTF-8 string. The
+ pointer is advanced to
+ the start of the next
+ character. */
+{
+ const byte* p = *ptr;
+ ulint ch = *p++;
+#ifdef UNIV_DEBUG
+ /* Smallest code point that legitimately needs the detected
+ sequence length; used for the debug-only overlong check below. */
+ ulint min_ch;
+#endif /* UNIV_DEBUG */
+
+ /* Classify the lead byte. The multi-byte cases strip the length
+ marker bits from ch and jump to the label that consumes the right
+ number of continuation bytes; the labels live inside the last
+ (6-byte) branch and fall through from getN to get1. */
+ if (UNIV_LIKELY(ch < 0x80)) {
+ /* 0xxxxxxx */
+ } else if (UNIV_UNLIKELY(ch < 0xC0)) {
+ /* A continuation byte cannot start a code. */
+ goto err_exit;
+ } else if (ch < 0xE0) {
+ /* 110yyyyy 10xxxxxx */
+ ch &= 0x1F;
+ ut_d(min_ch = 0x80);
+ goto get1;
+ } else if (ch < 0xF0) {
+ /* 1110zzzz 10yyyyyy 10xxxxxx */
+ ch &= 0x0F;
+ ut_d(min_ch = 0x800);
+ goto get2;
+ } else if (ch < 0xF8) {
+ /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
+ ch &= 0x07;
+ ut_d(min_ch = 0x10000);
+ goto get3;
+ } else if (ch < 0xFC) {
+ /* 111110tt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
+ ch &= 0x03;
+ ut_d(min_ch = 0x200000);
+ goto get4;
+ } else if (ch < 0xFE) {
+ /* 1111110s 10tttttt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
+ /* The lead byte is not masked here: after the five 6-bit
+ shifts below its marker bits end up at bit 32 and above,
+ where they are removed by the 0xFFFFFFFF mask (or do not
+ fit at all if ulint is 32 bits wide). */
+ ut_d(min_ch = 0x4000000);
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+get4:
+ /* Four more continuation bytes to consume. */
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+get3:
+ /* Three more continuation bytes to consume. */
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+get2:
+ /* Two more continuation bytes to consume. */
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+get1:
+ /* Last continuation byte of the sequence. */
+ if (!fts_utf8_is_valid(*p)) {
+ goto err_exit;
+ }
+ ch <<= 6;
+ ch |= (*p++) & 0x3F;
+
+ /* The following is needed in the 6-byte case
+ when ulint is wider than 32 bits. */
+ ch &= 0xFFFFFFFF;
+
+ /* The code positions U+D800 to U+DFFF (UTF-16 surrogate pairs)
+ and U+FFFE and U+FFFF cannot occur in valid UTF-8. */
+
+ /* In debug builds also reject overlong encodings. In this
+ case p has already moved past the whole sequence. */
+ if ( (ch >= 0xD800 && ch <= 0xDFFF)
+#ifdef UNIV_DEBUG
+ || ch < min_ch
+#endif /* UNIV_DEBUG */
+ || ch == 0xFFFE || ch == 0xFFFF) {
+
+ ch = UTF8_ERROR;
+ }
+ } else {
+ /* Lead bytes 0xFE and 0xFF land here directly; every failed
+ check above jumps here. p is left where decoding stopped. */
+err_exit:
+ ch = UTF8_ERROR;
+ }
+
+ /* Report how far the input was consumed, on success and on error. */
+ *ptr = p;
+
+ return(ch);
+}
+
+/******************************************************************//**
+Get the first character's code position for FTS index partition.
+Only declared here; the definition is not part of this file. The result
+is compared against fts_index_selector[].value by fts_select_index()
+and fts_select_next_index().
+@return the code position, per the description above */
+extern
+ulint
+innobase_strnxfrm(
+/*==============*/
+ const CHARSET_INFO* cs, /*!< in: Character set */
+ const uchar* p2, /*!< in: string */
+ const ulint len2); /*!< in: string length */
+
+/******************************************************************//**
+Select the FTS auxiliary index for the given string. The value obtained
+from innobase_strnxfrm() is looked up in fts_index_selector[], which is
+scanned up to its terminating entry (value == 0).
+@return the position of the entry whose value equals the looked up
+value; else the position before the first entry with a larger value
+(or 0 if that is the first entry); else the position of the last entry */
+UNIV_INLINE
+ulint
+fts_select_index(
+/*=============*/
+	const CHARSET_INFO*	cs,	/*!< in: Charset */
+	const byte*		str,	/*!< in: string */
+	ulint			len)	/*!< in: string length */
+{
+	const ulint	code = innobase_strnxfrm(cs, str, len);
+	ulint		i;
+
+	for (i = 0; fts_index_selector[i].value != 0; ++i) {
+
+		if (fts_index_selector[i].value < code) {
+			/* Still below the wanted value, keep scanning. */
+			continue;
+		}
+
+		if (fts_index_selector[i].value == code || i == 0) {
+			/* Exact hit, or nothing precedes this entry. */
+			return(i);
+		}
+
+		/* First larger entry: the previous one covers the value. */
+		return(i - 1);
+	}
+
+	ut_ad(i > 1);
+
+	return(i - 1);
+}
+
+/******************************************************************//**
+Select the next FTS auxiliary index for the given string. The value
+obtained from innobase_strnxfrm() is looked up in fts_index_selector[],
+which is scanned up to its terminating entry (value == 0).
+@return one past the position of the entry whose value equals the
+looked up value; else the position of the first entry with a larger
+value; else the position of the terminating entry */
+UNIV_INLINE
+ulint
+fts_select_next_index(
+/*==================*/
+	const CHARSET_INFO*	cs,	/*!< in: Charset */
+	const byte*		str,	/*!< in: string */
+	ulint			len)	/*!< in: string length */
+{
+	const ulint	code = innobase_strnxfrm(cs, str, len);
+	ulint		pos = 0;
+
+	/* Skip every entry that is smaller than the wanted value. */
+	while (fts_index_selector[pos].value != 0
+	       && fts_index_selector[pos].value < code) {
+
+		++pos;
+	}
+
+	if (fts_index_selector[pos].value == 0) {
+		/* Ran off the end of the selector table. */
+		ut_ad(pos > 0);
+
+		return((ulint) pos);
+	}
+
+	if (fts_index_selector[pos].value == code) {
+		return(pos + 1);
+	}
+
+	return(pos);
+}
+
+/******************************************************************//**
+Look up the suffix stored in the fts_index_selector[] entry at the given
+position. No range check is done on the position.
+@return the suffix member of the selected entry */
+UNIV_INLINE
+const char*
+fts_get_suffix(
+/*===========*/
+	ulint	selected)	/*!< in: selected index */
+{
+	const char*	suffix = fts_index_selector[selected].suffix;
+
+	return(suffix);
+}
+
+/******************************************************************//**
+Count the entries of fts_index_selector[] that precede its terminating
+entry (the first one whose value is 0).
+@return The number of selectors */
+UNIV_INLINE
+ulint
+fts_get_n_selectors(void)
+/*=====================*/
+{
+	ulint	count;
+
+	// FIXME: This is a hack
+	for (count = 0; fts_index_selector[count].value != 0; ++count) {
+		/* Counting only, nothing else to do per entry. */
+	}
+
+	return(count);
+}
+
+#endif /* INNOBASE_FTS0TYPES_IC */