summaryrefslogtreecommitdiff
path: root/storage/innobase/include/fts0ast.h
blob: 0b55d458036fb547d447a3794664a184c1260287 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
/*****************************************************************************

Copyright (c) 2007, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, 2018, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/******************************************************************//**
@file include/fts0ast.h
The FTS query parser (AST) abstract syntax tree routines

Created 2007/03/16/03 Sunny Bains
*******************************************************/

#ifndef INNOBASE_FST0AST_H
#define INNOBASE_FST0AST_H

#include "mem0mem.h"

#ifdef UNIV_PFS_MEMORY

#define malloc(A)	ut_malloc_nokey(A)
#define free(A)		ut_free(A)
#define realloc(P, A)	ut_realloc(P, A)

#endif /* UNIV_PFS_MEMORY */

/* The type of AST Node */
enum fts_ast_type_t {
	FTS_AST_OPER,				/*!< Operator */
	FTS_AST_NUMB,				/*!< Number */
	FTS_AST_TERM,				/*!< Term (or word) */
	FTS_AST_TEXT,				/*!< Text string */
	FTS_AST_PARSER_PHRASE_LIST,		/*!< Phase for plugin parser
						The difference from text type
						is that we tokenize text into
						term list */
	FTS_AST_LIST,				/*!< Expression list */
	FTS_AST_SUBEXP_LIST			/*!< Sub-Expression list */
};

/* The FTS query operators that we support */
enum fts_ast_oper_t {
	FTS_NONE,				/*!< No operator */

	FTS_IGNORE,				/*!< Ignore rows that contain
						this word */

	FTS_EXIST,				/*!< Include rows that contain
						this word */

	FTS_NEGATE,				/*!< Include rows that contain
						this word but rank them
						lower*/

	FTS_INCR_RATING,			/*!< Increase the rank for this
						word*/

	FTS_DECR_RATING,			/*!< Decrease the rank for this
						word*/

	FTS_DISTANCE,				/*!< Proximity distance */
	FTS_IGNORE_SKIP,			/*!< Transient node operator
						signifies that this is a
						FTS_IGNORE node, and ignored in
						the first pass of
						fts_ast_visit() */
	FTS_EXIST_SKIP				/*!< Transient node operator
						signifies that this ia a
						FTS_EXIST node, and ignored in
						the first pass of
						fts_ast_visit() */
};

/* Data types used by the FTS parser */
struct fts_lexer_t;
struct fts_ast_node_t;
struct fts_ast_state_t;
struct fts_ast_string_t;

typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);

/********************************************************************
Parse the string using the lexer setup within state.*/
int
fts_parse(
/*======*/
						/* out: 0 on OK, 1 on error */
	fts_ast_state_t* state);		/*!< in: ast state instance.*/

/********************************************************************
Create an AST operator node */
extern
fts_ast_node_t*
fts_ast_create_node_oper(
/*=====================*/
	void*		arg,			/*!< in: ast state */
	fts_ast_oper_t	oper);			/*!< in: ast operator */
/********************************************************************
Create an AST term node, makes a copy of ptr */
extern
fts_ast_node_t*
fts_ast_create_node_term(
/*=====================*/
	void*			arg,		/*!< in: ast state */
	const fts_ast_string_t*	ptr);		/*!< in: term string */
/********************************************************************
Create an AST text node */
extern
fts_ast_node_t*
fts_ast_create_node_text(
/*=====================*/
	void*			arg,		/*!< in: ast state */
	const fts_ast_string_t*	ptr);		/*!< in: text string */
/********************************************************************
Create an AST expr list node */
extern
fts_ast_node_t*
fts_ast_create_node_list(
/*=====================*/
	void*		arg,			/*!< in: ast state */
	fts_ast_node_t*	expr);			/*!< in: ast expr */
/********************************************************************
Create a sub-expression list node. This function takes ownership of
expr and is responsible for deleting it. */
extern
fts_ast_node_t*
fts_ast_create_node_subexp_list(
/*============================*/
						/* out: new node */
	void*		arg,			/*!< in: ast state instance */
	fts_ast_node_t*	expr);			/*!< in: ast expr instance */
/********************************************************************
Set the wildcard attribute of a term.*/
extern
void
fts_ast_term_set_wildcard(
/*======================*/
	fts_ast_node_t*	node);			/*!< in: term to change */
/********************************************************************
Set the proximity attribute of a text node. */
void
fts_ast_text_set_distance(
/*======================*/
	fts_ast_node_t*	node,			/*!< in/out: text node */
	ulint		distance);		/*!< in: the text proximity
						distance */
/********************************************************************//**
Free a fts_ast_node_t instance.
@return next node to free */
fts_ast_node_t*
fts_ast_free_node(
/*==============*/
	fts_ast_node_t*	node);			/*!< in: node to free */
/********************************************************************
Add a sub-expression to an AST*/
extern
fts_ast_node_t*
fts_ast_add_node(
/*=============*/
	fts_ast_node_t*	list,			/*!< in: list node instance */
	fts_ast_node_t*	node);			/*!< in: (sub) expr to add */
/********************************************************************
Print the AST node recursively.*/
extern
void
fts_ast_node_print(
/*===============*/
	fts_ast_node_t*	node);			/*!< in: ast node to print */
/********************************************************************
Free node and expr allocations.*/
extern
void
fts_ast_state_free(
/*===============*/
	fts_ast_state_t*state);			/*!< in: state instance
						to free */
/** Check only union operation involved in the node
@param[in]	node	ast node to check
@return true if the node contains only union else false. */
bool
fts_ast_node_check_union(
	fts_ast_node_t*	node);

/******************************************************************//**
Traverse the AST - in-order traversal.
@return DB_SUCCESS if all went well */
dberr_t
fts_ast_visit(
/*==========*/
	fts_ast_oper_t		oper,		/*!< in: FTS operator */
	fts_ast_node_t*		node,		/*!< in: instance to traverse*/
	fts_ast_callback	visitor,	/*!< in: callback */
	void*			arg,		/*!< in: callback arg */
	bool*			has_ignore)	/*!< out: whether we encounter
						and ignored processing an
						operator, currently we only
						ignore FTS_IGNORE operator */
	MY_ATTRIBUTE((nonnull, warn_unused_result));
/********************************************************************
Create a lex instance.*/
fts_lexer_t*
fts_lexer_create(
/*=============*/
	ibool		boolean_mode,		/*!< in: query type */
	const byte*	query,			/*!< in: query string */
	ulint		query_len)		/*!< in: query string len */
	MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
/********************************************************************
Free an fts_lexer_t instance.*/
void
fts_lexer_free(
/*===========*/
	fts_lexer_t*	fts_lexer)		/*!< in: lexer instance to
						free */
	MY_ATTRIBUTE((nonnull));

/**
Create an ast string object, with NUL-terminator, so the string
has one more byte than len
@param[in] str		pointer to string
@param[in] len		length of the string
@return ast string with NUL-terminator */
fts_ast_string_t*
fts_ast_string_create(
	const byte*	str,
	ulint		len);

/**
Free an ast string instance
@param[in,out] ast_str		string to free */
void
fts_ast_string_free(
	fts_ast_string_t*	ast_str);

/**
Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
@param[in] str		string to translate
@param[in] base		the base
@return translated number */
ulint
fts_ast_string_to_ul(
	const fts_ast_string_t*	ast_str,
	int			base);

/* String of length len.
We always store the string of length len with a terminating '\0',
regardless of there is any 0x00 in the string itself */
struct fts_ast_string_t {
	/*!< Pointer to string. */
	byte*		str;

	/*!< Length of the string. */
	ulint		len;
};

/* Query term type */
struct fts_ast_term_t {
	fts_ast_string_t*	ptr;		/*!< Pointer to term string.*/
	ibool			wildcard;	/*!< TRUE if wild card set.*/
};

/* Query text type */
struct fts_ast_text_t {
	fts_ast_string_t*	ptr;		/*!< Pointer to text string.*/
	ulint			distance;	/*!< > 0 if proximity distance
						set */
};

/* The list of nodes in an expr list */
struct fts_ast_list_t {
	fts_ast_node_t*	head;			/*!< Children list head */
	fts_ast_node_t*	tail;			/*!< Children list tail */
};

/* FTS AST node to store the term, text, operator and sub-expressions.*/
struct fts_ast_node_t {
	fts_ast_type_t	type;			/*!< The type of node */
	fts_ast_text_t	text;			/*!< Text node */
	fts_ast_term_t	term;			/*!< Term node */
	fts_ast_oper_t	oper;			/*!< Operator value */
	fts_ast_list_t	list;			/*!< Expression list */
	fts_ast_node_t*	next;			/*!< Link for expr list */
	fts_ast_node_t*	next_alloc;		/*!< For tracking allocations */
	bool		visited;		/*!< whether this node is
						already processed */
	/** current transaction */
	const trx_t*	trx;
	/* Used by plugin parser */
	fts_ast_node_t* up_node;		/*!< Direct up node */
	bool		go_up;			/*!< Flag if go one level up */
};

/* To track state during parsing */
struct fts_ast_state_t {
	mem_heap_t*	heap;			/*!< Heap to use for alloc */
	fts_ast_node_t*	root;			/*!< If all goes OK, then this
						will point to the root.*/

	fts_ast_list_t	list;			/*!< List of nodes allocated */

	fts_lexer_t*	lexer;			/*!< Lexer callback + arg */
	CHARSET_INFO*	charset;		/*!< charset used for
						tokenization */
	/* Used by plugin parser */
	fts_ast_node_t*	cur_node;		/*!< Current node into which
						 we add new node */
	int		depth;			/*!< Depth of parsing state */
};

/******************************************************************//**
Create an AST term node, makes a copy of ptr for plugin parser
@return node */
extern
fts_ast_node_t*
fts_ast_create_node_term_for_parser(
/*==========i=====================*/
	void*		arg,			/*!< in: ast state */
	const char*	ptr,			/*!< in: term string */
	const ulint	len);			/*!< in: term string length */

/******************************************************************//**
Create an AST phrase list node for plugin parser
@return node */
extern
fts_ast_node_t*
fts_ast_create_node_phrase_list(
/*============================*/
	void*		arg);			/*!< in: ast state */

#ifdef UNIV_DEBUG
const char*
fts_ast_node_type_get(fts_ast_type_t	type);
#endif /* UNIV_DEBUG */

#endif /* INNOBASE_FSTS0AST_H */