summaryrefslogtreecommitdiff
path: root/storage/innobase/include/row0ftsort.h
blob: 3ae5d5bc175f1d4fee94fb394a2580a2ee8c38d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
/*****************************************************************************

Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2018, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file include/row0ftsort.h
Create Full Text Index with (parallel) merge sort

Created 10/13/2010 Jimmy Yang
*******************************************************/

#ifndef row0ftsort_h
#define row0ftsort_h

#include "univ.i"
#include "data0data.h"
#include "dict0types.h"
#include "row0mysql.h"
#include "fts0fts.h"
#include "fts0types.h"
#include "fts0priv.h"
#include "row0merge.h"
#include "btr0bulk.h"
#include "os0thread.h"

/** This structure defineds information the scan thread will fetch
and put to the linked list for parallel tokenization/sort threads
to process */
typedef struct fts_doc_item     fts_doc_item_t;

/** Information about temporary files used in merge sort */
struct fts_doc_item {
	dfield_t*	field;		/*!< field contains document string */
	doc_id_t	doc_id;		/*!< document ID */
	UT_LIST_NODE_T(fts_doc_item_t)	doc_list;
					/*!< list of doc items */
};

/** This defines the list type that scan thread would feed the parallel
tokenization threads and sort threads. */
typedef UT_LIST_BASE_NODE_T(fts_doc_item_t)     fts_doc_list_t;

#define FTS_PLL_MERGE		1

/** Sort information passed to each individual parallel sort thread */
struct fts_psort_t;

/** Common info passed to each parallel sort thread */
struct fts_psort_common_t {
	row_merge_dup_t*	dup;		/*!< descriptor of FTS index */
	dict_table_t*		new_table;	/*!< source table */
	trx_t*			trx;		/*!< transaction */
	fts_psort_t*		all_info;	/*!< all parallel sort info */
	os_event_t		sort_event;	/*!< sort event */
	os_event_t		merge_event;	/*!< merge event */
	ibool			opt_doc_id_size;/*!< whether to use 4 bytes
						instead of 8 bytes integer to
						store Doc ID during sort, if
						Doc ID will not be big enough
						to use 8 bytes value */
};

struct fts_psort_t {
	ulint			psort_id;	/*!< Parallel sort ID */
	row_merge_buf_t*	merge_buf[FTS_NUM_AUX_INDEX];
						/*!< sort buffer */
	merge_file_t*		merge_file[FTS_NUM_AUX_INDEX];
						/*!< sort file */
	row_merge_block_t*	merge_block[FTS_NUM_AUX_INDEX];
						/*!< buffer to write to file */
	row_merge_block_t*	block_alloc[FTS_NUM_AUX_INDEX];
						/*!< buffer to allocated */
	row_merge_block_t*	crypt_block[FTS_NUM_AUX_INDEX];
						/*!< buffer to crypt data */
	row_merge_block_t*	crypt_alloc[FTS_NUM_AUX_INDEX];
						/*!< buffer to allocated */
	ulint			child_status;	/*!< child thread status */
	ulint			state;		/*!< parent thread state */
	fts_doc_list_t		fts_doc_list;	/*!< doc list to process */
	fts_psort_common_t*	psort_common;	/*!< ptr to all psort info */
	os_thread_t		thread_hdl;	/*!< thread handler */
	dberr_t			error;		/*!< db error during psort */
	ulint			memory_used;	/*!< memory used by fts_doc_list */
	ib_mutex_t		mutex;		/*!< mutex for fts_doc_list */
};

/** Row fts token for plugin parser */
struct row_fts_token_t {
	fts_string_t*	text;		/*!< token */
	UT_LIST_NODE_T(row_fts_token_t)
			token_list;	/*!< next token link */
};

typedef UT_LIST_BASE_NODE_T(row_fts_token_t)     fts_token_list_t;

/** Structure stores information from string tokenization operation */
struct fts_tokenize_ctx {
	ulint			processed_len;  /*!< processed string length */
	ulint			init_pos;       /*!< doc start position */
	ulint			buf_used;       /*!< the sort buffer (ID) when
						tokenization stops, which
						could due to sort buffer full */
	ulint			rows_added[FTS_NUM_AUX_INDEX];
						/*!< number of rows added for
						each FTS index partition */
	ib_rbt_t*		cached_stopword;/*!< in: stopword list */
	dfield_t		sort_field[FTS_NUM_FIELDS_SORT];
						/*!< in: sort field */
	fts_token_list_t	fts_token_list;

	fts_tokenize_ctx() :
		processed_len(0), init_pos(0), buf_used(0),
		rows_added(), cached_stopword(NULL), sort_field(),
		fts_token_list()
	{
		memset(rows_added, 0, sizeof rows_added);
		memset(sort_field, 0, sizeof sort_field);
		UT_LIST_INIT(fts_token_list, &row_fts_token_t::token_list);
	}
};

typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;

/** Structure stores information needed for the insertion phase of FTS
parallel sort. */
struct fts_psort_insert {
	CHARSET_INFO*	charset;	/*!< charset info */
	mem_heap_t*	heap;		/*!< heap */
	ibool		opt_doc_id_size;/*!< Whether to use smaller (4 bytes)
					integer for Doc ID */
	BtrBulk*	btr_bulk;	/*!< Bulk load instance */
	dtuple_t*	tuple;		/*!< Tuple to insert */

#ifdef UNIV_DEBUG
	ulint		aux_index_id;	/*!< Auxiliary index id */
#endif
};

typedef struct fts_psort_insert	fts_psort_insert_t;


/** status bit used for communication between parent and child thread */
#define FTS_PARENT_COMPLETE	1
#define FTS_PARENT_EXITING	2
#define FTS_CHILD_COMPLETE	1
#define FTS_CHILD_EXITING	2

/** Print some debug information */
#define	FTSORT_PRINT

#ifdef	FTSORT_PRINT
#define	DEBUG_FTS_SORT_PRINT(str)		\
	do {					\
		ut_print_timestamp(stderr);	\
		fprintf(stderr, str);		\
	} while (0)
#else
#define DEBUG_FTS_SORT_PRINT(str)
#endif	/* FTSORT_PRINT */

/*************************************************************//**
Create a temporary "fts sort index" used to merge sort the
tokenized doc string. The index has three "fields":

1) Tokenized word,
2) Doc ID
3) Word's position in original 'doc'.

@return dict_index_t structure for the fts sort index */
dict_index_t*
row_merge_create_fts_sort_index(
/*============================*/
	dict_index_t*	index,	/*!< in: Original FTS index
				based on which this sort index
				is created */
	dict_table_t*	table,	/*!< in,out: table that FTS index
				is being created on */
	ibool*		opt_doc_id_size);
				/*!< out: whether to use 4 bytes
				instead of 8 bytes integer to
				store Doc ID during sort */

/********************************************************************//**
Initialize FTS parallel sort structures.
@return TRUE if all successful */
ibool
row_fts_psort_info_init(
/*====================*/
	trx_t*			trx,	/*!< in: transaction */
	row_merge_dup_t*	dup,	/*!< in,own: descriptor of
					FTS index being created */
	const dict_table_t*	new_table,/*!< in: table where indexes are
					created */
	ibool			opt_doc_id_size,
					/*!< in: whether to use 4 bytes
					instead of 8 bytes integer to
					store Doc ID during sort */
	fts_psort_t**		psort,	/*!< out: parallel sort info to be
					instantiated */
	fts_psort_t**		merge)	/*!< out: parallel merge info
					to be instantiated */
	MY_ATTRIBUTE((nonnull));
/********************************************************************//**
Clean up and deallocate FTS parallel sort structures, and close
temparary merge sort files */
void
row_fts_psort_info_destroy(
/*=======================*/
	fts_psort_t*	psort_info,	/*!< parallel sort info */
	fts_psort_t*	merge_info);	/*!< parallel merge info */
/********************************************************************//**
Free up merge buffers when merge sort is done */
void
row_fts_free_pll_merge_buf(
/*=======================*/
	fts_psort_t*	psort_info);	/*!< in: parallel sort info */

/*********************************************************************//**
Start the parallel tokenization and parallel merge sort */
void
row_fts_start_psort(
/*================*/
	fts_psort_t*	psort_info);	/*!< in: parallel sort info */
/*********************************************************************//**
Kick off the parallel merge and insert thread */
void
row_fts_start_parallel_merge(
/*=========================*/
	fts_psort_t*	merge_info);	/*!< in: parallel sort info */
/********************************************************************//**
Propagate a newly added record up one level in the selection tree
@return parent where this value propagated to */
int
row_merge_fts_sel_propagate(
/*========================*/
	int		propogated,	/*<! in: tree node propagated */
	int*		sel_tree,	/*<! in: selection tree */
	ulint		level,		/*<! in: selection tree level */
	const mrec_t**	 mrec,		/*<! in: sort record */
	ulint**		offsets,	/*<! in: record offsets */
	dict_index_t*	index);		/*<! in: FTS index */
/********************************************************************//**
Read sorted file containing index data tuples and insert these data
tuples to the index
@return DB_SUCCESS or error number */
dberr_t
row_fts_merge_insert(
/*=================*/
	dict_index_t*	index,		/*!< in: index */
	dict_table_t*	table,		/*!< in: new table */
	fts_psort_t*	psort_info,	/*!< parallel sort info */
	ulint		id)		/* !< in: which auxiliary table's data
					to insert to */
	MY_ATTRIBUTE((nonnull));
#endif /* row0ftsort_h */