diff options
author | serg@serg.mysql.com <> | 2002-04-18 14:12:29 +0000 |
---|---|---|
committer | serg@serg.mysql.com <> | 2002-04-18 14:12:29 +0000 |
commit | 3cc69d5a646e25930611196c05b0b38960d1aa5d (patch) | |
tree | 41a1575c9ff76d1abb797f7fa4a654207dc3cdb5 /myisam/ft_boolean_search.c | |
parent | 08384a3abd0f42974e79a663249db9eab5e66d6a (diff) | |
download | mariadb-git-3cc69d5a646e25930611196c05b0b38960d1aa5d.tar.gz |
phrase search
Diffstat (limited to 'myisam/ft_boolean_search.c')
-rw-r--r-- | myisam/ft_boolean_search.c | 79 |
1 files changed, 57 insertions, 22 deletions
diff --git a/myisam/ft_boolean_search.c b/myisam/ft_boolean_search.c index dd310b4921a..10b5044826f 100644 --- a/myisam/ft_boolean_search.c +++ b/myisam/ft_boolean_search.c @@ -59,6 +59,7 @@ static double *nwghts=_nwghts+5; /* nwghts[i] = -0.5*1.5**i */ typedef struct st_ftb_expr FTB_EXPR; struct st_ftb_expr { FTB_EXPR *up; + byte *quot, *qend; float weight; uint flags; my_off_t docid[2]; /* for index search and for scan */ @@ -113,7 +114,7 @@ int FTB_WORD_cmp_list(CHARSET_INFO *cs, FTB_WORD **a, FTB_WORD **b) } void _ftb_parse_query(FTB *ftb, byte **start, byte *end, - FTB_EXPR *up, uint depth) + FTB_EXPR *up, uint depth) { byte res; FTB_PARAM param; @@ -126,16 +127,17 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, return; param.prev=' '; + param.quot=up->quot; while ((res=ft_get_word(start,end,&w,&param))) { - int r=param.plusminus; + int r=param.plusminus; float weight= (float) (param.pmsign ? nwghts : wghts)[(r>5)?5:((r<-5)?-5:r)]; switch (res) { case 1: /* word found */ ftbw=(FTB_WORD *)alloc_root(&ftb->mem_root, - sizeof(FTB_WORD) + - (param.trunc ? MI_MAX_KEY_BUFF : - w.len+extra)); + sizeof(FTB_WORD) + + (param.trunc ? 
MI_MAX_KEY_BUFF : + w.len+extra)); ftbw->len=w.len+1; ftbw->flags=0; if (param.yesno>0) ftbw->flags|=FTB_FLAG_YES; @@ -149,7 +151,7 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbw->word[0]=w.len; if (param.yesno > 0) up->ythresh++; queue_insert(& ftb->queue, (byte *)ftbw); - ftb->with_scan|=param.trunc; + ftb->with_scan|=(param.trunc & FTB_FLAG_TRUNC); break; case 2: /* left bracket */ ftbe=(FTB_EXPR *)alloc_root(&ftb->mem_root, sizeof(FTB_EXPR)); @@ -160,10 +162,12 @@ void _ftb_parse_query(FTB *ftb, byte **start, byte *end, ftbe->up=up; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; + if ((ftbe->quot=param.quot)) ftb->with_scan|=2; if (param.yesno > 0) up->ythresh++; _ftb_parse_query(ftb, start, end, ftbe, depth+1); break; case 3: /* right bracket */ + if (up->quot) up->qend=param.quot; return; } } @@ -209,7 +213,7 @@ void _ftb_init_index_search(FT_INFO *ftb) ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), - 0); + 0); } if (r) /* not found */ { @@ -260,7 +264,7 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, ftbe->weight=1; ftbe->flags=FTB_FLAG_YES; ftbe->nos=1; - ftbe->up=0; + ftbe->quot=ftbe->up=0; ftbe->ythresh=ftbe->yweaks=0; ftbe->docid[0]=ftbe->docid[1]=HA_POS_ERROR; ftb->root=ftbe; @@ -270,16 +274,39 @@ FT_INFO * ft_init_boolean_search(MI_INFO *info, uint keynr, byte *query, memcpy(ftb->list, ftb->queue.root+1, sizeof(FTB_WORD *)*ftb->queue.elements); qsort2(ftb->list, ftb->queue.elements, sizeof(FTB_WORD *), (qsort2_cmp)FTB_WORD_cmp_list, ftb->charset); - if (ftb->queue.elements<2) ftb->with_scan=0; + if (ftb->queue.elements<2) ftb->with_scan &= ~FTB_FLAG_TRUNC; ftb->state=READY; return ftb; } -void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) +/* returns 1 if str0 contain str1 */ +int _ftb_strstr(const byte *s0, const byte *e0, + const byte *s1, const byte *e1, + CHARSET_INFO *cs) { + const byte *p; + + 
while (s0 < e0) + { + while (s0 < e0 && cs->to_upper[*s0++] != cs->to_upper[*s1]) + /* no-op */; + if (s0 >= e0) + return 0; + p=s1+1; + while (s0 < e0 && p < e1 && cs->to_upper[*s0++] == cs->to_upper[*p++]) + /* no-op */; + if (p >= e1) + return 1; + } + return 0; +} + +void _ftb_climb_the_tree(FTB *ftb, FTB_WORD *ftbw, FT_SEG_ITERATOR *ftsi_orig) +{ + FT_SEG_ITERATOR ftsi; FTB_EXPR *ftbe; float weight=ftbw->weight; - int yn=ftbw->flags, ythresh; + int yn=ftbw->flags, ythresh, mode=(ftsi_orig != 0); my_off_t curdoc=ftbw->docid[mode]; for (ftbe=ftbw->up; ftbe; ftbe=ftbe->up) @@ -300,6 +327,20 @@ void _ftb_climb_the_tree(FTB_WORD *ftbw, uint mode) { yn=ftbe->flags; weight=ftbe->cur_weight*ftbe->weight; + if (mode && ftbe->quot) + { + int not_found=1; + + memcpy(&ftsi, ftsi_orig, sizeof(ftsi)); + while (_mi_ft_segiterator(&ftsi) && not_found) + { + if (!ftsi.pos) + continue; + not_found = ! _ftb_strstr(ftsi.pos, ftsi.pos+ftsi.len, + ftbe->quot, ftbe->qend, ftb->charset); + } + if (not_found) break; + } /* ftbe->quot */ } else break; @@ -356,7 +397,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) { while (curdoc==(ftbw=(FTB_WORD *)queue_top(& ftb->queue))->docid[0]) { - _ftb_climb_the_tree(ftbw,0); + _ftb_climb_the_tree(ftb, ftbw, 0); /* update queue */ r=_mi_search(info, keyinfo, (uchar*) ftbw->word, USE_WHOLE_KEY, @@ -367,7 +408,7 @@ int ft_boolean_read_next(FT_INFO *ftb, char *record) info->lastkey + (ftbw->flags&FTB_FLAG_TRUNC), ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), ftbw->word + (ftbw->flags&FTB_FLAG_TRUNC), - ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), + ftbw->len - (ftbw->flags&FTB_FLAG_TRUNC), 0); } if (r) /* not found */ @@ -414,7 +455,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) FT_WORD word; FTB_WORD *ftbw; FTB_EXPR *ftbe; - FT_SEG_ITERATOR ftsi; + FT_SEG_ITERATOR ftsi, ftsi2; const byte *end; my_off_t docid=ftb->info->lastpos; @@ -423,17 +464,11 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint 
length) if (!ftb->queue.elements) return 0; -#if NOT_USED - if (ftb->state == READY || ftb->state == INDEX_DONE) - ftb->state=SCAN; - else if (ftb->state != SCAN) - return -3.0; -#endif - if (ftb->keynr==NO_SUCH_KEY) _mi_ft_segiterator_dummy_init(record, length, &ftsi); else _mi_ft_segiterator_init(ftb->info, ftb->keynr, record, &ftsi); + memcpy(&ftsi2, &ftsi, sizeof(ftsi)); while (_mi_ft_segiterator(&ftsi)) { @@ -464,7 +499,7 @@ float ft_boolean_find_relevance(FT_INFO *ftb, byte *record, uint length) if (ftbw->docid[1] == docid) continue; ftbw->docid[1]=docid; - _ftb_climb_the_tree(ftbw,1); + _ftb_climb_the_tree(ftb, ftbw, &ftsi2); } } } |