summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBram Moolenaar <Bram@vim.org>2017-01-15 16:52:51 +0100
committerBram Moolenaar <Bram@vim.org>2017-01-15 16:52:51 +0100
commit810f9c361c83afb36b9f1cdadca2b93f1201d039 (patch)
treedf2c3926f3e1d6fcbeebf9c9b3cfd79539d9fb20
parent296b1f28ca9cedeb55872f306808b2214b519ce7 (diff)
downloadvim-git-8.0.0190.tar.gz
patch 8.0.0190: finding duplicate tags uses a slow linear searchv8.0.0190
Problem: Detecting duplicate tags uses a slow linear search. Solution: Use a much faster hash table solution. (James McCoy, closes #1046) But don't add hi_keylen, it makes hash tables 50% bigger.
-rw-r--r--src/tag.c320
-rw-r--r--src/version.c2
2 files changed, 160 insertions, 162 deletions
diff --git a/src/tag.c b/src/tag.c
index 646cbcd62..94b5e2b68 100644
--- a/src/tag.c
+++ b/src/tag.c
@@ -35,9 +35,9 @@ typedef struct tag_pointers
} tagptrs_T;
/*
- * The matching tags are first stored in ga_match[]. In which one depends on
- * the priority of the match.
- * At the end, the matches from ga_match[] are concatenated, to make a list
+ * The matching tags are first stored in one of the ht_match[] hash tables. In
+ * which one depends on the priority of the match.
+ * At the end, all the matches from ht_match[] are concatenated, to make a list
* sorted on priority.
*/
#define MT_ST_CUR 0 /* static match in current file */
@@ -1341,12 +1341,9 @@ find_tags(
int is_etag; /* current file is emaces style */
#endif
- struct match_found
- {
- int len; /* nr of chars of match[] to be compared */
- char_u match[1]; /* actually longer */
- } *mfp, *mfp2;
- garray_T ga_match[MT_COUNT];
+ char_u *mfp;
+ hashtab_T ht_match[MT_COUNT];
+ hash_T hash = 0;
int match_count = 0; /* number of matches found */
char_u **matches;
int mtt;
@@ -1402,16 +1399,16 @@ find_tags(
vimconv.vc_type = CONV_NONE;
#endif
-/*
- * Allocate memory for the buffers that are used
- */
+ /*
+ * Allocate memory for the buffers that are used
+ */
lbuf = alloc(lbuf_size);
tag_fname = alloc(MAXPATHL + 1);
#ifdef FEAT_EMACS_TAGS
ebuf = alloc(LSIZE);
#endif
for (mtt = 0; mtt < MT_COUNT; ++mtt)
- ga_init2(&ga_match[mtt], (int)sizeof(struct match_found *), 100);
+ hash_init(&ht_match[mtt]);
/* check for out of memory situation */
if (lbuf == NULL || tag_fname == NULL
@@ -2206,10 +2203,12 @@ parse_line:
}
/*
- * If a match is found, add it to ga_match[].
+ * If a match is found, add it to ht_match[].
*/
if (match)
{
+ int len = 0;
+
#ifdef FEAT_CSCOPE
if (use_cscope)
{
@@ -2262,179 +2261,169 @@ parse_line:
}
/*
- * Add the found match in ga_match[mtt], avoiding duplicates.
+ * Add the found match in ht_match[mtt].
* Store the info we need later, which depends on the kind of
* tags we are dealing with.
*/
- if (ga_grow(&ga_match[mtt], 1) == OK)
+ if (help_only)
{
- int len;
- int heuristic;
-
- if (help_only)
- {
#ifdef FEAT_MULTI_LANG
# define ML_EXTRA 3
#else
# define ML_EXTRA 0
#endif
- /*
- * Append the help-heuristic number after the
- * tagname, for sorting it later.
- */
- *tagp.tagname_end = NUL;
- len = (int)(tagp.tagname_end - tagp.tagname);
- mfp = (struct match_found *)
- alloc((int)sizeof(struct match_found) + len
- + 10 + ML_EXTRA);
- if (mfp != NULL)
- {
- /* "len" includes the language and the NUL, but
- * not the priority. */
- mfp->len = len + ML_EXTRA + 1;
-#define ML_HELP_LEN 6
- p = mfp->match;
- STRCPY(p, tagp.tagname);
+ /*
+ * Append the help-heuristic number after the tagname, for
+ * sorting it later. The heuristic is ignored for
+ * detecting duplicates.
+ * The format is {tagname}@{lang}NUL{heuristic}NUL
+ */
+ *tagp.tagname_end = NUL;
+ len = (int)(tagp.tagname_end - tagp.tagname);
+ mfp = (char_u *)alloc((int)sizeof(char_u) + len + 10 + ML_EXTRA + 1);
+ if (mfp != NULL)
+ {
+ int heuristic;
+
+ p = mfp;
+ STRCPY(p, tagp.tagname);
#ifdef FEAT_MULTI_LANG
- p[len] = '@';
- STRCPY(p + len + 1, help_lang);
+ p[len] = '@';
+ STRCPY(p + len + 1, help_lang);
#endif
- heuristic = help_heuristic(tagp.tagname,
- match_re ? matchoff : 0, !match_no_ic);
+ heuristic = help_heuristic(tagp.tagname,
+ match_re ? matchoff : 0, !match_no_ic);
#ifdef FEAT_MULTI_LANG
- heuristic += help_pri;
+ heuristic += help_pri;
#endif
- sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
- heuristic);
- }
- *tagp.tagname_end = TAB;
+ sprintf((char *)p + len + 1 + ML_EXTRA, "%06d",
+ heuristic);
}
- else if (name_only)
+ *tagp.tagname_end = TAB;
+ }
+ else if (name_only)
+ {
+ if (get_it_again)
{
- if (get_it_again)
- {
- char_u *temp_end = tagp.command;
+ char_u *temp_end = tagp.command;
- if (*temp_end == '/')
- while (*temp_end && *temp_end != '\r'
- && *temp_end != '\n'
- && *temp_end != '$')
- temp_end++;
+ if (*temp_end == '/')
+ while (*temp_end && *temp_end != '\r'
+ && *temp_end != '\n'
+ && *temp_end != '$')
+ temp_end++;
- if (tagp.command + 2 < temp_end)
- {
- len = (int)(temp_end - tagp.command - 2);
- mfp = (struct match_found *)alloc(
- (int)sizeof(struct match_found) + len);
- if (mfp != NULL)
- {
- mfp->len = len + 1; /* include the NUL */
- p = mfp->match;
- vim_strncpy(p, tagp.command + 2, len);
- }
- }
- else
- mfp = NULL;
- get_it_again = FALSE;
- }
- else
+ if (tagp.command + 2 < temp_end)
{
- len = (int)(tagp.tagname_end - tagp.tagname);
- mfp = (struct match_found *)alloc(
- (int)sizeof(struct match_found) + len);
+ len = (int)(temp_end - tagp.command - 2);
+ mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1);
if (mfp != NULL)
- {
- mfp->len = len + 1; /* include the NUL */
- p = mfp->match;
- vim_strncpy(p, tagp.tagname, len);
- }
-
- /* if wanted, re-read line to get long form too */
- if (State & INSERT)
- get_it_again = p_sft;
+ vim_strncpy(mfp, tagp.command + 2, len);
}
+ else
+ mfp = NULL;
+ get_it_again = FALSE;
}
else
{
- /* Save the tag in a buffer.
- * Emacs tag: <mtt><tag_fname><NUL><ebuf><NUL><lbuf>
- * other tag: <mtt><tag_fname><NUL><NUL><lbuf>
- * without Emacs tags: <mtt><tag_fname><NUL><lbuf>
- */
- len = (int)STRLEN(tag_fname)
- + (int)STRLEN(lbuf) + 3;
+ len = (int)(tagp.tagname_end - tagp.tagname);
+ mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1);
+ if (mfp != NULL)
+ vim_strncpy(mfp, tagp.tagname, len);
+
+ /* if wanted, re-read line to get long form too */
+ if (State & INSERT)
+ get_it_again = p_sft;
+ }
+ }
+ else
+ {
+#define TAG_SEP 0x01
+ size_t tag_fname_len = STRLEN(tag_fname);
#ifdef FEAT_EMACS_TAGS
- if (is_etag)
- len += (int)STRLEN(ebuf) + 1;
- else
- ++len;
+ size_t ebuf_len = 0;
#endif
- mfp = (struct match_found *)alloc(
- (int)sizeof(struct match_found) + len);
- if (mfp != NULL)
- {
- mfp->len = len;
- p = mfp->match;
- p[0] = mtt;
- STRCPY(p + 1, tag_fname);
+
+ /* Save the tag in a buffer.
+ * Use 0x01 to separate fields (Can't use NUL, because the
+ * hash key is terminated by NUL).
+ * Emacs tag: <mtt><tag_fname><0x01><ebuf><0x01><lbuf><NUL>
+ * other tag: <mtt><tag_fname><0x01><0x01><lbuf><NUL>
+ * without Emacs tags: <mtt><tag_fname><0x01><lbuf><NUL>
+ */
+ len = (int)tag_fname_len + (int)STRLEN(lbuf) + 3;
+#ifdef FEAT_EMACS_TAGS
+ if (is_etag)
+ {
+ ebuf_len = STRLEN(ebuf);
+ len += (int)ebuf_len + 1;
+ }
+ else
+ ++len;
+#endif
+ mfp = (char_u *)alloc((int)sizeof(char_u) + len + 1);
+ if (mfp != NULL)
+ {
+ p = mfp;
+ p[0] = mtt;
+ STRCPY(p + 1, tag_fname);
#ifdef BACKSLASH_IN_FILENAME
- /* Ignore differences in slashes, avoid adding
- * both path/file and path\file. */
- slash_adjust(p + 1);
+ /* Ignore differences in slashes, avoid adding
+ * both path/file and path\file. */
+ slash_adjust(p + 1);
#endif
- s = p + 1 + STRLEN(tag_fname) + 1;
+ p[tag_fname_len + 1] = TAG_SEP;
+ s = p + 1 + tag_fname_len + 1;
#ifdef FEAT_EMACS_TAGS
- if (is_etag)
- {
- STRCPY(s, ebuf);
- s += STRLEN(ebuf) + 1;
- }
- else
- *s++ = NUL;
-#endif
- STRCPY(s, lbuf);
+ if (is_etag)
+ {
+ STRCPY(s, ebuf);
+ s[ebuf_len] = TAG_SEP;
+ s += ebuf_len + 1;
}
+ else
+ *s++ = TAG_SEP;
+#endif
+ STRCPY(s, lbuf);
}
+ }
- if (mfp != NULL)
- {
- /*
- * Don't add identical matches.
- * This can take a lot of time when finding many
- * matches, check for CTRL-C now and then.
- * Add all cscope tags, because they are all listed.
- */
+ if (mfp != NULL)
+ {
+ hashitem_T *hi;
+
+ /*
+ * Don't add identical matches.
+ * Add all cscope tags, because they are all listed.
+ * "mfp" is used as a hash key, there is a NUL byte to end
+ * the part matters for comparing, more bytes may follow
+ * after it. E.g. help tags store the priority after the
+ * NUL.
+ */
#ifdef FEAT_CSCOPE
- if (use_cscope)
- i = -1;
- else
+ if (use_cscope)
+ hash++;
+ else
#endif
- for (i = ga_match[mtt].ga_len; --i >= 0 && !got_int; )
- {
- mfp2 = ((struct match_found **)
- (ga_match[mtt].ga_data))[i];
- if (mfp2->len == mfp->len
- && memcmp(mfp2->match, mfp->match,
- (size_t)mfp->len) == 0)
- break;
- fast_breakcheck();
- }
- if (i < 0)
+ hash = hash_hash(mfp);
+ hi = hash_lookup(&ht_match[mtt], mfp, hash);
+ if (HASHITEM_EMPTY(hi))
+ {
+ if (hash_add_item(&ht_match[mtt], hi, mfp, hash)
+ == FAIL)
{
- ((struct match_found **)(ga_match[mtt].ga_data))
- [ga_match[mtt].ga_len++] = mfp;
- ++match_count;
+ /* Out of memory! Just forget about the rest. */
+ retval = OK;
+ stop_searching = TRUE;
+ break;
}
else
- vim_free(mfp);
+ ++match_count;
}
- }
- else /* Out of memory! Just forget about the rest. */
- {
- retval = OK;
- stop_searching = TRUE;
- break;
+ else
+ /* duplicate tag, drop it */
+ vim_free(mfp);
}
}
#ifdef FEAT_CSCOPE
@@ -2532,7 +2521,7 @@ findtag_end:
#endif
/*
- * Move the matches from the ga_match[] arrays into one list of
+ * Move the matches from the ht_match[] arrays into one list of
* matches. When retval == FAIL, free the matches.
*/
if (retval == FAIL)
@@ -2546,22 +2535,29 @@ findtag_end:
match_count = 0;
for (mtt = 0; mtt < MT_COUNT; ++mtt)
{
- for (i = 0; i < ga_match[mtt].ga_len; ++i)
+ hashitem_T *hi;
+ long_u todo;
+
+ todo = (long)ht_match[mtt].ht_used;
+ for (hi = ht_match[mtt].ht_array; todo > 0; ++hi)
{
- mfp = ((struct match_found **)(ga_match[mtt].ga_data))[i];
- if (matches == NULL)
- vim_free(mfp);
- else
+ if (!HASHITEM_EMPTY(hi))
{
- /* To avoid allocating memory again we turn the struct
- * match_found into a string. For help the priority was not
- * included in the length. */
- mch_memmove(mfp, mfp->match,
- (size_t)(mfp->len + (help_only ? ML_HELP_LEN : 0)));
- matches[match_count++] = (char_u *)mfp;
+ mfp = hi->hi_key;
+ if (matches == NULL)
+ vim_free(mfp);
+ else
+ {
+ /* now change the TAG_SEP back to NUL */
+ for (p = mfp; *p != NUL; ++p)
+ if (*p == TAG_SEP)
+ *p = NUL;
+ matches[match_count++] = (char_u *)mfp;
+ }
+ todo--;
}
}
- ga_clear(&ga_match[mtt]);
+ hash_clear(&ht_match[mtt]);
}
*matchesp = matches;
diff --git a/src/version.c b/src/version.c
index 368a984e3..2094db704 100644
--- a/src/version.c
+++ b/src/version.c
@@ -765,6 +765,8 @@ static char *(features[]) =
static int included_patches[] =
{ /* Add new patch number below this line */
/**/
+ 190,
+/**/
189,
/**/
188,