/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; fill-column: 160 -*- */ /* * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com) * * This library is free software: you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this library. If not, see . * * Authors: Michael Zucchi */ #include "evolution-data-server-config.h" #include #include #include #include #include #include #include #include #include #include #include "camel-block-file.h" #include "camel-mempool.h" #include "camel-object.h" #include "camel-partition-table.h" #include "camel-text-index.h" #define w(x) #define io(x) #define d(x) /*(printf ("%s (%d):%s: ", __FILE__, __LINE__, __PRETTY_FUNCTION__),(x))*/ /* cursor debug */ #define c(x) #define CAMEL_TEXT_INDEX_MAX_WORDLEN (36) #define CAMEL_TEXT_INDEX_LOCK(kf, lock) \ (g_rec_mutex_lock (&((CamelTextIndex *) kf)->priv->lock)) #define CAMEL_TEXT_INDEX_UNLOCK(kf, lock) \ (g_rec_mutex_unlock (&((CamelTextIndex *) kf)->priv->lock)) static gint text_index_compress_nosync (CamelIndex *idx); /* ********************************************************************** */ struct _CamelTextIndexNamePrivate { GString *buffer; camel_key_t nameid; CamelMemPool *pool; }; CamelTextIndexName *camel_text_index_name_new (CamelTextIndex *idx, const gchar *name, camel_key_t nameid); /* ****************************** */ struct _CamelTextIndexCursorPrivate { camel_block_t first; camel_block_t next; gint record_index; gsize record_count; camel_key_t *records; gchar *current; }; CamelTextIndexCursor *camel_text_index_cursor_new (CamelTextIndex *idx, camel_block_t data); /* ****************************** */ struct _CamelTextIndexKeyCursorPrivate { CamelKeyTable *table; camel_key_t keyid; guint flags; camel_block_t data; gchar *current; }; CamelTextIndexKeyCursor *camel_text_index_key_cursor_new (CamelTextIndex *idx, CamelKeyTable *table); /* ********************************************************************** */ #define CAMEL_TEXT_INDEX_VERSION "TEXT.000" #define CAMEL_TEXT_INDEX_KEY_VERSION "KEYS.000" struct _CamelTextIndexPrivate { CamelBlockFile *blocks; CamelKeyFile *links; CamelKeyTable *word_index; CamelPartitionTable *word_hash; CamelKeyTable *name_index; CamelPartitionTable *name_hash; /* Cache of words to write */ guint word_cache_limit; GQueue word_cache; GHashTable *words; GRecMutex lock; }; /* Root block of text index */ struct _CamelTextIndexRoot { struct _CamelBlockRoot root; /* FIXME: the index root could contain a pointer to the hash root */ camel_block_t word_index_root; /* a keyindex containing the keyid -> word mapping */ camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */ camel_block_t name_index_root; /* same, for names */ camel_block_t name_hash_root; guint32 words; /* total words */ guint32 names; /* total names */ guint32 deleted; /* deleted names */ guint32 keys; /* total key 'chunks' written, used with deleted to determine fragmentation */ }; struct _CamelTextIndexWord { camel_block_t data; /* where the data starts */ camel_key_t wordid; gchar *word; guint used; camel_key_t names[32]; }; /* ********************************************************************** */ /* CamelTextIndex */ /* ********************************************************************** */ G_DEFINE_TYPE_WITH_PRIVATE (CamelTextIndex, camel_text_index, CAMEL_TYPE_INDEX) static void text_index_dispose (GObject *object) { CamelTextIndexPrivate *priv; priv = CAMEL_TEXT_INDEX (object)->priv; /* Only run this the first time. */ if (priv->word_index != NULL) camel_index_sync (CAMEL_INDEX (object)); g_clear_object (&priv->word_index); g_clear_object (&priv->word_hash); g_clear_object (&priv->name_index); g_clear_object (&priv->name_hash); g_clear_object (&priv->blocks); g_clear_object (&priv->links); /* Chain up to parent's dispose () method. */ G_OBJECT_CLASS (camel_text_index_parent_class)->dispose (object); } static void text_index_finalize (GObject *object) { CamelTextIndexPrivate *priv; priv = CAMEL_TEXT_INDEX (object)->priv; g_warn_if_fail (g_queue_is_empty (&priv->word_cache)); g_warn_if_fail (g_hash_table_size (priv->words) == 0); g_hash_table_destroy (priv->words); g_rec_mutex_clear (&priv->lock); /* Chain up to parent's finalize () method. */ G_OBJECT_CLASS (camel_text_index_parent_class)->finalize (object); } /* call locked */ static void text_index_add_name_to_word (CamelIndex *idx, const gchar *word, camel_key_t nameid) { struct _CamelTextIndexWord *w; CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; camel_key_t wordid; camel_block_t data; struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks); w = g_hash_table_lookup (p->words, word); if (w == NULL) { GQueue trash = G_QUEUE_INIT; GList *link; guint length; wordid = camel_partition_table_lookup (p->word_hash, word); if (wordid == 0) { data = 0; wordid = camel_key_table_add (p->word_index, word, 0, 0); if (wordid == 0) { g_warning ( "Could not create key entry for word '%s': %s\n", word, g_strerror (errno)); return; } if (camel_partition_table_add (p->word_hash, word, wordid) == -1) { g_warning ( "Could not create hash entry for word '%s': %s\n", word, g_strerror (errno)); return; } rb->words++; camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); } else { data = camel_key_table_lookup (p->word_index, wordid, NULL, NULL); if (data == 0) { g_warning ( "Could not find key entry for word '%s': %s\n", word, g_strerror (errno)); return; } } w = g_malloc0 (sizeof (*w)); w->word = g_strdup (word); w->wordid = wordid; w->used = 1; w->data = data; w->names[0] = nameid; g_hash_table_insert (p->words, w->word, w); g_queue_push_head (&p->word_cache, w); length = p->word_cache.length; link = g_queue_peek_tail_link (&p->word_cache); while (link != NULL && length > p->word_cache_limit) { struct _CamelTextIndexWord *ww = link->data; io (printf ("writing key file entry '%s' [%x]\n", ww->word, ww->data)); if (camel_key_file_write (p->links, &ww->data, ww->used, ww->names) != -1) { io (printf (" new data [%x]\n", ww->data)); rb->keys++; camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); /* if this call fails - we still point to the old data - not fatal */ camel_key_table_set_data ( p->word_index, ww->wordid, ww->data); g_hash_table_remove (p->words, ww->word); g_queue_push_tail (&trash, link); link->data = NULL; g_free (ww->word); g_free (ww); length--; } link = g_list_previous (link); } /* Remove deleted words from the cache. */ while ((link = g_queue_pop_head (&trash)) != NULL) g_queue_delete_link (&p->word_cache, link); } else { g_queue_remove (&p->word_cache, w); g_queue_push_head (&p->word_cache, w); w->names[w->used] = nameid; w->used++; if (w->used == G_N_ELEMENTS (w->names)) { io (printf ("writing key file entry '%s' [%x]\n", w->word, w->data)); if (camel_key_file_write (p->links, &w->data, w->used, w->names) != -1) { rb->keys++; camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); /* if this call fails - we still point to the old data - not fatal */ camel_key_table_set_data ( p->word_index, w->wordid, w->data); } /* FIXME: what to on error? lost data? */ w->used = 0; } } } static gint text_index_sync (CamelIndex *idx) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; struct _CamelTextIndexWord *ww; struct _CamelTextIndexRoot *rb; gint ret = 0, wfrag, nfrag; d (printf ("sync: blocks = %p\n", p->blocks)); if (p->blocks == NULL || p->links == NULL || p->word_index == NULL || p->word_hash == NULL || p->name_index == NULL || p->name_hash == NULL) return 0; rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks); /* sync/flush word cache */ CAMEL_TEXT_INDEX_LOCK (idx, lock); /* we sync, bump down the cache limits since we don't need them for reading */ camel_block_file_set_cache_limit (p->blocks, 128); /* this doesn't really need to be dropped, its only used in updates anyway */ p->word_cache_limit = 1024; while ((ww = g_queue_pop_head (&p->word_cache))) { if (ww->used > 0) { io (printf ("writing key file entry '%s' [%x]\n", ww->word, ww->data)); if (camel_key_file_write (p->links, &ww->data, ww->used, ww->names) != -1) { io (printf (" new data [%x]\n", ww->data)); rb->keys++; camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); camel_key_table_set_data ( p->word_index, ww->wordid, ww->data); } else { ret = -1; } ww->used = 0; } g_hash_table_remove (p->words, ww->word); g_free (ww->word); g_free (ww); } if (camel_key_table_sync (p->word_index) == -1 || camel_key_table_sync (p->name_index) == -1 || camel_partition_table_sync (p->word_hash) == -1 || camel_partition_table_sync (p->name_hash) == -1) ret = -1; /* only do the frag/compress check if we did some new writes on this index */ wfrag = rb->words ? (((rb->keys - rb->words) * 100)/ rb->words) : 0; nfrag = rb->names ? ((rb->deleted * 100) / rb->names) : 0; d (printf (" words = %d, keys = %d\n", rb->words, rb->keys)); if (ret == 0) { if (wfrag > 30 || nfrag > 20) ret = text_index_compress_nosync (idx); } ret = ret == -1 ? ret : camel_block_file_sync (p->blocks); CAMEL_TEXT_INDEX_UNLOCK (idx, lock); return ret; } static void tmp_name (const gchar *in, gchar *o, gsize o_len) { gchar *s; s = strrchr (in, '/'); if (s) { memcpy (o, in, s - in + 1); memcpy (o + (s - in + 1), ".#", 2); strcpy (o + (s - in + 3), s + 1); } else { g_snprintf (o, o_len, ".#%s", in); } } static gint text_index_compress (CamelIndex *idx) { gint ret; CAMEL_TEXT_INDEX_LOCK (idx, lock); ret = camel_index_sync (idx); if (ret != -1) ret = text_index_compress_nosync (idx); CAMEL_TEXT_INDEX_UNLOCK (idx, lock); return ret; } /* Attempt to recover index space by compressing the indices */ static gint text_index_compress_nosync (CamelIndex *idx) { CamelTextIndex *newidx; CamelTextIndexPrivate *newp, *oldp; camel_key_t oldkeyid, newkeyid; GHashTable *remap; camel_block_t data, newdata; gint i, ret = -1; gchar *name = NULL; guint flags; gchar *newpath, *savepath, *oldpath; gsize count, newcount; camel_key_t *records, newrecords[256]; struct _CamelTextIndexRoot *rb; i = strlen (idx->path) + 16; oldpath = alloca (i); newpath = alloca (i); savepath = alloca (i); g_strlcpy (oldpath, idx->path, i); oldpath[strlen (oldpath) - strlen (".index")] = 0; tmp_name (oldpath, newpath, i); g_snprintf (savepath, i, "%s~", oldpath); d (printf ("Old index: %s\n", idx->path)); d (printf ("Old path: %s\n", oldpath)); d (printf ("New: %s\n", newpath)); d (printf ("Save: %s\n", savepath)); newidx = camel_text_index_new (newpath, O_RDWR | O_CREAT); if (newidx == NULL) return -1; newp = CAMEL_TEXT_INDEX (newidx)->priv; oldp = CAMEL_TEXT_INDEX (idx)->priv; CAMEL_TEXT_INDEX_LOCK (idx, lock); rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (newp->blocks); rb->words = 0; rb->names = 0; rb->deleted = 0; rb->keys = 0; /* Process: * For each name we still have: * Add it to the new index & setup remap table * * For each word: * Copy word's data to a new file * Add new word to index (*) (can we just copy blocks?) */ /* Copy undeleted names to new index file, creating new indices */ io (printf ("Copying undeleted names to new file\n")); remap = g_hash_table_new (NULL, NULL); oldkeyid = 0; while ((oldkeyid = camel_key_table_next (oldp->name_index, oldkeyid, &name, &flags, &data))) { if ((flags&1) == 0) { io (printf ("copying name '%s'\n", name)); newkeyid = camel_key_table_add ( newp->name_index, name, data, flags); if (newkeyid == 0) goto fail; rb->names++; camel_partition_table_add ( newp->name_hash, name, newkeyid); g_hash_table_insert (remap, GINT_TO_POINTER (oldkeyid), GINT_TO_POINTER (newkeyid)); } else { io (printf ("deleted name '%s'\n", name)); } g_free (name); name = NULL; } /* Copy word data across, remapping/deleting and create new index for it */ /* We re-block the data into 256 entry lots while we're at it, since we only * have to do 1 at a time and its cheap */ oldkeyid = 0; while ((oldkeyid = camel_key_table_next (oldp->word_index, oldkeyid, &name, &flags, &data))) { io (printf ("copying word '%s'\n", name)); newdata = 0; newcount = 0; if (data) { rb->words++; rb->keys++; } while (data) { if (camel_key_file_read (oldp->links, &data, &count, &records) == -1) { io (printf ("could not read from old keys at %d for word '%s'\n", (gint) data, name)); goto fail; } for (i = 0; i < count; i++) { newkeyid = (camel_key_t) GPOINTER_TO_INT (g_hash_table_lookup (remap, GINT_TO_POINTER (records[i]))); if (newkeyid) { newrecords[newcount++] = newkeyid; if (newcount == G_N_ELEMENTS (newrecords)) { if (camel_key_file_write (newp->links, &newdata, newcount, newrecords) == -1) { g_free (records); goto fail; } newcount = 0; } } } g_free (records); } if (newcount > 0) { if (camel_key_file_write (newp->links, &newdata, newcount, newrecords) == -1) goto fail; } if (newdata != 0) { newkeyid = camel_key_table_add ( newp->word_index, name, newdata, flags); if (newkeyid == 0) goto fail; camel_partition_table_add ( newp->word_hash, name, newkeyid); } g_free (name); name = NULL; } camel_block_file_touch_block (newp->blocks, camel_block_file_get_root_block (newp->blocks)); if (camel_index_sync (CAMEL_INDEX (newidx)) == -1) goto fail; /* Rename underlying files to match */ ret = camel_index_rename (idx, savepath); if (ret == -1) goto fail; /* If this fails, we'll pick up something during restart? */ ret = camel_index_rename ((CamelIndex *) newidx, oldpath); if (ret == -1) goto fail; #define myswap(a, b) { gpointer tmp = a; a = b; b = tmp; } /* Poke the private data across to the new object */ /* And change the fd's over, etc? */ /* Yes: This is a hack */ myswap (newp->blocks, oldp->blocks); myswap (newp->links, oldp->links); myswap (newp->word_index, oldp->word_index); myswap (newp->word_hash, oldp->word_hash); myswap (newp->name_index, oldp->name_index); myswap (newp->name_hash, oldp->name_hash); myswap (((CamelIndex *) newidx)->path, ((CamelIndex *) idx)->path); #undef myswap ret = 0; fail: CAMEL_TEXT_INDEX_UNLOCK (idx, lock); camel_index_delete ((CamelIndex *) newidx); g_object_unref (newidx); g_free (name); g_hash_table_destroy (remap); /* clean up temp files always */ g_snprintf (savepath, i, "%s~.index", oldpath); g_unlink (savepath); g_snprintf (newpath, i, "%s.data", savepath); g_unlink (newpath); return ret; } static gint text_index_delete (CamelIndex *idx) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; gint ret = 0; if (camel_block_file_delete (p->blocks) == -1) ret = -1; if (camel_key_file_delete (p->links) == -1) ret = -1; return ret; } static gint text_index_rename (CamelIndex *idx, const gchar *path) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; gchar *newlink, *newblock; gsize newlink_len, newblock_len; gint err, ret; CAMEL_TEXT_INDEX_LOCK (idx, lock); newblock_len = strlen (path) + 8; newblock = alloca (newblock_len); g_snprintf (newblock, newblock_len, "%s.index", path); ret = camel_block_file_rename (p->blocks, newblock); if (ret == -1) { CAMEL_TEXT_INDEX_UNLOCK (idx, lock); return -1; } newlink_len = strlen (path) + 16; newlink = alloca (newlink_len); g_snprintf (newlink, newlink_len, "%s.index.data", path); ret = camel_key_file_rename (p->links, newlink); if (ret == -1) { err = errno; camel_block_file_rename (p->blocks, idx->path); CAMEL_TEXT_INDEX_UNLOCK (idx, lock); errno = err; return -1; } g_free (idx->path); idx->path = g_strdup (newblock); CAMEL_TEXT_INDEX_UNLOCK (idx, lock); return 0; } static gint text_index_has_name (CamelIndex *idx, const gchar *name) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; return camel_partition_table_lookup (p->name_hash, name) != 0; } static CamelIndexName * text_index_add_name (CamelIndex *idx, const gchar *name) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; camel_key_t keyid; CamelIndexName *idn; struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks); CAMEL_TEXT_INDEX_LOCK (idx, lock); /* if we're adding words, up the cache limits a lot */ if (p->word_cache_limit < 8192) { camel_block_file_set_cache_limit (p->blocks, 1024); p->word_cache_limit = 8192; } /* If we have it already replace it */ keyid = camel_partition_table_lookup (p->name_hash, name); if (keyid != 0) { /* TODO: We could just update the partition table's * key pointer rather than having to delete it */ rb->deleted++; camel_key_table_set_flags (p->name_index, keyid, 1, 1); camel_partition_table_remove (p->name_hash, name); } keyid = camel_key_table_add (p->name_index, name, 0, 0); if (keyid != 0) { camel_partition_table_add (p->name_hash, name, keyid); rb->names++; } camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); /* TODO: if keyid == 0, we had a failure, we should somehow flag that, but for * now just return a valid object but discard its results, see text_index_write_name */ CAMEL_TEXT_INDEX_UNLOCK (idx, lock); idn = (CamelIndexName *) camel_text_index_name_new ((CamelTextIndex *) idx, name, keyid); return idn; } /* call locked */ static void hash_write_word (gchar *word, gpointer data, CamelIndexName *idn) { CamelTextIndexName *tin = (CamelTextIndexName *) idn; text_index_add_name_to_word (idn->index, word, tin->priv->nameid); } static gint text_index_write_name (CamelIndex *idx, CamelIndexName *idn) { /* force 'flush' of any outstanding data */ camel_index_name_add_buffer (idn, NULL, 0); /* see text_index_add_name for when this can be 0 */ if (((CamelTextIndexName *) idn)->priv->nameid != 0) { CAMEL_TEXT_INDEX_LOCK (idx, lock); g_hash_table_foreach (idn->words, (GHFunc) hash_write_word, idn); CAMEL_TEXT_INDEX_UNLOCK (idx, lock); } return 0; } static CamelIndexCursor * text_index_find_name (CamelIndex *idx, const gchar *name) { /* what was this for, umm */ return NULL; } static void text_index_delete_name (CamelIndex *idx, const gchar *name) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; camel_key_t keyid; struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks); d (printf ("Delete name: %s\n", name)); /* probably doesn't really need locking, but oh well */ CAMEL_TEXT_INDEX_LOCK (idx, lock); /* We just mark the key deleted, and remove it from the hash table */ keyid = camel_partition_table_lookup (p->name_hash, name); if (keyid != 0) { rb->deleted++; camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); camel_key_table_set_flags (p->name_index, keyid, 1, 1); camel_partition_table_remove (p->name_hash, name); } CAMEL_TEXT_INDEX_UNLOCK (idx, lock); } static CamelIndexCursor * text_index_find (CamelIndex *idx, const gchar *word) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; camel_key_t keyid; camel_block_t data = 0; guint flags; CamelIndexCursor *idc; CAMEL_TEXT_INDEX_LOCK (idx, lock); keyid = camel_partition_table_lookup (p->word_hash, word); if (keyid != 0) { data = camel_key_table_lookup ( p->word_index, keyid, NULL, &flags); if (flags & 1) data = 0; } CAMEL_TEXT_INDEX_UNLOCK (idx, lock); idc = (CamelIndexCursor *) camel_text_index_cursor_new ((CamelTextIndex *) idx, data); return idc; } static CamelIndexCursor * text_index_words (CamelIndex *idx) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; return (CamelIndexCursor *) camel_text_index_key_cursor_new ((CamelTextIndex *) idx, p->word_index); } static void camel_text_index_class_init (CamelTextIndexClass *class) { GObjectClass *object_class; CamelIndexClass *index_class; object_class = G_OBJECT_CLASS (class); object_class->dispose = text_index_dispose; object_class->finalize = text_index_finalize; index_class = CAMEL_INDEX_CLASS (class); index_class->sync = text_index_sync; index_class->compress = text_index_compress; index_class->delete_ = text_index_delete; index_class->rename = text_index_rename; index_class->has_name = text_index_has_name; index_class->add_name = text_index_add_name; index_class->write_name = text_index_write_name; index_class->find_name = text_index_find_name; index_class->delete_name = text_index_delete_name; index_class->find = text_index_find; index_class->words = text_index_words; } static void camel_text_index_init (CamelTextIndex *text_index) { text_index->priv = camel_text_index_get_instance_private (text_index); g_queue_init (&text_index->priv->word_cache); text_index->priv->words = g_hash_table_new (g_str_hash, g_str_equal); /* This cache size and the block cache size have been tuned for * about the best with moderate memory usage. Doubling the memory * usage barely affects performance. */ text_index->priv->word_cache_limit = 4096; /* 1024 = 128K */ g_rec_mutex_init (&text_index->priv->lock); } static gchar * text_index_normalize (CamelIndex *idx, const gchar *in, gpointer data) { gchar *word; /* Sigh, this is really expensive */ /*g_utf8_normalize (in, strlen (in), G_NORMALIZE_ALL);*/ word = g_utf8_strdown (in, -1); return word; } CamelTextIndex * camel_text_index_new (const gchar *path, gint flags) { CamelTextIndex *idx = g_object_new (CAMEL_TYPE_TEXT_INDEX, NULL); CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; struct _CamelTextIndexRoot *rb; gchar *link; gsize link_len; CamelBlock *bl; camel_index_construct ((CamelIndex *) idx, path, flags); camel_index_set_normalize ((CamelIndex *) idx, text_index_normalize, NULL); p->blocks = camel_block_file_new ( idx->parent.path, flags, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE); if (p->blocks == NULL) goto fail; link_len = strlen (idx->parent.path) + 7; link = alloca (link_len); g_snprintf (link, link_len, "%s.data", idx->parent.path); p->links = camel_key_file_new (link, flags, CAMEL_TEXT_INDEX_KEY_VERSION); if (p->links == NULL) goto fail; rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks); if (rb->word_index_root == 0) { bl = camel_block_file_new_block (p->blocks); if (bl == NULL) goto fail; rb->word_index_root = bl->id; camel_block_file_unref_block (p->blocks, bl); camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); } if (rb->word_hash_root == 0) { bl = camel_block_file_new_block (p->blocks); if (bl == NULL) goto fail; rb->word_hash_root = bl->id; camel_block_file_unref_block (p->blocks, bl); camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); } if (rb->name_index_root == 0) { bl = camel_block_file_new_block (p->blocks); if (bl == NULL) goto fail; rb->name_index_root = bl->id; camel_block_file_unref_block (p->blocks, bl); camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); } if (rb->name_hash_root == 0) { bl = camel_block_file_new_block (p->blocks); if (bl == NULL) goto fail; rb->name_hash_root = bl->id; camel_block_file_unref_block (p->blocks, bl); camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks)); } p->word_index = camel_key_table_new (p->blocks, rb->word_index_root); p->word_hash = camel_partition_table_new (p->blocks, rb->word_hash_root); p->name_index = camel_key_table_new (p->blocks, rb->name_index_root); p->name_hash = camel_partition_table_new (p->blocks, rb->name_hash_root); if (p->word_index == NULL || p->word_hash == NULL || p->name_index == NULL || p->name_hash == NULL) { g_object_unref (idx); idx = NULL; } return idx; fail: g_object_unref (idx); return NULL; } /* returns 0 if the index exists, is valid, and synced, -1 otherwise */ gint camel_text_index_check (const gchar *path) { gchar *block, *key; gsize block_len, key_len; CamelBlockFile *blocks; CamelKeyFile *keys; block_len = strlen (path) + 7; block = alloca (block_len); g_snprintf (block, block_len, "%s.index", path); blocks = camel_block_file_new (block, O_RDONLY, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE); if (blocks == NULL) { io (printf ("Check failed: No block file: %s\n", g_strerror (errno))); return -1; } key_len = strlen (path) + 12; key = alloca (key_len); g_snprintf (key, key_len, "%s.index.data", path); keys = camel_key_file_new (key, O_RDONLY, CAMEL_TEXT_INDEX_KEY_VERSION); if (keys == NULL) { io (printf ("Check failed: No key file: %s\n", g_strerror (errno))); g_object_unref (blocks); return -1; } g_object_unref (keys); g_object_unref (blocks); return 0; } gint camel_text_index_rename (const gchar *old, const gchar *new) { gchar *oldname, *newname; gsize oldname_len, newname_len; gint err; /* TODO: camel_text_index_rename should find out if we have an active index and use that instead */ oldname_len = strlen (old) + 12; newname_len = strlen (new) + 12; oldname = alloca (oldname_len); newname = alloca (newname_len); g_snprintf (oldname, oldname_len, "%s.index", old); g_snprintf (newname, newname_len, "%s.index", new); if (g_rename (oldname, newname) == -1 && errno != ENOENT) return -1; g_snprintf (oldname, oldname_len, "%s.index.data", old); g_snprintf (newname, newname_len, "%s.index.data", new); if (g_rename (oldname, newname) == -1 && errno != ENOENT) { err = errno; g_snprintf (oldname, oldname_len, "%s.index", old); g_snprintf (newname, newname_len, "%s.index", new); if (g_rename (newname, oldname) == -1) { g_warning ( "%s: Failed to rename '%s' to '%s': %s", G_STRFUNC, newname, oldname, g_strerror (errno)); } errno = err; return -1; } return 0; } gint camel_text_index_remove (const gchar *old) { gchar *block, *key; gsize block_len, key_len; gint ret = 0; /* TODO: needs to poke any active indices to remain unlinked */ block_len = strlen (old) + 12; block = alloca (block_len); key_len = strlen (old) + 12; key = alloca (key_len); g_snprintf (block, block_len, "%s.index", old); g_snprintf (key, key_len, "%s.index.data", old); if (g_unlink (block) == -1 && errno != ENOENT && errno != ENOTDIR) ret = -1; if (g_unlink (key) == -1 && errno != ENOENT && errno != ENOTDIR) ret = -1; if (ret == 0) errno = 0; return ret; } /* Debug */ void camel_text_index_info (CamelTextIndex *idx) { CamelTextIndexPrivate *p = idx->priv; struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks); gint frag; printf ("Path: '%s'\n", idx->parent.path); printf ("Version: %u\n", idx->parent.version); printf ("Flags: %08x\n", idx->parent.flags); printf ("Total words: %u\n", rb->words); printf ("Total names: %u\n", rb->names); printf ("Total deleted: %u\n", rb->deleted); printf ("Total key blocks: %u\n", rb->keys); if (rb->words > 0) { frag = ((rb->keys - rb->words) * 100)/ rb->words; printf ("Word fragmentation: %d%%\n", frag); } if (rb->names > 0) { frag = (rb->deleted * 100)/ rb->names; printf ("Name fragmentation: %d%%\n", frag); } } /* #define DUMP_RAW */ #ifdef DUMP_RAW enum { KEY_ROOT = 1, KEY_DATA = 2, PARTITION_MAP = 4, PARTITION_DATA = 8 }; static void add_type (GHashTable *map, camel_block_t id, gint type) { camel_block_t old; old = g_hash_table_lookup (map, id); if (old == type) return; if (old != 0 && old != type) g_warning ("block %x redefined as type %d, already type %d\n", id, type, old); g_hash_table_insert (map, id, GINT_TO_POINTER (type | old)); } static void add_partition (GHashTable *map, CamelBlockFile *blocks, camel_block_t id) { CamelBlock *bl; CamelPartitionMapBlock *pm; gint i; while (id) { add_type (map, id, PARTITION_MAP); bl = camel_block_file_get_block (blocks, id); if (bl == NULL) { g_warning ("couldn't get parition: %x\n", id); return; } pm = (CamelPartitionMapBlock *) &bl->data; if (pm->used > G_N_ELEMENTS (pm->partition)) { g_warning ("Partition block %x invalid\n", id); camel_block_file_unref_block (blocks, bl); return; } for (i = 0; i < pm->used; i++) add_type (map, pm->partition[i].blockid, PARTITION_DATA); id = pm->next; camel_block_file_unref_block (blocks, bl); } } static void add_keys (GHashTable *map, CamelBlockFile *blocks, camel_block_t id) { CamelBlock *rbl, *bl; CamelKeyRootBlock *root; CamelKeyBlock *kb; add_type (map, id, KEY_ROOT); rbl = camel_block_file_get_block (blocks, id); if (rbl == NULL) { g_warning ("couldn't get key root: %x\n", id); return; } root = (CamelKeyRootBlock *) &rbl->data; id = root->first; while (id) { add_type (map, id, KEY_DATA); bl = camel_block_file_get_block (blocks, id); if (bl == NULL) { g_warning ("couldn't get key: %x\n", id); break; } kb = (CamelKeyBlock *) &bl->data; id = kb->next; camel_block_file_unref_block (blocks, bl); } camel_block_file_unref_block (blocks, rbl); } static void dump_raw (GHashTable *map, gchar *path) { gchar buf[1024]; gchar line[256]; gchar *p, c, *e, *a, *o; gint v, n, len, i, type; gchar hex[16] = "0123456789ABCDEF"; gint fd; camel_block_t id, total; fd = g_open (path, O_RDONLY | O_BINARY, 0); if (fd == -1) return; total = 0; while ((len = read (fd, buf, 1024)) == 1024) { id = total; type = g_hash_table_lookup (map, id); switch (type) { case 0: printf (" - unknown -\n"); break; default: printf (" - invalid -\n"); break; case KEY_ROOT: { CamelKeyRootBlock *r = (CamelKeyRootBlock *) buf; printf ("Key root:\n"); printf ("First: %08x Last: %08x Free: %08x\n", r->first, r->last, r->free); } break; case KEY_DATA: { CamelKeyBlock *k = (CamelKeyBlock *) buf; printf ("Key data:\n"); printf ("Next: %08x Used: %u\n", k->next, k->used); for (i = 0; i < k->used; i++) { if (i == 0) len = sizeof (k->u.keydata); else len = k->u.keys[i - 1].offset; len -= k->u.keys[i].offset; printf ( "[%03d]: %08x %5d %06x %3d '%.*s'\n", i, k->u.keys[i].data, k->u.keys[i].offset, k->u.keys[i].flags, len, len, k->u.keydata + k->u.keys[i].offset); } } break; case PARTITION_MAP: { CamelPartitionMapBlock *m = (CamelPartitionMapBlock *) buf; printf ("Partition map\n"); printf ("Next: %08x Used: %u\n", m->next, m->used); for (i = 0; i < m->used; i++) { printf ("[%03d]: %08x -> %08x\n", i, m->partition[i].hashid, m->partition[i].blockid); } } break; case PARTITION_DATA: { CamelPartitionKeyBlock *k = (CamelPartitionKeyBlock *) buf; printf ("Partition data\n"); printf ("Used: %u\n", k->used); } break; } printf ("--raw--\n"); len = 1024; p = buf; do { g_snprintf (line, sizeof (line), "%08x: ", total); total += 16; o = line + 10; a = o + 16 * 2 + 2; i = 0; while (len && i < 16) { c = *p++; *a++ = isprint (c)?c:'.'; *o++ = hex[(c>>4)&0x0f]; *o++ = hex[c&0x0f]; i++; if (i == 8) *o++ = ' '; len--; } *a = 0; printf ("%s\n", line); } while (len); printf ("\n"); } close (fd); } #endif /* Debug */ void camel_text_index_dump (CamelTextIndex *idx) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; #ifndef DUMP_RAW camel_key_t keyid; gchar *word = NULL; const gchar *name; guint flags; camel_block_t data; /* Iterate over all names in the file first */ printf ("UID's in index\n"); keyid = 0; while ((keyid = camel_key_table_next (p->name_index, keyid, &word, &flags, &data))) { if ((flags & 1) == 0) printf (" %s\n", word); else printf (" %s (deleted)\n", word); g_free (word); word = NULL; } g_clear_pointer (&word, g_free); printf ("Word's in index\n"); keyid = 0; while ((keyid = camel_key_table_next (p->word_index, keyid, &word, &flags, &data))) { CamelIndexCursor *idc; printf ("Word: '%s':\n", word); idc = camel_index_find ((CamelIndex *) idx, word); while ((name = camel_index_cursor_next (idc))) { printf (" %s", name); } printf ("\n"); g_object_unref (idc); g_free (word); word = NULL; } g_clear_pointer (&word, g_free); #else /* a more low-level dump routine */ GHashTable *block_type = g_hash_table_new (NULL, NULL); camel_block_t id; struct stat st; gint type; add_keys (block_type, p->blocks, p->word_index->rootid); add_keys (block_type, p->blocks, p->name_index->rootid); add_partition (block_type, p->blocks, p->word_hash->rootid); add_partition (block_type, p->blocks, p->name_hash->rootid); dump_raw (block_type, p->blocks->path); g_hash_table_destroy (block_type); #endif } /* more debug stuff */ void camel_text_index_validate (CamelTextIndex *idx) { CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv; camel_key_t keyid; gchar *word = NULL; const gchar *name; guint flags; camel_block_t data; gchar *oldword; camel_key_t *records; gsize count; GHashTable *names, *deleted, *words, *keys, *name_word, *word_word; names = g_hash_table_new (NULL, NULL); deleted = g_hash_table_new (NULL, NULL); name_word = g_hash_table_new (g_str_hash, g_str_equal); words = g_hash_table_new (NULL, NULL); keys = g_hash_table_new (NULL, NULL); word_word = g_hash_table_new (g_str_hash, g_str_equal); /* Iterate over all names in the file first */ printf ("Checking UID consistency\n"); keyid = 0; while ((keyid = camel_key_table_next (p->name_index, keyid, &word, &flags, &data))) { if ((oldword = g_hash_table_lookup (names, GINT_TO_POINTER (keyid))) != NULL || (oldword = g_hash_table_lookup (deleted, GINT_TO_POINTER (keyid))) != NULL) { printf ("Warning, name '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword); g_free (word); } else { g_hash_table_insert (name_word, word, GINT_TO_POINTER (1)); if ((flags & 1) == 0) { g_hash_table_insert (names, GINT_TO_POINTER (keyid), word); } else { g_hash_table_insert (deleted, GINT_TO_POINTER (keyid), word); } } word = NULL; } g_clear_pointer (&word, g_free); printf ("Checking WORD member consistency\n"); keyid = 0; while ((keyid = camel_key_table_next (p->word_index, keyid, &word, &flags, &data))) { CamelIndexCursor *idc; GHashTable *used; /* first, check for duplicates of keyid, and data */ if ((oldword = g_hash_table_lookup (words, GINT_TO_POINTER (keyid))) != NULL) { printf ("Warning, word '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword); g_free (word); word = oldword; } else { g_hash_table_insert (words, GINT_TO_POINTER (keyid), word); } if (data == 0) { /* This may not be an issue if things have been removed over time, * though it is a problem if its a fresh index */ printf ("Word '%s' has no data associated with it\n", word); } else { if ((oldword = g_hash_table_lookup (keys, GUINT_TO_POINTER (data))) != NULL) { printf ("Warning, word '%s' duplicates data (%x) with name '%s'\n", word, data, oldword); } else { g_hash_table_insert (keys, GUINT_TO_POINTER (data), word); } } if (g_hash_table_lookup (word_word, word) != NULL) { printf ("Warning, word '%s' occurs more than once\n", word); } else { g_hash_table_insert (word_word, word, word); } used = g_hash_table_new (g_str_hash, g_str_equal); idc = camel_index_find ((CamelIndex *) idx, word); while ((name = camel_index_cursor_next (idc))) { if (g_hash_table_lookup (name_word, name) == NULL) { printf ("word '%s' references non-existant name '%s'\n", word, name); } if (g_hash_table_lookup (used, name) != NULL) { printf ("word '%s' uses word '%s' more than once\n", word, name); } else { g_hash_table_insert (used, g_strdup (name), (gpointer) 1); } } g_object_unref (idc); g_hash_table_foreach (used, (GHFunc) g_free, NULL); g_hash_table_destroy (used); printf ("word '%s'\n", word); while (data) { printf (" data %x ", data); if (camel_key_file_read (p->links, &data, &count, &records) == -1) { printf ("Warning, read failed for word '%s', at data '%u'\n", word, data); data = 0; } else { printf ("(%d)\n", (gint) count); g_free (records); } } word = NULL; } g_clear_pointer (&word, g_free); g_hash_table_destroy (names); g_hash_table_destroy (deleted); g_hash_table_destroy (keys); g_hash_table_foreach (words, (GHFunc) g_free, NULL); g_hash_table_destroy (words); g_hash_table_foreach (name_word, (GHFunc) g_free, NULL); g_hash_table_destroy (name_word); g_hash_table_foreach (word_word, (GHFunc) g_free, NULL); g_hash_table_destroy (word_word); } /* ********************************************************************** */ /* CamelTextIndexName */ /* ********************************************************************** */ G_DEFINE_TYPE_WITH_PRIVATE (CamelTextIndexName, camel_text_index_name, CAMEL_TYPE_INDEX_NAME) static void text_index_name_finalize (GObject *object) { CamelTextIndexNamePrivate *priv; priv = CAMEL_TEXT_INDEX_NAME (object)->priv; g_hash_table_destroy (CAMEL_TEXT_INDEX_NAME (object)->parent.words); g_string_free (priv->buffer, TRUE); camel_mempool_destroy (priv->pool); /* Chain up to parent's finalize() method. */ G_OBJECT_CLASS (camel_text_index_name_parent_class)->finalize (object); } static void text_index_name_add_word (CamelIndexName *idn, const gchar *word) { CamelTextIndexNamePrivate *p = ((CamelTextIndexName *) idn)->priv; if (g_hash_table_lookup (idn->words, word) == NULL) { gchar *w = camel_mempool_strdup (p->pool, word); g_hash_table_insert (idn->words, w, w); } } /* Why? * Because it doesn't hang/loop forever on bad data * Used to clean up utf8 before it gets further */ static inline guint32 camel_utf8_next (const guchar **ptr, const guchar *ptrend) { register guchar *p = (guchar *) * ptr; register guint c; register guint32 v; gint l; if (p == ptrend) return 0; while ((c = *p++)) { if (c < 0x80) { *ptr = p; return c; } else if ((c&0xe0) == 0xc0) { v = c & 0x1f; l = 1; } else if ((c&0xf0) == 0xe0) { v = c & 0x0f; l = 2; } else if ((c&0xf8) == 0xf0) { v = c & 0x07; l = 3; } else if ((c&0xfc) == 0xf8) { v = c & 0x03; l = 4; } else if ((c&0xfe) == 0xfc) { v = c & 0x01; l = 5; } else /* Invalid, ignore and look for next start gchar if room */ if (p == ptrend) { return 0; } else { continue; } /* bad data or truncated buffer */ if (p + l > ptrend) return 0; while (l && ((c = *p) & 0xc0) == 0x80) { p++; l--; v = (v << 6) | (c & 0x3f); } /* valid gchar */ if (l == 0) { *ptr = p; return v; } /* else look for a start gchar again */ } return 0; } static gsize text_index_name_add_buffer (CamelIndexName *idn, const gchar *buffer, gsize len) { CamelTextIndexNamePrivate *p = CAMEL_TEXT_INDEX_NAME (idn)->priv; const guchar *ptr, *ptrend; guint32 c; gchar utf8[8]; gint utf8len; if (buffer == NULL) { if (p->buffer->len) { camel_index_name_add_word (idn, p->buffer->str); g_string_truncate (p->buffer, 0); } return 0; } ptr = (const guchar *) buffer; ptrend = (const guchar *) buffer + len; while ((c = camel_utf8_next (&ptr, ptrend))) { if (g_unichar_isalnum (c)) { c = g_unichar_tolower (c); utf8len = g_unichar_to_utf8 (c, utf8); utf8[utf8len] = 0; g_string_append (p->buffer, utf8); } else { if (p->buffer->len > 0 && p->buffer->len <= CAMEL_TEXT_INDEX_MAX_WORDLEN) { text_index_name_add_word (idn, p->buffer->str); /*camel_index_name_add_word (idn, p->buffer->str);*/ } g_string_truncate (p->buffer, 0); } } return 0; } static void camel_text_index_name_class_init (CamelTextIndexNameClass *class) { GObjectClass *object_class; CamelIndexNameClass *index_name_class; object_class = G_OBJECT_CLASS (class); object_class->finalize = text_index_name_finalize; index_name_class = CAMEL_INDEX_NAME_CLASS (class); index_name_class->add_word = text_index_name_add_word; index_name_class->add_buffer = text_index_name_add_buffer; } static void camel_text_index_name_init (CamelTextIndexName *text_index_name) { text_index_name->priv = camel_text_index_name_get_instance_private (text_index_name); text_index_name->parent.words = g_hash_table_new ( g_str_hash, g_str_equal); text_index_name->priv->buffer = g_string_new (""); text_index_name->priv->pool = camel_mempool_new (256, 128, CAMEL_MEMPOOL_ALIGN_BYTE); } CamelTextIndexName * camel_text_index_name_new (CamelTextIndex *idx, const gchar *name, camel_key_t nameid) { CamelTextIndexName *idn = g_object_new (CAMEL_TYPE_TEXT_INDEX_NAME, NULL); CamelIndexName *cin = &idn->parent; CamelTextIndexNamePrivate *p = CAMEL_TEXT_INDEX_NAME (idn)->priv; cin->index = CAMEL_INDEX (g_object_ref (idx)); cin->name = camel_mempool_strdup (p->pool, name); p->nameid = nameid; return idn; } /* ********************************************************************** */ /* CamelTextIndexCursor */ /* ********************************************************************** */ G_DEFINE_TYPE_WITH_PRIVATE (CamelTextIndexCursor, camel_text_index_cursor, CAMEL_TYPE_INDEX_CURSOR) static void text_index_cursor_finalize (GObject *object) { CamelTextIndexCursorPrivate *priv; priv = CAMEL_TEXT_INDEX_CURSOR (object)->priv; g_free (priv->records); g_free (priv->current); /* Chain up to parent's finalize() method. */ G_OBJECT_CLASS (camel_text_index_cursor_parent_class)->finalize (object); } static const gchar * text_index_cursor_next (CamelIndexCursor *idc) { CamelTextIndexCursorPrivate *p = CAMEL_TEXT_INDEX_CURSOR (idc)->priv; CamelTextIndexPrivate *tip = CAMEL_TEXT_INDEX (idc->index)->priv; guint flags; c (printf ("Going to next cursor for word with data '%08x' next %08x\n", p->first, p->next)); do { while (p->record_index >= p->record_count) { g_free (p->records); p->records = NULL; p->record_index = 0; p->record_count = 0; if (p->next == 0) return NULL; if (camel_key_file_read (tip->links, &p->next, &p->record_count, &p->records) == -1) return NULL; } g_free (p->current); p->current = NULL; flags = 0; camel_key_table_lookup ( tip->name_index, p->records[p->record_index], &p->current, &flags); if (flags & 1) { g_free (p->current); p->current = NULL; } p->record_index++; } while (p->current == NULL); return p->current; } static void camel_text_index_cursor_class_init (CamelTextIndexCursorClass *class) { GObjectClass *object_class; CamelIndexCursorClass *index_cursor_class; object_class = G_OBJECT_CLASS (class); object_class->finalize = text_index_cursor_finalize; index_cursor_class = CAMEL_INDEX_CURSOR_CLASS (class); index_cursor_class->next = text_index_cursor_next; } static void camel_text_index_cursor_init (CamelTextIndexCursor *text_index_cursor) { text_index_cursor->priv = camel_text_index_cursor_get_instance_private (text_index_cursor); } CamelTextIndexCursor * camel_text_index_cursor_new (CamelTextIndex *idx, camel_block_t data) { CamelTextIndexCursor *idc = g_object_new (CAMEL_TYPE_TEXT_INDEX_CURSOR, NULL); CamelIndexCursor *cic = &idc->parent; CamelTextIndexCursorPrivate *p = CAMEL_TEXT_INDEX_CURSOR (idc)->priv; cic->index = CAMEL_INDEX (g_object_ref (idx)); p->first = data; p->next = data; p->record_count = 0; p->record_index = 0; return idc; } /* ********************************************************************** */ /* CamelTextIndexKeyCursor */ /* ********************************************************************** */ G_DEFINE_TYPE_WITH_PRIVATE (CamelTextIndexKeyCursor, camel_text_index_key_cursor, CAMEL_TYPE_INDEX_CURSOR) static void text_index_key_cursor_dispose (GObject *object) { CamelTextIndexKeyCursorPrivate *priv; priv = CAMEL_TEXT_INDEX_KEY_CURSOR (object)->priv; g_clear_object (&priv->table); /* Chain up parent's dispose() method. */ G_OBJECT_CLASS (camel_text_index_key_cursor_parent_class)->dispose (object); } static void text_index_key_cursor_finalize (GObject *object) { CamelTextIndexKeyCursorPrivate *priv; priv = CAMEL_TEXT_INDEX_KEY_CURSOR (object)->priv; g_free (priv->current); /* Chain up to parent's finalize() method. */ G_OBJECT_CLASS (camel_text_index_key_cursor_parent_class)->finalize (object); } static const gchar * text_index_key_cursor_next (CamelIndexCursor *idc) { CamelTextIndexKeyCursorPrivate *p = CAMEL_TEXT_INDEX_KEY_CURSOR (idc)->priv; c (printf ("Going to next cursor for keyid %08x\n", p->keyid)); g_free (p->current); p->current = NULL; while ((p->keyid = camel_key_table_next (p->table, p->keyid, &p->current, &p->flags, &p->data))) { if ((p->flags & 1) == 0) { return p->current; } else { g_free (p->current); p->current = NULL; } } return NULL; } static void camel_text_index_key_cursor_class_init (CamelTextIndexKeyCursorClass *class) { GObjectClass *object_class; CamelIndexCursorClass *index_cursor_class; object_class = G_OBJECT_CLASS (class); object_class->dispose = text_index_key_cursor_dispose; object_class->finalize = text_index_key_cursor_finalize; index_cursor_class = CAMEL_INDEX_CURSOR_CLASS (class); index_cursor_class->next = text_index_key_cursor_next; } static void camel_text_index_key_cursor_init (CamelTextIndexKeyCursor *text_index_key_cursor) { text_index_key_cursor->priv = camel_text_index_key_cursor_get_instance_private (text_index_key_cursor); text_index_key_cursor->priv->keyid = 0; text_index_key_cursor->priv->flags = 0; text_index_key_cursor->priv->data = 0; text_index_key_cursor->priv->current = NULL; } CamelTextIndexKeyCursor * camel_text_index_key_cursor_new (CamelTextIndex *idx, CamelKeyTable *table) { CamelTextIndexKeyCursor *idc = g_object_new (CAMEL_TYPE_TEXT_INDEX_KEY_CURSOR, NULL); CamelIndexCursor *cic = &idc->parent; CamelTextIndexKeyCursorPrivate *p = CAMEL_TEXT_INDEX_KEY_CURSOR (idc)->priv; cic->index = CAMEL_INDEX (g_object_ref (idx)); p->table = g_object_ref (table); return idc; } /* ********************************************************************** */ #define m(x) #if 0 struct _CamelIndexRoot { struct _CamelBlockRoot root; camel_block_t word_root; /* a keyindex containing the keyid -> word mapping */ camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */ camel_block_t name_root; /* same, for names */ camel_block_t name_hash_root; }; gchar wordbuffer[] = "This is a buffer of multiple words. Some of the words are duplicates" " while other words are the same, some are in difFerenT Different different case cAsE casE," " with,with:with;with-with'with\"'\"various punctuation as well. So much for those Words. and 10" " numbers in a row too 1,2,3,4,5,6,7,8,9,10! Yay!."; gint main (gint argc, gchar **argv) { #if 0 CamelBlockFile *bs; CamelKeyTable *ki; CamelPartitionTable *cpi; CamelBlock *keyroot, *partroot; struct _CamelIndexRoot *root; FILE *fp; gchar line[256], *key; camel_key_t keyid; gint index = 0, flags, data; #endif CamelIndex *idx; CamelIndexName *idn; CamelIndexCursor *idc; const gchar *word; gint i; printf ("Camel text index tester!\n"); camel_init (NULL, 0); idx = (CamelIndex *) camel_text_index_new ("textindex", O_CREAT | O_RDWR | O_TRUNC); #if 1 camel_index_compress (idx); return 0; #endif for (i = 0; i < 100; i++) { gchar name[16]; g_snprintf (name, sizeof (name), "%d", i); printf ("Adding words to name '%s'\n", name); idn = camel_index_add_name (idx, name); camel_index_name_add_buffer (idn, wordbuffer, sizeof (wordbuffer) - 1); camel_index_write_name (idx, idn); g_object_unref (idn); } printf ("Looking up which names contain word 'word'\n"); idc = camel_index_find (idx, "words"); while ((word = camel_index_cursor_next (idc)) != NULL) { printf (" name is '%s'\n", word); } g_object_unref (idc); printf ("done.\n"); printf ("Looking up which names contain word 'truncate'\n"); idc = camel_index_find (idx, "truncate"); while ((word = camel_index_cursor_next (idc)) != NULL) { printf (" name is '%s'\n", word); } g_object_unref (idc); printf ("done.\n"); camel_index_sync (idx); g_object_unref (idx); #if 0 bs = camel_block_file_new ("blocks", "TESTINDX", CAMEL_BLOCK_SIZE); root = (struct _CamelIndexRoot *) camel_block_file_get_root (bs); if (root->word_root == 0) { keyroot = camel_block_file_new_block (bs); root->word_root = keyroot->id; camel_block_file_touch_block (bs, camel_block_file_get_root_block (bs)); } if (root->word_hash_root == 0) { partroot = camel_block_file_new_block (bs); root->word_hash_root = partroot->id; camel_block_file_touch_block (bs, camel_block_file_get_root_block (bs)); } ki = camel_key_table_new (bs, root->word_root); cpi = camel_partition_table_new (bs, root->word_hash_root); fp = fopen ("/usr/dict/words", "r"); if (fp == NULL) { perror ("fopen"); return 1; } while (fgets (line, sizeof (line), fp) != NULL) { line[strlen (line) - 1] = 0; /* see if its already there */ keyid = camel_partition_table_lookup (cpi, line); if (keyid == 0) { m (printf ("Adding word '%s' %d\n", line, index)); keyid = camel_key_table_add (ki, line, index, 0); m (printf (" key = %08x\n", keyid)); camel_partition_table_add (cpi, line, keyid); m (printf ("Lookup word '%s'\n", line)); keyid = camel_partition_table_lookup (cpi, line); m (printf (" key = %08x\n", keyid)); } m (printf ("Lookup key %08x\n", keyid)); camel_key_table_set_flags (ki, keyid, index, 1); data = camel_key_table_lookup (ki, keyid, &key, &flags); m (printf (" word = '%s' %d %04x\n", key, data, flags)); g_return_val_if_fail (data == index && strcmp (key, line) == 0, -1); g_free (key); index++; } printf ("Scanning again\n"); fseek (fp, SEEK_SET, 0); index = 0; while (fgets (line, sizeof (line), fp) != NULL) { line[strlen (line) - 1] = 0; m (printf ("Lookup word '%s' %d\n", line, index)); keyid = camel_partition_table_lookup (cpi, line); m (printf (" key = %08d\n", keyid)); m (printf ("Lookup key %08x\n", keyid)); data = camel_key_table_lookup (ki, keyid, &key, &flags); m (printf (" word = '%s' %d\n", key, data)); g_return_val_if_fail (data == index && strcmp (key, line) == 0, -1); g_free (key); index++; } fclose (fp); printf ("Freeing partition index\n"); camel_partition_table_free (cpi); printf ("Syncing block file\n"); camel_block_file_sync (bs); #endif return 0; } #endif