diff options
author | Amin Jain <ext-amit.1.jain@nokia.com> | 2010-02-25 15:28:21 +0000 |
---|---|---|
committer | Martyn Russell <martyn@lanedo.com> | 2010-02-25 15:28:21 +0000 |
commit | 456ff036c3d5e193f714e3fb9a833d45ef2bba6e (patch) | |
tree | 67f204847fa0a164d191844f1a75501ed1f4d9a0 | |
parent | fdacc1dae9e43c16d84bdeab6296877b9eeaf47f (diff) | |
download | tracker-456ff036c3d5e193f714e3fb9a833d45ef2bba6e.tar.gz |
Fixes GB#609075: add support in the PDF extractor for extracting the index (table of contents) data from PDF files
-rw-r--r-- | src/tracker-extract/tracker-extract-pdf.c | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/src/tracker-extract/tracker-extract-pdf.c b/src/tracker-extract/tracker-extract-pdf.c
index 32490d1a0..b6517651e 100644
--- a/src/tracker-extract/tracker-extract-pdf.c
+++ b/src/tracker-extract/tracker-extract-pdf.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2006, Mr Jamie McCracken (jamiemcc@gnome.org)
  * Copyright (C) 2008-2009, Nokia
+ * Copyright (C) 2010, Amit Aggarwal (amitcs06@gmail.com)
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
@@ -56,6 +57,161 @@ static TrackerExtractData data[] = {
 };
 
+/*
+ * Appends the indexable text carried by one outline @action to @toc.
+ * Every non-empty string is followed by a single space. Which fields are
+ * used depends on the action type; other types contribute nothing.
+ */
+static void
+read_toc_append_action (PopplerAction *action,
+                        GString *toc)
+{
+	switch (action->type) {
+	case POPPLER_ACTION_GOTO_DEST: {
+		PopplerActionGotoDest *ag = &action->goto_dest;
+		PopplerDest *agd = ag->dest;
+
+		if (!tracker_is_empty_string (ag->title)) {
+			g_string_append_printf (toc, "%s ", ag->title);
+		}
+
+		if (agd && !tracker_is_empty_string (agd->named_dest)) {
+			g_string_append_printf (toc, "%s ", agd->named_dest);
+		}
+
+		break;
+	}
+
+	case POPPLER_ACTION_LAUNCH: {
+		PopplerActionLaunch *al = &action->launch;
+
+		if (!tracker_is_empty_string (al->title)) {
+			g_string_append_printf (toc, "%s ", al->title);
+		}
+
+		if (!tracker_is_empty_string (al->file_name)) {
+			g_string_append_printf (toc, "%s ", al->file_name);
+		}
+
+		if (!tracker_is_empty_string (al->params)) {
+			g_string_append_printf (toc, "%s ", al->params);
+		}
+
+		break;
+	}
+
+	case POPPLER_ACTION_URI: {
+		PopplerActionUri *au = &action->uri;
+
+		if (!tracker_is_empty_string (au->uri)) {
+			g_string_append_printf (toc, "%s ", au->uri);
+		}
+
+		break;
+	}
+
+	case POPPLER_ACTION_NAMED: {
+		PopplerActionNamed *an = &action->named;
+
+		if (!tracker_is_empty_string (an->title)) {
+			g_string_append_printf (toc, "%s ", an->title);
+		}
+
+		if (!tracker_is_empty_string (an->named_dest)) {
+			g_string_append_printf (toc, "%s ", an->named_dest);
+		}
+
+		break;
+	}
+
+	case POPPLER_ACTION_MOVIE: {
+		PopplerActionMovie *am = &action->movie;
+
+		if (!tracker_is_empty_string (am->title)) {
+			g_string_append_printf (toc, "%s ", am->title);
+		}
+
+		break;
+	}
+
+	case POPPLER_ACTION_NONE:
+	case POPPLER_ACTION_UNKNOWN:
+	case POPPLER_ACTION_GOTO_REMOTE:
+	default:
+		/* Do nothing */
+		break;
+	}
+}
+
+/*
+ * Walks the outline level that @index points into, collecting the text of
+ * every entry and, recursively, of its children into *@toc.
+ *
+ * A NULL @index is ignored. Otherwise *@toc is created if it is still NULL
+ * and @index is consumed: it is released with poppler_index_iter_free()
+ * before returning, so the caller must not use or free it afterwards.
+ */
 static void
+read_toc (PopplerIndexIter *index,
+          GString **toc)
+{
+	if (!index) {
+		return;
+	}
+
+	if (!*toc) {
+		*toc = g_string_new ("");
+	}
+
+	do {
+		PopplerAction *action;
+
+		action = poppler_index_iter_get_action (index);
+
+		if (action) {
+			read_toc_append_action (action, *toc);
+
+			/* get_action() returned a new action that we own; release it */
+			poppler_action_free (action);
+		}
+
+		/* get_child() gives NULL for leaf entries, which read_toc() ignores */
+		read_toc (poppler_index_iter_get_child (index), toc);
+	} while (poppler_index_iter_next (index));
+
+	poppler_index_iter_free (index);
+}
+
+/*
+ * Collects the text of @document's outline and, when it is not empty,
+ * adds it to @metadata as the nfo:tableOfContents object.
+ */
+static void
+read_outline (PopplerDocument *document,
+              TrackerSparqlBuilder *metadata)
+{
+	PopplerIndexIter *index;
+	GString *toc = NULL;
+
+	index = poppler_index_iter_new (document);
+
+	if (!index) {
+		return;
+	}
+
+	/* read_toc() takes ownership of index and frees it */
+	read_toc (index, &toc);
+
+	if (toc) {
+		if (toc->len > 0) {
+			tracker_sparql_builder_predicate (metadata, "nfo:tableOfContents");
+			tracker_sparql_builder_object_unvalidated (metadata, toc->str);
+		}
+
+		g_string_free (toc, TRUE);
+	}
+}
+
+static void
 insert_keywords (TrackerSparqlBuilder *metadata,
                  gchar *keywords)
 {
@@ -466,6 +622,8 @@ extract_pdf (const gchar *uri,
 		g_free (content);
 	}
 
+	read_outline (document, metadata);
+
 	g_object_unref (document);
 }
 