diff options
| -rw-r--r-- | doc/websupport.rst | 3 | ||||
| -rw-r--r-- | sphinx/websupport/__init__.py | 15 | ||||
| -rw-r--r-- | sphinx/websupport/search/__init__.py | 88 | ||||
| -rw-r--r-- | sphinx/websupport/search/whooshsearch.py | 8 | ||||
| -rw-r--r-- | sphinx/websupport/search/xapiansearch.py | 6 |
5 files changed, 95 insertions, 25 deletions
diff --git a/doc/websupport.rst b/doc/websupport.rst index e8fc238b..1b6725df 100644 --- a/doc/websupport.rst +++ b/doc/websupport.rst @@ -11,4 +11,5 @@ into your web application. To learn more read the web/quickstart web/api - web/frontend
\ No newline at end of file + web/frontend + web/searchadapters
\ No newline at end of file diff --git a/sphinx/websupport/__init__.py b/sphinx/websupport/__init__.py index 2dbbe319..407cb4c9 100644 --- a/sphinx/websupport/__init__.py +++ b/sphinx/websupport/__init__.py @@ -17,7 +17,7 @@ from jinja2 import Environment, FileSystemLoader from sphinx.application import Sphinx from sphinx.util.osutil import ensuredir -from sphinx.websupport.search import search_adapters +from sphinx.websupport.search import BaseSearch, search_adapters from sphinx.websupport import comments as sphinxcomments class WebSupportApp(Sphinx): @@ -66,11 +66,14 @@ class WebSupport(object): self.template_env = Environment(loader=loader) def _init_search(self, search): - mod, cls = search_adapters[search] - search_class = getattr(__import__('sphinx.websupport.search.' + mod, + if isinstance(search, BaseSearch): + self.search = search + else: + mod, cls = search_adapters[search] + search_class = getattr(__import__('sphinx.websupport.search.' + mod, None, None, [cls]), cls) - search_path = path.join(self.outdir, 'search') - self.search = search_class(search_path) + search_path = path.join(self.outdir, 'search') + self.search = search_class(search_path) self.results_template = \ self.template_env.get_template('searchresults.html') @@ -133,7 +136,7 @@ class WebSupport(object): :param q: the search query """ - results, results_found, results_displayed = self.search.query(q) + results = self.search.query(q) ctx = {'search_performed': True, 'search_results': results, 'q': q} diff --git a/sphinx/websupport/search/__init__.py b/sphinx/websupport/search/__init__.py index b4bf7386..1886776a 100644 --- a/sphinx/websupport/search/__init__.py +++ b/sphinx/websupport/search/__init__.py @@ -13,39 +13,107 @@ import re class BaseSearch(object): def init_indexing(self, changed=[]): + """Called by the builder to initialize the search indexer. `changed` + is a list of pagenames that will be reindexed. You may want to remove + these from the search index before indexing begins. 
+ + `param changed` is a list of pagenames that will be re-indexed + """ pass def finish_indexing(self): + """Called by the builder when writing has been completed. Use this + to perform any finalization or cleanup actions after indexing is + complete. + """ pass def feed(self, pagename, title, doctree): + """Called by the builder to add a doctree to the index. Converts the + `doctree` to text and passes it to :meth:`add_document`. You probably + won't want to override this unless you need access to the `doctree`. + Override :meth:`add_document` instead. + + `pagename` is the name of the page to be indexed + + `title` is the title of the page to be indexed + + `doctree` is the docutils doctree representation of the page + """ self.add_document(pagename, title, doctree.astext()) - def add_document(self, path, title, text): - raise NotImplemented + def add_document(self, pagename, title, text): + """Called by :meth:`feed` to add a document to the search index. + This method should do everything necessary to add a single + document to the search index. + + `pagename` is the name of the page being indexed. + It is the combination of the source file's relative path and filename, + minus the extension. For example, if the source file is + "ext/builders.rst", the `pagename` would be "ext/builders". This + will need to be returned with search results when processing a + query. + + `title` is the page's title, and will need to be returned with + search results. + + `text` is the full text of the page. You probably want to store this + somehow to use while creating the context for search results. + """ + raise NotImplementedError() def query(self, q): + """Called by the web support api to get search results. This method + compiles the regular expression to be used when + :meth:`extracting context <extract_context>`, then calls + :meth:`handle_query`. You won't want to override this unless you + don't want to use the included :meth:`extract_context` method. 
+ Override :meth:`handle_query` instead. + + `q` is the search query string. + """ self.context_re = re.compile('|'.join(q.split()), re.I) return self.handle_query(q) def handle_query(self, q): - raise NotImplemented + """Called by :meth:`query` to retrieve search results for a search + query `q`. This should return an iterable containing tuples of the + following format:: + + (<path>, <title>, <context>) + + `path` and `title` are the same values that were passed to + :meth:`add_document`, and `context` should be a short text snippet + of the text surrounding the search query in the document. + + The :meth:`extract_context` method is provided as a simple way + to create the `context`. + """ + raise NotImplementedError() - def extract_context(self, text, query_string): + def extract_context(self, text, length=240): + """Extract the context for the search query from the document's + full `text`. + + `text` is the full text of the document to create the context for. + + `length` is the length of the context snippet to return. + """ res = self.context_re.search(text) if res is None: return '' - start = max(res.start() - 120, 0) - end = start + 240 - context = ''.join(['...' if start > 0 else '', - text[start:end], - '...' if end < len(text) else '']) + context_start = max(res.start() - length/2, 0) + context_end = start + length + context = ''.join(['...' if context_start > 0 else '', + text[context_start:context_end], + '...' if context_end < len(text) else '']) try: return unicode(context, errors='ignore') except TypeError: return context - + +# The built-in search adapters. 
search_adapters = { 'xapian': ('xapiansearch', 'XapianSearch'), 'whoosh': ('whooshsearch', 'WhooshSearch'), diff --git a/sphinx/websupport/search/whooshsearch.py b/sphinx/websupport/search/whooshsearch.py index 991d4232..00c7403c 100644 --- a/sphinx/websupport/search/whooshsearch.py +++ b/sphinx/websupport/search/whooshsearch.py @@ -38,8 +38,8 @@ class WhooshSearch(BaseSearch): def finish_indexing(self): self.writer.commit() - def add_document(self, path, title, text): - self.writer.add_document(path=unicode(path), + def add_document(self, pagename, title, text): + self.writer.add_document(path=unicode(pagename), title=title, text=text) @@ -47,10 +47,10 @@ class WhooshSearch(BaseSearch): res = self.searcher.find('text', q) results = [] for result in res: - context = self.extract_context(result['text'], q) + context = self.extract_context(result['text']) results.append((result['path'], result.get('title', ''), context)) - return results, len(res), res.scored_length() + return results diff --git a/sphinx/websupport/search/xapiansearch.py b/sphinx/websupport/search/xapiansearch.py index f8dbecd9..f5ad9688 100644 --- a/sphinx/websupport/search/xapiansearch.py +++ b/sphinx/websupport/search/xapiansearch.py @@ -70,15 +70,13 @@ class XapianSearch(BaseSearch): # Find the top 100 results for the query. enquire.set_query(query) matches = enquire.get_mset(0, 100) - results_found = matches.get_matches_estimated() - results_displayed = matches.size() results = [] for m in matches: - context = self.extract_context(m.document.get_data(), q) + context = self.extract_context(m.document.get_data()) results.append((m.document.get_value(self.DOC_PATH), m.document.get_value(self.DOC_TITLE), ''.join(context) )) - return results, results_found, results_displayed + return results |
