summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/websupport.rst3
-rw-r--r--sphinx/websupport/__init__.py15
-rw-r--r--sphinx/websupport/search/__init__.py88
-rw-r--r--sphinx/websupport/search/whooshsearch.py8
-rw-r--r--sphinx/websupport/search/xapiansearch.py6
5 files changed, 95 insertions, 25 deletions
diff --git a/doc/websupport.rst b/doc/websupport.rst
index e8fc238b..1b6725df 100644
--- a/doc/websupport.rst
+++ b/doc/websupport.rst
@@ -11,4 +11,5 @@ into your web application. To learn more read the
web/quickstart
web/api
- web/frontend \ No newline at end of file
+ web/frontend
+ web/searchadapters \ No newline at end of file
diff --git a/sphinx/websupport/__init__.py b/sphinx/websupport/__init__.py
index 2dbbe319..407cb4c9 100644
--- a/sphinx/websupport/__init__.py
+++ b/sphinx/websupport/__init__.py
@@ -17,7 +17,7 @@ from jinja2 import Environment, FileSystemLoader
from sphinx.application import Sphinx
from sphinx.util.osutil import ensuredir
-from sphinx.websupport.search import search_adapters
+from sphinx.websupport.search import BaseSearch, search_adapters
from sphinx.websupport import comments as sphinxcomments
class WebSupportApp(Sphinx):
@@ -66,11 +66,14 @@ class WebSupport(object):
self.template_env = Environment(loader=loader)
def _init_search(self, search):
- mod, cls = search_adapters[search]
- search_class = getattr(__import__('sphinx.websupport.search.' + mod,
+ if isinstance(search, BaseSearch):
+ self.search = search
+ else:
+ mod, cls = search_adapters[search]
+ search_class = getattr(__import__('sphinx.websupport.search.' + mod,
None, None, [cls]), cls)
- search_path = path.join(self.outdir, 'search')
- self.search = search_class(search_path)
+ search_path = path.join(self.outdir, 'search')
+ self.search = search_class(search_path)
self.results_template = \
self.template_env.get_template('searchresults.html')
@@ -133,7 +136,7 @@ class WebSupport(object):
:param q: the search query
"""
- results, results_found, results_displayed = self.search.query(q)
+ results = self.search.query(q)
ctx = {'search_performed': True,
'search_results': results,
'q': q}
diff --git a/sphinx/websupport/search/__init__.py b/sphinx/websupport/search/__init__.py
index b4bf7386..1886776a 100644
--- a/sphinx/websupport/search/__init__.py
+++ b/sphinx/websupport/search/__init__.py
@@ -13,39 +13,107 @@ import re
class BaseSearch(object):
def init_indexing(self, changed=[]):
+ """Called by the builder to initialize the search indexer. `changed`
+ is a list of pagenames that will be reindexed. You may want to remove
+ these from the search index before indexing begins.
+
+ `param changed` is a list of pagenames that will be re-indexed
+ """
pass
def finish_indexing(self):
+ """Called by the builder when writing has been completed. Use this
+ to perform any finalization or cleanup actions after indexing is
+ complete.
+ """
pass
def feed(self, pagename, title, doctree):
+ """Called by the builder to add a doctree to the index. Converts the
+ `doctree` to text and passes it to :meth:`add_document`. You probably
+ won't want to override this unless you need access to the `doctree`.
+ Override :meth:`add_document` instead.
+
+ `pagename` is the name of the page to be indexed
+
+ `title` is the title of the page to be indexed
+
+ `doctree` is the docutils doctree representation of the page
+ """
self.add_document(pagename, title, doctree.astext())
- def add_document(self, path, title, text):
- raise NotImplemented
+ def add_document(self, pagename, title, text):
+ """Called by :meth:`feed` to add a document to the search index.
+ This method should do everything necessary to add a single
+ document to the search index.
+
+ `pagename` is the name of the page being indexed.
+ It is the combination of the source file's relative path and filename,
+ minus the extension. For example, if the source file is
+ "ext/builders.rst", the `pagename` would be "ext/builders". This
+ will need to be returned with search results when processing a
+ query.
+
+ `title` is the page's title, and will need to be returned with
+ search results.
+
+ `text` is the full text of the page. You probably want to store this
+ somehow to use while creating the context for search results.
+ """
+ raise NotImplementedError()
def query(self, q):
+ """Called by the web support api to get search results. This method
+ compiles the regular expression to be used when
+ :meth:`extracting context <extract_context>`, then calls
+ :meth:`handle_query`. You won't want to override this unless you
+ don't want to use the included :meth:`extract_context` method.
+ Override :meth:`handle_query` instead.
+
+ `q` is the search query string.
+ """
self.context_re = re.compile('|'.join(q.split()), re.I)
return self.handle_query(q)
def handle_query(self, q):
- raise NotImplemented
+ """Called by :meth:`query` to retrieve search results for a search
+ query `q`. This should return an iterable containing tuples of the
+ following format::
+
+ (<path>, <title>, <context>)
+
+ `path` and `title` are the same values that were passed to
+ :meth:`add_document`, and `context` should be a short text snippet
+ of the text surrounding the search query in the document.
+
+ The :meth:`extract_context` method is provided as a simple way
+ to create the `context`.
+ """
+ raise NotImplementedError()
- def extract_context(self, text, query_string):
+ def extract_context(self, text, length=240):
+ """Extract the context for the search query from the document's
+ full `text`.
+
+ `text` is the full text of the document to create the context for.
+
+ `length` is the length of the context snippet to return.
+ """
res = self.context_re.search(text)
if res is None:
return ''
- start = max(res.start() - 120, 0)
- end = start + 240
- context = ''.join(['...' if start > 0 else '',
- text[start:end],
- '...' if end < len(text) else ''])
+ context_start = max(res.start() - length/2, 0)
+ context_end = start + length
+ context = ''.join(['...' if context_start > 0 else '',
+ text[context_start:context_end],
+ '...' if context_end < len(text) else ''])
try:
return unicode(context, errors='ignore')
except TypeError:
return context
-
+
+# The built-in search adapters.
search_adapters = {
'xapian': ('xapiansearch', 'XapianSearch'),
'whoosh': ('whooshsearch', 'WhooshSearch'),
diff --git a/sphinx/websupport/search/whooshsearch.py b/sphinx/websupport/search/whooshsearch.py
index 991d4232..00c7403c 100644
--- a/sphinx/websupport/search/whooshsearch.py
+++ b/sphinx/websupport/search/whooshsearch.py
@@ -38,8 +38,8 @@ class WhooshSearch(BaseSearch):
def finish_indexing(self):
self.writer.commit()
- def add_document(self, path, title, text):
- self.writer.add_document(path=unicode(path),
+ def add_document(self, pagename, title, text):
+ self.writer.add_document(path=unicode(pagename),
title=title,
text=text)
@@ -47,10 +47,10 @@ class WhooshSearch(BaseSearch):
res = self.searcher.find('text', q)
results = []
for result in res:
- context = self.extract_context(result['text'], q)
+ context = self.extract_context(result['text'])
results.append((result['path'],
result.get('title', ''),
context))
- return results, len(res), res.scored_length()
+ return results
diff --git a/sphinx/websupport/search/xapiansearch.py b/sphinx/websupport/search/xapiansearch.py
index f8dbecd9..f5ad9688 100644
--- a/sphinx/websupport/search/xapiansearch.py
+++ b/sphinx/websupport/search/xapiansearch.py
@@ -70,15 +70,13 @@ class XapianSearch(BaseSearch):
# Find the top 100 results for the query.
enquire.set_query(query)
matches = enquire.get_mset(0, 100)
- results_found = matches.get_matches_estimated()
- results_displayed = matches.size()
results = []
for m in matches:
- context = self.extract_context(m.document.get_data(), q)
+ context = self.extract_context(m.document.get_data())
results.append((m.document.get_value(self.DOC_PATH),
m.document.get_value(self.DOC_TITLE),
''.join(context) ))
- return results, results_found, results_displayed
+ return results