summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYoshiki Shibukawa <yoshiki@shibu.jp>2014-01-22 02:12:04 -0800
committerYoshiki Shibukawa <yoshiki@shibu.jp>2014-01-22 02:12:04 -0800
commitd8c8fa8240b0d4dd67702a5da388055d4429c393 (patch)
treee202a140420722fcd215a2a248673a3dbcc21b77
parent49238644401fa2728f171f5fa29c18f9150b96d5 (diff)
downloadsphinx-d8c8fa8240b0d4dd67702a5da388055d4429c393.tar.gz
Add development memo about stemming JS code, acceleration tips about stemming, small bug fix
-rw-r--r--doc/config.rst9
-rw-r--r--doc/devguide.rst12
-rw-r--r--sphinx/search/__init__.py2
3 files changed, 22 insertions, 1 deletions
diff --git a/doc/config.rst b/doc/config.rst
index 260ccddd..2fd1cf66 100644
--- a/doc/config.rst
+++ b/doc/config.rst
@@ -747,6 +747,15 @@ that use Sphinx' HTMLWriter class.
* ``sv`` -- Swedish
* ``tr`` -- Turkish
+ .. admonition:: Accelerate build speed
+
+ Each language (except Japanese) provides its own stemming algorithm.
+ Sphinx uses a Python implementation by default. You can use
+ a C implementation to accelerate building the index file.
+
+ * `PorterStemmer <https://pypi.python.org/pypi/PorterStemmer>`_ (`en`)
+ * `PyStemmer <https://pypi.python.org/pypi/PyStemmer>`_ (all languages)
+
.. versionadded:: 1.1
.. versionchanged:: 1.3
diff --git a/doc/devguide.rst b/doc/devguide.rst
index fccdd3fa..666822d6 100644
--- a/doc/devguide.rst
+++ b/doc/devguide.rst
@@ -243,3 +243,15 @@ Debugging Tips
* Set the debugging options in the `Docutils configuration file
<http://docutils.sourceforge.net/docs/user/config.html>`_.
+
+* JavaScript stemming algorithms in `sphinx/search/*.py` (except `en.py`) are
+ generated by this
+ `modified snowballcode generator <https://github.com/shibukawa/snowball>`_.
+ Generated `JSX <http://jsx.github.io/>`_ files are
+ in `this repository <https://github.com/shibukawa/snowball-stemmer.jsx>`_.
+ You can get the resulting JavaScript files using the following command:
+
+ .. code-block:: bash
+
+ $ npm install
+ $ node_modules/.bin/grunt build # -> dest/*.global.js
diff --git a/sphinx/search/__init__.py b/sphinx/search/__init__.py
index 14fe16dd..03a1f9df 100644
--- a/sphinx/search/__init__.py
+++ b/sphinx/search/__init__.py
@@ -89,7 +89,7 @@ var Stemmer = function() {
Return true if the target word should be registered in the search index.
This method is called after stemming.
"""
- return not (((len(word) < 3) and (12353 < ord(word[0]) < 12436)) or
+ return len(word) == 0 or not (((len(word) < 3) and (12353 < ord(word[0]) < 12436)) or
(ord(word[0]) < 256 and (len(word) < 3 or word in self.stopwords or
word.isdigit())))