summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexis Metaireau <alexis@notmyidea.org>2012-09-02 09:30:29 -0700
committerAlexis Metaireau <alexis@notmyidea.org>2012-09-02 09:30:29 -0700
commit8dcc503bc011bd6d2472e6211cb72eb6283d216b (patch)
treea44753e7cd328b1cdf2ba496192d802a04f07c63
parentdfd3fca92bbd647f89f4bf8d1072aa6ac2315632 (diff)
parent229b0e4dcc50daa32ffaf48234f58d1e694ecb7a (diff)
downloadpelican-8dcc503bc011bd6d2472e6211cb72eb6283d216b.tar.gz
Merge pull request #468 from m-r-r/master
New signal and new plugin
-rw-r--r--docs/plugins.rst79
-rw-r--r--pelican/__init__.py14
-rw-r--r--pelican/plugins/sitemap.py208
-rw-r--r--pelican/signals.py1
4 files changed, 301 insertions, 1 deletions
diff --git a/docs/plugins.rst b/docs/plugins.rst
index 53858668..99c0429a 100644
--- a/docs/plugins.rst
+++ b/docs/plugins.rst
@@ -59,6 +59,9 @@ Signal Arguments Description
initialized pelican object
article_generate_context article_generator, metadata
article_generator_init article_generator invoked in the ArticlesGenerator.__init__
+get_generators generators invoked in Pelican.get_generator_classes,
+ can return a Generator, or several
+ generators in a tuple or in a list.
pages_generate_context pages_generator, metadata
pages_generator_init pages_generator invoked in the PagesGenerator.__init__
========================= ============================ =========================================
@@ -108,3 +111,79 @@ variable, as in the example::
``github_activity`` is a list of lists. The first element is the title
and the second element is the raw HTML from GitHub.
+
+
+Sitemap
+-------
+
+The plugin generates a sitemap of the blog.
+It can generate plain text sitemaps or XML sitemaps.
+
+Configuration
+"""""""""""""
+
+You can use the ``SITEMAP`` setting to configure the behavior of the
+plugin.
+
+The ``SITEMAP`` variable must be a Python dictionary; it can contain three keys:
+
+
+- ``format``, which sets the output format of the plugin (``xml`` or ``txt``)
+
+- ``priorities``, which is a dictionary with three keys:
+
+ - ``articles``, the priority for the URLs of the articles and their
+ translations
+
+ - ``pages``, the priority for the URLs of the static pages
+
+ - ``indexes``, the priority for the URLs of the index pages, such as tags,
+ author pages, categories indexes, archives, etc...
+
+ All the values of this dictionary must be decimal numbers between ``0`` and ``1``.
+
+- ``changefreqs``, which is a dictionary with three items:
+
+ - ``articles``, the update frequency of the articles
+
+ - ``pages``, the update frequency of the pages
+
+ - ``indexes``, the update frequency of the index pages
+
+ Valid values are ``always``, ``hourly``, ``daily``, ``weekly``, ``monthly``,
+ ``yearly`` and ``never``.
+
+
+If a key is missing or a value is incorrect, it will be replaced with the
+default value.
+
+The sitemap is saved in ``<output_path>/sitemap.<format>``.
+
+.. note::
+ ``priorities`` and ``changefreqs`` provide information for search engines.
+ They are only used in the XML sitemaps.
+ For more information: <http://www.sitemaps.org/protocol.html#xmlTagDefinitions>
+
+
+Example
+"""""""
+
+Here is an example configuration (these are also the default settings):
+
+.. code-block:: python
+
+ PLUGINS=['pelican.plugins.sitemap',]
+
+ SITEMAP = {
+ 'format': 'xml',
+ 'priorities': {
+ 'articles': 0.5,
+ 'indexes': 0.5,
+ 'pages': 0.5
+ },
+ 'changefreqs': {
+ 'articles': 'monthly',
+ 'indexes': 'daily',
+ 'pages': 'monthly'
+ }
+ }
diff --git a/pelican/__init__.py b/pelican/__init__.py
index a69752d8..b9f9bb22 100644
--- a/pelican/__init__.py
+++ b/pelican/__init__.py
@@ -8,7 +8,7 @@ import argparse
from pelican import signals
-from pelican.generators import (ArticlesGenerator, PagesGenerator,
+from pelican.generators import (Generator, ArticlesGenerator, PagesGenerator,
StaticGenerator, PdfGenerator, LessCSSGenerator)
from pelican.log import init
from pelican.settings import read_settings, _DEFAULT_CONFIG
@@ -185,6 +185,18 @@ class Pelican(object):
generators.append(PdfGenerator)
if self.settings['LESS_GENERATOR']: # can be True or PATH to lessc
generators.append(LessCSSGenerator)
+
+ for pair in signals.get_generators.send(self):
+ (funct, value) = pair
+
+ if not isinstance(value, (tuple, list)):
+ value = (value, )
+
+ for v in value:
+ if isinstance(v, type):
+ logger.debug('Found generator: {0}'.format(v))
+ generators.append(v)
+
return generators
def get_writer(self):
diff --git a/pelican/plugins/sitemap.py b/pelican/plugins/sitemap.py
new file mode 100644
index 00000000..6402ba9c
--- /dev/null
+++ b/pelican/plugins/sitemap.py
@@ -0,0 +1,208 @@
+import os.path
+
+from datetime import datetime
+from logging import debug, warning, error, info
+from codecs import open
+
+from pelican import signals, contents
+# Fixed first lines of the plain-text sitemap; {0} is the site URL.
+TXT_HEADER = u"""{0}/index.html
+{0}/archives.html
+{0}/tags.html
+{0}/categories.html
+"""
+# XML prologue plus four fixed pages: {0} site URL, {1} lastmod, {2} changefreq, {3} priority.
+XML_HEADER = u"""<?xml version="1.0" encoding="utf-8"?>
+<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+ xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+
+ <url>
+ <loc>{0}/index.html</loc>
+ <lastmod>{1}</lastmod>
+ <changefreq>{2}</changefreq>
+ <priority>{3}</priority>
+ </url>
+
+ <url>
+ <loc>{0}/archives.html</loc>
+ <lastmod>{1}</lastmod>
+ <changefreq>{2}</changefreq>
+ <priority>{3}</priority>
+ </url>
+
+ <url>
+ <loc>{0}/tags.html</loc>
+ <lastmod>{1}</lastmod>
+ <changefreq>{2}</changefreq>
+ <priority>{3}</priority>
+ </url>
+
+ <url>
+ <loc>{0}/categories.html</loc>
+ <lastmod>{1}</lastmod>
+ <changefreq>{2}</changefreq>
+ <priority>{3}</priority>
+ </url>
+"""
+# One <url> entry: {0} site URL, {1} page URL, {2} lastmod, {3} changefreq, {4} priority.
+XML_URL = u"""
+ <url>
+ <loc>{0}/{1}</loc>
+ <lastmod>{2}</lastmod>
+ <changefreq>{3}</changefreq>
+ <priority>{4}</priority>
+ </url>
+"""
+# Closes the <urlset> element opened by XML_HEADER.
+XML_FOOTER = u"""
+</urlset>
+"""
+
+
+def format_date(date):
+    """Format `date` as ``YYYY-MM-DDTHH:MM:SS`` followed by a UTC offset.
+
+    Aware datetimes get their own offset written as ``+hh:mm`` / ``-hh:mm``.
+    Naive datetimes, and aware ones whose offset is undefined, get the
+    ``-00:00`` suffix.
+    """
+    # '%z' yields the offset as '+hhmm' and an empty string when no offset
+    # is known.  Do not use '%s' here: it is a non-portable directive that
+    # expands to epoch seconds, not to the time zone.
+    offset = date.strftime('%z')
+    if offset:
+        tz = offset[:3] + ':' + offset[3:5]
+    else:
+        tz = "-00:00"
+    return date.strftime("%Y-%m-%dT%H:%M:%S") + tz
+
+
+
+class SitemapGenerator(object):
+    """Generator that writes ``sitemap.xml`` or ``sitemap.txt``.
+
+    The output format, the per-kind priorities and the per-kind change
+    frequencies come from the optional ``SITEMAP`` settings dictionary.
+    Missing or invalid entries keep the defaults assigned in ``__init__``.
+    """
+
+    def __init__(self, context, settings, path, theme, output_path, *null):
+        """Store the generation context and read the ``SITEMAP`` setting.
+
+        ``path``, ``theme`` and any extra positional arguments are accepted
+        but not used.
+        """
+        self.output_path = output_path
+        self.context = context
+        self.now = datetime.now()
+        self.siteurl = settings.get('SITEURL')
+
+        self.format = 'xml'
+
+        self.changefreqs = {
+            'articles': 'monthly',
+            'indexes': 'daily',
+            'pages': 'monthly'
+        }
+
+        self.priorities = {
+            'articles': 0.5,
+            'indexes': 0.5,
+            'pages': 0.5
+        }
+
+        config = settings.get('SITEMAP', {})
+
+        if not isinstance(config, dict):
+            warning("sitemap plugin: the SITEMAP setting must be a dict")
+            return
+
+        fmt = config.get('format')
+
+        if fmt == 'txt':
+            # Priorities and change frequencies only appear in XML output,
+            # so there is nothing else to validate.
+            self.format = fmt
+            return
+
+        # A missing 'format' key silently selects the default; only an
+        # explicit but unknown value deserves a warning.
+        if fmt not in (None, 'xml'):
+            warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'")
+            warning("sitemap plugin: Setting SITEMAP['format'] on `xml'")
+
+        self._read_priorities(config.get('priorities'))
+        self._read_changefreqs(config.get('changefreqs'))
+
+    def _read_priorities(self, pris):
+        """Merge the valid entries of `pris` into ``self.priorities``."""
+        if pris is None:
+            return
+
+        if not isinstance(pris, dict):
+            warning("sitemap plugin: SITEMAP['priorities'] must be a dict")
+            warning("sitemap plugin: using the default values")
+            return
+
+        # Iterate over the known keys and never write into `pris`: it is
+        # the user's settings object.
+        for k in ('articles', 'indexes', 'pages'):
+            if k not in pris:
+                continue
+            v = pris[k]
+            is_number = (isinstance(v, (int, float))
+                         and not isinstance(v, bool))
+            if is_number and 0 <= v <= 1:
+                self.priorities[k] = v
+            else:
+                warning("sitemap plugin: priorities must be numbers "
+                        "between 0 and 1")
+                warning("sitemap plugin: setting SITEMAP['priorities']"
+                        "['{0}'] on {1}".format(k, self.priorities[k]))
+
+    def _read_changefreqs(self, chfreqs):
+        """Merge the valid entries of `chfreqs` into ``self.changefreqs``."""
+        if chfreqs is None:
+            return
+
+        if not isinstance(chfreqs, dict):
+            warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict")
+            warning("sitemap plugin: using the default values")
+            return
+
+        valid_chfreqs = ('always', 'hourly', 'daily', 'weekly', 'monthly',
+                         'yearly', 'never')
+
+        for k in ('articles', 'indexes', 'pages'):
+            if k not in chfreqs:
+                continue
+            v = chfreqs[k]
+            if v in valid_chfreqs:
+                self.changefreqs[k] = v
+            else:
+                warning("sitemap plugin: invalid changefreq `{0}'".format(v))
+                warning("sitemap plugin: setting SITEMAP['changefreqs']"
+                        "['{0}'] on '{1}'".format(k, self.changefreqs[k]))
+
+    def write_url(self, page, fd):
+        """Write the sitemap entry for `page` to the open file `fd`.
+
+        Objects whose ``status`` attribute is not ``'published'`` are
+        skipped; objects without a ``status`` are treated as published.
+        """
+        if getattr(page, 'status', 'published') != 'published':
+            return
+
+        if self.format != 'xml':
+            # Plain-text sitemaps hold one absolute URL per line.
+            fd.write(self.siteurl + '/' + page.url + '\n')
+            return
+
+        # Objects without a date fall back to the generation time.
+        lastmod = format_date(getattr(page, 'date', self.now))
+
+        if isinstance(page, contents.Article):
+            kind = 'articles'
+        elif isinstance(page, contents.Page):
+            kind = 'pages'
+        else:
+            kind = 'indexes'
+
+        fd.write(XML_URL.format(self.siteurl, page.url, lastmod,
+                                self.changefreqs[kind],
+                                self.priorities[kind]))
+
+    def generate_output(self, writer):
+        """Write ``sitemap.<format>`` into the output directory.
+
+        `writer` is accepted but not used; the file is written directly.
+        """
+        path = os.path.join(self.output_path,
+                            'sitemap.{0}'.format(self.format))
+
+        # Build a fresh list so the context's own lists are never modified.
+        pages = list(self.context['pages'])
+        pages.extend(self.context['articles'])
+        pages.extend(c for (c, a) in self.context['categories'])
+        pages.extend(t for (t, a) in self.context['tags'])
+        pages.extend(a for (a, b) in self.context['authors'])
+
+        for article in self.context['articles']:
+            pages.extend(article.translations)
+
+        info('writing {0}'.format(path))
+
+        with open(path, 'w', encoding='utf-8') as fd:
+
+            if self.format == 'xml':
+                fd.write(XML_HEADER.format(
+                    self.siteurl,
+                    format_date(self.now),
+                    self.changefreqs['indexes'],
+                    self.priorities['indexes']
+                ))
+            else:
+                fd.write(TXT_HEADER.format(self.siteurl))
+
+            for page in pages:
+                self.write_url(page, fd)
+
+            if self.format == 'xml':
+                fd.write(XML_FOOTER)
+
+
+
+def get_generators(generators):
+    """Handler for the ``get_generators`` signal.
+
+    The argument supplied by the signal is not used; the sitemap generator
+    class is always returned.
+    """
+    generator_class = SitemapGenerator
+    return generator_class
+
+
+def register():
+    """Connect this plugin's handler to the ``get_generators`` signal."""
+    handler = get_generators
+    signals.get_generators.connect(handler)
diff --git a/pelican/signals.py b/pelican/signals.py
index 4d9ab512..7ee88a0a 100644
--- a/pelican/signals.py
+++ b/pelican/signals.py
@@ -3,5 +3,6 @@ from blinker import signal
initialized = signal('pelican_initialized')
article_generate_context = signal('article_generate_context')
article_generator_init = signal('article_generator_init')
+get_generators = signal('get_generators')
pages_generate_context = signal('pages_generate_context')
pages_generator_init = signal('pages_generator_init')