From 229b0e4dcc50daa32ffaf48234f58d1e694ecb7a Mon Sep 17 00:00:00 2001 From: m-r-r Date: Tue, 21 Aug 2012 13:08:21 +0200 Subject: Sitemap plugin & `get_generators` signal This is a combination of 13 commits: 1. New signal for registering custom generators 2. New plugin: pelican.plugins.sitemap 3. pelican.plugins.sitemap: more settings 4. pelican.plugins.sitemap: translations are indexed 5. pelican.plugins.sitemap: added documentation 6. pelican.plugins.sitemap: added XML DTD & W3C dates 7. pelican.plugins.sitemap: removed a bug 8. the `get_generators` can now return a tuple 9. pelican.plugins.sitemap: cleaned the code 10. pelican.plugin.sitemap: settings changes 11. sitemap plugin: improved configuration & documentation 12. sitemap plugin: :set spell 13. sitemap plugin: removed useless whitespaces --- docs/plugins.rst | 79 +++++++++++++++++ pelican/__init__.py | 14 ++- pelican/plugins/sitemap.py | 208 +++++++++++++++++++++++++++++++++++++++++++++ pelican/signals.py | 1 + 4 files changed, 301 insertions(+), 1 deletion(-) create mode 100644 pelican/plugins/sitemap.py diff --git a/docs/plugins.rst b/docs/plugins.rst index 53858668..99c0429a 100644 --- a/docs/plugins.rst +++ b/docs/plugins.rst @@ -59,6 +59,9 @@ Signal Arguments Description initialized pelican object article_generate_context article_generator, metadata article_generator_init article_generator invoked in the ArticlesGenerator.__init__ +get_generators generators invoked in Pelican.get_generator_classes, + can return a Generator, or several + generator in a tuple or in a list. pages_generate_context pages_generator, metadata pages_generator_init pages_generator invoked in the PagesGenerator.__init__ ========================= ============================ ========================================= @@ -108,3 +111,79 @@ variable, as in the example:: ``github_activity`` is a list of lists. The first element is the title and the second element is the raw HTML from GitHub. 
+ + +Sitemap +------- + +The plugin generates a sitemap of the blog. +It can generate plain text sitemaps or XML sitemaps. + +Configuration +""""""""""""" + +You can use the ``SITEMAP`` setting to configure the behavior of the +plugin. + +The ``SITEMAP`` variable must be a Python dictionary; it can contain three keys: + + +- ``format``, which sets the output format of the plugin (``xml`` or ``txt``) + +- ``priorities``, which is a dictionary with three keys: + + - ``articles``, the priority for the URLs of the articles and their + translations + + - ``pages``, the priority for the URLs of the static pages + + - ``indexes``, the priority for the URLs of the index pages, such as tags, + author pages, category indexes, archives, etc. + + All the values of this dictionary must be decimal numbers between ``0`` and ``1``. + +- ``changefreqs``, which is a dictionary with three items: + + - ``articles``, the update frequency of the articles + + - ``pages``, the update frequency of the pages + + - ``indexes``, the update frequency of the index pages + + A valid value is ``always``, ``hourly``, ``daily``, ``weekly``, ``monthly``, + ``yearly`` or ``never``. + + +If a key is missing or a value is incorrect, it will be replaced with the +default value. + +The sitemap is saved in ``<output_path>/sitemap.<format>``. + +.. note:: + ``priorities`` and ``changefreqs`` are hints for search engines. + They are only used in the XML sitemaps. + For more information: <http://www.sitemaps.org/protocol.html> + + +Example +""""""" + +Here is an example of configuration (these are also the default settings): + +.. 
# Plain-text sitemap preamble: the four site-wide index pages.
# ``{0}`` is the site URL.
TXT_HEADER = u"""{0}/index.html
{0}/archives.html
{0}/tags.html
{0}/categories.html
"""

# NOTE(review): the XML markup of the three templates below was missing from
# the reviewed text (only the ``{n}`` placeholders survived).  The element
# names are restored from the sitemaps.org 0.9 protocol and from the order of
# the ``.format()`` arguments used in this module -- confirm against the
# upstream file before merging.
#
# XML_HEADER placeholders: {0} site URL, {1} last modification date,
# {2} change frequency of index pages, {3} priority of index pages.
XML_HEADER = u"""<?xml version="1.0" encoding="utf-8"?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">

  <url>
    <loc>{0}/index.html</loc>
    <lastmod>{1}</lastmod>
    <changefreq>{2}</changefreq>
    <priority>{3}</priority>
  </url>

  <url>
    <loc>{0}/archives.html</loc>
    <lastmod>{1}</lastmod>
    <changefreq>{2}</changefreq>
    <priority>{3}</priority>
  </url>

  <url>
    <loc>{0}/tags.html</loc>
    <lastmod>{1}</lastmod>
    <changefreq>{2}</changefreq>
    <priority>{3}</priority>
  </url>

  <url>
    <loc>{0}/categories.html</loc>
    <lastmod>{1}</lastmod>
    <changefreq>{2}</changefreq>
    <priority>{3}</priority>
  </url>
"""

# XML_URL placeholders: {0} site URL, {1} page URL, {2} last modification
# date, {3} change frequency, {4} priority.
XML_URL = u"""
  <url>
    <loc>{0}/{1}</loc>
    <lastmod>{2}</lastmod>
    <changefreq>{3}</changefreq>
    <priority>{4}</priority>
  </url>
"""

XML_FOOTER = u"""
</urlset>
"""

# Keys accepted in SITEMAP['priorities'] and SITEMAP['changefreqs'].
_VALID_KEYS = ('articles', 'indexes', 'pages')

# Values accepted in SITEMAP['changefreqs'].
_VALID_CHANGEFREQS = ('always', 'hourly', 'daily', 'weekly', 'monthly',
                      'yearly', 'never')


def format_date(date):
    """Return *date* formatted as ``YYYY-MM-DDThh:mm:ss`` plus a UTC offset.

    Aware datetimes get their real offset as ``+hh:mm`` / ``-hh:mm``.
    Datetimes without a known offset get ``-00:00``.

    The offset is computed from ``date.utcoffset()``; the previous
    ``strftime('%s')`` call produced epoch seconds (where supported at all)
    instead of a timezone offset.
    """
    stamp = date.strftime("%Y-%m-%dT%H:%M:%S")

    offset = date.utcoffset()
    if offset is None:
        return stamp + "-00:00"

    # timedelta normalises negative values as (days=-1, seconds=...), so
    # rebuild the signed number of seconds before splitting it.
    seconds = offset.days * 86400 + offset.seconds
    sign = '-' if seconds < 0 else '+'
    hours, minutes = divmod(abs(seconds) // 60, 60)
    return stamp + '{0}{1:02d}:{2:02d}'.format(sign, hours, minutes)


class SitemapGenerator(object):
    """Generator that writes ``sitemap.xml`` or ``sitemap.txt``.

    The behavior is driven by the optional ``SITEMAP`` setting, a dict with
    the keys ``format`` (``'xml'`` or ``'txt'``), ``priorities`` and
    ``changefreqs``.  Missing keys and invalid values fall back to the
    defaults set in ``__init__``; invalid values are reported with warnings.
    """

    def __init__(self, context, settings, path, theme, output_path, *null):
        """Store the context and output path and read the ``SITEMAP`` setting.

        ``path``, ``theme`` and any extra positional arguments are accepted
        but not used by this generator.
        """
        self.output_path = output_path
        self.context = context
        self.now = datetime.now()
        self.siteurl = settings.get('SITEURL')

        self.format = 'xml'

        self.changefreqs = {
            'articles': 'monthly',
            'indexes': 'daily',
            'pages': 'monthly'
        }

        self.priorities = {
            'articles': 0.5,
            'indexes': 0.5,
            'pages': 0.5
        }

        config = settings.get('SITEMAP', {})

        if not isinstance(config, dict):
            warning("sitemap plugin: the SITEMAP setting must be a dict")
            return

        fmt = config.get('format')
        if fmt == 'txt':
            # Priorities and change frequencies only appear in XML output,
            # so there is nothing else to read for text sitemaps.
            self.format = fmt
            return
        if fmt is not None and fmt != 'xml':
            # A missing key silently means "use the default"; only a value
            # that is present and wrong deserves a warning.
            warning("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'")
            warning("sitemap plugin: Setting SITEMAP['format'] on `xml'")

        self._load_priorities(config.get('priorities'))
        self._load_changefreqs(config.get('changefreqs'))

    def _load_priorities(self, pris):
        """Copy the valid entries of ``SITEMAP['priorities']`` over the defaults."""
        if pris is None:
            return
        if not isinstance(pris, dict):
            warning("sitemap plugin: SITEMAP['priorities'] must be a dict")
            warning("sitemap plugin: using the default values")
            return

        # Iterate over the known keys and write into self.priorities only,
        # so the user's settings dict is never modified.
        for key in _VALID_KEYS:
            if key not in pris:
                continue
            value = pris[key]
            is_number = (isinstance(value, (int, float))
                         and not isinstance(value, bool))
            if is_number and 0 <= value <= 1:
                self.priorities[key] = value
            else:
                warning("sitemap plugin: priorities must be numbers "
                        "between 0 and 1")
                warning("sitemap plugin: setting SITEMAP['priorities']"
                        "['{0}'] on {1}".format(key, self.priorities[key]))

    def _load_changefreqs(self, chfreqs):
        """Copy the valid entries of ``SITEMAP['changefreqs']`` over the defaults."""
        if chfreqs is None:
            return
        if not isinstance(chfreqs, dict):
            warning("sitemap plugin: SITEMAP['changefreqs'] must be a dict")
            warning("sitemap plugin: using the default values")
            return

        for key in _VALID_KEYS:
            if key not in chfreqs:
                continue
            value = chfreqs[key]
            if value in _VALID_CHANGEFREQS:
                self.changefreqs[key] = value
            else:
                warning("sitemap plugin: invalid changefreq `{0}'".format(value))
                warning("sitemap plugin: setting SITEMAP['changefreqs']"
                        "['{0}'] on '{1}'".format(key, self.changefreqs[key]))

    def write_url(self, page, fd):
        """Write the sitemap entry for *page* to the open file *fd*.

        Objects whose ``status`` attribute is present and different from
        ``'published'`` are skipped.  Objects without a ``date`` attribute
        use the time at which the generator was created.
        """
        if getattr(page, 'status', 'published') != 'published':
            return

        if self.format != 'xml':
            # Text sitemaps list bare URLs.  The original code used the
            # undefined name ``loc`` here and raised NameError.
            fd.write(self.siteurl + '/' + page.url + '\n')
            return

        lastmod = format_date(getattr(page, 'date', self.now))

        # Article is tested before Page, as in the original ordering --
        # presumably because Article derives from Page; verify in
        # pelican.contents.
        if isinstance(page, contents.Article):
            kind = 'articles'
        elif isinstance(page, contents.Page):
            kind = 'pages'
        else:
            kind = 'indexes'

        fd.write(XML_URL.format(self.siteurl, page.url, lastmod,
                                self.changefreqs[kind],
                                self.priorities[kind]))

    def generate_output(self, writer):
        """Write the sitemap file into ``self.output_path``.

        The entries are, in order: pages, articles, categories, tags,
        authors, then the translations of every article.  *writer* is not
        used; the file is written directly.
        """
        path = os.path.join(self.output_path,
                            'sitemap.{0}'.format(self.format))

        # Build a fresh list so the lists stored in the context are never
        # extended in place.
        pages = list(self.context['pages'])
        pages.extend(self.context['articles'])
        pages.extend(c for (c, a) in self.context['categories'])
        pages.extend(t for (t, a) in self.context['tags'])
        pages.extend(a for (a, b) in self.context['authors'])

        for article in self.context['articles']:
            pages.extend(article.translations)

        info('writing {0}'.format(path))

        is_xml = self.format == 'xml'

        with open(path, 'w', encoding='utf-8') as fd:
            if is_xml:
                fd.write(XML_HEADER.format(
                    self.siteurl,
                    format_date(self.now),
                    self.changefreqs['indexes'],
                    self.priorities['indexes']
                ))
            else:
                fd.write(TXT_HEADER.format(self.siteurl))

            for page in pages:
                self.write_url(page, fd)

            if is_xml:
                fd.write(XML_FOOTER)


def get_generators(generators):
    """Signal handler: return the generator class provided by this plugin.

    The argument is whatever the ``get_generators`` signal sends; it is
    not used.
    """
    return SitemapGenerator


def register():
    """Connect this plugin to the ``get_generators`` signal."""
    signals.get_generators.connect(get_generators)
signal('pelican_initialized') article_generate_context = signal('article_generate_context') article_generator_init = signal('article_generator_init') +get_generators = signal('get_generators') pages_generate_context = signal('pages_generate_context') pages_generator_init = signal('pages_generator_init') -- cgit v1.2.1