summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLiang-Bo Wang <me@liang2.tw>2022-05-19 02:08:55 +0800
committerGitHub <noreply@github.com>2022-05-18 14:08:55 -0400
commit12c3378ab9a46fae416a7bb5549e3f123a11d22a (patch)
tree7eb1658131962f1687c3d7136add2c69549b4cc2
parent7334ecd4c747f9a36351a5073b55c2a606917ef9 (diff)
downloadpython-markdown-12c3378ab9a46fae416a7bb5549e3f123a11d22a.tar.gz
Pass language to Pygments formatter in CodeHilite
* Add an extra option `lang_str` to pass the language of the code block to the specified Pygments formatter. * Include an example custom Pygments formatter in the documentation that includes the language of the code in the output using the new option. Resolves #1255.
-rw-r--r--docs/change_log/release-3.4.md8
-rw-r--r--docs/extensions/code_hilite.md52
-rw-r--r--markdown/extensions/codehilite.py16
-rw-r--r--tests/test_syntax/extensions/test_code_hilite.py86
-rw-r--r--tests/test_syntax/extensions/test_fenced_code.py46
5 files changed, 199 insertions, 9 deletions
diff --git a/docs/change_log/release-3.4.md b/docs/change_log/release-3.4.md
index 6dc8401..7a30de5 100644
--- a/docs/change_log/release-3.4.md
+++ b/docs/change_log/release-3.4.md
@@ -10,7 +10,7 @@ PyPy3.
### The `table` extension now uses a `style` attribute instead of `align` attribute for alignment.
The [HTML4 spec][spec4] specifically
-deprecates the use of the `align` attribute and it does not appear at all in the
+deprecates the use of the `align` attribute and it does not appear at all in the
[HTML5 spec][spec5]. Therefore, by default, the [table] extension will now use the `style`
attribute (setting just the `text-align` property) in `td` and `th` blocks.
@@ -55,8 +55,10 @@ The following new features have been included in the 3.4 release:
parameter which can be used to set the CSS class(es) on the `<div>` that contains the
Table of Contents (#1224).
-* The Codehilite extension now supports a `pygments_formatter` option that can be set to
- use a custom formatter class with Pygments.
+* The CodeHilite extension now supports a `pygments_formatter` option that can be set to
+ use a custom formatter class with Pygments (#1187). Additionally, the specified
+ Pygments formatter is passed an extra option `lang_str` to denote the language of
+ the code block (#1258).
- If set to a string like `'html'`, we get the default formatter by that name.
- If set to a class (or any callable), it is called with all the options to get a
formatter instance.
diff --git a/docs/extensions/code_hilite.md b/docs/extensions/code_hilite.md
index 6fa6190..5179d0a 100644
--- a/docs/extensions/code_hilite.md
+++ b/docs/extensions/code_hilite.md
@@ -231,9 +231,6 @@ The following options are provided to configure the output:
* **`lang_prefix`**{ #lang_prefix }:
The prefix prepended to the language class assigned to the HTML `<code>` tag. Default: `language-`.
- This option only applies when `use_pygments` is `False` as Pygments does not provide an option to include a
- language prefix.
-
* **`pygments_formatter`**{ #pygments_formatter }:
This option can be used to change the Pygments formatter used for highlighting the code blocks. By default, this
is set to the string `'html'`, which means it'll use the default `HtmlFormatter` provided by Pygments.
@@ -241,6 +238,11 @@ The following options are provided to configure the output:
This can be set to a string representing any of the other default formatters, or set to a formatter class (or
any callable).
+ The code's language is always passed to the formatter as an extra option `lang_str`, with the value formatted as
+ `{lang_prefix}{lang}`. If the language is unspecified, the language guessed by Pygments will be used. While
+ this option has no effect on Pygments' built-in formatters, a user can make use of the language in their custom
+ formatter. See an example below.
+
To see what formatters are available and how to subclass an existing formatter, please visit [Pygments
documentation on this topic][pygments formatters].
@@ -256,6 +258,50 @@ A trivial example:
markdown.markdown(some_text, extensions=['codehilite'])
```
+To keep the code block's language in the Pygments generated HTML output, one can provide a custom Pygments formatter
+that takes the `lang_str` option. For example,
+
+```python
+from pygments.formatters import HtmlFormatter
+from markdown.extensions.codehilite import CodeHiliteExtension
+
+
+class CustomHtmlFormatter(HtmlFormatter):
+ def __init__(self, lang_str='', **options):
+ super().__init__(**options)
+ # lang_str has the value {lang_prefix}{lang}
+ # specified by the CodeHilite's options
+ self.lang_str = lang_str
+
+ def _wrap_code(self, source):
+ yield 0, f'<code class="{self.lang_str}">'
+ yield from source
+ yield 0, '</code>'
+
+
+some_text = '''\
+ :::python
+ print('hellow world')
+'''
+
+markdown.markdown(
+ some_text,
+ extensions=[CodeHiliteExtension(pygments_formatter=CustomHtmlFormatter)],
+)
+```
+
+The formatter above will output the following HTML structure for the code block:
+
+```html
+<div class="codehilite">
+ <pre>
+ <code class="language-python">
+ ...
+ </code>
+ </pre>
+</div>
+```
+
[html formatter]: https://pygments.org/docs/formatters/#HtmlFormatter
[lexer]: https://pygments.org/docs/lexers/
[spec]: https://www.w3.org/TR/html5/text-level-semantics.html#the-code-element
diff --git a/markdown/extensions/codehilite.py b/markdown/extensions/codehilite.py
index b92ebdc..4721c59 100644
--- a/markdown/extensions/codehilite.py
+++ b/markdown/extensions/codehilite.py
@@ -64,12 +64,14 @@ class CodeHilite:
* use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
instead wrapped for highlighting by a JavaScript library. Default: `True`.
+ * pygments_formatter: The name of a Pygments formatter or a formatter class used for
+ highlighting the code blocks. Default: `html`.
+
* linenums: An alias to Pygments `linenos` formatter option. Default: `None`.
* css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.
- * lang_prefix: Prefix prepended to the language when `use_pygments` is `False`.
- Default: "language-".
+ * lang_prefix: Prefix prepended to the language. Default: "language-".
Other Options:
Any other options are accepted and passed on to the lexer and formatter. Therefore,
@@ -81,6 +83,10 @@ class CodeHilite:
Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
Lexer Options: https://pygments.org/docs/lexers/
+ Additionally, when Pygments is enabled, the code's language is passed to the
+ formatter as an extra option `lang_str`, whose value is `{lang_prefix}{lang}`.
+ This option has no effect on Pygments' built-in formatters.
+
Advanced Usage:
code = CodeHilite(
src = some_code,
@@ -141,13 +147,17 @@ class CodeHilite:
lexer = get_lexer_by_name('text', **self.options)
except ValueError: # pragma: no cover
lexer = get_lexer_by_name('text', **self.options)
+ if not self.lang:
+ # Use the guessed lexer's language instead
+ self.lang = lexer.aliases[0]
+ lang_str = f'{self.lang_prefix}{self.lang}'
if isinstance(self.pygments_formatter, str):
try:
formatter = get_formatter_by_name(self.pygments_formatter, **self.options)
except ClassNotFound:
formatter = get_formatter_by_name('html', **self.options)
else:
- formatter = self.pygments_formatter(**self.options)
+ formatter = self.pygments_formatter(lang_str=lang_str, **self.options)
return highlight(self.src, lexer, formatter)
else:
# just escape and build markup usable by JS highlighting libs
diff --git a/tests/test_syntax/extensions/test_code_hilite.py b/tests/test_syntax/extensions/test_code_hilite.py
index 41502d9..09dd523 100644
--- a/tests/test_syntax/extensions/test_code_hilite.py
+++ b/tests/test_syntax/extensions/test_code_hilite.py
@@ -354,6 +354,22 @@ class TestCodeHiliteExtension(TestCase):
if has_pygments and pygments.__version__ != required_pygments_version:
self.skipTest(f'Pygments=={required_pygments_version} is required')
+ # Define a custom Pygments formatter (same example as in the documentation)
+ if has_pygments:
+ class CustomAddLangHtmlFormatter(pygments.formatters.HtmlFormatter):
+ def __init__(self, lang_str='', **options):
+ super().__init__(**options)
+ self.lang_str = lang_str
+
+ def _wrap_code(self, source):
+ yield 0, f'<code class="{self.lang_str}">'
+ yield from source
+ yield 0, '</code>'
+ else:
+ CustomAddLangHtmlFormatter = None
+
+ self.custom_pygments_formatter = CustomAddLangHtmlFormatter
+
maxDiff = None
def testBasicCodeHilite(self):
@@ -676,3 +692,73 @@ class TestCodeHiliteExtension(TestCase):
expected,
extensions=[CodeHiliteExtension(pygments_style="native", noclasses=True)]
)
+
+ def testFormatterLangStr(self):
+ if has_pygments:
+ expected = (
+ '<div class="codehilite"><pre><span></span><code class="language-python">'
+ '<span class="c1"># A Code Comment</span>\n'
+ '</code></pre></div>'
+ )
+ else:
+ expected = (
+ '<pre class="codehilite"><code class="language-python"># A Code Comment\n'
+ '</code></pre>'
+ )
+
+ self.assertMarkdownRenders(
+ '\t:::Python\n'
+ '\t# A Code Comment',
+ expected,
+ extensions=[
+ CodeHiliteExtension(
+ guess_lang=False,
+ pygments_formatter=self.custom_pygments_formatter
+ )
+ ]
+ )
+
+ def testFormatterLangStrGuessLang(self):
+ if has_pygments:
+ expected = (
+ '<div class="codehilite"><pre><span></span>'
+ '<code class="language-js+php"><span class="cp">&lt;?php</span> '
+ '<span class="k">print</span><span class="p">(</span>'
+ '<span class="s2">&quot;Hello World&quot;</span>'
+ '<span class="p">);</span> <span class="cp">?&gt;</span>\n'
+ '</code></pre></div>'
+ )
+ else:
+ expected = (
+ '<pre class="codehilite"><code>&lt;?php print(&quot;Hello World&quot;); ?&gt;\n'
+ '</code></pre>'
+ )
+ # Use PHP as the starting `<?php` tag ensures an accurate guess.
+ self.assertMarkdownRenders(
+ '\t<?php print("Hello World"); ?>',
+ expected,
+ extensions=[CodeHiliteExtension(pygments_formatter=self.custom_pygments_formatter)]
+ )
+
+ def testFormatterLangStrEmptyLang(self):
+ if has_pygments:
+ expected = (
+ '<div class="codehilite"><pre><span></span>'
+ '<code class="language-text"># A Code Comment\n'
+ '</code></pre></div>'
+ )
+ else:
+ expected = (
+ '<pre class="codehilite"><code># A Code Comment\n'
+ '</code></pre>'
+ )
+ self.assertMarkdownRenders(
+ '\t# A Code Comment',
+ expected,
+ extensions=[
+ CodeHiliteExtension(
+ guess_lang=False,
+ pygments_formatter=self.custom_pygments_formatter,
+ )
+ ]
+ )
diff --git a/tests/test_syntax/extensions/test_fenced_code.py b/tests/test_syntax/extensions/test_fenced_code.py
index f8c3e91..be3c215 100644
--- a/tests/test_syntax/extensions/test_fenced_code.py
+++ b/tests/test_syntax/extensions/test_fenced_code.py
@@ -896,6 +896,52 @@ class TestFencedCodeWithCodehilite(TestCase):
]
)
+ def testPygmentsAddLangClassFormatter(self):
+ if has_pygments:
+ class CustomAddLangHtmlFormatter(pygments.formatters.HtmlFormatter):
+ def __init__(self, lang_str='', **options):
+ super().__init__(**options)
+ self.lang_str = lang_str
+
+ def _wrap_code(self, source):
+ yield 0, f'<code class="{self.lang_str}">'
+ yield from source
+ yield 0, '</code>'
+
+ expected = '''
+ <div class="codehilite"><pre><span></span><code class="language-text">hello world
+ hello another world
+ </code></pre></div>
+ '''
+ else:
+ CustomAddLangHtmlFormatter = None
+ expected = '''
+ <pre class="codehilite"><code class="language-text">hello world
+ hello another world
+ </code></pre>
+ '''
+
+ self.assertMarkdownRenders(
+ self.dedent(
+ '''
+ ```text
+ hello world
+ hello another world
+ ```
+ '''
+ ),
+ self.dedent(
+ expected
+ ),
+ extensions=[
+ markdown.extensions.codehilite.CodeHiliteExtension(
+ guess_lang=False,
+ pygments_formatter=CustomAddLangHtmlFormatter,
+ ),
+ 'fenced_code'
+ ]
+ )
+
def testSvgCustomPygmentsFormatter(self):
if has_pygments:
expected = '''