summaryrefslogtreecommitdiff
path: root/docs/reference/gtk-markdown-to-docbook.py
diff options
context:
space:
mode:
Diffstat (limited to 'docs/reference/gtk-markdown-to-docbook.py')
-rwxr-xr-xdocs/reference/gtk-markdown-to-docbook.py201
1 files changed, 201 insertions, 0 deletions
diff --git a/docs/reference/gtk-markdown-to-docbook.py b/docs/reference/gtk-markdown-to-docbook.py
new file mode 100755
index 00000000..25274116
--- /dev/null
+++ b/docs/reference/gtk-markdown-to-docbook.py
@@ -0,0 +1,201 @@
+#!/usr/bin/python3
+#
+# Call pandoc to convert markdown to docbook, then expand gtk-doc
+# abbreviations (|[ ]|, function(), #object, %constant, etc)
+
+# Upstream: https://gitlab.gnome.org/GNOME/gtk/-/blob/master/docs/reference/gtk/gtk-markdown-to-docbook
+
+import sys
+import re
+import tempfile
+import os.path
+import subprocess
+
+# The following code is taken from gtk-doc
+
+def ExpandAbbreviations(symbol, text):
+    """Expand gtk-doc abbreviations in ``text`` into DocBook markup.
+
+    The substitutions run in a fixed order, and the order matters (for
+    example '@param()' has to be handled before the plain '@param' rule):
+
+    1. 'xlink:href' is reduced to 'href'.
+    2. '@param()' is wrapped in <parameter>.
+    3. 'function()' / 'macro()' and '#Object.func()' are passed to
+       MakeXRef() with a <function> element as the link text.
+    4. '@param' is wrapped in <parameter>.
+    5. '%constant' is passed to MakeXRef() with a <literal> element as
+       the link text.
+    6. '#symbol', at the start of the text or after a space, is passed to
+       MakeHashXRef() with the "type" tag.
+
+    A backslash directly in front of '@', '%' or '#' suppresses the
+    expansion; the backslash is removed from the result.
+
+    Args:
+        symbol (str): not used by this function; kept for the callers.
+        text (str): DocBook text that may contain gtk-doc abbreviations.
+
+    Returns:
+        str: the text with the abbreviations expanded.
+    """
+    # Hack!
+    # Strip xlink namespace from hrefs since pandoc insists on
+    # inserting them, and namespace setup doesn't transfer across
+    # xi:include.
+    # Yay for XML!
+    text = re.sub('xlink:href', 'href', text)
+
+    # Convert '@param()'
+    text = re.sub(r'(\A|[^\\])\@(\w+((\.|->)\w+)*)\s*\(\)',
+                  r'\1<parameter>\2()</parameter>', text)
+
+    # Convert 'function()' or 'macro()'.
+    # if there is abc_*_def() we don't want to make a link to _def()
+    # FIXME: also handle abc(def(....)) : but that would need to be done recursively :/
+    def f1(m):
+        # Keep the character in front of the name, link the name itself.
+        return m.group(1) + MakeXRef(m.group(2), tagify(m.group(2) + "()", "function"))
+
+    text = re.sub(r'([^\*.\w])(\w+)\s*\(\)', f1, text)
+    # handle #Object.func()
+    text = re.sub(r'(\A|[^\\])#([\w\-:\.]+[\w]+)\s*\(\)', f1, text)
+
+    # Convert '@param', but not '\@param'.
+    text = re.sub(r'(\A|[^\\])\@(\w+((\.|->)\w+)*)', r'\1<parameter>\2</parameter>', text)
+    # Drop the escaping backslash.  The replacement must be a bare '@':
+    # Python emits an unknown template escape such as r'\@' verbatim, which
+    # would leave the backslash in the output.
+    text = re.sub(r'\\@', '@', text)
+
+    # Convert '%constant', but not '\%constant'.
+    # Also allow negative numbers, e.g. %-1.
+    def f2(m):
+        return m.group(1) + MakeXRef(m.group(2), tagify(m.group(2), "literal"))
+
+    text = re.sub(r'(\A|[^\\])\%(-?\w+)', f2, text)
+    # Same un-escaping as for '\@' above.
+    text = re.sub(r'\\%', '%', text)
+
+    # Convert '#symbol', but not '\#symbol'.
+
+    # Only convert #foo after a space to avoid interfering with
+    # fragment identifiers in urls
+    def f3(m):
+        return m.group(1) + MakeHashXRef(m.group(2), "type")
+
+    text = re.sub(r'(\A|[ ])#([\w\-:\.]+[\w]+)', f3, text)
+    text = re.sub(r'\\#', '#', text)
+
+    return text
+
+# Names of the standard C preprocessor directives.  MakeHashXRef() returns
+# '#<name>' unchanged for these instead of building a cross-reference.
+PreProcessorDirectives = {
+    'assert',
+    'define',
+    'elif',
+    'else',
+    'endif',
+    'error',
+    'if',
+    'ifdef',
+    'ifndef',
+    'include',
+    'line',
+    'pragma',
+    'unassert',
+    'undef',
+    'warning',
+}
+
+def MakeHashXRef(symbol, tag):
+    """Build the replacement for a '#symbol' abbreviation.
+
+    Args:
+        symbol (str): the symbol that followed the '#'.
+        tag (str): element to wrap the link text in; '' means no wrapping.
+
+    Returns:
+        str: '#symbol' unchanged when symbol is a C preprocessor directive,
+        otherwise whatever MakeXRef() returns for the symbol and its text.
+    """
+    # '#include', '#define' and the like are C, not references: echo them.
+    if symbol in PreProcessorDirectives:
+        return "#%s" % symbol
+
+    # The link text starts out as the symbol without the special
+    # '-struct' / '-enum' suffixes.
+    label = re.sub(r'-struct$', '', symbol)
+    label = re.sub(r'-enum$', '', label)
+
+    # "Object::signal" is looked up as "Object-signal" and shown as the
+    # quoted signal name.
+    if '::' in symbol:
+        owner, _, signal = symbol.partition('::')
+        symbol = '%s-%s' % (owner, signal)
+        label = u'“' + signal + u'”'
+
+    # "Object:property" is looked up as "Object--property" and shown as the
+    # quoted property name.  This tests the symbol as left by the branch
+    # above, so it is a separate 'if' rather than an 'elif'.
+    if ':' in symbol:
+        owner, _, prop = symbol.partition(':')
+        symbol = '%s--%s' % (owner, prop)
+        label = u'“' + prop + u'”'
+
+    if tag != '':
+        label = tagify(label, tag)
+
+    return MakeXRef(symbol, label)
+
+def MakeXRef(symbol, text=None):
+    """Return a DocBook cross-reference link to ``symbol``.
+
+    No link is produced when the symbol, after stripping surrounding
+    whitespace, still contains a space; the link text alone is returned.
+
+    Args:
+        symbol (str): the symbol to link to.
+        text (str): text to show inside the link; an empty or missing value
+            falls back to the stripped symbol.
+
+    Returns:
+        str: a '<link linkend="...">' element, or just the text.
+    """
+    symbol = symbol.strip()
+    label = text or symbol
+
+    # Get rid of special suffixes ('-struct', then '-enum') in the label.
+    for suffix in (r'-struct$', r'-enum$'):
+        label = re.sub(suffix, '', label)
+
+    if ' ' in symbol:
+        return label
+
+    return "<link linkend=\"%s\">%s</link>" % (CreateValidSGMLID(symbol), label)
+
+def CreateValidSGMLID(xml_id):
+    """Turn a symbol name into a valid SGML 'id'.
+
+    According to http://www.w3.org/TR/html4/types.html#type-id "ID and NAME
+    tokens must begin with a letter ([A-Za-z]) and may be followed by any number
+    of letters, digits ([0-9]), hyphens ("-"), underscores ("_"), colons (":"),
+    and periods (".")."
+
+    SGML ids are case-insensitive, so ":CAPS" is appended to all-caps
+    identifiers to keep them from clashing with lower-case ones.
+
+    Args:
+        xml_id (str): The text to be converted into a valid SGML id.
+
+    Returns:
+        str: The converted id.
+    """
+    # Special case, '_' would end up as '' so we use 'gettext-macro' instead.
+    if xml_id == '_':
+        return "gettext-macro"
+
+    # One pass over the characters: drop ',' and ';', turn '_' and ' ' into '-'.
+    char_map = str.maketrans({',': None, ';': None, '_': '-', ' ': '-'})
+    result = xml_id.translate(char_map)
+
+    # No leading hyphens, then '::' -> '-' before the remaining ':' -> '--'.
+    result = result.lstrip('-').replace('::', '-').replace(':', '--')
+
+    # Append ":CAPS" to all all-caps identifiers
+    # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS
+    if not result.isupper() or result.endswith('-CAPS'):
+        return result
+    return result + ':CAPS'
+
+def tagify(text, elem):
+    """Wrap ``text`` in an ``elem`` element.
+
+    e.g. tagify("Text", "literal") => "<literal>Text</literal>".
+    """
+    return "".join(('<', elem, '>', text, '</', elem, '>'))
+
+# End of gtk-doc excerpts
+
+# Pandoc markdown extension toggles ('+' enables, '-' disables).  They are
+# appended to the "markdown" input format name in ConvertToDocbook().
+MarkdownExtensions = {
+    # we use explicit identifiers where needed
+    '-auto_identifiers',
+    # for explicit identifiers
+    '+header_attributes',
+    # helps with gtk-doc #Object abbreviations
+    '+blank_before_header',
+    # to replace <variablelist>
+    '+compact_definition_lists',
+    '+pipe_tables',
+    # to replace |[ ]|
+    '+backtick_code_blocks',
+    # to add language annotations
+    '+fenced_code_attributes',
+    # to escape literal tags like <child> in input
+    '-raw_html',
+    # to have interrupted lists in the q&a part
+    '+startnum',
+}
+
+def ConvertToDocbook(infile, outfile):
+    """Run pandoc to convert the markdown file ``infile`` to DocBook 4.
+
+    The top-level division is 'section' when the base name of ``infile``
+    starts with 'section', and 'chapter' otherwise.
+
+    Args:
+        infile (str): path of the markdown input.
+        outfile (str): path pandoc writes the DocBook output to.
+
+    Raises:
+        subprocess.CalledProcessError: if pandoc exits with a non-zero status.
+    """
+    basename = os.path.basename(infile)
+    division = 'section' if basename.startswith('section') else 'chapter'
+
+    # MarkdownExtensions is a set, so its iteration order can differ from
+    # one run to the next.  Sort it to get the same pandoc command line
+    # every time.
+    input_format = "markdown" + "".join(sorted(MarkdownExtensions))
+    output_format = "docbook4"
+    subprocess.check_call(["pandoc", infile, "-o", outfile,
+                           "--from=" + input_format,
+                           "--to=" + output_format,
+                           "--top-level-division=" + division])
+
+def ExpandGtkDocAbbreviations(infile, outfile):
+    """Expand the gtk-doc abbreviations of ``infile`` into ``outfile``.
+
+    Both files are read and written as UTF-8.
+
+    Args:
+        infile (str): path of the DocBook file to read.
+        outfile (str): path the expanded text is written to.
+    """
+    # Read inside a 'with' so the input handle is closed right away.
+    with open(infile, 'r', encoding='utf-8') as src:
+        contents = src.read()
+    with open(outfile, 'w', encoding='utf-8') as out:
+        out.write(ExpandAbbreviations("file", contents))
+
+
+if __name__ == '__main__':
+    # Usage: gtk-markdown-to-docbook.py INFILE OUTFILE
+    if len(sys.argv) < 3:
+        sys.exit("usage: %s INFILE OUTFILE" % sys.argv[0])
+
+    # mkstemp() creates the file itself, which avoids the race that the
+    # deprecated mktemp() has.  Only the path is needed, so close the
+    # descriptor and let pandoc overwrite the file.
+    fd, tmp = tempfile.mkstemp()
+    os.close(fd)
+    try:
+        ConvertToDocbook(sys.argv[1], tmp)
+        ExpandGtkDocAbbreviations(tmp, sys.argv[2])
+    finally:
+        # Remove the intermediate file even when a step above failed.
+        os.remove(tmp)