diff options
Diffstat (limited to 'docs/reference/gtk-markdown-to-docbook.py')
-rwxr-xr-x | docs/reference/gtk-markdown-to-docbook.py | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/docs/reference/gtk-markdown-to-docbook.py b/docs/reference/gtk-markdown-to-docbook.py new file mode 100755 index 00000000..25274116 --- /dev/null +++ b/docs/reference/gtk-markdown-to-docbook.py @@ -0,0 +1,201 @@ +#!/usr/bin/python3 +# +# Call pandoc to convert markdown to docbook, then expand gtk-doc +# abbreviations (|[ ]|, function(), #object, %constant, etc) + +# Upstream: https://gitlab.gnome.org/GNOME/gtk/-/blob/master/docs/reference/gtk/gtk-markdown-to-docbook + +import sys +import re +import tempfile +import os.path +import subprocess + +# The following code is taken from gtk-doc + +def ExpandAbbreviations(symbol, text): + # Hack! + # Strip xlink namespace from hrefs since pandoc insists on + # inserting them, and namespace setup doesn't transfer across + # xi:include. + # Yay for XML! + text = re.sub('xlink:href', 'href', text) + + # Convert '@param()' + text = re.sub(r'(\A|[^\\])\@(\w+((\.|->)\w+)*)\s*\(\)', r'\1<parameter>\2()</parameter>', text) + + # Convert 'function()' or 'macro()'. + # if there is abc_*_def() we don't want to make a link to _def() + # FIXME: also handle abc(def(....)) : but that would need to be done recursively :/ + def f1(m): + return m.group(1) + MakeXRef(m.group(2), tagify(m.group(2) + "()", "function")) + text = re.sub(r'([^\*.\w])(\w+)\s*\(\)', f1, text) + # handle #Object.func() + text = re.sub(r'(\A|[^\\])#([\w\-:\.]+[\w]+)\s*\(\)', f1, text) + + # Convert '@param', but not '\@param'. + text = re.sub(r'(\A|[^\\])\@(\w+((\.|->)\w+)*)', r'\1<parameter>\2</parameter>', text) + text = re.sub(r'/\\\@', r'\@', text) + + # Convert '%constant', but not '\%constant'. + # Also allow negative numbers, e.g. %-1. + def f2(m): + return m.group(1) + MakeXRef(m.group(2), tagify(m.group(2), "literal")) + + text = re.sub(r'(\A|[^\\])\%(-?\w+)', f2, text) + text = re.sub(r'\\\%', r'\%', text) + + # Convert '#symbol', but not '\#symbol'. + + # Only convert #foo after a space to avoid interfering with + # fragment identifiers in urls + def f3(m): + return m.group(1) + MakeHashXRef(m.group(2), "type") + + text = re.sub(r'(\A|[ ])#([\w\-:\.]+[\w]+)', f3, text) + text = re.sub(r'\\#', '#', text) + + return text + +# Standard C preprocessor directives, which we ignore for '#' abbreviations. +PreProcessorDirectives = { + 'assert', 'define', 'elif', 'else', 'endif', 'error', 'if', 'ifdef', 'ifndef', + 'include', 'line', 'pragma', 'unassert', 'undef', 'warning' +} + +def MakeHashXRef(symbol, tag): + text = symbol + + # Check for things like '#include', '#define', and skip them. + if symbol in PreProcessorDirectives: + return "#%s" % symbol + + # Get rid of special suffixes ('-struct','-enum'). + text = re.sub(r'-struct$', '', text) + text = re.sub(r'-enum$', '', text) + + # If the symbol is in the form "Object::signal", then change the symbol to + # "Object-signal" and use "signal" as the text. + if '::' in symbol: + o, s = symbol.split('::', 1) + symbol = '%s-%s' % (o, s) + text = u'“' + s + u'”' + + # If the symbol is in the form "Object:property", then change the symbol to + # "Object--property" and use "property" as the text. + if ':' in symbol: + o, p = symbol.split(':', 1) + symbol = '%s--%s' % (o, p) + text = u'“' + p + u'”' + + if tag != '': + text = tagify(text, tag) + + return MakeXRef(symbol, text) + +def MakeXRef(symbol, text=None): + """This returns a cross-reference link to the given symbol. + + Though it doesn't try to do this for a few standard C types that it knows + won't be in the documentation. + + Args: + symbol (str): the symbol to try to create a XRef to. + text (str): text to put inside the XRef, defaults to symbol + + Returns: + str: a docbook link + """ + symbol = symbol.strip() + if not text: + text = symbol + + # Get rid of special suffixes ('-struct','-enum'). + text = re.sub(r'-struct$', '', text) + text = re.sub(r'-enum$', '', text) + + if ' ' in symbol: + return text + + symbol_id = CreateValidSGMLID(symbol) + return "<link linkend=\"%s\">%s</link>" % (symbol_id, text) + +def CreateValidSGMLID(xml_id): + """Creates a valid SGML 'id' from the given string. + + According to http://www.w3.org/TR/html4/types.html#type-id "ID and NAME + tokens must begin with a letter ([A-Za-z]) and may be followed by any number + of letters, digits ([0-9]), hyphens ("-"), underscores ("_"), colons (":"), + and periods (".")." + + When creating SGML IDS, we append ":CAPS" to all all-caps identifiers to + prevent name clashes (SGML ids are case-insensitive). (It basically never is + the case that mixed-case identifiers would collide.) + + Args: + id (str): The text to be converted into a valid SGML id. + + Returns: + str: The converted id. + """ + + # Special case, '_' would end up as '' so we use 'gettext-macro' instead. + if xml_id == '_': + return "gettext-macro" + + xml_id = re.sub(r'[,;]', '', xml_id) + xml_id = re.sub(r'[_ ]', '-', xml_id) + xml_id = re.sub(r'^-+', '', xml_id) + xml_id = xml_id.replace('::', '-') + xml_id = xml_id.replace(':', '--') + + # Append ":CAPS" to all all-caps identifiers + # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS + if xml_id.isupper() and not xml_id.endswith('-CAPS'): + xml_id += ':CAPS' + + return xml_id + +def tagify(text, elem): + # Adds a tag around some text. + # e.g tagify("Text", "literal") => "<literal>Text</literal>". + return '<' + elem + '>' + text + '</' + elem + '>' + +# End of gtk-doc excerpts + +MarkdownExtensions = { + '-auto_identifiers', # we use explicit identifiers where needed + '+header_attributes', # for explicit identifiers + '+blank_before_header', # helps with gtk-doc #Object abbreviations + '+compact_definition_lists', # to replace <variablelist> + '+pipe_tables', + '+backtick_code_blocks', # to replace |[ ]| + '+fenced_code_attributes', # to add language annotations + '-raw_html', # to escape literal tags like <child> in input + '+startnum', # to have interrupted lists in the q&a part +} + +def ConvertToDocbook(infile, outfile): + basename = os.path.basename(infile) + if basename.startswith('section'): + division='section' + else: + division='chapter' + input_format = "markdown" + "".join(MarkdownExtensions) + output_format = "docbook4" + subprocess.check_call(["pandoc", infile, "-o", outfile, + "--from=" + input_format, + "--to=" + output_format, + "--top-level-division=" + division]) + +def ExpandGtkDocAbbreviations(infile, outfile): + contents = open(infile, 'r', encoding='utf-8').read() + with open(outfile, 'w', encoding='utf-8') as out: + out.write(ExpandAbbreviations("file", contents)) + + +if __name__ == '__main__': + tmp = tempfile.mktemp() + ConvertToDocbook(sys.argv[1], tmp) + ExpandGtkDocAbbreviations(tmp, sys.argv[2]) + os.remove(tmp) |