diff options
Diffstat (limited to 'tablib/packages/odf/odf2moinmoin.py')
| -rw-r--r-- | tablib/packages/odf/odf2moinmoin.py | 579 |
1 files changed, 0 insertions, 579 deletions
# -*- coding: utf-8 -*-
# Copyright (C) 2006-2008 Søren Roug, European Environment Agency
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# See http://trac.edgewall.org/wiki/WikiFormatting
#
# Contributor(s):
#
"""Convert the text of an OpenDocument file into MoinMoin/Trac wiki markup."""

import copy
import sys
import zipfile
import xml.dom.minidom

from namespaces import nsdict
from elementtypes import *

# Tags whose content is dropped from the output entirely.
IGNORED_TAGS = [
    'draw:a',
    'draw:g',
    'draw:line',
    'draw:object-ole',
    'office:annotation',
    'presentation:notes',
    'svg:desc',
] + [nsdict[item[0]] + ":" + item[1] for item in empty_elements]

# Tags rendered through ODF2MoinMoin.inline_markup.
INLINE_TAGS = [nsdict[item[0]] + ":" + item[1] for item in inline_elements]

# Top-level body children that toString() converts; everything else is skipped.
_BLOCK_TAGS = ("draw:page", "text:p", "text:h", "text:section",
               "text:list", "table:table")

# Children of a list item that listToString() converts.
_LIST_ITEM_TAGS = ("text:p", "text:h", "text:list")


def _elementChildren(node):
    """Return the child nodes of *node* that are elements.

    Text and comment nodes have no ``tagName``, so callers that dispatch on
    the tag name must not see them.
    """
    return [child for child in node.childNodes
            if child.nodeType == xml.dom.Node.ELEMENT_NODE]


class TextProps:
    """ Holds properties for a text style. """

    def __init__(self):

        self.italic = False
        self.bold = False
        self.fixed = False
        self.underlined = False
        self.strikethrough = False
        self.superscript = False
        self.subscript = False

    def setItalic(self, value):
        """ Apply an fo:font-style value; other values leave the flag alone. """
        if value == "italic":
            self.italic = True
        elif value == "normal":
            self.italic = False

    def setBold(self, value):
        """ Apply an fo:font-weight value; other values leave the flag alone. """
        if value == "bold":
            self.bold = True
        elif value == "normal":
            self.bold = False

    def setFixed(self, value):
        """ Record whether the style uses a fixed-pitch font. """
        self.fixed = value

    def setUnderlined(self, value):
        """ Apply an underline style; an empty value leaves the flag alone.

        "none" clears the flag so that a style copied from its parent can
        switch underlining off again.
        """
        if value:
            self.underlined = value != "none"

    def setStrikethrough(self, value):
        """ Apply a line-through style; an empty value leaves the flag alone.

        "none" clears the flag so that a style copied from its parent can
        switch strike-through off again.
        """
        if value:
            self.strikethrough = value != "none"

    def setPosition(self, value):
        """ Apply a style:text-position value such as "super" or "-33% 58%".

        Only the first space-separated token is used. It is either the
        keyword "sub"/"super" or a percentage; percentages above 10 mean
        superscript and below -10 mean subscript. Anything else, including
        a percentage that cannot be parsed, leaves the flags unchanged.
        """
        if value is None or value == '':
            return

        textpos = value.split(' ')[0]
        percentAt = textpos.find('%')

        if percentAt == -1:
            if textpos == "sub":
                raised = False
            elif textpos == "super":
                raised = True
            else:
                return
        else:
            try:
                offset = float(textpos[:percentAt])
            except ValueError:
                return
            # Positive offsets raise the text above the baseline.
            if offset > 10:
                raised = True
            elif offset < -10:
                raised = False
            else:
                return

        self.superscript = raised
        self.subscript = not raised

    def __str__(self):

        return "[italic=%s, bold=%s, fixed=%s]" % (str(self.italic),
                                                   str(self.bold),
                                                   str(self.fixed))


class ParagraphProps:
    """ Holds properties of a paragraph style. """

    def __init__(self):

        self.blockquote = False
        self.headingLevel = 0
        self.code = False
        self.title = False
        self.indented = 0

    def setIndented(self, value):
        self.indented = value

    def setHeading(self, level):
        self.headingLevel = level

    def setTitle(self, value):
        self.title = value

    def setCode(self, value):
        self.code = value

    def __str__(self):

        return "[bq=%s, h=%d, code=%s]" % (str(self.blockquote),
                                           self.headingLevel,
                                           str(self.code))


class ListProperties:
    """ Holds properties for a list style. """

    def __init__(self):
        self.ordered = False

    def setOrdered(self, value):
        self.ordered = value


class ODF2MoinMoin(object):
    """ Loads an ODF file and renders its body as wiki markup.

    The constructor reads styles.xml and content.xml from *filepath*;
    toString() produces the markup.
    """

    def __init__(self, filepath):
        self.footnotes = []
        self.footnoteCounter = 0
        self.textStyles = {"Standard": TextProps()}
        self.paragraphStyles = {"Standard": ParagraphProps()}
        self.listStyles = {}
        self.fixedFonts = []
        self.hasTitle = 0
        self.lastsegment = None

        # Tag name -> handler, consulted by textToString().
        self.elements = {
            'draw:page': self.textToString,
            'draw:frame': self.textToString,
            'draw:image': self.draw_image,
            'draw:text-box': self.textToString,
            'text:a': self.text_a,
            'text:note': self.text_note,
        }
        for tag in IGNORED_TAGS:
            self.elements[tag] = self.do_nothing

        # Later assignments win, so the three specific handlers below
        # override whatever the two loops registered for those tags.
        for tag in INLINE_TAGS:
            self.elements[tag] = self.inline_markup
        self.elements['text:line-break'] = self.text_line_break
        self.elements['text:s'] = self.text_s
        self.elements['text:tab'] = self.text_tab

        self.load(filepath)

    def processFontDeclarations(self, fontDecl):
        """ Extracts necessary font information from a font-declaration
            element: the names of all fonts declared with a fixed pitch.
        """
        for fontFace in fontDecl.getElementsByTagName("style:font-face"):
            if fontFace.getAttribute("style:font-pitch") == "fixed":
                self.fixedFonts.append(fontFace.getAttribute("style:name"))

    def extractTextProperties(self, style, parent=None):
        """ Extracts text properties from a style element.

        When *parent* names a text style that has already been processed,
        the result starts as a copy of that style's properties, so the
        parent object itself is never modified. Attributes found on the
        first style:text-properties child are then applied on top.
        """
        parentProps = None
        if parent:
            parentProps = self.textStyles.get(parent, None)

        if parentProps is not None:
            textProps = copy.copy(parentProps)
        else:
            textProps = TextProps()

        textPropEls = style.getElementsByTagName("style:text-properties")
        if not textPropEls:
            return textProps

        textPropEl = textPropEls[0]

        textProps.setItalic(textPropEl.getAttribute("fo:font-style"))
        textProps.setBold(textPropEl.getAttribute("fo:font-weight"))
        textProps.setUnderlined(
            textPropEl.getAttribute("style:text-underline-style"))
        textProps.setStrikethrough(
            textPropEl.getAttribute("style:text-line-through-style"))
        textProps.setPosition(textPropEl.getAttribute("style:text-position"))

        # Only an explicitly named font decides the flag; without one the
        # value copied from the parent (or the default False) stands.
        fontName = textPropEl.getAttribute("style:font-name")
        if fontName:
            textProps.setFixed(fontName in self.fixedFonts)

        return textProps

    def extractParagraphProperties(self, style, parent=None):
        """ Extracts paragraph properties from a style element.

        Heading level comes from a "Heading_20_<n>" style name, the title
        flag from the name "Title", indentation from a positive
        fo:margin-left, and the code flag from a fixed-pitch font on the
        style or on the *parent* text style it inherits from.
        """

        paraProps = ParagraphProps()

        name = style.getAttribute("style:name")

        if name.startswith("Heading_20_"):
            try:
                paraProps.setHeading(int(name[11:]))
            except ValueError:
                # Non-numeric suffix: not a numbered heading style.
                pass

        if name == "Title":
            paraProps.setTitle(True)

        paraPropEls = style.getElementsByTagName("style:paragraph-properties")
        if paraPropEls:
            leftMargin = paraPropEls[0].getAttribute("fo:margin-left")
            if leftMargin:
                try:
                    # Drop the last two characters, assumed to be the unit.
                    if float(leftMargin[:-2]) > 0.01:
                        paraProps.setIndented(True)
                except ValueError:
                    pass

        textProps = self.extractTextProperties(style, parent)
        if textProps.fixed:
            paraProps.setCode(True)

        return paraProps

    def processStyles(self, styleElements):
        """ Runs through "style" elements extracting necessary information.

        Text-family styles go into self.textStyles; paragraph-family styles
        go into both self.paragraphStyles and self.textStyles. The style
        named "Standard" is skipped, which keeps the defaults set up in
        the constructor.
        """

        for style in styleElements:

            name = style.getAttribute("style:name")
            if name == "Standard":
                continue

            family = style.getAttribute("style:family")
            parent = style.getAttribute("style:parent-style-name")

            if family == "text":
                self.textStyles[name] = \
                    self.extractTextProperties(style, parent)

            elif family == "paragraph":
                self.paragraphStyles[name] = \
                    self.extractParagraphProperties(style, parent)
                self.textStyles[name] = \
                    self.extractTextProperties(style, parent)

    def processListStyles(self, listStyleElements):
        """ Records, per list style, whether the list is ordered.

        A style counts as ordered when it has at least one direct
        text:list-level-style-number child.
        """

        for style in listStyleElements:
            name = style.getAttribute("style:name")

            prop = ListProperties()
            for child in _elementChildren(style):
                if child.tagName == "text:list-level-style-number":
                    prop.setOrdered(True)
                    break

            self.listStyles[name] = prop

    def _processStyleDocument(self, document):
        """ Collects fonts, styles and list styles from one parsed document. """
        fontfacedecls = document.getElementsByTagName("office:font-face-decls")
        if fontfacedecls:
            self.processFontDeclarations(fontfacedecls[0])
        self.processStyles(document.getElementsByTagName("style:style"))
        self.processListStyles(
            document.getElementsByTagName("text:list-style"))

    def load(self, filepath):
        """ Loads an ODT file.

        Reads styles.xml and content.xml from the zip archive at
        *filepath*, processes the styles of both (styles.xml first) and
        keeps the parsed content document in self.content.
        """

        archive = zipfile.ZipFile(filepath)
        try:
            stylesXml = archive.read("styles.xml")
            contentXml = archive.read("content.xml")
        finally:
            # Close the archive even when a member is missing.
            archive.close()

        self._processStyleDocument(xml.dom.minidom.parseString(stylesXml))

        self.content = xml.dom.minidom.parseString(contentXml)
        self._processStyleDocument(self.content)

    def compressCodeBlocks(self, text):
        """ Hook for removing extra blank lines from code blocks.

        Compression is disabled: the text is returned unchanged.
        """
        return text

#-----------------------------------
    def do_nothing(self, node):
        """ Handler for ignored tags; contributes no output. """
        return ''

    def draw_image(self, node):
        """ Renders an image reference.

        Links starting with "./" (a sub-object, which isn't supported) are
        emitted as the bare link; otherwise a leading "Pictures/" is
        stripped and the link is wrapped in an [[Image(...)]] macro.
        """

        link = node.getAttribute("xlink:href")
        if link and link.startswith('./'):
            return "%s\n" % link
        if link and link.startswith('Pictures/'):
            link = link[9:]
        return "[[Image(%s)]]\n" % link

    def text_a(self, node):
        """ Renders a hyperlink, omitting the label when it equals the URL. """
        text = self.textToString(node).strip()
        link = node.getAttribute("xlink:href").strip()
        if link == text:
            return "[%s] " % link
        return "[%s %s] " % (link, text)

    def text_line_break(self, node):
        return "[[BR]]"

    def text_note(self, node):
        """ Records a footnote and returns its citation mark as superscript.

        Only the first child of the note body is converted.
        """
        cite = (node.getElementsByTagName("text:note-citation")[0]
                    .childNodes[0].nodeValue)
        body = (node.getElementsByTagName("text:note-body")[0]
                    .childNodes[0])
        self.footnotes.append((cite, self.textToString(body)))
        return "^%s^" % cite

    def text_s(self, node):
        """ Renders text:s as the number of spaces given by text:c.

        A missing or non-numeric count yields a single space.
        """
        try:
            num = int(node.getAttribute("text:c"))
        except (TypeError, ValueError):
            return " "
        return " " * num

    def text_tab(self, node):
        return " "

    def inline_markup(self, node):
        """ Renders *node*'s content wrapped in the markup of its text style.

        Whitespace-only content yields an empty string. A fixed-pitch
        style yields backtick-quoted text and no other markup.
        """
        text = self.textToString(node)

        if not text.strip():
            return ''  # don't apply styles to white space

        styleName = node.getAttribute("text:style-name")
        style = self.textStyles.get(styleName, TextProps())

        if style.fixed:
            return "`" + text + "`"

        mark = []
        if style.italic:
            mark.append("''")
        if style.bold:
            mark.append("'''")
        if style.underlined:
            mark.append("__")
        if style.strikethrough:
            mark.append("~~")
        if style.superscript:
            mark.append("^")
        if style.subscript:
            mark.append(",,")

        # Close the markers in the opposite order they were opened.
        revmark = mark[::-1]
        return "%s%s%s" % (''.join(mark), text, ''.join(revmark))

#-----------------------------------
    def listToString(self, listElement, indent=0):
        """ Renders a text:list element, recursing into nested lists.

        Every item of an ordered list is emitted with the literal marker
        " 1. "; unordered items use " * ". Nested content is indented by
        three more columns per level.
        """

        self.lastsegment = listElement.tagName
        buffer = []

        styleName = listElement.getAttribute("text:style-name")
        props = self.listStyles.get(styleName, ListProperties())

        if props.ordered:
            bullet = " 1. "
        else:
            bullet = " * "

        for item in _elementChildren(listElement):
            buffer.append(" " * indent)
            buffer.append(bullet)

            for subitem in _elementChildren(item):
                if subitem.tagName not in _LIST_ITEM_TAGS:
                    continue
                if subitem.tagName == "text:list":
                    buffer.append("\n")
                    buffer.append(self.listToString(subitem, indent + 3))
                else:
                    buffer.append(self.paragraphToString(subitem, indent + 3))
                self.lastsegment = subitem.tagName

            self.lastsegment = item.tagName
            buffer.append("\n")

        return ''.join(buffer)

    def tableToString(self, tableElement):
        """ MoinMoin uses || to delimit table cells.

        Rows inside table:table-header-rows are rendered the same way as
        ordinary rows.
        """

        self.lastsegment = tableElement.tagName
        buffer = []

        for item in _elementChildren(tableElement):
            self.lastsegment = item.tagName
            if item.tagName == "table:table-header-rows":
                buffer.append(self.tableToString(item))
            if item.tagName == "table:table-row":
                buffer.append("\n||")
                for cell in _elementChildren(item):
                    buffer.append(self.inline_markup(cell))
                    buffer.append("||")
                    self.lastsegment = cell.tagName

        return ''.join(buffer)

    def toString(self):
        """ Converts the document to a string.

        Conversion state (footnotes, title flag, last segment) is reset on
        entry, so repeated calls return the same result.
        """
        self.footnotes = []
        self.footnoteCounter = 0
        self.hasTitle = 0
        self.lastsegment = None

        body = self.content.getElementsByTagName("office:body")[0]
        root = _elementChildren(body)[0]

        buffer = []

        for block in _elementChildren(root):
            tag = block.tagName
            if tag not in _BLOCK_TAGS:
                continue

            if tag == "text:list":
                text = self.listToString(block)
            elif tag == "text:section":
                text = self.textToString(block)
            elif tag == "table:table":
                text = self.tableToString(block)
            else:
                text = self.paragraphToString(block)

            if text:
                buffer.append(text)

        if self.footnotes:
            buffer.append("----")
            for cite, note in self.footnotes:
                buffer.append("%s: %s" % (cite, note))

        buffer.append("")
        return self.compressCodeBlocks('\n'.join(buffer))

    def textToString(self, element):
        """ Renders the children of *element*, dispatching on tag name.

        Tags without a registered handler are emitted as " {tag} " so that
        unsupported content stays visible in the output.
        """

        buffer = []

        for node in element.childNodes:

            if node.nodeType == xml.dom.Node.TEXT_NODE:
                buffer.append(node.nodeValue)

            elif node.nodeType == xml.dom.Node.ELEMENT_NODE:
                tag = node.tagName

                if tag in ("draw:text-box", "draw:frame"):
                    buffer.append(self.textToString(node))

                elif tag in ("text:p", "text:h"):
                    text = self.paragraphToString(node)
                    if text:
                        buffer.append(text)

                elif tag == "text:list":
                    buffer.append(self.listToString(node))

                else:
                    method = self.elements.get(tag)
                    if method:
                        buffer.append(method(node))
                    else:
                        buffer.append(" {" + tag + "} ")

        return ''.join(buffer)

    def paragraphToString(self, paragraph, indent=0):
        """ Renders a paragraph or heading element.

        Title-styled paragraphs become a level-1 heading and push later
        headings one level down. Elements with text:outline-level become
        headings of that level; code-styled paragraphs are wrapped in
        {{{ }}}; everything else goes through wrapParagraph().
        """

        style_name = paragraph.getAttribute("text:style-name")
        paraProps = self.paragraphStyles.get(style_name, ParagraphProps())
        text = self.inline_markup(paragraph)

        if not paraProps.code:
            text = text.strip()

        # Separate consecutive plain paragraphs with a blank line.
        if paragraph.tagName == "text:p" and self.lastsegment == "text:p":
            text = "\n" + text

        self.lastsegment = paragraph.tagName

        if paraProps.title:
            self.hasTitle = 1
            return "= " + text + " =\n"

        outlinelevel = paragraph.getAttribute("text:outline-level")
        if outlinelevel:
            level = int(outlinelevel)
            if self.hasTitle:
                level += 1

            if level >= 1:
                return "=" * level + " " + text + " " + "=" * level + "\n"

        elif paraProps.code:
            return "{{{\n" + text + "\n}}}\n"

        if paraProps.indented:
            return self.wrapParagraph(text, indent=indent, blockquote=True)

        return self.wrapParagraph(text, indent=indent)

    def wrapParagraph(self, text, indent=0, blockquote=False):
        """ Returns *text*, prefixed with a space when *blockquote* is set.

        No line wrapping is performed; *indent* is accepted for interface
        compatibility and is not used.
        """
        if blockquote:
            return " " + text
        return text
