summaryrefslogtreecommitdiff
path: root/tablib/packages/odf/odf2moinmoin.py
diff options
context:
space:
mode:
Diffstat (limited to 'tablib/packages/odf/odf2moinmoin.py')
-rw-r--r--tablib/packages/odf/odf2moinmoin.py579
1 files changed, 0 insertions, 579 deletions
diff --git a/tablib/packages/odf/odf2moinmoin.py b/tablib/packages/odf/odf2moinmoin.py
deleted file mode 100644
index 167fcda..0000000
--- a/tablib/packages/odf/odf2moinmoin.py
+++ /dev/null
@@ -1,579 +0,0 @@
-# -*- coding: utf-8 -*-
-# Copyright (C) 2006-2008 Søren Roug, European Environment Agency
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-#
-# See http://trac.edgewall.org/wiki/WikiFormatting
-#
-# Contributor(s):
-#
-
-import sys, zipfile, xml.dom.minidom
-from namespaces import nsdict
-from elementtypes import *
-
-IGNORED_TAGS = [
- 'draw:a'
- 'draw:g',
- 'draw:line',
- 'draw:object-ole',
- 'office:annotation',
- 'presentation:notes',
- 'svg:desc',
-] + [ nsdict[item[0]]+":"+item[1] for item in empty_elements]
-
-INLINE_TAGS = [ nsdict[item[0]]+":"+item[1] for item in inline_elements]
-
-
-class TextProps:
- """ Holds properties for a text style. """
-
- def __init__(self):
-
- self.italic = False
- self.bold = False
- self.fixed = False
- self.underlined = False
- self.strikethrough = False
- self.superscript = False
- self.subscript = False
-
- def setItalic(self, value):
- if value == "italic":
- self.italic = True
- elif value == "normal":
- self.italic = False
-
- def setBold(self, value):
- if value == "bold":
- self.bold = True
- elif value == "normal":
- self.bold = False
-
- def setFixed(self, value):
- self.fixed = value
-
- def setUnderlined(self, value):
- if value and value != "none":
- self.underlined = True
-
- def setStrikethrough(self, value):
- if value and value != "none":
- self.strikethrough = True
-
- def setPosition(self, value):
- if value is None or value == '':
- return
- posisize = value.split(' ')
- textpos = posisize[0]
- if textpos.find('%') == -1:
- if textpos == "sub":
- self.superscript = False
- self.subscript = True
- elif textpos == "super":
- self.superscript = True
- self.subscript = False
- else:
- itextpos = int(textpos[:textpos.find('%')])
- if itextpos > 10:
- self.superscript = False
- self.subscript = True
- elif itextpos < -10:
- self.superscript = True
- self.subscript = False
-
- def __str__(self):
-
- return "[italic=%s, bold=i%s, fixed=%s]" % (str(self.italic),
- str(self.bold),
- str(self.fixed))
-
-class ParagraphProps:
- """ Holds properties of a paragraph style. """
-
- def __init__(self):
-
- self.blockquote = False
- self.headingLevel = 0
- self.code = False
- self.title = False
- self.indented = 0
-
- def setIndented(self, value):
- self.indented = value
-
- def setHeading(self, level):
- self.headingLevel = level
-
- def setTitle(self, value):
- self.title = value
-
- def setCode(self, value):
- self.code = value
-
-
- def __str__(self):
-
- return "[bq=%s, h=%d, code=%s]" % (str(self.blockquote),
- self.headingLevel,
- str(self.code))
-
-
-class ListProperties:
- """ Holds properties for a list style. """
-
- def __init__(self):
- self.ordered = False
-
- def setOrdered(self, value):
- self.ordered = value
-
-
-
-class ODF2MoinMoin(object):
-
-
- def __init__(self, filepath):
- self.footnotes = []
- self.footnoteCounter = 0
- self.textStyles = {"Standard": TextProps()}
- self.paragraphStyles = {"Standard": ParagraphProps()}
- self.listStyles = {}
- self.fixedFonts = []
- self.hasTitle = 0
- self.lastsegment = None
-
- # Tags
- self.elements = {
- 'draw:page': self.textToString,
- 'draw:frame': self.textToString,
- 'draw:image': self.draw_image,
- 'draw:text-box': self.textToString,
- 'text:a': self.text_a,
- 'text:note': self.text_note,
- }
- for tag in IGNORED_TAGS:
- self.elements[tag] = self.do_nothing
-
- for tag in INLINE_TAGS:
- self.elements[tag] = self.inline_markup
- self.elements['text:line-break'] = self.text_line_break
- self.elements['text:s'] = self.text_s
- self.elements['text:tab'] = self.text_tab
-
- self.load(filepath)
-
- def processFontDeclarations(self, fontDecl):
- """ Extracts necessary font information from a font-declaration
- element.
- """
- for fontFace in fontDecl.getElementsByTagName("style:font-face"):
- if fontFace.getAttribute("style:font-pitch") == "fixed":
- self.fixedFonts.append(fontFace.getAttribute("style:name"))
-
-
-
- def extractTextProperties(self, style, parent=None):
- """ Extracts text properties from a style element. """
-
- textProps = TextProps()
-
- if parent:
- parentProp = self.textStyles.get(parent, None)
- if parentProp:
- textProp = parentProp
-
- textPropEl = style.getElementsByTagName("style:text-properties")
- if not textPropEl: return textProps
-
- textPropEl = textPropEl[0]
-
- textProps.setItalic(textPropEl.getAttribute("fo:font-style"))
- textProps.setBold(textPropEl.getAttribute("fo:font-weight"))
- textProps.setUnderlined(textPropEl.getAttribute("style:text-underline-style"))
- textProps.setStrikethrough(textPropEl.getAttribute("style:text-line-through-style"))
- textProps.setPosition(textPropEl.getAttribute("style:text-position"))
-
- if textPropEl.getAttribute("style:font-name") in self.fixedFonts:
- textProps.setFixed(True)
-
- return textProps
-
- def extractParagraphProperties(self, style, parent=None):
- """ Extracts paragraph properties from a style element. """
-
- paraProps = ParagraphProps()
-
- name = style.getAttribute("style:name")
-
- if name.startswith("Heading_20_"):
- level = name[11:]
- try:
- level = int(level)
- paraProps.setHeading(level)
- except:
- level = 0
-
- if name == "Title":
- paraProps.setTitle(True)
-
- paraPropEl = style.getElementsByTagName("style:paragraph-properties")
- if paraPropEl:
- paraPropEl = paraPropEl[0]
- leftMargin = paraPropEl.getAttribute("fo:margin-left")
- if leftMargin:
- try:
- leftMargin = float(leftMargin[:-2])
- if leftMargin > 0.01:
- paraProps.setIndented(True)
- except:
- pass
-
- textProps = self.extractTextProperties(style)
- if textProps.fixed:
- paraProps.setCode(True)
-
- return paraProps
-
-
- def processStyles(self, styleElements):
- """ Runs through "style" elements extracting necessary information.
- """
-
- for style in styleElements:
-
- name = style.getAttribute("style:name")
-
- if name == "Standard": continue
-
- family = style.getAttribute("style:family")
- parent = style.getAttribute("style:parent-style-name")
-
- if family == "text":
- self.textStyles[name] = self.extractTextProperties(style, parent)
-
- elif family == "paragraph":
- self.paragraphStyles[name] = \
- self.extractParagraphProperties(style, parent)
- self.textStyles[name] = self.extractTextProperties(style, parent)
-
- def processListStyles(self, listStyleElements):
-
- for style in listStyleElements:
- name = style.getAttribute("style:name")
-
- prop = ListProperties()
- if style.hasChildNodes():
- subitems = [el for el in style.childNodes
- if el.nodeType == xml.dom.Node.ELEMENT_NODE
- and el.tagName == "text:list-level-style-number"]
- if len(subitems) > 0:
- prop.setOrdered(True)
-
- self.listStyles[name] = prop
-
-
- def load(self, filepath):
- """ Loads an ODT file. """
-
- zip = zipfile.ZipFile(filepath)
-
- styles_doc = xml.dom.minidom.parseString(zip.read("styles.xml"))
- fontfacedecls = styles_doc.getElementsByTagName("office:font-face-decls")
- if fontfacedecls:
- self.processFontDeclarations(fontfacedecls[0])
- self.processStyles(styles_doc.getElementsByTagName("style:style"))
- self.processListStyles(styles_doc.getElementsByTagName("text:list-style"))
-
- self.content = xml.dom.minidom.parseString(zip.read("content.xml"))
- fontfacedecls = self.content.getElementsByTagName("office:font-face-decls")
- if fontfacedecls:
- self.processFontDeclarations(fontfacedecls[0])
-
- self.processStyles(self.content.getElementsByTagName("style:style"))
- self.processListStyles(self.content.getElementsByTagName("text:list-style"))
-
- def compressCodeBlocks(self, text):
- """ Removes extra blank lines from code blocks. """
-
- return text
- lines = text.split("\n")
- buffer = []
- numLines = len(lines)
- for i in range(numLines):
-
- if (lines[i].strip() or i == numLines-1 or i == 0 or
- not ( lines[i-1].startswith(" ")
- and lines[i+1].startswith(" ") ) ):
- buffer.append("\n" + lines[i])
-
- return ''.join(buffer)
-
-#-----------------------------------
- def do_nothing(self, node):
- return ''
-
- def draw_image(self, node):
- """
- """
-
- link = node.getAttribute("xlink:href")
- if link and link[:2] == './': # Indicates a sub-object, which isn't supported
- return "%s\n" % link
- if link and link[:9] == 'Pictures/':
- link = link[9:]
- return "[[Image(%s)]]\n" % link
-
- def text_a(self, node):
- text = self.textToString(node)
- link = node.getAttribute("xlink:href")
- if link.strip() == text.strip():
- return "[%s] " % link.strip()
- else:
- return "[%s %s] " % (link.strip(), text.strip())
-
-
- def text_line_break(self, node):
- return "[[BR]]"
-
- def text_note(self, node):
- cite = (node.getElementsByTagName("text:note-citation")[0]
- .childNodes[0].nodeValue)
- body = (node.getElementsByTagName("text:note-body")[0]
- .childNodes[0])
- self.footnotes.append((cite, self.textToString(body)))
- return "^%s^" % cite
-
- def text_s(self, node):
- try:
- num = int(node.getAttribute("text:c"))
- return " "*num
- except:
- return " "
-
- def text_tab(self, node):
- return " "
-
- def inline_markup(self, node):
- text = self.textToString(node)
-
- if not text.strip():
- return '' # don't apply styles to white space
-
- styleName = node.getAttribute("text:style-name")
- style = self.textStyles.get(styleName, TextProps())
-
- if style.fixed:
- return "`" + text + "`"
-
- mark = []
- if style:
- if style.italic:
- mark.append("''")
- if style.bold:
- mark.append("'''")
- if style.underlined:
- mark.append("__")
- if style.strikethrough:
- mark.append("~~")
- if style.superscript:
- mark.append("^")
- if style.subscript:
- mark.append(",,")
- revmark = mark[:]
- revmark.reverse()
- return "%s%s%s" % (''.join(mark), text, ''.join(revmark))
-
-#-----------------------------------
- def listToString(self, listElement, indent = 0):
-
- self.lastsegment = listElement.tagName
- buffer = []
-
- styleName = listElement.getAttribute("text:style-name")
- props = self.listStyles.get(styleName, ListProperties())
-
- i = 0
- for item in listElement.childNodes:
- buffer.append(" "*indent)
- i += 1
- if props.ordered:
- number = str(i)
- number = " " + number + ". "
- buffer.append(" 1. ")
- else:
- buffer.append(" * ")
- subitems = [el for el in item.childNodes
- if el.tagName in ["text:p", "text:h", "text:list"]]
- for subitem in subitems:
- if subitem.tagName == "text:list":
- buffer.append("\n")
- buffer.append(self.listToString(subitem, indent+3))
- else:
- buffer.append(self.paragraphToString(subitem, indent+3))
- self.lastsegment = subitem.tagName
- self.lastsegment = item.tagName
- buffer.append("\n")
-
- return ''.join(buffer)
-
- def tableToString(self, tableElement):
- """ MoinMoin uses || to delimit table cells
- """
-
- self.lastsegment = tableElement.tagName
- buffer = []
-
- for item in tableElement.childNodes:
- self.lastsegment = item.tagName
- if item.tagName == "table:table-header-rows":
- buffer.append(self.tableToString(item))
- if item.tagName == "table:table-row":
- buffer.append("\n||")
- for cell in item.childNodes:
- buffer.append(self.inline_markup(cell))
- buffer.append("||")
- self.lastsegment = cell.tagName
- return ''.join(buffer)
-
-
- def toString(self):
- """ Converts the document to a string.
- FIXME: Result from second call differs from first call
- """
- body = self.content.getElementsByTagName("office:body")[0]
- text = body.childNodes[0]
-
- buffer = []
-
- paragraphs = [el for el in text.childNodes
- if el.tagName in ["draw:page", "text:p", "text:h","text:section",
- "text:list", "table:table"]]
-
- for paragraph in paragraphs:
- if paragraph.tagName == "text:list":
- text = self.listToString(paragraph)
- elif paragraph.tagName == "text:section":
- text = self.textToString(paragraph)
- elif paragraph.tagName == "table:table":
- text = self.tableToString(paragraph)
- else:
- text = self.paragraphToString(paragraph)
- if text:
- buffer.append(text)
-
- if self.footnotes:
-
- buffer.append("----")
- for cite, body in self.footnotes:
- buffer.append("%s: %s" % (cite, body))
-
-
- buffer.append("")
- return self.compressCodeBlocks('\n'.join(buffer))
-
-
- def textToString(self, element):
-
- buffer = []
-
- for node in element.childNodes:
-
- if node.nodeType == xml.dom.Node.TEXT_NODE:
- buffer.append(node.nodeValue)
-
- elif node.nodeType == xml.dom.Node.ELEMENT_NODE:
- tag = node.tagName
-
- if tag in ("draw:text-box", "draw:frame"):
- buffer.append(self.textToString(node))
-
- elif tag in ("text:p", "text:h"):
- text = self.paragraphToString(node)
- if text:
- buffer.append(text)
- elif tag == "text:list":
- buffer.append(self.listToString(node))
- else:
- method = self.elements.get(tag)
- if method:
- buffer.append(method(node))
- else:
- buffer.append(" {" + tag + "} ")
-
- return ''.join(buffer)
-
- def paragraphToString(self, paragraph, indent = 0):
-
- dummyParaProps = ParagraphProps()
-
- style_name = paragraph.getAttribute("text:style-name")
- paraProps = self.paragraphStyles.get(style_name, dummyParaProps)
- text = self.inline_markup(paragraph)
-
- if paraProps and not paraProps.code:
- text = text.strip()
-
- if paragraph.tagName == "text:p" and self.lastsegment == "text:p":
- text = "\n" + text
-
- self.lastsegment = paragraph.tagName
-
- if paraProps.title:
- self.hasTitle = 1
- return "= " + text + " =\n"
-
- outlinelevel = paragraph.getAttribute("text:outline-level")
- if outlinelevel:
-
- level = int(outlinelevel)
- if self.hasTitle: level += 1
-
- if level >= 1:
- return "=" * level + " " + text + " " + "=" * level + "\n"
-
- elif paraProps.code:
- return "{{{\n" + text + "\n}}}\n"
-
- if paraProps.indented:
- return self.wrapParagraph(text, indent = indent, blockquote = True)
-
- else:
- return self.wrapParagraph(text, indent = indent)
-
-
- def wrapParagraph(self, text, indent = 0, blockquote=False):
-
- counter = 0
- buffer = []
- LIMIT = 50
-
- if blockquote:
- buffer.append(" ")
-
- return ''.join(buffer) + text
- # Unused from here
- for token in text.split():
-
- if counter > LIMIT - indent:
- buffer.append("\n" + " "*indent)
- if blockquote:
- buffer.append(" ")
- counter = 0
-
- buffer.append(token + " ")
- counter += len(token)
-
- return ''.join(buffer)