summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAnthon van der Neut <anthon@mnt.org>2015-06-02 17:20:15 +0200
committerAnthon van der Neut <anthon@mnt.org>2015-06-02 17:20:15 +0200
commit202c60292d864540e2eac9015ef2ae937e0e63fa (patch)
tree20f8f2781dac001dae0c8a637497b72b24750861
parent2eaab316796e9584bf3d1b9ce50731b8477cdf64 (diff)
downloadruamel.yaml-202c60292d864540e2eac9015ef2ae937e0e63fa.tar.gz
missing 0.9.4 files
-rw-r--r--py/convert/__init__.py0
-rw-r--r--py/convert/html.py119
-rw-r--r--py/main.py352
-rw-r--r--test/test_convert.py61
-rw-r--r--winwhl.bat8
5 files changed, 540 insertions, 0 deletions
diff --git a/py/convert/__init__.py b/py/convert/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/py/convert/__init__.py
diff --git a/py/convert/html.py b/py/convert/html.py
new file mode 100644
index 0000000..2fbd605
--- /dev/null
+++ b/py/convert/html.py
@@ -0,0 +1,119 @@
+# coding: utf-8
+
+from __future__ import print_function
+
+"""
+conversion helpers for yaml utility to/from HTML
+
+html/xml to yaml inspired by
+http://programmers.stackexchange.com/questions/157395/yaml-translating-free-flowing-text-from-xml
+
+"""
+
+import sys
+
+
+class HTML2YAML(object):
+ # Callable converter: parses HTML with BeautifulSoup (bs4), turns the parse
+ # tree into nested lists / mappings / strings and dumps that data as YAML.
+ def __init__(self, args=None):
+ # args: any object with the optional attributes no_flatten, strip and
+ # no_body; a missing attribute (or args=None) counts as False.
+ self.flatten = not getattr(args, 'no_flatten', False)
+ self.strip = getattr(args, 'strip', False)
+ self.no_body = getattr(args, 'no_body', False)
+
+ def __call__(self, html):
+ # Convert the HTML text to data and return the YAML dump of that data.
+ d = self.html_to_data(html)
+ if self.no_body:
+ # keep only what is stored under the 'html' -> 'body' keys
+ # NOTE(review): assumes the converted data has both levels and that
+ # both are mappings -- confirm for the parser / flatten setting in use
+ d = d['html']['body']
+ return self.data_to_yaml(d)
+
+ def data_to_yaml(self, d):
+ # Dump d with ruamel.yaml's RoundTripDumper and return the result of dump().
+ import ruamel.yaml
+ return ruamel.yaml.dump(
+ d,
+ Dumper=ruamel.yaml.RoundTripDumper
+ )
+
+ def html_to_data(self, html):
+ # Parse html with bs4 and return the converted data structure.
+ # When bs4 cannot be imported an install hint is printed and the
+ # process exits with status 1.
+ try:
+ import bs4
+ except ImportError:
+ print("For HTML conversion you need to install BeautifulSoup")
+ print("e.g. using (pip install beautifulsoup4)")
+ sys.exit(1)
+
+ # NOTE(review): no parser is named here, so bs4 picks one itself --
+ # confirm whether the result should depend on what is installed
+ soup = bs4.BeautifulSoup(html)
+ data = self._convert_node(soup)
+ return data
+
+ def _convert_node(self, node, depth=0):
+ # Convert one bs4 node. The result collects, in order of appearance:
+ # {'.attribute': attrs} when the node has attributes, a single-key
+ # CommentedMap {tag name: converted tag} per child tag, and the text of
+ # each non-empty string child.
+ # depth is passed on as depth+1 when recursing but is not read otherwise.
+ # NOTE(review): same bs4 import guard as in html_to_data
+ try:
+ import bs4
+ except ImportError:
+ print("For HTML conversion you need to install BeautifulSoup")
+ print("e.g. using (pip install beautifulsoup4)")
+ sys.exit(1)
+ from ruamel.yaml.comments import CommentedMap
+ from ruamel.yaml.scalarstring import PreservedScalarString
+ ret_val = []
+ if node.attrs:
+ ret_val.append({'.attribute': node.attrs})
+ for data in node.contents:
+ if isinstance(data, bs4.Tag):
+ kv = CommentedMap()
+ #print data.name, data.attrs
+ # convert the internals of the tag
+ kv[data.name] = self._convert_node(data, depth+1)
+ ret_val.append(kv)
+ elif isinstance(data, bs4.NavigableString):
+ # s is the cleaned-up text, nl tells whether it is multi-line text
+ s, nl = self._strip(data)
+ if not s:
+ continue
+ if nl:
+ # wrapped in PreservedScalarString for the round-trip dumper
+ ret_val.append(PreservedScalarString(s))
+ continue
+ ret_val.append(s)
+ else:
+ # NOTE(review): message reads 'unknow type' (sic); it is runtime
+ # output and therefore left as is
+ print('unknow type', type(data))
+ # with flattening enabled a one-element result is returned as that element
+ if self.flatten and len(ret_val) == 1:
+ return ret_val[0]
+ return ret_val
+
+ def _strip(self, data):
+ # Clean up one text node; returns a (text, multi_line) pair.
+ # (None, False) is returned when nothing but whitespace is left.
+ import textwrap
+ # multiline strings might be nicely formatted so don't
+ # use .strip() immediately
+ if self.strip:
+ s = data.strip()
+ else:
+ s = data.rstrip()
+ if not s:
+ return None, False
+ first_nl_pos = s.find(u'\n')
+ if first_nl_pos < 0:
+ # no newline at all: plain single-line text
+ return s, False
+ if not s[:first_nl_pos].strip(): # i.e. space until first newline
+ if u'\n' not in s[first_nl_pos+1:]:
+ # NOTE(review): looks like a leftover debug print -- confirm and remove
+ print(repr(data), repr(s))
+ # single line of text preceded and followed by nl
+ return s.strip(), False
+ # use data here, removing the final newline would get your |- as marker
+ # NOTE(review): first_nl_pos was computed on s but is applied to data;
+ # the two only line up when nothing was stripped from the front -- confirm
+ s = textwrap.dedent(data[first_nl_pos+1:])
+ return s, True
+
+
+class YAML2HTML(object):
+ def __init__(self, args=None):
+ pass
+
+ def __call__(self, yaml):
+ d = self.yaml_to_data(yaml)
+ return self.data_to_html(d)
+
+ def data_to_html(self, d):
+ if isinstance(d, dict):
+ pass
+
+ def yaml_to_data(self, yaml):
+ import ruamel.yaml
+ return ruamel.yaml.load(yaml)
+ return data
+
diff --git a/py/main.py b/py/main.py
new file mode 100644
index 0000000..90ee4d5
--- /dev/null
+++ b/py/main.py
@@ -0,0 +1,352 @@
+# coding: utf-8
+
+from __future__ import absolute_import
+
+
+from ruamel.yaml.error import *
+
+from ruamel.yaml.tokens import *
+from ruamel.yaml.events import *
+from ruamel.yaml.nodes import *
+
+from ruamel.yaml.loader import *
+from ruamel.yaml.dumper import *
+from ruamel.yaml.compat import StringIO, BytesIO, with_metaclass, PY3
+
+import io
+
+def scan(stream, Loader=Loader):
+ """
+ Scan a YAML stream and produce scanning tokens.
+ """
+ loader = Loader(stream)
+ try:
+ while loader.check_token():
+ yield loader.get_token()
+ finally:
+ loader.dispose()
+
+
+def parse(stream, Loader=Loader):
+ """
+ Parse a YAML stream and produce parsing events.
+ """
+ loader = Loader(stream)
+ try:
+ while loader.check_event():
+ yield loader.get_event()
+ finally:
+ loader.dispose()
+
+
+def compose(stream, Loader=Loader):
+ """
+ Parse the first YAML document in a stream
+ and produce the corresponding representation tree.
+ """
+ loader = Loader(stream)
+ try:
+ return loader.get_single_node()
+ finally:
+ loader.dispose()
+
+
+def compose_all(stream, Loader=Loader):
+ """
+ Parse all YAML documents in a stream
+ and produce corresponding representation trees.
+ """
+ loader = Loader(stream)
+ try:
+ while loader.check_node():
+ yield loader.get_node()
+ finally:
+ loader.dispose()
+
+
+def load(stream, Loader=Loader):
+ """
+ Parse the first YAML document in a stream
+ and produce the corresponding Python object.
+ """
+ loader = Loader(stream)
+ try:
+ return loader.get_single_data()
+ finally:
+ loader.dispose()
+
+
+def load_all(stream, Loader=Loader):
+ """
+ Parse all YAML documents in a stream
+ and produce corresponding Python objects.
+ """
+ loader = Loader(stream)
+ try:
+ while loader.check_data():
+ yield loader.get_data()
+ finally:
+ loader.dispose()
+
+
+def safe_load(stream):
+ """
+ Parse the first YAML document in a stream
+ and produce the corresponding Python object.
+ Resolve only basic YAML tags.
+ """
+ return load(stream, SafeLoader)
+
+
+def safe_load_all(stream):
+ """
+ Parse all YAML documents in a stream
+ and produce corresponding Python objects.
+ Resolve only basic YAML tags.
+ """
+ return load_all(stream, SafeLoader)
+
+
+def emit(events, stream=None, Dumper=Dumper,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None):
+ """
+ Emit YAML parsing events into a stream.
+ If stream is None, return the produced string instead.
+
+ canonical, indent, width, allow_unicode and line_break are handed
+ to the Dumper unchanged.
+ """
+ # getvalue stays None when the caller supplied the stream
+ getvalue = None
+ if stream is None:
+ # no stream given: collect the output in memory
+ stream = StringIO()
+ getvalue = stream.getvalue
+ dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break)
+ try:
+ for event in events:
+ dumper.emit(event)
+ finally:
+ dumper.dispose()
+ # only an internally created buffer is returned to the caller
+ if getvalue:
+ return getvalue()
+
+# default value for the `encoding` parameter of serialize_all(), dump_all()
+# and dump(): None when PY3 is true, 'utf-8' otherwise
+enc = None if PY3 else 'utf-8'
+
+
+def serialize_all(nodes, stream=None, Dumper=Dumper,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=enc, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ """
+ Serialize a sequence of representation trees into a YAML stream.
+ If stream is None, return the produced string instead.
+
+ All keyword options are handed to the Dumper unchanged.
+ """
+ # getvalue stays None when the caller supplied the stream
+ getvalue = None
+ if stream is None:
+ # in-memory buffer: text when no encoding is requested, bytes otherwise
+ if encoding is None:
+ stream = StringIO()
+ else:
+ stream = BytesIO()
+ getvalue = stream.getvalue
+ dumper = Dumper(stream, canonical=canonical, indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break,
+ encoding=encoding, version=version, tags=tags,
+ explicit_start=explicit_start, explicit_end=explicit_end)
+ try:
+ dumper.open()
+ for node in nodes:
+ dumper.serialize(node)
+ dumper.close()
+ finally:
+ dumper.dispose()
+ # only an internally created buffer is returned to the caller
+ if getvalue:
+ return getvalue()
+
+
+def serialize(node, stream=None, Dumper=Dumper, **kwds):
+ """
+ Serialize a representation tree into a YAML stream.
+ If stream is None, return the produced string instead.
+ """
+ return serialize_all([node], stream, Dumper=Dumper, **kwds)
+
+
+def dump_all(documents, stream=None, Dumper=Dumper,
+ default_style=None, default_flow_style=None,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=enc, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+ """
+ Serialize a sequence of Python objects into a YAML stream.
+ If stream is None, return the produced string instead.
+
+ All keyword options are handed to the Dumper unchanged.
+ """
+ # getvalue stays None when the caller supplied the stream
+ getvalue = None
+ if stream is None:
+ # in-memory buffer: text when no encoding is requested, bytes otherwise
+ if encoding is None:
+ stream = StringIO()
+ else:
+ stream = BytesIO()
+ getvalue = stream.getvalue
+ dumper = Dumper(stream, default_style=default_style,
+ default_flow_style=default_flow_style,
+ canonical=canonical, indent=indent, width=width,
+ allow_unicode=allow_unicode, line_break=line_break,
+ encoding=encoding, version=version, tags=tags,
+ explicit_start=explicit_start, explicit_end=explicit_end)
+ try:
+ dumper.open()
+ for data in documents:
+ dumper.represent(data)
+ dumper.close()
+ finally:
+ dumper.dispose()
+ # only an internally created buffer is returned to the caller
+ if getvalue:
+ return getvalue()
+
+
+def dump(data, stream=None, Dumper=Dumper,
+ default_style=None, default_flow_style=None,
+ canonical=None, indent=None, width=None,
+ allow_unicode=None, line_break=None,
+ encoding=enc, explicit_start=None, explicit_end=None,
+ version=None, tags=None):
+
+ """
+ Serialize a Python object into a YAML stream.
+ If stream is None, return the produced string instead.
+
+ default_style ∈ None, '', '"', "'", '|', '>'
+
+ """
+ return dump_all([data], stream, Dumper=Dumper,
+ default_style=default_style,
+ default_flow_style=default_flow_style,
+ canonical=canonical,
+ indent=indent, width=width,
+ allow_unicode=allow_unicode,
+ line_break=line_break,
+ encoding=encoding, explicit_start=explicit_start,
+ explicit_end=explicit_end,
+ version=version, tags=tags)
+
+
+def safe_dump_all(documents, stream=None, **kwds):
+ """
+ Serialize a sequence of Python objects into a YAML stream.
+ Produce only basic YAML tags.
+ If stream is None, return the produced string instead.
+ """
+ return dump_all(documents, stream, Dumper=SafeDumper, **kwds)
+
+
+def safe_dump(data, stream=None, **kwds):
+ """
+ Serialize a Python object into a YAML stream.
+ Produce only basic YAML tags.
+ If stream is None, return the produced string instead.
+ """
+ return dump_all([data], stream, Dumper=SafeDumper, **kwds)
+
+
+def add_implicit_resolver(tag, regexp, first=None,
+ Loader=Loader, Dumper=Dumper):
+ """
+ Add an implicit scalar detector.
+ If an implicit scalar value matches the given regexp,
+ the corresponding tag is assigned to the scalar.
+ first is a sequence of possible initial characters or None.
+ """
+ Loader.add_implicit_resolver(tag, regexp, first)
+ Dumper.add_implicit_resolver(tag, regexp, first)
+
+
+def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper):
+ """
+ Add a path based resolver for the given tag.
+ A path is a list of keys that forms a path
+ to a node in the representation tree.
+ Keys can be string values, integers, or None.
+ """
+ Loader.add_path_resolver(tag, path, kind)
+ Dumper.add_path_resolver(tag, path, kind)
+
+
+def add_constructor(tag, constructor, Loader=Loader):
+ """
+ Add a constructor for the given tag.
+ Constructor is a function that accepts a Loader instance
+ and a node object and produces the corresponding Python object.
+ """
+ Loader.add_constructor(tag, constructor)
+
+
+def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader):
+ """
+ Add a multi-constructor for the given tag prefix.
+ Multi-constructor is called for a node if its tag starts with tag_prefix.
+ Multi-constructor accepts a Loader instance, a tag suffix,
+ and a node object and produces the corresponding Python object.
+ """
+ Loader.add_multi_constructor(tag_prefix, multi_constructor)
+
+
+def add_representer(data_type, representer, Dumper=Dumper):
+ """
+ Add a representer for the given type.
+ Representer is a function accepting a Dumper instance
+ and an instance of the given data type
+ and producing the corresponding representation node.
+ """
+ Dumper.add_representer(data_type, representer)
+
+
+def add_multi_representer(data_type, multi_representer, Dumper=Dumper):
+ """
+ Add a representer for the given type.
+ Multi-representer is a function accepting a Dumper instance
+ and an instance of the given data type or subtype
+ and producing the corresponding representation node.
+ """
+ Dumper.add_multi_representer(data_type, multi_representer)
+
+
+class YAMLObjectMetaclass(type):
+ """
+ The metaclass for YAMLObject.
+
+ On class creation it registers the class's from_yaml / to_yaml with the
+ class's yaml_loader / yaml_dumper.
+ """
+ def __init__(cls, name, bases, kwds):
+ super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds)
+ # only a yaml_tag set in the class body itself (kwds) is looked at,
+ # and it has to be something other than None
+ if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None:
+ cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml)
+ cls.yaml_dumper.add_representer(cls, cls.to_yaml)
+
+
+class YAMLObject(with_metaclass(YAMLObjectMetaclass)):
+ """
+ An object that can dump itself to a YAML stream
+ and load itself from a YAML stream.
+ """
+ __slots__ = () # no direct instantiation, so allow immutable subclasses
+
+ yaml_loader = Loader
+ yaml_dumper = Dumper
+
+ yaml_tag = None
+ yaml_flow_style = None
+
+ @classmethod
+ def from_yaml(cls, loader, node):
+ """
+ Convert a representation node to a Python object.
+ """
+ return loader.construct_yaml_object(node, cls)
+
+ @classmethod
+ def to_yaml(cls, dumper, data):
+ """
+ Convert a Python object to a representation node.
+ """
+ return dumper.represent_yaml_object(cls.yaml_tag, data, cls,
+ flow_style=cls.yaml_flow_style)
+
+
diff --git a/test/test_convert.py b/test/test_convert.py
new file mode 100644
index 0000000..a4ad70d
--- /dev/null
+++ b/test/test_convert.py
@@ -0,0 +1,61 @@
+
+from ruamel.yaml.convert.html import HTML2YAML
+from textwrap import dedent
+
+class Bin:
+ pass
+
+class TestH2Y:
+ # Tests for the HTML -> YAML conversion done by HTML2YAML.
+
+ # YAML text that test_00 expects for its HTML input
+ sample1 = dedent("""\
+ text:
+ - |-
+ This is an example text, spanning multiple lines, and it has embedded elements
+ like
+ - a:
+ - .attribute:
+ p: value
+ - this
+ - and
+ - b: this
+ - '. There is also a list:'
+ - quote:
+ - text:
+ - |-
+ The text of the quote, spanning multiple lines, and it has
+ embedded elements like
+ - c:
+ - .attribute:
+ p: value
+ - this
+ - and
+ - b: this
+ - author: The Author of this quote
+ - Text continues here.
+ """)
+
+
+ def test_00(self):
+ # convert a small document with the strip option enabled and compare
+ # the YAML dump with sample1
+ b = Bin()
+ b.strip = True
+ h2y = HTML2YAML(b)
+ d = h2y.html_to_data(dedent("""\
+ <text>
+ This is an example text, spanning multiple lines, and it has embedded elements
+ like <a p="value">this</a> and <b>this</b>. There is also a list:
+ <quote>
+ <text>The text of the quote, spanning multiple lines, and it has
+ embedded elements like <c p="value">this</c> and <b>this</b></text>
+ <author>The Author of this quote</author>
+ </quote>
+ Text continues here.
+ </text>
+ """))
+ # unwrap the html/body level when the converted data has one
+ if 'html' in d:
+ d = d['html']['body']
+ res = h2y.data_to_yaml(d)
+ assert res == self.sample1
+
+
+ # NOTE(review): the name does not start with "test" -- presumably to keep
+ # the test runner from collecting it. As written it uses `b` and
+ # `YAML2HTML`, neither of which is defined or imported in the visible part
+ # of this file, and it asserts nothing.
+ def XXtest_01(self):
+ y2h = YAML2HTML(b)
+ d = y2h.yaml_to_data(self.sample1)
diff --git a/winwhl.bat b/winwhl.bat
new file mode 100644
index 0000000..d99eeea
--- /dev/null
+++ b/winwhl.bat
@@ -0,0 +1,8 @@
+rem Run "setup.py bdist_wheel" once with each Python interpreter listed below.
+rem NOTE(review): the directories ending in -32 presumably hold the 32-bit
+rem builds of the same versions -- confirm against the build machine.
+c:\python\2.7\python.exe setup.py bdist_wheel
+c:\python\2.6\python.exe setup.py bdist_wheel
+c:\python\2.7-32\python.exe setup.py bdist_wheel
+c:\python\2.6-32\python.exe setup.py bdist_wheel
+c:\python\3.4\python.exe setup.py bdist_wheel
+c:\python\3.3\python.exe setup.py bdist_wheel
+c:\python\3.4-32\python.exe setup.py bdist_wheel
+c:\python\3.3-32\python.exe setup.py bdist_wheel