diff options
author | Anthon van der Neut <anthon@mnt.org> | 2015-06-02 17:20:15 +0200 |
---|---|---|
committer | Anthon van der Neut <anthon@mnt.org> | 2015-06-02 17:20:15 +0200 |
commit | 202c60292d864540e2eac9015ef2ae937e0e63fa (patch) | |
tree | 20f8f2781dac001dae0c8a637497b72b24750861 | |
parent | 2eaab316796e9584bf3d1b9ce50731b8477cdf64 (diff) | |
download | ruamel.yaml-202c60292d864540e2eac9015ef2ae937e0e63fa.tar.gz |
missing 0.9.4 files
-rw-r--r-- | py/convert/__init__.py | 0 | ||||
-rw-r--r-- | py/convert/html.py | 119 | ||||
-rw-r--r-- | py/main.py | 352 | ||||
-rw-r--r-- | test/test_convert.py | 61 | ||||
-rw-r--r-- | winwhl.bat | 8 |
5 files changed, 540 insertions, 0 deletions
diff --git a/py/convert/__init__.py b/py/convert/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/py/convert/__init__.py diff --git a/py/convert/html.py b/py/convert/html.py new file mode 100644 index 0000000..2fbd605 --- /dev/null +++ b/py/convert/html.py @@ -0,0 +1,119 @@ +# coding: utf-8 + +from __future__ import print_function + +""" +conversion helpers for yaml utility to/from HTML + +html/xml to yaml inspired by +http://programmers.stackexchange.com/questions/157395/yaml-translating-free-flowing-text-from-xml + +""" + +import sys + + +class HTML2YAML(object): + def __init__(self, args=None): + self.flatten = not getattr(args, 'no_flatten', False) + self.strip = getattr(args, 'strip', False) + self.no_body = getattr(args, 'no_body', False) + + def __call__(self, html): + d = self.html_to_data(html) + if self.no_body: + d = d['html']['body'] + return self.data_to_yaml(d) + + def data_to_yaml(self, d): + import ruamel.yaml + return ruamel.yaml.dump( + d, + Dumper=ruamel.yaml.RoundTripDumper + ) + + def html_to_data(self, html): + try: + import bs4 + except ImportError: + print("For HTML conversion you need to install BeautifulSoup") + print("e.g. using (pip install beautifulsoup4)") + sys.exit(1) + + soup = bs4.BeautifulSoup(html) + data = self._convert_node(soup) + return data + + def _convert_node(self, node, depth=0): + try: + import bs4 + except ImportError: + print("For HTML conversion you need to install BeautifulSoup") + print("e.g. using (pip install beautifulsoup4)") + sys.exit(1) + from ruamel.yaml.comments import CommentedMap + from ruamel.yaml.scalarstring import PreservedScalarString + ret_val = [] + if node.attrs: + ret_val.append({'.attribute': node.attrs}) + for data in node.contents: + if isinstance(data, bs4.Tag): + kv = CommentedMap() + #print data.name, data.attrs + # convert the intenals of the tag + kv[data.name] = self._convert_node(data, depth+1) + ret_val.append(kv) + elif isinstance(data, bs4.NavigableString): + s, nl = self._strip(data) + if not s: + continue + if nl: + ret_val.append(PreservedScalarString(s)) + continue + ret_val.append(s) + else: + print('unknow type', type(data)) + if self.flatten and len(ret_val) == 1: + return ret_val[0] + return ret_val + + def _strip(self, data): + import textwrap + # multiline strings might be nicely formatted so don't + # use .strip() immediately + if self.strip: + s = data.strip() + else: + s = data.rstrip() + if not s: + return None, False + first_nl_pos = s.find(u'\n') + if first_nl_pos < 0: + return s, False + if not s[:first_nl_pos].strip(): # i.e. space until first newline + if u'\n' not in s[first_nl_pos+1:]: + print(repr(data), repr(s)) + # single line of text preceded and followed by nl + return s.strip(), False + # use data here, removing the final newline would get your |- as marker + s = textwrap.dedent(data[first_nl_pos+1:]) + return s, True + + +class YAML2HTML(object): + def __init__(self, args=None): + pass + + def __call__(self, yaml): + d = self.yaml_to_data(yaml) + return self.data_to_html(d) + + def data_to_html(self, d): + if isinstance(d, dict): + pass + + def yaml_to_data(self, yaml): + import ruamel.yaml + return ruamel.yaml.load(yaml) + return data + diff --git a/py/main.py b/py/main.py new file mode 100644 index 0000000..90ee4d5 --- /dev/null +++ b/py/main.py @@ -0,0 +1,352 @@ +# coding: utf-8 + +from __future__ import absolute_import + + +from ruamel.yaml.error import * + +from ruamel.yaml.tokens import * +from ruamel.yaml.events import * +from ruamel.yaml.nodes import * + +from ruamel.yaml.loader import * +from ruamel.yaml.dumper import * +from ruamel.yaml.compat import StringIO, BytesIO, with_metaclass, PY3 + +import io + +def scan(stream, Loader=Loader): + """ + Scan a YAML stream and produce scanning tokens. + """ + loader = Loader(stream) + try: + while loader.check_token(): + yield loader.get_token() + finally: + loader.dispose() + + +def parse(stream, Loader=Loader): + """ + Parse a YAML stream and produce parsing events. + """ + loader = Loader(stream) + try: + while loader.check_event(): + yield loader.get_event() + finally: + loader.dispose() + + +def compose(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding representation tree. + """ + loader = Loader(stream) + try: + return loader.get_single_node() + finally: + loader.dispose() + + +def compose_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding representation trees. + """ + loader = Loader(stream) + try: + while loader.check_node(): + yield loader.get_node() + finally: + loader.dispose() + + +def load(stream, Loader=Loader): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + """ + loader = Loader(stream) + try: + return loader.get_single_data() + finally: + loader.dispose() + + +def load_all(stream, Loader=Loader): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + """ + loader = Loader(stream) + try: + while loader.check_data(): + yield loader.get_data() + finally: + loader.dispose() + + +def safe_load(stream): + """ + Parse the first YAML document in a stream + and produce the corresponding Python object. + Resolve only basic YAML tags. + """ + return load(stream, SafeLoader) + + +def safe_load_all(stream): + """ + Parse all YAML documents in a stream + and produce corresponding Python objects. + Resolve only basic YAML tags. + """ + return load_all(stream, SafeLoader) + + +def emit(events, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None): + """ + Emit YAML parsing events into a stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + stream = StringIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break) + try: + for event in events: + dumper.emit(event) + finally: + dumper.dispose() + if getvalue: + return getvalue() + +enc = None if PY3 else 'utf-8' + + +def serialize_all(nodes, stream=None, Dumper=Dumper, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=enc, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of representation trees into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = StringIO() + else: + stream = BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for node in nodes: + dumper.serialize(node) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + + +def serialize(node, stream=None, Dumper=Dumper, **kwds): + """ + Serialize a representation tree into a YAML stream. + If stream is None, return the produced string instead. + """ + return serialize_all([node], stream, Dumper=Dumper, **kwds) + + +def dump_all(documents, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=enc, explicit_start=None, explicit_end=None, + version=None, tags=None): + """ + Serialize a sequence of Python objects into a YAML stream. + If stream is None, return the produced string instead. + """ + getvalue = None + if stream is None: + if encoding is None: + stream = StringIO() + else: + stream = BytesIO() + getvalue = stream.getvalue + dumper = Dumper(stream, default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, indent=indent, width=width, + allow_unicode=allow_unicode, line_break=line_break, + encoding=encoding, version=version, tags=tags, + explicit_start=explicit_start, explicit_end=explicit_end) + try: + dumper.open() + for data in documents: + dumper.represent(data) + dumper.close() + finally: + dumper.dispose() + if getvalue: + return getvalue() + + +def dump(data, stream=None, Dumper=Dumper, + default_style=None, default_flow_style=None, + canonical=None, indent=None, width=None, + allow_unicode=None, line_break=None, + encoding=enc, explicit_start=None, explicit_end=None, + version=None, tags=None): + + """ + Serialize a Python object into a YAML stream. + If stream is None, return the produced string instead. + + default_style ∈ None, '', '"', "'", '|', '>' + + """ + return dump_all([data], stream, Dumper=Dumper, + default_style=default_style, + default_flow_style=default_flow_style, + canonical=canonical, + indent=indent, width=width, + allow_unicode=allow_unicode, + line_break=line_break, + encoding=encoding, explicit_start=explicit_start, + explicit_end=explicit_end, + version=version, tags=tags) + + +def safe_dump_all(documents, stream=None, **kwds): + """ + Serialize a sequence of Python objects into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all(documents, stream, Dumper=SafeDumper, **kwds) + + +def safe_dump(data, stream=None, **kwds): + """ + Serialize a Python object into a YAML stream. + Produce only basic YAML tags. + If stream is None, return the produced string instead. + """ + return dump_all([data], stream, Dumper=SafeDumper, **kwds) + + +def add_implicit_resolver(tag, regexp, first=None, + Loader=Loader, Dumper=Dumper): + """ + Add an implicit scalar detector. + If an implicit scalar value matches the given regexp, + the corresponding tag is assigned to the scalar. + first is a sequence of possible initial characters or None. + """ + Loader.add_implicit_resolver(tag, regexp, first) + Dumper.add_implicit_resolver(tag, regexp, first) + + +def add_path_resolver(tag, path, kind=None, Loader=Loader, Dumper=Dumper): + """ + Add a path based resolver for the given tag. + A path is a list of keys that forms a path + to a node in the representation tree. + Keys can be string values, integers, or None. + """ + Loader.add_path_resolver(tag, path, kind) + Dumper.add_path_resolver(tag, path, kind) + + +def add_constructor(tag, constructor, Loader=Loader): + """ + Add a constructor for the given tag. + Constructor is a function that accepts a Loader instance + and a node object and produces the corresponding Python object. + """ + Loader.add_constructor(tag, constructor) + + +def add_multi_constructor(tag_prefix, multi_constructor, Loader=Loader): + """ + Add a multi-constructor for the given tag prefix. + Multi-constructor is called for a node if its tag starts with tag_prefix. + Multi-constructor accepts a Loader instance, a tag suffix, + and a node object and produces the corresponding Python object. + """ + Loader.add_multi_constructor(tag_prefix, multi_constructor) + + +def add_representer(data_type, representer, Dumper=Dumper): + """ + Add a representer for the given type. + Representer is a function accepting a Dumper instance + and an instance of the given data type + and producing the corresponding representation node. + """ + Dumper.add_representer(data_type, representer) + + +def add_multi_representer(data_type, multi_representer, Dumper=Dumper): + """ + Add a representer for the given type. + Multi-representer is a function accepting a Dumper instance + and an instance of the given data type or subtype + and producing the corresponding representation node. + """ + Dumper.add_multi_representer(data_type, multi_representer) + + +class YAMLObjectMetaclass(type): + """ + The metaclass for YAMLObject. + """ + def __init__(cls, name, bases, kwds): + super(YAMLObjectMetaclass, cls).__init__(name, bases, kwds) + if 'yaml_tag' in kwds and kwds['yaml_tag'] is not None: + cls.yaml_loader.add_constructor(cls.yaml_tag, cls.from_yaml) + cls.yaml_dumper.add_representer(cls, cls.to_yaml) + + +class YAMLObject(with_metaclass(YAMLObjectMetaclass)): + """ + An object that can dump itself to a YAML stream + and load itself from a YAML stream. + """ + __slots__ = () # no direct instantiation, so allow immutable subclasses + + yaml_loader = Loader + yaml_dumper = Dumper + + yaml_tag = None + yaml_flow_style = None + + @classmethod + def from_yaml(cls, loader, node): + """ + Convert a representation node to a Python object. + """ + return loader.construct_yaml_object(node, cls) + + @classmethod + def to_yaml(cls, dumper, data): + """ + Convert a Python object to a representation node. + """ + return dumper.represent_yaml_object(cls.yaml_tag, data, cls, + flow_style=cls.yaml_flow_style) + + diff --git a/test/test_convert.py b/test/test_convert.py new file mode 100644 index 0000000..a4ad70d --- /dev/null +++ b/test/test_convert.py @@ -0,0 +1,61 @@ + +from ruamel.yaml.convert.html import HTML2YAML +from textwrap import dedent + +class Bin: + pass + +class TestH2Y: + sample1 = dedent("""\ + text: + - |- + This is an example text, spanning multiple lines, and it has embedded elements + like + - a: + - .attribute: + p: value + - this + - and + - b: this + - '. There is also a list:' + - quote: + - text: + - |- + The text of the quote, spanning multiple lines, and it has + embedded elements like + - c: + - .attribute: + p: value + - this + - and + - b: this + - author: The Author of this quote + - Text continues here. + """) + + + def test_00(self): + b = Bin() + b.strip = True + h2y = HTML2YAML(b) + d = h2y.html_to_data(dedent("""\ + <text> + This is an example text, spanning multiple lines, and it has embedded elements + like <a p="value">this</a> and <b>this</b>. There is also a list: + <quote> + <text>The text of the quote, spanning multiple lines, and it has + embedded elements like <c p="value">this</c> and <b>this</b></text> + <author>The Author of this quote</author> + </quote> + Text continues here. + </text> + """)) + if 'html' in d: + d = d['html']['body'] + res = h2y.data_to_yaml(d) + assert res == self.sample1 + + + def XXtest_01(self): + y2h = YAML2HTML(b) + d = y2h.yaml_to_data(self.sample1) diff --git a/winwhl.bat b/winwhl.bat new file mode 100644 index 0000000..d99eeea --- /dev/null +++ b/winwhl.bat @@ -0,0 +1,8 @@ +c:\python\2.7\python.exe setup.py bdist_wheel +c:\python\2.6\python.exe setup.py bdist_wheel +c:\python\2.7-32\python.exe setup.py bdist_wheel +c:\python\2.6-32\python.exe setup.py bdist_wheel +c:\python\3.4\python.exe setup.py bdist_wheel +c:\python\3.3\python.exe setup.py bdist_wheel +c:\python\3.4-32\python.exe setup.py bdist_wheel +c:\python\3.3-32\python.exe setup.py bdist_wheel |