diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2022-08-18 13:56:50 -0400 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2022-09-25 19:38:10 -0400 |
| commit | 81d8394c0b5342cdc603cb2e07e12139c9506bf6 (patch) | |
| tree | 5453f51ef80bb3b0b4705025070439fdccfea29c /examples/elementtree | |
| parent | a8029f5a7e3e376ec57f1614ab0294b717d53c05 (diff) | |
| download | sqlalchemy-81d8394c0b5342cdc603cb2e07e12139c9506bf6.tar.gz | |
New ORM Query Guide featuring DML support
reviewers: these docs publish periodically at:
https://docs.sqlalchemy.org/en/gerrit/4042/orm/queryguide/index.html
See the "last generated" timestamp near the bottom of the
page to ensure the latest version is up
Change includes some other adjustments:
* small typing fixes for end-user benefit
* removal of a bunch of old examples for patterns that nobody
uses or aren't really what we promote now
* modernization of some examples, including inheritance
Change-Id: I9929daab7797be9515f71c888b28af1209e789ff
Diffstat (limited to 'examples/elementtree')
| -rw-r--r-- | examples/elementtree/__init__.py | 25 | ||||
| -rw-r--r-- | examples/elementtree/adjacency_list.py | 284 | ||||
| -rw-r--r-- | examples/elementtree/optimized_al.py | 296 | ||||
| -rw-r--r-- | examples/elementtree/pickle_type.py | 78 | ||||
| -rw-r--r-- | examples/elementtree/test.xml | 9 | ||||
| -rw-r--r-- | examples/elementtree/test2.xml | 4 | ||||
| -rw-r--r-- | examples/elementtree/test3.xml | 7 |
7 files changed, 0 insertions, 703 deletions
diff --git a/examples/elementtree/__init__.py b/examples/elementtree/__init__.py deleted file mode 100644 index b2d90a739..000000000 --- a/examples/elementtree/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Illustrates three strategies for persisting and querying XML -documents as represented by ElementTree in a relational -database. The techniques do not apply any mappings to the -ElementTree objects directly, so are compatible with the -native cElementTree as well as lxml, and can be adapted to -suit any kind of DOM representation system. Querying along -xpath-like strings is illustrated as well. - -E.g.:: - - # parse an XML file and persist in the database - doc = ElementTree.parse("test.xml") - session.add(Document(file, doc)) - session.commit() - - # locate documents with a certain path/attribute structure - for document in find_document('/somefile/header/field2[@attr=foo]'): - # dump the XML - print(document) - -.. autosource:: - :files: pickle_type.py, adjacency_list.py, optimized_al.py - -""" diff --git a/examples/elementtree/adjacency_list.py b/examples/elementtree/adjacency_list.py deleted file mode 100644 index 0502e3e9e..000000000 --- a/examples/elementtree/adjacency_list.py +++ /dev/null @@ -1,284 +0,0 @@ -""" -Illustrates an explicit way to persist an XML document expressed using -ElementTree. - -Each DOM node is stored in an individual -table row, with attributes represented in a separate table. The -nodes are associated in a hierarchy using an adjacency list -structure. A query function is introduced which can search for nodes -along any path with a given structure of attributes, basically a -(very narrow) subset of xpath. - -This example explicitly marshals/unmarshals the ElementTree document into -mapped entities which have their own tables. Compare to pickle_type.py which -uses PickleType to accomplish the same task. Note that the usage of both -styles of persistence are identical, as is the structure of the main Document -class. - -""" - -# PART I - Imports/Configuration - -import os -import re -from xml.etree import ElementTree - -from sqlalchemy import and_ -from sqlalchemy import Column -from sqlalchemy import create_engine -from sqlalchemy import ForeignKey -from sqlalchemy import Integer -from sqlalchemy import String -from sqlalchemy import Table -from sqlalchemy import Unicode -from sqlalchemy.orm import aliased -from sqlalchemy.orm import lazyload -from sqlalchemy.orm import mapper -from sqlalchemy.orm import registry -from sqlalchemy.orm import relationship -from sqlalchemy.orm import Session - - -e = create_engine("sqlite://") -mapper_registry = registry() - -# PART II - Table Metadata - -# stores a top level record of an XML document. -documents = Table( - "documents", - mapper_registry.metadata, - Column("document_id", Integer, primary_key=True), - Column("filename", String(30), unique=True), - Column("element_id", Integer, ForeignKey("elements.element_id")), -) - -# stores XML nodes in an adjacency list model. This corresponds to -# Element and SubElement objects. -elements = Table( - "elements", - mapper_registry.metadata, - Column("element_id", Integer, primary_key=True), - Column("parent_id", Integer, ForeignKey("elements.element_id")), - Column("tag", Unicode(30), nullable=False), - Column("text", Unicode), - Column("tail", Unicode), -) - -# stores attributes. This corresponds to the dictionary of attributes -# stored by an Element or SubElement. -attributes = Table( - "attributes", - mapper_registry.metadata, - Column( - "element_id", - Integer, - ForeignKey("elements.element_id"), - primary_key=True, - ), - Column("name", Unicode(100), nullable=False, primary_key=True), - Column("value", Unicode(255)), -) - -mapper_registry.metadata.create_all(e) - -# PART III - Model - -# our document class. contains a string name, -# and the ElementTree root element. - - -class Document: - def __init__(self, name, element): - self.filename = name - self.element = element - - -# PART IV - Persistence Mapping - -# Node class. a non-public class which will represent the DB-persisted -# Element/SubElement object. We cannot create mappers for ElementTree elements -# directly because they are at the very least not new-style classes, and also -# may be backed by native implementations. so here we construct an adapter. - - -class _Node: - pass - - -# Attribute class. also internal, this will represent the key/value attributes -# stored for a particular Node. - - -class _Attribute: - def __init__(self, name, value): - self.name = name - self.value = value - - -# setup mappers. Document will eagerly load a list of _Node objects. -mapper( - Document, - documents, - properties={"_root": relationship(_Node, lazy="joined", cascade="all")}, -) - -mapper( - _Node, - elements, - properties={ - "children": relationship(_Node, cascade="all"), - # eagerly load attributes - "attributes": relationship( - _Attribute, lazy="joined", cascade="all, delete-orphan" - ), - }, -) - -mapper(_Attribute, attributes) - -# define marshalling functions that convert from _Node/_Attribute to/from -# ElementTree objects. this will set the ElementTree element as -# "document._element", and append the root _Node object to the "_root" mapped -# collection. - - -class ElementTreeMarshal: - def __get__(self, document, owner): - if document is None: - return self - - if hasattr(document, "_element"): - return document._element - - def traverse(node, parent=None): - if parent is not None: - elem = ElementTree.SubElement(parent, node.tag) - else: - elem = ElementTree.Element(node.tag) - elem.text = node.text - elem.tail = node.tail - for attr in node.attributes: - elem.attrib[attr.name] = attr.value - for child in node.children: - traverse(child, parent=elem) - return elem - - document._element = ElementTree.ElementTree(traverse(document._root)) - return document._element - - def __set__(self, document, element): - def traverse(node): - n = _Node() - n.tag = str(node.tag) - n.text = str(node.text) - n.tail = str(node.tail) if node.tail else None - n.children = [traverse(n2) for n2 in node] - n.attributes = [ - _Attribute(str(k), str(v)) for k, v in node.attrib.items() - ] - return n - - document._root = traverse(element.getroot()) - document._element = element - - def __delete__(self, document): - del document._element - document._root = [] - - -# override Document's "element" attribute with the marshaller. -Document.element = ElementTreeMarshal() - -# PART V - Basic Persistence Example - -line = "\n--------------------------------------------------------" - -# save to DB -session = Session(e) - -# get ElementTree documents -for file in ("test.xml", "test2.xml", "test3.xml"): - filename = os.path.join(os.path.dirname(__file__), file) - doc = ElementTree.parse(filename) - session.add(Document(file, doc)) - -print("\nSaving three documents...", line) -session.commit() -print("Done.") - -print("\nFull text of document 'text.xml':", line) -document = session.query(Document).filter_by(filename="test.xml").first() - -ElementTree.dump(document.element) - -# PART VI - Searching for Paths - -# manually search for a document which contains "/somefile/header/field1:hi" -root = aliased(_Node) -child_node = aliased(_Node) -grandchild_node = aliased(_Node) - -d = ( - session.query(Document) - .join(Document._root.of_type(root)) - .filter(root.tag == "somefile") - .join(root.children.of_type(child_node)) - .filter(child_node.tag == "header") - .join(child_node.children.of_type(grandchild_node)) - .filter( - and_(grandchild_node.tag == "field1", grandchild_node.text == "hi") - ) - .one() -) -ElementTree.dump(d.element) - -# generalize the above approach into an extremely impoverished xpath function: - - -def find_document(path, compareto): - query = session.query(Document) - attribute = Document._root - for i, match in enumerate( - re.finditer(r"/([\w_]+)(?:\[@([\w_]+)(?:=(.*))?\])?", path) - ): - (token, attrname, attrvalue) = match.group(1, 2, 3) - target_node = aliased(_Node) - - query = query.join(attribute.of_type(target_node)).filter( - target_node.tag == token - ) - - attribute = target_node.children - - if attrname: - attribute_entity = aliased(_Attribute) - - if attrvalue: - query = query.join( - target_node.attributes.of_type(attribute_entity) - ).filter( - and_( - attribute_entity.name == attrname, - attribute_entity.value == attrvalue, - ) - ) - else: - query = query.join( - target_node.attributes.of_type(attribute_entity) - ).filter(attribute_entity.name == attrname) - return ( - query.options(lazyload(Document._root)) - .filter(target_node.text == compareto) - .all() - ) - - -for path, compareto in ( - ("/somefile/header/field1", "hi"), - ("/somefile/field1", "hi"), - ("/somefile/header/field2", "there"), - ("/somefile/header/field2[@attr=foo]", "there"), -): - print("\nDocuments containing '%s=%s':" % (path, compareto), line) - print([d.filename for d in find_document(path, compareto)]) diff --git a/examples/elementtree/optimized_al.py b/examples/elementtree/optimized_al.py deleted file mode 100644 index 8aea977f1..000000000 --- a/examples/elementtree/optimized_al.py +++ /dev/null @@ -1,296 +0,0 @@ -"""Uses the same strategy as - ``adjacency_list.py``, but associates each DOM row with its owning - document row, so that a full document of DOM nodes can be loaded - using O(1) queries - the construction of the "hierarchy" is performed - after the load in a non-recursive fashion and is more - efficient. - -""" - -# PART I - Imports/Configuration -import os -import re -from xml.etree import ElementTree - -from sqlalchemy import and_ -from sqlalchemy import Column -from sqlalchemy import create_engine -from sqlalchemy import ForeignKey -from sqlalchemy import Integer -from sqlalchemy import String -from sqlalchemy import Table -from sqlalchemy import Unicode -from sqlalchemy.orm import aliased -from sqlalchemy.orm import lazyload -from sqlalchemy.orm import mapper -from sqlalchemy.orm import registry -from sqlalchemy.orm import relationship -from sqlalchemy.orm import Session - - -e = create_engine("sqlite://") -mapper_registry = registry() - -# PART II - Table Metadata - -# stores a top level record of an XML document. -documents = Table( - "documents", - mapper_registry.metadata, - Column("document_id", Integer, primary_key=True), - Column("filename", String(30), unique=True), -) - -# stores XML nodes in an adjacency list model. This corresponds to -# Element and SubElement objects. -elements = Table( - "elements", - mapper_registry.metadata, - Column("element_id", Integer, primary_key=True), - Column("parent_id", Integer, ForeignKey("elements.element_id")), - Column("document_id", Integer, ForeignKey("documents.document_id")), - Column("tag", Unicode(30), nullable=False), - Column("text", Unicode), - Column("tail", Unicode), -) - -# stores attributes. This corresponds to the dictionary of attributes -# stored by an Element or SubElement. -attributes = Table( - "attributes", - mapper_registry.metadata, - Column( - "element_id", - Integer, - ForeignKey("elements.element_id"), - primary_key=True, - ), - Column("name", Unicode(100), nullable=False, primary_key=True), - Column("value", Unicode(255)), -) - -mapper_registry.metadata.create_all(e) - -# PART III - Model - -# our document class. contains a string name, -# and the ElementTree root element. - - -class Document: - def __init__(self, name, element): - self.filename = name - self.element = element - - -# PART IV - Persistence Mapping - -# Node class. a non-public class which will represent the DB-persisted -# Element/SubElement object. We cannot create mappers for ElementTree elements -# directly because they are at the very least not new-style classes, and also -# may be backed by native implementations. so here we construct an adapter. - - -class _Node: - pass - - -# Attribute class. also internal, this will represent the key/value attributes -# stored for a particular Node. - - -class _Attribute: - def __init__(self, name, value): - self.name = name - self.value = value - - -# setup mappers. Document will eagerly load a list of _Node objects. -# they will be ordered in primary key/insert order, so that we can reconstruct -# an ElementTree structure from the list. -mapper( - Document, - documents, - properties={ - "_nodes": relationship( - _Node, lazy="joined", cascade="all, delete-orphan" - ) - }, -) - -# the _Node objects change the way they load so that a list of _Nodes will -# organize themselves hierarchically using the ElementTreeMarshal. this -# depends on the ordering of nodes being hierarchical as well; relationship() -# always applies at least ROWID/primary key ordering to rows which will -# suffice. -mapper( - _Node, - elements, - properties={ - "children": relationship( - _Node, lazy=None - ), # doesn't load; used only for the save relationship - "attributes": relationship( - _Attribute, lazy="joined", cascade="all, delete-orphan" - ), # eagerly load attributes - }, -) - -mapper(_Attribute, attributes) - -# define marshalling functions that convert from _Node/_Attribute to/from -# ElementTree objects. this will set the ElementTree element as -# "document._element", and append the root _Node object to the "_nodes" mapped -# collection. - - -class ElementTreeMarshal: - def __get__(self, document, owner): - if document is None: - return self - - if hasattr(document, "_element"): - return document._element - - nodes = {} - root = None - for node in document._nodes: - if node.parent_id is not None: - parent = nodes[node.parent_id] - elem = ElementTree.SubElement(parent, node.tag) - nodes[node.element_id] = elem - else: - parent = None - elem = root = ElementTree.Element(node.tag) - nodes[node.element_id] = root - for attr in node.attributes: - elem.attrib[attr.name] = attr.value - elem.text = node.text - elem.tail = node.tail - - document._element = ElementTree.ElementTree(root) - return document._element - - def __set__(self, document, element): - def traverse(node): - n = _Node() - n.tag = str(node.tag) - n.text = str(node.text) - n.tail = str(node.tail) - document._nodes.append(n) - n.children = [traverse(n2) for n2 in node] - n.attributes = [ - _Attribute(str(k), str(v)) for k, v in node.attrib.items() - ] - return n - - traverse(element.getroot()) - document._element = element - - def __delete__(self, document): - del document._element - document._nodes = [] - - -# override Document's "element" attribute with the marshaller. -Document.element = ElementTreeMarshal() - -# PART V - Basic Persistence Example - -line = "\n--------------------------------------------------------" - -# save to DB -session = Session(e) - -# get ElementTree documents -for file in ("test.xml", "test2.xml", "test3.xml"): - filename = os.path.join(os.path.dirname(__file__), file) - doc = ElementTree.parse(filename) - session.add(Document(file, doc)) - -print("\nSaving three documents...", line) -session.commit() -print("Done.") - -print("\nFull text of document 'text.xml':", line) -document = session.query(Document).filter_by(filename="test.xml").first() - -ElementTree.dump(document.element) - -# PART VI - Searching for Paths - -# manually search for a document which contains "/somefile/header/field1:hi" -print("\nManual search for /somefile/header/field1=='hi':", line) - -root = aliased(_Node) -child_node = aliased(_Node) -grandchild_node = aliased(_Node) - -d = ( - session.query(Document) - .join(Document._nodes.of_type(root)) - .filter(and_(root.parent_id.is_(None), root.tag == "somefile")) - .join(root.children.of_type(child_node)) - .filter(child_node.tag == "header") - .join(child_node.children.of_type(grandchild_node)) - .filter( - and_(grandchild_node.tag == "field1", grandchild_node.text == "hi") - ) - .one() -) -ElementTree.dump(d.element) - -# generalize the above approach into an extremely impoverished xpath function: - - -def find_document(path, compareto): - query = session.query(Document) - - for i, match in enumerate( - re.finditer(r"/([\w_]+)(?:\[@([\w_]+)(?:=(.*))?\])?", path) - ): - (token, attrname, attrvalue) = match.group(1, 2, 3) - - if not i: - parent = Document - target_node = aliased(_Node) - - query = query.join(parent._nodes.of_type(target_node)).filter( - target_node.parent_id.is_(None) - ) - else: - parent = target_node - target_node = aliased(_Node) - - query = query.join(parent.children.of_type(target_node)) - - query = query.filter(target_node.tag == token) - if attrname: - attribute_entity = aliased(_Attribute) - query = query.join( - target_node.attributes.of_type(attribute_entity) - ) - if attrvalue: - query = query.filter( - and_( - attribute_entity.name == attrname, - attribute_entity.value == attrvalue, - ) - ) - else: - query = query.filter(attribute_entity.name == attrname) - return ( - query.options(lazyload(Document._nodes)) - .filter(target_node.text == compareto) - .all() - ) - - -for path, compareto in ( - ("/somefile/header/field1", "hi"), - ("/somefile/field1", "hi"), - ("/somefile/header/field2", "there"), - ("/somefile/header/field2[@attr=foo]", "there"), -): - print("\nDocuments containing '%s=%s':" % (path, compareto), line) - print([d.filename for d in find_document(path, compareto)]) diff --git a/examples/elementtree/pickle_type.py b/examples/elementtree/pickle_type.py deleted file mode 100644 index e96a128b3..000000000 --- a/examples/elementtree/pickle_type.py +++ /dev/null @@ -1,78 +0,0 @@ -""" -illustrates a quick and dirty way to persist an XML document expressed using -ElementTree and pickle. - -This is a trivial example using PickleType to marshal/unmarshal the ElementTree -document into a binary column. Compare to explicit.py which stores the -individual components of the ElementTree structure in distinct rows using two -additional mapped entities. Note that the usage of both styles of persistence -are identical, as is the structure of the main Document class. - -""" - -import os -from xml.etree import ElementTree - -from sqlalchemy import Column -from sqlalchemy import create_engine -from sqlalchemy import Integer -from sqlalchemy import PickleType -from sqlalchemy import String -from sqlalchemy import Table -from sqlalchemy.orm import registry -from sqlalchemy.orm import Session - - -e = create_engine("sqlite://") -mapper_registry = registry() - - -# setup a comparator for the PickleType since it's a mutable -# element. - - -def are_elements_equal(x, y): - return x == y - - -# stores a top level record of an XML document. -# the "element" column will store the ElementTree document as a BLOB. -documents = Table( - "documents", - mapper_registry.metadata, - Column("document_id", Integer, primary_key=True), - Column("filename", String(30), unique=True), - Column("element", PickleType(comparator=are_elements_equal)), -) - -mapper_registry.metadata.create_all(e) - -# our document class. contains a string name, -# and the ElementTree root element. - - -class Document: - def __init__(self, name, element): - self.filename = name - self.element = element - - -# setup mapper. -mapper_registry.map_imperatively(Document, documents) - -# time to test ! - -# get ElementTree document -filename = os.path.join(os.path.dirname(__file__), "test.xml") -doc = ElementTree.parse(filename) - -# save to DB -session = Session(e) -session.add(Document("test.xml", doc)) -session.commit() - -# restore -document = session.query(Document).filter_by(filename="test.xml").first() - -# print -ElementTree.dump(document.element) diff --git a/examples/elementtree/test.xml b/examples/elementtree/test.xml deleted file mode 100644 index edb44ccc2..000000000 --- a/examples/elementtree/test.xml +++ /dev/null @@ -1,9 +0,0 @@ -<somefile> - This is somefile. - <header name="foo" value="bar" hoho="lala"> - <field1>hi</field1> - <field2>there</field2> - Some additional text within the header. - </header> - Some more text within somefile. -</somefile>
\ No newline at end of file diff --git a/examples/elementtree/test2.xml b/examples/elementtree/test2.xml deleted file mode 100644 index 69d3167a8..000000000 --- a/examples/elementtree/test2.xml +++ /dev/null @@ -1,4 +0,0 @@ -<somefile> - <field1>hi</field1> - <field2>there</field2> -</somefile>
\ No newline at end of file diff --git a/examples/elementtree/test3.xml b/examples/elementtree/test3.xml deleted file mode 100644 index 6a7a2343e..000000000 --- a/examples/elementtree/test3.xml +++ /dev/null @@ -1,7 +0,0 @@ -<somefile> - test3 - <header name="aheader" value="bar" hoho="lala"> - <field1>one</field1> - <field2 attr='foo'>there</field2> - </header> -</somefile>
\ No newline at end of file |
