From ed4fc64bb0ac61c27bc4af32962fb129e74a36bf Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Fri, 27 Jul 2007 04:08:53 +0000 Subject: merging 0.4 branch to trunk. see CHANGES for details. 0.3 moves to maintenance branch in branches/rel_0_3. --- examples/adjacencytree/basic_tree.py | 16 +-- examples/adjacencytree/byroot_tree.py | 61 ++++---- examples/backref/backref_tree.py | 41 ------ examples/collections/large_collection.py | 3 +- examples/elementtree/adjacency_list.py | 215 ++++++++++++++++++++++++++++ examples/elementtree/optimized_al.py | 224 ++++++++++++++++++++++++++++++ examples/elementtree/pickle.py | 65 +++++++++ examples/elementtree/test.xml | 9 ++ examples/elementtree/test2.xml | 4 + examples/elementtree/test3.xml | 7 + examples/pickle/custom_pickler.py | 5 +- examples/poly_assoc/poly_assoc.py | 3 +- examples/poly_assoc/poly_assoc_fk.py | 3 +- examples/poly_assoc/poly_assoc_generic.py | 3 +- examples/polymorph/concrete.py | 3 +- examples/polymorph/polymorph.py | 7 +- examples/polymorph/single.py | 4 +- examples/sharding/attribute_shard.py | 194 ++++++++++++++++++++++++++ examples/vertical/vertical.py | 15 +- 19 files changed, 777 insertions(+), 105 deletions(-) delete mode 100644 examples/backref/backref_tree.py create mode 100644 examples/elementtree/adjacency_list.py create mode 100644 examples/elementtree/optimized_al.py create mode 100644 examples/elementtree/pickle.py create mode 100644 examples/elementtree/test.xml create mode 100644 examples/elementtree/test2.xml create mode 100644 examples/elementtree/test3.xml create mode 100644 examples/sharding/attribute_shard.py (limited to 'examples') diff --git a/examples/adjacencytree/basic_tree.py b/examples/adjacencytree/basic_tree.py index 9676fae89..53bdc8298 100644 --- a/examples/adjacencytree/basic_tree.py +++ b/examples/adjacencytree/basic_tree.py @@ -1,9 +1,12 @@ """a basic Adjacency List model tree.""" from sqlalchemy import * +from sqlalchemy.orm import * from sqlalchemy.util import OrderedDict +from sqlalchemy.orm.collections import attribute_mapped_collection -metadata = MetaData('sqlite:///', echo=True) +metadata = MetaData('sqlite:///') +metadata.bind.echo = True trees = Table('treenodes', metadata, Column('node_id', Integer, Sequence('treenode_id_seq',optional=False), primary_key=True), @@ -11,17 +14,10 @@ trees = Table('treenodes', metadata, Column('node_name', String(50), nullable=False), ) -class NodeList(OrderedDict): - """subclasses OrderedDict to allow usage as a list-based property.""" - def append(self, node): - self[node.name] = node - def __iter__(self): - return iter(self.values()) class TreeNode(object): """a rich Tree class which includes path-based operations""" def __init__(self, name): - self.children = NodeList() self.name = name self.parent = None self.id = None @@ -30,7 +26,7 @@ class TreeNode(object): if isinstance(node, str): node = TreeNode(node) node.parent = self - self.children.append(node) + self.children[node.name] = node def __repr__(self): return self._getstring(0, False) def __str__(self): @@ -47,7 +43,7 @@ mapper(TreeNode, trees, properties=dict( id=trees.c.node_id, name=trees.c.node_name, parent_id=trees.c.parent_node_id, - children=relation(TreeNode, cascade="all", backref=backref("parent", remote_side=[trees.c.node_id]), collection_class=NodeList), + children=relation(TreeNode, cascade="all", backref=backref("parent", remote_side=[trees.c.node_id]), collection_class=attribute_mapped_collection('name')), )) print "\n\n\n----------------------------" diff --git a/examples/adjacencytree/byroot_tree.py b/examples/adjacencytree/byroot_tree.py index 5ec055392..a61bde875 100644 --- a/examples/adjacencytree/byroot_tree.py +++ b/examples/adjacencytree/byroot_tree.py @@ -3,7 +3,8 @@ introduces a new selection method which selects an entire tree of nodes at once, advantage of a custom MapperExtension to assemble incoming nodes into their correct structure.""" from sqlalchemy import * -from sqlalchemy.util import OrderedDict +from sqlalchemy.orm import * +from sqlalchemy.orm.collections import attribute_mapped_collection engine = create_engine('sqlite:///:memory:', echo=True) @@ -28,82 +29,69 @@ treedata = Table( ) -class NodeList(OrderedDict): - """subclasses OrderedDict to allow usage as a list-based property.""" - def append(self, node): - self[node.name] = node - def __iter__(self): - return iter(self.values()) - - class TreeNode(object): """a hierarchical Tree class, which adds the concept of a "root node". The root is the topmost node in a tree, or in other words a node whose parent ID is NULL. All child nodes that are decendents of a particular root, as well as a root node itself, - reference this root node. - this is useful as a way to identify all nodes in a tree as belonging to a single - identifiable root. Any node can return its root node and therefore the "tree" that it - belongs to, and entire trees can be selected from the database in one query, by - identifying their common root ID.""" + reference this root node. """ def __init__(self, name): - """for data integrity, a TreeNode requires its name to be passed as a parameter - to its constructor, so there is no chance of a TreeNode that doesnt have a name.""" self.name = name - self.children = NodeList() self.root = self - self.parent = None - self.id = None - self.data =None - self.parent_id = None - self.root_id=None + def _set_root(self, root): self.root = root - for c in self.children: + for c in self.children.values(): c._set_root(root) + def append(self, node): if isinstance(node, str): node = TreeNode(node) - node.parent = self node._set_root(self.root) self.children.append(node) + def __repr__(self): return self._getstring(0, False) + def __str__(self): return self._getstring(0, False) + def _getstring(self, level, expand = False): s = (' ' * level) + "%s (%s,%s,%s, %d): %s" % (self.name, self.id,self.parent_id,self.root_id, id(self), repr(self.data)) + '\n' if expand: s += ''.join([n._getstring(level+1, True) for n in self.children.values()]) return s + def print_nodes(self): return self._getstring(0, True) class TreeLoader(MapperExtension): - """an extension that will plug-in additional functionality to the Mapper.""" + def after_insert(self, mapper, connection, instance): """runs after the insert of a new TreeNode row. The primary key of the row is not determined until the insert is complete, since most DB's use autoincrementing columns. If this node is the root node, we will take the new primary key and update it as the value of the node's "root ID" as well, since its root node is itself.""" + if instance.root is instance: connection.execute(mapper.mapped_table.update(TreeNode.c.id==instance.id, values=dict(root_node_id=instance.id))) instance.root_id = instance.id - def append_result(self, mapper, selectcontext, row, instance, identitykey, result, isnew): + def append_result(self, mapper, selectcontext, row, instance, result, **flags): """runs as results from a SELECT statement are processed, and newly created or already-existing instances that correspond to each row are appended to result lists. This method will only append root nodes to the result list, and will attach child nodes to their appropriate parent node as they arrive from the select results. This allows a SELECT statement which returns both root and child nodes in one query to return a list of "roots".""" + + isnew = flags.get('isnew', False) + if instance.parent_id is None: result.append(instance) else: - if isnew or context.populate_existing: + if isnew or selectcontext.populate_existing: parentnode = selectcontext.identity_map[mapper.identity_key(instance.parent_id)] - parentnode.children.append_without_event(instance) - # fire off lazy loader before the instance is part of the session - instance.children + parentnode.children.append(instance) return False class TreeData(object): @@ -127,12 +115,19 @@ mapper(TreeNode, trees, properties=dict( name=trees.c.node_name, parent_id=trees.c.parent_node_id, root_id=trees.c.root_node_id, - root=relation(TreeNode, primaryjoin=trees.c.root_node_id==trees.c.node_id, remote_side=trees.c.node_id, lazy=None, uselist=False), - children=relation(TreeNode, primaryjoin=trees.c.parent_node_id==trees.c.node_id, lazy=None, uselist=True, cascade="delete,save-update", collection_class=NodeList), - data=relation(mapper(TreeData, treedata, properties=dict(id=treedata.c.data_id)), cascade="delete,delete-orphan,save-update", lazy=False) + root=relation(TreeNode, primaryjoin=trees.c.root_node_id==trees.c.node_id, remote_side=trees.c.node_id, lazy=None), + children=relation(TreeNode, + primaryjoin=trees.c.parent_node_id==trees.c.node_id, + lazy=None, + cascade="all", + collection_class=attribute_mapped_collection('name'), + backref=backref('parent', primaryjoin=trees.c.parent_node_id==trees.c.node_id, remote_side=trees.c.node_id) + ), + data=relation(TreeData, cascade="all, delete-orphan", lazy=False) ), extension = TreeLoader()) +mapper(TreeData, treedata, properties={'id':treedata.c.data_id}) session = create_session() diff --git a/examples/backref/backref_tree.py b/examples/backref/backref_tree.py deleted file mode 100644 index 7386d034c..000000000 --- a/examples/backref/backref_tree.py +++ /dev/null @@ -1,41 +0,0 @@ -from sqlalchemy import * - -metadata = MetaData('sqlite:///', echo=True) - -class Tree(object): - def __init__(self, name='', father=None): - self.name = name - self.father = father - def __str__(self): - return '' % self.name - def __repr__(self): - return self.__str__() - -table = Table('tree', metadata, - Column('id', Integer, primary_key=True), - Column('name', String(64), nullable=False), - Column('father_id', Integer, ForeignKey('tree.id'), nullable=True)) -table.create() - -mapper(Tree, table, - properties={ - 'childs':relation(Tree, remote_side=table.c.father_id, primaryjoin=table.c.father_id==table.c.id, backref=backref('father', remote_side=table.c.id))}, - ) - -root = Tree('root') -child1 = Tree('child1', root) -child2 = Tree('child2', root) -child3 = Tree('child3', child1) - -child4 = Tree('child4') -child1.childs.append(child4) - -session = create_session() -session.save(root) -session.flush() - -print root.childs -print child1.childs -print child2.childs -print child2.father -print child3.father diff --git a/examples/collections/large_collection.py b/examples/collections/large_collection.py index d592441ab..3c53db121 100644 --- a/examples/collections/large_collection.py +++ b/examples/collections/large_collection.py @@ -1,7 +1,8 @@ """illlustrates techniques for dealing with very large collections""" from sqlalchemy import * -meta = MetaData('sqlite://', echo=True) +meta = MetaData('sqlite://') +meta.bind.echo = True org_table = Table('organizations', meta, Column('org_id', Integer, primary_key=True), diff --git a/examples/elementtree/adjacency_list.py b/examples/elementtree/adjacency_list.py new file mode 100644 index 000000000..204662f56 --- /dev/null +++ b/examples/elementtree/adjacency_list.py @@ -0,0 +1,215 @@ +"""illustrates an explicit way to persist an XML document expressed using ElementTree. + +This example explicitly marshals/unmarshals the ElementTree document into +mapped entities which have their own tables. Compare to pickle.py which +uses pickle to accomplish the same task. Note that the usage of both +styles of persistence are identical, as is the structure of the main Document class. +""" + +################################# PART I - Imports/Coniguration ########################################### +from sqlalchemy import * +from sqlalchemy.orm import * + +import sys, os, StringIO, re + +import logging +logging.basicConfig() + +# uncomment to show SQL statements +#logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) + +# uncomment to show SQL statements and result sets +#logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG) + + +from elementtree import ElementTree +from elementtree.ElementTree import Element, SubElement + +meta = MetaData() +meta.engine = 'sqlite://' + +################################# PART II - Table Metadata ########################################### + +# stores a top level record of an XML document. +documents = Table('documents', meta, + Column('document_id', Integer, primary_key=True), + Column('filename', String(30), unique=True), + Column('element_id', Integer, ForeignKey('elements.element_id')) +) + +# stores XML nodes in an adjacency list model. This corresponds to +# Element and SubElement objects. +elements = Table('elements', meta, + Column('element_id', Integer, primary_key=True), + Column('parent_id', Integer, ForeignKey('elements.element_id')), + Column('tag', Unicode(30), nullable=False), + Column('text', Unicode), + Column('tail', Unicode) + ) + +# stores attributes. This corresponds to the dictionary of attributes +# stored by an Element or SubElement. +attributes = Table('attributes', meta, + Column('element_id', Integer, ForeignKey('elements.element_id'), primary_key=True), + Column('name', Unicode(100), nullable=False, primary_key=True), + Column('value', Unicode(255))) + +meta.create_all() + +#################################### PART III - Model ############################################# + +# our document class. contains a string name, +# and the ElementTree root element. +class Document(object): + def __init__(self, name, element): + self.filename = name + self.element = element + + def __str__(self): + buf = StringIO.StringIO() + self.element.write(buf) + return buf.getvalue() + +#################################### PART IV - Persistence Mapping ################################### + +# Node class. a non-public class which will represent +# the DB-persisted Element/SubElement object. We cannot create mappers for +# ElementTree elements directly because they are at the very least not new-style +# classes, and also may be backed by native implementations. +# so here we construct an adapter. +class _Node(object): + pass + +# Attribute class. also internal, this will represent the key/value attributes stored for +# a particular Node. +class _Attribute(object): + def __init__(self, name, value): + self.name = name + self.value = value + +# setup mappers. Document will eagerly load a list of _Node objects. +mapper(Document, documents, properties={ + '_root':relation(_Node, lazy=False, cascade="all") +}) + +mapper(_Node, elements, properties={ + 'children':relation(_Node, cascade="all"), + 'attributes':relation(_Attribute, lazy=False, cascade="all, delete-orphan"), # eagerly load attributes +}) + +mapper(_Attribute, attributes) + +# define marshalling functions that convert from _Node/_Attribute to/from ElementTree objects. +# this will set the ElementTree element as "document._element", and append the root _Node +# object to the "_root" mapped collection. +class ElementTreeMarshal(object): + def __get__(self, document, owner): + if document is None: + return self + + if hasattr(document, '_element'): + return document._element + + def traverse(node, parent=None): + if parent is not None: + elem = ElementTree.SubElement(parent, node.tag) + else: + elem = ElementTree.Element(node.tag) + elem.text = node.text + elem.tail = node.tail + for attr in node.attributes: + elem.attrib[attr.name] = attr.value + for child in node.children: + traverse(child, parent=elem) + return elem + + document._element = ElementTree.ElementTree(traverse(document._root)) + return document._element + + def __set__(self, document, element): + def traverse(node): + n = _Node() + n.tag = node.tag + n.text = node.text + n.tail = node.tail + n.children = [traverse(n2) for n2 in node] + n.attributes = [_Attribute(k, v) for k, v in node.attrib.iteritems()] + return n + + document._root = traverse(element.getroot()) + document._element = element + + def __delete__(self, document): + del document._element + document._root = [] + +# override Document's "element" attribute with the marshaller. +Document.element = ElementTreeMarshal() + +########################################### PART V - Basic Persistence Example ############################ + +line = "\n--------------------------------------------------------" + +# save to DB +session = create_session() + +# get ElementTree documents +for file in ('test.xml', 'test2.xml', 'test3.xml'): + filename = os.path.join(os.path.dirname(sys.argv[0]), file) + doc = ElementTree.parse(filename) + session.save(Document(file, doc)) + +print "\nSaving three documents...", line +session.flush() +print "Done." + +# clear session (to illustrate a full load), restore +session.clear() + +print "\nFull text of document 'text.xml':", line +document = session.query(Document).filter_by(filename="test.xml").first() + +print document + +############################################ PART VI - Searching for Paths ####################################### + +# manually search for a document which contains "/somefile/header/field1:hi" +print "\nManual search for /somefile/header/field1=='hi':", line +n1 = elements.alias('n1') +n2 = elements.alias('n2') +n3 = elements.alias('n3') +j = documents.join(n1).join(n2, n1.c.element_id==n2.c.parent_id).join(n3, n2.c.element_id==n3.c.parent_id) +d = session.query(Document).select_from(j).filter(n1.c.tag=='somefile').filter(n2.c.tag=='header').filter(and_(n3.c.tag=='field1', n3.c.text=='hi')).one() +print d + +# generalize the above approach into an extremely impoverished xpath function: +def find_document(path, compareto): + j = documents + prev_elements = None + query = session.query(Document) + for i, match in enumerate(re.finditer(r'/([\w_]+)(?:\[@([\w_]+)(?:=(.*))?\])?', path)): + (token, attrname, attrvalue) = match.group(1, 2, 3) + a = elements.alias("n%d" % i) + query = query.filter(a.c.tag==token) + if attrname: + attr_alias = attributes.alias('a%d' % i) + if attrvalue: + query = query.filter(and_(a.c.element_id==attr_alias.c.element_id, attr_alias.c.name==attrname, attr_alias.c.value==attrvalue)) + else: + query = query.filter(and_(a.c.element_id==attr_alias.c.element_id, attr_alias.c.name==attrname)) + if prev_elements is not None: + j = j.join(a, prev_elements.c.element_id==a.c.parent_id) + else: + j = j.join(a) + prev_elements = a + return query.options(lazyload('_root')).select_from(j).filter(prev_elements.c.text==compareto).all() + +for path, compareto in ( + ('/somefile/header/field1', 'hi'), + ('/somefile/field1', 'hi'), + ('/somefile/header/field2', 'there'), + ('/somefile/header/field2[@attr=foo]', 'there') + ): + print "\nDocuments containing '%s=%s':" % (path, compareto), line + print [d.filename for d in find_document(path, compareto)] + diff --git a/examples/elementtree/optimized_al.py b/examples/elementtree/optimized_al.py new file mode 100644 index 000000000..17b6489de --- /dev/null +++ b/examples/elementtree/optimized_al.py @@ -0,0 +1,224 @@ +"""This script duplicates adjacency_list.py, but optimizes the loading +of XML nodes to be based on a "flattened" datamodel. Any number of XML documents, +each of arbitrary complexity, can be loaded in their entirety via a single query +which joins on only three tables. + +""" + +################################# PART I - Imports/Coniguration ########################################### +from sqlalchemy import * +from sqlalchemy.orm import * + +import sys, os, StringIO, re + +import logging +logging.basicConfig() + +# uncomment to show SQL statements +#logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) + +# uncomment to show SQL statements and result sets +logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG) + + +from elementtree import ElementTree +from elementtree.ElementTree import Element, SubElement + +meta = MetaData() +meta.engine = 'sqlite://' + +################################# PART II - Table Metadata ########################################### + +# stores a top level record of an XML document. +documents = Table('documents', meta, + Column('document_id', Integer, primary_key=True), + Column('filename', String(30), unique=True), +) + +# stores XML nodes in an adjacency list model. This corresponds to +# Element and SubElement objects. +elements = Table('elements', meta, + Column('element_id', Integer, primary_key=True), + Column('parent_id', Integer, ForeignKey('elements.element_id')), + Column('document_id', Integer, ForeignKey('documents.document_id')), + Column('tag', Unicode(30), nullable=False), + Column('text', Unicode), + Column('tail', Unicode) + ) + +# stores attributes. This corresponds to the dictionary of attributes +# stored by an Element or SubElement. +attributes = Table('attributes', meta, + Column('element_id', Integer, ForeignKey('elements.element_id'), primary_key=True), + Column('name', Unicode(100), nullable=False, primary_key=True), + Column('value', Unicode(255))) + +meta.create_all() + +#################################### PART III - Model ############################################# + +# our document class. contains a string name, +# and the ElementTree root element. +class Document(object): + def __init__(self, name, element): + self.filename = name + self.element = element + + def __str__(self): + buf = StringIO.StringIO() + self.element.write(buf) + return buf.getvalue() + +#################################### PART IV - Persistence Mapping ################################### + +# Node class. a non-public class which will represent +# the DB-persisted Element/SubElement object. We cannot create mappers for +# ElementTree elements directly because they are at the very least not new-style +# classes, and also may be backed by native implementations. +# so here we construct an adapter. +class _Node(object): + pass + +# Attribute class. also internal, this will represent the key/value attributes stored for +# a particular Node. +class _Attribute(object): + def __init__(self, name, value): + self.name = name + self.value = value + +# setup mappers. Document will eagerly load a list of _Node objects. +# they will be ordered in primary key/insert order, so that we can reconstruct +# an ElementTree structure from the list. +mapper(Document, documents, properties={ + '_nodes':relation(_Node, lazy=False, cascade="all, delete-orphan") +}) + +# the _Node objects change the way they load so that a list of _Nodes will organize +# themselves hierarchically using the HierarchicalLoader. this depends on the ordering of +# nodes being hierarchical as well; relation() always applies at least ROWID/primary key +# ordering to rows which will suffice. +mapper(_Node, elements, properties={ + 'children':relation(_Node, lazy=None), # doesnt load; used only for the save relationship + 'attributes':relation(_Attribute, lazy=False, cascade="all, delete-orphan"), # eagerly load attributes +}) + +mapper(_Attribute, attributes) + +# define marshalling functions that convert from _Node/_Attribute to/from ElementTree objects. +# this will set the ElementTree element as "document._element", and append the root _Node +# object to the "_nodes" mapped collection. +class ElementTreeMarshal(object): + def __get__(self, document, owner): + if document is None: + return self + + if hasattr(document, '_element'): + return document._element + + nodes = {} + root = None + for node in document._nodes: + if node.parent_id is not None: + parent = nodes[node.parent_id] + elem = ElementTree.SubElement(parent, node.tag) + nodes[node.element_id] = elem + else: + parent = None + elem = root = ElementTree.Element(node.tag) + nodes[node.element_id] = root + for attr in node.attributes: + elem.attrib[attr.name] = attr.value + elem.text = node.text + elem.tail = node.tail + + document._element = ElementTree.ElementTree(root) + return document._element + + def __set__(self, document, element): + def traverse(node): + n = _Node() + n.tag = node.tag + n.text = node.text + n.tail = node.tail + document._nodes.append(n) + n.children = [traverse(n2) for n2 in node] + n.attributes = [_Attribute(k, v) for k, v in node.attrib.iteritems()] + return n + + traverse(element.getroot()) + document._element = element + + def __delete__(self, document): + del document._element + document._nodes = [] + +# override Document's "element" attribute with the marshaller. +Document.element = ElementTreeMarshal() + +########################################### PART V - Basic Persistence Example ############################ + +line = "\n--------------------------------------------------------" + +# save to DB +session = create_session() + +# get ElementTree documents +for file in ('test.xml', 'test2.xml', 'test3.xml'): + filename = os.path.join(os.path.dirname(sys.argv[0]), file) + doc = ElementTree.parse(filename) + session.save(Document(file, doc)) + +print "\nSaving three documents...", line +session.flush() +print "Done." + +# clear session (to illustrate a full load), restore +session.clear() + +print "\nFull text of document 'text.xml':", line +document = session.query(Document).filter_by(filename="test.xml").first() + +print document + +############################################ PART VI - Searching for Paths ####################################### + +# manually search for a document which contains "/somefile/header/field1:hi" +print "\nManual search for /somefile/header/field1=='hi':", line +n1 = elements.alias('n1') +n2 = elements.alias('n2') +n3 = elements.alias('n3') +j = documents.join(n1).join(n2, n1.c.element_id==n2.c.parent_id).join(n3, n2.c.element_id==n3.c.parent_id) +d = session.query(Document).select_from(j).filter(n1.c.tag=='somefile').filter(n2.c.tag=='header').filter(and_(n3.c.tag=='field1', n3.c.text=='hi')).one() +print d + +# generalize the above approach into an extremely impoverished xpath function: +def find_document(path, compareto): + j = documents + prev_elements = None + query = session.query(Document) + for i, match in enumerate(re.finditer(r'/([\w_]+)(?:\[@([\w_]+)(?:=(.*))?\])?', path)): + (token, attrname, attrvalue) = match.group(1, 2, 3) + a = elements.alias("n%d" % i) + query = query.filter(a.c.tag==token) + if attrname: + attr_alias = attributes.alias('a%d' % i) + if attrvalue: + query = query.filter(and_(a.c.element_id==attr_alias.c.element_id, attr_alias.c.name==attrname, attr_alias.c.value==attrvalue)) + else: + query = query.filter(and_(a.c.element_id==attr_alias.c.element_id, attr_alias.c.name==attrname)) + if prev_elements is not None: + j = j.join(a, prev_elements.c.element_id==a.c.parent_id) + else: + j = j.join(a) + prev_elements = a + return query.options(lazyload('_nodes')).select_from(j).filter(prev_elements.c.text==compareto).all() + +for path, compareto in ( + ('/somefile/header/field1', 'hi'), + ('/somefile/field1', 'hi'), + ('/somefile/header/field2', 'there'), + ('/somefile/header/field2[@attr=foo]', 'there') + ): + print "\nDocuments containing '%s=%s':" % (path, compareto), line + print [d.filename for d in find_document(path, compareto)] + diff --git a/examples/elementtree/pickle.py b/examples/elementtree/pickle.py new file mode 100644 index 000000000..443ca85c3 --- /dev/null +++ b/examples/elementtree/pickle.py @@ -0,0 +1,65 @@ +"""illustrates a quick and dirty way to persist an XML document expressed using ElementTree and pickle. + +This is a trivial example using PickleType to marshal/unmarshal the ElementTree +document into a binary column. Compare to explicit.py which stores the individual components of the ElementTree +structure in distinct rows using two additional mapped entities. Note that the usage of both +styles of persistence are identical, as is the structure of the main Document class. +""" + +from sqlalchemy import * +from sqlalchemy.orm import * + +import sys, os + +import logging +logging.basicConfig() + +# uncomment to show SQL statements +#logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO) + +# uncomment to show SQL statements and result sets +#logging.getLogger('sqlalchemy.engine').setLevel(logging.DEBUG) + +from elementtree import ElementTree + +meta = MetaData() +meta.engine = 'sqlite://' + +# stores a top level record of an XML document. +# the "element" column will store the ElementTree document as a BLOB. +documents = Table('documents', meta, + Column('document_id', Integer, primary_key=True), + Column('filename', String(30), unique=True), + Column('element', PickleType) +) + +meta.create_all() + +# our document class. contains a string name, +# and the ElementTree root element. +class Document(object): + def __init__(self, name, element): + self.filename = name + self.element = element + +# setup mapper. +mapper(Document, documents) + +###### time to test ! ######### + +# get ElementTree document +filename = os.path.join(os.path.dirname(sys.argv[0]), "test.xml") +doc = ElementTree.parse(filename) + +# save to DB +session = create_session() +session.save(Document("test.xml", doc)) +session.flush() + +# clear session (to illustrate a full load), restore +session.clear() +document = session.query(Document).filter_by(filename="test.xml").first() + +# print +document.element.write(sys.stdout) + diff --git a/examples/elementtree/test.xml b/examples/elementtree/test.xml new file mode 100644 index 000000000..edb44ccc2 --- /dev/null +++ b/examples/elementtree/test.xml @@ -0,0 +1,9 @@ + + This is somefile. +
+ hi + there + Some additional text within the header. +
+ Some more text within somefile. +
\ No newline at end of file diff --git a/examples/elementtree/test2.xml b/examples/elementtree/test2.xml new file mode 100644 index 000000000..69d3167a8 --- /dev/null +++ b/examples/elementtree/test2.xml @@ -0,0 +1,4 @@ + + hi + there + \ No newline at end of file diff --git a/examples/elementtree/test3.xml b/examples/elementtree/test3.xml new file mode 100644 index 000000000..6a7a2343e --- /dev/null +++ b/examples/elementtree/test3.xml @@ -0,0 +1,7 @@ + + test3 +
+ one + there +
+
\ No newline at end of file diff --git a/examples/pickle/custom_pickler.py b/examples/pickle/custom_pickler.py index 4b259c1f8..b45e16e7c 100644 --- a/examples/pickle/custom_pickler.py +++ b/examples/pickle/custom_pickler.py @@ -6,7 +6,8 @@ from cStringIO import StringIO from pickle import Pickler, Unpickler import threading -meta = MetaData('sqlite://', echo=True) +meta = MetaData('sqlite://') +meta.bind.echo = True class MyExt(MapperExtension): def populate_instance(self, mapper, selectcontext, row, instance, identitykey, isnew): @@ -25,7 +26,7 @@ class MyPickler(object): def persistent_id(self, obj): if getattr(obj, "id", None) is None: sess = MyPickler.sessions.current - newsess = create_session(bind_to=sess.connection(class_mapper(Bar))) + newsess = create_session(bind=sess.connection(class_mapper(Bar))) newsess.save(obj) newsess.flush() key = "%s:%s" % (type(obj).__name__, obj.id) diff --git a/examples/poly_assoc/poly_assoc.py b/examples/poly_assoc/poly_assoc.py index a2ac6140f..c13ffbfa1 100644 --- a/examples/poly_assoc/poly_assoc.py +++ b/examples/poly_assoc/poly_assoc.py @@ -21,8 +21,9 @@ the associated target object from those which associate with it. """ from sqlalchemy import * +from sqlalchemy.orm import * -metadata = MetaData('sqlite://', echo=False) +metadata = MetaData('sqlite://') ####### # addresses table, class, 'addressable interface'. diff --git a/examples/poly_assoc/poly_assoc_fk.py b/examples/poly_assoc/poly_assoc_fk.py index f3cedac72..22ee50009 100644 --- a/examples/poly_assoc/poly_assoc_fk.py +++ b/examples/poly_assoc/poly_assoc_fk.py @@ -20,8 +20,9 @@ poly_assoc_generic.py. """ from sqlalchemy import * +from sqlalchemy.orm import * -metadata = MetaData('sqlite://', echo=False) +metadata = MetaData('sqlite://') ####### # addresses table, class, 'addressable interface'. diff --git a/examples/poly_assoc/poly_assoc_generic.py b/examples/poly_assoc/poly_assoc_generic.py index 9cc7321db..4fca31019 100644 --- a/examples/poly_assoc/poly_assoc_generic.py +++ b/examples/poly_assoc/poly_assoc_generic.py @@ -7,8 +7,9 @@ function "association" which creates a new polymorphic association """ from sqlalchemy import * +from sqlalchemy.orm import * -metadata = MetaData('sqlite://', echo=False) +metadata = MetaData('sqlite://') def association(cls, table): """create an association 'interface'.""" diff --git a/examples/polymorph/concrete.py b/examples/polymorph/concrete.py index 593d3f480..5f12e9a3d 100644 --- a/examples/polymorph/concrete.py +++ b/examples/polymorph/concrete.py @@ -1,4 +1,5 @@ from sqlalchemy import * +from sqlalchemy.orm import * metadata = MetaData() @@ -49,7 +50,7 @@ manager_mapper = mapper(Manager, managers_table, inherits=employee_mapper, concr engineer_mapper = mapper(Engineer, engineers_table, inherits=employee_mapper, concrete=True, polymorphic_identity='engineer') -session = create_session(bind_to=engine) +session = create_session(bind=engine) m1 = Manager("pointy haired boss", "manager1") e1 = Engineer("wally", "engineer1") diff --git a/examples/polymorph/polymorph.py b/examples/polymorph/polymorph.py index 231a9d8e4..4f3aeb7d2 100644 --- a/examples/polymorph/polymorph.py +++ b/examples/polymorph/polymorph.py @@ -1,10 +1,11 @@ from sqlalchemy import * +from sqlalchemy.orm import * import sets -# this example illustrates a polymorphic load of two classes, where each class has a very -# different set of properties +# this example illustrates a polymorphic load of two classes -metadata = MetaData('sqlite://', echo=True) +metadata = MetaData('sqlite://') +metadata.bind.echo = True # a table to store companies companies = Table('companies', metadata, diff --git a/examples/polymorph/single.py b/examples/polymorph/single.py index dcdb3c890..dfc426416 100644 --- a/examples/polymorph/single.py +++ b/examples/polymorph/single.py @@ -1,6 +1,8 @@ from sqlalchemy import * +from sqlalchemy.orm import * -metadata = MetaData('sqlite://', echo='debug') +metadata = MetaData('sqlite://') +metadata.bind.echo = 'debug' # a table to store companies companies = Table('companies', metadata, diff --git a/examples/sharding/attribute_shard.py b/examples/sharding/attribute_shard.py new file mode 100644 index 000000000..e95b978ae --- /dev/null +++ b/examples/sharding/attribute_shard.py @@ -0,0 +1,194 @@ +"""a basic example of using the SQLAlchemy Sharding API. +Sharding refers to horizontally scaling data across multiple +databases. + +In this example, four sqlite databases will store information about +weather data on a database-per-continent basis. + +To set up a sharding system, you need: + 1. multiple databases, each assined a 'shard id' + 2. a function which can return a single shard id, given an instance + to be saved; this is called "shard_chooser" + 3. a function which can return a list of shard ids which apply to a particular + instance identifier; this is called "id_chooser". If it returns all shard ids, + all shards will be searched. + 4. a function which can return a list of shard ids to try, given a particular + Query ("query_chooser"). If it returns all shard ids, all shards will be + queried and the results joined together. +""" + +# step 1. imports +from sqlalchemy import * +from sqlalchemy.orm import * +from sqlalchemy.orm.shard import ShardedSession +from sqlalchemy.sql import ColumnOperators +import datetime, operator + +# step 2. databases +echo = True +db1 = create_engine('sqlite:///shard1.db', echo=echo) +db2 = create_engine('sqlite:///shard2.db', echo=echo) +db3 = create_engine('sqlite:///shard3.db', echo=echo) +db4 = create_engine('sqlite:///shard4.db', echo=echo) + + +# step 3. create session function. this binds the shard ids +# to databases within a ShardedSession and returns it. +def create_session(): + s = ShardedSession(shard_chooser, id_chooser, query_chooser) + s.bind_shard('north_america', db1) + s.bind_shard('asia', db2) + s.bind_shard('europe', db3) + s.bind_shard('south_america', db4) + return s + +# step 4. table setup. +meta = MetaData() + +# we need a way to create identifiers which are unique across all +# databases. one easy way would be to just use a composite primary key, where one +# value is the shard id. but here, we'll show something more "generic", an +# id generation function. we'll use a simplistic "id table" stored in database +# #1. Any other method will do just as well; UUID, hilo, application-specific, etc. + +ids = Table('ids', meta, + Column('nextid', Integer, nullable=False)) + +def id_generator(ctx): + # in reality, might want to use a separate transaction for this. + c = db1.connect() + nextid = c.execute(ids.select(for_update=True)).scalar() + c.execute(ids.update(values={ids.c.nextid : ids.c.nextid + 1})) + return nextid + +# table setup. we'll store a lead table of continents/cities, +# and a secondary table storing locations. +# a particular row will be placed in the database whose shard id corresponds to the +# 'continent'. in this setup, secondary rows in 'weather_reports' will +# be placed in the same DB as that of the parent, but this can be changed +# if you're willing to write more complex sharding functions. + +weather_locations = Table("weather_locations", meta, + Column('id', Integer, primary_key=True, default=id_generator), + Column('continent', String(30), nullable=False), + Column('city', String(50), nullable=False) + ) + +weather_reports = Table("weather_reports", meta, + Column('id', Integer, primary_key=True), + Column('location_id', Integer, ForeignKey('weather_locations.id')), + Column('temperature', Float), + Column('report_time', DateTime, default=datetime.datetime.now), +) + +# create tables +for db in (db1, db2, db3, db4): + meta.drop_all(db) + meta.create_all(db) + +# establish initial "id" in db1 +db1.execute(ids.insert(), nextid=1) + + +# step 5. define sharding functions. + +# we'll use a straight mapping of a particular set of "country" +# attributes to shard id. +shard_lookup = { + 'North America':'north_america', + 'Asia':'asia', + 'Europe':'europe', + 'South America':'south_america' +} + +# shard_chooser - looks at the given instance and returns a shard id +# note that we need to define conditions for +# the WeatherLocation class, as well as our secondary Report class which will +# point back to its WeatherLocation via its 'location' attribute. +def shard_chooser(mapper, instance): + if isinstance(instance, WeatherLocation): + return shard_lookup[instance.continent] + else: + return shard_chooser(mapper, instance.location) + +# id_chooser. given a primary key, returns a list of shards +# to search. here, we don't have any particular information from a +# pk so we just return all shard ids. often, youd want to do some +# kind of round-robin strategy here so that requests are evenly +# distributed among DBs +def id_chooser(ident): + return ['north_america', 'asia', 'europe', 'south_america'] + +# query_chooser. this also returns a list of shard ids, which can +# just be all of them. but here we'll search into the Query in order +# to try to narrow down the list of shards to query. +def query_chooser(query): + ids = [] + + # here we will traverse through the query's criterion, searching + # for SQL constructs. we'll grab continent names as we find them + # and convert to shard ids + class FindContinent(sql.ClauseVisitor): + def visit_binary(self, binary): + if binary.left is weather_locations.c.continent: + if binary.operator == operator.eq: + ids.append(shard_lookup[binary.right.value]) + elif binary.operator == ColumnOperators.in_op: + for bind in binary.right.clauses: + ids.append(shard_lookup[bind.value]) + + FindContinent().traverse(query._criterion) + if len(ids) == 0: + return ['north_america', 'asia', 'europe', 'south_america'] + else: + return ids + +# step 6. mapped classes. +class WeatherLocation(object): + def __init__(self, continent, city): + self.continent = continent + self.city = city + +class Report(object): + def __init__(self, temperature): + self.temperature = temperature + +# step 7. mappers +mapper(WeatherLocation, weather_locations, properties={ + 'reports':relation(Report, backref='location') +}) + +mapper(Report, weather_reports) + + +# save and load objects! + +tokyo = WeatherLocation('Asia', 'Tokyo') +newyork = WeatherLocation('North America', 'New York') +toronto = WeatherLocation('North America', 'Toronto') +london = WeatherLocation('Europe', 'London') +dublin = WeatherLocation('Europe', 'Dublin') +brasilia = WeatherLocation('South America', 'Brasila') +quito = WeatherLocation('South America', 'Quito') + +tokyo.reports.append(Report(80.0)) +newyork.reports.append(Report(75)) +quito.reports.append(Report(85)) + +sess = create_session() +for c in [tokyo, newyork, toronto, london, dublin, brasilia, quito]: + sess.save(c) +sess.flush() + +sess.clear() + +t = sess.query(WeatherLocation).get(tokyo.id) +assert t.city == tokyo.city +assert t.reports[0].temperature == 80.0 + +north_american_cities = sess.query(WeatherLocation).filter(WeatherLocation.continent == 'North America') +assert [c.city for c in north_american_cities] == ['New York', 'Toronto'] + +asia_and_europe = sess.query(WeatherLocation).filter(WeatherLocation.continent.in_('Europe', 'Asia')) +assert set([c.city for c in asia_and_europe]) == set(['Tokyo', 'London', 'Dublin']) + diff --git a/examples/vertical/vertical.py b/examples/vertical/vertical.py index e9fff9163..e3b48c336 100644 --- a/examples/vertical/vertical.py +++ b/examples/vertical/vertical.py @@ -3,9 +3,12 @@ represented in distinct database rows. This allows objects to be created with d fields that are all persisted in a normalized fashion.""" from sqlalchemy import * +from sqlalchemy.orm import * +from sqlalchemy.orm.collections import mapped_collection import datetime -e = MetaData('sqlite://', echo=True) +e = MetaData('sqlite://') +e.bind.echo = True # this table represents Entity objects. each Entity gets a row in this table, # with a primary key and a title. @@ -37,14 +40,6 @@ entity_values = Table('entity_values', e, e.create_all() -class EntityDict(dict): - """this is a dictionary that implements an append() and an __iter__ method. - such a dictionary can be used with SQLAlchemy list-based attributes.""" - def append(self, entityvalue): - self[entityvalue.field.name] = entityvalue - def __iter__(self): - return iter(self.values()) - class Entity(object): """represents an Entity. The __getattr__ method is overridden to search the object's _entities dictionary for the appropriate value, and the __setattribute__ @@ -123,7 +118,7 @@ mapper( ) mapper(Entity, entities, properties = { - '_entities' : relation(EntityValue, lazy=False, cascade='all', collection_class=EntityDict) + '_entities' : relation(EntityValue, lazy=False, cascade='all', collection_class=mapped_collection(lambda entityvalue: entityvalue.field.name)) }) # create two entities. the objects can be used about as regularly as -- cgit v1.2.1