diff options
| author | Mike Bayer <mike_mp@zzzcomputing.com> | 2006-05-25 14:20:23 +0000 |
|---|---|---|
| committer | Mike Bayer <mike_mp@zzzcomputing.com> | 2006-05-25 14:20:23 +0000 |
| commit | bb79e2e871d0a4585164c1a6ed626d96d0231975 (patch) | |
| tree | 6d457ba6c36c408b45db24ec3c29e147fe7504ff /doc | |
| parent | 4fc3a0648699c2b441251ba4e1d37a9107bd1986 (diff) | |
| download | sqlalchemy-bb79e2e871d0a4585164c1a6ed626d96d0231975.tar.gz | |
merged 0.2 branch into trunk; 0.1 now in sqlalchemy/branches/rel_0_1
Diffstat (limited to 'doc')
| -rw-r--r-- | doc/alphaapi.html | 27 | ||||
| -rw-r--r-- | doc/alphaimplementation.html | 16 | ||||
| -rw-r--r-- | doc/build/compile_docstrings.py | 23 | ||||
| -rw-r--r-- | doc/build/components/formatting.myt | 3 | ||||
| -rw-r--r-- | doc/build/content/adv_datamapping.myt | 734 | ||||
| -rw-r--r-- | doc/build/content/adv_datamapping.txt | 802 | ||||
| -rw-r--r-- | doc/build/content/datamapping.txt | 696 | ||||
| -rw-r--r-- | doc/build/content/dbengine.txt | 377 | ||||
| -rw-r--r-- | doc/build/content/document_base.myt | 11 | ||||
| -rw-r--r-- | doc/build/content/metadata.txt | 238 | ||||
| -rw-r--r-- | doc/build/content/plugins.txt | 309 | ||||
| -rw-r--r-- | doc/build/content/pooling.myt | 66 | ||||
| -rw-r--r-- | doc/build/content/pooling.txt | 65 | ||||
| -rw-r--r-- | doc/build/content/sqlconstruction.txt | 272 | ||||
| -rw-r--r-- | doc/build/content/threadlocal.txt | 2 | ||||
| -rw-r--r-- | doc/build/content/trailmap.myt | 53 | ||||
| -rw-r--r-- | doc/build/content/tutorial.txt | 640 | ||||
| -rw-r--r-- | doc/build/content/types.txt | 10 | ||||
| -rw-r--r-- | doc/build/content/unitofwork.txt | 577 | ||||
| -rw-r--r-- | doc/build/testdocs.py | 20 | ||||
| -rw-r--r-- | doc/build/txt2myt.py | 24 | ||||
| -rw-r--r-- | doc/docs.css | 12 | ||||
| -rw-r--r-- | doc/scripts.js | 7 |
23 files changed, 2972 insertions, 2012 deletions
diff --git a/doc/alphaapi.html b/doc/alphaapi.html new file mode 100644 index 000000000..9bef756ed --- /dev/null +++ b/doc/alphaapi.html @@ -0,0 +1,27 @@ +<html> +<head> + <link href="style.css" rel="stylesheet" type="text/css"></link> + <link href="docs.css" rel="stylesheet" type="text/css"></link> + <script src="scripts.js"></script> + <title>SQLAlchemy Documentation</title> +</head> +<body> + <h3>What is an Alpha API Feature?</h3> +<p><b>Alpha API</b> indicates that the best way for a particular feature to be presented hasn't been firmly settled on as of yet, and the current way is being introduced on a trial basis. Its spirit is not as much a warning that "this API might change", its more an invitation to the users saying, "heres a new idea I had. I'm not sure if this is the best way to do it. Do you like it ? Should we do this differently? Or is it good the way it is ?". Alpha API features are always small in scope and are presented in releases so that the greatest number of users get some hands-on experience with it; large-scoped API or architectural changes will always be discussed on the mailing list/Wiki first.</p> + +<p>Reasons why a feature might want to change include: + <ul> + <li>The API for the feature is too difficult to use for the typical task, and needs to be more "convenient"</li> + <li>The feature only implements a subsection of what it really should be doing</li> + <li>The feature's interface is inconsistent with that of other features which operate at a similar level</li> + <li>The feature is confusing and is often misunderstood, and would be better replaced by a more manual feature that makes the task clearer</li> + <li>The feature overlaps with another feature and effectively provides too many ways to do the same thing</li> + <li>The feature made some assumptions about the total field of use cases which is not really true, and it breaks in other scenarios</li> + </ul> + +</p> +<p>A good example of what was essentially an "alpha feature" is 
the <code>private=True</code> flag. This flag on a <code>relation()</code> indicates that child objects should be deleted along with the parent. After this flag experienced some usage by the SA userbase, some users remarked that a more generic and configurable way was Hibernates <code>cascade="all, delete-orphan"</code>, and also that the term <code>cascade</code> was clearer in purpose than the more ambiguous <code>private</code> keyword, which could be construed as a "private variable".</p> + +<center><input type="button" value="close window" onclick="window.close()"></center> +</body> +</html>
\ No newline at end of file diff --git a/doc/alphaimplementation.html b/doc/alphaimplementation.html new file mode 100644 index 000000000..204092493 --- /dev/null +++ b/doc/alphaimplementation.html @@ -0,0 +1,16 @@ +<html> +<head> + <link href="style.css" rel="stylesheet" type="text/css"></link> + <link href="docs.css" rel="stylesheet" type="text/css"></link> + <script src="scripts.js"></script> + <title>SQLAlchemy Documentation</title> +</head> +<body> + <h3>What is an Alpha Implementation Feature?</h3> +<p><b>Alpha Implementation</b> indicates a feature where developer confidence in its functionality has not yet been firmly established. This typically includes brand new features for which adequate unit tests have not been completed, and/or features whose scope is broad enough that its not clear what additional unit tests might be needed.</p> + +<p>Alpha implementation is not meant to discourage the usage of a feature, it is only meant to indicate that some difficulties in getting full functionality from the feature may occur, and to encourage the reporting of these difficulties either via the mailing list or through <a href="http://www.sqlalchemy.org/trac/newticket" target="_blank">submitting a ticket</a>.</p> + +<center><input type="button" value="close window" onclick="window.close()"></center> +</body> +</html>
\ No newline at end of file diff --git a/doc/build/compile_docstrings.py b/doc/build/compile_docstrings.py index fb9527319..191a5f4ec 100644 --- a/doc/build/compile_docstrings.py +++ b/doc/build/compile_docstrings.py @@ -7,25 +7,32 @@ import docstring import sqlalchemy.schema as schema import sqlalchemy.engine as engine +import sqlalchemy.engine.strategies as strategies import sqlalchemy.sql as sql import sqlalchemy.pool as pool -import sqlalchemy.mapping as mapping +import sqlalchemy.orm as orm import sqlalchemy.exceptions as exceptions import sqlalchemy.ext.proxy as proxy +import sqlalchemy.ext.sessioncontext as sessioncontext +import sqlalchemy.mods.threadlocal as threadlocal objects = [] def make_doc(obj, classes=None, functions=None): objects.append(docstring.ObjectDoc(obj, classes=classes, functions=functions)) +make_doc(obj=sql, classes=[sql.Engine, sql.AbstractDialect, sql.ClauseParameters, sql.Compiled, sql.ClauseElement, sql.TableClause, sql.ColumnClause]) make_doc(obj=schema) -make_doc(obj=engine, classes=[engine.SQLSession, engine.SQLEngine, engine.ResultProxy, engine.RowProxy]) -make_doc(obj=sql, classes=[sql.ClauseParameters, sql.Compiled, sql.ClauseElement, sql.TableClause, sql.ColumnClause]) -make_doc(obj=pool, classes=[pool.DBProxy, pool.Pool, pool.QueuePool, pool.SingletonThreadPool]) -make_doc(obj=mapping, classes=[mapping.Mapper, mapping.MapperExtension]) -make_doc(obj=mapping.query, classes=[mapping.query.Query]) -make_doc(obj=mapping.objectstore, classes=[mapping.objectstore.Session, mapping.objectstore.Session.SessionTrans]) +make_doc(obj=engine, classes=[engine.ComposedSQLEngine, engine.Connection, engine.Transaction, engine.Dialect, engine.ConnectionProvider, engine.ExecutionContext, engine.ResultProxy, engine.RowProxy]) +make_doc(obj=strategies) +make_doc(obj=orm, classes=[orm.Mapper, orm.MapperExtension]) +make_doc(obj=orm.query, classes=[orm.query.Query]) +make_doc(obj=orm.session, classes=[orm.session.Session, 
orm.session.SessionTransaction]) +make_doc(obj=sessioncontext) +make_doc(obj=threadlocal) make_doc(obj=exceptions) +make_doc(obj=pool, classes=[pool.DBProxy, pool.Pool, pool.QueuePool, pool.SingletonThreadPool]) make_doc(obj=proxy) + output = os.path.join(os.getcwd(), 'content', "compiled_docstrings.pickle") -pickle.dump(objects, file(output, 'w'))
\ No newline at end of file +pickle.dump(objects, file(output, 'w')) diff --git a/doc/build/components/formatting.myt b/doc/build/components/formatting.myt index bc8e69d65..52928f539 100644 --- a/doc/build/components/formatting.myt +++ b/doc/build/components/formatting.myt @@ -261,6 +261,7 @@ title = None syntaxtype = 'python' html_escape = False + use_sliders = False </%args> <%init> @@ -289,7 +290,7 @@ return "<pre>" + highlight.highlight(fix_indent(match.group(1)), html_escape = html_escape, syntaxtype = syntaxtype) + "</pre>" content = p.sub(hlight, "<pre>" + m.content() + "</pre>") </%init> -<div class="code"> +<div class="<% use_sliders and "sliding_code" or "code" %>"> % if title is not None: <div class="codetitle"><% title %></div> % diff --git a/doc/build/content/adv_datamapping.myt b/doc/build/content/adv_datamapping.myt deleted file mode 100644 index 7a8fefd0c..000000000 --- a/doc/build/content/adv_datamapping.myt +++ /dev/null @@ -1,734 +0,0 @@ -<%flags>inherit='document_base.myt'</%flags> -<%attr>title='Advanced Data Mapping'</%attr> -<&|doclib.myt:item, name="adv_datamapping", description="Advanced Data Mapping" &> -<p>This section details all the options available to Mappers, as well as advanced patterns.</p> - -<p>To start, heres the tables we will work with again:</p> - <&|formatting.myt:code&> - from sqlalchemy import * - db = create_engine('sqlite://filename=mydb', echo=True) - - # a table to store users - users = Table('users', db, - Column('user_id', Integer, primary_key = True), - Column('user_name', String(40)), - Column('password', String(80)) - ) - - # a table that stores mailing addresses associated with a specific user - addresses = Table('addresses', db, - Column('address_id', Integer, primary_key = True), - Column('user_id', Integer, ForeignKey("users.user_id")), - Column('street', String(100)), - Column('city', String(80)), - Column('state', String(2)), - Column('zip', String(10)) - ) - - # a table that stores keywords - keywords = 
Table('keywords', db, - Column('keyword_id', Integer, primary_key = True), - Column('name', VARCHAR(50)) - ) - - # a table that associates keywords with users - userkeywords = Table('userkeywords', db, - Column('user_id', INT, ForeignKey("users")), - Column('keyword_id', INT, ForeignKey("keywords")) - ) - - </&> - -<&|doclib.myt:item, name="relations", description="More On Relations" &> - <&|doclib.myt:item, name="customjoin", description="Custom Join Conditions" &> - <p>When creating relations on a mapper, most examples so far have illustrated the mapper and relationship joining up based on the foreign keys of the tables they represent. in fact, this "automatic" inspection can be completely circumvented using the <span class="codeline">primaryjoin</span> and <span class="codeline">secondaryjoin</span> arguments to <span class="codeline">relation</span>, as in this example which creates a User object which has a relationship to all of its Addresses which are in Boston: - <&|formatting.myt:code&> - class User(object): - pass - class Address(object): - pass - Address.mapper = mapper(Address, addresses) - User.mapper = mapper(User, users, properties={ - 'boston_addreses' : relation(Address.mapper, primaryjoin= - and_(users.c.user_id==Address.c.user_id, - Addresses.c.city=='Boston')) - }) - </&> - <P>Many to many relationships can be customized by one or both of <span class="codeline">primaryjoin</span> and <span class="codeline">secondaryjoin</span>, shown below with just the default many-to-many relationship explicitly set:</p> - <&|formatting.myt:code&> - class User(object): - pass - class Keyword(object): - pass - Keyword.mapper = mapper(Keyword, keywords) - User.mapper = mapper(User, users, properties={ - 'keywords':relation(Keyword.mapper, - primaryjoin=users.c.user_id==userkeywords.c.user_id, - secondaryjoin=userkeywords.c.keyword_id==keywords.c.keyword_id - ) - }) - </&> - </&> - <&|doclib.myt:item, name="multiplejoin", description="Lazy/Eager Joins Multiple 
Times to One Table" &> - - <p>The previous example leads in to the idea of joining against the same table multiple times. Below is a User object that has lists of its Boston and New York addresses, both lazily loaded when they are first accessed:</p> - <&|formatting.myt:code&> - User.mapper = mapper(User, users, properties={ - 'boston_addreses' : relation(Address.mapper, primaryjoin= - and_(users.c.user_id==Address.c.user_id, - Addresses.c.city=='Boston')), - 'newyork_addresses' : relation(Address.mapper, primaryjoin= - and_(users.c.user_id==Address.c.user_id, - Addresses.c.city=='New York')), - }) - </&> - <p>A complication arises with the above pattern if you want the relations to be eager loaded. Since there will be two separate joins to the addresses table during an eager load, an alias needs to be used to separate them. You can create an alias of the addresses table to separate them, but then you are in effect creating a brand new mapper for each property, unrelated to the main Address mapper, which can create problems with commit operations. 
So an additional argument <span class="codeline">use_alias</span> can be used with an eager relationship to specify the alias to be used just within the eager query:</p> - <&|formatting.myt:code&> - User.mapper = mapper(User, users, properties={ - 'boston_addreses' : relation(Address.mapper, primaryjoin= - and_(User.c.user_id==Address.c.user_id, - Addresses.c.city=='Boston'), lazy=False, use_alias=True), - 'newyork_addresses' : relation(Address.mapper, primaryjoin= - and_(User.c.user_id==Address.c.user_id, - Addresses.c.city=='New York'), lazy=False, use_alias=True), - }) - - <&formatting.myt:poplink&>u = User.mapper.select() - - <&|formatting.myt:codepopper, link="sql" &> - SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, - users.password AS users_password, - addresses_EF45.address_id AS addresses_EF45_address_id, addresses_EF45.user_id AS addresses_EF45_user_id, - addresses_EF45.street AS addresses_EF45_street, addresses_EF45.city AS addresses_EF45_city, - addresses_EF45.state AS addresses_EF45_state, addresses_EF45.zip AS addresses_EF45_zip, - addresses_63C5.address_id AS addresses_63C5_address_id, addresses_63C5.user_id AS addresses_63C5_user_id, - addresses_63C5.street AS addresses_63C5_street, addresses_63C5.city AS addresses_63C5_city, - addresses_63C5.state AS addresses_63C5_state, addresses_63C5.zip AS addresses_63C5_zip - FROM users - LEFT OUTER JOIN addresses AS addresses_EF45 ON users.user_id = addresses_EF45.user_id - AND addresses_EF45.city = :addresses_city - LEFT OUTER JOIN addresses AS addresses_63C5 ON users.user_id = addresses_63C5.user_id - AND addresses_63C5.city = :addresses_city_1 - ORDER BY users.oid, addresses_EF45.oid, addresses_63C5.oid - {'addresses_city_1': 'New York', 'addresses_city': 'Boston'} - </&> - </&> - </&> - - <&|doclib.myt:item, name="relationoptions", description="Relation Options" &> - Keyword options to the <span class="codeline">relation</span> function include: - <ul> - 
<li>lazy=(True|False|None) - specifies how the related items should be loaded. a value of True indicates they should be loaded when the property is first accessed. A value of False indicates they should be loaded by joining against the parent object query, so parent and child are loaded in one round trip. A value of None indicates the related items are not loaded by the mapper in any case; the application will manually insert items into the list in some other way. A relationship with lazy=None is still important; items added to the list or removed will cause the appropriate updates and deletes upon commit().</li> - <li>primaryjoin - a ClauseElement that will be used as the primary join of this child object against the parent object, or in a many-to-many relationship the join of the primary object to the association table. By default, this value is computed based on the foreign key relationships of the parent and child tables (or association table).</li> - <li>secondaryjoin - a ClauseElement that will be used as the join of an association table to the child object. By default, this value is computed based on the foreign key relationships of the association and child tables.</li> - <li>foreignkey - specifies which column in this relationship is "foreign", i.e. which column refers to the parent object. This value is automatically determined in all cases, based on the primary and secondary join conditions, except in the case of a self-referential mapper, where it is needed to indicate the child object's reference back to it's parent.</li> - <li>uselist - a boolean that indicates if this property should be loaded as a list or a scalar. In most cases, this value is determined based on the type and direction of the relationship - one to many forms a list, one to one forms a scalar, many to many is a list. 
If a scalar is desired where normally a list would be present, set uselist to False.</li> - <li>private - indicates if these child objects are "private" to the parent; removed items will also be deleted, and if the parent item is deleted, all child objects are deleted as well. See the example in <&formatting.myt:link, path="datamapping_relations_private"&>.</li> - <li>backreference - indicates the name of a property to be placed on the related mapper's class that will handle this relationship in the other direction, including synchronizing the object attributes on both sides of the relation. See the example in <&formatting.myt:link, path="datamapping_relations_backreferences"&>.</li> - <li>order_by - indicates the ordering that should be applied when loading these items. See the section <&formatting.myt:link, path="adv_datamapping_orderby" &> for details.</li> - <li>association - When specifying a many to many relationship with an association object, this keyword should reference the mapper of the target object of the association. See the example in <&formatting.myt:link, path="datamapping_association"&>.</li> - <li>post_update - this indicates that the relationship should be handled by a second UPDATE statement after an INSERT, or before a DELETE. using this flag essentially means the relationship will not incur any "dependency" between parent and child item, as the particular foreign key relationship between them is handled by a second statement. use this flag when a particular mapping arrangement will incur two rows that are dependent on each other, such as a table that has a one-to-many relationship to a set of child rows, and also has a column that references a single child row within that list (i.e. both tables contain a foreign key to each other). 
If a commit() operation returns an error that a "cyclical dependency" was detected, this is a cue that you might want to use post_update.</li> - </ul> - </&> - -</&> -<&|doclib.myt:item, name="orderby", description="Controlling Ordering" &> -<p>By default, mappers will not supply any ORDER BY clause when selecting rows. This can be modified in several ways.</p> - -<p>A "default ordering" can be supplied by all mappers, by enabling the "default_ordering" flag to the engine, which indicates that table primary keys or object IDs should be used as the default ordering:</p> -<&|formatting.myt:code&> - db = create_engine('postgres://username=scott&password=tiger', default_ordering=True) -</&> -<p>The "order_by" parameter can be sent to a mapper, overriding the per-engine ordering if any. A value of None means that the mapper should not use any ordering, even if the engine's default_ordering property is True. A non-None value, which can be a column, an <span class="codeline">asc</span> or <span class="codeline">desc</span> clause, or an array of either one, indicates the ORDER BY clause that should be added to all select queries:</p> -<&|formatting.myt:code&> - # disable all ordering - mapper = mapper(User, users, order_by=None) - - # order by a column - mapper = mapper(User, users, order_by=users.c.user_id) - - # order by multiple items - mapper = mapper(User, users, order_by=[users.c.user_id, desc(users.c.user_name)]) -</&> -<p>"order_by" can also be specified to an individual <span class="codeline">select</span> method, overriding all other per-engine/per-mapper orderings: -<&|formatting.myt:code&> - # order by a column - l = mapper.select(users.c.user_name=='fred', order_by=users.c.user_id) - - # order by multiple criterion - l = mapper.select(users.c.user_name=='fred', order_by=[users.c.user_id, desc(users.c.user_name)]) -</&> -<p>For relations, the "order_by" property can also be specified to all forms of relation:</p> -<&|formatting.myt:code&> - # order address 
objects by address id - mapper = mapper(User, users, properties = { - 'addresses' : relation(mapper(Address, addresses), order_by=addresses.c.address_id) - }) - - # eager load with ordering - the ORDER BY clauses of parent/child will be organized properly - mapper = mapper(User, users, properties = { - 'addresses' : relation(mapper(Address, addresses), order_by=desc(addresses.c.email_address), eager=True) - }, order_by=users.c.user_id) - -</&> -</&> -<&|doclib.myt:item, name="limits", description="Limiting Rows" &> -<p>You can limit rows in a regular SQL query by specifying <span class="codeline">limit</span> and <span class="codeline">offset</span>. A Mapper can handle the same concepts:</p> -<&|formatting.myt:code&> - class User(object): - pass - - m = mapper(User, users) -<&formatting.myt:poplink&>r = m.select(limit=20, offset=10) -<&|formatting.myt:codepopper, link="sql" &>SELECT users.user_id AS users_user_id, -users.user_name AS users_user_name, users.password AS users_password -FROM users ORDER BY users.oid - LIMIT 20 OFFSET 10 -{} -</&> -</&> -However, things get tricky when dealing with eager relationships, since a straight LIMIT of rows does not represent the count of items when joining against other tables to load related items as well. 
So here is what SQLAlchemy will do when you use limit or offset with an eager relationship: - <&|formatting.myt:code&> - class User(object): - pass - class Address(object): - pass - m = mapper(User, users, properties={ - 'addresses' : relation(mapper(Address, addresses), lazy=False) - }) - r = m.select(User.c.user_name.like('F%'), limit=20, offset=10) -<&|formatting.myt:poppedcode, link="sql" &> -SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, -users.password AS users_password, addresses.address_id AS addresses_address_id, -addresses.user_id AS addresses_user_id, addresses.street AS addresses_street, -addresses.city AS addresses_city, addresses.state AS addresses_state, -addresses.zip AS addresses_zip -FROM -(SELECT users.user_id FROM users WHERE users.user_name LIKE %(users_user_name)s -ORDER BY users.oid LIMIT 20 OFFSET 10) AS rowcount, - users LEFT OUTER JOIN addresses ON users.user_id = addresses.user_id -WHERE rowcount.user_id = users.user_id ORDER BY users.oid, addresses.oid -{'users_user_name': 'F%'} - - </&> - </&> - <p>The main WHERE clause as well as the limiting clauses are coerced into a subquery; this subquery represents the desired result of objects. A containing query, which handles the eager relationships, is joined against the subquery to produce the result.</p> -</&> -<&|doclib.myt:item, name="colname", description="Overriding Column Names" &> -<p>When mappers are constructed, by default the column names in the Table metadata are used as the names of attributes on the mapped class. 
This can be customzed within the properties by stating the key/column combinations explicitly:</p> -<&|formatting.myt:code&> - user_mapper = mapper(User, users, properties={ - 'id' : users.c.user_id, - 'name' : users.c.user_name, - }) -</&> -<p>In the situation when column names overlap in a mapper against multiple tables, columns may be referenced together with a list: -<&|formatting.myt:code&> - # join users and addresses - usersaddresses = sql.join(users, addresses, users.c.user_id == addresses.c.user_id) - m = mapper(User, usersaddresses, - properties = { - 'id' : [users.c.user_id, addresses.c.user_id], - } - ) -</&> -</&> -<&|doclib.myt:item, name="deferred", description="Deferred Column Loading" &> -<p>This feature allows particular columns of a table to not be loaded by default, instead being loaded later on when first referenced. It is essentailly "column-level lazy loading". This feature is useful when one wants to avoid loading a large text or binary field into memory when its not needed. 
Individual columns can be lazy loaded by themselves or placed into groups that lazy-load together.</p> -<&|formatting.myt:code&> - book_excerpts = Table('books', db, - Column('book_id', Integer, primary_key=True), - Column('title', String(200), nullable=False), - Column('summary', String(2000)), - Column('excerpt', String), - Column('photo', Binary) - ) - - class Book(object): - pass - - # define a mapper that will load each of 'excerpt' and 'photo' in - # separate, individual-row SELECT statements when each attribute - # is first referenced on the individual object instance - book_mapper = mapper(Book, book_excerpts, properties = { - 'excerpt' : deferred(book_excerpts.c.excerpt), - 'photo' : deferred(book_excerpts.c.photo) - }) -</&> -<p>Deferred columns can be placed into groups so that they load together:</p> -<&|formatting.myt:code&> - book_excerpts = Table('books', db, - Column('book_id', Integer, primary_key=True), - Column('title', String(200), nullable=False), - Column('summary', String(2000)), - Column('excerpt', String), - Column('photo1', Binary), - Column('photo2', Binary), - Column('photo3', Binary) - ) - - class Book(object): - pass - - # define a mapper with a 'photos' deferred group. when one photo is referenced, - # all three photos will be loaded in one SELECT statement. The 'excerpt' will - # be loaded separately when it is first referenced. 
- book_mapper = mapper(Book, book_excerpts, properties = { - 'excerpt' : deferred(book_excerpts.c.excerpt), - 'photo1' : deferred(book_excerpts.c.photo1, group='photos'), - 'photo2' : deferred(book_excerpts.c.photo2, group='photos'), - 'photo3' : deferred(book_excerpts.c.photo3, group='photos') - }) -</&> -</&> -<&|doclib.myt:item, name="options", description="More on Mapper Options" &> - <p>The <span class="codeline">options</span> method of mapper, first introduced in <&formatting.myt:link, path="datamapping_relations_options" &>, supports the copying of a mapper into a new one, with any number of its relations replaced by new ones. The method takes a variable number of <span class="codeline">MapperOption</span> objects which know how to change specific things about the mapper. The five available options are <span class="codeline">eagerload</span>, <span class="codeline">lazyload</span>, <span class="codeline">noload</span>, <span class="codeline">deferred</span> and <span class="codeline">extension</span>.</p> - <P>An example of a mapper with a lazy load relationship, upgraded to an eager load relationship: - <&|formatting.myt:code&> - class User(object): - pass - class Address(object): - pass - - # a 'lazy' relationship - User.mapper = mapper(User, users, properties = { - 'addreses':relation(mapper(Address, addresses), lazy=True) - }) - - # copy the mapper and convert 'addresses' to be eager - eagermapper = User.mapper.options(eagerload('addresses')) - </&> - - <p>The load options also can take keyword arguments that apply to the new relationship. 
To take the "double" address lazy relationship from the previous section and upgrade it to eager, adding the "selectalias" keywords as well:</p> - <&|formatting.myt:code&> - m = User.mapper.options( - eagerload('boston_addresses', selectalias='boston_ad'), - eagerload('newyork_addresses', selectalias='newyork_ad') - ) - </&> - <p>The <span class="codeline">defer</span> and <span class="codeline">undefer</span> options can control the deferred loading of attributes:</p> - <&|formatting.myt:code&> - # set the 'excerpt' deferred attribute to load normally - m = book_mapper.options(undefer('excerpt')) - - # set the referenced mapper 'photos' to defer its loading of the column 'imagedata' - m = book_mapper.options(defer('photos.imagedata')) - </&> - <p>Options can also take a limited set of keyword arguments which will be applied to a new mapper. For example, to create a mapper that refreshes all objects loaded each time:</p> - <&|formatting.myt:code&> - m2 = mapper.options(always_refresh=True) - </&> - <p>Or, a mapper with different ordering:</p> - <&|formatting.myt:code&> - m2 = mapper.options(order_by=[newcol]) - </&> - -</&> - - -<&|doclib.myt:item, name="inheritance", description="Mapping a Class with Table Inheritance" &> - - <p>Table Inheritance indicates the pattern where two tables, in a parent-child relationship, are mapped to an inheritance chain of classes. If a table "employees" contains additional information about managers in the table "managers", a corresponding object inheritance pattern would have an Employee class and a Manager class. Loading a Manager object means you are joining managers to employees. For SQLAlchemy, this pattern is just a special case of a mapper that maps against a joined relationship, and is provided via the <span class="codeline">inherits</span> keyword. 
- <&|formatting.myt:code&> - class User(object): - """a user object.""" - pass - User.mapper = mapper(User, users) - - class AddressUser(User): - """a user object that also has the users mailing address.""" - pass - - # define a mapper for AddressUser that inherits the User.mapper, and joins on the user_id column - AddressUser.mapper = mapper( - AddressUser, - addresses, inherits=User.mapper - ) - - items = AddressUser.mapper.select() - </&> -<P>Above, the join condition is determined via the foreign keys between the users and the addresses table. To specify the join condition explicitly, use <span class="codeline">inherit_condition</span>: -<&|formatting.myt:code&> - AddressUser.mapper = mapper( - AddressUser, - addresses, inherits=User.mapper, - inherit_condition=users.c.user_id==addresses.c.user_id - ) -</&> -</&> - -<&|doclib.myt:item, name="joins", description="Mapping a Class against Multiple Tables" &> - <P>The more general case of the pattern described in "table inheritance" is a mapper that maps against more than one table. 
The <span class="codeline">join</span> keyword from the SQL package creates a neat selectable unit comprised of multiple tables, complete with its own composite primary key, which can be passed in to a mapper as the table.</p> - <&|formatting.myt:code&> - # a class - class AddressUser(object): - pass - - # define a Join - j = join(users, addresses) - - # map to it - the identity of an AddressUser object will be - # based on (user_id, address_id) since those are the primary keys involved - m = mapper(AddressUser, j) - </&> - - A second example: - <&|formatting.myt:code&> - # many-to-many join on an association table - j = join(users, userkeywords, - users.c.user_id==userkeywords.c.user_id).join(keywords, - userkeywords.c.keyword_id==keywords.c.keyword_id) - - # a class - class KeywordUser(object): - pass - - # map to it - the identity of a KeywordUser object will be - # (user_id, keyword_id) since those are the primary keys involved - m = mapper(KeywordUser, j) - </&> -</&> -<&|doclib.myt:item, name="selects", description="Mapping a Class against Arbitary Selects" &> -<p>Similar to mapping against a join, a plain select() object can be used with a mapper as well. Below, an example select which contains two aggregate functions and a group_by is mapped to a class:</p> - <&|formatting.myt:code&> - s = select([customers, - func.count(orders).label('order_count'), - func.max(orders.price).label('highest_order')], - customers.c.customer_id==orders.c.customer_id, - group_by=[c for c in customers.c] - ) - class Customer(object): - pass - - mapper = mapper(Customer, s) - </&> -<p>Above, the "customers" table is joined against the "orders" table to produce a full row for each customer row, the total count of related rows in the "orders" table, and the highest price in the "orders" table, grouped against the full set of columns in the "customers" table. That query is then mapped against the Customer class. 
New instances of Customer will contain attributes for each column in the "customers" table as well as an "order_count" and "highest_order" attribute. Updates to the Customer object will only be reflected in the "customers" table and not the "orders" table. This is because the primary keys of the "orders" table are not represented in this mapper and therefore the table is not affected by save or delete operations.</p> -</&> -<&|doclib.myt:item, name="multiple", description="Multiple Mappers for One Class" &> - <p>By now it should be apparent that the mapper defined for a class is in no way the only mapper that exists for that class. Other mappers can be created at any time; either explicitly or via the <span class="codeline">options</span> method, to provide different loading behavior.</p> - - <p>However, its not as simple as that. The mapper serves a dual purpose; one is to generate select statements and load objects from executing those statements; the other is to keep track of the defined dependencies of that object when save and delete operations occur, and to extend the attributes of the object so that they store information about their history and communicate with the unit of work system. For this reason, it is a good idea to be aware of the behavior of multiple mappers. When creating dependency relationships between objects, one should insure that only the primary mappers are used in those relationships, else deep object traversal operations will fail to load in the expected properties, and update operations will not take all the dependencies into account. </p> - - <p>Generally its as simple as, the <i>first</i> mapper that is defined for a particular class is the one that gets to define that classes' relationships to other mapped classes, and also decorates its attributes and constructors with special behavior. 
Any subsequent mappers created for that class will be able to load new instances, but object manipulation operations will still function via the original mapper. The special keyword <span class="codeline">is_primary</span> will override this behavior, and make any mapper the new "primary" mapper. - </p> - <&|formatting.myt:code&> - class User(object): - pass - - # mapper one - mark it as "primary", meaning this mapper will handle - # saving and class-level properties - m1 = mapper(User, users, is_primary=True) - - # mapper two - this one will also eager-load address objects in - m2 = mapper(User, users, properties={ - 'addresses' : relation(mapper(Address, addresses), lazy=False) - }) - - # get a user. this user will not have an 'addreses' property - u1 = m1.select(User.c.user_id==10) - - # get another user. this user will have an 'addreses' property. - u2 = m2.select(User.c.user_id==27) - - # make some modifications, including adding an Address object. - u1.user_name = 'jack' - u2.user_name = 'jane' - u2.addresses.append(Address('123 green street')) - - # upon commit, the User objects will be saved. - # the Address object will not, since the primary mapper for User - # does not have an 'addresses' relationship defined - objectstore.commit() - </&> -</&> -<&|doclib.myt:item, name="circular", description="Circular Mapping" &> -<p>Oftentimes it is necessary for two mappers to be related to each other. With a datamodel that consists of Users that store Addresses, you might have an Address object and want to access the "user" attribute on it, or have a User object and want to get the list of Address objects. The easiest way to do this is via the <span class="codeline">backreference</span> keyword described in <&formatting.myt:link, path="datamapping_relations_backreferences"&>. 
Although even when backreferences are used, it is sometimes necessary to explicitly specify the relations on both mappers pointing to each other.</p> -<p>To achieve this involves creating the first mapper by itself, then creating the second mapper referencing the first, then adding references to the first mapper to reference the second:</p> -<&|formatting.myt:code&> - class User(object): - pass - class Address(object): - pass - User.mapper = mapper(User, users) - Address.mapper = mapper(Address, addresses, properties={ - 'user':relation(User.mapper) - }) - User.mapper.add_property('addresses', relation(Address.mapper)) -</&> -<p>Note that with a circular relationship as above, you cannot declare both relationships as "eager" relationships, since that produces a circular query situation which will generate a recursion exception. So what if you want to load an Address and its User eagerly? Just make a second mapper using options: -<&|formatting.myt:code&> - eagermapper = Address.mapper.options(eagerload('user')) - s = eagermapper.select(Address.c.address_id==12) -</&> -</&> -<&|doclib.myt:item, name="recursive", description="Self Referential Mappers" &> -<p>A self-referential mapper is a mapper that is designed to operate with an <b>adjacency list</b> table. This is a table that contains one or more foreign keys back to itself, and is usually used to create hierarchical tree structures. SQLAlchemy's default model of saving items based on table dependencies is not sufficient in this case, as an adjacency list table introduces dependencies between individual rows. Fortunately, SQLAlchemy will automatically detect a self-referential mapper and do the extra lifting to make it work. 
</p> - <&|formatting.myt:code&> - # define a self-referential table - trees = Table('treenodes', engine, - Column('node_id', Integer, primary_key=True), - Column('parent_node_id', Integer, ForeignKey('treenodes.node_id'), nullable=True), - Column('node_name', String(50), nullable=False), - ) - - # treenode class - class TreeNode(object): - pass - - # mapper defines "children" property, pointing back to TreeNode class, - # with the mapper unspecified. it will point back to the primary - # mapper on the TreeNode class. - TreeNode.mapper = mapper(TreeNode, trees, properties={ - 'children' : relation( - TreeNode, - private=True - ), - } - ) - - # or, specify the circular relationship after establishing the original mapper: - mymapper = mapper(TreeNode, trees) - - mymapper.add_property('children', relation( - mymapper, - private=True - )) - - </&> - <p>This kind of mapper goes through a lot of extra effort when saving and deleting items, to determine the correct dependency graph of nodes within the tree.</p> - - <p>A self-referential mapper where there is more than one relationship on the table requires that all join conditions be explicitly spelled out. 
Below is a self-referring table that contains a "parent_node_id" column to reference parent/child relationships, and a "root_node_id" column which points child nodes back to the ultimate root node:</p>
-	<&|formatting.myt:code&>
-	# define a self-referential table with several relations
-	trees = Table('treenodes', engine,
-	    Column('node_id', Integer, primary_key=True),
-	    Column('parent_node_id', Integer, ForeignKey('treenodes.node_id'), nullable=True),
-	    Column('root_node_id', Integer, ForeignKey('treenodes.node_id'), nullable=True),
-	    Column('node_name', String(50), nullable=False),
-	    )
-
-	# treenode class
-	class TreeNode(object):
-	    pass
-
-	# define the "children" property as well as the "root" property
-	TreeNode.mapper = mapper(TreeNode, trees, properties={
-	    'children' : relation(
-	                    TreeNode,
-	                    primaryjoin=trees.c.parent_node_id==trees.c.node_id,
-	                    private=True
-	                   ),
-	    'root' : relation(
-	                    TreeNode,
-	                    primaryjoin=trees.c.root_node_id==trees.c.node_id,
-	                    foreignkey=trees.c.node_id,
-	                    uselist=False
-	                )
-	    }
-	)
-	</&>
-<p>The "root" property on a TreeNode is a many-to-one relationship. By default, a self-referential mapper declares relationships as one-to-many, so the extra parameter <span class="codeline">foreignkey</span>, pointing to the "many" side of a relationship, is needed to indicate a "many-to-one" self-referring relationship.</p>
-<p>Both TreeNode examples above are available in functional form in the <span class="codeline">examples/adjacencytree</span> directory of the distribution.</p>
-</&>
-<&|doclib.myt:item, name="resultset", description="Result-Set Mapping" &>
-	<p>Take any result set and feed it into a mapper to produce objects. Multiple mappers can be combined to retrieve unrelated objects from the same row in one step.
The <span class="codeline">instances</span> method on mapper takes a ResultProxy object, which is the result type generated from SQLEngine, and delivers object instances.</p> - <&|formatting.myt:code, title="single object"&> - class User(object): - pass - - User.mapper = mapper(User, users) - - # select users - c = users.select().execute() - - # get objects - userlist = User.mapper.instances(c) - </&> - - <&|formatting.myt:code, title="multiple objects"&> - # define a second class/mapper - class Address(object): - pass - - Address.mapper = mapper(Address, addresses) - - # select users and addresses in one query - s = select([users, addresses], users.c.user_id==addresses.c.user_id) - - # execute it, and process the results with the User mapper, chained to the Address mapper - r = User.mapper.instances(s.execute(), Address.mapper) - - # result rows are an array of objects, one for each mapper used - for entry in r: - user = r[0] - address = r[1] - </&> -</&> -<&|doclib.myt:item, name="arguments", description="Mapper Arguments" &> -<p>Other arguments not covered above include:</p> -<ul> - <li>version_id_col=None - an integer-holding Column object that will be assigned an incrementing - counter, which is added to the WHERE clause used by UPDATE and DELETE statements. The matching row - count returned by the database is compared to the expected row count, and an exception is raised if they dont match. This is a basic "optimistic concurrency" check. Without the version id column, SQLAlchemy still compares the updated rowcount.</li> - <li>always_refresh=False - this option will cause the mapper to refresh all the attributes of all objects loaded by select/get statements, regardless of if they already exist in the current session. 
this includes all lazy- and eager-loaded relationship attributes, and will also overwrite any changes made to attributes on the column.</li> - <li>entity_name=None - this is an optional "entity name" that will be appended to the key used to associate classes to this mapper. What this basically means is, several primary mappers can be made against the same class by using different entity names; object instances will have the entity name tagged to them, so that all operations will occur on them relative to that mapper. When instantiating new objects, use <code>_sa_entity='name'</code> to tag them to the appropriate mapper.</li> -</ul> -</&> -<&|doclib.myt:item, name="extending", description="Extending Mapper" &> -<p>Mappers can have functionality augmented or replaced at many points in its execution via the usage of the MapperExtension class. This class is just a series of "hooks" where various functionality takes place. An application can make its own MapperExtension objects, overriding only the methods it needs. - <&|formatting.myt:code&> - class MapperExtension(object): - def create_instance(self, mapper, row, imap, class_): - """called when a new object instance is about to be created from a row. - the method can choose to create the instance itself, or it can return - None to indicate normal object creation should take place. - - mapper - the mapper doing the operation - row - the result row from the database - imap - a dictionary that is storing the running set of objects collected from the - current result set - class_ - the class we are mapping. - """ - def append_result(self, mapper, row, imap, result, instance, isnew, populate_existing=False): - """called when an object instance is being appended to a result list. - - If it returns True, it is assumed that this method handled the appending itself. 
- - mapper - the mapper doing the operation - row - the result row from the database - imap - a dictionary that is storing the running set of objects collected from the - current result set - result - an instance of util.HistoryArraySet(), which may be an attribute on an - object if this is a related object load (lazy or eager). use result.append_nohistory(value) - to append objects to this list. - instance - the object instance to be appended to the result - isnew - indicates if this is the first time we have seen this object instance in the current result - set. if you are selecting from a join, such as an eager load, you might see the same object instance - many times in the same result set. - populate_existing - usually False, indicates if object instances that were already in the main - identity map, i.e. were loaded by a previous select(), get their attributes overwritten - """ - def before_insert(self, mapper, instance): - """called before an object instance is INSERTed into its table. - - this is a good place to set up primary key values and such that arent handled otherwise.""" - def after_insert(self, mapper, instance): - """called after an object instance has been INSERTed""" - def before_delete(self, mapper, instance): - """called before an object instance is DELETEed""" - - </&> - <p>To use MapperExtension, make your own subclass of it and just send it off to a mapper:</p> - <&|formatting.myt:code&> - mapper = mapper(User, users, extension=MyExtension()) - </&> - <p>An existing mapper can create a copy of itself using an extension via the <span class="codeline">extension</span> option: - <&|formatting.myt:code&> - extended_mapper = mapper.options(extension(MyExtension())) - </&> - -</&> -<&|doclib.myt:item, name="class", description="How Mapper Modifies Mapped Classes" &> -<p>This section is a quick summary of what's going on when you send a class to the <span class="codeline">mapper()</span> function. 
This material, not required to be able to use SQLAlchemy, is a little more dense and should be approached patiently!</p> - -<p>The primary changes to a class that is mapped involve attaching property objects to it which represent table columns. These property objects essentially track changes. In addition, the __init__ method of the object is decorated to track object creates.</p> -<p>Here is a quick rundown of all the changes in code form: - <&|formatting.myt:code&> - # step 1 - override __init__ to 'register_new' with the Unit of Work - oldinit = myclass.__init__ - def init(self, *args, **kwargs): - nohist = kwargs.pop('_mapper_nohistory', False) - oldinit(self, *args, **kwargs) - if not nohist: - # register_new with Unit Of Work - objectstore.uow().register_new(self) - myclass.__init__ = init - - # step 2 - set a string identifier that will - # locate the classes' primary mapper - myclass._mapper = mapper.hashkey - - # step 3 - add column accessor - myclass.c = mapper.columns - - # step 4 - attribute decorating. - # this happens mostly within the package sqlalchemy.attributes - - # this dictionary will store a series of callables - # that generate "history" containers for - # individual object attributes - myclass._class_managed_attributes = {} - - # create individual properties for each column - - # these objects know how to talk - # to the attribute package to create appropriate behavior. - # the next example examines the attributes package more closely. - myclass.column1 = SmartProperty().property('column1', uselist=False) - myclass.column2 = SmartProperty().property('column2', uselist=True) - </&> -<p>The attribute package is used when save operations occur to get a handle on modified values. In the example below, -a full round-trip attribute tracking operation is illustrated:</p> -<&|formatting.myt:code&> - import sqlalchemy.attributes as attributes - - # create an attribute manager. 
- # the sqlalchemy.mapping package keeps one of these around as - # 'objectstore.global_attributes' - manager = attributes.AttributeManager() - - # regular old new-style class - class MyClass(object): - pass - - # register a scalar and a list attribute - manager.register_attribute(MyClass, 'column1', uselist=False) - manager.register_attribute(MyClass, 'column2', uselist=True) - - # create/modify an object - obj = MyClass() - obj.column1 = 'this is a new value' - obj.column2.append('value 1') - obj.column2.append('value 2') - - # get history objects - col1_history = manager.get_history(obj, 'column1') - col2_history = manager.get_history(obj, 'column2') - - # whats new ? - >>> col1_history.added_items() - ['this is a new value'] - - >>> col2_history.added_items() - ['value1', 'value2'] - - # commit changes - manager.commit(obj) - - # the new values become the "unchanged" values - >>> col1_history.added_items() - [] - - >>> col1_history.unchanged_items() - ['this is a new value'] - - >>> col2_history.added_items() - [] - - >>> col2_history.unchanged_items() - ['value1', 'value2'] -</&> -<p>The above AttributeManager also includes a method <span class="codeline">value_changed</span> which is triggered whenever change events occur on the managed object attributes. The Unit of Work (objectstore) package overrides this method in order to receive change events; its essentially this:</p> -<&|formatting.myt:code&> - import sqlalchemy.attributes as attributes - class UOWAttributeManager(attributes.AttributeManager): - def value_changed(self, obj, key, value): - if hasattr(obj, '_instance_key'): - uow().register_dirty(obj) - else: - uow().register_new(obj) - - global_attributes = UOWAttributeManager() -</&> -<p>Objects that contain the attribute "_instance_key" are already registered with the Identity Map, and are assumed to have come from the database. They therefore get marked as "dirty" when changes happen. 
Objects without an "_instance_key" are not from the database, and get marked as "new" when changes happen, although usually this will already have occured via the object's __init__ method.</p> -</&> -</&> diff --git a/doc/build/content/adv_datamapping.txt b/doc/build/content/adv_datamapping.txt new file mode 100644 index 000000000..3b027ab28 --- /dev/null +++ b/doc/build/content/adv_datamapping.txt @@ -0,0 +1,802 @@ +[alpha_api]: javascript:alphaApi() +[alpha_implementation]: javascript:alphaImplementation() + +Advanced Data Mapping {@name=advdatamapping} +====================== + +This section details all the options available to Mappers, as well as advanced patterns. + +To start, heres the tables we will work with again: + + {python} + from sqlalchemy import * + + metadata = MetaData() + + # a table to store users + users_table = Table('users', metadata, + Column('user_id', Integer, primary_key = True), + Column('user_name', String(40)), + Column('password', String(80)) + ) + + # a table that stores mailing addresses associated with a specific user + addresses_table = Table('addresses', metadata, + Column('address_id', Integer, primary_key = True), + Column('user_id', Integer, ForeignKey("users.user_id")), + Column('street', String(100)), + Column('city', String(80)), + Column('state', String(2)), + Column('zip', String(10)) + ) + + # a table that stores keywords + keywords_table = Table('keywords', metadata, + Column('keyword_id', Integer, primary_key = True), + Column('name', VARCHAR(50)) + ) + + # a table that associates keywords with users + userkeywords_table = Table('userkeywords', metadata, + Column('user_id', INT, ForeignKey("users")), + Column('keyword_id', INT, ForeignKey("keywords")) + ) + + +### More On Mapper Properties {@name=properties} + +#### Overriding Column Names {@name=colname} + +When mappers are constructed, by default the column names in the Table metadata are used as the names of attributes on the mapped class. 
This can be customzed within the properties by stating the key/column combinations explicitly: + + {python} + user_mapper = mapper(User, users_table, properties={ + 'id' : users_table.c.user_id, + 'name' : users_table.c.user_name, + }) + +In the situation when column names overlap in a mapper against multiple tables, columns may be referenced together with a list: + + {python} + # join users and addresses + usersaddresses = sql.join(users_table, addresses_table, users_table.c.user_id == addresses_table.c.user_id) + m = mapper(User, usersaddresses, + properties = { + 'id' : [users_table.c.user_id, addresses_table.c.user_id], + } + ) + +#### Overriding Properties {@name=overriding} + +A common request is the ability to create custom class properties that override the behavior of setting/getting an attribute. Currently, the easiest way to do this in SQLAlchemy is how it would be done in any Python program; define your attribute with a different name, such as "_attribute", and use a property to get/set its value. The mapper just needs to be told of the special name: + + {python} + class MyClass(object): + def _set_email(self, email): + self._email = email + def _get_email(self, email): + return self._email + email = property(_get_email, _set_email) + + mapper(MyClass, mytable, properties = { + # map the '_email' attribute to the "email" column + # on the table + '_email': mytable.c.email + }) + +In a later release, SQLAlchemy will also allow `_get_email` and `_set_email` to be attached directly to the "email" property created by the mapper, and +will also allow this association to occur via decorators. + + +#### Custom List Classes {@name=customlist} + +Feature Status: [Alpha API][alpha_api] + +A one-to-many or many-to-many relationship results in a list-holding element being attached to all instances of a class. Currently, this list is an instance of `sqlalchemy.util.HistoryArraySet`, is a `UserDict` instance that *decorates* an underlying list object. 
The implementation of this list can be controlled, and can in fact be any object that implements a `list`-style `append` and `__iter__` method. A common need is for a list-based relationship to actually be a dictionary. This can be achieved by subclassing `dict` to have `list`-like behavior. + +In this example, a class `MyClass` is defined, which is associated with a parent object `MyParent`. The collection of `MyClass` objects on each `MyParent` object will be a dictionary, storing each `MyClass` instance keyed to its `name` attribute. + + {python} + # a class to be stored in the list + class MyClass(object): + def __init__(self, name): + self.name = name + + # create a dictionary that will act like a list, and store + # instances of MyClass + class MyDict(dict): + def append(self, item): + self[item.name] = item + def __iter__(self): + return self.values() + + # parent class + class MyParent(object): + # this class-level attribute provides the class to be + # used by the 'myclasses' attribute + myclasses = MyDict + + # mappers, constructed normally + mapper(MyClass, myclass_table) + mapper(MyParent, myparent_table, properties={ + 'myclasses' : relation(MyClass) + }) + + # elements on 'myclasses' can be accessed via string keyname + myparent = MyParent() + myparent.myclasses.append(MyClass('this is myclass')) + myclass = myparent.myclasses['this is myclass'] + + +#### Custom Join Conditions {@name=customjoin} + +When creating relations on a mapper, most examples so far have illustrated the mapper and relationship joining up based on the foreign keys of the tables they represent. 
in fact, this "automatic" inspection can be completely circumvented using the `primaryjoin` and `secondaryjoin` arguments to `relation`, as in this example which creates a User object which has a relationship to all of its Addresses which are in Boston:
+
+    {python}
+    class User(object):
+        pass
+    class Address(object):
+        pass
+
+    mapper(Address, addresses_table)
+    mapper(User, users_table, properties={
+        'boston_addresses' : relation(Address, primaryjoin=
+                    and_(users_table.c.user_id==Address.c.user_id,
+                    Address.c.city=='Boston'))
+    })
+
+Many to many relationships can be customized by one or both of `primaryjoin` and `secondaryjoin`, shown below with just the default many-to-many relationship explicitly set:
+
+    {python}
+    class User(object):
+        pass
+    class Keyword(object):
+        pass
+    mapper(Keyword, keywords_table)
+    mapper(User, users_table, properties={
+        'keywords':relation(Keyword, secondary=userkeywords_table,
+            primaryjoin=users_table.c.user_id==userkeywords_table.c.user_id,
+            secondaryjoin=userkeywords_table.c.keyword_id==keywords_table.c.keyword_id
+            )
+    })
+
+#### Lazy/Eager Joins Multiple Times to One Table {@name=multiplejoin}
+
+The previous example leads into the idea of joining against the same table multiple times. Below is a User object that has lists of its Boston and New York addresses:
+
+    {python}
+    mapper(User, users_table, properties={
+        'boston_addresses' : relation(Address, primaryjoin=
+                    and_(users_table.c.user_id==Address.c.user_id,
+                    Address.c.city=='Boston')),
+        'newyork_addresses' : relation(Address, primaryjoin=
+                    and_(users_table.c.user_id==Address.c.user_id,
+                    Address.c.city=='New York')),
+    })
+
+Both lazy and eager loading support multiple joins equally well.
+
+#### Deferred Column Loading {@name=deferred}
+
+This feature allows particular columns of a table to not be loaded by default, instead being loaded later on when first referenced. It is essentially "column-level lazy loading".
This feature is useful when one wants to avoid loading a large text or binary field into memory when its not needed. Individual columns can be lazy loaded by themselves or placed into groups that lazy-load together. + + {python} + book_excerpts = Table('books', db, + Column('book_id', Integer, primary_key=True), + Column('title', String(200), nullable=False), + Column('summary', String(2000)), + Column('excerpt', String), + Column('photo', Binary) + ) + + class Book(object): + pass + + # define a mapper that will load each of 'excerpt' and 'photo' in + # separate, individual-row SELECT statements when each attribute + # is first referenced on the individual object instance + mapper(Book, book_excerpts, properties = { + 'excerpt' : deferred(book_excerpts.c.excerpt), + 'photo' : deferred(book_excerpts.c.photo) + }) + +Deferred columns can be placed into groups so that they load together: + + {python} + book_excerpts = Table('books', db, + Column('book_id', Integer, primary_key=True), + Column('title', String(200), nullable=False), + Column('summary', String(2000)), + Column('excerpt', String), + Column('photo1', Binary), + Column('photo2', Binary), + Column('photo3', Binary) + ) + + class Book(object): + pass + + # define a mapper with a 'photos' deferred group. when one photo is referenced, + # all three photos will be loaded in one SELECT statement. The 'excerpt' will + # be loaded separately when it is first referenced. + mapper(Book, book_excerpts, properties = { + 'excerpt' : deferred(book_excerpts.c.excerpt), + 'photo1' : deferred(book_excerpts.c.photo1, group='photos'), + 'photo2' : deferred(book_excerpts.c.photo2, group='photos'), + 'photo3' : deferred(book_excerpts.c.photo3, group='photos') + }) + +#### Relation Options {@name=relationoptions} + +Keyword options to the `relation` function include: + +* lazy=(True|False|None) - specifies how the related items should be loaded. 
a value of True indicates they should be loaded when the property is first accessed. A value of False indicates they should be loaded by joining against the parent object query, so parent and child are loaded in one round trip. A value of None indicates the related items are not loaded by the mapper in any case; the application will manually insert items into the list in some other way. A relationship with lazy=None is still important; items added to the list or removed will cause the appropriate updates and deletes upon flush(). Future capabilities for lazy might also include "lazy='extra'", which would allow lazy loading of child elements one at a time, for very large collections. +* cascade - a string list of **cascade rules** which determines how persistence operations should be "cascaded" from parent to child. For a description of cascade rules, see [datamapping_relations_cycle](rel:datamapping_relations_lifecycle) and [unitofwork_cascade](rel:unitofwork_cascade). +* secondary - for a many-to-many relationship, specifies the intermediary table. +* primaryjoin - a ClauseElement that will be used as the primary join of this child object against the parent object, or in a many-to-many relationship the join of the primary object to the association table. By default, this value is computed based on the foreign key relationships of the parent and child tables (or association table). +* secondaryjoin - a ClauseElement that will be used as the join of an association table to the child object. By default, this value is computed based on the foreign key relationships of the association and child tables. +* foreignkey - specifies which column in this relationship is "foreign", i.e. which column refers to the parent object. 
This value is automatically determined in most cases based on the primary and secondary join conditions, except in the case of a self-referential mapper, where it is needed to indicate the child object's reference back to it's parent, or in the case where the join conditions do not represent any primary key columns to properly represent the direction of the relationship. +* uselist - a boolean that indicates if this property should be loaded as a list or a scalar. In most cases, this value is determined based on the type and direction of the relationship - one to many forms a list, many to one forms a scalar, many to many is a list. If a scalar is desired where normally a list would be present, such as a bi-directional one-to-one relationship, set uselist to False. +* private - setting `private=True` is the equivalent of setting `cascade="all, delete-orphan"`, and indicates the lifecycle of child objects should be contained within that of the parent. See the example in [datamapping_relations_cycle](rel:datamapping_relations_lifecycle). +* backref - indicates the name of a property to be placed on the related mapper's class that will handle this relationship in the other direction, including synchronizing the object attributes on both sides of the relation. Can also point to a `backref()` construct for more configurability. See [datamapping_relations_backreferences](rel:datamapping_relations_backreferences). +* order_by - indicates the ordering that should be applied when loading these items. See the section [advdatamapping_orderby](rel:advdatamapping_orderby) for details. +* association - When specifying a many to many relationship with an association object, this keyword should reference the mapper or class of the target object of the association. See the example in [datamapping_association](rel:datamapping_association). +* post_update - this indicates that the relationship should be handled by a second UPDATE statement after an INSERT, or before a DELETE. 
using this flag essentially means the relationship will not incur any "dependency" between parent and child item, as the particular foreign key relationship between them is handled by a second statement. use this flag when a particular mapping arrangement will incur two rows that are dependent on each other, such as a table that has a one-to-many relationship to a set of child rows, and also has a column that references a single child row within that list (i.e. both tables contain a foreign key to each other). If a flush() operation returns an error that a "cyclical dependency" was detected, this is a cue that you might want to use post_update. + +### Controlling Ordering {@name=orderby} + +By default, mappers will attempt to ORDER BY the "oid" column of a table, or the primary key column, when selecting rows. This can be modified in several ways. + +The "order_by" parameter can be sent to a mapper, overriding the per-engine ordering if any. A value of None means that the mapper should not use any ordering. 
A non-None value, which can be a column, an `asc` or `desc` clause, or an array of either one, indicates the ORDER BY clause that should be added to all select queries:
+
+    {python}
+    # disable all ordering
+    mapper = mapper(User, users_table, order_by=None)
+
+    # order by a column
+    mapper = mapper(User, users_table, order_by=users_table.c.user_id)
+
+    # order by multiple items
+    mapper = mapper(User, users_table, order_by=[users_table.c.user_id, desc(users_table.c.user_name)])
+
+"order_by" can also be specified to an individual `select` method, overriding all other per-engine/per-mapper orderings:
+
+    {python}
+    # order by a column
+    l = mapper.select(users_table.c.user_name=='fred', order_by=users_table.c.user_id)
+
+    # order by multiple criteria
+    l = mapper.select(users_table.c.user_name=='fred', order_by=[users_table.c.user_id, desc(users_table.c.user_name)])
+
+For relations, the "order_by" property can also be specified to all forms of relation:
+
+    {python}
+    # order address objects by address id
+    mapper = mapper(User, users_table, properties = {
+        'addresses' : relation(mapper(Address, addresses_table), order_by=addresses_table.c.address_id)
+    })
+
+    # eager load with ordering - the ORDER BY clauses of parent/child will be organized properly
+    mapper = mapper(User, users_table, properties = {
+        'addresses' : relation(mapper(Address, addresses_table), order_by=desc(addresses_table.c.email_address), eager=True)
+    }, order_by=users_table.c.user_id)
+
+### Limiting Rows {@name=limits}
+
+You can limit rows in a regular SQL query by specifying `limit` and `offset`. 
A Mapper can handle the same concepts: + + {python} + class User(object): + pass + + mapper(User, users_table) + {sql}r = session.query(User).select(limit=20, offset=10) + SELECT users.user_id AS users_user_id, + users.user_name AS users_user_name, users.password AS users_password + FROM users ORDER BY users.oid + LIMIT 20 OFFSET 10 + {} + +However, things get tricky when dealing with eager relationships, since a straight LIMIT of rows does not represent the count of items when joining against other tables to load related items as well. So here is what SQLAlchemy will do when you use limit or offset with an eager relationship: + + {python} + class User(object): + pass + class Address(object): + pass + mapper(User, users_table, properties={ + 'addresses' : relation(mapper(Address, addresses_table), lazy=False) + }) + r = session.query(User).select(User.c.user_name.like('F%'), limit=20, offset=10) + {opensql}SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, + users.password AS users_password, addresses.address_id AS addresses_address_id, + addresses.user_id AS addresses_user_id, addresses.street AS addresses_street, + addresses.city AS addresses_city, addresses.state AS addresses_state, + addresses.zip AS addresses_zip + FROM + (SELECT users.user_id FROM users WHERE users.user_name LIKE %(users_user_name)s + ORDER BY users.oid LIMIT 20 OFFSET 10) AS rowcount, + users LEFT OUTER JOIN addresses ON users.user_id = addresses.user_id + WHERE rowcount.user_id = users.user_id ORDER BY users.oid, addresses.oid + {'users_user_name': 'F%'} + +The main WHERE clause as well as the limiting clauses are coerced into a subquery; this subquery represents the desired result of objects. A containing query, which handles the eager relationships, is joined against the subquery to produce the result. 
+
+### More on Mapper Options {@name=options}
+
+The `options` method on the `Query` object, first introduced in [datamapping_relations_options](rel:datamapping_relations_options), produces a new `Query` object by creating a copy of the underlying `Mapper` and placing modified properties on it. The `options` method is also directly available off the `Mapper` object itself, so that the newly copied `Mapper` can be dealt with directly. The `options` method takes a variable number of `MapperOption` objects which know how to change specific things about the mapper. The five available options are `eagerload`, `lazyload`, `noload`, `deferred` and `extension`.
+
+An example of a mapper with a lazy load relationship, upgraded to an eager load relationship:
+
+    {python}
+    class User(object):
+        pass
+    class Address(object):
+        pass
+
+    # a 'lazy' relationship
+    mapper(User, users_table, properties = {
+        'addresses':relation(mapper(Address, addresses_table), lazy=True)
+    })
+
+    # copy the mapper and convert 'addresses' to be eager
+    eagermapper = class_mapper(User).options(eagerload('addresses'))
+
+The `defer` and `undefer` options can control the deferred loading of attributes:
+
+    {python}
+    # set the 'excerpt' deferred attribute to load normally
+    m = book_mapper.options(undefer('excerpt'))
+
+    # set the referenced mapper 'photos' to defer its loading of the column 'imagedata'
+    m = book_mapper.options(defer('photos.imagedata'))
+
+### Mapping a Class with Table Inheritance {@name=inheritance}
+
+Feature Status: [Alpha Implementation][alpha_implementation]
+
+Inheritance in databases comes in three forms: *single table inheritance*, where several types of classes are stored in one table, *concrete table inheritance*, where each type of class is stored in its own table, and *multiple table inheritance*, where the parent/child classes are stored in their own tables that are joined together in a select. 
+ +There is also a concept of `polymorphic` loading, which indicates if multiple kinds of classes can be loaded in one pass. + +SQLAlchemy supports all three kinds of inheritance. Additionally, true `polymorphic` loading is supported in a straightfoward way for single table inheritance, and has some more manually-configured features that can make it happen for concrete and multiple table inheritance. + +Working examples of polymorphic inheritance come with the distribution in the directory `examples/polymorphic`. + +Here are the classes we will use to represent an inheritance relationship: + + {python} + class Employee(object): + def __init__(self, name): + self.name = name + def __repr__(self): + return self.__class__.__name__ + " " + self.name + + class Manager(Employee): + def __init__(self, name, manager_data): + self.name = name + self.manager_data = manager_data + def __repr__(self): + return self.__class__.__name__ + " " + self.name + " " + self.manager_data + + class Engineer(Employee): + def __init__(self, name, engineer_info): + self.name = name + self.engineer_info = engineer_info + def __repr__(self): + return self.__class__.__name__ + " " + self.name + " " + self.engineer_info + +Each class supports a common `name` attribute, while the `Manager` class has its own attribute `manager_data` and the `Engineer` class has its own attribute `engineer_info`. + +#### Single Table Inheritance + +This will support polymorphic loading via the `Employee` mapper. 
+ + {python} + employees_table = Table('employees', metadata, + Column('employee_id', Integer, primary_key=True), + Column('name', String(50)), + Column('manager_data', String(50)), + Column('engineer_info', String(50)), + Column('type', String(20)) + ) + + employee_mapper = mapper(Employee, employees_table, polymorphic_on=employees_table.c.type) + manager_mapper = mapper(Manager, inherits=employee_mapper, polymorphic_identity='manager') + engineer_mapper = mapper(Engineer, inherits=employee_mapper, polymorphic_identity='engineer') + +#### Concrete Table Inheritance + +Without polymorphic loading, you just define a separate mapper for each class. + + {python title="Concrete Inheritance, Non-polymorphic"} + managers_table = Table('managers', metadata, + Column('employee_id', Integer, primary_key=True), + Column('name', String(50)), + Column('manager_data', String(50)), + ) + + engineers_table = Table('engineers', metadata, + Column('employee_id', Integer, primary_key=True), + Column('name', String(50)), + Column('engineer_info', String(50)), + ) + + manager_mapper = mapper(Manager, managers_table) + engineer_mapper = mapper(Engineer, engineers_table) + +With polymorphic loading, the SQL query to do the actual polymorphic load must be constructed, usually as a UNION. There is a helper function to create these UNIONS called `polymorphic_union`. + + {python title="Concrete Inheritance, Polymorphic"} + pjoin = polymorphic_union({ + 'manager':managers_table, + 'engineer':engineers_table + }, 'type', 'pjoin') + + employee_mapper = mapper(Employee, pjoin, polymorphic_on=pjoin.c.type) + manager_mapper = mapper(Manager, managers_table, inherits=employee_mapper, concrete=True, polymorphic_identity='manager') + engineer_mapper = mapper(Engineer, engineers_table, inherits=employee_mapper, concrete=True, polymorphic_identity='engineer') + +A future release of SQLALchemy might better merge the generated UNION into the mapper construction phase. 
+ +#### Multiple Table Inheritance + +Like concrete table inheritance, this can be done non-polymorphically, or with a little more complexity, polymorphically: + + {python title="Multiple Table Inheritance, Non-polymorphic"} + people = Table('people', metadata, + Column('person_id', Integer, primary_key=True), + Column('name', String(50)), + Column('type', String(30))) + + engineers = Table('engineers', metadata, + Column('person_id', Integer, ForeignKey('people.person_id'), primary_key=True), + Column('engineer_info', String(50)), + ) + + managers = Table('managers', metadata, + Column('person_id', Integer, ForeignKey('people.person_id'), primary_key=True), + Column('manager_data', String(50)), + ) + + person_mapper = mapper(Person, people) + mapper(Engineer, engineers, inherits=person_mapper) + mapper(Manager, managers, inherits=person_mapper) + +Polymorphic: + + {python title="Multiple Table Inheritance, Polymorphic"} + person_join = polymorphic_union( + { + 'engineer':people.join(engineers), + 'manager':people.join(managers), + 'person':people.select(people.c.type=='person'), + }, None, 'pjoin') + + person_mapper = mapper(Person, people, select_table=person_join, polymorphic_on=person_join.c.type, polymorphic_identity='person') + mapper(Engineer, engineers, inherits=person_mapper, polymorphic_identity='engineer') + mapper(Manager, managers, inherits=person_mapper, polymorphic_identity='manager') + +The join condition in a multiple table inheritance relationship can be specified explicitly, using `inherit_condition`: + + {python} + AddressUser.mapper = mapper( + AddressUser, + addresses_table, inherits=User.mapper, + inherit_condition=users_table.c.user_id==addresses_table.c.user_id + ) + +### Mapping a Class against Multiple Tables {@name=joins} + +Mappers can be constructed against arbitrary relational units (called `Selectables`) as well as plain `Tables`. 
For example, The `join` keyword from the SQL package creates a neat selectable unit comprised of multiple tables, complete with its own composite primary key, which can be passed in to a mapper as the table. + + {python} + # a class + class AddressUser(object): + pass + + # define a Join + j = join(users_table, addresses_table) + + # map to it - the identity of an AddressUser object will be + # based on (user_id, address_id) since those are the primary keys involved + m = mapper(AddressUser, j) + + A second example: + {python} + # many-to-many join on an association table + j = join(users_table, userkeywords, + users_table.c.user_id==userkeywords.c.user_id).join(keywords, + userkeywords.c.keyword_id==keywords.c.keyword_id) + + # a class + class KeywordUser(object): + pass + + # map to it - the identity of a KeywordUser object will be + # (user_id, keyword_id) since those are the primary keys involved + m = mapper(KeywordUser, j) + +### Mapping a Class against Arbitary Selects {@name=selects} + +Similar to mapping against a join, a plain select() object can be used with a mapper as well. Below, an example select which contains two aggregate functions and a group_by is mapped to a class: + + {python} + s = select([customers, + func.count(orders).label('order_count'), + func.max(orders.price).label('highest_order')], + customers.c.customer_id==orders.c.customer_id, + group_by=[c for c in customers.c] + ) + class Customer(object): + pass + + m = mapper(Customer, s) + +Above, the "customers" table is joined against the "orders" table to produce a full row for each customer row, the total count of related rows in the "orders" table, and the highest price in the "orders" table, grouped against the full set of columns in the "customers" table. That query is then mapped against the Customer class. New instances of Customer will contain attributes for each column in the "customers" table as well as an "order_count" and "highest_order" attribute. 
Updates to the Customer object will only be reflected in the "customers" table and not the "orders" table. This is because the primary keys of the "orders" table are not represented in this mapper and therefore the table is not affected by save or delete operations. + +### Multiple Mappers for One Class {@name=multiple} + +The first mapper created for a certain class is known as that class's "primary mapper." Other mappers can be created as well, these come in two varieties. + +* **secondary mapper** - this is a mapper that must be constructed with the keyword argument `non_primary=True`, and represents a load-only mapper. Objects that are loaded with a secondary mapper will have their save operation processed by the primary mapper. It is also invalid to add new `relation()`s to a non-primary mapper. To use this mapper with the Session, specify it to the `query` method: + +example: + + {python} + # primary mapper + mapper(User, users_table) + + # make a secondary mapper to load User against a join + othermapper = mapper(User, users_table.join(someothertable), non_primary=True) + + # select + result = session.query(othermapper).select() + +* **entity name mapper** - this is a mapper that is a fully functioning primary mapper for a class, which is distinguished from the regular primary mapper by an `entity_name` parameter. Instances loaded with this mapper will be totally managed by this new mapper and have no connection to the original one. Most methods on `Session` include an optional `entity_name` parameter in order to specify this condition. 
+ +example: + + {python} + # primary mapper + mapper(User, users_table) + + # make an entity name mapper that stores User objects in another table + mapper(User, alternate_users_table, entity_name='alt') + + # make two User objects + user1 = User() + user2 = User() + + # save one in in the "users" table + session.save(user1) + + # save the other in the "alternate_users_table" + session.save(user2, entity_name='alt') + + session.flush() + + # select from the alternate mapper + session.query(User, entity_name='alt').select() + +### Circular Mapping {@name=circular} + +Oftentimes it is necessary for two mappers to be related to each other. With a datamodel that consists of Users that store Addresses, you might have an Address object and want to access the "user" attribute on it, or have a User object and want to get the list of Address objects. The easiest way to do this is via the `backref` keyword described in [datamapping_relations_backreferences](rel:datamapping_relations_backreferences). Although even when backreferences are used, it is sometimes necessary to explicitly specify the relations on both mappers pointing to each other. +To achieve this involves creating the first mapper by itself, then creating the second mapper referencing the first, then adding references to the first mapper to reference the second: + + {python} + usermapper = mapper(User, users) + mapper(Address, addresses_table, properties={ + 'user':relation(User) + }) + + usermapper.add_property('addresses', relation(Address)) + +Note that with a circular relationship as above, you cannot declare both relationships as "eager" relationships, since that produces a circular query situation which will generate a recursion exception. So what if you want to load an Address and its User eagerly? 
Just use eager options: + + {python} + eagerquery = session.query(Address).options(eagerload('user')) + s = eagerquery.select(Address.c.address_id==12) + +### Self Referential Mappers {@name=recursive} + +A self-referential mapper is a mapper that is designed to operate with an <b>adjacency list</b> table. This is a table that contains one or more foreign keys back to itself, and is usually used to create hierarchical tree structures. SQLAlchemy's default model of saving items based on table dependencies is not sufficient in this case, as an adjacency list table introduces dependencies between individual rows. Fortunately, SQLAlchemy will automatically detect a self-referential mapper and do the extra lifting to make it work. + + {python} + # define a self-referential table + trees = Table('treenodes', engine, + Column('node_id', Integer, primary_key=True), + Column('parent_node_id', Integer, ForeignKey('treenodes.node_id'), nullable=True), + Column('node_name', String(50), nullable=False), + ) + + # treenode class + class TreeNode(object): + pass + + # mapper defines "children" property, pointing back to TreeNode class, + # with the mapper unspecified. it will point back to the primary + # mapper on the TreeNode class. + TreeNode.mapper = mapper(TreeNode, trees, properties={ + 'children' : relation( + TreeNode, + cascade="all, delete-orphan" + ), + } + ) + + # or, specify the circular relationship after establishing the original mapper: + mymapper = mapper(TreeNode, trees) + + mymapper.add_property('children', relation( + mymapper, + cascade="all, delete-orphan" + )) + +This kind of mapper goes through a lot of extra effort when saving and deleting items, to determine the correct dependency graph of nodes within the tree. + +A self-referential mapper where there is more than one relationship on the table requires that all join conditions be explicitly spelled out. 
Below is a self-referring table that contains a "parent_node_id" column to reference parent/child relationships, and a "root_node_id" column which points child nodes back to the ultimate root node:
+
+    {python}
+    # define a self-referential table with several relations
+    trees = Table('treenodes', engine,
+        Column('node_id', Integer, primary_key=True),
+        Column('parent_node_id', Integer, ForeignKey('treenodes.node_id'), nullable=True),
+        Column('root_node_id', Integer, ForeignKey('treenodes.node_id'), nullable=True),
+        Column('node_name', String(50), nullable=False),
+        )
+
+    # treenode class
+    class TreeNode(object):
+        pass
+
+    # define the "children" property as well as the "root" property
+    TreeNode.mapper = mapper(TreeNode, trees, properties={
+        'children' : relation(
+            TreeNode,
+            primaryjoin=trees.c.parent_node_id==trees.c.node_id,
+            cascade="all, delete-orphan"
+        ),
+        'root' : relation(
+            TreeNode,
+            primaryjoin=trees.c.root_node_id==trees.c.node_id,
+            foreignkey=trees.c.node_id,
+            uselist=False
+        )
+        }
+    )
+
+The "root" property on a TreeNode is a many-to-one relationship. By default, a self-referential mapper declares relationships as one-to-many, so the extra parameter `foreignkey`, pointing to the remote side of a relationship, is needed to indicate a "many-to-one" self-referring relationship.
+Both TreeNode examples above are available in functional form in the `examples/adjacencytree` directory of the distribution.
+
+### Result-Set Mapping {@name=resultset}
+
+Take any result set and feed it into a mapper to produce objects. Multiple mappers can be combined to retrieve unrelated objects from the same row in one step. The `instances` method on mapper takes a ResultProxy object, which is the result type generated from SQLEngine, and delivers object instances. 
+
+    {python}
+    class User(object):
+        pass
+
+    User.mapper = mapper(User, users_table)
+
+    # select users
+    c = users_table.select().execute()
+
+    # get objects
+    userlist = User.mapper.instances(c)
+
+    {python}
+    # define a second class/mapper
+    class Address(object):
+        pass
+
+    Address.mapper = mapper(Address, addresses_table)
+
+    # select users and addresses in one query
+    s = select([users_table, addresses_table], users_table.c.user_id==addresses_table.c.user_id)
+
+    # execute it, and process the results with the User mapper, chained to the Address mapper
+    r = User.mapper.instances(s.execute(), Address.mapper)
+
+    # result rows are an array of objects, one for each mapper used
+    for entry in r:
+        user = entry[0]
+        address = entry[1]
+
+### Mapper Arguments {@name=arguments}
+
+Other arguments not covered above include:
+
+* select\_table=None - often used with polymorphic mappers, this is a `Selectable` which will take the place of the `Mapper`'s main table argument when performing queries.
+* version\_id\_col=None - an integer-holding Column object that will be assigned an incrementing
+counter, which is added to the WHERE clause used by UPDATE and DELETE statements. The matching row
+count returned by the database is compared to the expected row count, and an exception is raised if they don't match. This is a basic "optimistic concurrency" check. Without the version id column, SQLAlchemy still compares the updated rowcount.
+* always\_refresh=False - this option will cause the mapper to refresh all the attributes of all objects loaded by select/get statements, regardless of whether they already exist in the current session. This includes all lazy- and eager-loaded relationship attributes, and will also overwrite any changes made to attributes on the column.
+
+### Extending Mapper {@name=extending}
+
+Mappers can have functionality augmented or replaced at many points in its execution via the usage of the MapperExtension class. 
This class is just a series of "hooks" where various functionality takes place. An application can make its own MapperExtension objects, overriding only the methods it needs. Methods that are not overridden return the special value `sqlalchemy.orm.mapper.EXT_PASS`, which indicates the operation should proceed as normally. + + {python} + class MapperExtension(object): + def select_by(self, query, *args, **kwargs): + """overrides the select_by method of the Query object""" + def select(self, query, *args, **kwargs): + """overrides the select method of the Query object""" + def create_instance(self, mapper, session, row, imap, class_): + """called when a new object instance is about to be created from a row. + the method can choose to create the instance itself, or it can return + None to indicate normal object creation should take place. + + mapper - the mapper doing the operation + + row - the result row from the database + + imap - a dictionary that is storing the running set of objects collected from the + current result set + + class_ - the class we are mapping. + """ + def append_result(self, mapper, session, row, imap, result, instance, isnew, populate_existing=False): + """called when an object instance is being appended to a result list. + + If this method returns True, it is assumed that the mapper should do the appending, else + if this method returns False, it is assumed that the append was handled by this method. + + mapper - the mapper doing the operation + + row - the result row from the database + + imap - a dictionary that is storing the running set of objects collected from the + current result set + + result - an instance of util.HistoryArraySet(), which may be an attribute on an + object if this is a related object load (lazy or eager). use result.append_nohistory(value) + to append objects to this list. 
+ + instance - the object instance to be appended to the result + + isnew - indicates if this is the first time we have seen this object instance in the current result + set. if you are selecting from a join, such as an eager load, you might see the same object instance + many times in the same result set. + + populate_existing - usually False, indicates if object instances that were already in the main + identity map, i.e. were loaded by a previous select(), get their attributes overwritten + """ + def populate_instance(self, mapper, session, instance, row, identitykey, imap, isnew): + """called right before the mapper, after creating an instance from a row, passes the row + to its MapperProperty objects which are responsible for populating the object's attributes. + If this method returns True, it is assumed that the mapper should do the appending, else + if this method returns False, it is assumed that the append was handled by this method. + + Essentially, this method is used to have a different mapper populate the object: + + def populate_instance(self, mapper, session, instance, row, identitykey, imap, isnew): + othermapper.populate_instance(session, instance, row, identitykey, imap, isnew, frommapper=mapper) + return True + """ + def before_insert(self, mapper, connection, instance): + """called before an object instance is INSERTed into its table. 
+
+            this is a good place to set up primary key values and such that aren't handled otherwise."""
+        def before_update(self, mapper, connection, instance):
+            """called before an object instance is UPDATED"""
+        def after_update(self, mapper, connection, instance):
+            """called after an object instance is UPDATED"""
+        def after_insert(self, mapper, connection, instance):
+            """called after an object instance has been INSERTed"""
+        def before_delete(self, mapper, connection, instance):
+            """called before an object instance is DELETEed"""
+        def after_delete(self, mapper, connection, instance):
+            """called after an object instance is DELETEed"""
+
+To use MapperExtension, make your own subclass of it and just send it off to a mapper:
+
+    {python}
+    m = mapper(User, users_table, extension=MyExtension())
+
+Multiple extensions will be chained together and processed in order; they are specified as a list:
+
+    {python}
+    m = mapper(User, users_table, extension=[ext1, ext2, ext3])
+
diff --git a/doc/build/content/datamapping.txt b/doc/build/content/datamapping.txt
index 35f8ccc3c..3722f5ac5 100644
--- a/doc/build/content/datamapping.txt
+++ b/doc/build/content/datamapping.txt
@@ -1,30 +1,37 @@
-Data Mapping
+[alpha_api]: javascript:alphaApi()
+[alpha_implementation]: javascript:alphaImplementation()
+
+Data Mapping {@name=datamapping}
 ============
 
 ### Basic Data Mapping {@name=datamapping}
 
 Data mapping describes the process of defining *Mapper* objects, which associate table metadata with user-defined classes.
 
-The Mapper's role is to perform SQL operations upon the database, associating individual table rows with instances of those classes, and individual database columns with properties upon those instances, to transparently associate in-memory objects with a persistent database representation. 
+The `Mapper`'s role is to perform SQL operations upon the database, associating individual table rows with instances of those classes, and individual database columns with properties upon those instances, to transparently associate in-memory objects with a persistent database representation. + +When a `Mapper` is created to associate a `Table` object with a class, all of the columns defined in the `Table` object are associated with the class via property accessors, which add overriding functionality to the normal process of setting and getting object attributes. These property accessors keep track of changes to object attributes; these changes will be stored to the database when the application "flushes" the current state of objects (known as a *Unit of Work*). + +Two objects provide the primary interface for interacting with Mappers and the "unit of work" in SA 0.2, which are the `Query` object and the `Session` object. `Query` deals with selecting objects from the database, whereas `Session` provides a context for loaded objects and the ability to communicate changes on those objects back to the database. -When a Mapper is created to associate a Table object with a class, all of the columns defined in the Table object are associated with the class via property accessors, which add overriding functionality to the normal process of setting and getting object attributes. These property accessors keep track of changes to object attributes; these changes will be stored to the database when the application "commits" the current transactional context (known as a *Unit of Work*). The `__init__()` method of the object is also decorated to communicate changes when new instances of the object are created. +The primary method on `Query` for loading objects is its `select()` method, which has similar arguments to a `sqlalchemy.sql.Select` object. But this select method executes automatically and returns results, instead of awaiting an execute() call. 
Instead of returning a cursor-like object, it returns an array of objects. -The Mapper also provides the interface by which instances of the object are loaded from the database. The primary method for this is its `select()` method, which has similar arguments to a `sqlalchemy.sql.Select` object. But this select method executes automatically and returns results, instead of awaiting an execute() call. Instead of returning a cursor-like object, it returns an array of objects. +The three configurational elements to be defined, i.e. the `Table` metadata, the user-defined class, and the `Mapper`, are typically defined as module-level variables, and may be defined in any fashion suitable to the application, with the only requirement being that the class and table metadata are described before the mapper. For the sake of example, we will be defining these elements close together, but this should not be construed as a requirement; since SQLAlchemy is not a framework, those decisions are left to the developer or an external framework. -The three elements to be defined, i.e. the Table metadata, the user-defined class, and the Mapper, are typically defined as module-level variables, and may be defined in any fashion suitable to the application, with the only requirement being that the class and table metadata are described before the mapper. For the sake of example, we will be defining these elements close together, but this should not be construed as a requirement; since SQLAlchemy is not a framework, those decisions are left to the developer or an external framework. +Also, keep in mind that the examples in this section deal with explicit `Session` objects mapped directly to `Engine` objects, which represents the most explicit style of using the ORM. 
Options exist for how this is configured, including binding `Table` objects directly to `Engines` (described in [metadata_tables_binding](rel:metadata_tables_binding)), as well as using the "Threadlocal" plugin which provides various code shortcuts by using an implicit Session associated to the current thread (described in [plugins_threadlocal](rel:plugins_threadlocal)). ### Synopsis {@name=synopsis} -This is the simplest form of a full "round trip" of creating table meta data, creating a class, mapping the class to the table, getting some results, and saving changes. For each concept, the following sections will dig in deeper to the available capabilities. +First, the metadata/mapper configuration code: {python} from sqlalchemy import * - # engine - engine = create_engine("sqlite://mydb.db") - - # table metadata - users = Table('users', engine, + # metadata + meta = MetaData() + + # table object + users_table = Table('users', meta, Column('user_id', Integer, primary_key=True), Column('user_name', String(16)), Column('password', String(20)) @@ -34,11 +41,21 @@ This is the simplest form of a full "round trip" of creating table meta data, cr class User(object): pass - # create a mapper - usermapper = mapper(User, users) + # create a mapper and associate it with the User class. + # technically we dont really need the 'usermapper' variable. + usermapper = mapper(User, users_table) + +Note that no database definitions are required. 
Next we will define an `Engine` and connect a `Session` to it, and perform a simple select: + + {python} + # engine + engine = create_engine("sqlite://mydb.db") + + # session + session = create_session(bind_to=engine) # select - {sql}user = usermapper.select_by(user_name='fred')[0] + {sql}user = session.query(User).select_by(user_name='fred')[0] SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, users.password AS users_password FROM users @@ -48,151 +65,108 @@ This is the simplest form of a full "round trip" of creating table meta data, cr # modify user.user_name = 'fred jones' - # commit - saves everything that changed - {sql}objectstore.commit() + # flush - saves everything that changed + {sql}session.flush() UPDATE users SET user_name=:user_name WHERE users.user_id = :user_id [{'user_name': 'fred jones', 'user_id': 1}] - - -#### Attaching Mappers to their Class {@name=attaching} -For convenience's sake, the Mapper can be attached as an attribute on the class itself as well: +### The Query Object {@name=query} - {python} - User.mapper = mapper(User, users) - - userlist = User.mapper.select_by(user_id=12) - -There is also a full-blown "monkeypatch" function that creates a primary mapper, attaches the above mapper class property, and also the methods `get, get_by, select, select_by, selectone, selectfirst, commit, expire, refresh, expunge` and `delete`: +The method `session.query(class_or_mapper)` returns a `Query` object. 
Below is a synopsis of things you can do with `Query`: {python} - # "assign" a mapper to the User class/users table - assign_mapper(User, users) - - # methods are attached to the class for selecting - userlist = User.select_by(user_id=12) + # get a query from a Session based on class: + query = session.query(User) - myuser = User.get(1) + # get a query from a Session given a Mapper: + query = session.query(usermapper) - # mark an object as deleted for the next commit - myuser.delete() - - # commit the changes on a specific object - myotheruser.commit() - -Other methods of associating mappers and finder methods with their corresponding classes, such as via common base classes or mixins, can be devised as well. SQLAlchemy does not aim to dictate application architecture and will always allow the broadest variety of architectural patterns, but may include more helper objects and suggested architectures in the future. - -#### Overriding Properties {@name=overriding} - -A common request is the ability to create custom class properties that override the behavior of setting/getting an attribute. Currently, the easiest way to do this in SQLAlchemy is how it would be done in any Python program; define your attribute with a different name, such as "_attribute", and use a property to get/set its value. The mapper just needs to be told of the special name: - - {python} - class MyClass(object): - def _set_email(self, email): - self._email = email - def _get_email(self, email): - return self._email - email = property(_get_email, _set_email) - - m = mapper(MyClass, mytable, properties = { - # map the '_email' attribute to the "email" column - # on the table - '_email': mytable.c.email - }) - -In a later release, SQLAlchemy will also allow _get_email and _set_email to be attached directly to the "email" property created by the mapper, and will also allow this association to occur via decorators. + # select_by, which takes keyword arguments. 
the + # keyword arguments represent property names and the values + # represent values which will be compared via the = operator. + # the comparisons are joined together via "AND". + result = query.select_by(name='john', street='123 green street') -### Selecting from a Mapper {@name=selecting} - -There are a variety of ways to select from a mapper. These range from minimalist to explicit. Below is a synopsis of the these methods: - - {python} - # select_by, using property names or column names as keys - # the keys are grouped together by an AND operator - result = mapper.select_by(name='john', street='123 green street') - - # select_by can also combine SQL criterion with key/value properties - result = mapper.select_by(users.c.user_name=='john', - addresses.c.zip_code=='12345', street='123 green street') + # select_by can also combine ClauseElements with key/value properties. + # all ClauseElements and keyword-based criterion are combined together + # via "AND". + result = query.select_by(users_table.c.user_name=='john', + addresses_table.c.zip_code=='12345', street='123 green street') # get_by, which takes the same arguments as select_by # returns a single scalar result or None if no results - user = mapper.get_by(id=12) + user = query.get_by(id=12) # "dynamic" versions of select_by and get_by - everything past the # "select_by_" or "get_by_" is used as the key, and the function argument # as the value - result = mapper.select_by_name('fred') - u = mapper.get_by_name('fred') + result = query.select_by_name('fred') + u = query.get_by_name('fred') # get an object directly from its primary key. this will bypass the SQL # call if the object has already been loaded - u = mapper.get(15) + u = query.get(15) # get an object that has a composite primary key of three columns. # the order of the arguments matches that of the table meta data. 
- myobj = mapper.get(27, 3, 'receipts') + myobj = query.get((27, 3, 'receipts')) # using a WHERE criterion - result = mapper.select(or_(users.c.user_name == 'john', users.c.user_name=='fred')) + result = query.select(or_(users_table.c.user_name == 'john', users_table.c.user_name=='fred')) # using a WHERE criterion to get a scalar - u = mapper.selectfirst(users.c.user_name=='john') - + u = query.selectfirst(users_table.c.user_name=='john') + # selectone() is a stricter version of selectfirst() which # will raise an exception if there is not exactly one row - u = mapper.selectone(users.c.user_name=='john') + u = query.selectone(users_table.c.user_name=='john') # using a full select object - result = mapper.select(users.select(users.c.user_name=='john')) - - # using straight text - result = mapper.select_text("select * from users where user_name='fred'") + result = query.select(users_table.select(users_table.c.user_name=='john')) - # or using a "text" object - result = mapper.select(text("select * from users where user_name='fred'", engine=engine)) - Some of the above examples above illustrate the usage of the mapper's Table object to provide the columns for a WHERE Clause. These columns are also accessible off of the mapped class directly. When a mapper is assigned to a class, it also attaches a special property accessor `c` to the class itself, which can be used just like the table metadata to access the columns of the table: {python} - User.mapper = mapper(User, users) - - userlist = User.mapper.select(User.c.user_id==12) - + userlist = session.query(User).select(User.c.user_id==12) ### Saving Objects {@name=saving} -When objects corresponding to mapped classes are created or manipulated, all changes are logged by a package called `sqlalchemy.mapping.objectstore`. The changes are then written to the database when an application calls `objectstore.commit()`. 
This pattern is known as a *Unit of Work*, and has many advantages over saving individual objects or attributes on those objects with individual method invocations. Domain models can be built with far greater complexity with no concern over the order of saves and deletes, excessive database round-trips and write operations, or deadlocking issues. The commit() operation uses a transaction as well, and will also perform "concurrency checking" to insure the proper number of rows were in fact affected (not supported with the current MySQL drivers). Transactional resources are used effectively in all cases; the unit of work handles all the details. +When objects corresponding to mapped classes are created or manipulated, all changes are logged by the `Session` object. The changes are then written to the database when an application calls `flush()`. This pattern is known as a *Unit of Work*, and has many advantages over saving individual objects or attributes on those objects with individual method invocations. Domain models can be built with far greater complexity with no concern over the order of saves and deletes, excessive database round-trips and write operations, or deadlocking issues. The `flush()` operation batches its SQL statements into a transaction, and can also perform optimistic concurrency checks (using a version id column) to ensure the proper number of rows were in fact affected (not supported with the current MySQL drivers). -The Unit of Work is a powerful tool, and has some important concepts that must be understood in order to use it effectively. While this section illustrates rudimentary Unit of Work usage, it is strongly encouraged to consult the [unitofwork](rel:unitofwork) section for a full description on all its operations, including session control, deletion, and developmental guidelines. +The Unit of Work is a powerful tool, and has some important concepts that should be understood in order to use it effectively. 
See the [unitofwork](rel:unitofwork) section for a full description on all its operations. -When a mapper is created, the target class has its mapped properties decorated by specialized property accessors that track changes, and its `__init__()` method is also decorated to mark new objects as "new". +When a mapper is created, the target class has its mapped properties decorated by specialized property accessors that track changes. New objects by default must be explicitly added to the `Session`, however this can be made automatic by using [plugins_threadlocal](rel:plugins_threadlocal) or [plugins_sessioncontext](rel:plugins_sessioncontext). {python} - User.mapper = mapper(User, users) - + mapper(User, users_table) + # create a new User myuser = User() myuser.user_name = 'jane' myuser.password = 'hello123' - + # create another new User myuser2 = User() myuser2.user_name = 'ed' myuser2.password = 'lalalala' - + + # create a Session and save them + sess = create_session() + sess.save(myuser) + sess.save(myuser2) + # load a third User from the database - {sql}myuser3 = User.mapper.select(User.c.user_name=='fred')[0] + {sql}myuser3 = sess.query(User).select(User.c.user_name=='fred')[0] SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, users.password AS users_password FROM users WHERE users.user_name = :users_user_name {'users_user_name': 'fred'} - + myuser3.user_name = 'fredjones' - + # save all changes - {sql}objectstore.commit() + {sql}session.flush() UPDATE users SET user_name=:user_name WHERE users.user_id =:users_user_id [{'users_user_id': 1, 'user_name': 'fredjones'}] @@ -201,11 +175,11 @@ When a mapper is created, the target class has its mapped properties decorated b INSERT INTO users (user_name, password) VALUES (:user_name, :password) {'password': 'lalalala', 'user_name': 'ed'} -In the examples above, we defined a User class with basically no properties or methods. 
Theres no particular reason it has to be this way, the class can explicitly set up whatever properties it wants, whether or not they will be managed by the mapper. It can also specify a constructor, with the restriction that the constructor is able to function with no arguments being passed to it (this restriction can be lifted with some extra parameters to the mapper; more on that later): +The mapped class can also specify whatever methods and/or constructor it wants: {python} class User(object): - def __init__(self, user_name = None, password = None): + def __init__(self, user_name, password): self.user_id = None self.user_name = user_name self.password = password @@ -214,35 +188,42 @@ In the examples above, we defined a User class with basically no properties or m def __repr__(self): return "User id %s name %s password %s" % (repr(self.user_id), repr(self.user_name), repr(self.password)) - User.mapper = mapper(User, users) + mapper(User, users_table) + sess = create_session() u = User('john', 'foo') - {sql}objectstore.commit() + sess.save(u) + {sql}session.flush() INSERT INTO users (user_name, password) VALUES (:user_name, :password) {'password': 'foo', 'user_name': 'john'} >>> u User id 1 name 'john' password 'foo' -Recent versions of SQLAlchemy will only put modified object attributes columns into the UPDATE statements generated upon commit. This is to conserve database traffic and also to successfully interact with a "deferred" attribute, which is a mapped object attribute against the mapper's primary table that isnt loaded until referenced by the application. +SQLAlchemy will only put modified object attributes columns into the UPDATE statements generated upon flush. This is to conserve database traffic and also to successfully interact with a "deferred" attribute, which is a mapped object attribute against the mapper's primary table that isnt loaded until referenced by the application. 
### Defining and Using Relationships {@name=relations} -So that covers how to map the columns in a table to an object, how to load objects, create new ones, and save changes. The next step is how to define an object's relationships to other database-persisted objects. This is done via the `relation` function provided by the mapper module. So with our User class, lets also define the User has having one or more mailing addresses. First, the table metadata: +So that covers how to map the columns in a table to an object, how to load objects, create new ones, and save changes. The next step is how to define an object's relationships to other database-persisted objects. This is done via the `relation` function provided by the `orm` module. + +#### One to Many {@name=onetomany} + +So with our User class, lets also define the User has having one or more mailing addresses. First, the table metadata: {python} from sqlalchemy import * - engine = create_engine('sqlite://filename=mydb') + + metadata = MetaData() # define user table - users = Table('users', engine, + users_table = Table('users', metadata, Column('user_id', Integer, primary_key=True), Column('user_name', String(16)), Column('password', String(20)) ) # define user address table - addresses = Table('addresses', engine, + addresses_table = Table('addresses', metadata, Column('address_id', Integer, primary_key=True), Column('user_id', Integer, ForeignKey("users.user_id")), Column('street', String(100)), @@ -251,39 +232,46 @@ So that covers how to map the columns in a table to an object, how to load objec Column('zip', String(10)) ) -Of importance here is the addresses table's definition of a *foreign key* relationship to the users table, relating the user_id column into a parent-child relationship. 
When a Mapper wants to indicate a relation of one object to another, this ForeignKey object is the default method by which the relationship is determined (although if you didn't define ForeignKeys, or you want to specify explicit relationship columns, that is available as well). +Of importance here is the addresses table's definition of a *foreign key* relationship to the users table, relating the user_id column into a parent-child relationship. When a `Mapper` wants to indicate a relation of one object to another, the `ForeignKey` relationships are the default method by which the relationship is determined (options also exist to describe the relationships explicitly). -So then lets define two classes, the familiar User class, as well as an Address class: +So then lets define two classes, the familiar `User` class, as well as an `Address` class: {python} class User(object): - def __init__(self, user_name = None, password = None): + def __init__(self, user_name, password): self.user_name = user_name self.password = password class Address(object): - def __init__(self, street=None, city=None, state=None, zip=None): + def __init__(self, street, city, state, zip): self.street = street self.city = city self.state = state self.zip = zip -And then a Mapper that will define a relationship of the User and the Address classes to each other as well as their table metadata. We will add an additional mapper keyword argument `properties` which is a dictionary relating the name of an object property to a database relationship, in this case a `relation` object against a newly defined mapper for the Address class: +And then a `Mapper` that will define a relationship of the `User` and the `Address` classes to each other as well as their table metadata. 
We will add an additional mapper keyword argument `properties` which is a dictionary relating the names of class attributes to database relationships, in this case a `relation` object against a newly defined mapper for the Address class: {python} - User.mapper = mapper(User, users, properties = { - 'addresses' : relation(mapper(Address, addresses)) - } - ) + mapper(Address, addresses_table) + mapper(User, users_table, properties = { + 'addresses' : relation(Address) + } + ) Lets do some operations with these classes and see what happens: {python} + engine = create_engine('sqlite:///mydb.db') + metadata.create_all(engine) + + session = create_session(bind_to=engine) + u = User('jane', 'hihilala') u.addresses.append(Address('123 anywhere street', 'big city', 'UT', '76543')) u.addresses.append(Address('1 Park Place', 'some other city', 'OK', '83923')) - objectstore.commit() + session.save(u) + session.flush() {opensql}INSERT INTO users (user_name, password) VALUES (:user_name, :password) {'password': 'hihilala', 'user_name': 'jane'} INSERT INTO addresses (user_id, street, city, state, zip) VALUES (:user_id, :street, :city, :state, :zip) @@ -291,15 +279,15 @@ Lets do some operations with these classes and see what happens: INSERT INTO addresses (user_id, street, city, state, zip) VALUES (:user_id, :street, :city, :state, :zip) {'city': 'some other city', 'state': 'OK', 'street': '1 Park Place', 'user_id':1, 'zip': '83923'} -A lot just happened there! The Mapper object figured out how to relate rows in the addresses table to the users table, and also upon commit had to determine the proper order in which to insert rows. After the insert, all the User and Address objects have all their new primary and foreign keys populated. +A lot just happened there! The `Mapper` figured out how to relate rows in the addresses table to the users table, and also upon flush had to determine the proper order in which to insert rows. 
After the insert, all the `User` and `Address` objects have their new primary and foreign key attributes populated. -Also notice that when we created a Mapper on the User class which defined an 'addresses' relation, the newly created User instance magically had an "addresses" attribute which behaved like a list. This list is in reality a property accessor function, which returns an instance of `sqlalchemy.util.HistoryArraySet`, which fulfills the full set of Python list accessors, but maintains a *unique* set of objects (based on their in-memory identity), and also tracks additions and deletions to the list: +Also notice that when we created a `Mapper` on the `User` class which defined an `addresses` relation, the newly created `User` instance magically had an "addresses" attribute which behaved like a list. This list is in reality a property function which returns an instance of `sqlalchemy.util.HistoryArraySet`. This object fulfills the full set of Python list accessors, but maintains a *unique* set of objects (based on their in-memory identity), and also tracks additions and deletions to the list: {python} del u.addresses[1] u.addresses.append(Address('27 New Place', 'Houston', 'TX', '34839')) - objectstore.commit() + session.flush() {opensql}UPDATE addresses SET user_id=:user_id WHERE addresses.address_id = :addresses_address_id @@ -307,36 +295,43 @@ Also notice that when we created a Mapper on the User class which defined an 'ad INSERT INTO addresses (user_id, street, city, state, zip) VALUES (:user_id, :street, :city, :state, :zip) {'city': 'Houston', 'state': 'TX', 'street': '27 New Place', 'user_id': 1, 'zip': '34839'} - -#### Useful Feature: Private Relations {@name=private} -So our one address that was removed from the list, was updated to have a user_id of `None`, and a new address object was inserted to correspond to the new Address added to the User. But now, theres a mailing address with no user_id floating around in the database of no use to anyone. 
How can we avoid this ? This is acheived by using the `private=True` parameter of `relation`: +Note that when creating a relation with the `relation()` function, the target can either be a class, in which case the primary mapper for that class is used as the target, or a `Mapper` instance itself, as returned by the `mapper()` function. + +#### Lifecycle Relations {@name=lifecycle} + +In the previous example, a single address was removed from the `addresses` attribute of a `User` object, resulting in the corresponding database row being updated to have a user_id of `None`. But now, there's a mailing address with no user_id floating around in the database of no use to anyone. How can we avoid this? This is achieved by using the `cascade` parameter of `relation`: {python} - User.mapper = mapper(User, users, properties = { - 'addresses' : relation(mapper(Address, addresses), private=True) - } - ) + clear_mappers() # clear mappers from the previous example + mapper(Address, addresses_table) + mapper(User, users_table, properties = { + 'addresses' : relation(Address, cascade="all, delete-orphan") + } + ) + del u.addresses[1] u.addresses.append(Address('27 New Place', 'Houston', 'TX', '34839')) - objectstore.commit() + session.flush() {opensql}INSERT INTO addresses (user_id, street, city, state, zip) VALUES (:user_id, :street, :city, :state, :zip) {'city': 'Houston', 'state': 'TX', 'street': '27 New Place', 'user_id': 1, 'zip': '34839'} DELETE FROM addresses WHERE addresses.address_id = :address_id [{'address_id': 2}] -In this case, with the private flag set, the element that was removed from the addresses list was also removed from the database. By specifying the `private` flag on a relation, it is indicated to the Mapper that these related objects exist only as children of the parent object, otherwise should be deleted. +In this case, with the `delete-orphan` **cascade rule** set, the element that was removed from the addresses list was also removed from the database. 
Specifying `cascade="all, delete-orphan"` means that every persistence operation performed on the parent object will be *cascaded* to the child object or objects handled by the relation, and additionally that each child object cannot exist without being attached to a parent. Such a relationship indicates that the **lifecycle** of the `Address` objects are bounded by that of their parent `User` object. + +Cascading is described fully in [unitofwork_cascade](rel:unitofwork_cascade). -#### Useful Feature: Backreferences {@name=backreferences} +#### Backreferences {@name=backreferences} -By creating relations with the `backref` keyword, a bi-directional relationship can be created which will keep both ends of the relationship updated automatically, even without any database queries being executed. Below, the User mapper is created with an "addresses" property, and the corresponding Address mapper receives a "backreference" to the User object via the property name "user": +By creating relations with the `backref` keyword, a bi-directional relationship can be created which will keep both ends of the relationship updated automatically, independently of database operations. Below, the `User` mapper is created with an `addresses` property, and the corresponding `Address` mapper receives a "backreference" to the `User` object via the property name `user`: {python} - Address.mapper = mapper(Address, addresses) - User.mapper = mapper(User, users, properties = { - 'addresses' : relation(Address.mapper, backref='user') + Address = mapper(Address, addresses_table) + User = mapper(User, users_table, properties = { + 'addresses' : relation(Address, backref='user') } ) @@ -356,51 +351,37 @@ By creating relations with the `backref` keyword, a bi-directional relationship >>> a1.user is user and a2.user is user True -The backreference feature also works with many-to-many relationships, which are described later. 
When creating a backreference, a corresponding property is placed on the child mapper. The default arguments to this property can be overridden using the `backref()` function: +The backreference feature also works with many-to-many relationships, which are described later. When creating a backreference, a corresponding property (i.e. a second `relation()`) is placed on the child mapper. The default arguments to this property can be overridden using the `backref()` function: {python} - Address.mapper = mapper(Address, addresses) - - User.mapper = mapper(User, users, properties = { - 'addresses' : relation(Address.mapper, - backref=backref('user', lazy=False, private=True)) - } - ) + mapper(User, users_table) + mapper(Address, addresses_table, properties={ + 'user':relation(User, backref=backref('addresses', cascade="all, delete-orphan")) + }) -#### Creating Relationships Automatically with cascade_mappers {@name=cascade} -The mapper package has a helper function `cascade_mappers()` which can simplify the task of linking several mappers together. Given a list of classes and/or mappers, it identifies the foreign key relationships between the given mappers or corresponding class mappers, and creates relation() objects representing those relationships, including a backreference. Attempts to find the "secondary" table in a many-to-many relationship as well. The names of the relations are a lowercase version of the related class. In the case of one-to-many or many-to-many, the name is "pluralized", which currently is based on the English language (i.e. an 's' or 'es' added to it): +The `backref()` function is often used to set up a bi-directional one-to-one relationship. This is because the `relation()` function by default creates a "one-to-many" relationship when presented with a primary key/foreign key relationship, but the `backref()` function can redefine the `uselist` property to make it a scalar: {python} - # create two mappers. 
the 'users' and 'addresses' tables have a foreign key - # relationship - mapper1 = mapper(User, users) - mapper2 = mapper(Address, addresses) - - # cascade the two mappers together (can also specify User, Address as the arguments) - cascade_mappers(mapper1, mapper2) - - # two new object instances - u = User('user1') - a = Address('test') + mapper(User, users_table) + mapper(Address, addresses_table, properties={ + 'user' : relation(User, backref=backref('address', uselist=False)) + }) - # "addresses" and "user" property are automatically added - u.addresses.append(a) - print a.user -#### Selecting from Relationships: Lazy Load {@name=lazyload} +### Selecting from Relationships {@name=selectrelations} -We've seen how the `relation` specifier affects the saving of an object and its child items, how does it affect selecting them? By default, the relation keyword indicates that the related property should be attached a *Lazy Loader* when instances of the parent object are loaded from the database; this is just a callable function that when accessed will invoke a second SQL query to load the child objects of the parent. +We've seen how the `relation` specifier affects the saving of an object and its child items, how does it affect selecting them? By default, the relation keyword indicates that the related property should be attached a *lazy loader* when instances of the parent object are loaded from the database; this is just a callable function that when accessed will invoke a second SQL query to load the child objects of the parent. 
{python} # define a mapper - User.mapper = mapper(User, users, properties = { - 'addresses' : relation(mapper(Address, addresses), private=True) - }) + mapper(User, users_table, properties = { + 'addresses' : relation(mapper(Address, addresses_table)) + }) # select users where username is 'jane', get the first element of the list # this will incur a load operation for the parent table - {sql}user = User.mapper.select(user_name='jane')[0] + {sql}user = session.query(User).select(User.c.user_name=='jane')[0] SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, users.password AS users_password FROM users WHERE users.user_name = :users_user_name ORDER BY users.oid @@ -414,15 +395,16 @@ We've seen how the `relation` specifier affects the saving of an object and its addresses.city AS addresses_city, addresses.state AS addresses_state, addresses.zip AS addresses_zip FROM addresses WHERE addresses.user_id = :users_user_id ORDER BY addresses.oid - {'users_user_id': 1} - print repr(a) + {'users_user_id': 1} + + print repr(a) -##### Useful Feature: Creating Joins via select_by {@name=relselectby} +#### Creating Joins Across Relations {@name=relselectby} -In mappers that have relationships, the `select_by` method and its cousins include special functionality that can be used to create joins. Just specify a key in the argument list which is not present in the primary mapper's list of properties or columns, but *is* present in the property list of one of its relationships: +For mappers that have relationships, the `select_by` method of the `Query` object can create queries that include automatically created joins. 
Just specify a key in the argument list which is not present in the primary mapper's list of properties or columns, but *is* present in the property list of one of its relationships: {python} - {sql}l = User.mapper.select_by(street='123 Green Street') + {sql}l = session.query(User).select_by(street='123 Green Street') SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, users.password AS users_password FROM users, addresses @@ -434,208 +416,248 @@ In mappers that have relationships, the `select_by` method and its cousins inclu The above example is shorthand for: {python} - l = User.mapper.select(and_( + l = session.query(User).select(and_( Address.c.user_id==User.c.user_id, Address.c.street=='123 Green Street') ) - -##### How to Refresh the List? {@name=refreshing} -Once the child list of Address objects is loaded, it is done loading for the lifetime of the object instance. Changes to the list will not be interfered with by subsequent loads, and upon commit those changes will be saved. Similarly, if a new User object is created and child Address objects added, a subsequent select operation which happens to touch upon that User instance, will also not affect the child list, since it is already loaded. - -The issue of when the mapper actually gets brand new objects from the database versus when it assumes the in-memory version is fine the way it is, is a subject of *transactional scope*. Described in more detail in the Unit of Work section, for now it should be noted that the total storage of all newly created and selected objects, *within the scope of the current thread*, can be reset via releasing or otherwise disregarding all current object instances, and calling: +All keyword arguments sent to `select_by` are used to create query criterion. This means that familiar `select` keyword options like `order_by` and `limit` are not directly available. 
To enable these options with `select_by`, you can try the [plugins_selectresults](rel:plugins_selectresults) extension which offers methods off the result of a `select` or `select_by` such as `order_by()` and array slicing functions that generate new queries. + +Also, `select_by` will *not* create joins derived from `Column`-based expressions (i.e. `ClauseElement` objects); the reason is that a `Column`-based expression may include many columns, and `select_by` has no way to know which columns in the expression correspond to properties and which don't (it also prefers not to dig into column expressions which may be very complex). The next section describes some ways to combine `Column` expressions with `select_by`'s auto-joining capabilities. + +#### More Granular Join Control Using join\_to, join\_via {@name=jointo} + +Feature Status: [Alpha API][alpha_api] + +The `join_to` method of `Query` is a component of the `select_by` operation, and is given a keyname in order to return a "join path" from the Query's mapper to the mapper which is referenced by a `relation()` of the given name: {python} - objectstore.clear() - -This operation will clear out all currently mapped object instances, and subsequent select statements will load fresh copies from the databse. 
- -To operate upon a single object, just use the `remove` function: + >>> q = session.query(User) + >>> j = q.join_to('addresses') + >>> print j + users.user_id=addresses.user_id + +`join_to` can also be given the name of a column-based property, in which case it will locate a path to the nearest mapper which has that property as a column: {python} - # (this function coming soon) - objectstore.remove(myobject) + >>> q = session.query(User) + >>> j = q.join_to('street') + >>> print j + users.user_id=addresses.user_id +Also available is the `join_via` function, which is similar to `join_to`, except instead of traversing through all properties to find a path to the given key, it's given an explicit path to the target property: -#### Selecting from Relationships: Eager Load {@name=eagerload} + {python} + >>> q = session.query(User) + >>> j = q.join_via(['orders', 'items']) + >>> print j + users.c.user_id==orders.c.user_id AND orders.c.item_id==items.c.item_id -With just a single parameter "lazy=False" specified to the relation object, the parent and child SQL queries can be joined together.
+Expressions produced by `join_to` and `join_via` can be used with `select` to create more complicated query criterion across multiple relations: {python} - Address.mapper = mapper(Address, addresses) - User.mapper = mapper(User, users, properties = { - 'addresses' : relation(Address.mapper, lazy=False) - } - ) - - {sql}user = User.mapper.get_by(user_name='jane') - SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, - users.password AS users_password, - addresses.address_id AS addresses_address_id, addresses.user_id AS addresses_user_id, - addresses.street AS addresses_street, addresses.city AS addresses_city, - addresses.state AS addresses_state, addresses.zip AS addresses_zip - FROM users LEFT OUTER JOIN addresses ON users.user_id = addresses.user_id - WHERE users.user_name = :users_user_name ORDER BY users.oid, addresses.oid + >>> l = q.select( + (addresses_table.c.street=='some address') & + (items_table.c.item_name=='item #4') & + q.join_to('addresses') & + q.join_via(['orders', 'items']) + ) + + +#### Eager Loading {@name=eagerload} + +With just a single parameter `lazy=False` specified to the relation object, the parent and child SQL queries can be joined together. 
+ + {python} + mapper(Address, addresses_table) + mapper(User, users_table, properties = { + 'addresses' : relation(Address, lazy=False) + } + ) + + {sql}users = session.query(User).select(User.c.user_name=='Jane') + SELECT users.user_name AS users_user_name, users.password AS users_password, + users.user_id AS users_user_id, addresses_4fb8.city AS addresses_4fb8_city, + addresses_4fb8.address_id AS addresses_4fb8_address_id, addresses_4fb8.user_id AS addresses_4fb8_user_id, + addresses_4fb8.zip AS addresses_4fb8_zip, addresses_4fb8.state AS addresses_4fb8_state, + addresses_4fb8.street AS addresses_4fb8_street + FROM users LEFT OUTER JOIN addresses AS addresses_4fb8 ON users.user_id = addresses_4fb8.user_id + WHERE users.user_name = :users_user_name ORDER BY users.oid, addresses_4fb8.oid {'users_user_name': 'jane'} - for a in user.addresses: - print repr(a) - + for u in users: + print repr(u) + for a in u.addresses: + print repr(a) Above, a pretty ambitious query is generated just by specifying that the User should be loaded with its child Addresses in one query. When the mapper processes the results, it uses an *Identity Map* to keep track of objects that were already loaded, based on their primary key identity. Through this method, the redundant rows produced by the join are organized into the distinct object instances they represent. The generation of this query is also immune to the effects of additional joins being specified in the original query. 
To use our select_by example above, joining against the "addresses" table to locate users with a certain street results in this behavior: {python} - {sql}users = User.mapper.select_by(street='123 Green Street') - SELECT users.user_id AS users_user_id, - users.user_name AS users_user_name, users.password AS users_password, - addresses.address_id AS addresses_address_id, - addresses.user_id AS addresses_user_id, addresses.street AS addresses_street, - addresses.city AS addresses_city, addresses.state AS addresses_state, - addresses.zip AS addresses_zip - FROM addresses AS addresses_417c, - users LEFT OUTER JOIN addresses ON users.user_id = addresses.user_id - WHERE addresses_417c.street = :addresses_street - AND users.user_id = addresses_417c.user_id - ORDER BY users.oid, addresses.oid + {sql}users = session.query(User).select_by(street='123 Green Street') + + SELECT users.user_name AS users_user_name, + users.password AS users_password, users.user_id AS users_user_id, + addresses_6ca7.city AS addresses_6ca7_city, + addresses_6ca7.address_id AS addresses_6ca7_address_id, + addresses_6ca7.user_id AS addresses_6ca7_user_id, + addresses_6ca7.zip AS addresses_6ca7_zip, addresses_6ca7.state AS addresses_6ca7_state, + addresses_6ca7.street AS addresses_6ca7_street + FROM addresses, users LEFT OUTER JOIN addresses AS addresses_6ca7 ON users.user_id = addresses_6ca7.user_id + WHERE addresses.street = :addresses_street AND users.user_id = addresses.user_id ORDER BY users.oid, addresses_6ca7.oid {'addresses_street': '123 Green Street'} -The join implied by passing the "street" parameter is converted into an "aliasized" clause by the eager loader, so that it does not conflict with the join used to eager load the child address objects. +The join implied by passing the "street" parameter is stated as an *additional* join between the `addresses` and `users` tables. Also, since the eager join is "aliasized", no name conflict occurs. 
-#### Switching Lazy/Eager, No Load {@name=options} +#### Using Options to Change the Loading Strategy {@name=options} -The `options` method of mapper provides an easy way to get alternate forms of a mapper from an original one. The most common use of this feature is to change the "eager/lazy" loading behavior of a particular mapper, via the functions `eagerload()`, `lazyload()` and `noload()`: +The `options` method on the `Query` object provides an easy way to get alternate forms of a mapper query from an original one. The most common use of this feature is to change the "eager/lazy" loading behavior of a particular mapper, via the functions `eagerload()`, `lazyload()` and `noload()`: {python} # user mapper with lazy addresses - User.mapper = mapper(User, users, properties = { - 'addresses' : relation(mapper(Address, addresses)) + mapper(User, users_table, properties = { + 'addresses' : relation(mapper(Address, addresses_table)) } ) - # make an eager loader - eagermapper = User.mapper.options(eagerload('addresses')) - u = eagermapper.select() + # query object + query = session.query(User) - # make another mapper that wont load the addresses at all - plainmapper = User.mapper.options(noload('addresses')) + # make an eager loading query + eagerquery = query.options(eagerload('addresses')) + u = eagerquery.select() + + # make another query that wont load the addresses at all + plainquery = query.options(noload('addresses')) # multiple options can be specified - mymapper = oldmapper.options(lazyload('tracker'), noload('streets'), eagerload('members')) + myquery = oldquery.options(lazyload('tracker'), noload('streets'), eagerload('members')) # to specify a relation on a relation, separate the property names by a "." - mymapper = oldmapper.options(eagerload('orders.items')) + myquery = oldquery.options(eagerload('orders.items')) ### One to One/Many to One {@name=onetoone} The above examples focused on the "one-to-many" relationship. 
To do other forms of relationship is easy, as the `relation` function can usually figure out what you want: {python} + metadata = MetaData() + # a table to store a user's preferences for a site - prefs = Table('user_prefs', engine, + prefs_table = Table('user_prefs', metadata, Column('pref_id', Integer, primary_key = True), Column('stylename', String(20)), Column('save_password', Boolean, nullable = False), Column('timezone', CHAR(3), nullable = False) ) - # user table gets 'preference_id' column added - users = Table('users', engine, + # user table with a 'preference_id' column + users_table = Table('users', metadata, Column('user_id', Integer, primary_key = True), Column('user_name', String(16), nullable = False), Column('password', String(20), nullable = False), - Column('preference_id', Integer, ForeignKey("prefs.pref_id")) + Column('preference_id', Integer, ForeignKey("user_prefs.pref_id")) ) - # class definition for preferences + # engine and some test data + engine = create_engine('sqlite:///', echo=True) + metadata.create_all(engine) + engine.execute(prefs_table.insert(), dict(pref_id=1, stylename='green', save_password=1, timezone='EST')) + engine.execute(users_table.insert(), dict(user_name = 'fred', password='45nfss', preference_id=1)) + + # classes + class User(object): + def __init__(self, user_name, password): + self.user_name = user_name + self.password = password + class UserPrefs(object): pass - UserPrefs.mapper = mapper(UserPrefs, prefs) - - # address mapper - Address.mapper = mapper(Address, addresses) + + mapper(UserPrefs, prefs_table) - # make a new mapper referencing everything. 
- m = mapper(User, users, properties = dict( - addresses = relation(Address.mapper, lazy=True, private=True), - preferences = relation(UserPrefs.mapper, lazy=False, private=True), + mapper(User, users_table, properties = dict( + preferences = relation(UserPrefs, lazy=False, cascade="all, delete-orphan"), )) # select - {sql}user = m.get_by(user_name='fred') - SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, - users.password AS users_password, users.preference_id AS users_preference_id, - user_prefs.pref_id AS user_prefs_pref_id, user_prefs.stylename AS user_prefs_stylename, - user_prefs.save_password AS user_prefs_save_password, user_prefs.timezone AS user_prefs_timezone - FROM users LEFT OUTER JOIN user_prefs ON user_prefs.pref_id = users.preference_id - WHERE users.user_name = :users_user_name ORDER BY users.oid, user_prefs.oid + session = create_session(bind_to=engine) + {sql}user = session.query(User).get_by(user_name='fred') + SELECT users.preference_id AS users_preference_id, users.user_name AS users_user_name, + users.password AS users_password, users.user_id AS users_user_id, + user_prefs_4eb2.timezone AS user_prefs_4eb2_timezone, user_prefs_4eb2.stylename AS user_prefs_4eb2_stylename, + user_prefs_4eb2.save_password AS user_prefs_4eb2_save_password, user_prefs_4eb2.pref_id AS user_prefs_4eb2_pref_id + FROM (SELECT users.user_id AS users_user_id FROM users WHERE users.user_name = :users_user_name ORDER BY users.oid + LIMIT 1 OFFSET 0) AS rowcount, + users LEFT OUTER JOIN user_prefs AS user_prefs_4eb2 ON user_prefs_4eb2.pref_id = users.preference_id + WHERE rowcount.users_user_id = users.user_id ORDER BY users.oid, user_prefs_4eb2.oid {'users_user_name': 'fred'} - + save_password = user.preferences.save_password - + # modify user.preferences.stylename = 'bluesteel' - {sql}user.addresses.append(Address('freddy@hi.org')) - SELECT email_addresses.address_id AS email_addresses_address_id, - email_addresses.user_id AS 
email_addresses_user_id, - email_addresses.email_address AS email_addresses_email_address - FROM email_addresses - WHERE email_addresses.user_id = :users_user_id - ORDER BY email_addresses.oid, email_addresses.oid - {'users_user_id': 1} - - # commit - {sql}objectstore.commit() + + # flush + {sql}session.flush() UPDATE user_prefs SET stylename=:stylename WHERE user_prefs.pref_id = :pref_id [{'stylename': 'bluesteel', 'pref_id': 1}] - INSERT INTO email_addresses (address_id, user_id, email_address) - VALUES (:address_id, :user_id, :email_address) - {'email_address': 'freddy@hi.org', 'address_id': None, 'user_id': 1} ### Many to Many {@name=manytomany} The `relation` function handles a basic many-to-many relationship when you specify the association table: {python} - articles = Table('articles', engine, + metadata = MetaData() + + articles_table = Table('articles', metadata, Column('article_id', Integer, primary_key = True), Column('headline', String(150), key='headline'), Column('body', TEXT, key='body'), ) - keywords = Table('keywords', engine, + keywords_table = Table('keywords', metadata, Column('keyword_id', Integer, primary_key = True), Column('keyword_name', String(50)) ) - itemkeywords = Table('article_keywords', engine, + itemkeywords_table = Table('article_keywords', metadata, Column('article_id', Integer, ForeignKey("articles.article_id")), Column('keyword_id', Integer, ForeignKey("keywords.keyword_id")) ) + engine = create_engine('sqlite:///') + metadata.create_all(engine) + # class definitions class Keyword(object): - def __init__(self, name = None): + def __init__(self, name): self.keyword_name = name class Article(object): pass + + mapper(Keyword, keywords_table) # define a mapper that does many-to-many on the 'itemkeywords' association # table - Article.mapper = mapper(Article, articles, properties = dict( - keywords = relation(mapper(Keyword, keywords), itemkeywords, lazy=False) + mapper(Article, articles_table, properties = dict( + keywords = 
relation(Keyword, secondary=itemkeywords_table, lazy=False) ) ) + session = create_session(bind_to=engine) + article = Article() article.headline = 'a headline' article.body = 'this is the body' article.keywords.append(Keyword('politics')) article.keywords.append(Keyword('entertainment')) - {sql}objectstore.commit() + session.save(article) + + {sql}session.flush() INSERT INTO keywords (name) VALUES (:name) {'name': 'politics'} INSERT INTO keywords (name) VALUES (:name) @@ -646,33 +668,25 @@ The `relation` function handles a basic many-to-many relationship when you speci [{'keyword_id': 1, 'article_id': 1}, {'keyword_id': 2, 'article_id': 1}] # select articles based on a keyword. select_by will handle the extra joins. - {sql}articles = Article.mapper.select_by(keyword_name='politics') - SELECT articles.article_id AS articles_article_id, - articles.article_headline AS articles_article_headline, - articles.article_body AS articles_article_body, - keywords.keyword_id AS keywords_keyword_id, - keywords.keyword_name AS keywords_keyword_name - FROM keywords AS keywords_f008, - article_keywords AS article_keywords_dbf0, - articles LEFT OUTER JOIN article_keywords ON - articles.article_id = article_keywords.article_id - LEFT OUTER JOIN keywords ON - keywords.keyword_id = article_keywords.keyword_id - WHERE (keywords_f008.keyword_name = :keywords_keyword_name - AND articles.article_id = article_keywords_dbf0.article_id) - AND keywords_f008.keyword_id = article_keywords_dbf0.keyword_id - ORDER BY articles.oid, article_keywords.oid + {sql}articles = session.query(Article).select_by(keyword_name='politics') + SELECT keywords_e2f2.keyword_id AS keywords_e2f2_keyword_id, keywords_e2f2.keyword_name AS keywords_e2f2_keyword_name, + articles.headline AS articles_headline, articles.body AS articles_body, articles.article_id AS articles_article_id + FROM keywords, article_keywords, articles + LEFT OUTER JOIN article_keywords AS article_keyword_3da2 ON articles.article_id = 
article_keyword_3da2.article_id + LEFT OUTER JOIN keywords AS keywords_e2f2 ON keywords_e2f2.keyword_id = article_keyword_3da2.keyword_id + WHERE (keywords.keyword_name = :keywords_keywords_name AND articles.article_id = article_keywords.article_id) + AND keywords.keyword_id = article_keywords.keyword_id ORDER BY articles.oid, article_keyword_3da2.oid {'keywords_keyword_name': 'politics'} - # modify a = articles[0] - del a.keywords[:] + + # clear out keywords with a new list + a.keywords = [] a.keywords.append(Keyword('topstories')) a.keywords.append(Keyword('government')) - # commit. individual INSERT/DELETE operations will take place only for the list - # elements that changed. - {sql}objectstore.commit() + # flush + {sql}session.flush() INSERT INTO keywords (name) VALUES (:name) {'name': 'topstories'} INSERT INTO keywords (name) VALUES (:name) @@ -689,62 +703,82 @@ The `relation` function handles a basic many-to-many relationship when you speci Many to Many can also be done with an association object, that adds additional information about how two items are related. This association object is set up in basically the same way as any other mapped object. However, since an association table typically has no primary key columns, you have to tell the mapper what columns will compose its "primary key", which are the two (or more) columns involved in the association. Also, the relation function needs an additional hint as to the fact that this mapped object is an association object, via the "association" argument which points to the class or mapper representing the other side of the association. 
{python} + from sqlalchemy import * + metadata = MetaData() + + users_table = Table('users', metadata, + Column('user_id', Integer, primary_key = True), + Column('user_name', String(16), nullable = False), + ) + + articles_table = Table('articles', metadata, + Column('article_id', Integer, primary_key = True), + Column('headline', String(150), key='headline'), + Column('body', TEXT, key='body'), + ) + + keywords_table = Table('keywords', metadata, + Column('keyword_id', Integer, primary_key = True), + Column('keyword_name', String(50)) + ) + # add "attached_by" column which will reference the user who attached this keyword - itemkeywords = Table('article_keywords', engine, + itemkeywords_table = Table('article_keywords', metadata, Column('article_id', Integer, ForeignKey("articles.article_id")), Column('keyword_id', Integer, ForeignKey("keywords.keyword_id")), Column('attached_by', Integer, ForeignKey("users.user_id")) ) - # define an association class + engine = create_engine('sqlite:///', echo=True) + metadata.create_all(engine) + + # class definitions + class User(object): + pass + class Keyword(object): + def __init__(self, name): + self.keyword_name = name + class Article(object): + pass class KeywordAssociation(object): pass + mapper(User, users_table) + mapper(Keyword, keywords_table) + # mapper for KeywordAssociation # specify "primary key" columns manually - KeywordAssociation.mapper = mapper(KeywordAssociation, itemkeywords, - primary_key = [itemkeywords.c.article_id, itemkeywords.c.keyword_id], + mapper(KeywordAssociation, itemkeywords_table, + primary_key = [itemkeywords_table.c.article_id, itemkeywords_table.c.keyword_id], properties={ - 'keyword' : relation(Keyword, lazy = False), # uses primary Keyword mapper - 'user' : relation(User, lazy = True) # uses primary User mapper + 'keyword' : relation(Keyword, lazy = False), + 'user' : relation(User, lazy = False) } ) - # mappers for Users, Keywords - User.mapper = mapper(User, users) - Keyword.mapper = 
mapper(Keyword, keywords) - - # define the mapper. - m = mapper(Article, articles, properties={ - 'keywords':relation(KeywordAssociation.mapper, lazy=False, association=Keyword) + # Article mapper, relates to Keyword via KeywordAssociation + mapper(Article, articles_table, properties={ + 'keywords':relation(KeywordAssociation, lazy=False, association=Keyword) } ) - # bonus step - well, we do want to load the users in one shot, - # so modify the mapper via an option. - # this returns a new mapper with the option switched on. - m2 = mapper.options(eagerload('keywords.user')) - - # select by keyword again - {sql}alist = m2.select_by(keyword_name='jacks_stories') - SELECT articles.article_id AS articles_article_id, - articles.article_headline AS articles_article_headline, - articles.article_body AS articles_article_body, - article_keywords.article_id AS article_keywords_article_id, - article_keywords.keyword_id AS article_keywords_keyword_id, - article_keywords.attached_by AS article_keywords_attached_by, - users.user_id AS users_user_id, users.user_name AS users_user_name, - users.password AS users_password, users.preference_id AS users_preference_id, - keywords.keyword_id AS keywords_keyword_id, keywords.name AS keywords_name - FROM article_keywords article_keywords_3a64, keywords keywords_11b7, - articles LEFT OUTER JOIN article_keywords ON articles.article_id = article_keywords.article_id - LEFT OUTER JOIN users ON users.user_id = article_keywords.attached_by - LEFT OUTER JOIN keywords ON keywords.keyword_id = article_keywords.keyword_id - WHERE keywords_11b7.keyword_id = article_keywords_3a64.keyword_id - AND article_keywords_3a64.article_id = articles.article_id - AND keywords_11b7.name = :keywords_name - ORDER BY articles.oid, article_keywords.oid, users.oid, keywords.oid - {'keywords_name': 'jacks_stories'} + session = create_session(bind_to=engine) + # select by keyword + {sql}alist = session.query(Article).select_by(keyword_name='jacks_stories') + SELECT 
article_keyword_f9af.keyword_id AS article_keyword_f9af_key_b3e1, + article_keyword_f9af.attached_by AS article_keyword_f9af_att_95d4, + article_keyword_f9af.article_id AS article_keyword_f9af_art_fd49, + users_9c30.user_name AS users_9c30_user_name, users_9c30.user_id AS users_9c30_user_id, + keywords_dc54.keyword_id AS keywords_dc54_keyword_id, keywords_dc54.keyword_name AS keywords_dc54_keyword_name, + articles.headline AS articles_headline, articles.body AS articles_body, articles.article_id AS articles_article_id + FROM keywords, article_keywords, articles + LEFT OUTER JOIN article_keywords AS article_keyword_f9af ON articles.article_id = article_keyword_f9af.article_id + LEFT OUTER JOIN users AS users_9c30 ON users_9c30.user_id = article_keyword_f9af.attached_by + LEFT OUTER JOIN keywords AS keywords_dc54 ON keywords_dc54.keyword_id = article_keyword_f9af.keyword_id + WHERE (keywords.keyword_name = :keywords_keywords_name AND keywords.keyword_id = article_keywords.keyword_id) + AND articles.article_id = article_keywords.article_id + ORDER BY articles.oid, article_keyword_f9af.oid, users_9c30.oid, keywords_dc54.oid + {'keywords_keywords_name': 'jacks_stories'} # user is available for a in alist: diff --git a/doc/build/content/dbengine.txt b/doc/build/content/dbengine.txt index 77a8e4583..85767bf14 100644 --- a/doc/build/content/dbengine.txt +++ b/doc/build/content/dbengine.txt @@ -1,109 +1,59 @@ Database Engines {@name=dbengine} -================ +============================ -A database engine is a subclass of `sqlalchemy.engine.SQLEngine`, and is the starting point for where SQLAlchemy provides a layer of abstraction on top of the various DBAPI2 database modules. It serves as an abstract factory for database-specific implementation objects as well as a layer of abstraction over the most essential tasks of a database connection, including connecting, executing queries, returning result sets, and managing transactions. 
- -The average developer doesn't need to know anything about the interface or workings of a SQLEngine in order to use it. Simply creating one, and then specifying it when constructing tables and other SQL objects is all that's needed. - -A SQLEngine is also a layer of abstraction on top of the connection pooling described in the previous section. While a DBAPI connection pool can be used explicitly alongside a SQLEngine, its not really necessary. Once you have a SQLEngine, you can retrieve pooled connections directly from its underlying connection pool via its own `connection()` method. However, if you're exclusively using SQLALchemy's SQL construction objects and/or object-relational mappers, all the details of connecting are handled by those libraries automatically. - +A database engine is a subclass of `sqlalchemy.sql.Engine`, and is the starting point for where SQLAlchemy provides a layer of abstraction on top of the various DBAPI2 database modules. For all databases supported by SA, there is a specific "implementation" module, found in the `sqlalchemy.databases` package, that provides all the objects an `Engine` needs in order to perform its job. A typical user of SQLAlchemy never needs to deal with these modules directly. For many purposes, the only knowledge that's needed is how to create an Engine for a particular connection URL. When dealing with direct execution of SQL statements, one would also be aware of Result, Connection, and Transaction objects. The primary public facing objects are: -### Establishing a Database Engine {@name=establishing} - -Engines exist for SQLite, Postgres, MySQL, MS-SQL, and Oracle, using the Pysqlite, Psycopg (1 or 2), MySQLDB, adodbapi or pymssql, and cx_Oracle modules (there is also experimental support for Firebird). Each engine imports its corresponding module which is required to be installed. For Postgres and Oracle, an alternate module may be specified at construction time as well. 
- -The string based argument names for connecting are translated to the appropriate names when the connection is made; argument names include "host" or "hostname" for database host, "database", "db", or "dbname" for the database name (also is dsn for Oracle), "user" or "username" for the user, and "password", "pw", or "passwd" for the password. SQLite expects "filename" or "file" for the filename, or if None it defaults to "":memory:". +* **URL** - represents the identifier for a particular database. URL objects are usually created automatically based on a given connect string passed to the `create_engine()` function. +* **Engine** - Combines a connection-providing resource with implementation-provided objects that know how to generate, execute, and gather information about SQL statements. It also provides the primary interface by which Connections are obtained, as well as a context for constructed SQL objects and schema constructs to "implicitly execute" themselves, which is an optional feature of SA 0.2. The Engine object that is normally dealt with is an instance of `sqlalchemy.engine.base.ComposedSQLEngine`. +* **Connection** - represents a connection to the database. The underlying connection object returned by a DBAPI's connect() method is referenced internally by the Connection object. Connection provides methods that handle the execution of SQLAlchemy's own SQL constructs, as well as literal string-based statements. +* **Transaction** - represents a transaction on a single Connection. Includes `begin()`, `commit()` and `rollback()` methods that support basic "nestable" behavior, meaning an outermost transaction is maintained against multiple nested calls to begin/commit. +* **ResultProxy** - Represents the results of an execution, and is most analogous to the cursor object in DBAPI.
It primarily allows iteration over result sets, but also provides an interface to information about inserts/updates/deletes, such as the count of rows affected, last inserted IDs, etc. +* **RowProxy** - Represents a single row returned by the fetchone() method on ResultProxy. -The connection arguments can be specified as a string + dictionary pair, or a single URL-encoded string, as follows: - - {python}from sqlalchemy import * +Underneath the public-facing API of `ComposedSQLEngine`, several components are provided by database implementations to provide the full behavior, including: - # sqlite in memory - sqlite_engine = create_engine('sqlite', {'filename':':memory:'}, **opts) +* **Dialect** - this object is provided by database implementations to describe the behavior of a particular database. It acts as a repository for metadata about a database's characteristics, and provides factory methods for other objects that deal with generating SQL strings and objects that handle some of the details of statement execution. +* **ConnectionProvider** - this object knows how to return a DBAPI connection object. It typically talks to a connection pool which maintains one or more connections in memory for quick re-use. +* **ExecutionContext** - this object is created for each execution of a single SQL statement, and tracks information about its execution such as primary keys inserted, the total count of rows affected, etc. It also may implement any special logic that various DBAPI implementations may require before or after a statement execution. +* **Compiler** - receives SQL expression objects and assembles them into strings that are suitable for direct execution, as well as collecting bind parameters into a dictionary or list to be sent along with the statement. +* **SchemaGenerator** - receives collections of Schema objects and knows how to generate the appropriate SQL for `CREATE` and `DROP` statements. 
- # via URL - sqlite_engine = create_engine('sqlite://', **opts) - - # sqlite using a file - sqlite_engine = create_engine('sqlite', {'filename':'querytest.db'}, **opts) - - # via URL - sqlite_engine = create_engine('sqlite://filename=querytest.db', **opts) - - # postgres - postgres_engine = create_engine('postgres', - {'database':'test', - 'host':'127.0.0.1', - 'user':'scott', - 'password':'tiger'}, **opts) - - # via URL - postgres_engine = create_engine('postgres://database=test&host=127.0.0.1&user=scott&password=tiger') - - # mysql - mysql_engine = create_engine('mysql', - { - 'db':'mydb', - 'user':'scott', - 'passwd':'tiger', - 'host':'127.0.0.1' - } - **opts) - # oracle - oracle_engine = create_engine('oracle', - {'dsn':'mydsn', - 'user':'scott', - 'password':'tiger'}, **opts) - - - -Note that the general form of connecting to an engine is: +### Supported Databases {@name=supported} - {python}# separate arguments - engine = create_engine( - <enginename>, - {<named DBAPI arguments>}, - <sqlalchemy options> - ) +Engines exist for SQLite, Postgres, MySQL, and Oracle, using the Pysqlite, Psycopg (1 or 2), MySQLDB, and cx_Oracle modules. There is also preliminary support for MS-SQL using adodbapi or pymssql, as well as Firebird. For each engine, a distinct Python module exists in the `sqlalchemy.databases` package, which provides implementations of some of the objects mentioned in the previous section. - # url - engine = create_engine('<enginename>://<named DBAPI arguments>', <sqlalchemy options>) +### Establishing a Database Engine {@name=establishing} -### Database Engine Methods {@name=methods} +SQLAlchemy 0.2 indicates the source of an Engine strictly via [RFC-1738](http://rfc.net/rfc1738.html) style URLs, combined with optional keyword arguments to specify options for the Engine. 
The form of the URL is: -A few useful methods off the SQLEngine are described here: + $ driver://username:password@host:port/database - {python}engine = create_engine('postgres://hostname=localhost&user=scott&password=tiger&database=test') +Available drivernames are `sqlite`, `mysql`, `postgres`, `oracle`, `mssql`, and `firebird`. For sqlite, the database name is the filename to connect to, or the special name ":memory:" which indicates an in-memory database. The URL is typically sent as a string to the `create_engine()` function: - # get a pooled DBAPI connection - conn = engine.connection() + {python} + pg_db = create_engine('postgres://scott:tiger@localhost:5432/mydatabase') + sqlite_db = create_engine('sqlite:///mydb.txt') + mysql_db = create_engine('mysql://localhost/foo') + oracle_db = create_engine('oracle://scott:tiger@dsn') - # create/drop tables based on table metadata objects - # (see the next section, Table Metadata, for info on table metadata) - engine.create(mytable) - engine.drop(mytable) +The `Engine` will create its first connection to the database when a SQL statement is executed. As concurrent statements are executed, the underlying connection pool will grow to a default size of five connections, and will allow a default "overflow" of ten. Since the `Engine` is essentially "home base" for the connection pool, it follows that you should keep a single `Engine` per database established within an application, rather than creating a new one for each connection. 
- # get the DBAPI module being used - dbapi = engine.dbapi() +### Database Engine Options {@name=options} - # get the default schema name - name = engine.get_default_schema_name() +Keyword options can also be specified to `create_engine()`, following the string URL as follows: - # execute some SQL directly, returns a ResultProxy (see the SQL Construction section for details) - result = engine.execute("select * from table where col1=:col1", {'col1':'foo'}) + {python} + db = create_engine('postgres://...', encoding='latin1', echo=True, module=psycopg1) - # log a message to the engine's log stream - engine.log('this is a message') - -### Database Engine Options {@name=options} +Options that can be specified include the following: -The remaining arguments to `create_engine` are keyword arguments that are passed to the specific subclass of `sqlalchemy.engine.SQLEngine` being used, as well as the underlying `sqlalchemy.pool.Pool` instance. All of the options described in the previous section [pooling_configuration](rel:pooling_configuration) can be specified, as well as engine-specific options: - -* pool=None : an instance of `sqlalchemy.pool.Pool` to be used as the underlying source for connections, overriding the engine's connect arguments (pooling is described in the previous section). If None, a default Pool (QueuePool or SingletonThreadPool as appropriate) will be created using the engine's connect arguments. +* strategy='plain' : the Strategy describes the general configuration used to create this Engine. The two available values are `plain`, which is the default, and `threadlocal`, which applies a "thread-local context" to implicit executions performed by the Engine. This context is further described in [dbengine_connections_context](rel:dbengine_connections_context). +* pool=None : an instance of `sqlalchemy.pool.Pool` to be used as the underlying source for connections, overriding the engine's connect arguments (pooling is described in [pooling](rel:pooling)). 
If None, a default `Pool` (usually `QueuePool`, or `SingletonThreadPool` in the case of SQLite) will be created using the engine's connect arguments. Example: - {python}from sqlalchemy import * + {python} + from sqlalchemy import * import sqlalchemy.pool as pool import MySQLdb @@ -112,146 +62,187 @@ Example: engine = create_engine('mysql', pool=pool.QueuePool(getconn, pool_size=20, max_overflow=40)) -* echo=False : if True, the SQLEngine will log all statements as well as a repr() of their parameter lists to the engines logger, which defaults to sys.stdout. A SQLEngine instances' "echo" data member can be modified at any time to turn logging on and off. If set to the string 'debug', result rows will be printed to the standard output as well. -* logger=None : a file-like object where logging output can be sent, if echo is set to True. This defaults to sys.stdout. -* module=None : used by Oracle and Postgres, this is a reference to a DBAPI2 module to be used instead of the engine's default module. For Postgres, the default is psycopg2, or psycopg1 if 2 cannot be found. For Oracle, its cx_Oracle. -* default_ordering=False : if True, table objects and associated joins and aliases will generate information used for ordering by primary keys (or OIDs, if the database supports OIDs). This information is used by the Mapper system to when it constructs select queries to supply a default ordering to mapped objects. -* use_ansi=True : used only by Oracle; when False, the Oracle driver attempts to support a particular "quirk" of some Oracle databases, that the LEFT OUTER JOIN SQL syntax is not supported, and the "Oracle join" syntax of using <column1>(+)=<column2> must be used in order to achieve a LEFT OUTER JOIN. Its advised that the Oracle database be configured to have full ANSI support instead of using this feature. -* use_oids=False : used only by Postgres, will enable the column name "oid" as the object ID column. Postgres as of 8.1 has object IDs disabled by default. 
-* convert_unicode=False : if set to True, all String/character based types will convert Unicode values to raw byte values going into the database, and all raw byte values to Python Unicode coming out in result sets. This is an engine-wide method to provide unicode across the board. For unicode conversion on a column-by-column level, use the Unicode column type instead.
-* encoding='utf-8' : the encoding to use for Unicode translations - passed to all encode/decode methods.
-* echo_uow=False : when True, logs unit of work commit plans to the standard output.
+* pool_size=5 : the number of connections to keep open inside the connection pool. This is only used with `QueuePool`.
+* max_overflow=10 : the number of connections to allow in "overflow", that is connections that can be opened above and beyond the initial five. This is only used with `QueuePool`.
+* pool_timeout=30 : number of seconds to wait before giving up on getting a connection from the pool. This is only used with `QueuePool`.
+* echo=False : if True, the Engine will log all statements as well as a repr() of their parameter lists to the engine's logger, which defaults to sys.stdout. The `echo` attribute of `ComposedSQLEngine` can be modified at any time to turn logging on and off. If set to the string `"debug"`, result rows will be printed to the standard output as well.
+* logger=None : a file-like object where logging output can be sent, if echo is set to True. Newlines will not be sent with log messages. This defaults to an internal logging object which references `sys.stdout`.
+* module=None : used by database implementations which support multiple DBAPI modules, this is a reference to a DBAPI2 module to be used instead of the engine's default module. For Postgres, the default is psycopg2, or psycopg1 if 2 cannot be found. For Oracle, it's cx_Oracle.
+* use_ansi=True : used only by Oracle; when False, the Oracle driver attempts to support a particular "quirk" of Oracle versions 8 and previous, that the LEFT OUTER JOIN SQL syntax is not supported, and the "Oracle join" syntax of using `<column1>(+)=<column2>` must be used in order to achieve a LEFT OUTER JOIN. +* threaded=True : used by cx_Oracle; sets the `threaded` parameter of the connection indicating thread-safe usage. cx_Oracle docs indicate setting this flag to `False` will speed performance by 10-15%. While this defaults to `False` in cx_Oracle, SQLAlchemy defaults it to `True`, preferring stability over early optimization. +* use_oids=False : used only by Postgres, will enable the column name "oid" as the object ID column, which is also used for the default sort order of tables. Postgres as of 8.1 has object IDs disabled by default. +* convert_unicode=False : if set to True, all String/character based types will convert Unicode values to raw byte values going into the database, and all raw byte values to Python Unicode coming out in result sets. This is an engine-wide method to provide unicode across the board. For unicode conversion on a column-by-column level, use the `Unicode` column type instead. +* encoding='utf-8' : the encoding to use for all Unicode translations, both by engine-wide unicode conversion as well as the `Unicode` type object. + +### Using Connections {@name=connections} + +In this section we describe the SQL execution interface available from an `Engine` instance. Note that when using the Object Relational Mapper (ORM) as well as when dealing with with "bound" metadata objects (described later), SQLAlchemy deals with the Engine for you and you generally don't need to know much about it; in those cases, you can skip this section and go to [metadata](rel:metadata). + +The Engine provides a `connect()` method which returns a `Connection` object. 
This object provides methods by which literal SQL text as well as SQL clause constructs can be compiled and executed. + + {python} + engine = create_engine('sqlite:///:memory:') + connection = engine.connect() + result = connection.execute("select * from mytable where col1=:col1", col1=5) + for row in result: + print row['col1'], row['col2'] + connection.close() + +The `close` method on `Connection` does not actually remove the underlying connection to the database, but rather indicates that the underlying resources can be returned to the connection pool. When using the `connect()` method, the DBAPI connection referenced by the `Connection` object is not referenced anywhere else. + + +In both execution styles above, the `Connection` object will also automatically return its resources to the connection pool when the object is garbage collected, i.e. its `__del__()` method is called. When using the standard C implementation of Python, this method is usually called immediately as soon as the object is dereferenced. With other Python implementations such as Jython, this is not so guaranteed. -### Using the Proxy Engine {@name=proxy} - -The ProxyEngine is useful for applications that need to swap engines -at runtime, or to create their tables and mappers before they know -what engine they will use. One use case is an application meant to be -pluggable into a mix of other applications, such as a WSGI -application. Well-behaved WSGI applications should be relocatable; and -since that means that two versions of the same application may be -running in the same process (or in the same thread at different -times), WSGI applications ought not to depend on module-level or -global configuration. Using the ProxyEngine allows a WSGI application -to define tables and mappers in a module, but keep the specific -database connection uri as an application instance or thread-local -value. 
- -The ProxyEngine is used in the same way as any other engine, with one -additional method: +The execute method on `Engine` and `Connection` can also receive SQL clause constructs as well, which are described in [sql](rel:sql): + + {python} + connection = engine.connect() + result = connection.execute(select([table1], table1.c.col1==5)) + for row in result: + print row['col1'], row['col2'] + connection.close() + +Both `Connection` and `Engine` fulfill an interface known as `Connectable` which specifies common functionality between the two objects, such as getting a `Connection` and executing queries. Therefore, most SQLAlchemy functions which take an `Engine` as a parameter with which to execute SQL will also accept a `Connection`: + + {python title="Specify Engine or Connection"} + engine = create_engine('sqlite:///:memory:') - {python}# define the tables and mappers - from sqlalchemy import * - from sqlalchemy.ext.proxy import ProxyEngine + # specify some Table metadata + metadata = MetaData() + table = Table('sometable', metadata, Column('col1', Integer)) - engine = ProxyEngine() + # create the table with the Engine + table.create(engine=engine) - users = Table('users', engine, ... ) + # drop the table with a Connection off the Engine + connection = engine.connect() + table.drop(engine=connection) + +#### Implicit Connection Contexts {@name=context} + +An **implicit connection** refers to connections that are allocated by the `Engine` internally. There are two general cases when this occurs: when using the various `execute()` methods that are available off the `Engine` object itself, and when calling the `execute()` method on constructed SQL objects, which are described in [sqlconstruction](rel:sqlconstruction). 
+
+    {python title="Implicit Connection"}
+    engine = create_engine('sqlite:///:memory:')
+    result = engine.execute("select * from mytable where col1=:col1", col1=5)
+    for row in result:
+        print row['col1'], row['col2']
+    result.close()
+
+When using implicit connections, the returned `ResultProxy` has a `close()` method which will return the resources used by the underlying `Connection`.
+
+The `strategy` keyword argument to `create_engine()` affects the algorithm used to retrieve the underlying DBAPI connection used by implicit executions. When set to `plain`, each implicit execution requests a unique connection from the connection pool, which is returned to the pool when the resulting `ResultProxy` falls out of scope (i.e. `__del__()` is called) or its `close()` method is called. If a second implicit execution occurs while the `ResultProxy` from the previous execution is still open, then a second connection is pulled from the pool.
+
+When `strategy` is set to `threadlocal`, the `Engine` still checks out a connection which is closeable in the same manner via the `ResultProxy`, except the connection it checks out will be the **same** connection as one which is already checked out, assuming the operation is in the same thread. When all `ResultProxy` objects are closed, the connection is returned to the pool normally.
+
+It is crucial to note that the `plain` and `threadlocal` contexts **do not impact the connect() method on the Engine.** `connect()` always returns a unique connection. Implicit connections use a different method off of `Engine` for their operations called `contextual_connect()`.
+
+The `plain` strategy is better suited to an application that ensures the explicit releasing of the resources used by each execution. This is because each execution uses its own distinct connection resource, and as those resources remain open, multiple connections can be checked out from the pool quickly.
Since the connection pool will block further requests when too many connections have been checked out, not keeping track of this can impact an application's stability. + + {python title="Plain Strategy"} + db = create_engine('mysql://localhost/test', strategy='plain') - class Users(object): - pass - - assign_mapper(Users, users) + # execute one statement and receive results. r1 now references a DBAPI connection resource. + r1 = db.execute("select * from table1") - def app(environ, start_response): - # later, connect the proxy engine to a real engine via the connect() method - engine.connect(environ['db_uri']) - # now you have a real db connection and can select, insert, etc. + # execute a second statement and receive results. r2 now references a *second* DBAPI connection resource. + r2 = db.execute("select * from table2") + for row in r1: + ... + for row in r2: + ... + # release connection 1 + r1.close() + # release connection 2 + r2.close() -#### Using the Global Proxy {@name=defaultproxy} - -There is an instance of ProxyEngine available within the schema package as `default_engine`. You can construct Table objects and not specify the engine parameter, and they will connect to this engine by default. To connect the default_engine, use the `global_connect` function. +Advantages to `plain` include that connection resources are immediately returned to the connection pool, without any reliance upon the `__del__()` method; there is no chance of resources being left around by a Python implementation that doesn't necessarily call `__del__()` immediately. - {python}# define the tables and mappers - from sqlalchemy import * +The `threadlocal` strategy is better suited to a programming style which relies upon the `__del__()` method of Connection objects in order to return them to the connection pool, rather than explicitly issuing a `close()` statement upon the `ResultProxy` object. 
This is because all of the executions within a single thread will share the same connection, if one has already been checked out in the current thread. Using this style, an application will use only one connection per thread at most within the scope of all implicit executions. + + {python title="Threadlocal Strategy"} + db = create_engine('mysql://localhost/test', strategy='threadlocal') - # specify a table with no explicit engine - users = Table('users', - Column('user_id', Integer, primary_key=True), - Column('user_name', String) - ) + # execute one statement and receive results. r1 now references a DBAPI connection resource. + r1 = db.execute("select * from table1") - # connect the global proxy engine - global_connect('sqlite://filename=foo.db') + # execute a second statement and receive results. r2 now references the *same* resource as r1 + r2 = db.execute("select * from table2") - # create the table in the selected database - users.create() + for row in r1: + ... + for row in r2: + ... + # dereference r1. the connection is still held by r2. + r1 = None + # dereference r2. with no more references to the underlying connection resources, they + # are returned to the pool. + r2 = None -### Transactions {@name=transactions} +While the `close()` method is still available with the "threadlocal" strategy, it should be used carefully. Above, if we issued a `close()` call on `r1`, and then tried to further work with results from `r2`, `r2` would be in an invalid state since its connection was already returned to the pool. By relying on `__del__()` to automatically clean up resources, this condition will never occur. -A SQLEngine also provides an interface to the transactional capabilities of the underlying DBAPI connection object, as well as the connection object itself. 
Note that when using the object-relational-mapping package, described in a later section, basic transactional operation is handled for you automatically by its "Unit of Work" system; the methods described here will usually apply just to literal SQL update/delete/insert operations or those performed via the SQL construction library. - -Typically, a connection is opened with `autocommit=False`. So to perform SQL operations and just commit as you go, you can simply pull out a connection from the connection pool, keep it in the local scope, and call commit() on it as needed. As long as the connection remains referenced, all other SQL operations within the same thread will use this same connection, including those used by the SQL construction system as well as the object-relational mapper, both described in later sections: +Advantages to `threadlocal` include that resources can be left to clean up after themselves, application code can be more minimal, its guaranteed that only one connection is used per thread, and there is no chance of a "connection pool block", which is when an execution hangs because the current thread has already checked out all remaining resources. - {python}conn = engine.connection() - - # execute SQL via the engine - engine.execute("insert into mytable values ('foo', 'bar')") - conn.commit() - - # execute SQL via the SQL construction library - mytable.insert().execute(col1='bat', col2='lala') - conn.commit() - -There is a more automated way to do transactions, and that is to use the engine's begin()/commit() functionality. When the begin() method is called off the engine, a connection is checked out from the pool and stored in a thread-local context. That way, all subsequent SQL operations within the same thread will use that same connection. Subsequent commit() or rollback() operations are performed against that same connection. In effect, its a more automated way to perform the "commit as you go" example above. 
+To get at the actual `Connection` object which is used by implicit executions, call the `contextual_connection()` method on `Engine`: + + {python title="Contextual Connection"} + # threadlocal strategy + db = create_engine('mysql://localhost/test', strategy='threadlocal') - {python}engine.begin() - engine.execute("insert into mytable values ('foo', 'bar')") - mytable.insert().execute(col1='foo', col2='bar') - engine.commit() - + conn1 = db.contextual_connection() + conn2 = db.contextual_connection() -A traditional "rollback on exception" pattern looks like this: + >>> assert conn1 is conn2 + True - {python}engine.begin() +When the `plain` strategy is used, the `contextual_connection()` method is synonymous with the `connect()` method; both return a distinct connection from the pool. + +### Transactions {@name=transactions} + +The `Connection` object provides a `begin()` method which returns a `Transaction` object. This object is usually used within a try/except clause so that it is guaranteed to `rollback()` or `commit()`: + + {python} + trans = connection.begin() try: - engine.execute("insert into mytable values ('foo', 'bar')") - mytable.insert().execute(col1='foo', col2='bar') + r1 = connection.execute(table1.select()) + connection.execute(table1.insert(), col1=7, col2='this is some data') + trans.commit() except: - engine.rollback() + trans.rollback() raise - engine.commit() - - -An shortcut which is equivalent to the above is provided by the `transaction` method: - {python}def do_stuff(): - engine.execute("insert into mytable values ('foo', 'bar')") - mytable.insert().execute(col1='foo', col2='bar') +The `Transaction` object also handles "nested" behavior by keeping track of the outermost begin/commit pair. In this example, two functions both issue a transaction on a Connection, but only the outermost Transaction object actually takes effect when it is committed. 
- engine.transaction(do_stuff) - -An added bonus to the engine's transaction methods is "reentrant" functionality; once you call begin(), subsequent calls to begin() will increment a counter that must be decremented corresponding to each commit() statement before an actual commit can happen. This way, any number of methods that want to insure a transaction can call begin/commit, and be nested arbitrarily: - - {python}# method_a starts a transaction and calls method_b - def method_a(): - engine.begin() + {python} + # method_a starts a transaction and calls method_b + def method_a(connection): + trans = connection.begin() # open a transaction try: - method_b() + method_b(connection) + trans.commit() # transaction is committed here except: - engine.rollback() + trans.rollback() # this rolls back the transaction unconditionally raise - engine.commit() - # method_b starts a transaction, or joins the one already in progress, - # and does some SQL - def method_b(): - engine.begin() + # method_b also starts a transaction + def method_b(connection): + trans = connection.begin() # open a transaction - this runs in the context of method_a's transaction try: - engine.execute("insert into mytable values ('bat', 'lala')") - mytable.insert().execute(col1='bat', col2='lala') + connection.execute("insert into mytable values ('bat', 'lala')") + connection.execute(mytable.insert(), col1='bat', col2='lala') + trans.commit() # transaction is not committed yet except: - engine.rollback() + trans.rollback() # this rolls back the transaction unconditionally raise - engine.commit() - # call method_a - method_a() + # open a Connection and call method_a + conn = engine.connect() + method_a(conn) + conn.close() -Above, `method_a` is called first, which calls `engine.begin()`. Then it calls `method_b`. When `method_b` calls `engine.begin()`, it just increments a counter that is decremented when it calls `commit()`. 
If either `method_a` or `method_b` calls `rollback()`, the whole transaction is rolled back. The transaction is not committed until `method_a` calls the `commit()` method. +Above, `method_a` is called first, which calls `connection.begin()`. Then it calls `method_b`. When `method_b` calls `connection.begin()`, it just increments a counter that is decremented when it calls `commit()`. If either `method_a` or `method_b` calls `rollback()`, the whole transaction is rolled back. The transaction is not committed until `method_a` calls the `commit()` method. -The object-relational-mapper capability of SQLAlchemy includes its own `commit()` method that gathers SQL statements into a batch and runs them within one transaction. That transaction is also invokved within the scope of the "reentrant" methodology above; so multiple objectstore.commit() operations can also be bundled into a larger database transaction via the above methodology. - +Note that SQLAlchemy's Object Relational Mapper also provides a way to control transaction scope at a higher level; this is described in [unitofwork_transaction](rel:unitofwork_transaction). 
diff --git a/doc/build/content/document_base.myt b/doc/build/content/document_base.myt index d291240b1..db17987d7 100644 --- a/doc/build/content/document_base.myt +++ b/doc/build/content/document_base.myt @@ -3,9 +3,7 @@ <%python scope="global"> files = [ - #'tutorial', - 'trailmap', - 'pooling', + 'tutorial', 'dbengine', 'metadata', 'sqlconstruction', @@ -13,6 +11,8 @@ 'unitofwork', 'adv_datamapping', 'types', + 'pooling', + 'plugins', 'docstrings', ] @@ -23,8 +23,8 @@ wrapper='section_wrapper.myt' onepage='documentation' index='index' - title='SQLAlchemy 0.1 Documentation' - version = '0.1.7' + title='SQLAlchemy 0.2 Documentation' + version = '0.2.0' </%attr> <%method title> @@ -40,4 +40,3 @@ - diff --git a/doc/build/content/metadata.txt b/doc/build/content/metadata.txt index 6333e98a4..1eda5b171 100644 --- a/doc/build/content/metadata.txt +++ b/doc/build/content/metadata.txt @@ -1,34 +1,48 @@ Database Meta Data {@name=metadata} ================== -### Describing Tables with MetaData {@name=tables} +### Describing Databases with MetaData {@name=tables} -The core of SQLAlchemy's query and object mapping operations is table metadata, which are Python objects that describe tables. Metadata objects can be created by explicitly naming the table and all its properties, using the Table, Column, ForeignKey, and Sequence objects imported from `sqlalchemy.schema`, and a database engine constructed as described in the previous section, or they can be automatically pulled from an existing database schema. First, the explicit version: +The core of SQLAlchemy's query and object mapping operations is database metadata, which are Python objects that describe tables and other schema-level objects. Metadata objects can be created by explicitly naming the various components and their properties, using the Table, Column, ForeignKey, Index, and Sequence objects imported from `sqlalchemy.schema`. 
There is also support for *reflection*, which means you only specify the *name* of the entities and they are recreated from the database automatically. + +A collection of metadata entities is stored in an object aptly named `MetaData`. This object takes an optional `name` parameter: {python} from sqlalchemy import * - engine = create_engine('sqlite', {'filename':':memory:'}, **opts) - users = Table('users', engine, + metadata = MetaData(name='my metadata') + +Then to construct a Table, use the `Table` class: + + {python} + users = Table('users', metadata, Column('user_id', Integer, primary_key = True), Column('user_name', String(16), nullable = False), Column('email_address', String(60), key='email'), Column('password', String(20), nullable = False) ) - user_prefs = Table('user_prefs', engine, + user_prefs = Table('user_prefs', metadata, Column('pref_id', Integer, primary_key=True), Column('user_id', Integer, ForeignKey("users.user_id"), nullable=False), Column('pref_name', String(40), nullable=False), Column('pref_value', String(100)) ) + +The specific datatypes for each Column, such as Integer, String, etc. are described in [types](rel:types), and exist within the module `sqlalchemy.types` as well as the global `sqlalchemy` namespace. + +The `MetaData` object supports some handy methods, such as getting a list of Tables in the order (or reverse) of their dependency: + + {python} + >>> for t in metadata.table_iterator(reverse=False): + ... print t.name + users + user_prefs -The specific datatypes, such as Integer, String, etc. are defined in [types](rel:types) and are automatically pulled in when you import * from `sqlalchemy`. Note that for Column objects, an altername name can be specified via the "key" parameter; if this parameter is given, then all programmatic references to this Column object will be based on its key, instead of its actual column name. 
- -Once constructed, the Table object provides a clean interface to the table's properties as well as that of its columns: +And `Table` provides an interface to the table's properties as well as that of its columns: {python} - employees = Table('employees', engine, + employees = Table('employees', metadata, Column('employee_id', Integer, primary_key=True), Column('employee_name', String(60), nullable=False, key='name'), Column('employee_dept', Integer, ForeignKey("departments.department_id")) @@ -55,7 +69,10 @@ Once constructed, the Table object provides a clean interface to the table's pro for fkey in employees.foreign_keys: # ... - # access the table's SQLEngine object: + # access the table's MetaData: + employees.metadata + + # access the table's Engine, if its MetaData is bound: employees.engine # access a column's name, type, nullable, primary key, foreign key @@ -75,45 +92,101 @@ Once constructed, the Table object provides a clean interface to the table's pro # get the table related by a foreign key fcolumn = employees.c.employee_dept.foreign_key.column.table - -Metadata objects can also be <b>reflected</b> from tables that already exist in the database. Reflection means based on a table name, the names, datatypes, and attributes of all columns, including foreign keys, will be loaded automatically. This feature is supported by all database engines: + +#### Binding MetaData to an Engine {@name=binding} + +A MetaData object can be associated with one or more Engine instances. This allows the MetaData and the elements within it to perform operations automatically, using the connection resources of that Engine. This includes being able to "reflect" the columns of tables, as well as to perform create and drop operations without needing to pass an `Engine` or `Connection` around. It also allows SQL constructs to be created which know how to execute themselves (called "implicit execution"). 
+ +To bind `MetaData` to a single `Engine`, use `BoundMetaData`: + + {python} + engine = create_engine('sqlite://', **kwargs) + + # create BoundMetaData from an Engine + meta = BoundMetaData(engine) + + # create the Engine and MetaData in one step + meta = BoundMetaData('postgres://db/', **kwargs) + +Another form of `MetaData` exists which allows connecting to any number of engines, within the context of the current thread. This is `DynamicMetaData`: + + {python} + meta = DynamicMetaData() + + meta.connect(engine) # connect to an existing Engine + + meta.connect('mysql://user@host/dsn') # create a new Engine and connect + +`DynamicMetaData` is ideal for applications that need to use the same set of `Tables` for many different database connections in the same process, such as a CherryPy web application which handles multiple application instances in one process. + +#### Reflecting Tables + +Once you have a `BoundMetaData` or a connected `DynamicMetaData`, you can create `Table` objects without specifying their columns, just their names, using `autoload=True`: {python} - >>> messages = Table('messages', engine, autoload = True) + >>> messages = Table('messages', meta, autoload = True) >>> [c.name for c in messages.columns] ['message_id', 'message_name', 'date'] - + +At the moment the Table is constructed, it will query the database for the columns and constraints of the `messages` table. + Note that if a reflected table has a foreign key referencing another table, then the metadata for the related table will be loaded as well, even if it has not been defined by the application: {python} - >>> shopping_cart_items = Table('shopping_cart_items', engine, autoload = True) + >>> shopping_cart_items = Table('shopping_cart_items', meta, autoload = True) >>> print shopping_cart_items.c.cart_id.table.name shopping_carts To get direct access to 'shopping_carts', simply instantiate it via the Table constructor. 
You'll get the same instance of the shopping cart Table as the one that is attached to shopping_cart_items: {python} - >>> shopping_carts = Table('shopping_carts', engine) + >>> shopping_carts = Table('shopping_carts', meta) >>> shopping_carts is shopping_cart_items.c.cart_id.table.name True -This works because when the Table constructor is called for a particular name and database engine, if the table has already been created then the instance returned will be the same as the original. This is a <b>singleton</b> constructor: +This works because when the Table constructor is called for a particular name and `MetaData` object, if the table has already been created then the instance returned will be the same as the original. This is a <b>singleton</b> constructor: {python} - >>> news_articles = Table('news', engine, + >>> news_articles = Table('news', meta, ... Column('article_id', Integer, primary_key = True), ... Column('url', String(250), nullable = False) ... ) - >>> othertable = Table('news', engine) + >>> othertable = Table('news', meta) >>> othertable is news_articles True - + +#### Specifying the Schema Name {@name=schema} + +Some databases support the concept of multiple schemas. A `Table` can reference this by specifying the `schema` keyword argument: + + {python} + financial_info = Table('financial_info', meta, + Column('id', Integer, primary_key=True), + Column('value', String(100), nullable=False), + schema='remote_banks' + ) + +Within the `MetaData` collection, this table will be identified by the combination of `financial_info` and `remote_banks`. If another table called `financial_info` is referenced without the `remote_banks` schema, it will refer to a different `Table`. `ForeignKey` objects can reference columns in this table using the form `remote_banks.financial_info.id`. 
+ +#### Other Options {@name=options} + +`Tables` may support database-specific options, such as MySQL's `engine` option that can specify "MyISAM", "InnoDB", and other backends for the table: + + {python} + addresses = Table('engine_email_addresses', meta, + Column('address_id', Integer, primary_key = True), + Column('remote_user_id', Integer, ForeignKey(users.c.user_id)), + Column('email_address', String(20)), + mysql_engine='InnoDB' + ) + ### Creating and Dropping Database Tables {@name=creating} -Creating and dropping is easy, just use the `create()` and `drop()` methods: +Creating and dropping individual tables can be done via the `create()` and `drop()` methods of `Table`; these methods take an optional `engine` parameter which references an `Engine` or a `Connection`. If not supplied, the `Engine` bound to the `MetaData` will be used, else an error is raised: {python} - employees = Table('employees', engine, + meta = BoundMetaData('sqlite:///:memory:') + employees = Table('employees', meta, Column('employee_id', Integer, primary_key=True), Column('employee_name', String(60), nullable=False, key='name'), Column('employee_dept', Integer, ForeignKey("departments.department_id")) @@ -125,11 +198,51 @@ Creating and dropping is easy, just use the `create()` and `drop()` methods: employee_dept INTEGER REFERENCES departments(department_id) ) {} + +`drop()` method: - {sql}employees.drop() + {python} + {sql}employees.drop(engine=e) DROP TABLE employees {} + +Entire groups of Tables can be created and dropped directly from the `MetaData` object with `create_all()` and `drop_all()`, each of which take an optional `engine` keyword argument which can reference an `Engine` or a `Connection`, else the underlying bound `Engine` is used: + + {python} + engine = create_engine('sqlite:///:memory:') + + metadata = MetaData() + + users = Table('users', metadata, + Column('user_id', Integer, primary_key = True), + Column('user_name', String(16), nullable = False), + 
Column('email_address', String(60), key='email'), + Column('password', String(20), nullable = False) + ) + user_prefs = Table('user_prefs', metadata, + Column('pref_id', Integer, primary_key=True), + Column('user_id', Integer, ForeignKey("users.user_id"), nullable=False), + Column('pref_name', String(40), nullable=False), + Column('pref_value', String(100)) + ) + + {sql}metadata.create_all(engine=engine) + PRAGMA table_info(users){} + CREATE TABLE users( + user_id INTEGER NOT NULL PRIMARY KEY, + user_name VARCHAR(16) NOT NULL, + email_address VARCHAR(60), + password VARCHAR(20) NOT NULL + ) + PRAGMA table_info(user_prefs){} + CREATE TABLE user_prefs( + pref_id INTEGER NOT NULL PRIMARY KEY, + user_id INTEGER NOT NULL REFERENCES users(user_id), + pref_name VARCHAR(40) NOT NULL, + pref_value VARCHAR(100) + ) + ### Column Defaults and OnUpdates {@name=defaults} SQLAlchemy includes flexible constructs in which to create default values for columns upon the insertion of rows, as well as upon update. These defaults can take several forms: a constant, a Python callable to be pre-executed before the SQL is executed, a SQL expression or function to be pre-executed before the SQL is executed, a pre-executed Sequence (for databases that support sequences), or a "passive" default, which is a default function triggered by the database itself upon insert, the value of which can then be post-fetched by the engine, provided the row provides a primary key in which to call upon. 
@@ -146,7 +259,7 @@ A basic default is most easily specified by the "default" keyword argument to Co i += 1 return i - t = Table("mytable", db, + t = Table("mytable", meta, # function-based default Column('id', Integer, primary_key=True, default=mydefault), @@ -157,7 +270,7 @@ A basic default is most easily specified by the "default" keyword argument to Co The "default" keyword can also take SQL expressions, including select statements or direct function calls: {python} - t = Table("mytable", db, + t = Table("mytable", meta, Column('id', Integer, primary_key=True), # define 'create_date' to default to now() @@ -177,7 +290,7 @@ The "default" keyword argument is shorthand for using a ColumnDefault object in Similar to an on-insert default is an on-update default, which is most easily specified by the "onupdate" keyword to Column, which also can be a constant, plain Python function or SQL expression: {python} - t = Table("mytable", db, + t = Table("mytable", meta, Column('id', Integer, primary_key=True), # define 'last_updated' to be populated with current_timestamp (the ANSI-SQL version of now()) @@ -195,7 +308,7 @@ To use an explicit ColumnDefault object to specify an on-update, use the "for_up A PassiveDefault indicates a column default or on-update value that is executed automatically by the database. This construct is used to specify a SQL function that will be specified as "DEFAULT" when creating tables, and also to indicate the presence of new data that is available to be "post-fetched" after an insert or update execution. {python} - t = Table('test', e, + t = Table('test', meta, Column('mycolumn', DateTime, PassiveDefault("sysdate")) ) @@ -206,7 +319,7 @@ A create call for the above table will produce: mycolumn datetime default sysdate ) -PassiveDefaults also send a message to the SQLEngine that data is available after update or insert. 
The object-relational mapper system uses this information to post-fetch rows after insert or update, so that instances can be refreshed with the new data. Below is a simplified version: +PassiveDefaults also send a message to the `Engine` that data is available after update or insert. The object-relational mapper system uses this information to post-fetch rows after insert or update, so that instances can be refreshed with the new data. Below is a simplified version: {python} # table with passive defaults @@ -220,36 +333,35 @@ PassiveDefaults also send a message to the SQLEngine that data is available afte Column('data2', Integer, PassiveDefault("d2_func", for_update=True)) ) # insert a row - mytable.insert().execute(name='fred') + r = mytable.insert().execute(name='fred') - # ask the engine: were there defaults fired off on that row ? - if table.engine.lastrow_has_defaults(): + # check the result: were there defaults fired off on that row ? + if r.lastrow_has_defaults(): # postfetch the row based on primary key. # this only works for a table with primary key columns defined - primary_key = table.engine.last_inserted_ids() + primary_key = r.last_inserted_ids() row = table.select(table.c.id == primary_key[0]) -When Tables are reflected from the database using <code>autoload=True</code>, any DEFAULT values set on the columns will be reflected in the Table object as PassiveDefault instances. +When Tables are reflected from the database using `autoload=True`, any DEFAULT values set on the columns will be reflected in the Table object as PassiveDefault instances. ##### The Catch: Postgres Primary Key Defaults always Pre-Execute {@name=postgres} Current Postgres support does not rely upon OIDs to determine the identity of a row. This is because the usage of OIDs has been deprecated with Postgres and they are disabled by default for table creates as of PG version 8. Psycopg2's "cursor.lastrowid" function only returns OIDs. 
Therefore, when inserting a new row which has passive defaults set on the primary key columns, the default function is <b>still pre-executed</b> since SQLAlchemy would otherwise have no way of retrieving the row just inserted. - - + #### Defining Sequences {@name=sequences} A table with a sequence looks like: {python} - table = Table("cartitems", db, + table = Table("cartitems", meta, Column("cart_id", Integer, Sequence('cart_id_seq'), primary_key=True), Column("description", String(40)), Column("createdate", DateTime()) ) -The Sequence is used with Postgres or Oracle to indicate the name of a Sequence that will be used to create default values for a column. When a table with a Sequence on a column is created by SQLAlchemy, the Sequence object is also created. Similarly, the Sequence is dropped when the table is dropped. Sequences are typically used with primary key columns. When using Postgres, if an integer primary key column defines no explicit Sequence or other default method, SQLAlchemy will create the column with the SERIAL keyword, and will pre-execute a sequence named "tablename_columnname_seq" in order to retrieve new primary key values. Oracle, which has no "auto-increment" keyword, requires that a Sequence be created for a table if automatic primary key generation is desired. Note that for all databases, primary key values can always be explicitly stated within the bind parameters for any insert statement as well, removing the need for any kind of default generation function. +The Sequence is used with Postgres or Oracle to indicate the name of a database sequence that will be used to create default values for a column. When a table with a Sequence on a column is created in the database by SQLAlchemy, the database sequence object is also created. Similarly, the database sequence is dropped when the table is dropped. Sequences are typically used with primary key columns. 
When using Postgres, if an integer primary key column defines no explicit Sequence or other default method, SQLAlchemy will create the column with the SERIAL keyword, and will pre-execute a sequence named "tablename_columnname_seq" in order to retrieve new primary key values, if they were not otherwise explicitly stated. Oracle, which has no "auto-increment" keyword, requires that a Sequence be created for a table if automatic primary key generation is desired. -A Sequence object can be defined on a Table that is then used for a non-sequence-supporting database. In that case, the Sequence object is simply ignored. Note that a Sequence object is <b>entirely optional for all databases except Oracle</b>, as other databases offer options for auto-creating primary key values, such as AUTOINCREMENT, SERIAL, etc. SQLAlchemy will use these default methods for creating primary key values if no Sequence is present on the table metadata. +A Sequence object can be defined on a Table that is then used for a non-sequence-supporting database. In that case, the Sequence object is simply ignored. Note that a Sequence object is **entirely optional for all databases except Oracle**, as other databases offer options for auto-creating primary key values, such as AUTOINCREMENT, SERIAL, etc. SQLAlchemy will use these default methods for creating primary key values if no Sequence is present on the table metadata. A sequence can also be specified with `optional=True` which indicates the Sequence should only be used on a database that requires an explicit sequence, and not those that supply some other method of providing integer values. At the moment, it essentially means "use this sequence only with Oracle and not Postgres". @@ -258,7 +370,8 @@ A sequence can also be specified with `optional=True` which indicates the Sequen Indexes can be defined on table columns, including named indexes, non-unique or unique, multiple column. 
Indexes are included along with table create and drop statements. They are not used for any kind of run-time constraint checking; SQLAlchemy leaves that job to the expert on constraint checking, the database itself. {python} - mytable = Table('mytable', engine, + boundmeta = BoundMetaData('postgres:///scott:tiger@localhost/test') + mytable = Table('mytable', boundmeta, # define a unique index Column('col1', Integer, unique=True), @@ -287,50 +400,19 @@ Indexes can be defined on table columns, including named indexes, non-unique or # which can then be created separately (will also get created with table creates) i.create() -### Adapting Tables to Alternate Engines {@name=adapting} +### Adapting Tables to Alternate Metadata {@name=adapting} -A Table object created against a specific engine can be re-created against a new engine using the `toengine` method: +A `Table` object created against a specific `MetaData` object can be re-created against a new MetaData using the `tometadata` method: {python} - # create two engines - sqlite_engine = create_engine('sqlite', {'filename':'querytest.db'}) - postgres_engine = create_engine('postgres', - {'database':'test', - 'host':'127.0.0.1', 'user':'scott', 'password':'tiger'}) + # create two metadata + meta1 = BoundMetaData('sqlite:///querytest.db') + meta2 = MetaData() # load 'users' from the sqlite engine - users = Table('users', sqlite_engine, autoload=True) + users_table = Table('users', meta1, autoload=True) - # create the same Table object for the other engine - pg_users = users.toengine(postgres_engine) + # create the same Table object for the plain metadata + users_table_2 = users_table.tometadata(meta2) -Also available is the "database neutral" ansisql engine: - - {python} - import sqlalchemy.ansisql as ansisql - generic_engine = ansisql.engine() - - users = Table('users', generic_engine, - Column('user_id', Integer), - Column('user_name', String(50)) - ) - -Flexible "multi-engined" tables can also be achieved via the 
proxy engine, described in the section [dbengine_proxy](rel:dbengine_proxy). - -#### Non-engine primitives: TableClause/ColumnClause {@name=primitives} - -TableClause and ColumnClause are "primitive" versions of the Table and Column objects which dont use engines at all; applications that just want to generate SQL strings but not directly communicate with a database can use TableClause and ColumnClause objects (accessed via 'table' and 'column'), which are non-singleton and serve as the "lexical" base class of Table and Column: - - {python} - tab1 = table('table1', - column('id'), - column('name')) - - tab2 = table('table2', - column('id'), - column('email')) - - tab1.select(tab1.c.name == 'foo') - -TableClause and ColumnClause are strictly lexical. This means they are fully supported within the full range of SQL statement generation, but they don't support schema concepts like creates, drops, primary keys, defaults, nullable status, indexes, or foreign keys. diff --git a/doc/build/content/plugins.txt b/doc/build/content/plugins.txt new file mode 100644 index 000000000..1502d4c01 --- /dev/null +++ b/doc/build/content/plugins.txt @@ -0,0 +1,309 @@ +Plugins {@name=plugins} +====================== + +SQLAlchemy has a variety of extensions and "mods" available which provide extra functionality to SA, either via explicit usage or by augmenting the core behavior. + +### threadlocal + +**Author:** Mike Bayer and Daniel Miller + +Establishes `threadlocal` as the default strategy for new `ComposedSQLEngine` objects, installs a threadlocal `SessionContext` that is attached to all Mappers via a global `MapperExtension`, and establishes the global `SessionContext` under the name `sqlalchemy.objectstore`. Usually this is used in combination with `Tables` that are associated with `BoundMetaData` or `DynamicMetaData`, so that the `Session` does not need to be bound to any `Engine` explicitly. 
+ + {python} + import sqlalchemy.mods.threadlocal + from sqlalchemy import * + + metadata = BoundMetaData('sqlite:///') + user_table = Table('users', metadata, + Column('user_id', Integer, primary_key=True), + Column('user_name', String(50), nullable=False) + ) + + class User(object): + pass + mapper(User, user_table) + + # thread local session + session = objectstore.get_session() + + # "user" object is added to the session automatically + user = User() + + session.flush() + +#### get_session() Implemented on All Mappers + +All `Mapper` objects constructed after the `threadlocal` import will receive a default `MapperExtension` which implements the `get_session()` method, returning the `Session` that is associated with the current thread by the global `SessionContext`. All newly constructed objects will automatically be attached to the `Session` corresponding to the current thread, i.e. they will skip the "transient" state and go right to "pending". + +This occurs because when a `Mapper` is first constructed for a class, it decorates the classes' `__init__()` method in a manner like the following: + + {python} + oldinit = class_.__init__ # the previous init method + def __init__(self): + session = ext.get_session() # get Session from this Mapper's MapperExtension + if session is EXT_PASS: + session = None + if session is not None: + session.save(self) # attach to the current session + oldinit(self) # call previous init method + +An instance can be redirected at construction time to a different `Session` by specifying the keyword parameter `_sa_session`: + + {python} + session = create_session() # create a new session distinct from the thread-local session + myuser = User(_sa_session=session) # make a new User that is saved to this session + +Similarly, the **entity_name** parameter, which specifies an alternate `Mapper` to be used when attaching this instance to the `Session`, can be specified via `_sa_entity_name`: + + {python} + myuser = User(_sa_session=session, 
_sa_entity_name='altentity') + +#### Default Query Objects + +The `MapperExtension` object's `get_session()` method is also used by the `Query` object to locate a `Session` with which to store newly loaded instances, if the `Query` is not already associated with a specific `Session`. As a result, the `Query` can be constructed standalone from a mapper or class: + + {python} + # create a Query from a class + query = Query(User) + + # specify entity name + query = Query(User, entity_name='foo') + + # create a Query from a mapper + query = Query(mapper) + +#### objectstore Namespace {@name=objectstore} + +The `objectstore` is an instance of `SessionContext`, available in the `sqlalchemy` namespace which provides a proxy to the underlying `Session` bound to the current thread. `objectstore` can be treated just like the `Session` itself: + + {python} + objectstore.save(instance) + objectstore.flush() + + objectstore.clear() + +#### Attaching Mappers to their Class {@name=attaching} + +With `get_session()` handling the details of providing a `Session` in all cases, the `assign_mapper` function provides some of the functionality of `Query` and `Session` directly off the mapped instances themselves. This is a "monkeypatch" function that creates a primary mapper, attaches the mapper to the class, and also the methods `get, get_by, select, select_by, selectone, selectfirst, commit, expire, refresh, expunge` and `delete`: + + {python} + # "assign" a mapper to the User class/users table + assign_mapper(User, users) + + # methods are attached to the class for selecting + userlist = User.select_by(user_id=12) + + myuser = User.get(1) + + # mark an object as deleted for the next commit + myuser.delete() + + # flush the changes on a specific object + myotheruser.flush() + +#### Engine Strategy Set to threadlocal By Default {@name=engine} + +The `threadlocal` mod also establishes `threadlocal` as the default *strategy* when calling the `create_engine()` function. 
This strategy is specified by the `strategy` keyword argument to `create_engine()` and can still be overridden to be "`plain`" or "`threadlocal`" explicitly. + +An `Engine` created with the `threadlocal` strategy will use a thread-locally managed connection object for all **implicit** statement executions and schema operations. Recall from [dbengine](rel:dbengine) that an implicit execution is an execution where the `Connection` object is opened and closed internally, and the `connect()` method on `Engine` is not used; such as: + + {python} + result = table.select().execute() + +Above, the `result` variable holds onto a `ResultProxy` which is still referencing a connection returned by the connection pool. `threadlocal` strategy means that a second `execute()` statement in the same thread will use the same connection as the one referenced by `result`, assuming `result` is still referenced in memory. + +The `Mapper`, `Session`, and `Query` implementations work equally well with either the `default` or `threadlocal` engine strategies. However, using the `threadlocal` strategy means that `Session` operations will use the same underlying connection as that of straight `execute()` calls with constructed SQL objects: + + {python} + # assume "threadlocal" strategy is enabled, and there is no transaction in progress + + result = table.select().execute() # 'result' references a DBAPI connection, bound to the current thread + + object = session.select() # the 'select' operation also uses the current thread's connection, + # i.e. the same connection referenced by 'result' + + result.close() # return the connection to the pool. now there is no connection + # associated with the current thread. the next execution will re-check out a + # connection and re-attach to the current thread. 
+ +### SessionContext + +**Author:** Daniel Miller + +This plugin is a generalized version of the `objectstore` object provided by the `threadlocal` plugin: + + {python} + import sqlalchemy + from sqlalchemy.ext.sessioncontext import SessionContext + + ctx = SessionContext(sqlalchemy.create_session) + + class User(object): + pass + + mapper(User, users_table, extension=ctx.mapperextension) + + # 'u' is automatically added to the current session of 'ctx' + u = User() + + # get the current session and flush + ctx.current.flush() + +The construction of each `Session` instance can be customized by providing a "creation function" which returns a new `Session`. The "scope" to which the session is associated, which by default is the current thread, can be customized by providing a "scope callable" which returns a hashable key that represents the current scope: + + {python} + import sqlalchemy + from sqlalchemy.ext.sessioncontext import SessionContext + + # create an engine + someengine = sqlalchemy.create_engine('sqlite:///') + + # a function to return a Session bound to our engine + def make_session(): + return sqlalchemy.create_session(bind_to=someengine) + + # global declaration of "scope" + scope = "scope1" + + # a function to return the current "session scope" + def global_scope_func(): + return scope + + # create SessionContext with our two functions + ctx = SessionContext(make_session, scopefunc=global_scope_func) + + # get the session corresponding to "scope1", bound to engine "someengine": + session = ctx.current + + # switch the "scope" + scope = "scope2" + + # get the session corresponding to "scope2", bound to engine "someengine": + session = ctx.current + + +### ActiveMapper + +**Author:** Jonathan LaCour + +ActiveMapper is a so-called "declarative layer" which allows the construction of a class, a `Table`, and a `Mapper` all in one step: + + {python} + class Person(ActiveMapper): + class mapping: + id = column(Integer, primary_key=True) + full_name = 
column(String) + first_name = column(String) + middle_name = column(String) + last_name = column(String) + birth_date = column(DateTime) + ssn = column(String) + gender = column(String) + home_phone = column(String) + cell_phone = column(String) + work_phone = column(String) + prefs_id = column(Integer, foreign_key=ForeignKey('preferences.id')) + addresses = one_to_many('Address', colname='person_id', backref='person') + preferences = one_to_one('Preferences', colname='pref_id', backref='person') + + def __str__(self): + s = '%s\n' % self.full_name + s += ' * birthdate: %s\n' % (self.birth_date or 'not provided') + s += ' * fave color: %s\n' % (self.preferences.favorite_color or 'Unknown') + s += ' * personality: %s\n' % (self.preferences.personality_type or 'Unknown') + + for address in self.addresses: + s += ' * address: %s\n' % address.address_1 + s += ' %s, %s %s\n' % (address.city, address.state, address.postal_code) + + return s + + + class Preferences(ActiveMapper): + class mapping: + __table__ = 'preferences' + id = column(Integer, primary_key=True) + favorite_color = column(String) + personality_type = column(String) + + + class Address(ActiveMapper): + class mapping: + id = column(Integer, primary_key=True) + type = column(String) + address_1 = column(String) + city = column(String) + state = column(String) + postal_code = column(String) + person_id = column(Integer, foreign_key=ForeignKey('person.id')) + +More discussion on ActiveMapper can be found at [Jonathan LaCour's Blog](http://cleverdevil.org/computing/35/declarative-mapping-with-sqlalchemy) as well as the [SQLAlchemy Wiki](http://www.sqlalchemy.org/trac/wiki/ActiveMapper). + +### SqlSoup + +**Author:** Jonathan Ellis + +SqlSoup creates mapped classes on the fly from tables. It is essentially a nicer version of the "row data gateway" pattern. 
+ + {python} + >>> from sqlalchemy.ext.sqlsoup import SqlSoup + >>> soup = SqlSoup('sqlite://filename=:memory:') + + >>> users = soup.users.select() + >>> users.sort() + >>> users + [Class_Users(name='Bhargan Basepair',email='basepair@example.edu',password='basepair',classname=None,admin=1), + Class_Users(name='Joe Student',email='student@example.edu',password='student',classname=None,admin=0)] + +Read more about SqlSoup on [Jonathan Ellis' Blog](http://spyced.blogspot.com/2006/04/introducing-sqlsoup.html). + +### ProxyEngine + +**Author:** Jason Pellerin + +The `ProxyEngine` is used to "wrap" an `Engine`, and via subclassing `ProxyEngine` one can instrument the functionality of an arbitrary `Engine` instance through the decorator pattern. It also provides a `connect()` method which will send all `Engine` requests to different underlying engines. Its functionality in that regard is largely superseded now by `DynamicMetaData` which is a better solution. + + {python} + from sqlalchemy.ext.proxy import ProxyEngine + proxy = ProxyEngine() + + proxy.connect('postgres://user:pw@host/db') + +### SelectResults + +**Author:** Jonas Borgström + +SelectResults gives generator-like behavior to the results returned from the `select` and `select_by` methods of `Query`. It supports three modes of operation: per-query, per-mapper, and per-application. 
+ + {python title="SelectResults with a Query Object"} + from sqlalchemy.ext.selectresults import SelectResults + + query = session.query(MyClass) + res = SelectResults(query, table.c.column == "something") + res = res.order_by([table.c.column]) #add an order clause + + for x in res[:10]: # Fetch and print the top ten instances + print x.column2 + + x = list(res) # execute the query + + # Count how many instances that have column2 > 42 + # and column == "something" + print res.filter(table.c.column2 > 42).count() + + +Per mapper: + + {python title="SelectResults with a Mapper Object"} + from sqlalchemy.ext.selectresults import SelectResultsExt + mapper(MyClass, mytable, extension=SelectResultsExt()) + session.query(MyClass).select(mytable.c.column=="something").order_by([mytable.c.column])[2:7] + +Or across an application via the `selectresults` mod: + + {python title="SelectResults via mod"} + import sqlalchemy.mods.selectresults + + mapper(MyClass, mytable) + session.query(MyClass).select(mytable.c.column=="something").order_by([mytable.c.column])[2:7] + diff --git a/doc/build/content/pooling.myt b/doc/build/content/pooling.myt deleted file mode 100644 index b9f5ecb14..000000000 --- a/doc/build/content/pooling.myt +++ /dev/null @@ -1,66 +0,0 @@ -<%flags>inherit='document_base.myt'</%flags> -<%attr>title='Connection Pooling'</%attr> -<&|doclib.myt:item, name="pooling", description="Connection Pooling" &> - <P><b>Note:</b>This section describes the connection pool module of SQLAlchemy, which is the smallest component of the library that can be used on its own. If you are interested in using SQLAlchemy for query construction or Object Relational Mapping, this module is automatically managed behind the scenes; you can skip ahead to <&formatting.myt:link,path="dbengine"&> in that case.</p> - <p>At the base of any database helper library is a system of efficiently acquiring connections to the database. 
Since the establishment of a database connection is typically a somewhat expensive operation, an application needs a way to get at database connections repeatedly without incurring the full overhead each time. Particularly for server-side web applications, a connection pool is the standard way to maintain a "pool" of database connections which are used over and over again among many requests. Connection pools typically are configured to maintain a certain "size", which represents how many connections can be used simultaneously without resorting to creating more newly-established connections. - </p> - <p>SQLAlchemy includes a pooling module that can be used completely independently of the rest of the toolset. This section describes how it can be used on its own, as well as the available options. If SQLAlchemy is being used more fully, the connection pooling described below occurs automatically. The options are still available, though, so this core feature is a good place to start. - </p> - <&|doclib.myt:item, name="establishing", description="Establishing a Transparent Connection Pool" &> - Any DBAPI module can be "proxied" through the connection pool using the following technique (note that the usage of 'psycopg2' is <b>just an example</b>; substitute whatever DBAPI module you'd like): - - <&|formatting.myt:code&> - import sqlalchemy.pool as pool - import psycopg2 as psycopg - psycopg = pool.manage(psycopg) - - # then connect normally - connection = psycopg.connect(database='test', username='scott', password='tiger') - </&> - <p>This produces a <span class="codeline">sqlalchemy.pool.DBProxy</span> object which supports the same <span class="codeline">connect()</span> function as the original DBAPI module. Upon connection, a thread-local connection proxy object is returned, which delegates its calls to a real DBAPI connection object. 
This connection object is stored persistently within a connection pool (an instance of <span class="codeline">sqlalchemy.pool.Pool</span>) that corresponds to the exact connection arguments sent to the <span class="codeline">connect()</span> function. The connection proxy also returns a proxied cursor object upon calling <span class="codeline">connection.cursor()</span>. When all cursors as well as the connection proxy are de-referenced, the connection is automatically made available again by the owning pool object.</p> - - <p>Basically, the <span class="codeline">connect()</span> function is used in its usual way, and the pool module transparently returns thread-local pooled connections. Each distinct set of connect arguments corresponds to a brand new connection pool created; in this way, an application can maintain connections to multiple schemas and/or databases, and each unique connect argument set will be managed by a different pool object.</p> - </&> - - <&|doclib.myt:item, name="configuration", description="Connection Pool Configuration" &> - <p>When proxying a DBAPI module through the <span class="codeline">pool</span> module, options exist for how the connections should be pooled: - </p> - <ul> - <li>echo=False : if set to True, connections being pulled and retrieved from/to the pool will be logged to the standard output, as well as pool sizing information.</li> - <li>use_threadlocal=True : if set to True, repeated calls to connect() within the same application thread will be guaranteed to return the <b>same</b> connection object, if one has already been retrieved from the pool and has not been returned yet. This allows code to retrieve a connection from the pool, and then while still holding on to that connection, to call other functions which also ask the pool for a connection of the same arguments; those functions will act upon the same connection that the calling method is using. 
Note that once the connection is returned to the pool, it then may be used by another thread. To guarantee a single unique connection per thread that <b>never</b> changes, use the option <span class="codeline">poolclass=SingletonThreadPool</span>, in which case the use_threadlocal parameter is automatically set to False.</li> - <li>poolclass=QueuePool : the Pool class used by the pool module to provide pooling. QueuePool uses the Python <span class="codeline">Queue.Queue</span> class to maintain a list of available connections. A developer can supply his or her own Pool class to supply a different pooling algorithm. Also included is the ThreadSingletonPool, which provides a single distinct connection per thread and is required with SQLite.</li> - <li>pool_size=5 : used by QueuePool - the size of the pool to be maintained. This is the largest number of connections that will be kept persistently in the pool. Note that the pool begins with no connections; once this number of connections is requested, that number of connections will remain.</li> - <li>max_overflow=10 : used by QueuePool - the maximum overflow size of the pool. When the number of checked-out connections reaches the size set in pool_size, additional connections will be returned up to this limit. When those additional connections are returned to the pool, they are disconnected and discarded. It follows then that the total number of simultaneous connections the pool will allow is pool_size + max_overflow, and the total number of "sleeping" connections the pool will allow is pool_size. max_overflow can be set to -1 to indicate no overflow limit; no limit will be placed on the total number of concurrent connections.</li> - </ul> - </&> - - <&|doclib.myt:item, name="custom", description="Custom Pool Construction" &> - <p>One level below using a DBProxy to make transparent pools is creating the pool yourself. 
The pool module comes with two implementations of connection pools: <span class="codeline">QueuePool</span> and <span class="codeline">SingletonThreadPool</span>. While QueuePool uses Queue.Queue to provide connections, SingletonThreadPool provides a single per-thread connection which SQLite requires.</p> - - <p>Constructing your own pool involves passing a callable used to create a connection. Through this method, custom connection schemes can be made, such as a connection that automatically executes some initialization commands to start. The options from the previous section can be used as they apply to QueuePool or SingletonThreadPool.</p> - <&|formatting.myt:code, title="Plain QueuePool"&> - import sqlalchemy.pool as pool - import psycopg2 - - def getconn(): - c = psycopg2.connect(username='ed', host='127.0.0.1', dbname='test') - # execute an initialization function on the connection before returning - c.cursor.execute("setup_encodings()") - return c - - p = pool.QueuePool(getconn, max_overflow=10, pool_size=5, use_threadlocal=True) - </&> - - <&|formatting.myt:code, title="SingletonThreadPool"&> - import sqlalchemy.pool as pool - import sqlite - - def getconn(): - return sqlite.connect(filename='myfile.db') - - # SQLite connections require the SingletonThreadPool - p = pool.SingletonThreadPool(getconn) - </&> - - </&> -</&>
\ No newline at end of file diff --git a/doc/build/content/pooling.txt b/doc/build/content/pooling.txt new file mode 100644 index 000000000..fd6ffb158 --- /dev/null +++ b/doc/build/content/pooling.txt @@ -0,0 +1,65 @@ +Connection Pooling {@name=pooling} +====================== + +This section describes the connection pool module of SQLAlchemy. The `Pool` object it provides is normally embedded within an `Engine` instance. For most cases, explicit access to the pool module is not required. However, the `Pool` object can be used on its own, without the rest of SA, to manage DBAPI connections; this section describes that usage. Also, this section will describe in more detail how to customize the pooling strategy used by an `Engine`. + +At the base of any database helper library is a system of efficiently acquiring connections to the database. Since the establishment of a database connection is typically a somewhat expensive operation, an application needs a way to get at database connections repeatedly without incurring the full overhead each time. Particularly for server-side web applications, a connection pool is the standard way to maintain a "pool" of database connections which are used over and over again among many requests. Connection pools typically are configured to maintain a certain "size", which represents how many connections can be used simultaneously without resorting to creating more newly-established connections. 
+ +### Establishing a Transparent Connection Pool {@name=establishing} + +Any DBAPI module can be "proxied" through the connection pool using the following technique (note that the usage of 'psycopg2' is **just an example**; substitute whatever DBAPI module you'd like): + + {python} + import sqlalchemy.pool as pool + import psycopg2 as psycopg + psycopg = pool.manage(psycopg) + + # then connect normally + connection = psycopg.connect(database='test', username='scott', password='tiger') + +This produces a `sqlalchemy.pool.DBProxy` object which supports the same `connect()` function as the original DBAPI module. Upon connection, a thread-local connection proxy object is returned, which delegates its calls to a real DBAPI connection object. This connection object is stored persistently within a connection pool (an instance of `sqlalchemy.pool.Pool`) that corresponds to the exact connection arguments sent to the `connect()` function. The connection proxy also returns a proxied cursor object upon calling `connection.cursor()`. When all cursors as well as the connection proxy are de-referenced, the connection is automatically made available again by the owning pool object. + +Basically, the `connect()` function is used in its usual way, and the pool module transparently returns thread-local pooled connections. Each distinct set of connect arguments corresponds to a brand new connection pool created; in this way, an application can maintain connections to multiple schemas and/or databases, and each unique connect argument set will be managed by a different pool object. + +### Connection Pool Configuration {@name=configuration} + +When proxying a DBAPI module through the `pool` module, options exist for how the connections should be pooled: + +* echo=False : if set to True, connections being pulled and retrieved from/to the pool will be logged to the standard output, as well as pool sizing information. 
+* use\_threadlocal=True : if set to True, repeated calls to connect() within the same application thread will be guaranteed to return the **same** connection object, if one has already been retrieved from the pool and has not been returned yet. This allows code to retrieve a connection from the pool, and then while still holding on to that connection, to call other functions which also ask the pool for a connection of the same arguments; those functions will act upon the same connection that the calling method is using. Note that once the connection is returned to the pool, it then may be used by another thread. To guarantee a single unique connection per thread that **never** changes, use the option `poolclass=SingletonThreadPool`, in which case the use_threadlocal parameter is automatically set to False. +* poolclass=QueuePool : the Pool class used by the pool module to provide pooling. QueuePool uses the Python `Queue.Queue` class to maintain a list of available connections. A developer can supply his or her own Pool class to supply a different pooling algorithm. Also included is the `SingletonThreadPool`, which provides a single distinct connection per thread and is required with SQLite. +* pool\_size=5 : used by `QueuePool` - the size of the pool to be maintained. This is the largest number of connections that will be kept persistently in the pool. Note that the pool begins with no connections; once this number of connections is requested, that number of connections will remain. +* max\_overflow=10 : used by `QueuePool` - the maximum overflow size of the pool. When the number of checked-out connections reaches the size set in pool_size, additional connections will be returned up to this limit. When those additional connections are returned to the pool, they are disconnected and discarded. 
It follows then that the total number of simultaneous connections the pool will allow is `pool_size` + `max_overflow`, and the total number of "sleeping" connections the pool will allow is `pool_size`. `max_overflow` can be set to -1 to indicate no overflow limit; no limit will be placed on the total number of concurrent connections. +* timeout=30 : used by `QueuePool` - the timeout before giving up on returning a connection, if none are available and the `max_overflow` has been reached. + + +### Custom Pool Construction {@name=custom} + +One level below using a DBProxy to make transparent pools is creating the pool yourself. The pool module comes with two implementations of connection pools: `QueuePool` and `SingletonThreadPool`. While `QueuePool` uses `Queue.Queue` to provide connections, `SingletonThreadPool` provides a single per-thread connection which SQLite requires. + +Constructing your own pool involves passing a callable used to create a connection. Through this method, custom connection schemes can be made, such as a connection that automatically executes some initialization commands to start. The options from the previous section can be used as they apply to `QueuePool` or `SingletonThreadPool`. 
+ + {python title="Plain QueuePool"} + import sqlalchemy.pool as pool + import psycopg2 + + def getconn(): + c = psycopg2.connect(username='ed', host='127.0.0.1', dbname='test') + # execute an initialization function on the connection before returning + c.cursor.execute("setup_encodings()") + return c + + p = pool.QueuePool(getconn, max_overflow=10, pool_size=5, use_threadlocal=True) + +Or with SingletonThreadPool: + + {python title="SingletonThreadPool"} + import sqlalchemy.pool as pool + import sqlite + + def getconn(): + return sqlite.connect(filename='myfile.db') + + # SQLite connections require the SingletonThreadPool + p = pool.SingletonThreadPool(getconn) + diff --git a/doc/build/content/sqlconstruction.txt b/doc/build/content/sqlconstruction.txt index aa9768767..6eaca5e42 100644 --- a/doc/build/content/sqlconstruction.txt +++ b/doc/build/content/sqlconstruction.txt @@ -1,29 +1,29 @@ -Constructing SQL Queries via Python Expressions +Constructing SQL Queries via Python Expressions {@name=sql} =============================================== -*Note:* This section describes how to use SQLAlchemy to construct SQL queries and receive result sets. It does *not* cover the object relational mapping capabilities of SQLAlchemy; that is covered later on in [datamapping](rel:datamapping). However, both areas of functionality work similarly in how selection criterion is constructed, so if you are interested just in ORM, you should probably skim through basic [sql_select_whereclause](rel:sql_select_whereclause) construction before moving on. +*Note:* This section describes how to use SQLAlchemy to construct SQL queries and receive result sets. It does *not* cover the object relational mapping capabilities of SQLAlchemy; that is covered later on in [datamapping](rel:datamapping). 
However, both areas of functionality work similarly in how selection criterion is constructed, so if you are interested just in ORM, you should probably skim through basic [sql_whereclause](rel:sql_whereclause) construction before moving on. -Once you have used the `sqlalchemy.schema` module to construct your tables and/or reflect them from the database, performing SQL queries using those table meta data objects is done via the `sqlalchemy.sql` package. This package defines a large set of classes, each of which represents a particular kind of lexical construct within a SQL query; all are descendants of the common base class `sqlalchemy.sql.ClauseElement`. A full query is represented via a structure of ClauseElements. A set of reasonably intuitive creation functions is provided by the `sqlalchemy.sql` package to create these structures; these functions are described in the rest of this section. +Once you have used the `sqlalchemy.schema` module to construct your tables and/or reflect them from the database, performing SQL queries using those table meta data objects is done via the `sqlalchemy.sql` package. This package defines a large set of classes, each of which represents a particular kind of lexical construct within a SQL query; all are descendants of the common base class `sqlalchemy.sql.ClauseElement`. A full query is represented via a structure of `ClauseElement`s. A set of reasonably intuitive creation functions is provided by the `sqlalchemy.sql` package to create these structures; these functions are described in the rest of this section. -To execute a query, you create its structure, then call the resulting structure's `execute()` method, which returns a cursor-like object (more on that later). The same clause structure can be used repeatedly. 
A ClauseElement is compiled into a string representation by an underlying SQLEngine object, which is located by searching through the clause's child items for a Table object, which provides a reference to its SQLEngine. - +Executing a `ClauseElement` structure can be performed in two general ways. You can use an `Engine` or a `Connection` object's `execute()` method to which you pass the query structure; this is known as **explicit style**. Or, if the `ClauseElement` structure is built upon Table metadata which is bound to an `Engine` directly, you can simply call `execute()` on the structure itself, known as **implicit style**. In both cases, the execution returns a cursor-like object (more on that later). The same clause structure can be executed repeatedly. The `ClauseElement` is compiled into a string representation by an underlying `Compiler` object which is associated with the `Engine` via its `Dialect`. -The examples below all include a dump of the generated SQL corresponding to the query object, as well as a dump of the statement's bind parameters. In all cases, bind parameters are named parameters using the colon format (i.e. ':name'). A named parameter scheme, either ':name' or '%(name)s', is used with all databases, including those that use positional schemes. For those, the named-parameter statement and its bind values are converted to the proper list-based format right before execution. Therefore a SQLAlchemy application that uses ClauseElements can standardize on named parameters for all databases. +The examples below all include a dump of the generated SQL corresponding to the query object, as well as a dump of the statement's bind parameters. In all cases, bind parameters are shown as named parameters using the colon format (i.e. ':name'). When the statement is compiled into a database-specific version, the named-parameter statement and its bind values are converted to the proper paramstyle for that database automatically. 
-For this section, we will assume the following tables: +For this section, we will mostly use the implicit style of execution, meaning the `Table` objects are associated with an instance of `BoundMetaData`, and constructed `ClauseElement` objects support self-execution. Assume the following configuration: - {python}from sqlalchemy import * - db = create_engine('sqlite://filename=mydb', echo=True) + {python} + from sqlalchemy import * + metadata = BoundMetaData('sqlite:///mydb.db', strategy='threadlocal', echo=True) # a table to store users - users = Table('users', db, + users = Table('users', metadata, Column('user_id', Integer, primary_key = True), Column('user_name', String(40)), Column('password', String(80)) ) # a table that stores mailing addresses associated with a specific user - addresses = Table('addresses', db, + addresses = Table('addresses', metadata, Column('address_id', Integer, primary_key = True), Column('user_id', Integer, ForeignKey("users.user_id")), Column('street', String(100)), @@ -33,13 +33,13 @@ For this section, we will assume the following tables: ) # a table that stores keywords - keywords = Table('keywords', db, + keywords = Table('keywords', metadata, Column('keyword_id', Integer, primary_key = True), Column('name', VARCHAR(50)) ) # a table that associates keywords with users - userkeywords = Table('userkeywords', db, + userkeywords = Table('userkeywords', metadata, Column('user_id', INT, ForeignKey("users")), Column('keyword_id', INT, ForeignKey("keywords")) ) @@ -48,7 +48,8 @@ For this section, we will assume the following tables: A select is done by constructing a `Select` object with the proper arguments, adding any extra arguments if desired, then calling its `execute()` method. 
- {python}from sqlalchemy import * + {python title="Basic Select"} + from sqlalchemy import * # use the select() function defined in the sql package s = select([users]) @@ -57,7 +58,7 @@ A select is done by constructing a `Select` object with the proper arguments, ad s = users.select() # then, call execute on the Select object: - {sql}c = s.execute() + {sql}result = s.execute() SELECT users.user_id, users.user_name, users.password FROM users {} @@ -65,15 +66,48 @@ A select is done by constructing a `Select` object with the proper arguments, ad >>> str(s) SELECT users.user_id, users.user_name, users.password FROM users -The object returned by the execute call is a `sqlalchemy.engine.ResultProxy` object, which acts much like a DBAPI `cursor` object in the context of a result set, except that the rows returned can address their columns by ordinal position, column name, or even column object: +#### Explicit Execution {@name=explicit} + +As mentioned above, `ClauseElement` structures can also be executed with a `Connection` object explicitly: + + {python} + engine = create_engine('sqlite:///myfile.db') + conn = engine.connect() + + s = users.select() + {sql}result = conn.execute(s) + SELECT users.user_id, users.user_name, users.password FROM users + {} + + conn.close() + +#### Binding ClauseElements to Engines {@name=binding} + +For queries that don't contain any tables, `ClauseElement`s that represent a fully executeable statement support an `engine` keyword parameter which can bind the object to an `Engine`, thereby allowing implicit execution: + + {python} + # select a literal + {sql}select(["current_time"], engine=myengine).execute() + SELECT current_time + {} + + # select a function + {sql}select([func.now()], engine=db).execute() + SELECT now() + {} + +#### Getting Results {@name=resultproxy} + +The object returned by `execute()` is a `sqlalchemy.engine.ResultProxy` object, which acts much like a DBAPI `cursor` object in the context of a result set, except that 
the rows returned can address their columns by ordinal position, column name, or even column object: - {python}# select rows, get resulting ResultProxy object - {sql}c = users.select().execute() + {python title="Using the ResultProxy"} + # select rows, get resulting ResultProxy object + {sql}result = users.select().execute() SELECT users.user_id, users.user_name, users.password FROM users {} # get one row - row = c.fetchone() + row = result.fetchone() # get the 'user_id' column via integer index: user_id = row[0] @@ -88,16 +122,23 @@ The object returned by the execute call is a `sqlalchemy.engine.ResultProxy` obj password = row.password # ResultProxy object also supports fetchall() - rows = c.fetchall() + rows = result.fetchall() # or get the underlying DBAPI cursor object - cursor = c.cursor + cursor = result.cursor + + # close the result. If the statement was implicitly executed (i.e. without an explicit Connection), this will + # return the underlying connection resources back to the connection pool. de-referencing the result + # will also have the same effect. + # if an explicit Connection was used, then close() does nothing. + result.close() #### Using Column Labels {@name=labels} A common need when writing statements that reference multiple tables is to create labels for columns, thereby separating columns from different tables with the same name. 
The Select construct supports automatic generation of column labels via the `use_labels=True` parameter: - {python}{sql}c = select([users, addresses], + {python title="use_labels Flag"} + {sql}c = select([users, addresses], users.c.user_id==addresses.c.address_id, use_labels=True).execute() SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, @@ -111,7 +152,8 @@ A common need when writing statements that reference multiple tables is to creat The table name part of the label is affected if you use a construct such as a table alias: - {python}person = users.alias('person') + {python title="use_labels with an Alias"} + person = users.alias('person') {sql}c = select([person, addresses], person.c.user_id==addresses.c.address_id, use_labels=True).execute() @@ -122,9 +164,21 @@ The table name part of the label is affected if you use a construct such as a ta addresses.zip AS addresses_zip FROM users AS person, addresses WHERE person.user_id = addresses.address_id +Labels are also generated in such a way as to never go beyond 30 characters. 
Most databases impose a limit on the length of symbols, such as Postgres, and particularly Oracle which has a rather short limit of 30: + + {python title="use_labels Generates Abbreviated Labels"} + long_named_table = users.alias('this_is_the_person_table') + {sql}c = select([long_named_table], use_labels=True).execute() + SELECT this_is_the_person_table.user_id AS this_is_the_person_table_b36c, + this_is_the_person_table.user_name AS this_is_the_person_table_f76a, + this_is_the_person_table.password AS this_is_the_person_table_1e7c + FROM users AS this_is_the_person_table + {} + You can also specify custom labels on a per-column basis using the `label()` function: - {python}{sql}c = select([users.c.user_id.label('id'), users.c.user_name.label('name')]).execute() + {python title="label() Function on Column"} + {sql}c = select([users.c.user_id.label('id'), users.c.user_name.label('name')]).execute() SELECT users.user_id AS id, users.user_name AS name FROM users {} @@ -135,7 +189,8 @@ Calling `select` off a table automatically generates a column clause which inclu But in addition to selecting all the columns off a single table, any set of columns can be specified, as well as full tables, and any combination of the two: - {python}# individual columns + {python title="Specify Columns to Select"} + # individual columns {sql}c = select([users.c.user_id, users.c.user_name]).execute() SELECT users.user_id, users.user_name FROM users {} @@ -154,13 +209,14 @@ But in addition to selecting all the columns off a single table, any set of colu addresses.zip FROM users, addresses {} -#### WHERE Clause {@name=whereclause} +### WHERE Clause {@name=whereclause} The WHERE condition is the named keyword argument `whereclause`, or the second positional argument to the `select()` constructor and the first positional argument to the `select()` method of `Table`. WHERE conditions are constructed using column objects, literal values, and functions defined in the `sqlalchemy.sql` module. 
Column objects override the standard Python operators to provide clause compositional objects, which compile down to SQL operations: - {python}{sql}c = users.select(users.c.user_id == 7).execute() + {python title="Basic WHERE Clause"} + {sql}c = users.select(users.c.user_id == 7).execute() SELECT users.user_id, users.user_name, users.password, FROM users WHERE users.user_id = :users_user_id {'users_user_id': 7} @@ -169,7 +225,8 @@ Notice that the literal value "7" was broken out of the query and placed into a More where clauses: - {python}# another comparison operator + {python} + # another comparison operator {sql}c = select([users], users.c.user_id>7).execute() SELECT users.user_id, users.user_name, users.password, FROM users WHERE users.user_id > :users_user_id @@ -233,7 +290,8 @@ More where clauses: Select statements can also generate a WHERE clause based on the parameters you give it. If a given parameter, which matches the name of a column or its "label" (the combined tablename + "_" + column name), and does not already correspond to a bind parameter in the select object, it will be added as a comparison against that column. This is a shortcut to creating a full WHERE clause: - {python}# specify a match for the "user_name" column + {python} + # specify a match for the "user_name" column {sql}c = users.select().execute(user_name='ed') SELECT users.user_id, users.user_name, users.password FROM users WHERE users.user_name = :users_user_name @@ -246,11 +304,12 @@ Select statements can also generate a WHERE clause based on the parameters you g FROM users WHERE users.user_name = :users_user_name AND users.user_id = :users_user_id {'users_user_name': 'ed', 'users_user_id': 10} -##### Operators {@name=operators} +#### Operators {@name=operators} Supported column operators so far are all the numerical comparison operators, i.e. '==', '>', '>=', etc., as well as like(), startswith(), endswith(), between(), and in(). 
Boolean operators include not_(), and_() and or_(), which also can be used inline via '~', '&', and '|'. Math operators are '+', '-', '*', '/'. Any custom operator can be specified via the op() function shown below. - {python}# "like" operator + {python} + # "like" operator users.select(users.c.user_name.like('%ter')) # equality operator @@ -285,22 +344,13 @@ Supported column operators so far are all the numerical comparison operators, i. SELECT users.user_id, users.user_name, users.password FROM users WHERE users.user_name IS NULL - -#### Specifying the Engine {@name=engine} - -For queries that don't contain any tables, the SQLEngine can be specified to any constructed statement via the `engine` keyword parameter: - - {python}# select a literal - select(["hi"], engine=myengine) - - # select a function - select([func.now()], engine=db) - + #### Functions {@name=functions} Functions can be specified using the `func` keyword: - {python}{sql}select([func.count(users.c.user_id)]).execute() + {python} + {sql}select([func.count(users.c.user_id)]).execute() SELECT count(users.user_id) FROM users {sql}users.select(func.substr(users.c.user_name, 1) == 'J').execute() @@ -310,7 +360,8 @@ Functions can be specified using the `func` keyword: Functions also are callable as standalone values: - {python}# call the "now()" function + {python} + # call the "now()" function time = func.now(engine=myengine).scalar() # call myfunc(1,2,3) @@ -323,7 +374,8 @@ Functions also are callable as standalone values: You can drop in a literal value anywhere there isn't a column to attach to via the `literal` keyword: - {python}{sql}select([literal('foo') + literal('bar'), users.c.user_name]).execute() + {python} + {sql}select([literal('foo') + literal('bar'), users.c.user_name]).execute() SELECT :literal + :literal_1, users.user_name FROM users {'literal_1': 'bar', 'literal': 'foo'} @@ -335,7 +387,8 @@ You can drop in a literal value anywhere there isnt a column to attach to via th Literals also 
take an optional `type` parameter to give literals a type. This can sometimes be significant, for example when using the "+" operator with SQLite, the String type is detected and the operator is converted to "||": - {python}{sql}select([literal('foo', type=String) + 'bar'], engine=e).execute() + {python} + {sql}select([literal('foo', type=String) + 'bar'], engine=e).execute() SELECT ? || ? ['foo', 'bar'] @@ -343,7 +396,8 @@ Literals also take an optional `type` parameter to give literals a type. This c The ORDER BY clause of a select statement can be specified as individual columns to order by within an array specified via the `order_by` parameter, and optional usage of the asc() and desc() functions: - {python}# straight order by + {python} + # straight order by {sql}c = users.select(order_by=[users.c.user_name]).execute() SELECT users.user_id, users.user_name, users.password FROM users ORDER BY users.user_name @@ -361,7 +415,8 @@ The ORDER BY clause of a select statement can be specified as individual columns These are specified as keyword arguments: - {python}{sql}c = select([users.c.user_name], distinct=True).execute() + {python} + {sql}c = select([users.c.user_name], distinct=True).execute() SELECT DISTINCT users.user_name FROM users {sql}c = users.select(limit=10, offset=20).execute() @@ -373,7 +428,8 @@ The Oracle driver does not support LIMIT and OFFSET directly, but instead wraps As some of the examples indicated above, a regular inner join can be implicitly stated, just like in a SQL expression, by just specifying the tables to be joined as well as their join conditions: - {python}{sql}addresses.select(addresses.c.user_id==users.c.user_id).execute() + {python} + {sql}addresses.select(addresses.c.user_id==users.c.user_id).execute() SELECT addresses.address_id, addresses.user_id, addresses.street, addresses.city, addresses.state, addresses.zip FROM addresses, users WHERE addresses.user_id = users.user_id @@ -381,7 +437,8 @@ As some of the examples indicated 
above, a regular inner join can be implicitly There is also an explicit join constructor, which can be embedded into a select query via the `from_obj` parameter of the select statement: - {python}{sql}addresses.select(from_obj=[ + {python} + {sql}addresses.select(from_obj=[ addresses.join(users, addresses.c.user_id==users.c.user_id) ]).execute() SELECT addresses.address_id, addresses.user_id, addresses.street, addresses.city, @@ -391,7 +448,8 @@ There is also an explicit join constructor, which can be embedded into a select The join constructor can also be used by itself: - {python}{sql}join(users, addresses, users.c.user_id==addresses.c.user_id).select().execute() + {python} + {sql}join(users, addresses, users.c.user_id==addresses.c.user_id).select().execute() SELECT users.user_id, users.user_name, users.password, addresses.address_id, addresses.user_id, addresses.street, addresses.city, addresses.state, addresses.zip @@ -400,7 +458,8 @@ The join constructor can also be used by itself: The join criterion in a join() call is optional. If not specified, the condition will be derived from the foreign key relationships of the two tables. If no criterion can be constructed, an exception will be raised. 
- {python}{sql}join(users, addresses).select().execute() + {python} + {sql}join(users, addresses).select().execute() SELECT users.user_id, users.user_name, users.password, addresses.address_id, addresses.user_id, addresses.street, addresses.city, addresses.state, addresses.zip @@ -411,7 +470,8 @@ Notice that this is the first example where the FROM criterion of the select sta A join can be created on its own using the `join` or `outerjoin` functions, or can be created off of an existing Table or other selectable unit via the `join` or `outerjoin` methods: - {python}{sql}outerjoin(users, addresses, users.c.user_id==addresses.c.address_id).select().execute() + {python} + {sql}outerjoin(users, addresses, users.c.user_id==addresses.c.address_id).select().execute() SELECT users.user_id, users.user_name, users.password, addresses.address_id, addresses.user_id, addresses.street, addresses.city, addresses.state, addresses.zip FROM users LEFT OUTER JOIN addresses ON users.user_id = addresses.address_id @@ -432,7 +492,8 @@ A join can be created on its own using the `join` or `outerjoin` functions, or c Aliases are used primarily when you want to use the same table more than once as a FROM expression in a statement: - {python}address_b = addresses.alias('addressb') + {python} + address_b = addresses.alias('addressb') {sql}# select users who have an address on Green street as well as Orange street users.select(and_( users.c.user_id==addresses.c.user_id, @@ -452,10 +513,12 @@ Aliases are used primarily when you want to use the same table more than once as SQLAlchemy allows the creation of select statements from not just Table objects, but from a whole class of objects that implement the `Selectable` interface. This includes Tables, Aliases, Joins and Selects. 
Therefore, if you have a Select, you can select from the Select: + {python} >>> s = users.select() >>> str(s) SELECT users.user_id, users.user_name, users.password FROM users + {python} >>> s = s.select() >>> str(s) SELECT user_id, user_name, password @@ -463,13 +526,15 @@ SQLAlchemy allows the creation of select statements from not just Table objects, Any Select, Join, or Alias object supports the same column accessors as a Table: + {python} >>> s = users.select() >>> [c.key for c in s.columns] ['user_id', 'user_name', 'password'] When you use `use_labels=True` in a Select object, the label version of the column names become the keys of the accessible columns. In effect you can create your own "view objects": - {python}s = select([users, addresses], users.c.user_id==addresses.c.user_id, use_labels=True) + {python} + s = select([users, addresses], users.c.user_id==addresses.c.user_id, use_labels=True) {sql}select([ s.c.users_user_name, s.c.addresses_street, s.c.addresses_zip ], s.c.addresses_city=='San Francisco').execute() @@ -486,7 +551,8 @@ When you use `use_labels=True` in a Select object, the label version of the colu To specify a SELECT statement as one of the selectable units in a FROM clause, it usually should be given an alias. 
- {python}{sql}s = users.select().alias('u') + {python} + {sql}s = users.select().alias('u') select([addresses, s]).execute() SELECT addresses.address_id, addresses.user_id, addresses.street, addresses.city, addresses.state, addresses.zip, u.user_id, u.user_name, u.password @@ -496,7 +562,8 @@ To specify a SELECT statement as one of the selectable units in a FROM clause, i Select objects can be used in a WHERE condition, in operators such as IN: - {python}# select user ids for all users whos name starts with a "p" + {python} + # select user ids for all users whose name starts with a "p" s = select([users.c.user_id], users.c.user_name.like('p%')) # now select all addresses for those users @@ -513,7 +580,8 @@ The sql package supports embedding select statements into other select statement Subqueries can be used in the column clause of a select statement by specifying the `scalar=True` flag: - {python}{sql}select([table2.c.col1, table2.c.col2, select([table1.c.col1], table1.c.col2==7, scalar=True)]) + {python} + {sql}select([table2.c.col1, table2.c.col2, select([table1.c.col1], table1.c.col2==7, scalar=True)]) SELECT table2.col1, table2.col2, (SELECT table1.col1 AS col1 FROM table1 WHERE col2=:table1_col2) FROM table2 @@ -523,7 +591,8 @@ Subqueries can be used in the column clause of a select statement by specifying When a select object is embedded inside of another select object, and both objects reference the same table, SQLAlchemy makes the assumption that the table should be correlated from the child query to the parent query. To disable this behavior, specify the flag `correlate=False` to the Select statement. - {python}# make an alias of a regular select. 
s = select([addresses.c.street], addresses.c.user_id==users.c.user_id).alias('s') >>> str(s) SELECT addresses.street FROM addresses, users @@ -541,7 +610,8 @@ When a select object is embedded inside of another select object, and both objec An EXISTS clause can function as a higher-scaling version of an IN clause, and is usually used in a correlated fashion: - {python}# find all users who have an address on Green street: + {python} + # find all users who have an address on Green street: {sql}users.select( exists( [addresses.c.address_id], @@ -561,7 +631,8 @@ An EXISTS clause can function as a higher-scaling version of an IN clause, and i Unions come in two flavors, UNION and UNION ALL, which are available via module level functions or methods off a Selectable: - {python}{sql}union( + {python} + {sql}union( addresses.select(addresses.c.street=='123 Green Street'), addresses.select(addresses.c.street=='44 Park Ave.'), addresses.select(addresses.c.street=='3 Mill Road'), @@ -601,27 +672,26 @@ Unions come in two flavors, UNION and UNION ALL, which are available via module ### Custom Bind Parameters {@name=bindparams} -Throughout all these examples, SQLAlchemy is busy creating bind parameters wherever literal expressions occur. You can also specify your own bind parameters with your own names, and use the same statement repeatedly. As mentioned at the top of this section, named bind parameters are always used regardless of the type of DBAPI being used; for DBAPI's that expect positional arguments, bind parameters are converted to lists right before execution, and Pyformat strings in statements, i.e. '%(name)s', are converted to the appropriate positional style. +Throughout all these examples, SQLAlchemy is busy creating bind parameters wherever literal expressions occur. You can also specify your own bind parameters with your own names, and use the same statement repeatedly. 
The bind parameters, shown here in the "named" format, will be converted to the appropriate named or positional style according to the database implementation being used. - {python}s = users.select(users.c.user_name==bindparam('username')) + {python title="Custom Bind Params"} + s = users.select(users.c.user_name==bindparam('username')) + + # execute implicitly {sql}s.execute(username='fred') SELECT users.user_id, users.user_name, users.password FROM users WHERE users.user_name = :username {'username': 'fred'} - {sql}s.execute(username='jane') + # execute explicitly + conn = engine.connect() + {sql}conn.execute(s, username='fred') SELECT users.user_id, users.user_name, users.password FROM users WHERE users.user_name = :username - {'username': 'jane'} + {'username': 'fred'} - {sql}s.execute(username='mary') - SELECT users.user_id, users.user_name, users.password - FROM users WHERE users.user_name = :username - {'username': 'mary'} -`executemany()` is also available, but that applies more to INSERT/UPDATE/DELETE, described later. - -The generation of bind parameters is performed specific to the engine being used. The examples in this document all show "named" parameters like those used in sqlite and oracle. Depending on the parameter type specified by the DBAPI module, the correct bind parameter scheme will be used. +`executemany()` is also available by supplying multiple dictionary arguments instead of keyword arguments to the `execute()` method of `ClauseElement` or `Connection`. Examples can be found later in the sections on INSERT/UPDATE/DELETE. #### Precompiling a Query {@name=precompiling} @@ -636,7 +706,8 @@ By throwing the `compile()` method onto the end of any query object, the query c The sql package tries to allow free textual placement in as many ways as possible. In the examples below, note that the from_obj parameter is used only when no other information exists within the select object with which to determine table metadata. 
Also note that in a query where there isnt even table metadata used, the SQLEngine to be used for the query has to be explicitly specified: - {python}# strings as column clauses + {python} + # strings as column clauses {sql}select(["user_id", "user_name"], from_obj=[users]).execute() SELECT user_id, user_name FROM users {} @@ -677,19 +748,9 @@ The sql package tries to allow free textual placement in as many ways as possibl # a full query {sql}text("select user_name from users", engine=db).execute() - select user_name from users + SELECT user_name FROM users {} - # or call text() off of the engine - engine.text("select user_name from users").execute() - - # execute off the engine directly - you must use the engine's native bind parameter - # style (i.e. named, pyformat, positional, etc.) - {sql}db.execute( - "select user_name from users where user_id=:user_id", - {'user_id':7}).execute() - select user_name from users where user_id=:user_id - {'user_id':7} #### Using Bind Parameters in Text Blocks {@name=textual_binds} @@ -721,24 +782,25 @@ Result-row type processing can be added via the `typemap` argument, which is a d One of the primary motivations for a programmatic SQL library is to allow the piecemeal construction of a SQL statement based on program variables. All the above examples typically show Select objects being created all at once. The Select object also includes "builder" methods to allow building up an object. 
The below example is a "user search" function, where users can be selected based on primary key, user name, street address, keywords, or any combination: - {python}def find_users(id=None, name=None, street=None, keywords=None): - statement = users.select() - if id is not None: - statement.append_whereclause(users.c.user_id==id) - if name is not None: - statement.append_whereclause(users.c.user_name==name) - if street is not None: - # append_whereclause joins "WHERE" conditions together with AND - statement.append_whereclause(users.c.user_id==addresses.c.user_id) - statement.append_whereclause(addresses.c.street==street) - if keywords is not None: - statement.append_from( - users.join(userkeywords, users.c.user_id==userkeywords.c.user_id).join( - keywords, userkeywords.c.keyword_id==keywords.c.keyword_id)) - statement.append_whereclause(keywords.c.name.in_(keywords)) - # to avoid multiple repeats, set query to be DISTINCT: - statement.distinct=True - return statement.execute() + {python} + def find_users(id=None, name=None, street=None, keywords=None): + statement = users.select() + if id is not None: + statement.append_whereclause(users.c.user_id==id) + if name is not None: + statement.append_whereclause(users.c.user_name==name) + if street is not None: + # append_whereclause joins "WHERE" conditions together with AND + statement.append_whereclause(users.c.user_id==addresses.c.user_id) + statement.append_whereclause(addresses.c.street==street) + if keywords is not None: + statement.append_from( + users.join(userkeywords, users.c.user_id==userkeywords.c.user_id).join( + keywords, userkeywords.c.keyword_id==keywords.c.keyword_id)) + statement.append_whereclause(keywords.c.name.in_(keywords)) + # to avoid multiple repeats, set query to be DISTINCT: + statement.distinct=True + return statement.execute() {sql}find_users(id=7) SELECT users.user_id, users.user_name, users.password @@ -765,7 +827,8 @@ An INSERT involves just one table. 
The Insert object is used via the insert() f The values to be populated for an INSERT or an UPDATE can be specified to the insert()/update() functions as the `values` named argument, or the query will be compiled based on the values of the parameters sent to the execute() method. - {python}# basic insert + {python title="Using insert()"} + # basic insert {sql}users.insert().execute(user_id=1, user_name='jack', password='asdfdaf') INSERT INTO users (user_id, user_name, password) VALUES (:user_id, :user_name, :password) @@ -811,7 +874,8 @@ The values to be populated for an INSERT or an UPDATE can be specified to the in Updates work a lot like INSERTS, except there is an additional WHERE clause that can be specified. - {python}# change 'jack' to 'ed' + {python title="Using update()"} + # change 'jack' to 'ed' {sql}users.update(users.c.user_name=='jack').execute(user_name='ed') UPDATE users SET user_name=:user_name WHERE users.user_name = :users_user_name {'users_user_name': 'jack', 'user_name': 'ed'} diff --git a/doc/build/content/threadlocal.txt b/doc/build/content/threadlocal.txt new file mode 100644 index 000000000..20ad270ae --- /dev/null +++ b/doc/build/content/threadlocal.txt @@ -0,0 +1,2 @@ +The threadlocal mod {@name=threadlocal} +============ diff --git a/doc/build/content/trailmap.myt b/doc/build/content/trailmap.myt deleted file mode 100644 index e0ea2d1b6..000000000 --- a/doc/build/content/trailmap.myt +++ /dev/null @@ -1,53 +0,0 @@ -<%flags>inherit='document_base.myt'</%flags> -<%attr>title='How to Read this Manual'</%attr> -<&|doclib.myt:item, name="howtoread", description="How to Read this Manual" &> - -<p>SQLAlchemy features a lot of tools and patterns to help in every area of writing applications that talk to relational databases. To achieve this, it has a lot of areas of functionality which work together to provide a cohesive package. 
Ultimately, just a little bit of familiarity with each concept is all that's needed to get off the ground.</p> - -<p>That said, here's two quick links that summarize the two most prominent features of SQLAlchemy: -<ul> - <li><&formatting.myt:link, path="datamapping", class_="trailbold"&> - a synopsis of how to map objects to database tables (Object Relational Mapping)</li> - <li><&formatting.myt:link, path="sql", class_="trailbold"&> - SQLAlchemy's own domain-oriented approach to constructing and executing SQL statements.</li> -</ul> -</p> - -<&|doclib.myt:item, name="trailmap", description="Trail Map" &> -<p>For a comprehensive tour through all of SQLAlchemy's components, below is a "Trail Map" of the knowledge dependencies between these components indicating the order in which concepts may be learned. Concepts marked in bold indicate features that are useful on their own. -</p> -<pre> -Start - | - | - |--- <&formatting.myt:link, class_="trailbold", path="pooling" &> - | | - | | - | |------ <&formatting.myt:link, path="pooling_configuration" &> - | | - | | - +--- <&formatting.myt:link, path="dbengine_establishing" &> | - | | - | | - |--------- <&formatting.myt:link, path="dbengine_options" &> - | - | - +---- <&formatting.myt:link, path="metadata_tables" &> - | - | - |---- <&formatting.myt:link, path="metadata_creating" &> - | - | - |---- <&formatting.myt:link, path="sql", class_="trailbold" &> - | | - | | - +---- <&formatting.myt:link, path="datamapping", class_="trailbold"&> | - | | | - | | | - | <&formatting.myt:link, path="unitofwork"&> | - | | | - | | | - | +----------- <&formatting.myt:link, path="adv_datamapping"&> - | - +----- <&formatting.myt:link, path="types"&> -</pre> -</&> -</&> diff --git a/doc/build/content/tutorial.txt b/doc/build/content/tutorial.txt index ef36f696b..44c5c2db1 100644 --- a/doc/build/content/tutorial.txt +++ b/doc/build/content/tutorial.txt @@ -1,148 +1,492 @@ -Tutorial
-========
-This tutorial provides a relatively simple walking tour through the basic concepts of SQLAlchemy. You may wish to skip it and dive into the [main manual][manual] which is more reference-oriented.
-
-[manual]: rel:howtoread
-
-Installation
-------------
-
-### Installing SQLAlchemy {@name=sqlalchemy}
-
-Installing SQLAlchemy from scratch is most easily achieved with [setuptools][]. ([setuptools installation][install setuptools]). Just run this from the command-line:
-
- $ easy_install SQLAlchemy
-
-This command will download the latest version of SQLAlchemy from the [Python Cheese Shop][cheese] and install it to your system.
-
-[setuptools]: http://peak.telecommunity.com/DevCenter/setuptools
-[install setuptools]: http://peak.telecommunity.com/DevCenter/EasyInstall#installation-instructions
-[cheese]: http://cheeseshop.python.org/pypi
-
-### Installing a Database API {@name=dbms}
-
-SQLAlchemy is designed to operate with a [DBAPI][DBAPI] implementation built for a particular database, and includes support for the most popular databases. If you have one of the [supported DBAPI implementations][supported dbms], you can proceed to the following section. Otherwise [SQLite][] is an easy-to-use database to get started with, which works with plain files or in-memory databases.
-
-[DBAPI]: http://www.python.org/doc/peps/pep-0249/
-
-To work with SQLite, you'll need:
-
- * [pysqlite][] - Python interface for SQLite
- * SQLite library
-
-Note that the SQLite library download is not required with Windows, as the Windows Pysqlite library already includes it linked in. Pysqlite and SQLite can also be installed on Linux or FreeBSD via pre-made [packages][pysqlite packages] or [from sources][pysqlite].
-
-[supported dbms]: rel:dbengine_establishing
-[sqlite]: http://sqlite.org/
-[pysqlite]: http://pysqlite.org/
-[pysqlite packages]: http://initd.org/tracker/pysqlite/wiki/PysqlitePackages
-
-Getting Started {@name=gettingstarted}
---------------------------
-
-### Connecting to the Database
-
-The first thing needed is a handle to the desired database, represented by a `SQLEngine` object. This object handles the business of managing connections and dealing with the specifics of a particular database. Below, we will make a SQLite connection to a file-based database called "tutorial.db".
-
- >>> from sqlalchemy import *
- >>> db = create_engine('sqlite://filename=tutorial.db')
-
-For full information on creating database engines, including those for SQLite and others, see [dbengine](rel:dbengine).
-
-### Creating a Table {@name=table}
-
-A core philosophy of SQLAlchemy is that tables and domain classes are different beasts. For this reason, SQLAlchemy provides constructs that represent tables by themselves (known as *table metadata*). So we will begin by constructing table metadata objects and performing SQL operations with them directly, keeping in mind that there is also an Object Relational Mapper (ORM) which does the same thing except via domain models. Let's construct an object that represents a table:
-
- >>> users = Table('users', db,
- ... Column('user_id', Integer, primary_key = True),
- ... Column('user_name', String(40)),
- ... Column('password', String(80))
- ... )
-
-As you might have guessed, we have just defined a table named `users` which has three columns: `user_id` (which is a primary key column), `user_name` and `password`. Currently it is just an object that may not correspond to an existing table in your database. To actually create the table, we use the `create()` method. To make it interesting, we will have SQLAlchemy echo the SQL statements it sends to the database:
-
- >>> db.echo = True
- >>> users.create() # doctest:+ELLIPSIS,+NORMALIZE_WHITESPACE
- CREATE TABLE users(
- user_id INTEGER NOT NULL PRIMARY KEY,
- user_name VARCHAR(40),
- password VARCHAR(80)
- )
- ...
- >>> db.echo = False # you can skip this if you want to keep logging SQL statements
-
-Alternatively, the `users` table might already exist (such as, if you're running examples from this tutorial for the second time), in which case you can just skip the `create()` method call. You can even skip defining the individual columns in the `users` table and ask SQLAlchemy to load its definition from the database:
-
- >>> users = Table('users', db, autoload = True)
- >>> list(users.columns)[0].name
- 'user_id'
-
-Documentation on table metadata is available in [metadata](rel:metadata).
-
-### Inserting Rows
-
-Inserting is achieved via the `insert()` method, which defines a *clause object* representing an INSERT statement:
-
- >>> i = users.insert()
- >>> i # doctest:+ELLIPSIS
- <sqlalchemy.sql.Insert object at 0x...>
- >>> print i
- INSERT INTO users (user_id, user_name, password) VALUES (?, ?, ?)
-
-The `execute()` method of the clause object executes the statement at the database level:
-
- >>> for name in ['Tom', 'Dick', 'Harry']: # doctest:+ELLIPSIS
- ... i.execute(user_name = name)
- <sqlalchemy.engine.ResultProxy instance at 0x...>
- ...
- >>> i.execute(user_name = 'Mary', password = 'secure') # doctest:+ELLIPSIS
- <sqlalchemy.engine.ResultProxy instance at 0x...>
-
-When constructing clause objects, SQLAlchemy will bind all literal values into bind parameters, according to the paramstyle of the underlying DBAPI. This allows for better performance, as the database may cache a compiled representation of the statement and reuse it for new executions, substituting the new values. Also, when using bound values, you need not worry about [SQL injection][] attacks.
-
-[SQL injection]: http://en.wikipedia.org/wiki/SQL_injection
-
-Documentation on inserting: [sql_insert](rel:sql_insert).
-
-### Constructing Queries
-
-Let's check that the data we have put into the `users` table is actually there. The procedure is analogous to the insert example above, except you now call the `select()` method off the `users` table:
-
- >>> s = users.select()
- >>> print s
- SELECT users.user_id, users.user_name, users.password
- FROM users
- >>> r = s.execute()
-
-This time, we won't ignore the return value of `execute()`:
-
- >>> r # doctest:+ELLIPSIS
- <sqlalchemy.engine.ResultProxy instance at 0x...>
- >>> r.keys
- ['user_id', 'user_name', 'password']
- >>> row = r.fetchone()
- >>> row['user_name']
- u'Tom'
- >>> r.fetchall()
- [(2, u'Dick', None), (3, u'Harry', None), (4, u'Mary', u'secure')]
-
-Documentation on selecting: [sql_select](rel:sql_select).
-
-### Related Table
-
-Main documentation: [sql](rel:sql).
-
-### Fancier Querying {@name=fancyquery}
-
-Main documentation: [sql](rel:sql).
-
-### Data Mapping {@name=mapping}
-
-Main documentation: [datamapping](rel:datamapping), [adv_datamapping](rel:adv_datamapping).
-
-### Transactions
-
-Main documentation: [unitofwork](rel:unitofwork), [dbengine_transactions](rel:dbengine_transactions).
-
-Conclusion
-----------
+Tutorial +======== +This tutorial provides a relatively simple walking tour through the basic concepts of SQLAlchemy. You may wish to skip it and dive into the [main manual][manual] which is more reference-oriented. The examples in this tutorial comprise a fully working interactive Python session, and are guaranteed to be functioning courtesy of [doctest][]. + +[doctest]: http://www.python.org/doc/lib/module-doctest.html +[manual]: rel:howtoread + +Installation +------------ + +### Installing SQLAlchemy {@name=sqlalchemy} + +Installing SQLAlchemy from scratch is most easily achieved with [setuptools][]. ([setuptools installation][install setuptools]). Just run this from the command-line: + + $ easy_install SQLAlchemy + +This command will download the latest version of SQLAlchemy from the [Python Cheese Shop][cheese] and install it to your system. + +[setuptools]: http://peak.telecommunity.com/DevCenter/setuptools +[install setuptools]: http://peak.telecommunity.com/DevCenter/EasyInstall#installation-instructions +[cheese]: http://cheeseshop.python.org/pypi + +Otherwise, you can install from the distribution using the `setup.py` script: + + $ python setup.py install + +### Installing a Database API {@name=dbms} + +SQLAlchemy is designed to operate with a [DBAPI][DBAPI] implementation built for a particular database, and includes support for the most popular databases. If you have one of the [supported DBAPI implementations][supported dbms], you can proceed to the following section. Otherwise [SQLite][] is an easy-to-use database to get started with, which works with plain files or in-memory databases. + +[DBAPI]: http://www.python.org/doc/peps/pep-0249/ + +To work with SQLite, you'll need: + + * [pysqlite][] - Python interface for SQLite + * SQLite library + +Note that the SQLite library download is not required with Windows, as the Windows Pysqlite library already includes it linked in. 
Pysqlite and SQLite can also be installed on Linux or FreeBSD via pre-made [packages][pysqlite packages] or [from sources][pysqlite]. + +[supported dbms]: rel:dbengine_establishing +[sqlite]: http://sqlite.org/ +[pysqlite]: http://pysqlite.org/ +[pysqlite packages]: http://initd.org/tracker/pysqlite/wiki/PysqlitePackages + +Getting Started {@name=gettingstarted} +-------------------------- + +### Imports + +SQLAlchemy provides the entire namespace of everything you'll need under the module name `sqlalchemy`. For the purposes of this tutorial, we will import its full list of symbols into our own local namespace. + + {python} + >>> from sqlalchemy import * + +### Connecting to the Database + +After our imports, the next thing we need is a handle to the desired database, represented by an `Engine` object. This object handles the business of managing connections and dealing with the specifics of a particular database. Below, we will make a SQLite connection to a file-based database called "tutorial.db". + + {python} + >>> db = create_engine('sqlite:///tutorial.db') + + +For full information on creating database engines, including those for SQLite and others, see [dbengine](rel:dbengine). + +Working with Database Objects {@name=schemasql} +----------------------------------------------- + +A core philosophy of SQLAlchemy is that tables and domain classes are different beasts. For this reason, SQLAlchemy provides constructs that represent tables by themselves (known as *table metadata*). So we will begin by constructing table metadata objects and performing SQL operations with them directly. Later, we will look into SQLAlchemy's Object Relational Mapper (ORM), which provides an additional layer of abstraction onto table metadata, allowing us to load and save objects of any arbitrary Python class. + +### Defining Metadata, Binding to Engines {@name=metadata} + +Firstly, your Tables have to belong to a collection called `MetaData`. 
We will create a handy form of `MetaData` that automatically connects to our `Engine` (connecting a schema object to an Engine is called *binding*): + + {python} + >>> metadata = BoundMetaData(db) + +An equivalent operation is to create the `BoundMetaData` object directly with an Engine URL, which calls the `create_engine` call for us: + + {python} + >>> metadata = BoundMetaData('sqlite:///tutorial.db') + +Now, when we tell "metadata" about the tables in our database, we can issue CREATE statements for those tables, as well as create and execute SQL statements derived from them, without needing to open or close any connections; that will be all done automatically. Note that this feature is **entirely optional**. SQLAlchemy includes full support for explicit Connections used with schema and SQL constructs that are entirely unbound to any Engine. + +For the purposes of this tutorial, we will stick with "bound" objects, as it makes the code simpler and easier to read. + +### Creating a Table {@name=table} + +With `metadata` as our established home for tables, lets make a Table for it: + + {python} + >>> users_table = Table('users', metadata, + ... Column('user_id', Integer, primary_key=True), + ... Column('user_name', String(40)), + ... Column('password', String(10)) + ... ) + +As you might have guessed, we have just defined a table named `users` which has three columns: `user_id` (which is a primary key column), `user_name` and `password`. Currently it is just an object that doesn't necessarily correspond to an existing table in our database. To actually create the table, we use the `create()` method. 
To make it interesting, we will have SQLAlchemy echo the SQL statements it sends to the database, by setting the `echo` flag on the `Engine` associated with our `BoundMetaData`: + + {python} + >>> metadata.engine.echo = True + >>> users_table.create() # doctest:+ELLIPSIS,+NORMALIZE_WHITESPACE + CREATE TABLE users( + user_id INTEGER NOT NULL PRIMARY KEY, + user_name VARCHAR(40), + password VARCHAR(10) + ) + ... + +Alternatively, the `users` table might already exist (such as, if you're running examples from this tutorial for the second time), in which case you can just skip the `create()` method call. You can even skip defining the individual columns in the `users` table and ask SQLAlchemy to load its definition from the database: + + {python} + >>> users_table = Table('users', metadata, autoload=True) + >>> list(users_table.columns)[0].name + 'user_id' + +Documentation on table metadata is available in [metadata](rel:metadata). + +### Inserting Rows + +Inserting is achieved via the `insert()` method, which defines a *clause object* (known as a `ClauseElement`) representing an INSERT statement: + + {python} + >>> i = users_table.insert() + >>> i # doctest:+ELLIPSIS + <sqlalchemy.sql.Insert object at 0x...> + >>> print i + INSERT INTO users (user_id, user_name, password) VALUES (?, ?, ?) + +Since we created this insert statement object from the `users` table which is bound to our `Engine`, the statement itself is also bound to the `Engine`, and supports executing itself. The `execute()` method of the clause object will *compile* the object into a string according to the underlying *dialect* of the Engine to which the statement is bound, and will then execute the resulting statement. + + {python} + >>> i.execute(user_name='Mary', password='secure') # doctest:+ELLIPSIS + INSERT INTO users (user_name, password) VALUES (?, ?) 
+ ['Mary', 'secure'] + COMMIT + <sqlalchemy.engine.base.ResultProxy instance at 0x...> + + >>> i.execute({'user_name':'Tom'}, {'user_name':'Fred'}, {'user_name':'Harry'}) # doctest:+ELLIPSIS,+NORMALIZE_WHITESPACE + INSERT INTO users (user_name) VALUES (?) + [['Tom'], ['Fred'], ['Harry']] + COMMIT + <sqlalchemy.engine.base.ResultProxy instance at 0x...> + + +Note that the `VALUES` clause of each `INSERT` statement was automatically adjusted to correspond to the parameters sent to the `execute()` method. This is because the compilation step of a `ClauseElement` takes into account not just the constructed SQL object and the specifics of the type of database being used, but the execution parameters sent along as well. + +When constructing clause objects, SQLAlchemy will bind all literal values into bind parameters. On the construction side, bind parameters are always treated as named parameters. At compilation time, SQLAlchemy will convert them into their proper format, based on the paramstyle of the underlying DBAPI. This works equally well for all named and positional bind parameter formats described in the DBAPI specification. + +Documentation on inserting: [sql_insert](rel:sql_insert). + +### Selecting + +Let's check that the data we have put into `users` table is actually there. The procedure is analogous to the insert example above, except you now call the `select()` method off the `users` table: + + {python} + >>> s = users_table.select() + >>> print s + SELECT users.user_id, users.user_name, users.password + FROM users + >>> r = s.execute() + SELECT users.user_id, users.user_name, users.password + FROM users + [] + +This time, we won't ignore the return value of `execute()`. 
Its an instance of `ResultProxy`, which is a result-holding object that behaves very similarly to the `cursor` object one deals with directly with a database API: + + {python} + >>> r # doctest:+ELLIPSIS + <sqlalchemy.engine.base.ResultProxy instance at 0x...> + >>> r.fetchone() + (1, u'Mary', u'secure') + >>> r.fetchall() + [(2, u'Tom', None), (3, u'Fred', None), (4, u'Harry', None)] + +Query criterion for the select is specified using Python expressions, using the `Column` objects in the `Table` as a base. All expressions constructed from `Column` objects are themselves instances of `ClauseElements`, just like the `Select`, `Insert`, and `Table` objects themselves. + + {python} + >>> r = users_table.select(users_table.c.user_name=='Harry').execute() + SELECT users.user_id, users.user_name, users.password + FROM users + WHERE users.user_name = ? + ['Harry'] + >>> row = r.fetchone() + >>> print row + (4, u'Harry', None) + +Pretty much the full range of standard SQL operations are supported as constructed Python expressions, including joins, ordering, grouping, functions, correlated subqueries, unions, etc. Documentation on selecting: [sql_select](rel:sql_select). + +### Working with Rows + +You can see that when we print out the rows returned by an execution result, it prints the rows as tuples. These rows in fact support both the list and dictionary interfaces. The dictionary interface allows the addressing of columns by string column name, or even the original `Column` object: + + {python} + >>> row.keys() + ['user_id', 'user_name', 'password'] + >>> row['user_id'], row[1], row[users_table.c.password] + (4, u'Harry', None) + +Addressing the columns in a row based on the original `Column` object is especially handy, as it eliminates the need to work with literal column names altogether. + +Result sets also support iteration. 
We'll show this with a slightly different form of `select` that allows you to specify the specific columns to be selected: + + {python} + >>> for row in select([users_table.c.user_id, users_table.c.user_name]).execute(): # doctest:+NORMALIZE_WHITESPACE + ... print row + SELECT users.user_id, users.user_name + FROM users + [] + (1, u'Mary') + (2, u'Tom') + (3, u'Fred') + (4, u'Harry') + +### Table Relationships + +Lets create a second table, `email_addresses`, which references the `users` table. To define the relationship between the two tables, we will use the `ForeignKey` construct. We will also issue the `CREATE` statement for the table in one step: + + {python} + >>> email_addresses_table = Table('email_addresses', metadata, + ... Column('address_id', Integer, primary_key=True), + ... Column('email_address', String(100), nullable=False), + ... Column('user_id', Integer, ForeignKey('users.user_id'))).create() # doctest:+ELLIPSIS,+NORMALIZE_WHITESPACE + CREATE TABLE email_addresses( + address_id INTEGER NOT NULL PRIMARY KEY, + email_address VARCHAR(100) NOT NULL, + user_id INTEGER REFERENCES users(user_id) + ) + ... + +Above, the `email_addresses` table is related to the `users` table via the `ForeignKey('users.user_id')`. The `ForeignKey` constructor can take a `Column` object or a string representing the table and column name. When using the string argument, the referenced table must exist within the same `MetaData` object; thats where it looks for the other table! + +Next, lets put a few rows in: + + {python} + >>> email_addresses_table.insert().execute( + ... {'email_address':'tom@tom.com', 'user_id':2}, + ... {'email_address':'mary@mary.com', 'user_id':1}) #doctest:+ELLIPSIS + INSERT INTO email_addresses (email_address, user_id) VALUES (?, ?) 
+ [['tom@tom.com', 2], ['mary@mary.com', 1]] + COMMIT + <sqlalchemy.engine.base.ResultProxy instance at 0x...> + +With two related tables, we can now construct a join amongst them using the `join` method: + + {python} + >>> r = users_table.join(email_addresses_table).select().execute() + SELECT users.user_id, users.user_name, users.password, email_addresses.address_id, email_addresses.email_address, email_addresses.user_id + FROM users JOIN email_addresses ON users.user_id = email_addresses.user_id + [] + >>> print [row for row in r] + [(1, u'Mary', u'secure', 2, u'mary@mary.com', 1), (2, u'Tom', None, 1, u'tom@tom.com', 2)] + +The `join` method is also a standalone function in the `sqlalchemy` namespace. The join condition is figured out from the foreign keys of the Table objects given. The condition (also called the "ON clause") can be specified explicitly, such as in this example where we locate all users that used their email address as their password: + + {python} + >>> print join(users_table, email_addresses_table, + ... and_(users_table.c.user_id==email_addresses_table.c.user_id, + ... users_table.c.password==email_addresses_table.c.email_address) + ... ) + users JOIN email_addresses ON users.user_id = email_addresses.user_id AND users.password = email_addresses.email_address + +Working with Object Mappers {@name=orm} +----------------------------------------------- + +Now that we have a little bit of Table and SQL operations covered, lets look into SQLAlchemy's ORM (object relational mapper). With the ORM, you associate Tables (and other *Selectable* units, like queries and table aliases) with Python classes, into units called *Mappers*. Then you can execute queries that return lists of object instances, instead of result sets. The object instances themselves are associated with an object called a *Session*, which automatically tracks changes on each object and supports a "save all at once" operation called a *flush*. 
+ +### Creating a Mapper {@name=mapper} + +A Mapper is usually created once per Python class, and at its core primarily means to say, "objects of this class are to be stored as rows in this table". Lets create a class called `User`, which will represent a user object that is stored in our `users` table: + + {python} + >>> class User(object): + ... def __repr__(self): + ... return "(User %s,password:%s)" % (self.user_name, self.password) + +The class is a new style class (i.e. it extends `object`) and does not require a constructor (although one may be provided if desired). We just have one `__repr__` method on it which will display basic information about the User. Note that the `__repr__` method references the instance variables `user_name` and `password` which otherwise aren't defined. While we are free to explicitly define these attributes and treat them normally, this is optional; as SQLAlchemy's `Mapper` construct will manage them for us, since their names correspond to the names of columns in the `users` table. Lets create a mapper, and observe that these attributes are now defined: + + {python} + >>> usermapper = mapper(User, users_table) + >>> u1 = User() + >>> print u1.user_name + None + >>> print u1.password + None + +The `mapper` function returns a new instance of `Mapper`. As it is the first Mapper we have created for the `User` class, it is known as the classes' *primary mapper*. We generally don't need to hold onto the `usermapper` instance variable; SA's ORM can automatically locate this Mapper when it deals with the class, or instances of that class. + +### Obtaining a Session {@name=session} + +After you create a Mapper, all operations with that Mapper require the usage of an important object called a `Session`. All objects loaded or saved by the Mapper must be *attached* to a `Session` object, which represents a kind of "workspace" of objects that are loaded into memory. A particular object instance can only be attached to one `Session` at a time. 
+ +By default, you have to create a `Session` object explicitly before you can load or save objects. There are several ways to manage sessions, but the most straightforward is to just create one, which we will do by saying, `create_session()`: + + {python} + >>> session = create_session() + >>> session # doctest:+ELLIPSIS + <sqlalchemy.orm.session.Session object at 0x...> + +### The Query Object {@name=query} + +The Session has all kinds of methods on it to retrieve and store objects, and also to view their current status. The Session also provides an easy interface which can be used to query the database, by giving you an instance of a `Query` object corresponding to a particular Python class: + + {python} + >>> query = session.query(User) + >>> print query.select_by(user_name='Harry') + SELECT users.user_name AS users_user_name, users.password AS users_password, users.user_id AS users_user_id + FROM users + WHERE users.user_name = ? ORDER BY users.oid + ['Harry'] + [(User Harry,password:None)] + +All querying for objects is performed via an instance of `Query`. The various `select` methods on an instance of `Mapper` also use an underlying `Query` object to perform the operation. A `Query` is always bound to a specific `Session`. + +Let's turn off the database echoing for a moment, and try out a few methods on `Query`. Methods that end with the suffix `_by` primarily take keyword arguments which correspond to properties on the object. Other methods take `ClauseElement` objects, which are constructed by using `Column` objects inside of Python expressions, in the same way as we did with our SQL select example in the previous section of this tutorial. 
Using `ClauseElement` structures to query objects is more verbose but more flexible: + + {python} + >>> metadata.engine.echo = False + >>> print query.select(User.c.user_id==3) + [(User Fred,password:None)] + >>> print query.get(2) + (User Tom,password:None) + >>> print query.get_by(user_name='Mary') + (User Mary,password:secure) + >>> print query.selectfirst(User.c.password==None) + (User Tom,password:None) + >>> print query.count() + 4 + +Notice that our `User` class has a special attribute `c` attached to it. This 'c' represents the columns on the User's mapper's Table object. Saying `User.c.user_name` is synonymous with saying `users_table.c.user_name`, recalling that `User` is the Python class and `users_table` is our `Table` object. + +### Making Changes {@name=changes} + +With a little experience in loading objects, let's see what it's like to make changes. First, let's create a new user "Ed". We do this by just constructing the new object. Then, we just add it to the session: + + {python} + >>> ed = User() + >>> ed.user_name = 'Ed' + >>> ed.password = 'edspassword' + >>> session.save(ed) + >>> ed in session + True + +Let's also make a few changes on some of the objects in the database. We will load them with our `Query` object, and then change some things. + + {python} + >>> mary = query.get_by(user_name='Mary') + >>> harry = query.get_by(user_name='Harry') + >>> mary.password = 'marysnewpassword' + >>> harry.password = 'harrysnewpassword' + +At the moment, nothing has been saved to the database; all of our changes are in memory only. What happens if some other part of the application also tries to load 'Mary' from the database and make some changes before we had a chance to save it? Assuming that the same `Session` is used, loading 'Mary' from the database a second time will issue a second query in order to locate the primary key of 'Mary', but will *return the same object instance as the one already loaded*. 
This behavior is due to an important property of the `Session` known as the **identity map**: + + {python} + >>> mary2 = query.get_by(user_name='Mary') + >>> mary is mary2 + True + +With the identity map, a single `Session` can be relied upon to keep all loaded instances straight. + +As far as the issue of the same object being modified in two different Sessions, that's an issue of concurrency detection; SQLAlchemy does some basic concurrency checks when saving objects, with the option for a stronger check using version ids. See [adv_datamapping](rel:adv_datamapping) for more details. + +### Saving {@name=saving} + +With a new user "ed" and some changes made on "Mary" and "Harry", lets also mark "Fred" as deleted: + + {python} + >>> fred = query.get_by(user_name='Fred') + >>> session.delete(fred) + +Then to send all of our changes to the database, we `flush()` the Session. Lets turn echo back on to see this happen!: + + {python} + >>> metadata.engine.echo = True + >>> session.flush() + BEGIN + UPDATE users SET password=? WHERE users.user_id = ? + ['marysnewpassword', 1] + UPDATE users SET password=? WHERE users.user_id = ? + ['harrysnewpassword', 4] + INSERT INTO users (user_name, password) VALUES (?, ?) + ['Ed', 'edspassword'] + DELETE FROM users WHERE users.user_id = ? + [3] + COMMIT + +### Relationships + +When our User object contains relationships to other kinds of information, such as a list of email addresses, we can indicate this by using a function when creating the `Mapper` called `relation()`. While there is a lot you can do with relations, we'll cover a simple one here. First, recall that our `users` table has a foreign key relationship to another table called `email_addresses`. A single row in `email_addresses` has a column `user_id` that references a row in the `users` table; since many rows in the `email_addresses` table can reference a single row in `users`, this is called a *one to many* relationship. 
+ +First, deal with the `email_addresses` table by itself. We will create a new class `Address` which represents a single row in the `email_addresses` table, and a corresponding `Mapper` which will associate the `Address` class with the `email_addresses` table: + + {python} + >>> class Address(object): + ... def __init__(self, email_address): + ... self.email_address = email_address + ... def __repr__(self): + ... return "(Address %s)" % (self.email_address) + >>> mapper(Address, email_addresses_table) # doctest: +ELLIPSIS + <sqlalchemy.orm.mapper.Mapper object at 0x...> + +Next, we associate the `User` and `Address` classes together by creating a relation using `relation()`, and then adding that relation to the `User` mapper, using the `add_property` function: + + {python} + >>> usermapper.add_property('addresses', relation(Address)) + +The `relation()` function takes either a class or a Mapper as its first argument, and has many options to further control its behavior. The 'User' mapper has now placed an additional property on each `User` instance called `addresses`. SQLAlchemy will automatically determine that this relationship is a one-to-many relationship, and will subsequently create `addresses` as a list. When a new `User` is created, this list will begin as empty. + +Let's see what we get for the email addresses already in the database. Since we have made a change to the mapper's configuration, it's best that we clear out our `Session`, which is currently holding onto every `User` object we have already loaded: + + {python} + >>> session.clear() + +We can then treat the `addresses` attribute on each `User` object like a regular list: + + {python} + >>> mary = query.get_by(user_name='Mary') # doctest: +NORMALIZE_WHITESPACE + SELECT users.user_name AS users_user_name, users.password AS users_password, users.user_id AS users_user_id + FROM users + WHERE users.user_name = ? 
ORDER BY users.oid + LIMIT 1 OFFSET 0 + ['Mary'] + >>> print [a for a in mary.addresses] + SELECT email_addresses.user_id AS email_addresses_user_id, email_addresses.address_id AS email_addresses_address_id, email_addresses.email_address AS email_addresses_email_address + FROM email_addresses + WHERE ? = email_addresses.user_id ORDER BY email_addresses.oid + [1] + [(Address mary@mary.com)] + +Adding to the list is just as easy. New `Address` objects will be detected and saved when we `flush` the Session: + + {python} + >>> mary.addresses.append(Address('mary2@gmail.com')) + >>> session.flush() # doctest: +NORMALIZE_WHITESPACE + BEGIN + INSERT INTO email_addresses (email_address, user_id) VALUES (?, ?) + ['mary2@gmail.com', 1] + COMMIT + +Main documentation for using mappers: [datamapping](rel:datamapping) + +### Transactions + +You may have noticed from the example above that when we say `session.flush()`, SQLAlchemy indicates the names `BEGIN` and `COMMIT` to indicate a transaction with the database. The `flush()` method, since it may execute many statements in a row, will automatically use a transaction in order to execute these instructions. But what if we want to use `flush()` inside of a larger transaction? This is performed via the `SessionTransaction` object, which we can establish using `session.create_transaction()`. Below, we will perform a more complicated `SELECT` statement, make several changes to our collection of users and email addresses, and then create a new user with two email addresses, within the context of a transaction. We will perform a `flush()` in the middle of it to write the changes we have so far, and then allow the remaining changes to be written when we finally `commit()` the transaction. We enclose our operations within a `try/except` block to insure that resources are properly freed: + + {python} + >>> transaction = session.create_transaction() + >>> try: # doctest: +NORMALIZE_WHITESPACE + ... 
(ed, harry, mary) = session.query(User).select( + ... User.c.user_name.in_('Ed', 'Harry', 'Mary'), order_by=User.c.user_name + ... ) + ... del mary.addresses[1] + ... harry.addresses.append(Address('harry2@gmail.com')) + ... session.flush() + ... print "***flushed the session***" + ... fred = User() + ... fred.user_name = 'fred_again' + ... fred.addresses.append(Address('fred@fred.com')) + ... fred.addresses.append(Address('fredsnewemail@fred.com')) + ... session.save(fred) + ... transaction.commit() + ... except: + ... transaction.rollback() + ... raise + BEGIN + SELECT users.user_name AS users_user_name, users.password AS users_password, users.user_id AS users_user_id + FROM users + WHERE users.user_name IN (?, ?, ?) ORDER BY users.user_name + ['Ed', 'Harry', 'Mary'] + SELECT email_addresses.user_id AS email_addresses_user_id, email_addresses.address_id AS email_addresses_address_id, email_addresses.email_address AS email_addresses_email_address + FROM email_addresses + WHERE ? = email_addresses.user_id ORDER BY email_addresses.oid + [4] + UPDATE email_addresses SET user_id=? WHERE email_addresses.address_id = ? + [None, 3] + INSERT INTO email_addresses (email_address, user_id) VALUES (?, ?) + ['harry2@gmail.com', 4] + ***flushed the session*** + INSERT INTO users (user_name, password) VALUES (?, ?) + ['fred_again', None] + INSERT INTO email_addresses (email_address, user_id) VALUES (?, ?) + ['fred@fred.com', 6] + INSERT INTO email_addresses (email_address, user_id) VALUES (?, ?) + ['fredsnewemail@fred.com', 6] + COMMIT + +Main documentation: [unitofwork](rel:unitofwork) + +Next Steps +---------- + +That covers a quick tour through the basic idea of SQLAlchemy, in its simplest form. 
Beyond that, one should familiarize oneself with the basics of Sessions, the various patterns that can be used to define different kinds of Mappers and relations among them, the rudimentary SQL types that are available when constructing Tables, and the basics of Engines, SQL statements, and database Connections. + diff --git a/doc/build/content/types.txt b/doc/build/content/types.txt index 2ed230ac9..58d07c4d9 100644 --- a/doc/build/content/types.txt +++ b/doc/build/content/types.txt @@ -23,7 +23,7 @@ The standard set of generic types are: class Float(Numeric): def __init__(self, precision=10) - # DateTime, Date, and Time work with Python datetime objects + # DateTime, Date and Time types deal with datetime objects from the Python datetime module class DateTime(TypeEngine) class Date(TypeEngine) @@ -39,11 +39,13 @@ The standard set of generic types are: # as bind params, raw bytes to unicode as # rowset values, using the unicode encoding # setting on the engine (defaults to 'utf-8') - class Unicode(TypeDecorator) + class Unicode(TypeDecorator): + impl = String # uses the pickle protocol to serialize data # in/out of Binary columns - class PickleType(TypeDecorator) + class PickleType(TypeDecorator): + impl = Binary More specific subclasses of these types are available, which various database engines may choose to implement specifically, allowing finer grained control over types: @@ -79,7 +81,7 @@ Type objects are specified to table meta data using either the class itself, or User-defined types can be created, to support either database-specific types, or customized pre-processing of query parameters as well as post-processing of result set data. You can make your own classes to perform these operations. 
To augment the behavior of a `TypeEngine` type, such as `String`, the `TypeDecorator` class is used: - {python title="Basic Example"} + {python} import sqlalchemy.types as types class MyType(types.TypeDecorator): diff --git a/doc/build/content/unitofwork.txt b/doc/build/content/unitofwork.txt index 84ceeaa2a..0e0a534e2 100644 --- a/doc/build/content/unitofwork.txt +++ b/doc/build/content/unitofwork.txt @@ -1,421 +1,446 @@ -Unit of Work +[alpha_api]: javascript:alphaApi() +[alpha_implementation]: javascript:alphaImplementation() + +Session / Unit of Work {@name=unitofwork} ============ ### Overview {@name=overview} -The concept behind Unit of Work is to track modifications to a field of objects, and then be able to commit those changes to the database in a single operation. Theres a lot of advantages to this, including that your application doesn't need to worry about individual save operations on objects, nor about the required order for those operations, nor about excessive repeated calls to save operations that would be more efficiently aggregated into one step. It also simplifies database transactions, providing a neat package with which to insert into the traditional database begin/commit phase. +The concept behind Unit of Work is to track modifications to a field of objects, and then be able to flush those changes to the database in a single operation. Theres a lot of advantages to this, including that your application doesn't need to worry about individual save operations on objects, nor about the required order for those operations, nor about excessive repeated calls to save operations that would be more efficiently aggregated into one step. It also simplifies database transactions, providing a neat package with which to insert into the traditional database begin/commit phase. SQLAlchemy's unit of work includes these functions: * The ability to monitor scalar and list attributes on object instances, as well as object creates. 
This is handled via the attributes package. * The ability to maintain and process a list of modified objects, and based on the relationships set up by the mappers for those objects as well as the foreign key relationships of the underlying tables, figure out the proper order of operations so that referential integrity is maintained, and also so that on-the-fly values such as newly created primary keys can be propagated to dependent objects that need them before they are saved. The central algorithm for this is the *topological sort*. -* The ability to define custom functionality that occurs within the unit-of-work commit phase, such as "before insert", "after insert", etc. This is accomplished via MapperExtension. +* The ability to define custom functionality that occurs within the unit-of-work flush phase, such as "before insert", "after insert", etc. This is accomplished via MapperExtension. * an Identity Map, which is a dictionary storing the one and only instance of an object for a particular table/primary key combination. This allows many parts of an application to get a handle to a particular object without any chance of modifications going to two different places. -* Thread-local operation. the Identity map as well as its enclosing Unit of Work are normally instantiated and accessed in a manner that is local to the current thread, within an object called a Session. Another concurrently executing thread will therefore have its own Session, so unless an application explicitly shares objects between threads, the operation of the object relational mapping is automatically threadsafe. Session objects can also be constructed manually to allow any user-defined scoping. +* The sole interface to the unit of work is provided via the `Session` object. Transactional capability, which rides on top of the transactions provided by `Engine` objects, is provided by the `SessionTransaction` object. 
+ +* Thread-locally scoped Session behavior is available as an option, which allows new objects to be automatically added to the Session provided by the *default Session context*. Without a default Session context, an application must explicitly create a Session manually as well as add new objects to it. The default Session context, disabled by default, can also be plugged in with other user-defined schemes, which may also take into account the specific class being dealt with for a particular operation. +* The Session object in SQLAlchemy 0.2 borrows conceptually from that of [Hibernate](http://www.hibernate.org), a leading ORM for Java that is largely based on [JSR-220](http://jcp.org/aboutJava/communityprocess/pfd/jsr220/index.html). SQLAlchemy, under no obligation to conform to EJB specifications, is in general very different from Hibernate, providing a different paradigm for producing queries, a SQL API that is useable independently of the ORM, and of course Pythonic configuration as opposed to XML; however, JSR-220/Hibernate makes some pretty good suggestions with regards to the mechanisms of persistence. + +### Object States {@name=states} + +When dealing with mapped instances with regards to Sessions, an instance may be *attached* or *unattached* to a particular Session. An instance also may or may not correspond to an actual row in the database. The product of these two binary conditions yields us four general states a particular instance can have within the perspective of the Session: + +* *Transient* - a transient instance exists within memory only and is not associated with any Session. It also has no database identity and does not have a corresponding record in the database. When a new instance of a class is constructed, and no default session context exists with which to automatically attach the new instance, it is a transient instance. The instance can then be saved to a particular session in which case it becomes a *pending* instance. 
If a default session context exists, new instances are added to that Session by default and therefore become *pending* instances immediately. -### The Session Interface {@name=session} +* *Pending* - a pending instance is a Session-attached object that has not yet been assigned a database identity. When the Session is flushed (i.e. changes are persisted to the database), a pending instance becomes persistent. -The current unit of work is accessed via a Session object. The Session is available in a thread-local context from the objectstore module as follows: +* *Persistent* - a persistent instance has a database identity and a corresponding record in the database, and is also associated with a particular Session. By "database identity" we mean the object is associated with a table or relational concept in the database combined with a particular primary key in that table. Objects that are loaded by SQLAlchemy in the context of a particular session are automatically considered persistent, as are formerly pending instances which have been subject to a session `flush()`. + +* *Detached* - a detached instance is an instance which has a database identity and corresponding row in the database, but is not attached to any Session. This occurs when an instance has been removed from a Session, either because the session itself was cleared or closed, or the instance was explicitly removed from the Session. The object can be re-attached with a session again in which case it becomes Persistent again. Detached instances are useful when an application needs to represent a long-running operation across multiple Sessions, needs to store an object in a serialized state and then restore it later (such as within an HTTP "session" object), or in some cases where code needs to load instances locally which will later be associated with some other Session. 
+ +### Acquiring a Session {@name=getting} + +A new Session object is constructed via the `create_session()` function: {python} - # get the current thread's session - session = objectstore.get_session() - -The Session object acts as a proxy to an underlying UnitOfWork object. Common methods include commit(), begin(), clear(), and delete(). Most of these methods are available at the module level in the objectstore module, which operate upon the Session returned by the get_session() function: + session = create_session() + +A common option used with `create_session()` is to specify a specific `Engine` or `Connection` to be used for all operations performed by this Session: + + {python} + # create an engine + e = create_engine('postgres://some/url') + + # create a Session that will use this engine for all operations. + # it will open and close Connections as needed. + session = create_session(bind_to=e) + + # open a Connection + conn = e.connect() + + # create a Session that will use this specific Connection for all operations + session = create_session(bind_to=conn) - {python}# this... - objectstore.get_session().commit() - # is the same as this: - objectstore.commit() +The session to which an object is attached can be acquired via the `object_session()` function, which returns the appropriate `Session` if the object is pending or persistent, or `None` if the object is transient or detached: -A description of the most important methods and concepts follows. + {python} + session = object_session(obj) -#### Identity Map {@name=identitymap} +It is possible to install a default "threadlocal" session context by importing a *mod* called `sqlalchemy.mods.threadlocal`. This mod creates a familiar SA 0.1 keyword `objectstore` in the `sqlalchemy` namespace. 
The `objectstore` may be used directly like a session; all session actions performed on `sqlalchemy.objectstore` will be *proxied* to the thread-local Session: -The first concept to understand about the Unit of Work is that it is keeping track of all mapped objects which have been loaded from the database, as well as all mapped objects which have been saved to the database in the current session. This means that everytime you issue a `select` call to a mapper which returns results, all of those objects are now installed within the current Session, mapped to their identity. + {python} + # install 'threadlocal' mod (only need to call this once per application) + import sqlalchemy.mods.threadlocal + + # then 'objectstore' is available within the 'sqlalchemy' namespace + from sqlalchemy import objectstore + + # flush the current thread-local session using the objectstore directly + objectstore.flush() -In particular, it is insuring that only *one* instance of a particular object, corresponding to a particular database identity, exists within the Session at one time. By "database identity" we mean a table or relational concept in the database combined with a particular primary key in that table. The session accomplishes this task using a dictionary known as an *Identity Map*. When `select` or `get` calls on mappers issue queries to the database, they will in nearly all cases go out to the database on each call to fetch results. However, when the mapper *instantiates* objects corresponding to the result set rows it receives, it will *check the current identity map first* before instantating a new object, and return *the same instance* already present in the identiy map if it already exists. + # which is the same as this (assuming we are still on the same thread): + session = objectstore.get_session() + session.flush() + +We will now cover some of the key concepts used by Sessions and its underlying Unit of Work. 
+ +### Introduction to the Identity Map {@name=identitymap} + +A primary concept of the Session's underlying Unit of Work is that it is keeping track of all persistent instances; recall that a persistent instance has a database identity and is attached to a Session. In particular, the Unit of Work must insure that only *one* copy of a particular persistent instance exists within the Session at any given time. The UOW accomplishes this task using a dictionary known as an *Identity Map*. When a `Query` is used to issue `select` or `get` requests to the database, it will in nearly all cases result in an actual SQL execution to the database, and a corresponding traversal of rows received from that execution. However, when the underlying mapper *instantiates* objects corresponding to the result set rows it receives, it will check the session's identity map first before instantating a new object, and return the same instance already present in the identity map if it already exists, essentially *ignoring* the object state represented by that row. There are several ways to override this behavior and truly refresh an already-loaded instance which are described later, but the main idea is that once your instance is loaded into a particular Session, it will *never change* its state without your explicit approval, regardless of what the database says about it. -Example: +For example; below, two separate calls to load an instance with database identity "15" are issued, and the results assigned to two separate variables. 
However, since the same `Session` was used, the two instances are the same instance: - {python}mymapper = mapper(MyClass, mytable) + {python} + mymapper = mapper(MyClass, mytable) - obj1 = mymapper.selectfirst(mytable.c.id==15) - obj2 = mymapper.selectfirst(mytable.c.id==15) + session = create_session() + obj1 = session.query(MyClass).selectfirst(mytable.c.id==15) + obj2 = session.query(MyClass).selectfirst(mytable.c.id==15) >>> obj1 is obj2 True -The Identity Map is an instance of `weakref.WeakValueDictionary`, so that when an in-memory object falls out of scope, it will be removed automatically. However, this may not be instant if there are circular references upon the object. The current SA attributes implementation places some circular refs upon objects, although this may change in the future. There are other ways to remove object instances from the current session, as well as to clear the current session entirely, which are described later in this section. +The Identity Map is an instance of `weakref.WeakValueDictionary`, so that when an in-memory object falls out of scope, it will be removed automatically. However, this may not be instant if there are circular references upon the object. To guarantee that an instance is removed from the identity map before removing references to it, use the `expunge()` method, described later, to remove it. 
-To view the Session's identity map, it is accessible via the `identity_map` accessor, and is an instance of `weakref.WeakValueDictionary`: +The Session supports an iterator interface in order to see all objects in the identity map: {python} - >>> objectstore.get_session().identity_map.values() + for obj in session: + print obj + +As well as `__contains__()`: + + {python} + if obj in session: + print "Object is present" + +The identity map itself is accessible via the `identity_map` accessor: + + {python} + >>> session.identity_map.values() [<__main__.User object at 0x712630>, <__main__.Address object at 0x712a70>] -The identity of each object instance is available via the _instance_key property attached to each object instance, and is a tuple consisting of the object's class and an additional tuple of primary key values, in the order that they appear within the table definition: +The identity of each object instance is available via the `_instance_key` property attached to each object instance, and is a tuple consisting of the object's class and an additional tuple of primary key values, in the order that they appear within the table definition: {python} >>> obj._instance_key (<class 'test.tables.User'>, (7,)) -At the moment that an object is assigned this key, it is also added to the current thread's unit-of-work's identity map. +At the moment that an object is assigned this key within a `flush()` operation, it is also added to the session's identity map. -The get() method on a mapper, which retrieves an object based on primary key identity, also checks in the current identity map first to save a database round-trip if possible. In the case of an object lazy-loading a single child object, the get() method is used as well, so scalar-based lazy loads may in some cases not query the database; this is particularly important for backreference relationships as it can save a lot of queries. 
+The `get()` method on `Query`, which retrieves an object based on primary key identity, also checks in the Session's identity map first to save a database round-trip if possible. In the case of an object lazy-loading a single child object, the `get()` method is used as well, so scalar-based lazy loads may in some cases not query the database; this is particularly important for backreference relationships as it can save a lot of queries. + +### Whats Changed ? {@name=changed} + +The next concept is that in addition to the `Session` storing a record of all objects loaded or saved, it also stores lists of all *newly created* (i.e. pending) objects, lists of all persistent objects whose attributes have been *modified*, and lists of all persistent objects that have been marked as *deleted*. These lists are used when a `flush()` call is issued to save all changes. After the flush occurs, these lists are all cleared out. -Methods on mappers and the objectstore module, which are relevant to identity include the following: +These records are all tracked by a collection of `Set` objects (which are a SQLAlchemy-specific instance called a `HashSet`) that are also viewable off the `Session`: {python} - # assume 'm' is a mapper - m = mapper(User, users) + # pending objects recently added to the Session + session.new + + # persistent objects with modifications + session.dirty + + # persistent objects that have been marked as deleted via session.delete(obj) + session.deleted + +Unlike the identity map, the `new`, `dirty`, and `deleted` lists are *not weak referencing.* This means if you abandon all references to new or modified objects within a session, *they are still present* and will be saved on the next flush operation, unless they are removed from the Session explicitly (more on that later). 
The `new` list may change in a future release to be weak-referencing, however for the `deleted` list, one can see that its quite natural for a an object marked as deleted to have no references in the application, yet a DELETE operation is still required. - # get the identity key corresponding to a primary key - key = m.identity_key(7) +### The Session API {@name=api} - # for composite key, list out the values in the order they - # appear in the table - key = m.identity_key(12, 'rev2') +#### query() {@name=query} - # get the identity key given a primary key - # value as a tuple and a class - key = objectstore.get_id_key((12, 'rev2'), User) +The `query()` function takes a class or `Mapper` as an argument, along with an optional `entity_name` parameter, and returns a new `Query` object which will issue mapper queries within the context of this Session. If a Mapper is passed, then the Query uses that mapper. Otherwise, if a class is sent, it will locate the primary mapper for that class which is used to construct the Query. - # get the identity key for an object, whether or not it actually - # has one attached to it (m is the mapper for obj's class) - key = m.instance_key(obj) + {python} + # query from a class + session.query(User).select_by(name='ed') + + # query from a mapper + query = session.query(usermapper) + x = query.get(1) - # is this key in the current identity map? - session.has_key(key) + # query from a class mapped with entity name 'alt_users' + q = session.query(User, entity_name='alt_users') + y = q.options(eagerload('orders')).select() + +`entity_name` is an optional keyword argument sent with a class object, in order to further qualify which primary mapper to be used; this only applies if there was a `Mapper` created with that particular class/entity name combination, else an exception is raised. 
All of the methods on Session which take a class or mapper argument also take the `entity_name` argument, so that a given class can be properly matched to the desired primary mapper. - # is this object in the current identity map? - session.has_instance(obj) +All instances retrieved by the returned `Query` object will be stored as persistent instances within the originating `Session`. - # get this object from the current identity map based on - # singular/composite primary key, or if not go - # and load from the database - obj = m.get(12, 'rev2') +#### get() {@name=get} -#### Whats Changed ? {@name=changed} +Given a class or mapper, a scalar or tuple-based identity, and an optional `entity_name` keyword argument, creates a `Query` corresponding to the given mapper or class/entity_name combination, and calls the `get()` method with the given identity value. If the object already exists within this Session, it is simply returned, else it is queried from the database. If the instance is not found, the method returns `None`. -The next concept is that in addition to the Session storing a record of all objects loaded or saved, it also stores records of all *newly created* objects, records of all objects whose attributes have been *modified*, records of all objects that have been marked as *deleted*, and records of all *modified list-based attributes* where additions or deletions have occurred. These lists are used when a `commit()` call is issued to save all changes. After the commit occurs, these lists are all cleared out. 
+ {python} + # get Employer primary key 5 + employer = session.get(Employer, 5) -These records are all tracked by a collection of `Set` objects (which are a SQLAlchemy-specific instance called a `HashSet`) that are also viewable off the Session: + # get Report composite primary key 7,12, using mapper 'report_mapper_b' + report = session.get(Report, (7,12), entity_name='report_mapper_b') + + +#### load() {@name=load} + +load() is similar to get() except it will raise an exception if the instance does not exist in the database. It will also load the object's data from the database in all cases, and **overwrite** all changes on the object if it already exists in the session with the latest data from the database. {python} - # new objects that were just constructed - session.new + # load Employer primary key 5 + employer = session.load(Employer, 5) - # objects that exist in the database, that were modified - session.dirty + # load Report composite primary key 7,12, using mapper 'report_mapper_b' + report = session.load(Report, (7,12), entity_name='report_mapper_b') - # objects that have been marked as deleted via session.delete(obj) - session.deleted +#### save() {@name=save} - # list-based attributes thave been appended - session.modified_lists - -Heres an interactive example, assuming the `User` and `Address` mapper setup first outlined in [datamapping_relations](rel:datamapping_relations): +save() is called with a single transient (unsaved, unattached) instance as an argument, which is then added to the Session and becomes pending. When the session is next `flush`ed, the instance will be saved to the database upon which it becomes persistent (saved, attached). If the given instance is not transient, meaning it is either attached to an existing Session or it has a database identity, an exception is raised. 
{python} - >>> # get the current thread's session - >>> session = objectstore.get_session() - - >>> # create a new object, with a list-based attribute - >>> # containing two more new objects - >>> u = User(user_name='Fred') - >>> u.addresses.append(Address(city='New York')) - >>> u.addresses.append(Address(city='Boston')) + user1 = User(name='user1') + user2 = User(name='user2') + session.save(user1) + session.save(user2) - >>> # objects are in the "new" list - >>> session.new - [<__main__.User object at 0x713630>, - <__main__.Address object at 0x713a70>, - <__main__.Address object at 0x713b30>] - - >>> # view the "modified lists" member, - >>> # reveals our two Address objects as well, inside of a list - >>> session.modified_lists - [[<__main__.Address object at 0x713a70>, <__main__.Address object at 0x713b30>]] + session.flush() # write changes to the database - >>> # lets view what the class/ID is for the list object - >>> ["%s %s" % (l.__class__, id(l)) for l in session.modified_lists] - ['sqlalchemy.mapping.unitofwork.UOWListElement 7391872'] - - >>> # now commit - >>> session.commit() - - >>> # the "new" list is now empty - >>> session.new - [] - - >>> # the "modified lists" list is now empty - >>> session.modified_lists - [] - - >>> # now lets modify an object - >>> u.user_name='Ed' - - >>> # it gets placed in the "dirty" list - >>> session.dirty - [<__main__.User object at 0x713630>] - - >>> # delete one of the addresses - >>> session.delete(u.addresses[0]) - - >>> # and also delete it off the User object, note that - >>> # this is *not automatic* when using session.delete() - >>> del u.addresses[0] - >>> session.deleted - [<__main__.Address object at 0x713a70>] - - >>> # commit - >>> session.commit() - - >>> # all lists are cleared out - >>> session.new, session.dirty, session.modified_lists, session.deleted - ([], [], [], []) - - >>> # identity map has the User and the one remaining Address - >>> session.identity_map.values() - [<__main__.Address object 
at 0x713b30>, <__main__.User object at 0x713630>] - -Unlike the identity map, the `new`, `dirty`, `modified_lists`, and `deleted` lists are *not weak referencing.* This means if you abandon all references to new or modified objects within a session, *they are still present* and will be saved on the next commit operation, unless they are removed from the Session explicitly (more on that later). The `new` list may change in a future release to be weak-referencing, however for the `deleted` list, one can see that its quite natural for a an object marked as deleted to have no references in the application, yet a DELETE operation is still required. +save() is called automatically for new instances by the classes' associated mapper, if a default Session context is in effect (such as a thread-local session), which means that newly created instances automatically become pending. If there is no default session available, then the instance remains transient (unattached) until it is explicitly added to a Session via the save() method. + +A transient instance also can be automatically `save`ed if it is associated with a parent object which specifies `save-update` within its `cascade` rules, and that parent is already attached or becomes attached to a Session. For more information on `cascade`, see the next section. + +The `save_or_update()` method, covered later, is a convenience method which will call the `save()` or `update()` methods appropriately dependening on whether or not the instance has a database identity (but the instance still must be unattached). -#### Commit {@name=commit} +#### flush() {@name=flush} -This is the main gateway to what the Unit of Work does best, which is save everything ! It should be clear by now that a commit looks like: +This is the main gateway to what the Unit of Work does best, which is save everything ! 
It should be clear by now what a flush looks like: {python} - objectstore.get_session().commit() + session.flush() -It also can be called with a list of objects; in this form, the commit operation will be limited only to the objects specified in the list, as well as any child objects within `private` relationships for a delete operation: +It also can be called with a list of objects; in this form, the flush operation will be limited only to the objects specified in the list, as well as any child objects within `private` relationships for a delete operation: {python} # saves only user1 and address2. all other modified # objects remain present in the session. - objectstore.get_session().commit(user1, address2) + session.flush(user1, address2) -This second form of commit should be used more carefully as it will not necessarily locate other dependent objects within the session, whose database representation may have foreign constraint relationships with the objects being operated upon. +This second form of flush should be used carefully as it will not necessarily locate other dependent objects within the session, whose database representation may have foreign constraint relationships with the objects being operated upon. -##### What Commit is, and Isn't {@name=whatis} +##### Notes on Flush {@name=whatis} -The purpose of the Commit operation, as defined by the `objectstore` package, is to instruct the Unit of Work to analyze its lists of modified objects, assemble them into a dependency graph, fire off the appopriate INSERT, UPDATE, and DELETE statements via the mappers related to those objects, and to synchronize column-based object attributes that correspond directly to updated/inserted database columns. 
+A common misconception about the `flush()` operation is that once performed, the newly persisted instances will automatically have related objects attached to them, based on the values of primary key identities that have been assigned to the instances before they were persisted. An example would be, you create a new `Address` object, set `address.user_id` to 5, and then `flush()` the session. The erroneous assumption would be that there is now a `User` object of identity "5" attached to the `Address` object, but in fact this is not the case. If you were to `refresh()` the `Address`, invalidating its current state and re-loading, *then* it would have the appropriate `User` object present. -Its important to note that the *objectstore.get_session().commit() operation is not the same as the commit() operation on SQLEngine.* A `SQLEngine`, described in [database](rel:database), has its own `begin` and `commit` statements which deal directly with transactions opened on DBAPI connections. While the `session.commit()` makes use of these calls in order to issue its own SQL within a database transaction, it is only dealing with "committing" its own in-memory changes and only has an indirect relationship with database connection objects. - -The `session.commit()` operation also does not affect any `relation`-based object attributes, that is attributes that reference other objects or lists of other objects, in any way. A brief list of what will *not* happen includes: - -* It will not append or delete any object instances to/from any list-based object attributes. Any objects that have been created or marked as deleted will be updated as such in the database, but if a newly deleted object instance is still attached to a parent object's list, the object itself will remain in that list. -* It will not set or remove any scalar references to other objects, even if the corresponding database identifier columns have been committed. 
+This misunderstanding is related to the observed behavior of backreferences ([datamapping_relations_backreferences](rel:datamapping_relations_backreferences)), which automatically associates an instance "A" with another instance "B", in response to the manual association of instance "B" to instance "A" by the user. The backreference operation occurs completely externally to the `flush()` operation, and is pretty much the only example of a SQLAlchemy feature that manipulates the relationships of persistent objects. -This means, if you set `address.user_id` to 5, that integer attribute will be saved, but it will not place an `Address` object in the `addresses` attribute of the corresponding `User` object. In some cases there may be a lazy-loader still attached to an object attribute which when first accesed performs a fresh load from the database and creates the appearance of this behavior, but this behavior should not be relied upon as it is specific to lazy loading and also may disappear in a future release. Similarly, if the `Address` object is marked as deleted and a commit is issued, the correct DELETE statements will be issued, but if the object instance itself is still attached to the `User`, it will remain. - -So the primary guideline for dealing with commit() is, *the developer is responsible for maintaining in-memory objects and their relationships to each other, the unit of work is responsible for maintaining the database representation of the in-memory objects.* The typical pattern is that the manipulation of objects *is* the way that changes get communicated to the unit of work, so that when the commit occurs, the objects are already in their correct in-memory representation and problems dont arise. The manipulation of identifier attributes like integer key values as well as deletes in particular are a frequent source of confusion. 
+The primary guideline for dealing with `flush()` is, the developer is responsible for maintaining in-memory objects and their relationships to each other, the unit of work is responsible for maintaining the database representation of the in-memory objects. The typical pattern is that the manipulation of objects *is* the way that changes get communicated to the unit of work, so that when the flush occurs, the objects are already in their correct in-memory representation and problems dont arise. The manipulation of identifier attributes like integer key values as well as deletes in particular are a frequent source of confusion. -A terrific feature of SQLAlchemy which is also a supreme source of confusion is the backreference feature, described in [datamapping_relations_backreferences](rel:datamapping_relations_backreferences). This feature allows two types of objects to maintain attributes that reference each other, typically one object maintaining a list of elements of the other side, which contains a scalar reference to the list-holding object. When you append an element to the list, the element gets a "backreference" back to the object which has the list. When you attach the list-holding element to the child element, the child element gets attached to the list. *This feature has nothing to do whatsoever with the Unit of Work.*`*` It is strictly a small convenience feature intended to support the developer's manual manipulation of in-memory objects, and the backreference operation happens at the moment objects are attached or removed to/from each other, independent of any kind of database operation. It does not change the golden rule, that the developer is reponsible for maintaining in-memory object relationships. 
+#### close() {@name=close} -`*` there is an internal relationship between two `relations` that have a backreference, which state that a change operation is only logged once to the unit of work instead of two separate changes since the two changes are "equivalent", so a backreference does affect the information that is sent to the Unit of Work. But the Unit of Work itself has no knowledge of this arrangement and has no ability to affect it. +This method first calls `clear()`, removing all objects from this `Session`, and then insures that any transactional resources are closed. -#### Delete {@name=delete} +#### delete() {@name=delete} -The delete call places an object or objects into the Unit of Work's list of objects to be marked as deleted: +The `delete` method places an instance into the Unit of Work's list of objects to be marked as deleted: {python} - # mark three objects to be deleted - objectstore.get_session().delete(obj1, obj2, obj3) + # mark two objects to be deleted + session.delete(obj1) + session.delete(obj2) - # commit - objectstore.get_session().commit() - -When objects which contain references to other objects are deleted, the mappers for those related objects will issue UPDATE statements for those objects that should no longer contain references to the deleted object, setting foreign key identifiers to NULL. Similarly, when a mapper contains relations with the `private=True` option, DELETE statements will be issued for objects within that relationship in addition to that of the primary deleted object; this is called a *cascading delete*. + # flush + session.flush() -As stated before, the purpose of delete is strictly to issue DELETE statements to the database. It does not affect the in-memory structure of objects, other than changing the identifying attributes on objects, such as setting foreign key identifiers on updated rows to None. 
It has no effect on the status of references between object instances, nor any effect on the Python garbage-collection status of objects. +The delete operation will have an effect on instances that are attached to the deleted instance according to the `cascade` style of the relationship; cascade rules are described further in the following section. By default, associated instances may need to be updated in the database to reflect that they no longer are associated with the parent object, before the parent is deleted. If the relationship specifies `cascade="delete"`, then the associated instance will also be deleted upon flush, assuming it is still attached to the parent. If the relationship additionally includes the `delete-orphan` cascade style, the associated instance will be deleted if it is still attached to the parent, or is unattached to any other parent. -#### Clear {@name=clear} +The `delete()` operation has no relationship to the in-memory status of the instance, including usage of the `del` Python statement. An instance marked as deleted and flushed will still exist within memory until references to it are freed; similarly, removing an instance from memory via the `del` statement will have no effect, since the persistent instance will still be referenced by its Session. Obviously, if the instance is removed from the Session and then totally dereferenced, it will no longer exist in memory, but also won't exist in any Session and is therefore not deleted from the database. -To clear out the current thread's UnitOfWork, which has the effect of discarding the Identity Map and the lists of all objects that have been modified, just issue a clear: +#### clear() {@name=clear} + +This method detaches all instances from the Session, sending them to the detached or transient state as applicable, and replaces the underlying UnitOfWork with a new one. 
{python} - # via module - objectstore.clear() - - # or via Session - objectstore.get_session().clear() + session.clear() -This is the easiest way to "start fresh", as in a web application that wants to have a newly loaded graph of objects on each request. Any object instances created before the clear operation should either be discarded or at least not used with any Mapper or Unit Of Work operations (with the exception of `import_instance()`), as they no longer have any relationship to the current Unit of Work, and their behavior with regards to the current session is undefined. +The `clear()` method is particularly useful with a "default context" session such as a thread-local session, which can stay attached to the current thread to handle a new field of objects without having to re-attach a new Session. -#### Refresh / Expire {@name=refreshexpire} +#### refresh() / expire() {@name=refreshexpire} To assist with the Unit of Work's "sticky" behavior, individual objects can have all of their attributes immediately re-loaded from the database, or marked as "expired" which will cause a re-load to occur upon the next access of any of the object's mapped attributes. This includes all relationships, so lazy-loaders will be re-initialized, eager relationships will be repopulated. Any changes marked on the object are discarded: {python} # immediately re-load attributes on obj1, obj2 - session.refresh(obj1, obj2) + session.refresh(obj1) + session.refresh(obj2) # expire objects obj1, obj2, attributes will be reloaded # on the next access: - session.expire(obj1, obj2, obj3) + session.expire(obj1) + session.expire(obj2) -#### Expunge {@name=expunge} +#### expunge() {@name=expunge} -Expunge simply removes all record of an object from the current Session. 
This includes the identity map, and all history-tracking lists: +Expunge removes an object from the Session, sending persistent instances to the detached state, and pending instances to the transient state: {python} session.expunge(obj1) -Use `expunge` when youd like to remove an object altogether from memory, such as before calling `del` on it, which will prevent any "ghost" operations occuring when the session is committed. +Use `expunge` when you'd like to remove an object altogether from memory, such as before calling `del` on it, which will prevent any "ghost" operations occurring when the session is flushed. -#### Import Instance {@name=import} +#### bind\_mapper() / bind\_table() {@name=bind} -The _instance_key attribute placed on object instances is designed to work with objects that are serialized into strings and brought back again. As it contains no references to internal structures or database connections, applications that use caches or session storage which require serialization (i.e. pickling) can store SQLAlchemy-loaded objects. However, as mentioned earlier, an object with a particular database identity is only allowed to exist uniquely within the current unit-of-work scope. So, upon deserializing such an object, it has to "check in" with the current Session. This is achieved via the `import_instance()` method: +Both of these methods receive two arguments; in the case of `bind_mapper()`, it is a `Mapper` and an `Engine` or `Connection` instance; in the case of `bind_table()`, it is a `Table` instance or other `Selectable` (such as an `Alias`, `Select`, etc.), and an `Engine` or `Connection` instance. 
+ + {python} + engine1 = create_engine('sqlite:///file1.db') + engine2 = create_engine('mysql://localhost') + + sqlite_connection = engine1.connect() + + sess = create_session() + + sess.bind_mapper(mymapper, sqlite_connection) # bind mymapper operations to a single SQLite connection + sess.bind_table(email_addresses_table, engine2) # bind operations with the email_addresses_table to mysql + +Normally, when a `Session` is created via `create_session()` with no arguments, the Session has no awareness of individual `Engines`, and when mappers use the `Session` to retrieve connections, the underlying `MetaData` each `Table` is associated with is expected to be "bound" to an `Engine`, else no engine can be located and an exception is raised. A second form of `create_session()` takes the argument `bind_to=engine_or_connection`, where all SQL operations performed by this `Session` use the single `Engine` or `Connection` (collectively known as a `Connectable`) passed to the constructor. With `bind_mapper()` and `bind_table()`, the operations of individual mapper and/or tables are bound to distinct engines or connections, thereby overriding not only the engine which may be "bound" to the underlying `MetaData`, but also the `Engine` or `Connection` which may have been passed to the `create_session()` function. Configurations which interact with multiple explicit database connections at one time must use either or both of these methods in order to associate `Session` operations with the appropriate connection resource. + +Binding a `Mapper` to a resource takes precedence over a `Table` bind, meaning if mapper A is associated with table B, and the Session binds mapper A to connection X and table B to connection Y, an operation with mapper A will use connection X, not connection Y. + +#### update() {@name=update} + +The update() method is used *only* with detached instances. 
A detached instance only exists if its `Session` was cleared or closed, or the instance was `expunge()`d from its session. `update()` will re-attach the detached instance with this Session, bringing it back to the persistent state, and allowing any changes on the instance to be saved when the `Session` is next `flush`ed. If the instance is already attached to an existing `Session`, an exception is raised. + +A detached instance also can be automatically `update`ed if it is associated with a parent object which specifies `save-update` within its `cascade` rules, and that parent is already attached or becomes attached to a Session. For more information on `cascade`, see the next section. + +The `save_or_update()` method is a convenience method which will call the `save()` or `update()` methods appropriately depending on whether or not the instance has a database identity (but the instance still must be unattached). + +#### save\_or\_update() {@name=saveorupdate} + +This method is a combination of the `save()` and `update()` methods, which will examine the given instance for a database identity (i.e. if it is transient or detached), and will call the implementation of `save()` or `update()` as appropriate. Use `save_or_update()` to add unattached instances to a session when you're not sure if they were newly created or not. Like `save()` and `update()`, `save_or_update()` cascades along the `save-update` cascade indicator, described in the `cascade` section below. + +#### merge() {@name=merge} + +Feature Status: [Alpha Implementation][alpha_implementation] + +`merge()` is used to return the persistent version of an instance that is not attached to this Session. When passed an instance, if an instance with its database identity already exists within this Session, it is returned. If the instance does not exist in this Session, it is loaded from the database and then returned. 
+ +A future version of `merge()` will also update the Session's instance with the state of the given instance (hence the name "merge"). + +This method is useful for bringing in objects which may have been restored from a serialization, such as those stored in an HTTP session: {python} # deserialize an object myobj = pickle.loads(mystring) - # "import" it. if the objectstore already had this object in the + # "merge" it. if the session already had this object in the # identity map, then you get back the one from the current session. - myobj = session.import_instance(myobj) + myobj = session.merge(myobj) + +Note that `merge()` *does not* associate the given instance with the Session; it remains detached (or attached to whatever Session it was already attached to). -Note that the import_instance() function will either mark the deserialized object as the official copy in the current identity map, which includes updating its _instance_key with the current application's class instance, or it will discard it and return the corresponding object that was already present. Thats why its important to receive the return results from the method and use the result as the official object instance. +### Cascade rules {@name=cascade} + +Feature Status: [Alpha Implementation][alpha_implementation] -#### Begin {@name=begin} +Mappers support the concept of configurable *cascade* behavior on `relation()`s. This behavior controls how the Session should treat the instances that have a parent-child relationship with another instance that is operated upon by the Session. Cascade is indicated as a comma-separated list of string keywords, with the possible values `all`, `delete`, `save-update`, `refresh-expire`, `merge`, `expunge`, and `delete-orphan`. -The "scope" of the unit of work commit can be controlled further by issuing a begin(). A begin operation constructs a new UnitOfWork object and sets it as the currently used UOW. 
It maintains a reference to the original UnitOfWork as its "parent", and shares the same identity map of objects that have been loaded from the database within the scope of the parent UnitOfWork. However, the "new", "dirty", and "deleted" lists are empty. This has the effect that only changes that take place after the begin() operation get logged to the current UnitOfWork, and therefore those are the only changes that get commit()ted. When the commit is complete, the "begun" UnitOfWork removes itself and places the parent UnitOfWork as the current one again. -The begin() method returns a transactional object, upon which you can call commit() or rollback(). *Only this transactional object controls the transaction* - commit() upon the Session will do nothing until commit() or rollback() is called upon the transactional object. +Cascading is configured by setting the `cascade` keyword argument on a `relation()`: {python} - # modify an object - myobj1.foo = "something new" - - # begin - trans = session.begin() - - # modify another object - myobj2.lala = "something new" - - # only 'myobj2' is saved - trans.commit() - -begin/commit supports the same "nesting" behavior as the SQLEngine (note this behavior is not the original "nested" behavior), meaning that many begin() calls can be made, but only the outermost transactional object will actually perform a commit(). Similarly, calls to the commit() method on the Session, which might occur in function calls within the transaction, will not do anything; this allows an external function caller to control the scope of transactions used within the functions. 
- -### Advanced UnitOfWork Management {@name=advscope} + mapper(Order, order_table, properties={ + 'items' : relation(Item, items_table, cascade="all, delete-orphan"), + 'customer' : relation(User, users_table, user_orders_table, cascade="save-update"), + }) -#### Nesting UnitOfWork in a Database Transaction {@name=transactionnesting} +The above mapper specifies two relations, `items` and `customer`. The `items` relationship specifies "all, delete-orphan" as its `cascade` value, indicating that all `save`, `update`, `merge`, `expunge`, `refresh`, `delete` and `expire` operations performed on a parent `Order` instance should also be performed on the child `Item` instances attached to it (`save` and `update` are cascaded using the `save_or_update()` method, so that the database identity of the instance doesn't matter). The `delete-orphan` cascade value additionally indicates that if an `Item` instance is no longer associated with an `Order`, it should also be deleted. The "all, delete-orphan" cascade argument allows a so-called *lifecycle* relationship between an `Order` and an `Item` object. -The UOW commit operation places its INSERT/UPDATE/DELETE operations within the scope of a database transaction controlled by a SQLEngine: +The `customer` relationship specifies only the "save-update" cascade value, indicating most operations will not be cascaded from a parent `Order` instance to a child `User` instance, except for if the `Order` is attached with a particular session, either via the `save()`, `update()`, or `save_or_update()` method. 
This means you can nest begin and commits and only have the outermost begin/commit pair actually take effect (rollbacks however, abort the whole operation at any stage). From this it follows that the UnitOfWork commit operation can be nested within a transaction as well: +Additionally, when a child item is attached to a parent item that specifies the "save-update" cascade value on the relationship, the child is automatically passed to `save_or_update()` (and the operation is further cascaded to the child item). + +Note that cascading doesn't do anything that isn't possible by manually calling Session methods on individual instances within a hierarchy, it merely automates common operations on a group of associated instances. + +The default value for `cascade` on `relation()`s is `save-update`, and the `private=True` keyword argument is a synonym for `cascade="all, delete-orphan"`. + +### SessionTransaction {@name=transaction} + +SessionTransaction is a multi-engine transaction manager, which aggregates one or more Engine/Connection pairs and keeps track of a Transaction object for each one. As the Session receives requests to execute SQL statements, it uses the Connection that is referenced by the SessionTransaction. At commit time, the underlying Session is flushed, and each Transaction is then committed. + +Example usage is as follows: {python} - engine.begin() + sess = create_session() + trans = sess.create_transaction() try: - # run objectstore update operations - except: - engine.rollback() - raise - engine.commit() - -If you recall from the [dbengine_transactions](rel:dbengine_transactions) section, the engine's begin()/commit() methods support reentrant behavior. 
At the point of the first `get()` call, a connection resource is opened off the engine that corresponds to the Item classes' mapper and is stored within the `SessionTransaction` with an open `Transaction`. When `trans.commit()` is called, the `flush()` method is called on the `Session` and the corresponding update statements are issued to the database within the scope of the transaction already opened; afterwards, the underying Transaction is committed, and connection resources are freed. -Sessions can be created on an ad-hoc basis and used for individual groups of objects and operations. This has the effect of bypassing the normal thread-local Session and explicitly using a particular Session: +`SessionTransaction`, like the `Transaction` off of `Connection` also supports "nested" behavior, and is safe to pass to other functions which then issue their own `begin()`/`commit()` pair; only the outermost `begin()`/`commit()` pair actually affects the transaction, and any call to `rollback()` within a particular call stack will issue a rollback. - {python} - # make a new Session with a global UnitOfWork - s = objectstore.Session() - - # make objects bound to this Session - x = MyObj(_sa_session=s) - - # perform mapper operations bound to this Session - # (this function coming soon) - r = MyObj.mapper.using(s).select_by(id=12) - - # get the session that corresponds to an instance - s = objectstore.get_session(x) - - # commit - s.commit() +Note that while SessionTransaction is capable of tracking multiple transactions across multiple databases, it currently is in no way a fully functioning two-phase commit engine; generally, when dealing with multiple databases simultaneously, there is the distinct possibility that a transaction can succeed on the first database and fail on the second, which for some applications may be an invalid state. 
If this is an issue, its best to either refrain from spanning transactions across databases, or to look into some of the available technologies in this area, such as [Zope](http://www.zope.org) which offers a two-phase commit engine; some users have already created their own SQLAlchemy/Zope hybrid implementations to deal with scenarios like these. - # perform a block of operations with this session set within the current scope - objectstore.push_session(s) - try: - r = mapper.select_by(id=12) - x = new MyObj() - objectstore.commit() - finally: - objectstore.pop_session() +#### Using SQL with SessionTransaction {@name=sql} -##### Nested Transaction Sessions {@name=nested} +The SessionTransaction can interact with direct SQL queries in two general ways. Either specific `Connection` objects can be associated with the `SessionTransaction`, which are then useable both for direct SQL as well as within `flush()` operations performed by the `SessionTransaction`, or via accessing the `Connection` object automatically referenced within the `SessionTransaction`. -Sessions also now support a "nested transaction" feature whereby a second Session can use a different database connection. 
This can be used inside of a larger database transaction to issue commits to the database that will be committed independently of the larger transaction's status: +To associate a specific `Connection` with the `SessionTransaction`, use the `add()` method: - {python} - engine.begin() + {python title="Associate a Connection with the SessionTransaction"} + connection = engine.connect() + trans = session.create_transaction() try: - a = MyObj() - b = MyObj() - - sess = Session(nest_on=engine) - objectstore.push_session(sess) - try: - c = MyObj() - objectstore.commit() # will commit "c" to the database, - # even if the external transaction rolls back - finally: - objectstore.pop_session() - - objectstore.commit() # commit "a" and "b" to the database - engine.commit() + trans.add(connection) + connection.execute(mytable.update(), {'col1':4, 'col2':17}) + session.flush() # flush() operation will use the same connection + trans.commit() except: - engine.rollback() + trans.rollback() raise -#### Custom Session Objects/Custom Scopes {@name=scope} +The `add()` method will key the `Connection`'s underlying `Engine` to this `SessionTransaction`. When mapper operations are performed against this `Engine`, the `Connection` explicitly added will be used. This **overrides** any other `Connection` objects that the underlying Session was associated with, corresponding to the underlying `Engine` of that `Connection`. However, if the `SessionTransaction` itself is already associated with a `Connection`, then an exception is thrown. -For users who want to make their own Session subclass, or replace the algorithm used to return scoped Session objects (i.e. the objectstore.get_session() method): +The other way is just to use the `Connection` referenced by the `SessionTransaction`. 
This is performed via the `connection()` method, and requires passing in a class or `Mapper` which indicates which underlying `Connection` should be returned (recall that different `Mappers` may use different underlying `Engines`). If the `class_or_mapper` argument is `None`, then the `Session` must be globally bound to a specific `Engine` when it was constructed, else the method returns `None`. - {python title="Create a Session"} - # make a new Session - s = objectstore.Session() + {python title="Get a Connection from the SessionTransaction"} + trans = session.create_transaction() + try: + connection = trans.connection(UserClass) # get the Connection used by the UserClass' Mapper + connection.execute(mytable.update(), {'col1':4, 'col2':17}) + trans.commit() + except: + trans.rollback() + raise + +The `connection()` method also exists on the `Session` object itself, and can be called regardless of whether or not a `SessionTransaction` is in progress. If a `SessionTransaction` is in progress, it will return the connection referenced by the transaction. If an `Engine` is being used with `threadlocal` strategy, the `Connection` returned will correspond to the connection resources that are bound to the current thread, if any (i.e. it is obtained by calling `contextual_connection()`). - # set it as the current thread-local session - objectstore.session_registry.set(s) +#### Using Engine-level Transactions with Sessions - {python title="Create a custom Registry Algorithm"} - # set the objectstore's session registry to a different algorithm +The transactions issued by `SessionTransaction` as well as internally by the `Session`'s `flush()` operation use the same `Transaction` object off of `Connection` that is publically available. Recall that this object supports "nestable" behavior, meaning any number of actors can call `begin()` off a particular `Connection` object, and they will all be managed within the scope of a single transaction. 
Therefore, the `flush()` operation can similarly take place within the scope of a regular `Transaction`: - def create_session(): - """creates new sessions""" - return objectstore.Session() - def mykey(): - """creates contextual keys to store scoped sessions""" - return "mykey" - - objectstore.session_registry = sqlalchemy.util.ScopedRegistry(createfunc=create_session, scopefunc=mykey) + {python title="Transactions with Sessions"} + connection = engine.connect() # Connection + session = create_session(bind_to=connection) # Session bound to the Connection + trans = connection.begin() # start transaction + try: + stuff = session.query(MyClass).select() # Session operation uses connection + stuff[2].foo = 'bar' + connection.execute(mytable.insert(), dict(id=12, value="bar")) # use connection explicitly + session.flush() # Session flushes with "connection", using transaction "trans" + trans.commit() # commit + except: + trans.rollback() # or rollback + raise -#### Analyzing Object Commits {@name=logging} +### Analyzing Object Flushes {@name=logging} -The objectstore module can log an extensive display of its "commit plans", which is a graph of its internal representation of objects before they are committed to the database. To turn this logging on: +The session module can log an extensive display of its "flush plans", which is a graph of its internal representation of objects before they are written to the database. 
To turn this logging on: {python} - # make an engine with echo_uow - engine = create_engine('myengine...', echo_uow=True) + # make an Session with echo_uow + session = create_session(echo_uow=True) -Commits will then dump to the standard output displays like the following: +The `flush()` operation will then dump to the standard output displays like the following: {code} Task dump: @@ -443,4 +468,8 @@ Commits will then dump to the standard output displays like the following: |---- The above graph can be read straight downwards to determine the order of operations. It indicates "save User 6016624, process each element in the 'addresses' list on User 6016624, save Address 6034384, Address 6034256". + +Of course, one can also get a good idea of the order of operations just by logging the actual SQL statements executed. + +The format of the above display is definitely a work in progress and amazingly, is far simpler to read than it was in earlier releases. It will hopefully be further refined in future releases to be more intuitive (while not losing any information). diff --git a/doc/build/testdocs.py b/doc/build/testdocs.py index 8394acf1e..33da3db25 100644 --- a/doc/build/testdocs.py +++ b/doc/build/testdocs.py @@ -1,6 +1,18 @@ +import sys
+sys.path = ['../../lib', './lib/'] + sys.path
+
import os
import re
import doctest
+import sqlalchemy.util as util
+
+# monkeypatch a plain logger
+class Logger(object):
+ def __init__(self, *args, **kwargs):
+ pass
+ def write(self, msg):
+ print msg
+util.Logger = Logger
def teststring(s, name, globs=None, verbose=None, report=True,
optionflags=0, extraglobs=None, raise_on_error=False,
@@ -34,16 +46,16 @@ def teststring(s, name, globs=None, verbose=None, report=True, return runner.failures, runner.tries
-def replace_file(s, oldfile, newfile):
- engine = r"(^\s*>>>\s*[a-zA-Z_]\w*\s*=\s*create_engine\('sqlite',\s*\{'filename':\s*')" + oldfile+ "('\}\)$)"
+def replace_file(s, newfile):
+ engine = r"'(sqlite|postgres|mysql):///.*'"
engine = re.compile(engine, re.MULTILINE)
- s, n = re.subn(engine, r'\1' + newfile + r'\2', s, 1)
+ s, n = re.subn(engine, "'sqlite:///" + newfile + "'", s)
if not n:
raise ValueError("Couldn't find suitable create_engine call to replace '%s' in it" % oldfile)
return s
filename = 'content/tutorial.txt'
s = open(filename).read()
-s = replace_file(s, 'tutorial.db', ':memory:')
+s = replace_file(s, ':memory:')
teststring(s, filename)
diff --git a/doc/build/txt2myt.py b/doc/build/txt2myt.py index 89730b3aa..3301a2b20 100644 --- a/doc/build/txt2myt.py +++ b/doc/build/txt2myt.py @@ -90,21 +90,31 @@ def process_code_blocks(tree): # consumed as Myghty comments.
text = re.compile(r'^(?!<&)', re.M).sub(' ', text)
- sqlre = re.compile(r'{sql}(.*?)((?:SELECT|INSERT|DELETE|UPDATE|CREATE|DROP).*?)\n\s*(\n|$)', re.S)
+ sqlre = re.compile(r'{sql}(.*?)((?:SELECT|INSERT|DELETE|UPDATE|CREATE|DROP|PRAGMA|DESCRIBE).*?)\n\s*(\n|$)', re.S)
+ if sqlre.search(text) is not None:
+ use_sliders = False
+ else:
+ use_sliders = True
+
text = sqlre.sub(r"<&formatting.myt:poplink&>\1\n<&|formatting.myt:codepopper, link='sql'&>\2</&>\n\n", text)
sqlre2 = re.compile(r'{opensql}(.*?)((?:SELECT|INSERT|DELETE|UPDATE|CREATE|DROP).*?)\n\s*(\n|$)', re.S)
text = sqlre2.sub(r"<&|formatting.myt:poppedcode &>\1\n\2</&>\n\n", text)
pre_parent = parent[pre]
+ opts = {}
if type == 'python':
- syntype = 'python'
+ opts['syntaxtype'] = 'python'
else:
- syntype = None
+ opts['syntaxtype'] = None
+
if title is not None:
- tag = MyghtyTag(CODE_BLOCK, {'title':title, 'syntaxtype':syntype})
- else:
- tag = MyghtyTag(CODE_BLOCK, {'syntaxtype':syntype})
+ opts['title'] = title
+
+ if use_sliders:
+ opts['use_sliders'] = True
+
+ tag = MyghtyTag(CODE_BLOCK, opts)
tag.text = text
tag.tail = pre.tail
pre_parent[index(pre_parent, pre)] = tag
@@ -274,6 +284,6 @@ if __name__ == '__main__': print inname, '->', outname
input = file(inname).read()
html = markdown.markdown(input)
- file(inname[:-3] + "html", 'w').write(html)
+ #file(inname[:-3] + "html", 'w').write(html)
myt = html2myghtydoc(html)
file(outname, 'w').write(myt)
diff --git a/doc/docs.css b/doc/docs.css index 6cb08c555..c86b6ded2 100644 --- a/doc/docs.css +++ b/doc/docs.css @@ -88,7 +88,7 @@ font-size: 12px; font-weight: bold; text-decoration:underline; - padding:5px; + padding:5px 5px 5px 0px; } code { @@ -197,6 +197,16 @@ pre { line-height:1.2em; } +.sliding_code { + font-family:courier, serif; + font-size:12px; + background-color: #E2E2EB; + padding:2px 2px 2px 10px; + margin: 5px 5px 5px 5px; + line-height:1.2em; + overflow:auto; +} + .codepop { font-weight:bold; font-family: verdana, sans-serif; diff --git a/doc/scripts.js b/doc/scripts.js index da87a0ac5..8558d92af 100644 --- a/doc/scripts.js +++ b/doc/scripts.js @@ -16,3 +16,10 @@ function togglePopbox(id, show, hide) { } } +function alphaApi() { + window.open("alphaapi.html", "_blank", "width=600,height=400, scrollbars=yes,resizable=yes,toolbar=no"); +} + +function alphaImplementation() { + window.open("alphaimplementation.html", "_blank", "width=600,height=400, scrollbars=yes,resizable=yes,toolbar=no"); +}
\ No newline at end of file |
