Diffstat (limited to 'doc')
106 files changed, 12438 insertions, 11184 deletions
diff --git a/doc/build/builder/__init__.py b/doc/build/builder/__init__.py deleted file mode 100644 index e69de29bb..000000000 --- a/doc/build/builder/__init__.py +++ /dev/null diff --git a/doc/build/builder/autodoc_mods.py b/doc/build/builder/autodoc_mods.py deleted file mode 100644 index 5a6e991bd..000000000 --- a/doc/build/builder/autodoc_mods.py +++ /dev/null @@ -1,102 +0,0 @@ -import re - -def autodoc_skip_member(app, what, name, obj, skip, options): - if what == 'class' and skip and \ - name in ('__init__', '__eq__', '__ne__', '__lt__', - '__le__', '__call__') and \ - obj.__doc__: - return False - else: - return skip - - -_convert_modname = { - "sqlalchemy.sql.sqltypes": "sqlalchemy.types", - "sqlalchemy.sql.type_api": "sqlalchemy.types", - "sqlalchemy.sql.schema": "sqlalchemy.schema", - "sqlalchemy.sql.elements": "sqlalchemy.sql.expression", - "sqlalchemy.sql.selectable": "sqlalchemy.sql.expression", - "sqlalchemy.sql.dml": "sqlalchemy.sql.expression", - "sqlalchemy.sql.ddl": "sqlalchemy.schema", - "sqlalchemy.sql.base": "sqlalchemy.sql.expression" -} - -_convert_modname_w_class = { - ("sqlalchemy.engine.interfaces", "Connectable"): "sqlalchemy.engine", - ("sqlalchemy.sql.base", "DialectKWArgs"): "sqlalchemy.sql.base", -} - -def _adjust_rendered_mod_name(modname, objname): - if (modname, objname) in _convert_modname_w_class: - return _convert_modname_w_class[(modname, objname)] - elif modname in _convert_modname: - return _convert_modname[modname] - else: - return modname - -# im sure this is in the app somewhere, but I don't really -# know where, so we're doing it here. -_track_autodoced = {} -_inherited_names = set() -def autodoc_process_docstring(app, what, name, obj, options, lines): - if what == "class": - _track_autodoced[name] = obj - - # need to translate module names for bases, others - # as we document lots of symbols in namespace modules - # outside of their source - bases = [] - for base in obj.__bases__: - if base is not object: - bases.append(":class:`%s.%s`" % ( - _adjust_rendered_mod_name(base.__module__, base.__name__), - base.__name__)) - - if bases: - lines[:0] = [ - "Bases: %s" % (", ".join(bases)), - "" - ] - - - elif what in ("attribute", "method") and \ - options.get("inherited-members"): - m = re.match(r'(.*?)\.([\w_]+)$', name) - if m: - clsname, attrname = m.group(1, 2) - if clsname in _track_autodoced: - cls = _track_autodoced[clsname] - for supercls in cls.__mro__: - if attrname in supercls.__dict__: - break - if supercls is not cls: - _inherited_names.add("%s.%s" % (supercls.__module__, supercls.__name__)) - _inherited_names.add("%s.%s.%s" % (supercls.__module__, supercls.__name__, attrname)) - lines[:0] = [ - ".. 
container:: inherited_member", - "", - " *inherited from the* :%s:`~%s.%s.%s` *%s of* :class:`~%s.%s`" % ( - "attr" if what == "attribute" - else "meth", - _adjust_rendered_mod_name(supercls.__module__, supercls.__name__), - supercls.__name__, - attrname, - what, - _adjust_rendered_mod_name(supercls.__module__, supercls.__name__), - supercls.__name__ - ), - "" - ] - -def missing_reference(app, env, node, contnode): - if node.attributes['reftarget'] in _inherited_names: - return node.children[0] - else: - return None - - -def setup(app): - app.connect('autodoc-skip-member', autodoc_skip_member) - app.connect('autodoc-process-docstring', autodoc_process_docstring) - - app.connect('missing-reference', missing_reference) diff --git a/doc/build/builder/dialect_info.py b/doc/build/builder/dialect_info.py deleted file mode 100644 index 48626393d..000000000 --- a/doc/build/builder/dialect_info.py +++ /dev/null @@ -1,175 +0,0 @@ -import re -from sphinx.util.compat import Directive -from docutils import nodes - -class DialectDirective(Directive): - has_content = True - - _dialects = {} - - def _parse_content(self): - d = {} - d['default'] = self.content[0] - d['text'] = [] - idx = 0 - for line in self.content[1:]: - idx += 1 - m = re.match(r'\:(.+?)\: +(.+)', line) - if m: - attrname, value = m.group(1, 2) - d[attrname] = value - else: - break - d["text"] = self.content[idx + 1:] - return d - - def _dbapi_node(self): - - dialect_name, dbapi_name = self.dialect_name.split("+") - - try: - dialect_directive = self._dialects[dialect_name] - except KeyError: - raise Exception("No .. dialect:: %s directive has been established" - % dialect_name) - - output = [] - - content = self._parse_content() - - parent_section_ref = self.state.parent.children[0]['ids'][0] - self._append_dbapi_bullet(dialect_name, dbapi_name, - content['name'], parent_section_ref) - - p = nodes.paragraph('', '', - nodes.Text( - "Support for the %s database via the %s driver." % ( - dialect_directive.database_name, - content['name'] - ), - "Support for the %s database via the %s driver." 
% ( - dialect_directive.database_name, - content['name'] - ) - ), - ) - - self.state.nested_parse(content['text'], 0, p) - output.append(p) - - if "url" in content or "driverurl" in content: - sec = nodes.section( - '', - nodes.title("DBAPI", "DBAPI"), - ids=["dialect-%s-%s-url" % (dialect_name, dbapi_name)] - ) - if "url" in content: - text = "Documentation and download information (if applicable) "\ - "for %s is available at:\n" % content["name"] - uri = content['url'] - sec.append( - nodes.paragraph('', '', - nodes.Text(text, text), - nodes.reference('', '', - nodes.Text(uri, uri), - refuri=uri, - ) - ) - ) - if "driverurl" in content: - text = "Drivers for this database are available at:\n" - sec.append( - nodes.paragraph('', '', - nodes.Text(text, text), - nodes.reference('', '', - nodes.Text(content['driverurl'], content['driverurl']), - refuri=content['driverurl'] - ) - ) - ) - output.append(sec) - - - if "connectstring" in content: - sec = nodes.section( - '', - nodes.title("Connecting", "Connecting"), - nodes.paragraph('', '', - nodes.Text("Connect String:", "Connect String:"), - nodes.literal_block(content['connectstring'], - content['connectstring']) - ), - ids=["dialect-%s-%s-connect" % (dialect_name, dbapi_name)] - ) - output.append(sec) - - return output - - def _dialect_node(self): - self._dialects[self.dialect_name] = self - - content = self._parse_content() - self.database_name = content['name'] - - self.bullets = nodes.bullet_list() - text = "The following dialect/DBAPI options are available. "\ - "Please refer to individual DBAPI sections for connect information." - sec = nodes.section('', - nodes.paragraph('', '', - nodes.Text( - "Support for the %s database." % content['name'], - "Support for the %s database." % content['name'] - ), - ), - nodes.title("DBAPI Support", "DBAPI Support"), - nodes.paragraph('', '', - nodes.Text(text, text), - self.bullets - ), - ids=["dialect-%s" % self.dialect_name] - ) - - return [sec] - - def _append_dbapi_bullet(self, dialect_name, dbapi_name, name, idname): - env = self.state.document.settings.env - dialect_directive = self._dialects[dialect_name] - try: - relative_uri = env.app.builder.get_relative_uri(dialect_directive.docname, self.docname) - except: - relative_uri = "" - list_node = nodes.list_item('', - nodes.paragraph('', '', - nodes.reference('', '', - nodes.Text(name, name), - refdocname=self.docname, - refuri= relative_uri + "#" + idname - ), - #nodes.Text(" ", " "), - #nodes.reference('', '', - # nodes.Text("(connectstring)", "(connectstring)"), - # refdocname=self.docname, - # refuri=env.app.builder.get_relative_uri( - # dialect_directive.docname, self.docname) + - ## "#" + ("dialect-%s-%s-connect" % - # (dialect_name, dbapi_name)) - # ) - ) - ) - dialect_directive.bullets.append(list_node) - - def run(self): - env = self.state.document.settings.env - self.docname = env.docname - - self.dialect_name = dialect_name = self.content[0] - - has_dbapi = "+" in dialect_name - if has_dbapi: - return self._dbapi_node() - else: - return self._dialect_node() - -def setup(app): - app.add_directive('dialect', DialectDirective) - diff --git a/doc/build/builder/mako.py b/doc/build/builder/mako.py deleted file mode 100644 index 0367bf018..000000000 --- a/doc/build/builder/mako.py +++ /dev/null @@ -1,65 +0,0 @@ -from __future__ import absolute_import - -from sphinx.application import TemplateBridge -from sphinx.jinja2glue import BuiltinTemplateLoader -from mako.lookup import TemplateLookup -import os - -rtd = os.environ.get('READTHEDOCS', 
None) == 'True' - -class MakoBridge(TemplateBridge): - def init(self, builder, *args, **kw): - self.jinja2_fallback = BuiltinTemplateLoader() - self.jinja2_fallback.init(builder, *args, **kw) - - builder.config.html_context['release_date'] = builder.config['release_date'] - builder.config.html_context['site_base'] = builder.config['site_base'] - - self.lookup = TemplateLookup(directories=builder.config.templates_path, - #format_exceptions=True, - imports=[ - "from builder import util" - ] - ) - - if rtd: - # RTD layout, imported from sqlalchemy.org - import urllib2 - template = urllib2.urlopen(builder.config['site_base'] + "/docs_adapter.mako").read() - self.lookup.put_string("docs_adapter.mako", template) - - setup_ctx = urllib2.urlopen(builder.config['site_base'] + "/docs_adapter.py").read() - lcls = {} - exec(setup_ctx, lcls) - self.setup_ctx = lcls['setup_context'] - - def setup_ctx(self, context): - pass - - def render(self, template, context): - template = template.replace(".html", ".mako") - context['prevtopic'] = context.pop('prev', None) - context['nexttopic'] = context.pop('next', None) - - # local docs layout - context['rtd'] = False - context['toolbar'] = False - context['base'] = "static_base.mako" - - # override context attributes - self.setup_ctx(context) - - context.setdefault('_', lambda x: x) - return self.lookup.get_template(template).render_unicode(**context) - - def render_string(self, template, context): - # this is used for .js, .css etc. and we don't have - # local copies of that stuff here so use the jinja render. - return self.jinja2_fallback.render_string(template, context) - -def setup(app): - app.config['template_bridge'] = "builder.mako.MakoBridge" - app.add_config_value('release_date', "", 'env') - app.add_config_value('site_base', "", 'env') - app.add_config_value('build_number', "", 'env') - diff --git a/doc/build/builder/sqlformatter.py b/doc/build/builder/sqlformatter.py deleted file mode 100644 index 2d8074900..000000000 --- a/doc/build/builder/sqlformatter.py +++ /dev/null @@ -1,132 +0,0 @@ -from pygments.lexer import RegexLexer, bygroups, using -from pygments.token import Token -from pygments.filter import Filter -from pygments.filter import apply_filters -from pygments.lexers import PythonLexer, PythonConsoleLexer -from sphinx.highlighting import PygmentsBridge -from pygments.formatters import HtmlFormatter, LatexFormatter - -import re - - -def _strip_trailing_whitespace(iter_): - buf = list(iter_) - if buf: - buf[-1] = (buf[-1][0], buf[-1][1].rstrip()) - for t, v in buf: - yield t, v - - -class StripDocTestFilter(Filter): - def filter(self, lexer, stream): - for ttype, value in stream: - if ttype is Token.Comment and re.match(r'#\s*doctest:', value): - continue - yield ttype, value - -class PyConWithSQLLexer(RegexLexer): - name = 'PyCon+SQL' - aliases = ['pycon+sql'] - - flags = re.IGNORECASE | re.DOTALL - - tokens = { - 'root': [ - (r'{sql}', Token.Sql.Link, 'sqlpopup'), - (r'{opensql}', Token.Sql.Open, 'opensqlpopup'), - (r'.*?\n', using(PythonConsoleLexer)) - ], - 'sqlpopup': [ - ( - r'(.*?\n)((?:PRAGMA|BEGIN|SELECT|INSERT|DELETE|ROLLBACK|' - 'COMMIT|ALTER|UPDATE|CREATE|DROP|PRAGMA' - '|DESCRIBE).*?(?:{stop}\n?|$))', - bygroups(using(PythonConsoleLexer), Token.Sql.Popup), - "#pop" - ) - ], - 'opensqlpopup': [ - ( - r'.*?(?:{stop}\n*|$)', - Token.Sql, - "#pop" - ) - ] - } - - -class PythonWithSQLLexer(RegexLexer): - name = 'Python+SQL' - aliases = ['pycon+sql'] - - flags = re.IGNORECASE | re.DOTALL - - tokens = { - 'root': [ - (r'{sql}', 
Token.Sql.Link, 'sqlpopup'), - (r'{opensql}', Token.Sql.Open, 'opensqlpopup'), - (r'.*?\n', using(PythonLexer)) - ], - 'sqlpopup': [ - ( - r'(.*?\n)((?:PRAGMA|BEGIN|SELECT|INSERT|DELETE|ROLLBACK' - '|COMMIT|ALTER|UPDATE|CREATE|DROP' - '|PRAGMA|DESCRIBE).*?(?:{stop}\n?|$))', - bygroups(using(PythonLexer), Token.Sql.Popup), - "#pop" - ) - ], - 'opensqlpopup': [ - ( - r'.*?(?:{stop}\n*|$)', - Token.Sql, - "#pop" - ) - ] - } - -class PopupSQLFormatter(HtmlFormatter): - def _format_lines(self, tokensource): - buf = [] - for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]): - if ttype in Token.Sql: - for t, v in HtmlFormatter._format_lines(self, iter(buf)): - yield t, v - buf = [] - - if ttype is Token.Sql: - yield 1, "<div class='show_sql'>%s</div>" % \ - re.sub(r'(?:[{stop}|\n]*)$', '', value) - elif ttype is Token.Sql.Link: - yield 1, "<a href='#' class='sql_link'>sql</a>" - elif ttype is Token.Sql.Popup: - yield 1, "<div class='popup_sql'>%s</div>" % \ - re.sub(r'(?:[{stop}|\n]*)$', '', value) - else: - buf.append((ttype, value)) - - for t, v in _strip_trailing_whitespace( - HtmlFormatter._format_lines(self, iter(buf))): - yield t, v - -class PopupLatexFormatter(LatexFormatter): - def _filter_tokens(self, tokensource): - for ttype, value in apply_filters(tokensource, [StripDocTestFilter()]): - if ttype in Token.Sql: - if ttype is not Token.Sql.Link and ttype is not Token.Sql.Open: - yield Token.Literal, re.sub(r'{stop}', '', value) - else: - continue - else: - yield ttype, value - - def format(self, tokensource, outfile): - LatexFormatter.format(self, self._filter_tokens(tokensource), outfile) - -def setup(app): - app.add_lexer('pycon+sql', PyConWithSQLLexer()) - app.add_lexer('python+sql', PythonWithSQLLexer()) - - PygmentsBridge.html_formatter = PopupSQLFormatter - PygmentsBridge.latex_formatter = PopupLatexFormatter - diff --git a/doc/build/builder/util.py b/doc/build/builder/util.py deleted file mode 100644 index a9dcff001..000000000 --- a/doc/build/builder/util.py +++ /dev/null @@ -1,12 +0,0 @@ -import re - -def striptags(text): - return re.compile(r'<[^>]*>').sub('', text) - -def go(m): - # .html with no anchor if present, otherwise "#" for top of page - return m.group(1) or '#' - -def strip_toplevel_anchors(text): - return re.compile(r'(\.html)?#[-\w]+-toplevel').sub(go, text) - diff --git a/doc/build/builder/viewsource.py b/doc/build/builder/viewsource.py deleted file mode 100644 index 088cef2c2..000000000 --- a/doc/build/builder/viewsource.py +++ /dev/null @@ -1,209 +0,0 @@ -from docutils import nodes -from sphinx.ext.viewcode import collect_pages -from sphinx.pycode import ModuleAnalyzer -import imp -from sphinx import addnodes -import re -from sphinx.util.compat import Directive -import os -from docutils.statemachine import StringList -from sphinx.environment import NoUri - -import sys - -py2k = sys.version_info < (3, 0) -if py2k: - text_type = unicode -else: - text_type = str - -def view_source(name, rawtext, text, lineno, inliner, - options={}, content=[]): - - env = inliner.document.settings.env - - node = _view_source_node(env, text, None) - return [node], [] - -def _view_source_node(env, text, state): - # pretend we're using viewcode fully, - # install the context it looks for - if not hasattr(env, '_viewcode_modules'): - env._viewcode_modules = {} - - modname = text - text = modname.split(".")[-1] + ".py" - - # imitate sphinx .<modname> syntax - if modname.startswith("."): - # see if the modname needs to be corrected in terms - # of current module context - 
base_module = env.temp_data.get('autodoc:module') - if base_module is None: - base_module = env.temp_data.get('py:module') - - if base_module: - modname = base_module + modname - - urito = env.app.builder.get_relative_uri - - # we're showing code examples which may have dependencies - # which we really don't want to have required so load the - # module by file, not import (though we are importing) - # the top level module here... - pathname = None - for token in modname.split("."): - file_, pathname, desc = imp.find_module(token, [pathname] if pathname else None) - if file_: - file_.close() - - # unlike viewcode which silently traps exceptions, - # I want this to totally barf if the file can't be loaded. - # a failed build better than a complete build missing - # key content - analyzer = ModuleAnalyzer.for_file(pathname, modname) - # copied from viewcode - analyzer.find_tags() - if not isinstance(analyzer.code, text_type): - code = analyzer.code.decode(analyzer.encoding) - else: - code = analyzer.code - - if state is not None: - docstring = _find_mod_docstring(analyzer) - if docstring: - # get rid of "foo.py" at the top - docstring = re.sub(r"^[a-zA-Z_0-9]+\.py", "", docstring) - - # strip - docstring = docstring.strip() - - # yank only first paragraph - docstring = docstring.split("\n\n")[0].strip() - else: - docstring = None - - entry = code, analyzer.tags, {} - env._viewcode_modules[modname] = entry - pagename = '_modules/' + modname.replace('.', '/') - - try: - refuri = urito(env.docname, pagename) - except NoUri: - # if we're in the latex builder etc., this seems - # to be what we get - refuri = None - - - if docstring: - # embed the ref with the doc text so that it isn't - # a separate paragraph - if refuri: - docstring = "`%s <%s>`_ - %s" % (text, refuri, docstring) - else: - docstring = "``%s`` - %s" % (text, docstring) - para = nodes.paragraph('', '') - state.nested_parse(StringList([docstring]), 0, para) - return_node = para - else: - if refuri: - refnode = nodes.reference('', '', - nodes.Text(text, text), - refuri=urito(env.docname, pagename) - ) - else: - refnode = nodes.Text(text, text) - - if state: - return_node = nodes.paragraph('', '', refnode) - else: - return_node = refnode - - return return_node - -from sphinx.pycode.pgen2 import token - -def _find_mod_docstring(analyzer): - """attempt to locate the module-level docstring. - - Note that sphinx autodoc just uses ``__doc__``. But we don't want - to import the module, so we need to parse for it. 
- - """ - analyzer.tokenize() - for type_, parsed_line, start_pos, end_pos, raw_line in analyzer.tokens: - if type_ == token.COMMENT: - continue - elif type_ == token.STRING: - return eval(parsed_line) - else: - return None - -def _parse_content(content): - d = {} - d['text'] = [] - idx = 0 - for line in content: - idx += 1 - m = re.match(r' *\:(.+?)\:(?: +(.+))?', line) - if m: - attrname, value = m.group(1, 2) - d[attrname] = value or '' - else: - break - d["text"] = content[idx:] - return d - -def _comma_list(text): - return re.split(r"\s*,\s*", text.strip()) - -class AutoSourceDirective(Directive): - has_content = True - - def run(self): - content = _parse_content(self.content) - - - env = self.state.document.settings.env - self.docname = env.docname - - sourcefile = self.state.document.current_source.split(os.pathsep)[0] - dir_ = os.path.dirname(sourcefile) - files = [ - f for f in os.listdir(dir_) if f.endswith(".py") - and f != "__init__.py" - ] - - if "files" in content: - # ordered listing of files to include - files = [fname for fname in _comma_list(content["files"]) - if fname in set(files)] - - node = nodes.paragraph('', '', - nodes.Text("Listing of files:", "Listing of files:") - ) - - bullets = nodes.bullet_list() - for fname in files: - modname, ext = os.path.splitext(fname) - # relative lookup - modname = "." + modname - - link = _view_source_node(env, modname, self.state) - - list_node = nodes.list_item('', - link - ) - bullets += list_node - - node += bullets - - return [node] - -def setup(app): - app.add_role('viewsource', view_source) - - app.add_directive('autosource', AutoSourceDirective) - - # from sphinx.ext.viewcode - app.connect('html-collect-pages', collect_pages) diff --git a/doc/build/changelog/changelog_07.rst b/doc/build/changelog/changelog_07.rst index 5504a0ad6..e782ba938 100644 --- a/doc/build/changelog/changelog_07.rst +++ b/doc/build/changelog/changelog_07.rst @@ -3517,7 +3517,7 @@ :tags: orm :tickets: 2122 - Some fixes to "evaulate" and "fetch" evaluation + Some fixes to "evaluate" and "fetch" evaluation when query.update(), query.delete() are called. The retrieval of records is done after autoflush in all cases, and before update/delete is diff --git a/doc/build/changelog/changelog_08.rst b/doc/build/changelog/changelog_08.rst index 6515f731d..baaa7b15b 100644 --- a/doc/build/changelog/changelog_08.rst +++ b/doc/build/changelog/changelog_08.rst @@ -2214,7 +2214,7 @@ expr1 = mycolumn > 2 bool(expr1 == expr1) - Would evaulate as ``False``, even though this is an identity + Would evaluate as ``False``, even though this is an identity comparison, because ``mycolumn > 2`` would be "grouped" before being placed into the :class:`.BinaryExpression`, thus changing its identity. :class:`.BinaryExpression` now keeps track diff --git a/doc/build/changelog/changelog_09.rst b/doc/build/changelog/changelog_09.rst index f10d48273..b1ec9cbec 100644 --- a/doc/build/changelog/changelog_09.rst +++ b/doc/build/changelog/changelog_09.rst @@ -1,3 +1,4 @@ + ============== 0.9 Changelog ============== @@ -14,6 +15,102 @@ :version: 0.9.9 .. change:: + :tags: feature, engine + :versions: 1.0.0 + + Added new user-space accessors for viewing transaction isolation + levels; :meth:`.Connection.get_isolation_level`, + :attr:`.Connection.default_isolation_level`. + + .. 
change:: + :tags: bug, postgresql + :versions: 1.0.0 + :tickets: 3174 + + Fixed bug where Postgresql dialect would fail to render an + expression in an :class:`.Index` that did not correspond directly + to a table-bound column; typically when a :func:`.text` construct + was one of the expressions within the index; or could misinterpret the + list of expressions if one or more of them were such an expression. + + .. change:: + :tags: bug, orm + :versions: 1.0.0 + :tickets: 3287 + + The "wildcard" loader options, in particular the one set up by + the :func:`.orm.load_only` option to cover all attributes not + explicitly mentioned, now takes into account the superclasses + of a given entity, if that entity is mapped with inheritance mapping, + so that attribute names within the superclasses are also omitted + from the load. Additionally, the polymorphic discriminator column + is unconditionally included in the list, just in the same way that + primary key columns are, so that even with load_only() set up, + polymorphic loading of subtypes continues to function correctly. + + .. change:: + :tags: bug, sql + :versions: 1.0.0 + :pullreq: bitbucket:41 + + Added the ``native_enum`` flag to the ``__repr__()`` output + of :class:`.Enum`, which is mostly important when using it with + Alembic autogenerate. Pull request courtesy Dimitris Theodorou. + + .. change:: + :tags: bug, orm, pypy + :versions: 1.0.0 + :tickets: 3285 + + Fixed bug where if an exception were thrown at the start of a + :class:`.Query` before it fetched results, particularly when + row processors can't be formed, the cursor would stay open with + results pending and not actually be closed. This is typically only + an issue on an interpreter like Pypy where the cursor isn't + immediately GC'ed, and can in some circumstances lead to transactions/ + locks being open longer than is desirable. + + .. change:: + :tags: change, mysql + :versions: 1.0.0 + :tickets: 3275 + + The ``gaerdbms`` dialect is no longer necessary, and emits a + deprecation warning. Google now recommends using the MySQLdb + dialect directly. + + .. change:: + :tags: bug, sql + :versions: 1.0.0 + :tickets: 3278 + + Fixed bug where using a :class:`.TypeDecorator` that implemented + a type that was also a :class:`.TypeDecorator` would fail with + Python's "Cannot create a consistent method resolution order (MRO)" + error, when any kind of SQL comparison expression were used against + an object using this type. + + .. change:: + :tags: bug, mysql + :versions: 1.0.0 + :tickets: 3274 + + Added a version check to the MySQLdb dialect surrounding the + check for 'utf8_bin' collation, as this fails on MySQL server < 5.0. + + .. change:: + :tags: enhancement, orm + :versions: 1.0.0 + + Added new method :meth:`.Session.invalidate`, functions similarly + to :meth:`.Session.close`, except also calls + :meth:`.Connection.invalidate` + on all connections, guaranteeing that they will not be returned to + the connection pool. This is useful in situations e.g. dealing + with gevent timeouts when it is not safe to use the connection further, + even for rollbacks. + + .. change:: :tags: bug, examples :versions: 1.0.0 @@ -274,7 +371,7 @@ :versions: 1.0.0 :pullrequest: bitbucket:28 - Fixed bug where :ref:`ext.mutable.MutableDict` + Fixed bug where :class:`.ext.mutable.MutableDict` failed to implement the ``update()`` dictionary method, thus not catching changes. Pull request courtesy Matt Chisholm. 
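The ``MutableDict.update()`` fix above is easiest to see with a small, self-contained mapping. The following is an illustrative sketch only; the ``JSONEncodedDict`` type and ``Item`` model are hypothetical and not part of this commit. With ``update()`` implemented on :class:`.ext.mutable.MutableDict`, a bulk change is intercepted and flags the parent object as dirty::

    import json

    from sqlalchemy import Column, Integer
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.ext.mutable import MutableDict
    from sqlalchemy.types import TypeDecorator, VARCHAR

    Base = declarative_base()

    class JSONEncodedDict(TypeDecorator):
        """Store a Python dict as a JSON string (illustrative only)."""
        impl = VARCHAR

        def process_bind_param(self, value, dialect):
            return json.dumps(value) if value is not None else None

        def process_result_value(self, value, dialect):
            return json.loads(value) if value is not None else None

    class Item(Base):
        __tablename__ = 'item'
        id = Column(Integer, primary_key=True)
        data = Column(MutableDict.as_mutable(JSONEncodedDict))

    item = Item(data={})
    item.data.update({"a": 1})  # intercepted by MutableDict; marks item as dirty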
@@ -283,9 +380,9 @@ :versions: 1.0.0 :pullrequest: bitbucket:27 - Fixed bug where a custom subclass of :ref:`ext.mutable.MutableDict` + Fixed bug where a custom subclass of :class:`.ext.mutable.MutableDict` would not show up in a "coerce" operation, and would instead - return a plain :ref:`ext.mutable.MutableDict`. Pull request + return a plain :class:`.ext.mutable.MutableDict`. Pull request courtesy Matt Chisholm. .. change:: @@ -517,7 +614,7 @@ :tags: bug, orm :tickets: 3117 - The "evaulator" for query.update()/delete() won't work with multi-table + The "evaluator" for query.update()/delete() won't work with multi-table updates, and needs to be set to `synchronize_session=False` or `synchronize_session='fetch'`; a warning is now emitted. In 1.0 this will be promoted to a full exception. @@ -537,7 +634,7 @@ :tickets: 3078 Added kw argument ``postgresql_regconfig`` to the - :meth:`.Operators.match` operator, allows the "reg config" argument + :meth:`.ColumnOperators.match` operator, allows the "reg config" argument to be specified to the ``to_tsquery()`` function emitted. Pull request courtesy Jonathan Vanasco. @@ -826,7 +923,7 @@ translated through some kind of SQL function or expression. This is kind of experimental, but the first proof of concept is a "materialized path" join condition where a path string is compared - to itself using "like". The :meth:`.Operators.like` operator has + to itself using "like". The :meth:`.ColumnOperators.like` operator has also been added to the list of valid operators to use in a primaryjoin condition. @@ -1899,8 +1996,8 @@ Fixed an issue where the C extensions in Py3K are using the wrong API to specify the top-level module function, which breaks in Python 3.4b2. Py3.4b2 changes PyMODINIT_FUNC to return - "void" instead of "PyObject *", so we now make sure to use - "PyMODINIT_FUNC" instead of "PyObject *" directly. Pull request + "void" instead of ``PyObject *``, so we now make sure to use + "PyMODINIT_FUNC" instead of ``PyObject *`` directly. Pull request courtesy cgohlke. .. change:: @@ -2884,7 +2981,7 @@ in an ``ORDER BY`` clause, if that label is also referred to in the columns clause of the select, instead of rewriting the full expression. This gives the database a better chance to - optimize the evaulation of the same expression in two different + optimize the evaluation of the same expression in two different contexts. .. seealso:: diff --git a/doc/build/changelog/changelog_10.rst b/doc/build/changelog/changelog_10.rst index f2bd43a76..2c3e26f2e 100644 --- a/doc/build/changelog/changelog_10.rst +++ b/doc/build/changelog/changelog_10.rst @@ -1,3 +1,4 @@ + ============== 1.0 Changelog ============== @@ -22,6 +23,309 @@ on compatibility concerns, see :doc:`/changelog/migration_10`. .. change:: + :tags: feature, postgresql, pypy + :tickets: 3052 + :pullreq: bitbucket:34 + + Added support for the psycopg2cffi DBAPI on pypy. Pull request + courtesy shauns. + + .. seealso:: + + :mod:`sqlalchemy.dialects.postgresql.psycopg2cffi` + + .. change:: + :tags: feature, orm + :tickets: 3262 + :pullreq: bitbucket:38 + + A warning is emitted when the same polymorphic identity is assigned + to two different mappers in the same hierarchy. This is typically a + user error and means that the two different mapping types cannot be + correctly distinguished at load time. Pull request courtesy + Sebastian Bank. + + .. 
change:: + :tags: feature, sql + :pullreq: github:150 + + The type of expression is reported when an object passed to a + SQL expression unit can't be interpreted as a SQL fragment; + pull request courtesy Ryan P. Kelly. + + .. change:: + :tags: bug, orm + :tickets: 3227, 3242, 1326 + + The primary :class:`.Mapper` of a :class:`.Query` is now passed to the + :meth:`.Session.get_bind` method when calling upon + :meth:`.Query.count`, :meth:`.Query.update`, :meth:`.Query.delete`, + as well as queries against mapped columns, + :obj:`.column_property` objects, and SQL functions and expressions + derived from mapped columns. This allows sessions that rely upon + either customized :meth:`.Session.get_bind` schemes or "bound" metadata + to work in all relevant cases. + + .. seealso:: + + :ref:`bug_3227` + + .. change:: + :tags: enhancement, sql + :tickets: 3074 + + Custom dialects that implement :class:`.GenericTypeCompiler` can + now be constructed such that the visit methods receive an indication + of the owning expression object, if any. Any visit method that + accepts keyword arguments (e.g. ``**kw``) will in most cases + receive a keyword argument ``type_expression``, referring to the + expression object that the type is contained within. For columns + in DDL, the dialect's compiler class may need to alter its + ``get_column_specification()`` method to support this as well. + The ``UserDefinedType.get_col_spec()`` method will also receive + ``type_expression`` if it provides ``**kw`` in its argument + signature. + + .. change:: + :tags: bug, sql + :tickets: 3288 + + The multi-values version of :meth:`.Insert.values` has been + repaired to work more usefully with tables that have Python- + side default values and/or functions, as well as server-side + defaults. The feature will now work with a dialect that uses + "positional" parameters; a Python callable will also be + invoked individually for each row just as is the case with an + "executemany" style invocation; a server- side default column + will no longer implicitly receive the value explicitly + specified for the first row, instead refusing to invoke + without an explicit value. + + .. seealso:: + + :ref:`bug_3288` + + .. change:: + :tags: feature, general + + Structural memory use has been improved via much more significant use + of ``__slots__`` for many internal objects. This optimization is + particularly geared towards the base memory size of large applications + that have lots of tables and columns, and greatly reduces memory + size for a variety of high-volume objects including event listening + internals, comparator objects and parts of the ORM attribute and + loader strategy system. + + .. seealso:: + + :ref:`feature_slots` + + .. change:: + :tags: bug, mysql + :tickets: 3283 + + The :class:`.mysql.SET` type has been overhauled to no longer + assume that the empty string, or a set with a single empty string + value, is in fact a set with a single empty string; instead, this + is by default treated as the empty set. In order to handle persistence + of a :class:`.mysql.SET` that actually wants to include the blank + value ``''`` as a legitimate value, a new bitwise operational mode + is added which is enabled by the + :paramref:`.mysql.SET.retrieve_as_bitwise` flag, which will persist + and retrieve values unambiguously using their bitflag positioning. + Storage and retrieval of unicode values for driver configurations + that aren't converting unicode natively is also repaired. + + .. seealso:: + + :ref:`change_3283` + + + .. 
change:: + :tags: feature, schema + :tickets: 3282 + + The DDL generation system of :meth:`.MetaData.create_all` + and :meth:`.MetaData.drop_all` has been enhanced to in most + cases automatically handle the case of mutually dependent + foreign key constraints; the need for the + :paramref:`.ForeignKeyConstraint.use_alter` flag is greatly + reduced. The system also works for constraints which aren't given + a name up front; only in the case of DROP is a name required for + at least one of the constraints involved in the cycle. + + .. seealso:: + + :ref:`feature_3282` + + .. change:: + :tags: feature, schema + + Added a new accessor :attr:`.Table.foreign_key_constraints` + to complement the :attr:`.Table.foreign_keys` collection, + as well as :attr:`.ForeignKeyConstraint.referred_table`. + + .. change:: + :tags: bug, sqlite + :tickets: 3244, 3261 + + UNIQUE and FOREIGN KEY constraints are now fully reflected on + SQLite both with and without names. Previously, foreign key + names were ignored and unnamed unique constraints were skipped. + Thanks to Jon Nelson for assistance with this. + + .. change:: + :tags: feature, examples + + A new suite of examples dedicated to providing a detailed study + into performance of SQLAlchemy ORM and Core, as well as the DBAPI, + from multiple perspectives. The suite runs within a container + that provides built in profiling displays both through console + output as well as graphically via the RunSnake tool. + + .. seealso:: + + :ref:`examples_performance` + + .. change:: + :tags: feature, orm + :tickets: 3100 + + A new series of :class:`.Session` methods which provide hooks + directly into the unit of work's facility for emitting INSERT + and UPDATE statements has been created. When used correctly, + this expert-oriented system can allow ORM-mappings to be used + to generate bulk insert and update statements batched into + executemany groups, allowing the statements to proceed at + speeds that rival direct use of the Core. + + .. seealso:: + + :ref:`bulk_operations` + + .. change:: + :tags: feature, mssql + :tickets: 3039 + + SQL Server 2012 now recommends VARCHAR(max), NVARCHAR(max), + VARBINARY(max) for large text/binary types. The MSSQL dialect will + now respect this based on version detection, as well as the new + ``deprecate_large_types`` flag. + + .. seealso:: + + :ref:`mssql_large_type_deprecation` + + .. change:: + :tags: bug, sqlite + :tickets: 3257 + + The SQLite dialect, when using the :class:`.sqlite.DATE`, + :class:`.sqlite.TIME`, + or :class:`.sqlite.DATETIME` types, and given a ``storage_format`` that + only renders numbers, will render the types in DDL as + ``DATE_CHAR``, ``TIME_CHAR``, and ``DATETIME_CHAR``, so that despite the + lack of alpha characters in the values, the column will still + deliver the "text affinity". Normally this is not needed, as the + textual values within the default storage formats already + imply text. + + .. seealso:: + + :ref:`sqlite_datetime` + + .. change:: + :tags: bug, engine + :tickets: 3266 + + The engine-level error handling and wrapping routines will now + take effect in all engine connection use cases, including + when user-custom connect routines are used via the + :paramref:`.create_engine.creator` parameter, as well as when + the :class:`.Connection` encounters a connection error on + revalidation. + + .. seealso:: + + :ref:`change_3266` + + .. change:: + :tags: feature, oracle + + New Oracle DDL features for tables, indexes: COMPRESS, BITMAP. + Patch courtesy Gabor Gombas. + + .. 
change:: + :tags: bug, oracle + + An alias name will be properly quoted when referred to using the + ``%(name)s`` token inside the :meth:`.Select.with_hint` method. + Previously, the Oracle backend hadn't implemented this quoting. + + .. change:: + :tags: feature, oracle + :tickets: 3220 + + Added support for CTEs under Oracle. This includes some tweaks + to the aliasing syntax, as well as a new CTE feature + :meth:`.CTE.suffix_with`, which is useful for adding in special + Oracle-specific directives to the CTE. + + .. seealso:: + + :ref:`change_3220` + + .. change:: + :tags: feature, mysql + :tickets: 3121 + + Updated the "supports_unicode_statements" flag to True for MySQLdb + and Pymysql under Python 2. This refers to the SQL statements + themselves, not the parameters, and affects issues such as table + and column names using non-ASCII characters. These drivers both + appear to support Python 2 Unicode objects without issue in modern + versions. + + .. change:: + :tags: bug, mysql + :tickets: 3263 + + The :meth:`.ColumnOperators.match` operator is now handled such that the + return type is not strictly assumed to be boolean; it now + returns a :class:`.Boolean` subclass called :class:`.MatchType`. + The type will still produce boolean behavior when used in Python + expressions, however the dialect can override its behavior at + result time. In the case of MySQL, while the MATCH operator + is typically used in a boolean context within an expression, + if one actually queries for the value of a match expression, a + floating point value is returned; this value is not compatible + with SQLAlchemy's C-based boolean processor, so MySQL's result-set + behavior now follows that of the :class:`.Float` type. + A new operator object ``notmatch_op`` is also added to better allow + dialects to define the negation of a match operation. + + .. seealso:: + + :ref:`change_3263` + + .. change:: + :tags: bug, postgresql + :tickets: 3264 + + The :meth:`.PGDialect.has_table` method will now query against + ``pg_catalog.pg_table_is_visible(c.oid)``, rather than testing + for an exact schema match, when the schema name is None; this + so that the method will also illustrate that temporary tables + are present. Note that this is a behavioral change, as Postgresql + allows a non-temporary table to silently overwrite an existing + temporary table of the same name, so this changes the behavior + of ``checkfirst`` in that unusual scenario. + + .. seealso:: + + :ref:`change_3264` + + .. change:: :tags: bug, sql :tickets: 3260 @@ -700,7 +1004,7 @@ .. change:: :tags: bug, orm, py3k - The :class:`.IdentityMap` exposed from :class:`.Session.identity` + The :class:`.IdentityMap` exposed from :attr:`.Session.identity_map` now returns lists for ``items()`` and ``values()`` in Py3K. Early porting to Py3K here had these returning iterators, when they technically should be "iterable views"..for now, lists are OK. 
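A short sketch of the Py3K identity-map behavior noted just above; the ``Thing`` mapping and in-memory SQLite engine are illustrative. Since ``items()`` and ``values()`` now return lists rather than one-shot iterators, the result can be indexed and traversed more than once::

    from sqlalchemy import Column, Integer, create_engine
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import Session

    Base = declarative_base()

    class Thing(Base):
        __tablename__ = 'thing'
        id = Column(Integer, primary_key=True)

    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)

    session = Session(engine)
    session.add(Thing(id=1))
    session.flush()

    values = session.identity_map.values()
    assert isinstance(values, list)   # a list on Py3K, not an exhausted iterator
    for obj in values:                # safe to traverse repeatedly
        print(obj)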
@@ -750,7 +1054,7 @@ :tags: orm, feature :tickets: 2971 - The :meth:`.InspectionAttr.info` collection is now moved down to + The :attr:`.InspectionAttr.info` collection is now moved down to :class:`.InspectionAttr`, where in addition to being available on all :class:`.MapperProperty` objects, it is also now available on hybrid properties, association proxies, when accessed via @@ -798,7 +1102,7 @@ :tags: bug, orm :tickets: 3117 - The "evaulator" for query.update()/delete() won't work with multi-table + The "evaluator" for query.update()/delete() won't work with multi-table updates, and needs to be set to `synchronize_session=False` or `synchronize_session='fetch'`; this now raises an exception, with a message to change the synchronize setting. diff --git a/doc/build/changelog/index.rst b/doc/build/changelog/index.rst index 0f5d090a3..8c5be99b8 100644 --- a/doc/build/changelog/index.rst +++ b/doc/build/changelog/index.rst @@ -10,15 +10,15 @@ Current Migration Guide ------------------------ .. toctree:: - :maxdepth: 1 + :titlesonly: - migration_10 + migration_10 Change logs ----------- .. toctree:: - :maxdepth: 2 + :titlesonly: changelog_10 changelog_09 @@ -36,7 +36,7 @@ Older Migration Guides ---------------------- .. toctree:: - :maxdepth: 1 + :titlesonly: migration_09 migration_08 diff --git a/doc/build/changelog/migration_10.rst b/doc/build/changelog/migration_10.rst index c4157266b..23ee6f466 100644 --- a/doc/build/changelog/migration_10.rst +++ b/doc/build/changelog/migration_10.rst @@ -8,7 +8,7 @@ What's New in SQLAlchemy 1.0? undergoing maintenance releases as of May, 2014, and SQLAlchemy version 1.0, as of yet unreleased. - Document last updated: October 23, 2014 + Document last updated: January 4, 2015 Introduction ============ @@ -17,13 +17,44 @@ This guide introduces what's new in SQLAlchemy version 1.0, and also documents changes which affect users migrating their applications from the 0.9 series of SQLAlchemy to 1.0. -Please carefully review -:ref:`behavioral_changes_orm_10` and :ref:`behavioral_changes_core_10` for -potentially backwards-incompatible changes. +Please carefully review the sections on behavioral changes for +potentially backwards-incompatible changes in behavior. -New Features -============ +New Features and Improvements - ORM +=================================== + +New Session Bulk INSERT/UPDATE API +---------------------------------- + +A new series of :class:`.Session` methods which provide hooks directly +into the unit of work's facility for emitting INSERT and UPDATE +statements has been created. When used correctly, this expert-oriented system +can allow ORM-mappings to be used to generate bulk insert and update +statements batched into executemany groups, allowing the statements +to proceed at speeds that rival direct use of the Core. + +.. seealso:: + + :ref:`bulk_operations` - introduction and full documentation + +:ticket:`3100` + +New Performance Example Suite +------------------------------ + +Inspired by the benchmarking done for the :ref:`bulk_operations` feature +as well as for the :ref:`faq_how_to_profile` section of the FAQ, a new +example section has been added which features several scripts designed +to illustrate the relative performance profile of various Core and ORM +techniques. The scripts are organized into use cases, and are packaged +under a single console interface such that any combination of demonstrations +can be run, dumping out timings, Python profile results and/or RunSnake profile +displays. + +.. 
seealso:: + + :ref:`examples_performance` .. _feature_3150: @@ -160,218 +191,6 @@ the polymorphic union of the base. :ticket:`3150` :ticket:`2670` :ticket:`3149` :ticket:`2952` :ticket:`3050` -.. _feature_3034: - -Select/Query LIMIT / OFFSET may be specified as an arbitrary SQL expression ----------------------------------------------------------------------------- - -The :meth:`.Select.limit` and :meth:`.Select.offset` methods now accept -any SQL expression, in addition to integer values, as arguments. The ORM -:class:`.Query` object also passes through any expression to the underlying -:class:`.Select` object. Typically -this is used to allow a bound parameter to be passed, which can be substituted -with a value later:: - - sel = select([table]).limit(bindparam('mylimit')).offset(bindparam('myoffset')) - -Dialects which don't support non-integer LIMIT or OFFSET expressions may continue -to not support this behavior; third party dialects may also need modification -in order to take advantage of the new behavior. A dialect which currently -uses the ``._limit`` or ``._offset`` attributes will continue to function -for those cases where the limit/offset was specified as a simple integer value. -However, when a SQL expression is specified, these two attributes will -instead raise a :class:`.CompileError` on access. A third-party dialect which -wishes to support the new feature should now call upon the ``._limit_clause`` -and ``._offset_clause`` attributes to receive the full SQL expression, rather -than the integer value. - -.. _change_2051: - -.. _feature_insert_from_select_defaults: - -INSERT FROM SELECT now includes Python and SQL-expression defaults -------------------------------------------------------------------- - -:meth:`.Insert.from_select` now includes Python and SQL-expression defaults if -otherwise unspecified; the limitation where non-server column defaults -aren't included in an INSERT FROM SELECT is now lifted and these -expressions are rendered as constants into the SELECT statement:: - - from sqlalchemy import Table, Column, MetaData, Integer, select, func - - m = MetaData() - - t = Table( - 't', m, - Column('x', Integer), - Column('y', Integer, default=func.somefunction())) - - stmt = select([t.c.x]) - print t.insert().from_select(['x'], stmt) - -Will render:: - - INSERT INTO t (x, y) SELECT t.x, somefunction() AS somefunction_1 - FROM t - -The feature can be disabled using -:paramref:`.Insert.from_select.include_defaults`. - -New Postgresql Table options ------------------------------ - -Added support for PG table options TABLESPACE, ON COMMIT, -WITH(OUT) OIDS, and INHERITS, when rendering DDL via -the :class:`.Table` construct. - -.. seealso:: - - :ref:`postgresql_table_options` - -:ticket:`2051` - -.. _feature_get_enums: - -New get_enums() method with Postgresql Dialect ----------------------------------------------- - -The :func:`.inspect` method returns a :class:`.PGInspector` object in the -case of Postgresql, which includes a new :meth:`.PGInspector.get_enums` -method that returns information on all available ``ENUM`` types:: - - from sqlalchemy import inspect, create_engine - - engine = create_engine("postgresql+psycopg2://host/dbname") - insp = inspect(engine) - print(insp.get_enums()) - -.. seealso:: - - :meth:`.PGInspector.get_enums` - -.. 
_feature_2891: - -Postgresql Dialect reflects Materialized Views, Foreign Tables --------------------------------------------------------------- - -Changes are as follows: - -* the :class:`Table` construct with ``autoload=True`` will now match a name - that exists in the database as a materialized view or foriegn table. - -* :meth:`.Inspector.get_view_names` will return plain and materialized view - names. - -* :meth:`.Inspector.get_table_names` does **not** change for Postgresql, it - continues to return only the names of plain tables. - -* A new method :meth:`.PGInspector.get_foreign_table_names` is added which - will return the names of tables that are specifically marked as "foreign" - in the Postgresql schema tables. - -The change to reflection involves adding ``'m'`` and ``'f'`` to the list -of qualifiers we use when querying ``pg_class.relkind``, but this change -is new in 1.0.0 to avoid any backwards-incompatible surprises for those -running 0.9 in production. - -:ticket:`2891` - -.. _feature_gh134: - -Postgresql FILTER keyword -------------------------- - -The SQL standard FILTER keyword for aggregate functions is now supported -by Postgresql as of 9.4. SQLAlchemy allows this using -:meth:`.FunctionElement.filter`:: - - func.count(1).filter(True) - -.. seealso:: - - :meth:`.FunctionElement.filter` - - :class:`.FunctionFilter` - -.. _feature_3184: - -UniqueConstraint is now part of the Table reflection process ------------------------------------------------------------- - -A :class:`.Table` object populated using ``autoload=True`` will now -include :class:`.UniqueConstraint` constructs as well as -:class:`.Index` constructs. This logic has a few caveats for -Postgresql and Mysql: - -Postgresql -^^^^^^^^^^ - -Postgresql has the behavior such that when a UNIQUE constraint is -created, it implicitly creates a UNIQUE INDEX corresponding to that -constraint as well. The :meth:`.Inspector.get_indexes` and the -:meth:`.Inspector.get_unique_constraints` methods will continue to -**both** return these entries distinctly, where -:meth:`.Inspector.get_indexes` now features a token -``duplicates_constraint`` within the index entry indicating the -corresponding constraint when detected. However, when performing -full table reflection using ``Table(..., autoload=True)``, the -:class:`.Index` construct is detected as being linked to the -:class:`.UniqueConstraint`, and is **not** present within the -:attr:`.Table.indexes` collection; only the :class:`.UniqueConstraint` -will be present in the :attr:`.Table.constraints` collection. This -deduplication logic works by joining to the ``pg_constraint`` table -when querying ``pg_index`` to see if the two constructs are linked. - -MySQL -^^^^^ - -MySQL does not have separate concepts for a UNIQUE INDEX and a UNIQUE -constraint. While it supports both syntaxes when creating tables and indexes, -it does not store them any differently. The -:meth:`.Inspector.get_indexes` -and the :meth:`.Inspector.get_unique_constraints` methods will continue to -**both** return an entry for a UNIQUE index in MySQL, -where :meth:`.Inspector.get_unique_constraints` features a new token -``duplicates_index`` within the constraint entry indicating that this is a -dupe entry corresponding to that index. 
However, when performing -full table reflection using ``Table(..., autoload=True)``, -the :class:`.UniqueConstraint` construct is -**not** part of the fully reflected :class:`.Table` construct under any -circumstances; this construct is always represented by a :class:`.Index` -with the ``unique=True`` setting present in the :attr:`.Table.indexes` -collection. - -.. seealso:: - - :ref:`postgresql_index_reflection` - - :ref:`mysql_unique_constraints` - -:ticket:`3184` - - -Behavioral Improvements -======================= - -.. _feature_updatemany: - -UPDATE statements are now batched with executemany() in a flush ----------------------------------------------------------------- - -UPDATE statements can now be batched within an ORM flush -into more performant executemany() call, similarly to how INSERT -statements can be batched; this will be invoked within flush -based on the following criteria: - -* two or more UPDATE statements in sequence involve the identical set of - columns to be modified. - -* The statement has no embedded SQL expressions in the SET clause. - -* The mapping does not use a :paramref:`~.orm.mapper.version_id_col`, or - the backend dialect supports a "sane" rowcount for an executemany() - operation; most DBAPIs support this correctly now. - ORM full object fetches 25% faster ---------------------------------- @@ -419,7 +238,6 @@ at once. Without the :meth:`.Query.yield_per`, the above script on the MacBookPro is 31 seconds on 0.9 and 26 seconds on 1.0, the extra time spent setting up very large memory buffers. - .. _feature_3176: New KeyedTuple implementation dramatically faster @@ -468,6 +286,59 @@ object totally smokes both namedtuple and KeyedTuple:: :ticket:`3176` +.. _feature_slots: + +Significant Improvements in Structural Memory Use +-------------------------------------------------- + +Structural memory use has been improved via much more significant use +of ``__slots__`` for many internal objects. This optimization is +particularly geared towards the base memory size of large applications +that have lots of tables and columns, and reduces memory +size for a variety of high-volume objects including event listening +internals, comparator objects and parts of the ORM attribute and +loader strategy system. + +A bench that makes use of heapy measure the startup size of Nova +illustrates a difference of about 3.7 fewer megs, or 46%, +taken up by SQLAlchemy's objects, associated dictionaries, as +well as weakrefs, within a basic import of "nova.db.sqlalchemy.models":: + + # reported by heapy, summation of SQLAlchemy objects + + # associated dicts + weakref-related objects with core of Nova imported: + + Before: total count 26477 total bytes 7975712 + After: total count 18181 total bytes 4236456 + + # reported for the Python module space overall with the + # core of Nova imported: + + Before: Partition of a set of 355558 objects. Total size = 61661760 bytes. + After: Partition of a set of 346034 objects. Total size = 57808016 bytes. + + +.. _feature_updatemany: + +UPDATE statements are now batched with executemany() in a flush +---------------------------------------------------------------- + +UPDATE statements can now be batched within an ORM flush +into more performant executemany() call, similarly to how INSERT +statements can be batched; this will be invoked within flush +based on the following criteria: + +* two or more UPDATE statements in sequence involve the identical set of + columns to be modified. 
+ +* The statement has no embedded SQL expressions in the SET clause. + +* The mapping does not use a :paramref:`~.orm.mapper.version_id_col`, or + the backend dialect supports a "sane" rowcount for an executemany() + operation; most DBAPIs support this correctly now. + +.. _feature_3178: + + .. _bug_3035: Session.get_bind() handles a wider variety of inheritance scenarios @@ -511,55 +382,57 @@ of inheritance-oriented scenarios, including: :ticket:`3035` -.. _feature_3178: +.. _bug_3227: -New systems to safely emit parameterized warnings -------------------------------------------------- +Session.get_bind() will receive the Mapper in all relevant Query cases +----------------------------------------------------------------------- -For a long time, there has been a restriction that warning messages could not -refer to data elements, such that a particular function might emit an -infinite number of unique warnings. The key place this occurs is in the -``Unicode type received non-unicode bind param value`` warning. Placing -the data value in this message would mean that the Python ``__warningregistry__`` -for that module, or in some cases the Python-global ``warnings.onceregistry``, -would grow unbounded, as in most warning scenarios, one of these two collections -is populated with every distinct warning message. +A series of issues were repaired where the :meth:`.Session.get_bind` +would not receive the primary :class:`.Mapper` of the :class:`.Query`, +even though this mapper was readily available (the primary mapper is the +single mapper, or alternatively the first mapper, that is associated with +a :class:`.Query` object). -The change here is that by using a special ``string`` type that purposely -changes how the string is hashed, we can control that a large number of -parameterized messages are hashed only on a small set of possible hash -values, such that a warning such as ``Unicode type received non-unicode -bind param value`` can be tailored to be emitted only a specific number -of times; beyond that, the Python warnings registry will begin recording -them as duplicates. +The :class:`.Mapper` object, when passed to :meth:`.Session.get_bind`, +is typically used by sessions that make use of the +:paramref:`.Session.binds` parameter to associate mappers with a +series of engines (although in this use case, things frequently +"worked" in most cases anyway as the bind would be located via the +mapped table object), or more specifically implement a user-defined +:meth:`.Session.get_bind` method that provies some pattern of +selecting engines based on mappers, such as horizontal sharding or a +so-called "routing" session that routes queries to different backends. -To illustrate, the following test script will show only ten warnings being -emitted for ten of the parameter sets, out of a total of 1000:: +These scenarios include: - from sqlalchemy import create_engine, Unicode, select, cast - import random - import warnings +* :meth:`.Query.count`:: - e = create_engine("sqlite://") + session.query(User).count() - # Use the "once" filter (which is also the default for Python - # warnings). Exactly ten of these warnings will - # be emitted; beyond that, the Python warnings registry will accumulate - # new values as dupes of one of the ten existing. 
- warnings.filterwarnings("once") +* :meth:`.Query.update` and :meth:`.Query.delete`, both for the UPDATE/DELETE + statement as well as for the SELECT used by the "fetch" strategy:: - for i in range(1000): - e.execute(select([cast( - ('foo_%d' % random.randint(0, 1000000)).encode('ascii'), Unicode)])) + session.query(User).filter(User.id == 15).update( + {"name": "foob"}, synchronize_session='fetch') -The format of the warning here is:: + session.query(User).filter(User.id == 15).delete( + synchronize_session='fetch') - /path/lib/sqlalchemy/sql/sqltypes.py:186: SAWarning: Unicode type received - non-unicode bind param value 'foo_4852'. (this warning may be - suppressed after 10 occurrences) +* Queries against individual columns:: + session.query(User.id, User.name).all() -:ticket:`3178` +* SQL functions and other expressions against indirect mappings such as + :obj:`.column_property`:: + + class User(Base): + # ... + + score = column_property(func.coalesce(self.tables.users.c.name, None))) + + session.query(func.max(User.score)).scalar() + +:ticket:`3227` :ticket:`3242` :ticket:`1326` .. _feature_2963: @@ -592,128 +465,6 @@ as remaining ORM constructs such as :func:`.orm.synonym`. :ticket:`2963` -.. _migration_3177: - -Change to single-table-inheritance criteria when using from_self(), count() ---------------------------------------------------------------------------- - -Given a single-table inheritance mapping, such as:: - - class Widget(Base): - __table__ = 'widget_table' - - class FooWidget(Widget): - pass - -Using :meth:`.Query.from_self` or :meth:`.Query.count` against a subclass -would produce a subquery, but then add the "WHERE" criteria for subtypes -to the outside:: - - sess.query(FooWidget).from_self().all() - -rendering:: - - SELECT - anon_1.widgets_id AS anon_1_widgets_id, - anon_1.widgets_type AS anon_1_widgets_type - FROM (SELECT widgets.id AS widgets_id, widgets.type AS widgets_type, - FROM widgets) AS anon_1 - WHERE anon_1.widgets_type IN (?) - -The issue with this is that if the inner query does not specify all -columns, then we can't add the WHERE clause on the outside (it actually tries, -and produces a bad query). This decision -apparently goes way back to 0.6.5 with the note "may need to make more -adjustments to this". Well, those adjustments have arrived! So now the -above query will render:: - - SELECT - anon_1.widgets_id AS anon_1_widgets_id, - anon_1.widgets_type AS anon_1_widgets_type - FROM (SELECT widgets.id AS widgets_id, widgets.type AS widgets_type, - FROM widgets - WHERE widgets.type IN (?)) AS anon_1 - -So that queries that don't include "type" will still work!:: - - sess.query(FooWidget.id).count() - -Renders:: - - SELECT count(*) AS count_1 - FROM (SELECT widgets.id AS widgets_id - FROM widgets - WHERE widgets.type IN (?)) AS anon_1 - - -:ticket:`3177` - - -.. _migration_3222: - - -single-table-inheritance criteria added to all ON clauses unconditionally -------------------------------------------------------------------------- - -When joining to a single-table inheritance subclass target, the ORM always adds -the "single table criteria" when joining on a relationship. 
Given a -mapping as:: - - class Widget(Base): - __tablename__ = 'widget' - id = Column(Integer, primary_key=True) - type = Column(String) - related_id = Column(ForeignKey('related.id')) - related = relationship("Related", backref="widget") - __mapper_args__ = {'polymorphic_on': type} - - - class FooWidget(Widget): - __mapper_args__ = {'polymorphic_identity': 'foo'} - - - class Related(Base): - __tablename__ = 'related' - id = Column(Integer, primary_key=True) - -It's been the behavior for quite some time that a JOIN on the relationship -will render a "single inheritance" clause for the type:: - - s.query(Related).join(FooWidget, Related.widget).all() - -SQL output:: - - SELECT related.id AS related_id - FROM related JOIN widget ON related.id = widget.related_id AND widget.type IN (:type_1) - -Above, because we joined to a subclass ``FooWidget``, :meth:`.Query.join` -knew to add the ``AND widget.type IN ('foo')`` criteria to the ON clause. - -The change here is that the ``AND widget.type IN()`` criteria is now appended -to *any* ON clause, not just those generated from a relationship, -including one that is explicitly stated:: - - # ON clause will now render as - # related.id = widget.related_id AND widget.type IN (:type_1) - s.query(Related).join(FooWidget, FooWidget.related_id == Related.id).all() - -As well as the "implicit" join when no ON clause of any kind is stated:: - - # ON clause will now render as - # related.id = widget.related_id AND widget.type IN (:type_1) - s.query(Related).join(FooWidget).all() - -Previously, the ON clause for these would not include the single-inheritance -criteria. Applications that are already adding this criteria to work around -this will want to remove its explicit use, though it should continue to work -fine if the criteria happens to be rendered twice in the meantime. - -.. seealso:: - - :ref:`bug_3233` - -:ticket:`3222` - .. _bug_3188: ColumnProperty constructs work a lot better with aliases, order_by @@ -793,31 +544,202 @@ would again fail; these have also been fixed. :ticket:`3148` :ticket:`3188` -.. _bug_3170: +New Features and Improvements - Core +==================================== -null(), false() and true() constants are no longer singletons -------------------------------------------------------------- +.. _feature_3034: -These three constants were changed to return a "singleton" value -in 0.9; unfortunately, that would lead to a query like the following -to not render as expected:: +Select/Query LIMIT / OFFSET may be specified as an arbitrary SQL expression +---------------------------------------------------------------------------- - select([null(), null()]) +The :meth:`.Select.limit` and :meth:`.Select.offset` methods now accept +any SQL expression, in addition to integer values, as arguments. The ORM +:class:`.Query` object also passes through any expression to the underlying +:class:`.Select` object. Typically +this is used to allow a bound parameter to be passed, which can be substituted +with a value later:: -rendering only ``SELECT NULL AS anon_1``, because the two :func:`.null` -constructs would come out as the same ``NULL`` object, and -SQLAlchemy's Core model is based on object identity in order to -determine lexical significance. The change in 0.9 had no -importance other than the desire to save on object overhead; in general, -an unnamed construct needs to stay lexically unique so that it gets -labeled uniquely. 
+    sel = select([table]).limit(bindparam('mylimit')).offset(bindparam('myoffset'))
 
-:ticket:`3170`
 
+Dialects which don't support non-integer LIMIT or OFFSET expressions may continue
+not to support this behavior; third party dialects may also need modification
+in order to take advantage of the new behavior.  A dialect which currently
+uses the ``._limit`` or ``._offset`` attributes will continue to function
+for those cases where the limit/offset was specified as a simple integer value.
+However, when a SQL expression is specified, these two attributes will
+instead raise a :class:`.CompileError` on access.  A third-party dialect which
+wishes to support the new feature should now call upon the ``._limit_clause``
+and ``._offset_clause`` attributes to receive the full SQL expression, rather
+than the integer value.
+
+.. _feature_3282:
+
+The ``use_alter`` flag on ``ForeignKeyConstraint`` is no longer needed
+----------------------------------------------------------------------
+
+The :meth:`.MetaData.create_all` and :meth:`.MetaData.drop_all` methods will
+now make use of a system that automatically renders an ALTER statement
+for foreign key constraints that are involved in mutually-dependent cycles
+between tables, without the
+need to specify :paramref:`.ForeignKeyConstraint.use_alter`.  Additionally,
+the foreign key constraints no longer need to have a name in order to be
+created via ALTER; only the DROP operation requires a name.  In the case
+of a DROP, the feature will ensure that only constraints which have
+explicit names are actually included as ALTER statements.  In the
+case of an unresolvable cycle within a DROP, the system now emits
+a succinct and clear error message if the DROP cannot proceed.
+
+The :paramref:`.ForeignKeyConstraint.use_alter` and
+:paramref:`.ForeignKey.use_alter` flags remain in place, and continue to have
+the same effect of establishing those constraints for which ALTER is
+required during a CREATE/DROP scenario.
+
+.. seealso::
+
+    :ref:`use_alter` - full description of the new behavior.
+
+
+:ticket:`3282`
+
+.. _change_2051:
+
+.. _feature_insert_from_select_defaults:
+
+INSERT FROM SELECT now includes Python and SQL-expression defaults
+-------------------------------------------------------------------
+
+:meth:`.Insert.from_select` now includes Python and SQL-expression defaults if
+otherwise unspecified; the limitation where non-server column defaults
+aren't included in an INSERT FROM SELECT is now lifted and these
+expressions are rendered as constants into the SELECT statement::
+
+    from sqlalchemy import Table, Column, MetaData, Integer, select, func
+
+    m = MetaData()
+
+    t = Table(
+        't', m,
+        Column('x', Integer),
+        Column('y', Integer, default=func.somefunction()))
+
+    stmt = select([t.c.x])
+    print t.insert().from_select(['x'], stmt)
+
+Will render::
+
+    INSERT INTO t (x, y) SELECT t.x, somefunction() AS somefunction_1
+    FROM t
+
+The feature can be disabled using
+:paramref:`.Insert.from_select.include_defaults`.
 
-.. _behavioral_changes_orm_10:
+.. _feature_3184:
 
-Behavioral Changes - ORM
-========================
+UniqueConstraint is now part of the Table reflection process
+------------------------------------------------------------
+
+A :class:`.Table` object populated using ``autoload=True`` will now
+include :class:`.UniqueConstraint` constructs as well as
+:class:`.Index` constructs. 
This logic has a few caveats for
+Postgresql and MySQL:
+
+Postgresql
+^^^^^^^^^^
+
+Postgresql has the behavior such that when a UNIQUE constraint is
+created, it implicitly creates a UNIQUE INDEX corresponding to that
+constraint as well.  The :meth:`.Inspector.get_indexes` and the
+:meth:`.Inspector.get_unique_constraints` methods will continue to
+**both** return these entries distinctly, where
+:meth:`.Inspector.get_indexes` now features a token
+``duplicates_constraint`` within the index entry indicating the
+corresponding constraint when detected.  However, when performing
+full table reflection using ``Table(..., autoload=True)``, the
+:class:`.Index` construct is detected as being linked to the
+:class:`.UniqueConstraint`, and is **not** present within the
+:attr:`.Table.indexes` collection; only the :class:`.UniqueConstraint`
+will be present in the :attr:`.Table.constraints` collection.  This
+deduplication logic works by joining to the ``pg_constraint`` table
+when querying ``pg_index`` to see if the two constructs are linked.
+
+MySQL
+^^^^^
+
+MySQL does not have separate concepts for a UNIQUE INDEX and a UNIQUE
+constraint.  While it supports both syntaxes when creating tables and indexes,
+it does not store them any differently.  The
+:meth:`.Inspector.get_indexes`
+and the :meth:`.Inspector.get_unique_constraints` methods will continue to
+**both** return an entry for a UNIQUE index in MySQL,
+where :meth:`.Inspector.get_unique_constraints` features a new token
+``duplicates_index`` within the constraint entry indicating that this is a
+dupe entry corresponding to that index.  However, when performing
+full table reflection using ``Table(..., autoload=True)``,
+the :class:`.UniqueConstraint` construct is
+**not** part of the fully reflected :class:`.Table` construct under any
+circumstances; this construct is always represented by an :class:`.Index`
+with the ``unique=True`` setting present in the :attr:`.Table.indexes`
+collection.
+
+.. seealso::
+
+    :ref:`postgresql_index_reflection`
+
+    :ref:`mysql_unique_constraints`
+
+:ticket:`3184`
+
+.. _feature_3178:
+
+New systems to safely emit parameterized warnings
+-------------------------------------------------
+
+For a long time, there has been a restriction that warning messages could not
+refer to data elements, such that a particular function might emit an
+infinite number of unique warnings.  The key place this occurs is in the
+``Unicode type received non-unicode bind param value`` warning.  Placing
+the data value in this message would mean that the Python ``__warningregistry__``
+for that module, or in some cases the Python-global ``warnings.onceregistry``,
+would grow unbounded, as in most warning scenarios, one of these two collections
+is populated with every distinct warning message.
+
+The change here is that by using a special ``string`` type that purposely
+changes how the string is hashed, we can ensure that a large number of
+parameterized messages are hashed only on a small set of possible hash
+values, such that a warning such as ``Unicode type received non-unicode
+bind param value`` can be tailored to be emitted only a specific number
+of times; beyond that, the Python warnings registry will begin recording
+them as duplicates.
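+
+The mechanism can be pictured as a string subclass whose hash is bucketed
+onto a fixed number of values per message template; the class below is a
+minimal sketch for illustration only, and is not the actual internal
+class used by SQLAlchemy::
+
+    class HashLimitString(str):
+        """A str whose hash cycles among ``num`` buckets per template.
+
+        The Python warnings registries key on the message text, so at
+        most ``num`` variants of one templated message are recorded
+        before further variants hash as duplicates of an existing one.
+        """
+
+        def __new__(cls, template, num, args):
+            interpolated = template % args
+            self = super(HashLimitString, cls).__new__(cls, interpolated)
+            # hash depends on the template plus one of ``num`` buckets
+            self._hash = hash("%s_%d" % (template, hash(interpolated) % num))
+            return self
+
+        def __hash__(self):
+            return self._hash
+
+        def __eq__(self, other):
+            return hash(self) == hash(other)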
+
+To illustrate, the following test script will show only ten warnings being
+emitted for ten of the parameter sets, out of a total of 1000::
+
+    from sqlalchemy import create_engine, Unicode, select, cast
+    import random
+    import warnings
+
+    e = create_engine("sqlite://")
+
+    # Use the "once" filter (which is also the default for Python
+    # warnings).  Exactly ten of these warnings will
+    # be emitted; beyond that, the Python warnings registry will accumulate
+    # new values as dupes of one of the ten existing.
+    warnings.filterwarnings("once")
+
+    for i in range(1000):
+        e.execute(select([cast(
+            ('foo_%d' % random.randint(0, 1000000)).encode('ascii'), Unicode)]))
+
+The format of the warning here is::
+
+    /path/lib/sqlalchemy/sql/sqltypes.py:186: SAWarning: Unicode type received
+    non-unicode bind param value 'foo_4852'. (this warning may be
+    suppressed after 10 occurrences)
+
+
+:ticket:`3178`
+
+Key Behavioral Changes - ORM
+============================
 
 .. _bug_3228:
 
@@ -1096,7 +1018,7 @@ as all the subclasses normally refer to the same table::
 
 
 
-.. _migration_migration_deprecated_orm_events:
+.. _migration_deprecated_orm_events:
 
 Deprecated ORM Event Hooks Removed
 ----------------------------------
@@ -1201,7 +1123,7 @@ join into a subquery as a join target on SQLite.
 query.update() with ``synchronize_session='evaluate'`` raises on multi-table update
 -----------------------------------------------------------------------------------
 
-The "evaulator" for :meth:`.Query.update` won't work with multi-table
+The "evaluator" for :meth:`.Query.update` won't work with multi-table
 updates, and needs to be set to ``synchronize_session=False`` or
 ``synchronize_session='fetch'`` when multiple tables are present.
 The new behavior is that an explicit exception is now raised, with a message
@@ -1218,10 +1140,130 @@ have any function since version 0.8
 removed the older "mutable" system from the unit of work.
 
 
-.. _behavioral_changes_core_10:
+.. _migration_3177:
 
-Behavioral Changes - Core
-=========================
+Change to single-table-inheritance criteria when using from_self(), count()
+---------------------------------------------------------------------------
+
+Given a single-table inheritance mapping, such as::
+
+    class Widget(Base):
+        __tablename__ = 'widget_table'
+
+    class FooWidget(Widget):
+        pass
+
+Using :meth:`.Query.from_self` or :meth:`.Query.count` against a subclass
+would produce a subquery, but then add the "WHERE" criteria for subtypes
+to the outside::
+
+    sess.query(FooWidget).from_self().all()
+
+rendering::
+
+    SELECT
+        anon_1.widgets_id AS anon_1_widgets_id,
+        anon_1.widgets_type AS anon_1_widgets_type
+    FROM (SELECT widgets.id AS widgets_id, widgets.type AS widgets_type
+    FROM widgets) AS anon_1
+    WHERE anon_1.widgets_type IN (?)
+
+The issue with this is that if the inner query does not specify all
+columns, then we can't add the WHERE clause on the outside (it actually tries,
+and produces a bad query).  This decision
+apparently goes way back to 0.6.5 with the note "may need to make more
+adjustments to this".  Well, those adjustments have arrived! 
So now the
+above query will render::
+
+    SELECT
+        anon_1.widgets_id AS anon_1_widgets_id,
+        anon_1.widgets_type AS anon_1_widgets_type
+    FROM (SELECT widgets.id AS widgets_id, widgets.type AS widgets_type
+    FROM widgets
+    WHERE widgets.type IN (?)) AS anon_1
+
+So that queries that don't include "type" will still work!::
+
+    sess.query(FooWidget.id).count()
+
+Renders::
+
+    SELECT count(*) AS count_1
+    FROM (SELECT widgets.id AS widgets_id
+    FROM widgets
+    WHERE widgets.type IN (?)) AS anon_1
+
+
+:ticket:`3177`
+
+
+.. _migration_3222:
+
+
+single-table-inheritance criteria added to all ON clauses unconditionally
+-------------------------------------------------------------------------
+
+When joining to a single-table inheritance subclass target, the ORM always adds
+the "single table criteria" when joining on a relationship.  Given a
+mapping as::
+
+    class Widget(Base):
+        __tablename__ = 'widget'
+        id = Column(Integer, primary_key=True)
+        type = Column(String)
+        related_id = Column(ForeignKey('related.id'))
+        related = relationship("Related", backref="widget")
+        __mapper_args__ = {'polymorphic_on': type}
+
+
+    class FooWidget(Widget):
+        __mapper_args__ = {'polymorphic_identity': 'foo'}
+
+
+    class Related(Base):
+        __tablename__ = 'related'
+        id = Column(Integer, primary_key=True)
+
+It's been the behavior for quite some time that a JOIN on the relationship
+will render a "single inheritance" clause for the type::
+
+    s.query(Related).join(FooWidget, Related.widget).all()
+
+SQL output::
+
+    SELECT related.id AS related_id
+    FROM related JOIN widget ON related.id = widget.related_id AND widget.type IN (:type_1)
+
+Above, because we joined to a subclass ``FooWidget``, :meth:`.Query.join`
+knew to add the ``AND widget.type IN ('foo')`` criteria to the ON clause.
+
+The change here is that the ``AND widget.type IN()`` criteria is now appended
+to *any* ON clause, not just those generated from a relationship,
+including one that is explicitly stated::
+
+    # ON clause will now render as
+    # related.id = widget.related_id AND widget.type IN (:type_1)
+    s.query(Related).join(FooWidget, FooWidget.related_id == Related.id).all()
+
+As well as the "implicit" join when no ON clause of any kind is stated::
+
+    # ON clause will now render as
+    # related.id = widget.related_id AND widget.type IN (:type_1)
+    s.query(Related).join(FooWidget).all()
+
+Previously, the ON clause for these would not include the single-inheritance
+criteria.  Applications that are already adding this criteria to work around
+this will want to remove its explicit use, though it should continue to work
+fine if the criteria happens to be rendered twice in the meantime.
+
+.. seealso::
+
+    :ref:`bug_3233`
+
+:ticket:`3222`
+
+Key Behavioral Changes - Core
+=============================
 
 .. _migration_2992:
 
@@ -1373,6 +1415,89 @@ be qualified with :func:`.text` or similar.
 
 :ticket:`2992`
 
+.. _bug_3288:
+
+Python-side defaults invoked for each row individually when using a multivalued insert
+---------------------------------------------------------------------------------------
+
+Support for Python-side column defaults when using the multi-valued
+version of :meth:`.Insert.values` was essentially not implemented, and
+would only work "by accident" in specific situations, when the dialect in
+use was using a non-positional (e.g. named) style of bound parameter, and
+when it was not necessary that a Python-side callable be invoked for each
+row. 
+
+The feature has been overhauled so that it works more similarly to
+that of an "executemany" style of invocation::
+
+    import itertools
+
+    counter = itertools.count(1)
+    t = Table(
+        'my_table', metadata,
+        Column('id', Integer, default=lambda: next(counter)),
+        Column('data', String)
+    )
+
+    conn.execute(t.insert().values([
+        {"data": "d1"},
+        {"data": "d2"},
+        {"data": "d3"},
+    ]))
+
+The above example will invoke ``next(counter)`` for each row individually
+as would be expected::
+
+    INSERT INTO my_table (id, data) VALUES (?, ?), (?, ?), (?, ?)
+    (1, 'd1', 2, 'd2', 3, 'd3')
+
+Previously, a positional dialect would fail as a bind would not be generated
+for additional positions::
+
+    Incorrect number of bindings supplied. The current statement uses 6,
+    and there are 4 supplied.
+    [SQL: u'INSERT INTO my_table (id, data) VALUES (?, ?), (?, ?), (?, ?)']
+    [parameters: (1, 'd1', 'd2', 'd3')]
+
+And with a "named" dialect, the same value for "id" would be re-used in
+each row (hence this change is backwards-incompatible with a system that
+relied on this)::
+
+    INSERT INTO my_table (id, data) VALUES (:id, :data_0), (:id, :data_1), (:id, :data_2)
+    {u'data_2': 'd3', u'data_1': 'd2', u'data_0': 'd1', 'id': 1}
+
+The system will also refuse to invoke a "server side" default as inline-rendered
+SQL, since it cannot be guaranteed that a server side default is compatible
+with this.  If the VALUES clause renders for a specific column, then a Python-side
+value is required; if an omitted value only refers to a server-side default,
+an exception is raised::
+
+    t = Table(
+        'my_table', metadata,
+        Column('id', Integer, primary_key=True),
+        Column('data', String, server_default='some default')
+    )
+
+    conn.execute(t.insert().values([
+        {"data": "d1"},
+        {"data": "d2"},
+        {},
+    ]))
+
+will raise::
+
+    sqlalchemy.exc.CompileError: INSERT value for column my_table.data is
+    explicitly rendered as a boundparameter in the VALUES clause; a
+    Python-side value or SQL expression is required
+
+Previously, the value "d1" would be copied into that of the third
+row (but again, only with named format!)::
+
+    INSERT INTO my_table (data) VALUES (:data_0), (:data_1), (:data_0)
+    {u'data_1': 'd2', u'data_0': 'd1'}
+
+:ticket:`3288`
+
 .. _change_3163:
 
 Event listeners can not be added or removed from within that event's runner
@@ -1427,6 +1552,28 @@ A :class:`.Table` can be set up for reflection by passing
 
 :ticket:`3027`
 
+.. _change_3266:
+
+DBAPI exception wrapping and handle_error() event improvements
+--------------------------------------------------------------
+
+SQLAlchemy's wrapping of DBAPI exceptions was not taking place in the
+case where a :class:`.Connection` object was invalidated, and then tried
+to reconnect and encountered an error; this has been resolved.
+
+Additionally, the recently added :meth:`.ConnectionEvents.handle_error`
+event is now invoked for errors that occur upon initial connect, upon
+reconnect, and when :func:`.create_engine` is used with a custom connection
+function via :paramref:`.create_engine.creator`.
+
+The :class:`.ExceptionContext` object has a new data member
+:attr:`.ExceptionContext.engine` that will always refer to the :class:`.Engine`
+in use, in those cases when the :class:`.Connection` object is not available
+(e.g. on initial connect).
+
+
+:ticket:`3266`
+
 .. 
_change_3243:
 
 ForeignKeyConstraint.columns is now a ColumnCollection
@@ -1443,8 +1590,241 @@ is added to unconditionally return string keys for the local set of
 columns regardless of how the object was constructed or its current
 state.
 
-Dialect Changes
-===============
+
+.. _bug_3170:
+
+null(), false() and true() constants are no longer singletons
+-------------------------------------------------------------
+
+These three constants were changed to return a "singleton" value
+in 0.9; unfortunately, that would cause a query like the following
+to not render as expected::
+
+    select([null(), null()])
+
+rendering only ``SELECT NULL AS anon_1``, because the two :func:`.null`
+constructs would come out as the same ``NULL`` object, and
+SQLAlchemy's Core model is based on object identity in order to
+determine lexical significance.  The change in 0.9 had no
+importance other than the desire to save on object overhead; in general,
+an unnamed construct needs to stay lexically unique so that it gets
+labeled uniquely.
+
+:ticket:`3170`
+
+.. _change_3204:
+
+SQLite/Oracle have distinct methods for temporary table/view name reporting
+---------------------------------------------------------------------------
+
+The :meth:`.Inspector.get_table_names` and :meth:`.Inspector.get_view_names`
+methods in the case of SQLite/Oracle would also return the names of temporary
+tables and views, which is not provided by any other dialect (in the case
+of MySQL at least it is not even possible).  This logic has been moved
+out to two new methods :meth:`.Inspector.get_temp_table_names` and
+:meth:`.Inspector.get_temp_view_names`.
+
+Note that reflection of a specific named temporary table or temporary view,
+either by ``Table('name', autoload=True)`` or via methods like
+:meth:`.Inspector.get_columns` continues to function for most if not all
+dialects.  For SQLite specifically, there is a bug fix for UNIQUE constraint
+reflection from temp tables as well, which is :ticket:`3203`.
+
+:ticket:`3204`
+
+Dialect Improvements and Changes - Postgresql
+=============================================
+
+New Postgresql Table options
+-----------------------------
+
+Added support for PG table options TABLESPACE, ON COMMIT,
+WITH(OUT) OIDS, and INHERITS, when rendering DDL via
+the :class:`.Table` construct.
+
+.. seealso::
+
+    :ref:`postgresql_table_options`
+
+:ticket:`2051`
+
+.. _feature_get_enums:
+
+New get_enums() method with Postgresql Dialect
+----------------------------------------------
+
+The :func:`.inspect` function returns a :class:`.PGInspector` object in the
+case of Postgresql, which includes a new :meth:`.PGInspector.get_enums`
+method that returns information on all available ``ENUM`` types::
+
+    from sqlalchemy import inspect, create_engine
+
+    engine = create_engine("postgresql+psycopg2://host/dbname")
+    insp = inspect(engine)
+    print(insp.get_enums())
+
+.. seealso::
+
+    :meth:`.PGInspector.get_enums`
+
+.. _feature_2891:
+
+Postgresql Dialect reflects Materialized Views, Foreign Tables
+--------------------------------------------------------------
+
+Changes are as follows:
+
+* the :class:`Table` construct with ``autoload=True`` will now match a name
+  that exists in the database as a materialized view or foreign table.
+
+* :meth:`.Inspector.get_view_names` will return plain and materialized view
+  names.
+
+* :meth:`.Inspector.get_table_names` does **not** change for Postgresql; it
+  continues to return only the names of plain tables.
+
+* A new method :meth:`.PGInspector.get_foreign_table_names` is added which
+  will return the names of tables that are specifically marked as "foreign"
+  in the Postgresql schema tables.
+
+The change to reflection involves adding ``'m'`` and ``'f'`` to the list
+of qualifiers we use when querying ``pg_class.relkind``, but this change
+is new in 1.0.0 to avoid any backwards-incompatible surprises for those
+running 0.9 in production.
+
+:ticket:`2891`
+
+.. _change_3264:
+
+Postgresql ``has_table()`` now works for temporary tables
+---------------------------------------------------------
+
+This is a simple fix such that "has table" for temporary tables now works,
+so that code like the following may proceed::
+
+    from sqlalchemy import *
+
+    metadata = MetaData()
+    user_tmp = Table(
+        "user_tmp", metadata,
+        Column("id", INT, primary_key=True),
+        Column('name', VARCHAR(50)),
+        prefixes=['TEMPORARY']
+    )
+
+    e = create_engine("postgresql://scott:tiger@localhost/test", echo='debug')
+    with e.begin() as conn:
+        user_tmp.create(conn, checkfirst=True)
+
+        # checkfirst will succeed
+        user_tmp.create(conn, checkfirst=True)
+
+The very unlikely case where this behavior will cause a non-failing application
+to behave differently is due to the fact that Postgresql allows a non-temporary
+table to silently overwrite a temporary table.  So code like the following will
+now act completely differently, no longer creating the real table following
+the temporary table::
+
+    from sqlalchemy import *
+
+    metadata = MetaData()
+    user_tmp = Table(
+        "user_tmp", metadata,
+        Column("id", INT, primary_key=True),
+        Column('name', VARCHAR(50)),
+        prefixes=['TEMPORARY']
+    )
+
+    e = create_engine("postgresql://scott:tiger@localhost/test", echo='debug')
+    with e.begin() as conn:
+        user_tmp.create(conn, checkfirst=True)
+
+        m2 = MetaData()
+        user = Table(
+            "user_tmp", m2,
+            Column("id", INT, primary_key=True),
+            Column('name', VARCHAR(50)),
+        )
+
+        # in 0.9, *will create* the new table, overwriting the old one.
+        # in 1.0, *will not create* the new table
+        user.create(conn, checkfirst=True)
+
+:ticket:`3264`
+
+.. _feature_gh134:
+
+Postgresql FILTER keyword
+-------------------------
+
+The SQL standard FILTER keyword for aggregate functions is now supported
+by Postgresql as of 9.4.  SQLAlchemy allows this using
+:meth:`.FunctionElement.filter`::
+
+    func.count(1).filter(True)
+
+.. seealso::
+
+    :meth:`.FunctionElement.filter`
+
+    :class:`.FunctionFilter`
+
+Support for psycopg2cffi Dialect on PyPy
+----------------------------------------
+
+Support for the PyPy psycopg2cffi dialect is added.
+
+.. seealso::
+
+    :mod:`sqlalchemy.dialects.postgresql.psycopg2cffi`
+
+Dialect Improvements and Changes - MySQL
+=============================================
+
+.. _change_3283:
+
+MySQL SET Type Overhauled to support empty sets, unicode, blank value handling
+-------------------------------------------------------------------------------
+
+The :class:`.mysql.SET` type historically did not include a system of handling
+blank sets and empty values separately; as different drivers had different
+behaviors for treatment of empty strings and empty-string-set representations,
+the SET type tried only to hedge between these behaviors, opting to treat the
+empty set as ``set([''])`` as is still the current behavior for the
+MySQL-Connector-Python DBAPI.
+Part of the rationale here was that it was otherwise impossible to actually
+store a blank string within a MySQL SET, as the driver gives us back strings
+with no way to discern between ``set([''])`` and ``set()``.  It was left
+to the user to determine if ``set([''])`` actually meant "empty set" or not.
+
+The new behavior moves the use case for the blank string, which is an unusual
+case that isn't even documented in MySQL's documentation, into a special
+case, and the default behavior of :class:`.mysql.SET` is now:
+
+* to convert the empty string ``''``, as returned by MySQL-python, into the
+  empty set ``set()``;
+
+* to convert the single-blank value set ``set([''])`` returned by
+  MySQL-Connector-Python into the empty set ``set()``;
+
+* to handle the case of a set type that actually wishes to include the blank
+  value ``''`` in its list of possible values,
+  a new feature (required in this use case) is implemented whereby the set
+  value is persisted and loaded as a bitwise integer value; the
+  flag :paramref:`.mysql.SET.retrieve_as_bitwise` is added in order to
+  enable this.
+
+Using the :paramref:`.mysql.SET.retrieve_as_bitwise` flag allows the set
+to be persisted and retrieved with no ambiguity of values.  Theoretically
+this flag can be turned on in all cases, as long as the given list of
+values to the type matches the ordering exactly as declared in the
+database; it only makes the SQL echo output a bit more unusual.
+
+The default behavior of :class:`.mysql.SET` otherwise remains the same,
+roundtripping values using strings.  The string-based behavior now
+supports unicode fully, including MySQL-python with ``use_unicode=0``.
+
+:ticket:`3283`
 
 
 MySQL internal "no such table" exceptions not passed to event handlers
@@ -1489,6 +1869,77 @@ again works on MySQL.
 
 :ticket:`3186`
 
+.. _change_3263:
+
+The match() operator now returns an agnostic MatchType compatible with MySQL's floating point return value
+----------------------------------------------------------------------------------------------------------
+
+The return type of a :meth:`.ColumnOperators.match` expression is now a new type
+called :class:`.MatchType`.  This is a subclass of :class:`.Boolean`
+that can be intercepted by the dialect in order to produce a different
+result type at SQL execution time.
+
+Code like the following will now function correctly and return floating points
+on MySQL::
+
+    >>> connection.execute(
+    ...     select([
+    ...         matchtable.c.title.match('Agile Ruby Programming').label('ruby'),
+    ...         matchtable.c.title.match('Dive Python').label('python'),
+    ...         matchtable.c.title
+    ...     ]).order_by(matchtable.c.id)
+    ... )
+    [
+        (2.0, 0.0, 'Agile Web Development with Ruby On Rails'),
+        (0.0, 2.0, 'Dive Into Python'),
+        (2.0, 0.0, "Programming Matz's Ruby"),
+        (0.0, 0.0, 'The Definitive Guide to Django'),
+        (0.0, 1.0, 'Python in a Nutshell')
+    ]
+
+
+:ticket:`3263`
+
+.. _change_2984:
+
+Drizzle Dialect is now an External Dialect
+------------------------------------------
+
+The dialect for `Drizzle <http://www.drizzle.org/>`_ is now an external
+dialect, available at https://bitbucket.org/zzzeek/sqlalchemy-drizzle.
+This dialect was added to SQLAlchemy right before SQLAlchemy was able to
+accommodate third party dialects well; going forward, all databases that aren't
+within the "ubiquitous use" category are third party dialects.
+The dialect's implementation hasn't changed and is still based on the
+MySQL + MySQLdb dialects within SQLAlchemy. 
The dialect is as yet
+unreleased and in "attic" status; however, it passes the majority of tests
+and is generally in decent working order, if someone wants to pick up
+on polishing it.
+
+Dialect Improvements and Changes - SQLite
+=============================================
+
+SQLite named and unnamed UNIQUE and FOREIGN KEY constraints are now inspected and reflected
+--------------------------------------------------------------------------------------------
+
+UNIQUE and FOREIGN KEY constraints are now fully reflected on
+SQLite both with and without names.  Previously, foreign key
+names were ignored and unnamed unique constraints were skipped.  In particular
+this will help with Alembic's new SQLite migration features.
+
+To achieve this, for both foreign keys and unique constraints, the result
+of PRAGMA foreign_keys, index_list, and index_info is combined with regular
+expression parsing of the CREATE TABLE statement overall to form a complete
+picture of the names of constraints, as well as differentiating UNIQUE
+constraints that were created as UNIQUE vs. unnamed INDEXes.
+
+:ticket:`3244`
+
+:ticket:`3261`
+
+Dialect Improvements and Changes - SQL Server
+=============================================
+
 .. _change_3182:
 
 PyODBC driver name is required with hostname-based SQL Server connections
@@ -1507,38 +1958,40 @@ when using ODBC to avoid this issue entirely.
 
 :ticket:`3182`
 
-.. _change_3204:
+SQL Server 2012 large text / binary types render as VARCHAR, NVARCHAR, VARBINARY
+--------------------------------------------------------------------------------
 
-SQLite/Oracle have distinct methods for temporary table/view name reporting
----------------------------------------------------------------------------
+The rendering of the :class:`.Text`, :class:`.UnicodeText`, and :class:`.LargeBinary`
+types has been changed for SQL Server 2012 and greater, with options
+to control the behavior completely, based on deprecation guidelines from
+Microsoft.  See :ref:`mssql_large_type_deprecation` for details.
 
-The :meth:`.Inspector.get_table_names` and :meth:`.Inspector.get_view_names`
-methods in the case of SQLite/Oracle would also return the names of temporary
-tables and views, which is not provided by any other dialect (in the case
-of MySQL at least it is not even possible).  This logic has been moved
-out to two new methods :meth:`.Inspector.get_temp_table_names` and
-:meth:`.Inspector.get_temp_view_names`.
+Dialect Improvements and Changes - Oracle
+=============================================
 
-Note that reflection of a specific named temporary table or temporary view,
-either by ``Table('name', autoload=True)`` or via methods like
-:meth:`.Inspector.get_columns` continues to function for most if not all
-dialects.  For SQLite specifically, there is a bug fix for UNIQUE constraint
-reflection from temp tables as well, which is :ticket:`3203`.
+.. _change_3220:
 
-:ticket:`3204`
+Improved support for CTEs in Oracle
+-----------------------------------
 
-.. 
_change_2984:
 
-Drizzle Dialect is now an External Dialect
-------------------------------------------
+CTE support has been fixed up for Oracle, and there is also a new feature
+:meth:`.CTE.suffix_with` that can assist with Oracle's special directives::
 
-The dialect for `Drizzle <http://www.drizzle.org/>`_ is now an external
-dialect, available at https://bitbucket.org/zzzeek/sqlalchemy-drizzle.
-This dialect was added to SQLAlchemy right before SQLAlchemy was able to
-accommodate third party dialects well; going forward, all databases that aren't
-within the "ubiquitous use" category are third party dialects.
-The dialect's implementation hasn't changed and is still based on the
-MySQL + MySQLdb dialects within SQLAlchemy.  The dialect is as of yet
-unreleased and in "attic" status; however it passes the majority of tests
-and is generally in decent working order, if someone wants to pick up
-on polishing it.
+    included_parts = select([
+        part.c.sub_part, part.c.part, part.c.quantity
+    ]).where(part.c.part == "p1").\
+        cte(name="included_parts", recursive=True).\
+        suffix_with(
+            "search depth first by part set ord1",
+            "cycle part set y_cycle to 1 default 0", dialect='oracle')
 
+:ticket:`3220`
+
+New Oracle Keywords for DDL
+-----------------------------
+
+Keywords such as COMPRESS, ON COMMIT, and BITMAP are now supported:
+
+:ref:`oracle_table_options`
+
+:ref:`oracle_index_options`
diff --git a/doc/build/conf.py b/doc/build/conf.py
index 5277134e7..22b377fa1 100644
--- a/doc/build/conf.py
+++ b/doc/build/conf.py
@@ -34,13 +34,10 @@ import sqlalchemy
 extensions = [
     'sphinx.ext.autodoc',
     'sphinx.ext.intersphinx',
-    'builder.autodoc_mods',
+    'zzzeeksphinx',
     'changelog',
     'sphinx_paramlinks',
-    'builder.dialect_info',
-    'builder.mako',
-    'builder.sqlformatter',
-    'builder.viewsource',
+    #'corrections'
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -74,6 +71,23 @@ changelog_render_pullreq = {
 
 changelog_render_changeset = "http://www.sqlalchemy.org/trac/changeset/%s"
 
+autodocmods_convert_modname = {
+    "sqlalchemy.sql.sqltypes": "sqlalchemy.types",
+    "sqlalchemy.sql.type_api": "sqlalchemy.types",
+    "sqlalchemy.sql.schema": "sqlalchemy.schema",
+    "sqlalchemy.sql.elements": "sqlalchemy.sql.expression",
+    "sqlalchemy.sql.selectable": "sqlalchemy.sql.expression",
+    "sqlalchemy.sql.dml": "sqlalchemy.sql.expression",
+    "sqlalchemy.sql.ddl": "sqlalchemy.schema",
+    "sqlalchemy.sql.base": "sqlalchemy.sql.expression",
+    "sqlalchemy.engine.base": "sqlalchemy.engine",
+    "sqlalchemy.engine.result": "sqlalchemy.engine",
+}
+
+autodocmods_convert_modname_w_class = {
+    ("sqlalchemy.engine.interfaces", "Connectable"): "sqlalchemy.engine",
+    ("sqlalchemy.sql.base", "DialectKWArgs"): "sqlalchemy.sql.base",
+}
 
 # The encoding of source files.
 #source_encoding = 'utf-8-sig'
@@ -97,6 +111,8 @@ release = "1.0.0"
 release_date = "Not released"
 
 site_base = os.environ.get("RTD_SITE_BASE", "http://www.sqlalchemy.org")
+site_adapter_template = "docs_adapter.mako"
+site_adapter_py = "docs_adapter.py"
 
 # arbitrary number recognized by builders.py, incrementing this
 # will force a rebuild
@@ -144,7 +160,7 @@ gettext_compact = False
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-html_theme = 'default'
+html_theme = 'zzzeeksphinx'
 
 # Theme options are theme-specific and customize the look and feel of a theme
# further. 
For a list of options available for each theme, see the @@ -178,7 +194,7 @@ html_title = "%s %s Documentation" % (project, version) # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['static'] +html_static_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. @@ -328,3 +344,5 @@ intersphinx_mapping = { 'alembic': ('http://alembic.readthedocs.org/en/latest/', None), 'psycopg2': ('http://pythonhosted.org/psycopg2', None), } + + diff --git a/doc/build/contents.rst b/doc/build/contents.rst index df80e9b79..a7277cf90 100644 --- a/doc/build/contents.rst +++ b/doc/build/contents.rst @@ -7,16 +7,18 @@ Full table of contents. For a high level overview of all documentation, see :ref:`index_toplevel`. .. toctree:: - :maxdepth: 3 + :titlesonly: + :includehidden: - intro - orm/index - core/index - dialects/index - changelog/index + intro + orm/index + core/index + dialects/index + faq/index + changelog/index Indices and tables ------------------ +* :ref:`glossary` * :ref:`genindex` -* :ref:`search` diff --git a/doc/build/core/api_basics.rst b/doc/build/core/api_basics.rst new file mode 100644 index 000000000..e56a1117b --- /dev/null +++ b/doc/build/core/api_basics.rst @@ -0,0 +1,12 @@ +================= +Core API Basics +================= + +.. toctree:: + :maxdepth: 2 + + event + inspection + interfaces + exceptions + internals diff --git a/doc/build/core/compiler.rst b/doc/build/core/compiler.rst index 73c9e3995..202ef2b0e 100644 --- a/doc/build/core/compiler.rst +++ b/doc/build/core/compiler.rst @@ -4,4 +4,4 @@ Custom SQL Constructs and Compilation Extension =============================================== .. automodule:: sqlalchemy.ext.compiler - :members:
\ No newline at end of file
+   :members:
diff --git a/doc/build/core/connections.rst b/doc/build/core/connections.rst
index 248309a2e..6d7e7622f 100644
--- a/doc/build/core/connections.rst
+++ b/doc/build/core/connections.rst
@@ -453,13 +453,36 @@ Working with Raw DBAPI Connections
 There are some cases where SQLAlchemy does not provide a genericized way
 at accessing some :term:`DBAPI` functions, such as calling stored procedures as
 well as dealing with multiple result sets.  In these cases, it's just as expedient
-to deal with the raw DBAPI connection directly.  This is accessible from
-a :class:`.Engine` using the :meth:`.Engine.raw_connection` method::
+to deal with the raw DBAPI connection directly.
+
+The most common way to access the raw DBAPI connection is to get it
+from an already present :class:`.Connection` object directly.  It is
+accessible using the :attr:`.Connection.connection` attribute::
+
+    connection = engine.connect()
+    dbapi_conn = connection.connection
+
+The DBAPI connection here is actually "proxied" in terms of the
+originating connection pool; however, this is an implementation detail
+that in most cases can be ignored.  As this DBAPI connection is still
+contained within the scope of an owning :class:`.Connection` object, it is
+best to make use of the :class:`.Connection` object for most features such
+as transaction control as well as calling the :meth:`.Connection.close`
+method; if these operations are performed on the DBAPI connection directly,
+the owning :class:`.Connection` will not be aware of these changes in state.
+
+To overcome the limitations imposed by the DBAPI connection that is
+maintained by an owning :class:`.Connection`, a DBAPI connection is also
+available without the need to procure a
+:class:`.Connection` first, using the :meth:`.Engine.raw_connection` method
+of :class:`.Engine`::
 
     dbapi_conn = engine.raw_connection()
 
-The instance returned is a "wrapped" form of DBAPI connection.  When its
-``.close()`` method is called, the connection is :term:`released` back to the
+This DBAPI connection is again a "proxied" form as was the case before.
+The purpose of this proxying is now apparent, as when we call the ``.close()``
+method of this connection, the DBAPI connection is typically not actually
+closed, but instead :term:`released` back to the
 engine's connection pool::
 
     dbapi_conn.close()
@@ -568,16 +591,16 @@ Connection / Engine API
 .. autoclass:: Engine
    :members:
 
-.. autoclass:: sqlalchemy.engine.ExceptionContext
+.. autoclass:: ExceptionContext
    :members:
 
 .. autoclass:: NestedTransaction
    :members:
 
-.. autoclass:: sqlalchemy.engine.ResultProxy
+.. autoclass:: ResultProxy
   :members:
 
-.. autoclass:: sqlalchemy.engine.RowProxy
+.. autoclass:: RowProxy
   :members:
 
 .. autoclass:: Transaction
diff --git a/doc/build/core/constraints.rst b/doc/build/core/constraints.rst
index 554d003bb..1f855c724 100644
--- a/doc/build/core/constraints.rst
+++ b/doc/build/core/constraints.rst
@@ -7,11 +7,11 @@
 Defining Constraints and Indexes
 =================================
 
-.. _metadata_foreignkeys:
-
 This section will discuss SQL :term:`constraints` and indexes.  In SQLAlchemy
 the key classes include :class:`.ForeignKeyConstraint` and :class:`.Index`.
 
+.. _metadata_foreignkeys:
+
 Defining Foreign Keys
 ---------------------
 
@@ -95,40 +95,175 @@ foreign key referencing two columns.
Creating/Dropping Foreign Key Constraints via ALTER ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -In all the above examples, the :class:`~sqlalchemy.schema.ForeignKey` object -causes the "REFERENCES" keyword to be added inline to a column definition -within a "CREATE TABLE" statement when -:func:`~sqlalchemy.schema.MetaData.create_all` is issued, and -:class:`~sqlalchemy.schema.ForeignKeyConstraint` invokes the "CONSTRAINT" -keyword inline with "CREATE TABLE". There are some cases where this is -undesirable, particularly when two tables reference each other mutually, each -with a foreign key referencing the other. In such a situation at least one of -the foreign key constraints must be generated after both tables have been -built. To support such a scheme, :class:`~sqlalchemy.schema.ForeignKey` and -:class:`~sqlalchemy.schema.ForeignKeyConstraint` offer the flag -``use_alter=True``. When using this flag, the constraint will be generated -using a definition similar to "ALTER TABLE <tablename> ADD CONSTRAINT <name> -...". Since a name is required, the ``name`` attribute must also be specified. -For example:: - - node = Table('node', meta, +The behavior we've seen in tutorials and elsewhere involving +foreign keys with DDL illustrates that the constraints are typically +rendered "inline" within the CREATE TABLE statement, such as: + +.. sourcecode:: sql + + CREATE TABLE addresses ( + id INTEGER NOT NULL, + user_id INTEGER, + email_address VARCHAR NOT NULL, + PRIMARY KEY (id), + CONSTRAINT user_id_fk FOREIGN KEY(user_id) REFERENCES users (id) + ) + +The ``CONSTRAINT .. FOREIGN KEY`` directive is used to create the constraint +in an "inline" fashion within the CREATE TABLE definition. The +:meth:`.MetaData.create_all` and :meth:`.MetaData.drop_all` methods do +this by default, using a topological sort of all the :class:`.Table` objects +involved such that tables are created and dropped in order of their foreign +key dependency (this sort is also available via the +:attr:`.MetaData.sorted_tables` accessor). + +This approach can't work when two or more foreign key constraints are +involved in a "dependency cycle", where a set of tables +are mutually dependent on each other, assuming the backend enforces foreign +keys (always the case except on SQLite, MySQL/MyISAM). The methods will +therefore break out constraints in such a cycle into separate ALTER +statements, on all backends other than SQLite which does not support +most forms of ALTER. Given a schema like:: + + node = Table( + 'node', metadata, Column('node_id', Integer, primary_key=True), - Column('primary_element', Integer, - ForeignKey('element.element_id', use_alter=True, name='fk_node_element_id') + Column( + 'primary_element', Integer, + ForeignKey('element.element_id') ) ) - element = Table('element', meta, + element = Table( + 'element', metadata, Column('element_id', Integer, primary_key=True), Column('parent_node_id', Integer), ForeignKeyConstraint( - ['parent_node_id'], - ['node.node_id'], - use_alter=True, + ['parent_node_id'], ['node.node_id'], name='fk_element_parent_node_id' ) ) +When we call upon :meth:`.MetaData.create_all` on a backend such as the +Postgresql backend, the cycle between these two tables is resolved and the +constraints are created separately: + +.. sourcecode:: pycon+sql + + >>> with engine.connect() as conn: + ... 
metadata.create_all(conn, checkfirst=False)
+    {opensql}CREATE TABLE element (
+        element_id SERIAL NOT NULL,
+        parent_node_id INTEGER,
+        PRIMARY KEY (element_id)
+    )
+
+    CREATE TABLE node (
+        node_id SERIAL NOT NULL,
+        primary_element INTEGER,
+        PRIMARY KEY (node_id)
+    )
+
+    ALTER TABLE element ADD CONSTRAINT fk_element_parent_node_id
+        FOREIGN KEY(parent_node_id) REFERENCES node (node_id)
+    ALTER TABLE node ADD FOREIGN KEY(primary_element)
+        REFERENCES element (element_id)
+    {stop}
+
+In order to emit DROP for these tables, the same logic applies; however,
+note here that in SQL, to emit DROP CONSTRAINT requires that the constraint
+has a name.  In the case of the ``'node'`` table above, we haven't named
+this constraint; the system will therefore attempt to emit DROP for only
+those constraints that are named:
+
+.. sourcecode:: pycon+sql
+
+    >>> with engine.connect() as conn:
+    ...     metadata.drop_all(conn, checkfirst=False)
+    {opensql}ALTER TABLE element DROP CONSTRAINT fk_element_parent_node_id
+    DROP TABLE node
+    DROP TABLE element
+    {stop}
+
+
+In the case where the cycle cannot be resolved, such as if we hadn't applied
+a name to either constraint here, we will receive the following error::
+
+    sqlalchemy.exc.CircularDependencyError: Can't sort tables for DROP;
+    an unresolvable foreign key dependency exists between tables:
+    element, node.  Please ensure that the ForeignKey and ForeignKeyConstraint
+    objects involved in the cycle have names so that they can be dropped
+    using DROP CONSTRAINT.
+
+This error only applies to the DROP case as we can emit "ADD CONSTRAINT"
+in the CREATE case without a name; the database typically assigns one
+automatically.
+
+The :paramref:`.ForeignKeyConstraint.use_alter` and
+:paramref:`.ForeignKey.use_alter` keyword arguments can be used
+to manually resolve dependency cycles.  We can add this flag only to
+the ``'element'`` table as follows::
+
+    element = Table(
+        'element', metadata,
+        Column('element_id', Integer, primary_key=True),
+        Column('parent_node_id', Integer),
+        ForeignKeyConstraint(
+            ['parent_node_id'], ['node.node_id'],
+            use_alter=True, name='fk_element_parent_node_id'
+        )
+    )
+
+In our CREATE DDL we will see the ALTER statement only for this constraint,
+and not the other one:
+
+.. sourcecode:: pycon+sql
+
+    >>> with engine.connect() as conn:
+    ...     metadata.create_all(conn, checkfirst=False)
+    {opensql}CREATE TABLE element (
+        element_id SERIAL NOT NULL,
+        parent_node_id INTEGER,
+        PRIMARY KEY (element_id)
+    )
+
+    CREATE TABLE node (
+        node_id SERIAL NOT NULL,
+        primary_element INTEGER,
+        PRIMARY KEY (node_id),
+        FOREIGN KEY(primary_element) REFERENCES element (element_id)
+    )
+
+    ALTER TABLE element ADD CONSTRAINT fk_element_parent_node_id
+        FOREIGN KEY(parent_node_id) REFERENCES node (node_id)
+    {stop}
+
+:paramref:`.ForeignKeyConstraint.use_alter` and
+:paramref:`.ForeignKey.use_alter`, when used in conjunction with a drop
+operation, will require that the constraint is named, else an error
+like the following is generated::
+
+    sqlalchemy.exc.CompileError: Can't emit DROP CONSTRAINT for constraint
+    ForeignKeyConstraint(...); it has no name
+
+.. versionchanged:: 1.0.0 - The DDL system invoked by
+   :meth:`.MetaData.create_all`
+   and :meth:`.MetaData.drop_all` will now automatically resolve mutually
+   dependent foreign keys between tables declared by
+   :class:`.ForeignKeyConstraint` and :class:`.ForeignKey` objects, without
+   the need to explicitly set the :paramref:`.ForeignKeyConstraint.use_alter`
+   flag.
+
+.. 
versionchanged:: 1.0.0 - The :paramref:`.ForeignKeyConstraint.use_alter`
+   flag can be used with an unnamed constraint; only the DROP operation
+   will emit a specific error when actually called upon.
+
+.. seealso::
+
+    :ref:`constraint_naming_conventions`
+
+    :func:`.sort_tables_and_constraints`
+
 .. _on_update_on_delete:
 
 ON UPDATE and ON DELETE
@@ -439,14 +574,10 @@ Constraints API
    :members:
    :inherited-members:
 
-.. autoclass:: ColumnCollectionConstraint
-   :members:
-
 .. autoclass:: ForeignKey
    :members:
    :inherited-members:
 
-
 .. autoclass:: ForeignKeyConstraint
    :members:
    :inherited-members:
diff --git a/doc/build/core/custom_types.rst b/doc/build/core/custom_types.rst
new file mode 100644
index 000000000..8d0c42703
--- /dev/null
+++ b/doc/build/core/custom_types.rst
@@ -0,0 +1,500 @@
+.. module:: sqlalchemy.types
+
+.. _types_custom:
+
+Custom Types
+------------
+
+A variety of methods exist to redefine the behavior of existing types
+as well as to provide new ones.
+
+Overriding Type Compilation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A frequent need is to force the "string" version of a type, that is
+the one rendered in a CREATE TABLE statement or other SQL function
+like CAST, to be changed.  For example, an application may want
+to force the rendering of ``BINARY`` for all platforms
+except for one, in which it wants ``BLOB`` to be rendered.  Usage
+of an existing generic type, in this case :class:`.LargeBinary`, is
+preferred for most use cases.  But to control
+types more accurately, a compilation directive that is per-dialect
+can be associated with any type::
+
+    from sqlalchemy.ext.compiler import compiles
+    from sqlalchemy.types import BINARY
+
+    @compiles(BINARY, "sqlite")
+    def compile_binary_sqlite(type_, compiler, **kw):
+        return "BLOB"
+
+The above code allows the usage of :class:`.types.BINARY`, which
+will produce the string ``BINARY`` against all backends except SQLite,
+in which case it will produce ``BLOB``.
+
+See the section :ref:`type_compilation_extension`, a subsection of
+:ref:`sqlalchemy.ext.compiler_toplevel`, for additional examples.
+
+.. _types_typedecorator:
+
+Augmenting Existing Types
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The :class:`.TypeDecorator` allows the creation of custom types which
+add bind-parameter and result-processing behavior to an existing
+type object.  It is used when additional in-Python marshaling of data
+to and from the database is required.
+
+.. note::
+
+  The bind- and result-processing of :class:`.TypeDecorator`
+  is *in addition* to the processing already performed by the hosted
+  type, which is customized by SQLAlchemy on a per-DBAPI basis to perform
+  processing specific to that DBAPI.  To change the DBAPI-level processing
+  for an existing type, see the section :ref:`replacing_processors`.
+
+.. autoclass:: TypeDecorator
+   :members:
+   :inherited-members:
+
+
+TypeDecorator Recipes
+~~~~~~~~~~~~~~~~~~~~~
+
+A few key :class:`.TypeDecorator` recipes follow.
+
+.. _coerce_to_unicode:
+
+Coercing Encoded Strings to Unicode
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A common source of confusion regarding the :class:`.Unicode` type
+is that it is intended to deal *only* with Python ``unicode`` objects
+on the Python side, meaning values passed to it as bind parameters
+must be of the form ``u'some string'`` if using Python 2 and not 3.
+The encoding/decoding functions it performs are only to suit what the
+DBAPI in use requires, and are primarily a private implementation detail.
+
+The use case of a type that can safely receive Python bytestrings,
+that is strings that contain non-ASCII characters and are not ``u''``
+objects in Python 2, can be achieved using a :class:`.TypeDecorator`
+which coerces as needed::
+
+    from sqlalchemy.types import TypeDecorator, Unicode
+
+    class CoerceUTF8(TypeDecorator):
+        """Safely coerce Python bytestrings to Unicode
+        before passing off to the database."""
+
+        impl = Unicode
+
+        def process_bind_param(self, value, dialect):
+            if isinstance(value, str):
+                value = value.decode('utf-8')
+            return value
+
+Rounding Numerics
+^^^^^^^^^^^^^^^^^
+
+Some database connectors like those of SQL Server choke if a Decimal is passed with too
+many decimal places.  Here's a recipe that rounds them down::
+
+    from sqlalchemy.types import TypeDecorator, Numeric
+    from decimal import Decimal
+
+    class SafeNumeric(TypeDecorator):
+        """Adds quantization to Numeric."""
+
+        impl = Numeric
+
+        def __init__(self, *arg, **kw):
+            TypeDecorator.__init__(self, *arg, **kw)
+            self.quantize_int = -(self.impl.precision - self.impl.scale)
+            self.quantize = Decimal(10) ** self.quantize_int
+
+        def process_bind_param(self, value, dialect):
+            if isinstance(value, Decimal) and \
+                    value.as_tuple()[2] < self.quantize_int:
+                value = value.quantize(self.quantize)
+            return value
+
+.. _custom_guid_type:
+
+Backend-agnostic GUID Type
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Receives and returns Python uuid() objects.  Uses the PG UUID type
+when using Postgresql, CHAR(32) on other backends, storing them
+in stringified hex format.  Can be modified to store
+binary in CHAR(16) if desired::
+
+    from sqlalchemy.types import TypeDecorator, CHAR
+    from sqlalchemy.dialects.postgresql import UUID
+    import uuid
+
+    class GUID(TypeDecorator):
+        """Platform-independent GUID type.
+
+        Uses Postgresql's UUID type, otherwise uses
+        CHAR(32), storing as stringified hex values.
+
+        """
+        impl = CHAR
+
+        def load_dialect_impl(self, dialect):
+            if dialect.name == 'postgresql':
+                return dialect.type_descriptor(UUID())
+            else:
+                return dialect.type_descriptor(CHAR(32))
+
+        def process_bind_param(self, value, dialect):
+            if value is None:
+                return value
+            elif dialect.name == 'postgresql':
+                return str(value)
+            else:
+                if not isinstance(value, uuid.UUID):
+                    return "%.32x" % uuid.UUID(value).int
+                else:
+                    # hexstring
+                    return "%.32x" % value.int
+
+        def process_result_value(self, value, dialect):
+            if value is None:
+                return value
+            else:
+                return uuid.UUID(value)
+
+Marshal JSON Strings
+^^^^^^^^^^^^^^^^^^^^^
+
+This type uses ``simplejson`` to marshal Python data structures
+to/from JSON.  Can be modified to use Python's builtin json encoder::
+
+    from sqlalchemy.types import TypeDecorator, VARCHAR
+    import json
+
+    class JSONEncodedDict(TypeDecorator):
+        """Represents an immutable structure as a json-encoded string.
+
+        Usage::
+
+            JSONEncodedDict(255)
+
+        """
+
+        impl = VARCHAR
+
+        def process_bind_param(self, value, dialect):
+            if value is not None:
+                value = json.dumps(value)
+
+            return value
+
+        def process_result_value(self, value, dialect):
+            if value is not None:
+                value = json.loads(value)
+            return value
+
+Note that the ORM by default will not detect "mutability" on such a type -
+meaning, in-place changes to values will not be detected and will not be
+flushed.  Without further steps, you instead would need to replace the existing
+value with a new one on each parent object to detect changes. 
Note that
+there's nothing wrong with this, as many applications may not require that the
+values are ever mutated once created.  For those which do have this requirement,
+support for mutability is best applied using the ``sqlalchemy.ext.mutable``
+extension - see the example in :ref:`mutable_toplevel`.
+
+.. _replacing_processors:
+
+Replacing the Bind/Result Processing of Existing Types
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Most augmentation of type behavior at the bind/result level
+is achieved using :class:`.TypeDecorator`.  For the rare scenario
+where the specific processing applied by SQLAlchemy at the DBAPI
+level needs to be replaced, the SQLAlchemy type can be subclassed
+directly, and the ``bind_processor()`` or ``result_processor()``
+methods can be overridden.  Doing so requires that the
+``adapt()`` method also be overridden.  This method is the mechanism
+by which SQLAlchemy produces DBAPI-specific type behavior during
+statement execution.  Overriding it allows a copy of the custom
+type to be used in lieu of a DBAPI-specific type.  Below we subclass
+the :class:`.types.TIME` type to have custom result processing behavior.
+The ``process()`` function will receive ``value`` from the DBAPI
+cursor directly::
+
+    class MySpecialTime(TIME):
+        def __init__(self, special_argument):
+            super(MySpecialTime, self).__init__()
+            self.special_argument = special_argument
+
+        def result_processor(self, dialect, coltype):
+            import datetime
+            time = datetime.time
+            def process(value):
+                if value is not None:
+                    microseconds = value.microseconds
+                    seconds = value.seconds
+                    minutes = seconds / 60
+                    return time(
+                        minutes / 60,
+                        minutes % 60,
+                        seconds - minutes * 60,
+                        microseconds)
+                else:
+                    return None
+            return process
+
+        def adapt(self, impltype):
+            return MySpecialTime(self.special_argument)
+
+.. _types_sql_value_processing:
+
+Applying SQL-level Bind/Result Processing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As seen in the sections :ref:`types_typedecorator` and :ref:`replacing_processors`,
+SQLAlchemy allows Python functions to be invoked both when parameters are sent
+to a statement, as well as when result rows are loaded from the database, to apply
+transformations to the values as they are sent to or from the database.  It is also
+possible to define SQL-level transformations.  The rationale here is for the case
+when only the relational database contains a particular series of functions that are
+necessary to coerce incoming and outgoing data between an application and persistence
+format.  Examples include using database-defined encryption/decryption functions, as
+well as stored procedures that handle geographic data.  The Postgis extension to
+Postgresql includes an extensive array of SQL functions that are necessary for coercing
+data into particular formats.
+
+Any :class:`.TypeEngine`, :class:`.UserDefinedType` or :class:`.TypeDecorator` subclass
+can include implementations of
+:meth:`.TypeEngine.bind_expression` and/or :meth:`.TypeEngine.column_expression`, which
+when defined to return a non-``None`` value should return a :class:`.ColumnElement`
+expression to be injected into the SQL statement, either surrounding
+bound parameters or a column expression. 
+For example, to build a ``Geometry`` type which will apply the Postgis function
+``ST_GeomFromText`` to all outgoing values and the function ``ST_AsText`` to
+all incoming data, we can create our own subclass of :class:`.UserDefinedType`
+which provides these methods in conjunction with
+:data:`~.sqlalchemy.sql.expression.func`::
+
+    from sqlalchemy import func
+    from sqlalchemy.types import UserDefinedType
+
+    class Geometry(UserDefinedType):
+        def get_col_spec(self):
+            return "GEOMETRY"
+
+        def bind_expression(self, bindvalue):
+            return func.ST_GeomFromText(bindvalue, type_=self)
+
+        def column_expression(self, col):
+            return func.ST_AsText(col, type_=self)
+
+We can apply the ``Geometry`` type to :class:`.Table` metadata
+and use it in a :func:`.select` construct::
+
+    geometry = Table('geometry', metadata,
+                  Column('geom_id', Integer, primary_key=True),
+                  Column('geom_data', Geometry)
+                )
+
+    print select([geometry]).where(
+        geometry.c.geom_data == 'LINESTRING(189412 252431,189631 259122)')
+
+The resulting SQL embeds both functions as appropriate.   ``ST_AsText``
+is applied to the columns clause so that the return value is run through
+the function before passing into a result set, and ``ST_GeomFromText``
+is run on the bound parameter so that the passed-in value is converted::
+
+    SELECT geometry.geom_id, ST_AsText(geometry.geom_data) AS geom_data_1
+    FROM geometry
+    WHERE geometry.geom_data = ST_GeomFromText(:geom_data_2)
+
+The :meth:`.TypeEngine.column_expression` method interacts with the
+mechanics of the compiler such that the SQL expression does not interfere
+with the labeling of the wrapped expression.  For example, if we render
+a :func:`.select` against a :func:`.label` of our expression, the string
+label is moved to the outside of the wrapped expression::
+
+    print select([geometry.c.geom_data.label('my_data')])
+
+Output::
+
+    SELECT ST_AsText(geometry.geom_data) AS my_data
+    FROM geometry
+
+For an example of subclassing a built in type directly, we subclass
+:class:`.postgresql.BYTEA` to provide a ``PGPString``, which will make use of the
+Postgresql ``pgcrypto`` extension to encrypt/decrypt values
+transparently::
+
+    from sqlalchemy import create_engine, String, select, func, \
+            MetaData, Table, Column, type_coerce
+
+    from sqlalchemy.dialects.postgresql import BYTEA
+
+    class PGPString(BYTEA):
+        def __init__(self, passphrase, length=None):
+            super(PGPString, self).__init__(length)
+            self.passphrase = passphrase
+
+        def bind_expression(self, bindvalue):
+            # convert the bind's type from PGPString to
+            # String, so that it's passed to psycopg2 as is without
+            # a dbapi.Binary wrapper
+            bindvalue = type_coerce(bindvalue, String)
+            return func.pgp_sym_encrypt(bindvalue, self.passphrase)
+
+        def column_expression(self, col):
+            return func.pgp_sym_decrypt(col, self.passphrase)
+
+    metadata = MetaData()
+    message = Table('message', metadata,
+                    Column('username', String(50)),
+                    Column('message',
+                        PGPString("this is my passphrase", length=1000)),
+                )
+
+    engine = create_engine("postgresql://scott:tiger@localhost/test", echo=True)
+    with engine.begin() as conn:
+        metadata.create_all(conn)
+
+        conn.execute(message.insert(), username="some user",
+                                message="this is my message")
+
+        print conn.scalar(
+                select([message.c.message]).\
+                    where(message.c.username == "some user")
+            )
+
+The ``pgp_sym_encrypt`` and ``pgp_sym_decrypt`` functions are applied
+to the INSERT and SELECT statements::
+
+    INSERT INTO message (username, message)
+      VALUES (%(username)s,
+      pgp_sym_encrypt(%(message)s, %(pgp_sym_encrypt_1)s))
+    {'username': 'some user', 'message': 'this is my message',
+      'pgp_sym_encrypt_1': 'this is my passphrase'}
+
+    SELECT pgp_sym_decrypt(message.message, %(pgp_sym_decrypt_1)s) AS message_1
+    FROM message
+    WHERE message.username = %(username_1)s
+    {'pgp_sym_decrypt_1': 'this is my passphrase', 'username_1': 'some user'}
+
+
+.. versionadded:: 0.8 Added the :meth:`.TypeEngine.bind_expression` and
+   :meth:`.TypeEngine.column_expression` methods.
+
+See also:
+
+:ref:`examples_postgis`
+
+.. _types_operators:
+
+Redefining and Creating New Operators
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+SQLAlchemy Core defines a fixed set of expression operators available to all column expressions.
+Some of these operations have the effect of overloading Python's built in operators;
+examples of such operators include
+:meth:`.ColumnOperators.__eq__` (``table.c.somecolumn == 'foo'``),
+:meth:`.ColumnOperators.__invert__` (``~table.c.flag``),
+and :meth:`.ColumnOperators.__add__` (``table.c.x + table.c.y``).  Other operators are exposed as
+explicit methods on column expressions, such as
+:meth:`.ColumnOperators.in_` (``table.c.value.in_(['x', 'y'])``) and :meth:`.ColumnOperators.like`
+(``table.c.value.like('%ed%')``).
+
+The Core expression constructs in all cases consult the type of the expression in order to determine
+the behavior of existing operators, as well as to locate additional operators that aren't part of
+the built in set.  The :class:`.TypeEngine` base class defines a root "comparison" implementation
+:class:`.TypeEngine.Comparator`, and many specific types provide their own sub-implementations of this
+class.  User-defined :class:`.TypeEngine.Comparator` implementations can be built directly into a
+simple subclass of a particular type in order to override or define new operations.  Below,
+we create a :class:`.Integer` subclass which overrides the :meth:`.ColumnOperators.__add__` operator::
+
+    from sqlalchemy import Integer
+
+    class MyInt(Integer):
+        class comparator_factory(Integer.Comparator):
+            def __add__(self, other):
+                return self.op("goofy")(other)
+
+The above configuration creates a new class ``MyInt``, which
+establishes the :attr:`.TypeEngine.comparator_factory` attribute as
+referring to a new class, subclassing the :class:`.TypeEngine.Comparator` class
+associated with the :class:`.Integer` type.
+
+Usage::
+
+    >>> sometable = Table("sometable", metadata, Column("data", MyInt))
+    >>> print sometable.c.data + 5
+    sometable.data goofy :data_1
+
+The implementation for :meth:`.ColumnOperators.__add__` is consulted
+by an owning SQL expression, by instantiating the :class:`.TypeEngine.Comparator` with
+itself as the ``expr`` attribute.   The mechanics of the expression
+system are such that operations continue recursively until an
+expression object produces a new SQL expression construct.  Above, we
+could just as well have said ``self.expr.op("goofy")(other)`` instead
+of ``self.op("goofy")(other)``.
+
+New methods added to a :class:`.TypeEngine.Comparator` are exposed on an
+owning SQL expression
+using a ``__getattr__`` scheme, which exposes methods added to
+:class:`.TypeEngine.Comparator` onto the owning :class:`.ColumnElement`.
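+To make the ``expr`` attribute concrete, here is a sketch of the same
+``MyInt`` comparator restated using the ``self.expr.op(...)`` spelling,
+exercised against a standalone :func:`.column`::
+
+    from sqlalchemy import Integer
+    from sqlalchemy.sql import column
+
+    class MyInt(Integer):
+        class comparator_factory(Integer.Comparator):
+            def __add__(self, other):
+                # self.expr is the column expression that owns
+                # this comparator
+                return self.expr.op("goofy")(other)
+
+which renders the same kind of expression::
+
+    >>> print column('data', MyInt) + 5
+    data goofy :data_1
+
+New methods reach the owning column the same way, through ``self.expr``.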
+For example, to add a ``log()`` function +to integers:: + + from sqlalchemy import Integer, func + + class MyInt(Integer): + class comparator_factory(Integer.Comparator): + def log(self, other): + return func.log(self.expr, other) + +Using the above type:: + + >>> print sometable.c.data.log(5) + log(:log_1, :log_2) + + +Unary operations +are also possible. For example, to add an implementation of the +Postgresql factorial operator, we combine the :class:`.UnaryExpression` construct +along with a :class:`.custom_op` to produce the factorial expression:: + + from sqlalchemy import Integer + from sqlalchemy.sql.expression import UnaryExpression + from sqlalchemy.sql import operators + + class MyInteger(Integer): + class comparator_factory(Integer.Comparator): + def factorial(self): + return UnaryExpression(self.expr, + modifier=operators.custom_op("!"), + type_=MyInteger) + +Using the above type:: + + >>> from sqlalchemy.sql import column + >>> print column('x', MyInteger).factorial() + x ! + +See also: + +:attr:`.TypeEngine.comparator_factory` + +.. versionadded:: 0.8 The expression system was enhanced to support + customization of operators on a per-type level. + + +Creating New Types +~~~~~~~~~~~~~~~~~~ + +The :class:`.UserDefinedType` class is provided as a simple base class +for defining entirely new database types. Use this to represent native +database types not known by SQLAlchemy. If only Python translation behavior +is needed, use :class:`.TypeDecorator` instead. + +.. autoclass:: UserDefinedType + :members: + + diff --git a/doc/build/core/ddl.rst b/doc/build/core/ddl.rst index cee6f876e..0ba2f2806 100644 --- a/doc/build/core/ddl.rst +++ b/doc/build/core/ddl.rst @@ -220,68 +220,72 @@ details. DDL Expression Constructs API ----------------------------- +.. autofunction:: sort_tables + +.. autofunction:: sort_tables_and_constraints + .. autoclass:: DDLElement :members: :undoc-members: - + .. autoclass:: DDL :members: :undoc-members: - + .. autoclass:: CreateTable :members: :undoc-members: - + .. autoclass:: DropTable :members: :undoc-members: - + .. autoclass:: CreateColumn :members: :undoc-members: - + .. autoclass:: CreateSequence :members: :undoc-members: - + .. autoclass:: DropSequence :members: :undoc-members: - + .. autoclass:: CreateIndex :members: :undoc-members: - + .. autoclass:: DropIndex :members: :undoc-members: - + .. autoclass:: AddConstraint :members: :undoc-members: - + .. autoclass:: DropConstraint :members: :undoc-members: - + .. autoclass:: CreateSchema :members: :undoc-members: - + .. autoclass:: DropSchema :members: :undoc-members: - + diff --git a/doc/build/core/engines_connections.rst b/doc/build/core/engines_connections.rst new file mode 100644 index 000000000..f163a7629 --- /dev/null +++ b/doc/build/core/engines_connections.rst @@ -0,0 +1,11 @@ +========================= +Engine and Connection Use +========================= + +.. toctree:: + :maxdepth: 2 + + engines + connections + pooling + events diff --git a/doc/build/core/exceptions.rst b/doc/build/core/exceptions.rst index 30270f8b0..63bbc1e15 100644 --- a/doc/build/core/exceptions.rst +++ b/doc/build/core/exceptions.rst @@ -2,4 +2,4 @@ Core Exceptions =============== .. automodule:: sqlalchemy.exc - :members:
\ No newline at end of file + :members: diff --git a/doc/build/core/expression_api.rst b/doc/build/core/expression_api.rst index 99bb98881..b32fa0e23 100644 --- a/doc/build/core/expression_api.rst +++ b/doc/build/core/expression_api.rst @@ -16,5 +16,5 @@ see :ref:`sqlexpression_toplevel`. selectable dml functions - types - + compiler + serializer diff --git a/doc/build/core/functions.rst b/doc/build/core/functions.rst index d284d125f..90164850d 100644 --- a/doc/build/core/functions.rst +++ b/doc/build/core/functions.rst @@ -22,6 +22,7 @@ return types are in use. .. automodule:: sqlalchemy.sql.functions :members: :undoc-members: - + :exclude-members: func + diff --git a/doc/build/core/index.rst b/doc/build/core/index.rst index 210f28412..26c26af07 100644 --- a/doc/build/core/index.rst +++ b/doc/build/core/index.rst @@ -9,19 +9,11 @@ In contrast to the ORM’s domain-centric mode of usage, the SQL Expression Language provides a schema-centric usage paradigm. .. toctree:: - :maxdepth: 3 + :maxdepth: 2 tutorial expression_api schema - engines - connections - pooling - event - events - compiler - inspection - serializer - interfaces - exceptions - internals + types + engines_connections + api_basics diff --git a/doc/build/core/internals.rst b/doc/build/core/internals.rst index 1a85e9e6c..81b4f1a81 100644 --- a/doc/build/core/internals.rst +++ b/doc/build/core/internals.rst @@ -7,6 +7,9 @@ Some key internal constructs are listed here. .. currentmodule: sqlalchemy +.. autoclass:: sqlalchemy.schema.ColumnCollectionMixin + :members: + .. autoclass:: sqlalchemy.engine.interfaces.Compiled :members: @@ -29,6 +32,10 @@ Some key internal constructs are listed here. :members: +.. autoclass:: sqlalchemy.log.Identified + :members: + + .. autoclass:: sqlalchemy.sql.compiler.IdentifierPreparer :members: diff --git a/doc/build/core/metadata.rst b/doc/build/core/metadata.rst index d6fc8c6af..e46217c17 100644 --- a/doc/build/core/metadata.rst +++ b/doc/build/core/metadata.rst @@ -316,6 +316,7 @@ Column, Table, MetaData API .. autoclass:: SchemaItem :members: + :undoc-members: .. autoclass:: Table :members: diff --git a/doc/build/core/schema.rst b/doc/build/core/schema.rst index aeb04be18..8553ebcbf 100644 --- a/doc/build/core/schema.rst +++ b/doc/build/core/schema.rst @@ -33,7 +33,7 @@ real DDL. They are therefore most intuitive to those who have some background in creating real schema generation scripts. .. toctree:: - :maxdepth: 1 + :maxdepth: 2 metadata reflection @@ -41,5 +41,3 @@ in creating real schema generation scripts. constraints ddl - - diff --git a/doc/build/core/selectable.rst b/doc/build/core/selectable.rst index 52acb28e5..03ebeb4ab 100644 --- a/doc/build/core/selectable.rst +++ b/doc/build/core/selectable.rst @@ -60,6 +60,9 @@ elements are themselves :class:`.ColumnElement` subclasses). .. autoclass:: HasPrefixes :members: +.. autoclass:: HasSuffixes + :members: + .. 
autoclass:: Join
   :members:
   :inherited-members:
diff --git a/doc/build/core/sqla_engine_arch.png b/doc/build/core/sqla_engine_arch.png
Binary files differ
index f54d105bd..f040a2cf3 100644
--- a/doc/build/core/sqla_engine_arch.png
+++ b/doc/build/core/sqla_engine_arch.png
diff --git a/doc/build/core/tutorial.rst b/doc/build/core/tutorial.rst
index 04a25b174..e96217f79 100644
--- a/doc/build/core/tutorial.rst
+++ b/doc/build/core/tutorial.rst
@@ -307,6 +307,8 @@ them is different across different databases; each database's
 determine the correct value (or values; note that ``inserted_primary_key``
 returns a list so that it supports composite primary keys).
 
+.. _execute_multiple:
+
 Executing Multiple Statements
 ==============================
 
@@ -368,7 +370,7 @@ Selecting
 ==========
 
 We began with inserts just so that our test database had some data in it. The
-more interesting part of the data is selecting it ! We'll cover UPDATE and
+more interesting part of the data is selecting it! We'll cover UPDATE and
 DELETE statements later. The primary construct used to generate SELECT
 statements is the :func:`.select` function:
diff --git a/doc/build/core/type_api.rst b/doc/build/core/type_api.rst
new file mode 100644
index 000000000..88da4939e
--- /dev/null
+++ b/doc/build/core/type_api.rst
@@ -0,0 +1,22 @@
+.. module:: sqlalchemy.types
+
+.. _types_api:
+
+Base Type API
+--------------
+
+.. autoclass:: TypeEngine
+   :members:
+
+
+.. autoclass:: Concatenable
+   :members:
+   :inherited-members:
+
+
+.. autoclass:: NullType
+
+
+.. autoclass:: Variant
+
+   :members: with_variant, __init__
diff --git a/doc/build/core/type_basics.rst b/doc/build/core/type_basics.rst
new file mode 100644
index 000000000..1ff1baac2
--- /dev/null
+++ b/doc/build/core/type_basics.rst
@@ -0,0 +1,229 @@
+Column and Data Types
+=====================
+
+.. module:: sqlalchemy.types
+
+SQLAlchemy provides abstractions for most common database data types,
+and a mechanism for specifying your own custom data types.
+
+The methods and attributes of type objects are rarely used directly.
+Type objects are supplied to :class:`~sqlalchemy.schema.Table` definitions
+and can be supplied as type hints to `functions` for occasions where
+the database driver returns an incorrect type.
+
+.. code-block:: pycon
+
+    >>> users = Table('users', metadata,
+    ...           Column('id', Integer, primary_key=True),
+    ...           Column('login', String(32))
+    ...           )
+
+
+SQLAlchemy will use the ``Integer`` and ``String(32)`` type
+information when issuing a ``CREATE TABLE`` statement and will use it
+again when reading back rows ``SELECTed`` from the database.
+Functions that accept a type (such as :func:`~sqlalchemy.schema.Column`) will
+typically accept a type class or instance; ``Integer`` is equivalent
+to ``Integer()`` with no construction arguments in this case.
+
+.. _types_generic:
+
+Generic Types
+-------------
+
+Generic types specify a column that can read, write and store a
+particular type of Python data.  SQLAlchemy will choose the best
+database column type available on the target database when issuing a
+``CREATE TABLE`` statement.  For complete control over which column
+type is emitted in ``CREATE TABLE``, such as ``VARCHAR``, see
+`SQL Standard Types`_ and the other sections of this chapter.
+
+.. autoclass:: BigInteger
+   :members:
+
+.. autoclass:: Boolean
+   :members:
+
+.. autoclass:: Date
+   :members:
+
+.. autoclass:: DateTime
+   :members:
+
+.. autoclass:: Enum
+   :members: __init__, create, drop
+
+.. autoclass:: Float
+   :members:
+
+.. 
autoclass:: Integer + :members: + +.. autoclass:: Interval + :members: + +.. autoclass:: LargeBinary + :members: + +.. autoclass:: MatchType + :members: + +.. autoclass:: Numeric + :members: + +.. autoclass:: PickleType + :members: + +.. autoclass:: SchemaType + :members: + :undoc-members: + +.. autoclass:: SmallInteger + :members: + +.. autoclass:: String + :members: + +.. autoclass:: Text + :members: + +.. autoclass:: Time + :members: + +.. autoclass:: Unicode + :members: + +.. autoclass:: UnicodeText + :members: + +.. _types_sqlstandard: + +SQL Standard Types +------------------ + +The SQL standard types always create database column types of the same +name when ``CREATE TABLE`` is issued. Some types may not be supported +on all databases. + +.. autoclass:: BIGINT + + +.. autoclass:: BINARY + + +.. autoclass:: BLOB + + +.. autoclass:: BOOLEAN + + +.. autoclass:: CHAR + + +.. autoclass:: CLOB + + +.. autoclass:: DATE + + +.. autoclass:: DATETIME + + +.. autoclass:: DECIMAL + + +.. autoclass:: FLOAT + + +.. autoclass:: INT + + +.. autoclass:: sqlalchemy.types.INTEGER + + +.. autoclass:: NCHAR + + +.. autoclass:: NVARCHAR + + +.. autoclass:: NUMERIC + + +.. autoclass:: REAL + + +.. autoclass:: SMALLINT + + +.. autoclass:: TEXT + + +.. autoclass:: TIME + + +.. autoclass:: TIMESTAMP + + +.. autoclass:: VARBINARY + + +.. autoclass:: VARCHAR + + +.. _types_vendor: + +Vendor-Specific Types +--------------------- + +Database-specific types are also available for import from each +database's dialect module. See the :ref:`dialect_toplevel` +reference for the database you're interested in. + +For example, MySQL has a ``BIGINT`` type and PostgreSQL has an +``INET`` type. To use these, import them from the module explicitly:: + + from sqlalchemy.dialects import mysql + + table = Table('foo', metadata, + Column('id', mysql.BIGINT), + Column('enumerates', mysql.ENUM('a', 'b', 'c')) + ) + +Or some PostgreSQL types:: + + from sqlalchemy.dialects import postgresql + + table = Table('foo', metadata, + Column('ipaddress', postgresql.INET), + Column('elements', postgresql.ARRAY(String)) + ) + +Each dialect provides the full set of typenames supported by +that backend within its `__all__` collection, so that a simple +`import *` or similar will import all supported types as +implemented for that backend:: + + from sqlalchemy.dialects.postgresql import * + + t = Table('mytable', metadata, + Column('id', INTEGER, primary_key=True), + Column('name', VARCHAR(300)), + Column('inetaddr', INET) + ) + +Where above, the INTEGER and VARCHAR types are ultimately from +sqlalchemy.types, and INET is specific to the Postgresql dialect. + +Some dialect level types have the same name as the SQL standard type, +but also provide additional arguments. For example, MySQL implements +the full range of character and string types including additional arguments +such as `collation` and `charset`:: + + from sqlalchemy.dialects.mysql import VARCHAR, TEXT + + table = Table('foo', meta, + Column('col1', VARCHAR(200, collation='binary')), + Column('col2', TEXT(charset='latin1')) + ) + diff --git a/doc/build/core/types.rst b/doc/build/core/types.rst index 14e30e46d..ab761a1cb 100644 --- a/doc/build/core/types.rst +++ b/doc/build/core/types.rst @@ -3,744 +3,9 @@ Column and Data Types ===================== -.. module:: sqlalchemy.types +.. toctree:: + :maxdepth: 2 -SQLAlchemy provides abstractions for most common database data types, -and a mechanism for specifying your own custom data types. 
- -The methods and attributes of type objects are rarely used directly. -Type objects are supplied to :class:`~sqlalchemy.schema.Table` definitions -and can be supplied as type hints to `functions` for occasions where -the database driver returns an incorrect type. - -.. code-block:: pycon - - >>> users = Table('users', metadata, - ... Column('id', Integer, primary_key=True) - ... Column('login', String(32)) - ... ) - - -SQLAlchemy will use the ``Integer`` and ``String(32)`` type -information when issuing a ``CREATE TABLE`` statement and will use it -again when reading back rows ``SELECTed`` from the database. -Functions that accept a type (such as :func:`~sqlalchemy.schema.Column`) will -typically accept a type class or instance; ``Integer`` is equivalent -to ``Integer()`` with no construction arguments in this case. - -.. _types_generic: - -Generic Types -------------- - -Generic types specify a column that can read, write and store a -particular type of Python data. SQLAlchemy will choose the best -database column type available on the target database when issuing a -``CREATE TABLE`` statement. For complete control over which column -type is emitted in ``CREATE TABLE``, such as ``VARCHAR`` see `SQL -Standard Types`_ and the other sections of this chapter. - -.. autoclass:: BigInteger - :members: - -.. autoclass:: Boolean - :members: - -.. autoclass:: Date - :members: - -.. autoclass:: DateTime - :members: - -.. autoclass:: Enum - :members: __init__, create, drop - -.. autoclass:: Float - :members: - -.. autoclass:: Integer - :members: - -.. autoclass:: Interval - :members: - -.. autoclass:: LargeBinary - :members: - -.. autoclass:: Numeric - :members: - -.. autoclass:: PickleType - :members: - -.. autoclass:: SchemaType - :members: - :undoc-members: - -.. autoclass:: SmallInteger - :members: - -.. autoclass:: String - :members: - -.. autoclass:: Text - :members: - -.. autoclass:: Time - :members: - -.. autoclass:: Unicode - :members: - -.. autoclass:: UnicodeText - :members: - -.. _types_sqlstandard: - -SQL Standard Types ------------------- - -The SQL standard types always create database column types of the same -name when ``CREATE TABLE`` is issued. Some types may not be supported -on all databases. - -.. autoclass:: BIGINT - - -.. autoclass:: BINARY - - -.. autoclass:: BLOB - - -.. autoclass:: BOOLEAN - - -.. autoclass:: CHAR - - -.. autoclass:: CLOB - - -.. autoclass:: DATE - - -.. autoclass:: DATETIME - - -.. autoclass:: DECIMAL - - -.. autoclass:: FLOAT - - -.. autoclass:: INT - - -.. autoclass:: sqlalchemy.types.INTEGER - - -.. autoclass:: NCHAR - - -.. autoclass:: NVARCHAR - - -.. autoclass:: NUMERIC - - -.. autoclass:: REAL - - -.. autoclass:: SMALLINT - - -.. autoclass:: TEXT - - -.. autoclass:: TIME - - -.. autoclass:: TIMESTAMP - - -.. autoclass:: VARBINARY - - -.. autoclass:: VARCHAR - - -.. _types_vendor: - -Vendor-Specific Types ---------------------- - -Database-specific types are also available for import from each -database's dialect module. See the :ref:`dialect_toplevel` -reference for the database you're interested in. - -For example, MySQL has a ``BIGINT`` type and PostgreSQL has an -``INET`` type. 
To use these, import them from the module explicitly:: - - from sqlalchemy.dialects import mysql - - table = Table('foo', metadata, - Column('id', mysql.BIGINT), - Column('enumerates', mysql.ENUM('a', 'b', 'c')) - ) - -Or some PostgreSQL types:: - - from sqlalchemy.dialects import postgresql - - table = Table('foo', metadata, - Column('ipaddress', postgresql.INET), - Column('elements', postgresql.ARRAY(String)) - ) - -Each dialect provides the full set of typenames supported by -that backend within its `__all__` collection, so that a simple -`import *` or similar will import all supported types as -implemented for that backend:: - - from sqlalchemy.dialects.postgresql import * - - t = Table('mytable', metadata, - Column('id', INTEGER, primary_key=True), - Column('name', VARCHAR(300)), - Column('inetaddr', INET) - ) - -Where above, the INTEGER and VARCHAR types are ultimately from -sqlalchemy.types, and INET is specific to the Postgresql dialect. - -Some dialect level types have the same name as the SQL standard type, -but also provide additional arguments. For example, MySQL implements -the full range of character and string types including additional arguments -such as `collation` and `charset`:: - - from sqlalchemy.dialects.mysql import VARCHAR, TEXT - - table = Table('foo', meta, - Column('col1', VARCHAR(200, collation='binary')), - Column('col2', TEXT(charset='latin1')) - ) - -.. _types_custom: - -Custom Types ------------- - -A variety of methods exist to redefine the behavior of existing types -as well as to provide new ones. - -Overriding Type Compilation -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -A frequent need is to force the "string" version of a type, that is -the one rendered in a CREATE TABLE statement or other SQL function -like CAST, to be changed. For example, an application may want -to force the rendering of ``BINARY`` for all platforms -except for one, in which is wants ``BLOB`` to be rendered. Usage -of an existing generic type, in this case :class:`.LargeBinary`, is -preferred for most use cases. But to control -types more accurately, a compilation directive that is per-dialect -can be associated with any type:: - - from sqlalchemy.ext.compiler import compiles - from sqlalchemy.types import BINARY - - @compiles(BINARY, "sqlite") - def compile_binary_sqlite(type_, compiler, **kw): - return "BLOB" - -The above code allows the usage of :class:`.types.BINARY`, which -will produce the string ``BINARY`` against all backends except SQLite, -in which case it will produce ``BLOB``. - -See the section :ref:`type_compilation_extension`, a subsection of -:ref:`sqlalchemy.ext.compiler_toplevel`, for additional examples. - -.. _types_typedecorator: - -Augmenting Existing Types -~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :class:`.TypeDecorator` allows the creation of custom types which -add bind-parameter and result-processing behavior to an existing -type object. It is used when additional in-Python marshaling of data -to and from the database is required. - -.. note:: - - The bind- and result-processing of :class:`.TypeDecorator` - is *in addition* to the processing already performed by the hosted - type, which is customized by SQLAlchemy on a per-DBAPI basis to perform - processing specific to that DBAPI. To change the DBAPI-level processing - for an existing type, see the section :ref:`replacing_processors`. - -.. autoclass:: TypeDecorator - :members: - :inherited-members: - - -TypeDecorator Recipes -~~~~~~~~~~~~~~~~~~~~~ -A few key :class:`.TypeDecorator` recipes follow. - -.. 
_coerce_to_unicode: - -Coercing Encoded Strings to Unicode -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A common source of confusion regarding the :class:`.Unicode` type -is that it is intended to deal *only* with Python ``unicode`` objects -on the Python side, meaning values passed to it as bind parameters -must be of the form ``u'some string'`` if using Python 2 and not 3. -The encoding/decoding functions it performs are only to suit what the -DBAPI in use requires, and are primarily a private implementation detail. - -The use case of a type that can safely receive Python bytestrings, -that is strings that contain non-ASCII characters and are not ``u''`` -objects in Python 2, can be achieved using a :class:`.TypeDecorator` -which coerces as needed:: - - from sqlalchemy.types import TypeDecorator, Unicode - - class CoerceUTF8(TypeDecorator): - """Safely coerce Python bytestrings to Unicode - before passing off to the database.""" - - impl = Unicode - - def process_bind_param(self, value, dialect): - if isinstance(value, str): - value = value.decode('utf-8') - return value - -Rounding Numerics -^^^^^^^^^^^^^^^^^ - -Some database connectors like those of SQL Server choke if a Decimal is passed with too -many decimal places. Here's a recipe that rounds them down:: - - from sqlalchemy.types import TypeDecorator, Numeric - from decimal import Decimal - - class SafeNumeric(TypeDecorator): - """Adds quantization to Numeric.""" - - impl = Numeric - - def __init__(self, *arg, **kw): - TypeDecorator.__init__(self, *arg, **kw) - self.quantize_int = -(self.impl.precision - self.impl.scale) - self.quantize = Decimal(10) ** self.quantize_int - - def process_bind_param(self, value, dialect): - if isinstance(value, Decimal) and \ - value.as_tuple()[2] < self.quantize_int: - value = value.quantize(self.quantize) - return value - -.. _custom_guid_type: - -Backend-agnostic GUID Type -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Receives and returns Python uuid() objects. Uses the PG UUID type -when using Postgresql, CHAR(32) on other backends, storing them -in stringified hex format. Can be modified to store -binary in CHAR(16) if desired:: - - from sqlalchemy.types import TypeDecorator, CHAR - from sqlalchemy.dialects.postgresql import UUID - import uuid - - class GUID(TypeDecorator): - """Platform-independent GUID type. - - Uses Postgresql's UUID type, otherwise uses - CHAR(32), storing as stringified hex values. - - """ - impl = CHAR - - def load_dialect_impl(self, dialect): - if dialect.name == 'postgresql': - return dialect.type_descriptor(UUID()) - else: - return dialect.type_descriptor(CHAR(32)) - - def process_bind_param(self, value, dialect): - if value is None: - return value - elif dialect.name == 'postgresql': - return str(value) - else: - if not isinstance(value, uuid.UUID): - return "%.32x" % uuid.UUID(value) - else: - # hexstring - return "%.32x" % value - - def process_result_value(self, value, dialect): - if value is None: - return value - else: - return uuid.UUID(value) - -Marshal JSON Strings -^^^^^^^^^^^^^^^^^^^^^ - -This type uses ``simplejson`` to marshal Python data structures -to/from JSON. Can be modified to use Python's builtin json encoder:: - - from sqlalchemy.types import TypeDecorator, VARCHAR - import json - - class JSONEncodedDict(TypeDecorator): - """Represents an immutable structure as a json-encoded string. 
- - Usage:: - - JSONEncodedDict(255) - - """ - - impl = VARCHAR - - def process_bind_param(self, value, dialect): - if value is not None: - value = json.dumps(value) - - return value - - def process_result_value(self, value, dialect): - if value is not None: - value = json.loads(value) - return value - -Note that the ORM by default will not detect "mutability" on such a type - -meaning, in-place changes to values will not be detected and will not be -flushed. Without further steps, you instead would need to replace the existing -value with a new one on each parent object to detect changes. Note that -there's nothing wrong with this, as many applications may not require that the -values are ever mutated once created. For those which do have this requirement, -support for mutability is best applied using the ``sqlalchemy.ext.mutable`` -extension - see the example in :ref:`mutable_toplevel`. - -.. _replacing_processors: - -Replacing the Bind/Result Processing of Existing Types -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Most augmentation of type behavior at the bind/result level -is achieved using :class:`.TypeDecorator`. For the rare scenario -where the specific processing applied by SQLAlchemy at the DBAPI -level needs to be replaced, the SQLAlchemy type can be subclassed -directly, and the ``bind_processor()`` or ``result_processor()`` -methods can be overridden. Doing so requires that the -``adapt()`` method also be overridden. This method is the mechanism -by which SQLAlchemy produces DBAPI-specific type behavior during -statement execution. Overriding it allows a copy of the custom -type to be used in lieu of a DBAPI-specific type. Below we subclass -the :class:`.types.TIME` type to have custom result processing behavior. -The ``process()`` function will receive ``value`` from the DBAPI -cursor directly:: - - class MySpecialTime(TIME): - def __init__(self, special_argument): - super(MySpecialTime, self).__init__() - self.special_argument = special_argument - - def result_processor(self, dialect, coltype): - import datetime - time = datetime.time - def process(value): - if value is not None: - microseconds = value.microseconds - seconds = value.seconds - minutes = seconds / 60 - return time( - minutes / 60, - minutes % 60, - seconds - minutes * 60, - microseconds) - else: - return None - return process - - def adapt(self, impltype): - return MySpecialTime(self.special_argument) - -.. _types_sql_value_processing: - -Applying SQL-level Bind/Result Processing -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -As seen in the sections :ref:`types_typedecorator` and :ref:`replacing_processors`, -SQLAlchemy allows Python functions to be invoked both when parameters are sent -to a statement, as well as when result rows are loaded from the database, to apply -transformations to the values as they are sent to or from the database. It is also -possible to define SQL-level transformations as well. The rationale here is when -only the relational database contains a particular series of functions that are necessary -to coerce incoming and outgoing data between an application and persistence format. -Examples include using database-defined encryption/decryption functions, as well -as stored procedures that handle geographic data. The Postgis extension to Postgresql -includes an extensive array of SQL functions that are necessary for coercing -data into particular formats. 
- -Any :class:`.TypeEngine`, :class:`.UserDefinedType` or :class:`.TypeDecorator` subclass -can include implementations of -:meth:`.TypeEngine.bind_expression` and/or :meth:`.TypeEngine.column_expression`, which -when defined to return a non-``None`` value should return a :class:`.ColumnElement` -expression to be injected into the SQL statement, either surrounding -bound parameters or a column expression. For example, to build a ``Geometry`` -type which will apply the Postgis function ``ST_GeomFromText`` to all outgoing -values and the function ``ST_AsText`` to all incoming data, we can create -our own subclass of :class:`.UserDefinedType` which provides these methods -in conjunction with :data:`~.sqlalchemy.sql.expression.func`:: - - from sqlalchemy import func - from sqlalchemy.types import UserDefinedType - - class Geometry(UserDefinedType): - def get_col_spec(self): - return "GEOMETRY" - - def bind_expression(self, bindvalue): - return func.ST_GeomFromText(bindvalue, type_=self) - - def column_expression(self, col): - return func.ST_AsText(col, type_=self) - -We can apply the ``Geometry`` type into :class:`.Table` metadata -and use it in a :func:`.select` construct:: - - geometry = Table('geometry', metadata, - Column('geom_id', Integer, primary_key=True), - Column('geom_data', Geometry) - ) - - print select([geometry]).where( - geometry.c.geom_data == 'LINESTRING(189412 252431,189631 259122)') - -The resulting SQL embeds both functions as appropriate. ``ST_AsText`` -is applied to the columns clause so that the return value is run through -the function before passing into a result set, and ``ST_GeomFromText`` -is run on the bound parameter so that the passed-in value is converted:: - - SELECT geometry.geom_id, ST_AsText(geometry.geom_data) AS geom_data_1 - FROM geometry - WHERE geometry.geom_data = ST_GeomFromText(:geom_data_2) - -The :meth:`.TypeEngine.column_expression` method interacts with the -mechanics of the compiler such that the SQL expression does not interfere -with the labeling of the wrapped expression. 
Such as, if we rendered -a :func:`.select` against a :func:`.label` of our expression, the string -label is moved to the outside of the wrapped expression:: - - print select([geometry.c.geom_data.label('my_data')]) - -Output:: - - SELECT ST_AsText(geometry.geom_data) AS my_data - FROM geometry - -For an example of subclassing a built in type directly, we subclass -:class:`.postgresql.BYTEA` to provide a ``PGPString``, which will make use of the -Postgresql ``pgcrypto`` extension to encrpyt/decrypt values -transparently:: - - from sqlalchemy import create_engine, String, select, func, \ - MetaData, Table, Column, type_coerce - - from sqlalchemy.dialects.postgresql import BYTEA - - class PGPString(BYTEA): - def __init__(self, passphrase, length=None): - super(PGPString, self).__init__(length) - self.passphrase = passphrase - - def bind_expression(self, bindvalue): - # convert the bind's type from PGPString to - # String, so that it's passed to psycopg2 as is without - # a dbapi.Binary wrapper - bindvalue = type_coerce(bindvalue, String) - return func.pgp_sym_encrypt(bindvalue, self.passphrase) - - def column_expression(self, col): - return func.pgp_sym_decrypt(col, self.passphrase) - - metadata = MetaData() - message = Table('message', metadata, - Column('username', String(50)), - Column('message', - PGPString("this is my passphrase", length=1000)), - ) - - engine = create_engine("postgresql://scott:tiger@localhost/test", echo=True) - with engine.begin() as conn: - metadata.create_all(conn) - - conn.execute(message.insert(), username="some user", - message="this is my message") - - print conn.scalar( - select([message.c.message]).\ - where(message.c.username == "some user") - ) - -The ``pgp_sym_encrypt`` and ``pgp_sym_decrypt`` functions are applied -to the INSERT and SELECT statements:: - - INSERT INTO message (username, message) - VALUES (%(username)s, pgp_sym_encrypt(%(message)s, %(pgp_sym_encrypt_1)s)) - {'username': 'some user', 'message': 'this is my message', - 'pgp_sym_encrypt_1': 'this is my passphrase'} - - SELECT pgp_sym_decrypt(message.message, %(pgp_sym_decrypt_1)s) AS message_1 - FROM message - WHERE message.username = %(username_1)s - {'pgp_sym_decrypt_1': 'this is my passphrase', 'username_1': 'some user'} - - -.. versionadded:: 0.8 Added the :meth:`.TypeEngine.bind_expression` and - :meth:`.TypeEngine.column_expression` methods. - -See also: - -:ref:`examples_postgis` - -.. _types_operators: - -Redefining and Creating New Operators -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -SQLAlchemy Core defines a fixed set of expression operators available to all column expressions. -Some of these operations have the effect of overloading Python's built in operators; -examples of such operators include -:meth:`.ColumnOperators.__eq__` (``table.c.somecolumn == 'foo'``), -:meth:`.ColumnOperators.__invert__` (``~table.c.flag``), -and :meth:`.ColumnOperators.__add__` (``table.c.x + table.c.y``). Other operators are exposed as -explicit methods on column expressions, such as -:meth:`.ColumnOperators.in_` (``table.c.value.in_(['x', 'y'])``) and :meth:`.ColumnOperators.like` -(``table.c.value.like('%ed%')``). - -The Core expression constructs in all cases consult the type of the expression in order to determine -the behavior of existing operators, as well as to locate additional operators that aren't part of -the built in set. 
The :class:`.TypeEngine` base class defines a root "comparison" implementation -:class:`.TypeEngine.Comparator`, and many specific types provide their own sub-implementations of this -class. User-defined :class:`.TypeEngine.Comparator` implementations can be built directly into a -simple subclass of a particular type in order to override or define new operations. Below, -we create a :class:`.Integer` subclass which overrides the :meth:`.ColumnOperators.__add__` operator:: - - from sqlalchemy import Integer - - class MyInt(Integer): - class comparator_factory(Integer.Comparator): - def __add__(self, other): - return self.op("goofy")(other) - -The above configuration creates a new class ``MyInt``, which -establishes the :attr:`.TypeEngine.comparator_factory` attribute as -referring to a new class, subclassing the :class:`.TypeEngine.Comparator` class -associated with the :class:`.Integer` type. - -Usage:: - - >>> sometable = Table("sometable", metadata, Column("data", MyInt)) - >>> print sometable.c.data + 5 - sometable.data goofy :data_1 - -The implementation for :meth:`.ColumnOperators.__add__` is consulted -by an owning SQL expression, by instantiating the :class:`.TypeEngine.Comparator` with -itself as the ``expr`` attribute. The mechanics of the expression -system are such that operations continue recursively until an -expression object produces a new SQL expression construct. Above, we -could just as well have said ``self.expr.op("goofy")(other)`` instead -of ``self.op("goofy")(other)``. - -New methods added to a :class:`.TypeEngine.Comparator` are exposed on an -owning SQL expression -using a ``__getattr__`` scheme, which exposes methods added to -:class:`.TypeEngine.Comparator` onto the owning :class:`.ColumnElement`. -For example, to add a ``log()`` function -to integers:: - - from sqlalchemy import Integer, func - - class MyInt(Integer): - class comparator_factory(Integer.Comparator): - def log(self, other): - return func.log(self.expr, other) - -Using the above type:: - - >>> print sometable.c.data.log(5) - log(:log_1, :log_2) - - -Unary operations -are also possible. For example, to add an implementation of the -Postgresql factorial operator, we combine the :class:`.UnaryExpression` construct -along with a :class:`.custom_op` to produce the factorial expression:: - - from sqlalchemy import Integer - from sqlalchemy.sql.expression import UnaryExpression - from sqlalchemy.sql import operators - - class MyInteger(Integer): - class comparator_factory(Integer.Comparator): - def factorial(self): - return UnaryExpression(self.expr, - modifier=operators.custom_op("!"), - type_=MyInteger) - -Using the above type:: - - >>> from sqlalchemy.sql import column - >>> print column('x', MyInteger).factorial() - x ! - -See also: - -:attr:`.TypeEngine.comparator_factory` - -.. versionadded:: 0.8 The expression system was enhanced to support - customization of operators on a per-type level. - - -Creating New Types -~~~~~~~~~~~~~~~~~~ - -The :class:`.UserDefinedType` class is provided as a simple base class -for defining entirely new database types. Use this to represent native -database types not known by SQLAlchemy. If only Python translation behavior -is needed, use :class:`.TypeDecorator` instead. - -.. autoclass:: UserDefinedType - :members: - - -.. _types_api: - -Base Type API --------------- - -.. autoclass:: TypeEngine - :members: - - -.. autoclass:: Concatenable - :members: - :inherited-members: - - -.. autoclass:: NullType - - -.. 
autoclass:: Variant - - :members: with_variant, __init__ + type_basics + custom_types + type_api diff --git a/doc/build/corrections.py b/doc/build/corrections.py new file mode 100644 index 000000000..fa2e13a38 --- /dev/null +++ b/doc/build/corrections.py @@ -0,0 +1,39 @@ +targets = {} +quit = False +def missing_reference(app, env, node, contnode): + global quit + if quit: + return + reftarget = node.attributes['reftarget'] + reftype = node.attributes['reftype'] + refdoc = node.attributes['refdoc'] + rawsource = node.rawsource + if reftype == 'paramref': + return + + target = rawsource + if target in targets: + return + print "\n%s" % refdoc + print "Reftarget: %s" % rawsource + correction = raw_input("? ") + correction = correction.strip() + if correction == ".": + correction = ":%s:`.%s`" % (reftype, reftarget) + elif correction == 'q': + quit = True + else: + targets[target] = correction + +def write_corrections(app, exception): + print "#!/bin/sh\n\n" + for targ, corr in targets.items(): + if not corr: + continue + + print """find lib/ -print -type f -name "*.py" -exec sed -i '' 's/%s/%s/g' {} \;""" % (targ, corr) + print """find doc/build/ -print -type f -name "*.rst" -exec sed -i '' 's/%s/%s/g' {} \;""" % (targ, corr) + +def setup(app): + app.connect('missing-reference', missing_reference) + app.connect('build-finished', write_corrections) diff --git a/doc/build/dialects/postgresql.rst b/doc/build/dialects/postgresql.rst index e1a96493e..e5d8d51bc 100644 --- a/doc/build/dialects/postgresql.rst +++ b/doc/build/dialects/postgresql.rst @@ -188,17 +188,24 @@ psycopg2 .. automodule:: sqlalchemy.dialects.postgresql.psycopg2 +pg8000 +-------------- + +.. automodule:: sqlalchemy.dialects.postgresql.pg8000 + +psycopg2cffi +-------------- + +.. automodule:: sqlalchemy.dialects.postgresql.psycopg2cffi + py-postgresql -------------------- .. automodule:: sqlalchemy.dialects.postgresql.pypostgresql -pg8000 --------------- - -.. automodule:: sqlalchemy.dialects.postgresql.pg8000 zxjdbc -------------- .. automodule:: sqlalchemy.dialects.postgresql.zxjdbc + diff --git a/doc/build/dialects/sqlite.rst b/doc/build/dialects/sqlite.rst index a18b0ba7b..93a54ee8d 100644 --- a/doc/build/dialects/sqlite.rst +++ b/doc/build/dialects/sqlite.rst @@ -33,4 +33,4 @@ Pysqlite Pysqlcipher ----------- -.. automodule:: sqlalchemy.dialects.sqlite.pysqlcipher
\ No newline at end of file +.. automodule:: sqlalchemy.dialects.sqlite.pysqlcipher diff --git a/doc/build/faq.rst b/doc/build/faq.rst deleted file mode 100644 index 586f66754..000000000 --- a/doc/build/faq.rst +++ /dev/null @@ -1,1471 +0,0 @@ -:orphan: - -.. _faq_toplevel: - -============================ -Frequently Asked Questions -============================ - -.. contents:: - :local: - :class: faq - :backlinks: none - - -Connections / Engines -===================== - -How do I configure logging? ---------------------------- - -See :ref:`dbengine_logging`. - -How do I pool database connections? Are my connections pooled? ----------------------------------------------------------------- - -SQLAlchemy performs application-level connection pooling automatically -in most cases. With the exception of SQLite, a :class:`.Engine` object -refers to a :class:`.QueuePool` as a source of connectivity. - -For more detail, see :ref:`engines_toplevel` and :ref:`pooling_toplevel`. - -How do I pass custom connect arguments to my database API? ------------------------------------------------------------ - -The :func:`.create_engine` call accepts additional arguments either -directly via the ``connect_args`` keyword argument:: - - e = create_engine("mysql://scott:tiger@localhost/test", - connect_args={"encoding": "utf8"}) - -Or for basic string and integer arguments, they can usually be specified -in the query string of the URL:: - - e = create_engine("mysql://scott:tiger@localhost/test?encoding=utf8") - -.. seealso:: - - :ref:`custom_dbapi_args` - -"MySQL Server has gone away" ----------------------------- - -There are two major causes for this error: - -1. The MySQL client closes connections which have been idle for a set period -of time, defaulting to eight hours. This can be avoided by using the ``pool_recycle`` -setting with :func:`.create_engine`, described at :ref:`mysql_connection_timeouts`. - -2. Usage of the MySQLdb :term:`DBAPI`, or a similar DBAPI, in a non-threadsafe manner, or in an otherwise -inappropriate way. The MySQLdb connection object is not threadsafe - this expands -out to any SQLAlchemy system that links to a single connection, which includes the ORM -:class:`.Session`. For background -on how :class:`.Session` should be used in a multithreaded environment, -see :ref:`session_faq_threadsafe`. - -Why does SQLAlchemy issue so many ROLLBACKs? ---------------------------------------------- - -SQLAlchemy currently assumes DBAPI connections are in "non-autocommit" mode - -this is the default behavior of the Python database API, meaning it -must be assumed that a transaction is always in progress. The -connection pool issues ``connection.rollback()`` when a connection is returned. -This is so that any transactional resources remaining on the connection are -released. On a database like Postgresql or MSSQL where table resources are -aggressively locked, this is critical so that rows and tables don't remain -locked within connections that are no longer in use. An application can -otherwise hang. It's not just for locks, however, and is equally critical on -any database that has any kind of transaction isolation, including MySQL with -InnoDB. Any connection that is still inside an old transaction will return -stale data, if that data was already queried on that connection within -isolation. For background on why you might see stale data even on MySQL, see -http://dev.mysql.com/doc/refman/5.1/en/innodb-transaction-model.html - -I'm on MyISAM - how do I turn it off? 
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The behavior of the connection pool's connection return behavior can be -configured using ``reset_on_return``:: - - from sqlalchemy import create_engine - from sqlalchemy.pool import QueuePool - - engine = create_engine('mysql://scott:tiger@localhost/myisam_database', pool=QueuePool(reset_on_return=False)) - -I'm on SQL Server - how do I turn those ROLLBACKs into COMMITs? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -``reset_on_return`` accepts the values ``commit``, ``rollback`` in addition -to ``True``, ``False``, and ``None``. Setting to ``commit`` will cause -a COMMIT as any connection is returned to the pool:: - - engine = create_engine('mssql://scott:tiger@mydsn', pool=QueuePool(reset_on_return='commit')) - - -I am using multiple connections with a SQLite database (typically to test transaction operation), and my test program is not working! ----------------------------------------------------------------------------------------------------------------------------------------------------------- - -If using a SQLite ``:memory:`` database, or a version of SQLAlchemy prior -to version 0.7, the default connection pool is the :class:`.SingletonThreadPool`, -which maintains exactly one SQLite connection per thread. So two -connections in use in the same thread will actually be the same SQLite -connection. Make sure you're not using a :memory: database and -use :class:`.NullPool`, which is the default for non-memory databases in -current SQLAlchemy versions. - -.. seealso:: - - :ref:`pysqlite_threading_pooling` - info on PySQLite's behavior. - -How do I get at the raw DBAPI connection when using an Engine? --------------------------------------------------------------- - -With a regular SA engine-level Connection, you can get at a pool-proxied -version of the DBAPI connection via the :attr:`.Connection.connection` attribute on -:class:`.Connection`, and for the really-real DBAPI connection you can call the -:attr:`.ConnectionFairy.connection` attribute on that - but there should never be any need to access -the non-pool-proxied DBAPI connection, as all methods are proxied through:: - - engine = create_engine(...) - conn = engine.connect() - conn.connection.<do DBAPI things> - cursor = conn.connection.cursor(<DBAPI specific arguments..>) - -You must ensure that you revert any isolation level settings or other -operation-specific settings on the connection back to normal before returning -it to the pool. - -As an alternative to reverting settings, you can call the :meth:`.Connection.detach` method on -either :class:`.Connection` or the proxied connection, which will de-associate -the connection from the pool such that it will be closed and discarded -when :meth:`.Connection.close` is called:: - - conn = engine.connect() - conn.detach() # detaches the DBAPI connection from the connection pool - conn.connection.<go nuts> - conn.close() # connection is closed for real, the pool replaces it with a new connection - -MetaData / Schema -================== - -My program is hanging when I say ``table.drop()`` / ``metadata.drop_all()`` ----------------------------------------------------------------------------- - -This usually corresponds to two conditions: 1. using PostgreSQL, which is really -strict about table locks, and 2. you have a connection still open which -contains locks on the table and is distinct from the connection being used for -the DROP statement. 
Heres the most minimal version of the pattern:: - - connection = engine.connect() - result = connection.execute(mytable.select()) - - mytable.drop(engine) - -Above, a connection pool connection is still checked out; furthermore, the -result object above also maintains a link to this connection. If -"implicit execution" is used, the result will hold this connection opened until -the result object is closed or all rows are exhausted. - -The call to ``mytable.drop(engine)`` attempts to emit DROP TABLE on a second -connection procured from the :class:`.Engine` which will lock. - -The solution is to close out all connections before emitting DROP TABLE:: - - connection = engine.connect() - result = connection.execute(mytable.select()) - - # fully read result sets - result.fetchall() - - # close connections - connection.close() - - # now locks are removed - mytable.drop(engine) - -Does SQLAlchemy support ALTER TABLE, CREATE VIEW, CREATE TRIGGER, Schema Upgrade Functionality? ------------------------------------------------------------------------------------------------ - -General ALTER support isn't present in SQLAlchemy directly. For special DDL -on an ad-hoc basis, the :class:`.DDL` and related constructs can be used. -See :doc:`core/ddl` for a discussion on this subject. - -A more comprehensive option is to use schema migration tools, such as Alembic -or SQLAlchemy-Migrate; see :ref:`schema_migrations` for discussion on this. - -How can I sort Table objects in order of their dependency? ------------------------------------------------------------ - -This is available via the :attr:`.MetaData.sorted_tables` function:: - - metadata = MetaData() - # ... add Table objects to metadata - ti = metadata.sorted_tables: - for t in ti: - print t - -How can I get the CREATE TABLE/ DROP TABLE output as a string? ---------------------------------------------------------------- - -Modern SQLAlchemy has clause constructs which represent DDL operations. These -can be rendered to strings like any other SQL expression:: - - from sqlalchemy.schema import CreateTable - - print CreateTable(mytable) - -To get the string specific to a certain engine:: - - print CreateTable(mytable).compile(engine) - -There's also a special form of :class:`.Engine` that can let you dump an entire -metadata creation sequence, using this recipe:: - - def dump(sql, *multiparams, **params): - print sql.compile(dialect=engine.dialect) - engine = create_engine('postgresql://', strategy='mock', executor=dump) - metadata.create_all(engine, checkfirst=False) - -The `Alembic <https://bitbucket.org/zzzeek/alembic>`_ tool also supports -an "offline" SQL generation mode that renders database migrations as SQL scripts. - -How can I subclass Table/Column to provide certain behaviors/configurations? ------------------------------------------------------------------------------- - -:class:`.Table` and :class:`.Column` are not good targets for direct subclassing. -However, there are simple ways to get on-construction behaviors using creation -functions, and behaviors related to the linkages between schema objects such as -constraint conventions or naming conventions using attachment events. -An example of many of these -techniques can be seen at `Naming Conventions <http://www.sqlalchemy.org/trac/wiki/UsageRecipes/NamingConventions>`_. - - -SQL Expressions -================= - -.. _faq_sql_expression_string: - -How do I render SQL expressions as strings, possibly with bound parameters inlined? 
------------------------------------------------------------------------------------- - -The "stringification" of a SQLAlchemy statement or Query in the vast majority -of cases is as simple as:: - - print(str(statement)) - -this applies both to an ORM :class:`~.orm.query.Query` as well as any :func:`.select` or other -statement. Additionally, to get the statement as compiled to a -specific dialect or engine, if the statement itself is not already -bound to one you can pass this in to :meth:`.ClauseElement.compile`:: - - print(statement.compile(someengine)) - -or without an :class:`.Engine`:: - - from sqlalchemy.dialects import postgresql - print(statement.compile(dialect=postgresql.dialect())) - -When given an ORM :class:`~.orm.query.Query` object, in order to get at the -:meth:`.ClauseElement.compile` -method we only need access the :attr:`~.orm.query.Query.statement` -accessor first:: - - statement = query.statement - print(statement.compile(someengine)) - -The above forms will render the SQL statement as it is passed to the Python -:term:`DBAPI`, which includes that bound parameters are not rendered inline. -SQLAlchemy normally does not stringify bound parameters, as this is handled -appropriately by the Python DBAPI, not to mention bypassing bound -parameters is probably the most widely exploited security hole in -modern web applications. SQLAlchemy has limited ability to do this -stringification in certain circumstances such as that of emitting DDL. -In order to access this functionality one can use the ``literal_binds`` -flag, passed to ``compile_kwargs``:: - - from sqlalchemy.sql import table, column, select - - t = table('t', column('x')) - - s = select([t]).where(t.c.x == 5) - - print(s.compile(compile_kwargs={"literal_binds": True})) - -the above approach has the caveats that it is only supported for basic -types, such as ints and strings, and furthermore if a :func:`.bindparam` -without a pre-set value is used directly, it won't be able to -stringify that either. - -To support inline literal rendering for types not supported, implement -a :class:`.TypeDecorator` for the target type which includes a -:meth:`.TypeDecorator.process_literal_param` method:: - - from sqlalchemy import TypeDecorator, Integer - - - class MyFancyType(TypeDecorator): - impl = Integer - - def process_literal_param(self, value, dialect): - return "my_fancy_formatting(%s)" % value - - from sqlalchemy import Table, Column, MetaData - - tab = Table('mytable', MetaData(), Column('x', MyFancyType())) - - print( - tab.select().where(tab.c.x > 5).compile( - compile_kwargs={"literal_binds": True}) - ) - -producing output like:: - - SELECT mytable.x - FROM mytable - WHERE mytable.x > my_fancy_formatting(5) - - -Why does ``.col.in_([])`` Produce ``col != col``? Why not ``1=0``? -------------------------------------------------------------------- - -A little introduction to the issue. The IN operator in SQL, given a list of -elements to compare against a column, generally does not accept an empty list, -that is while it is valid to say:: - - column IN (1, 2, 3) - -it's not valid to say:: - - column IN () - -SQLAlchemy's :meth:`.Operators.in_` operator, when given an empty list, produces this -expression:: - - column != column - -As of version 0.6, it also produces a warning stating that a less efficient -comparison operation will be rendered. This expression is the only one that is -both database agnostic and produces correct results. 
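A quick sketch of this behavior (the exact rendering and accompanying
warning may vary by version)::

    >>> from sqlalchemy.sql import column
    >>> print column('x').in_([])
    x != x
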
-
-For example, the naive approach of "just evaluate to false, by comparing 1=0
-or 1!=1", does not handle nulls properly. An expression like::
-
-    NOT column != column
-
-will not return a row when "column" is null, but an expression which does not
-take the column into account::
-
-    NOT 1=0
-
-will.
-
-Closer to the mark is the following CASE expression::
-
-    CASE WHEN column IS NOT NULL THEN 1=0 ELSE NULL END
-
-We don't use this expression due to its verbosity, and it's also not
-typically accepted by Oracle within a WHERE clause - depending
-on how you phrase it, you'll either get "ORA-00905: missing keyword" or
-"ORA-00920: invalid relational operator". It's also still less efficient than
-just rendering SQL without the clause altogether (or not issuing the SQL at
-all, if the statement is just a simple search).
-
-The best approach therefore is to avoid the usage of IN given an argument list
-of zero length. Instead, don't emit the Query in the first place, if no rows
-should be returned. The warning is best promoted to a full error condition
-using the Python warnings filter (see http://docs.python.org/library/warnings.html).
-
-ORM Configuration
-==================
-
-.. _faq_mapper_primary_key:
-
-How do I map a table that has no primary key?
----------------------------------------------
-
-The SQLAlchemy ORM, in order to map to a particular table, needs there to be
-at least one column denoted as a primary key column; multiple-column,
-i.e. composite, primary keys are of course entirely feasible as well. These
-columns do **not** need to be actually known to the database as primary key
-columns, though it's a good idea that they are. It's only necessary that the columns
-*behave* as a primary key does, e.g. as a unique and not nullable identifier
-for a row.
-
-Most ORMs require that objects have some kind of primary key defined
-because the object in memory must correspond to a uniquely identifiable
-row in the database table; at the very least, this allows the
-object to be targeted for UPDATE and DELETE statements which will affect only
-that object's row and no other. However, the importance of the primary key
-goes far beyond that. In SQLAlchemy, all ORM-mapped objects are at all times
-linked uniquely within a :class:`.Session`
-to their specific database row using a pattern called the :term:`identity map`,
-a pattern that's central to the unit of work system employed by SQLAlchemy,
-and is also key to the most common (and not-so-common) patterns of ORM usage.
-
-
-.. note::
-
-    It's important to note that we're only talking about the SQLAlchemy ORM; an
-    application which builds on Core and deals only with :class:`.Table` objects,
-    :func:`.select` constructs and the like, **does not** need any primary key
-    to be present on or associated with a table in any way (though again, in SQL, all tables
-    should really have some kind of primary key, lest you need to actually
-    update or delete specific rows).
-
-In almost all cases, a table does have a so-called :term:`candidate key`, which is a column or series
-of columns that uniquely identify a row. If a table truly doesn't have this, and has actual
-fully duplicate rows, the table does not correspond to `first normal form <http://en.wikipedia.org/wiki/First_normal_form>`_ and cannot be mapped.
Otherwise, whatever columns comprise the best candidate key can be -applied directly to the mapper:: - - class SomeClass(Base): - __table__ = some_table_with_no_pk - __mapper_args__ = { - 'primary_key':[some_table_with_no_pk.c.uid, some_table_with_no_pk.c.bar] - } - -Better yet is when using fully declared table metadata, use the ``primary_key=True`` -flag on those columns:: - - class SomeClass(Base): - __tablename__ = "some_table_with_no_pk" - - uid = Column(Integer, primary_key=True) - bar = Column(String, primary_key=True) - -All tables in a relational database should have primary keys. Even a many-to-many -association table - the primary key would be the composite of the two association -columns:: - - CREATE TABLE my_association ( - user_id INTEGER REFERENCES user(id), - account_id INTEGER REFERENCES account(id), - PRIMARY KEY (user_id, account_id) - ) - - -How do I configure a Column that is a Python reserved word or similar? ----------------------------------------------------------------------------- - -Column-based attributes can be given any name desired in the mapping. See -:ref:`mapper_column_distinct_names`. - -How do I get a list of all columns, relationships, mapped attributes, etc. given a mapped class? -------------------------------------------------------------------------------------------------- - -This information is all available from the :class:`.Mapper` object. - -To get at the :class:`.Mapper` for a particular mapped class, call the -:func:`.inspect` function on it:: - - from sqlalchemy import inspect - - mapper = inspect(MyClass) - -From there, all information about the class can be acquired using such methods as: - -* :attr:`.Mapper.attrs` - a namespace of all mapped attributes. The attributes - themselves are instances of :class:`.MapperProperty`, which contain additional - attributes that can lead to the mapped SQL expression or column, if applicable. - -* :attr:`.Mapper.column_attrs` - the mapped attribute namespace - limited to column and SQL expression attributes. You might want to use - :attr:`.Mapper.columns` to get at the :class:`.Column` objects directly. - -* :attr:`.Mapper.relationships` - namespace of all :class:`.RelationshipProperty` attributes. - -* :attr:`.Mapper.all_orm_descriptors` - namespace of all mapped attributes, plus user-defined - attributes defined using systems such as :class:`.hybrid_property`, :class:`.AssociationProxy` and others. - -* :attr:`.Mapper.columns` - A namespace of :class:`.Column` objects and other named - SQL expressions associated with the mapping. - -* :attr:`.Mapper.mapped_table` - The :class:`.Table` or other selectable to which - this mapper is mapped. - -* :attr:`.Mapper.local_table` - The :class:`.Table` that is "local" to this mapper; - this differs from :attr:`.Mapper.mapped_table` in the case of a mapper mapped - using inheritance to a composed selectable. - -.. _faq_combining_columns: - -I'm getting a warning or error about "Implicitly combining column X under attribute Y" --------------------------------------------------------------------------------------- - -This condition refers to when a mapping contains two columns that are being -mapped under the same attribute name due to their name, but there's no indication -that this is intentional. 
A mapped class needs to have explicit names for -every attribute that is to store an independent value; when two columns have the -same name and aren't disambiguated, they fall under the same attribute and -the effect is that the value from one column is **copied** into the other, based -on which column was assigned to the attribute first. - -This behavior is often desirable and is allowed without warning in the case -where the two columns are linked together via a foreign key relationship -within an inheritance mapping. When the warning or exception occurs, the -issue can be resolved by either assigning the columns to differently-named -attributes, or if combining them together is desired, by using -:func:`.column_property` to make this explicit. - -Given the example as follows:: - - from sqlalchemy import Integer, Column, ForeignKey - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - class A(Base): - __tablename__ = 'a' - - id = Column(Integer, primary_key=True) - - class B(A): - __tablename__ = 'b' - - id = Column(Integer, primary_key=True) - a_id = Column(Integer, ForeignKey('a.id')) - -As of SQLAlchemy version 0.9.5, the above condition is detected, and will -warn that the ``id`` column of ``A`` and ``B`` is being combined under -the same-named attribute ``id``, which above is a serious issue since it means -that a ``B`` object's primary key will always mirror that of its ``A``. - -A mapping which resolves this is as follows:: - - class A(Base): - __tablename__ = 'a' - - id = Column(Integer, primary_key=True) - - class B(A): - __tablename__ = 'b' - - b_id = Column('id', Integer, primary_key=True) - a_id = Column(Integer, ForeignKey('a.id')) - -Suppose we did want ``A.id`` and ``B.id`` to be mirrors of each other, despite -the fact that ``B.a_id`` is where ``A.id`` is related. We could combine -them together using :func:`.column_property`:: - - class A(Base): - __tablename__ = 'a' - - id = Column(Integer, primary_key=True) - - class B(A): - __tablename__ = 'b' - - # probably not what you want, but this is a demonstration - id = column_property(Column(Integer, primary_key=True), A.id) - a_id = Column(Integer, ForeignKey('a.id')) - - - -I'm using Declarative and setting primaryjoin/secondaryjoin using an ``and_()`` or ``or_()``, and I am getting an error message about foreign keys. ------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -Are you doing this?:: - - class MyClass(Base): - # .... - - foo = relationship("Dest", primaryjoin=and_("MyClass.id==Dest.foo_id", "MyClass.foo==Dest.bar")) - -That's an ``and_()`` of two string expressions, which SQLAlchemy cannot apply any mapping towards. Declarative allows :func:`.relationship` arguments to be specified as strings, which are converted into expression objects using ``eval()``. But this doesn't occur inside of an ``and_()`` expression - it's a special operation declarative applies only to the *entirety* of what's passed to primaryjoin or other arguments as a string:: - - class MyClass(Base): - # .... - - foo = relationship("Dest", primaryjoin="and_(MyClass.id==Dest.foo_id, MyClass.foo==Dest.bar)") - -Or if the objects you need are already available, skip the strings:: - - class MyClass(Base): - # .... - - foo = relationship(Dest, primaryjoin=and_(MyClass.id==Dest.foo_id, MyClass.foo==Dest.bar)) - -The same idea applies to all the other arguments, such as ``foreign_keys``:: - - # wrong ! 
-    foo = relationship(Dest, foreign_keys=["Dest.foo_id", "Dest.bar_id"])
-
-    # correct !
-    foo = relationship(Dest, foreign_keys="[Dest.foo_id, Dest.bar_id]")
-
-    # also correct !
-    foo = relationship(Dest, foreign_keys=[Dest.foo_id, Dest.bar_id])
-
-    # if you're using columns from the class that you're inside of, just use the column objects !
-    class MyClass(Base):
-        foo_id = Column(...)
-        bar_id = Column(...)
-        # ...
-
-        foo = relationship(Dest, foreign_keys=[foo_id, bar_id])
-
-.. _faq_subqueryload_limit_sort:
-
-Why is ``ORDER BY`` required with ``LIMIT`` (especially with ``subqueryload()``)?
----------------------------------------------------------------------------------
-
-A relational database can return rows in any
-arbitrary order when an explicit ordering is not set.
-While this ordering very often corresponds to the natural
-order of rows within a table, this is not the case for all databases and
-all queries. The consequence of this is that any query that limits rows
-using ``LIMIT`` or ``OFFSET`` should **always** specify an ``ORDER BY``.
-Otherwise, it is not deterministic which rows will actually be returned.
-
-When we use a SQLAlchemy method like :meth:`.Query.first`, we are in fact
-applying a ``LIMIT`` of one to the query, so without an explicit ordering
-it is not deterministic what row we actually get back.
-While we may not notice this for simple queries on databases that usually
-return rows in their natural
-order, it becomes much more of an issue if we also use :func:`.orm.subqueryload`
-to load related collections, and we may not be loading the collections
-as intended.
-
-SQLAlchemy implements :func:`.orm.subqueryload` by issuing a separate query,
-the results of which are matched up to the results from the first query.
-We see two queries emitted like this:
-
-.. sourcecode:: python+sql
-
-    >>> session.query(User).options(subqueryload(User.addresses)).all()
-    {opensql}-- the "main" query
-    SELECT users.id AS users_id
-    FROM users
-    {stop}
-    {opensql}-- the "load" query issued by subqueryload
-    SELECT addresses.id AS addresses_id,
-           addresses.user_id AS addresses_user_id,
-           anon_1.users_id AS anon_1_users_id
-    FROM (SELECT users.id AS users_id FROM users) AS anon_1
-    JOIN addresses ON anon_1.users_id = addresses.user_id
-    ORDER BY anon_1.users_id
-
-The second query embeds the first query as a source of rows.
-When the inner query uses ``OFFSET`` and/or ``LIMIT`` without ordering,
-the two queries may not see the same results:
-
-.. sourcecode:: python+sql
-
-    >>> user = session.query(User).options(subqueryload(User.addresses)).first()
-    {opensql}-- the "main" query
-    SELECT users.id AS users_id
-    FROM users
-    LIMIT 1
-    {stop}
-    {opensql}-- the "load" query issued by subqueryload
-    SELECT addresses.id AS addresses_id,
-           addresses.user_id AS addresses_user_id,
-           anon_1.users_id AS anon_1_users_id
-    FROM (SELECT users.id AS users_id FROM users LIMIT 1) AS anon_1
-    JOIN addresses ON anon_1.users_id = addresses.user_id
-    ORDER BY anon_1.users_id
-
-Depending on database specifics, there is
-a chance we may get a result like the following for the two queries::
-
-    -- query #1
-    +--------+
-    |users_id|
-    +--------+
-    |       1|
-    +--------+
-
-    -- query #2
-    +------------+-----------------+---------------+
-    |addresses_id|addresses_user_id|anon_1_users_id|
-    +------------+-----------------+---------------+
-    |           3|                2|              2|
-    +------------+-----------------+---------------+
-    |           4|                2|              2|
-    +------------+-----------------+---------------+
-
-Above, we receive two ``addresses`` rows for ``user.id`` of 2, and none for
-1. We've wasted two rows and failed to actually load the collection. This
-is an insidious error, because without looking at the SQL and the results, the
-ORM will not show that there's any issue; if we access the ``addresses``
-for the ``User`` we have, it will emit a lazy load for the collection and we
-won't see that anything actually went wrong.
-
-The solution to this problem is to always specify a deterministic sort order,
-so that the main query always returns the same set of rows. This generally
-means that you should use :meth:`.Query.order_by` on a unique column of the table.
-The primary key is a good choice for this::
-
-    session.query(User).options(subqueryload(User.addresses)).order_by(User.id).first()
-
-Note that :func:`.joinedload` does not suffer from the same problem, because
-only one query is ever issued, so the load query cannot be different from the
-main query.
-
-.. seealso::
-
-    :ref:`subqueryload_ordering`
-
-Performance
-===========
-
-How can I profile a SQLAlchemy-powered application?
----------------------------------------------------
-
-Looking for performance issues typically involves two strategies. One
-is query profiling, and the other is code profiling.
-
-Query Profiling
-^^^^^^^^^^^^^^^^
-
-Sometimes just plain SQL logging (enabled via Python's logging module
-or via the ``echo=True`` argument on :func:`.create_engine`) can give an
-idea of how long things are taking. For example, if you log something
-right after a SQL operation, you'd see something like this in your
-log::
-
-    17:37:48,325 INFO  [sqlalchemy.engine.base.Engine.0x...048c] SELECT ...
-    17:37:48,326 INFO  [sqlalchemy.engine.base.Engine.0x...048c] {<params>}
-    17:37:48,660 DEBUG [myapp.somemessage]
-
-if you logged ``myapp.somemessage`` right after the operation, you know
-it took 334ms to complete the SQL part of things.
-
-Logging SQL will also illustrate if dozens or hundreds of queries are
-being issued which could be better organized into much fewer queries.
-When using the SQLAlchemy ORM, the "eager loading"
-feature is provided to partially (:func:`.contains_eager()`) or fully
-(:func:`.joinedload()`, :func:`.subqueryload()`)
-automate this activity, but without
-the ORM, "eager loading" typically means to use joins so that results across multiple
-tables can be loaded in one result set instead of multiplying numbers
-of queries as more depth is added (i.e. ``r + r*r2 + r*r2*r3`` ...).
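-
-As a minimal sketch of turning this logging on (either approach shown here is
-sufficient on its own; the in-memory SQLite URL is just for illustration)::
-
-    import logging
-    logging.basicConfig()
-    logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)
-
-    # or, equivalently, pass echo=True to create_engine()
-    from sqlalchemy import create_engine
-    engine = create_engine('sqlite://', echo=True)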
- -For more long-term profiling of queries, or to implement an application-side -"slow query" monitor, events can be used to intercept cursor executions, -using a recipe like the following:: - - from sqlalchemy import event - from sqlalchemy.engine import Engine - import time - import logging - - logging.basicConfig() - logger = logging.getLogger("myapp.sqltime") - logger.setLevel(logging.DEBUG) - - @event.listens_for(Engine, "before_cursor_execute") - def before_cursor_execute(conn, cursor, statement, - parameters, context, executemany): - conn.info.setdefault('query_start_time', []).append(time.time()) - logger.debug("Start Query: %s", statement) - - @event.listens_for(Engine, "after_cursor_execute") - def after_cursor_execute(conn, cursor, statement, - parameters, context, executemany): - total = time.time() - conn.info['query_start_time'].pop(-1) - logger.debug("Query Complete!") - logger.debug("Total Time: %f", total) - -Above, we use the :meth:`.ConnectionEvents.before_cursor_execute` and -:meth:`.ConnectionEvents.after_cursor_execute` events to establish an interception -point around when a statement is executed. We attach a timer onto the -connection using the :class:`._ConnectionRecord.info` dictionary; we use a -stack here for the occasional case where the cursor execute events may be nested. - -Code Profiling -^^^^^^^^^^^^^^ - -If logging reveals that individual queries are taking too long, you'd -need a breakdown of how much time was spent within the database -processing the query, sending results over the network, being handled -by the :term:`DBAPI`, and finally being received by SQLAlchemy's result set -and/or ORM layer. Each of these stages can present their own -individual bottlenecks, depending on specifics. - -For that you need to use the -`Python Profiling Module <https://docs.python.org/2/library/profile.html>`_. -Below is a simple recipe which works profiling into a context manager:: - - import cProfile - import StringIO - import pstats - import contextlib - - @contextlib.contextmanager - def profiled(): - pr = cProfile.Profile() - pr.enable() - yield - pr.disable() - s = StringIO.StringIO() - ps = pstats.Stats(pr, stream=s).sort_stats('cumulative') - ps.print_stats() - # uncomment this to see who's calling what - # ps.print_callers() - print s.getvalue() - -To profile a section of code:: - - with profiled(): - Session.query(FooClass).filter(FooClass.somevalue==8).all() - -The output of profiling can be used to give an idea where time is -being spent. 
A section of profiling output looks like this:: - - 13726 function calls (13042 primitive calls) in 0.014 seconds - - Ordered by: cumulative time - - ncalls tottime percall cumtime percall filename:lineno(function) - 222/21 0.001 0.000 0.011 0.001 lib/sqlalchemy/orm/loading.py:26(instances) - 220/20 0.002 0.000 0.010 0.001 lib/sqlalchemy/orm/loading.py:327(_instance) - 220/20 0.000 0.000 0.010 0.000 lib/sqlalchemy/orm/loading.py:284(populate_state) - 20 0.000 0.000 0.010 0.000 lib/sqlalchemy/orm/strategies.py:987(load_collection_from_subq) - 20 0.000 0.000 0.009 0.000 lib/sqlalchemy/orm/strategies.py:935(get) - 1 0.000 0.000 0.009 0.009 lib/sqlalchemy/orm/strategies.py:940(_load) - 21 0.000 0.000 0.008 0.000 lib/sqlalchemy/orm/strategies.py:942(<genexpr>) - 2 0.000 0.000 0.004 0.002 lib/sqlalchemy/orm/query.py:2400(__iter__) - 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/orm/query.py:2414(_execute_and_instances) - 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/engine/base.py:659(execute) - 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/sql/elements.py:321(_execute_on_connection) - 2 0.000 0.000 0.002 0.001 lib/sqlalchemy/engine/base.py:788(_execute_clauseelement) - - ... - -Above, we can see that the ``instances()`` SQLAlchemy function was called 222 -times (recursively, and 21 times from the outside), taking a total of .011 -seconds for all calls combined. - -Execution Slowness -^^^^^^^^^^^^^^^^^^ - -The specifics of these calls can tell us where the time is being spent. -If for example, you see time being spent within ``cursor.execute()``, -e.g. against the DBAPI:: - - 2 0.102 0.102 0.204 0.102 {method 'execute' of 'sqlite3.Cursor' objects} - -this would indicate that the database is taking a long time to start returning -results, and it means your query should be optimized, either by adding indexes -or restructuring the query and/or underlying schema. For that task, -analysis of the query plan is warranted, using a system such as EXPLAIN, -SHOW PLAN, etc. as is provided by the database backend. - -Result Fetching Slowness - Core -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -If on the other hand you see many thousands of calls related to fetching rows, -or very long calls to ``fetchall()``, it may -mean your query is returning more rows than expected, or that the fetching -of rows itself is slow. The ORM itself typically uses ``fetchall()`` to fetch -rows (or ``fetchmany()`` if the :meth:`.Query.yield_per` option is used). - -An inordinately large number of rows would be indicated -by a very slow call to ``fetchall()`` at the DBAPI level:: - - 2 0.300 0.600 0.300 0.600 {method 'fetchall' of 'sqlite3.Cursor' objects} - -An unexpectedly large number of rows, even if the ultimate result doesn't seem -to have many rows, can be the result of a cartesian product - when multiple -sets of rows are combined together without appropriately joining the tables -together. It's often easy to produce this behavior with SQLAlchemy Core or -ORM query if the wrong :class:`.Column` objects are used in a complex query, -pulling in additional FROM clauses that are unexpected. - -On the other hand, a fast call to ``fetchall()`` at the DBAPI level, but then -slowness when SQLAlchemy's :class:`.ResultProxy` is asked to do a ``fetchall()``, -may indicate slowness in processing of datatypes, such as unicode conversions -and similar:: - - # the DBAPI cursor is fast... - 2 0.020 0.040 0.020 0.040 {method 'fetchall' of 'sqlite3.Cursor' objects} - - ... 
-
-    # but SQLAlchemy's result proxy is slow, this is type-level processing
-    2    0.100    0.200    0.100    0.200 lib/sqlalchemy/engine/result.py:778(fetchall)
-
-In some cases, a backend might be doing type-level processing that isn't
-needed. More specifically, slow calls within the type API are a better
-indicator - below is what it looks like when we use a type like
-this::
-
-    from sqlalchemy import TypeDecorator, String
-    import time
-
-    class Foo(TypeDecorator):
-        impl = String
-
-        def process_result_value(self, value, dialect):
-            # intentionally add slowness for illustration purposes
-            time.sleep(.001)
-            return value
-
-the profiling output of this intentionally slow operation can be seen like this::
-
-    200    0.001    0.000    0.237    0.001 lib/sqlalchemy/sql/type_api.py:911(process)
-    200    0.001    0.000    0.236    0.001 test.py:28(process_result_value)
-    200    0.235    0.001    0.235    0.001 {time.sleep}
-
-that is, we see many expensive calls within the ``type_api`` system, and the actual
-time-consuming thing is the ``time.sleep()`` call.
-
-Make sure to check the :doc:`Dialect documentation <dialects/index>`
-for notes on known performance tuning suggestions at this level, especially for
-databases like Oracle. There may be systems related to ensuring numeric accuracy
-or string processing that may not be needed in all cases.
-
-There also may be even more low-level points at which row-fetching performance is suffering;
-for example, if time spent seems to focus on a call like ``socket.receive()``,
-that could indicate that everything is fast except for the actual network connection,
-and too much time is spent with data moving over the network.
-
-Result Fetching Slowness - ORM
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-To detect slowness in ORM fetching of rows (which is the most common area
-of performance concern), calls like ``populate_state()`` and ``_instance()`` will
-illustrate individual ORM object populations::
-
-    # the ORM calls _instance for each ORM-loaded row it sees, and
-    # populate_state for each ORM-loaded row that results in the population
-    # of an object's attributes
-    220/20  0.001    0.000    0.010    0.000 lib/sqlalchemy/orm/loading.py:327(_instance)
-    220/20  0.000    0.000    0.009    0.000 lib/sqlalchemy/orm/loading.py:284(populate_state)
-
-The ORM's slowness in turning rows into ORM-mapped objects is a product
-of the complexity of this operation combined with the overhead of cPython.
-Common strategies to mitigate this include:
-
-* fetch individual columns instead of full entities, that is::
-
-      session.query(User.id, User.name)
-
-  instead of::
-
-      session.query(User)
-
-* Use :class:`.Bundle` objects to organize column-based results::
-
-      u_b = Bundle('user', User.id, User.name)
-      a_b = Bundle('address', Address.id, Address.email)
-
-      for user, address in session.query(u_b, a_b).join(User.addresses):
-          # ...
-
-* Use result caching - see :ref:`examples_caching` for an in-depth example
-  of this.
-
-* Consider a faster interpreter like that of PyPy.
-
-The output of a profile can be a little daunting, but after some
-practice it is very easy to read.
-
-If you're feeling ambitious, there's also a more involved example of
-SQLAlchemy profiling within the SQLAlchemy unit tests in the
-``tests/aaa_profiling`` section. Tests in this area
-use decorators that assert a
-maximum number of method calls being used for particular operations,
-so that if something inefficient gets checked in, the tests will
-reveal it (it is important to note that in cPython, function calls have
-the highest overhead of any operation, and the count of calls is more
-often than not nearly proportional to time spent). Of note are
-the "zoomark" tests, which use a fancy "SQL capturing" scheme which
-cuts out the overhead of the DBAPI from the equation - although that
-technique isn't really necessary for garden-variety profiling.
-
-I'm inserting 400,000 rows with the ORM and it's really slow!
---------------------------------------------------------------
-
-The SQLAlchemy ORM uses the :term:`unit of work` pattern when synchronizing
-changes to the database. This pattern goes far beyond simple "inserts"
-of data. It includes that attributes which are assigned on objects are
-received using an attribute instrumentation system which tracks
-changes on objects as they are made; that all rows inserted
-are tracked in an identity map, which has the effect that for each row
-SQLAlchemy must retrieve its "last inserted id" if not already given;
-and that rows to be inserted are scanned and sorted for
-dependencies as needed. Objects are also subject to a fair degree of
-bookkeeping in order to keep all of this running, which for a very
-large number of rows at once can create an inordinate amount of time
-spent with large data structures, hence it's best to chunk these.
-
-Basically, the unit of work is a large degree of automation applied to the
-task of persisting a complex object graph into a
-relational database with no explicit persistence code, and this
-automation has a price.
-
-ORMs are basically not intended for high-performance bulk inserts -
-this is the whole reason SQLAlchemy offers the Core in addition to the
-ORM as a first-class component.
-
-For the use case of fast bulk inserts, the
-SQL generation and execution system that the ORM builds on top of
-is part of the Core. Using this system directly, we can produce an INSERT that
-is competitive with using the raw database API directly.
-
-The example below illustrates time-based tests for four different
-methods of inserting rows, going from the most automated to the least.
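-
-The crux of the Core approach in the script below is a single "executemany"-style
-execution, handing the :class:`.Table` insert construct a whole list of
-parameter dictionaries at once (a minimal sketch of the pattern; the full,
-runnable version appears in the script that follows)::
-
-    engine.execute(
-        Customer.__table__.insert(),
-        [{"name": "NAME %d" % i} for i in range(100000)]
-    )
-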
-With cPython 2.7, runtimes observed:: - - classics-MacBook-Pro:sqlalchemy classic$ python test.py - SQLAlchemy ORM: Total time for 100000 records 14.3528850079 secs - SQLAlchemy ORM pk given: Total time for 100000 records 10.0164160728 secs - SQLAlchemy Core: Total time for 100000 records 0.775382995605 secs - sqlite3: Total time for 100000 records 0.676795005798 sec - -We can reduce the time by a factor of three using recent versions of `Pypy <http://pypy.org/>`_:: - - classics-MacBook-Pro:sqlalchemy classic$ /usr/local/src/pypy-2.1-beta2-osx64/bin/pypy test.py - SQLAlchemy ORM: Total time for 100000 records 5.88369488716 secs - SQLAlchemy ORM pk given: Total time for 100000 records 3.52294301987 secs - SQLAlchemy Core: Total time for 100000 records 0.613556146622 secs - sqlite3: Total time for 100000 records 0.442467927933 sec - -Script:: - - import time - import sqlite3 - - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy import Column, Integer, String, create_engine - from sqlalchemy.orm import scoped_session, sessionmaker - - Base = declarative_base() - DBSession = scoped_session(sessionmaker()) - engine = None - - class Customer(Base): - __tablename__ = "customer" - id = Column(Integer, primary_key=True) - name = Column(String(255)) - - def init_sqlalchemy(dbname='sqlite:///sqlalchemy.db'): - global engine - engine = create_engine(dbname, echo=False) - DBSession.remove() - DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False) - Base.metadata.drop_all(engine) - Base.metadata.create_all(engine) - - def test_sqlalchemy_orm(n=100000): - init_sqlalchemy() - t0 = time.time() - for i in range(n): - customer = Customer() - customer.name = 'NAME ' + str(i) - DBSession.add(customer) - if i % 1000 == 0: - DBSession.flush() - DBSession.commit() - print("SQLAlchemy ORM: Total time for " + str(n) + - " records " + str(time.time() - t0) + " secs") - - def test_sqlalchemy_orm_pk_given(n=100000): - init_sqlalchemy() - t0 = time.time() - for i in range(n): - customer = Customer(id=i+1, name="NAME " + str(i)) - DBSession.add(customer) - if i % 1000 == 0: - DBSession.flush() - DBSession.commit() - print("SQLAlchemy ORM pk given: Total time for " + str(n) + - " records " + str(time.time() - t0) + " secs") - - def test_sqlalchemy_core(n=100000): - init_sqlalchemy() - t0 = time.time() - engine.execute( - Customer.__table__.insert(), - [{"name": 'NAME ' + str(i)} for i in range(n)] - ) - print("SQLAlchemy Core: Total time for " + str(n) + - " records " + str(time.time() - t0) + " secs") - - def init_sqlite3(dbname): - conn = sqlite3.connect(dbname) - c = conn.cursor() - c.execute("DROP TABLE IF EXISTS customer") - c.execute("CREATE TABLE customer (id INTEGER NOT NULL, " - "name VARCHAR(255), PRIMARY KEY(id))") - conn.commit() - return conn - - def test_sqlite3(n=100000, dbname='sqlite3.db'): - conn = init_sqlite3(dbname) - c = conn.cursor() - t0 = time.time() - for i in range(n): - row = ('NAME ' + str(i),) - c.execute("INSERT INTO customer (name) VALUES (?)", row) - conn.commit() - print("sqlite3: Total time for " + str(n) + - " records " + str(time.time() - t0) + " sec") - - if __name__ == '__main__': - test_sqlalchemy_orm(100000) - test_sqlalchemy_orm_pk_given(100000) - test_sqlalchemy_core(100000) - test_sqlite3(100000) - - - -Sessions / Queries -=================== - - -"This Session's transaction has been rolled back due to a previous exception during flush." 
(or similar) ---------------------------------------------------------------------------------------------------------- - -This is an error that occurs when a :meth:`.Session.flush` raises an exception, rolls back -the transaction, but further commands upon the `Session` are called without an -explicit call to :meth:`.Session.rollback` or :meth:`.Session.close`. - -It usually corresponds to an application that catches an exception -upon :meth:`.Session.flush` or :meth:`.Session.commit` and -does not properly handle the exception. For example:: - - from sqlalchemy import create_engine, Column, Integer - from sqlalchemy.orm import sessionmaker - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base(create_engine('sqlite://')) - - class Foo(Base): - __tablename__ = 'foo' - id = Column(Integer, primary_key=True) - - Base.metadata.create_all() - - session = sessionmaker()() - - # constraint violation - session.add_all([Foo(id=1), Foo(id=1)]) - - try: - session.commit() - except: - # ignore error - pass - - # continue using session without rolling back - session.commit() - - -The usage of the :class:`.Session` should fit within a structure similar to this:: - - try: - <use session> - session.commit() - except: - session.rollback() - raise - finally: - session.close() # optional, depends on use case - -Many things can cause a failure within the try/except besides flushes. You -should always have some kind of "framing" of your session operations so that -connection and transaction resources have a definitive boundary, otherwise -your application doesn't really have its usage of resources under control. -This is not to say that you need to put try/except blocks all throughout your -application - on the contrary, this would be a terrible idea. You should -architect your application such that there is one (or few) point(s) of -"framing" around session operations. - -For a detailed discussion on how to organize usage of the :class:`.Session`, -please see :ref:`session_faq_whentocreate`. - -But why does flush() insist on issuing a ROLLBACK? -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -It would be great if :meth:`.Session.flush` could partially complete and then not roll -back, however this is beyond its current capabilities since its internal -bookkeeping would have to be modified such that it can be halted at any time -and be exactly consistent with what's been flushed to the database. While this -is theoretically possible, the usefulness of the enhancement is greatly -decreased by the fact that many database operations require a ROLLBACK in any -case. Postgres in particular has operations which, once failed, the -transaction is not allowed to continue:: - - test=> create table foo(id integer primary key); - NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index "foo_pkey" for table "foo" - CREATE TABLE - test=> begin; - BEGIN - test=> insert into foo values(1); - INSERT 0 1 - test=> commit; - COMMIT - test=> begin; - BEGIN - test=> insert into foo values(1); - ERROR: duplicate key value violates unique constraint "foo_pkey" - test=> insert into foo values(2); - ERROR: current transaction is aborted, commands ignored until end of transaction block - -What SQLAlchemy offers that solves both issues is support of SAVEPOINT, via -:meth:`.Session.begin_nested`. 
Using :meth:`.Session.begin_nested`, you can frame an operation that may
-potentially fail within a transaction, and then "roll back" to the point
-before its failure while maintaining the enclosing transaction.
-
-But why isn't the one automatic call to ROLLBACK enough? Why must I ROLLBACK again?
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-This is again a matter of the :class:`.Session` providing a consistent interface and
-refusing to guess about the context in which it's being used. For example, the
-:class:`.Session` supports "framing", as above, within multiple levels. Suppose
-you had a decorator ``@with_session()``, which did this::
-
-    def with_session(fn):
-        def go(*args, **kw):
-            session.begin(subtransactions=True)
-            try:
-                ret = fn(*args, **kw)
-                session.commit()
-                return ret
-            except:
-                session.rollback()
-                raise
-        return go
-
-The above decorator begins a transaction if one does not exist already, and
-then commits it if it was the creator. The "subtransactions" flag means that
-if :meth:`.Session.begin` were already called by an enclosing function, nothing happens
-except a counter is incremented - this counter is decremented when :meth:`.Session.commit`
-is called, and only when it goes back to zero does the actual COMMIT happen. It
-allows this usage pattern::
-
-    @with_session
-    def one():
-        # do stuff
-        two()
-
-
-    @with_session
-    def two():
-        # etc.
-
-    one()
-
-    two()
-
-``one()`` can call ``two()``, or ``two()`` can be called by itself, and the
-``@with_session`` decorator ensures the appropriate "framing" - the transaction
-boundaries stay on the outermost call level. As you can see, if ``two()`` calls
-``flush()`` which throws an exception and then issues a ``rollback()``, there will
-*always* be a second ``rollback()`` performed by the decorator, and possibly a
-third corresponding to two levels of decorator. If the ``flush()`` pushed the
-``rollback()`` all the way out to the top of the stack, and then we said that
-all remaining ``rollback()`` calls are moot, there would be some silent behavior going
-on there. A poorly written enclosing method might suppress the exception, and
-then call ``commit()`` assuming nothing is wrong, and then you have a silent
-failure condition. The main reason people get this error, in fact, is because
-they didn't write clean "framing" code and they would have had other problems
-down the road.
-
-If you think the above use case is a little exotic, the same kind of thing
-comes into play if you want to SAVEPOINT - you might call ``begin_nested()``
-several times, and the ``commit()``/``rollback()`` calls each resolve the most
-recent ``begin_nested()``. The meaning of ``rollback()`` or ``commit()`` is
-dependent upon the enclosing block in which it is called, and you might have any
-sequence of ``rollback()``/``commit()`` in any order, and it's the level of nesting
-that determines their behavior.
-
-In both of the above cases, if ``flush()`` broke the nesting of transaction
-blocks, the behavior would be, depending on scenario, anywhere from "magic" to
-silent failure to blatant interruption of code flow.
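-
-As a minimal sketch of the SAVEPOINT pattern discussed above (assuming an
-existing ``session`` and a mapped ``Foo`` class), each ``commit()`` or
-``rollback()`` resolves only the most recent ``begin_nested()``::
-
-    session.begin_nested()   # establishes a SAVEPOINT
-    try:
-        session.add(Foo(id=1))
-        session.flush()
-        session.commit()     # releases the SAVEPOINT
-    except:
-        session.rollback()   # rolls back to the SAVEPOINT only;
-        raise                # the enclosing transaction is intact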
-
-``flush()`` makes its own "subtransaction", so that a transaction is started up
-regardless of the external transactional state, and when complete it calls
-``commit()``, or ``rollback()`` upon failure - but that ``rollback()`` corresponds
-to its own subtransaction - it doesn't want to guess how you'd like to handle
-the external "framing" of the transaction, which could be nested many levels
-with any combination of subtransactions and real SAVEPOINTs. The job of
-starting/ending the "frame" is kept consistently with the code external to the
-``flush()``, and we made a decision that this was the most consistent approach.
-
-
-
-How do I make a Query that always adds a certain filter to every query?
-------------------------------------------------------------------------------------------------
-
-See the recipe at `PreFilteredQuery <http://www.sqlalchemy.org/trac/wiki/UsageRecipes/PreFilteredQuery>`_.
-
-I've created a mapping against an Outer Join, and while the query returns rows, no objects are returned. Why not?
--------------------------------------------------------------------------------------------------------------------
-
-Rows returned by an outer join may contain NULL for part of the primary key,
-as the primary key is the composite of both tables. The :class:`.Query` object ignores incoming rows
-that don't have an acceptable primary key. Based on the setting of the ``allow_partial_pks``
-flag on :func:`.mapper`, a primary key is accepted if the value has at least one non-NULL
-value, or alternatively if the value has no NULL values. See ``allow_partial_pks``
-at :func:`.mapper`.
-
-
-I'm using ``joinedload()`` or ``lazy=False`` to create a JOIN/OUTER JOIN and SQLAlchemy is not constructing the correct query when I try to add a WHERE, ORDER BY, LIMIT, etc. (which relies upon the (OUTER) JOIN)
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-The joins generated by joined eager loading are only used to fully load related
-collections, and are designed to have no impact on the primary results of the query.
-Since they are anonymously aliased, they cannot be referenced directly.
-
-For detail on this behavior, see :doc:`orm/loading`.
-
-Query has no ``__len__()``, why not?
-------------------------------------
-
-The Python ``__len__()`` magic method applied to an object allows the ``len()``
-builtin to be used to determine the length of the collection. It's intuitive
-that a SQL query object would link ``__len__()`` to the :meth:`.Query.count`
-method, which emits a ``SELECT COUNT``. The reason this is not possible is
-because evaluating the query as a list would incur two SQL calls instead of
-one::
-
-    class Iterates(object):
-        def __len__(self):
-            print "LEN!"
-            return 5
-
-        def __iter__(self):
-            print "ITER!"
-            return iter([1, 2, 3, 4, 5])
-
-    list(Iterates())
-
-output::
-
-    ITER!
-    LEN!
-
-How Do I use Textual SQL with ORM Queries?
--------------------------------------------
-
-See:
-
-* :ref:`orm_tutorial_literal_sql` - Ad-hoc textual blocks with :class:`.Query`
-
-* :ref:`session_sql_expressions` - Using :class:`.Session` with textual SQL directly.
-
-I'm calling ``Session.delete(myobject)`` and it isn't removed from the parent collection!
-------------------------------------------------------------------------------------------
-
-See :ref:`session_deleting_from_collections` for a description of this behavior.
-
-Why isn't my ``__init__()`` called when I load objects?
--------------------------------------------------------
-
-See :ref:`mapping_constructors` for a description of this behavior.
-
-How do I use ON DELETE CASCADE with SA's ORM?
----------------------------------------------
-
-SQLAlchemy will always issue UPDATE or DELETE statements for dependent
-rows which are currently loaded in the :class:`.Session`. For rows which
-are not loaded, it will by default issue SELECT statements to load
-those rows and update/delete those as well; in other words, it assumes
-there is no ON DELETE CASCADE configured.
-To configure SQLAlchemy to cooperate with ON DELETE CASCADE, see
-:ref:`passive_deletes`.
-
-I set the "foo_id" attribute on my instance to "7", but the "foo" attribute is still ``None`` - shouldn't it have loaded Foo with id #7?
-----------------------------------------------------------------------------------------------------------------------------------------------------
-
-The ORM is not constructed in such a way as to support
-immediate population of relationships driven from foreign
-key attribute changes - instead, it is designed to work the
-other way around - foreign key attributes are handled by the
-ORM behind the scenes; the end user sets up object
-relationships naturally. Therefore, the recommended way to
-set ``o.foo`` is to do just that - set it!::
-
-    foo = Session.query(Foo).get(7)
-    o.foo = foo
-    Session.commit()
-
-Manipulation of foreign key attributes is of course entirely legal. However,
-setting a foreign-key attribute to a new value currently does not trigger
-an "expire" event of the :func:`.relationship` in which it's involved. This means
-that for the following sequence::
-
-    o = Session.query(SomeClass).first()
-    assert o.foo is None  # accessing an un-set attribute sets it to None
-    o.foo_id = 7
-
-``o.foo`` is initialized to ``None`` when we first accessed it. Setting
-``o.foo_id = 7`` will have the value of "7" as pending, but no flush
-has occurred - so ``o.foo`` is still ``None``::
-
-    # attribute is already set to None, has not been
-    # reconciled with o.foo_id = 7 yet
-    assert o.foo is None
-
-Loading of ``o.foo`` based on the foreign key mutation is usually achieved
-naturally after the commit, which both flushes the new foreign key value
-and expires all state::
-
-    Session.commit()  # expires all attributes
-
-    foo_7 = Session.query(Foo).get(7)
-
-    assert o.foo is foo_7  # o.foo lazyloads on access
-
-A more minimal operation is to expire the attribute individually - this can
-be performed for any :term:`persistent` object using :meth:`.Session.expire`::
-
-    o = Session.query(SomeClass).first()
-    o.foo_id = 7
-    Session.expire(o, ['foo'])  # object must be persistent for this
-
-    foo_7 = Session.query(Foo).get(7)
-
-    assert o.foo is foo_7  # o.foo lazyloads on access
-
-Note that if the object is not persistent but present in the :class:`.Session`,
-it's known as :term:`pending`. This means the row for the object has not been
-INSERTed into the database yet. For such an object, setting ``foo_id`` does not
-have meaning until the row is inserted; otherwise there is no row yet::
-
-    new_obj = SomeClass()
-    new_obj.foo_id = 7
-
-    Session.add(new_obj)
-
-    # accessing an un-set attribute sets it to None
-    assert new_obj.foo is None
-
-    Session.flush()  # emits INSERT
-
-    # expire this because we already set .foo to None
-    Session.expire(new_obj, ['foo'])
-
-    assert new_obj.foo is foo_7  # now it loads
-
-
-.. topic:: Attribute loading for non-persistent objects
-
-    One variant on the "pending" behavior above is if we use the flag
-    ``load_on_pending`` on :func:`.relationship`. When this flag is set, the
-    lazy loader will emit for ``new_obj.foo`` before the INSERT proceeds; another
-    variant of this is to use the :meth:`.Session.enable_relationship_loading`
-    method, which can "attach" an object to a :class:`.Session` in such a way that
-    many-to-one relationships load according to foreign key attributes
-    regardless of the object being in any particular state.
-    Both techniques are **not recommended for general use**; they were added to suit
-    specific programming scenarios encountered by users which involve the repurposing
-    of the ORM's usual object states.
-
-The recipe `ExpireRelationshipOnFKChange <http://www.sqlalchemy.org/trac/wiki/UsageRecipes/ExpireRelationshipOnFKChange>`_ features an example using SQLAlchemy events
-in order to coordinate the setting of foreign key attributes with many-to-one
-relationships.
-
-Is there a way to automagically have only unique keywords (or other kinds of objects) without doing a query for the keyword and getting a reference to the row containing that keyword?
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-When people read the many-to-many example in the docs, they get hit with the
-fact that if you create the same ``Keyword`` twice, it gets put in the DB twice,
-which is somewhat inconvenient.
-
-This `UniqueObject <http://www.sqlalchemy.org/trac/wiki/UsageRecipes/UniqueObject>`_ recipe was created to address this issue.
-
-
diff --git a/doc/build/faq/connections.rst b/doc/build/faq/connections.rst
new file mode 100644
index 000000000..81a8678b4
--- /dev/null
+++ b/doc/build/faq/connections.rst
@@ -0,0 +1,138 @@
+Connections / Engines
+=====================
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+
+How do I configure logging?
+---------------------------
+
+See :ref:`dbengine_logging`.
+
+How do I pool database connections? Are my connections pooled?
+----------------------------------------------------------------
+
+SQLAlchemy performs application-level connection pooling automatically
+in most cases. With the exception of SQLite, a :class:`.Engine` object
+refers to a :class:`.QueuePool` as a source of connectivity.
+
+For more detail, see :ref:`engines_toplevel` and :ref:`pooling_toplevel`.
+
+How do I pass custom connect arguments to my database API?
+-----------------------------------------------------------
+
+The :func:`.create_engine` call accepts additional arguments either
+directly via the ``connect_args`` keyword argument::
+
+    e = create_engine("mysql://scott:tiger@localhost/test",
+                      connect_args={"encoding": "utf8"})
+
+Or for basic string and integer arguments, they can usually be specified
+in the query string of the URL::
+
+    e = create_engine("mysql://scott:tiger@localhost/test?encoding=utf8")
+
+.. seealso::
+
+    :ref:`custom_dbapi_args`
+
+"MySQL Server has gone away"
+----------------------------
+
+There are two major causes for this error:
+
+1. The MySQL client closes connections which have been idle for a set period
+of time, defaulting to eight hours. This can be avoided by using the ``pool_recycle``
+setting with :func:`.create_engine`, described at :ref:`mysql_connection_timeouts`.
+
+2. Usage of the MySQLdb :term:`DBAPI`, or a similar DBAPI, in a non-threadsafe manner, or in an otherwise
+inappropriate way. The MySQLdb connection object is not threadsafe - this expands
+out to any SQLAlchemy system that links to a single connection, which includes the ORM
+:class:`.Session`. For background
+on how :class:`.Session` should be used in a multithreaded environment,
+see :ref:`session_faq_threadsafe`.
+
+Why does SQLAlchemy issue so many ROLLBACKs?
+---------------------------------------------
+
+SQLAlchemy currently assumes DBAPI connections are in "non-autocommit" mode -
+this is the default behavior of the Python database API, meaning it
+must be assumed that a transaction is always in progress. The
+connection pool issues ``connection.rollback()`` when a connection is returned.
+This is so that any transactional resources remaining on the connection are
+released. On a database like PostgreSQL or MSSQL, where table resources are
+aggressively locked, this is critical so that rows and tables don't remain
+locked within connections that are no longer in use. An application can
+otherwise hang. It's not just for locks, however, and is equally critical on
+any database that has any kind of transaction isolation, including MySQL with
+InnoDB. Any connection that is still inside an old transaction will return
+stale data, if that data was already queried on that connection within
+isolation. For background on why you might see stale data even on MySQL, see
+http://dev.mysql.com/doc/refman/5.1/en/innodb-transaction-model.html
+
+I'm on MyISAM - how do I turn it off?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The connection pool's connection return behavior can be configured using
+``reset_on_return``, most directly via the ``pool_reset_on_return``
+argument of :func:`.create_engine`::
+
+    from sqlalchemy import create_engine
+
+    engine = create_engine('mysql://scott:tiger@localhost/myisam_database',
+                           pool_reset_on_return=None)
+
+I'm on SQL Server - how do I turn those ROLLBACKs into COMMITs?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``reset_on_return`` accepts the values ``commit`` and ``rollback`` in addition
+to ``True``, ``False``, and ``None``. Setting it to ``commit`` will cause
+a COMMIT as any connection is returned to the pool::
+
+    engine = create_engine('mssql://scott:tiger@mydsn',
+                           pool_reset_on_return='commit')
+
+
+I am using multiple connections with a SQLite database (typically to test transaction operation), and my test program is not working!
+----------------------------------------------------------------------------------------------------------------------------------------------------------
+
+If using a SQLite ``:memory:`` database, or a version of SQLAlchemy prior
+to version 0.7, the default connection pool is the :class:`.SingletonThreadPool`,
+which maintains exactly one SQLite connection per thread. So two
+connections in use in the same thread will actually be the same SQLite
+connection. Make sure you're not using a ``:memory:`` database, and
+use :class:`.NullPool`, which is the default for non-memory databases in
+current SQLAlchemy versions.
+
+.. seealso::
+
+    :ref:`pysqlite_threading_pooling` - info on PySQLite's behavior.
+
+How do I get at the raw DBAPI connection when using an Engine?
+--------------------------------------------------------------
+
+With a regular SA engine-level Connection, you can get at a pool-proxied
+version of the DBAPI connection via the :attr:`.Connection.connection` attribute on
+:class:`.Connection`; for the really-real DBAPI connection, you can access the
+:attr:`.ConnectionFairy.connection` attribute on that - but there should never be any need to access
+the non-pool-proxied DBAPI connection, as all methods are proxied through::
+
+    engine = create_engine(...)
+    conn = engine.connect()
+    conn.connection.<do DBAPI things>
+    cursor = conn.connection.cursor(<DBAPI specific arguments..>)
+
+You must ensure that you revert any isolation level settings or other
+operation-specific settings on the connection back to normal before returning
+it to the pool.
+
+As an alternative to reverting settings, you can call the :meth:`.Connection.detach` method on
+either :class:`.Connection` or the proxied connection, which will de-associate
+the connection from the pool such that it will be closed and discarded
+when :meth:`.Connection.close` is called::
+
+    conn = engine.connect()
+    conn.detach()  # detaches the DBAPI connection from the connection pool
+    conn.connection.<go nuts>
+    conn.close()  # connection is closed for real, the pool replaces it with a new connection
diff --git a/doc/build/faq/index.rst b/doc/build/faq/index.rst
new file mode 100644
index 000000000..120e0ba3a
--- /dev/null
+++ b/doc/build/faq/index.rst
@@ -0,0 +1,19 @@
+.. _faq_toplevel:
+
+============================
+Frequently Asked Questions
+============================
+
+The Frequently Asked Questions section is a growing collection of commonly
+observed questions regarding well-known issues.
+
+.. toctree::
+    :maxdepth: 1
+
+    connections
+    metadata_schema
+    sqlexpressions
+    ormconfiguration
+    performance
+    sessions
+
diff --git a/doc/build/faq/metadata_schema.rst b/doc/build/faq/metadata_schema.rst
new file mode 100644
index 000000000..9697399dc
--- /dev/null
+++ b/doc/build/faq/metadata_schema.rst
@@ -0,0 +1,102 @@
+==================
+MetaData / Schema
+==================
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+
+
+My program is hanging when I say ``table.drop()`` / ``metadata.drop_all()``
+===========================================================================
+
+This usually corresponds to two conditions: 1. using PostgreSQL, which is really
+strict about table locks, and 2. you have a connection still open which
+contains locks on the table and is distinct from the connection being used for
+the DROP statement. Here's the most minimal version of the pattern::
+
+    connection = engine.connect()
+    result = connection.execute(mytable.select())
+
+    mytable.drop(engine)
+
+Above, a connection pool connection is still checked out; furthermore, the
+result object above also maintains a link to this connection. If
+"implicit execution" is used, the result will hold this connection opened until
+the result object is closed or all rows are exhausted.
+
+The call to ``mytable.drop(engine)`` attempts to emit DROP TABLE on a second
+connection procured from the :class:`.Engine`, which will then block, as it
+waits on the lock held by the first connection.
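+
+A quick way to confirm that a connection is still checked out is to ask the
+pool itself (a minimal sketch; this assumes the default :class:`.QueuePool`)::
+
+    print(engine.pool.status())
+    # e.g. "Pool size: 5  Connections in pool: 4 Current Overflow: -4
+    # Current Checked out connections: 1"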
+ +The solution is to close out all connections before emitting DROP TABLE:: + + connection = engine.connect() + result = connection.execute(mytable.select()) + + # fully read result sets + result.fetchall() + + # close connections + connection.close() + + # now locks are removed + mytable.drop(engine) + +Does SQLAlchemy support ALTER TABLE, CREATE VIEW, CREATE TRIGGER, Schema Upgrade Functionality? +=============================================================================================== + + +General ALTER support isn't present in SQLAlchemy directly. For special DDL +on an ad-hoc basis, the :class:`.DDL` and related constructs can be used. +See :doc:`core/ddl` for a discussion on this subject. + +A more comprehensive option is to use schema migration tools, such as Alembic +or SQLAlchemy-Migrate; see :ref:`schema_migrations` for discussion on this. + +How can I sort Table objects in order of their dependency? +=========================================================================== + +This is available via the :attr:`.MetaData.sorted_tables` function:: + + metadata = MetaData() + # ... add Table objects to metadata + ti = metadata.sorted_tables: + for t in ti: + print t + +How can I get the CREATE TABLE/ DROP TABLE output as a string? +=========================================================================== + +Modern SQLAlchemy has clause constructs which represent DDL operations. These +can be rendered to strings like any other SQL expression:: + + from sqlalchemy.schema import CreateTable + + print CreateTable(mytable) + +To get the string specific to a certain engine:: + + print CreateTable(mytable).compile(engine) + +There's also a special form of :class:`.Engine` that can let you dump an entire +metadata creation sequence, using this recipe:: + + def dump(sql, *multiparams, **params): + print sql.compile(dialect=engine.dialect) + engine = create_engine('postgresql://', strategy='mock', executor=dump) + metadata.create_all(engine, checkfirst=False) + +The `Alembic <https://bitbucket.org/zzzeek/alembic>`_ tool also supports +an "offline" SQL generation mode that renders database migrations as SQL scripts. + +How can I subclass Table/Column to provide certain behaviors/configurations? +============================================================================= + +:class:`.Table` and :class:`.Column` are not good targets for direct subclassing. +However, there are simple ways to get on-construction behaviors using creation +functions, and behaviors related to the linkages between schema objects such as +constraint conventions or naming conventions using attachment events. +An example of many of these +techniques can be seen at `Naming Conventions <http://www.sqlalchemy.org/trac/wiki/UsageRecipes/NamingConventions>`_. diff --git a/doc/build/faq/ormconfiguration.rst b/doc/build/faq/ormconfiguration.rst new file mode 100644 index 000000000..3a2ea29a6 --- /dev/null +++ b/doc/build/faq/ormconfiguration.rst @@ -0,0 +1,334 @@ +ORM Configuration +================== + +.. contents:: + :local: + :class: faq + :backlinks: none + +.. _faq_mapper_primary_key: + +How do I map a table that has no primary key? +--------------------------------------------- + +The SQLAlchemy ORM, in order to map to a particular table, needs there to be +at least one column denoted as a primary key column; multiple-column, +i.e. composite, primary keys are of course entirely feasible as well. 
diff --git a/doc/build/faq/ormconfiguration.rst b/doc/build/faq/ormconfiguration.rst
new file mode 100644
index 000000000..3a2ea29a6
--- /dev/null
+++ b/doc/build/faq/ormconfiguration.rst
@@ -0,0 +1,334 @@
+ORM Configuration
+==================
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+.. _faq_mapper_primary_key:
+
+How do I map a table that has no primary key?
+---------------------------------------------
+
+The SQLAlchemy ORM, in order to map to a particular table, needs there to be
+at least one column denoted as a primary key column; multiple-column,
+i.e. composite, primary keys are of course entirely feasible as well. These
+columns do **not** need to be actually known to the database as primary key
+columns, though it's a good idea that they are. It's only necessary that the columns
+*behave* as a primary key does, e.g. as a unique and not nullable identifier
+for a row.
+
+Most ORMs require that objects have some kind of primary key defined
+because the object in memory must correspond to a uniquely identifiable
+row in the database table; at the very least, this allows the
+object to be targeted for UPDATE and DELETE statements which will affect only
+that object's row and no other. However, the importance of the primary key
+goes far beyond that. In SQLAlchemy, all ORM-mapped objects are at all times
+linked uniquely within a :class:`.Session`
+to their specific database row using a pattern called the :term:`identity map`,
+a pattern that's central to the unit of work system employed by SQLAlchemy,
+and is also key to the most common (and not-so-common) patterns of ORM usage.
+
+
+.. note::
+
+    It's important to note that we're only talking about the SQLAlchemy ORM; an
+    application which builds on Core and deals only with :class:`.Table` objects,
+    :func:`.select` constructs and the like, **does not** need any primary key
+    to be present on or associated with a table in any way (though again, in SQL, all tables
+    should really have some kind of primary key, lest you need to actually
+    update or delete specific rows).
+
+In almost all cases, a table does have a so-called :term:`candidate key`, which is a column or series
+of columns that uniquely identify a row. If a table truly doesn't have this, and has actual
+fully duplicate rows, the table does not correspond to `first normal form <http://en.wikipedia.org/wiki/First_normal_form>`_ and cannot be mapped. Otherwise, whatever columns comprise the best candidate key can be
+applied directly to the mapper::
+
+    class SomeClass(Base):
+        __table__ = some_table_with_no_pk
+        __mapper_args__ = {
+            'primary_key':[some_table_with_no_pk.c.uid, some_table_with_no_pk.c.bar]
+        }
+
+Better yet, when using fully declared table metadata, use the ``primary_key=True``
+flag on those columns::
+
+    class SomeClass(Base):
+        __tablename__ = "some_table_with_no_pk"
+
+        uid = Column(Integer, primary_key=True)
+        bar = Column(String, primary_key=True)
+
+All tables in a relational database should have primary keys. Even a many-to-many
+association table - the primary key would be the composite of the two association
+columns::
+
+    CREATE TABLE my_association (
+        user_id INTEGER REFERENCES user(id),
+        account_id INTEGER REFERENCES account(id),
+        PRIMARY KEY (user_id, account_id)
+    )
+
+
+How do I configure a Column that is a Python reserved word or similar?
+----------------------------------------------------------------------------
+
+Column-based attributes can be given any name desired in the mapping. See
+:ref:`mapper_column_distinct_names`.
+
+How do I get a list of all columns, relationships, mapped attributes, etc. given a mapped class?
+-------------------------------------------------------------------------------------------------
+
+This information is all available from the :class:`.Mapper` object.
+
+To get at the :class:`.Mapper` for a particular mapped class, call the
+:func:`.inspect` function on it::
+
+    from sqlalchemy import inspect
+
+    mapper = inspect(MyClass)
+
+From there, all information about the class can be acquired using such methods as:
+
+* :attr:`.Mapper.attrs` - a namespace of all mapped attributes. The attributes
+  themselves are instances of :class:`.MapperProperty`, which contain additional
+  attributes that can lead to the mapped SQL expression or column, if applicable.
+
+* :attr:`.Mapper.column_attrs` - the mapped attribute namespace
+  limited to column and SQL expression attributes. You might want to use
+  :attr:`.Mapper.columns` to get at the :class:`.Column` objects directly.
+
+* :attr:`.Mapper.relationships` - namespace of all :class:`.RelationshipProperty` attributes.
+
+* :attr:`.Mapper.all_orm_descriptors` - namespace of all mapped attributes, plus user-defined
+  attributes defined using systems such as :class:`.hybrid_property`, :class:`.AssociationProxy` and others.
+
+* :attr:`.Mapper.columns` - A namespace of :class:`.Column` objects and other named
+  SQL expressions associated with the mapping.
+
+* :attr:`.Mapper.mapped_table` - The :class:`.Table` or other selectable to which
+  this mapper is mapped.
+
+* :attr:`.Mapper.local_table` - The :class:`.Table` that is "local" to this mapper;
+  this differs from :attr:`.Mapper.mapped_table` in the case of a mapper mapped
+  using inheritance to a composed selectable.
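+
+For example, a minimal sketch that walks some of these namespaces for a
+hypothetical mapped class ``MyClass``::
+
+    from sqlalchemy import inspect
+
+    mapper = inspect(MyClass)
+
+    # column-bound attributes and their Column objects
+    for attr in mapper.column_attrs:
+        print(attr.key)
+
+    # relationship-bound attributes and their target classes
+    for rel in mapper.relationships:
+        print(rel.key)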
+
+.. _faq_combining_columns:
+
+I'm getting a warning or error about "Implicitly combining column X under attribute Y"
+--------------------------------------------------------------------------------------
+
+This condition occurs when a mapping contains two columns that fall under the
+same attribute name because they share a name, but there's no indication
+that this is intentional. A mapped class needs to have explicit names for
+every attribute that is to store an independent value; when two columns have the
+same name and aren't disambiguated, they fall under the same attribute and
+the effect is that the value from one column is **copied** into the other, based
+on which column was assigned to the attribute first.
+
+This behavior is often desirable and is allowed without warning in the case
+where the two columns are linked together via a foreign key relationship
+within an inheritance mapping. When the warning or exception occurs, the
+issue can be resolved by either assigning the columns to differently-named
+attributes, or if combining them together is desired, by using
+:func:`.column_property` to make this explicit.
+
+Given the example as follows::
+
+    from sqlalchemy import Integer, Column, ForeignKey
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+
+    class A(Base):
+        __tablename__ = 'a'
+
+        id = Column(Integer, primary_key=True)
+
+    class B(A):
+        __tablename__ = 'b'
+
+        id = Column(Integer, primary_key=True)
+        a_id = Column(Integer, ForeignKey('a.id'))
+
+As of SQLAlchemy version 0.9.5, the above condition is detected, and will
+warn that the ``id`` column of ``A`` and ``B`` is being combined under
+the same-named attribute ``id``, which above is a serious issue since it means
+that a ``B`` object's primary key will always mirror that of its ``A``.
+
+A mapping which resolves this is as follows::
+
+    class A(Base):
+        __tablename__ = 'a'
+
+        id = Column(Integer, primary_key=True)
+
+    class B(A):
+        __tablename__ = 'b'
+
+        b_id = Column('id', Integer, primary_key=True)
+        a_id = Column(Integer, ForeignKey('a.id'))
+
+Suppose we did want ``A.id`` and ``B.id`` to be mirrors of each other, despite
+the fact that ``B.a_id`` is where ``A.id`` is actually related.
We could combine
+them together using :func:`.column_property`::
+
+    class A(Base):
+        __tablename__ = 'a'
+
+        id = Column(Integer, primary_key=True)
+
+    class B(A):
+        __tablename__ = 'b'
+
+        # probably not what you want, but this is a demonstration
+        id = column_property(Column(Integer, primary_key=True), A.id)
+        a_id = Column(Integer, ForeignKey('a.id'))
+
+
+
+I'm using Declarative and setting primaryjoin/secondaryjoin using an ``and_()`` or ``or_()``, and I am getting an error message about foreign keys.
+------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+Are you doing this?::
+
+    class MyClass(Base):
+        # ....
+
+        foo = relationship("Dest", primaryjoin=and_("MyClass.id==Dest.foo_id", "MyClass.foo==Dest.bar"))
+
+That's an ``and_()`` of two string expressions, which SQLAlchemy cannot apply any mapping towards. Declarative allows
+:func:`.relationship` arguments to be specified as strings, which are converted into expression
+objects using ``eval()``. But this doesn't occur inside of an ``and_()`` expression - it's a special
+operation declarative applies only to the *entirety* of what's passed to primaryjoin or other
+arguments as a string::
+
+    class MyClass(Base):
+        # ....
+
+        foo = relationship("Dest", primaryjoin="and_(MyClass.id==Dest.foo_id, MyClass.foo==Dest.bar)")
+
+Or if the objects you need are already available, skip the strings::
+
+    class MyClass(Base):
+        # ....
+
+        foo = relationship(Dest, primaryjoin=and_(MyClass.id==Dest.foo_id, MyClass.foo==Dest.bar))
+
+The same idea applies to all the other arguments, such as ``foreign_keys``::
+
+    # wrong !
+    foo = relationship(Dest, foreign_keys=["Dest.foo_id", "Dest.bar_id"])
+
+    # correct !
+    foo = relationship(Dest, foreign_keys="[Dest.foo_id, Dest.bar_id]")
+
+    # also correct !
+    foo = relationship(Dest, foreign_keys=[Dest.foo_id, Dest.bar_id])
+
+    # if you're using columns from the class that you're inside of, just use the column objects !
+    class MyClass(Base):
+        foo_id = Column(...)
+        bar_id = Column(...)
+        # ...
+
+        foo = relationship(Dest, foreign_keys=[foo_id, bar_id])
+
+.. _faq_subqueryload_limit_sort:
+
+Why is ``ORDER BY`` required with ``LIMIT`` (especially with ``subqueryload()``)?
+---------------------------------------------------------------------------------
+
+A relational database can return rows in any
+arbitrary order, when an explicit ordering is not set.
+While this ordering very often corresponds to the natural
+order of rows within a table, this is not the case for all databases and
+all queries. The consequence of this is that any query that limits rows
+using ``LIMIT`` or ``OFFSET`` should **always** specify an ``ORDER BY``.
+Otherwise, it is not deterministic which rows will actually be returned.
+
+When we use a SQLAlchemy method like :meth:`.Query.first`, we are in fact
+applying a ``LIMIT`` of one to the query, so without an explicit ordering
+it is not deterministic what row we actually get back.
+While we may not notice this for simple queries on databases that usually
+return rows in their natural
+order, it becomes much more of an issue if we also use :func:`.orm.subqueryload`
+to load related collections, and we may not be loading the collections
+as intended.
+
+SQLAlchemy implements :func:`.orm.subqueryload` by issuing a separate query,
+the results of which are matched up to the results from the first query.
+We see two queries emitted like this:
+
+.. sourcecode:: python+sql
+
+    >>> session.query(User).options(subqueryload(User.addresses)).all()
+    {opensql}-- the "main" query
+    SELECT users.id AS users_id
+    FROM users
+    {stop}
+    {opensql}-- the "load" query issued by subqueryload
+    SELECT addresses.id AS addresses_id,
+           addresses.user_id AS addresses_user_id,
+           anon_1.users_id AS anon_1_users_id
+    FROM (SELECT users.id AS users_id FROM users) AS anon_1
+    JOIN addresses ON anon_1.users_id = addresses.user_id
+    ORDER BY anon_1.users_id
+
+The second query embeds the first query as a source of rows.
+When the inner query uses ``OFFSET`` and/or ``LIMIT`` without ordering,
+the two queries may not see the same results:
+
+.. sourcecode:: python+sql
+
+    >>> user = session.query(User).options(subqueryload(User.addresses)).first()
+    {opensql}-- the "main" query
+    SELECT users.id AS users_id
+    FROM users
+    LIMIT 1
+    {stop}
+    {opensql}-- the "load" query issued by subqueryload
+    SELECT addresses.id AS addresses_id,
+           addresses.user_id AS addresses_user_id,
+           anon_1.users_id AS anon_1_users_id
+    FROM (SELECT users.id AS users_id FROM users LIMIT 1) AS anon_1
+    JOIN addresses ON anon_1.users_id = addresses.user_id
+    ORDER BY anon_1.users_id
+
+Depending on database specifics, there is
+a chance we may get a result like the following for the two queries::
+
+    -- query #1
+    +--------+
+    |users_id|
+    +--------+
+    |       1|
+    +--------+
+
+    -- query #2
+    +------------+-----------------+---------------+
+    |addresses_id|addresses_user_id|anon_1_users_id|
+    +------------+-----------------+---------------+
+    |           3|                2|              2|
+    +------------+-----------------+---------------+
+    |           4|                2|              2|
+    +------------+-----------------+---------------+
+
+Above, we receive two ``addresses`` rows for ``user.id`` of 2, and none for
+1. We've wasted two rows and failed to actually load the collection. This
+is an insidious error because without looking at the SQL and the results, the
+ORM will not show that there's any issue; if we access the ``addresses``
+for the ``User`` we have, it will emit a lazy load for the collection and we
+won't see that anything actually went wrong.
+
+The solution to this problem is to always specify a deterministic sort order,
+so that the main query always returns the same set of rows. This generally
+means that you should use :meth:`.Query.order_by` with a unique column on the table.
+The primary key is a good choice for this::
+
+    session.query(User).options(subqueryload(User.addresses)).order_by(User.id).first()
+
+Note that :func:`.joinedload` does not suffer from the same problem because
+only one query is ever issued, so the load query cannot be different from the
+main query.
+
+.. seealso::
+
+    :ref:`subqueryload_ordering`
diff --git a/doc/build/faq/performance.rst b/doc/build/faq/performance.rst
new file mode 100644
index 000000000..8413cb5a2
--- /dev/null
+++ b/doc/build/faq/performance.rst
@@ -0,0 +1,443 @@
+.. _faq_performance:
+
+Performance
+===========
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+.. _faq_how_to_profile:
+
+How can I profile a SQLAlchemy powered application?
+---------------------------------------------------
+
+Looking for performance issues typically involves two strategies. One
+is query profiling, and the other is code profiling.
+
+Query Profiling
+^^^^^^^^^^^^^^^^
+
+Sometimes just plain SQL logging (enabled via Python's logging module
+or via the ``echo=True`` argument on :func:`.create_engine`) can give an
+idea of how long things are taking.
For example, if you log something
+right after a SQL operation, you'd see something like this in your
+log::
+
+    17:37:48,325 INFO  [sqlalchemy.engine.base.Engine.0x...048c] SELECT ...
+    17:37:48,326 INFO  [sqlalchemy.engine.base.Engine.0x...048c] {<params>}
+    17:37:48,660 DEBUG [myapp.somemessage]
+
+If you logged ``myapp.somemessage`` right after the operation, you know
+it took 334ms to complete the SQL part of things.
+
+Logging SQL will also illustrate if dozens/hundreds of queries are
+being issued which could be better organized into much fewer queries.
+When using the SQLAlchemy ORM, the "eager loading"
+feature is provided to partially (:func:`.contains_eager()`) or fully
+(:func:`.joinedload()`, :func:`.subqueryload()`)
+automate this activity; without
+the ORM, "eager loading" typically means using joins so that results across multiple
+tables can be loaded in one result set instead of multiplying the number
+of queries as more depth is added (i.e. ``r + r*r2 + r*r2*r3`` ...)
+
+For more long-term profiling of queries, or to implement an application-side
+"slow query" monitor, events can be used to intercept cursor executions,
+using a recipe like the following::
+
+    from sqlalchemy import event
+    from sqlalchemy.engine import Engine
+    import time
+    import logging
+
+    logging.basicConfig()
+    logger = logging.getLogger("myapp.sqltime")
+    logger.setLevel(logging.DEBUG)
+
+    @event.listens_for(Engine, "before_cursor_execute")
+    def before_cursor_execute(conn, cursor, statement,
+                            parameters, context, executemany):
+        conn.info.setdefault('query_start_time', []).append(time.time())
+        logger.debug("Start Query: %s", statement)
+
+    @event.listens_for(Engine, "after_cursor_execute")
+    def after_cursor_execute(conn, cursor, statement,
+                            parameters, context, executemany):
+        total = time.time() - conn.info['query_start_time'].pop(-1)
+        logger.debug("Query Complete!")
+        logger.debug("Total Time: %f", total)
+
+Above, we use the :meth:`.ConnectionEvents.before_cursor_execute` and
+:meth:`.ConnectionEvents.after_cursor_execute` events to establish an interception
+point around when a statement is executed. We attach a timer onto the
+connection using the :attr:`._ConnectionRecord.info` dictionary; we use a
+stack here for the occasional case where the cursor execute events may be nested.
+
+Code Profiling
+^^^^^^^^^^^^^^
+
+If logging reveals that individual queries are taking too long, you'd
+need a breakdown of how much time was spent within the database
+processing the query, sending results over the network, being handled
+by the :term:`DBAPI`, and finally being received by SQLAlchemy's result set
+and/or ORM layer. Each of these stages can present their own
+individual bottlenecks, depending on specifics.
+
+For that you need to use the
+`Python Profiling Module <https://docs.python.org/2/library/profile.html>`_.
+Below is a simple recipe which works profiling into a context manager::
+
+    import cProfile
+    import StringIO
+    import pstats
+    import contextlib
+
+    @contextlib.contextmanager
+    def profiled():
+        pr = cProfile.Profile()
+        pr.enable()
+        yield
+        pr.disable()
+        s = StringIO.StringIO()
+        ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
+        ps.print_stats()
+        # uncomment this to see who's calling what
+        # ps.print_callers()
+        print s.getvalue()
+
+To profile a section of code::
+
+    with profiled():
+        Session.query(FooClass).filter(FooClass.somevalue==8).all()
+
+The output of profiling can be used to give an idea where time is
+being spent.
A section of profiling output looks like this::
+
+    13726 function calls (13042 primitive calls) in 0.014 seconds
+
+    Ordered by: cumulative time
+
+    ncalls  tottime  percall  cumtime  percall filename:lineno(function)
+    222/21    0.001    0.000    0.011    0.001 lib/sqlalchemy/orm/loading.py:26(instances)
+    220/20    0.002    0.000    0.010    0.001 lib/sqlalchemy/orm/loading.py:327(_instance)
+    220/20    0.000    0.000    0.010    0.000 lib/sqlalchemy/orm/loading.py:284(populate_state)
+        20    0.000    0.000    0.010    0.000 lib/sqlalchemy/orm/strategies.py:987(load_collection_from_subq)
+        20    0.000    0.000    0.009    0.000 lib/sqlalchemy/orm/strategies.py:935(get)
+         1    0.000    0.000    0.009    0.009 lib/sqlalchemy/orm/strategies.py:940(_load)
+        21    0.000    0.000    0.008    0.000 lib/sqlalchemy/orm/strategies.py:942(<genexpr>)
+         2    0.000    0.000    0.004    0.002 lib/sqlalchemy/orm/query.py:2400(__iter__)
+         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/orm/query.py:2414(_execute_and_instances)
+         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/engine/base.py:659(execute)
+         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/sql/elements.py:321(_execute_on_connection)
+         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/engine/base.py:788(_execute_clauseelement)
+
+    ...
+
+Above, we can see that the ``instances()`` SQLAlchemy function was called 222
+times (recursively, and 21 times from the outside), taking a total of .011
+seconds for all calls combined.
+
+Execution Slowness
+^^^^^^^^^^^^^^^^^^
+
+The specifics of these calls can tell us where the time is being spent.
+If, for example, you see time being spent within ``cursor.execute()``,
+e.g. against the DBAPI::
+
+    2    0.102    0.102    0.204    0.102 {method 'execute' of 'sqlite3.Cursor' objects}
+
+this would indicate that the database is taking a long time to start returning
+results, and it means your query should be optimized, either by adding indexes
+or restructuring the query and/or underlying schema. For that task,
+analysis of the query plan is warranted, using a system such as EXPLAIN,
+SHOW PLAN, etc. as is provided by the database backend.
+
+Result Fetching Slowness - Core
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If on the other hand you see many thousands of calls related to fetching rows,
+or very long calls to ``fetchall()``, it may
+mean your query is returning more rows than expected, or that the fetching
+of rows itself is slow. The ORM itself typically uses ``fetchall()`` to fetch
+rows (or ``fetchmany()`` if the :meth:`.Query.yield_per` option is used).
+
+An inordinately large number of rows would be indicated
+by a very slow call to ``fetchall()`` at the DBAPI level::
+
+    2    0.300    0.600    0.300    0.600 {method 'fetchall' of 'sqlite3.Cursor' objects}
+
+An unexpectedly large number of rows, even if the ultimate result doesn't seem
+to have many rows, can be the result of a cartesian product - when multiple
+sets of rows are combined together without appropriately joining the tables
+together. It's often easy to produce this behavior with SQLAlchemy Core or
+ORM query if the wrong :class:`.Column` objects are used in a complex query,
+pulling in additional FROM clauses that are unexpected.
+
+On the other hand, a fast call to ``fetchall()`` at the DBAPI level, but then
+slowness when SQLAlchemy's :class:`.ResultProxy` is asked to do a ``fetchall()``,
+may indicate slowness in processing of datatypes, such as unicode conversions
+and similar::
+
+    # the DBAPI cursor is fast...
+    2    0.020    0.040    0.020    0.040 {method 'fetchall' of 'sqlite3.Cursor' objects}
+
+    ...
+
+    # but SQLAlchemy's result proxy is slow, this is type-level processing
+    2    0.100    0.200    0.100    0.200 lib/sqlalchemy/engine/result.py:778(fetchall)
+
+In some cases, a backend might be doing type-level processing that isn't
+needed. More specifically, slow calls within the type API are a better
+indicator - below is what it looks like when we use a type like
+this::
+
+    from sqlalchemy import TypeDecorator, String
+    import time
+
+    class Foo(TypeDecorator):
+        impl = String
+
+        def process_result_value(self, value, dialect):
+            # intentionally add slowness for illustration purposes
+            time.sleep(.001)
+            return value
+
+The profiling output of this intentionally slow operation can be seen like this::
+
+    200    0.001    0.000    0.237    0.001 lib/sqlalchemy/sql/type_api.py:911(process)
+    200    0.001    0.000    0.236    0.001 test.py:28(process_result_value)
+    200    0.235    0.001    0.235    0.001 {time.sleep}
+
+That is, we see many expensive calls within the ``type_api`` system, and the actual
+time-consuming thing is the ``time.sleep()`` call.
+
+Make sure to check the :doc:`Dialect documentation <dialects/index>`
+for notes on known performance tuning suggestions at this level, especially for
+databases like Oracle. There may be systems related to ensuring numeric accuracy
+or string processing that may not be needed in all cases.
+
+There also may be even more low-level points at which row-fetching performance is suffering;
+for example, if time spent seems to focus on a call like ``socket.receive()``,
+that could indicate that everything is fast except for the actual network connection,
+and too much time is spent with data moving over the network.
+
+Result Fetching Slowness - ORM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To detect slowness in ORM fetching of rows (which is the most common area
+of performance concern), calls like ``populate_state()`` and ``_instance()`` will
+illustrate individual ORM object populations::
+
+    # the ORM calls _instance for each ORM-loaded row it sees, and
+    # populate_state for each ORM-loaded row that results in the population
+    # of an object's attributes
+    220/20    0.001    0.000    0.010    0.000 lib/sqlalchemy/orm/loading.py:327(_instance)
+    220/20    0.000    0.000    0.009    0.000 lib/sqlalchemy/orm/loading.py:284(populate_state)
+
+The ORM's slowness in turning rows into ORM-mapped objects is a product
+of the complexity of this operation combined with the overhead of cPython.
+Common strategies to mitigate this include:
+
+* fetch individual columns instead of full entities, that is::
+
+      session.query(User.id, User.name)
+
+  instead of::
+
+      session.query(User)
+
+* Use :class:`.Bundle` objects to organize column-based results::
+
+      u_b = Bundle('user', User.id, User.name)
+      a_b = Bundle('address', Address.id, Address.email)
+
+      for user, address in session.query(u_b, a_b).join(User.addresses):
+          # ...
+
+* Use result caching - see :ref:`examples_caching` for an in-depth example
+  of this.
+
+* Consider a faster interpreter like that of Pypy.
+
+The output of a profile can be a little daunting but after some
+practice it is very easy to read.
+
+.. seealso::
+
+    :ref:`examples_performance` - a suite of performance demonstrations
+    with bundled profiling capabilities.
+
+I'm inserting 400,000 rows with the ORM and it's really slow!
+--------------------------------------------------------------
+
+The SQLAlchemy ORM uses the :term:`unit of work` pattern when synchronizing
+changes to the database. This pattern goes far beyond simple "inserts"
+of data.
Attributes which are assigned on objects are
+received by an attribute instrumentation system which tracks
+changes on objects as they are made; all rows inserted
+are tracked in an identity map, which has the effect that for each row
+SQLAlchemy must retrieve its "last inserted id" if not already given;
+and rows to be inserted are scanned and sorted for
+dependencies as needed. Objects are also subject to a fair degree of
+bookkeeping in order to keep all of this running, which for a very
+large number of rows at once can create an inordinate amount of time
+spent with large data structures, hence it's best to chunk these.
+
+Basically, the unit of work automates the task of persisting a
+complex object graph into a relational database with no explicit
+persistence code, and this automation has a price.
+
+ORMs are basically not intended for high-performance bulk inserts -
+this is the whole reason SQLAlchemy offers the Core in addition to the
+ORM as a first-class component.
+
+For the use case of fast bulk inserts, the
+SQL generation and execution system that the ORM builds on top of
+is part of the :doc:`Core <core/tutorial>`. Using this system directly, we can produce an INSERT that
+is competitive with using the raw database API directly.
+
+Alternatively, the SQLAlchemy ORM offers the :ref:`bulk_operations`
+suite of methods, which provide hooks into subsections of the unit of
+work process in order to emit Core-level INSERT and UPDATE constructs with
+a small degree of ORM-based automation.
+
+The example below illustrates time-based tests for several different
+methods of inserting rows, going from the most automated to the least.
+With cPython 2.7, runtimes observed::
+
+    classics-MacBook-Pro:sqlalchemy classic$ python test.py
+    SQLAlchemy ORM: Total time for 100000 records 12.0471920967 secs
+    SQLAlchemy ORM pk given: Total time for 100000 records 7.06283402443 secs
+    SQLAlchemy ORM bulk_insert_mappings(): Total time for 100000 records 0.856323003769 secs
+    SQLAlchemy Core: Total time for 100000 records 0.485800027847 secs
+    sqlite3: Total time for 100000 records 0.487842082977 sec
+
+We can reduce the time by a factor of three using recent versions of `Pypy <http://pypy.org/>`_::
+
+    classics-MacBook-Pro:sqlalchemy classic$ /usr/local/src/pypy-2.1-beta2-osx64/bin/pypy test.py
+    SQLAlchemy ORM: Total time for 100000 records 5.88369488716 secs
+    SQLAlchemy ORM pk given: Total time for 100000 records 3.52294301987 secs
+    SQLAlchemy Core: Total time for 100000 records 0.613556146622 secs
+    sqlite3: Total time for 100000 records 0.442467927933 sec
+
+Script::
+
+    import time
+    import sqlite3
+
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy import Column, Integer, String, create_engine
+    from sqlalchemy.orm import scoped_session, sessionmaker
+
+    Base = declarative_base()
+    DBSession = scoped_session(sessionmaker())
+    engine = None
+
+
+    class Customer(Base):
+        __tablename__ = "customer"
+        id = Column(Integer, primary_key=True)
+        name = Column(String(255))
+
+
+    def init_sqlalchemy(dbname='sqlite:///sqlalchemy.db'):
+        global engine
+        engine = create_engine(dbname, echo=False)
+        DBSession.remove()
+        DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
+        Base.metadata.drop_all(engine)
+        Base.metadata.create_all(engine)
+
+
+    def test_sqlalchemy_orm(n=100000):
+        init_sqlalchemy()
+        t0 = time.time()
+        for i in xrange(n):
+            customer = Customer()
            customer.name = 'NAME ' + str(i)
+            DBSession.add(customer)
+            if i % 1000 == 0:
+                DBSession.flush()
+        DBSession.commit()
+        print(
+            "SQLAlchemy ORM: Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " secs")
+
+
+    def test_sqlalchemy_orm_pk_given(n=100000):
+        init_sqlalchemy()
+        t0 = time.time()
+        for i in xrange(n):
+            customer = Customer(id=i+1, name="NAME " + str(i))
+            DBSession.add(customer)
+            if i % 1000 == 0:
+                DBSession.flush()
+        DBSession.commit()
+        print(
+            "SQLAlchemy ORM pk given: Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " secs")
+
+
+    def test_sqlalchemy_orm_bulk_insert(n=100000):
+        init_sqlalchemy()
+        t0 = time.time()
+        n1 = n
+        while n1 > 0:
+            DBSession.bulk_insert_mappings(
+                Customer,
+                [
+                    dict(name="NAME " + str(i))
+                    for i in xrange(min(10000, n1))
+                ]
+            )
+            n1 = n1 - 10000
+        DBSession.commit()
+        print(
+            "SQLAlchemy ORM bulk_insert_mappings(): Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " secs")
+
+
+    def test_sqlalchemy_core(n=100000):
+        init_sqlalchemy()
+        t0 = time.time()
+        engine.execute(
+            Customer.__table__.insert(),
+            [{"name": 'NAME ' + str(i)} for i in xrange(n)]
+        )
+        print(
+            "SQLAlchemy Core: Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " secs")
+
+
+    def init_sqlite3(dbname):
+        conn = sqlite3.connect(dbname)
+        c = conn.cursor()
+        c.execute("DROP TABLE IF EXISTS customer")
+        c.execute(
+            "CREATE TABLE customer (id INTEGER NOT NULL, "
+            "name VARCHAR(255), PRIMARY KEY(id))")
+        conn.commit()
+        return conn
+
+
+    def test_sqlite3(n=100000, dbname='sqlite3.db'):
+        conn = init_sqlite3(dbname)
+        c = conn.cursor()
+        t0 = time.time()
+        for i in xrange(n):
+            row = ('NAME ' + str(i),)
+            c.execute("INSERT INTO customer (name) VALUES (?)", row)
+        conn.commit()
+        print(
+            "sqlite3: Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " sec")
+
+
+    if __name__ == '__main__':
+        test_sqlalchemy_orm(100000)
+        test_sqlalchemy_orm_pk_given(100000)
+        test_sqlalchemy_orm_bulk_insert(100000)
+        test_sqlalchemy_core(100000)
+        test_sqlite3(100000)
+
diff --git a/doc/build/faq/sessions.rst b/doc/build/faq/sessions.rst
new file mode 100644
index 000000000..300b4bdbc
--- /dev/null
+++ b/doc/build/faq/sessions.rst
@@ -0,0 +1,363 @@
+Sessions / Queries
+===================
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+
+"This Session's transaction has been rolled back due to a previous exception during flush." (or similar)
+---------------------------------------------------------------------------------------------------------
+
+This is an error that occurs when :meth:`.Session.flush` raises an exception, rolls back
+the transaction, but further commands upon the :class:`.Session` are called without an
+explicit call to :meth:`.Session.rollback` or :meth:`.Session.close`.
+
+It usually corresponds to an application that catches an exception
+upon :meth:`.Session.flush` or :meth:`.Session.commit` and
+does not properly handle the exception.
For example::
+
+    from sqlalchemy import create_engine, Column, Integer
+    from sqlalchemy.orm import sessionmaker
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base(create_engine('sqlite://'))
+
+    class Foo(Base):
+        __tablename__ = 'foo'
+        id = Column(Integer, primary_key=True)
+
+    Base.metadata.create_all()
+
+    session = sessionmaker()()
+
+    # constraint violation
+    session.add_all([Foo(id=1), Foo(id=1)])
+
+    try:
+        session.commit()
+    except:
+        # ignore error
+        pass
+
+    # continue using session without rolling back
+    session.commit()
+
+
+The usage of the :class:`.Session` should fit within a structure similar to this::
+
+    try:
+        <use session>
+        session.commit()
+    except:
+        session.rollback()
+        raise
+    finally:
+        session.close()  # optional, depends on use case
+
+Many things can cause a failure within the try/except besides flushes. You
+should always have some kind of "framing" of your session operations so that
+connection and transaction resources have a definitive boundary, otherwise
+your application doesn't really have its usage of resources under control.
+This is not to say that you need to put try/except blocks all throughout your
+application - on the contrary, this would be a terrible idea. You should
+architect your application such that there is one (or few) point(s) of
+"framing" around session operations.
+
+For a detailed discussion on how to organize usage of the :class:`.Session`,
+please see :ref:`session_faq_whentocreate`.
+
+But why does flush() insist on issuing a ROLLBACK?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+It would be great if :meth:`.Session.flush` could partially complete and then not roll
+back; however, this is beyond its current capabilities since its internal
+bookkeeping would have to be modified such that it can be halted at any time
+and be exactly consistent with what's been flushed to the database. While this
+is theoretically possible, the usefulness of the enhancement is greatly
+decreased by the fact that many database operations require a ROLLBACK in any
+case. PostgreSQL in particular has operations which, once they fail, do not
+allow the transaction to continue::
+
+    test=> create table foo(id integer primary key);
+    NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "foo_pkey" for table "foo"
+    CREATE TABLE
+    test=> begin;
+    BEGIN
+    test=> insert into foo values(1);
+    INSERT 0 1
+    test=> commit;
+    COMMIT
+    test=> begin;
+    BEGIN
+    test=> insert into foo values(1);
+    ERROR:  duplicate key value violates unique constraint "foo_pkey"
+    test=> insert into foo values(2);
+    ERROR:  current transaction is aborted, commands ignored until end of transaction block
+
+What SQLAlchemy offers that solves both issues is support of SAVEPOINT, via
+:meth:`.Session.begin_nested`. Using :meth:`.Session.begin_nested`, you can frame an operation that may
+potentially fail within a transaction, and then "roll back" to the point
+before its failure while maintaining the enclosing transaction.
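+
+As a short sketch of that pattern, assuming a mapped class ``Foo`` whose
+primary key may collide, and an ongoing ``session``::
+
+    from sqlalchemy import exc
+
+    try:
+        with session.begin_nested():    # emits SAVEPOINT
+            session.add(Foo(id=1))      # may fail on flush
+    except exc.IntegrityError:
+        pass   # rolled back to the SAVEPOINT; outer transaction intact
+
+    session.commit()   # the enclosing transaction proceeds normally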
+
+But why isn't the one automatic call to ROLLBACK enough?  Why must I ROLLBACK again?
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is again a matter of the :class:`.Session` providing a consistent interface and
+refusing to guess about the context in which it's being used. For example, the
+:class:`.Session` supports "framing" at multiple levels. Suppose
+you had a decorator ``@with_session()``, which did this::
+
+    def with_session(fn):
+        def go(*args, **kw):
+            session.begin(subtransactions=True)
+            try:
+                ret = fn(*args, **kw)
+                session.commit()
+                return ret
+            except:
+                session.rollback()
+                raise
+        return go
+
+The above decorator begins a transaction if one does not exist already, and
+then commits it if it was the creator. The "subtransactions" flag means that
+if :meth:`.Session.begin` were already called by an enclosing function, nothing happens
+except a counter is incremented - this counter is decremented when :meth:`.Session.commit`
+is called and only when it goes back to zero does the actual COMMIT happen. It
+allows this usage pattern::
+
+    @with_session
+    def one():
+        # do stuff
+        two()
+
+
+    @with_session
+    def two():
+        # etc.
+
+    one()
+
+    two()
+
+``one()`` can call ``two()``, or ``two()`` can be called by itself, and the
+``@with_session`` decorator ensures the appropriate "framing" - the transaction
+boundaries stay on the outermost call level. As you can see, if ``two()`` calls
+``flush()`` which throws an exception and then issues a ``rollback()``, there will
+*always* be a second ``rollback()`` performed by the decorator, and possibly a
+third corresponding to two levels of decorator. If the ``flush()`` pushed the
+``rollback()`` all the way out to the top of the stack, and then we said that
+all remaining ``rollback()`` calls are moot, there is some silent behavior going
+on there. A poorly written enclosing method might suppress the exception, and
+then call ``commit()`` assuming nothing is wrong, and then you have a silent
+failure condition. The main reason people get this error in fact is because
+they didn't write clean "framing" code and they would have had other problems
+down the road.
+
+If you think the above use case is a little exotic, the same kind of thing
+comes into play if you want to use SAVEPOINTs - you might call ``begin_nested()``
+several times, and the ``commit()``/``rollback()`` calls each resolve the most
+recent ``begin_nested()``. The meaning of ``rollback()`` or ``commit()`` is
+dependent upon the enclosing block in which it is called, and you might have any
+sequence of ``rollback()``/``commit()`` in any order, and it's the level of nesting
+that determines their behavior.
+
+In both of the above cases, if ``flush()`` broke the nesting of transaction
+blocks, the behavior is, depending on scenario, anywhere from "magic" to
+silent failure to blatant interruption of code flow.
+
+``flush()`` makes its own "subtransaction", so that a transaction is started up
+regardless of the external transactional state, and when complete it calls
+``commit()``, or ``rollback()`` upon failure - but that ``rollback()`` corresponds
+to its own subtransaction - it doesn't want to guess how you'd like to handle
+the external "framing" of the transaction, which could be nested many levels
+with any combination of subtransactions and real SAVEPOINTs. The job of
+starting/ending the "frame" is kept consistently with the code external to the
+``flush()``, and we made a decision that this was the most consistent approach.
+
+
+
+How do I make a Query that always adds a certain filter to every query?
+------------------------------------------------------------------------------------------------
+
+See the recipe at `PreFilteredQuery <http://www.sqlalchemy.org/trac/wiki/UsageRecipes/PreFilteredQuery>`_.
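+
+The core idea of that recipe, shown here only as a rough sketch (a single
+mapped class with a hypothetical ``deleted`` flag is assumed; the full
+recipe is more thorough)::
+
+    from sqlalchemy.orm import Query, sessionmaker
+
+    class FilteredQuery(Query):
+        def __iter__(self):
+            # add the filtering criterion just before the query executes
+            q = self.enable_assertions(False).filter(Thing.deleted == False)
+            return Query.__iter__(q)
+
+    # sessions produced here will use FilteredQuery for all queries
+    Session = sessionmaker(query_cls=FilteredQuery)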
+
+I've created a mapping against an Outer Join, and while the query returns rows, no objects are returned.  Why not?
+------------------------------------------------------------------------------------------------------------------
+
+Rows returned by an outer join may contain NULL for part of the primary key,
+as the primary key is the composite of both tables. The :class:`.Query` object ignores incoming rows
+that don't have an acceptable primary key. Based on the setting of the ``allow_partial_pks``
+flag on :func:`.mapper`, a primary key is accepted if the value has at least one non-NULL
+value, or alternatively if the value has no NULL values. See ``allow_partial_pks``
+at :func:`.mapper`.
+
+
+I'm using ``joinedload()`` or ``lazy=False`` to create a JOIN/OUTER JOIN and SQLAlchemy is not constructing the correct query when I try to add a WHERE, ORDER BY, LIMIT, etc. (which relies upon the (OUTER) JOIN)
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+The joins generated by joined eager loading are only used to fully load related
+collections, and are designed to have no impact on the primary results of the query.
+Since they are anonymously aliased, they cannot be referenced directly.
+
+For details on this behavior, see :doc:`orm/loading`.
+
+Query has no ``__len__()``, why not?
+------------------------------------
+
+The Python ``__len__()`` magic method applied to an object allows the ``len()``
+builtin to be used to determine the length of the collection. It's intuitive
+that a SQL query object would link ``__len__()`` to the :meth:`.Query.count`
+method, which emits a ``SELECT COUNT``. The reason this is not possible is
+because evaluating the query as a list would incur two SQL calls instead of
+one::
+
+    class Iterates(object):
+        def __len__(self):
+            print "LEN!"
+            return 5
+
+        def __iter__(self):
+            print "ITER!"
+            return iter([1, 2, 3, 4, 5])
+
+    list(Iterates())
+
+output::
+
+    ITER!
+    LEN!
+
+How do I use Textual SQL with ORM Queries?
+-------------------------------------------
+
+See:
+
+* :ref:`orm_tutorial_literal_sql` - Ad-hoc textual blocks with :class:`.Query`
+
+* :ref:`session_sql_expressions` - Using :class:`.Session` with textual SQL directly.
+
+I'm calling ``Session.delete(myobject)`` and it isn't removed from the parent collection!
+------------------------------------------------------------------------------------------
+
+See :ref:`session_deleting_from_collections` for a description of this behavior.
+
+Why isn't my ``__init__()`` called when I load objects?
+-------------------------------------------------------
+
+See :ref:`mapping_constructors` for a description of this behavior.
+
+How do I use ON DELETE CASCADE with SA's ORM?
+----------------------------------------------
+
+SQLAlchemy will always issue UPDATE or DELETE statements for dependent
+rows which are currently loaded in the :class:`.Session`. For rows which
+are not loaded, it will by default issue SELECT statements to load
+those rows and update/delete those as well; in other words it assumes
+there is no ON DELETE CASCADE configured.
+To configure SQLAlchemy to cooperate with ON DELETE CASCADE, see
+:ref:`passive_deletes`.
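+
+As a brief sketch of what that configuration looks like (a hypothetical
+``Parent``/``Child`` pair is assumed, where the database itself enforces
+ON DELETE CASCADE via the foreign key)::
+
+    from sqlalchemy import Column, ForeignKey, Integer
+    from sqlalchemy.orm import relationship
+
+    class Parent(Base):
+        __tablename__ = 'parent'
+        id = Column(Integer, primary_key=True)
+
+        # passive_deletes tells the ORM not to load unloaded children
+        # just to delete them; the database cascade handles it
+        children = relationship(
+            "Child", cascade="all, delete-orphan", passive_deletes=True)
+
+    class Child(Base):
+        __tablename__ = 'child'
+        id = Column(Integer, primary_key=True)
+        parent_id = Column(
+            Integer, ForeignKey('parent.id', ondelete="CASCADE"))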
+---------------------------------------------------------------------------------------------------------------------------------------------------- + +The ORM is not constructed in such a way as to support +immediate population of relationships driven from foreign +key attribute changes - instead, it is designed to work the +other way around - foreign key attributes are handled by the +ORM behind the scenes, the end user sets up object +relationships naturally. Therefore, the recommended way to +set ``o.foo`` is to do just that - set it!:: + + foo = Session.query(Foo).get(7) + o.foo = foo + Session.commit() + +Manipulation of foreign key attributes is of course entirely legal. However, +setting a foreign-key attribute to a new value currently does not trigger +an "expire" event of the :func:`.relationship` in which it's involved. This means +that for the following sequence:: + + o = Session.query(SomeClass).first() + assert o.foo is None # accessing an un-set attribute sets it to None + o.foo_id = 7 + +``o.foo`` is initialized to ``None`` when we first accessed it. Setting +``o.foo_id = 7`` will have the value of "7" as pending, but no flush +has occurred - so ``o.foo`` is still ``None``:: + + # attribute is already set to None, has not been + # reconciled with o.foo_id = 7 yet + assert o.foo is None + +For ``o.foo`` to load based on the foreign key mutation is usually achieved +naturally after the commit, which both flushes the new foreign key value +and expires all state:: + + Session.commit() # expires all attributes + + foo_7 = Session.query(Foo).get(7) + + assert o.foo is foo_7 # o.foo lazyloads on access + +A more minimal operation is to expire the attribute individually - this can +be performed for any :term:`persistent` object using :meth:`.Session.expire`:: + + o = Session.query(SomeClass).first() + o.foo_id = 7 + Session.expire(o, ['foo']) # object must be persistent for this + + foo_7 = Session.query(Foo).get(7) + + assert o.foo is foo_7 # o.foo lazyloads on access + +Note that if the object is not persistent but present in the :class:`.Session`, +it's known as :term:`pending`. This means the row for the object has not been +INSERTed into the database yet. For such an object, setting ``foo_id`` does not +have meaning until the row is inserted; otherwise there is no row yet:: + + new_obj = SomeClass() + new_obj.foo_id = 7 + + Session.add(new_obj) + + # accessing an un-set attribute sets it to None + assert new_obj.foo is None + + Session.flush() # emits INSERT + + # expire this because we already set .foo to None + Session.expire(o, ['foo']) + + assert new_obj.foo is foo_7 # now it loads + + +.. topic:: Attribute loading for non-persistent objects + + One variant on the "pending" behavior above is if we use the flag + ``load_on_pending`` on :func:`.relationship`. When this flag is set, the + lazy loader will emit for ``new_obj.foo`` before the INSERT proceeds; another + variant of this is to use the :meth:`.Session.enable_relationship_loading` + method, which can "attach" an object to a :class:`.Session` in such a way that + many-to-one relationships load as according to foreign key attributes + regardless of the object being in any particular state. + Both techniques are **not recommended for general use**; they were added to suit + specific programming scenarios encountered by users which involve the repurposing + of the ORM's usual object states. 
+
+The recipe `ExpireRelationshipOnFKChange <http://www.sqlalchemy.org/trac/wiki/UsageRecipes/ExpireRelationshipOnFKChange>`_ features an example using SQLAlchemy events
+in order to coordinate the setting of foreign key attributes with many-to-one
+relationships.
+
+Is there a way to automagically have only unique keywords (or other kinds of objects) without doing a query for the keyword and getting a reference to the row containing that keyword?
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+When people read the many-to-many example in the docs, they get hit with the
+fact that if you create the same ``Keyword`` twice, it gets put in the DB twice,
+which is somewhat inconvenient.
+
+The `UniqueObject <http://www.sqlalchemy.org/trac/wiki/UsageRecipes/UniqueObject>`_ recipe was created to address this issue.
+
+
diff --git a/doc/build/faq/sqlexpressions.rst b/doc/build/faq/sqlexpressions.rst
new file mode 100644
index 000000000..c3504218b
--- /dev/null
+++ b/doc/build/faq/sqlexpressions.rst
@@ -0,0 +1,140 @@
+SQL Expressions
+=================
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+.. _faq_sql_expression_string:
+
+How do I render SQL expressions as strings, possibly with bound parameters inlined?
+------------------------------------------------------------------------------------
+
+The "stringification" of a SQLAlchemy statement or Query in the vast majority
+of cases is as simple as::
+
+    print(str(statement))
+
+This applies to an ORM :class:`~.orm.query.Query` as well as to any :func:`.select` or other
+statement. Additionally, to get the statement as compiled to a
+specific dialect or engine, if the statement itself is not already
+bound to one you can pass this in to :meth:`.ClauseElement.compile`::
+
+    print(statement.compile(someengine))
+
+or without an :class:`.Engine`::
+
+    from sqlalchemy.dialects import postgresql
+    print(statement.compile(dialect=postgresql.dialect()))
+
+When given an ORM :class:`~.orm.query.Query` object, in order to get at the
+:meth:`.ClauseElement.compile`
+method we only need to access the :attr:`~.orm.query.Query.statement`
+accessor first::
+
+    statement = query.statement
+    print(statement.compile(someengine))
+
+The above forms will render the SQL statement as it is passed to the Python
+:term:`DBAPI`, meaning that bound parameters are not rendered inline.
+SQLAlchemy normally does not stringify bound parameters, as this is handled
+appropriately by the Python DBAPI; moreover, bypassing bound
+parameters is probably the most widely exploited security hole in
+modern web applications. SQLAlchemy has limited ability to do this
+stringification in certain circumstances such as that of emitting DDL.
+In order to access this functionality one can use the ``literal_binds``
+flag, passed to ``compile_kwargs``::
+
+    from sqlalchemy.sql import table, column, select
+
+    t = table('t', column('x'))
+
+    s = select([t]).where(t.c.x == 5)
+
+    print(s.compile(compile_kwargs={"literal_binds": True}))
+
+The above approach has the caveats that it is only supported for basic
+types, such as ints and strings, and furthermore if a :func:`.bindparam`
+without a pre-set value is used directly, it won't be able to
+stringify that either.
+
+To support inline literal rendering for types not supported, implement
+a :class:`.TypeDecorator` for the target type which includes a
+:meth:`.TypeDecorator.process_literal_param` method::
+
+    from sqlalchemy import TypeDecorator, Integer
+
+
+    class MyFancyType(TypeDecorator):
+        impl = Integer
+
+        def process_literal_param(self, value, dialect):
+            return "my_fancy_formatting(%s)" % value
+
+    from sqlalchemy import Table, Column, MetaData
+
+    tab = Table('mytable', MetaData(), Column('x', MyFancyType()))
+
+    print(
+        tab.select().where(tab.c.x > 5).compile(
+            compile_kwargs={"literal_binds": True})
+    )
+
+producing output like::
+
+    SELECT mytable.x
+    FROM mytable
+    WHERE mytable.x > my_fancy_formatting(5)
+
+
+Why does ``.col.in_([])`` Produce ``col != col``? Why not ``1=0``?
+-------------------------------------------------------------------
+
+First, a little introduction to the issue. The IN operator in SQL, given a list of
+elements to compare against a column, generally does not accept an empty list.
+That is, while it is valid to say::
+
+    column IN (1, 2, 3)
+
+it's not valid to say::
+
+    column IN ()
+
+SQLAlchemy's :meth:`.Operators.in_` operator, when given an empty list, produces this
+expression::
+
+    column != column
+
+As of version 0.6, it also produces a warning stating that a less efficient
+comparison operation will be rendered. This expression is the only one that is
+both database agnostic and produces correct results.
+
+For example, the naive approach of "just evaluate to false, by comparing 1=0
+or 1!=1", does not handle nulls properly. An expression like::
+
+    NOT column != column
+
+will not return a row when "column" is null, but an expression which does not
+take the column into account::
+
+    NOT 1=0
+
+will.
+
+Closer to the mark is the following CASE expression::
+
+    CASE WHEN column IS NOT NULL THEN 1=0 ELSE NULL END
+
+We don't use this expression due to its verbosity, and it's also not
+typically accepted by Oracle within a WHERE clause - depending
+on how you phrase it, you'll either get "ORA-00905: missing keyword" or
+"ORA-00920: invalid relational operator". It's also still less efficient than
+just rendering SQL without the clause altogether (or not issuing the SQL at
+all, if the statement is just a simple search).
+
+The best approach therefore is to avoid the usage of IN given an argument list
+of zero length. Instead, don't emit the Query in the first place, if no rows
+should be returned. The warning is best promoted to a full error condition
+using the Python warnings filter (see http://docs.python.org/library/warnings.html).
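+
+For example, one way to turn the warning into an exception, such as during
+testing (a sketch; adjust the filter to be as narrow as needed)::
+
+    import warnings
+    from sqlalchemy import exc
+
+    # raise an error instead of warning for SQLAlchemy-emitted warnings
+    warnings.filterwarnings("error", category=exc.SAWarning)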
+
diff --git a/doc/build/glossary.rst b/doc/build/glossary.rst
index ab9e92d26..c0ecee84b 100644
--- a/doc/build/glossary.rst
+++ b/doc/build/glossary.rst
@@ -99,6 +99,7 @@ Glossary
 
     instrumentation
     instrumented
+    instrumenting
        Instrumentation refers to the process of augmenting the functionality
        and attribute set of a particular class. Ideally, the
        behavior of the class should remain close to a regular
@@ -146,7 +147,7 @@ Glossary
 
            :term:`N plus one problem`
 
-           :doc:`orm/loading`
+           :doc:`orm/loading_relationships`
 
     mapping
     mapped
@@ -175,7 +176,7 @@ Glossary
 
        .. seealso::
 
-           :doc:`orm/loading`
+           :doc:`orm/loading_relationships`
 
     polymorphic
     polymorphically
diff --git a/doc/build/index.rst b/doc/build/index.rst
index b65755d43..55dba45fe 100644
--- a/doc/build/index.rst
+++ b/doc/build/index.rst
@@ -13,7 +13,7 @@ A high level view and getting set up.
 
 :doc:`Overview <intro>` |
 :ref:`Installation Guide <installation>` |
-:doc:`Frequently Asked Questions <faq>` |
+:doc:`Frequently Asked Questions <faq/index>` |
 :doc:`Migration from 0.9 <changelog/migration_10>` |
 :doc:`Glossary <glossary>` |
 :doc:`Changelog catalog <changelog/index>`
@@ -31,33 +31,24 @@ of Python objects, proceed first to the tutorial.
 
 * **ORM Configuration:**
   :doc:`Mapper Configuration <orm/mapper_config>` |
-  :doc:`Relationship Configuration <orm/relationships>` |
-  :doc:`Inheritance Mapping <orm/inheritance>` |
-  :doc:`Advanced Collection Configuration <orm/collections>`
+  :doc:`Relationship Configuration <orm/relationships>`
 
 * **Configuration Extensions:**
-  :doc:`Declarative Extension <orm/extensions/declarative>` |
+  :doc:`Declarative Extension <orm/extensions/declarative/index>` |
   :doc:`Association Proxy <orm/extensions/associationproxy>` |
   :doc:`Hybrid Attributes <orm/extensions/hybrid>` |
-  :doc:`Automap <orm/extensions/automap>` (**new**) |
-  :doc:`Mutable Scalars <orm/extensions/mutable>` |
-  :doc:`Ordered List <orm/extensions/orderinglist>`
+  :doc:`Automap <orm/extensions/automap>` |
+  :doc:`Mutable Scalars <orm/extensions/mutable>`
 
 * **ORM Usage:**
   :doc:`Session Usage and Guidelines <orm/session>` |
-  :doc:`Query API reference <orm/query>` |
-  :doc:`Relationship Loading Techniques <orm/loading>`
+  :doc:`Loading Objects <orm/loading_objects>`
 
 * **Extending the ORM:**
-  :doc:`ORM Event Interfaces <orm/events>` |
-  :doc:`Internals API <orm/internals>`
+  :doc:`ORM Events and Internals <orm/extending>`
 
 * **Other:**
-  :doc:`Introduction to Examples <orm/examples>` |
-  :doc:`Deprecated Event Interfaces <orm/deprecated>` |
-  :doc:`ORM Exceptions <orm/exceptions>` |
-  :doc:`Horizontal Sharding <orm/extensions/horizontal_shard>` |
-  :doc:`Alternate Instrumentation <orm/extensions/instrumentation>`
+  :doc:`Introduction to Examples <orm/examples>`
 
 SQLAlchemy Core
 ===============
@@ -78,6 +69,7 @@ are documented here. In contrast to the ORM's domain-centric mode of usage, the
   :doc:`Connection Pooling <core/pooling>`
 
 * **Schema Definition:**
+  :doc:`Overview <core/schema>` |
   :ref:`Tables and Columns <metadata_describing_toplevel>` |
   :ref:`Database Introspection (Reflection) <metadata_reflection_toplevel>` |
  :ref:`Insert/Update Defaults <metadata_defaults_toplevel>` |
@@ -86,23 +78,15 @@ are documented here. In contrast to the ORM's domain-centric mode of usage, the
 
 * **Datatypes:**
   :ref:`Overview <types_toplevel>` |
-  :ref:`Generic Types <types_generic>` |
-  :ref:`SQL Standard Types <types_sqlstandard>` |
-  :ref:`Vendor Specific Types <types_vendor>` |
   :ref:`Building Custom Types <types_custom>` |
-  :ref:`Defining New Operators <types_operators>` |
   :ref:`API <types_api>`
 
-* **Extending the Core:**
-  :doc:`SQLAlchemy Events <core/event>` |
+* **Core Basics:**
+  :doc:`Overview <core/api_basics>` |
+  :doc:`Runtime Inspection API <core/inspection>` |
+  :doc:`Event System <core/event>` |
   :doc:`Core Event Interfaces <core/events>` |
   :doc:`Creating Custom SQL Constructs <core/compiler>` |
-  :doc:`Internals API <core/internals>`
-
-* **Other:**
-  :doc:`Runtime Inspection API <core/inspection>` |
-  :doc:`core/interfaces` |
-  :doc:`core/exceptions`
 
 Dialect Documentation
diff --git a/doc/build/orm/backref.rst b/doc/build/orm/backref.rst
new file mode 100644
index 000000000..16cfe5606
--- /dev/null
+++ b/doc/build/orm/backref.rst
@@ -0,0 +1,273 @@
+.. _relationships_backref:
+
+Linking Relationships with Backref
+----------------------------------
+
+The :paramref:`~.relationship.backref` keyword argument was first introduced in :ref:`ormtutorial_toplevel`, and has been
+mentioned throughout many of the examples here. What does it actually do? Let's start
+with the canonical ``User`` and ``Address`` scenario::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        addresses = relationship("Address", backref="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        email = Column(String)
+        user_id = Column(Integer, ForeignKey('user.id'))
+
+The above configuration establishes a collection of ``Address`` objects on ``User`` called
+``User.addresses``. It also establishes a ``.user`` attribute on ``Address`` which will
+refer to the parent ``User`` object.
+
+In fact, the :paramref:`~.relationship.backref` keyword is only a common shortcut for placing a second
+:func:`.relationship` onto the ``Address`` mapping, including the establishment
+of an event listener on both sides which will mirror attribute operations
+in both directions. The above configuration is equivalent to::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        addresses = relationship("Address", back_populates="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        email = Column(String)
+        user_id = Column(Integer, ForeignKey('user.id'))
+
+        user = relationship("User", back_populates="addresses")
+
+Above, we add a ``.user`` relationship to ``Address`` explicitly. On
+both relationships, the :paramref:`~.relationship.back_populates` directive tells each relationship
+about the other one, indicating that they should establish "bidirectional"
+behavior between each other. The primary effect of this configuration
+is that the relationship adds event handlers to both attributes
+which have the behavior of "when an append or set event occurs here, set ourselves
+onto the incoming attribute using this particular attribute name".
+The behavior is illustrated as follows. Start with a ``User`` and an ``Address``
+instance. The ``.addresses`` collection is empty, and the ``.user`` attribute
+is ``None``::
+
+    >>> u1 = User()
+    >>> a1 = Address()
+    >>> u1.addresses
+    []
+    >>> print a1.user
+    None
+
+However, once the ``Address`` is appended to the ``u1.addresses`` collection,
+both the collection and the scalar attribute have been populated::
+
+    >>> u1.addresses.append(a1)
+    >>> u1.addresses
+    [<__main__.Address object at 0x12a6ed0>]
+    >>> a1.user
+    <__main__.User object at 0x12a6590>
+
+This behavior of course works in reverse for removal operations, as well
+as for equivalent operations on both sides.
+For example, when ``.user`` is set again to ``None``, the ``Address`` object is removed
+from the reverse collection::
+
+    >>> a1.user = None
+    >>> u1.addresses
+    []
+
+The manipulation of the ``.addresses`` collection and the ``.user`` attribute
+occurs entirely in Python without any interaction with the SQL database.
+Without this behavior, the proper state would be apparent on both sides only once the
+data has been flushed to the database, and later reloaded after a commit or
+expiration operation occurs.   The :paramref:`~.relationship.backref`/:paramref:`~.relationship.back_populates` behavior has the advantage
+that common bidirectional operations can reflect the correct state without requiring
+a database round trip.
+
+Remember, when the :paramref:`~.relationship.backref` keyword is used on a single relationship, it's
+exactly the same as if the above two relationships were created individually
+using :paramref:`~.relationship.back_populates` on each.
+
+Backref Arguments
+~~~~~~~~~~~~~~~~~~
+
+We've established that the :paramref:`~.relationship.backref` keyword is merely a shortcut for building
+two individual :func:`.relationship` constructs that refer to each other.  Part of
+the behavior of this shortcut is that certain configurational arguments applied to
+the :func:`.relationship`
+will also be applied to the other direction - namely those arguments that describe
+the relationship at a schema level, and are unlikely to be different in the reverse
+direction.  The usual case
+here is a many-to-many :func:`.relationship` that has a :paramref:`~.relationship.secondary` argument,
+or a one-to-many or many-to-one which has a :paramref:`~.relationship.primaryjoin` argument (the
+:paramref:`~.relationship.primaryjoin` argument is discussed in :ref:`relationship_primaryjoin`).  Suppose,
+for example, we limited the list of ``Address`` objects to those which start with "tony"::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        addresses = relationship("Address",
+                        primaryjoin="and_(User.id==Address.user_id, "
+                            "Address.email.startswith('tony'))",
+                        backref="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        email = Column(String)
+        user_id = Column(Integer, ForeignKey('user.id'))
+
+We can observe, by inspecting the resulting property, that both sides
+of the relationship have this join condition applied::
+
+    >>> print User.addresses.property.primaryjoin
+    "user".id = address.user_id AND address.email LIKE :email_1 || '%%'
+    >>>
+    >>> print Address.user.property.primaryjoin
+    "user".id = address.user_id AND address.email LIKE :email_1 || '%%'
+    >>>
+
+This reuse of arguments should pretty much do the "right thing" - it
+uses only arguments that are applicable, and in the case of a
+many-to-many relationship, will reverse the usage of
+:paramref:`~.relationship.primaryjoin` and
+:paramref:`~.relationship.secondaryjoin` to correspond to the other
+direction (see the example in :ref:`self_referential_many_to_many` for
+this).
+
+It's very often the case however that we'd like to specify arguments
+that are specific to just the side where we happened to place the
+"backref".
This includes :func:`.relationship` arguments like +:paramref:`~.relationship.lazy`, +:paramref:`~.relationship.remote_side`, +:paramref:`~.relationship.cascade` and +:paramref:`~.relationship.cascade_backrefs`. For this case we use +the :func:`.backref` function in place of a string:: + + # <other imports> + from sqlalchemy.orm import backref + + class User(Base): + __tablename__ = 'user' + id = Column(Integer, primary_key=True) + name = Column(String) + + addresses = relationship("Address", + backref=backref("user", lazy="joined")) + +Where above, we placed a ``lazy="joined"`` directive only on the ``Address.user`` +side, indicating that when a query against ``Address`` is made, a join to the ``User`` +entity should be made automatically which will populate the ``.user`` attribute of each +returned ``Address``. The :func:`.backref` function formatted the arguments we gave +it into a form that is interpreted by the receiving :func:`.relationship` as additional +arguments to be applied to the new relationship it creates. + +One Way Backrefs +~~~~~~~~~~~~~~~~~ + +An unusual case is that of the "one way backref". This is where the +"back-populating" behavior of the backref is only desirable in one +direction. An example of this is a collection which contains a +filtering :paramref:`~.relationship.primaryjoin` condition. We'd +like to append items to this collection as needed, and have them +populate the "parent" object on the incoming object. However, we'd +also like to have items that are not part of the collection, but still +have the same "parent" association - these items should never be in +the collection. + +Taking our previous example, where we established a +:paramref:`~.relationship.primaryjoin` that limited the collection +only to ``Address`` objects whose email address started with the word +``tony``, the usual backref behavior is that all items populate in +both directions. We wouldn't want this behavior for a case like the +following:: + + >>> u1 = User() + >>> a1 = Address(email='mary') + >>> a1.user = u1 + >>> u1.addresses + [<__main__.Address object at 0x1411910>] + +Above, the ``Address`` object that doesn't match the criterion of "starts with 'tony'" +is present in the ``addresses`` collection of ``u1``. After these objects are flushed, +the transaction committed and their attributes expired for a re-load, the ``addresses`` +collection will hit the database on next access and no longer have this ``Address`` object +present, due to the filtering condition. 
But we can do away with this unwanted aspect
+of the "backref" behavior on the Python side by using two separate :func:`.relationship` constructs,
+placing :paramref:`~.relationship.back_populates` only on one side::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+        addresses = relationship("Address",
+                        primaryjoin="and_(User.id==Address.user_id, "
+                            "Address.email.startswith('tony'))",
+                        back_populates="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        email = Column(String)
+        user_id = Column(Integer, ForeignKey('user.id'))
+        user = relationship("User")
+
+With the above scenario, appending an ``Address`` object to the ``.addresses``
+collection of a ``User`` will always establish the ``.user`` attribute on that
+``Address``::
+
+    >>> u1 = User()
+    >>> a1 = Address(email='tony')
+    >>> u1.addresses.append(a1)
+    >>> a1.user
+    <__main__.User object at 0x1411850>
+
+However, applying a ``User`` to the ``.user`` attribute of an ``Address``
+will not append the ``Address`` object to the collection::
+
+    >>> a2 = Address(email='mary')
+    >>> a2.user = u1
+    >>> a2 in u1.addresses
+    False
+
+Of course, we've disabled some of the usefulness of
+:paramref:`~.relationship.backref` here, in that when we do append an
+``Address`` that corresponds to the criteria of
+``email.startswith('tony')``, it won't show up in the
+``User.addresses`` collection until the session is flushed, and the
+attributes reloaded after a commit or expire operation.   While we
+could consider an attribute event that checks this criterion in
+Python, this starts to cross the line of duplicating too much SQL
+behavior in Python.  The backref behavior itself is only a slight
+transgression of this philosophy - SQLAlchemy tries to keep these to a
+minimum overall.
diff --git a/doc/build/orm/basic_relationships.rst b/doc/build/orm/basic_relationships.rst
new file mode 100644
index 000000000..9a7ad4fa2
--- /dev/null
+++ b/doc/build/orm/basic_relationships.rst
@@ -0,0 +1,313 @@
+.. _relationship_patterns:
+
+Basic Relationship Patterns
+----------------------------
+
+A quick walkthrough of the basic relational patterns.
+
+The imports used for each of the following sections are as follows::
+
+    from sqlalchemy import Table, Column, Integer, ForeignKey
+    from sqlalchemy.orm import relationship, backref
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+
+
+One To Many
+~~~~~~~~~~~~
+
+A one to many relationship places a foreign key on the child table referencing
+the parent.
:func:`.relationship` is then specified on the parent, as referencing +a collection of items represented by the child:: + + class Parent(Base): + __tablename__ = 'parent' + id = Column(Integer, primary_key=True) + children = relationship("Child") + + class Child(Base): + __tablename__ = 'child' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('parent.id')) + +To establish a bidirectional relationship in one-to-many, where the "reverse" +side is a many to one, specify the :paramref:`~.relationship.backref` option:: + + class Parent(Base): + __tablename__ = 'parent' + id = Column(Integer, primary_key=True) + children = relationship("Child", backref="parent") + + class Child(Base): + __tablename__ = 'child' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('parent.id')) + +``Child`` will get a ``parent`` attribute with many-to-one semantics. + +Many To One +~~~~~~~~~~~~ + +Many to one places a foreign key in the parent table referencing the child. +:func:`.relationship` is declared on the parent, where a new scalar-holding +attribute will be created:: + + class Parent(Base): + __tablename__ = 'parent' + id = Column(Integer, primary_key=True) + child_id = Column(Integer, ForeignKey('child.id')) + child = relationship("Child") + + class Child(Base): + __tablename__ = 'child' + id = Column(Integer, primary_key=True) + +Bidirectional behavior is achieved by setting +:paramref:`~.relationship.backref` to the value ``"parents"``, which +will place a one-to-many collection on the ``Child`` class:: + + class Parent(Base): + __tablename__ = 'parent' + id = Column(Integer, primary_key=True) + child_id = Column(Integer, ForeignKey('child.id')) + child = relationship("Child", backref="parents") + +.. _relationships_one_to_one: + +One To One +~~~~~~~~~~~ + +One To One is essentially a bidirectional relationship with a scalar +attribute on both sides. To achieve this, the :paramref:`~.relationship.uselist` flag indicates +the placement of a scalar attribute instead of a collection on the "many" side +of the relationship. To convert one-to-many into one-to-one:: + + class Parent(Base): + __tablename__ = 'parent' + id = Column(Integer, primary_key=True) + child = relationship("Child", uselist=False, backref="parent") + + class Child(Base): + __tablename__ = 'child' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('parent.id')) + +Or to turn a one-to-many backref into one-to-one, use the :func:`.backref` function +to provide arguments for the reverse side:: + + class Parent(Base): + __tablename__ = 'parent' + id = Column(Integer, primary_key=True) + child_id = Column(Integer, ForeignKey('child.id')) + child = relationship("Child", backref=backref("parent", uselist=False)) + + class Child(Base): + __tablename__ = 'child' + id = Column(Integer, primary_key=True) + +.. _relationships_many_to_many: + +Many To Many +~~~~~~~~~~~~~ + +Many to Many adds an association table between two classes. The association +table is indicated by the :paramref:`~.relationship.secondary` argument to +:func:`.relationship`. 
Usually, the :class:`.Table` uses the :class:`.MetaData` +object associated with the declarative base class, so that the :class:`.ForeignKey` +directives can locate the remote tables with which to link:: + + association_table = Table('association', Base.metadata, + Column('left_id', Integer, ForeignKey('left.id')), + Column('right_id', Integer, ForeignKey('right.id')) + ) + + class Parent(Base): + __tablename__ = 'left' + id = Column(Integer, primary_key=True) + children = relationship("Child", + secondary=association_table) + + class Child(Base): + __tablename__ = 'right' + id = Column(Integer, primary_key=True) + +For a bidirectional relationship, both sides of the relationship contain a +collection. The :paramref:`~.relationship.backref` keyword will automatically use +the same :paramref:`~.relationship.secondary` argument for the reverse relationship:: + + association_table = Table('association', Base.metadata, + Column('left_id', Integer, ForeignKey('left.id')), + Column('right_id', Integer, ForeignKey('right.id')) + ) + + class Parent(Base): + __tablename__ = 'left' + id = Column(Integer, primary_key=True) + children = relationship("Child", + secondary=association_table, + backref="parents") + + class Child(Base): + __tablename__ = 'right' + id = Column(Integer, primary_key=True) + +The :paramref:`~.relationship.secondary` argument of :func:`.relationship` also accepts a callable +that returns the ultimate argument, which is evaluated only when mappers are +first used. Using this, we can define the ``association_table`` at a later +point, as long as it's available to the callable after all module initialization +is complete:: + + class Parent(Base): + __tablename__ = 'left' + id = Column(Integer, primary_key=True) + children = relationship("Child", + secondary=lambda: association_table, + backref="parents") + +With the declarative extension in use, the traditional "string name of the table" +is accepted as well, matching the name of the table as stored in ``Base.metadata.tables``:: + + class Parent(Base): + __tablename__ = 'left' + id = Column(Integer, primary_key=True) + children = relationship("Child", + secondary="association", + backref="parents") + +.. _relationships_many_to_many_deletion: + +Deleting Rows from the Many to Many Table +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +A behavior which is unique to the :paramref:`~.relationship.secondary` argument to :func:`.relationship` +is that the :class:`.Table` which is specified here is automatically subject +to INSERT and DELETE statements, as objects are added or removed from the collection. +There is **no need to delete from this table manually**. The act of removing a +record from the collection will have the effect of the row being deleted on flush:: + + # row will be deleted from the "secondary" table + # automatically + myparent.children.remove(somechild) + +A question which often arises is how the row in the "secondary" table can be deleted +when the child object is handed directly to :meth:`.Session.delete`:: + + session.delete(somechild) + +There are several possibilities here: + +* If there is a :func:`.relationship` from ``Parent`` to ``Child``, but there is + **not** a reverse-relationship that links a particular ``Child`` to each ``Parent``, + SQLAlchemy will not have any awareness that when deleting this particular + ``Child`` object, it needs to maintain the "secondary" table that links it to + the ``Parent``. No delete of the "secondary" table will occur. 
+* If there is a relationship that links a particular ``Child`` to each ``Parent``,
+  suppose it's called ``Child.parents``, SQLAlchemy by default will load in
+  the ``Child.parents`` collection to locate all ``Parent`` objects, and remove
+  each row from the "secondary" table which establishes this link.  Note that
+  this relationship does not need to be bidirectional; SQLAlchemy is strictly
+  looking at every :func:`.relationship` associated with the ``Child`` object
+  being deleted.
+* A higher performing option here is to use ON DELETE CASCADE directives
+  with the foreign keys used by the database.  Assuming the database supports
+  this feature, the database itself can be made to automatically delete rows in the
+  "secondary" table as referencing rows in "child" are deleted.   SQLAlchemy
+  can be instructed to forego actively loading in the ``Child.parents``
+  collection in this case using the :paramref:`~.relationship.passive_deletes`
+  directive on :func:`.relationship`; see :ref:`passive_deletes` for more details
+  on this.  A brief sketch of this configuration follows at the end of this
+  section.
+
+Note again, these behaviors are *only* relevant to the :paramref:`~.relationship.secondary` option
+used with :func:`.relationship`.   If dealing with association tables that
+are mapped explicitly and are *not* present in the :paramref:`~.relationship.secondary` option
+of a relevant :func:`.relationship`, cascade rules can be used instead
+to automatically delete entities in reaction to a related entity being
+deleted - see :ref:`unitofwork_cascades` for information on this feature.
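+
+As a rough sketch of the ``ON DELETE CASCADE`` approach above - the
+``ondelete`` setting on :class:`.ForeignKey` and the
+:paramref:`~.relationship.passive_deletes` flag are the real configuration
+points, though the exact schema here is only an illustration and assumes a
+database that enforces foreign key cascades::
+
+    # foreign keys in the association table carry ON DELETE CASCADE,
+    # so the database removes association rows itself when a "left"
+    # or "right" row is deleted
+    association_table = Table('association', Base.metadata,
+        Column('left_id', Integer,
+                ForeignKey('left.id', ondelete="CASCADE")),
+        Column('right_id', Integer,
+                ForeignKey('right.id', ondelete="CASCADE"))
+    )
+
+    class Parent(Base):
+        __tablename__ = 'left'
+        id = Column(Integer, primary_key=True)
+
+        # passive_deletes on the reverse side tells the ORM not to
+        # load Child.parents just to delete association rows
+        children = relationship("Child",
+                        secondary=association_table,
+                        backref=backref("parents", passive_deletes=True))
+
+    class Child(Base):
+        __tablename__ = 'right'
+        id = Column(Integer, primary_key=True)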
+
+
+.. _association_pattern:
+
+Association Object
+~~~~~~~~~~~~~~~~~~
+
+The association object pattern is a variant on many-to-many: it's used
+when your association table contains additional columns beyond those
+which are foreign keys to the left and right tables. Instead of using
+the :paramref:`~.relationship.secondary` argument, you map a new class
+directly to the association table.  The left side of the relationship
+references the association object via one-to-many, and the association
+class references the right side via many-to-one.  Below we illustrate
+an association table mapped to the ``Association`` class which
+includes a column called ``extra_data``, which is a string value that
+is stored along with each association between ``Parent`` and
+``Child``::
+
+    class Association(Base):
+        __tablename__ = 'association'
+        left_id = Column(Integer, ForeignKey('left.id'), primary_key=True)
+        right_id = Column(Integer, ForeignKey('right.id'), primary_key=True)
+        extra_data = Column(String(50))
+        child = relationship("Child")
+
+    class Parent(Base):
+        __tablename__ = 'left'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Association")
+
+    class Child(Base):
+        __tablename__ = 'right'
+        id = Column(Integer, primary_key=True)
+
+The bidirectional version adds backrefs to both relationships::
+
+    class Association(Base):
+        __tablename__ = 'association'
+        left_id = Column(Integer, ForeignKey('left.id'), primary_key=True)
+        right_id = Column(Integer, ForeignKey('right.id'), primary_key=True)
+        extra_data = Column(String(50))
+        child = relationship("Child", backref="parent_assocs")
+
+    class Parent(Base):
+        __tablename__ = 'left'
+        id = Column(Integer, primary_key=True)
+        children = relationship("Association", backref="parent")
+
+    class Child(Base):
+        __tablename__ = 'right'
+        id = Column(Integer, primary_key=True)
+
+Working with the association pattern in its direct form requires that child
+objects are associated with an association instance before being appended to
+the parent; similarly, access from parent to child goes through the
+association object::
+
+    # create parent, append a child via association
+    p = Parent()
+    a = Association(extra_data="some data")
+    a.child = Child()
+    p.children.append(a)
+
+    # iterate through child objects via association, including association
+    # attributes
+    for assoc in p.children:
+        print assoc.extra_data
+        print assoc.child
+
+To enhance the association object pattern such that direct
+access to the ``Association`` object is optional, SQLAlchemy
+provides the :ref:`associationproxy_toplevel` extension.  This
+extension allows the configuration of attributes which will
+access two "hops" with a single access, one "hop" to the
+associated object, and a second to a target attribute.
+
+.. note::
+
+    When using the association object pattern, it is advisable that the
+    association-mapped table not be used as the
+    :paramref:`~.relationship.secondary` argument on a
+    :func:`.relationship` elsewhere, unless that :func:`.relationship`
+    contains the option :paramref:`~.relationship.viewonly` set to
+    ``True``.  SQLAlchemy otherwise may attempt to emit redundant INSERT
+    and DELETE statements on the same table, if similar state is
+    detected on the related attribute as well as the associated object.
diff --git a/doc/build/orm/cascades.rst b/doc/build/orm/cascades.rst
new file mode 100644
index 000000000..f645e6dae
--- /dev/null
+++ b/doc/build/orm/cascades.rst
@@ -0,0 +1,372 @@
+.. _unitofwork_cascades:
+
+Cascades
+========
+
+Mappers support the concept of configurable :term:`cascade` behavior on
+:func:`~sqlalchemy.orm.relationship` constructs.  This refers
+to how operations performed on a "parent" object relative to a
+particular :class:`.Session` should be propagated to items
+referred to by that relationship (e.g. "child" objects), and is
+affected by the :paramref:`.relationship.cascade` option.
+
+The default behavior of cascade is limited to cascades of the
+so-called :ref:`cascade_save_update` and :ref:`cascade_merge` settings.
+The typical "alternative" setting for cascade is to add
+the :ref:`cascade_delete` and :ref:`cascade_delete_orphan` options;
+these settings are appropriate for related objects which only exist as
+long as they are attached to their parent, and are otherwise deleted.
+
+Cascade behavior is configured by changing the
+:paramref:`~.relationship.cascade` option on
+:func:`~sqlalchemy.orm.relationship`::
+
+    class Order(Base):
+        __tablename__ = 'order'
+
+        items = relationship("Item", cascade="all, delete-orphan")
+        customer = relationship("User", cascade="save-update")
+
+To set cascades on a backref, the same flag can be used with the
+:func:`~.sqlalchemy.orm.backref` function, which ultimately feeds
+its arguments back into :func:`~sqlalchemy.orm.relationship`::
+
+    class Item(Base):
+        __tablename__ = 'item'
+
+        order = relationship("Order",
+                        backref=backref("items", cascade="all, delete-orphan")
+                    )
+
+.. sidebar:: The Origins of Cascade
+
+    SQLAlchemy's notion of cascading behavior on relationships,
+    as well as the options to configure them, are primarily derived
+    from the similar feature in the Hibernate ORM; Hibernate refers
+    to "cascade" in a few places such as in
+    `Example: Parent/Child <https://docs.jboss.org/hibernate/orm/3.3/reference/en-US/html/example-parentchild.html>`_.
+    If cascades are confusing, we'll refer to their conclusion,
+    stating "The sections we have just covered can be a bit confusing.
+    However, in practice, it all works out nicely."
+
+The default value of :paramref:`~.relationship.cascade` is ``save-update, merge``.
+The typical alternative setting for this parameter is either
+``all`` or more commonly ``all, delete-orphan``.  The ``all`` symbol
+is a synonym for ``save-update, merge, refresh-expire, expunge, delete``,
+and using it in conjunction with ``delete-orphan`` indicates that the child
+object should follow along with its parent in all cases, and be deleted once
+it is no longer associated with that parent.
+
+The list of available values which can be specified for
+the :paramref:`~.relationship.cascade` parameter are described in the following subsections.
+
+.. _cascade_save_update:
+
+save-update
+-----------
+
+``save-update`` cascade indicates that when an object is placed into a
+:class:`.Session` via :meth:`.Session.add`, all the objects associated
+with it via this :func:`.relationship` should also be added to that
+same :class:`.Session`.  Suppose we have an object ``user1`` with two
+related objects ``address1``, ``address2``::
+
+    >>> user1 = User()
+    >>> address1, address2 = Address(), Address()
+    >>> user1.addresses = [address1, address2]
+
+If we add ``user1`` to a :class:`.Session`, it will also add
+``address1``, ``address2`` implicitly::
+
+    >>> sess = Session()
+    >>> sess.add(user1)
+    >>> address1 in sess
+    True
+
+``save-update`` cascade also affects attribute operations for objects
+that are already present in a :class:`.Session`.  If we add a third
+object, ``address3`` to the ``user1.addresses`` collection, it
+becomes part of the state of that :class:`.Session`::
+
+    >>> address3 = Address()
+    >>> user1.addresses.append(address3)
+    >>> address3 in sess
+    True
+
+``save-update`` has the possibly surprising behavior that persistent
+objects which were *removed* from a collection
+or in some cases a scalar attribute
+may also be pulled into the :class:`.Session` of a parent object; this is
+so that the flush process may handle that related object appropriately.
+This case can usually only arise if an object is removed from one :class:`.Session`
+and added to another::
+
+    >>> user1 = sess1.query(User).filter_by(id=1).first()
+    >>> address1 = user1.addresses[0]
+    >>> sess1.close()   # user1, address1 no longer associated with sess1
+    >>> user1.addresses.remove(address1)  # address1 no longer associated with user1
+    >>> sess2 = Session()
+    >>> sess2.add(user1)   # ... but it still gets added to the new session,
+    >>> address1 in sess2  # because it's still "pending" for flush
+    True
+
+The ``save-update`` cascade is on by default, and is typically taken
+for granted; it simplifies code by allowing a single call to
+:meth:`.Session.add` to register an entire structure of objects within
+that :class:`.Session` at once.   While it can be disabled, there
+is usually not a need to do so.
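+
+Should it ever need to be disabled, a minimal sketch - an illustration
+only, not a recommended default - is to name only the remaining default
+cascade explicitly, leaving ``save-update`` out::
+
+    class User(Base):
+        # ...
+
+        # "merge" alone; Address objects appended to .addresses are no
+        # longer implicitly added to the parent's Session
+        addresses = relationship("Address", cascade="merge")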
+
+One case where ``save-update`` cascade does sometimes get in the way is in that
+it takes place in both directions for bi-directional relationships, e.g.
+backrefs, meaning that the association of a child object with a particular parent
+can have the effect of the parent object being implicitly associated with that
+child object's :class:`.Session`; this pattern, as well as how to modify its
+behavior using the :paramref:`~.relationship.cascade_backrefs` flag,
+is discussed in the section :ref:`backref_cascade`.
+
+.. _cascade_delete:
+
+delete
+------
+
+The ``delete`` cascade indicates that when a "parent" object
+is marked for deletion, its related "child" objects should also be marked
+for deletion.   If for example we have a relationship ``User.addresses``
+with ``delete`` cascade configured::
+
+    class User(Base):
+        # ...
+
+        addresses = relationship("Address", cascade="save-update, merge, delete")
+
+If using the above mapping, we have a ``User`` object and two
+related ``Address`` objects::
+
+    >>> user1 = sess.query(User).filter_by(id=1).first()
+    >>> address1, address2 = user1.addresses
+
+If we mark ``user1`` for deletion, after the flush operation proceeds,
+``address1`` and ``address2`` will also be deleted:
+
+.. sourcecode:: python+sql
+
+    >>> sess.delete(user1)
+    >>> sess.commit()
+    {opensql}DELETE FROM address WHERE address.id = ?
+    ((1,), (2,))
+    DELETE FROM user WHERE user.id = ?
+    (1,)
+    COMMIT
+
+Alternatively, if our ``User.addresses`` relationship does *not* have
+``delete`` cascade, SQLAlchemy's default behavior is to instead de-associate
+``address1`` and ``address2`` from ``user1`` by setting their foreign key
+reference to ``NULL``.  Using a mapping as follows::
+
+    class User(Base):
+        # ...
+
+        addresses = relationship("Address")
+
+Upon deletion of a parent ``User`` object, the rows in ``address`` are not
+deleted, but are instead de-associated:
+
+.. sourcecode:: python+sql
+
+    >>> sess.delete(user1)
+    >>> sess.commit()
+    {opensql}UPDATE address SET user_id=? WHERE address.id = ?
+    (None, 1)
+    UPDATE address SET user_id=? WHERE address.id = ?
+    (None, 2)
+    DELETE FROM user WHERE user.id = ?
+    (1,)
+    COMMIT
+
+``delete`` cascade is more often than not used in conjunction with
+:ref:`cascade_delete_orphan` cascade, which will emit a DELETE for the related
+row if the "child" object is deassociated from the parent.  The combination
+of ``delete`` and ``delete-orphan`` cascade covers both situations where
+SQLAlchemy has to decide between setting a foreign key column to NULL versus
+deleting the row entirely.
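+
+As a brief sketch of that combination - assuming the ``User.addresses``
+relationship above were configured with ``cascade="all, delete-orphan"`` -
+removing an ``Address`` from the collection alone is enough to have its
+row deleted::
+
+    >>> user1 = sess.query(User).filter_by(id=1).first()
+    >>> address1 = user1.addresses[0]
+    >>> user1.addresses.remove(address1)  # address1 becomes an "orphan"
+    >>> sess.commit()  # emits DELETE for address1, not an UPDATE to NULL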
+
+.. topic:: ORM-level "delete" cascade vs. FOREIGN KEY level "ON DELETE" cascade
+
+    The behavior of SQLAlchemy's "delete" cascade has a lot of overlap with the
+    ``ON DELETE CASCADE`` feature of a database foreign key, as well
+    as with that of the ``ON DELETE SET NULL`` foreign key setting when "delete"
+    cascade is not specified.   Database level "ON DELETE" cascades are specific to the
+    "FOREIGN KEY" construct of the relational database; SQLAlchemy allows
+    configuration of these schema-level constructs at the :term:`DDL` level
+    using options on :class:`.ForeignKeyConstraint` which are described
+    at :ref:`on_update_on_delete`.
+
+    It is important to note the differences between the ORM and the relational
+    database's notion of "cascade" as well as how they integrate:
+
+    * A database level ``ON DELETE`` cascade is configured effectively
+      on the **many-to-one** side of the relationship; that is, we configure
+      it relative to the ``FOREIGN KEY`` constraint that is the "many" side
+      of a relationship.  At the ORM level, **this direction is reversed**.
+      SQLAlchemy handles the deletion of "child" objects relative to a
+      "parent" from the "parent" side, which means that ``delete`` and
+      ``delete-orphan`` cascade are configured on the **one-to-many**
+      side.
+
+    * Database level foreign keys with no ``ON DELETE`` setting
+      are often used to **prevent** a parent
+      row from being removed, as it would necessarily leave an unhandled
+      related row present.  If this behavior is desired in a one-to-many
+      relationship, SQLAlchemy's default behavior of setting a foreign key
+      to ``NULL`` can be caught in one of two ways:
+
+      * The easiest and most common is just to set the
+        foreign-key-holding column to ``NOT NULL`` at the database schema
+        level.  An attempt by SQLAlchemy to set the column to NULL will
+        fail with a simple NOT NULL constraint exception.
+
+      * The other, more special case way is to set the :paramref:`~.relationship.passive_deletes`
+        flag to the string ``"all"``.  This has the effect of entirely
+        disabling SQLAlchemy's behavior of setting the foreign key column
+        to NULL, and a DELETE will be emitted for the parent row without
+        any effect on the child row, even if the child row is present
+        in memory.  This may be desirable in the case when
+        database-level foreign key triggers, either special ``ON DELETE`` settings
+        or otherwise, need to be activated in all cases when a parent row is deleted.
+
+    * Database level ``ON DELETE`` cascade is **vastly more efficient**
+      than that of SQLAlchemy.  The database can chain a series of cascade
+      operations across many relationships at once; e.g. if row A is deleted,
+      all the related rows in table B can be deleted, and all the C rows related
+      to each of those B rows, and on and on, all within the scope of a single
+      DELETE statement.  SQLAlchemy on the other hand, in order to support
+      the cascading delete operation fully, has to individually load each
+      related collection in order to target all rows that then may have further
+      related collections.  That is, SQLAlchemy isn't sophisticated enough
+      to emit a DELETE for all those related rows at once within this context.
+
+    * SQLAlchemy doesn't **need** to be this sophisticated, as we instead provide
+      smooth integration with the database's own ``ON DELETE`` functionality,
+      by using the :paramref:`~.relationship.passive_deletes` option in conjunction
+      with properly configured foreign key constraints.
+      Under this behavior,
+      SQLAlchemy only emits DELETE for those rows that are already locally
+      present in the :class:`.Session`; for any collections that are unloaded,
+      it leaves them to the database to handle, rather than emitting a SELECT
+      for them.  The section :ref:`passive_deletes` provides an example of this use.
+
+    * While database-level ``ON DELETE`` functionality works only on the "many"
+      side of a relationship, SQLAlchemy's "delete" cascade
+      has **limited** ability to operate in the *reverse* direction as well,
+      meaning it can be configured on the "many" side to delete an object
+      on the "one" side when the reference on the "many" side is deleted.  However
+      this can easily result in constraint violations if there are other objects
+      referring to this "one" side from the "many", so it typically is only
+      useful when a relationship is in fact a "one to one".  The
+      :paramref:`~.relationship.single_parent` flag should be used to establish
+      an in-Python assertion for this case.
+
+
+When using a :func:`.relationship` that also includes a many-to-many
+table using the :paramref:`~.relationship.secondary` option, SQLAlchemy's
+delete cascade handles the rows in this many-to-many table automatically.
+Just as the addition or removal of an object from a many-to-many collection
+results in the INSERT or DELETE of a row in the many-to-many table
+(as described in :ref:`relationships_many_to_many_deletion`),
+the ``delete`` cascade, when activated as the result of a parent object
+delete operation, will DELETE not just the row in the "child" table but also
+the corresponding row in the many-to-many table.
+
+.. _cascade_delete_orphan:
+
+delete-orphan
+-------------
+
+``delete-orphan`` cascade adds behavior to the ``delete`` cascade,
+such that a child object will be marked for deletion when it is
+de-associated from the parent, not just when the parent is marked
+for deletion.   This is a common feature when dealing with a related
+object that is "owned" by its parent, with a NOT NULL foreign key,
+so that removal of the item from the parent collection results
+in its deletion.
+
+``delete-orphan`` cascade implies that each child object can only
+have one parent at a time, so is configured in the vast majority of cases
+on a one-to-many relationship.   Setting it on a many-to-one or
+many-to-many relationship is more awkward; for this use case,
+SQLAlchemy requires that the :func:`~sqlalchemy.orm.relationship`
+be configured with the :paramref:`~.relationship.single_parent` argument,
+which establishes Python-side validation ensuring that the object
+is associated with only one parent at a time.
+
+.. _cascade_merge:
+
+merge
+-----
+
+``merge`` cascade indicates that the :meth:`.Session.merge`
+operation should be propagated from a parent that's the subject
+of the :meth:`.Session.merge` call down to referred objects.
+This cascade is also on by default.
+
+.. _cascade_refresh_expire:
+
+refresh-expire
+--------------
+
+``refresh-expire`` is an uncommon option, indicating that the
+:meth:`.Session.expire` operation should be propagated from a parent
+down to referred objects.  When using :meth:`.Session.refresh`,
+the referred objects are expired only, but not actually refreshed.
+
+.. _cascade_expunge:
+
+expunge
+-------
+
+``expunge`` cascade indicates that when the parent object is removed
+from the :class:`.Session` using :meth:`.Session.expunge`, the
+operation should be propagated down to referred objects.
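+
+A minimal sketch, assuming the earlier ``User.addresses`` mapping were
+configured with ``cascade="all"`` (whose synonyms include ``expunge``)::
+
+    >>> user1 = sess.query(User).filter_by(id=1).first()
+    >>> address1 = user1.addresses[0]
+    >>> sess.expunge(user1)  # expunge cascades to user1's addresses
+    >>> user1 in sess
+    False
+    >>> address1 in sess
+    False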
+
+.. _backref_cascade:
+
+Controlling Cascade on Backrefs
+-------------------------------
+
+The :ref:`cascade_save_update` cascade by default takes place on attribute change events
+emitted from backrefs.  This is probably a confusing statement more
+easily described through demonstration; it means that, given a mapping such as this::
+
+    mapper(Order, order_table, properties={
+        'items' : relationship(Item, backref='order')
+    })
+
+If an ``Order`` is already in the session, and is assigned to the ``order``
+attribute of an ``Item``, the backref appends the ``Item`` to the ``items``
+collection of that ``Order``, resulting in the ``save-update`` cascade taking
+place::
+
+    >>> o1 = Order()
+    >>> session.add(o1)
+    >>> o1 in session
+    True
+
+    >>> i1 = Item()
+    >>> i1.order = o1
+    >>> i1 in o1.items
+    True
+    >>> i1 in session
+    True
+
+This behavior can be disabled using the :paramref:`~.relationship.cascade_backrefs` flag::
+
+    mapper(Order, order_table, properties={
+        'items' : relationship(Item, backref='order',
+                                    cascade_backrefs=False)
+    })
+
+So above, the assignment of ``i1.order = o1`` will append ``i1`` to the ``items``
+collection of ``o1``, but will not add ``i1`` to the session.   You can, of
+course, :meth:`~.Session.add` ``i1`` to the session at a later point.   This
+option may be helpful for situations where an object needs to be kept out of a
+session until its construction is completed, but still needs to be given
+associations to objects which are already persistent in the target session.
diff --git a/doc/build/orm/classical.rst b/doc/build/orm/classical.rst
new file mode 100644
index 000000000..3fd149f92
--- /dev/null
+++ b/doc/build/orm/classical.rst
@@ -0,0 +1,5 @@
+:orphan:
+
+Moved! :ref:`classical_mapping`
+
+
diff --git a/doc/build/orm/collections.rst b/doc/build/orm/collections.rst
index 898f70ebb..7d474ce65 100644
--- a/doc/build/orm/collections.rst
+++ b/doc/build/orm/collections.rst
@@ -573,7 +573,7 @@ Various internal methods.

 .. autoclass:: collection

-.. autofunction:: collection_adapter
+.. autodata:: collection_adapter

 .. autoclass:: CollectionAdapter

diff --git a/doc/build/orm/composites.rst b/doc/build/orm/composites.rst
new file mode 100644
index 000000000..1c42564b1
--- /dev/null
+++ b/doc/build/orm/composites.rst
@@ -0,0 +1,160 @@
+.. module:: sqlalchemy.orm
+
+.. _mapper_composite:
+
+Composite Column Types
+=======================
+
+Sets of columns can be associated with a single user-defined datatype.  The ORM
+provides a single attribute which represents the group of columns using the
+class you provide.
+
+.. versionchanged:: 0.7
+    Composites have been simplified such that
+    they no longer "conceal" the underlying column based attributes.  Additionally,
+    in-place mutation is no longer automatic; see the section below on
+    enabling mutability to support tracking of in-place changes.
+
+.. versionchanged:: 0.9
+    Composites will return their object-form, rather than as individual columns,
+    when used in a column-oriented :class:`.Query` construct.  See :ref:`migration_2824`.
+
+A simple example represents pairs of columns as a ``Point`` object.
+``Point`` represents such a pair as ``.x`` and ``.y``:: + + class Point(object): + def __init__(self, x, y): + self.x = x + self.y = y + + def __composite_values__(self): + return self.x, self.y + + def __repr__(self): + return "Point(x=%r, y=%r)" % (self.x, self.y) + + def __eq__(self, other): + return isinstance(other, Point) and \ + other.x == self.x and \ + other.y == self.y + + def __ne__(self, other): + return not self.__eq__(other) + +The requirements for the custom datatype class are that it have a constructor +which accepts positional arguments corresponding to its column format, and +also provides a method ``__composite_values__()`` which returns the state of +the object as a list or tuple, in order of its column-based attributes. It +also should supply adequate ``__eq__()`` and ``__ne__()`` methods which test +the equality of two instances. + +We will create a mapping to a table ``vertice``, which represents two points +as ``x1/y1`` and ``x2/y2``. These are created normally as :class:`.Column` +objects. Then, the :func:`.composite` function is used to assign new +attributes that will represent sets of columns via the ``Point`` class:: + + from sqlalchemy import Column, Integer + from sqlalchemy.orm import composite + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base() + + class Vertex(Base): + __tablename__ = 'vertice' + + id = Column(Integer, primary_key=True) + x1 = Column(Integer) + y1 = Column(Integer) + x2 = Column(Integer) + y2 = Column(Integer) + + start = composite(Point, x1, y1) + end = composite(Point, x2, y2) + +A classical mapping above would define each :func:`.composite` +against the existing table:: + + mapper(Vertex, vertice_table, properties={ + 'start':composite(Point, vertice_table.c.x1, vertice_table.c.y1), + 'end':composite(Point, vertice_table.c.x2, vertice_table.c.y2), + }) + +We can now persist and use ``Vertex`` instances, as well as query for them, +using the ``.start`` and ``.end`` attributes against ad-hoc ``Point`` instances: + +.. sourcecode:: python+sql + + >>> v = Vertex(start=Point(3, 4), end=Point(5, 6)) + >>> session.add(v) + >>> q = session.query(Vertex).filter(Vertex.start == Point(3, 4)) + {sql}>>> print q.first().start + BEGIN (implicit) + INSERT INTO vertice (x1, y1, x2, y2) VALUES (?, ?, ?, ?) + (3, 4, 5, 6) + SELECT vertice.id AS vertice_id, + vertice.x1 AS vertice_x1, + vertice.y1 AS vertice_y1, + vertice.x2 AS vertice_x2, + vertice.y2 AS vertice_y2 + FROM vertice + WHERE vertice.x1 = ? AND vertice.y1 = ? + LIMIT ? OFFSET ? + (3, 4, 1, 0) + {stop}Point(x=3, y=4) + +.. autofunction:: composite + + +Tracking In-Place Mutations on Composites +----------------------------------------- + +In-place changes to an existing composite value are +not tracked automatically. Instead, the composite class needs to provide +events to its parent object explicitly. This task is largely automated +via the usage of the :class:`.MutableComposite` mixin, which uses events +to associate each user-defined composite object with all parent associations. +Please see the example in :ref:`mutable_composites`. + +.. versionchanged:: 0.7 + In-place changes to an existing composite value are no longer + tracked automatically; the functionality is superseded by the + :class:`.MutableComposite` class. + +.. 
_composite_operations:
+
+Redefining Comparison Operations for Composites
+-----------------------------------------------
+
+The "equals" comparison operation by default produces an AND of all
+corresponding columns equated to one another.  This can be changed using
+the ``comparator_factory`` argument to :func:`.composite`, where we
+specify a custom :class:`.CompositeProperty.Comparator` class
+to define existing or new operations.
+Below we illustrate the "greater than" operator, implementing
+the same expression that the base "greater than" does::
+
+    from sqlalchemy.orm.properties import CompositeProperty
+    from sqlalchemy import sql
+
+    class PointComparator(CompositeProperty.Comparator):
+        def __gt__(self, other):
+            """redefine the 'greater than' operation"""
+
+            return sql.and_(*[a > b for a, b in
+                              zip(self.__clause_element__().clauses,
+                                  other.__composite_values__())])
+
+    class Vertex(Base):
+        __tablename__ = 'vertice'
+
+        id = Column(Integer, primary_key=True)
+        x1 = Column(Integer)
+        y1 = Column(Integer)
+        x2 = Column(Integer)
+        y2 = Column(Integer)
+
+        start = composite(Point, x1, y1,
+                            comparator_factory=PointComparator)
+        end = composite(Point, x2, y2,
+                            comparator_factory=PointComparator)
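+
+As a quick sketch of how the custom comparator might be exercised - a
+hypothetical query, assuming a :class:`.Session` and the ``Vertex``/``Point``
+mapping above - the redefined operator now governs the rendered SQL::
+
+    # renders roughly:
+    #   vertice.x1 > :x1_1 AND vertice.y1 > :y1_1
+    q = session.query(Vertex).filter(Vertex.start > Point(3, 4))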
diff --git a/doc/build/orm/constructors.rst b/doc/build/orm/constructors.rst
new file mode 100644
index 000000000..38cbb4182
--- /dev/null
+++ b/doc/build/orm/constructors.rst
@@ -0,0 +1,58 @@
+.. module:: sqlalchemy.orm
+
+.. _mapping_constructors:
+
+Constructors and Object Initialization
+=======================================
+
+Mapping imposes no restrictions or requirements on the constructor
+(``__init__``) method for the class.  You are free to require any arguments for
+the function that you wish, assign attributes to the instance that are unknown
+to the ORM, and generally do anything else you would normally do when writing
+a constructor for a Python class.
+
+The SQLAlchemy ORM does not call ``__init__`` when recreating objects from
+database rows.  The ORM's process is somewhat akin to the Python standard
+library's ``pickle`` module, invoking the low level ``__new__`` method and
+then quietly restoring attributes directly on the instance rather than calling
+``__init__``.
+
+If you need to do some setup on database-loaded instances before they're ready
+to use, you can use the ``@reconstructor`` decorator to tag a method as the
+ORM counterpart to ``__init__``.  SQLAlchemy will call this method with no
+arguments every time it loads or reconstructs one of your instances.  This is
+useful for recreating transient properties that are normally assigned in your
+``__init__``::
+
+    from sqlalchemy import orm
+
+    class MyMappedClass(object):
+        def __init__(self, data):
+            self.data = data
+            # we need stuff on all instances, but not in the database.
+            self.stuff = []
+
+        @orm.reconstructor
+        def init_on_load(self):
+            self.stuff = []
+
+When ``obj = MyMappedClass()`` is executed, Python calls the ``__init__``
+method as normal and the ``data`` argument is required.  When instances are
+loaded during a :class:`~sqlalchemy.orm.query.Query` operation as in
+``query(MyMappedClass).one()``, ``init_on_load`` is called.
+
+Any method may be tagged as the :func:`~sqlalchemy.orm.reconstructor`, even
+the ``__init__`` method.  SQLAlchemy will call the reconstructor method with no
+arguments.  Scalar (non-collection) database-mapped attributes of the instance
+will be available for use within the function.  Eagerly-loaded collections are
+generally not yet available and will usually only contain the first element.
+ORM state changes made to objects at this stage will not be recorded for the
+next flush() operation, so the activity within a reconstructor should be
+conservative.
+
+:func:`~sqlalchemy.orm.reconstructor` is a shortcut into a larger system
+of "instance level" events, which can be subscribed to using the
+event API - see :class:`.InstanceEvents` for the full API description
+of these events.
+
+.. autofunction:: reconstructor
diff --git a/doc/build/orm/contextual.rst b/doc/build/orm/contextual.rst
new file mode 100644
index 000000000..cc7016f80
--- /dev/null
+++ b/doc/build/orm/contextual.rst
@@ -0,0 +1,260 @@
+.. _unitofwork_contextual:
+
+Contextual/Thread-local Sessions
+=================================
+
+Recall from the section :ref:`session_faq_whentocreate` that the concept of
+"session scopes" was introduced, with an emphasis on web applications
+and the practice of linking the scope of a :class:`.Session` with that
+of a web request.   Most modern web frameworks include integration tools
+so that the scope of the :class:`.Session` can be managed automatically,
+and these tools should be used as they are available.
+
+SQLAlchemy includes its own helper object, which helps with the establishment
+of user-defined :class:`.Session` scopes.  It is also used by third-party
+integration systems to help construct their integration schemes.
+
+The object is the :class:`.scoped_session` object, and it represents a
+**registry** of :class:`.Session` objects.  If you're not familiar with the
+registry pattern, a good introduction can be found in `Patterns of Enterprise
+Architecture <http://martinfowler.com/eaaCatalog/registry.html>`_.
+
+.. note::
+
+    The :class:`.scoped_session` object is a very popular and useful object
+    used by many SQLAlchemy applications.  However, it is important to note
+    that it presents **only one approach** to the issue of :class:`.Session`
+    management.  If you're new to SQLAlchemy, and especially if the
+    term "thread-local variable" seems strange to you, we recommend that
+    if possible you first familiarize yourself with an off-the-shelf integration
+    system such as `Flask-SQLAlchemy <http://packages.python.org/Flask-SQLAlchemy/>`_
+    or `zope.sqlalchemy <http://pypi.python.org/pypi/zope.sqlalchemy>`_.
+
+A :class:`.scoped_session` is constructed by calling it, passing it a
+**factory** which can create new :class:`.Session` objects.   A factory
+is just something that produces a new object when called, and in the
+case of :class:`.Session`, the most common factory is the :class:`.sessionmaker`,
+introduced earlier in this section.  Below we illustrate this usage::
+
+    >>> from sqlalchemy.orm import scoped_session
+    >>> from sqlalchemy.orm import sessionmaker
+
+    >>> session_factory = sessionmaker(bind=some_engine)
+    >>> Session = scoped_session(session_factory)
+
+The :class:`.scoped_session` object we've created will now call upon the
+:class:`.sessionmaker` when we "call" the registry::
+
+    >>> some_session = Session()
+
+Above, ``some_session`` is an instance of :class:`.Session`, which we
+can now use to talk to the database.   This same :class:`.Session` is also
+present within the :class:`.scoped_session` registry we've created.
+If we call upon the registry a second time, we get back the **same** :class:`.Session`::
+
+    >>> some_other_session = Session()
+    >>> some_session is some_other_session
+    True
+
+This pattern allows disparate sections of the application to call upon a global
+:class:`.scoped_session`, so that all those areas may share the same session
+without the need to pass it explicitly.   The :class:`.Session` we've established
+in our registry will remain, until we explicitly tell our registry to dispose of it,
+by calling :meth:`.scoped_session.remove`::
+
+    >>> Session.remove()
+
+The :meth:`.scoped_session.remove` method first calls :meth:`.Session.close` on
+the current :class:`.Session`, which has the effect of releasing any connection/transactional
+resources owned by the :class:`.Session` first, then discarding the :class:`.Session`
+itself.  "Releasing" here means that connections are returned to their connection pool
+and any transactional state is rolled back, ultimately using the ``rollback()`` method
+of the underlying DBAPI connection.
+
+At this point, the :class:`.scoped_session` object is "empty", and will create
+a **new** :class:`.Session` when called again.  As illustrated below, this
+is not the same :class:`.Session` we had before::
+
+    >>> new_session = Session()
+    >>> new_session is some_session
+    False
+
+The above series of steps illustrates the idea of the "registry" pattern in a
+nutshell.  With that basic idea in hand, we can discuss some of the details
+of how this pattern proceeds.
+
+Implicit Method Access
+----------------------
+
+The job of the :class:`.scoped_session` is simple; hold onto a :class:`.Session`
+for all who ask for it.  As a means of producing more transparent access to this
+:class:`.Session`, the :class:`.scoped_session` also includes **proxy behavior**,
+meaning that the registry itself can be treated just like a :class:`.Session`
+directly; when methods are called on this object, they are **proxied** to the
+underlying :class:`.Session` being maintained by the registry::
+
+    Session = scoped_session(some_factory)
+
+    # equivalent to:
+    #
+    # session = Session()
+    # print session.query(MyClass).all()
+    #
+    print Session.query(MyClass).all()
+
+The above code accomplishes the same task as that of acquiring the current
+:class:`.Session` by calling upon the registry, then using that :class:`.Session`.
+
+Thread-Local Scope
+------------------
+
+Users who are familiar with multithreaded programming will note that representing
+anything as a global variable is usually a bad idea, as it implies that the
+global object will be accessed by many threads concurrently.   The :class:`.Session`
+object is entirely designed to be used in a **non-concurrent** fashion, which
+in terms of multithreading means "only in one thread at a time".   So our
+above example of :class:`.scoped_session` usage, where the same :class:`.Session`
+object is maintained across multiple calls, suggests that some process needs
+to be in place such that multiple calls across many threads don't actually get
+a handle to the same session.   We call this notion **thread local storage**,
+which means, a special object is used that will maintain a distinct object
+per each application thread.   Python provides this via the
+`threading.local() <http://docs.python.org/library/threading.html#threading.local>`_
+construct.
The :class:`.scoped_session` object by default uses this object +as storage, so that a single :class:`.Session` is maintained for all who call +upon the :class:`.scoped_session` registry, but only within the scope of a single +thread. Callers who call upon the registry in a different thread get a +:class:`.Session` instance that is local to that other thread. + +Using this technique, the :class:`.scoped_session` provides a quick and relatively +simple (if one is familiar with thread-local storage) way of providing +a single, global object in an application that is safe to be called upon +from multiple threads. + +The :meth:`.scoped_session.remove` method, as always, removes the current +:class:`.Session` associated with the thread, if any. However, one advantage of the +``threading.local()`` object is that if the application thread itself ends, the +"storage" for that thread is also garbage collected. So it is in fact "safe" to +use thread local scope with an application that spawns and tears down threads, +without the need to call :meth:`.scoped_session.remove`. However, the scope +of transactions themselves, i.e. ending them via :meth:`.Session.commit` or +:meth:`.Session.rollback`, will usually still be something that must be explicitly +arranged for at the appropriate time, unless the application actually ties the +lifespan of a thread to the lifespan of a transaction. + +.. _session_lifespan: + +Using Thread-Local Scope with Web Applications +---------------------------------------------- + +As discussed in the section :ref:`session_faq_whentocreate`, a web application +is architected around the concept of a **web request**, and integrating +such an application with the :class:`.Session` usually implies that the :class:`.Session` +will be associated with that request. As it turns out, most Python web frameworks, +with notable exceptions such as the asynchronous frameworks Twisted and +Tornado, use threads in a simple way, such that a particular web request is received, +processed, and completed within the scope of a single *worker thread*. When +the request ends, the worker thread is released to a pool of workers where it +is available to handle another request. + +This simple correspondence of web request and thread means that to associate a +:class:`.Session` with a thread implies it is also associated with the web request +running within that thread, and vice versa, provided that the :class:`.Session` is +created only after the web request begins and torn down just before the web request ends. +So it is a common practice to use :class:`.scoped_session` as a quick way +to integrate the :class:`.Session` with a web application. The sequence +diagram below illustrates this flow:: + + Web Server Web Framework SQLAlchemy ORM Code + -------------- -------------- ------------------------------ + startup -> Web framework # Session registry is established + initializes Session = scoped_session(sessionmaker()) + + incoming + web request -> web request -> # The registry is *optionally* + starts # called upon explicitly to create + # a Session local to the thread and/or request + Session() + + # the Session registry can otherwise + # be used at any time, creating the + # request-local Session() if not present, + # or returning the existing one + Session.query(MyClass) # ... + + Session.add(some_object) # ... 
+ + # if data was modified, commit the + # transaction + Session.commit() + + web request ends -> # the registry is instructed to + # remove the Session + Session.remove() + + sends output <- + outgoing web <- + response + +Using the above flow, the process of integrating the :class:`.Session` with the +web application has exactly two requirements: + +1. Create a single :class:`.scoped_session` registry when the web application + first starts, ensuring that this object is accessible by the rest of the + application. +2. Ensure that :meth:`.scoped_session.remove` is called when the web request ends, + usually by integrating with the web framework's event system to establish + an "on request end" event. + +As noted earlier, the above pattern is **just one potential way** to integrate a :class:`.Session` +with a web framework, one which in particular makes the significant assumption +that the **web framework associates web requests with application threads**. It is +however **strongly recommended that the integration tools provided with the web framework +itself be used, if available**, instead of :class:`.scoped_session`. + +In particular, while using a thread local can be convenient, it is preferable that the :class:`.Session` be +associated **directly with the request**, rather than with +the current thread. The next section on custom scopes details a more advanced configuration +which can combine the usage of :class:`.scoped_session` with direct request based scope, or +any kind of scope. + +Using Custom Created Scopes +--------------------------- + +The :class:`.scoped_session` object's default behavior of "thread local" scope is only +one of many options on how to "scope" a :class:`.Session`. A custom scope can be defined +based on any existing system of getting at "the current thing we are working with". + +Suppose a web framework defines a library function ``get_current_request()``. An application +built using this framework can call this function at any time, and the result will be +some kind of ``Request`` object that represents the current request being processed. +If the ``Request`` object is hashable, then this function can be easily integrated with +:class:`.scoped_session` to associate the :class:`.Session` with the request. Below we illustrate +this in conjunction with a hypothetical event marker provided by the web framework +``on_request_end``, which allows code to be invoked whenever a request ends:: + + from my_web_framework import get_current_request, on_request_end + from sqlalchemy.orm import scoped_session, sessionmaker + + Session = scoped_session(sessionmaker(bind=some_engine), scopefunc=get_current_request) + + @on_request_end + def remove_session(req): + Session.remove() + +Above, we instantiate :class:`.scoped_session` in the usual way, except that we pass +our request-returning function as the "scopefunc". This instructs :class:`.scoped_session` +to use this function to generate a dictionary key whenever the registry is called upon +to return the current :class:`.Session`. In this case it is particularly important +that we ensure a reliable "remove" system is implemented, as this dictionary is not +otherwise self-managed. + + +Contextual Session API +---------------------- + +.. autoclass:: sqlalchemy.orm.scoping.scoped_session + :members: + +.. autoclass:: sqlalchemy.util.ScopedRegistry + :members: + +.. 
autoclass:: sqlalchemy.util.ThreadLocalRegistry diff --git a/doc/build/orm/examples.rst b/doc/build/orm/examples.rst index 8803e1c34..4db7c00dc 100644 --- a/doc/build/orm/examples.rst +++ b/doc/build/orm/examples.rst @@ -62,6 +62,13 @@ Nested Sets .. automodule:: examples.nested_sets +.. _examples_performance: + +Performance +----------- + +.. automodule:: examples.performance + .. _examples_relationships: Relationship Join Conditions diff --git a/doc/build/orm/exceptions.rst b/doc/build/orm/exceptions.rst index f95b26eed..047c743e0 100644 --- a/doc/build/orm/exceptions.rst +++ b/doc/build/orm/exceptions.rst @@ -2,4 +2,4 @@ ORM Exceptions ============== .. automodule:: sqlalchemy.orm.exc - :members:
\ No newline at end of file + :members: diff --git a/doc/build/orm/extending.rst b/doc/build/orm/extending.rst new file mode 100644 index 000000000..4b2b86f62 --- /dev/null +++ b/doc/build/orm/extending.rst @@ -0,0 +1,12 @@ +==================== +Events and Internals +==================== + +.. toctree:: + :maxdepth: 2 + + events + internals + exceptions + deprecated + diff --git a/doc/build/orm/extensions/associationproxy.rst b/doc/build/orm/extensions/associationproxy.rst index 9b25c4a68..6fc57e30c 100644 --- a/doc/build/orm/extensions/associationproxy.rst +++ b/doc/build/orm/extensions/associationproxy.rst @@ -510,4 +510,4 @@ API Documentation :members: :undoc-members: -.. autodata:: ASSOCIATION_PROXY
\ No newline at end of file +.. autodata:: ASSOCIATION_PROXY diff --git a/doc/build/orm/extensions/declarative.rst b/doc/build/orm/extensions/declarative.rst deleted file mode 100644 index 7d9e634b5..000000000 --- a/doc/build/orm/extensions/declarative.rst +++ /dev/null @@ -1,33 +0,0 @@ -.. _declarative_toplevel: - -Declarative -=========== - -.. automodule:: sqlalchemy.ext.declarative - -API Reference -------------- - -.. autofunction:: declarative_base - -.. autofunction:: as_declarative - -.. autoclass:: declared_attr - :members: - -.. autofunction:: sqlalchemy.ext.declarative.api._declarative_constructor - -.. autofunction:: has_inherited_table - -.. autofunction:: synonym_for - -.. autofunction:: comparable_using - -.. autofunction:: instrument_declarative - -.. autoclass:: AbstractConcreteBase - -.. autoclass:: ConcreteBase - -.. autoclass:: DeferredReflection - :members: diff --git a/doc/build/orm/extensions/declarative/api.rst b/doc/build/orm/extensions/declarative/api.rst new file mode 100644 index 000000000..67b66a970 --- /dev/null +++ b/doc/build/orm/extensions/declarative/api.rst @@ -0,0 +1,114 @@ +.. automodule:: sqlalchemy.ext.declarative + +=============== +Declarative API +=============== + +API Reference +============= + +.. autofunction:: declarative_base + +.. autofunction:: as_declarative + +.. autoclass:: declared_attr + :members: + +.. autofunction:: sqlalchemy.ext.declarative.api._declarative_constructor + +.. autofunction:: has_inherited_table + +.. autofunction:: synonym_for + +.. autofunction:: comparable_using + +.. autofunction:: instrument_declarative + +.. autoclass:: AbstractConcreteBase + +.. autoclass:: ConcreteBase + +.. autoclass:: DeferredReflection + :members: + + +Special Directives +------------------ + +``__declare_last__()`` +~~~~~~~~~~~~~~~~~~~~~~ + +The ``__declare_last__()`` hook allows definition of +a class level function that is automatically called by the +:meth:`.MapperEvents.after_configured` event, which occurs after mappings are +assumed to be completed and the 'configure' step has finished:: + + class MyClass(Base): + @classmethod + def __declare_last__(cls): + "" + # do something with mappings + +.. versionadded:: 0.7.3 + +``__declare_first__()`` +~~~~~~~~~~~~~~~~~~~~~~~ + +Like ``__declare_last__()``, but is called at the beginning of mapper +configuration via the :meth:`.MapperEvents.before_configured` event:: + + class MyClass(Base): + @classmethod + def __declare_first__(cls): + "" + # do something before mappings are configured + +.. versionadded:: 0.9.3 + +.. _declarative_abstract: + +``__abstract__`` +~~~~~~~~~~~~~~~~~~~ + +``__abstract__`` causes declarative to skip the production +of a table or mapper for the class entirely. 
A class can be added within a
+hierarchy in the same way as a mixin (see :ref:`declarative_mixins`), allowing
+subclasses to extend just from the special class::
+
+    class SomeAbstractBase(Base):
+        __abstract__ = True
+
+        def some_helpful_method(self):
+            ""
+
+        @declared_attr
+        def __mapper_args__(cls):
+            return {"helpful mapper arguments": True}
+
+    class MyMappedClass(SomeAbstractBase):
+        ""
+
+One possible use of ``__abstract__`` is to use a distinct
+:class:`.MetaData` for different bases::
+
+    Base = declarative_base()
+
+    class DefaultBase(Base):
+        __abstract__ = True
+        metadata = MetaData()
+
+    class OtherBase(Base):
+        __abstract__ = True
+        metadata = MetaData()
+
+Above, classes which inherit from ``DefaultBase`` will use one
+:class:`.MetaData` as the registry of tables, and those which inherit from
+``OtherBase`` will use a different one. The tables themselves can then be
+created perhaps within distinct databases::
+
+    DefaultBase.metadata.create_all(some_engine)
+    OtherBase.metadata.create_all(some_other_engine)
+
+.. versionadded:: 0.7.3
+
 diff --git a/doc/build/orm/extensions/declarative/basic_use.rst b/doc/build/orm/extensions/declarative/basic_use.rst new file mode 100644 index 000000000..10b79e5a6 --- /dev/null +++ b/doc/build/orm/extensions/declarative/basic_use.rst @@ -0,0 +1,133 @@
+=========
+Basic Use
+=========
+
+SQLAlchemy object-relational configuration involves the
+combination of :class:`.Table`, :func:`.mapper`, and class
+objects to define a mapped class.
+:mod:`~sqlalchemy.ext.declarative` allows all three to be
+expressed at once within the class declaration. As much as
+possible, regular SQLAlchemy schema and ORM constructs are
+used directly, so that configuration between "classical" ORM
+usage and declarative remains highly similar.
+
+As a simple example::
+
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+
+    class SomeClass(Base):
+        __tablename__ = 'some_table'
+        id = Column(Integer, primary_key=True)
+        name = Column(String(50))
+
+Above, the :func:`declarative_base` callable returns a new base class from
+which all mapped classes should inherit. When the class definition is
+completed, a new :class:`.Table` and :func:`.mapper` will have been generated.
+
+The resulting table and mapper are accessible via
+``__table__`` and ``__mapper__`` attributes on the
+``SomeClass`` class::
+
+    # access the mapped Table
+    SomeClass.__table__
+
+    # access the Mapper
+    SomeClass.__mapper__
+
+Defining Attributes
+===================
+
+In the previous example, the :class:`.Column` objects are
+automatically named with the name of the attribute to which they are
+assigned.
+
+To name a column explicitly with a name distinct from its mapped attribute,
+pass the desired name as the first positional argument to :class:`.Column`.
+Below, the ``id`` attribute of ``SomeClass`` is mapped to the column named
+"some_table_id", which is the name used on the SQL side::
+
+    class SomeClass(Base):
+        __tablename__ = 'some_table'
+        id = Column("some_table_id", Integer, primary_key=True)
+
+Attributes may be added to the class after its construction, and they will be
+added to the underlying :class:`.Table` and
+:func:`.mapper` definitions as appropriate::
+
+    SomeClass.data = Column('data', Unicode)
+    SomeClass.related = relationship(RelatedInfo)
+
+Classes which are constructed using declarative can interact freely
+with classes that are mapped explicitly with :func:`.mapper`.
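+For example, a declarative class may define a :func:`.relationship` to a
+class that was mapped classically. The following is a minimal sketch of that
+interaction, not an excerpt from the library; the ``RelatedInfo`` name and
+``related_info`` table are hypothetical, and ``Base`` and ``SomeClass`` are
+assumed to be the objects from the examples above::
+
+    from sqlalchemy import Table, Column, Integer, String, ForeignKey
+    from sqlalchemy.orm import mapper, relationship
+
+    # a plain Table and class, mapped classically, sharing Base.metadata
+    related_info = Table('related_info', Base.metadata,
+        Column('id', Integer, primary_key=True),
+        Column('some_class_id', Integer, ForeignKey('some_table.id')),
+        Column('detail', String(50))
+    )
+
+    class RelatedInfo(object):
+        pass
+
+    mapper(RelatedInfo, related_info)
+
+    # the declarative SomeClass can now refer to the classical mapping
+    SomeClass.related = relationship(RelatedInfo)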
+
+It is recommended, though not required, that all tables
+share the same underlying :class:`~sqlalchemy.schema.MetaData` object,
+so that string-configured :class:`~sqlalchemy.schema.ForeignKey`
+references can be resolved without issue.
+
+Accessing the MetaData
+=======================
+
+The base class returned by :func:`declarative_base` contains a
+:class:`.MetaData` object where newly defined
+:class:`.Table` objects are collected. This object is
+intended to be accessed directly for
+:class:`.MetaData`-specific operations. For example, to issue
+CREATE statements for all tables::
+
+    engine = create_engine('sqlite://')
+    Base.metadata.create_all(engine)
+
+:func:`declarative_base` can also receive a pre-existing
+:class:`.MetaData` object, which allows a
+declarative setup to be associated with an already
+existing traditional collection of :class:`~sqlalchemy.schema.Table`
+objects::
+
+    mymetadata = MetaData()
+    Base = declarative_base(metadata=mymetadata)
+
+
+Class Constructor
+=================
+
+As a convenience feature, :func:`declarative_base` sets a default
+constructor on classes which accepts keyword arguments and assigns them
+to the named attributes::
+
+    e = Engineer(primary_language='python')
+
+Mapper Configuration
+====================
+
+Declarative makes use of the :func:`~.orm.mapper` function internally
+when it creates the mapping to the declared table. The options
+for :func:`~.orm.mapper` are passed directly through via the
+``__mapper_args__`` class attribute. As always, arguments which refer to
+locally mapped columns can reference them directly from within the
+class declaration::
+
+    from datetime import datetime
+
+    class Widget(Base):
+        __tablename__ = 'widgets'
+
+        id = Column(Integer, primary_key=True)
+        timestamp = Column(DateTime, nullable=False)
+
+        __mapper_args__ = {
+            'version_id_col': timestamp,
+            'version_id_generator': lambda v: datetime.now()
+        }
+
+
+.. _declarative_sql_expressions:
+
+Defining SQL Expressions
+========================
+
+See :ref:`mapper_sql_expressions` for examples of declaratively
+mapping attributes to SQL expressions.
+
 diff --git a/doc/build/orm/extensions/declarative/index.rst b/doc/build/orm/extensions/declarative/index.rst new file mode 100644 index 000000000..dc4f392f3 --- /dev/null +++ b/doc/build/orm/extensions/declarative/index.rst @@ -0,0 +1,32 @@
+.. _declarative_toplevel:
+
+===========
+Declarative
+===========
+
+Declarative is the system typically used with the SQLAlchemy ORM
+to define classes mapped to relational database tables. However,
+as noted in :ref:`classical_mapping`, Declarative is in fact a series of
+extensions that ride on top of the SQLAlchemy :func:`.mapper` construct.
+
+While the documentation typically refers to Declarative for most examples,
+the following sections will provide detailed information on how the
+Declarative API interacts with the basic :func:`.mapper` and Core :class:`.Table`
+systems, as well as how sophisticated patterns can be built using systems
+such as mixins.
+
+
+.. toctree::
+    :maxdepth: 2
+
+    basic_use
+    relationships
+    table_config
+    inheritance
+    mixins
+    api
+
 diff --git a/doc/build/orm/extensions/declarative/inheritance.rst b/doc/build/orm/extensions/declarative/inheritance.rst new file mode 100644 index 000000000..684b07bfd --- /dev/null +++ b/doc/build/orm/extensions/declarative/inheritance.rst @@ -0,0 +1,318 @@
+.. 
_declarative_inheritance: + +Inheritance Configuration +========================= + +Declarative supports all three forms of inheritance as intuitively +as possible. The ``inherits`` mapper keyword argument is not needed +as declarative will determine this from the class itself. The various +"polymorphic" keyword arguments are specified using ``__mapper_args__``. + +Joined Table Inheritance +~~~~~~~~~~~~~~~~~~~~~~~~ + +Joined table inheritance is defined as a subclass that defines its own +table:: + + class Person(Base): + __tablename__ = 'people' + id = Column(Integer, primary_key=True) + discriminator = Column('type', String(50)) + __mapper_args__ = {'polymorphic_on': discriminator} + + class Engineer(Person): + __tablename__ = 'engineers' + __mapper_args__ = {'polymorphic_identity': 'engineer'} + id = Column(Integer, ForeignKey('people.id'), primary_key=True) + primary_language = Column(String(50)) + +Note that above, the ``Engineer.id`` attribute, since it shares the +same attribute name as the ``Person.id`` attribute, will in fact +represent the ``people.id`` and ``engineers.id`` columns together, +with the "Engineer.id" column taking precedence if queried directly. +To provide the ``Engineer`` class with an attribute that represents +only the ``engineers.id`` column, give it a different attribute name:: + + class Engineer(Person): + __tablename__ = 'engineers' + __mapper_args__ = {'polymorphic_identity': 'engineer'} + engineer_id = Column('id', Integer, ForeignKey('people.id'), + primary_key=True) + primary_language = Column(String(50)) + + +.. versionchanged:: 0.7 joined table inheritance favors the subclass + column over that of the superclass, such as querying above + for ``Engineer.id``. Prior to 0.7 this was the reverse. + +.. _declarative_single_table: + +Single Table Inheritance +~~~~~~~~~~~~~~~~~~~~~~~~ + +Single table inheritance is defined as a subclass that does not have +its own table; you just leave out the ``__table__`` and ``__tablename__`` +attributes:: + + class Person(Base): + __tablename__ = 'people' + id = Column(Integer, primary_key=True) + discriminator = Column('type', String(50)) + __mapper_args__ = {'polymorphic_on': discriminator} + + class Engineer(Person): + __mapper_args__ = {'polymorphic_identity': 'engineer'} + primary_language = Column(String(50)) + +When the above mappers are configured, the ``Person`` class is mapped +to the ``people`` table *before* the ``primary_language`` column is +defined, and this column will not be included in its own mapping. +When ``Engineer`` then defines the ``primary_language`` column, the +column is added to the ``people`` table so that it is included in the +mapping for ``Engineer`` and is also part of the table's full set of +columns. Columns which are not mapped to ``Person`` are also excluded +from any other single or joined inheriting classes using the +``exclude_properties`` mapper argument. Below, ``Manager`` will have +all the attributes of ``Person`` and ``Manager`` but *not* the +``primary_language`` attribute of ``Engineer``:: + + class Manager(Person): + __mapper_args__ = {'polymorphic_identity': 'manager'} + golf_swing = Column(String(50)) + +The attribute exclusion logic is provided by the +``exclude_properties`` mapper argument, and declarative's default +behavior can be disabled by passing an explicit ``exclude_properties`` +collection (empty or otherwise) to the ``__mapper_args__``. 
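+As a rough sketch of that option (assuming the single table inheritance
+mapping above; this is illustrative, not an excerpt from the library), an
+explicit ``exclude_properties`` collection can be passed through
+``__mapper_args__``; an empty collection disables the exclusion logic
+entirely, so that ``Manager`` below would also map ``primary_language``::
+
+    class Manager(Person):
+        __mapper_args__ = {
+            'polymorphic_identity': 'manager',
+
+            # empty collection: no columns are excluded from this mapping
+            'exclude_properties': ()
+        }
+        golf_swing = Column(String(50))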
+
+Resolving Column Conflicts
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Note above that the ``primary_language`` and ``golf_swing`` columns
+are "moved up" to be applied to ``Person.__table__``, as a result of their
+declaration on a subclass that has no table of its own. A tricky case
+comes up when two subclasses want to specify *the same* column, as below::
+
+    class Person(Base):
+        __tablename__ = 'people'
+        id = Column(Integer, primary_key=True)
+        discriminator = Column('type', String(50))
+        __mapper_args__ = {'polymorphic_on': discriminator}
+
+    class Engineer(Person):
+        __mapper_args__ = {'polymorphic_identity': 'engineer'}
+        start_date = Column(DateTime)
+
+    class Manager(Person):
+        __mapper_args__ = {'polymorphic_identity': 'manager'}
+        start_date = Column(DateTime)
+
+Above, the ``start_date`` column declared on both ``Engineer`` and ``Manager``
+will result in an error::
+
+    sqlalchemy.exc.ArgumentError: Column 'start_date' on class
+    <class '__main__.Manager'> conflicts with existing
+    column 'people.start_date'
+
+In a situation like this, Declarative can't be sure
+of the intent, especially if the ``start_date`` columns had, for example,
+different types. A situation like this can be resolved by using
+:class:`.declared_attr` to define the :class:`.Column` conditionally, taking
+care to return the **existing column** via the parent ``__table__`` if it
+already exists::
+
+    from sqlalchemy.ext.declarative import declared_attr
+
+    class Person(Base):
+        __tablename__ = 'people'
+        id = Column(Integer, primary_key=True)
+        discriminator = Column('type', String(50))
+        __mapper_args__ = {'polymorphic_on': discriminator}
+
+    class Engineer(Person):
+        __mapper_args__ = {'polymorphic_identity': 'engineer'}
+
+        @declared_attr
+        def start_date(cls):
+            "Start date column, if not present already."
+            return Person.__table__.c.get('start_date', Column(DateTime))
+
+    class Manager(Person):
+        __mapper_args__ = {'polymorphic_identity': 'manager'}
+
+        @declared_attr
+        def start_date(cls):
+            "Start date column, if not present already."
+            return Person.__table__.c.get('start_date', Column(DateTime))
+
+Above, when ``Manager`` is mapped, the ``start_date`` column is
+already present on the ``Person`` class. Declarative lets us return
+that :class:`.Column` as a result in this case, where it knows to skip
+re-assigning the same column. If the mapping is mis-configured such
+that the ``start_date`` column is accidentally re-assigned to a
+different table (such as, if we changed ``Manager`` to be joined
+inheritance without fixing ``start_date``), an error is raised which
+indicates that an existing :class:`.Column` is being re-assigned to
+a different owning :class:`.Table`.
+
+.. versionadded:: 0.8 :class:`.declared_attr` can be used on a non-mixin
+   class, and the returned :class:`.Column` or other mapped attribute
+   will be applied to the mapping as any other attribute. Previously,
+   the resulting attribute would be ignored, and also result in a warning
+   being emitted when a subclass was created.
+
+.. versionadded:: 0.8 :class:`.declared_attr`, when used either with a
+   mixin or non-mixin declarative class, can return an existing
+   :class:`.Column` already assigned to the parent :class:`.Table`,
+   to indicate that the re-assignment of the :class:`.Column` should be
+   skipped; the attribute is nonetheless mapped on the target class,
+   in order to resolve duplicate column conflicts.
+
+The same concept can be used with mixin classes (see
+:ref:`declarative_mixins`)::
+
+    class Person(Base):
+        __tablename__ = 'people'
+        id = Column(Integer, primary_key=True)
+        discriminator = Column('type', String(50))
+        __mapper_args__ = {'polymorphic_on': discriminator}
+
+    class HasStartDate(object):
+        @declared_attr
+        def start_date(cls):
+            return cls.__table__.c.get('start_date', Column(DateTime))
+
+    class Engineer(HasStartDate, Person):
+        __mapper_args__ = {'polymorphic_identity': 'engineer'}
+
+    class Manager(HasStartDate, Person):
+        __mapper_args__ = {'polymorphic_identity': 'manager'}
+
+The above mixin checks the local ``__table__`` attribute for the column.
+Because we're using single table inheritance, we're sure that in this case,
+``cls.__table__`` refers to ``Person.__table__``. If we were mixing joined-
+and single-table inheritance, we might want our mixin to check more carefully
+if ``cls.__table__`` is really the :class:`.Table` we're looking for.
+
+Concrete Table Inheritance
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Concrete table inheritance is defined as a subclass which has its own table
+and sets the ``concrete`` keyword argument to ``True``::
+
+    class Person(Base):
+        __tablename__ = 'people'
+        id = Column(Integer, primary_key=True)
+        name = Column(String(50))
+
+    class Engineer(Person):
+        __tablename__ = 'engineers'
+        __mapper_args__ = {'concrete': True}
+        id = Column(Integer, primary_key=True)
+        primary_language = Column(String(50))
+        name = Column(String(50))
+
+Usage of an abstract base class is a little less straightforward as it
+requires usage of :func:`~sqlalchemy.orm.util.polymorphic_union`,
+which needs to be created with the :class:`.Table` objects
+before the class is built::
+
+    engineers = Table('engineers', Base.metadata,
+        Column('id', Integer, primary_key=True),
+        Column('name', String(50)),
+        Column('primary_language', String(50))
+    )
+    managers = Table('managers', Base.metadata,
+        Column('id', Integer, primary_key=True),
+        Column('name', String(50)),
+        Column('golf_swing', String(50))
+    )
+
+    punion = polymorphic_union({
+        'engineer': engineers,
+        'manager': managers
+    }, 'type', 'punion')
+
+    class Person(Base):
+        __table__ = punion
+        __mapper_args__ = {'polymorphic_on': punion.c.type}
+
+    class Engineer(Person):
+        __table__ = engineers
+        __mapper_args__ = {'polymorphic_identity': 'engineer', 'concrete': True}
+
+    class Manager(Person):
+        __table__ = managers
+        __mapper_args__ = {'polymorphic_identity': 'manager', 'concrete': True}
+
+.. _declarative_concrete_helpers:
+
+Using the Concrete Helpers
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Helper classes provide a simpler pattern for concrete inheritance.
+With these objects, the ``__declare_first__`` helper is used to configure the
+"polymorphic" loader for the mapper after all subclasses have been declared.
+
+.. versionadded:: 0.7.3
+
+An abstract base can be declared using the
+:class:`.AbstractConcreteBase` class::
+
+    from sqlalchemy.ext.declarative import AbstractConcreteBase
+
+    class Employee(AbstractConcreteBase, Base):
+        pass
+
+To have a concrete ``employee`` table, use :class:`.ConcreteBase` instead::
+
+    from sqlalchemy.ext.declarative import ConcreteBase
+
+    class Employee(ConcreteBase, Base):
+        __tablename__ = 'employee'
+        employee_id = Column(Integer, primary_key=True)
+        name = Column(String(50))
+        __mapper_args__ = {
+            'polymorphic_identity': 'employee',
+            'concrete': True}
+
+
+Either ``Employee`` base can be used in the normal fashion::
+
+    class Manager(Employee):
+        __tablename__ = 'manager'
+        employee_id = Column(Integer, primary_key=True)
+        name = Column(String(50))
+        manager_data = Column(String(40))
+        __mapper_args__ = {
+            'polymorphic_identity': 'manager',
+            'concrete': True}
+
+    class Engineer(Employee):
+        __tablename__ = 'engineer'
+        employee_id = Column(Integer, primary_key=True)
+        name = Column(String(50))
+        engineer_info = Column(String(40))
+        __mapper_args__ = {'polymorphic_identity': 'engineer',
+                           'concrete': True}
+
+
+The :class:`.AbstractConcreteBase` class is itself mapped, and can be
+used as a target of relationships::
+
+    class Company(Base):
+        __tablename__ = 'company'
+
+        id = Column(Integer, primary_key=True)
+        employees = relationship("Employee",
+                        primaryjoin="Company.id == Employee.company_id")
+
+
+.. versionchanged:: 0.9.3 Support for use of :class:`.AbstractConcreteBase`
+   as the target of a :func:`.relationship` has been improved.
+
+It can also be queried directly::
+
+    for employee in session.query(Employee).filter(Employee.name == 'qbert'):
+        print(employee)
+
 diff --git a/doc/build/orm/extensions/declarative/mixins.rst b/doc/build/orm/extensions/declarative/mixins.rst new file mode 100644 index 000000000..d64477649 --- /dev/null +++ b/doc/build/orm/extensions/declarative/mixins.rst @@ -0,0 +1,541 @@
+.. _declarative_mixins:
+
+Mixin and Custom Base Classes
+==============================
+
+A common need when using :mod:`~sqlalchemy.ext.declarative` is to
+share some functionality, such as a set of common columns, some common
+table options, or other mapped properties, across many
+classes. The standard Python idiom for this is to have the classes
+inherit from a base which includes these common features.
+
+When using :mod:`~sqlalchemy.ext.declarative`, this idiom is supported
+via the use of a custom declarative base class, as well as a "mixin" class
+which is inherited from in addition to the primary base. Declarative
+includes several helper features to make this work in terms of how
+mappings are declared. An example of some commonly mixed-in
+idioms is below::
+
+    from sqlalchemy.ext.declarative import declared_attr
+
+    class MyMixin(object):
+
+        @declared_attr
+        def __tablename__(cls):
+            return cls.__name__.lower()
+
+        __table_args__ = {'mysql_engine': 'InnoDB'}
+        __mapper_args__ = {'always_refresh': True}
+
+        id = Column(Integer, primary_key=True)
+
+    class MyModel(MyMixin, Base):
+        name = Column(String(1000))
+
+Where above, the class ``MyModel`` will contain an "id" column
+as the primary key, a ``__tablename__`` attribute that derives
+from the name of the class itself, as well as ``__table_args__``
+and ``__mapper_args__`` defined by the ``MyMixin`` mixin class.
+
+There's no fixed convention over whether ``MyMixin`` precedes
+``Base`` or not.
Normal Python method resolution rules apply, and +the above example would work just as well with:: + + class MyModel(Base, MyMixin): + name = Column(String(1000)) + +This works because ``Base`` here doesn't define any of the +variables that ``MyMixin`` defines, i.e. ``__tablename__``, +``__table_args__``, ``id``, etc. If the ``Base`` did define +an attribute of the same name, the class placed first in the +inherits list would determine which attribute is used on the +newly defined class. + +Augmenting the Base +~~~~~~~~~~~~~~~~~~~ + +In addition to using a pure mixin, most of the techniques in this +section can also be applied to the base class itself, for patterns that +should apply to all classes derived from a particular base. This is achieved +using the ``cls`` argument of the :func:`.declarative_base` function:: + + from sqlalchemy.ext.declarative import declared_attr + + class Base(object): + @declared_attr + def __tablename__(cls): + return cls.__name__.lower() + + __table_args__ = {'mysql_engine': 'InnoDB'} + + id = Column(Integer, primary_key=True) + + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base(cls=Base) + + class MyModel(Base): + name = Column(String(1000)) + +Where above, ``MyModel`` and all other classes that derive from ``Base`` will +have a table name derived from the class name, an ``id`` primary key column, +as well as the "InnoDB" engine for MySQL. + +Mixing in Columns +~~~~~~~~~~~~~~~~~ + +The most basic way to specify a column on a mixin is by simple +declaration:: + + class TimestampMixin(object): + created_at = Column(DateTime, default=func.now()) + + class MyModel(TimestampMixin, Base): + __tablename__ = 'test' + + id = Column(Integer, primary_key=True) + name = Column(String(1000)) + +Where above, all declarative classes that include ``TimestampMixin`` +will also have a column ``created_at`` that applies a timestamp to +all row insertions. + +Those familiar with the SQLAlchemy expression language know that +the object identity of clause elements defines their role in a schema. +Two ``Table`` objects ``a`` and ``b`` may both have a column called +``id``, but the way these are differentiated is that ``a.c.id`` +and ``b.c.id`` are two distinct Python objects, referencing their +parent tables ``a`` and ``b`` respectively. + +In the case of the mixin column, it seems that only one +:class:`.Column` object is explicitly created, yet the ultimate +``created_at`` column above must exist as a distinct Python object +for each separate destination class. To accomplish this, the declarative +extension creates a **copy** of each :class:`.Column` object encountered on +a class that is detected as a mixin. + +This copy mechanism is limited to simple columns that have no foreign +keys, as a :class:`.ForeignKey` itself contains references to columns +which can't be properly recreated at this level. 
For columns that
+have foreign keys, as well as for the variety of mapper-level constructs
+that require destination-explicit context, the
+:class:`~.declared_attr` decorator is provided so that
+patterns common to many classes can be defined as callables::
+
+    from sqlalchemy.ext.declarative import declared_attr
+
+    class ReferenceAddressMixin(object):
+        @declared_attr
+        def address_id(cls):
+            return Column(Integer, ForeignKey('address.id'))
+
+    class User(ReferenceAddressMixin, Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+
+Where above, the ``address_id`` class-level callable is executed at the
+point at which the ``User`` class is constructed, and the declarative
+extension can use the resulting :class:`.Column` object as returned by
+the method without the need to copy it.
+
+.. versionchanged:: 0.6.5
+   ``sqlalchemy.util.classproperty`` was renamed to
+   :class:`~.declared_attr`.
+
+Columns generated by :class:`~.declared_attr` can also be
+referenced by ``__mapper_args__`` to a limited degree, currently
+by ``polymorphic_on`` and ``version_id_col``; the declarative extension
+will resolve them at class construction time::
+
+    class MyMixin:
+        @declared_attr
+        def type_(cls):
+            return Column(String(50))
+
+        __mapper_args__ = {'polymorphic_on': type_}
+
+    class MyModel(MyMixin, Base):
+        __tablename__ = 'test'
+        id = Column(Integer, primary_key=True)
+
+
+Mixing in Relationships
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Relationships created by :func:`~sqlalchemy.orm.relationship` are provided
+with declarative mixin classes exclusively using the
+:class:`.declared_attr` approach, eliminating any ambiguity
+which could arise when copying a relationship and its possibly column-bound
+contents. Below is an example which combines a foreign key column and a
+relationship so that two classes ``Foo`` and ``Bar`` can both be configured to
+reference a common target class via many-to-one::
+
+    class RefTargetMixin(object):
+        @declared_attr
+        def target_id(cls):
+            return Column('target_id', ForeignKey('target.id'))
+
+        @declared_attr
+        def target(cls):
+            return relationship("Target")
+
+    class Foo(RefTargetMixin, Base):
+        __tablename__ = 'foo'
+        id = Column(Integer, primary_key=True)
+
+    class Bar(RefTargetMixin, Base):
+        __tablename__ = 'bar'
+        id = Column(Integer, primary_key=True)
+
+    class Target(Base):
+        __tablename__ = 'target'
+        id = Column(Integer, primary_key=True)
+
+
+Using Advanced Relationship Arguments (e.g. ``primaryjoin``, etc.)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:func:`~sqlalchemy.orm.relationship` definitions which require explicit
+primaryjoin, order_by etc. expressions should in all but the most
+simplistic cases use **late bound** forms
+for these arguments, meaning, using either the string form or a lambda.
+The reason for this is that the related :class:`.Column` objects which are to
+be configured using ``@declared_attr`` are not available to another
+``@declared_attr`` attribute; while the methods will work and return new
+:class:`.Column` objects, those are not the :class:`.Column` objects that
+Declarative will be using as it calls the methods on its own, thus using
+*different* :class:`.Column` objects.
+ +The canonical example is the primaryjoin condition that depends upon +another mixed-in column:: + + class RefTargetMixin(object): + @declared_attr + def target_id(cls): + return Column('target_id', ForeignKey('target.id')) + + @declared_attr + def target(cls): + return relationship(Target, + primaryjoin=Target.id==cls.target_id # this is *incorrect* + ) + +Mapping a class using the above mixin, we will get an error like:: + + sqlalchemy.exc.InvalidRequestError: this ForeignKey's parent column is not + yet associated with a Table. + +This is because the ``target_id`` :class:`.Column` we've called upon in our +``target()`` method is not the same :class:`.Column` that declarative is +actually going to map to our table. + +The condition above is resolved using a lambda:: + + class RefTargetMixin(object): + @declared_attr + def target_id(cls): + return Column('target_id', ForeignKey('target.id')) + + @declared_attr + def target(cls): + return relationship(Target, + primaryjoin=lambda: Target.id==cls.target_id + ) + +or alternatively, the string form (which ultimately generates a lambda):: + + class RefTargetMixin(object): + @declared_attr + def target_id(cls): + return Column('target_id', ForeignKey('target.id')) + + @declared_attr + def target(cls): + return relationship("Target", + primaryjoin="Target.id==%s.target_id" % cls.__name__ + ) + +Mixing in deferred(), column_property(), and other MapperProperty classes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Like :func:`~sqlalchemy.orm.relationship`, all +:class:`~sqlalchemy.orm.interfaces.MapperProperty` subclasses such as +:func:`~sqlalchemy.orm.deferred`, :func:`~sqlalchemy.orm.column_property`, +etc. ultimately involve references to columns, and therefore, when +used with declarative mixins, have the :class:`.declared_attr` +requirement so that no reliance on copying is needed:: + + class SomethingMixin(object): + + @declared_attr + def dprop(cls): + return deferred(Column(Integer)) + + class Something(SomethingMixin, Base): + __tablename__ = "something" + +The :func:`.column_property` or other construct may refer +to other columns from the mixin. These are copied ahead of time before +the :class:`.declared_attr` is invoked:: + + class SomethingMixin(object): + x = Column(Integer) + + y = Column(Integer) + + @declared_attr + def x_plus_y(cls): + return column_property(cls.x + cls.y) + + +.. versionchanged:: 1.0.0 mixin columns are copied to the final mapped class + so that :class:`.declared_attr` methods can access the actual column + that will be mapped. + +Mixing in Association Proxy and Other Attributes +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Mixins can specify user-defined attributes as well as other extension +units such as :func:`.association_proxy`. The usage of +:class:`.declared_attr` is required in those cases where the attribute must +be tailored specifically to the target subclass. An example is when +constructing multiple :func:`.association_proxy` attributes which each +target a different type of child object. 
Below is an +:func:`.association_proxy` / mixin example which provides a scalar list of +string values to an implementing class:: + + from sqlalchemy import Column, Integer, ForeignKey, String + from sqlalchemy.orm import relationship + from sqlalchemy.ext.associationproxy import association_proxy + from sqlalchemy.ext.declarative import declarative_base, declared_attr + + Base = declarative_base() + + class HasStringCollection(object): + @declared_attr + def _strings(cls): + class StringAttribute(Base): + __tablename__ = cls.string_table_name + id = Column(Integer, primary_key=True) + value = Column(String(50), nullable=False) + parent_id = Column(Integer, + ForeignKey('%s.id' % cls.__tablename__), + nullable=False) + def __init__(self, value): + self.value = value + + return relationship(StringAttribute) + + @declared_attr + def strings(cls): + return association_proxy('_strings', 'value') + + class TypeA(HasStringCollection, Base): + __tablename__ = 'type_a' + string_table_name = 'type_a_strings' + id = Column(Integer(), primary_key=True) + + class TypeB(HasStringCollection, Base): + __tablename__ = 'type_b' + string_table_name = 'type_b_strings' + id = Column(Integer(), primary_key=True) + +Above, the ``HasStringCollection`` mixin produces a :func:`.relationship` +which refers to a newly generated class called ``StringAttribute``. The +``StringAttribute`` class is generated with its own :class:`.Table` +definition which is local to the parent class making usage of the +``HasStringCollection`` mixin. It also produces an :func:`.association_proxy` +object which proxies references to the ``strings`` attribute onto the ``value`` +attribute of each ``StringAttribute`` instance. + +``TypeA`` or ``TypeB`` can be instantiated given the constructor +argument ``strings``, a list of strings:: + + ta = TypeA(strings=['foo', 'bar']) + tb = TypeA(strings=['bat', 'bar']) + +This list will generate a collection +of ``StringAttribute`` objects, which are persisted into a table that's +local to either the ``type_a_strings`` or ``type_b_strings`` table:: + + >>> print ta._strings + [<__main__.StringAttribute object at 0x10151cd90>, + <__main__.StringAttribute object at 0x10151ce10>] + +When constructing the :func:`.association_proxy`, the +:class:`.declared_attr` decorator must be used so that a distinct +:func:`.association_proxy` object is created for each of the ``TypeA`` +and ``TypeB`` classes. + +.. versionadded:: 0.8 :class:`.declared_attr` is usable with non-mapped + attributes, including user-defined attributes as well as + :func:`.association_proxy`. + + +Controlling table inheritance with mixins +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``__tablename__`` attribute may be used to provide a function that +will determine the name of the table used for each class in an inheritance +hierarchy, as well as whether a class has its own distinct table. + +This is achieved using the :class:`.declared_attr` indicator in conjunction +with a method named ``__tablename__()``. Declarative will always +invoke :class:`.declared_attr` for the special names +``__tablename__``, ``__mapper_args__`` and ``__table_args__`` +function **for each mapped class in the hierarchy**. The function therefore +needs to expect to receive each class individually and to provide the +correct answer for each. 
+
+For example, to create a mixin that gives every class a simple table
+name based on class name::
+
+    from sqlalchemy.ext.declarative import declared_attr
+
+    class Tablename:
+        @declared_attr
+        def __tablename__(cls):
+            return cls.__name__.lower()
+
+    class Person(Tablename, Base):
+        id = Column(Integer, primary_key=True)
+        discriminator = Column('type', String(50))
+        __mapper_args__ = {'polymorphic_on': discriminator}
+
+    class Engineer(Person):
+        __tablename__ = None
+        __mapper_args__ = {'polymorphic_identity': 'engineer'}
+        primary_language = Column(String(50))
+
+Alternatively, we can modify our ``__tablename__`` function to return
+``None`` for subclasses, using :func:`.has_inherited_table`. This has
+the effect of those subclasses being mapped with single table inheritance
+against the parent::
+
+    from sqlalchemy.ext.declarative import declared_attr
+    from sqlalchemy.ext.declarative import has_inherited_table
+
+    class Tablename(object):
+        @declared_attr
+        def __tablename__(cls):
+            if has_inherited_table(cls):
+                return None
+            return cls.__name__.lower()
+
+    class Person(Tablename, Base):
+        id = Column(Integer, primary_key=True)
+        discriminator = Column('type', String(50))
+        __mapper_args__ = {'polymorphic_on': discriminator}
+
+    class Engineer(Person):
+        primary_language = Column(String(50))
+        __mapper_args__ = {'polymorphic_identity': 'engineer'}
+
+.. _mixin_inheritance_columns:
+
+Mixing in Columns in Inheritance Scenarios
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In contrast to how ``__tablename__`` and other special names are handled when
+used with :class:`.declared_attr`, when we mix in columns and properties (e.g.
+relationships, column properties, etc.), the function is
+invoked for the **base class only** in the hierarchy. Below, only the
+``Person`` class will receive a column
+called ``id``; the mapping will fail on ``Engineer``, which is not given
+a primary key::
+
+    class HasId(object):
+        @declared_attr
+        def id(cls):
+            return Column('id', Integer, primary_key=True)
+
+    class Person(HasId, Base):
+        __tablename__ = 'person'
+        discriminator = Column('type', String(50))
+        __mapper_args__ = {'polymorphic_on': discriminator}
+
+    class Engineer(Person):
+        __tablename__ = 'engineer'
+        primary_language = Column(String(50))
+        __mapper_args__ = {'polymorphic_identity': 'engineer'}
+
+It is usually the case in joined-table inheritance that we want distinctly
+named columns on each subclass. However in this case, we may want to have
+an ``id`` column on every table, and have them refer to each other via
+foreign key. We can achieve this as a mixin by using the
+:attr:`.declared_attr.cascading` modifier, which indicates that the
+function should be invoked **for each class in the hierarchy**, just like
+it does for ``__tablename__``::
+
+    class HasId(object):
+        @declared_attr.cascading
+        def id(cls):
+            if has_inherited_table(cls):
+                return Column('id',
+                              Integer,
+                              ForeignKey('person.id'), primary_key=True)
+            else:
+                return Column('id', Integer, primary_key=True)
+
+    class Person(HasId, Base):
+        __tablename__ = 'person'
+        discriminator = Column('type', String(50))
+        __mapper_args__ = {'polymorphic_on': discriminator}
+
+    class Engineer(Person):
+        __tablename__ = 'engineer'
+        primary_language = Column(String(50))
+        __mapper_args__ = {'polymorphic_identity': 'engineer'}
+
+
+.. versionadded:: 1.0.0 added :attr:`.declared_attr.cascading`.
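+As a rough illustration, the joined-table mapping above would emit DDL
+along these lines; the exact output depends on the dialect in use::
+
+    CREATE TABLE person (
+        id INTEGER NOT NULL,
+        type VARCHAR(50),
+        PRIMARY KEY (id)
+    )
+
+    CREATE TABLE engineer (
+        id INTEGER NOT NULL,
+        primary_language VARCHAR(50),
+        PRIMARY KEY (id),
+        FOREIGN KEY(id) REFERENCES person (id)
+    )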
+
+Combining Table/Mapper Arguments from Multiple Mixins
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the case of ``__table_args__`` or ``__mapper_args__``
+specified with declarative mixins, you may want to combine
+some parameters from several mixins with those you wish to
+define on the class itself. The
+:class:`.declared_attr` decorator can be used
+here to create user-defined collation routines that pull
+from multiple collections::
+
+    from sqlalchemy.ext.declarative import declared_attr
+
+    class MySQLSettings(object):
+        __table_args__ = {'mysql_engine': 'InnoDB'}
+
+    class MyOtherMixin(object):
+        __table_args__ = {'info': 'foo'}
+
+    class MyModel(MySQLSettings, MyOtherMixin, Base):
+        __tablename__ = 'my_model'
+
+        @declared_attr
+        def __table_args__(cls):
+            args = dict()
+            args.update(MySQLSettings.__table_args__)
+            args.update(MyOtherMixin.__table_args__)
+            return args
+
+        id = Column(Integer, primary_key=True)
+
+Creating Indexes with Mixins
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To define a named, potentially multicolumn :class:`.Index` that applies to all
+tables derived from a mixin, use the "inline" form of :class:`.Index` and
+establish it as part of ``__table_args__``::
+
+    class MyMixin(object):
+        a = Column(Integer)
+        b = Column(Integer)
+
+        @declared_attr
+        def __table_args__(cls):
+            return (Index('test_idx_%s' % cls.__tablename__, 'a', 'b'),)
+
+    class MyModel(MyMixin, Base):
+        __tablename__ = 'atable'
+        c = Column(Integer, primary_key=True)
 diff --git a/doc/build/orm/extensions/declarative/relationships.rst b/doc/build/orm/extensions/declarative/relationships.rst new file mode 100644 index 000000000..fb53c28bb --- /dev/null +++ b/doc/build/orm/extensions/declarative/relationships.rst @@ -0,0 +1,138 @@
+.. _declarative_configuring_relationships:
+
+=========================
+Configuring Relationships
+=========================
+
+Relationships to other classes are done in the usual way, with the added
+feature that the class specified to :func:`~sqlalchemy.orm.relationship`
+may be a string name. The "class registry" associated with ``Base``
+is used at mapper compilation time to resolve the name into the actual
+class object, which is expected to have been defined once the mapper
+configuration is used::
+
+    class User(Base):
+        __tablename__ = 'users'
+
+        id = Column(Integer, primary_key=True)
+        name = Column(String(50))
+        addresses = relationship("Address", backref="user")
+
+    class Address(Base):
+        __tablename__ = 'addresses'
+
+        id = Column(Integer, primary_key=True)
+        email = Column(String(50))
+        user_id = Column(Integer, ForeignKey('users.id'))
+
+Column constructs, since they are just that, are immediately usable,
+as below where we define a primary join condition on the ``Address``
+class using them::
+
+    class Address(Base):
+        __tablename__ = 'addresses'
+
+        id = Column(Integer, primary_key=True)
+        email = Column(String(50))
+        user_id = Column(Integer, ForeignKey('users.id'))
+        user = relationship(User, primaryjoin=user_id == User.id)
+
+In addition to the main argument for :func:`~sqlalchemy.orm.relationship`,
+other arguments which depend upon the columns present on an as-yet
+undefined class may also be specified as strings. These strings are
+evaluated as Python expressions.
The full namespace available within
+this evaluation includes all classes mapped for this declarative base,
+as well as the contents of the ``sqlalchemy`` package, including
+expression functions like :func:`~sqlalchemy.sql.expression.desc` and
+:attr:`~sqlalchemy.sql.expression.func`::
+
+    class User(Base):
+        # ....
+        addresses = relationship("Address",
+                                 order_by="desc(Address.email)",
+                                 primaryjoin="Address.user_id==User.id")
+
+For the case where more than one module contains a class of the same name,
+string class names can also be specified as module-qualified paths
+within any of these string expressions::
+
+    class User(Base):
+        # ....
+        addresses = relationship("myapp.model.address.Address",
+                                 order_by="desc(myapp.model.address.Address.email)",
+                                 primaryjoin="myapp.model.address.Address.user_id=="
+                                             "myapp.model.user.User.id")
+
+The qualified path can be any partial path that removes ambiguity between
+the names. For example, to disambiguate between
+``myapp.model.address.Address`` and ``myapp.model.lookup.Address``,
+we can specify ``address.Address`` or ``lookup.Address``::
+
+    class User(Base):
+        # ....
+        addresses = relationship("address.Address",
+                                 order_by="desc(address.Address.email)",
+                                 primaryjoin="address.Address.user_id=="
+                                             "User.id")
+
+.. versionadded:: 0.8
+   module-qualified paths can be used when specifying string arguments
+   with Declarative, in order to specify specific modules.
+
+Two alternatives exist to using string-based attributes. A lambda
+can be used instead, which will be evaluated after all mappers have been
+configured::
+
+    class User(Base):
+        # ...
+        addresses = relationship(lambda: Address,
+                                 order_by=lambda: desc(Address.email),
+                                 primaryjoin=lambda: Address.user_id == User.id)
+
+Or, the relationship can be added to the class explicitly after the classes
+are available::
+
+    User.addresses = relationship(Address,
+                                  primaryjoin=Address.user_id == User.id)
+
+
+.. _declarative_many_to_many:
+
+Configuring Many-to-Many Relationships
+======================================
+
+Many-to-many relationships are also declared in the same way
+with declarative as with traditional mappings. The
+``secondary`` argument to
+:func:`.relationship` is as usual passed a
+:class:`.Table` object, which is typically declared in the
+traditional way. The :class:`.Table` usually shares
+the :class:`.MetaData` object used by the declarative base::
+
+    keywords = Table(
+        'keywords', Base.metadata,
+        Column('author_id', Integer, ForeignKey('authors.id')),
+        Column('keyword_id', Integer, ForeignKey('keywords.id'))
+    )
+
+    class Author(Base):
+        __tablename__ = 'authors'
+        id = Column(Integer, primary_key=True)
+        keywords = relationship("Keyword", secondary=keywords)
+
+Like other :func:`~sqlalchemy.orm.relationship` arguments, a string is accepted
+as well, passing the string name of the table as defined in the
+``Base.metadata.tables`` collection::
+
+    class Author(Base):
+        __tablename__ = 'authors'
+        id = Column(Integer, primary_key=True)
+        keywords = relationship("Keyword", secondary="keywords")
+
+As with traditional mapping, it's generally not a good idea to use
+a :class:`.Table` as the "secondary" argument which is also mapped to
+a class, unless the :func:`.relationship` is declared with ``viewonly=True``.
+Otherwise, the unit-of-work system may attempt duplicate INSERT and
+DELETE statements against the underlying table.
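+As a rough sketch of the ``viewonly=True`` exception (illustrative only;
+the ``author_keyword`` table and ``AuthorKeyword`` class are hypothetical,
+and a ``Keyword`` class mapped to a ``keywords`` table is assumed)::
+
+    author_keyword = Table(
+        'author_keyword', Base.metadata,
+        Column('author_id', Integer, ForeignKey('authors.id'),
+               primary_key=True),
+        Column('keyword_id', Integer, ForeignKey('keywords.id'),
+               primary_key=True)
+    )
+
+    class AuthorKeyword(Base):
+        # the association table is also mapped directly, e.g. so that
+        # associations can be created and deleted as objects of their own
+        __table__ = author_keyword
+
+    class Author(Base):
+        __tablename__ = 'authors'
+        id = Column(Integer, primary_key=True)
+
+        # viewonly=True: used for loading only; persistence proceeds
+        # through AuthorKeyword, so the unit of work does not emit
+        # duplicate INSERT/DELETE against the author_keyword table
+        keywords = relationship("Keyword", secondary=author_keyword,
+                                viewonly=True)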
+
 diff --git a/doc/build/orm/extensions/declarative/table_config.rst b/doc/build/orm/extensions/declarative/table_config.rst new file mode 100644 index 000000000..9a621e6dd --- /dev/null +++ b/doc/build/orm/extensions/declarative/table_config.rst @@ -0,0 +1,143 @@
+.. _declarative_table_args:
+
+===================
+Table Configuration
+===================
+
+Table arguments other than the name, metadata, and mapped Column
+arguments are specified using the ``__table_args__`` class attribute.
+This attribute accommodates both positional as well as keyword
+arguments that are normally sent to the
+:class:`~sqlalchemy.schema.Table` constructor.
+The attribute can be specified in one of two forms. One is as a
+dictionary::
+
+    class MyClass(Base):
+        __tablename__ = 'sometable'
+        __table_args__ = {'mysql_engine': 'InnoDB'}
+
+The other is a tuple, where each argument is positional
+(usually constraints)::
+
+    class MyClass(Base):
+        __tablename__ = 'sometable'
+        __table_args__ = (
+            ForeignKeyConstraint(['id'], ['remote_table.id']),
+            UniqueConstraint('foo'),
+        )
+
+Keyword arguments can be specified with the above form by
+specifying the last argument as a dictionary::
+
+    class MyClass(Base):
+        __tablename__ = 'sometable'
+        __table_args__ = (
+            ForeignKeyConstraint(['id'], ['remote_table.id']),
+            UniqueConstraint('foo'),
+            {'autoload': True}
+        )
+
+Using a Hybrid Approach with __table__
+=======================================
+
+As an alternative to ``__tablename__``, a direct
+:class:`~sqlalchemy.schema.Table` construct may be used. The
+:class:`~sqlalchemy.schema.Column` objects, which in this case require
+their names, will be added to the mapping just like a regular mapping
+to a table::
+
+    class MyClass(Base):
+        __table__ = Table('my_table', Base.metadata,
+            Column('id', Integer, primary_key=True),
+            Column('name', String(50))
+        )
+
+``__table__`` provides a more focused point of control for establishing
+table metadata, while still getting most of the benefits of using declarative.
+An application that uses reflection might want to load table metadata elsewhere
+and pass it to declarative classes::
+
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+    Base.metadata.reflect(some_engine)
+
+    class User(Base):
+        __table__ = Base.metadata.tables['user']
+
+    class Address(Base):
+        __table__ = Base.metadata.tables['address']
+
+Some configuration schemes may find it more appropriate to use ``__table__``,
+such as those which already take advantage of the data-driven nature of
+:class:`.Table` to customize and/or automate schema definition.
+
+Note that when the ``__table__`` approach is used, the object is immediately
+usable as a plain :class:`.Table` within the class declaration body itself,
+as a Python class is only another syntactical block.
Below this is illustrated
+by using the ``id`` column in the ``primaryjoin`` condition of a
+:func:`.relationship`::
+
+    class MyClass(Base):
+        __table__ = Table('my_table', Base.metadata,
+            Column('id', Integer, primary_key=True),
+            Column('name', String(50))
+        )
+
+        widgets = relationship(Widget,
+                      primaryjoin=Widget.myclass_id==__table__.c.id)
+
+Similarly, mapped attributes which refer to ``__table__`` can be placed inline,
+as below where we assign the ``name`` column to the attribute ``_name``,
+generating a synonym for ``name``::
+
+    from sqlalchemy.ext.declarative import synonym_for
+
+    class MyClass(Base):
+        __table__ = Table('my_table', Base.metadata,
+            Column('id', Integer, primary_key=True),
+            Column('name', String(50))
+        )
+
+        _name = __table__.c.name
+
+        @synonym_for("_name")
+        def name(self):
+            return "Name: %s" % self._name
+
+Using Reflection with Declarative
+=================================
+
+It's easy to set up a :class:`.Table` that uses ``autoload=True``
+in conjunction with a mapped class::
+
+    class MyClass(Base):
+        __table__ = Table('mytable', Base.metadata,
+                          autoload=True, autoload_with=some_engine)
+
+However, one improvement that can be made here is to not
+require the :class:`.Engine` to be available when classes are
+being first declared. To achieve this, use the
+:class:`.DeferredReflection` mixin, which sets up mappings
+only after a special ``prepare(engine)`` step is called::
+
+    from sqlalchemy.ext.declarative import declarative_base, DeferredReflection
+
+    Base = declarative_base(cls=DeferredReflection)
+
+    class Foo(Base):
+        __tablename__ = 'foo'
+        bars = relationship("Bar")
+
+    class Bar(Base):
+        __tablename__ = 'bar'
+
+        # illustrate overriding of "bar.foo_id" to have
+        # a foreign key constraint otherwise not
+        # reflected, such as when using MySQL
+        foo_id = Column(Integer, ForeignKey('foo.id'))
+
+    Base.prepare(some_engine)
+
+.. versionadded:: 0.8
+   Added :class:`.DeferredReflection`.
 diff --git a/doc/build/orm/extensions/index.rst b/doc/build/orm/extensions/index.rst index 65836f13a..f7f58e381 100644 --- a/doc/build/orm/extensions/index.rst +++ b/doc/build/orm/extensions/index.rst @@ -17,7 +17,7 @@ behavior. In particular the "Horizontal Sharding", "Hybrid Attributes", and
     associationproxy
     automap
-    declarative
+    declarative/index
     mutable
     orderinglist
     horizontal_shard
 diff --git a/doc/build/orm/extensions/mutable.rst b/doc/build/orm/extensions/mutable.rst index 14875cd3c..969411481 100644 --- a/doc/build/orm/extensions/mutable.rst +++ b/doc/build/orm/extensions/mutable.rst @@ -21,7 +21,7 @@ API Reference
 .. autoclass:: MutableDict
     :members:
-
+    :undoc-members:
 diff --git a/doc/build/orm/index.rst b/doc/build/orm/index.rst index 6c12ebd38..b7683a8ad 100644 --- a/doc/build/orm/index.rst +++ b/doc/build/orm/index.rst @@ -9,18 +9,13 @@ as well as automated persistence of Python objects, proceed first to the tutorial.
 .. toctree::
-    :maxdepth: 3
+    :maxdepth: 2
 
     tutorial
     mapper_config
     relationships
-    collections
-    inheritance
+    loading_objects
     session
-    query
-    loading
-    events
+    extending
     extensions/index
     examples
-    exceptions
-    internals
 diff --git a/doc/build/orm/internals.rst b/doc/build/orm/internals.rst index 78ec2fa8e..debb1ab7e 100644 --- a/doc/build/orm/internals.rst +++ b/doc/build/orm/internals.rst @@ -11,6 +11,9 @@ sections, are listed here.
 .. autoclass:: sqlalchemy.orm.state.AttributeState
     :members:
+.. autoclass:: sqlalchemy.orm.util.CascadeOptions
+    :members:
+
 .. autoclass:: sqlalchemy.orm.instrumentation.ClassManager
     :members:
     :inherited-members:
@@ -19,6 +22,9 @@ sections, are listed here.
     :members:
     :inherited-members:
+.. autoclass:: sqlalchemy.orm.properties.ComparableProperty
+    :members:
+
 .. autoclass:: sqlalchemy.orm.descriptor_props.CompositeProperty
     :members:
@@ -26,10 +32,14 @@
 .. autoclass:: sqlalchemy.orm.attributes.Event
     :members:
+.. autoclass:: sqlalchemy.orm.identity.IdentityMap
+    :members:
 .. autoclass:: sqlalchemy.orm.base.InspectionAttr
     :members:
+.. autoclass:: sqlalchemy.orm.base.InspectionAttrInfo
+    :members:
 .. autoclass:: sqlalchemy.orm.state.InstanceState
     :members:
@@ -46,6 +56,29 @@
 .. autoclass:: sqlalchemy.orm.interfaces.MapperProperty
     :members:
+
+    .. py:attribute:: info
+
+        Info dictionary associated with the object, allowing user-defined
+        data to be associated with this :class:`.InspectionAttr`.
+
+        The dictionary is generated when first accessed. Alternatively,
+        it can be specified as a constructor argument to the
+        :func:`.column_property`, :func:`.relationship`, or :func:`.composite`
+        functions.
+
+        .. versionadded:: 0.8 Added support for .info to all
+           :class:`.MapperProperty` subclasses.
+
+        .. versionchanged:: 1.0.0 :attr:`.InspectionAttr.info` moved
+           from :class:`.MapperProperty` so that it can apply to a wider
+           variety of ORM and extension constructs.
+
+        .. seealso::
+
+            :attr:`.QueryableAttribute.info`
+
+            :attr:`.SchemaItem.info`
+
 .. autodata:: sqlalchemy.orm.interfaces.NOT_EXTENSION
 diff --git a/doc/build/orm/join_conditions.rst b/doc/build/orm/join_conditions.rst new file mode 100644 index 000000000..c39b7312e --- /dev/null +++ b/doc/build/orm/join_conditions.rst @@ -0,0 +1,740 @@
+.. _relationship_configure_joins:
+
+Configuring how Relationship Joins
+------------------------------------
+
+:func:`.relationship` will normally create a join between two tables
+by examining the foreign key relationship between the two tables
+to determine which columns should be compared. There are a variety
+of situations where this behavior needs to be customized.
+
+.. _relationship_foreign_keys:
+
+Handling Multiple Join Paths
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+One of the most common situations to deal with is when
+there is more than one foreign key path between two tables.
+
+Consider a ``Customer`` class that contains two foreign keys to an ``Address``
+class::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class Customer(Base):
+        __tablename__ = 'customer'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        billing_address_id = Column(Integer, ForeignKey("address.id"))
+        shipping_address_id = Column(Integer, ForeignKey("address.id"))
+
+        billing_address = relationship("Address")
+        shipping_address = relationship("Address")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        street = Column(String)
+        city = Column(String)
+        state = Column(String)
+        zip = Column(String)
+
+The above mapping, when we attempt to use it, will produce the error::
+
+    sqlalchemy.exc.AmbiguousForeignKeysError: Could not determine join
+    condition between parent/child tables on relationship
+    Customer.billing_address - there are multiple foreign key
+    paths linking the tables.
+    Specify the 'foreign_keys' argument,
+    providing a list of those columns which should be
+    counted as containing a foreign key reference to the parent table.
+
+The above message is pretty long. There are many potential messages
+that :func:`.relationship` can return, which have been carefully tailored
+to detect a variety of common configuration issues; most will suggest
+the additional configuration that's needed to resolve the ambiguity
+or other missing information.
+
+In this case, the message wants us to qualify each :func:`.relationship`
+by indicating for each one which foreign key column should be considered, and
+the appropriate form is as follows::
+
+    class Customer(Base):
+        __tablename__ = 'customer'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+
+        billing_address_id = Column(Integer, ForeignKey("address.id"))
+        shipping_address_id = Column(Integer, ForeignKey("address.id"))
+
+        billing_address = relationship("Address", foreign_keys=[billing_address_id])
+        shipping_address = relationship("Address", foreign_keys=[shipping_address_id])
+
+Above, we specify the ``foreign_keys`` argument, which is a :class:`.Column` or list
+of :class:`.Column` objects which indicate those columns to be considered "foreign",
+or in other words, the columns that contain a value referring to a parent table.
+Loading the ``Customer.billing_address`` relationship from a ``Customer``
+object will use the value present in ``billing_address_id`` in order to
+identify the row in ``Address`` to be loaded; similarly, ``shipping_address_id``
+is used for the ``shipping_address`` relationship. The linkage of the two
+columns also plays a role during persistence; the newly generated primary key
+of a just-inserted ``Address`` object will be copied into the appropriate
+foreign key column of an associated ``Customer`` object during a flush.
+
+When specifying ``foreign_keys`` with Declarative, we can also use string
+names; however, it is important that when using a list, the **list
+is part of the string**::
+
+    billing_address = relationship("Address", foreign_keys="[Customer.billing_address_id]")
+
+In this specific example, the list is not necessary in any case as there's only
+one :class:`.Column` we need::
+
+    billing_address = relationship("Address", foreign_keys="Customer.billing_address_id")
+
+.. versionchanged:: 0.8
+   :func:`.relationship` can resolve ambiguity between foreign key targets on the
+   basis of the ``foreign_keys`` argument alone; the :paramref:`~.relationship.primaryjoin`
+   argument is no longer needed in this situation.
+
+.. _relationship_primaryjoin:
+
+Specifying Alternate Join Conditions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The default behavior of :func:`.relationship` when constructing a join
+is that it equates the value of primary key columns
+on one side to that of foreign-key-referring columns on the other.
+We can change this criterion to be anything we'd like using the
+:paramref:`~.relationship.primaryjoin`
+argument, as well as the :paramref:`~.relationship.secondaryjoin`
+argument in the case when a "secondary" table is used.
+
+In the example below, using the ``User`` class
+as well as an ``Address`` class which stores a street address, we
+create a relationship ``boston_addresses`` which will only
+load those ``Address`` objects which specify a city of "Boston"::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        name = Column(String)
+        boston_addresses = relationship("Address",
+                        primaryjoin="and_(User.id==Address.user_id, "
+                            "Address.city=='Boston')")
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        user_id = Column(Integer, ForeignKey('user.id'))
+
+        street = Column(String)
+        city = Column(String)
+        state = Column(String)
+        zip = Column(String)
+
+Within this string SQL expression, we made use of the :func:`.and_` conjunction
+construct to establish two distinct predicates for the join condition - joining
+both the ``User.id`` and ``Address.user_id`` columns to each other, as well as
+limiting rows in ``Address`` to just ``city='Boston'``.   When using Declarative,
+rudimentary SQL functions like :func:`.and_` are automatically available in the
+evaluated namespace of a string :func:`.relationship` argument.
+
+The custom criteria we use in a :paramref:`~.relationship.primaryjoin`
+are generally only significant when SQLAlchemy is rendering SQL in
+order to load or represent this relationship. That is, they are used in
+the SQL statement that's emitted in order to perform a per-attribute
+lazy load, or when a join is constructed at query time, such as via
+:meth:`.Query.join`, or via the eager "joined" or "subquery" styles of
+loading.   When in-memory objects are being manipulated, we can place
+any ``Address`` object we'd like into the ``boston_addresses``
+collection, regardless of what the value of the ``.city`` attribute
+is.   The objects will remain present in the collection until the
+attribute is expired and re-loaded from the database, at which point the
+criterion is applied.   When a flush occurs, the objects inside of
+``boston_addresses`` will be flushed unconditionally, assigning the value
+of the primary key ``user.id`` column onto the foreign-key-holding
+``address.user_id`` column for each row.  The ``city`` criterion has no
+effect here, as the flush process only cares about synchronizing
+primary key values into referencing foreign key values.
+
+.. _relationship_custom_foreign:
+
+Creating Custom Foreign Conditions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Another element of the primary join condition is how those columns
+considered "foreign" are determined.  Usually, some subset
+of :class:`.Column` objects will specify :class:`.ForeignKey`, or otherwise
+be part of a :class:`.ForeignKeyConstraint` that's relevant to the join condition.
+:func:`.relationship` looks to this foreign key status as it decides
+how it should load and persist data for this relationship.   However, the
+:paramref:`~.relationship.primaryjoin` argument can be used to create a join
+condition that doesn't involve any "schema" level foreign keys.  We can combine
+:paramref:`~.relationship.primaryjoin` along with
+:paramref:`~.relationship.foreign_keys` and :paramref:`~.relationship.remote_side`
+explicitly in order to establish such a join.
+ +Below, a class ``HostEntry`` joins to itself, equating the string ``content`` +column to the ``ip_address`` column, which is a Postgresql type called ``INET``. +We need to use :func:`.cast` in order to cast one side of the join to the +type of the other:: + + from sqlalchemy import cast, String, Column, Integer + from sqlalchemy.orm import relationship + from sqlalchemy.dialects.postgresql import INET + + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base() + + class HostEntry(Base): + __tablename__ = 'host_entry' + + id = Column(Integer, primary_key=True) + ip_address = Column(INET) + content = Column(String(50)) + + # relationship() using explicit foreign_keys, remote_side + parent_host = relationship("HostEntry", + primaryjoin=ip_address == cast(content, INET), + foreign_keys=content, + remote_side=ip_address + ) + +The above relationship will produce a join like:: + + SELECT host_entry.id, host_entry.ip_address, host_entry.content + FROM host_entry JOIN host_entry AS host_entry_1 + ON host_entry_1.ip_address = CAST(host_entry.content AS INET) + +An alternative syntax to the above is to use the :func:`.foreign` and +:func:`.remote` :term:`annotations`, +inline within the :paramref:`~.relationship.primaryjoin` expression. +This syntax represents the annotations that :func:`.relationship` normally +applies by itself to the join condition given the :paramref:`~.relationship.foreign_keys` and +:paramref:`~.relationship.remote_side` arguments. These functions may +be more succinct when an explicit join condition is present, and additionally +serve to mark exactly the column that is "foreign" or "remote" independent +of whether that column is stated multiple times or within complex +SQL expressions:: + + from sqlalchemy.orm import foreign, remote + + class HostEntry(Base): + __tablename__ = 'host_entry' + + id = Column(Integer, primary_key=True) + ip_address = Column(INET) + content = Column(String(50)) + + # relationship() using explicit foreign() and remote() annotations + # in lieu of separate arguments + parent_host = relationship("HostEntry", + primaryjoin=remote(ip_address) == \ + cast(foreign(content), INET), + ) + + +.. _relationship_custom_operator: + +Using custom operators in join conditions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Another use case for relationships is the use of custom operators, such +as Postgresql's "is contained within" ``<<`` operator when joining with +types such as :class:`.postgresql.INET` and :class:`.postgresql.CIDR`. +For custom operators we use the :meth:`.Operators.op` function:: + + inet_column.op("<<")(cidr_column) + +However, if we construct a :paramref:`~.relationship.primaryjoin` using this +operator, :func:`.relationship` will still need more information. This is because +when it examines our primaryjoin condition, it specifically looks for operators +used for **comparisons**, and this is typically a fixed list containing known +comparison operators such as ``==``, ``<``, etc. 
So for our custom operator
+to participate in this system, we need to register it as a comparison
+operator using the :paramref:`~.Operators.op.is_comparison` parameter::
+
+    inet_column.op("<<", is_comparison=True)(cidr_column)
+
+A complete example::
+
+    class IPA(Base):
+        __tablename__ = 'ip_address'
+
+        id = Column(Integer, primary_key=True)
+        v4address = Column(INET)
+
+        network = relationship("Network",
+                            primaryjoin="IPA.v4address.op('<<', is_comparison=True)"
+                                "(foreign(Network.v4representation))",
+                            viewonly=True
+                        )
+
+    class Network(Base):
+        __tablename__ = 'network'
+
+        id = Column(Integer, primary_key=True)
+        v4representation = Column(CIDR)
+
+Above, a query such as::
+
+    session.query(IPA).join(IPA.network)
+
+will render as::
+
+    SELECT ip_address.id AS ip_address_id, ip_address.v4address AS ip_address_v4address
+    FROM ip_address JOIN network ON ip_address.v4address << network.v4representation
+
+.. versionadded:: 0.9.2 - Added the :paramref:`.Operators.op.is_comparison`
+   flag to assist in the creation of :func:`.relationship` constructs using
+   custom operators.
+
+.. _relationship_overlapping_foreignkeys:
+
+Overlapping Foreign Keys
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+A rare scenario can arise when composite foreign keys are used, such that
+a single column may be a referencing column within more than one
+foreign key constraint.
+
+Consider an (admittedly complex) mapping such as the ``Magazine`` object,
+referred to both by the ``Writer`` object and the ``Article`` object
+using a composite primary key scheme that includes ``magazine_id``
+for both; then to make ``Article`` refer to ``Writer`` as well,
+``Article.magazine_id`` is involved in two separate relationships,
+``Article.magazine`` and ``Article.writer``::
+
+    class Magazine(Base):
+        __tablename__ = 'magazine'
+
+        id = Column(Integer, primary_key=True)
+
+
+    class Article(Base):
+        __tablename__ = 'article'
+
+        article_id = Column(Integer)
+        magazine_id = Column(ForeignKey('magazine.id'))
+        writer_id = Column()
+
+        magazine = relationship("Magazine")
+        writer = relationship("Writer")
+
+        __table_args__ = (
+            PrimaryKeyConstraint('article_id', 'magazine_id'),
+            ForeignKeyConstraint(
+                ['writer_id', 'magazine_id'],
+                ['writer.id', 'writer.magazine_id']
+            ),
+        )
+
+
+    class Writer(Base):
+        __tablename__ = 'writer'
+
+        id = Column(Integer, primary_key=True)
+        magazine_id = Column(ForeignKey('magazine.id'), primary_key=True)
+        magazine = relationship("Magazine")
+
+When the above mapping is configured, we will see this warning emitted::
+
+    SAWarning: relationship 'Article.writer' will copy column
+    writer.magazine_id to column article.magazine_id,
+    which conflicts with relationship(s): 'Article.magazine'
+    (copies magazine.id to article.magazine_id). Consider applying
+    viewonly=True to read-only relationships, or provide a primaryjoin
+    condition marking writable columns with the foreign() annotation.
+
+This warning refers to the fact that ``Article.magazine_id`` is
+the subject of two different foreign key constraints; it refers to
+``Magazine.id`` directly as a source column, but also refers to
+``Writer.magazine_id`` as a source column in the context of the
+composite key to ``Writer``.
+If we associate an ``Article`` with a
+particular ``Magazine``, but then associate the ``Article`` with a
+``Writer`` that's associated with a *different* ``Magazine``, the ORM
+will overwrite ``Article.magazine_id`` non-deterministically, silently
+changing which magazine we refer to; it may
+also attempt to place NULL into this column if we de-associate a
+``Writer`` from an ``Article``.  The warning lets us know this is the case.
+
+To solve this, we need to break out the behavior of ``Article`` to include
+all three of the following features:
+
+1. ``Article`` first and foremost writes to
+   ``Article.magazine_id`` based on data persisted in the ``Article.magazine``
+   relationship only, that is, a value copied from ``Magazine.id``.
+
+2. ``Article`` can write to ``Article.writer_id`` on behalf of data
+   persisted in the ``Article.writer`` relationship, but only the
+   ``Writer.id`` column; the ``Writer.magazine_id`` column should not
+   be written into ``Article.magazine_id`` as it ultimately is sourced
+   from ``Magazine.id``.
+
+3. ``Article`` takes ``Article.magazine_id`` into account when loading
+   ``Article.writer``, even though it *doesn't* write to it on behalf
+   of this relationship.
+
+To get just #1 and #2, we could specify only ``Article.writer_id`` as the
+"foreign keys" for ``Article.writer``::
+
+    class Article(Base):
+        # ...
+
+        writer = relationship("Writer", foreign_keys='Article.writer_id')
+
+However, this has the effect of ``Article.writer`` not taking
+``Article.magazine_id`` into account when querying against ``Writer``:
+
+.. sourcecode:: sql
+
+    SELECT article.article_id AS article_article_id,
+        article.magazine_id AS article_magazine_id,
+        article.writer_id AS article_writer_id
+    FROM article
+    JOIN writer ON writer.id = article.writer_id
+
+Therefore, to get all of #1, #2, and #3, we express the join condition
+as well as which columns are to be written by stating the
+:paramref:`~.relationship.primaryjoin` fully, along with either the
+:paramref:`~.relationship.foreign_keys` argument, or more succinctly by
+annotating with :func:`~.orm.foreign`::
+
+    class Article(Base):
+        # ...
+
+        writer = relationship(
+            "Writer",
+            primaryjoin="and_(Writer.id == foreign(Article.writer_id), "
+                        "Writer.magazine_id == Article.magazine_id)")
+
+.. versionchanged:: 1.0.0 the ORM will attempt to warn when a column is used
+   as the synchronization target from more than one relationship
+   simultaneously.
+
+
+Non-relational Comparisons / Materialized Path
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. warning:: this section details an experimental feature.
+
+Using custom expressions means we can produce unorthodox join conditions that
+don't obey the usual primary/foreign key model.  One such example is the
+materialized path pattern, where we compare strings for overlapping path tokens
+in order to produce a tree structure.
+
+Through careful use of :func:`.foreign` and :func:`.remote`, we can build
+a relationship that effectively produces a rudimentary materialized path
+system.   Essentially, when :func:`.foreign` and :func:`.remote` are
+on the *same* side of the comparison expression, the relationship is considered
+to be "one to many"; when they are on *different* sides, the relationship
+is considered to be "many to one".  A quick sketch of this rule is shown next.
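+
+Using hypothetical ``Parent`` and ``Child`` classes joined on a conventional
+``Child.parent_id`` foreign key (illustrative names only, not part of the
+materialized path example that follows), the placement of the annotations
+determines the direction::
+
+    # "one to many" from Parent: foreign() and remote() both annotate
+    # the same element of the comparison, Child.parent_id
+    children = relationship(Child,
+        primaryjoin=Parent.id == remote(foreign(Child.parent_id)))
+
+    # "many to one" from Child: foreign() and remote() annotate
+    # opposite sides of the comparison
+    parent = relationship(Parent,
+        primaryjoin=foreign(Child.parent_id) == remote(Parent.id))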
+
+For the comparison we'll use here,
+we'll be dealing with collections, so we keep things configured as
+"one to many"::
+
+    class Element(Base):
+        __tablename__ = 'element'
+
+        path = Column(String, primary_key=True)
+
+        descendants = relationship('Element',
+                            primaryjoin=
+                                remote(foreign(path)).like(
+                                        path.concat('/%')),
+                            viewonly=True,
+                            order_by=path)
+
+Above, if given an ``Element`` object with a path attribute of ``"/foo/bar2"``,
+we would like a load of ``Element.descendants`` to look like::
+
+    SELECT element.path AS element_path
+    FROM element
+    WHERE element.path LIKE ('/foo/bar2' || '/%') ORDER BY element.path
+
+.. versionadded:: 0.9.5 Support has been added to allow a single-column
+   comparison to itself within a primaryjoin condition, as well as for
+   primaryjoin conditions that use :meth:`.ColumnOperators.like` as the comparison
+   operator.
+
+.. _self_referential_many_to_many:
+
+Self-Referential Many-to-Many Relationship
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Many-to-many relationships can be customized by one or both of
+:paramref:`~.relationship.primaryjoin`
+and :paramref:`~.relationship.secondaryjoin` - the latter is significant for a
+relationship that specifies a many-to-many reference using the
+:paramref:`~.relationship.secondary` argument.
+A common situation which involves the usage of
+:paramref:`~.relationship.primaryjoin` and :paramref:`~.relationship.secondaryjoin`
+is when establishing a many-to-many relationship from a class to itself, as
+shown below::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column, Table
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    node_to_node = Table("node_to_node", Base.metadata,
+        Column("left_node_id", Integer, ForeignKey("node.id"), primary_key=True),
+        Column("right_node_id", Integer, ForeignKey("node.id"), primary_key=True)
+    )
+
+    class Node(Base):
+        __tablename__ = 'node'
+        id = Column(Integer, primary_key=True)
+        label = Column(String)
+        right_nodes = relationship("Node",
+                            secondary=node_to_node,
+                            primaryjoin=id==node_to_node.c.left_node_id,
+                            secondaryjoin=id==node_to_node.c.right_node_id,
+                            backref="left_nodes"
+        )
+
+Where above, SQLAlchemy can't know automatically which columns should connect
+to which for the ``right_nodes`` and ``left_nodes`` relationships.   The
+:paramref:`~.relationship.primaryjoin` and :paramref:`~.relationship.secondaryjoin`
+arguments establish how we'd like to join to the association table.
+In the Declarative form above, as we are declaring these conditions within the Python
+block that corresponds to the ``Node`` class, the ``id`` variable is available directly
+as the :class:`.Column` object we wish to join with.
+
+Alternatively, we can define the :paramref:`~.relationship.primaryjoin`
+and :paramref:`~.relationship.secondaryjoin` arguments using strings, which is suitable
+in the case that our configuration does not have either the ``Node.id`` column
+object available yet or the ``node_to_node`` table perhaps isn't yet available.
+When referring to a plain :class:`.Table` object in a declarative string, we +use the string name of the table as it is present in the :class:`.MetaData`:: + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + label = Column(String) + right_nodes = relationship("Node", + secondary="node_to_node", + primaryjoin="Node.id==node_to_node.c.left_node_id", + secondaryjoin="Node.id==node_to_node.c.right_node_id", + backref="left_nodes" + ) + +A classical mapping situation here is similar, where ``node_to_node`` can be joined +to ``node.c.id``:: + + from sqlalchemy import Integer, ForeignKey, String, Column, Table, MetaData + from sqlalchemy.orm import relationship, mapper + + metadata = MetaData() + + node_to_node = Table("node_to_node", metadata, + Column("left_node_id", Integer, ForeignKey("node.id"), primary_key=True), + Column("right_node_id", Integer, ForeignKey("node.id"), primary_key=True) + ) + + node = Table("node", metadata, + Column('id', Integer, primary_key=True), + Column('label', String) + ) + class Node(object): + pass + + mapper(Node, node, properties={ + 'right_nodes':relationship(Node, + secondary=node_to_node, + primaryjoin=node.c.id==node_to_node.c.left_node_id, + secondaryjoin=node.c.id==node_to_node.c.right_node_id, + backref="left_nodes" + )}) + + +Note that in both examples, the :paramref:`~.relationship.backref` +keyword specifies a ``left_nodes`` backref - when +:func:`.relationship` creates the second relationship in the reverse +direction, it's smart enough to reverse the +:paramref:`~.relationship.primaryjoin` and +:paramref:`~.relationship.secondaryjoin` arguments. + +.. _composite_secondary_join: + +Composite "Secondary" Joins +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. note:: + + This section features some new and experimental features of SQLAlchemy. + +Sometimes, when one seeks to build a :func:`.relationship` between two tables +there is a need for more than just two or three tables to be involved in +order to join them. This is an area of :func:`.relationship` where one seeks +to push the boundaries of what's possible, and often the ultimate solution to +many of these exotic use cases needs to be hammered out on the SQLAlchemy mailing +list. + +In more recent versions of SQLAlchemy, the :paramref:`~.relationship.secondary` +parameter can be used in some of these cases in order to provide a composite +target consisting of multiple tables. Below is an example of such a +join condition (requires version 0.9.2 at least to function as is):: + + class A(Base): + __tablename__ = 'a' + + id = Column(Integer, primary_key=True) + b_id = Column(ForeignKey('b.id')) + + d = relationship("D", + secondary="join(B, D, B.d_id == D.id)." + "join(C, C.d_id == D.id)", + primaryjoin="and_(A.b_id == B.id, A.id == C.a_id)", + secondaryjoin="D.id == B.d_id", + uselist=False + ) + + class B(Base): + __tablename__ = 'b' + + id = Column(Integer, primary_key=True) + d_id = Column(ForeignKey('d.id')) + + class C(Base): + __tablename__ = 'c' + + id = Column(Integer, primary_key=True) + a_id = Column(ForeignKey('a.id')) + d_id = Column(ForeignKey('d.id')) + + class D(Base): + __tablename__ = 'd' + + id = Column(Integer, primary_key=True) + +In the above example, we provide all three of :paramref:`~.relationship.secondary`, +:paramref:`~.relationship.primaryjoin`, and :paramref:`~.relationship.secondaryjoin`, +in the declarative style referring to the named tables ``a``, ``b``, ``c``, ``d`` +directly. A query from ``A`` to ``D`` looks like: + +.. 
sourcecode:: python+sql
+
+    sess.query(A).join(A.d).all()
+
+    {opensql}SELECT a.id AS a_id, a.b_id AS a_b_id
+    FROM a JOIN (
+        b AS b_1 JOIN d AS d_1 ON b_1.d_id = d_1.id
+            JOIN c AS c_1 ON c_1.d_id = d_1.id)
+        ON a.b_id = b_1.id AND a.id = c_1.a_id JOIN d ON d.id = b_1.d_id
+
+In the above example, we take advantage of being able to stuff multiple
+tables into a "secondary" container, so that we can join across many
+tables while still keeping things "simple" for :func:`.relationship`, in that
+there's just "one" table on both the "left" and the "right" side; the
+complexity is kept within the middle.
+
+.. versionadded:: 0.9.2 Support is improved for allowing a :func:`.join()`
+   construct to be used directly as the target of the :paramref:`~.relationship.secondary`
+   argument, including support for joins, eager joins and lazy loading,
+   as well as support within declarative to specify complex conditions such
+   as joins involving class names as targets.
+
+.. _relationship_non_primary_mapper:
+
+Relationship to Non Primary Mapper
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the previous section, we illustrated a technique where we used
+:paramref:`~.relationship.secondary` in order to place additional
+tables within a join condition.   There is one complex join case where
+even this technique is not sufficient; when we seek to join from ``A``
+to ``B``, making use of any number of ``C``, ``D``, etc. in between,
+while there are also join conditions between ``A`` and ``B``
+*directly*.   In this case, the join from ``A`` to ``B`` may be
+difficult to express with just a complex
+:paramref:`~.relationship.primaryjoin` condition, as the intermediary
+tables may need special handling, and it is also not expressible with
+a :paramref:`~.relationship.secondary` object, since the
+``A->secondary->B`` pattern does not support any references between
+``A`` and ``B`` directly.   When this **extremely advanced** case
+arises, we can resort to creating a second mapping as a target for the
+relationship.  This is where we use :func:`.mapper` in order to make a
+mapping to a class that includes all the additional tables we need for
+this join.  In order to produce this mapper as an "alternative" mapping
+for our class, we use the :paramref:`~.mapper.non_primary` flag.
+
+Below illustrates a :func:`.relationship` with a simple join from ``A`` to
+``B``, however the primaryjoin condition is augmented with two additional
+entities ``C`` and ``D``, which also must have rows that line up with
+the rows in both ``A`` and ``B`` simultaneously::
+
+    class A(Base):
+        __tablename__ = 'a'
+
+        id = Column(Integer, primary_key=True)
+        b_id = Column(ForeignKey('b.id'))
+
+    class B(Base):
+        __tablename__ = 'b'
+
+        id = Column(Integer, primary_key=True)
+
+    class C(Base):
+        __tablename__ = 'c'
+
+        id = Column(Integer, primary_key=True)
+        a_id = Column(ForeignKey('a.id'))
+
+    class D(Base):
+        __tablename__ = 'd'
+
+        id = Column(Integer, primary_key=True)
+        c_id = Column(ForeignKey('c.id'))
+        b_id = Column(ForeignKey('b.id'))
+
+    # 1. set up the join() as a variable, so we can refer
+    # to it in the mapping multiple times.
+    j = join(B, D, D.b_id == B.id).join(C, C.id == D.c_id)
+
+    # 2. Create a new mapper() to B, with non_primary=True.
+    # Columns in the join with the same name must be
+    # disambiguated within the mapping, using named properties.
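+    # (join() is assumed imported, e.g. "from sqlalchemy import join";
+    # a join()'s columns are keyed by table name plus column name, so
+    # j.c.b_id refers to b.id while j.c.d_b_id refers to d.b_id; the
+    # "b_id" property below maps both columns under one attribute)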
+
+    B_viacd = mapper(B, j, non_primary=True, properties={
+        "b_id": [j.c.b_id, j.c.d_b_id],
+        "d_id": j.c.d_id
+    })
+
+    A.b = relationship(B_viacd, primaryjoin=A.b_id == B_viacd.c.b_id)
+
+In the above case, our non-primary mapper for ``B`` will select additional
+columns when we query; these can be ignored:
+
+.. sourcecode:: python+sql
+
+    sess.query(A).join(A.b).all()
+
+    {opensql}SELECT a.id AS a_id, a.b_id AS a_b_id
+    FROM a JOIN (b JOIN d ON d.b_id = b.id JOIN c ON c.id = d.c_id) ON a.b_id = b.id
+
+
+Building Query-Enabled Properties
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Very ambitious custom join conditions may fail to be directly persistable, and
+in some cases may not even load correctly.  To remove the persistence part of
+the equation, use the flag :paramref:`~.relationship.viewonly` on the
+:func:`~sqlalchemy.orm.relationship`, which establishes it as a read-only
+attribute (data written to the collection will be ignored on flush()).
+However, in extreme cases, consider using a regular Python property in
+conjunction with :class:`.Query` as follows:
+
+.. sourcecode:: python+sql
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+
+        def _get_addresses(self):
+            return object_session(self).query(Address).with_parent(self).filter(...).all()
+        addresses = property(_get_addresses)
+
diff --git a/doc/build/orm/loading.rst b/doc/build/orm/loading.rst
index b2d8124e2..0aca6cd0c 100644
--- a/doc/build/orm/loading.rst
+++ b/doc/build/orm/loading.rst
@@ -1,546 +1,3 @@
-.. _loading_toplevel:
+:orphan:
-
-.. currentmodule:: sqlalchemy.orm
-
-Relationship Loading Techniques
-===============================
-
-A big part of SQLAlchemy is providing a wide range of control over how related objects get loaded when querying. This behavior
-can be configured at mapper construction time using the ``lazy`` parameter to the :func:`.relationship` function,
-as well as by using options with the :class:`.Query` object.
-
-Using Loader Strategies: Lazy Loading, Eager Loading
-----------------------------------------------------
-
-By default, all inter-object relationships are **lazy loading**. The scalar or
-collection attribute associated with a :func:`~sqlalchemy.orm.relationship`
-contains a trigger which fires the first time the attribute is accessed. This
-trigger, in all but one case, issues a SQL call at the point of access
-in order to load the related object or objects:
-
-.. sourcecode:: python+sql
-
-    {sql}>>> jack.addresses
-    SELECT addresses.id AS addresses_id, addresses.email_address AS addresses_email_address,
-    addresses.user_id AS addresses_user_id
-    FROM addresses
-    WHERE ? = addresses.user_id
-    [5]
-    {stop}[<Address(u'jack@google.com')>, <Address(u'j25@yahoo.com')>]
-
-The one case where SQL is not emitted is for a simple many-to-one relationship, when
-the related object can be identified by its primary key alone and that object is already
-present in the current :class:`.Session`.
-
-This default behavior of "load upon attribute access" is known as "lazy" or
-"select" loading - the name "select" because a "SELECT" statement is typically emitted
-when the attribute is first accessed.
-
-In the :ref:`ormtutorial_toplevel`, we introduced the concept of **Eager
-Loading**. We used an ``option`` in conjunction with the
-:class:`~sqlalchemy.orm.query.Query` object in order to indicate that a
-relationship should be loaded at the same time as the parent, within a single
-SQL query.
This option, known as :func:`.joinedload`, connects a JOIN (by default -a LEFT OUTER join) to the statement and populates the scalar/collection from the -same result set as that of the parent: - -.. sourcecode:: python+sql - - {sql}>>> jack = session.query(User).\ - ... options(joinedload('addresses')).\ - ... filter_by(name='jack').all() #doctest: +NORMALIZE_WHITESPACE - SELECT addresses_1.id AS addresses_1_id, addresses_1.email_address AS addresses_1_email_address, - addresses_1.user_id AS addresses_1_user_id, users.id AS users_id, users.name AS users_name, - users.fullname AS users_fullname, users.password AS users_password - FROM users LEFT OUTER JOIN addresses AS addresses_1 ON users.id = addresses_1.user_id - WHERE users.name = ? - ['jack'] - - -In addition to "joined eager loading", a second option for eager loading -exists, called "subquery eager loading". This kind of eager loading emits an -additional SQL statement for each collection requested, aggregated across all -parent objects: - -.. sourcecode:: python+sql - - {sql}>>> jack = session.query(User).\ - ... options(subqueryload('addresses')).\ - ... filter_by(name='jack').all() - SELECT users.id AS users_id, users.name AS users_name, users.fullname AS users_fullname, - users.password AS users_password - FROM users - WHERE users.name = ? - ('jack',) - SELECT addresses.id AS addresses_id, addresses.email_address AS addresses_email_address, - addresses.user_id AS addresses_user_id, anon_1.users_id AS anon_1_users_id - FROM (SELECT users.id AS users_id - FROM users - WHERE users.name = ?) AS anon_1 JOIN addresses ON anon_1.users_id = addresses.user_id - ORDER BY anon_1.users_id, addresses.id - ('jack',) - -The default **loader strategy** for any :func:`~sqlalchemy.orm.relationship` -is configured by the ``lazy`` keyword argument, which defaults to ``select`` - this indicates -a "select" statement . -Below we set it as ``joined`` so that the ``children`` relationship is eager -loaded using a JOIN:: - - # load the 'children' collection using LEFT OUTER JOIN - class Parent(Base): - __tablename__ = 'parent' - - id = Column(Integer, primary_key=True) - children = relationship("Child", lazy='joined') - -We can also set it to eagerly load using a second query for all collections, -using ``subquery``:: - - # load the 'children' collection using a second query which - # JOINS to a subquery of the original - class Parent(Base): - __tablename__ = 'parent' - - id = Column(Integer, primary_key=True) - children = relationship("Child", lazy='subquery') - -When querying, all three choices of loader strategy are available on a -per-query basis, using the :func:`~sqlalchemy.orm.joinedload`, -:func:`~sqlalchemy.orm.subqueryload` and :func:`~sqlalchemy.orm.lazyload` -query options: - -.. sourcecode:: python+sql - - # set children to load lazily - session.query(Parent).options(lazyload('children')).all() - - # set children to load eagerly with a join - session.query(Parent).options(joinedload('children')).all() - - # set children to load eagerly with a second statement - session.query(Parent).options(subqueryload('children')).all() - -.. 
_subqueryload_ordering: - -The Importance of Ordering --------------------------- - -A query which makes use of :func:`.subqueryload` in conjunction with a -limiting modifier such as :meth:`.Query.first`, :meth:`.Query.limit`, -or :meth:`.Query.offset` should **always** include :meth:`.Query.order_by` -against unique column(s) such as the primary key, so that the additional queries -emitted by :func:`.subqueryload` include -the same ordering as used by the parent query. Without it, there is a chance -that the inner query could return the wrong rows:: - - # incorrect, no ORDER BY - session.query(User).options(subqueryload(User.addresses)).first() - - # incorrect if User.name is not unique - session.query(User).options(subqueryload(User.addresses)).order_by(User.name).first() - - # correct - session.query(User).options(subqueryload(User.addresses)).order_by(User.name, User.id).first() - -.. seealso:: - - :ref:`faq_subqueryload_limit_sort` - detailed example - -Loading Along Paths -------------------- - -To reference a relationship that is deeper than one level, method chaining -may be used. The object returned by all loader options is an instance of -the :class:`.Load` class, which provides a so-called "generative" interface:: - - session.query(Parent).options( - joinedload('foo'). - joinedload('bar'). - joinedload('bat') - ).all() - -Using method chaining, the loader style of each link in the path is explicitly -stated. To navigate along a path without changing the existing loader style -of a particular attribute, the :func:`.defaultload` method/function may be used:: - - session.query(A).options( - defaultload("atob").joinedload("btoc") - ).all() - -.. versionchanged:: 0.9.0 - The previous approach of specifying dot-separated paths within loader - options has been superseded by the less ambiguous approach of the - :class:`.Load` object and related methods. With this system, the user - specifies the style of loading for each link along the chain explicitly, - rather than guessing between options like ``joinedload()`` vs. ``joinedload_all()``. - The :func:`.orm.defaultload` is provided to allow path navigation without - modification of existing loader options. The dot-separated path system - as well as the ``_all()`` functions will remain available for backwards- - compatibility indefinitely. - -Default Loading Strategies --------------------------- - -.. versionadded:: 0.7.5 - Default loader strategies as a new feature. - -Each of :func:`.joinedload`, :func:`.subqueryload`, :func:`.lazyload`, -and :func:`.noload` can be used to set the default style of -:func:`.relationship` loading -for a particular query, affecting all :func:`.relationship` -mapped -attributes not otherwise -specified in the :class:`.Query`. This feature is available by passing -the string ``'*'`` as the argument to any of these options:: - - session.query(MyClass).options(lazyload('*')) - -Above, the ``lazyload('*')`` option will supersede the ``lazy`` setting -of all :func:`.relationship` constructs in use for that query, -except for those which use the ``'dynamic'`` style of loading. -If some relationships specify -``lazy='joined'`` or ``lazy='subquery'``, for example, -using ``lazyload('*')`` will unilaterally -cause all those relationships to use ``'select'`` loading, e.g. emit a -SELECT statement when each attribute is accessed. - -The option does not supersede loader options stated in the -query, such as :func:`.eagerload`, -:func:`.subqueryload`, etc. 
The query below will still use joined loading -for the ``widget`` relationship:: - - session.query(MyClass).options( - lazyload('*'), - joinedload(MyClass.widget) - ) - -If multiple ``'*'`` options are passed, the last one overrides -those previously passed. - -Per-Entity Default Loading Strategies -------------------------------------- - -.. versionadded:: 0.9.0 - Per-entity default loader strategies. - -A variant of the default loader strategy is the ability to set the strategy -on a per-entity basis. For example, if querying for ``User`` and ``Address``, -we can instruct all relationships on ``Address`` only to use lazy loading -by first applying the :class:`.Load` object, then specifying the ``*`` as a -chained option:: - - session.query(User, Address).options(Load(Address).lazyload('*')) - -Above, all relationships on ``Address`` will be set to a lazy load. - -.. _zen_of_eager_loading: - -The Zen of Eager Loading -------------------------- - -The philosophy behind loader strategies is that any set of loading schemes can be -applied to a particular query, and *the results don't change* - only the number -of SQL statements required to fully load related objects and collections changes. A particular -query might start out using all lazy loads. After using it in context, it might be revealed -that particular attributes or collections are always accessed, and that it would be more -efficient to change the loader strategy for these. The strategy can be changed with no other -modifications to the query, the results will remain identical, but fewer SQL statements would be emitted. -In theory (and pretty much in practice), nothing you can do to the :class:`.Query` would make it load -a different set of primary or related objects based on a change in loader strategy. - -How :func:`joinedload` in particular achieves this result of not impacting -entity rows returned in any way is that it creates an anonymous alias of the joins it adds to your -query, so that they can't be referenced by other parts of the query. For example, -the query below uses :func:`.joinedload` to create a LEFT OUTER JOIN from ``users`` -to ``addresses``, however the ``ORDER BY`` added against ``Address.email_address`` -is not valid - the ``Address`` entity is not named in the query: - -.. sourcecode:: python+sql - - >>> jack = session.query(User).\ - ... options(joinedload(User.addresses)).\ - ... filter(User.name=='jack').\ - ... order_by(Address.email_address).all() - {opensql}SELECT addresses_1.id AS addresses_1_id, addresses_1.email_address AS addresses_1_email_address, - addresses_1.user_id AS addresses_1_user_id, users.id AS users_id, users.name AS users_name, - users.fullname AS users_fullname, users.password AS users_password - FROM users LEFT OUTER JOIN addresses AS addresses_1 ON users.id = addresses_1.user_id - WHERE users.name = ? ORDER BY addresses.email_address <-- this part is wrong ! - ['jack'] - -Above, ``ORDER BY addresses.email_address`` is not valid since ``addresses`` is not in the -FROM list. The correct way to load the ``User`` records and order by email -address is to use :meth:`.Query.join`: - -.. sourcecode:: python+sql - - >>> jack = session.query(User).\ - ... join(User.addresses).\ - ... filter(User.name=='jack').\ - ... order_by(Address.email_address).all() - {opensql} - SELECT users.id AS users_id, users.name AS users_name, - users.fullname AS users_fullname, users.password AS users_password - FROM users JOIN addresses ON users.id = addresses.user_id - WHERE users.name = ? 
ORDER BY addresses.email_address - ['jack'] - -The statement above is of course not the same as the previous one, in that the columns from ``addresses`` -are not included in the result at all. We can add :func:`.joinedload` back in, so that -there are two joins - one is that which we are ordering on, the other is used anonymously to -load the contents of the ``User.addresses`` collection: - -.. sourcecode:: python+sql - - >>> jack = session.query(User).\ - ... join(User.addresses).\ - ... options(joinedload(User.addresses)).\ - ... filter(User.name=='jack').\ - ... order_by(Address.email_address).all() - {opensql}SELECT addresses_1.id AS addresses_1_id, addresses_1.email_address AS addresses_1_email_address, - addresses_1.user_id AS addresses_1_user_id, users.id AS users_id, users.name AS users_name, - users.fullname AS users_fullname, users.password AS users_password - FROM users JOIN addresses ON users.id = addresses.user_id - LEFT OUTER JOIN addresses AS addresses_1 ON users.id = addresses_1.user_id - WHERE users.name = ? ORDER BY addresses.email_address - ['jack'] - -What we see above is that our usage of :meth:`.Query.join` is to supply JOIN clauses we'd like -to use in subsequent query criterion, whereas our usage of :func:`.joinedload` only concerns -itself with the loading of the ``User.addresses`` collection, for each ``User`` in the result. -In this case, the two joins most probably appear redundant - which they are. If we -wanted to use just one JOIN for collection loading as well as ordering, we use the -:func:`.contains_eager` option, described in :ref:`contains_eager` below. But -to see why :func:`joinedload` does what it does, consider if we were **filtering** on a -particular ``Address``: - -.. sourcecode:: python+sql - - >>> jack = session.query(User).\ - ... join(User.addresses).\ - ... options(joinedload(User.addresses)).\ - ... filter(User.name=='jack').\ - ... filter(Address.email_address=='someaddress@foo.com').\ - ... all() - {opensql}SELECT addresses_1.id AS addresses_1_id, addresses_1.email_address AS addresses_1_email_address, - addresses_1.user_id AS addresses_1_user_id, users.id AS users_id, users.name AS users_name, - users.fullname AS users_fullname, users.password AS users_password - FROM users JOIN addresses ON users.id = addresses.user_id - LEFT OUTER JOIN addresses AS addresses_1 ON users.id = addresses_1.user_id - WHERE users.name = ? AND addresses.email_address = ? - ['jack', 'someaddress@foo.com'] - -Above, we can see that the two JOINs have very different roles. One will match exactly -one row, that of the join of ``User`` and ``Address`` where ``Address.email_address=='someaddress@foo.com'``. -The other LEFT OUTER JOIN will match *all* ``Address`` rows related to ``User``, -and is only used to populate the ``User.addresses`` collection, for those ``User`` objects -that are returned. - -By changing the usage of :func:`.joinedload` to another style of loading, we can change -how the collection is loaded completely independently of SQL used to retrieve -the actual ``User`` rows we want. Below we change :func:`.joinedload` into -:func:`.subqueryload`: - -.. sourcecode:: python+sql - - >>> jack = session.query(User).\ - ... join(User.addresses).\ - ... options(subqueryload(User.addresses)).\ - ... filter(User.name=='jack').\ - ... filter(Address.email_address=='someaddress@foo.com').\ - ... 
all() - {opensql}SELECT users.id AS users_id, users.name AS users_name, - users.fullname AS users_fullname, users.password AS users_password - FROM users JOIN addresses ON users.id = addresses.user_id - WHERE users.name = ? AND addresses.email_address = ? - ['jack', 'someaddress@foo.com'] - - # ... subqueryload() emits a SELECT in order - # to load all address records ... - -When using joined eager loading, if the -query contains a modifier that impacts the rows returned -externally to the joins, such as when using DISTINCT, LIMIT, OFFSET -or equivalent, the completed statement is first -wrapped inside a subquery, and the joins used specifically for joined eager -loading are applied to the subquery. SQLAlchemy's -joined eager loading goes the extra mile, and then ten miles further, to -absolutely ensure that it does not affect the end result of the query, only -the way collections and related objects are loaded, no matter what the format of the query is. - -.. _what_kind_of_loading: - -What Kind of Loading to Use ? ------------------------------ - -Which type of loading to use typically comes down to optimizing the tradeoff -between number of SQL executions, complexity of SQL emitted, and amount of -data fetched. Lets take two examples, a :func:`~sqlalchemy.orm.relationship` -which references a collection, and a :func:`~sqlalchemy.orm.relationship` that -references a scalar many-to-one reference. - -* One to Many Collection - - * When using the default lazy loading, if you load 100 objects, and then access a collection on each of - them, a total of 101 SQL statements will be emitted, although each statement will typically be a - simple SELECT without any joins. - - * When using joined loading, the load of 100 objects and their collections will emit only one SQL - statement. However, the - total number of rows fetched will be equal to the sum of the size of all the collections, plus one - extra row for each parent object that has an empty collection. Each row will also contain the full - set of columns represented by the parents, repeated for each collection item - SQLAlchemy does not - re-fetch these columns other than those of the primary key, however most DBAPIs (with some - exceptions) will transmit the full data of each parent over the wire to the client connection in - any case. Therefore joined eager loading only makes sense when the size of the collections are - relatively small. The LEFT OUTER JOIN can also be performance intensive compared to an INNER join. - - * When using subquery loading, the load of 100 objects will emit two SQL statements. The second - statement will fetch a total number of rows equal to the sum of the size of all collections. An - INNER JOIN is used, and a minimum of parent columns are requested, only the primary keys. So a - subquery load makes sense when the collections are larger. - - * When multiple levels of depth are used with joined or subquery loading, loading collections-within- - collections will multiply the total number of rows fetched in a cartesian fashion. Both forms - of eager loading always join from the original parent class. - -* Many to One Reference - - * When using the default lazy loading, a load of 100 objects will like in the case of the collection - emit as many as 101 SQL statements. However - there is a significant exception to this, in that - if the many-to-one reference is a simple foreign key reference to the target's primary key, each - reference will be checked first in the current identity map using :meth:`.Query.get`. 
So here, - if the collection of objects references a relatively small set of target objects, or the full set - of possible target objects have already been loaded into the session and are strongly referenced, - using the default of `lazy='select'` is by far the most efficient way to go. - - * When using joined loading, the load of 100 objects will emit only one SQL statement. The join - will be a LEFT OUTER JOIN, and the total number of rows will be equal to 100 in all cases. - If you know that each parent definitely has a child (i.e. the foreign - key reference is NOT NULL), the joined load can be configured with - :paramref:`~.relationship.innerjoin` set to ``True``, which is - usually specified within the :func:`~sqlalchemy.orm.relationship`. For a load of objects where - there are many possible target references which may have not been loaded already, joined loading - with an INNER JOIN is extremely efficient. - - * Subquery loading will issue a second load for all the child objects, so for a load of 100 objects - there would be two SQL statements emitted. There's probably not much advantage here over - joined loading, however, except perhaps that subquery loading can use an INNER JOIN in all cases - whereas joined loading requires that the foreign key is NOT NULL. - -.. _joinedload_and_join: - -.. _contains_eager: - -Routing Explicit Joins/Statements into Eagerly Loaded Collections ------------------------------------------------------------------- - -The behavior of :func:`~sqlalchemy.orm.joinedload()` is such that joins are -created automatically, using anonymous aliases as targets, the results of which -are routed into collections and -scalar references on loaded objects. It is often the case that a query already -includes the necessary joins which represent a particular collection or scalar -reference, and the joins added by the joinedload feature are redundant - yet -you'd still like the collections/references to be populated. - -For this SQLAlchemy supplies the :func:`~sqlalchemy.orm.contains_eager()` -option. This option is used in the same manner as the -:func:`~sqlalchemy.orm.joinedload()` option except it is assumed that the -:class:`~sqlalchemy.orm.query.Query` will specify the appropriate joins -explicitly. Below, we specify a join between ``User`` and ``Address`` -and addtionally establish this as the basis for eager loading of ``User.addresses``:: - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - addresses = relationship("Address") - - class Address(Base): - __tablename__ = 'address' - - # ... - - q = session.query(User).join(User.addresses).\ - options(contains_eager(User.addresses)) - - -If the "eager" portion of the statement is "aliased", the ``alias`` keyword -argument to :func:`~sqlalchemy.orm.contains_eager` may be used to indicate it. -This is sent as a reference to an :func:`.aliased` or :class:`.Alias` -construct: - -.. sourcecode:: python+sql - - # use an alias of the Address entity - adalias = aliased(Address) - - # construct a Query object which expects the "addresses" results - query = session.query(User).\ - outerjoin(adalias, User.addresses).\ - options(contains_eager(User.addresses, alias=adalias)) - - # get results normally - {sql}r = query.all() - SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, adalias.address_id AS adalias_address_id, - adalias.user_id AS adalias_user_id, adalias.email_address AS adalias_email_address, (...other columns...) 
- FROM users LEFT OUTER JOIN email_addresses AS email_addresses_1 ON users.user_id = email_addresses_1.user_id - -The path given as the argument to :func:`.contains_eager` needs -to be a full path from the starting entity. For example if we were loading -``Users->orders->Order->items->Item``, the string version would look like:: - - query(User).options(contains_eager('orders').contains_eager('items')) - -Or using the class-bound descriptor:: - - query(User).options(contains_eager(User.orders).contains_eager(Order.items)) - -Advanced Usage with Arbitrary Statements -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``alias`` argument can be more creatively used, in that it can be made -to represent any set of arbitrary names to match up into a statement. -Below it is linked to a :func:`.select` which links a set of column objects -to a string SQL statement:: - - # label the columns of the addresses table - eager_columns = select([ - addresses.c.address_id.label('a1'), - addresses.c.email_address.label('a2'), - addresses.c.user_id.label('a3')]) - - # select from a raw SQL statement which uses those label names for the - # addresses table. contains_eager() matches them up. - query = session.query(User).\ - from_statement("select users.*, addresses.address_id as a1, " - "addresses.email_address as a2, addresses.user_id as a3 " - "from users left outer join addresses on users.user_id=addresses.user_id").\ - options(contains_eager(User.addresses, alias=eager_columns)) - - - -Relationship Loader API ------------------------- - -.. autofunction:: contains_alias - -.. autofunction:: contains_eager - -.. autofunction:: defaultload - -.. autofunction:: eagerload - -.. autofunction:: eagerload_all - -.. autofunction:: immediateload - -.. autofunction:: joinedload - -.. autofunction:: joinedload_all - -.. autofunction:: lazyload - -.. autofunction:: noload - -.. autofunction:: subqueryload - -.. autofunction:: subqueryload_all +Moved! :doc:`/orm/loading_relationships`
\ No newline at end of file
diff --git a/doc/build/orm/loading_columns.rst b/doc/build/orm/loading_columns.rst
new file mode 100644
index 000000000..2d0f02ed5
--- /dev/null
+++ b/doc/build/orm/loading_columns.rst
@@ -0,0 +1,195 @@
+.. module:: sqlalchemy.orm
+
+===============
+Loading Columns
+===============
+
+This section presents additional options regarding the loading of columns.
+
+.. _deferred:
+
+Deferred Column Loading
+========================
+
+This feature allows particular columns of a table to be loaded only
+upon direct access, instead of when the entity is queried using
+:class:`.Query`.  This feature is useful when one wants to avoid
+loading a large text or binary field into memory when it's not needed.
+Individual columns can be lazy loaded by themselves or placed into groups that
+lazy-load together, using the :func:`.orm.deferred` function to
+mark them as "deferred". In the example below, we define a mapping that will load each of
+``.excerpt`` and ``.photo`` in separate, individual-row SELECT statements when each
+attribute is first referenced on the individual object instance::
+
+    from sqlalchemy.orm import deferred
+    from sqlalchemy import Integer, String, Text, Binary, Column
+
+    class Book(Base):
+        __tablename__ = 'book'
+
+        book_id = Column(Integer, primary_key=True)
+        title = Column(String(200), nullable=False)
+        summary = Column(String(2000))
+        excerpt = deferred(Column(Text))
+        photo = deferred(Column(Binary))
+
+Classical mappings as always place the usage of :func:`.orm.deferred` in the
+``properties`` dictionary against the table-bound :class:`.Column`::
+
+    mapper(Book, book_table, properties={
+        'photo': deferred(book_table.c.photo)
+    })
+
+Deferred columns can be associated with a "group" name, so that they load
+together when any of them are first accessed.  The example below defines a
+mapping with a ``photos`` deferred group.  When one ``.photo`` is accessed, all three
+photos will be loaded in one SELECT statement. The ``.excerpt`` will be loaded
+separately when it is accessed::
+
+    class Book(Base):
+        __tablename__ = 'book'
+
+        book_id = Column(Integer, primary_key=True)
+        title = Column(String(200), nullable=False)
+        summary = Column(String(2000))
+        excerpt = deferred(Column(Text))
+        photo1 = deferred(Column(Binary), group='photos')
+        photo2 = deferred(Column(Binary), group='photos')
+        photo3 = deferred(Column(Binary), group='photos')
+
+You can defer or undefer columns at the :class:`~sqlalchemy.orm.query.Query`
+level using options, including :func:`.orm.defer` and :func:`.orm.undefer`::
+
+    from sqlalchemy.orm import defer, undefer
+
+    query = session.query(Book)
+    query = query.options(defer('summary'))
+    query = query.options(undefer('excerpt'))
+    query.all()
+
+:func:`.orm.deferred` attributes which are marked with a "group" can be undeferred
+using :func:`.orm.undefer_group`, sending in the group name::
+
+    from sqlalchemy.orm import undefer_group
+
+    query = session.query(Book)
+    query.options(undefer_group('photos')).all()
+
+Load Only Cols
+---------------
+
+An arbitrary set of columns can be selected as "load only" columns, which will
+be loaded while deferring all other columns on a given entity, using :func:`.orm.load_only`::
+
+    from sqlalchemy.orm import load_only
+
+    session.query(Book).options(load_only("summary", "excerpt"))
+
+.. versionadded:: 0.9.0
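+
+Columns excluded from the :func:`.orm.load_only` set are not lost; they are
+simply deferred, and will be loaded from the database when first accessed.
+A brief sketch continuing the ``Book`` mapping above (an already-configured
+``session`` is assumed)::
+
+    book = session.query(Book).options(
+                load_only("summary", "excerpt")).first()
+
+    # 'title' was not in the load_only() set; accessing it now
+    # loads its value for this row with an additional SELECT
+    print book.title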
+
+Deferred Loading with Multiple Entities
+---------------------------------------
+
+To specify column deferral options within a :class:`.Query` that loads multiple types
+of entity, the :class:`.Load` object can specify which parent entity to start with::
+
+    from sqlalchemy.orm import Load
+
+    query = session.query(Book, Author).join(Book.author)
+    query = query.options(
+                Load(Book).load_only("summary", "excerpt"),
+                Load(Author).defer("bio")
+            )
+
+To specify column deferral options along the path of various relationships,
+the options support chaining, where the loading style of each relationship
+is specified first, then is chained to the deferral options.  For example, to
+load ``Book`` instances, then joined-eager-load the ``Author``, then apply
+deferral options to the ``Author`` entity::
+
+    from sqlalchemy.orm import joinedload
+
+    query = session.query(Book)
+    query = query.options(
+                joinedload(Book.author).load_only("bio"),
+            )
+
+In the case where the loading style of parent relationships should be left
+unchanged, use :func:`.orm.defaultload`::
+
+    from sqlalchemy.orm import defaultload
+
+    query = session.query(Book)
+    query = query.options(
+                defaultload(Book.author).load_only("bio"),
+            )
+
+.. versionadded:: 0.9.0 support for :class:`.Load` and other options which
+   allow for better targeting of deferral options.
+
+Column Deferral API
+-------------------
+
+.. autofunction:: deferred
+
+.. autofunction:: defer
+
+.. autofunction:: load_only
+
+.. autofunction:: undefer
+
+.. autofunction:: undefer_group
+
+.. _bundles:
+
+Column Bundles
+===============
+
+The :class:`.Bundle` may be used to query for groups of columns under one
+namespace.
+
+.. versionadded:: 0.9.0
+
+The bundle allows columns to be grouped together::
+
+    from sqlalchemy.orm import Bundle
+
+    bn = Bundle('mybundle', MyClass.data1, MyClass.data2)
+    for row in session.query(bn).filter(bn.c.data1 == 'd1'):
+        print row.mybundle.data1, row.mybundle.data2
+
+The bundle can be subclassed to provide custom behaviors when results
+are fetched.  The method :meth:`.Bundle.create_row_processor` is given
+the :class:`.Query` and a set of "row processor" functions at query execution
+time; these processor functions when given a result row will return the
+individual attribute value, which can then be adapted into any kind of
+return data structure.  Below illustrates replacing the usual :class:`.KeyedTuple`
+return structure with a straight Python dictionary::
+
+    from sqlalchemy.orm import Bundle
+
+    class DictBundle(Bundle):
+        def create_row_processor(self, query, procs, labels):
+            """Override create_row_processor to return values as dictionaries"""
+            def proc(row):
+                # apply each column's processor to the row, keying
+                # the result by that column's label
+                return dict(
+                    zip(labels, (p(row) for p in procs))
+                )
+            return proc
+
+.. versionchanged:: 1.0
+
+   The ``proc()`` callable passed to the ``create_row_processor()``
+   method of custom :class:`.Bundle` classes now accepts only a single
+   "row" argument.
+
+A result from the above bundle will return dictionary values::
+
+    bn = DictBundle('mybundle', MyClass.data1, MyClass.data2)
+    for row in session.query(bn).filter(bn.c.data1 == 'd1'):
+        print row.mybundle['data1'], row.mybundle['data2']
+
+The :class:`.Bundle` construct is also integrated into the behavior
+of :func:`.composite`, where it is used to return composite attributes as objects
+when queried as individual attributes.
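+
+As a brief sketch of that integration, using the hypothetical ``Point`` /
+``Vertex`` composite mapping pattern (these names are illustrative and not
+defined elsewhere in this chapter)::
+
+    from sqlalchemy.orm import composite
+
+    class Point(object):
+        def __init__(self, x, y):
+            self.x = x
+            self.y = y
+
+        def __composite_values__(self):
+            return self.x, self.y
+
+    class Vertex(Base):
+        __tablename__ = 'vertex'
+
+        id = Column(Integer, primary_key=True)
+        x1 = Column(Integer)
+        y1 = Column(Integer)
+
+        start = composite(Point, x1, y1)
+
+    # querying the composite attribute by itself returns Point
+    # objects, assembled via an internally-generated Bundle
+    for start, in session.query(Vertex.start):
+        print start.x, start.y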
+ diff --git a/doc/build/orm/loading_objects.rst b/doc/build/orm/loading_objects.rst new file mode 100644 index 000000000..e7eb95a3f --- /dev/null +++ b/doc/build/orm/loading_objects.rst @@ -0,0 +1,15 @@ +======================= +Loading Objects +======================= + +Notes and features regarding the general loading of mapped objects. + +For an in-depth introduction to querying with the SQLAlchemy ORM, please see the :ref:`ormtutorial_toplevel`. + +.. toctree:: + :maxdepth: 2 + + loading_columns + loading_relationships + constructors + query diff --git a/doc/build/orm/loading_relationships.rst b/doc/build/orm/loading_relationships.rst new file mode 100644 index 000000000..297392f3e --- /dev/null +++ b/doc/build/orm/loading_relationships.rst @@ -0,0 +1,622 @@ +.. _loading_toplevel: + +.. currentmodule:: sqlalchemy.orm + +Relationship Loading Techniques +=============================== + +A big part of SQLAlchemy is providing a wide range of control over how related objects get loaded when querying. This behavior +can be configured at mapper construction time using the ``lazy`` parameter to the :func:`.relationship` function, +as well as by using options with the :class:`.Query` object. + +Using Loader Strategies: Lazy Loading, Eager Loading +---------------------------------------------------- + +By default, all inter-object relationships are **lazy loading**. The scalar or +collection attribute associated with a :func:`~sqlalchemy.orm.relationship` +contains a trigger which fires the first time the attribute is accessed. This +trigger, in all but one case, issues a SQL call at the point of access +in order to load the related object or objects: + +.. sourcecode:: python+sql + + {sql}>>> jack.addresses + SELECT addresses.id AS addresses_id, addresses.email_address AS addresses_email_address, + addresses.user_id AS addresses_user_id + FROM addresses + WHERE ? = addresses.user_id + [5] + {stop}[<Address(u'jack@google.com')>, <Address(u'j25@yahoo.com')>] + +The one case where SQL is not emitted is for a simple many-to-one relationship, when +the related object can be identified by its primary key alone and that object is already +present in the current :class:`.Session`. + +This default behavior of "load upon attribute access" is known as "lazy" or +"select" loading - the name "select" because a "SELECT" statement is typically emitted +when the attribute is first accessed. + +In the :ref:`ormtutorial_toplevel`, we introduced the concept of **Eager +Loading**. We used an ``option`` in conjunction with the +:class:`~sqlalchemy.orm.query.Query` object in order to indicate that a +relationship should be loaded at the same time as the parent, within a single +SQL query. This option, known as :func:`.joinedload`, connects a JOIN (by default +a LEFT OUTER join) to the statement and populates the scalar/collection from the +same result set as that of the parent: + +.. sourcecode:: python+sql + + {sql}>>> jack = session.query(User).\ + ... options(joinedload('addresses')).\ + ... filter_by(name='jack').all() #doctest: +NORMALIZE_WHITESPACE + SELECT addresses_1.id AS addresses_1_id, addresses_1.email_address AS addresses_1_email_address, + addresses_1.user_id AS addresses_1_user_id, users.id AS users_id, users.name AS users_name, + users.fullname AS users_fullname, users.password AS users_password + FROM users LEFT OUTER JOIN addresses AS addresses_1 ON users.id = addresses_1.user_id + WHERE users.name = ? 
+    ['jack']
+
+
+In addition to "joined eager loading", a second option for eager loading
+exists, called "subquery eager loading". This kind of eager loading emits an
+additional SQL statement for each collection requested, aggregated across all
+parent objects:
+
+.. sourcecode:: python+sql
+
+    {sql}>>> jack = session.query(User).\
+    ...    options(subqueryload('addresses')).\
+    ...    filter_by(name='jack').all()
+    SELECT users.id AS users_id, users.name AS users_name, users.fullname AS users_fullname,
+    users.password AS users_password
+    FROM users
+    WHERE users.name = ?
+    ('jack',)
+    SELECT addresses.id AS addresses_id, addresses.email_address AS addresses_email_address,
+    addresses.user_id AS addresses_user_id, anon_1.users_id AS anon_1_users_id
+    FROM (SELECT users.id AS users_id
+    FROM users
+    WHERE users.name = ?) AS anon_1 JOIN addresses ON anon_1.users_id = addresses.user_id
+    ORDER BY anon_1.users_id, addresses.id
+    ('jack',)
+
+The default **loader strategy** for any :func:`~sqlalchemy.orm.relationship`
+is configured by the ``lazy`` keyword argument, which defaults to ``select`` - this indicates
+"select" loading, i.e. a SELECT statement emitted when the attribute is first accessed.
+Below we set it as ``joined`` so that the ``children`` relationship is eager
+loaded using a JOIN::
+
+    # load the 'children' collection using LEFT OUTER JOIN
+    class Parent(Base):
+        __tablename__ = 'parent'
+
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child", lazy='joined')
+
+We can also set it to eagerly load using a second query for all collections,
+using ``subquery``::
+
+    # load the 'children' collection using a second query which
+    # JOINS to a subquery of the original
+    class Parent(Base):
+        __tablename__ = 'parent'
+
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child", lazy='subquery')
+
+When querying, all three choices of loader strategy are available on a
+per-query basis, using the :func:`~sqlalchemy.orm.joinedload`,
+:func:`~sqlalchemy.orm.subqueryload` and :func:`~sqlalchemy.orm.lazyload`
+query options:
+
+.. sourcecode:: python+sql
+
+    # set children to load lazily
+    session.query(Parent).options(lazyload('children')).all()
+
+    # set children to load eagerly with a join
+    session.query(Parent).options(joinedload('children')).all()
+
+    # set children to load eagerly with a second statement
+    session.query(Parent).options(subqueryload('children')).all()
+
+.. _subqueryload_ordering:
+
+The Importance of Ordering
+--------------------------
+
+A query which makes use of :func:`.subqueryload` in conjunction with a
+limiting modifier such as :meth:`.Query.first`, :meth:`.Query.limit`,
+or :meth:`.Query.offset` should **always** include :meth:`.Query.order_by`
+against unique column(s) such as the primary key, so that the additional queries
+emitted by :func:`.subqueryload` include
+the same ordering as used by the parent query. Without it, there is a chance
+that the inner query could return the wrong rows::
+
+    # incorrect, no ORDER BY
+    session.query(User).options(subqueryload(User.addresses)).first()
+
+    # incorrect if User.name is not unique
+    session.query(User).options(subqueryload(User.addresses)).order_by(User.name).first()
+
+    # correct
+    session.query(User).options(subqueryload(User.addresses)).order_by(User.name, User.id).first()
+
+.. seealso::
+
+    :ref:`faq_subqueryload_limit_sort` - detailed example
+
+Loading Along Paths
+-------------------
+
+To reference a relationship that is deeper than one level, method chaining
+may be used. The object returned by all loader options is an instance of
+the :class:`.Load` class, which provides a so-called "generative" interface::
+
+    session.query(Parent).options(
+        joinedload('foo').
+            joinedload('bar').
+            joinedload('bat')
+    ).all()
+
+Using method chaining, the loader style of each link in the path is explicitly
+stated. To navigate along a path without changing the existing loader style
+of a particular attribute, the :func:`.defaultload` method/function may be used::
+
+    session.query(A).options(
+        defaultload("atob").joinedload("btoc")
+    ).all()
+
+.. versionchanged:: 0.9.0
+    The previous approach of specifying dot-separated paths within loader
+    options has been superseded by the less ambiguous approach of the
+    :class:`.Load` object and related methods. With this system, the user
+    specifies the style of loading for each link along the chain explicitly,
+    rather than guessing between options like ``joinedload()`` vs. ``joinedload_all()``.
+    The :func:`.orm.defaultload` is provided to allow path navigation without
+    modification of existing loader options. The dot-separated path system
+    as well as the ``_all()`` functions will remain available
+    for backwards-compatibility indefinitely.
+
+Default Loading Strategies
+--------------------------
+
+.. versionadded:: 0.7.5
+    Default loader strategies as a new feature.
+
+Each of :func:`.joinedload`, :func:`.subqueryload`, :func:`.lazyload`,
+and :func:`.noload` can be used to set the default style of
+:func:`.relationship` loading
+for a particular query, affecting all :func:`.relationship` -mapped
+attributes not otherwise
+specified in the :class:`.Query`. This feature is available by passing
+the string ``'*'`` as the argument to any of these options::
+
+    session.query(MyClass).options(lazyload('*'))
+
+Above, the ``lazyload('*')`` option will supersede the ``lazy`` setting
+of all :func:`.relationship` constructs in use for that query,
+except for those which use the ``'dynamic'`` style of loading.
+If some relationships specify
+``lazy='joined'`` or ``lazy='subquery'``, for example,
+using ``lazyload('*')`` will unilaterally
+cause all those relationships to use ``'select'`` loading, e.g. emit a
+SELECT statement when each attribute is accessed.
+
+The option does not supersede loader options stated in the
+query, such as :func:`.eagerload`,
+:func:`.subqueryload`, etc. The query below will still use joined loading
+for the ``widget`` relationship::
+
+    session.query(MyClass).options(
+        lazyload('*'),
+        joinedload(MyClass.widget)
+    )
+
+If multiple ``'*'`` options are passed, the last one overrides
+those previously passed.
+
+Per-Entity Default Loading Strategies
+-------------------------------------
+
+.. versionadded:: 0.9.0
+    Per-entity default loader strategies.
+
+A variant of the default loader strategy is the ability to set the strategy
+on a per-entity basis. For example, if querying for ``User`` and ``Address``,
+we can instruct all relationships on ``Address`` only to use lazy loading
+by first applying the :class:`.Load` object, then specifying the ``*`` as a
+chained option::
+
+    session.query(User, Address).options(Load(Address).lazyload('*'))
+
+Above, all relationships on ``Address`` will be set to a lazy load.
+
+..
_zen_of_eager_loading: + +The Zen of Eager Loading +------------------------- + +The philosophy behind loader strategies is that any set of loading schemes can be +applied to a particular query, and *the results don't change* - only the number +of SQL statements required to fully load related objects and collections changes. A particular +query might start out using all lazy loads. After using it in context, it might be revealed +that particular attributes or collections are always accessed, and that it would be more +efficient to change the loader strategy for these. The strategy can be changed with no other +modifications to the query, the results will remain identical, but fewer SQL statements would be emitted. +In theory (and pretty much in practice), nothing you can do to the :class:`.Query` would make it load +a different set of primary or related objects based on a change in loader strategy. + +How :func:`joinedload` in particular achieves this result of not impacting +entity rows returned in any way is that it creates an anonymous alias of the joins it adds to your +query, so that they can't be referenced by other parts of the query. For example, +the query below uses :func:`.joinedload` to create a LEFT OUTER JOIN from ``users`` +to ``addresses``, however the ``ORDER BY`` added against ``Address.email_address`` +is not valid - the ``Address`` entity is not named in the query: + +.. sourcecode:: python+sql + + >>> jack = session.query(User).\ + ... options(joinedload(User.addresses)).\ + ... filter(User.name=='jack').\ + ... order_by(Address.email_address).all() + {opensql}SELECT addresses_1.id AS addresses_1_id, addresses_1.email_address AS addresses_1_email_address, + addresses_1.user_id AS addresses_1_user_id, users.id AS users_id, users.name AS users_name, + users.fullname AS users_fullname, users.password AS users_password + FROM users LEFT OUTER JOIN addresses AS addresses_1 ON users.id = addresses_1.user_id + WHERE users.name = ? ORDER BY addresses.email_address <-- this part is wrong ! + ['jack'] + +Above, ``ORDER BY addresses.email_address`` is not valid since ``addresses`` is not in the +FROM list. The correct way to load the ``User`` records and order by email +address is to use :meth:`.Query.join`: + +.. sourcecode:: python+sql + + >>> jack = session.query(User).\ + ... join(User.addresses).\ + ... filter(User.name=='jack').\ + ... order_by(Address.email_address).all() + {opensql} + SELECT users.id AS users_id, users.name AS users_name, + users.fullname AS users_fullname, users.password AS users_password + FROM users JOIN addresses ON users.id = addresses.user_id + WHERE users.name = ? ORDER BY addresses.email_address + ['jack'] + +The statement above is of course not the same as the previous one, in that the columns from ``addresses`` +are not included in the result at all. We can add :func:`.joinedload` back in, so that +there are two joins - one is that which we are ordering on, the other is used anonymously to +load the contents of the ``User.addresses`` collection: + +.. sourcecode:: python+sql + + >>> jack = session.query(User).\ + ... join(User.addresses).\ + ... options(joinedload(User.addresses)).\ + ... filter(User.name=='jack').\ + ... 
order_by(Address.email_address).all()
+    {opensql}SELECT addresses_1.id AS addresses_1_id, addresses_1.email_address AS addresses_1_email_address,
+    addresses_1.user_id AS addresses_1_user_id, users.id AS users_id, users.name AS users_name,
+    users.fullname AS users_fullname, users.password AS users_password
+    FROM users JOIN addresses ON users.id = addresses.user_id
+    LEFT OUTER JOIN addresses AS addresses_1 ON users.id = addresses_1.user_id
+    WHERE users.name = ? ORDER BY addresses.email_address
+    ['jack']
+
+What we see above is that our usage of :meth:`.Query.join` is to supply JOIN clauses we'd like
+to use in subsequent query criteria, whereas our usage of :func:`.joinedload` only concerns
+itself with the loading of the ``User.addresses`` collection, for each ``User`` in the result.
+In this case, the two joins most probably appear redundant - which they are. If we
+wanted to use just one JOIN for collection loading as well as ordering, we use the
+:func:`.contains_eager` option, described in :ref:`contains_eager` below. But
+to see why :func:`joinedload` does what it does, consider if we were **filtering** on a
+particular ``Address``:
+
+.. sourcecode:: python+sql
+
+    >>> jack = session.query(User).\
+    ...    join(User.addresses).\
+    ...    options(joinedload(User.addresses)).\
+    ...    filter(User.name=='jack').\
+    ...    filter(Address.email_address=='someaddress@foo.com').\
+    ...    all()
+    {opensql}SELECT addresses_1.id AS addresses_1_id, addresses_1.email_address AS addresses_1_email_address,
+    addresses_1.user_id AS addresses_1_user_id, users.id AS users_id, users.name AS users_name,
+    users.fullname AS users_fullname, users.password AS users_password
+    FROM users JOIN addresses ON users.id = addresses.user_id
+    LEFT OUTER JOIN addresses AS addresses_1 ON users.id = addresses_1.user_id
+    WHERE users.name = ? AND addresses.email_address = ?
+    ['jack', 'someaddress@foo.com']
+
+Above, we can see that the two JOINs have very different roles. One will match exactly
+one row, that of the join of ``User`` and ``Address`` where ``Address.email_address=='someaddress@foo.com'``.
+The other LEFT OUTER JOIN will match *all* ``Address`` rows related to ``User``,
+and is only used to populate the ``User.addresses`` collection, for those ``User`` objects
+that are returned.
+
+By changing the usage of :func:`.joinedload` to another style of loading, we can change
+how the collection is loaded completely independently of SQL used to retrieve
+the actual ``User`` rows we want. Below we change :func:`.joinedload` into
+:func:`.subqueryload`:
+
+.. sourcecode:: python+sql
+
+    >>> jack = session.query(User).\
+    ...    join(User.addresses).\
+    ...    options(subqueryload(User.addresses)).\
+    ...    filter(User.name=='jack').\
+    ...    filter(Address.email_address=='someaddress@foo.com').\
+    ...    all()
+    {opensql}SELECT users.id AS users_id, users.name AS users_name,
+    users.fullname AS users_fullname, users.password AS users_password
+    FROM users JOIN addresses ON users.id = addresses.user_id
+    WHERE users.name = ? AND addresses.email_address = ?
+    ['jack', 'someaddress@foo.com']
+
+    # ... subqueryload() emits a SELECT in order
+    # to load all address records ...
+
+When using joined eager loading, if the
+query contains a modifier that impacts the rows returned
+externally to the joins, such as when using DISTINCT, LIMIT, OFFSET
+or equivalent, the completed statement is first
+wrapped inside a subquery, and the joins used specifically for joined eager
+loading are applied to the subquery.
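+
+For example (a condensed sketch; the SQL shown here is abbreviated and
+approximate rather than verbatim dialect output), combining
+:func:`.joinedload` with a LIMIT illustrates the subquery wrapping:
+
+.. sourcecode:: python+sql
+
+    >>> users = session.query(User).\
+    ...    options(joinedload(User.addresses)).\
+    ...    limit(2).all()
+    {opensql}SELECT anon_1.users_id AS anon_1_users_id, ..., addresses_1.id AS addresses_1_id, ...
+    FROM (SELECT users.id AS users_id, ...
+    FROM users
+    LIMIT ? OFFSET ?) AS anon_1
+    LEFT OUTER JOIN addresses AS addresses_1 ON anon_1.users_id = addresses_1.user_id
+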
+SQLAlchemy's joined eager loading goes the extra mile, and then ten miles further, to
+absolutely ensure that it does not affect the end result of the query, only
+the way collections and related objects are loaded, no matter what the format of the query is.
+
+.. _what_kind_of_loading:
+
+What Kind of Loading to Use?
+-----------------------------
+
+Which type of loading to use typically comes down to optimizing the tradeoff
+between number of SQL executions, complexity of SQL emitted, and amount of
+data fetched. Let's take two examples, a :func:`~sqlalchemy.orm.relationship`
+which references a collection, and a :func:`~sqlalchemy.orm.relationship` that
+references a scalar many-to-one reference; a configuration sketch applying
+these guidelines follows the list.
+
+* One to Many Collection
+
+  * When using the default lazy loading, if you load 100 objects, and then access a collection on each of
+    them, a total of 101 SQL statements will be emitted, although each statement will typically be a
+    simple SELECT without any joins.
+
+  * When using joined loading, the load of 100 objects and their collections will emit only one SQL
+    statement. However, the
+    total number of rows fetched will be equal to the sum of the size of all the collections, plus one
+    extra row for each parent object that has an empty collection. Each row will also contain the full
+    set of columns represented by the parents, repeated for each collection item - SQLAlchemy does not
+    re-fetch these columns other than those of the primary key, however most DBAPIs (with some
+    exceptions) will transmit the full data of each parent over the wire to the client connection in
+    any case. Therefore joined eager loading only makes sense when the size of the collections is
+    relatively small. The LEFT OUTER JOIN can also be performance-intensive compared to an INNER JOIN.
+
+  * When using subquery loading, the load of 100 objects will emit two SQL statements. The second
+    statement will fetch a total number of rows equal to the sum of the size of all collections. An
+    INNER JOIN is used, and a minimum of parent columns are requested, only the primary keys. So a
+    subquery load makes sense when the collections are larger.
+
+  * When multiple levels of depth are used with joined or subquery loading, loading
+    collections-within-collections will multiply the total number of rows fetched in a cartesian fashion. Both forms
+    of eager loading always join from the original parent class.
+
+* Many to One Reference
+
+  * When using the default lazy loading, a load of 100 objects will, as in the case of the collection,
+    emit as many as 101 SQL statements. However - there is a significant exception to this, in that
+    if the many-to-one reference is a simple foreign key reference to the target's primary key, each
+    reference will be checked first in the current identity map using :meth:`.Query.get`. So here,
+    if the collection of objects references a relatively small set of target objects, or the full set
+    of possible target objects have already been loaded into the session and are strongly referenced,
+    using the default of ``lazy='select'`` is by far the most efficient way to go.
+
+  * When using joined loading, the load of 100 objects will emit only one SQL statement. The join
+    will be a LEFT OUTER JOIN, and the total number of rows will be equal to 100 in all cases.
+    If you know that each parent definitely has a child (i.e. the foreign
+    key reference is NOT NULL), the joined load can be configured with
+    :paramref:`~.relationship.innerjoin` set to ``True``, which is
+    usually specified within the :func:`~sqlalchemy.orm.relationship`. For a load of objects where
+    there are many possible target references which may not have been loaded already, joined loading
+    with an INNER JOIN is extremely efficient.
+
+  * Subquery loading will issue a second load for all the child objects, so for a load of 100 objects
+    there would be two SQL statements emitted. There's probably not much advantage here over
+    joined loading, however, except perhaps that subquery loading can use an INNER JOIN in all cases
+    whereas joined loading requires that the foreign key is NOT NULL.
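+
+Tying the above guidelines together, here is a condensed configuration
+sketch (the ``Parent`` / ``Child`` mapping reuses the earlier example; the
+many-to-one ``Child.parent`` relationship is added here purely for
+illustration)::
+
+    # large collection: subquery loading keeps the parent rows slim at the
+    # cost of one additional SELECT
+    class Parent(Base):
+        __tablename__ = 'parent'
+
+        id = Column(Integer, primary_key=True)
+        children = relationship("Child", lazy='subquery')
+
+    # many-to-one across a NOT NULL foreign key: joined loading with an
+    # INNER JOIN adds no extra statements and no extra rows
+    class Child(Base):
+        __tablename__ = 'child'
+
+        id = Column(Integer, primary_key=True)
+        parent_id = Column(Integer, ForeignKey('parent.id'), nullable=False)
+        parent = relationship("Parent", lazy='joined', innerjoin=True)
+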
+.. _joinedload_and_join:
+
+.. _contains_eager:
+
+Routing Explicit Joins/Statements into Eagerly Loaded Collections
+------------------------------------------------------------------
+
+The behavior of :func:`~sqlalchemy.orm.joinedload()` is such that joins are
+created automatically, using anonymous aliases as targets, the results of which
+are routed into collections and
+scalar references on loaded objects. It is often the case that a query already
+includes the necessary joins which represent a particular collection or scalar
+reference, and the joins added by the joinedload feature are redundant - yet
+you'd still like the collections/references to be populated.
+
+For this SQLAlchemy supplies the :func:`~sqlalchemy.orm.contains_eager()`
+option. This option is used in the same manner as the
+:func:`~sqlalchemy.orm.joinedload()` option except it is assumed that the
+:class:`~sqlalchemy.orm.query.Query` will specify the appropriate joins
+explicitly. Below, we specify a join between ``User`` and ``Address``
+and additionally establish this as the basis for eager loading of ``User.addresses``::
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        addresses = relationship("Address")
+
+    class Address(Base):
+        __tablename__ = 'address'
+
+        # ...
+
+    q = session.query(User).join(User.addresses).\
+            options(contains_eager(User.addresses))
+
+
+If the "eager" portion of the statement is "aliased", the ``alias`` keyword
+argument to :func:`~sqlalchemy.orm.contains_eager` may be used to indicate it.
+This is sent as a reference to an :func:`.aliased` or :class:`.Alias`
+construct:
+
+.. sourcecode:: python+sql
+
+    # use an alias of the Address entity
+    adalias = aliased(Address)
+
+    # construct a Query object which expects the "addresses" results
+    query = session.query(User).\
+        outerjoin(adalias, User.addresses).\
+        options(contains_eager(User.addresses, alias=adalias))
+
+    # get results normally
+    {sql}r = query.all()
+    SELECT users.user_id AS users_user_id, users.user_name AS users_user_name, adalias.address_id AS adalias_address_id,
+    adalias.user_id AS adalias_user_id, adalias.email_address AS adalias_email_address, (...other columns...)
+    FROM users LEFT OUTER JOIN email_addresses AS email_addresses_1 ON users.user_id = email_addresses_1.user_id
+
+The path given as the argument to :func:`.contains_eager` needs
+to be a full path from the starting entity. For example, if we were loading
+``Users->orders->Order->items->Item``, the string version would look like::
+
+    query(User).options(contains_eager('orders').contains_eager('items'))
+
+Or using the class-bound descriptor::
+
+    query(User).options(contains_eager(User.orders).contains_eager(Order.items))
+
+Advanced Usage with Arbitrary Statements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``alias`` argument can be more creatively used, in that it can be made
+to represent any set of arbitrary names to match up into a statement.
+Below it is linked to a :func:`.select` which links a set of column objects
+to a string SQL statement::
+
+    # label the columns of the addresses table
+    eager_columns = select([
+        addresses.c.address_id.label('a1'),
+        addresses.c.email_address.label('a2'),
+        addresses.c.user_id.label('a3')])
+
+    # select from a raw SQL statement which uses those label names for the
+    # addresses table. contains_eager() matches them up.
+    query = session.query(User).\
+        from_statement("select users.*, addresses.address_id as a1, "
+            "addresses.email_address as a2, addresses.user_id as a3 "
+            "from users left outer join addresses on users.user_id=addresses.user_id").\
+        options(contains_eager(User.addresses, alias=eager_columns))
+
+Creating Custom Load Rules
+---------------------------
+
+.. warning:: This is an advanced technique! Great care and testing
+   should be applied.
+
+The ORM has various edge cases where the value of an attribute is locally
+available, however the ORM itself doesn't have awareness of this. There
+are also cases when a user-defined system of loading attributes is desirable.
+To support the use case of user-defined loading systems, a key function
+:func:`.attributes.set_committed_value` is provided. This function is
+basically equivalent to Python's own ``setattr()`` function, except that
+when applied to a target object, SQLAlchemy's "attribute history" system
+which is used to determine flush-time changes is bypassed; the attribute
+is assigned in the same way as if the ORM loaded it that way from the database.
+
+The use of :func:`.attributes.set_committed_value` can be combined with another
+key event known as :meth:`.InstanceEvents.load` to produce attribute-population
+behaviors when an object is loaded. One such example is the bi-directional
+"one-to-one" case, where loading the "many-to-one" side of a one-to-one
+should also imply the value of the "one-to-many" side. The SQLAlchemy ORM
+does not consider backrefs when loading related objects, and it views a
+"one-to-one" as just another "one-to-many", that just happens to be one
+row.
+
+Given the following mapping::
+
+    from sqlalchemy import Integer, ForeignKey, Column
+    from sqlalchemy.orm import relationship, backref
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+
+
+    class A(Base):
+        __tablename__ = 'a'
+        id = Column(Integer, primary_key=True)
+        b_id = Column(ForeignKey('b.id'))
+        b = relationship("B", backref=backref("a", uselist=False), lazy='joined')
+
+
+    class B(Base):
+        __tablename__ = 'b'
+        id = Column(Integer, primary_key=True)
+
+
+If we query for an ``A`` row, and then ask it for ``a.b.a``, we will get
+an extra SELECT::
+
+    >>> a1.b.a
+    SELECT a.id AS a_id, a.b_id AS a_b_id
+    FROM a
+    WHERE ? = a.b_id
+
+This SELECT is redundant because ``b.a`` is the same value as ``a1``.
We +can create an on-load rule to populate this for us:: + + from sqlalchemy import event + from sqlalchemy.orm import attributes + + @event.listens_for(A, "load") + def load_b(target, context): + if 'b' in target.__dict__: + attributes.set_committed_value(target.b, 'a', target) + +Now when we query for ``A``, we will get ``A.b`` from the joined eager load, +and ``A.b.a`` from our event: + +.. sourcecode:: pycon+sql + + {sql}a1 = s.query(A).first() + SELECT a.id AS a_id, a.b_id AS a_b_id, b_1.id AS b_1_id + FROM a LEFT OUTER JOIN b AS b_1 ON b_1.id = a.b_id + LIMIT ? OFFSET ? + (1, 0) + {stop}assert a1.b.a is a1 + + +Relationship Loader API +------------------------ + +.. autofunction:: contains_alias + +.. autofunction:: contains_eager + +.. autofunction:: defaultload + +.. autofunction:: eagerload + +.. autofunction:: eagerload_all + +.. autofunction:: immediateload + +.. autofunction:: joinedload + +.. autofunction:: joinedload_all + +.. autofunction:: lazyload + +.. autofunction:: noload + +.. autofunction:: subqueryload + +.. autofunction:: subqueryload_all diff --git a/doc/build/orm/mapped_attributes.rst b/doc/build/orm/mapped_attributes.rst new file mode 100644 index 000000000..2e7e9b3eb --- /dev/null +++ b/doc/build/orm/mapped_attributes.rst @@ -0,0 +1,340 @@ +.. module:: sqlalchemy.orm + +Changing Attribute Behavior +============================ + +.. _simple_validators: + +Simple Validators +----------------- + +A quick way to add a "validation" routine to an attribute is to use the +:func:`~sqlalchemy.orm.validates` decorator. An attribute validator can raise +an exception, halting the process of mutating the attribute's value, or can +change the given value into something different. Validators, like all +attribute extensions, are only called by normal userland code; they are not +issued when the ORM is populating the object:: + + from sqlalchemy.orm import validates + + class EmailAddress(Base): + __tablename__ = 'address' + + id = Column(Integer, primary_key=True) + email = Column(String) + + @validates('email') + def validate_email(self, key, address): + assert '@' in address + return address + +.. versionchanged:: 1.0.0 - validators are no longer triggered within + the flush process when the newly fetched values for primary key + columns as well as some python- or server-side defaults are fetched. + Prior to 1.0, validators may be triggered in those cases as well. + + +Validators also receive collection append events, when items are added to a +collection:: + + from sqlalchemy.orm import validates + + class User(Base): + # ... + + addresses = relationship("Address") + + @validates('addresses') + def validate_address(self, key, address): + assert '@' in address.email + return address + + +The validation function by default does not get emitted for collection +remove events, as the typical expectation is that a value being discarded +doesn't require validation. However, :func:`.validates` supports reception +of these events by specifying ``include_removes=True`` to the decorator. When +this flag is set, the validation function must receive an additional boolean +argument which if ``True`` indicates that the operation is a removal:: + + from sqlalchemy.orm import validates + + class User(Base): + # ... 
+ + addresses = relationship("Address") + + @validates('addresses', include_removes=True) + def validate_address(self, key, address, is_remove): + if is_remove: + raise ValueError( + "not allowed to remove items from the collection") + else: + assert '@' in address.email + return address + +The case where mutually dependent validators are linked via a backref +can also be tailored, using the ``include_backrefs=False`` option; this option, +when set to ``False``, prevents a validation function from emitting if the +event occurs as a result of a backref:: + + from sqlalchemy.orm import validates + + class User(Base): + # ... + + addresses = relationship("Address", backref='user') + + @validates('addresses', include_backrefs=False) + def validate_address(self, key, address): + assert '@' in address.email + return address + +Above, if we were to assign to ``Address.user`` as in ``some_address.user = some_user``, +the ``validate_address()`` function would *not* be emitted, even though an append +occurs to ``some_user.addresses`` - the event is caused by a backref. + +Note that the :func:`~.validates` decorator is a convenience function built on +top of attribute events. An application that requires more control over +configuration of attribute change behavior can make use of this system, +described at :class:`~.AttributeEvents`. + +.. autofunction:: validates + +.. _mapper_hybrids: + +Using Descriptors and Hybrids +----------------------------- + +A more comprehensive way to produce modified behavior for an attribute is to +use :term:`descriptors`. These are commonly used in Python using the ``property()`` +function. The standard SQLAlchemy technique for descriptors is to create a +plain descriptor, and to have it read/write from a mapped attribute with a +different name. Below we illustrate this using Python 2.6-style properties:: + + class EmailAddress(Base): + __tablename__ = 'email_address' + + id = Column(Integer, primary_key=True) + + # name the attribute with an underscore, + # different from the column name + _email = Column("email", String) + + # then create an ".email" attribute + # to get/set "._email" + @property + def email(self): + return self._email + + @email.setter + def email(self, email): + self._email = email + +The approach above will work, but there's more we can add. While our +``EmailAddress`` object will shuttle the value through the ``email`` +descriptor and into the ``_email`` mapped attribute, the class level +``EmailAddress.email`` attribute does not have the usual expression semantics +usable with :class:`.Query`. To provide these, we instead use the +:mod:`~sqlalchemy.ext.hybrid` extension as follows:: + + from sqlalchemy.ext.hybrid import hybrid_property + + class EmailAddress(Base): + __tablename__ = 'email_address' + + id = Column(Integer, primary_key=True) + + _email = Column("email", String) + + @hybrid_property + def email(self): + return self._email + + @email.setter + def email(self, email): + self._email = email + +The ``.email`` attribute, in addition to providing getter/setter behavior when we have an +instance of ``EmailAddress``, also provides a SQL expression when used at the class level, +that is, from the ``EmailAddress`` class directly: + +.. sourcecode:: python+sql + + from sqlalchemy.orm import Session + session = Session() + + {sql}address = session.query(EmailAddress).\ + filter(EmailAddress.email == 'address@example.com').\ + one() + SELECT address.email AS address_email, address.id AS address_id + FROM address + WHERE address.email = ? 
+ ('address@example.com',) + {stop} + + address.email = 'otheraddress@example.com' + {sql}session.commit() + UPDATE address SET email=? WHERE address.id = ? + ('otheraddress@example.com', 1) + COMMIT + {stop} + +The :class:`~.hybrid_property` also allows us to change the behavior of the +attribute, including defining separate behaviors when the attribute is +accessed at the instance level versus at the class/expression level, using the +:meth:`.hybrid_property.expression` modifier. Such as, if we wanted to add a +host name automatically, we might define two sets of string manipulation +logic:: + + class EmailAddress(Base): + __tablename__ = 'email_address' + + id = Column(Integer, primary_key=True) + + _email = Column("email", String) + + @hybrid_property + def email(self): + """Return the value of _email up until the last twelve + characters.""" + + return self._email[:-12] + + @email.setter + def email(self, email): + """Set the value of _email, tacking on the twelve character + value @example.com.""" + + self._email = email + "@example.com" + + @email.expression + def email(cls): + """Produce a SQL expression that represents the value + of the _email column, minus the last twelve characters.""" + + return func.substr(cls._email, 0, func.length(cls._email) - 12) + +Above, accessing the ``email`` property of an instance of ``EmailAddress`` +will return the value of the ``_email`` attribute, removing or adding the +hostname ``@example.com`` from the value. When we query against the ``email`` +attribute, a SQL function is rendered which produces the same effect: + +.. sourcecode:: python+sql + + {sql}address = session.query(EmailAddress).filter(EmailAddress.email == 'address').one() + SELECT address.email AS address_email, address.id AS address_id + FROM address + WHERE substr(address.email, ?, length(address.email) - ?) = ? + (0, 12, 'address') + {stop} + +Read more about Hybrids at :ref:`hybrids_toplevel`. + +.. _synonyms: + +Synonyms +-------- + +Synonyms are a mapper-level construct that allow any attribute on a class +to "mirror" another attribute that is mapped. + +In the most basic sense, the synonym is an easy way to make a certain +attribute available by an additional name:: + + class MyClass(Base): + __tablename__ = 'my_table' + + id = Column(Integer, primary_key=True) + job_status = Column(String(50)) + + status = synonym("job_status") + +The above class ``MyClass`` has two attributes, ``.job_status`` and +``.status`` that will behave as one attribute, both at the expression +level:: + + >>> print MyClass.job_status == 'some_status' + my_table.job_status = :job_status_1 + + >>> print MyClass.status == 'some_status' + my_table.job_status = :job_status_1 + +and at the instance level:: + + >>> m1 = MyClass(status='x') + >>> m1.status, m1.job_status + ('x', 'x') + + >>> m1.job_status = 'y' + >>> m1.status, m1.job_status + ('y', 'y') + +The :func:`.synonym` can be used for any kind of mapped attribute that +subclasses :class:`.MapperProperty`, including mapped columns and relationships, +as well as synonyms themselves. + +Beyond a simple mirror, :func:`.synonym` can also be made to reference +a user-defined :term:`descriptor`. 
We can supply our +``status`` synonym with a ``@property``:: + + class MyClass(Base): + __tablename__ = 'my_table' + + id = Column(Integer, primary_key=True) + status = Column(String(50)) + + @property + def job_status(self): + return "Status: " + self.status + + job_status = synonym("status", descriptor=job_status) + +When using Declarative, the above pattern can be expressed more succinctly +using the :func:`.synonym_for` decorator:: + + from sqlalchemy.ext.declarative import synonym_for + + class MyClass(Base): + __tablename__ = 'my_table' + + id = Column(Integer, primary_key=True) + status = Column(String(50)) + + @synonym_for("status") + @property + def job_status(self): + return "Status: " + self.status + +While the :func:`.synonym` is useful for simple mirroring, the use case +of augmenting attribute behavior with descriptors is better handled in modern +usage using the :ref:`hybrid attribute <mapper_hybrids>` feature, which +is more oriented towards Python descriptors. Technically, a :func:`.synonym` +can do everything that a :class:`.hybrid_property` can do, as it also supports +injection of custom SQL capabilities, but the hybrid is more straightforward +to use in more complex situations. + +.. autofunction:: synonym + +.. _custom_comparators: + +Operator Customization +---------------------- + +The "operators" used by the SQLAlchemy ORM and Core expression language +are fully customizable. For example, the comparison expression +``User.name == 'ed'`` makes usage of an operator built into Python +itself called ``operator.eq`` - the actual SQL construct which SQLAlchemy +associates with such an operator can be modified. New +operations can be associated with column expressions as well. The operators +which take place for column expressions are most directly redefined at the +type level - see the +section :ref:`types_operators` for a description. + +ORM level functions like :func:`.column_property`, :func:`.relationship`, +and :func:`.composite` also provide for operator redefinition at the ORM +level, by passing a :class:`.PropComparator` subclass to the ``comparator_factory`` +argument of each function. Customization of operators at this level is a +rare use case. See the documentation at :class:`.PropComparator` +for an overview. + diff --git a/doc/build/orm/mapped_sql_expr.rst b/doc/build/orm/mapped_sql_expr.rst new file mode 100644 index 000000000..1ae5b1285 --- /dev/null +++ b/doc/build/orm/mapped_sql_expr.rst @@ -0,0 +1,208 @@ +.. module:: sqlalchemy.orm + +.. _mapper_sql_expressions: + +SQL Expressions as Mapped Attributes +===================================== + +Attributes on a mapped class can be linked to SQL expressions, which can +be used in queries. + +Using a Hybrid +-------------- + +The easiest and most flexible way to link relatively simple SQL expressions to a class is to use a so-called +"hybrid attribute", +described in the section :ref:`hybrids_toplevel`. The hybrid provides +for an expression that works at both the Python level as well as at the +SQL expression level. 
For example, below we map a class ``User``,
+containing attributes ``firstname`` and ``lastname``, and include a hybrid that
+will provide for us the ``fullname``, which is the string concatenation of the two::
+
+    from sqlalchemy.ext.hybrid import hybrid_property
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        firstname = Column(String(50))
+        lastname = Column(String(50))
+
+        @hybrid_property
+        def fullname(self):
+            return self.firstname + " " + self.lastname
+
+Above, the ``fullname`` attribute is interpreted at both the instance and
+class level, so that it is available from an instance::
+
+    some_user = session.query(User).first()
+    print some_user.fullname
+
+as well as usable within queries::
+
+    some_user = session.query(User).filter(User.fullname == "John Smith").first()
+
+The string concatenation example is a simple one, where the Python expression
+can be dual purposed at the instance and class level. Often, the SQL expression
+must be distinguished from the Python expression, which can be achieved using
+:meth:`.hybrid_property.expression`. Below we illustrate the case where a conditional
+needs to be present inside the hybrid, using the ``if`` statement in Python and the
+:func:`.sql.expression.case` construct for SQL expressions::
+
+    from sqlalchemy.ext.hybrid import hybrid_property
+    from sqlalchemy.sql import case
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        firstname = Column(String(50))
+        lastname = Column(String(50))
+
+        @hybrid_property
+        def fullname(self):
+            if self.firstname is not None:
+                return self.firstname + " " + self.lastname
+            else:
+                return self.lastname
+
+        @fullname.expression
+        def fullname(cls):
+            return case([
+                (cls.firstname != None, cls.firstname + " " + cls.lastname),
+            ], else_ = cls.lastname)
+
+.. _mapper_column_property_sql_expressions:
+
+Using column_property
+---------------------
+
+The :func:`.orm.column_property` function can be used to map a SQL
+expression in a manner similar to a regularly mapped :class:`.Column`.
+With this technique, the attribute is loaded
+along with all other column-mapped attributes at load time. This is in some
+cases an advantage over the usage of hybrids, as the value can be loaded
+up front at the same time as the parent row of the object, particularly if
+the expression is one which links to other tables (typically as a correlated
+subquery) to access data that wouldn't normally be
+available on an already loaded object.
+
+Disadvantages to using :func:`.orm.column_property` for SQL expressions include that
+the expression must be compatible with the SELECT statement emitted for the class
+as a whole, and there are also some configurational quirks which can occur
+when using :func:`.orm.column_property` from declarative mixins.
+
+Our "fullname" example can be expressed using :func:`.orm.column_property` as
+follows::
+
+    from sqlalchemy.orm import column_property
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        firstname = Column(String(50))
+        lastname = Column(String(50))
+        fullname = column_property(firstname + " " + lastname)
+
+Correlated subqueries may be used as well. Below we use the :func:`.select`
+construct to create a SELECT that links together the count of ``Address``
+objects available for a particular ``User``::
+
+    from sqlalchemy.orm import column_property
+    from sqlalchemy import select, func
+    from sqlalchemy import Column, Integer, String, ForeignKey
+
+    from sqlalchemy.ext.declarative import declarative_base
+
+    Base = declarative_base()
+
+    class Address(Base):
+        __tablename__ = 'address'
+        id = Column(Integer, primary_key=True)
+        user_id = Column(Integer, ForeignKey('user.id'))
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        address_count = column_property(
+            select([func.count(Address.id)]).\
+                where(Address.user_id==id).\
+                correlate_except(Address)
+        )
+
+In the above example, we define a :func:`.select` construct like the following::
+
+    select([func.count(Address.id)]).\
+        where(Address.user_id==id).\
+        correlate_except(Address)
+
+The meaning of the above statement is, select the count of ``Address.id`` rows
+where the ``Address.user_id`` column is equated to ``id``, which in the context
+of the ``User`` class is the :class:`.Column` named ``id`` (note that ``id`` is
+also the name of a Python built-in function, which is not what we want to use
+here - if we were outside of the ``User`` class definition, we'd use ``User.id``).
+
+The :meth:`.select.correlate_except` directive indicates that each element in the
+FROM clause of this :func:`.select` may be omitted from the FROM list (that is, correlated
+to the enclosing SELECT statement against ``User``) except for the one corresponding
+to ``Address``. This isn't strictly necessary, but prevents ``Address`` from
+being inadvertently omitted from the FROM list in the case of a long string
+of joins between ``User`` and ``Address`` tables where SELECT statements against
+``Address`` are nested.
+
+If import issues prevent the :func:`.column_property` from being defined
+inline with the class, it can be assigned to the class after both
+are configured. In Declarative this has the effect of calling :meth:`.Mapper.add_property`
+to add an additional property after the fact::
+
+    User.address_count = column_property(
+        select([func.count(Address.id)]).\
+            where(Address.user_id==User.id)
+    )
+
+For many-to-many relationships, use :func:`.and_` to join the fields of the
+association table to both tables in a relation, illustrated
+here with a classical mapping::
+
+    from sqlalchemy import and_
+
+    mapper(Author, authors, properties={
+        'book_count': column_property(
+            select([func.count(books.c.id)],
+                and_(
+                    book_authors.c.author_id==authors.c.id,
+                    book_authors.c.book_id==books.c.id
+                )))
+        })
+
+Using a plain descriptor
+-------------------------
+
+In cases where a SQL query more elaborate than what :func:`.orm.column_property`
+or :class:`.hybrid_property` can provide must be emitted, a regular Python
+function accessed as an attribute can be used, assuming the expression
+only needs to be available on an already-loaded instance. The function
+is decorated with Python's own ``@property`` decorator to mark it as a read-only
+attribute.
Within the function, :func:`.object_session` +is used to locate the :class:`.Session` corresponding to the current object, +which is then used to emit a query:: + + from sqlalchemy.orm import object_session + from sqlalchemy import select, func + + class User(Base): + __tablename__ = 'user' + id = Column(Integer, primary_key=True) + firstname = Column(String(50)) + lastname = Column(String(50)) + + @property + def address_count(self): + return object_session(self).\ + scalar( + select([func.count(Address.id)]).\ + where(Address.user_id==self.id) + ) + +The plain descriptor approach is useful as a last resort, but is less performant +in the usual case than both the hybrid and column property approaches, in that +it needs to emit a SQL query upon each access. + diff --git a/doc/build/orm/mapper_config.rst b/doc/build/orm/mapper_config.rst index 8de341a0d..60ad7f5f9 100644 --- a/doc/build/orm/mapper_config.rst +++ b/doc/build/orm/mapper_config.rst @@ -1,4 +1,3 @@ -.. module:: sqlalchemy.orm .. _mapper_config_toplevel: @@ -10,1663 +9,13 @@ This section describes a variety of configurational patterns that are usable with mappers. It assumes you've worked through :ref:`ormtutorial_toplevel` and know how to construct and use rudimentary mappers and relationships. -.. _classical_mapping: -Classical Mappings -================== - -A *Classical Mapping* refers to the configuration of a mapped class using the -:func:`.mapper` function, without using the Declarative system. As an example, -start with the declarative mapping introduced in :ref:`ormtutorial_toplevel`:: - - class User(Base): - __tablename__ = 'users' - - id = Column(Integer, primary_key=True) - name = Column(String) - fullname = Column(String) - password = Column(String) - -In "classical" form, the table metadata is created separately with the :class:`.Table` -construct, then associated with the ``User`` class via the :func:`.mapper` function:: - - from sqlalchemy import Table, MetaData, Column, ForeignKey, Integer, String - from sqlalchemy.orm import mapper - - metadata = MetaData() - - user = Table('user', metadata, - Column('id', Integer, primary_key=True), - Column('name', String(50)), - Column('fullname', String(50)), - Column('password', String(12)) - ) - - class User(object): - def __init__(self, name, fullname, password): - self.name = name - self.fullname = fullname - self.password = password - - mapper(User, user) - -Information about mapped attributes, such as relationships to other classes, are provided -via the ``properties`` dictionary. The example below illustrates a second :class:`.Table` -object, mapped to a class called ``Address``, then linked to ``User`` via :func:`.relationship`:: - - address = Table('address', metadata, - Column('id', Integer, primary_key=True), - Column('user_id', Integer, ForeignKey('user.id')), - Column('email_address', String(50)) - ) - - mapper(User, user, properties={ - 'addresses' : relationship(Address, backref='user', order_by=address.c.id) - }) - - mapper(Address, address) - -When using classical mappings, classes must be provided directly without the benefit -of the "string lookup" system provided by Declarative. SQL expressions are typically -specified in terms of the :class:`.Table` objects, i.e. ``address.c.id`` above -for the ``Address`` relationship, and not ``Address.id``, as ``Address`` may not -yet be linked to table metadata, nor can we specify a string here. 
- -Some examples in the documentation still use the classical approach, but note that -the classical as well as Declarative approaches are **fully interchangeable**. Both -systems ultimately create the same configuration, consisting of a :class:`.Table`, -user-defined class, linked together with a :func:`.mapper`. When we talk about -"the behavior of :func:`.mapper`", this includes when using the Declarative system -as well - it's still used, just behind the scenes. - -Customizing Column Properties -============================== - -The default behavior of :func:`~.orm.mapper` is to assemble all the columns in -the mapped :class:`.Table` into mapped object attributes, each of which are -named according to the name of the column itself (specifically, the ``key`` -attribute of :class:`.Column`). This behavior can be -modified in several ways. - -.. _mapper_column_distinct_names: - -Naming Columns Distinctly from Attribute Names ----------------------------------------------- - -A mapping by default shares the same name for a -:class:`.Column` as that of the mapped attribute - specifically -it matches the :attr:`.Column.key` attribute on :class:`.Column`, which -by default is the same as the :attr:`.Column.name`. - -The name assigned to the Python attribute which maps to -:class:`.Column` can be different from either :attr:`.Column.name` or :attr:`.Column.key` -just by assigning it that way, as we illustrate here in a Declarative mapping:: - - class User(Base): - __tablename__ = 'user' - id = Column('user_id', Integer, primary_key=True) - name = Column('user_name', String(50)) - -Where above ``User.id`` resolves to a column named ``user_id`` -and ``User.name`` resolves to a column named ``user_name``. - -When mapping to an existing table, the :class:`.Column` object -can be referenced directly:: - - class User(Base): - __table__ = user_table - id = user_table.c.user_id - name = user_table.c.user_name - -Or in a classical mapping, placed in the ``properties`` dictionary -with the desired key:: - - mapper(User, user_table, properties={ - 'id': user_table.c.user_id, - 'name': user_table.c.user_name, - }) - -In the next section we'll examine the usage of ``.key`` more closely. - -.. _mapper_automated_reflection_schemes: - -Automating Column Naming Schemes from Reflected Tables ------------------------------------------------------- - -In the previous section :ref:`mapper_column_distinct_names`, we showed how -a :class:`.Column` explicitly mapped to a class can have a different attribute -name than the column. But what if we aren't listing out :class:`.Column` -objects explicitly, and instead are automating the production of :class:`.Table` -objects using reflection (e.g. as described in :ref:`metadata_reflection_toplevel`)? 
-In this case we can make use of the :meth:`.DDLEvents.column_reflect` event -to intercept the production of :class:`.Column` objects and provide them -with the :attr:`.Column.key` of our choice:: - - @event.listens_for(Table, "column_reflect") - def column_reflect(inspector, table, column_info): - # set column.key = "attr_<lower_case_name>" - column_info['key'] = "attr_%s" % column_info['name'].lower() - -With the above event, the reflection of :class:`.Column` objects will be intercepted -with our event that adds a new ".key" element, such as in a mapping as below:: - - class MyClass(Base): - __table__ = Table("some_table", Base.metadata, - autoload=True, autoload_with=some_engine) - -If we want to qualify our event to only react for the specific :class:`.MetaData` -object above, we can check for it in our event:: - - @event.listens_for(Table, "column_reflect") - def column_reflect(inspector, table, column_info): - if table.metadata is Base.metadata: - # set column.key = "attr_<lower_case_name>" - column_info['key'] = "attr_%s" % column_info['name'].lower() - -.. _column_prefix: - -Naming All Columns with a Prefix --------------------------------- - -A quick approach to prefix column names, typically when mapping -to an existing :class:`.Table` object, is to use ``column_prefix``:: - - class User(Base): - __table__ = user_table - __mapper_args__ = {'column_prefix':'_'} - -The above will place attribute names such as ``_user_id``, ``_user_name``, -``_password`` etc. on the mapped ``User`` class. - -This approach is uncommon in modern usage. For dealing with reflected -tables, a more flexible approach is to use that described in -:ref:`mapper_automated_reflection_schemes`. - - -Using column_property for column level options ------------------------------------------------ - -Options can be specified when mapping a :class:`.Column` using the -:func:`.column_property` function. This function -explicitly creates the :class:`.ColumnProperty` used by the -:func:`.mapper` to keep track of the :class:`.Column`; normally, the -:func:`.mapper` creates this automatically. Using :func:`.column_property`, -we can pass additional arguments about how we'd like the :class:`.Column` -to be mapped. Below, we pass an option ``active_history``, -which specifies that a change to this column's value should -result in the former value being loaded first:: - - from sqlalchemy.orm import column_property - - class User(Base): - __tablename__ = 'user' - - id = Column(Integer, primary_key=True) - name = column_property(Column(String(50)), active_history=True) - -:func:`.column_property` is also used to map a single attribute to -multiple columns. This use case arises when mapping to a :func:`~.expression.join` -which has attributes which are equated to each other:: - - class User(Base): - __table__ = user.join(address) - - # assign "user.id", "address.user_id" to the - # "id" attribute - id = column_property(user_table.c.id, address_table.c.user_id) - -For more examples featuring this usage, see :ref:`maptojoin`. 
- -Another place where :func:`.column_property` is needed is to specify SQL expressions as -mapped attributes, such as below where we create an attribute ``fullname`` -that is the string concatenation of the ``firstname`` and ``lastname`` -columns:: - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - firstname = Column(String(50)) - lastname = Column(String(50)) - fullname = column_property(firstname + " " + lastname) - -See examples of this usage at :ref:`mapper_sql_expressions`. - -.. autofunction:: column_property - -.. _include_exclude_cols: - -Mapping a Subset of Table Columns ---------------------------------- - -Sometimes, a :class:`.Table` object was made available using the -reflection process described at :ref:`metadata_reflection` to load -the table's structure from the database. -For such a table that has lots of columns that don't need to be referenced -in the application, the ``include_properties`` or ``exclude_properties`` -arguments can specify that only a subset of columns should be mapped. -For example:: - - class User(Base): - __table__ = user_table - __mapper_args__ = { - 'include_properties' :['user_id', 'user_name'] - } - -...will map the ``User`` class to the ``user_table`` table, only including -the ``user_id`` and ``user_name`` columns - the rest are not referenced. -Similarly:: - - class Address(Base): - __table__ = address_table - __mapper_args__ = { - 'exclude_properties' : ['street', 'city', 'state', 'zip'] - } - -...will map the ``Address`` class to the ``address_table`` table, including -all columns present except ``street``, ``city``, ``state``, and ``zip``. - -When this mapping is used, the columns that are not included will not be -referenced in any SELECT statements emitted by :class:`.Query`, nor will there -be any mapped attribute on the mapped class which represents the column; -assigning an attribute of that name will have no effect beyond that of -a normal Python attribute assignment. - -In some cases, multiple columns may have the same name, such as when -mapping to a join of two or more tables that share some column name. -``include_properties`` and ``exclude_properties`` can also accommodate -:class:`.Column` objects to more accurately describe which columns -should be included or excluded:: - - class UserAddress(Base): - __table__ = user_table.join(addresses_table) - __mapper_args__ = { - 'exclude_properties' :[address_table.c.id], - 'primary_key' : [user_table.c.id] - } - -.. note:: - - insert and update defaults configured on individual - :class:`.Column` objects, i.e. those described at :ref:`metadata_defaults` - including those configured by the ``default``, ``update``, - ``server_default`` and ``server_onupdate`` arguments, will continue to - function normally even if those :class:`.Column` objects are not mapped. - This is because in the case of ``default`` and ``update``, the - :class:`.Column` object is still present on the underlying - :class:`.Table`, thus allowing the default functions to take place when - the ORM emits an INSERT or UPDATE, and in the case of ``server_default`` - and ``server_onupdate``, the relational database itself maintains these - functions. - - -.. _deferred: - -Deferred Column Loading -======================== - -This feature allows particular columns of a table be loaded only -upon direct access, instead of when the entity is queried using -:class:`.Query`. This feature is useful when one wants to avoid -loading a large text or binary field into memory when it's not needed. 
-Individual columns can be lazy loaded by themselves or placed into groups that -lazy-load together, using the :func:`.orm.deferred` function to -mark them as "deferred". In the example below, we define a mapping that will load each of -``.excerpt`` and ``.photo`` in separate, individual-row SELECT statements when each -attribute is first referenced on the individual object instance:: - - from sqlalchemy.orm import deferred - from sqlalchemy import Integer, String, Text, Binary, Column - - class Book(Base): - __tablename__ = 'book' - - book_id = Column(Integer, primary_key=True) - title = Column(String(200), nullable=False) - summary = Column(String(2000)) - excerpt = deferred(Column(Text)) - photo = deferred(Column(Binary)) - -Classical mappings as always place the usage of :func:`.orm.deferred` in the -``properties`` dictionary against the table-bound :class:`.Column`:: - - mapper(Book, book_table, properties={ - 'photo':deferred(book_table.c.photo) - }) - -Deferred columns can be associated with a "group" name, so that they load -together when any of them are first accessed. The example below defines a -mapping with a ``photos`` deferred group. When one ``.photo`` is accessed, all three -photos will be loaded in one SELECT statement. The ``.excerpt`` will be loaded -separately when it is accessed:: - - class Book(Base): - __tablename__ = 'book' - - book_id = Column(Integer, primary_key=True) - title = Column(String(200), nullable=False) - summary = Column(String(2000)) - excerpt = deferred(Column(Text)) - photo1 = deferred(Column(Binary), group='photos') - photo2 = deferred(Column(Binary), group='photos') - photo3 = deferred(Column(Binary), group='photos') - -You can defer or undefer columns at the :class:`~sqlalchemy.orm.query.Query` -level using options, including :func:`.orm.defer` and :func:`.orm.undefer`:: - - from sqlalchemy.orm import defer, undefer - - query = session.query(Book) - query = query.options(defer('summary')) - query = query.options(undefer('excerpt')) - query.all() - -:func:`.orm.deferred` attributes which are marked with a "group" can be undeferred -using :func:`.orm.undefer_group`, sending in the group name:: - - from sqlalchemy.orm import undefer_group - - query = session.query(Book) - query.options(undefer_group('photos')).all() - -Load Only Cols ---------------- - -An arbitrary set of columns can be selected as "load only" columns, which will -be loaded while deferring all other columns on a given entity, using :func:`.orm.load_only`:: - - from sqlalchemy.orm import load_only - - session.query(Book).options(load_only("summary", "excerpt")) - -.. versionadded:: 0.9.0 - -Deferred Loading with Multiple Entities ---------------------------------------- - -To specify column deferral options within a :class:`.Query` that loads multiple types -of entity, the :class:`.Load` object can specify which parent entity to start with:: - - from sqlalchemy.orm import Load - - query = session.query(Book, Author).join(Book.author) - query = query.options( - Load(Book).load_only("summary", "excerpt"), - Load(Author).defer("bio") - ) - -To specify column deferral options along the path of various relationships, -the options support chaining, where the loading style of each relationship -is specified first, then is chained to the deferral options. 
-For example, to load ``Book`` instances, then joined-eager-load the
-``Author``, then apply deferral options to the ``Author`` entity::
-
-    from sqlalchemy.orm import joinedload
-
-    query = session.query(Book)
-    query = query.options(
-                joinedload(Book.author).defer("bio"),
-            )
-
-In the case where the loading style of parent relationships should be left
-unchanged, use :func:`.orm.defaultload`::
-
-    from sqlalchemy.orm import defaultload
-
-    query = session.query(Book)
-    query = query.options(
-                defaultload(Book.author).defer("bio"),
-            )
-
-.. versionadded:: 0.9.0 support for :class:`.Load` and other options which
-   allow for better targeting of deferral options.
-
-Column Deferral API
--------------------
-
-.. autofunction:: deferred
-
-.. autofunction:: defer
-
-.. autofunction:: load_only
-
-.. autofunction:: undefer
-
-.. autofunction:: undefer_group
-
-.. _mapper_sql_expressions:
-
-SQL Expressions as Mapped Attributes
-=====================================
-
-Attributes on a mapped class can be linked to SQL expressions, which can
-be used in queries.
-
-Using a Hybrid
---------------
-
-The easiest and most flexible way to link relatively simple SQL expressions
-to a class is to use a so-called "hybrid attribute", described in the
-section :ref:`hybrids_toplevel`. The hybrid provides
-for an expression that works at both the Python level as well as at the
-SQL expression level. For example, below we map a class ``User``,
-containing attributes ``firstname`` and ``lastname``, and include a hybrid that
-will provide for us the ``fullname``, which is the string concatenation of the two::
-
-    from sqlalchemy.ext.hybrid import hybrid_property
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column(Integer, primary_key=True)
-        firstname = Column(String(50))
-        lastname = Column(String(50))
-
-        @hybrid_property
-        def fullname(self):
-            return self.firstname + " " + self.lastname
-
-Above, the ``fullname`` attribute is interpreted at both the instance and
-class level, so that it is available from an instance::
-
-    some_user = session.query(User).first()
-    print some_user.fullname
-
-as well as usable within queries::
-
-    some_user = session.query(User).filter(User.fullname == "John Smith").first()
-
-The string concatenation example is a simple one, where the Python expression
-can be dual-purposed at the instance and class level. Often, the SQL expression
-must be distinguished from the Python expression, which can be achieved using
-:meth:`.hybrid_property.expression`. Below we illustrate the case where a conditional
-needs to be present inside the hybrid, using the ``if`` statement in Python and the
-:func:`.sql.expression.case` construct for SQL expressions::
-
-    from sqlalchemy.ext.hybrid import hybrid_property
-    from sqlalchemy.sql import case
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column(Integer, primary_key=True)
-        firstname = Column(String(50))
-        lastname = Column(String(50))
-
-        @hybrid_property
-        def fullname(self):
-            if self.firstname is not None:
-                return self.firstname + " " + self.lastname
-            else:
-                return self.lastname
-
-        @fullname.expression
-        def fullname(cls):
-            return case([
-                (cls.firstname != None, cls.firstname + " " + cls.lastname),
-            ], else_=cls.lastname)
-
-.. _mapper_column_property_sql_expressions:
-
-Using column_property
----------------------
-
-The :func:`.orm.column_property` function can be used to map a SQL
-expression in a manner similar to a regularly mapped :class:`.Column`. 
-
-With this technique, the attribute is loaded
-along with all other column-mapped attributes at load time. This is in some
-cases an advantage over the usage of hybrids, as the value can be loaded
-up front at the same time as the parent row of the object, particularly if
-the expression is one which links to other tables (typically as a correlated
-subquery) to access data that wouldn't normally be
-available on an already loaded object.
-
-Disadvantages to using :func:`.orm.column_property` for SQL expressions
-include that the expression must be compatible with the SELECT statement
-emitted for the class as a whole, and that some configurational quirks
-can arise when using :func:`.orm.column_property` from declarative mixins.
-
-Our "fullname" example can be expressed using :func:`.orm.column_property` as
-follows::
-
-    from sqlalchemy.orm import column_property
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column(Integer, primary_key=True)
-        firstname = Column(String(50))
-        lastname = Column(String(50))
-        fullname = column_property(firstname + " " + lastname)
-
-Correlated subqueries may be used as well. Below we use the :func:`.select`
-construct to create a SELECT that links together the count of ``Address``
-objects available for a particular ``User``::
-
-    from sqlalchemy.orm import column_property
-    from sqlalchemy import select, func
-    from sqlalchemy import Column, Integer, String, ForeignKey
-
-    from sqlalchemy.ext.declarative import declarative_base
-
-    Base = declarative_base()
-
-    class Address(Base):
-        __tablename__ = 'address'
-        id = Column(Integer, primary_key=True)
-        user_id = Column(Integer, ForeignKey('user.id'))
-
-    class User(Base):
-        __tablename__ = 'user'
-        id = Column(Integer, primary_key=True)
-        address_count = column_property(
-            select([func.count(Address.id)]).\
-                where(Address.user_id==id).\
-                correlate_except(Address)
-        )
-
-In the above example, we define a :func:`.select` construct like the following::
-
-    select([func.count(Address.id)]).\
-        where(Address.user_id==id).\
-        correlate_except(Address)
-
-The meaning of the above statement is, select the count of ``Address.id`` rows
-where the ``Address.user_id`` column is equated to ``id``, which in the context
-of the ``User`` class is the :class:`.Column` named ``id`` (note that ``id`` is
-also the name of a Python built-in function, which is not what we want to use
-here - if we were outside of the ``User`` class definition, we'd use ``User.id``).
-
-The :meth:`.select.correlate_except` directive indicates that each element in the
-FROM clause of this :func:`.select` may be omitted from the FROM list (that is, correlated
-to the enclosing SELECT statement against ``User``) except for the one corresponding
-to ``Address``. This isn't strictly necessary, but prevents ``Address`` from
-being inadvertently omitted from the FROM list in the case of a long string
-of joins between ``User`` and ``Address`` tables where SELECT statements against
-``Address`` are nested.
-
-If import issues prevent the :func:`.column_property` from being defined
-inline with the class, it can be assigned to the class after both
-are configured. 
In Declarative this has the effect of calling :meth:`.Mapper.add_property` -to add an additional property after the fact:: - - User.address_count = column_property( - select([func.count(Address.id)]).\ - where(Address.user_id==User.id) - ) - -For many-to-many relationships, use :func:`.and_` to join the fields of the -association table to both tables in a relation, illustrated -here with a classical mapping:: - - from sqlalchemy import and_ - - mapper(Author, authors, properties={ - 'book_count': column_property( - select([func.count(books.c.id)], - and_( - book_authors.c.author_id==authors.c.id, - book_authors.c.book_id==books.c.id - ))) - }) - -Using a plain descriptor -------------------------- - -In cases where a SQL query more elaborate than what :func:`.orm.column_property` -or :class:`.hybrid_property` can provide must be emitted, a regular Python -function accessed as an attribute can be used, assuming the expression -only needs to be available on an already-loaded instance. The function -is decorated with Python's own ``@property`` decorator to mark it as a read-only -attribute. Within the function, :func:`.object_session` -is used to locate the :class:`.Session` corresponding to the current object, -which is then used to emit a query:: - - from sqlalchemy.orm import object_session - from sqlalchemy import select, func - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - firstname = Column(String(50)) - lastname = Column(String(50)) - - @property - def address_count(self): - return object_session(self).\ - scalar( - select([func.count(Address.id)]).\ - where(Address.user_id==self.id) - ) - -The plain descriptor approach is useful as a last resort, but is less performant -in the usual case than both the hybrid and column property approaches, in that -it needs to emit a SQL query upon each access. - -Changing Attribute Behavior -============================ - -.. _simple_validators: - -Simple Validators ------------------ - -A quick way to add a "validation" routine to an attribute is to use the -:func:`~sqlalchemy.orm.validates` decorator. An attribute validator can raise -an exception, halting the process of mutating the attribute's value, or can -change the given value into something different. Validators, like all -attribute extensions, are only called by normal userland code; they are not -issued when the ORM is populating the object:: - - from sqlalchemy.orm import validates - - class EmailAddress(Base): - __tablename__ = 'address' - - id = Column(Integer, primary_key=True) - email = Column(String) - - @validates('email') - def validate_email(self, key, address): - assert '@' in address - return address - -.. versionchanged:: 1.0.0 - validators are no longer triggered within - the flush process when the newly fetched values for primary key - columns as well as some python- or server-side defaults are fetched. - Prior to 1.0, validators may be triggered in those cases as well. - - -Validators also receive collection append events, when items are added to a -collection:: - - from sqlalchemy.orm import validates - - class User(Base): - # ... - - addresses = relationship("Address") - - @validates('addresses') - def validate_address(self, key, address): - assert '@' in address.email - return address - - -The validation function by default does not get emitted for collection -remove events, as the typical expectation is that a value being discarded -doesn't require validation. 
However, :func:`.validates` supports reception -of these events by specifying ``include_removes=True`` to the decorator. When -this flag is set, the validation function must receive an additional boolean -argument which if ``True`` indicates that the operation is a removal:: - - from sqlalchemy.orm import validates - - class User(Base): - # ... - - addresses = relationship("Address") - - @validates('addresses', include_removes=True) - def validate_address(self, key, address, is_remove): - if is_remove: - raise ValueError( - "not allowed to remove items from the collection") - else: - assert '@' in address.email - return address - -The case where mutually dependent validators are linked via a backref -can also be tailored, using the ``include_backrefs=False`` option; this option, -when set to ``False``, prevents a validation function from emitting if the -event occurs as a result of a backref:: - - from sqlalchemy.orm import validates - - class User(Base): - # ... - - addresses = relationship("Address", backref='user') - - @validates('addresses', include_backrefs=False) - def validate_address(self, key, address): - assert '@' in address.email - return address - -Above, if we were to assign to ``Address.user`` as in ``some_address.user = some_user``, -the ``validate_address()`` function would *not* be emitted, even though an append -occurs to ``some_user.addresses`` - the event is caused by a backref. - -Note that the :func:`~.validates` decorator is a convenience function built on -top of attribute events. An application that requires more control over -configuration of attribute change behavior can make use of this system, -described at :class:`~.AttributeEvents`. - -.. autofunction:: validates - -.. _mapper_hybrids: - -Using Descriptors and Hybrids ------------------------------ - -A more comprehensive way to produce modified behavior for an attribute is to -use :term:`descriptors`. These are commonly used in Python using the ``property()`` -function. The standard SQLAlchemy technique for descriptors is to create a -plain descriptor, and to have it read/write from a mapped attribute with a -different name. Below we illustrate this using Python 2.6-style properties:: - - class EmailAddress(Base): - __tablename__ = 'email_address' - - id = Column(Integer, primary_key=True) - - # name the attribute with an underscore, - # different from the column name - _email = Column("email", String) - - # then create an ".email" attribute - # to get/set "._email" - @property - def email(self): - return self._email - - @email.setter - def email(self, email): - self._email = email - -The approach above will work, but there's more we can add. While our -``EmailAddress`` object will shuttle the value through the ``email`` -descriptor and into the ``_email`` mapped attribute, the class level -``EmailAddress.email`` attribute does not have the usual expression semantics -usable with :class:`.Query`. 
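-For example, with the plain ``@property`` version above, the class-bound
-attribute is only the descriptor object itself and yields no SQL
-expression (a quick illustrative sketch)::
-
-    # accessing the attribute on the class returns the property
-    # object itself, not a SQL construct
-    print EmailAddress.email    # <property object at 0x...>
-
-    # a comparison therefore evaluates to a plain Python boolean,
-    # which Query.filter() cannot make use of
-    print EmailAddress.email == 'address@example.com'    # False
-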
-To provide expression semantics at the class level, we instead use the
-:mod:`~sqlalchemy.ext.hybrid` extension as follows::
-
-    from sqlalchemy.ext.hybrid import hybrid_property
-
-    class EmailAddress(Base):
-        __tablename__ = 'email_address'
-
-        id = Column(Integer, primary_key=True)
-
-        _email = Column("email", String)
-
-        @hybrid_property
-        def email(self):
-            return self._email
-
-        @email.setter
-        def email(self, email):
-            self._email = email
-
-The ``.email`` attribute, in addition to providing getter/setter behavior when we have an
-instance of ``EmailAddress``, also provides a SQL expression when used at the class level,
-that is, from the ``EmailAddress`` class directly:
-
-.. sourcecode:: python+sql
-
-    from sqlalchemy.orm import Session
-    session = Session()
-
-    {sql}address = session.query(EmailAddress).\
-                   filter(EmailAddress.email == 'address@example.com').\
-                   one()
-    SELECT address.email AS address_email, address.id AS address_id
-    FROM address
-    WHERE address.email = ?
-    ('address@example.com',)
-    {stop}
-
-    address.email = 'otheraddress@example.com'
-    {sql}session.commit()
-    UPDATE address SET email=? WHERE address.id = ?
-    ('otheraddress@example.com', 1)
-    COMMIT
-    {stop}
-
-The :class:`~.hybrid_property` also allows us to change the behavior of the
-attribute, including defining separate behaviors when the attribute is
-accessed at the instance level versus at the class/expression level, using the
-:meth:`.hybrid_property.expression` modifier. For example, if we wanted to add
-a host name automatically, we might define two sets of string manipulation
-logic::
-
-    class EmailAddress(Base):
-        __tablename__ = 'email_address'
-
-        id = Column(Integer, primary_key=True)
-
-        _email = Column("email", String)
-
-        @hybrid_property
-        def email(self):
-            """Return the value of _email up until the last twelve
-            characters."""
-
-            return self._email[:-12]
-
-        @email.setter
-        def email(self, email):
-            """Set the value of _email, tacking on the twelve character
-            value @example.com."""
-
-            self._email = email + "@example.com"
-
-        @email.expression
-        def email(cls):
-            """Produce a SQL expression that represents the value
-            of the _email column, minus the last twelve characters."""
-
-            return func.substr(cls._email, 1, func.length(cls._email) - 12)
-
-Above, accessing the ``email`` property of an instance of ``EmailAddress``
-will return the value of the ``_email`` attribute, removing or adding the
-hostname ``@example.com`` from the value. When we query against the ``email``
-attribute, a SQL function is rendered which produces the same effect:
-
-.. sourcecode:: python+sql
-
-    {sql}address = session.query(EmailAddress).filter(EmailAddress.email == 'address').one()
-    SELECT address.email AS address_email, address.id AS address_id
-    FROM address
-    WHERE substr(address.email, ?, length(address.email) - ?) = ?
-    (1, 12, 'address')
-    {stop}
-
-Read more about Hybrids at :ref:`hybrids_toplevel`.
-
-.. _synonyms:
-
-Synonyms
---------
-
-Synonyms are a mapper-level construct that allow any attribute on a class
-to "mirror" another attribute that is mapped. 
- -In the most basic sense, the synonym is an easy way to make a certain -attribute available by an additional name:: - - class MyClass(Base): - __tablename__ = 'my_table' - - id = Column(Integer, primary_key=True) - job_status = Column(String(50)) - - status = synonym("job_status") - -The above class ``MyClass`` has two attributes, ``.job_status`` and -``.status`` that will behave as one attribute, both at the expression -level:: - - >>> print MyClass.job_status == 'some_status' - my_table.job_status = :job_status_1 - - >>> print MyClass.status == 'some_status' - my_table.job_status = :job_status_1 - -and at the instance level:: - - >>> m1 = MyClass(status='x') - >>> m1.status, m1.job_status - ('x', 'x') - - >>> m1.job_status = 'y' - >>> m1.status, m1.job_status - ('y', 'y') - -The :func:`.synonym` can be used for any kind of mapped attribute that -subclasses :class:`.MapperProperty`, including mapped columns and relationships, -as well as synonyms themselves. - -Beyond a simple mirror, :func:`.synonym` can also be made to reference -a user-defined :term:`descriptor`. We can supply our -``status`` synonym with a ``@property``:: - - class MyClass(Base): - __tablename__ = 'my_table' - - id = Column(Integer, primary_key=True) - status = Column(String(50)) - - @property - def job_status(self): - return "Status: " + self.status - - job_status = synonym("status", descriptor=job_status) - -When using Declarative, the above pattern can be expressed more succinctly -using the :func:`.synonym_for` decorator:: - - from sqlalchemy.ext.declarative import synonym_for - - class MyClass(Base): - __tablename__ = 'my_table' - - id = Column(Integer, primary_key=True) - status = Column(String(50)) - - @synonym_for("status") - @property - def job_status(self): - return "Status: " + self.status - -While the :func:`.synonym` is useful for simple mirroring, the use case -of augmenting attribute behavior with descriptors is better handled in modern -usage using the :ref:`hybrid attribute <mapper_hybrids>` feature, which -is more oriented towards Python descriptors. Technically, a :func:`.synonym` -can do everything that a :class:`.hybrid_property` can do, as it also supports -injection of custom SQL capabilities, but the hybrid is more straightforward -to use in more complex situations. - -.. autofunction:: synonym - -.. _custom_comparators: - -Operator Customization ----------------------- - -The "operators" used by the SQLAlchemy ORM and Core expression language -are fully customizable. For example, the comparison expression -``User.name == 'ed'`` makes usage of an operator built into Python -itself called ``operator.eq`` - the actual SQL construct which SQLAlchemy -associates with such an operator can be modified. New -operations can be associated with column expressions as well. The operators -which take place for column expressions are most directly redefined at the -type level - see the -section :ref:`types_operators` for a description. - -ORM level functions like :func:`.column_property`, :func:`.relationship`, -and :func:`.composite` also provide for operator redefinition at the ORM -level, by passing a :class:`.PropComparator` subclass to the ``comparator_factory`` -argument of each function. Customization of operators at this level is a -rare use case. See the documentation at :class:`.PropComparator` -for an overview. - -.. _mapper_composite: - -Composite Column Types -======================= - -Sets of columns can be associated with a single user-defined datatype. 
The ORM -provides a single attribute which represents the group of columns using the -class you provide. - -.. versionchanged:: 0.7 - Composites have been simplified such that - they no longer "conceal" the underlying column based attributes. Additionally, - in-place mutation is no longer automatic; see the section below on - enabling mutability to support tracking of in-place changes. - -.. versionchanged:: 0.9 - Composites will return their object-form, rather than as individual columns, - when used in a column-oriented :class:`.Query` construct. See :ref:`migration_2824`. - -A simple example represents pairs of columns as a ``Point`` object. -``Point`` represents such a pair as ``.x`` and ``.y``:: - - class Point(object): - def __init__(self, x, y): - self.x = x - self.y = y - - def __composite_values__(self): - return self.x, self.y - - def __repr__(self): - return "Point(x=%r, y=%r)" % (self.x, self.y) - - def __eq__(self, other): - return isinstance(other, Point) and \ - other.x == self.x and \ - other.y == self.y - - def __ne__(self, other): - return not self.__eq__(other) - -The requirements for the custom datatype class are that it have a constructor -which accepts positional arguments corresponding to its column format, and -also provides a method ``__composite_values__()`` which returns the state of -the object as a list or tuple, in order of its column-based attributes. It -also should supply adequate ``__eq__()`` and ``__ne__()`` methods which test -the equality of two instances. - -We will create a mapping to a table ``vertice``, which represents two points -as ``x1/y1`` and ``x2/y2``. These are created normally as :class:`.Column` -objects. Then, the :func:`.composite` function is used to assign new -attributes that will represent sets of columns via the ``Point`` class:: - - from sqlalchemy import Column, Integer - from sqlalchemy.orm import composite - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - class Vertex(Base): - __tablename__ = 'vertice' - - id = Column(Integer, primary_key=True) - x1 = Column(Integer) - y1 = Column(Integer) - x2 = Column(Integer) - y2 = Column(Integer) - - start = composite(Point, x1, y1) - end = composite(Point, x2, y2) - -A classical mapping above would define each :func:`.composite` -against the existing table:: - - mapper(Vertex, vertice_table, properties={ - 'start':composite(Point, vertice_table.c.x1, vertice_table.c.y1), - 'end':composite(Point, vertice_table.c.x2, vertice_table.c.y2), - }) - -We can now persist and use ``Vertex`` instances, as well as query for them, -using the ``.start`` and ``.end`` attributes against ad-hoc ``Point`` instances: - -.. sourcecode:: python+sql - - >>> v = Vertex(start=Point(3, 4), end=Point(5, 6)) - >>> session.add(v) - >>> q = session.query(Vertex).filter(Vertex.start == Point(3, 4)) - {sql}>>> print q.first().start - BEGIN (implicit) - INSERT INTO vertice (x1, y1, x2, y2) VALUES (?, ?, ?, ?) - (3, 4, 5, 6) - SELECT vertice.id AS vertice_id, - vertice.x1 AS vertice_x1, - vertice.y1 AS vertice_y1, - vertice.x2 AS vertice_x2, - vertice.y2 AS vertice_y2 - FROM vertice - WHERE vertice.x1 = ? AND vertice.y1 = ? - LIMIT ? OFFSET ? - (3, 4, 1, 0) - {stop}Point(x=3, y=4) - -.. autofunction:: composite - - -Tracking In-Place Mutations on Composites ------------------------------------------ - -In-place changes to an existing composite value are -not tracked automatically. Instead, the composite class needs to provide -events to its parent object explicitly. 
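-One way to do so, sketched below in outline form, is to build the composite
-class on the :class:`.MutableComposite` mixin from the
-:mod:`sqlalchemy.ext.mutable` extension, flagging each attribute mutation to
-parent objects via ``changed()`` (the ``__eq__()`` and related requirements
-from the previous section are omitted here for brevity)::
-
-    from sqlalchemy.ext.mutable import MutableComposite
-
-    class Point(MutableComposite):
-        def __init__(self, x, y):
-            self.x = x
-            self.y = y
-
-        def __setattr__(self, key, value):
-            """Intercept attribute set events and alert parents."""
-            object.__setattr__(self, key, value)
-            self.changed()
-
-        def __composite_values__(self):
-            return self.x, self.y
-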
-As the sketch suggests, this task is largely automated via the usage of the
-:class:`.MutableComposite` mixin, which uses events to associate each
-user-defined composite object with all parent associations.
-Please see the example in :ref:`mutable_composites`.
-
-.. versionchanged:: 0.7
-    In-place changes to an existing composite value are no longer
-    tracked automatically; the functionality is superseded by the
-    :class:`.MutableComposite` class.
-
-.. _composite_operations:
-
-Redefining Comparison Operations for Composites
------------------------------------------------
-
-The "equals" comparison operation by default produces an AND of all
-corresponding columns equated to one another. This can be changed using
-the ``comparator_factory`` argument to :func:`.composite`, where we
-specify a custom :class:`.CompositeProperty.Comparator` class
-to define existing or new operations.
-Below we illustrate the "greater than" operator, implementing
-the same expression that the base "greater than" does::
-
-    from sqlalchemy.orm.properties import CompositeProperty
-    from sqlalchemy import sql
-
-    class PointComparator(CompositeProperty.Comparator):
-        def __gt__(self, other):
-            """redefine the 'greater than' operation"""
-
-            return sql.and_(*[a > b for a, b in
-                              zip(self.__clause_element__().clauses,
-                                  other.__composite_values__())])
-
-    class Vertex(Base):
-        __tablename__ = 'vertice'
-
-        id = Column(Integer, primary_key=True)
-        x1 = Column(Integer)
-        y1 = Column(Integer)
-        x2 = Column(Integer)
-        y2 = Column(Integer)
-
-        start = composite(Point, x1, y1,
-                          comparator_factory=PointComparator)
-        end = composite(Point, x2, y2,
-                        comparator_factory=PointComparator)
-
-.. _bundles:
-
-Column Bundles
-===============
-
-The :class:`.Bundle` may be used to query for groups of columns under one
-namespace.
-
-.. versionadded:: 0.9.0
-
-The bundle allows columns to be grouped together::
-
-    from sqlalchemy.orm import Bundle
-
-    bn = Bundle('mybundle', MyClass.data1, MyClass.data2)
-    for row in session.query(bn).filter(bn.c.data1 == 'd1'):
-        print row.mybundle.data1, row.mybundle.data2
-
-The bundle can be subclassed to provide custom behaviors when results
-are fetched. The method :meth:`.Bundle.create_row_processor` is given
-the :class:`.Query` and a set of "row processor" functions at query execution
-time; these processor functions when given a result row will return the
-individual attribute value, which can then be adapted into any kind of
-return data structure. Below we illustrate replacing the usual :class:`.KeyedTuple`
-return structure with a straight Python dictionary::
-
-    from sqlalchemy.orm import Bundle
-
-    class DictBundle(Bundle):
-        def create_row_processor(self, query, procs, labels):
-            """Override create_row_processor to return values as dictionaries"""
-            def proc(row):
-                return dict(
-                    zip(labels, (proc(row) for proc in procs))
-                )
-            return proc
-
-.. versionchanged:: 1.0
-
-   The ``proc()`` callable passed to the ``create_row_processor()``
-   method of custom :class:`.Bundle` classes now accepts only a single
-   "row" argument.
-
-A result from the above bundle will return dictionary values::
-
-    bn = DictBundle('mybundle', MyClass.data1, MyClass.data2)
-    for row in session.query(bn).filter(bn.c.data1 == 'd1'):
-        print row.mybundle['data1'], row.mybundle['data2']
-
-The :class:`.Bundle` construct is also integrated into the behavior
-of :func:`.composite`, where it is used to return composite attributes as objects
-when queried as individual attributes.
-
-
-.. 
_maptojoin: - -Mapping a Class against Multiple Tables -======================================== - -Mappers can be constructed against arbitrary relational units (called -*selectables*) in addition to plain tables. For example, the :func:`~.expression.join` -function creates a selectable unit comprised of -multiple tables, complete with its own composite primary key, which can be -mapped in the same way as a :class:`.Table`:: - - from sqlalchemy import Table, Column, Integer, \ - String, MetaData, join, ForeignKey - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import column_property - - metadata = MetaData() - - # define two Table objects - user_table = Table('user', metadata, - Column('id', Integer, primary_key=True), - Column('name', String), - ) - - address_table = Table('address', metadata, - Column('id', Integer, primary_key=True), - Column('user_id', Integer, ForeignKey('user.id')), - Column('email_address', String) - ) - - # define a join between them. This - # takes place across the user.id and address.user_id - # columns. - user_address_join = join(user_table, address_table) - - Base = declarative_base() - - # map to it - class AddressUser(Base): - __table__ = user_address_join - - id = column_property(user_table.c.id, address_table.c.user_id) - address_id = address_table.c.id - -In the example above, the join expresses columns for both the -``user`` and the ``address`` table. The ``user.id`` and ``address.user_id`` -columns are equated by foreign key, so in the mapping they are defined -as one attribute, ``AddressUser.id``, using :func:`.column_property` to -indicate a specialized column mapping. Based on this part of the -configuration, the mapping will copy -new primary key values from ``user.id`` into the ``address.user_id`` column -when a flush occurs. - -Additionally, the ``address.id`` column is mapped explicitly to -an attribute named ``address_id``. This is to **disambiguate** the -mapping of the ``address.id`` column from the same-named ``AddressUser.id`` -attribute, which here has been assigned to refer to the ``user`` table -combined with the ``address.user_id`` foreign key. - -The natural primary key of the above mapping is the composite of -``(user.id, address.id)``, as these are the primary key columns of the -``user`` and ``address`` table combined together. The identity of an -``AddressUser`` object will be in terms of these two values, and -is represented from an ``AddressUser`` object as -``(AddressUser.id, AddressUser.address_id)``. - - -Mapping a Class against Arbitrary Selects -========================================= - -Similar to mapping against a join, a plain :func:`~.expression.select` object can be used with a -mapper as well. 
The example fragment below illustrates mapping a class -called ``Customer`` to a :func:`~.expression.select` which includes a join to a -subquery:: - - from sqlalchemy import select, func - - subq = select([ - func.count(orders.c.id).label('order_count'), - func.max(orders.c.price).label('highest_order'), - orders.c.customer_id - ]).group_by(orders.c.customer_id).alias() - - customer_select = select([customers, subq]).\ - select_from( - join(customers, subq, - customers.c.id == subq.c.customer_id) - ).alias() - - class Customer(Base): - __table__ = customer_select - -Above, the full row represented by ``customer_select`` will be all the -columns of the ``customers`` table, in addition to those columns -exposed by the ``subq`` subquery, which are ``order_count``, -``highest_order``, and ``customer_id``. Mapping the ``Customer`` -class to this selectable then creates a class which will contain -those attributes. - -When the ORM persists new instances of ``Customer``, only the -``customers`` table will actually receive an INSERT. This is because the -primary key of the ``orders`` table is not represented in the mapping; the ORM -will only emit an INSERT into a table for which it has mapped the primary -key. - -.. note:: - - The practice of mapping to arbitrary SELECT statements, especially - complex ones as above, is - almost never needed; it necessarily tends to produce complex queries - which are often less efficient than that which would be produced - by direct query construction. The practice is to some degree - based on the very early history of SQLAlchemy where the :func:`.mapper` - construct was meant to represent the primary querying interface; - in modern usage, the :class:`.Query` object can be used to construct - virtually any SELECT statement, including complex composites, and should - be favored over the "map-to-selectable" approach. - -Multiple Mappers for One Class -============================== - -In modern SQLAlchemy, a particular class is only mapped by one :func:`.mapper` -at a time. The rationale here is that the :func:`.mapper` modifies the class itself, not only -persisting it towards a particular :class:`.Table`, but also *instrumenting* -attributes upon the class which are structured specifically according to the -table metadata. - -One potential use case for another mapper to exist at the same time is if we -wanted to load instances of our class not just from the immediate :class:`.Table` -to which it is mapped, but from another selectable that is a derivation of that -:class:`.Table`. To create a second mapper that only handles querying -when used explicitly, we can use the :paramref:`.mapper.non_primary` argument. -In practice, this approach is usually not needed, as we -can do this sort of thing at query time using methods such as -:meth:`.Query.select_from`, however it is useful in the rare case that we -wish to build a :func:`.relationship` to such a mapper. An example of this is -at :ref:`relationship_non_primary_mapper`. - -Another potential use is if we genuinely want instances of our class to -be persisted into different tables at different times; certain kinds of -data sharding configurations may persist a particular class into tables -that are identical in structure except for their name. For this kind of -pattern, Python offers a better approach than the complexity of mapping -the same class multiple times, which is to instead create new mapped classes -for each target table. 
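-A minimal sketch of that idea, using the hypothetical table objects
-``widgets_table_1`` and ``widgets_table_2`` (the recipe referenced
-below gives a complete treatment)::
-
-    from sqlalchemy.orm import mapper
-
-    def class_for_table(name, table):
-        """Generate a new class and map it to the given Table."""
-        cls = type(name, (object,), {})
-        mapper(cls, table)
-        return cls
-
-    Widget1 = class_for_table('Widget1', widgets_table_1)
-    Widget2 = class_for_table('Widget2', widgets_table_2)
-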
SQLAlchemy refers to this as the "entity name" -pattern, which is described as a recipe at `Entity Name -<http://www.sqlalchemy.org/trac/wiki/UsageRecipes/EntityName>`_. - - -.. _mapping_constructors: - -Constructors and Object Initialization -======================================= - -Mapping imposes no restrictions or requirements on the constructor -(``__init__``) method for the class. You are free to require any arguments for -the function that you wish, assign attributes to the instance that are unknown -to the ORM, and generally do anything else you would normally do when writing -a constructor for a Python class. - -The SQLAlchemy ORM does not call ``__init__`` when recreating objects from -database rows. The ORM's process is somewhat akin to the Python standard -library's ``pickle`` module, invoking the low level ``__new__`` method and -then quietly restoring attributes directly on the instance rather than calling -``__init__``. - -If you need to do some setup on database-loaded instances before they're ready -to use, you can use the ``@reconstructor`` decorator to tag a method as the -ORM counterpart to ``__init__``. SQLAlchemy will call this method with no -arguments every time it loads or reconstructs one of your instances. This is -useful for recreating transient properties that are normally assigned in your -``__init__``:: - - from sqlalchemy import orm - - class MyMappedClass(object): - def __init__(self, data): - self.data = data - # we need stuff on all instances, but not in the database. - self.stuff = [] - - @orm.reconstructor - def init_on_load(self): - self.stuff = [] - -When ``obj = MyMappedClass()`` is executed, Python calls the ``__init__`` -method as normal and the ``data`` argument is required. When instances are -loaded during a :class:`~sqlalchemy.orm.query.Query` operation as in -``query(MyMappedClass).one()``, ``init_on_load`` is called. - -Any method may be tagged as the :func:`~sqlalchemy.orm.reconstructor`, even -the ``__init__`` method. SQLAlchemy will call the reconstructor method with no -arguments. Scalar (non-collection) database-mapped attributes of the instance -will be available for use within the function. Eagerly-loaded collections are -generally not yet available and will usually only contain the first element. -ORM state changes made to objects at this stage will not be recorded for the -next flush() operation, so the activity within a reconstructor should be -conservative. - -:func:`~sqlalchemy.orm.reconstructor` is a shortcut into a larger system -of "instance level" events, which can be subscribed to using the -event API - see :class:`.InstanceEvents` for the full API description -of these events. - -.. autofunction:: reconstructor - - -.. _mapper_version_counter: - -Configuring a Version Counter -============================= - -The :class:`.Mapper` supports management of a :term:`version id column`, which -is a single table column that increments or otherwise updates its value -each time an ``UPDATE`` to the mapped table occurs. This value is checked each -time the ORM emits an ``UPDATE`` or ``DELETE`` against the row to ensure that -the value held in memory matches the database value. - -.. warning:: - - Because the versioning feature relies upon comparison of the **in memory** - record of an object, the feature only applies to the :meth:`.Session.flush` - process, where the ORM flushes individual in-memory rows to the database. 
- It does **not** take effect when performing - a multirow UPDATE or DELETE using :meth:`.Query.update` or :meth:`.Query.delete` - methods, as these methods only emit an UPDATE or DELETE statement but otherwise - do not have direct access to the contents of those rows being affected. - -The purpose of this feature is to detect when two concurrent transactions -are modifying the same row at roughly the same time, or alternatively to provide -a guard against the usage of a "stale" row in a system that might be re-using -data from a previous transaction without refreshing (e.g. if one sets ``expire_on_commit=False`` -with a :class:`.Session`, it is possible to re-use the data from a previous -transaction). - -.. topic:: Concurrent transaction updates - - When detecting concurrent updates within transactions, it is typically the - case that the database's transaction isolation level is below the level of - :term:`repeatable read`; otherwise, the transaction will not be exposed - to a new row value created by a concurrent update which conflicts with - the locally updated value. In this case, the SQLAlchemy versioning - feature will typically not be useful for in-transaction conflict detection, - though it still can be used for cross-transaction staleness detection. - - The database that enforces repeatable reads will typically either have locked the - target row against a concurrent update, or is employing some form - of multi version concurrency control such that it will emit an error - when the transaction is committed. SQLAlchemy's version_id_col is an alternative - which allows version tracking to occur for specific tables within a transaction - that otherwise might not have this isolation level set. - - .. seealso:: - - `Repeatable Read Isolation Level <http://www.postgresql.org/docs/9.1/static/transaction-iso.html#XACT-REPEATABLE-READ>`_ - Postgresql's implementation of repeatable read, including a description of the error condition. - -Simple Version Counting ------------------------ - -The most straightforward way to track versions is to add an integer column -to the mapped table, then establish it as the ``version_id_col`` within the -mapper options:: - - class User(Base): - __tablename__ = 'user' - - id = Column(Integer, primary_key=True) - version_id = Column(Integer, nullable=False) - name = Column(String(50), nullable=False) - - __mapper_args__ = { - "version_id_col": version_id - } - -Above, the ``User`` mapping tracks integer versions using the column -``version_id``. When an object of type ``User`` is first flushed, the -``version_id`` column will be given a value of "1". Then, an UPDATE -of the table later on will always be emitted in a manner similar to the -following:: - - UPDATE user SET version_id=:version_id, name=:name - WHERE user.id = :user_id AND user.version_id = :user_version_id - {"name": "new name", "version_id": 2, "user_id": 1, "user_version_id": 1} - -The above UPDATE statement is updating the row that not only matches -``user.id = 1``, it also is requiring that ``user.version_id = 1``, where "1" -is the last version identifier we've been known to use on this object. -If a transaction elsewhere has modified the row independently, this version id -will no longer match, and the UPDATE statement will report that no rows matched; -this is the condition that SQLAlchemy tests, that exactly one row matched our -UPDATE (or DELETE) statement. If zero rows match, that indicates our version -of the data is stale, and a :exc:`.StaleDataError` is raised. - -.. 
_custom_version_counter: - -Custom Version Counters / Types -------------------------------- - -Other kinds of values or counters can be used for versioning. Common types include -dates and GUIDs. When using an alternate type or counter scheme, SQLAlchemy -provides a hook for this scheme using the ``version_id_generator`` argument, -which accepts a version generation callable. This callable is passed the value of the current -known version, and is expected to return the subsequent version. - -For example, if we wanted to track the versioning of our ``User`` class -using a randomly generated GUID, we could do this (note that some backends -support a native GUID type, but we illustrate here using a simple string):: - - import uuid - - class User(Base): - __tablename__ = 'user' - - id = Column(Integer, primary_key=True) - version_uuid = Column(String(32)) - name = Column(String(50), nullable=False) - - __mapper_args__ = { - 'version_id_col':version_uuid, - 'version_id_generator':lambda version: uuid.uuid4().hex - } - -The persistence engine will call upon ``uuid.uuid4()`` each time a -``User`` object is subject to an INSERT or an UPDATE. In this case, our -version generation function can disregard the incoming value of ``version``, -as the ``uuid4()`` function -generates identifiers without any prerequisite value. If we were using -a sequential versioning scheme such as numeric or a special character system, -we could make use of the given ``version`` in order to help determine the -subsequent value. - -.. seealso:: - - :ref:`custom_guid_type` - -.. _server_side_version_counter: - -Server Side Version Counters ----------------------------- - -The ``version_id_generator`` can also be configured to rely upon a value -that is generated by the database. In this case, the database would need -some means of generating new identifiers when a row is subject to an INSERT -as well as with an UPDATE. For the UPDATE case, typically an update trigger -is needed, unless the database in question supports some other native -version identifier. The Postgresql database in particular supports a system -column called `xmin <http://www.postgresql.org/docs/9.1/static/ddl-system-columns.html>`_ -which provides UPDATE versioning. We can make use -of the Postgresql ``xmin`` column to version our ``User`` -class as follows:: - - class User(Base): - __tablename__ = 'user' - - id = Column(Integer, primary_key=True) - name = Column(String(50), nullable=False) - xmin = Column("xmin", Integer, system=True) - - __mapper_args__ = { - 'version_id_col': xmin, - 'version_id_generator': False - } - -With the above mapping, the ORM will rely upon the ``xmin`` column for -automatically providing the new value of the version id counter. - -.. topic:: creating tables that refer to system columns - - In the above scenario, as ``xmin`` is a system column provided by Postgresql, - we use the ``system=True`` argument to mark it as a system-provided - column, omitted from the ``CREATE TABLE`` statement. - - -The ORM typically does not actively fetch the values of database-generated -values when it emits an INSERT or UPDATE, instead leaving these columns as -"expired" and to be fetched when they are next accessed, unless the ``eager_defaults`` -:func:`.mapper` flag is set. However, when a -server side version column is used, the ORM needs to actively fetch the newly -generated value. This is so that the version counter is set up *before* -any concurrent transaction may update it again. 
This fetching is also -best done simultaneously within the INSERT or UPDATE statement using :term:`RETURNING`, -otherwise if emitting a SELECT statement afterwards, there is still a potential -race condition where the version counter may change before it can be fetched. - -When the target database supports RETURNING, an INSERT statement for our ``User`` class will look -like this:: - - INSERT INTO "user" (name) VALUES (%(name)s) RETURNING "user".id, "user".xmin - {'name': 'ed'} - -Where above, the ORM can acquire any newly generated primary key values along -with server-generated version identifiers in one statement. When the backend -does not support RETURNING, an additional SELECT must be emitted for **every** -INSERT and UPDATE, which is much less efficient, and also introduces the possibility of -missed version counters:: - - INSERT INTO "user" (name) VALUES (%(name)s) - {'name': 'ed'} - - SELECT "user".version_id AS user_version_id FROM "user" where - "user".id = :param_1 - {"param_1": 1} - -It is *strongly recommended* that server side version counters only be used -when absolutely necessary and only on backends that support :term:`RETURNING`, -e.g. Postgresql, Oracle, SQL Server (though SQL Server has -`major caveats <http://blogs.msdn.com/b/sqlprogrammability/archive/2008/07/11/update-with-output-clause-triggers-and-sqlmoreresults.aspx>`_ when triggers are used), Firebird. - -.. versionadded:: 0.9.0 - - Support for server side version identifier tracking. - -Programmatic or Conditional Version Counters ---------------------------------------------- - -When ``version_id_generator`` is set to False, we can also programmatically -(and conditionally) set the version identifier on our object in the same way -we assign any other mapped attribute. Such as if we used our UUID example, but -set ``version_id_generator`` to ``False``, we can set the version identifier -at our choosing:: - - import uuid - - class User(Base): - __tablename__ = 'user' - - id = Column(Integer, primary_key=True) - version_uuid = Column(String(32)) - name = Column(String(50), nullable=False) - - __mapper_args__ = { - 'version_id_col':version_uuid, - 'version_id_generator': False - } - - u1 = User(name='u1', version_uuid=uuid.uuid4()) - - session.add(u1) - - session.commit() - - u1.name = 'u2' - u1.version_uuid = uuid.uuid4() - - session.commit() - -We can update our ``User`` object without incrementing the version counter -as well; the value of the counter will remain unchanged, and the UPDATE -statement will still check against the previous value. This may be useful -for schemes where only certain classes of UPDATE are sensitive to concurrency -issues:: - - # will leave version_uuid unchanged - u1.name = 'u3' - session.commit() - -.. versionadded:: 0.9.0 - - Support for programmatic and conditional version identifier tracking. - - -Class Mapping API -================= - -.. autofunction:: mapper - -.. autofunction:: object_mapper - -.. autofunction:: class_mapper - -.. autofunction:: configure_mappers - -.. autofunction:: clear_mappers - -.. autofunction:: sqlalchemy.orm.util.identity_key - -.. autofunction:: sqlalchemy.orm.util.polymorphic_union - -.. autoclass:: sqlalchemy.orm.mapper.Mapper - :members: +.. 
toctree:: + :maxdepth: 2 + mapping_styles + scalar_mapping + inheritance + nonstandard_mappings + versioning + mapping_api diff --git a/doc/build/orm/mapping_api.rst b/doc/build/orm/mapping_api.rst new file mode 100644 index 000000000..cd7c379cd --- /dev/null +++ b/doc/build/orm/mapping_api.rst @@ -0,0 +1,22 @@ +.. module:: sqlalchemy.orm + +Class Mapping API +================= + +.. autofunction:: mapper + +.. autofunction:: object_mapper + +.. autofunction:: class_mapper + +.. autofunction:: configure_mappers + +.. autofunction:: clear_mappers + +.. autofunction:: sqlalchemy.orm.util.identity_key + +.. autofunction:: sqlalchemy.orm.util.polymorphic_union + +.. autoclass:: sqlalchemy.orm.mapper.Mapper + :members: + diff --git a/doc/build/orm/mapping_columns.rst b/doc/build/orm/mapping_columns.rst new file mode 100644 index 000000000..b36bfd2f1 --- /dev/null +++ b/doc/build/orm/mapping_columns.rst @@ -0,0 +1,222 @@ +.. module:: sqlalchemy.orm + +Mapping Table Columns +===================== + +The default behavior of :func:`~.orm.mapper` is to assemble all the columns in +the mapped :class:`.Table` into mapped object attributes, each of which are +named according to the name of the column itself (specifically, the ``key`` +attribute of :class:`.Column`). This behavior can be +modified in several ways. + +.. _mapper_column_distinct_names: + +Naming Columns Distinctly from Attribute Names +---------------------------------------------- + +A mapping by default shares the same name for a +:class:`.Column` as that of the mapped attribute - specifically +it matches the :attr:`.Column.key` attribute on :class:`.Column`, which +by default is the same as the :attr:`.Column.name`. + +The name assigned to the Python attribute which maps to +:class:`.Column` can be different from either :attr:`.Column.name` or :attr:`.Column.key` +just by assigning it that way, as we illustrate here in a Declarative mapping:: + + class User(Base): + __tablename__ = 'user' + id = Column('user_id', Integer, primary_key=True) + name = Column('user_name', String(50)) + +Where above ``User.id`` resolves to a column named ``user_id`` +and ``User.name`` resolves to a column named ``user_name``. + +When mapping to an existing table, the :class:`.Column` object +can be referenced directly:: + + class User(Base): + __table__ = user_table + id = user_table.c.user_id + name = user_table.c.user_name + +Or in a classical mapping, placed in the ``properties`` dictionary +with the desired key:: + + mapper(User, user_table, properties={ + 'id': user_table.c.user_id, + 'name': user_table.c.user_name, + }) + +In the next section we'll examine the usage of ``.key`` more closely. + +.. _mapper_automated_reflection_schemes: + +Automating Column Naming Schemes from Reflected Tables +------------------------------------------------------ + +In the previous section :ref:`mapper_column_distinct_names`, we showed how +a :class:`.Column` explicitly mapped to a class can have a different attribute +name than the column. But what if we aren't listing out :class:`.Column` +objects explicitly, and instead are automating the production of :class:`.Table` +objects using reflection (e.g. as described in :ref:`metadata_reflection_toplevel`)? 
+In this case we can make use of the :meth:`.DDLEvents.column_reflect` event +to intercept the production of :class:`.Column` objects and provide them +with the :attr:`.Column.key` of our choice:: + + @event.listens_for(Table, "column_reflect") + def column_reflect(inspector, table, column_info): + # set column.key = "attr_<lower_case_name>" + column_info['key'] = "attr_%s" % column_info['name'].lower() + +With the above event, the reflection of :class:`.Column` objects will be intercepted +with our event that adds a new ".key" element, such as in a mapping as below:: + + class MyClass(Base): + __table__ = Table("some_table", Base.metadata, + autoload=True, autoload_with=some_engine) + +If we want to qualify our event to only react for the specific :class:`.MetaData` +object above, we can check for it in our event:: + + @event.listens_for(Table, "column_reflect") + def column_reflect(inspector, table, column_info): + if table.metadata is Base.metadata: + # set column.key = "attr_<lower_case_name>" + column_info['key'] = "attr_%s" % column_info['name'].lower() + +.. _column_prefix: + +Naming All Columns with a Prefix +-------------------------------- + +A quick approach to prefix column names, typically when mapping +to an existing :class:`.Table` object, is to use ``column_prefix``:: + + class User(Base): + __table__ = user_table + __mapper_args__ = {'column_prefix':'_'} + +The above will place attribute names such as ``_user_id``, ``_user_name``, +``_password`` etc. on the mapped ``User`` class. + +This approach is uncommon in modern usage. For dealing with reflected +tables, a more flexible approach is to use that described in +:ref:`mapper_automated_reflection_schemes`. + + +Using column_property for column level options +----------------------------------------------- + +Options can be specified when mapping a :class:`.Column` using the +:func:`.column_property` function. This function +explicitly creates the :class:`.ColumnProperty` used by the +:func:`.mapper` to keep track of the :class:`.Column`; normally, the +:func:`.mapper` creates this automatically. Using :func:`.column_property`, +we can pass additional arguments about how we'd like the :class:`.Column` +to be mapped. Below, we pass an option ``active_history``, +which specifies that a change to this column's value should +result in the former value being loaded first:: + + from sqlalchemy.orm import column_property + + class User(Base): + __tablename__ = 'user' + + id = Column(Integer, primary_key=True) + name = column_property(Column(String(50)), active_history=True) + +:func:`.column_property` is also used to map a single attribute to +multiple columns. This use case arises when mapping to a :func:`~.expression.join` +which has attributes which are equated to each other:: + + class User(Base): + __table__ = user.join(address) + + # assign "user.id", "address.user_id" to the + # "id" attribute + id = column_property(user_table.c.id, address_table.c.user_id) + +For more examples featuring this usage, see :ref:`maptojoin`. 
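+
+As a quick illustration of the multiple-columns form, assigning to the
+single ``id`` attribute populates both underlying columns during a flush
+(a hypothetical sketch, assuming the join mapping above and a configured
+``session``)::
+
+    u1 = User(id=5)
+    session.add(u1)
+
+    # the flush writes the value 5 to both user.id and
+    # address.user_id, which share the one "id" attribute
+    session.flush()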
+
+Another place where :func:`.column_property` is needed is to specify SQL expressions as
+mapped attributes, such as below where we create an attribute ``fullname``
+that is the string concatenation of the ``firstname`` and ``lastname``
+columns::
+
+    class User(Base):
+        __tablename__ = 'user'
+        id = Column(Integer, primary_key=True)
+        firstname = Column(String(50))
+        lastname = Column(String(50))
+        fullname = column_property(firstname + " " + lastname)
+
+See examples of this usage at :ref:`mapper_sql_expressions`.
+
+.. autofunction:: column_property
+
+.. _include_exclude_cols:
+
+Mapping a Subset of Table Columns
+---------------------------------
+
+Sometimes, a :class:`.Table` object is made available using the
+reflection process described at :ref:`metadata_reflection` to load
+the table's structure from the database.
+For such a table that has many columns that don't need to be referenced
+by the application, the ``include_properties`` or ``exclude_properties``
+arguments can specify that only a subset of columns should be mapped.
+For example::
+
+    class User(Base):
+        __table__ = user_table
+        __mapper_args__ = {
+            'include_properties': ['user_id', 'user_name']
+        }
+
+...will map the ``User`` class to the ``user_table`` table, only including
+the ``user_id`` and ``user_name`` columns - the rest are not referenced.
+Similarly::
+
+    class Address(Base):
+        __table__ = address_table
+        __mapper_args__ = {
+            'exclude_properties': ['street', 'city', 'state', 'zip']
+        }
+
+...will map the ``Address`` class to the ``address_table`` table, including
+all columns present except ``street``, ``city``, ``state``, and ``zip``.
+
+When this mapping is used, the columns that are not included will not be
+referenced in any SELECT statements emitted by :class:`.Query`, nor will there
+be any mapped attribute on the mapped class which represents the column;
+assigning an attribute of that name will have no effect beyond that of
+a normal Python attribute assignment.
+
+In some cases, multiple columns may have the same name, such as when
+mapping to a join of two or more tables that share some column name.
+``include_properties`` and ``exclude_properties`` can also accommodate
+:class:`.Column` objects to more accurately describe which columns
+should be included or excluded::
+
+    class UserAddress(Base):
+        __table__ = user_table.join(addresses_table)
+        __mapper_args__ = {
+            'exclude_properties': [address_table.c.id],
+            'primary_key': [user_table.c.id]
+        }
+
+.. note::
+
+   Insert and update defaults configured on individual
+   :class:`.Column` objects, i.e. those described at :ref:`metadata_defaults`
+   including those configured by the ``default``, ``onupdate``,
+   ``server_default`` and ``server_onupdate`` arguments, will continue to
+   function normally even if those :class:`.Column` objects are not mapped.
+   This is because in the case of ``default`` and ``onupdate``, the
+   :class:`.Column` object is still present on the underlying
+   :class:`.Table`, thus allowing the default functions to take place when
+   the ORM emits an INSERT or UPDATE, and in the case of ``server_default``
+   and ``server_onupdate``, the relational database itself maintains these
+   functions.
+
+
diff --git a/doc/build/orm/mapping_styles.rst b/doc/build/orm/mapping_styles.rst
new file mode 100644
index 000000000..7571ce650
--- /dev/null
+++ b/doc/build/orm/mapping_styles.rst
@@ -0,0 +1,170 @@
+=================
+Types of Mappings
+=================
+
+Modern SQLAlchemy features two distinct styles of mapper configuration. 
+The "Classical" style is SQLAlchemy's original mapping API, whereas +"Declarative" is the richer and more succinct system that builds on top +of "Classical". Both styles may be used interchangeably, as the end +result of each is exactly the same - a user-defined class mapped by the +:func:`.mapper` function onto a selectable unit, typically a :class:`.Table`. + +Declarative Mapping +=================== + +The *Declarative Mapping* is the typical way that +mappings are constructed in modern SQLAlchemy. +Making use of the :ref:`declarative_toplevel` +system, the components of the user-defined class as well as the +:class:`.Table` metadata to which the class is mapped are defined +at once:: + + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy import Column, Integer, String, ForeignKey + + Base = declarative_base() + + class User(Base): + __tablename__ = 'user' + + id = Column(Integer, primary_key=True) + name = Column(String) + fullname = Column(String) + password = Column(String) + +Above, a basic single-table mapping with four columns. Additional +attributes, such as relationships to other mapped classes, are also +declared inline within the class definition:: + + class User(Base): + __tablename__ = 'user' + + id = Column(Integer, primary_key=True) + name = Column(String) + fullname = Column(String) + password = Column(String) + + addresses = relationship("Address", backref="user", order_by="Address.id") + + class Address(Base): + __tablename__ = 'address' + + id = Column(Integer, primary_key=True) + user_id = Column(ForeignKey('user.id')) + email_address = Column(String) + +The declarative mapping system is introduced in the +:ref:`ormtutorial_toplevel`. For additional details on how this system +works, see :ref:`declarative_toplevel`. + +.. _classical_mapping: + +Classical Mappings +================== + +A *Classical Mapping* refers to the configuration of a mapped class using the +:func:`.mapper` function, without using the Declarative system. This is +SQLAlchemy's original class mapping API, and is still the base mapping +system provided by the ORM. + +In "classical" form, the table metadata is created separately with the +:class:`.Table` construct, then associated with the ``User`` class via +the :func:`.mapper` function:: + + from sqlalchemy import Table, MetaData, Column, Integer, String, ForeignKey + from sqlalchemy.orm import mapper + + metadata = MetaData() + + user = Table('user', metadata, + Column('id', Integer, primary_key=True), + Column('name', String(50)), + Column('fullname', String(50)), + Column('password', String(12)) + ) + + class User(object): + def __init__(self, name, fullname, password): + self.name = name + self.fullname = fullname + self.password = password + + mapper(User, user) + +Information about mapped attributes, such as relationships to other classes, are provided +via the ``properties`` dictionary. The example below illustrates a second :class:`.Table` +object, mapped to a class called ``Address``, then linked to ``User`` via :func:`.relationship`:: + + address = Table('address', metadata, + Column('id', Integer, primary_key=True), + Column('user_id', Integer, ForeignKey('user.id')), + Column('email_address', String(50)) + ) + + mapper(User, user, properties={ + 'addresses' : relationship(Address, backref='user', order_by=address.c.id) + }) + + mapper(Address, address) + +When using classical mappings, classes must be provided directly without the benefit +of the "string lookup" system provided by Declarative. 
+SQL expressions are typically
+specified in terms of the :class:`.Table` objects, i.e. ``address.c.id`` above
+for the ``Address`` relationship, and not ``Address.id``, as ``Address`` may not
+yet be linked to table metadata, nor can we specify a string here.
+
+Some examples in the documentation still use the classical approach, but note that
+the classical as well as Declarative approaches are **fully interchangeable**.  Both
+systems ultimately create the same configuration, consisting of a :class:`.Table`,
+user-defined class, linked together with a :func:`.mapper`.  When we talk about
+"the behavior of :func:`.mapper`", this includes when using the Declarative system
+as well - it's still used, just behind the scenes.
+
+Runtime Introspection of Mappings, Objects
+==========================================
+
+The :class:`.Mapper` object is available from any mapped class, regardless
+of mapping style, using the :ref:`core_inspection_toplevel` system.  Using the
+:func:`.inspect` function, one can acquire the :class:`.Mapper` from a
+mapped class::
+
+    >>> from sqlalchemy import inspect
+    >>> insp = inspect(User)
+
+Detailed information is available including :attr:`.Mapper.columns`::
+
+    >>> insp.columns
+    <sqlalchemy.util._collections.OrderedProperties object at 0x102f407f8>
+
+This is a namespace that can be viewed in a list format or
+via individual names::
+
+    >>> list(insp.columns)
+    [Column('id', Integer(), table=<user>, primary_key=True, nullable=False), Column('name', String(length=50), table=<user>), Column('fullname', String(length=50), table=<user>), Column('password', String(length=12), table=<user>)]
+    >>> insp.columns.name
+    Column('name', String(length=50), table=<user>)
+
+Other namespaces include :attr:`.Mapper.all_orm_descriptors`, which includes all mapped
+attributes as well as hybrids and association proxies::
+
+    >>> insp.all_orm_descriptors
+    <sqlalchemy.util._collections.ImmutableProperties object at 0x1040e2c68>
+    >>> insp.all_orm_descriptors.keys()
+    ['fullname', 'password', 'name', 'id']
+
+As well as :attr:`.Mapper.column_attrs`::
+
+    >>> list(insp.column_attrs)
+    [<ColumnProperty at 0x10403fde0; id>, <ColumnProperty at 0x10403fce8; name>, <ColumnProperty at 0x1040e9050; fullname>, <ColumnProperty at 0x1040e9148; password>]
+    >>> insp.column_attrs.name
+    <ColumnProperty at 0x10403fce8; name>
+    >>> insp.column_attrs.name.expression
+    Column('name', String(length=50), table=<user>)
+
+.. seealso::
+
+    :ref:`core_inspection_toplevel`
+
+    :class:`.Mapper`
+
+    :class:`.InstanceState`
diff --git a/doc/build/orm/nonstandard_mappings.rst b/doc/build/orm/nonstandard_mappings.rst
new file mode 100644
index 000000000..4645a8029
--- /dev/null
+++ b/doc/build/orm/nonstandard_mappings.rst
@@ -0,0 +1,168 @@
+========================
+Non-Traditional Mappings
+========================
+
+.. _maptojoin:
+
+Mapping a Class against Multiple Tables
+========================================
+
+Mappers can be constructed against arbitrary relational units (called
+*selectables*) in addition to plain tables.
For example, the :func:`~.expression.join` +function creates a selectable unit comprised of +multiple tables, complete with its own composite primary key, which can be +mapped in the same way as a :class:`.Table`:: + + from sqlalchemy import Table, Column, Integer, \ + String, MetaData, join, ForeignKey + from sqlalchemy.ext.declarative import declarative_base + from sqlalchemy.orm import column_property + + metadata = MetaData() + + # define two Table objects + user_table = Table('user', metadata, + Column('id', Integer, primary_key=True), + Column('name', String), + ) + + address_table = Table('address', metadata, + Column('id', Integer, primary_key=True), + Column('user_id', Integer, ForeignKey('user.id')), + Column('email_address', String) + ) + + # define a join between them. This + # takes place across the user.id and address.user_id + # columns. + user_address_join = join(user_table, address_table) + + Base = declarative_base() + + # map to it + class AddressUser(Base): + __table__ = user_address_join + + id = column_property(user_table.c.id, address_table.c.user_id) + address_id = address_table.c.id + +In the example above, the join expresses columns for both the +``user`` and the ``address`` table. The ``user.id`` and ``address.user_id`` +columns are equated by foreign key, so in the mapping they are defined +as one attribute, ``AddressUser.id``, using :func:`.column_property` to +indicate a specialized column mapping. Based on this part of the +configuration, the mapping will copy +new primary key values from ``user.id`` into the ``address.user_id`` column +when a flush occurs. + +Additionally, the ``address.id`` column is mapped explicitly to +an attribute named ``address_id``. This is to **disambiguate** the +mapping of the ``address.id`` column from the same-named ``AddressUser.id`` +attribute, which here has been assigned to refer to the ``user`` table +combined with the ``address.user_id`` foreign key. + +The natural primary key of the above mapping is the composite of +``(user.id, address.id)``, as these are the primary key columns of the +``user`` and ``address`` table combined together. The identity of an +``AddressUser`` object will be in terms of these two values, and +is represented from an ``AddressUser`` object as +``(AddressUser.id, AddressUser.address_id)``. + + +Mapping a Class against Arbitrary Selects +========================================= + +Similar to mapping against a join, a plain :func:`~.expression.select` object can be used with a +mapper as well. The example fragment below illustrates mapping a class +called ``Customer`` to a :func:`~.expression.select` which includes a join to a +subquery:: + + from sqlalchemy import select, func + + subq = select([ + func.count(orders.c.id).label('order_count'), + func.max(orders.c.price).label('highest_order'), + orders.c.customer_id + ]).group_by(orders.c.customer_id).alias() + + customer_select = select([customers, subq]).\ + select_from( + join(customers, subq, + customers.c.id == subq.c.customer_id) + ).alias() + + class Customer(Base): + __table__ = customer_select + +Above, the full row represented by ``customer_select`` will be all the +columns of the ``customers`` table, in addition to those columns +exposed by the ``subq`` subquery, which are ``order_count``, +``highest_order``, and ``customer_id``. Mapping the ``Customer`` +class to this selectable then creates a class which will contain +those attributes. 
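+
+As a brief, hypothetical usage sketch (it assumes the ``Base``,
+``customer_select`` and mapped ``Customer`` class above, plus a
+:class:`.Session` named ``session``), the subquery columns read like any
+other mapped attribute::
+
+    # "order_count" and "highest_order" come from the labeled
+    # columns of the subquery inside customer_select
+    for customer in session.query(Customer).filter(Customer.order_count > 2):
+        print(customer.id, customer.highest_order)
+
+Because the class is mapped to the full ``customer_select`` construct, the
+filter on ``order_count`` above is rendered in SQL, not evaluated in Python.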
+
+When the ORM persists new instances of ``Customer``, only the
+``customers`` table will actually receive an INSERT. This is because the
+primary key of the ``orders`` table is not represented in the mapping; the ORM
+will only emit an INSERT into a table for which it has mapped the primary
+key.
+
+.. note::
+
+    The practice of mapping to arbitrary SELECT statements, especially
+    complex ones such as the above, is
+    almost never needed; it necessarily tends to produce complex queries
+    which are often less efficient than that which would be produced
+    by direct query construction.  The practice is to some degree
+    based on the very early history of SQLAlchemy where the :func:`.mapper`
+    construct was meant to represent the primary querying interface;
+    in modern usage, the :class:`.Query` object can be used to construct
+    virtually any SELECT statement, including complex composites, and should
+    be favored over the "map-to-selectable" approach.
+
+Multiple Mappers for One Class
+==============================
+
+In modern SQLAlchemy, a particular class is mapped by only one so-called
+**primary** mapper at a time.  This mapper is involved in three main
+areas of functionality: querying, persistence, and instrumentation of the
+mapped class.  The rationale of the primary mapper relates to the fact
+that the :func:`.mapper` modifies the class itself, not only
+persisting it to a particular :class:`.Table`, but also :term:`instrumenting`
+attributes upon the class which are structured specifically according to the
+table metadata.  It's not possible for more than one mapper
+to be associated with a class in equal measure, since only one mapper can
+actually instrument the class.
+
+However, there is a class of mapper known as the **non-primary** mapper
+which allows additional mappers to be associated with a class, but with
+a limited scope of use.  This scope typically applies to
+being able to load rows from an alternate table or selectable unit, but
+still producing classes which are ultimately persisted using the primary
+mapping.  The non-primary mapper is created using the classical style
+of mapping against a class that is already mapped with a primary mapper,
+and involves the use of the :paramref:`~sqlalchemy.orm.mapper.non_primary`
+flag.
+
+The non-primary mapper is of very limited use in modern SQLAlchemy, as the
+task of being able to load classes from subqueries or other compound statements
+can now be accomplished using the :class:`.Query` object directly.
+
+There is really only one use case for the non-primary mapper, which is that
+we wish to build a :func:`.relationship` to such a mapper; this is useful
+in the rare and advanced case that our relationship is attempting to join two
+classes together using many tables and/or joins in between.  An example of this
+pattern is at :ref:`relationship_non_primary_mapper`.
+
+As far as the use case of a class that can actually be fully persisted
+to different tables under different scenarios, very early versions of
+SQLAlchemy offered a feature for this adapted from Hibernate, known
+as the "entity name" feature.  However, this use case became infeasible
+within SQLAlchemy once the mapped class itself became the source of SQL
+expression construction; that is, the class' attributes themselves link
+directly to mapped table columns.  The feature was removed and replaced
+with a simple recipe-oriented approach to accomplishing this task
+without any ambiguity of instrumentation - to create new subclasses, each
+mapped individually.
This pattern is now available as a recipe at `Entity Name +<http://www.sqlalchemy.org/trac/wiki/UsageRecipes/EntityName>`_. + diff --git a/doc/build/orm/persistence_techniques.rst b/doc/build/orm/persistence_techniques.rst new file mode 100644 index 000000000..aee48121d --- /dev/null +++ b/doc/build/orm/persistence_techniques.rst @@ -0,0 +1,301 @@ +================================= +Additional Persistence Techniques +================================= + +.. _flush_embedded_sql_expressions: + +Embedding SQL Insert/Update Expressions into a Flush +===================================================== + +This feature allows the value of a database column to be set to a SQL +expression instead of a literal value. It's especially useful for atomic +updates, calling stored procedures, etc. All you do is assign an expression to +an attribute:: + + class SomeClass(object): + pass + mapper(SomeClass, some_table) + + someobject = session.query(SomeClass).get(5) + + # set 'value' attribute to a SQL expression adding one + someobject.value = some_table.c.value + 1 + + # issues "UPDATE some_table SET value=value+1" + session.commit() + +This technique works both for INSERT and UPDATE statements. After the +flush/commit operation, the ``value`` attribute on ``someobject`` above is +expired, so that when next accessed the newly generated value will be loaded +from the database. + +.. _session_sql_expressions: + +Using SQL Expressions with Sessions +==================================== + +SQL expressions and strings can be executed via the +:class:`~sqlalchemy.orm.session.Session` within its transactional context. +This is most easily accomplished using the +:meth:`~.Session.execute` method, which returns a +:class:`~sqlalchemy.engine.ResultProxy` in the same manner as an +:class:`~sqlalchemy.engine.Engine` or +:class:`~sqlalchemy.engine.Connection`:: + + Session = sessionmaker(bind=engine) + session = Session() + + # execute a string statement + result = session.execute("select * from table where id=:id", {'id':7}) + + # execute a SQL expression construct + result = session.execute(select([mytable]).where(mytable.c.id==7)) + +The current :class:`~sqlalchemy.engine.Connection` held by the +:class:`~sqlalchemy.orm.session.Session` is accessible using the +:meth:`~.Session.connection` method:: + + connection = session.connection() + +The examples above deal with a :class:`~sqlalchemy.orm.session.Session` that's +bound to a single :class:`~sqlalchemy.engine.Engine` or +:class:`~sqlalchemy.engine.Connection`. To execute statements using a +:class:`~sqlalchemy.orm.session.Session` which is bound either to multiple +engines, or none at all (i.e. relies upon bound metadata), both +:meth:`~.Session.execute` and +:meth:`~.Session.connection` accept a ``mapper`` keyword +argument, which is passed a mapped class or +:class:`~sqlalchemy.orm.mapper.Mapper` instance, which is used to locate the +proper context for the desired engine:: + + Session = sessionmaker() + session = Session() + + # need to specify mapper or class when executing + result = session.execute("select * from table where id=:id", {'id':7}, mapper=MyMappedClass) + + result = session.execute(select([mytable], mytable.c.id==7), mapper=MyMappedClass) + + connection = session.connection(MyMappedClass) + +.. 
_session_partitioning: + +Partitioning Strategies +======================= + +Simple Vertical Partitioning +---------------------------- + +Vertical partitioning places different kinds of objects, or different tables, +across multiple databases:: + + engine1 = create_engine('postgresql://db1') + engine2 = create_engine('postgresql://db2') + + Session = sessionmaker(twophase=True) + + # bind User operations to engine 1, Account operations to engine 2 + Session.configure(binds={User:engine1, Account:engine2}) + + session = Session() + +Above, operations against either class will make usage of the :class:`.Engine` +linked to that class. Upon a flush operation, similar rules take place +to ensure each class is written to the right database. + +The transactions among the multiple databases can optionally be coordinated +via two phase commit, if the underlying backend supports it. See +:ref:`session_twophase` for an example. + +Custom Vertical Partitioning +---------------------------- + +More comprehensive rule-based class-level partitioning can be built by +overriding the :meth:`.Session.get_bind` method. Below we illustrate +a custom :class:`.Session` which delivers the following rules: + +1. Flush operations are delivered to the engine named ``master``. + +2. Operations on objects that subclass ``MyOtherClass`` all + occur on the ``other`` engine. + +3. Read operations for all other classes occur on a random + choice of the ``slave1`` or ``slave2`` database. + +:: + + engines = { + 'master':create_engine("sqlite:///master.db"), + 'other':create_engine("sqlite:///other.db"), + 'slave1':create_engine("sqlite:///slave1.db"), + 'slave2':create_engine("sqlite:///slave2.db"), + } + + from sqlalchemy.orm import Session, sessionmaker + import random + + class RoutingSession(Session): + def get_bind(self, mapper=None, clause=None): + if mapper and issubclass(mapper.class_, MyOtherClass): + return engines['other'] + elif self._flushing: + return engines['master'] + else: + return engines[ + random.choice(['slave1','slave2']) + ] + +The above :class:`.Session` class is plugged in using the ``class_`` +argument to :class:`.sessionmaker`:: + + Session = sessionmaker(class_=RoutingSession) + +This approach can be combined with multiple :class:`.MetaData` objects, +using an approach such as that of using the declarative ``__abstract__`` +keyword, described at :ref:`declarative_abstract`. + +Horizontal Partitioning +----------------------- + +Horizontal partitioning partitions the rows of a single table (or a set of +tables) across multiple databases. + +See the "sharding" example: :ref:`examples_sharding`. + +.. _bulk_operations: + +Bulk Operations +=============== + +.. note:: Bulk Operations mode is a new series of operations made available + on the :class:`.Session` object for the purpose of invoking INSERT and + UPDATE statements with greatly reduced Python overhead, at the expense + of much less functionality, automation, and error checking. + As of SQLAlchemy 1.0, these features should be considered as "beta", and + additionally are intended for advanced users. + +.. versionadded:: 1.0.0 + +Bulk operations on the :class:`.Session` include :meth:`.Session.bulk_save_objects`, +:meth:`.Session.bulk_insert_mappings`, and :meth:`.Session.bulk_update_mappings`. 
+The purpose of these methods is to directly expose internal elements of the unit of work system,
+such that facilities for emitting INSERT and UPDATE statements given dictionaries
+or object states can be utilized alone, bypassing the normal unit of work
+mechanics of state, relationship and attribute management.  The advantage
+of this approach is strictly one of reduced Python overhead:
+
+* The flush() process, including the survey of all objects, their state,
+  their cascade status, the status of all objects associated with them
+  via :func:`.relationship`, and the topological sort of all operations to
+  be performed, is completely bypassed. This reduces a great amount of
+  Python overhead.
+
+* The objects as given have no defined relationship to the target
+  :class:`.Session`, even when the operation is complete, meaning there's no
+  overhead in attaching them or managing their state in terms of the identity
+  map or session.
+
+* The :meth:`.Session.bulk_insert_mappings` and :meth:`.Session.bulk_update_mappings`
+  methods accept lists of plain Python dictionaries, not objects; this further
+  reduces a large amount of overhead associated with instantiating mapped
+  objects and assigning state to them, which normally is also subject to
+  expensive tracking of history on a per-attribute basis.
+
+* The process of fetching primary keys after an INSERT is also disabled by
+  default.  When performed correctly, INSERT statements can more readily
+  be batched by the unit of work process into ``executemany()`` blocks, which
+  perform vastly better than individual statement invocations.
+
+* UPDATE statements can similarly be tailored such that all attributes
+  are subject to the SET clause unconditionally, again making it much more
+  likely that ``executemany()`` blocks can be used.
+
+The performance behavior of the bulk routines should be studied using the
+:ref:`examples_performance` example suite.  This is a series of example
+scripts which illustrate Python call-counts across a variety of scenarios,
+including bulk insert and update scenarios.
+
+.. seealso::
+
+    :ref:`examples_performance` - includes detailed examples of bulk operations
+    contrasted against traditional Core and ORM methods, including performance
+    metrics.
+
+Usage
+-----
+
+The methods each work in the context of the :class:`.Session` object's
+transaction, like any other::
+
+    s = Session()
+    objects = [
+        User(name="u1"),
+        User(name="u2"),
+        User(name="u3")
+    ]
+    s.bulk_save_objects(objects)
+
+For :meth:`.Session.bulk_insert_mappings` and :meth:`.Session.bulk_update_mappings`,
+dictionaries are passed::
+
+    s.bulk_insert_mappings(User,
+        [dict(name="u1"), dict(name="u2"), dict(name="u3")]
+    )
+
+.. seealso::
+
+    :meth:`.Session.bulk_save_objects`
+
+    :meth:`.Session.bulk_insert_mappings`
+
+    :meth:`.Session.bulk_update_mappings`
+
+
+Comparison to Core Insert / Update Constructs
+---------------------------------------------
+
+The bulk methods offer performance that under particular circumstances
+can be close to that of using the core :class:`.Insert` and
+:class:`.Update` constructs in an "executemany" context (for a description
+of "executemany", see :ref:`execute_multiple` in the Core tutorial).
+In order to achieve this, the
+:paramref:`.Session.bulk_insert_mappings.return_defaults`
+flag should be disabled so that rows can be batched together.  The example
+suite in :ref:`examples_performance` should be carefully studied in order
+to gain familiarity with how fast bulk performance can be achieved.
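+
+As a rough sketch of the Core technique being referenced here (assuming a
+:class:`.Table` named ``user_table`` and an :class:`.Engine` named
+``engine``; both names are illustrative), an "executemany" execution passes
+a list of parameter dictionaries along with a single :class:`.Insert`
+construct::
+
+    # Core-level "executemany": one INSERT construct, many parameter sets
+    with engine.begin() as conn:
+        conn.execute(
+            user_table.insert(),
+            [{"name": "u1"}, {"name": "u2"}, {"name": "u3"}]
+        )
+
+This is essentially the code path that the bulk methods seek to approximate,
+while still accepting mapped classes and dictionaries of mapped attributes.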
+
+ORM Compatibility
+-----------------
+
+The bulk insert / update methods lose a significant amount of functionality
+versus traditional ORM use.  The following is a listing of features that
+are **not available** when using these methods:
+
+* persistence along :func:`.relationship` linkages
+
+* sorting of rows within order of dependency; rows are inserted or updated
+  directly in the order in which they are passed to the methods
+
+* Session-management on the given objects, including attachment to the
+  session and identity map management
+
+* Functionality related to primary key mutation and ON UPDATE cascade
+
+* SQL expression inserts / updates (e.g. :ref:`flush_embedded_sql_expressions`)
+
+* ORM events such as :meth:`.MapperEvents.before_insert`, etc.  The bulk
+  session methods have no event support.
+
+Features that **are available** include:
+
+* INSERTs and UPDATEs of mapped objects
+
+* Version identifier support
+
+* Multi-table mappings, such as joined-inheritance - however, an object
+  to be inserted across multiple tables either needs to have primary key
+  identifiers fully populated ahead of time, or else the
+  :paramref:`.Session.bulk_save_objects.return_defaults` flag must be used,
+  which will greatly reduce the performance benefits.
+
+
diff --git a/doc/build/orm/query.rst b/doc/build/orm/query.rst
index 5e31d710f..1517cb997 100644
--- a/doc/build/orm/query.rst
+++ b/doc/build/orm/query.rst
@@ -1,15 +1,9 @@
 .. _query_api_toplevel:
-
-Querying
-========
-
-This section provides API documentation for the :class:`.Query` object and related constructs.
-
-For an in-depth introduction to querying with the SQLAlchemy ORM, please see the :ref:`ormtutorial_toplevel`.
-
-
 .. module:: sqlalchemy.orm
+Query API
+=========
+
 The Query Object
 ----------------
diff --git a/doc/build/orm/relationship_api.rst b/doc/build/orm/relationship_api.rst
new file mode 100644
index 000000000..03045f698
--- /dev/null
+++ b/doc/build/orm/relationship_api.rst
@@ -0,0 +1,19 @@
+.. automodule:: sqlalchemy.orm
+
+Relationships API
+-----------------
+
+.. autofunction:: relationship
+
+.. autofunction:: backref
+
+.. autofunction:: relation
+
+.. autofunction:: dynamic_loader
+
+.. autofunction:: foreign
+
+.. autofunction:: remote
+
+
+
diff --git a/doc/build/orm/relationship_persistence.rst b/doc/build/orm/relationship_persistence.rst
new file mode 100644
index 000000000..6d2ba7882
--- /dev/null
+++ b/doc/build/orm/relationship_persistence.rst
@@ -0,0 +1,229 @@
+Special Relationship Persistence Patterns
+=========================================
+
+.. _post_update:
+
+Rows that point to themselves / Mutually Dependent Rows
+-------------------------------------------------------
+
+This is a very specific case where relationship() must perform an INSERT and a
+second UPDATE in order to properly populate a row (and vice versa an UPDATE
+and DELETE in order to delete without violating foreign key constraints). The
+two use cases are:
+
+* A table contains a foreign key to itself, and a single row will
+  have a foreign key value pointing to its own primary key.
+* Two tables each contain a foreign key referencing the other
+  table, with a row in each table referencing the other.
+
+For example::
+
+                 user
+    ---------------------------------
+    user_id    name    related_user_id
+       1       'ed'           1
+
+Or::
+
+                 widget                                       entry
+    -------------------------------------------     ---------------------------------
+    widget_id     name        favorite_entry_id     entry_id     name      widget_id
+       1       'somewidget'          5                 5      'someentry'      1
+
+In the first case, a row points to itself. Technically, a database that uses
+sequences such as PostgreSQL or Oracle can INSERT the row at once using a
+previously generated value, but databases which rely upon autoincrement-style
+primary key identifiers cannot. The :func:`~sqlalchemy.orm.relationship`
+always assumes a "parent/child" model of row population during flush, so
+unless you are populating the primary key/foreign key columns directly,
+:func:`~sqlalchemy.orm.relationship` needs to use two statements.
+
+In the second case, the "widget" row must be inserted before any referring
+"entry" rows, but then the "favorite_entry_id" column of that "widget" row
+cannot be set until the "entry" rows have been generated. In this case, it's
+typically impossible to insert the "widget" and "entry" rows using just two
+INSERT statements; an UPDATE must be performed in order to keep foreign key
+constraints fulfilled. The exception is if the foreign keys are configured as
+"deferred until commit" (a feature some databases support) and if the
+identifiers were populated manually (again essentially bypassing
+:func:`~sqlalchemy.orm.relationship`).
+
+To enable the usage of a supplementary UPDATE statement,
+we use the :paramref:`~.relationship.post_update` option
+of :func:`.relationship`.  This specifies that the linkage between the
+two rows should be created using an UPDATE statement after both rows
+have been INSERTed; it also causes the rows to be de-associated with
+each other via UPDATE before a DELETE is emitted.  The flag should
+be placed on just *one* of the relationships, preferably the
+many-to-one side.  Below we illustrate
+a complete example, including two :class:`.ForeignKey` constructs, one which
+specifies :paramref:`~.ForeignKey.use_alter` to help with emitting CREATE TABLE statements::
+
+    from sqlalchemy import Integer, ForeignKey, String, Column
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class Entry(Base):
+        __tablename__ = 'entry'
+        entry_id = Column(Integer, primary_key=True)
+        widget_id = Column(Integer, ForeignKey('widget.widget_id'))
+        name = Column(String(50))
+
+    class Widget(Base):
+        __tablename__ = 'widget'
+
+        widget_id = Column(Integer, primary_key=True)
+        favorite_entry_id = Column(Integer,
+                                ForeignKey('entry.entry_id',
+                                use_alter=True,
+                                name="fk_favorite_entry"))
+        name = Column(String(50))
+
+        entries = relationship(Entry, primaryjoin=
+                                        widget_id==Entry.widget_id)
+        favorite_entry = relationship(Entry,
+                                    primaryjoin=
+                                        favorite_entry_id==Entry.entry_id,
+                                    post_update=True)
+
+When a structure against the above configuration is flushed, the "widget" row will be
+INSERTed minus the "favorite_entry_id" value, then all the "entry" rows will
+be INSERTed referencing the parent "widget" row, and then an UPDATE statement
+will populate the "favorite_entry_id" column of the "widget" table (it's one
+row at a time for the time being):
+
+.. sourcecode:: pycon+sql
+
+    >>> w1 = Widget(name='somewidget')
+    >>> e1 = Entry(name='someentry')
+    >>> w1.favorite_entry = e1
+    >>> w1.entries = [e1]
+    >>> session.add_all([w1, e1])
+    {sql}>>> session.commit()
+    BEGIN (implicit)
+    INSERT INTO widget (favorite_entry_id, name) VALUES (?, ?)
+    (None, 'somewidget')
+    INSERT INTO entry (widget_id, name) VALUES (?, ?)
+    (1, 'someentry')
+    UPDATE widget SET favorite_entry_id=? WHERE widget.widget_id = ?
+    (1, 1)
+    COMMIT
+
+An additional configuration we can specify is to supply a more
+comprehensive foreign key constraint on ``Widget``, such that
+it's guaranteed that ``favorite_entry_id`` refers to an ``Entry``
+that also refers to this ``Widget``.  We can use a composite foreign key,
+as illustrated below::
+
+    from sqlalchemy import Integer, ForeignKey, String, \
+            Column, UniqueConstraint, ForeignKeyConstraint
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy.orm import relationship
+
+    Base = declarative_base()
+
+    class Entry(Base):
+        __tablename__ = 'entry'
+        entry_id = Column(Integer, primary_key=True)
+        widget_id = Column(Integer, ForeignKey('widget.widget_id'))
+        name = Column(String(50))
+        __table_args__ = (
+            UniqueConstraint("entry_id", "widget_id"),
+        )
+
+    class Widget(Base):
+        __tablename__ = 'widget'
+
+        widget_id = Column(Integer, autoincrement='ignore_fk', primary_key=True)
+        favorite_entry_id = Column(Integer)
+
+        name = Column(String(50))
+
+        __table_args__ = (
+            ForeignKeyConstraint(
+                ["widget_id", "favorite_entry_id"],
+                ["entry.widget_id", "entry.entry_id"],
+                name="fk_favorite_entry", use_alter=True
+            ),
+        )
+
+        entries = relationship(Entry, primaryjoin=
+                                        widget_id==Entry.widget_id,
+                                        foreign_keys=Entry.widget_id)
+        favorite_entry = relationship(Entry,
+                                    primaryjoin=
+                                        favorite_entry_id==Entry.entry_id,
+                                    foreign_keys=favorite_entry_id,
+                                    post_update=True)
+
+The above mapping features a composite :class:`.ForeignKeyConstraint`
+bridging the ``widget_id`` and ``favorite_entry_id`` columns.  To ensure
+that ``Widget.widget_id`` remains an "autoincrementing" column we set
+:paramref:`~.Column.autoincrement` to the value ``"ignore_fk"``
+on :class:`.Column`, and additionally on each
+:func:`.relationship` we must limit those columns considered as part of
+the foreign key for the purposes of joining and cross-population.
+
+.. _passive_updates:
+
+Mutable Primary Keys / Update Cascades
+---------------------------------------
+
+When the primary key of an entity changes, related items
+which reference the primary key must be updated as
+well. For databases which enforce referential integrity,
+it's required to use the database's ON UPDATE CASCADE
+functionality in order to propagate primary key changes
+to referenced foreign keys - the values cannot be out
+of sync for any moment.
+
+For databases that don't support this, such as SQLite and
+MySQL without their referential integrity options turned
+on, the :paramref:`~.relationship.passive_updates` flag can
+be set to ``False``, most preferably on a one-to-many or
+many-to-many :func:`.relationship`, which instructs
+SQLAlchemy to issue UPDATE statements individually for
+objects referenced in the collection, loading them into
+memory if not already locally present. The
+:paramref:`~.relationship.passive_updates` flag can also be ``False`` in
+conjunction with ON UPDATE CASCADE functionality,
+although in that case the unit of work will be issuing
+extra SELECT and UPDATE statements unnecessarily.
+ +A typical mutable primary key setup might look like:: + + class User(Base): + __tablename__ = 'user' + + username = Column(String(50), primary_key=True) + fullname = Column(String(100)) + + # passive_updates=False *only* needed if the database + # does not implement ON UPDATE CASCADE + addresses = relationship("Address", passive_updates=False) + + class Address(Base): + __tablename__ = 'address' + + email = Column(String(50), primary_key=True) + username = Column(String(50), + ForeignKey('user.username', onupdate="cascade") + ) + +:paramref:`~.relationship.passive_updates` is set to ``True`` by default, +indicating that ON UPDATE CASCADE is expected to be in +place in the usual case for foreign keys that expect +to have a mutating parent key. + +A :paramref:`~.relationship.passive_updates` setting of False may be configured on any +direction of relationship, i.e. one-to-many, many-to-one, +and many-to-many, although it is much more effective when +placed just on the one-to-many or many-to-many side. +Configuring the :paramref:`~.relationship.passive_updates` +to False only on the +many-to-one side will have only a partial effect, as the +unit of work searches only through the current identity +map for objects that may be referencing the one with a +mutating primary key, not throughout the database. diff --git a/doc/build/orm/relationships.rst b/doc/build/orm/relationships.rst index f512251a7..f5cbac87e 100644 --- a/doc/build/orm/relationships.rst +++ b/doc/build/orm/relationships.rst @@ -6,1841 +6,17 @@ Relationship Configuration ========================== This section describes the :func:`relationship` function and in depth discussion -of its usage. The reference material here continues into the next section, -:ref:`collections_toplevel`, which has additional detail on configuration -of collections via :func:`relationship`. - -.. _relationship_patterns: - -Basic Relational Patterns --------------------------- - -A quick walkthrough of the basic relational patterns. - -The imports used for each of the following sections is as follows:: - - from sqlalchemy import Table, Column, Integer, ForeignKey - from sqlalchemy.orm import relationship, backref - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - -One To Many -~~~~~~~~~~~~ - -A one to many relationship places a foreign key on the child table referencing -the parent. :func:`.relationship` is then specified on the parent, as referencing -a collection of items represented by the child:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - children = relationship("Child") - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('parent.id')) - -To establish a bidirectional relationship in one-to-many, where the "reverse" -side is a many to one, specify the :paramref:`~.relationship.backref` option:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - children = relationship("Child", backref="parent") - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('parent.id')) - -``Child`` will get a ``parent`` attribute with many-to-one semantics. - -Many To One -~~~~~~~~~~~~ - -Many to one places a foreign key in the parent table referencing the child. 
-:func:`.relationship` is declared on the parent, where a new scalar-holding -attribute will be created:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - child_id = Column(Integer, ForeignKey('child.id')) - child = relationship("Child") - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - -Bidirectional behavior is achieved by setting -:paramref:`~.relationship.backref` to the value ``"parents"``, which -will place a one-to-many collection on the ``Child`` class:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - child_id = Column(Integer, ForeignKey('child.id')) - child = relationship("Child", backref="parents") - -.. _relationships_one_to_one: - -One To One -~~~~~~~~~~~ - -One To One is essentially a bidirectional relationship with a scalar -attribute on both sides. To achieve this, the :paramref:`~.relationship.uselist` flag indicates -the placement of a scalar attribute instead of a collection on the "many" side -of the relationship. To convert one-to-many into one-to-one:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - child = relationship("Child", uselist=False, backref="parent") - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('parent.id')) - -Or to turn a one-to-many backref into one-to-one, use the :func:`.backref` function -to provide arguments for the reverse side:: - - class Parent(Base): - __tablename__ = 'parent' - id = Column(Integer, primary_key=True) - child_id = Column(Integer, ForeignKey('child.id')) - child = relationship("Child", backref=backref("parent", uselist=False)) - - class Child(Base): - __tablename__ = 'child' - id = Column(Integer, primary_key=True) - -.. _relationships_many_to_many: - -Many To Many -~~~~~~~~~~~~~ - -Many to Many adds an association table between two classes. The association -table is indicated by the :paramref:`~.relationship.secondary` argument to -:func:`.relationship`. Usually, the :class:`.Table` uses the :class:`.MetaData` -object associated with the declarative base class, so that the :class:`.ForeignKey` -directives can locate the remote tables with which to link:: - - association_table = Table('association', Base.metadata, - Column('left_id', Integer, ForeignKey('left.id')), - Column('right_id', Integer, ForeignKey('right.id')) - ) - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Child", - secondary=association_table) - - class Child(Base): - __tablename__ = 'right' - id = Column(Integer, primary_key=True) - -For a bidirectional relationship, both sides of the relationship contain a -collection. 
The :paramref:`~.relationship.backref` keyword will automatically use -the same :paramref:`~.relationship.secondary` argument for the reverse relationship:: - - association_table = Table('association', Base.metadata, - Column('left_id', Integer, ForeignKey('left.id')), - Column('right_id', Integer, ForeignKey('right.id')) - ) - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Child", - secondary=association_table, - backref="parents") - - class Child(Base): - __tablename__ = 'right' - id = Column(Integer, primary_key=True) - -The :paramref:`~.relationship.secondary` argument of :func:`.relationship` also accepts a callable -that returns the ultimate argument, which is evaluated only when mappers are -first used. Using this, we can define the ``association_table`` at a later -point, as long as it's available to the callable after all module initialization -is complete:: - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Child", - secondary=lambda: association_table, - backref="parents") - -With the declarative extension in use, the traditional "string name of the table" -is accepted as well, matching the name of the table as stored in ``Base.metadata.tables``:: - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Child", - secondary="association", - backref="parents") - -.. _relationships_many_to_many_deletion: - -Deleting Rows from the Many to Many Table -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -A behavior which is unique to the :paramref:`~.relationship.secondary` argument to :func:`.relationship` -is that the :class:`.Table` which is specified here is automatically subject -to INSERT and DELETE statements, as objects are added or removed from the collection. -There is **no need to delete from this table manually**. The act of removing a -record from the collection will have the effect of the row being deleted on flush:: - - # row will be deleted from the "secondary" table - # automatically - myparent.children.remove(somechild) - -A question which often arises is how the row in the "secondary" table can be deleted -when the child object is handed directly to :meth:`.Session.delete`:: - - session.delete(somechild) - -There are several possibilities here: - -* If there is a :func:`.relationship` from ``Parent`` to ``Child``, but there is - **not** a reverse-relationship that links a particular ``Child`` to each ``Parent``, - SQLAlchemy will not have any awareness that when deleting this particular - ``Child`` object, it needs to maintain the "secondary" table that links it to - the ``Parent``. No delete of the "secondary" table will occur. -* If there is a relationship that links a particular ``Child`` to each ``Parent``, - suppose it's called ``Child.parents``, SQLAlchemy by default will load in - the ``Child.parents`` collection to locate all ``Parent`` objects, and remove - each row from the "secondary" table which establishes this link. Note that - this relationship does not need to be bidrectional; SQLAlchemy is strictly - looking at every :func:`.relationship` associated with the ``Child`` object - being deleted. -* A higher performing option here is to use ON DELETE CASCADE directives - with the foreign keys used by the database. 
Assuming the database supports - this feature, the database itself can be made to automatically delete rows in the - "secondary" table as referencing rows in "child" are deleted. SQLAlchemy - can be instructed to forego actively loading in the ``Child.parents`` - collection in this case using the :paramref:`~.relationship.passive_deletes` - directive on :func:`.relationship`; see :ref:`passive_deletes` for more details - on this. - -Note again, these behaviors are *only* relevant to the :paramref:`~.relationship.secondary` option -used with :func:`.relationship`. If dealing with association tables that -are mapped explicitly and are *not* present in the :paramref:`~.relationship.secondary` option -of a relevant :func:`.relationship`, cascade rules can be used instead -to automatically delete entities in reaction to a related entity being -deleted - see :ref:`unitofwork_cascades` for information on this feature. - - -.. _association_pattern: - -Association Object -~~~~~~~~~~~~~~~~~~ - -The association object pattern is a variant on many-to-many: it's used -when your association table contains additional columns beyond those -which are foreign keys to the left and right tables. Instead of using -the :paramref:`~.relationship.secondary` argument, you map a new class -directly to the association table. The left side of the relationship -references the association object via one-to-many, and the association -class references the right side via many-to-one. Below we illustrate -an association table mapped to the ``Association`` class which -includes a column called ``extra_data``, which is a string value that -is stored along with each association between ``Parent`` and -``Child``:: - - class Association(Base): - __tablename__ = 'association' - left_id = Column(Integer, ForeignKey('left.id'), primary_key=True) - right_id = Column(Integer, ForeignKey('right.id'), primary_key=True) - extra_data = Column(String(50)) - child = relationship("Child") - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Association") - - class Child(Base): - __tablename__ = 'right' - id = Column(Integer, primary_key=True) - -The bidirectional version adds backrefs to both relationships:: - - class Association(Base): - __tablename__ = 'association' - left_id = Column(Integer, ForeignKey('left.id'), primary_key=True) - right_id = Column(Integer, ForeignKey('right.id'), primary_key=True) - extra_data = Column(String(50)) - child = relationship("Child", backref="parent_assocs") - - class Parent(Base): - __tablename__ = 'left' - id = Column(Integer, primary_key=True) - children = relationship("Association", backref="parent") - - class Child(Base): - __tablename__ = 'right' - id = Column(Integer, primary_key=True) - -Working with the association pattern in its direct form requires that child -objects are associated with an association instance before being appended to -the parent; similarly, access from parent to child goes through the -association object:: - - # create parent, append a child via association - p = Parent() - a = Association(extra_data="some data") - a.child = Child() - p.children.append(a) - - # iterate through child objects via association, including association - # attributes - for assoc in p.children: - print assoc.extra_data - print assoc.child - -To enhance the association object pattern such that direct -access to the ``Association`` object is optional, SQLAlchemy -provides the :ref:`associationproxy_toplevel` extension. 
This -extension allows the configuration of attributes which will -access two "hops" with a single access, one "hop" to the -associated object, and a second to a target attribute. - -.. note:: - - When using the association object pattern, it is advisable that the - association-mapped table not be used as the - :paramref:`~.relationship.secondary` argument on a - :func:`.relationship` elsewhere, unless that :func:`.relationship` - contains the option :paramref:`~.relationship.viewonly` set to - ``True``. SQLAlchemy otherwise may attempt to emit redundant INSERT - and DELETE statements on the same table, if similar state is - detected on the related attribute as well as the associated object. - -.. _self_referential: - -Adjacency List Relationships ------------------------------ - -The **adjacency list** pattern is a common relational pattern whereby a table -contains a foreign key reference to itself. This is the most common -way to represent hierarchical data in flat tables. Other methods -include **nested sets**, sometimes called "modified preorder", -as well as **materialized path**. Despite the appeal that modified preorder -has when evaluated for its fluency within SQL queries, the adjacency list model is -probably the most appropriate pattern for the large majority of hierarchical -storage needs, for reasons of concurrency, reduced complexity, and that -modified preorder has little advantage over an application which can fully -load subtrees into the application space. - -In this example, we'll work with a single mapped -class called ``Node``, representing a tree structure:: - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('node.id')) - data = Column(String(50)) - children = relationship("Node") - -With this structure, a graph such as the following:: - - root --+---> child1 - +---> child2 --+--> subchild1 - | +--> subchild2 - +---> child3 - -Would be represented with data such as:: - - id parent_id data - --- ------- ---- - 1 NULL root - 2 1 child1 - 3 1 child2 - 4 3 subchild1 - 5 3 subchild2 - 6 1 child3 - -The :func:`.relationship` configuration here works in the -same way as a "normal" one-to-many relationship, with the -exception that the "direction", i.e. whether the relationship -is one-to-many or many-to-one, is assumed by default to -be one-to-many. To establish the relationship as many-to-one, -an extra directive is added known as :paramref:`~.relationship.remote_side`, which -is a :class:`.Column` or collection of :class:`.Column` objects -that indicate those which should be considered to be "remote":: - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('node.id')) - data = Column(String(50)) - parent = relationship("Node", remote_side=[id]) - -Where above, the ``id`` column is applied as the :paramref:`~.relationship.remote_side` -of the ``parent`` :func:`.relationship`, thus establishing -``parent_id`` as the "local" side, and the relationship -then behaves as a many-to-one. 
- -As always, both directions can be combined into a bidirectional -relationship using the :func:`.backref` function:: - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('node.id')) - data = Column(String(50)) - children = relationship("Node", - backref=backref('parent', remote_side=[id]) - ) - -There are several examples included with SQLAlchemy illustrating -self-referential strategies; these include :ref:`examples_adjacencylist` and -:ref:`examples_xmlpersistence`. - -Composite Adjacency Lists -~~~~~~~~~~~~~~~~~~~~~~~~~ - -A sub-category of the adjacency list relationship is the rare -case where a particular column is present on both the "local" and -"remote" side of the join condition. An example is the ``Folder`` -class below; using a composite primary key, the ``account_id`` -column refers to itself, to indicate sub folders which are within -the same account as that of the parent; while ``folder_id`` refers -to a specific folder within that account:: - - class Folder(Base): - __tablename__ = 'folder' - __table_args__ = ( - ForeignKeyConstraint( - ['account_id', 'parent_id'], - ['folder.account_id', 'folder.folder_id']), - ) - - account_id = Column(Integer, primary_key=True) - folder_id = Column(Integer, primary_key=True) - parent_id = Column(Integer) - name = Column(String) - - parent_folder = relationship("Folder", - backref="child_folders", - remote_side=[account_id, folder_id] - ) - -Above, we pass ``account_id`` into the :paramref:`~.relationship.remote_side` list. -:func:`.relationship` recognizes that the ``account_id`` column here -is on both sides, and aligns the "remote" column along with the -``folder_id`` column, which it recognizes as uniquely present on -the "remote" side. - -.. versionadded:: 0.8 - Support for self-referential composite keys in :func:`.relationship` - where a column points to itself. - -Self-Referential Query Strategies -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Querying of self-referential structures works like any other query:: - - # get all nodes named 'child2' - session.query(Node).filter(Node.data=='child2') - -However extra care is needed when attempting to join along -the foreign key from one level of the tree to the next. In SQL, -a join from a table to itself requires that at least one side of the -expression be "aliased" so that it can be unambiguously referred to. - -Recall from :ref:`ormtutorial_aliases` in the ORM tutorial that the -:func:`.orm.aliased` construct is normally used to provide an "alias" of -an ORM entity. Joining from ``Node`` to itself using this technique -looks like: - -.. sourcecode:: python+sql - - from sqlalchemy.orm import aliased - - nodealias = aliased(Node) - {sql}session.query(Node).filter(Node.data=='subchild1').\ - join(nodealias, Node.parent).\ - filter(nodealias.data=="child2").\ - all() - SELECT node.id AS node_id, - node.parent_id AS node_parent_id, - node.data AS node_data - FROM node JOIN node AS node_1 - ON node.parent_id = node_1.id - WHERE node.data = ? - AND node_1.data = ? - ['subchild1', 'child2'] - -:meth:`.Query.join` also includes a feature known as -:paramref:`.Query.join.aliased` that can shorten the verbosity self- -referential joins, at the expense of query flexibility. This feature -performs a similar "aliasing" step to that above, without the need for -an explicit entity. Calls to :meth:`.Query.filter` and similar -subsequent to the aliased join will **adapt** the ``Node`` entity to -be that of the alias: - -.. 
sourcecode:: python+sql - - {sql}session.query(Node).filter(Node.data=='subchild1').\ - join(Node.parent, aliased=True).\ - filter(Node.data=='child2').\ - all() - SELECT node.id AS node_id, - node.parent_id AS node_parent_id, - node.data AS node_data - FROM node - JOIN node AS node_1 ON node_1.id = node.parent_id - WHERE node.data = ? AND node_1.data = ? - ['subchild1', 'child2'] - -To add criterion to multiple points along a longer join, add -:paramref:`.Query.join.from_joinpoint` to the additional -:meth:`~.Query.join` calls: - -.. sourcecode:: python+sql - - # get all nodes named 'subchild1' with a - # parent named 'child2' and a grandparent 'root' - {sql}session.query(Node).\ - filter(Node.data=='subchild1').\ - join(Node.parent, aliased=True).\ - filter(Node.data=='child2').\ - join(Node.parent, aliased=True, from_joinpoint=True).\ - filter(Node.data=='root').\ - all() - SELECT node.id AS node_id, - node.parent_id AS node_parent_id, - node.data AS node_data - FROM node - JOIN node AS node_1 ON node_1.id = node.parent_id - JOIN node AS node_2 ON node_2.id = node_1.parent_id - WHERE node.data = ? - AND node_1.data = ? - AND node_2.data = ? - ['subchild1', 'child2', 'root'] - -:meth:`.Query.reset_joinpoint` will also remove the "aliasing" from filtering -calls:: - - session.query(Node).\ - join(Node.children, aliased=True).\ - filter(Node.data == 'foo').\ - reset_joinpoint().\ - filter(Node.data == 'bar') - -For an example of using :paramref:`.Query.join.aliased` to -arbitrarily join along a chain of self-referential nodes, see -:ref:`examples_xmlpersistence`. - -.. _self_referential_eager_loading: - -Configuring Self-Referential Eager Loading -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Eager loading of relationships occurs using joins or outerjoins from parent to -child table during a normal query operation, such that the parent and its -immediate child collection or reference can be populated from a single SQL -statement, or a second statement for all immediate child collections. -SQLAlchemy's joined and subquery eager loading use aliased tables in all cases -when joining to related items, so are compatible with self-referential -joining. However, to use eager loading with a self-referential relationship, -SQLAlchemy needs to be told how many levels deep it should join and/or query; -otherwise the eager load will not take place at all. This depth setting is -configured via :paramref:`~.relationships.join_depth`: - -.. sourcecode:: python+sql - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - parent_id = Column(Integer, ForeignKey('node.id')) - data = Column(String(50)) - children = relationship("Node", - lazy="joined", - join_depth=2) - - {sql}session.query(Node).all() - SELECT node_1.id AS node_1_id, - node_1.parent_id AS node_1_parent_id, - node_1.data AS node_1_data, - node_2.id AS node_2_id, - node_2.parent_id AS node_2_parent_id, - node_2.data AS node_2_data, - node.id AS node_id, - node.parent_id AS node_parent_id, - node.data AS node_data - FROM node - LEFT OUTER JOIN node AS node_2 - ON node.id = node_2.parent_id - LEFT OUTER JOIN node AS node_1 - ON node_2.id = node_1.parent_id - [] - -.. _relationships_backref: - -Linking Relationships with Backref ----------------------------------- - -The :paramref:`~.relationship.backref` keyword argument was first introduced in :ref:`ormtutorial_toplevel`, and has been -mentioned throughout many of the examples here. What does it actually do ? 
Let's start -with the canonical ``User`` and ``Address`` scenario:: - - from sqlalchemy import Integer, ForeignKey, String, Column - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - name = Column(String) - - addresses = relationship("Address", backref="user") - - class Address(Base): - __tablename__ = 'address' - id = Column(Integer, primary_key=True) - email = Column(String) - user_id = Column(Integer, ForeignKey('user.id')) - -The above configuration establishes a collection of ``Address`` objects on ``User`` called -``User.addresses``. It also establishes a ``.user`` attribute on ``Address`` which will -refer to the parent ``User`` object. - -In fact, the :paramref:`~.relationship.backref` keyword is only a common shortcut for placing a second -:func:`.relationship` onto the ``Address`` mapping, including the establishment -of an event listener on both sides which will mirror attribute operations -in both directions. The above configuration is equivalent to:: - - from sqlalchemy import Integer, ForeignKey, String, Column - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - name = Column(String) - - addresses = relationship("Address", back_populates="user") - - class Address(Base): - __tablename__ = 'address' - id = Column(Integer, primary_key=True) - email = Column(String) - user_id = Column(Integer, ForeignKey('user.id')) - - user = relationship("User", back_populates="addresses") - -Above, we add a ``.user`` relationship to ``Address`` explicitly. On -both relationships, the :paramref:`~.relationship.back_populates` directive tells each relationship -about the other one, indicating that they should establish "bidirectional" -behavior between each other. The primary effect of this configuration -is that the relationship adds event handlers to both attributes -which have the behavior of "when an append or set event occurs here, set ourselves -onto the incoming attribute using this particular attribute name". -The behavior is illustrated as follows. Start with a ``User`` and an ``Address`` -instance. The ``.addresses`` collection is empty, and the ``.user`` attribute -is ``None``:: - - >>> u1 = User() - >>> a1 = Address() - >>> u1.addresses - [] - >>> print a1.user - None - -However, once the ``Address`` is appended to the ``u1.addresses`` collection, -both the collection and the scalar attribute have been populated:: - - >>> u1.addresses.append(a1) - >>> u1.addresses - [<__main__.Address object at 0x12a6ed0>] - >>> a1.user - <__main__.User object at 0x12a6590> - -This behavior of course works in reverse for removal operations as well, as well -as for equivalent operations on both sides. Such as -when ``.user`` is set again to ``None``, the ``Address`` object is removed -from the reverse collection:: - - >>> a1.user = None - >>> u1.addresses - [] - -The manipulation of the ``.addresses`` collection and the ``.user`` attribute -occurs entirely in Python without any interaction with the SQL database. -Without this behavior, the proper state would be apparent on both sides once the -data has been flushed to the database, and later reloaded after a commit or -expiration operation occurs. 
The :paramref:`~.relationship.backref`/:paramref:`~.relationship.back_populates` behavior has the advantage
that common bidirectional operations can reflect the correct state without requiring
a database round trip.

Remember, when the :paramref:`~.relationship.backref` keyword is used on a single relationship, it's
exactly the same as if the above two relationships were created individually
using :paramref:`~.relationship.back_populates` on each.

Backref Arguments
~~~~~~~~~~~~~~~~~~

We've established that the :paramref:`~.relationship.backref` keyword is merely a shortcut for building
two individual :func:`.relationship` constructs that refer to each other.  Part of
the behavior of this shortcut is that certain configurational arguments applied to
the :func:`.relationship`
will also be applied to the other direction - namely those arguments that describe
the relationship at a schema level, and are unlikely to be different in the reverse
direction.  The usual case
here is a many-to-many :func:`.relationship` that has a :paramref:`~.relationship.secondary` argument,
or a one-to-many or many-to-one which has a :paramref:`~.relationship.primaryjoin` argument (the
:paramref:`~.relationship.primaryjoin` argument is discussed in :ref:`relationship_primaryjoin`).  For
example, suppose we limited the list of ``Address`` objects to those which start with "tony"::

    from sqlalchemy import Integer, ForeignKey, String, Column
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import relationship

    Base = declarative_base()

    class User(Base):
        __tablename__ = 'user'
        id = Column(Integer, primary_key=True)
        name = Column(String)

        addresses = relationship("Address",
                        primaryjoin="and_(User.id==Address.user_id, "
                            "Address.email.startswith('tony'))",
                        backref="user")

    class Address(Base):
        __tablename__ = 'address'
        id = Column(Integer, primary_key=True)
        email = Column(String)
        user_id = Column(Integer, ForeignKey('user.id'))

We can observe, by inspecting the resulting property, that both sides
of the relationship have this join condition applied::

    >>> print User.addresses.property.primaryjoin
    "user".id = address.user_id AND address.email LIKE :email_1 || '%%'
    >>>
    >>> print Address.user.property.primaryjoin
    "user".id = address.user_id AND address.email LIKE :email_1 || '%%'
    >>>

This reuse of arguments should pretty much do the "right thing" - it
uses only arguments that are applicable, and in the case of a
many-to-many relationship, will reverse the usage of
:paramref:`~.relationship.primaryjoin` and
:paramref:`~.relationship.secondaryjoin` to correspond to the other
direction (see the example in :ref:`self_referential_many_to_many` for
this).

It's very often the case, however, that we'd like to specify arguments
that are specific to just the side where we happened to place the
"backref".  This includes :func:`.relationship` arguments like
:paramref:`~.relationship.lazy`,
:paramref:`~.relationship.remote_side`,
:paramref:`~.relationship.cascade` and
:paramref:`~.relationship.cascade_backrefs`.
For this case we use -the :func:`.backref` function in place of a string:: - - # <other imports> - from sqlalchemy.orm import backref - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - name = Column(String) - - addresses = relationship("Address", - backref=backref("user", lazy="joined")) - -Where above, we placed a ``lazy="joined"`` directive only on the ``Address.user`` -side, indicating that when a query against ``Address`` is made, a join to the ``User`` -entity should be made automatically which will populate the ``.user`` attribute of each -returned ``Address``. The :func:`.backref` function formatted the arguments we gave -it into a form that is interpreted by the receiving :func:`.relationship` as additional -arguments to be applied to the new relationship it creates. - -One Way Backrefs -~~~~~~~~~~~~~~~~~ - -An unusual case is that of the "one way backref". This is where the -"back-populating" behavior of the backref is only desirable in one -direction. An example of this is a collection which contains a -filtering :paramref:`~.relationship.primaryjoin` condition. We'd -like to append items to this collection as needed, and have them -populate the "parent" object on the incoming object. However, we'd -also like to have items that are not part of the collection, but still -have the same "parent" association - these items should never be in -the collection. - -Taking our previous example, where we established a -:paramref:`~.relationship.primaryjoin` that limited the collection -only to ``Address`` objects whose email address started with the word -``tony``, the usual backref behavior is that all items populate in -both directions. We wouldn't want this behavior for a case like the -following:: - - >>> u1 = User() - >>> a1 = Address(email='mary') - >>> a1.user = u1 - >>> u1.addresses - [<__main__.Address object at 0x1411910>] - -Above, the ``Address`` object that doesn't match the criterion of "starts with 'tony'" -is present in the ``addresses`` collection of ``u1``. After these objects are flushed, -the transaction committed and their attributes expired for a re-load, the ``addresses`` -collection will hit the database on next access and no longer have this ``Address`` object -present, due to the filtering condition. 
But we can do away with this unwanted side of the "backref" behavior on the
Python side by using two separate :func:`.relationship` constructs, placing
:paramref:`~.relationship.back_populates` only on one side::

    from sqlalchemy import Integer, ForeignKey, String, Column
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import relationship

    Base = declarative_base()

    class User(Base):
        __tablename__ = 'user'
        id = Column(Integer, primary_key=True)
        name = Column(String)
        addresses = relationship("Address",
                        primaryjoin="and_(User.id==Address.user_id, "
                            "Address.email.startswith('tony'))",
                        back_populates="user")

    class Address(Base):
        __tablename__ = 'address'
        id = Column(Integer, primary_key=True)
        email = Column(String)
        user_id = Column(Integer, ForeignKey('user.id'))
        user = relationship("User")

With the above scenario, appending an ``Address`` object to the ``.addresses``
collection of a ``User`` will always establish the ``.user`` attribute on that
``Address``::

    >>> u1 = User()
    >>> a1 = Address(email='tony')
    >>> u1.addresses.append(a1)
    >>> a1.user
    <__main__.User object at 0x1411850>

However, applying a ``User`` to the ``.user`` attribute of an ``Address``
will not append the ``Address`` object to the collection::

    >>> a2 = Address(email='mary')
    >>> a2.user = u1
    >>> a2 in u1.addresses
    False

Of course, we've disabled some of the usefulness of
:paramref:`~.relationship.backref` here, in that when we do append an
``Address`` that matches the criterion of
``email.startswith('tony')``, it won't show up in the
``User.addresses`` collection until the session is flushed, and the
attributes reloaded after a commit or expire operation.  While we
could consider an attribute event that checks this criterion in
Python, this starts to cross the line of duplicating too much SQL
behavior in Python.  The backref behavior itself is only a slight
transgression of this philosophy - SQLAlchemy tries to keep these to a
minimum overall.

.. _relationship_configure_joins:

Configuring how Relationship Joins
------------------------------------

:func:`.relationship` will normally create a join between two tables
by examining the foreign key relationship between the two tables
to determine which columns should be compared.  There are a variety
of situations where this behavior needs to be customized.

.. _relationship_foreign_keys:

Handling Multiple Join Paths
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

One of the most common situations to deal with is when
there is more than one foreign key path between two tables.
- -Consider a ``Customer`` class that contains two foreign keys to an ``Address`` -class:: - - from sqlalchemy import Integer, ForeignKey, String, Column - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - class Customer(Base): - __tablename__ = 'customer' - id = Column(Integer, primary_key=True) - name = Column(String) - - billing_address_id = Column(Integer, ForeignKey("address.id")) - shipping_address_id = Column(Integer, ForeignKey("address.id")) - - billing_address = relationship("Address") - shipping_address = relationship("Address") - - class Address(Base): - __tablename__ = 'address' - id = Column(Integer, primary_key=True) - street = Column(String) - city = Column(String) - state = Column(String) - zip = Column(String) - -The above mapping, when we attempt to use it, will produce the error:: - - sqlalchemy.exc.AmbiguousForeignKeysError: Could not determine join - condition between parent/child tables on relationship - Customer.billing_address - there are multiple foreign key - paths linking the tables. Specify the 'foreign_keys' argument, - providing a list of those columns which should be - counted as containing a foreign key reference to the parent table. - -The above message is pretty long. There are many potential messages -that :func:`.relationship` can return, which have been carefully tailored -to detect a variety of common configurational issues; most will suggest -the additional configuration that's needed to resolve the ambiguity -or other missing information. - -In this case, the message wants us to qualify each :func:`.relationship` -by instructing for each one which foreign key column should be considered, and -the appropriate form is as follows:: - - class Customer(Base): - __tablename__ = 'customer' - id = Column(Integer, primary_key=True) - name = Column(String) - - billing_address_id = Column(Integer, ForeignKey("address.id")) - shipping_address_id = Column(Integer, ForeignKey("address.id")) - - billing_address = relationship("Address", foreign_keys=[billing_address_id]) - shipping_address = relationship("Address", foreign_keys=[shipping_address_id]) - -Above, we specify the ``foreign_keys`` argument, which is a :class:`.Column` or list -of :class:`.Column` objects which indicate those columns to be considered "foreign", -or in other words, the columns that contain a value referring to a parent table. -Loading the ``Customer.billing_address`` relationship from a ``Customer`` -object will use the value present in ``billing_address_id`` in order to -identify the row in ``Address`` to be loaded; similarly, ``shipping_address_id`` -is used for the ``shipping_address`` relationship. The linkage of the two -columns also plays a role during persistence; the newly generated primary key -of a just-inserted ``Address`` object will be copied into the appropriate -foreign key column of an associated ``Customer`` object during a flush. - -When specifying ``foreign_keys`` with Declarative, we can also use string -names to specify, however it is important that if using a list, the **list -is part of the string**:: - - billing_address = relationship("Address", foreign_keys="[Customer.billing_address_id]") - -In this specific example, the list is not necessary in any case as there's only -one :class:`.Column` we need:: - - billing_address = relationship("Address", foreign_keys="Customer.billing_address_id") - -.. 
versionchanged:: 0.8 - :func:`.relationship` can resolve ambiguity between foreign key targets on the - basis of the ``foreign_keys`` argument alone; the :paramref:`~.relationship.primaryjoin` - argument is no longer needed in this situation. - -.. _relationship_primaryjoin: - -Specifying Alternate Join Conditions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The default behavior of :func:`.relationship` when constructing a join -is that it equates the value of primary key columns -on one side to that of foreign-key-referring columns on the other. -We can change this criterion to be anything we'd like using the -:paramref:`~.relationship.primaryjoin` -argument, as well as the :paramref:`~.relationship.secondaryjoin` -argument in the case when a "secondary" table is used. - -In the example below, using the ``User`` class -as well as an ``Address`` class which stores a street address, we -create a relationship ``boston_addresses`` which will only -load those ``Address`` objects which specify a city of "Boston":: - - from sqlalchemy import Integer, ForeignKey, String, Column - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - name = Column(String) - boston_addresses = relationship("Address", - primaryjoin="and_(User.id==Address.user_id, " - "Address.city=='Boston')") - - class Address(Base): - __tablename__ = 'address' - id = Column(Integer, primary_key=True) - user_id = Column(Integer, ForeignKey('user.id')) - - street = Column(String) - city = Column(String) - state = Column(String) - zip = Column(String) - -Within this string SQL expression, we made use of the :func:`.and_` conjunction construct to establish -two distinct predicates for the join condition - joining both the ``User.id`` and -``Address.user_id`` columns to each other, as well as limiting rows in ``Address`` -to just ``city='Boston'``. When using Declarative, rudimentary SQL functions like -:func:`.and_` are automatically available in the evaluated namespace of a string -:func:`.relationship` argument. - -The custom criteria we use in a :paramref:`~.relationship.primaryjoin` -is generally only significant when SQLAlchemy is rendering SQL in -order to load or represent this relationship. That is, it's used in -the SQL statement that's emitted in order to perform a per-attribute -lazy load, or when a join is constructed at query time, such as via -:meth:`.Query.join`, or via the eager "joined" or "subquery" styles of -loading. When in-memory objects are being manipulated, we can place -any ``Address`` object we'd like into the ``boston_addresses`` -collection, regardless of what the value of the ``.city`` attribute -is. The objects will remain present in the collection until the -attribute is expired and re-loaded from the database where the -criterion is applied. When a flush occurs, the objects inside of -``boston_addresses`` will be flushed unconditionally, assigning value -of the primary key ``user.id`` column onto the foreign-key-holding -``address.user_id`` column for each row. The ``city`` criteria has no -effect here, as the flush process only cares about synchronizing -primary key values into referencing foreign key values. - -.. _relationship_custom_foreign: - -Creating Custom Foreign Conditions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Another element of the primary join condition is how those columns -considered "foreign" are determined. 
Usually, some subset -of :class:`.Column` objects will specify :class:`.ForeignKey`, or otherwise -be part of a :class:`.ForeignKeyConstraint` that's relevant to the join condition. -:func:`.relationship` looks to this foreign key status as it decides -how it should load and persist data for this relationship. However, the -:paramref:`~.relationship.primaryjoin` argument can be used to create a join condition that -doesn't involve any "schema" level foreign keys. We can combine :paramref:`~.relationship.primaryjoin` -along with :paramref:`~.relationship.foreign_keys` and :paramref:`~.relationship.remote_side` explicitly in order to -establish such a join. - -Below, a class ``HostEntry`` joins to itself, equating the string ``content`` -column to the ``ip_address`` column, which is a Postgresql type called ``INET``. -We need to use :func:`.cast` in order to cast one side of the join to the -type of the other:: - - from sqlalchemy import cast, String, Column, Integer - from sqlalchemy.orm import relationship - from sqlalchemy.dialects.postgresql import INET - - from sqlalchemy.ext.declarative import declarative_base - - Base = declarative_base() - - class HostEntry(Base): - __tablename__ = 'host_entry' - - id = Column(Integer, primary_key=True) - ip_address = Column(INET) - content = Column(String(50)) - - # relationship() using explicit foreign_keys, remote_side - parent_host = relationship("HostEntry", - primaryjoin=ip_address == cast(content, INET), - foreign_keys=content, - remote_side=ip_address - ) - -The above relationship will produce a join like:: - - SELECT host_entry.id, host_entry.ip_address, host_entry.content - FROM host_entry JOIN host_entry AS host_entry_1 - ON host_entry_1.ip_address = CAST(host_entry.content AS INET) - -An alternative syntax to the above is to use the :func:`.foreign` and -:func:`.remote` :term:`annotations`, -inline within the :paramref:`~.relationship.primaryjoin` expression. -This syntax represents the annotations that :func:`.relationship` normally -applies by itself to the join condition given the :paramref:`~.relationship.foreign_keys` and -:paramref:`~.relationship.remote_side` arguments. These functions may -be more succinct when an explicit join condition is present, and additionally -serve to mark exactly the column that is "foreign" or "remote" independent -of whether that column is stated multiple times or within complex -SQL expressions:: - - from sqlalchemy.orm import foreign, remote - - class HostEntry(Base): - __tablename__ = 'host_entry' - - id = Column(Integer, primary_key=True) - ip_address = Column(INET) - content = Column(String(50)) - - # relationship() using explicit foreign() and remote() annotations - # in lieu of separate arguments - parent_host = relationship("HostEntry", - primaryjoin=remote(ip_address) == \ - cast(foreign(content), INET), - ) - - -.. _relationship_custom_operator: - -Using custom operators in join conditions -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Another use case for relationships is the use of custom operators, such -as Postgresql's "is contained within" ``<<`` operator when joining with -types such as :class:`.postgresql.INET` and :class:`.postgresql.CIDR`. -For custom operators we use the :meth:`.Operators.op` function:: - - inet_column.op("<<")(cidr_column) - -However, if we construct a :paramref:`~.relationship.primaryjoin` using this -operator, :func:`.relationship` will still need more information. 
This is because -when it examines our primaryjoin condition, it specifically looks for operators -used for **comparisons**, and this is typically a fixed list containing known -comparison operators such as ``==``, ``<``, etc. So for our custom operator -to participate in this system, we need it to register as a comparison operator -using the :paramref:`~.Operators.op.is_comparison` parameter:: - - inet_column.op("<<", is_comparison=True)(cidr_column) - -A complete example:: - - class IPA(Base): - __tablename__ = 'ip_address' - - id = Column(Integer, primary_key=True) - v4address = Column(INET) - - network = relationship("Network", - primaryjoin="IPA.v4address.op('<<', is_comparison=True)" - "(foreign(Network.v4representation))", - viewonly=True - ) - class Network(Base): - __tablename__ = 'network' - - id = Column(Integer, primary_key=True) - v4representation = Column(CIDR) - -Above, a query such as:: - - session.query(IPA).join(IPA.network) - -Will render as:: - - SELECT ip_address.id AS ip_address_id, ip_address.v4address AS ip_address_v4address - FROM ip_address JOIN network ON ip_address.v4address << network.v4representation - -.. versionadded:: 0.9.2 - Added the :paramref:`.Operators.op.is_comparison` - flag to assist in the creation of :func:`.relationship` constructs using - custom operators. - -.. _relationship_overlapping_foreignkeys: - -Overlapping Foreign Keys -~~~~~~~~~~~~~~~~~~~~~~~~ - -A rare scenario can arise when composite foreign keys are used, such that -a single column may be the subject of more than one column -referred to via foreign key constraint. - -Consider an (admittedly complex) mapping such as the ``Magazine`` object, -referred to both by the ``Writer`` object and the ``Article`` object -using a composite primary key scheme that includes ``magazine_id`` -for both; then to make ``Article`` refer to ``Writer`` as well, -``Article.magazine_id`` is involved in two separate relationships; -``Article.magazine`` and ``Article.writer``:: - - class Magazine(Base): - __tablename__ = 'magazine' - - id = Column(Integer, primary_key=True) - - - class Article(Base): - __tablename__ = 'article' - - article_id = Column(Integer) - magazine_id = Column(ForeignKey('magazine.id')) - writer_id = Column() - - magazine = relationship("Magazine") - writer = relationship("Writer") - - __table_args__ = ( - PrimaryKeyConstraint('article_id', 'magazine_id'), - ForeignKeyConstraint( - ['writer_id', 'magazine_id'], - ['writer.id', 'writer.magazine_id'] - ), - ) - - - class Writer(Base): - __tablename__ = 'writer' - - id = Column(Integer, primary_key=True) - magazine_id = Column(ForeignKey('magazine.id'), primary_key=True) - magazine = relationship("Magazine") - -When the above mapping is configured, we will see this warning emitted:: - - SAWarning: relationship 'Article.writer' will copy column - writer.magazine_id to column article.magazine_id, - which conflicts with relationship(s): 'Article.magazine' - (copies magazine.id to article.magazine_id). Consider applying - viewonly=True to read-only relationships, or provide a primaryjoin - condition marking writable columns with the foreign() annotation. - -What this refers to originates from the fact that ``Article.magazine_id`` is -the subject of two different foreign key constraints; it refers to -``Magazine.id`` directly as a source column, but also refers to -``Writer.magazine_id`` as a source column in the context of the -composite key to ``Writer``. 
If we associate an ``Article`` with a
particular ``Magazine``, but then associate the ``Article`` with a
``Writer`` that's associated with a *different* ``Magazine``, the ORM
will overwrite ``Article.magazine_id`` non-deterministically, silently
changing which magazine we refer to; it may
also attempt to place NULL into this column if we de-associate a
``Writer`` from an ``Article``.  The warning lets us know this is the case.

To solve this, we need to break out the behavior of ``Article`` to include
all three of the following features:

1. ``Article`` first and foremost writes to
   ``Article.magazine_id`` based on data persisted in the ``Article.magazine``
   relationship only, that is, a value copied from ``Magazine.id``.

2. ``Article`` can write to ``Article.writer_id`` on behalf of data
   persisted in the ``Article.writer`` relationship, but only the
   ``Writer.id`` column; the ``Writer.magazine_id`` column should not
   be written into ``Article.magazine_id`` as it ultimately is sourced
   from ``Magazine.id``.

3. ``Article`` takes ``Article.magazine_id`` into account when loading
   ``Article.writer``, even though it *doesn't* write to it on behalf
   of this relationship.

To get just #1 and #2, we could specify only ``Article.writer_id`` as the
"foreign keys" for ``Article.writer``::

    class Article(Base):
        # ...

        writer = relationship("Writer", foreign_keys='Article.writer_id')

However, this has the effect of ``Article.writer`` not taking
``Article.magazine_id`` into account when querying against ``Writer``:

.. sourcecode:: sql

    SELECT article.article_id AS article_article_id,
        article.magazine_id AS article_magazine_id,
        article.writer_id AS article_writer_id
    FROM article
    JOIN writer ON writer.id = article.writer_id

Therefore, to get all of #1, #2, and #3, we express the join condition
as well as which columns are to be written by combining
:paramref:`~.relationship.primaryjoin` fully, along with either the
:paramref:`~.relationship.foreign_keys` argument, or more succinctly by
annotating with :func:`~.orm.foreign`::

    class Article(Base):
        # ...

        writer = relationship(
            "Writer",
            primaryjoin="and_(Writer.id == foreign(Article.writer_id), "
                        "Writer.magazine_id == Article.magazine_id)")

.. versionchanged:: 1.0.0 the ORM will attempt to warn when a column is used
   as the synchronization target from more than one relationship
   simultaneously.


Non-relational Comparisons / Materialized Path
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. warning:: this section details an experimental feature.

Using custom expressions means we can produce unorthodox join conditions that
don't obey the usual primary/foreign key model.  One such example is the
materialized path pattern, where we compare strings for overlapping path tokens
in order to produce a tree structure.

Through careful use of :func:`.foreign` and :func:`.remote`, we can build
a relationship that effectively produces a rudimentary materialized path
system.  Essentially, when :func:`.foreign` and :func:`.remote` are
on the *same* side of the comparison expression, the relationship is considered
to be "one to many"; when they are on *different* sides, the relationship
is considered to be "many to one".
For the comparison we'll use here, -we'll be dealing with collections so we keep things configured as "one to many":: - - class Element(Base): - __tablename__ = 'element' - - path = Column(String, primary_key=True) - - descendants = relationship('Element', - primaryjoin= - remote(foreign(path)).like( - path.concat('/%')), - viewonly=True, - order_by=path) - -Above, if given an ``Element`` object with a path attribute of ``"/foo/bar2"``, -we seek for a load of ``Element.descendants`` to look like:: - - SELECT element.path AS element_path - FROM element - WHERE element.path LIKE ('/foo/bar2' || '/%') ORDER BY element.path - -.. versionadded:: 0.9.5 Support has been added to allow a single-column - comparison to itself within a primaryjoin condition, as well as for - primaryjoin conditions that use :meth:`.Operators.like` as the comparison - operator. - -.. _self_referential_many_to_many: - -Self-Referential Many-to-Many Relationship -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Many to many relationships can be customized by one or both of :paramref:`~.relationship.primaryjoin` -and :paramref:`~.relationship.secondaryjoin` - the latter is significant for a relationship that -specifies a many-to-many reference using the :paramref:`~.relationship.secondary` argument. -A common situation which involves the usage of :paramref:`~.relationship.primaryjoin` and :paramref:`~.relationship.secondaryjoin` -is when establishing a many-to-many relationship from a class to itself, as shown below:: - - from sqlalchemy import Integer, ForeignKey, String, Column, Table - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - node_to_node = Table("node_to_node", Base.metadata, - Column("left_node_id", Integer, ForeignKey("node.id"), primary_key=True), - Column("right_node_id", Integer, ForeignKey("node.id"), primary_key=True) - ) - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - label = Column(String) - right_nodes = relationship("Node", - secondary=node_to_node, - primaryjoin=id==node_to_node.c.left_node_id, - secondaryjoin=id==node_to_node.c.right_node_id, - backref="left_nodes" - ) - -Where above, SQLAlchemy can't know automatically which columns should connect -to which for the ``right_nodes`` and ``left_nodes`` relationships. The :paramref:`~.relationship.primaryjoin` -and :paramref:`~.relationship.secondaryjoin` arguments establish how we'd like to join to the association table. -In the Declarative form above, as we are declaring these conditions within the Python -block that corresponds to the ``Node`` class, the ``id`` variable is available directly -as the :class:`.Column` object we wish to join with. - -Alternatively, we can define the :paramref:`~.relationship.primaryjoin` -and :paramref:`~.relationship.secondaryjoin` arguments using strings, which is suitable -in the case that our configuration does not have either the ``Node.id`` column -object available yet or the ``node_to_node`` table perhaps isn't yet available. 
-When referring to a plain :class:`.Table` object in a declarative string, we -use the string name of the table as it is present in the :class:`.MetaData`:: - - class Node(Base): - __tablename__ = 'node' - id = Column(Integer, primary_key=True) - label = Column(String) - right_nodes = relationship("Node", - secondary="node_to_node", - primaryjoin="Node.id==node_to_node.c.left_node_id", - secondaryjoin="Node.id==node_to_node.c.right_node_id", - backref="left_nodes" - ) - -A classical mapping situation here is similar, where ``node_to_node`` can be joined -to ``node.c.id``:: - - from sqlalchemy import Integer, ForeignKey, String, Column, Table, MetaData - from sqlalchemy.orm import relationship, mapper - - metadata = MetaData() - - node_to_node = Table("node_to_node", metadata, - Column("left_node_id", Integer, ForeignKey("node.id"), primary_key=True), - Column("right_node_id", Integer, ForeignKey("node.id"), primary_key=True) - ) - - node = Table("node", metadata, - Column('id', Integer, primary_key=True), - Column('label', String) - ) - class Node(object): - pass - - mapper(Node, node, properties={ - 'right_nodes':relationship(Node, - secondary=node_to_node, - primaryjoin=node.c.id==node_to_node.c.left_node_id, - secondaryjoin=node.c.id==node_to_node.c.right_node_id, - backref="left_nodes" - )}) - - -Note that in both examples, the :paramref:`~.relationship.backref` -keyword specifies a ``left_nodes`` backref - when -:func:`.relationship` creates the second relationship in the reverse -direction, it's smart enough to reverse the -:paramref:`~.relationship.primaryjoin` and -:paramref:`~.relationship.secondaryjoin` arguments. - -.. _composite_secondary_join: - -Composite "Secondary" Joins -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. note:: - - This section features some new and experimental features of SQLAlchemy. - -Sometimes, when one seeks to build a :func:`.relationship` between two tables -there is a need for more than just two or three tables to be involved in -order to join them. This is an area of :func:`.relationship` where one seeks -to push the boundaries of what's possible, and often the ultimate solution to -many of these exotic use cases needs to be hammered out on the SQLAlchemy mailing -list. - -In more recent versions of SQLAlchemy, the :paramref:`~.relationship.secondary` -parameter can be used in some of these cases in order to provide a composite -target consisting of multiple tables. Below is an example of such a -join condition (requires version 0.9.2 at least to function as is):: - - class A(Base): - __tablename__ = 'a' - - id = Column(Integer, primary_key=True) - b_id = Column(ForeignKey('b.id')) - - d = relationship("D", - secondary="join(B, D, B.d_id == D.id)." - "join(C, C.d_id == D.id)", - primaryjoin="and_(A.b_id == B.id, A.id == C.a_id)", - secondaryjoin="D.id == B.d_id", - uselist=False - ) - - class B(Base): - __tablename__ = 'b' - - id = Column(Integer, primary_key=True) - d_id = Column(ForeignKey('d.id')) - - class C(Base): - __tablename__ = 'c' - - id = Column(Integer, primary_key=True) - a_id = Column(ForeignKey('a.id')) - d_id = Column(ForeignKey('d.id')) - - class D(Base): - __tablename__ = 'd' - - id = Column(Integer, primary_key=True) - -In the above example, we provide all three of :paramref:`~.relationship.secondary`, -:paramref:`~.relationship.primaryjoin`, and :paramref:`~.relationship.secondaryjoin`, -in the declarative style referring to the named tables ``a``, ``b``, ``c``, ``d`` -directly. A query from ``A`` to ``D`` looks like: - -.. 
sourcecode:: python+sql - - sess.query(A).join(A.d).all() - - {opensql}SELECT a.id AS a_id, a.b_id AS a_b_id - FROM a JOIN ( - b AS b_1 JOIN d AS d_1 ON b_1.d_id = d_1.id - JOIN c AS c_1 ON c_1.d_id = d_1.id) - ON a.b_id = b_1.id AND a.id = c_1.a_id JOIN d ON d.id = b_1.d_id - -In the above example, we take advantage of being able to stuff multiple -tables into a "secondary" container, so that we can join across many -tables while still keeping things "simple" for :func:`.relationship`, in that -there's just "one" table on both the "left" and the "right" side; the -complexity is kept within the middle. - -.. versionadded:: 0.9.2 Support is improved for allowing a :func:`.join()` - construct to be used directly as the target of the :paramref:`~.relationship.secondary` - argument, including support for joins, eager joins and lazy loading, - as well as support within declarative to specify complex conditions such - as joins involving class names as targets. - -.. _relationship_non_primary_mapper: - -Relationship to Non Primary Mapper -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In the previous section, we illustrated a technique where we used -:paramref:`~.relationship.secondary` in order to place additional -tables within a join condition. There is one complex join case where -even this technique is not sufficient; when we seek to join from ``A`` -to ``B``, making use of any number of ``C``, ``D``, etc. in between, -however there are also join conditions between ``A`` and ``B`` -*directly*. In this case, the join from ``A`` to ``B`` may be -difficult to express with just a complex -:paramref:`~.relationship.primaryjoin` condition, as the intermediary -tables may need special handling, and it is also not expressable with -a :paramref:`~.relationship.secondary` object, since the -``A->secondary->B`` pattern does not support any references between -``A`` and ``B`` directly. When this **extremely advanced** case -arises, we can resort to creating a second mapping as a target for the -relationship. This is where we use :func:`.mapper` in order to make a -mapping to a class that includes all the additional tables we need for -this join. In order to produce this mapper as an "alternative" mapping -for our class, we use the :paramref:`~.mapper.non_primary` flag. - -Below illustrates a :func:`.relationship` with a simple join from ``A`` to -``B``, however the primaryjoin condition is augmented with two additional -entities ``C`` and ``D``, which also must have rows that line up with -the rows in both ``A`` and ``B`` simultaneously:: - - class A(Base): - __tablename__ = 'a' - - id = Column(Integer, primary_key=True) - b_id = Column(ForeignKey('b.id')) - - class B(Base): - __tablename__ = 'b' - - id = Column(Integer, primary_key=True) - - class C(Base): - __tablename__ = 'c' - - id = Column(Integer, primary_key=True) - a_id = Column(ForeignKey('a.id')) - - class D(Base): - __tablename__ = 'd' - - id = Column(Integer, primary_key=True) - c_id = Column(ForeignKey('c.id')) - b_id = Column(ForeignKey('b.id')) - - # 1. set up the join() as a variable, so we can refer - # to it in the mapping multiple times. - j = join(B, D, D.b_id == B.id).join(C, C.id == D.c_id) - - # 2. Create a new mapper() to B, with non_primary=True. - # Columns in the join with the same name must be - # disambiguated within the mapping, using named properties. 
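    # (note: a join() construct exposes its columns in the ".c"
    # collection keyed as <tablename>_<columnname>, which is why
    # names like j.c.b_id, j.c.d_b_id and j.c.d_id appear below)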
- B_viacd = mapper(B, j, non_primary=True, properties={ - "b_id": [j.c.b_id, j.c.d_b_id], - "d_id": j.c.d_id - }) - - A.b = relationship(B_viacd, primaryjoin=A.b_id == B_viacd.c.b_id) - -In the above case, our non-primary mapper for ``B`` will emit for -additional columns when we query; these can be ignored: - -.. sourcecode:: python+sql - - sess.query(A).join(A.b).all() - - {opensql}SELECT a.id AS a_id, a.b_id AS a_b_id - FROM a JOIN (b JOIN d ON d.b_id = b.id JOIN c ON c.id = d.c_id) ON a.b_id = b.id - - -Building Query-Enabled Properties -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Very ambitious custom join conditions may fail to be directly persistable, and -in some cases may not even load correctly. To remove the persistence part of -the equation, use the flag :paramref:`~.relationship.viewonly` on the -:func:`~sqlalchemy.orm.relationship`, which establishes it as a read-only -attribute (data written to the collection will be ignored on flush()). -However, in extreme cases, consider using a regular Python property in -conjunction with :class:`.Query` as follows: - -.. sourcecode:: python+sql - - class User(Base): - __tablename__ = 'user' - id = Column(Integer, primary_key=True) - - def _get_addresses(self): - return object_session(self).query(Address).with_parent(self).filter(...).all() - addresses = property(_get_addresses) - - -.. _post_update: - -Rows that point to themselves / Mutually Dependent Rows -------------------------------------------------------- - -This is a very specific case where relationship() must perform an INSERT and a -second UPDATE in order to properly populate a row (and vice versa an UPDATE -and DELETE in order to delete without violating foreign key constraints). The -two use cases are: - -* A table contains a foreign key to itself, and a single row will - have a foreign key value pointing to its own primary key. -* Two tables each contain a foreign key referencing the other - table, with a row in each table referencing the other. - -For example:: - - user - --------------------------------- - user_id name related_user_id - 1 'ed' 1 - -Or:: - - widget entry - ------------------------------------------- --------------------------------- - widget_id name favorite_entry_id entry_id name widget_id - 1 'somewidget' 5 5 'someentry' 1 - -In the first case, a row points to itself. Technically, a database that uses -sequences such as PostgreSQL or Oracle can INSERT the row at once using a -previously generated value, but databases which rely upon autoincrement-style -primary key identifiers cannot. The :func:`~sqlalchemy.orm.relationship` -always assumes a "parent/child" model of row population during flush, so -unless you are populating the primary key/foreign key columns directly, -:func:`~sqlalchemy.orm.relationship` needs to use two statements. - -In the second case, the "widget" row must be inserted before any referring -"entry" rows, but then the "favorite_entry_id" column of that "widget" row -cannot be set until the "entry" rows have been generated. In this case, it's -typically impossible to insert the "widget" and "entry" rows using just two -INSERT statements; an UPDATE must be performed in order to keep foreign key -constraints fulfilled. The exception is if the foreign keys are configured as -"deferred until commit" (a feature some databases support) and if the -identifiers were populated manually (again essentially bypassing -:func:`~sqlalchemy.orm.relationship`). 
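Before looking at the solution, here is a sketch of what goes wrong without
it (an illustration only, borrowing the ``Widget``/``Entry`` mapping from the
example that follows, minus the ``post_update`` flag); a flush that populates
both foreign keys at once has no valid INSERT ordering and should fail with
``CircularDependencyError``::

    w1 = Widget(name='somewidget')
    e1 = Entry(name='someentry')
    w1.favorite_entry = e1   # the widget row needs the entry's primary key
    w1.entries = [e1]        # the entry row needs the widget's primary key

    session.add(w1)

    # without post_update, neither row can be inserted first:
    # raises sqlalchemy.exc.CircularDependencyError
    session.commit()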
To enable the usage of a supplementary UPDATE statement,
we use the :paramref:`~.relationship.post_update` option
of :func:`.relationship`.  This specifies that the linkage between the
two rows should be created using an UPDATE statement after both rows
have been INSERTed; it also causes the rows to be de-associated with
each other via UPDATE before a DELETE is emitted.  The flag should
be placed on just *one* of the relationships, preferably the
many-to-one side.  Below we illustrate
a complete example, including two :class:`.ForeignKey` constructs, one which
specifies :paramref:`~.ForeignKey.use_alter` to help with emitting CREATE TABLE statements::

    from sqlalchemy import Integer, ForeignKey, String, Column
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import relationship

    Base = declarative_base()

    class Entry(Base):
        __tablename__ = 'entry'
        entry_id = Column(Integer, primary_key=True)
        widget_id = Column(Integer, ForeignKey('widget.widget_id'))
        name = Column(String(50))

    class Widget(Base):
        __tablename__ = 'widget'

        widget_id = Column(Integer, primary_key=True)
        favorite_entry_id = Column(Integer,
                                ForeignKey('entry.entry_id',
                                use_alter=True,
                                name="fk_favorite_entry"))
        name = Column(String(50))

        entries = relationship(Entry, primaryjoin=
                                        widget_id==Entry.widget_id)
        favorite_entry = relationship(Entry,
                                    primaryjoin=
                                        favorite_entry_id==Entry.entry_id,
                                    post_update=True)

When a structure against the above configuration is flushed, the "widget" row will be
INSERTed minus the "favorite_entry_id" value, then all the "entry" rows will
be INSERTed referencing the parent "widget" row, and then an UPDATE statement
will populate the "favorite_entry_id" column of the "widget" table (it's one
row at a time for the time being):

.. sourcecode:: pycon+sql

    >>> w1 = Widget(name='somewidget')
    >>> e1 = Entry(name='someentry')
    >>> w1.favorite_entry = e1
    >>> w1.entries = [e1]
    >>> session.add_all([w1, e1])
    {sql}>>> session.commit()
    BEGIN (implicit)
    INSERT INTO widget (favorite_entry_id, name) VALUES (?, ?)
    (None, 'somewidget')
    INSERT INTO entry (widget_id, name) VALUES (?, ?)
    (1, 'someentry')
    UPDATE widget SET favorite_entry_id=? WHERE widget.widget_id = ?
    (1, 1)
    COMMIT

An additional configuration we can specify is to supply a more
comprehensive foreign key constraint on ``Widget``, such that
it's guaranteed that ``favorite_entry_id`` refers to an ``Entry``
that also refers to this ``Widget``.
We can use a composite foreign key, -as illustrated below:: - - from sqlalchemy import Integer, ForeignKey, String, \ - Column, UniqueConstraint, ForeignKeyConstraint - from sqlalchemy.ext.declarative import declarative_base - from sqlalchemy.orm import relationship - - Base = declarative_base() - - class Entry(Base): - __tablename__ = 'entry' - entry_id = Column(Integer, primary_key=True) - widget_id = Column(Integer, ForeignKey('widget.widget_id')) - name = Column(String(50)) - __table_args__ = ( - UniqueConstraint("entry_id", "widget_id"), - ) - - class Widget(Base): - __tablename__ = 'widget' - - widget_id = Column(Integer, autoincrement='ignore_fk', primary_key=True) - favorite_entry_id = Column(Integer) - - name = Column(String(50)) - - __table_args__ = ( - ForeignKeyConstraint( - ["widget_id", "favorite_entry_id"], - ["entry.widget_id", "entry.entry_id"], - name="fk_favorite_entry", use_alter=True - ), - ) - - entries = relationship(Entry, primaryjoin= - widget_id==Entry.widget_id, - foreign_keys=Entry.widget_id) - favorite_entry = relationship(Entry, - primaryjoin= - favorite_entry_id==Entry.entry_id, - foreign_keys=favorite_entry_id, - post_update=True) - -The above mapping features a composite :class:`.ForeignKeyConstraint` -bridging the ``widget_id`` and ``favorite_entry_id`` columns. To ensure -that ``Widget.widget_id`` remains an "autoincrementing" column we specify -:paramref:`~.Column.autoincrement` to the value ``"ignore_fk"`` -on :class:`.Column`, and additionally on each -:func:`.relationship` we must limit those columns considered as part of -the foreign key for the purposes of joining and cross-population. - -.. _passive_updates: - -Mutable Primary Keys / Update Cascades ---------------------------------------- - -When the primary key of an entity changes, related items -which reference the primary key must also be updated as -well. For databases which enforce referential integrity, -it's required to use the database's ON UPDATE CASCADE -functionality in order to propagate primary key changes -to referenced foreign keys - the values cannot be out -of sync for any moment. - -For databases that don't support this, such as SQLite and -MySQL without their referential integrity options turned -on, the :paramref:`~.relationship.passive_updates` flag can -be set to ``False``, most preferably on a one-to-many or -many-to-many :func:`.relationship`, which instructs -SQLAlchemy to issue UPDATE statements individually for -objects referenced in the collection, loading them into -memory if not already locally present. The -:paramref:`~.relationship.passive_updates` flag can also be ``False`` in -conjunction with ON UPDATE CASCADE functionality, -although in that case the unit of work will be issuing -extra SELECT and UPDATE statements unnecessarily. 
- -A typical mutable primary key setup might look like:: - - class User(Base): - __tablename__ = 'user' - - username = Column(String(50), primary_key=True) - fullname = Column(String(100)) - - # passive_updates=False *only* needed if the database - # does not implement ON UPDATE CASCADE - addresses = relationship("Address", passive_updates=False) - - class Address(Base): - __tablename__ = 'address' - - email = Column(String(50), primary_key=True) - username = Column(String(50), - ForeignKey('user.username', onupdate="cascade") - ) - -:paramref:`~.relationship.passive_updates` is set to ``True`` by default, -indicating that ON UPDATE CASCADE is expected to be in -place in the usual case for foreign keys that expect -to have a mutating parent key. - -A :paramref:`~.relationship.passive_updates` setting of False may be configured on any -direction of relationship, i.e. one-to-many, many-to-one, -and many-to-many, although it is much more effective when -placed just on the one-to-many or many-to-many side. -Configuring the :paramref:`~.relationship.passive_updates` -to False only on the -many-to-one side will have only a partial effect, as the -unit of work searches only through the current identity -map for objects that may be referencing the one with a -mutating primary key, not throughout the database. - -Relationships API ------------------ - -.. autofunction:: relationship - -.. autofunction:: backref - -.. autofunction:: relation - -.. autofunction:: dynamic_loader - -.. autofunction:: foreign - -.. autofunction:: remote - - +of its usage. For an introduction to relationships, start with the +:ref:`ormtutorial_toplevel` and head into :ref:`orm_tutorial_relationship`. + +.. toctree:: + :maxdepth: 2 + + basic_relationships + self_referential + backref + join_conditions + collections + relationship_persistence + relationship_api diff --git a/doc/build/orm/scalar_mapping.rst b/doc/build/orm/scalar_mapping.rst new file mode 100644 index 000000000..65efd5dbd --- /dev/null +++ b/doc/build/orm/scalar_mapping.rst @@ -0,0 +1,18 @@ +.. module:: sqlalchemy.orm + +=============================== +Mapping Columns and Expressions +=============================== + +The following sections discuss how table columns and SQL expressions are +mapped to individual object attributes. + +.. toctree:: + :maxdepth: 2 + + mapping_columns + mapped_sql_expr + mapped_attributes + composites + + diff --git a/doc/build/orm/self_referential.rst b/doc/build/orm/self_referential.rst new file mode 100644 index 000000000..f6ed35fd6 --- /dev/null +++ b/doc/build/orm/self_referential.rst @@ -0,0 +1,261 @@ +.. _self_referential: + +Adjacency List Relationships +----------------------------- + +The **adjacency list** pattern is a common relational pattern whereby a table +contains a foreign key reference to itself. This is the most common +way to represent hierarchical data in flat tables. Other methods +include **nested sets**, sometimes called "modified preorder", +as well as **materialized path**. Despite the appeal that modified preorder +has when evaluated for its fluency within SQL queries, the adjacency list model is +probably the most appropriate pattern for the large majority of hierarchical +storage needs, for reasons of concurrency, reduced complexity, and that +modified preorder has little advantage over an application which can fully +load subtrees into the application space. 
+ +In this example, we'll work with a single mapped +class called ``Node``, representing a tree structure:: + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('node.id')) + data = Column(String(50)) + children = relationship("Node") + +With this structure, a graph such as the following:: + + root --+---> child1 + +---> child2 --+--> subchild1 + | +--> subchild2 + +---> child3 + +Would be represented with data such as:: + + id parent_id data + --- ------- ---- + 1 NULL root + 2 1 child1 + 3 1 child2 + 4 3 subchild1 + 5 3 subchild2 + 6 1 child3 + +The :func:`.relationship` configuration here works in the +same way as a "normal" one-to-many relationship, with the +exception that the "direction", i.e. whether the relationship +is one-to-many or many-to-one, is assumed by default to +be one-to-many. To establish the relationship as many-to-one, +an extra directive is added known as :paramref:`~.relationship.remote_side`, which +is a :class:`.Column` or collection of :class:`.Column` objects +that indicate those which should be considered to be "remote":: + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('node.id')) + data = Column(String(50)) + parent = relationship("Node", remote_side=[id]) + +Where above, the ``id`` column is applied as the :paramref:`~.relationship.remote_side` +of the ``parent`` :func:`.relationship`, thus establishing +``parent_id`` as the "local" side, and the relationship +then behaves as a many-to-one. + +As always, both directions can be combined into a bidirectional +relationship using the :func:`.backref` function:: + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('node.id')) + data = Column(String(50)) + children = relationship("Node", + backref=backref('parent', remote_side=[id]) + ) + +There are several examples included with SQLAlchemy illustrating +self-referential strategies; these include :ref:`examples_adjacencylist` and +:ref:`examples_xmlpersistence`. + +Composite Adjacency Lists +~~~~~~~~~~~~~~~~~~~~~~~~~ + +A sub-category of the adjacency list relationship is the rare +case where a particular column is present on both the "local" and +"remote" side of the join condition. An example is the ``Folder`` +class below; using a composite primary key, the ``account_id`` +column refers to itself, to indicate sub folders which are within +the same account as that of the parent; while ``folder_id`` refers +to a specific folder within that account:: + + class Folder(Base): + __tablename__ = 'folder' + __table_args__ = ( + ForeignKeyConstraint( + ['account_id', 'parent_id'], + ['folder.account_id', 'folder.folder_id']), + ) + + account_id = Column(Integer, primary_key=True) + folder_id = Column(Integer, primary_key=True) + parent_id = Column(Integer) + name = Column(String) + + parent_folder = relationship("Folder", + backref="child_folders", + remote_side=[account_id, folder_id] + ) + +Above, we pass ``account_id`` into the :paramref:`~.relationship.remote_side` list. +:func:`.relationship` recognizes that the ``account_id`` column here +is on both sides, and aligns the "remote" column along with the +``folder_id`` column, which it recognizes as uniquely present on +the "remote" side. + +.. versionadded:: 0.8 + Support for self-referential composite keys in :func:`.relationship` + where a column points to itself. 
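A brief usage sketch (hypothetical values, assuming the ``Folder`` mapping
above and a working ``session``) illustrates both the backref and the
composite foreign key synchronization::

    f1 = Folder(account_id=1, folder_id=1, name='inbox')
    f2 = Folder(account_id=1, folder_id=2, name='archive')

    f2.parent_folder = f1          # "child_folders" backref is populated
    assert f2 in f1.child_folders

    session.add_all([f1, f2])
    session.flush()

    # parent_id is synchronized from f1.folder_id during the flush
    assert f2.parent_id == f1.folder_id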
Self-Referential Query Strategies
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Querying of self-referential structures works like any other query::

    # get all nodes named 'child2'
    session.query(Node).filter(Node.data=='child2')

However, extra care is needed when attempting to join along
the foreign key from one level of the tree to the next.  In SQL,
a join from a table to itself requires that at least one side of the
expression be "aliased" so that it can be unambiguously referred to.

Recall from :ref:`ormtutorial_aliases` in the ORM tutorial that the
:func:`.orm.aliased` construct is normally used to provide an "alias" of
an ORM entity.  Joining from ``Node`` to itself using this technique
looks like:

.. sourcecode:: python+sql

    from sqlalchemy.orm import aliased

    nodealias = aliased(Node)
    {sql}session.query(Node).filter(Node.data=='subchild1').\
                    join(nodealias, Node.parent).\
                    filter(nodealias.data=="child2").\
                    all()
    SELECT node.id AS node_id,
            node.parent_id AS node_parent_id,
            node.data AS node_data
    FROM node JOIN node AS node_1
        ON node.parent_id = node_1.id
    WHERE node.data = ?
        AND node_1.data = ?
    ['subchild1', 'child2']

:meth:`.Query.join` also includes a feature known as
:paramref:`.Query.join.aliased` that can shorten the verbosity of
self-referential joins, at the expense of query flexibility.  This feature
performs a similar "aliasing" step to that above, without the need for
an explicit entity.  Calls to :meth:`.Query.filter` and similar
subsequent to the aliased join will **adapt** the ``Node`` entity to
be that of the alias:

.. sourcecode:: python+sql

    {sql}session.query(Node).filter(Node.data=='subchild1').\
        join(Node.parent, aliased=True).\
        filter(Node.data=='child2').\
        all()
    SELECT node.id AS node_id,
        node.parent_id AS node_parent_id,
        node.data AS node_data
    FROM node
        JOIN node AS node_1 ON node_1.id = node.parent_id
    WHERE node.data = ? AND node_1.data = ?
    ['subchild1', 'child2']

To add criteria to multiple points along a longer join, add
:paramref:`.Query.join.from_joinpoint` to the additional
:meth:`~.Query.join` calls:

.. sourcecode:: python+sql

    # get all nodes named 'subchild1' with a
    # parent named 'child2' and a grandparent 'root'
    {sql}session.query(Node).\
        filter(Node.data=='subchild1').\
        join(Node.parent, aliased=True).\
        filter(Node.data=='child2').\
        join(Node.parent, aliased=True, from_joinpoint=True).\
        filter(Node.data=='root').\
        all()
    SELECT node.id AS node_id,
        node.parent_id AS node_parent_id,
        node.data AS node_data
    FROM node
        JOIN node AS node_1 ON node_1.id = node.parent_id
        JOIN node AS node_2 ON node_2.id = node_1.parent_id
    WHERE node.data = ?
        AND node_1.data = ?
        AND node_2.data = ?
    ['subchild1', 'child2', 'root']

:meth:`.Query.reset_joinpoint` will also remove the "aliasing" from filtering
calls::

    session.query(Node).\
        join(Node.children, aliased=True).\
        filter(Node.data == 'foo').\
        reset_joinpoint().\
        filter(Node.data == 'bar')

For an example of using :paramref:`.Query.join.aliased` to
arbitrarily join along a chain of self-referential nodes, see
:ref:`examples_xmlpersistence`.

..
_self_referential_eager_loading: + +Configuring Self-Referential Eager Loading +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Eager loading of relationships occurs using joins or outerjoins from parent to +child table during a normal query operation, such that the parent and its +immediate child collection or reference can be populated from a single SQL +statement, or a second statement for all immediate child collections. +SQLAlchemy's joined and subquery eager loading use aliased tables in all cases +when joining to related items, so are compatible with self-referential +joining. However, to use eager loading with a self-referential relationship, +SQLAlchemy needs to be told how many levels deep it should join and/or query; +otherwise the eager load will not take place at all. This depth setting is +configured via :paramref:`~.relationships.join_depth`: + +.. sourcecode:: python+sql + + class Node(Base): + __tablename__ = 'node' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('node.id')) + data = Column(String(50)) + children = relationship("Node", + lazy="joined", + join_depth=2) + + {sql}session.query(Node).all() + SELECT node_1.id AS node_1_id, + node_1.parent_id AS node_1_parent_id, + node_1.data AS node_1_data, + node_2.id AS node_2_id, + node_2.parent_id AS node_2_parent_id, + node_2.data AS node_2_data, + node.id AS node_id, + node.parent_id AS node_parent_id, + node.data AS node_data + FROM node + LEFT OUTER JOIN node AS node_2 + ON node.id = node_2.parent_id + LEFT OUTER JOIN node AS node_1 + ON node_2.id = node_1.parent_id + [] + diff --git a/doc/build/orm/session.rst b/doc/build/orm/session.rst index 78ae1ba81..624ee9f75 100644 --- a/doc/build/orm/session.rst +++ b/doc/build/orm/session.rst @@ -11,2522 +11,15 @@ are the primary configurational interface for the ORM. Once mappings are configured, the primary usage interface for persistence operations is the :class:`.Session`. -What does the Session do ? -========================== +.. toctree:: + :maxdepth: 2 -In the most general sense, the :class:`~.Session` establishes all -conversations with the database and represents a "holding zone" for all the -objects which you've loaded or associated with it during its lifespan. It -provides the entrypoint to acquire a :class:`.Query` object, which sends -queries to the database using the :class:`~.Session` object's current database -connection, populating result rows into objects that are then stored in the -:class:`.Session`, inside a structure called the `Identity Map -<http://martinfowler.com/eaaCatalog/identityMap.html>`_ - a data structure -that maintains unique copies of each object, where "unique" means "only one -object with a particular primary key". + session_basics + session_state_management + cascades + session_transaction + persistence_techniques + contextual + session_api -The :class:`.Session` begins in an essentially stateless form. Once queries -are issued or other objects are persisted with it, it requests a connection -resource from an :class:`.Engine` that is associated either with the -:class:`.Session` itself or with the mapped :class:`.Table` objects being -operated upon. This connection represents an ongoing transaction, which -remains in effect until the :class:`.Session` is instructed to commit or roll -back its pending state. 
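To make the identity map concrete, a minimal sketch (assuming a mapped
``User`` class and an already-configured ``session``) - two queries for the
same primary key return the very same object::

    u1 = session.query(User).get(5)
    u2 = session.query(User).get(5)

    # the identity map maintains one object per primary key
    # within this Session
    assert u1 is u2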
-
-All changes to objects maintained by a :class:`.Session` are tracked - before
-the database is queried again or before the current transaction is committed,
-it **flushes** all pending changes to the database. This is known as the `Unit
-of Work <http://martinfowler.com/eaaCatalog/unitOfWork.html>`_ pattern.
-
-When using a :class:`.Session`, it's important to note that the objects
-which are associated with it are **proxy objects** to the transaction being
-held by the :class:`.Session` - there are a variety of events that will cause
-objects to re-access the database in order to keep synchronized.  It is
-possible to "detach" objects from a :class:`.Session`, and to continue using
-them, though this practice has its caveats.  It's intended that
-usually, you'd re-associate detached objects with another :class:`.Session` when you
-want to work with them again, so that they can resume their normal task of
-representing database state.
-
-.. _session_getting:
-
-Getting a Session
-=================
-
-:class:`.Session` is a regular Python class which can
-be directly instantiated. However, to standardize how sessions are configured
-and acquired, the :class:`.sessionmaker` class is normally
-used to create a top level :class:`.Session`
-configuration which can then be used throughout an application without the
-need to repeat the configurational arguments.
-
-The usage of :class:`.sessionmaker` is illustrated below:
-
-.. sourcecode:: python+sql
-
-    from sqlalchemy import create_engine
-    from sqlalchemy.orm import sessionmaker
-
-    # an Engine, which the Session will use for connection
-    # resources
-    some_engine = create_engine('postgresql://scott:tiger@localhost/')
-
-    # create a configured "Session" class
-    Session = sessionmaker(bind=some_engine)
-
-    # create a Session
-    session = Session()
-
-    # work with session
-    myobject = MyObject('foo', 'bar')
-    session.add(myobject)
-    session.commit()
-
-Above, the :class:`.sessionmaker` call creates a factory for us,
-which we assign to the name ``Session``.  This factory, when
-called, will create a new :class:`.Session` object using the configurational
-arguments we've given the factory.  In this case, as is typical,
-we've configured the factory to specify a particular :class:`.Engine` for
-connection resources.
-
-A typical setup will associate the :class:`.sessionmaker` with an :class:`.Engine`,
-so that each :class:`.Session` generated will use this :class:`.Engine`
-to acquire connection resources.   This association can
-be set up as in the example above, using the ``bind`` argument.
-
-When you write your application, place the
-:class:`.sessionmaker` factory at the global level.   This
-factory can then
-be used by the rest of the application as the source of new :class:`.Session`
-instances, keeping the configuration for how :class:`.Session` objects
-are constructed in one place.
-
-The :class:`.sessionmaker` factory can also be used in conjunction with
-other helpers, which are passed a user-defined :class:`.sessionmaker` that
-is then maintained by the helper.  Some of these helpers are discussed in the
-section :ref:`session_faq_whentocreate`.
-
-Adding Additional Configuration to an Existing sessionmaker()
---------------------------------------------------------------
-
-A common scenario is one where the :class:`.sessionmaker` is invoked
-at module import time, but the one or more :class:`.Engine`
-instances to be associated with the :class:`.sessionmaker` have not yet been created.
-For this use case, the :class:`.sessionmaker` construct offers the -:meth:`.sessionmaker.configure` method, which will place additional configuration -directives into an existing :class:`.sessionmaker` that will take place -when the construct is invoked:: - - - from sqlalchemy.orm import sessionmaker - from sqlalchemy import create_engine - - # configure Session class with desired options - Session = sessionmaker() - - # later, we create the engine - engine = create_engine('postgresql://...') - - # associate it with our custom Session class - Session.configure(bind=engine) - - # work with the session - session = Session() - -Creating Ad-Hoc Session Objects with Alternate Arguments ---------------------------------------------------------- - -For the use case where an application needs to create a new :class:`.Session` with -special arguments that deviate from what is normally used throughout the application, -such as a :class:`.Session` that binds to an alternate -source of connectivity, or a :class:`.Session` that should -have other arguments such as ``expire_on_commit`` established differently from -what most of the application wants, specific arguments can be passed to the -:class:`.sessionmaker` factory's :meth:`.sessionmaker.__call__` method. -These arguments will override whatever -configurations have already been placed, such as below, where a new :class:`.Session` -is constructed against a specific :class:`.Connection`:: - - # at the module level, the global sessionmaker, - # bound to a specific Engine - Session = sessionmaker(bind=engine) - - # later, some unit of code wants to create a - # Session that is bound to a specific Connection - conn = engine.connect() - session = Session(bind=conn) - -The typical rationale for the association of a :class:`.Session` with a specific -:class:`.Connection` is that of a test fixture that maintains an external -transaction - see :ref:`session_external_transaction` for an example of this. - -Using the Session -================== - -.. _session_object_states: - -Quickie Intro to Object States ------------------------------- - -It's helpful to know the states which an instance can have within a session: - -* **Transient** - an instance that's not in a session, and is not saved to the - database; i.e. it has no database identity. The only relationship such an - object has to the ORM is that its class has a ``mapper()`` associated with - it. - -* **Pending** - when you :meth:`~.Session.add` a transient - instance, it becomes pending. It still wasn't actually flushed to the - database yet, but it will be when the next flush occurs. - -* **Persistent** - An instance which is present in the session and has a record - in the database. You get persistent instances by either flushing so that the - pending instances become persistent, or by querying the database for - existing instances (or moving persistent instances from other sessions into - your local session). - -* **Detached** - an instance which has a record in the database, but is not in - any session. There's nothing wrong with this, and you can use objects - normally when they're detached, **except** they will not be able to issue - any SQL in order to load collections or attributes which are not yet loaded, - or were marked as "expired". - -Knowing these states is important, since the -:class:`.Session` tries to be strict about ambiguous -operations (such as trying to save the same object to two different sessions -at the same time). 
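-
-These transitions can be observed using the :func:`.inspect` API shown in
-the next section - a minimal sketch, assuming a hypothetical mapped ``User``
-class and a configured ``Session`` factory::
-
-    from sqlalchemy import inspect
-
-    user = User(name='ed')
-    print(inspect(user).transient)    # True - not in any session
-
-    session = Session()
-    session.add(user)
-    print(inspect(user).pending)      # True - will be INSERTed on flush
-
-    session.commit()
-    print(inspect(user).persistent)   # True - flushed, has a database identity
-
-    session.close()
-    print(inspect(user).detached)     # True - has an identity, but no session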
- -Getting the Current State of an Object -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The actual state of any mapped object can be viewed at any time using -the :func:`.inspect` system:: - - >>> from sqlalchemy import inspect - >>> insp = inspect(my_object) - >>> insp.persistent - True - -.. seealso:: - - :attr:`.InstanceState.transient` - - :attr:`.InstanceState.pending` - - :attr:`.InstanceState.persistent` - - :attr:`.InstanceState.detached` - - -.. _session_faq: - -Session Frequently Asked Questions ------------------------------------ - - -When do I make a :class:`.sessionmaker`? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Just one time, somewhere in your application's global scope. It should be -looked upon as part of your application's configuration. If your -application has three .py files in a package, you could, for example, -place the :class:`.sessionmaker` line in your ``__init__.py`` file; from -that point on your other modules say "from mypackage import Session". That -way, everyone else just uses :class:`.Session()`, -and the configuration of that session is controlled by that central point. - -If your application starts up, does imports, but does not know what -database it's going to be connecting to, you can bind the -:class:`.Session` at the "class" level to the -engine later on, using :meth:`.sessionmaker.configure`. - -In the examples in this section, we will frequently show the -:class:`.sessionmaker` being created right above the line where we actually -invoke :class:`.Session`. But that's just for -example's sake! In reality, the :class:`.sessionmaker` would be somewhere -at the module level. The calls to instantiate :class:`.Session` -would then be placed at the point in the application where database -conversations begin. - -.. _session_faq_whentocreate: - -When do I construct a :class:`.Session`, when do I commit it, and when do I close it? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. topic:: tl;dr; - - As a general rule, keep the lifecycle of the session **separate and - external** from functions and objects that access and/or manipulate - database data. - -A :class:`.Session` is typically constructed at the beginning of a logical -operation where database access is potentially anticipated. - -The :class:`.Session`, whenever it is used to talk to the database, -begins a database transaction as soon as it starts communicating. -Assuming the ``autocommit`` flag is left at its recommended default -of ``False``, this transaction remains in progress until the :class:`.Session` -is rolled back, committed, or closed. The :class:`.Session` will -begin a new transaction if it is used again, subsequent to the previous -transaction ending; from this it follows that the :class:`.Session` -is capable of having a lifespan across many transactions, though only -one at a time. We refer to these two concepts as **transaction scope** -and **session scope**. - -The implication here is that the SQLAlchemy ORM is encouraging the -developer to establish these two scopes in their application, -including not only when the scopes begin and end, but also the -expanse of those scopes, for example should a single -:class:`.Session` instance be local to the execution flow within a -function or method, should it be a global object used by the -entire application, or somewhere in between these two. 
-
-The burden placed on the developer to determine this scope is one
-area where the SQLAlchemy ORM necessarily has a strong opinion
-about how the database should be used.  The :term:`unit of work` pattern
-is specifically one of accumulating changes over time and flushing
-them periodically, keeping in-memory state in sync with what's
-known to be present in a local transaction. This pattern is only
-effective when meaningful transaction scopes are in place.
-
-It's usually not very hard to determine the best points at which
-to begin and end the scope of a :class:`.Session`, though the wide
-variety of application architectures possible can introduce
-challenging situations.
-
-A common choice is to tear down the :class:`.Session` at the same
-time the transaction ends, meaning the transaction and session scopes
-are the same.  This is a great choice to start out with as it
-removes the need to consider session scope as separate from transaction
-scope.
-
-While there's no one-size-fits-all recommendation for how transaction
-scope should be determined, there are common patterns.   Especially
-if one is writing a web application, the choice is pretty much established.
-
-A web application is the easiest case because such an application is already
-constructed around a single, consistent scope - this is the **request**,
-which represents an incoming request from a browser, the processing
-of that request to formulate a response, and finally the delivery of that
-response back to the client.    Integrating web applications with the
-:class:`.Session` is then the straightforward task of linking the
-scope of the :class:`.Session` to that of the request.  The :class:`.Session`
-can be established as the request begins, or using a :term:`lazy initialization`
-pattern which establishes one as soon as it is needed.  The request
-then proceeds, with some system in place where application logic can access
-the current :class:`.Session` in a manner associated with how the actual
-request object is accessed.  As the request ends, the :class:`.Session`
-is torn down as well, usually through the usage of event hooks provided
-by the web framework.   The transaction used by the :class:`.Session`
-may also be committed at this point, or alternatively the application may
-opt for an explicit commit pattern, only committing for those requests
-where one is warranted, but still always tearing down the :class:`.Session`
-unconditionally at the end.
-
-Some web frameworks include infrastructure to assist in the task
-of aligning the lifespan of a :class:`.Session` with that of a web request.
-This includes products such as `Flask-SQLAlchemy <http://packages.python.org/Flask-SQLAlchemy/>`_,
-for usage in conjunction with the Flask web framework,
-and `Zope-SQLAlchemy <http://pypi.python.org/pypi/zope.sqlalchemy>`_,
-typically used with the Pyramid framework.
-SQLAlchemy recommends that these products be used as available.
-
-In those situations where the integration libraries are not
-provided or are insufficient, SQLAlchemy includes its own "helper" class known as
-:class:`.scoped_session`.   A tutorial on the usage of this object
-is at :ref:`unitofwork_contextual`.   It provides both a quick way
-to associate a :class:`.Session` with the current thread, as well as
-patterns to associate :class:`.Session` objects with other kinds of
-scopes.
-
-As mentioned before, for non-web applications there is no one clear
-pattern, as applications themselves don't have just one pattern
-of architecture.
The best strategy is to attempt to demarcate -"operations", points at which a particular thread begins to perform -a series of operations for some period of time, which can be committed -at the end. Some examples: - -* A background daemon which spawns off child forks - would want to create a :class:`.Session` local to each child - process, work with that :class:`.Session` through the life of the "job" - that the fork is handling, then tear it down when the job is completed. - -* For a command-line script, the application would create a single, global - :class:`.Session` that is established when the program begins to do its - work, and commits it right as the program is completing its task. - -* For a GUI interface-driven application, the scope of the :class:`.Session` - may best be within the scope of a user-generated event, such as a button - push. Or, the scope may correspond to explicit user interaction, such as - the user "opening" a series of records, then "saving" them. - -As a general rule, the application should manage the lifecycle of the -session *externally* to functions that deal with specific data. This is a -fundamental separation of concerns which keeps data-specific operations -agnostic of the context in which they access and manipulate that data. - -E.g. **don't do this**:: - - ### this is the **wrong way to do it** ### - - class ThingOne(object): - def go(self): - session = Session() - try: - session.query(FooBar).update({"x": 5}) - session.commit() - except: - session.rollback() - raise - - class ThingTwo(object): - def go(self): - session = Session() - try: - session.query(Widget).update({"q": 18}) - session.commit() - except: - session.rollback() - raise - - def run_my_program(): - ThingOne().go() - ThingTwo().go() - -Keep the lifecycle of the session (and usually the transaction) -**separate and external**:: - - ### this is a **better** (but not the only) way to do it ### - - class ThingOne(object): - def go(self, session): - session.query(FooBar).update({"x": 5}) - - class ThingTwo(object): - def go(self, session): - session.query(Widget).update({"q": 18}) - - def run_my_program(): - session = Session() - try: - ThingOne().go(session) - ThingTwo().go(session) - - session.commit() - except: - session.rollback() - raise - finally: - session.close() - -The advanced developer will try to keep the details of session, transaction -and exception management as far as possible from the details of the program -doing its work. For example, we can further separate concerns using a `context manager <http://docs.python.org/3/library/contextlib.html#contextlib.contextmanager>`_:: - - ### another way (but again *not the only way*) to do it ### - - from contextlib import contextmanager - - @contextmanager - def session_scope(): - """Provide a transactional scope around a series of operations.""" - session = Session() - try: - yield session - session.commit() - except: - session.rollback() - raise - finally: - session.close() - - - def run_my_program(): - with session_scope() as session: - ThingOne().go(session) - ThingTwo().go(session) - - -Is the Session a cache? -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Yeee...no. It's somewhat used as a cache, in that it implements the -:term:`identity map` pattern, and stores objects keyed to their primary key. -However, it doesn't do any kind of query caching. This means, if you say -``session.query(Foo).filter_by(name='bar')``, even if ``Foo(name='bar')`` -is right there, in the identity map, the session has no idea about that. 
-It has to issue SQL to the database, get the rows back, and then when it -sees the primary key in the row, *then* it can look in the local identity -map and see that the object is already there. It's only when you say -``query.get({some primary key})`` that the -:class:`~sqlalchemy.orm.session.Session` doesn't have to issue a query. - -Additionally, the Session stores object instances using a weak reference -by default. This also defeats the purpose of using the Session as a cache. - -The :class:`.Session` is not designed to be a -global object from which everyone consults as a "registry" of objects. -That's more the job of a **second level cache**. SQLAlchemy provides -a pattern for implementing second level caching using `dogpile.cache <http://dogpilecache.readthedocs.org/>`_, -via the :ref:`examples_caching` example. - -How can I get the :class:`~sqlalchemy.orm.session.Session` for a certain object? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Use the :meth:`~.Session.object_session` classmethod -available on :class:`~sqlalchemy.orm.session.Session`:: - - session = Session.object_session(someobject) - -The newer :ref:`core_inspection_toplevel` system can also be used:: - - from sqlalchemy import inspect - session = inspect(someobject).session - -.. _session_faq_threadsafe: - -Is the session thread-safe? -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :class:`.Session` is very much intended to be used in a -**non-concurrent** fashion, which usually means in only one thread at a -time. - -The :class:`.Session` should be used in such a way that one -instance exists for a single series of operations within a single -transaction. One expedient way to get this effect is by associating -a :class:`.Session` with the current thread (see :ref:`unitofwork_contextual` -for background). Another is to use a pattern -where the :class:`.Session` is passed between functions and is otherwise -not shared with other threads. - -The bigger point is that you should not *want* to use the session -with multiple concurrent threads. That would be like having everyone at a -restaurant all eat from the same plate. The session is a local "workspace" -that you use for a specific set of tasks; you don't want to, or need to, -share that session with other threads who are doing some other task. - -Making sure the :class:`.Session` is only used in a single concurrent thread at a time -is called a "share nothing" approach to concurrency. But actually, not -sharing the :class:`.Session` implies a more significant pattern; it -means not just the :class:`.Session` object itself, but -also **all objects that are associated with that Session**, must be kept within -the scope of a single concurrent thread. The set of mapped -objects associated with a :class:`.Session` are essentially proxies for data -within database rows accessed over a database connection, and so just like -the :class:`.Session` itself, the whole -set of objects is really just a large-scale proxy for a database connection -(or connections). Ultimately, it's mostly the DBAPI connection itself that -we're keeping away from concurrent access; but since the :class:`.Session` -and all the objects associated with it are all proxies for that DBAPI connection, -the entire graph is essentially not safe for concurrent access. 
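-
-As a minimal sketch of the "Session per thread" pattern mentioned above,
-assuming a pre-existing ``engine``, the :class:`.scoped_session` helper
-maintains one :class:`.Session` per thread transparently::
-
-    from sqlalchemy.orm import scoped_session, sessionmaker
-
-    # each thread that calls Session() gets its own Session instance,
-    # keyed to the current thread
-    Session = scoped_session(sessionmaker(bind=engine))
-
-    def worker():
-        session = Session()   # thread-local Session
-        # ... do work with session ...
-        Session.remove()      # discard this thread's Session when done
-
-(See :ref:`unitofwork_contextual` for full detail on this helper.)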
- -If there are in fact multiple threads participating -in the same task, then you may consider sharing the session and its objects between -those threads; however, in this extremely unusual scenario the application would -need to ensure that a proper locking scheme is implemented so that there isn't -*concurrent* access to the :class:`.Session` or its state. A more common approach -to this situation is to maintain a single :class:`.Session` per concurrent thread, -but to instead *copy* objects from one :class:`.Session` to another, often -using the :meth:`.Session.merge` method to copy the state of an object into -a new object local to a different :class:`.Session`. - -Querying --------- - -The :meth:`~.Session.query` function takes one or more -*entities* and returns a new :class:`~sqlalchemy.orm.query.Query` object which -will issue mapper queries within the context of this Session. An entity is -defined as a mapped class, a :class:`~sqlalchemy.orm.mapper.Mapper` object, an -orm-enabled *descriptor*, or an ``AliasedClass`` object:: - - # query from a class - session.query(User).filter_by(name='ed').all() - - # query with multiple classes, returns tuples - session.query(User, Address).join('addresses').filter_by(name='ed').all() - - # query using orm-enabled descriptors - session.query(User.name, User.fullname).all() - - # query from a mapper - user_mapper = class_mapper(User) - session.query(user_mapper) - -When :class:`~sqlalchemy.orm.query.Query` returns results, each object -instantiated is stored within the identity map. When a row matches an object -which is already present, the same object is returned. In the latter case, -whether or not the row is populated onto an existing object depends upon -whether the attributes of the instance have been *expired* or not. A -default-configured :class:`~sqlalchemy.orm.session.Session` automatically -expires all instances along transaction boundaries, so that with a normally -isolated transaction, there shouldn't be any issue of instances representing -data which is stale with regards to the current transaction. - -The :class:`.Query` object is introduced in great detail in -:ref:`ormtutorial_toplevel`, and further documented in -:ref:`query_api_toplevel`. - -Adding New or Existing Items ----------------------------- - -:meth:`~.Session.add` is used to place instances in the -session. For *transient* (i.e. brand new) instances, this will have the effect -of an INSERT taking place for those instances upon the next flush. For -instances which are *persistent* (i.e. were loaded by this session), they are -already present and do not need to be added. Instances which are *detached* -(i.e. have been removed from a session) may be re-associated with a session -using this method:: - - user1 = User(name='user1') - user2 = User(name='user2') - session.add(user1) - session.add(user2) - - session.commit() # write changes to the database - -To add a list of items to the session at once, use -:meth:`~.Session.add_all`:: - - session.add_all([item1, item2, item3]) - -The :meth:`~.Session.add` operation **cascades** along -the ``save-update`` cascade. For more details see the section -:ref:`unitofwork_cascades`. - -.. _unitofwork_merging: - -Merging -------- - -:meth:`~.Session.merge` transfers state from an -outside object into a new or already existing instance within a session. 
It -also reconciles the incoming data against the state of the -database, producing a history stream which will be applied towards the next -flush, or alternatively can be made to produce a simple "transfer" of -state without producing change history or accessing the database. Usage is as follows:: - - merged_object = session.merge(existing_object) - -When given an instance, it follows these steps: - -* It examines the primary key of the instance. If it's present, it attempts - to locate that instance in the local identity map. If the ``load=True`` - flag is left at its default, it also checks the database for this primary - key if not located locally. -* If the given instance has no primary key, or if no instance can be found - with the primary key given, a new instance is created. -* The state of the given instance is then copied onto the located/newly - created instance. For attributes which are present on the source - instance, the value is transferred to the target instance. For mapped - attributes which aren't present on the source, the attribute is - expired on the target instance, discarding its existing value. - - If the ``load=True`` flag is left at its default, - this copy process emits events and will load the target object's - unloaded collections for each attribute present on the source object, - so that the incoming state can be reconciled against what's - present in the database. If ``load`` - is passed as ``False``, the incoming data is "stamped" directly without - producing any history. -* The operation is cascaded to related objects and collections, as - indicated by the ``merge`` cascade (see :ref:`unitofwork_cascades`). -* The new instance is returned. - -With :meth:`~.Session.merge`, the given "source" -instance is not modified nor is it associated with the target :class:`.Session`, -and remains available to be merged with any number of other :class:`.Session` -objects. :meth:`~.Session.merge` is useful for -taking the state of any kind of object structure without regard for its -origins or current session associations and copying its state into a -new session. Here's some examples: - -* An application which reads an object structure from a file and wishes to - save it to the database might parse the file, build up the - structure, and then use - :meth:`~.Session.merge` to save it - to the database, ensuring that the data within the file is - used to formulate the primary key of each element of the - structure. Later, when the file has changed, the same - process can be re-run, producing a slightly different - object structure, which can then be ``merged`` in again, - and the :class:`~sqlalchemy.orm.session.Session` will - automatically update the database to reflect those - changes, loading each object from the database by primary key and - then updating its state with the new state given. - -* An application is storing objects in an in-memory cache, shared by - many :class:`.Session` objects simultaneously. :meth:`~.Session.merge` - is used each time an object is retrieved from the cache to create - a local copy of it in each :class:`.Session` which requests it. - The cached object remains detached; only its state is moved into - copies of itself that are local to individual :class:`~.Session` - objects. - - In the caching use case, it's common to use the ``load=False`` - flag to remove the overhead of reconciling the object's state - with the database. 
There's also a "bulk" version of
-  :meth:`~.Session.merge` called :meth:`~.Query.merge_result`
-  that was designed to work with cache-extended :class:`.Query`
-  objects - see the section :ref:`examples_caching`.
-
-* An application wants to transfer the state of a series of objects
-  into a :class:`.Session` maintained by a worker thread or other
-  concurrent system.  :meth:`~.Session.merge` makes a copy of each object
-  to be placed into this new :class:`.Session`.  At the end of the operation,
-  the parent thread/process maintains the objects it started with,
-  and the thread/worker can proceed with local copies of those objects.
-
-  In the "transfer between threads/processes" use case, the application
-  may want to use the ``load=False`` flag as well to avoid overhead and
-  redundant SQL queries as the data is transferred.
-
-Merge Tips
-~~~~~~~~~~
-
-:meth:`~.Session.merge` is an extremely useful method for many purposes.  However,
-it deals with the intricate border between objects that are transient/detached and
-those that are persistent, as well as the automated transference of state.
-The wide variety of scenarios that can present themselves here often require a
-more careful approach to the state of objects.   Common problems with merge usually involve
-some unexpected state regarding the object being passed to :meth:`~.Session.merge`.
-
-Let's use the canonical example of the User and Address objects::
-
-    class User(Base):
-        __tablename__ = 'user'
-
-        id = Column(Integer, primary_key=True)
-        name = Column(String(50), nullable=False)
-        addresses = relationship("Address", backref="user")
-
-    class Address(Base):
-        __tablename__ = 'address'
-
-        id = Column(Integer, primary_key=True)
-        email_address = Column(String(50), nullable=False)
-        user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
-
-Assume a ``User`` object with one ``Address``, already persistent::
-
-    >>> u1 = User(name='ed', addresses=[Address(email_address='ed@ed.com')])
-    >>> session.add(u1)
-    >>> session.commit()
-
-We now create ``a1``, an object outside the session, which we'd like
-to merge on top of the existing ``Address``::
-
-    >>> existing_a1 = u1.addresses[0]
-    >>> a1 = Address(id=existing_a1.id)
-
-A surprise would occur if we said this::
-
-    >>> a1.user = u1
-    >>> a1 = session.merge(a1)
-    >>> session.commit()
-    sqlalchemy.orm.exc.FlushError: New instance <Address at 0x1298f50>
-    with identity key (<class '__main__.Address'>, (1,)) conflicts with
-    persistent instance <Address at 0x12a25d0>
-
-Why is that?  We weren't careful with our cascades.  The assignment
-of ``a1.user`` to a persistent object cascaded to the backref of ``User.addresses``
-and made our ``a1`` object pending, as though we had added it.   Now we have
-*two* ``Address`` objects in the session::
-
-    >>> a1 = Address()
-    >>> a1.user = u1
-    >>> a1 in session
-    True
-    >>> existing_a1 in session
-    True
-    >>> a1 is existing_a1
-    False
-
-Above, our ``a1`` is already pending in the session.  The
-subsequent :meth:`~.Session.merge` operation essentially
-does nothing.  Cascade can be configured via the :paramref:`~.relationship.cascade`
-option on :func:`.relationship`, although in this case it
-would mean removing the ``save-update`` cascade from the
-``User.addresses`` relationship - and usually, that behavior
-is extremely convenient.  The solution here would usually be to not assign
-``a1.user`` to an object already persistent in the target
-session.
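-
-That is - as a minimal sketch of the working approach, continuing the
-example above - populate only the primary and foreign key columns and let
-:meth:`~.Session.merge` perform the reconciliation::
-
-    >>> # refer to the existing row by key values only; no relationship
-    >>> # assignment takes place, so no backref cascade occurs
-    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
-    >>> a1.email_address = 'ed@newdomain.com'
-    >>> a1 = session.merge(a1)
-    >>> session.commit()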
-
-The ``cascade_backrefs=False`` option of :func:`.relationship`
-will also prevent the ``Address`` from
-being added to the session via the ``a1.user = u1`` assignment.
-
-Further detail on cascade operation is at :ref:`unitofwork_cascades`.
-
-Another example of unexpected state::
-
-    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
-    >>> assert a1.user is None
-    >>> a1 = session.merge(a1)
-    >>> session.commit()
-    sqlalchemy.exc.IntegrityError: (IntegrityError) address.user_id
-    may not be NULL
-
-Here, we accessed ``a1.user``, which returned its default value
-of ``None``, which as a result of this access, has been placed in the ``__dict__`` of
-our object ``a1``.  Normally, this operation creates no change event,
-so the ``user_id`` attribute takes precedence during a
-flush.  But when we merge the ``Address`` object into the session, the operation
-is equivalent to::
-
-    >>> existing_a1.id = existing_a1.id
-    >>> existing_a1.user_id = u1.id
-    >>> existing_a1.user = None
-
-Where above, both ``user_id`` and ``user`` are assigned to, and change events
-are emitted for both.  The ``user`` association
-takes precedence, and None is applied to ``user_id``, causing a failure.
-
-Most :meth:`~.Session.merge` issues can be examined by first checking -
-is the object prematurely in the session?
-
-.. sourcecode:: python+sql
-
-    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
-    >>> assert a1 not in session
-    >>> a1 = session.merge(a1)
-
-Or is there state on the object that we don't want?  Examining ``__dict__``
-is a quick way to check::
-
-    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
-    >>> a1.user
-    >>> a1.__dict__
-    {'_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x1298d10>,
-        'user_id': 1,
-        'id': 1,
-        'user': None}
-    >>> # we don't want user=None merged, remove it
-    >>> del a1.user
-    >>> a1 = session.merge(a1)
-    >>> # success
-    >>> session.commit()
-
-Deleting
---------
-
-The :meth:`~.Session.delete` method places an instance
-into the Session's list of objects to be marked as deleted::
-
-    # mark two objects to be deleted
-    session.delete(obj1)
-    session.delete(obj2)
-
-    # commit (or flush)
-    session.commit()
-
-.. _session_deleting_from_collections:
-
-Deleting from Collections
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-A common confusion that arises regarding :meth:`~.Session.delete` is when
-objects which are members of a collection are being deleted.   While the
-collection member is marked for deletion from the database, this does not
-impact the collection itself in memory until the collection is expired.
-Below, we illustrate that even after an ``Address`` object is marked
-for deletion, it's still present in the collection associated with the
-parent ``User``, even after a flush::
-
-    >>> address = user.addresses[1]
-    >>> session.delete(address)
-    >>> session.flush()
-    >>> address in user.addresses
-    True
-
-When the above session is committed, all attributes are expired.  The next
-access of ``user.addresses`` will re-load the collection, revealing the
-desired state::
-
-    >>> session.commit()
-    >>> address in user.addresses
-    False
-
-The usual practice of deleting items within collections is to forego the usage
-of :meth:`~.Session.delete` directly, and instead use cascade behavior to
-automatically invoke the deletion as a result of removing the object from
-the parent collection.
The ``delete-orphan`` cascade accomplishes this, -as illustrated in the example below:: - - mapper(User, users_table, properties={ - 'addresses':relationship(Address, cascade="all, delete, delete-orphan") - }) - del user.addresses[1] - session.flush() - -Where above, upon removing the ``Address`` object from the ``User.addresses`` -collection, the ``delete-orphan`` cascade has the effect of marking the ``Address`` -object for deletion in the same way as passing it to :meth:`~.Session.delete`. - -See also :ref:`unitofwork_cascades` for detail on cascades. - -Deleting based on Filter Criterion -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The caveat with ``Session.delete()`` is that you need to have an object handy -already in order to delete. The Query includes a -:func:`~sqlalchemy.orm.query.Query.delete` method which deletes based on -filtering criteria:: - - session.query(User).filter(User.id==7).delete() - -The ``Query.delete()`` method includes functionality to "expire" objects -already in the session which match the criteria. However it does have some -caveats, including that "delete" and "delete-orphan" cascades won't be fully -expressed for collections which are already loaded. See the API docs for -:meth:`~sqlalchemy.orm.query.Query.delete` for more details. - -.. _session_flushing: - -Flushing --------- - -When the :class:`~sqlalchemy.orm.session.Session` is used with its default -configuration, the flush step is nearly always done transparently. -Specifically, the flush occurs before any individual -:class:`~sqlalchemy.orm.query.Query` is issued, as well as within the -:meth:`~.Session.commit` call before the transaction is -committed. It also occurs before a SAVEPOINT is issued when -:meth:`~.Session.begin_nested` is used. - -Regardless of the autoflush setting, a flush can always be forced by issuing -:meth:`~.Session.flush`:: - - session.flush() - -The "flush-on-Query" aspect of the behavior can be disabled by constructing -:class:`.sessionmaker` with the flag ``autoflush=False``:: - - Session = sessionmaker(autoflush=False) - -Additionally, autoflush can be temporarily disabled by setting the -``autoflush`` flag at any time:: - - mysession = Session() - mysession.autoflush = False - -Some autoflush-disable recipes are available at `DisableAutoFlush -<http://www.sqlalchemy.org/trac/wiki/UsageRecipes/DisableAutoflush>`_. - -The flush process *always* occurs within a transaction, even if the -:class:`~sqlalchemy.orm.session.Session` has been configured with -``autocommit=True``, a setting that disables the session's persistent -transactional state. If no transaction is present, -:meth:`~.Session.flush` creates its own transaction and -commits it. Any failures during flush will always result in a rollback of -whatever transaction is present. If the Session is not in ``autocommit=True`` -mode, an explicit call to :meth:`~.Session.rollback` is -required after a flush fails, even though the underlying transaction will have -been rolled back already - this is so that the overall nesting pattern of -so-called "subtransactions" is consistently maintained. - -.. _session_committing: - -Committing ----------- - -:meth:`~.Session.commit` is used to commit the current -transaction. It always issues :meth:`~.Session.flush` -beforehand to flush any remaining state to the database; this is independent -of the "autoflush" setting. If no transaction is present, it raises an error. 
-Note that the default behavior of the :class:`~sqlalchemy.orm.session.Session` -is that a "transaction" is always present; this behavior can be disabled by -setting ``autocommit=True``. In autocommit mode, a transaction can be -initiated by calling the :meth:`~.Session.begin` method. - -.. note:: - - The term "transaction" here refers to a transactional - construct within the :class:`.Session` itself which may be - maintaining zero or more actual database (DBAPI) transactions. An individual - DBAPI connection begins participation in the "transaction" as it is first - used to execute a SQL statement, then remains present until the session-level - "transaction" is completed. See :ref:`unitofwork_transaction` for - further detail. - -Another behavior of :meth:`~.Session.commit` is that by -default it expires the state of all instances present after the commit is -complete. This is so that when the instances are next accessed, either through -attribute access or by them being present in a -:class:`~sqlalchemy.orm.query.Query` result set, they receive the most recent -state. To disable this behavior, configure -:class:`.sessionmaker` with ``expire_on_commit=False``. - -Normally, instances loaded into the :class:`~sqlalchemy.orm.session.Session` -are never changed by subsequent queries; the assumption is that the current -transaction is isolated so the state most recently loaded is correct as long -as the transaction continues. Setting ``autocommit=True`` works against this -model to some degree since the :class:`~sqlalchemy.orm.session.Session` -behaves in exactly the same way with regard to attribute state, except no -transaction is present. - -.. _session_rollback: - -Rolling Back ------------- - -:meth:`~.Session.rollback` rolls back the current -transaction. With a default configured session, the post-rollback state of the -session is as follows: - - * All transactions are rolled back and all connections returned to the - connection pool, unless the Session was bound directly to a Connection, in - which case the connection is still maintained (but still rolled back). - * Objects which were initially in the *pending* state when they were added - to the :class:`~sqlalchemy.orm.session.Session` within the lifespan of the - transaction are expunged, corresponding to their INSERT statement being - rolled back. The state of their attributes remains unchanged. - * Objects which were marked as *deleted* within the lifespan of the - transaction are promoted back to the *persistent* state, corresponding to - their DELETE statement being rolled back. Note that if those objects were - first *pending* within the transaction, that operation takes precedence - instead. - * All objects not expunged are fully expired. - -With that state understood, the :class:`~sqlalchemy.orm.session.Session` may -safely continue usage after a rollback occurs. - -When a :meth:`~.Session.flush` fails, typically for -reasons like primary key, foreign key, or "not nullable" constraint -violations, a :meth:`~.Session.rollback` is issued -automatically (it's currently not possible for a flush to continue after a -partial failure). However, the flush process always uses its own transactional -demarcator called a *subtransaction*, which is described more fully in the -docstrings for :class:`~sqlalchemy.orm.session.Session`. 
What it means here is -that even though the database transaction has been rolled back, the end user -must still issue :meth:`~.Session.rollback` to fully -reset the state of the :class:`~sqlalchemy.orm.session.Session`. - -Expunging ---------- - -Expunge removes an object from the Session, sending persistent instances to -the detached state, and pending instances to the transient state: - -.. sourcecode:: python+sql - - session.expunge(obj1) - -To remove all items, call :meth:`~.Session.expunge_all` -(this method was formerly known as ``clear()``). - -Closing -------- - -The :meth:`~.Session.close` method issues a -:meth:`~.Session.expunge_all`, and :term:`releases` any -transactional/connection resources. When connections are returned to the -connection pool, transactional state is rolled back as well. - -.. _session_expire: - -Refreshing / Expiring ---------------------- - -:term:`Expiring` means that the database-persisted data held inside a series -of object attributes is erased, in such a way that when those attributes -are next accessed, a SQL query is emitted which will refresh that data from -the database. - -When we talk about expiration of data we are usually talking about an object -that is in the :term:`persistent` state. For example, if we load an object -as follows:: - - user = session.query(User).filter_by(name='user1').first() - -The above ``User`` object is persistent, and has a series of attributes -present; if we were to look inside its ``__dict__``, we'd see that state -loaded:: - - >>> user.__dict__ - { - 'id': 1, 'name': u'user1', - '_sa_instance_state': <...>, - } - -where ``id`` and ``name`` refer to those columns in the database. -``_sa_instance_state`` is a non-database-persisted value used by SQLAlchemy -internally (it refers to the :class:`.InstanceState` for the instance. -While not directly relevant to this section, if we want to get at it, -we should use the :func:`.inspect` function to access it). - -At this point, the state in our ``User`` object matches that of the loaded -database row. But upon expiring the object using a method such as -:meth:`.Session.expire`, we see that the state is removed:: - - >>> session.expire(user) - >>> user.__dict__ - {'_sa_instance_state': <...>} - -We see that while the internal "state" still hangs around, the values which -correspond to the ``id`` and ``name`` columns are gone. If we were to access -one of these columns and are watching SQL, we'd see this: - -.. sourcecode:: python+sql - - >>> print(user.name) - {opensql}SELECT user.id AS user_id, user.name AS user_name - FROM user - WHERE user.id = ? - (1,) - {stop}user1 - -Above, upon accessing the expired attribute ``user.name``, the ORM initiated -a :term:`lazy load` to retrieve the most recent state from the database, -by emitting a SELECT for the user row to which this user refers. Afterwards, -the ``__dict__`` is again populated:: - - >>> user.__dict__ - { - 'id': 1, 'name': u'user1', - '_sa_instance_state': <...>, - } - -.. note:: While we are peeking inside of ``__dict__`` in order to see a bit - of what SQLAlchemy does with object attributes, we **should not modify** - the contents of ``__dict__`` directly, at least as far as those attributes - which the SQLAlchemy ORM is maintaining (other attributes outside of SQLA's - realm are fine). This is because SQLAlchemy uses :term:`descriptors` in - order to track the changes we make to an object, and when we modify ``__dict__`` - directly, the ORM won't be able to track that we changed something. 
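-
-   A minimal sketch of the difference, assuming the ``user`` object from
-   the examples above::
-
-       user.name = 'new name'           # tracked; ``user`` shows up in
-       assert user in session.dirty     # the session's "dirty" collection
-
-       user.__dict__['name'] = 'other'  # bypasses the descriptor; no change
-                                        # event is emitted, so the ORM sees
-                                        # nothing to flush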
-
-Another key behavior of both :meth:`~.Session.expire` and :meth:`~.Session.refresh`
-is that all un-flushed changes on an object are discarded.  That is,
-if we were to modify an attribute on our ``User``::
-
-    >>> user.name = 'user2'
-
-but then we call :meth:`~.Session.expire` without first calling :meth:`~.Session.flush`,
-our pending value of ``'user2'`` is discarded::
-
-    >>> session.expire(user)
-    >>> user.name
-    'user1'
-
-The :meth:`~.Session.expire` method can be used to mark as "expired" all ORM-mapped
-attributes for an instance::
-
-    # expire all ORM-mapped attributes on obj1
-    session.expire(obj1)
-
-it can also be passed a list of string attribute names, referring to specific
-attributes to be marked as expired::
-
-    # expire only attributes obj1.attr1, obj1.attr2
-    session.expire(obj1, ['attr1', 'attr2'])
-
-The :meth:`~.Session.refresh` method has a similar interface, but instead
-of expiring, it emits an immediate SELECT for the object's row::
-
-    # reload all attributes on obj1
-    session.refresh(obj1)
-
-:meth:`~.Session.refresh` also accepts a list of string attribute names,
-but unlike :meth:`~.Session.expire`, expects at least one name to
-be that of a column-mapped attribute::
-
-    # reload obj1.attr1, obj1.attr2
-    session.refresh(obj1, ['attr1', 'attr2'])
-
-The :meth:`.Session.expire_all` method allows us to essentially call
-:meth:`.Session.expire` on all objects contained within the :class:`.Session`
-at once::
-
-    session.expire_all()
-
-What Actually Loads
-~~~~~~~~~~~~~~~~~~~
-
-The SELECT statement that's emitted when an object marked with :meth:`~.Session.expire`
-is accessed, or when an object is loaded with :meth:`~.Session.refresh`, varies
-based on several factors, including:
-
-* The load of expired attributes is triggered from **column-mapped attributes only**.
-  While any kind of attribute can be marked as expired, including a
-  :func:`.relationship`-mapped attribute, accessing an expired :func:`.relationship`
-  attribute will emit a load only for that attribute, using standard
-  relationship-oriented lazy loading.   Column-oriented attributes, even if
-  expired, will not load as part of this operation, and instead will load when
-  any column-oriented attribute is accessed.
-
-* :func:`.relationship`-mapped attributes will not load in response to
-  expired column-based attributes being accessed.
-
-* Regarding relationships, :meth:`~.Session.refresh` is more restrictive than
-  :meth:`~.Session.expire` with regards to attributes that aren't column-mapped.
-  Calling :meth:`.refresh` and passing a list of names that only includes
-  relationship-mapped attributes will actually raise an error.
-  In any case, non-eager-loading :func:`.relationship` attributes will not be
-  included in any refresh operation.
-
-* :func:`.relationship` attributes configured as "eager loading" via the
-  :paramref:`~.relationship.lazy` parameter will load in the case of
-  :meth:`~.Session.refresh`, if either no attribute names are specified, or
-  if their names are included in the list of attributes to be
-  refreshed.
-
-* Attributes that are configured as :func:`.deferred` will not normally load,
-  during either the expired-attribute load or during a refresh.
-  An unloaded attribute that's :func:`.deferred` instead loads on its own when directly
-  accessed, or if part of a "group" of deferred attributes where an unloaded
-  attribute in that group is accessed.
- -* For expired attributes that are loaded on access, a joined-inheritance table - mapping will emit a SELECT that typically only includes those tables for which - unloaded attributes are present. The action here is sophisticated enough - to load only the parent or child table, for example, if the subset of columns - that were originally expired encompass only one or the other of those tables. - -* When :meth:`~.Session.refresh` is used on a joined-inheritance table mapping, - the SELECT emitted will resemble that of when :meth:`.Session.query` is - used on the target object's class. This is typically all those tables that - are set up as part of the mapping. - - -When to Expire or Refresh -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -The :class:`.Session` uses the expiration feature automatically whenever -the transaction referred to by the session ends. Meaning, whenever :meth:`.Session.commit` -or :meth:`.Session.rollback` is called, all objects within the :class:`.Session` -are expired, using a feature equivalent to that of the :meth:`.Session.expire_all` -method. The rationale is that the end of a transaction is a -demarcating point at which there is no more context available in order to know -what the current state of the database is, as any number of other transactions -may be affecting it. Only when a new transaction starts can we again have access -to the current state of the database, at which point any number of changes -may have occurred. - -.. sidebar:: Transaction Isolation - - Of course, most databases are capable of handling - multiple transactions at once, even involving the same rows of data. When - a relational database handles multiple transactions involving the same - tables or rows, this is when the :term:`isolation` aspect of the database comes - into play. The isolation behavior of different databases varies considerably - and even on a single database can be configured to behave in different ways - (via the so-called :term:`isolation level` setting). In that sense, the :class:`.Session` - can't fully predict when the same SELECT statement, emitted a second time, - will definitely return the data we already have, or will return new data. - So as a best guess, it assumes that within the scope of a transaction, unless - it is known that a SQL expression has been emitted to modify a particular row, - there's no need to refresh a row unless explicitly told to do so. - -The :meth:`.Session.expire` and :meth:`.Session.refresh` methods are used in -those cases when one wants to force an object to re-load its data from the -database, in those cases when it is known that the current state of data -is possibly stale. Reasons for this might include: - -* some SQL has been emitted within the transaction outside of the - scope of the ORM's object handling, such as if a :meth:`.Table.update` construct - were emitted using the :meth:`.Session.execute` method; - -* if the application - is attempting to acquire data that is known to have been modified in a - concurrent transaction, and it is also known that the isolation rules in effect - allow this data to be visible. - -The second bullet has the important caveat that "it is also known that the isolation rules in effect -allow this data to be visible." This means that it cannot be assumed that an -UPDATE that happened on another database connection will yet be visible here -locally; in many cases, it will not. 
This is why if one wishes to use -:meth:`.expire` or :meth:`.refresh` in order to view data between ongoing -transactions, an understanding of the isolation behavior in effect is essential. - -.. seealso:: - - :meth:`.Session.expire` - - :meth:`.Session.expire_all` - - :meth:`.Session.refresh` - - :term:`isolation` - glossary explanation of isolation which includes links - to Wikipedia. - - `The SQLAlchemy Session In-Depth <http://techspot.zzzeek.org/2012/11/14/pycon-canada-the-sqlalchemy-session-in-depth/>`_ - a video + slides with an in-depth discussion of the object - lifecycle including the role of data expiration. - - -Session Attributes ------------------- - -The :class:`~sqlalchemy.orm.session.Session` itself acts somewhat like a -set-like collection. All items present may be accessed using the iterator -interface:: - - for obj in session: - print obj - -And presence may be tested for using regular "contains" semantics:: - - if obj in session: - print "Object is present" - -The session is also keeping track of all newly created (i.e. pending) objects, -all objects which have had changes since they were last loaded or saved (i.e. -"dirty"), and everything that's been marked as deleted:: - - # pending objects recently added to the Session - session.new - - # persistent objects which currently have changes detected - # (this collection is now created on the fly each time the property is called) - session.dirty - - # persistent objects that have been marked as deleted via session.delete(obj) - session.deleted - - # dictionary of all persistent objects, keyed on their - # identity key - session.identity_map - -(Documentation: :attr:`.Session.new`, :attr:`.Session.dirty`, -:attr:`.Session.deleted`, :attr:`.Session.identity_map`). - -Note that objects within the session are by default *weakly referenced*. This -means that when they are dereferenced in the outside application, they fall -out of scope from within the :class:`~sqlalchemy.orm.session.Session` as well -and are subject to garbage collection by the Python interpreter. The -exceptions to this include objects which are pending, objects which are marked -as deleted, or persistent objects which have pending changes on them. After a -full flush, these collections are all empty, and all objects are again weakly -referenced. To disable the weak referencing behavior and force all objects -within the session to remain until explicitly expunged, configure -:class:`.sessionmaker` with the ``weak_identity_map=False`` -setting. - -.. _unitofwork_cascades: - -Cascades -======== - -Mappers support the concept of configurable :term:`cascade` behavior on -:func:`~sqlalchemy.orm.relationship` constructs. This refers -to how operations performed on a "parent" object relative to a -particular :class:`.Session` should be propagated to items -referred to by that relationship (e.g. "child" objects), and is -affected by the :paramref:`.relationship.cascade` option. - -The default behavior of cascade is limited to cascades of the -so-called :ref:`cascade_save_update` and :ref:`cascade_merge` settings. -The typical "alternative" setting for cascade is to add -the :ref:`cascade_delete` and :ref:`cascade_delete_orphan` options; -these settings are appropriate for related objects which only exist as -long as they are attached to their parent, and are otherwise deleted. 
-
-Cascade behavior is configured by changing the
-:paramref:`~.relationship.cascade` option on
-:func:`~sqlalchemy.orm.relationship`::
-
-    class Order(Base):
-        __tablename__ = 'order'
-
-        items = relationship("Item", cascade="all, delete-orphan")
-        customer = relationship("User", cascade="save-update")
-
-To set cascades on a backref, the same flag can be used with the
-:func:`~.sqlalchemy.orm.backref` function, which ultimately feeds
-its arguments back into :func:`~sqlalchemy.orm.relationship`::
-
-    class Item(Base):
-        __tablename__ = 'item'
-
-        order = relationship("Order",
-                        backref=backref("items", cascade="all, delete-orphan")
-                    )
-
-.. sidebar:: The Origins of Cascade
-
-    SQLAlchemy's notion of cascading behavior on relationships,
-    as well as the options to configure them, are primarily derived
-    from the similar feature in the Hibernate ORM; Hibernate refers
-    to "cascade" in a few places such as in
-    `Example: Parent/Child <https://docs.jboss.org/hibernate/orm/3.3/reference/en-US/html/example-parentchild.html>`_.
-    If cascades are confusing, we'll refer to their conclusion,
-    stating "The sections we have just covered can be a bit confusing.
-    However, in practice, it all works out nicely."
-
-The default value of :paramref:`~.relationship.cascade` is ``save-update, merge``.
-The typical alternative setting for this parameter is either
-``all`` or more commonly ``all, delete-orphan``.  The ``all`` symbol
-is a synonym for ``save-update, merge, refresh-expire, expunge, delete``,
-and using it in conjunction with ``delete-orphan`` indicates that the child
-object should follow along with its parent in all cases, and be deleted once
-it is no longer associated with that parent.
-
-The list of available values which can be specified for
-the :paramref:`~.relationship.cascade` parameter are described in the following subsections.
-
-.. _cascade_save_update:
-
-save-update
------------
-
-``save-update`` cascade indicates that when an object is placed into a
-:class:`.Session` via :meth:`.Session.add`, all the objects associated
-with it via this :func:`.relationship` should also be added to that
-same :class:`.Session`.  Suppose we have an object ``user1`` with two
-related objects ``address1``, ``address2``::
-
-    >>> user1 = User()
-    >>> address1, address2 = Address(), Address()
-    >>> user1.addresses = [address1, address2]
-
-If we add ``user1`` to a :class:`.Session`, it will also add
-``address1``, ``address2`` implicitly::
-
-    >>> sess = Session()
-    >>> sess.add(user1)
-    >>> address1 in sess
-    True
-
-``save-update`` cascade also affects attribute operations for objects
-that are already present in a :class:`.Session`.  If we add a third
-object, ``address3`` to the ``user1.addresses`` collection, it
-becomes part of the state of that :class:`.Session`::
-
-    >>> address3 = Address()
-    >>> user1.addresses.append(address3)
-    >>> address3 in sess
-    True
-
-``save-update`` has the possibly surprising behavior that
-persistent objects which were *removed* from a collection
-or in some cases a scalar attribute
-may also be pulled into the :class:`.Session` of a parent object; this is
-so that the flush process may handle that related object appropriately.
-
-This case can usually only arise if an object is removed from one :class:`.Session`
-and added to another::
-
-    >>> user1 = sess1.query(User).filter_by(id=1).first()
-    >>> address1 = user1.addresses[0]
-    >>> sess1.close()   # user1, address1 no longer associated with sess1
-    >>> user1.addresses.remove(address1)  # address1 no longer associated with user1
-    >>> sess2 = Session()
-    >>> sess2.add(user1)   # ... but it still gets added to the new session,
-    >>> address1 in sess2  # because it's still "pending" for flush
-    True
-
-The ``save-update`` cascade is on by default, and is typically taken
-for granted; it simplifies code by allowing a single call to
-:meth:`.Session.add` to register an entire structure of objects within
-that :class:`.Session` at once.  While it can be disabled, there
-is usually not a need to do so.
-
-One case where ``save-update`` cascade does sometimes get in the way is that
-it takes place in both directions for bi-directional relationships, e.g.
-backrefs, meaning that the association of a child object with a particular parent
-can have the effect of the parent object being implicitly associated with that
-child object's :class:`.Session`; this pattern, as well as how to modify its
-behavior using the :paramref:`~.relationship.cascade_backrefs` flag,
-is discussed in the section :ref:`backref_cascade`.
-
-.. _cascade_delete:
-
-delete
-------
-
-The ``delete`` cascade indicates that when a "parent" object
-is marked for deletion, its related "child" objects should also be marked
-for deletion.  If for example we have a relationship ``User.addresses``
-with ``delete`` cascade configured::
-
-    class User(Base):
-        # ...
-
-        addresses = relationship("Address", cascade="save-update, merge, delete")
-
-If using the above mapping, we have a ``User`` object and two
-related ``Address`` objects::
-
-    >>> user1 = sess.query(User).filter_by(id=1).first()
-    >>> address1, address2 = user1.addresses
-
-If we mark ``user1`` for deletion, after the flush operation proceeds,
-``address1`` and ``address2`` will also be deleted:
-
-.. sourcecode:: python+sql
-
-    >>> sess.delete(user1)
-    >>> sess.commit()
-    {opensql}DELETE FROM address WHERE address.id = ?
-    ((1,), (2,))
-    DELETE FROM user WHERE user.id = ?
-    (1,)
-    COMMIT
-
-Alternatively, if our ``User.addresses`` relationship does *not* have
-``delete`` cascade, SQLAlchemy's default behavior is to instead de-associate
-``address1`` and ``address2`` from ``user1`` by setting their foreign key
-reference to ``NULL``.  Using a mapping as follows::
-
-    class User(Base):
-        # ...
-
-        addresses = relationship("Address")
-
-Upon deletion of a parent ``User`` object, the rows in ``address`` are not
-deleted, but are instead de-associated:
-
-.. sourcecode:: python+sql
-
-    >>> sess.delete(user1)
-    >>> sess.commit()
-    {opensql}UPDATE address SET user_id=? WHERE address.id = ?
-    (None, 1)
-    UPDATE address SET user_id=? WHERE address.id = ?
-    (None, 2)
-    DELETE FROM user WHERE user.id = ?
-    (1,)
-    COMMIT
-
-``delete`` cascade is more often than not used in conjunction with
-:ref:`cascade_delete_orphan` cascade, which will emit a DELETE for the related
-row if the "child" object is deassociated from the parent.  The combination
-of ``delete`` and ``delete-orphan`` cascade covers both situations where
-SQLAlchemy has to decide between setting a foreign key column to NULL versus
-deleting the row entirely.
-
-.. topic:: ORM-level "delete" cascade vs. FOREIGN KEY level "ON DELETE" cascade
-
-    The behavior of SQLAlchemy's "delete" cascade has a lot of overlap with the
-    ``ON DELETE CASCADE`` feature of a database foreign key, as well
-    as with that of the ``ON DELETE SET NULL`` foreign key setting when "delete"
-    cascade is not specified.  Database level "ON DELETE" cascades are specific to the
-    "FOREIGN KEY" construct of the relational database; SQLAlchemy allows
-    configuration of these schema-level constructs at the :term:`DDL` level
-    using options on :class:`.ForeignKeyConstraint` which are described
-    at :ref:`on_update_on_delete`.
-
-    It is important to note the differences between the ORM and the relational
-    database's notion of "cascade" as well as how they integrate:
-
-    * A database level ``ON DELETE`` cascade is configured effectively
-      on the **many-to-one** side of the relationship; that is, we configure
-      it relative to the ``FOREIGN KEY`` constraint that is the "many" side
-      of a relationship.  At the ORM level, **this direction is reversed**.
-      SQLAlchemy handles the deletion of "child" objects relative to a
-      "parent" from the "parent" side, which means that ``delete`` and
-      ``delete-orphan`` cascade are configured on the **one-to-many**
-      side.
-
-    * Database level foreign keys with no ``ON DELETE`` setting
-      are often used to **prevent** a parent
-      row from being removed, as it would necessarily leave an unhandled
-      related row present.  If this behavior is desired in a one-to-many
-      relationship, SQLAlchemy's default behavior of setting a foreign key
-      to ``NULL`` can be caught in one of two ways:
-
-        * The easiest and most common is just to set the
-          foreign-key-holding column to ``NOT NULL`` at the database schema
-          level.  An attempt by SQLAlchemy to set the column to NULL will
-          fail with a simple NOT NULL constraint exception.
-
-        * The other, more special-case way is to set the :paramref:`~.relationship.passive_deletes`
-          flag to the string ``"all"``.  This has the effect of entirely
-          disabling SQLAlchemy's behavior of setting the foreign key column
-          to NULL, and a DELETE will be emitted for the parent row without
-          any effect on the child row, even if the child row is present
-          in memory.  This may be desirable in the case when
-          database-level foreign key triggers, either special ``ON DELETE`` settings
-          or otherwise, need to be activated in all cases when a parent row is deleted.
-
-    * Database level ``ON DELETE`` cascade is **vastly more efficient**
-      than that of SQLAlchemy.  The database can chain a series of cascade
-      operations across many relationships at once; e.g. if row A is deleted,
-      all the related rows in table B can be deleted, and all the C rows related
-      to each of those B rows, and on and on, all within the scope of a single
-      DELETE statement.  SQLAlchemy on the other hand, in order to support
-      the cascading delete operation fully, has to individually load each
-      related collection in order to target all rows that then may have further
-      related collections.  That is, SQLAlchemy isn't sophisticated enough
-      to emit a DELETE for all those related rows at once within this context.
-
-    * SQLAlchemy doesn't **need** to be this sophisticated, as we instead provide
-      smooth integration with the database's own ``ON DELETE`` functionality,
-      by using the :paramref:`~.relationship.passive_deletes` option in conjunction
-      with properly configured foreign key constraints.  Under this behavior,
-      SQLAlchemy only emits DELETE for those rows that are already locally
-      present in the :class:`.Session`; for any collections that are unloaded,
-      it leaves them to the database to handle, rather than emitting a SELECT
-      for them.  The section :ref:`passive_deletes` provides an example of this use.
-
-    * While database-level ``ON DELETE`` functionality works only on the "many"
-      side of a relationship, SQLAlchemy's "delete" cascade
-      has **limited** ability to operate in the *reverse* direction as well,
-      meaning it can be configured on the "many" side to delete an object
-      on the "one" side when the reference on the "many" side is deleted.  However
-      this can easily result in constraint violations if there are other objects
-      referring to this "one" side from the "many", so it typically is only
-      useful when a relationship is in fact a "one to one".  The
-      :paramref:`~.relationship.single_parent` flag should be used to establish
-      an in-Python assertion for this case.
-
-
-When using a :func:`.relationship` that also includes a many-to-many
-table using the :paramref:`~.relationship.secondary` option, SQLAlchemy's
-delete cascade handles the rows in this many-to-many table automatically.
-Just as the addition or removal of an object from a many-to-many collection
-results in the INSERT or DELETE of a row in the many-to-many table, as
-described in :ref:`relationships_many_to_many_deletion`, the ``delete``
-cascade, when activated as the result of a parent object delete operation,
-will DELETE not just the row in the "child" table but also the row in the
-many-to-many table.
-
-.. _cascade_delete_orphan:
-
-delete-orphan
--------------
-
-``delete-orphan`` cascade adds behavior to the ``delete`` cascade,
-such that a child object will be marked for deletion when it is
-de-associated from the parent, not just when the parent is marked
-for deletion.  This is a common feature when dealing with a related
-object that is "owned" by its parent, with a NOT NULL foreign key,
-so that removal of the item from the parent collection results
-in its deletion.
-
-``delete-orphan`` cascade implies that each child object can only
-have one parent at a time, so is configured in the vast majority of cases
-on a one-to-many relationship.  Setting it on a many-to-one or
-many-to-many relationship is more awkward; for this use case,
-SQLAlchemy requires that the :func:`~sqlalchemy.orm.relationship`
-be configured with the :paramref:`~.relationship.single_parent` argument,
-which establishes Python-side validation that ensures the object
-is associated with only one parent at a time.
-
-.. _cascade_merge:
-
-merge
------
-
-``merge`` cascade indicates that the :meth:`.Session.merge`
-operation should be propagated from a parent that's the subject
-of the :meth:`.Session.merge` call down to referred objects.
-This cascade is also on by default.
-
-.. _cascade_refresh_expire:
-
-refresh-expire
---------------
-
-``refresh-expire`` is an uncommon option, indicating that the
-:meth:`.Session.expire` operation should be propagated from a parent
-down to referred objects.  When using :meth:`.Session.refresh`,
-the referred objects are expired only, but not actually refreshed.
-
-.. _cascade_expunge:
-
-expunge
--------
-
-``expunge`` cascade indicates that when the parent object is removed
-from the :class:`.Session` using :meth:`.Session.expunge`, the
-operation should be propagated down to referred objects.
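-
-For a quick illustration of the latter, a minimal sketch of ``expunge``
-cascade in action, assuming the ``User.addresses`` relationship from the
-examples above were configured with ``cascade="all"`` (a setting which, as
-noted earlier, includes ``expunge``)::
-
-    user1 = sess.query(User).filter_by(id=1).first()
-    address1 = user1.addresses[0]
-
-    sess.expunge(user1)    # expunge cascades along User.addresses
-
-    assert user1 not in sess
-    assert address1 not in sess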
-
-.. _backref_cascade:
-
-Controlling Cascade on Backrefs
--------------------------------
-
-The :ref:`cascade_save_update` cascade by default takes place on attribute change events
-emitted from backrefs.  This is probably a confusing statement more
-easily described through demonstration; it means that, given a mapping such as this::
-
-    mapper(Order, order_table, properties={
-        'items' : relationship(Item, backref='order')
-    })
-
-If an ``Order`` is already in the session, and is assigned to the ``order``
-attribute of an ``Item``, the backref appends the ``Order`` to the ``items``
-collection of that ``Order``, resulting in the ``save-update`` cascade taking
-place::
-
-    >>> o1 = Order()
-    >>> session.add(o1)
-    >>> o1 in session
-    True
-
-    >>> i1 = Item()
-    >>> i1.order = o1
-    >>> i1 in o1.items
-    True
-    >>> i1 in session
-    True
-
-This behavior can be disabled using the :paramref:`~.relationship.cascade_backrefs` flag::
-
-    mapper(Order, order_table, properties={
-        'items' : relationship(Item, backref='order',
-                            cascade_backrefs=False)
-    })
-
-So above, the assignment of ``i1.order = o1`` will append ``i1`` to the ``items``
-collection of ``o1``, but will not add ``i1`` to the session.  You can, of
-course, :meth:`~.Session.add` ``i1`` to the session at a later point.  This
-option may be helpful for situations where an object needs to be kept out of a
-session until its construction is completed, but still needs to be given
-associations to objects which are already persistent in the target session.
-
-
-.. _unitofwork_transaction:
-
-Managing Transactions
-=====================
-
-A newly constructed :class:`.Session` may be said to be in the "begin" state.
-In this state, the :class:`.Session` has not established any connection or
-transactional state with any of the :class:`.Engine` objects that may be associated
-with it.
-
-The :class:`.Session` then receives requests to operate upon a database connection.
-Typically, this means it is called upon to execute SQL statements using a particular
-:class:`.Engine`, which may be via :meth:`.Session.query`, :meth:`.Session.execute`,
-or within a flush operation of pending data, which occurs when such state exists
-and :meth:`.Session.commit` or :meth:`.Session.flush` is called.
-
-As these requests are received, each new :class:`.Engine` encountered is associated
-with an ongoing transactional state maintained by the :class:`.Session`.
-When the first :class:`.Engine` is operated upon, the :class:`.Session` can be said
-to have left the "begin" state and entered "transactional" state.  For each
-:class:`.Engine` encountered, a :class:`.Connection` is associated with it,
-which is acquired via the :meth:`.Engine.contextual_connect` method.  If a
-:class:`.Connection` was directly associated with the :class:`.Session` (see :ref:`session_external_transaction`
-for an example of this), it is
-added to the transactional state directly.
-
-For each :class:`.Connection`, the :class:`.Session` also maintains a :class:`.Transaction` object,
-which is acquired by calling :meth:`.Connection.begin` on each :class:`.Connection`,
-or if the :class:`.Session`
-object has been established using the flag ``twophase=True``, a :class:`.TwoPhaseTransaction`
-object acquired via :meth:`.Connection.begin_twophase`.  These transactions are all committed or
-rolled back corresponding to the invocation of the
-:meth:`.Session.commit` and :meth:`.Session.rollback` methods.
A commit operation will -also call the :meth:`.TwoPhaseTransaction.prepare` method on all transactions if applicable. - -When the transactional state is completed after a rollback or commit, the :class:`.Session` -:term:`releases` all :class:`.Transaction` and :class:`.Connection` resources, -and goes back to the "begin" state, which -will again invoke new :class:`.Connection` and :class:`.Transaction` objects as new -requests to emit SQL statements are received. - -The example below illustrates this lifecycle:: - - engine = create_engine("...") - Session = sessionmaker(bind=engine) - - # new session. no connections are in use. - session = Session() - try: - # first query. a Connection is acquired - # from the Engine, and a Transaction - # started. - item1 = session.query(Item).get(1) - - # second query. the same Connection/Transaction - # are used. - item2 = session.query(Item).get(2) - - # pending changes are created. - item1.foo = 'bar' - item2.bar = 'foo' - - # commit. The pending changes above - # are flushed via flush(), the Transaction - # is committed, the Connection object closed - # and discarded, the underlying DBAPI connection - # returned to the connection pool. - session.commit() - except: - # on rollback, the same closure of state - # as that of commit proceeds. - session.rollback() - raise - -.. _session_begin_nested: - -Using SAVEPOINT ---------------- - -SAVEPOINT transactions, if supported by the underlying engine, may be -delineated using the :meth:`~.Session.begin_nested` -method:: - - Session = sessionmaker() - session = Session() - session.add(u1) - session.add(u2) - - session.begin_nested() # establish a savepoint - session.add(u3) - session.rollback() # rolls back u3, keeps u1 and u2 - - session.commit() # commits u1 and u2 - -:meth:`~.Session.begin_nested` may be called any number -of times, which will issue a new SAVEPOINT with a unique identifier for each -call. For each :meth:`~.Session.begin_nested` call, a -corresponding :meth:`~.Session.rollback` or -:meth:`~.Session.commit` must be issued. (But note that if the return value is -used as a context manager, i.e. in a with-statement, then this rollback/commit -is issued by the context manager upon exiting the context, and so should not be -added explicitly.) - -When :meth:`~.Session.begin_nested` is called, a -:meth:`~.Session.flush` is unconditionally issued -(regardless of the ``autoflush`` setting). This is so that when a -:meth:`~.Session.rollback` occurs, the full state of the -session is expired, thus causing all subsequent attribute/instance access to -reference the full state of the :class:`~sqlalchemy.orm.session.Session` right -before :meth:`~.Session.begin_nested` was called. - -:meth:`~.Session.begin_nested`, in the same manner as the less often -used :meth:`~.Session.begin` method, returns a transactional object -which also works as a context manager. -It can be succinctly used around individual record inserts in order to catch -things like unique constraint exceptions:: - - for record in records: - try: - with session.begin_nested(): - session.merge(record) - except: - print "Skipped record %s" % record - session.commit() - -.. _session_autocommit: - -Autocommit Mode ---------------- - -The example of :class:`.Session` transaction lifecycle illustrated at -the start of :ref:`unitofwork_transaction` applies to a :class:`.Session` configured in the -default mode of ``autocommit=False``. 
Constructing a :class:`.Session`
-with ``autocommit=True`` produces a :class:`.Session` placed into "autocommit" mode, where each SQL statement
-invoked by a :meth:`.Session.query` or :meth:`.Session.execute` occurs
-using a new connection from the connection pool, discarding it after
-results have been iterated.  The :meth:`.Session.flush` operation
-still occurs within the scope of a single transaction, though this transaction
-is closed out after the :meth:`.Session.flush` operation completes.
-
-.. warning::
-
-    "autocommit" mode should **not be considered for general use**.
-    If used, it should always be combined with the usage of
-    :meth:`.Session.begin` and :meth:`.Session.commit`, to ensure
-    a transaction demarcation.
-
-    Executing queries outside of a demarcated transaction is a legacy mode
-    of usage, and can in some cases lead to concurrent connection
-    checkouts.
-
-    In the absence of a demarcated transaction, the :class:`.Session`
-    cannot make appropriate decisions as to when autoflush should
-    occur nor when auto-expiration should occur, so these features
-    should be disabled with ``autoflush=False, expire_on_commit=False``.
-
-Modern usage of "autocommit" is for framework integrations that need to control
-specifically when the "begin" state occurs.  A session which is configured with
-``autocommit=True`` may be placed into the "begin" state using the
-:meth:`.Session.begin` method.
-After the cycle completes upon :meth:`.Session.commit` or :meth:`.Session.rollback`,
-connection and transaction resources are :term:`released` and the :class:`.Session`
-goes back into "autocommit" mode, until :meth:`.Session.begin` is called again::
-
-    Session = sessionmaker(bind=engine, autocommit=True)
-    session = Session()
-    session.begin()
-    try:
-        item1 = session.query(Item).get(1)
-        item2 = session.query(Item).get(2)
-        item1.foo = 'bar'
-        item2.bar = 'foo'
-        session.commit()
-    except:
-        session.rollback()
-        raise
-
-The :meth:`.Session.begin` method also returns a transactional token which is
-compatible with the Python 2.6 ``with`` statement::
-
-    Session = sessionmaker(bind=engine, autocommit=True)
-    session = Session()
-    with session.begin():
-        item1 = session.query(Item).get(1)
-        item2 = session.query(Item).get(2)
-        item1.foo = 'bar'
-        item2.bar = 'foo'
-
-.. _session_subtransactions:
-
-Using Subtransactions with Autocommit
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-A subtransaction indicates usage of the :meth:`.Session.begin` method in conjunction with
-the ``subtransactions=True`` flag.  This produces a non-transactional, delimiting construct that
-allows nesting of calls to :meth:`~.Session.begin` and :meth:`~.Session.commit`.
-Its purpose is to allow the construction of code that can function within a transaction
-both independently of any external code that starts a transaction,
-and within a block that has already demarcated a transaction.
-
-``subtransactions=True`` is generally only useful in conjunction with
-autocommit, and is equivalent to the pattern described at :ref:`connections_nested_transactions`,
-where any number of functions can call :meth:`.Connection.begin` and :meth:`.Transaction.commit`
-as though they are the initiator of the transaction, but in fact may be participating
-in an already ongoing transaction::
-
-    # method_a starts a transaction and calls method_b
-    def method_a(session):
-        session.begin(subtransactions=True)
-        try:
-            method_b(session)
-            session.commit()  # transaction is committed here
-        except:
-            session.rollback() # rolls back the transaction
-            raise
-
-    # method_b also starts a transaction, but when
-    # called from method_a participates in the ongoing
-    # transaction.
-    def method_b(session):
-        session.begin(subtransactions=True)
-        try:
-            session.add(SomeObject('bat', 'lala'))
-            session.commit()  # transaction is not committed yet
-        except:
-            session.rollback() # rolls back the transaction, in this case
-                               # the one that was initiated in method_a().
-            raise
-
-    # create a Session and call method_a
-    session = Session(autocommit=True)
-    method_a(session)
-    session.close()
-
-Subtransactions are used by the :meth:`.Session.flush` process to ensure that the
-flush operation takes place within a transaction, regardless of autocommit.  When
-autocommit is disabled, the subtransaction is still useful in that it forces the
-:class:`.Session` into a "pending rollback" state, as a failed flush cannot be
-resumed in mid-operation, while the end user still maintains the "scope" of the
-transaction overall.
-
-.. _session_twophase:
-
-Enabling Two-Phase Commit
--------------------------
-
-For backends which support two-phase operation (currently MySQL and
-PostgreSQL), the session can be instructed to use two-phase commit semantics.
-This will coordinate the committing of transactions across databases so that
-the transaction is either committed or rolled back in all databases.  You can
-also :meth:`~.Session.prepare` the session for
-interacting with transactions not managed by SQLAlchemy.  To use two-phase
-transactions, set the flag ``twophase=True`` on the session::
-
-    engine1 = create_engine('postgresql://db1')
-    engine2 = create_engine('postgresql://db2')
-
-    Session = sessionmaker(twophase=True)
-
-    # bind User operations to engine 1, Account operations to engine 2
-    Session.configure(binds={User:engine1, Account:engine2})
-
-    session = Session()
-
-    # .... work with accounts and users
-
-    # commit.  session will issue a flush to all DBs, and a prepare step to all DBs,
-    # before committing both transactions
-    session.commit()
-
-Embedding SQL Insert/Update Expressions into a Flush
-=====================================================
-
-This feature allows the value of a database column to be set to a SQL
-expression instead of a literal value.  It's especially useful for atomic
-updates, calling stored procedures, etc.  All you do is assign an expression to
-an attribute::
-
-    class SomeClass(object):
-        pass
-    mapper(SomeClass, some_table)
-
-    someobject = session.query(SomeClass).get(5)
-
-    # set 'value' attribute to a SQL expression adding one
-    someobject.value = some_table.c.value + 1
-
-    # issues "UPDATE some_table SET value=value+1"
-    session.commit()
-
-This technique works both for INSERT and UPDATE statements.  After the
-flush/commit operation, the ``value`` attribute on ``someobject`` above is
-expired, so that when next accessed the newly generated value will be loaded
-from the database.
-
-.. 
_session_sql_expressions: - -Using SQL Expressions with Sessions -==================================== - -SQL expressions and strings can be executed via the -:class:`~sqlalchemy.orm.session.Session` within its transactional context. -This is most easily accomplished using the -:meth:`~.Session.execute` method, which returns a -:class:`~sqlalchemy.engine.ResultProxy` in the same manner as an -:class:`~sqlalchemy.engine.Engine` or -:class:`~sqlalchemy.engine.Connection`:: - - Session = sessionmaker(bind=engine) - session = Session() - - # execute a string statement - result = session.execute("select * from table where id=:id", {'id':7}) - - # execute a SQL expression construct - result = session.execute(select([mytable]).where(mytable.c.id==7)) - -The current :class:`~sqlalchemy.engine.Connection` held by the -:class:`~sqlalchemy.orm.session.Session` is accessible using the -:meth:`~.Session.connection` method:: - - connection = session.connection() - -The examples above deal with a :class:`~sqlalchemy.orm.session.Session` that's -bound to a single :class:`~sqlalchemy.engine.Engine` or -:class:`~sqlalchemy.engine.Connection`. To execute statements using a -:class:`~sqlalchemy.orm.session.Session` which is bound either to multiple -engines, or none at all (i.e. relies upon bound metadata), both -:meth:`~.Session.execute` and -:meth:`~.Session.connection` accept a ``mapper`` keyword -argument, which is passed a mapped class or -:class:`~sqlalchemy.orm.mapper.Mapper` instance, which is used to locate the -proper context for the desired engine:: - - Session = sessionmaker() - session = Session() - - # need to specify mapper or class when executing - result = session.execute("select * from table where id=:id", {'id':7}, mapper=MyMappedClass) - - result = session.execute(select([mytable], mytable.c.id==7), mapper=MyMappedClass) - - connection = session.connection(MyMappedClass) - -.. _session_external_transaction: - -Joining a Session into an External Transaction (such as for test suites) -======================================================================== - -If a :class:`.Connection` is being used which is already in a transactional -state (i.e. has a :class:`.Transaction` established), a :class:`.Session` can -be made to participate within that transaction by just binding the -:class:`.Session` to that :class:`.Connection`. The usual rationale for this -is a test suite that allows ORM code to work freely with a :class:`.Session`, -including the ability to call :meth:`.Session.commit`, where afterwards the -entire database interaction is rolled back:: - - from sqlalchemy.orm import sessionmaker - from sqlalchemy import create_engine - from unittest import TestCase - - # global application scope. create Session class, engine - Session = sessionmaker() - - engine = create_engine('postgresql://...') - - class SomeTest(TestCase): - def setUp(self): - # connect to the database - self.connection = engine.connect() - - # begin a non-ORM transaction - self.trans = self.connection.begin() - - # bind an individual Session to the connection - self.session = Session(bind=self.connection) - - def test_something(self): - # use the session in tests. - - self.session.add(Foo()) - self.session.commit() - - def tearDown(self): - self.session.close() - - # rollback - everything that happened with the - # Session above (including calls to commit()) - # is rolled back. 
-
-        self.trans.rollback()
-
-        # return connection to the Engine
-        self.connection.close()
-
-Above, we issue :meth:`.Session.commit` as well as
-:meth:`.Transaction.rollback`.  This is an example of where we take advantage
-of the :class:`.Connection` object's ability to maintain *subtransactions*, or
-nested begin/commit-or-rollback pairs where only the outermost begin/commit
-pair actually commits the transaction, or if the outermost block rolls back,
-everything is rolled back.
-
-.. topic:: Supporting Tests with Rollbacks
-
-   The above recipe works well for any kind of database-enabled test, except
-   for a test that needs to actually invoke :meth:`.Session.rollback` within
-   the scope of the test itself.  The above recipe can be expanded, such
-   that the :class:`.Session` always runs all operations within the scope
-   of a SAVEPOINT, which is established at the start of each transaction,
-   so that tests can roll back the "transaction" as well while still
-   remaining in the scope of a larger "transaction" that's never committed,
-   using two extra events::
-
-      from sqlalchemy import event
-
-      class SomeTest(TestCase):
-          def setUp(self):
-              # connect to the database
-              self.connection = engine.connect()
-
-              # begin a non-ORM transaction
-              self.trans = self.connection.begin()
-
-              # bind an individual Session to the connection
-              self.session = Session(bind=self.connection)
-
-              # start the session in a SAVEPOINT...
-              self.session.begin_nested()
-
-              # then each time that SAVEPOINT ends, reopen it
-              @event.listens_for(self.session, "after_transaction_end")
-              def restart_savepoint(session, transaction):
-                  if transaction.nested and not transaction._parent.nested:
-                      session.begin_nested()
-
-
-          # ... the tearDown() method stays the same
-
-.. _unitofwork_contextual:
-
-Contextual/Thread-local Sessions
-=================================
-
-Recall from the section :ref:`session_faq_whentocreate` that the concept of
-"session scopes" was introduced, with an emphasis on web applications
-and the practice of linking the scope of a :class:`.Session` with that
-of a web request.  Most modern web frameworks include integration tools
-so that the scope of the :class:`.Session` can be managed automatically,
-and these tools should be used as they are available.
-
-SQLAlchemy includes its own helper object, which helps with the establishment
-of user-defined :class:`.Session` scopes.  It is also used by third-party
-integration systems to help construct their integration schemes.
-
-The object is the :class:`.scoped_session` object, and it represents a
-**registry** of :class:`.Session` objects.  If you're not familiar with the
-registry pattern, a good introduction can be found in `Patterns of Enterprise
-Application Architecture <http://martinfowler.com/eaaCatalog/registry.html>`_.
-
-.. note::
-
-   The :class:`.scoped_session` object is a very popular and useful object
-   used by many SQLAlchemy applications.  However, it is important to note
-   that it presents **only one approach** to the issue of :class:`.Session`
-   management.  If you're new to SQLAlchemy, and especially if the
-   term "thread-local variable" seems strange to you, we recommend that
-   if possible you first familiarize yourself with an off-the-shelf integration
-   system such as `Flask-SQLAlchemy <http://packages.python.org/Flask-SQLAlchemy/>`_
-   or `zope.sqlalchemy <http://pypi.python.org/pypi/zope.sqlalchemy>`_.
-
-A :class:`.scoped_session` is constructed by calling it, passing it a
-**factory** which can create new :class:`.Session` objects.
-A factory is just something that produces a new object when called, and in the
-case of :class:`.Session`, the most common factory is the :class:`.sessionmaker`,
-introduced earlier in this section.  Below we illustrate this usage::
-
-    >>> from sqlalchemy.orm import scoped_session
-    >>> from sqlalchemy.orm import sessionmaker
-
-    >>> session_factory = sessionmaker(bind=some_engine)
-    >>> Session = scoped_session(session_factory)
-
-The :class:`.scoped_session` object we've created will now call upon the
-:class:`.sessionmaker` when we "call" the registry::
-
-    >>> some_session = Session()
-
-Above, ``some_session`` is an instance of :class:`.Session`, which we
-can now use to talk to the database.  This same :class:`.Session` is also
-present within the :class:`.scoped_session` registry we've created.  If
-we call upon the registry a second time, we get back the **same** :class:`.Session`::
-
-    >>> some_other_session = Session()
-    >>> some_session is some_other_session
-    True
-
-This pattern allows disparate sections of the application to call upon a global
-:class:`.scoped_session`, so that all those areas may share the same session
-without the need to pass it explicitly.  The :class:`.Session` we've established
-in our registry will remain until we explicitly tell our registry to dispose of it,
-by calling :meth:`.scoped_session.remove`::
-
-    >>> Session.remove()
-
-The :meth:`.scoped_session.remove` method first calls :meth:`.Session.close` on
-the current :class:`.Session`, which has the effect of releasing any connection/transactional
-resources owned by the :class:`.Session`, then discarding the :class:`.Session`
-itself.  "Releasing" here means that connections are returned to their connection pool and any transactional state is rolled back, ultimately using the ``rollback()`` method of the underlying DBAPI connection.
-
-At this point, the :class:`.scoped_session` object is "empty", and will create
-a **new** :class:`.Session` when called again.  As illustrated below, this
-is not the same :class:`.Session` we had before::
-
-    >>> new_session = Session()
-    >>> new_session is some_session
-    False
-
-The above series of steps illustrates the idea of the "registry" pattern in a
-nutshell.  With that basic idea in hand, we can discuss some of the details
-of how this pattern proceeds.
-
-Implicit Method Access
-----------------------
-
-The job of the :class:`.scoped_session` is simple: hold onto a :class:`.Session`
-for all who ask for it.  As a means of producing more transparent access to this
-:class:`.Session`, the :class:`.scoped_session` also includes **proxy behavior**,
-meaning that the registry itself can be treated just like a :class:`.Session`
-directly; when methods are called on this object, they are **proxied** to the
-underlying :class:`.Session` being maintained by the registry::
-
-    Session = scoped_session(some_factory)
-
-    # equivalent to:
-    #
-    # session = Session()
-    # print session.query(MyClass).all()
-    #
-    print Session.query(MyClass).all()
-
-The above code accomplishes the same task as that of acquiring the current
-:class:`.Session` by calling upon the registry, then using that :class:`.Session`.
-
-Thread-Local Scope
-------------------
-
-Users who are familiar with multithreaded programming will note that representing
-anything as a global variable is usually a bad idea, as it implies that the
-global object will be accessed by many threads concurrently.  The :class:`.Session`
-object is entirely designed to be used in a **non-concurrent** fashion, which
-in terms of multithreading means "only in one thread at a time".  So our
-above example of :class:`.scoped_session` usage, where the same :class:`.Session`
-object is maintained across multiple calls, suggests that some process needs
-to be in place such that multiple calls across many threads don't actually get
-a handle to the same session.  We call this notion **thread local storage**,
-which means that a special object is used that will maintain a distinct object
-for each application thread.  Python provides this via the
-`threading.local() <http://docs.python.org/library/threading.html#threading.local>`_
-construct.  The :class:`.scoped_session` object by default uses this object
-as storage, so that a single :class:`.Session` is maintained for all who call
-upon the :class:`.scoped_session` registry, but only within the scope of a single
-thread.  Callers who call upon the registry in a different thread get a
-:class:`.Session` instance that is local to that other thread.
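-
-A quick sketch of this behavior, assuming the thread-local ``Session``
-registry established in the preceding examples::
-
-    import threading
-
-    sessions = []
-
-    def call_registry():
-        # calling the registry within this worker thread produces
-        # a Session local to this thread...
-        sessions.append(Session())
-
-    t = threading.Thread(target=call_registry)
-    t.start()
-    t.join()
-
-    # ...which is not the same Session seen by the main thread
-    assert sessions[0] is not Session()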
-
-Using this technique, the :class:`.scoped_session` provides a quick and relatively
-simple (if one is familiar with thread-local storage) way of providing
-a single, global object in an application that is safe to be called upon
-from multiple threads.
-
-The :meth:`.scoped_session.remove` method, as always, removes the current
-:class:`.Session` associated with the thread, if any.  However, one advantage of the
-``threading.local()`` object is that if the application thread itself ends, the
-"storage" for that thread is also garbage collected.  So it is in fact "safe" to
-use thread local scope with an application that spawns and tears down threads,
-without the need to call :meth:`.scoped_session.remove`.  However, the scope
-of transactions themselves, i.e. ending them via :meth:`.Session.commit` or
-:meth:`.Session.rollback`, will usually still be something that must be explicitly
-arranged for at the appropriate time, unless the application actually ties the
-lifespan of a thread to the lifespan of a transaction.
-
-.. _session_lifespan:
-
-Using Thread-Local Scope with Web Applications
-----------------------------------------------
-
-As discussed in the section :ref:`session_faq_whentocreate`, a web application
-is architected around the concept of a **web request**, and integrating
-such an application with the :class:`.Session` usually implies that the :class:`.Session`
-will be associated with that request.  As it turns out, most Python web frameworks,
-with notable exceptions such as the asynchronous frameworks Twisted and
-Tornado, use threads in a simple way, such that a particular web request is received,
-processed, and completed within the scope of a single *worker thread*.  When
-the request ends, the worker thread is released to a pool of workers where it
-is available to handle another request.
-
-This simple correspondence of web request and thread means that associating a
-:class:`.Session` with a thread implies it is also associated with the web request
-running within that thread, and vice versa, provided that the :class:`.Session` is
-created only after the web request begins and torn down just before the web request ends.
-So it is a common practice to use :class:`.scoped_session` as a quick way
-to integrate the :class:`.Session` with a web application.
The sequence -diagram below illustrates this flow:: - - Web Server Web Framework SQLAlchemy ORM Code - -------------- -------------- ------------------------------ - startup -> Web framework # Session registry is established - initializes Session = scoped_session(sessionmaker()) - - incoming - web request -> web request -> # The registry is *optionally* - starts # called upon explicitly to create - # a Session local to the thread and/or request - Session() - - # the Session registry can otherwise - # be used at any time, creating the - # request-local Session() if not present, - # or returning the existing one - Session.query(MyClass) # ... - - Session.add(some_object) # ... - - # if data was modified, commit the - # transaction - Session.commit() - - web request ends -> # the registry is instructed to - # remove the Session - Session.remove() - - sends output <- - outgoing web <- - response - -Using the above flow, the process of integrating the :class:`.Session` with the -web application has exactly two requirements: - -1. Create a single :class:`.scoped_session` registry when the web application - first starts, ensuring that this object is accessible by the rest of the - application. -2. Ensure that :meth:`.scoped_session.remove` is called when the web request ends, - usually by integrating with the web framework's event system to establish - an "on request end" event. - -As noted earlier, the above pattern is **just one potential way** to integrate a :class:`.Session` -with a web framework, one which in particular makes the significant assumption -that the **web framework associates web requests with application threads**. It is -however **strongly recommended that the integration tools provided with the web framework -itself be used, if available**, instead of :class:`.scoped_session`. - -In particular, while using a thread local can be convenient, it is preferable that the :class:`.Session` be -associated **directly with the request**, rather than with -the current thread. The next section on custom scopes details a more advanced configuration -which can combine the usage of :class:`.scoped_session` with direct request based scope, or -any kind of scope. - -Using Custom Created Scopes ---------------------------- - -The :class:`.scoped_session` object's default behavior of "thread local" scope is only -one of many options on how to "scope" a :class:`.Session`. A custom scope can be defined -based on any existing system of getting at "the current thing we are working with". - -Suppose a web framework defines a library function ``get_current_request()``. An application -built using this framework can call this function at any time, and the result will be -some kind of ``Request`` object that represents the current request being processed. -If the ``Request`` object is hashable, then this function can be easily integrated with -:class:`.scoped_session` to associate the :class:`.Session` with the request. 
Below we illustrate
-this in conjunction with a hypothetical event marker provided by the web framework
-``on_request_end``, which allows code to be invoked whenever a request ends::
-
-    from my_web_framework import get_current_request, on_request_end
-    from sqlalchemy.orm import scoped_session, sessionmaker
-
-    Session = scoped_session(sessionmaker(bind=some_engine), scopefunc=get_current_request)
-
-    @on_request_end
-    def remove_session(req):
-        Session.remove()
-
-Above, we instantiate :class:`.scoped_session` in the usual way, except that we pass
-our request-returning function as the "scopefunc".  This instructs :class:`.scoped_session`
-to use this function to generate a dictionary key whenever the registry is called upon
-to return the current :class:`.Session`.  In this case it is particularly important
-that we ensure a reliable "remove" system is implemented, as this dictionary is not
-otherwise self-managed.
-
-
-Contextual Session API
-----------------------
-
-.. autoclass:: sqlalchemy.orm.scoping.scoped_session
-   :members:
-
-.. autoclass:: sqlalchemy.util.ScopedRegistry
-   :members:
-
-.. autoclass:: sqlalchemy.util.ThreadLocalRegistry
-
-.. _session_partitioning:
-
-Partitioning Strategies
-=======================
-
-Simple Vertical Partitioning
-----------------------------
-
-Vertical partitioning places different kinds of objects, or different tables,
-across multiple databases::
-
-    engine1 = create_engine('postgresql://db1')
-    engine2 = create_engine('postgresql://db2')
-
-    Session = sessionmaker(twophase=True)
-
-    # bind User operations to engine 1, Account operations to engine 2
-    Session.configure(binds={User:engine1, Account:engine2})
-
-    session = Session()
-
-Above, operations against either class will make use of the :class:`.Engine`
-linked to that class.  Upon a flush operation, similar rules take place
-to ensure each class is written to the right database.
-
-The transactions among the multiple databases can optionally be coordinated
-via two-phase commit, if the underlying backend supports it.  See
-:ref:`session_twophase` for an example.
-
-Custom Vertical Partitioning
-----------------------------
-
-More comprehensive rule-based class-level partitioning can be built by
-overriding the :meth:`.Session.get_bind` method.  Below we illustrate
-a custom :class:`.Session` which delivers the following rules:
-
-1. Flush operations are delivered to the engine named ``master``.
-
-2. Operations on objects that subclass ``MyOtherClass`` all
-   occur on the ``other`` engine.
-
-3. Read operations for all other classes occur on a random
-   choice of the ``slave1`` or ``slave2`` database.
-
-::
-
-    engines = {
-        'master':create_engine("sqlite:///master.db"),
-        'other':create_engine("sqlite:///other.db"),
-        'slave1':create_engine("sqlite:///slave1.db"),
-        'slave2':create_engine("sqlite:///slave2.db"),
-    }
-
-    from sqlalchemy.orm import Session, sessionmaker
-    import random
-
-    class RoutingSession(Session):
-        def get_bind(self, mapper=None, clause=None):
-            if mapper and issubclass(mapper.class_, MyOtherClass):
-                return engines['other']
-            elif self._flushing:
-                return engines['master']
-            else:
-                return engines[
-                    random.choice(['slave1','slave2'])
-                ]
-
-The above :class:`.Session` class is plugged in using the ``class_``
-argument to :class:`.sessionmaker`::
-
-    Session = sessionmaker(class_=RoutingSession)
-
-This approach can be combined with multiple :class:`.MetaData` objects,
-using an approach such as the declarative ``__abstract__``
-keyword, described at :ref:`declarative_abstract`.
-
-Horizontal Partitioning
------------------------
-
-Horizontal partitioning partitions the rows of a single table (or a set of
-tables) across multiple databases.
-
-See the "sharding" example: :ref:`examples_sharding`.
-
-Sessions API
-============
-
-Session and sessionmaker()
----------------------------
-
-.. autoclass:: sessionmaker
-   :members:
-   :inherited-members:
-
-.. autoclass:: sqlalchemy.orm.session.Session
-   :members:
-   :inherited-members:
-
-.. autoclass:: sqlalchemy.orm.session.SessionTransaction
-   :members:
-
-Session Utilities
------------------
-
-.. autofunction:: make_transient
-
-.. autofunction:: make_transient_to_detached
-
-.. autofunction:: object_session
-
-.. autofunction:: sqlalchemy.orm.util.was_deleted
-
-Attribute and State Management Utilities
------------------------------------------
-
-These functions are provided by the SQLAlchemy attribute
-instrumentation API to provide a detailed interface for dealing
-with instances, attribute values, and history.  Some of them
-are useful when constructing event listener functions, such as
-those described in :doc:`/orm/events`.
-
-.. currentmodule:: sqlalchemy.orm.util
-
-.. autofunction:: object_state
-
-.. currentmodule:: sqlalchemy.orm.attributes
-
-.. autofunction:: del_attribute
-
-.. autofunction:: get_attribute
-
-.. autofunction:: get_history
-
-.. autofunction:: init_collection
-
-.. autofunction:: flag_modified
-
-.. function:: instance_state
-
-   Return the :class:`.InstanceState` for a given
-   mapped object.
-
-   This function is the internal version
-   of :func:`.object_state`.  The
-   :func:`.object_state` and/or the
-   :func:`.inspect` function is preferred here
-   as they each emit an informative exception
-   if the given object is not mapped.
-
-.. autofunction:: sqlalchemy.orm.instrumentation.is_instrumented
-
-.. autofunction:: set_attribute
-
-.. autofunction:: set_committed_value
-
-.. autoclass:: History
-    :members:
diff --git a/doc/build/orm/session_api.rst b/doc/build/orm/session_api.rst
new file mode 100644
index 000000000..3754ac80b
--- /dev/null
+++ b/doc/build/orm/session_api.rst
@@ -0,0 +1,76 @@
+.. module:: sqlalchemy.orm.session
+
+Session API
+============
+
+Session and sessionmaker()
+---------------------------
+
+.. autoclass:: sessionmaker
+   :members:
+   :inherited-members:
+
+.. autoclass:: sqlalchemy.orm.session.Session
+   :members:
+   :inherited-members:
+
+.. autoclass:: sqlalchemy.orm.session.SessionTransaction
+   :members:
+
+Session Utilities
+-----------------
+
+.. autofunction:: make_transient
+
+.. autofunction:: make_transient_to_detached
+
+.. autofunction:: object_session
+
+.. autofunction:: sqlalchemy.orm.util.was_deleted
+
+Attribute and State Management Utilities
+-----------------------------------------
+
+These functions are provided by the SQLAlchemy attribute
+instrumentation API to provide a detailed interface for dealing
+with instances, attribute values, and history.  Some of them
+are useful when constructing event listener functions, such as
+those described in :doc:`/orm/events`.
+
+.. currentmodule:: sqlalchemy.orm.util
+
+.. autofunction:: object_state
+
+.. currentmodule:: sqlalchemy.orm.attributes
+
+.. autofunction:: del_attribute
+
+.. autofunction:: get_attribute
+
+.. autofunction:: get_history
+
+.. autofunction:: init_collection
+
+.. autofunction:: flag_modified
+
+.. function:: instance_state
+
+   Return the :class:`.InstanceState` for a given
+   mapped object.
+
+   This function is the internal version
+   of :func:`.object_state`.  The
+   :func:`.object_state` and/or the
+   :func:`.inspect` function is preferred here
+   as they each emit an informative exception
+   if the given object is not mapped.
+
+.. autofunction:: sqlalchemy.orm.instrumentation.is_instrumented
+
+.. autofunction:: set_attribute
+
+.. autofunction:: set_committed_value
+
+.. autoclass:: History
+    :members:
+
diff --git a/doc/build/orm/session_basics.rst b/doc/build/orm/session_basics.rst
new file mode 100644
index 000000000..8919864ca
--- /dev/null
+++ b/doc/build/orm/session_basics.rst
@@ -0,0 +1,744 @@
+==========================
+Session Basics
+==========================
+
+What does the Session do?
+==========================
+
+In the most general sense, the :class:`~.Session` establishes all
+conversations with the database and represents a "holding zone" for all the
+objects which you've loaded or associated with it during its lifespan.  It
+provides the entry point to acquire a :class:`.Query` object, which sends
+queries to the database using the :class:`~.Session` object's current database
+connection, populating result rows into objects that are then stored in the
+:class:`.Session`, inside a structure called the `Identity Map
+<http://martinfowler.com/eaaCatalog/identityMap.html>`_ - a data structure
+that maintains unique copies of each object, where "unique" means "only one
+object with a particular primary key".
+
+The :class:`.Session` begins in an essentially stateless form.  Once queries
+are issued or other objects are persisted with it, it requests a connection
+resource from an :class:`.Engine` that is associated either with the
+:class:`.Session` itself or with the mapped :class:`.Table` objects being
+operated upon.  This connection represents an ongoing transaction, which
+remains in effect until the :class:`.Session` is instructed to commit or roll
+back its pending state.
+
+All changes to objects maintained by a :class:`.Session` are tracked - before
+the database is queried again or before the current transaction is committed,
+it **flushes** all pending changes to the database.  This is known as the `Unit
+of Work <http://martinfowler.com/eaaCatalog/unitOfWork.html>`_ pattern.
+
+When using a :class:`.Session`, it's important to note that the objects
+which are associated with it are **proxy objects** to the transaction being
+held by the :class:`.Session` - there are a variety of events that will cause
+objects to re-access the database in order to keep synchronized.  It is
+possible to "detach" objects from a :class:`.Session`, and to continue using
+them, though this practice has its caveats.  It's intended that usually, you'd
+re-associate detached objects with another :class:`.Session` when you want to
+work with them again, so that they can resume their normal task of
+representing database state.
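+
+For example, a minimal sketch of this "detach and re-associate" round trip,
+assuming a hypothetical mapped class ``User`` and the ``Session`` factory
+detailed in the next section::
+
+    user = session.query(User).get(5)
+    session.close()        # "user" is now in the detached state
+
+    session2 = Session()
+    session2.add(user)     # "user" once again represents database state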
+
+.. _session_getting:
+
+Getting a Session
+=================
+
+:class:`.Session` is a regular Python class which can
+be directly instantiated.  However, to standardize how sessions are configured
+and acquired, the :class:`.sessionmaker` class is normally
+used to create a top level :class:`.Session`
+configuration which can then be used throughout an application without the
+need to repeat the configurational arguments.
+
+The usage of :class:`.sessionmaker` is illustrated below:
+
+.. sourcecode:: python+sql
+
+    from sqlalchemy import create_engine
+    from sqlalchemy.orm import sessionmaker
+
+    # an Engine, which the Session will use for connection
+    # resources
+    some_engine = create_engine('postgresql://scott:tiger@localhost/')
+
+    # create a configured "Session" class
+    Session = sessionmaker(bind=some_engine)
+
+    # create a Session
+    session = Session()
+
+    # work with the session
+    myobject = MyObject('foo', 'bar')
+    session.add(myobject)
+    session.commit()
+
+Above, the :class:`.sessionmaker` call creates a factory for us,
+which we assign to the name ``Session``.  This factory, when
+called, will create a new :class:`.Session` object using the configurational
+arguments we've given the factory.  In this case, as is typical,
+we've configured the factory to specify a particular :class:`.Engine` for
+connection resources.
+
+A typical setup will associate the :class:`.sessionmaker` with an :class:`.Engine`,
+so that each :class:`.Session` generated will use this :class:`.Engine`
+to acquire connection resources.  This association can
+be set up as in the example above, using the ``bind`` argument.
+
+When you write your application, place the
+:class:`.sessionmaker` factory at the global level.  This
+factory can then
+be used by the rest of the application as the source of new :class:`.Session`
+instances, keeping the configuration for how :class:`.Session` objects
+are constructed in one place.
+
+The :class:`.sessionmaker` factory can also be used in conjunction with
+other helpers, which are passed a user-defined :class:`.sessionmaker` that
+is then maintained by the helper.  Some of these helpers are discussed in the
+section :ref:`session_faq_whentocreate`.
+
+Adding Additional Configuration to an Existing sessionmaker()
+--------------------------------------------------------------
+
+A common scenario is where the :class:`.sessionmaker` is invoked
+at module import time; however, the generation of one or more :class:`.Engine`
+instances to be associated with the :class:`.sessionmaker` has not yet proceeded.
+
+For this use case, the :class:`.sessionmaker` construct offers the
+:meth:`.sessionmaker.configure` method, which will place additional configuration
+directives into an existing :class:`.sessionmaker` that take effect
+when the factory is next invoked::
+
+
+    from sqlalchemy.orm import sessionmaker
+    from sqlalchemy import create_engine
+
+    # configure Session class with desired options
+    Session = sessionmaker()
+
+    # later, we create the engine
+    engine = create_engine('postgresql://...')
+
+    # associate it with our custom Session class
+    Session.configure(bind=engine)
+
+    # work with the session
+    session = Session()
+
+Creating Ad-Hoc Session Objects with Alternate Arguments
+---------------------------------------------------------
+
+For the use case where an application needs to create a new :class:`.Session` with
+special arguments that deviate from what is normally used throughout the application,
+such as a :class:`.Session` that binds to an alternate
+source of connectivity, or a :class:`.Session` that should
+have other arguments such as ``expire_on_commit`` established differently from
+what most of the application wants, specific arguments can be passed to the
+:class:`.sessionmaker` factory's :meth:`.sessionmaker.__call__` method.
+These arguments will override whatever
+configurations have already been placed, such as below, where a new :class:`.Session`
+is constructed against a specific :class:`.Connection`::
+
+    # at the module level, the global sessionmaker,
+    # bound to a specific Engine
+    Session = sessionmaker(bind=engine)
+
+    # later, some unit of code wants to create a
+    # Session that is bound to a specific Connection
+    conn = engine.connect()
+    session = Session(bind=conn)
+
+The typical rationale for the association of a :class:`.Session` with a specific
+:class:`.Connection` is that of a test fixture that maintains an external
+transaction - see :ref:`session_external_transaction` for an example of this.
+
+
+.. _session_faq:
+
+Session Frequently Asked Questions
+===================================
+
+By this point, many users already have questions about sessions.
+This section presents a mini-FAQ (note that we also have a `real FAQ </faq/index>`)
+of the most basic issues one is presented with when using a :class:`.Session`.
+
+When do I make a :class:`.sessionmaker`?
+------------------------------------------
+
+Just one time, somewhere in your application's global scope.  It should be
+looked upon as part of your application's configuration.  If your
+application has three .py files in a package, you could, for example,
+place the :class:`.sessionmaker` line in your ``__init__.py`` file; from
+that point on your other modules say "from mypackage import Session".  That
+way, everyone else just uses :class:`.Session()`,
+and the configuration of that session is controlled by that central point.
+
+If your application starts up, does imports, but does not know what
+database it's going to be connecting to, you can bind the
+:class:`.Session` at the "class" level to the
+engine later on, using :meth:`.sessionmaker.configure`.
+
+In the examples in this section, we will frequently show the
+:class:`.sessionmaker` being created right above the line where we actually
+invoke :class:`.Session`.  But that's just for
+example's sake!  In reality, the :class:`.sessionmaker` would be somewhere
+at the module level.  The calls to instantiate :class:`.Session`
+would then be placed at the point in the application where database
+conversations begin.
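+
+For example, a sketch of the ``__init__.py`` layout described above, with
+``mypackage`` as a stand-in package name and ``SomeObject`` a hypothetical
+mapped class::
+
+    # mypackage/__init__.py
+    from sqlalchemy.orm import sessionmaker
+
+    # bound to an Engine later on, via Session.configure(bind=engine)
+    Session = sessionmaker()
+
+    # mypackage/some_module.py
+    from mypackage import Session
+
+    def do_some_work():
+        session = Session()
+        session.add(SomeObject())
+        session.commit()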
+
+.. _session_faq_whentocreate:
+
+When do I construct a :class:`.Session`, when do I commit it, and when do I close it?
+-------------------------------------------------------------------------------------
+
+.. topic:: tl;dr;
+
+    As a general rule, keep the lifecycle of the session **separate and
+    external** from functions and objects that access and/or manipulate
+    database data.
+
+A :class:`.Session` is typically constructed at the beginning of a logical
+operation where database access is potentially anticipated.
+
+The :class:`.Session`, whenever it is used to talk to the database,
+begins a database transaction as soon as it starts communicating.
+Assuming the ``autocommit`` flag is left at its recommended default
+of ``False``, this transaction remains in progress until the :class:`.Session`
+is rolled back, committed, or closed.  The :class:`.Session` will
+begin a new transaction if it is used again, subsequent to the previous
+transaction ending; from this it follows that the :class:`.Session`
+is capable of having a lifespan across many transactions, though only
+one at a time.  We refer to these two concepts as **transaction scope**
+and **session scope**.
+
+The implication here is that the SQLAlchemy ORM is encouraging the
+developer to establish these two scopes in their application,
+including not only when the scopes begin and end, but also the
+expanse of those scopes, for example should a single
+:class:`.Session` instance be local to the execution flow within a
+function or method, should it be a global object used by the
+entire application, or somewhere in between these two.
+
+The burden placed on the developer to determine this scope is one
+area where the SQLAlchemy ORM necessarily has a strong opinion
+about how the database should be used.  The :term:`unit of work` pattern
+is specifically one of accumulating changes over time and flushing
+them periodically, keeping in-memory state in sync with what's
+known to be present in a local transaction.  This pattern is only
+effective when meaningful transaction scopes are in place.
+
+It's usually not very hard to determine the best points at which
+to begin and end the scope of a :class:`.Session`, though the wide
+variety of application architectures possible can introduce
+challenging situations.
+
+A common choice is to tear down the :class:`.Session` at the same
+time the transaction ends, meaning the transaction and session scopes
+are the same.  This is a great choice to start out with as it
+removes the need to consider session scope as separate from transaction
+scope.
+
+While there's no one-size-fits-all recommendation for how transaction
+scope should be determined, there are common patterns.  Especially
+if one is writing a web application, the choice is pretty much established.
+
+A web application is the easiest case because such an application is already
+constructed around a single, consistent scope - this is the **request**,
+which represents an incoming request from a browser, the processing
+of that request to formulate a response, and finally the delivery of that
+response back to the client.  Integrating web applications with the
+:class:`.Session` is then the straightforward task of linking the
+scope of the :class:`.Session` to that of the request.  The :class:`.Session`
+can be established as the request begins, or using a :term:`lazy initialization`
+pattern which establishes one as soon as it is needed.
+The request
+then proceeds, with some system in place where application logic can access
+the current :class:`.Session` in a manner associated with how the actual
+request object is accessed. As the request ends, the :class:`.Session`
+is torn down as well, usually via event hooks provided
+by the web framework. The transaction used by the :class:`.Session`
+may also be committed at this point, or alternatively the application may
+opt for an explicit commit pattern, only committing for those requests
+where one is warranted, but still always tearing down the :class:`.Session`
+unconditionally at the end.
+
+Some web frameworks include infrastructure to assist in the task
+of aligning the lifespan of a :class:`.Session` with that of a web request.
+This includes products such as `Flask-SQLAlchemy <http://packages.python.org/Flask-SQLAlchemy/>`_,
+for usage in conjunction with the Flask web framework,
+and `Zope-SQLAlchemy <http://pypi.python.org/pypi/zope.sqlalchemy>`_,
+typically used with the Pyramid framework.
+SQLAlchemy recommends that these products be used as available.
+
+In those situations where the integration libraries are not
+provided or are insufficient, SQLAlchemy includes its own "helper" class known as
+:class:`.scoped_session`. A tutorial on the usage of this object
+is at :ref:`unitofwork_contextual`. It provides both a quick way
+to associate a :class:`.Session` with the current thread, and
+patterns to associate :class:`.Session` objects with other kinds of
+scopes.
+
+As mentioned before, for non-web applications there is no one clear
+pattern, as applications themselves don't have just one pattern
+of architecture. The best strategy is to attempt to demarcate
+"operations", points at which a particular thread begins to perform
+a series of operations for some period of time, which can be committed
+at the end. Some examples:
+
+* A background daemon which spawns off child forks
+  would want to create a :class:`.Session` local to each child
+  process, work with that :class:`.Session` through the life of the "job"
+  that the fork is handling, then tear it down when the job is completed.
+
+* For a command-line script, the application would create a single, global
+  :class:`.Session` that is established when the program begins to do its
+  work, and commits it right as the program is completing its task.
+
+* For a GUI interface-driven application, the scope of the :class:`.Session`
+  may best be within the scope of a user-generated event, such as a button
+  push. Or, the scope may correspond to explicit user interaction, such as
+  the user "opening" a series of records, then "saving" them.
+
+As a general rule, the application should manage the lifecycle of the
+session *externally* to functions that deal with specific data. This is a
+fundamental separation of concerns which keeps data-specific operations
+agnostic of the context in which they access and manipulate that data.
+E.g. **don't do this**::
+
+    ### this is the **wrong way to do it** ###
+
+    class ThingOne(object):
+        def go(self):
+            session = Session()
+            try:
+                session.query(FooBar).update({"x": 5})
+                session.commit()
+            except:
+                session.rollback()
+                raise
+
+    class ThingTwo(object):
+        def go(self):
+            session = Session()
+            try:
+                session.query(Widget).update({"q": 18})
+                session.commit()
+            except:
+                session.rollback()
+                raise
+
+    def run_my_program():
+        ThingOne().go()
+        ThingTwo().go()
+
+Keep the lifecycle of the session (and usually the transaction)
+**separate and external**::
+
+    ### this is a **better** (but not the only) way to do it ###
+
+    class ThingOne(object):
+        def go(self, session):
+            session.query(FooBar).update({"x": 5})
+
+    class ThingTwo(object):
+        def go(self, session):
+            session.query(Widget).update({"q": 18})
+
+    def run_my_program():
+        session = Session()
+        try:
+            ThingOne().go(session)
+            ThingTwo().go(session)
+
+            session.commit()
+        except:
+            session.rollback()
+            raise
+        finally:
+            session.close()
+
+The advanced developer will try to keep the details of session, transaction
+and exception management as far as possible from the details of the program
+doing its work. For example, we can further separate concerns using a `context manager <http://docs.python.org/3/library/contextlib.html#contextlib.contextmanager>`_::
+
+    ### another way (but again *not the only way*) to do it ###
+
+    from contextlib import contextmanager
+
+    @contextmanager
+    def session_scope():
+        """Provide a transactional scope around a series of operations."""
+        session = Session()
+        try:
+            yield session
+            session.commit()
+        except:
+            session.rollback()
+            raise
+        finally:
+            session.close()
+
+    def run_my_program():
+        with session_scope() as session:
+            ThingOne().go(session)
+            ThingTwo().go(session)
+
+Is the Session a cache?
+----------------------------------
+
+Yeee...no. It's somewhat used as a cache, in that it implements the
+:term:`identity map` pattern, and stores objects keyed to their primary key.
+However, it doesn't do any kind of query caching. This means that if you say
+``session.query(Foo).filter_by(name='bar')``, even if ``Foo(name='bar')``
+is right there in the identity map, the session has no idea about that.
+It has to issue SQL to the database, get the rows back, and then when it
+sees the primary key in the row, *then* it can look in the local identity
+map and see that the object is already there. It's only when you say
+``query.get({some primary key})`` that the
+:class:`~sqlalchemy.orm.session.Session` doesn't have to issue a query.
+
+Additionally, the Session stores object instances using a weak reference
+by default. This also defeats the purpose of using the Session as a cache.
+
+The :class:`.Session` is not designed to be a
+global object from which everyone consults as a "registry" of objects.
+That's more the job of a **second level cache**. SQLAlchemy provides
+a pattern for implementing second level caching using `dogpile.cache <http://dogpilecache.readthedocs.org/>`_,
+via the :ref:`examples_caching` example.
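+To make the identity map distinction above concrete, here is a brief sketch;
+it assumes a mapped class ``Foo`` and an arbitrary primary key value of ``1``::
+
+    # emits a SELECT, even if a Foo with this name is already
+    # present in the identity map
+    foo = session.query(Foo).filter_by(name='bar').first()
+
+    # consults the identity map first; emits no SQL if the object
+    # with primary key 1 is already present
+    foo = session.query(Foo).get(1)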
+How can I get the :class:`~sqlalchemy.orm.session.Session` for a certain object?
+------------------------------------------------------------------------------------
+
+Use the :meth:`~.Session.object_session` classmethod
+available on :class:`~sqlalchemy.orm.session.Session`::
+
+    session = Session.object_session(someobject)
+
+The newer :ref:`core_inspection_toplevel` system can also be used::
+
+    from sqlalchemy import inspect
+    session = inspect(someobject).session
+
+.. _session_faq_threadsafe:
+
+Is the session thread-safe?
+------------------------------
+
+The :class:`.Session` is very much intended to be used in a
+**non-concurrent** fashion, which usually means in only one thread at a
+time.
+
+The :class:`.Session` should be used in such a way that one
+instance exists for a single series of operations within a single
+transaction. One expedient way to get this effect is by associating
+a :class:`.Session` with the current thread (see :ref:`unitofwork_contextual`
+for background). Another is to use a pattern
+where the :class:`.Session` is passed between functions and is otherwise
+not shared with other threads.
+
+The bigger point is that you should not *want* to use the session
+with multiple concurrent threads. That would be like having everyone at a
+restaurant all eat from the same plate. The session is a local "workspace"
+that you use for a specific set of tasks; you don't want to, or need to,
+share that session with other threads that are doing some other task.
+
+Making sure the :class:`.Session` is only used in a single concurrent thread at a time
+is called a "share nothing" approach to concurrency. But actually, not
+sharing the :class:`.Session` implies a more significant pattern; it
+means not just the :class:`.Session` object itself, but
+also **all objects that are associated with that Session**, must be kept within
+the scope of a single concurrent thread. The set of mapped
+objects associated with a :class:`.Session` are essentially proxies for data
+within database rows accessed over a database connection, and so just like
+the :class:`.Session` itself, the whole
+set of objects is really just a large-scale proxy for a database connection
+(or connections). Ultimately, it's mostly the DBAPI connection itself that
+we're keeping away from concurrent access; but since the :class:`.Session`
+and all the objects associated with it are all proxies for that DBAPI connection,
+the entire graph is essentially not safe for concurrent access.
+
+If there are in fact multiple threads participating
+in the same task, then you may consider sharing the session and its objects between
+those threads; however, in this extremely unusual scenario the application would
+need to ensure that a proper locking scheme is implemented so that there isn't
+*concurrent* access to the :class:`.Session` or its state. A more common approach
+to this situation is to maintain a single :class:`.Session` per concurrent thread,
+but to instead *copy* objects from one :class:`.Session` to another, often
+using the :meth:`.Session.merge` method to copy the state of an object into
+a new object local to a different :class:`.Session`.
+
+Basics of Using a Session
+===========================
+
+The most basic :class:`.Session` use patterns are presented here.
+
+Querying
+--------
+
+The :meth:`~.Session.query` method takes one or more
+*entities* and returns a new :class:`~sqlalchemy.orm.query.Query` object which
+will issue mapper queries within the context of this Session.
+An entity is
+defined as a mapped class, a :class:`~sqlalchemy.orm.mapper.Mapper` object, an
+orm-enabled *descriptor*, or an ``AliasedClass`` object::
+
+    # query from a class
+    session.query(User).filter_by(name='ed').all()
+
+    # query with multiple classes, returns tuples
+    session.query(User, Address).join('addresses').filter_by(name='ed').all()
+
+    # query using orm-enabled descriptors
+    session.query(User.name, User.fullname).all()
+
+    # query from a mapper
+    user_mapper = class_mapper(User)
+    session.query(user_mapper)
+
+When :class:`~sqlalchemy.orm.query.Query` returns results, each object
+instantiated is stored within the identity map. When a row matches an object
+which is already present, the same object is returned. In the latter case,
+whether or not the row is populated onto an existing object depends upon
+whether the attributes of the instance have been *expired*. A
+default-configured :class:`~sqlalchemy.orm.session.Session` automatically
+expires all instances along transaction boundaries, so that with a normally
+isolated transaction, there shouldn't be any issue of instances representing
+data which is stale with regard to the current transaction.
+
+The :class:`.Query` object is introduced in great detail in
+:ref:`ormtutorial_toplevel`, and further documented in
+:ref:`query_api_toplevel`.
+
+Adding New or Existing Items
+----------------------------
+
+:meth:`~.Session.add` is used to place instances in the
+session. For *transient* (i.e. brand new) instances, this will have the effect
+of an INSERT taking place for those instances upon the next flush. For
+instances which are *persistent* (i.e. were loaded by this session), they are
+already present and do not need to be added. Instances which are *detached*
+(i.e. have been removed from a session) may be re-associated with a session
+using this method::
+
+    user1 = User(name='user1')
+    user2 = User(name='user2')
+    session.add(user1)
+    session.add(user2)
+
+    session.commit()  # write changes to the database
+
+To add a list of items to the session at once, use
+:meth:`~.Session.add_all`::
+
+    session.add_all([item1, item2, item3])
+
+The :meth:`~.Session.add` operation **cascades** along
+the ``save-update`` cascade. For more details see the section
+:ref:`unitofwork_cascades`.
+
+Deleting
+--------
+
+The :meth:`~.Session.delete` method places an instance
+into the Session's list of objects to be marked as deleted::
+
+    # mark two objects to be deleted
+    session.delete(obj1)
+    session.delete(obj2)
+
+    # commit (or flush)
+    session.commit()
+
+.. _session_deleting_from_collections:
+
+Deleting from Collections
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A common confusion that arises regarding :meth:`~.Session.delete` is when
+objects which are members of a collection are being deleted. While the
+collection member is marked for deletion from the database, this does not
+impact the collection itself in memory until the collection is expired.
+Below, we illustrate that even after an ``Address`` object is marked
+for deletion, it's still present in the collection associated with the
+parent ``User``, even after a flush::
+
+    >>> address = user.addresses[1]
+    >>> session.delete(address)
+    >>> session.flush()
+    >>> address in user.addresses
+    True
+
+When the above session is committed, all attributes are expired.
+The next
+access of ``user.addresses`` will re-load the collection, revealing the
+desired state::
+
+    >>> session.commit()
+    >>> address in user.addresses
+    False
+
+The usual practice of deleting items within collections is to forego the usage
+of :meth:`~.Session.delete` directly, and instead use cascade behavior to
+automatically invoke the deletion as a result of removing the object from
+the parent collection. The ``delete-orphan`` cascade accomplishes this,
+as illustrated in the example below::
+
+    mapper(User, users_table, properties={
+        'addresses': relationship(Address, cascade="all, delete, delete-orphan")
+    })
+    del user.addresses[1]
+    session.flush()
+
+Where above, upon removing the ``Address`` object from the ``User.addresses``
+collection, the ``delete-orphan`` cascade has the effect of marking the ``Address``
+object for deletion in the same way as passing it to :meth:`~.Session.delete`.
+
+See also :ref:`unitofwork_cascades` for detail on cascades.
+
+Deleting based on Filter Criterion
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The caveat with :meth:`.Session.delete` is that you need to have an object on
+hand already in order to delete it. The :class:`.Query` object includes a
+:meth:`~sqlalchemy.orm.query.Query.delete` method which deletes based on
+filtering criteria::
+
+    session.query(User).filter(User.id==7).delete()
+
+The ``Query.delete()`` method includes functionality to "expire" objects
+already in the session which match the criteria. However it does have some
+caveats, including that "delete" and "delete-orphan" cascades won't be fully
+expressed for collections which are already loaded. See the API docs for
+:meth:`~sqlalchemy.orm.query.Query.delete` for more details.
+
+.. _session_flushing:
+
+Flushing
+--------
+
+When the :class:`~sqlalchemy.orm.session.Session` is used with its default
+configuration, the flush step is nearly always done transparently.
+Specifically, the flush occurs before any individual
+:class:`~sqlalchemy.orm.query.Query` is issued, as well as within the
+:meth:`~.Session.commit` call before the transaction is
+committed. It also occurs before a SAVEPOINT is issued when
+:meth:`~.Session.begin_nested` is used.
+
+Regardless of the autoflush setting, a flush can always be forced by issuing
+:meth:`~.Session.flush`::
+
+    session.flush()
+
+The "flush-on-Query" aspect of the behavior can be disabled by constructing
+:class:`.sessionmaker` with the flag ``autoflush=False``::
+
+    Session = sessionmaker(autoflush=False)
+
+Additionally, autoflush can be temporarily disabled by setting the
+``autoflush`` flag at any time::
+
+    mysession = Session()
+    mysession.autoflush = False
+
+Some autoflush-disable recipes are available at `DisableAutoFlush
+<http://www.sqlalchemy.org/trac/wiki/UsageRecipes/DisableAutoflush>`_.
+
+The flush process *always* occurs within a transaction, even if the
+:class:`~sqlalchemy.orm.session.Session` has been configured with
+``autocommit=True``, a setting that disables the session's persistent
+transactional state. If no transaction is present,
+:meth:`~.Session.flush` creates its own transaction and
+commits it. Any failures during flush will always result in a rollback of
+whatever transaction is present. If the Session is not in ``autocommit=True``
+mode, an explicit call to :meth:`~.Session.rollback` is
+required after a flush fails, even though the underlying transaction will have
+been rolled back already - this is so that the overall nesting pattern of
+so-called "subtransactions" is consistently maintained.
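+As a sketch of the error-handling pattern this implies (``some_object`` here
+is a placeholder for any pending instance), a failed flush might be handled
+as follows::
+
+    from sqlalchemy.exc import IntegrityError
+
+    session = Session()
+    session.add(some_object)
+    try:
+        session.flush()
+    except IntegrityError:
+        # the underlying transaction has already been rolled back, but an
+        # explicit rollback() is still needed to reset the Session's state
+        session.rollback()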
+.. _session_committing:
+
+Committing
+----------
+
+:meth:`~.Session.commit` is used to commit the current
+transaction. It always issues :meth:`~.Session.flush`
+beforehand to flush any remaining state to the database; this is independent
+of the "autoflush" setting. If no transaction is present, it raises an error.
+Note that the default behavior of the :class:`~sqlalchemy.orm.session.Session`
+is that a "transaction" is always present; this behavior can be disabled by
+setting ``autocommit=True``. In autocommit mode, a transaction can be
+initiated by calling the :meth:`~.Session.begin` method.
+
+.. note::
+
+   The term "transaction" here refers to a transactional
+   construct within the :class:`.Session` itself which may be
+   maintaining zero or more actual database (DBAPI) transactions. An individual
+   DBAPI connection begins participation in the "transaction" as it is first
+   used to execute a SQL statement, then remains present until the session-level
+   "transaction" is completed. See :ref:`unitofwork_transaction` for
+   further detail.
+
+Another behavior of :meth:`~.Session.commit` is that by
+default it expires the state of all instances present once the commit is
+complete. This is so that when the instances are next accessed, either through
+attribute access or by them being present in a
+:class:`~sqlalchemy.orm.query.Query` result set, they receive the most recent
+state. To disable this behavior, configure
+:class:`.sessionmaker` with ``expire_on_commit=False``.
+
+Normally, instances loaded into the :class:`~sqlalchemy.orm.session.Session`
+are never changed by subsequent queries; the assumption is that the current
+transaction is isolated so the state most recently loaded is correct as long
+as the transaction continues. Setting ``autocommit=True`` works against this
+model to some degree since the :class:`~sqlalchemy.orm.session.Session`
+behaves in exactly the same way with regard to attribute state, except no
+transaction is present.
+
+.. _session_rollback:
+
+Rolling Back
+------------
+
+:meth:`~.Session.rollback` rolls back the current
+transaction. With a default-configured session, the post-rollback state of the
+session is as follows:
+
+  * All transactions are rolled back and all connections returned to the
+    connection pool, unless the Session was bound directly to a Connection, in
+    which case the connection is still maintained (but still rolled back).
+  * Objects which were initially in the *pending* state when they were added
+    to the :class:`~sqlalchemy.orm.session.Session` within the lifespan of the
+    transaction are expunged, corresponding to their INSERT statement being
+    rolled back. The state of their attributes remains unchanged.
+  * Objects which were marked as *deleted* within the lifespan of the
+    transaction are promoted back to the *persistent* state, corresponding to
+    their DELETE statement being rolled back. Note that if those objects were
+    first *pending* within the transaction, that operation takes precedence
+    instead.
+  * All objects not expunged are fully expired.
+
+With that state understood, the :class:`~sqlalchemy.orm.session.Session` may
+safely continue usage after a rollback occurs.
+
+When a :meth:`~.Session.flush` fails, typically for
+reasons like primary key, foreign key, or "not nullable" constraint
+violations, a :meth:`~.Session.rollback` is issued
+automatically (it's currently not possible for a flush to continue after a
+partial failure).
+However, the flush process always uses its own transactional
+demarcator called a *subtransaction*, which is described more fully in the
+docstrings for :class:`~sqlalchemy.orm.session.Session`. What it means here is
+that even though the database transaction has been rolled back, the end user
+must still issue :meth:`~.Session.rollback` to fully
+reset the state of the :class:`~sqlalchemy.orm.session.Session`.
+
+Closing
+-------
+
+The :meth:`~.Session.close` method issues a
+:meth:`~.Session.expunge_all`, and :term:`releases` any
+transactional/connection resources. When connections are returned to the
+connection pool, transactional state is rolled back as well.
+
diff --git a/doc/build/orm/session_state_management.rst b/doc/build/orm/session_state_management.rst
new file mode 100644
index 000000000..1ca7ca2e4
--- /dev/null
+++ b/doc/build/orm/session_state_management.rst
@@ -0,0 +1,560 @@
+State Management
+================
+
+.. _session_object_states:
+
+Quickie Intro to Object States
+------------------------------
+
+It's helpful to know the states which an instance can have within a session:
+
+* **Transient** - an instance that's not in a session, and is not saved to the
+  database; i.e. it has no database identity. The only relationship such an
+  object has to the ORM is that its class has a ``mapper()`` associated with
+  it.
+
+* **Pending** - when you :meth:`~.Session.add` a transient
+  instance, it becomes pending. It hasn't actually been flushed to the
+  database yet, but it will be when the next flush occurs.
+
+* **Persistent** - An instance which is present in the session and has a record
+  in the database. You get persistent instances by either flushing so that the
+  pending instances become persistent, or by querying the database for
+  existing instances (or moving persistent instances from other sessions into
+  your local session).
+
+* **Detached** - an instance which has a record in the database, but is not in
+  any session. There's nothing wrong with this, and you can use objects
+  normally when they're detached, **except** they will not be able to issue
+  any SQL in order to load collections or attributes which are not yet loaded,
+  or were marked as "expired".
+
+Knowing these states is important, since the
+:class:`.Session` tries to be strict about ambiguous
+operations (such as trying to save the same object to two different sessions
+at the same time).
+
+Getting the Current State of an Object
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The actual state of any mapped object can be viewed at any time using
+the :func:`.inspect` system::
+
+    >>> from sqlalchemy import inspect
+    >>> insp = inspect(my_object)
+    >>> insp.persistent
+    True
+
+.. seealso::
+
+    :attr:`.InstanceState.transient`
+
+    :attr:`.InstanceState.pending`
+
+    :attr:`.InstanceState.persistent`
+
+    :attr:`.InstanceState.detached`
+
+Session Attributes
+------------------
+
+The :class:`~sqlalchemy.orm.session.Session` itself acts somewhat like a
+set-like collection. All items present may be accessed using the iterator
+interface::
+
+    for obj in session:
+        print obj
+
+And presence may be tested for using regular "contains" semantics::
+
+    if obj in session:
+        print "Object is present"
+
+The session is also keeping track of all newly created (i.e. pending) objects,
+all objects which have had changes since they were last loaded or saved (i.e.
+"dirty"), and everything that's been marked as deleted::
+
+    # pending objects recently added to the Session
+    session.new
+
+    # persistent objects which currently have changes detected
+    # (this collection is now created on the fly each time the property is called)
+    session.dirty
+
+    # persistent objects that have been marked as deleted via session.delete(obj)
+    session.deleted
+
+    # dictionary of all persistent objects, keyed on their
+    # identity key
+    session.identity_map
+
+(Documentation: :attr:`.Session.new`, :attr:`.Session.dirty`,
+:attr:`.Session.deleted`, :attr:`.Session.identity_map`).
+
+Note that objects within the session are by default *weakly referenced*. This
+means that when they are dereferenced in the outside application, they fall
+out of scope from within the :class:`~sqlalchemy.orm.session.Session` as well
+and are subject to garbage collection by the Python interpreter. The
+exceptions to this include objects which are pending, objects which are marked
+as deleted, or persistent objects which have pending changes on them. After a
+full flush, these collections are all empty, and all objects are again weakly
+referenced. To disable the weak referencing behavior and force all objects
+within the session to remain until explicitly expunged, configure
+:class:`.sessionmaker` with the ``weak_identity_map=False``
+setting.
+
+.. _unitofwork_merging:
+
+Merging
+-------
+
+:meth:`~.Session.merge` transfers state from an
+outside object into a new or already existing instance within a session. It
+also reconciles the incoming data against the state of the
+database, producing a history stream which will be applied towards the next
+flush, or alternatively can be made to produce a simple "transfer" of
+state without producing change history or accessing the database. Usage is as follows::
+
+    merged_object = session.merge(existing_object)
+
+When given an instance, it follows these steps:
+
+* It examines the primary key of the instance. If it's present, it attempts
+  to locate that instance in the local identity map. If the ``load=True``
+  flag is left at its default, it also checks the database for this primary
+  key if not located locally.
+* If the given instance has no primary key, or if no instance can be found
+  with the primary key given, a new instance is created.
+* The state of the given instance is then copied onto the located/newly
+  created instance. For attributes which are present on the source
+  instance, the value is transferred to the target instance. For mapped
+  attributes which aren't present on the source, the attribute is
+  expired on the target instance, discarding its existing value.
+
+  If the ``load=True`` flag is left at its default,
+  this copy process emits events and will load the target object's
+  unloaded collections for each attribute present on the source object,
+  so that the incoming state can be reconciled against what's
+  present in the database. If ``load``
+  is passed as ``False``, the incoming data is "stamped" directly without
+  producing any history.
+* The operation is cascaded to related objects and collections, as
+  indicated by the ``merge`` cascade (see :ref:`unitofwork_cascades`).
+* The new instance is returned.
+
+With :meth:`~.Session.merge`, the given "source"
+instance is not modified nor is it associated with the target :class:`.Session`,
+and remains available to be merged with any number of other :class:`.Session`
+objects.
+:meth:`~.Session.merge` is useful for
+taking the state of any kind of object structure without regard for its
+origins or current session associations and copying its state into a
+new session. Here are some examples:
+
+* An application which reads an object structure from a file and wishes to
+  save it to the database might parse the file, build up the
+  structure, and then use
+  :meth:`~.Session.merge` to save it
+  to the database, ensuring that the data within the file is
+  used to formulate the primary key of each element of the
+  structure. Later, when the file has changed, the same
+  process can be re-run, producing a slightly different
+  object structure, which can then be merged in again,
+  and the :class:`~sqlalchemy.orm.session.Session` will
+  automatically update the database to reflect those
+  changes, loading each object from the database by primary key and
+  then updating its state with the new state given.
+
+* An application is storing objects in an in-memory cache, shared by
+  many :class:`.Session` objects simultaneously. :meth:`~.Session.merge`
+  is used each time an object is retrieved from the cache to create
+  a local copy of it in each :class:`.Session` which requests it.
+  The cached object remains detached; only its state is moved into
+  copies of itself that are local to individual :class:`~.Session`
+  objects.
+
+  In the caching use case, it's common to use the ``load=False``
+  flag to remove the overhead of reconciling the object's state
+  with the database. There's also a "bulk" version of
+  :meth:`~.Session.merge` called :meth:`~.Query.merge_result`
+  that was designed to work with cache-extended :class:`.Query`
+  objects - see the section :ref:`examples_caching`.
+
+* An application wants to transfer the state of a series of objects
+  into a :class:`.Session` maintained by a worker thread or other
+  concurrent system. :meth:`~.Session.merge` makes a copy of each object
+  to be placed into this new :class:`.Session`. At the end of the operation,
+  the parent thread/process maintains the objects it started with,
+  and the thread/worker can proceed with local copies of those objects.
+
+  In the "transfer between threads/processes" use case, the application
+  may want to use the ``load=False`` flag as well to avoid overhead and
+  redundant SQL queries as the data is transferred.
+
+Merge Tips
+~~~~~~~~~~
+
+:meth:`~.Session.merge` is an extremely useful method for many purposes. However,
+it deals with the intricate border between objects that are transient/detached and
+those that are persistent, as well as the automated transference of state.
+The wide variety of scenarios that can present themselves here often require a
+more careful approach to the state of objects. Common problems with merge usually involve
+some unexpected state regarding the object being passed to :meth:`~.Session.merge`.
+
+Let's use the canonical example of the User and Address objects::
+
+    class User(Base):
+        __tablename__ = 'user'
+
+        id = Column(Integer, primary_key=True)
+        name = Column(String(50), nullable=False)
+        addresses = relationship("Address", backref="user")
+
+    class Address(Base):
+        __tablename__ = 'address'
+
+        id = Column(Integer, primary_key=True)
+        email_address = Column(String(50), nullable=False)
+        user_id = Column(Integer, ForeignKey('user.id'), nullable=False)
+
+Assume a ``User`` object with one ``Address``, already persistent::
+
+    >>> u1 = User(name='ed', addresses=[Address(email_address='ed@ed.com')])
+    >>> session.add(u1)
+    >>> session.commit()
+
+We now create ``a1``, an object outside the session, which we'd like
+to merge on top of the existing ``Address``::
+
+    >>> existing_a1 = u1.addresses[0]
+    >>> a1 = Address(id=existing_a1.id)
+
+A surprise would occur if we said this::
+
+    >>> a1.user = u1
+    >>> a1 = session.merge(a1)
+    >>> session.commit()
+    sqlalchemy.orm.exc.FlushError: New instance <Address at 0x1298f50>
+    with identity key (<class '__main__.Address'>, (1,)) conflicts with
+    persistent instance <Address at 0x12a25d0>
+
+Why is that? We weren't careful with our cascades. The assignment
+of ``a1.user`` to a persistent object cascaded to the backref of ``User.addresses``
+and made our ``a1`` object pending, as though we had added it. Now we have
+*two* ``Address`` objects in the session::
+
+    >>> a1 = Address()
+    >>> a1.user = u1
+    >>> a1 in session
+    True
+    >>> existing_a1 in session
+    True
+    >>> a1 is existing_a1
+    False
+
+Above, our ``a1`` is already pending in the session. The
+subsequent :meth:`~.Session.merge` operation essentially
+does nothing. Cascade can be configured via the :paramref:`~.relationship.cascade`
+option on :func:`.relationship`, although in this case it
+would mean removing the ``save-update`` cascade from the
+``User.addresses`` relationship - and usually, that behavior
+is extremely convenient. The solution here would usually be to not assign
+``a1.user`` to an object already persistent in the target
+session.
+
+The ``cascade_backrefs=False`` option of :func:`.relationship`
+will also prevent the ``Address`` from
+being added to the session via the ``a1.user = u1`` assignment.
+
+Further detail on cascade operation is at :ref:`unitofwork_cascades`.
+
+Another example of unexpected state::
+
+    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
+    >>> a1.user is None
+    True
+    >>> a1 = session.merge(a1)
+    >>> session.commit()
+    sqlalchemy.exc.IntegrityError: (IntegrityError) address.user_id
+    may not be NULL
+
+Here, we accessed ``a1.user``, which returned its default value
+of ``None``; as a result of this access, ``None`` has been placed in the ``__dict__`` of
+our object ``a1``. Normally, this operation creates no change event,
+so the ``user_id`` attribute takes precedence during a
+flush. But when we merge the ``Address`` object into the session, the operation
+is equivalent to::
+
+    >>> existing_a1.id = existing_a1.id
+    >>> existing_a1.user_id = u1.id
+    >>> existing_a1.user = None
+
+Where above, both ``user_id`` and ``user`` are assigned to, and change events
+are emitted for both. The ``user`` association
+takes precedence, and ``None`` is applied to ``user_id``, causing a failure.
+
+Most :meth:`~.Session.merge` issues can be examined by first checking:
+is the object prematurely in the session?
+.. sourcecode:: python+sql
+
+    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
+    >>> assert a1 not in session
+    >>> a1 = session.merge(a1)
+
+Or is there state on the object that we don't want? Examining ``__dict__``
+is a quick way to check::
+
+    >>> a1 = Address(id=existing_a1.id, user_id=u1.id)
+    >>> a1.user
+    >>> a1.__dict__
+    {'_sa_instance_state': <sqlalchemy.orm.state.InstanceState object at 0x1298d10>,
+        'user_id': 1,
+        'id': 1,
+        'user': None}
+    >>> # we don't want user=None merged, remove it
+    >>> del a1.user
+    >>> a1 = session.merge(a1)
+    >>> # success
+    >>> session.commit()
+
+Expunging
+---------
+
+Expunge removes an object from the Session, sending persistent instances to
+the detached state, and pending instances to the transient state:
+
+.. sourcecode:: python+sql
+
+    session.expunge(obj1)
+
+To remove all items, call :meth:`~.Session.expunge_all`
+(this method was formerly known as ``clear()``).
+
+.. _session_expire:
+
+Refreshing / Expiring
+---------------------
+
+:term:`Expiring` means that the database-persisted data held inside a series
+of object attributes is erased, in such a way that when those attributes
+are next accessed, a SQL query is emitted which will refresh that data from
+the database.
+
+When we talk about expiration of data we are usually talking about an object
+that is in the :term:`persistent` state. For example, if we load an object
+as follows::
+
+    user = session.query(User).filter_by(name='user1').first()
+
+The above ``User`` object is persistent, and has a series of attributes
+present; if we were to look inside its ``__dict__``, we'd see that state
+loaded::
+
+    >>> user.__dict__
+    {
+      'id': 1, 'name': u'user1',
+      '_sa_instance_state': <...>,
+    }
+
+where ``id`` and ``name`` refer to those columns in the database.
+``_sa_instance_state`` is a non-database-persisted value used by SQLAlchemy
+internally (it refers to the :class:`.InstanceState` for the instance;
+while not directly relevant to this section, we can use the :func:`.inspect`
+function to access it if we want to get at it).
+
+At this point, the state in our ``User`` object matches that of the loaded
+database row. But upon expiring the object using a method such as
+:meth:`.Session.expire`, we see that the state is removed::
+
+    >>> session.expire(user)
+    >>> user.__dict__
+    {'_sa_instance_state': <...>}
+
+We see that while the internal "state" still hangs around, the values which
+correspond to the ``id`` and ``name`` columns are gone. If we were to access
+one of these columns while watching SQL, we'd see this:
+
+.. sourcecode:: python+sql
+
+    >>> print(user.name)
+    {opensql}SELECT user.id AS user_id, user.name AS user_name
+    FROM user
+    WHERE user.id = ?
+    (1,)
+    {stop}user1
+
+Above, upon accessing the expired attribute ``user.name``, the ORM initiated
+a :term:`lazy load` to retrieve the most recent state from the database,
+by emitting a SELECT for the user row to which this user refers. Afterwards,
+the ``__dict__`` is again populated::
+
+    >>> user.__dict__
+    {
+      'id': 1, 'name': u'user1',
+      '_sa_instance_state': <...>,
+    }
+
+.. note:: While we are peeking inside of ``__dict__`` in order to see a bit
+   of what SQLAlchemy does with object attributes, we **should not modify**
+   the contents of ``__dict__`` directly, at least as far as those attributes
+   which the SQLAlchemy ORM is maintaining (other attributes outside of SQLA's
+   realm are fine).
+   This is because SQLAlchemy uses :term:`descriptors` in
+   order to track the changes we make to an object, and when we modify ``__dict__``
+   directly, the ORM won't be able to track that we changed something.
+
+Another key behavior of both :meth:`~.Session.expire` and :meth:`~.Session.refresh`
+is that all un-flushed changes on an object are discarded. That is,
+if we were to modify an attribute on our ``User``::
+
+    >>> user.name = 'user2'
+
+but then we call :meth:`~.Session.expire` without first calling :meth:`~.Session.flush`,
+our pending value of ``'user2'`` is discarded::
+
+    >>> session.expire(user)
+    >>> user.name
+    'user1'
+
+The :meth:`~.Session.expire` method can be used to mark as "expired" all ORM-mapped
+attributes for an instance::
+
+    # expire all ORM-mapped attributes on obj1
+    session.expire(obj1)
+
+It can also be passed a list of string attribute names, referring to specific
+attributes to be marked as expired::
+
+    # expire only attributes obj1.attr1, obj1.attr2
+    session.expire(obj1, ['attr1', 'attr2'])
+
+The :meth:`~.Session.refresh` method has a similar interface, but instead
+of expiring, it emits an immediate SELECT for the object's row::
+
+    # reload all attributes on obj1
+    session.refresh(obj1)
+
+:meth:`~.Session.refresh` also accepts a list of string attribute names,
+but unlike :meth:`~.Session.expire`, expects at least one name to
+be that of a column-mapped attribute::
+
+    # reload obj1.attr1, obj1.attr2
+    session.refresh(obj1, ['attr1', 'attr2'])
+
+The :meth:`.Session.expire_all` method allows us to essentially call
+:meth:`.Session.expire` on all objects contained within the :class:`.Session`
+at once::
+
+    session.expire_all()
+
+What Actually Loads
+~~~~~~~~~~~~~~~~~~~
+
+The SELECT statement that's emitted when an object marked with :meth:`~.Session.expire`
+is accessed, or when an object is loaded with :meth:`~.Session.refresh`, varies
+based on several factors, including:
+
+* The load of expired attributes is triggered from **column-mapped attributes only**.
+  While any kind of attribute can be marked as expired, including a
+  :func:`.relationship`-mapped attribute, accessing an expired :func:`.relationship`
+  attribute will emit a load only for that attribute, using standard
+  relationship-oriented lazy loading. Column-oriented attributes, even if
+  expired, will not load as part of this operation, and instead will load when
+  any column-oriented attribute is accessed.
+
+* :func:`.relationship`-mapped attributes will not load in response to
+  expired column-based attributes being accessed.
+
+* Regarding relationships, :meth:`~.Session.refresh` is more restrictive than
+  :meth:`~.Session.expire` with regard to attributes that aren't column-mapped.
+  Calling :meth:`~.Session.refresh` and passing a list of names that only includes
+  relationship-mapped attributes will actually raise an error.
+  In any case, non-eager-loading :func:`.relationship` attributes will not be
+  included in any refresh operation.
+
+* :func:`.relationship` attributes configured as "eager loading" via the
+  :paramref:`~.relationship.lazy` parameter will load in the case of
+  :meth:`~.Session.refresh`, if either no attribute names are specified, or
+  if their names are included in the list of attributes to be
+  refreshed.
+
+* Attributes that are configured as :func:`.deferred` will not normally load
+  during either the expired-attribute load or a refresh.
+  An unloaded attribute that's :func:`.deferred` instead loads on its own when directly
+  accessed, or if part of a "group" of deferred attributes where an unloaded
+  attribute in that group is accessed.
+
+* For expired attributes that are loaded on access, a joined-inheritance table
+  mapping will emit a SELECT that typically only includes those tables for which
+  unloaded attributes are present. The action here is sophisticated enough
+  to load only the parent or child table, for example, if the subset of columns
+  that were originally expired encompasses only one or the other of those tables.
+
+* When :meth:`~.Session.refresh` is used on a joined-inheritance table mapping,
+  the SELECT emitted will resemble that of when :meth:`.Session.query` is
+  used on the target object's class. This is typically all those tables that
+  are set up as part of the mapping.
+
+When to Expire or Refresh
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The :class:`.Session` uses the expiration feature automatically whenever
+the transaction referred to by the session ends. That is, whenever :meth:`.Session.commit`
+or :meth:`.Session.rollback` is called, all objects within the :class:`.Session`
+are expired, using a feature equivalent to that of the :meth:`.Session.expire_all`
+method. The rationale is that the end of a transaction is a
+demarcating point at which there is no more context available in order to know
+what the current state of the database is, as any number of other transactions
+may be affecting it. Only when a new transaction starts can we again have access
+to the current state of the database, at which point any number of changes
+may have occurred.
+
+.. sidebar:: Transaction Isolation
+
+    Of course, most databases are capable of handling
+    multiple transactions at once, even involving the same rows of data. When
+    a relational database handles multiple transactions involving the same
+    tables or rows, this is when the :term:`isolation` aspect of the database comes
+    into play. The isolation behavior of different databases varies considerably
+    and even on a single database can be configured to behave in different ways
+    (via the so-called :term:`isolation level` setting). In that sense, the :class:`.Session`
+    can't fully predict when the same SELECT statement, emitted a second time,
+    will definitely return the data we already have, or will return new data.
+    So as a best guess, it assumes that within the scope of a transaction, unless
+    it is known that a SQL expression has been emitted to modify a particular row,
+    there's no need to refresh a row unless explicitly told to do so.
+
+The :meth:`.Session.expire` and :meth:`.Session.refresh` methods are used
+when one wants to force an object to re-load its data from the
+database, in those cases where it is known that the current state of the data
+is possibly stale. Reasons for this might include:
+
+* some SQL has been emitted within the transaction outside of the
+  scope of the ORM's object handling, such as if a :meth:`.Table.update` construct
+  were emitted using the :meth:`.Session.execute` method;
+
+* the application
+  is attempting to acquire data that is known to have been modified in a
+  concurrent transaction, and it is also known that the isolation rules in effect
+  allow this data to be visible.
+
+The second bullet has the important caveat that "it is also known that the isolation rules in effect
+allow this data to be visible."
+This means that it cannot be assumed that an
+UPDATE that happened on another database connection will yet be visible
+locally; in many cases, it will not. This is why if one wishes to use
+:meth:`.Session.expire` or :meth:`.Session.refresh` in order to view data between ongoing
+transactions, an understanding of the isolation behavior in effect is essential.
+
+.. seealso::
+
+    :meth:`.Session.expire`
+
+    :meth:`.Session.expire_all`
+
+    :meth:`.Session.refresh`
+
+    :term:`isolation` - glossary explanation of isolation which includes links
+    to Wikipedia.
+
+    `The SQLAlchemy Session In-Depth <http://techspot.zzzeek.org/2012/11/14/pycon-canada-the-sqlalchemy-session-in-depth/>`_ - a video
+    and slides with an in-depth discussion of the object
+    lifecycle including the role of data expiration.
diff --git a/doc/build/orm/session_transaction.rst b/doc/build/orm/session_transaction.rst
new file mode 100644
index 000000000..ce5757dd0
--- /dev/null
+++ b/doc/build/orm/session_transaction.rst
@@ -0,0 +1,365 @@
+=======================================
+Transactions and Connection Management
+=======================================
+
+.. _unitofwork_transaction:
+
+Managing Transactions
+=====================
+
+A newly constructed :class:`.Session` may be said to be in the "begin" state.
+In this state, the :class:`.Session` has not established any connection or
+transactional state with any of the :class:`.Engine` objects that may be associated
+with it.
+
+The :class:`.Session` then receives requests to operate upon a database connection.
+Typically, this means it is called upon to execute SQL statements using a particular
+:class:`.Engine`, which may be via :meth:`.Session.query`, :meth:`.Session.execute`,
+or within a flush operation of pending data, which occurs when such state exists
+and :meth:`.Session.commit` or :meth:`.Session.flush` is called.
+
+As these requests are received, each new :class:`.Engine` encountered is associated
+with an ongoing transactional state maintained by the :class:`.Session`.
+When the first :class:`.Engine` is operated upon, the :class:`.Session` can be said
+to have left the "begin" state and entered "transactional" state. For each
+:class:`.Engine` encountered, a :class:`.Connection` is associated with it,
+which is acquired via the :meth:`.Engine.contextual_connect` method. If a
+:class:`.Connection` was directly associated with the :class:`.Session` (see :ref:`session_external_transaction`
+for an example of this), it is
+added to the transactional state directly.
+
+For each :class:`.Connection`, the :class:`.Session` also maintains a :class:`.Transaction` object,
+which is acquired by calling :meth:`.Connection.begin` on each :class:`.Connection`,
+or if the :class:`.Session`
+object has been established using the flag ``twophase=True``, a :class:`.TwoPhaseTransaction`
+object acquired via :meth:`.Connection.begin_twophase`. These transactions are all committed or
+rolled back corresponding to the invocation of the
+:meth:`.Session.commit` and :meth:`.Session.rollback` methods. A commit operation will
+also call the :meth:`.TwoPhaseTransaction.prepare` method on all transactions if applicable.
+
+When the transactional state is completed after a rollback or commit, the :class:`.Session`
+:term:`releases` all :class:`.Transaction` and :class:`.Connection` resources,
+and goes back to the "begin" state, which
+will again acquire new :class:`.Connection` and :class:`.Transaction` objects as new
+requests to emit SQL statements are received.
+
+The example below illustrates this lifecycle::
+
+    engine = create_engine("...")
+    Session = sessionmaker(bind=engine)
+
+    # new session.  no connections are in use.
+    session = Session()
+    try:
+        # first query.  a Connection is acquired
+        # from the Engine, and a Transaction
+        # started.
+        item1 = session.query(Item).get(1)
+
+        # second query.  the same Connection/Transaction
+        # are used.
+        item2 = session.query(Item).get(2)
+
+        # pending changes are created.
+        item1.foo = 'bar'
+        item2.bar = 'foo'
+
+        # commit.  The pending changes above
+        # are flushed via flush(), the Transaction
+        # is committed, the Connection object closed
+        # and discarded, the underlying DBAPI connection
+        # returned to the connection pool.
+        session.commit()
+    except:
+        # on rollback, the same closure of state
+        # as that of commit proceeds.
+        session.rollback()
+        raise
+
+.. _session_begin_nested:
+
+Using SAVEPOINT
+---------------
+
+SAVEPOINT transactions, if supported by the underlying engine, may be
+delineated using the :meth:`~.Session.begin_nested`
+method::
+
+    Session = sessionmaker()
+    session = Session()
+    session.add(u1)
+    session.add(u2)
+
+    session.begin_nested()  # establish a savepoint
+    session.add(u3)
+    session.rollback()  # rolls back u3, keeps u1 and u2
+
+    session.commit()  # commits u1 and u2
+
+:meth:`~.Session.begin_nested` may be called any number
+of times, which will issue a new SAVEPOINT with a unique identifier for each
+call. For each :meth:`~.Session.begin_nested` call, a
+corresponding :meth:`~.Session.rollback` or
+:meth:`~.Session.commit` must be issued. (But note that if the return value is
+used as a context manager, i.e. in a with-statement, then this rollback/commit
+is issued by the context manager upon exiting the context, and so should not be
+added explicitly.)
+
+When :meth:`~.Session.begin_nested` is called, a
+:meth:`~.Session.flush` is unconditionally issued
+(regardless of the ``autoflush`` setting). This is so that when a
+:meth:`~.Session.rollback` occurs, the full state of the
+session is expired, thus causing all subsequent attribute/instance access to
+reference the full state of the :class:`~sqlalchemy.orm.session.Session` right
+before :meth:`~.Session.begin_nested` was called.
+
+:meth:`~.Session.begin_nested`, in the same manner as the less often
+used :meth:`~.Session.begin` method, returns a transactional object
+which also works as a context manager.
+It can be succinctly used around individual record inserts in order to catch
+things like unique constraint exceptions::
+
+    for record in records:
+        try:
+            with session.begin_nested():
+                session.merge(record)
+        except:
+            print "Skipped record %s" % record
+    session.commit()
+
+.. _session_autocommit:
+
+Autocommit Mode
+---------------
+
+The example of :class:`.Session` transaction lifecycle illustrated at
+the start of :ref:`unitofwork_transaction` applies to a :class:`.Session` configured in the
+default mode of ``autocommit=False``. Constructing a :class:`.Session`
+with ``autocommit=True`` produces a :class:`.Session` placed into "autocommit" mode, where each SQL statement
+invoked by a :meth:`.Session.query` or :meth:`.Session.execute` occurs
+using a new connection from the connection pool, discarding it after
+results have been iterated. The :meth:`.Session.flush` operation
+still occurs within the scope of a single transaction, though this transaction
+is closed out after the :meth:`.Session.flush` operation completes.
+.. warning::
+
+   "autocommit" mode should **not be considered for general use**.
+   If used, it should always be combined with the usage of
+   :meth:`.Session.begin` and :meth:`.Session.commit`, to ensure
+   a transaction demarcation.
+
+   Executing queries outside of a demarcated transaction is a legacy mode
+   of usage, and can in some cases lead to concurrent connection
+   checkouts.
+
+   In the absence of a demarcated transaction, the :class:`.Session`
+   cannot make appropriate decisions as to when autoflush should
+   occur nor when auto-expiration should occur, so these features
+   should be disabled with ``autoflush=False, expire_on_commit=False``.
+
+Modern usage of "autocommit" is for framework integrations that need to control
+specifically when the "begin" state occurs. A session which is configured with
+``autocommit=True`` may be placed into the "begin" state using the
+:meth:`.Session.begin` method.
+After the cycle completes upon :meth:`.Session.commit` or :meth:`.Session.rollback`,
+connection and transaction resources are :term:`released` and the :class:`.Session`
+goes back into "autocommit" mode, until :meth:`.Session.begin` is called again::
+
+    Session = sessionmaker(bind=engine, autocommit=True)
+    session = Session()
+    session.begin()
+    try:
+        item1 = session.query(Item).get(1)
+        item2 = session.query(Item).get(2)
+        item1.foo = 'bar'
+        item2.bar = 'foo'
+        session.commit()
+    except:
+        session.rollback()
+        raise
+
+The :meth:`.Session.begin` method also returns a transactional token which is
+compatible with the Python 2.6 ``with`` statement::
+
+    Session = sessionmaker(bind=engine, autocommit=True)
+    session = Session()
+    with session.begin():
+        item1 = session.query(Item).get(1)
+        item2 = session.query(Item).get(2)
+        item1.foo = 'bar'
+        item2.bar = 'foo'
+
+.. _session_subtransactions:
+
+Using Subtransactions with Autocommit
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A subtransaction indicates usage of the :meth:`.Session.begin` method in conjunction with
+the ``subtransactions=True`` flag. This produces a non-transactional, delimiting construct that
+allows nesting of calls to :meth:`~.Session.begin` and :meth:`~.Session.commit`.
+Its purpose is to allow the construction of code that can function within a transaction
+both independently of any external code that starts a transaction,
+as well as within a block that has already demarcated a transaction.
+
+``subtransactions=True`` is generally only useful in conjunction with
+autocommit, and is equivalent to the pattern described at :ref:`connections_nested_transactions`,
+where any number of functions can call :meth:`.Connection.begin` and :meth:`.Transaction.commit`
+as though they are the initiator of the transaction, but in fact may be participating
+in an already ongoing transaction::
+
+    # method_a starts a transaction and calls method_b
+    def method_a(session):
+        session.begin(subtransactions=True)
+        try:
+            method_b(session)
+            session.commit()  # transaction is committed here
+        except:
+            session.rollback()  # rolls back the transaction
+            raise
+
+    # method_b also starts a transaction, but when
+    # called from method_a participates in the ongoing
+    # transaction.
+    def method_b(session):
+        session.begin(subtransactions=True)
+        try:
+            session.add(SomeObject('bat', 'lala'))
+            session.commit()  # transaction is not committed yet
+        except:
+            session.rollback()  # rolls back the transaction, in this case
+                                # the one that was initiated in method_a().
+            raise
+
+    # create a Session and call method_a
+    session = Session(autocommit=True)
+    method_a(session)
+    session.close()
+
+Subtransactions are used by the :meth:`.Session.flush` process to ensure that the
+flush operation takes place within a transaction, regardless of autocommit. When
+autocommit is disabled, it is still useful in that it forces the :class:`.Session`
+into a "pending rollback" state, as a failed flush cannot be resumed in mid-operation,
+while the end user still maintains the "scope" of the transaction overall.
+
+.. _session_twophase:
+
+Enabling Two-Phase Commit
+-------------------------
+
+For backends which support two-phase operation (currently MySQL and
+PostgreSQL), the session can be instructed to use two-phase commit semantics.
+This will coordinate the committing of transactions across databases so that
+the transaction is either committed or rolled back in all databases. You can
+also :meth:`~.Session.prepare` the session for
+interacting with transactions not managed by SQLAlchemy. To use two-phase
+transactions, set the flag ``twophase=True`` on the session::
+
+    engine1 = create_engine('postgresql://db1')
+    engine2 = create_engine('postgresql://db2')
+
+    Session = sessionmaker(twophase=True)
+
+    # bind User operations to engine 1, Account operations to engine 2
+    Session.configure(binds={User: engine1, Account: engine2})
+
+    session = Session()
+
+    # .... work with accounts and users
+
+    # commit.  session will issue a flush to all DBs, and a prepare step to all DBs,
+    # before committing both transactions
+    session.commit()
+
+.. _session_external_transaction:
+
+Joining a Session into an External Transaction (such as for test suites)
+=========================================================================
+
+If a :class:`.Connection` is being used which is already in a transactional
+state (i.e. has a :class:`.Transaction` established), a :class:`.Session` can
+be made to participate within that transaction by just binding the
+:class:`.Session` to that :class:`.Connection`. The usual rationale for this
+is a test suite that allows ORM code to work freely with a :class:`.Session`,
+including the ability to call :meth:`.Session.commit`, where afterwards the
+entire database interaction is rolled back::
+
+    from sqlalchemy.orm import sessionmaker
+    from sqlalchemy import create_engine
+    from unittest import TestCase
+
+    # global application scope.  create Session class, engine
+    Session = sessionmaker()
+
+    engine = create_engine('postgresql://...')
+
+    class SomeTest(TestCase):
+        def setUp(self):
+            # connect to the database
+            self.connection = engine.connect()
+
+            # begin a non-ORM transaction
+            self.trans = self.connection.begin()
+
+            # bind an individual Session to the connection
+            self.session = Session(bind=self.connection)
+
+        def test_something(self):
+            # use the session in tests.
+
+            self.session.add(Foo())
+            self.session.commit()
+
+        def tearDown(self):
+            self.session.close()
+
+            # rollback - everything that happened with the
+            # Session above (including calls to commit())
+            # is rolled back.
+            self.trans.rollback()
+
+            # return connection to the Engine
+            self.connection.close()
+
+Above, we issue :meth:`.Session.commit` as well as
+:meth:`.Transaction.rollback`.
This is an example of where we take advantage +of the :class:`.Connection` object's ability to maintain *subtransactions*, or +nested begin/commit-or-rollback pairs where only the outermost begin/commit +pair actually commits the transaction, or if the outermost block rolls back, +everything is rolled back. + +.. topic:: Supporting Tests with Rollbacks + + The above recipe works well for any kind of database enabled test, except + for a test that needs to actually invoke :meth:`.Session.rollback` within + the scope of the test itself. The above recipe can be expanded, such + that the :class:`.Session` always runs all operations within the scope + of a SAVEPOINT, which is established at the start of each transaction, + so that tests can also rollback the "transaction" as well while still + remaining in the scope of a larger "transaction" that's never committed, + using two extra events:: + + from sqlalchemy import event + + class SomeTest(TestCase): + def setUp(self): + # connect to the database + self.connection = engine.connect() + + # begin a non-ORM transaction + self.trans = connection.begin() + + # bind an individual Session to the connection + self.session = Session(bind=self.connection) + + # start the session in a SAVEPOINT... + self.session.begin_nested() + + # then each time that SAVEPOINT ends, reopen it + @event.listens_for(self.session, "after_transaction_end") + def restart_savepoint(session, transaction): + if transaction.nested and not transaction._parent.nested: + session.begin_nested() + + + # ... the tearDown() method stays the same diff --git a/doc/build/orm/versioning.rst b/doc/build/orm/versioning.rst new file mode 100644 index 000000000..35304086d --- /dev/null +++ b/doc/build/orm/versioning.rst @@ -0,0 +1,253 @@ +.. _mapper_version_counter: + +Configuring a Version Counter +============================= + +The :class:`.Mapper` supports management of a :term:`version id column`, which +is a single table column that increments or otherwise updates its value +each time an ``UPDATE`` to the mapped table occurs. This value is checked each +time the ORM emits an ``UPDATE`` or ``DELETE`` against the row to ensure that +the value held in memory matches the database value. + +.. warning:: + + Because the versioning feature relies upon comparison of the **in memory** + record of an object, the feature only applies to the :meth:`.Session.flush` + process, where the ORM flushes individual in-memory rows to the database. + It does **not** take effect when performing + a multirow UPDATE or DELETE using :meth:`.Query.update` or :meth:`.Query.delete` + methods, as these methods only emit an UPDATE or DELETE statement but otherwise + do not have direct access to the contents of those rows being affected. + +The purpose of this feature is to detect when two concurrent transactions +are modifying the same row at roughly the same time, or alternatively to provide +a guard against the usage of a "stale" row in a system that might be re-using +data from a previous transaction without refreshing (e.g. if one sets ``expire_on_commit=False`` +with a :class:`.Session`, it is possible to re-use the data from a previous +transaction). + +.. 
topic:: Concurrent transaction updates + + When detecting concurrent updates within transactions, it is typically the + case that the database's transaction isolation level is below the level of + :term:`repeatable read`; otherwise, the transaction will not be exposed + to a new row value created by a concurrent update which conflicts with + the locally updated value. In this case, the SQLAlchemy versioning + feature will typically not be useful for in-transaction conflict detection, + though it still can be used for cross-transaction staleness detection. + + The database that enforces repeatable reads will typically either have locked the + target row against a concurrent update, or is employing some form + of multi version concurrency control such that it will emit an error + when the transaction is committed. SQLAlchemy's version_id_col is an alternative + which allows version tracking to occur for specific tables within a transaction + that otherwise might not have this isolation level set. + + .. seealso:: + + `Repeatable Read Isolation Level <http://www.postgresql.org/docs/9.1/static/transaction-iso.html#XACT-REPEATABLE-READ>`_ - Postgresql's implementation of repeatable read, including a description of the error condition. + +Simple Version Counting +----------------------- + +The most straightforward way to track versions is to add an integer column +to the mapped table, then establish it as the ``version_id_col`` within the +mapper options:: + + class User(Base): + __tablename__ = 'user' + + id = Column(Integer, primary_key=True) + version_id = Column(Integer, nullable=False) + name = Column(String(50), nullable=False) + + __mapper_args__ = { + "version_id_col": version_id + } + +Above, the ``User`` mapping tracks integer versions using the column +``version_id``. When an object of type ``User`` is first flushed, the +``version_id`` column will be given a value of "1". Then, an UPDATE +of the table later on will always be emitted in a manner similar to the +following:: + + UPDATE user SET version_id=:version_id, name=:name + WHERE user.id = :user_id AND user.version_id = :user_version_id + {"name": "new name", "version_id": 2, "user_id": 1, "user_version_id": 1} + +The above UPDATE statement is updating the row that not only matches +``user.id = 1``, it also is requiring that ``user.version_id = 1``, where "1" +is the last version identifier we've been known to use on this object. +If a transaction elsewhere has modified the row independently, this version id +will no longer match, and the UPDATE statement will report that no rows matched; +this is the condition that SQLAlchemy tests, that exactly one row matched our +UPDATE (or DELETE) statement. If zero rows match, that indicates our version +of the data is stale, and a :exc:`.StaleDataError` is raised. + +.. _custom_version_counter: + +Custom Version Counters / Types +------------------------------- + +Other kinds of values or counters can be used for versioning. Common types include +dates and GUIDs. When using an alternate type or counter scheme, SQLAlchemy +provides a hook for this scheme using the ``version_id_generator`` argument, +which accepts a version generation callable. This callable is passed the value of the current +known version, and is expected to return the subsequent version. 
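+
+As a brief, hypothetical sketch of such a callable (the ``Document`` mapping
+below is illustrative only and is not part of the examples that follow), a
+date-oriented scheme might stamp each new version with the current time,
+disregarding the incoming value::
+
+    import datetime
+
+    class Document(Base):
+        __tablename__ = 'document'
+
+        id = Column(Integer, primary_key=True)
+        version_stamp = Column(DateTime, nullable=False)
+
+        __mapper_args__ = {
+            # the callable receives the current known version and returns
+            # the next one; here, a fresh timestamp serves as each version
+            # (a real scheme would need sufficient timestamp precision)
+            'version_id_col': version_stamp,
+            'version_id_generator': lambda version: datetime.datetime.now()
+        }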
+
+For example, if we wanted to track the versioning of our ``User`` class
+using a randomly generated GUID, we could do this (note that some backends
+support a native GUID type, but we illustrate here using a simple string)::
+
+    import uuid
+
+    class User(Base):
+        __tablename__ = 'user'
+
+        id = Column(Integer, primary_key=True)
+        version_uuid = Column(String(32))
+        name = Column(String(50), nullable=False)
+
+        __mapper_args__ = {
+            'version_id_col': version_uuid,
+            'version_id_generator': lambda version: uuid.uuid4().hex
+        }
+
+The persistence engine will call upon ``uuid.uuid4()`` each time a
+``User`` object is subject to an INSERT or an UPDATE. In this case, our
+version generation function can disregard the incoming value of ``version``,
+as the ``uuid4()`` function
+generates identifiers without any prerequisite value. If we were using
+a sequential versioning scheme, such as a numeric counter or a special character system,
+we could make use of the given ``version`` in order to help determine the
+subsequent value.
+
+.. seealso::
+
+    :ref:`custom_guid_type`
+
+.. _server_side_version_counter:
+
+Server Side Version Counters
+----------------------------
+
+The ``version_id_generator`` can also be configured to rely upon a value
+that is generated by the database. In this case, the database would need
+some means of generating new identifiers when a row is subject to an INSERT
+as well as with an UPDATE. For the UPDATE case, typically an update trigger
+is needed, unless the database in question supports some other native
+version identifier. The Postgresql database in particular supports a system
+column called `xmin <http://www.postgresql.org/docs/9.1/static/ddl-system-columns.html>`_
+which provides UPDATE versioning. We can make use
+of the Postgresql ``xmin`` column to version our ``User``
+class as follows::
+
+    class User(Base):
+        __tablename__ = 'user'
+
+        id = Column(Integer, primary_key=True)
+        name = Column(String(50), nullable=False)
+        xmin = Column("xmin", Integer, system=True)
+
+        __mapper_args__ = {
+            'version_id_col': xmin,
+            'version_id_generator': False
+        }
+
+With the above mapping, the ORM will rely upon the ``xmin`` column for
+automatically providing the new value of the version id counter.
+
+.. topic:: creating tables that refer to system columns
+
+    In the above scenario, as ``xmin`` is a system column provided by Postgresql,
+    we use the ``system=True`` argument to mark it as a system-provided
+    column, which is omitted from the ``CREATE TABLE`` statement.
+
+The ORM typically does not actively fetch database-generated
+values when it emits an INSERT or UPDATE, instead leaving those columns
+"expired", to be fetched when they are next accessed, unless the ``eager_defaults``
+:func:`.mapper` flag is set. However, when a
+server side version column is used, the ORM needs to actively fetch the newly
+generated value. This is so that the version counter is set up *before*
+any concurrent transaction may update it again. This fetching is also
+best done simultaneously within the INSERT or UPDATE statement using :term:`RETURNING`;
+otherwise, if a SELECT statement is emitted afterwards, there is still a potential
+race condition where the version counter may change before it can be fetched.
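+
+As a short illustrative sketch (assuming the ``xmin`` mapping above, running
+on a Postgresql backend where RETURNING is available), the version value is
+present on the object as soon as the flush proceeds, without any additional
+SELECT being emitted::
+
+    u1 = User(name='ed')
+    session.add(u1)
+
+    # the flush emits INSERT ... RETURNING, populating both the
+    # primary key and the xmin version counter in one round trip
+    session.flush()
+
+    print(u1.xmin)  # already loaded; accessing it emits no new SQL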
+
+When the target database supports RETURNING, an INSERT statement for our ``User`` class will look
+like this::
+
+    INSERT INTO "user" (name) VALUES (%(name)s) RETURNING "user".id, "user".xmin
+    {'name': 'ed'}
+
+Above, the ORM can acquire any newly generated primary key values along
+with server-generated version identifiers in one statement. When the backend
+does not support RETURNING, an additional SELECT must be emitted for **every**
+INSERT and UPDATE, which is much less efficient, and also introduces the possibility of
+missed version counters::
+
+    INSERT INTO "user" (name) VALUES (%(name)s)
+    {'name': 'ed'}
+
+    SELECT "user".xmin AS user_xmin FROM "user" WHERE
+    "user".id = :param_1
+    {"param_1": 1}
+
+It is *strongly recommended* that server side version counters only be used
+when absolutely necessary and only on backends that support :term:`RETURNING`,
+e.g. Postgresql, Oracle, SQL Server (though SQL Server has
+`major caveats <http://blogs.msdn.com/b/sqlprogrammability/archive/2008/07/11/update-with-output-clause-triggers-and-sqlmoreresults.aspx>`_ when triggers are used), and Firebird.
+
+.. versionadded:: 0.9.0
+
+    Support for server side version identifier tracking.
+
+Programmatic or Conditional Version Counters
+---------------------------------------------
+
+When ``version_id_generator`` is set to ``False``, we can also programmatically
+(and conditionally) set the version identifier on our object in the same way
+we assign any other mapped attribute. For example, if we used our UUID example, but
+set ``version_id_generator`` to ``False``, we can set the version identifier
+as we choose::
+
+    import uuid
+
+    class User(Base):
+        __tablename__ = 'user'
+
+        id = Column(Integer, primary_key=True)
+        version_uuid = Column(String(32))
+        name = Column(String(50), nullable=False)
+
+        __mapper_args__ = {
+            'version_id_col': version_uuid,
+            'version_id_generator': False
+        }
+
+    u1 = User(name='u1', version_uuid=uuid.uuid4().hex)
+
+    session.add(u1)
+
+    session.commit()
+
+    u1.name = 'u2'
+    u1.version_uuid = uuid.uuid4().hex
+
+    session.commit()
+
+We can also update our ``User`` object without incrementing the version
+counter; the value of the counter will remain unchanged, and the UPDATE
+statement will still check against the previous value. This may be useful
+for schemes where only certain classes of UPDATE are sensitive to concurrency
+issues::
+
+    # will leave version_uuid unchanged
+    u1.name = 'u3'
+    session.commit()
+
+.. versionadded:: 0.9.0
+
+    Support for programmatic and conditional version identifier tracking.
+ diff --git a/doc/build/requirements.txt b/doc/build/requirements.txt index 34f031b0b..3f87e68ea 100644 --- a/doc/build/requirements.txt +++ b/doc/build/requirements.txt @@ -1,3 +1,3 @@ -mako changelog>=0.3.4 sphinx-paramlinks>=0.2.2 +git+https://bitbucket.org/zzzeek/zzzeeksphinx.git diff --git a/doc/build/static/detectmobile.js b/doc/build/static/detectmobile.js deleted file mode 100644 index f86b2d650..000000000 --- a/doc/build/static/detectmobile.js +++ /dev/null @@ -1,7 +0,0 @@ -/** - * jQuery.browser.mobile (http://detectmobilebrowser.com/) - * - * jQuery.browser.mobile will be true if the browser is a mobile device - * - **/ -(function(a){(jQuery.browser=jQuery.browser||{}).mobile=/(android|bb\d+|meego).+mobile|avantgo|bada\/|blackberry|blazer|compal|elaine|fennec|hiptop|iemobile|ip(hone|od)|iris|kindle|lge |maemo|midp|mmp|mobile.+firefox|netfront|opera m(ob|in)i|palm( os)?|phone|p(ixi|re)\/|plucker|pocket|psp|series(4|6)0|symbian|treo|up\.(browser|link)|vodafone|wap|windows (ce|phone)|xda|xiino/i.test(a)||/1207|6310|6590|3gso|4thp|50[1-6]i|770s|802s|a wa|abac|ac(er|oo|s\-)|ai(ko|rn)|al(av|ca|co)|amoi|an(ex|ny|yw)|aptu|ar(ch|go)|as(te|us)|attw|au(di|\-m|r |s )|avan|be(ck|ll|nq)|bi(lb|rd)|bl(ac|az)|br(e|v)w|bumb|bw\-(n|u)|c55\/|capi|ccwa|cdm\-|cell|chtm|cldc|cmd\-|co(mp|nd)|craw|da(it|ll|ng)|dbte|dc\-s|devi|dica|dmob|do(c|p)o|ds(12|\-d)|el(49|ai)|em(l2|ul)|er(ic|k0)|esl8|ez([4-7]0|os|wa|ze)|fetc|fly(\-|_)|g1 u|g560|gene|gf\-5|g\-mo|go(\.w|od)|gr(ad|un)|haie|hcit|hd\-(m|p|t)|hei\-|hi(pt|ta)|hp( i|ip)|hs\-c|ht(c(\-| |_|a|g|p|s|t)|tp)|hu(aw|tc)|i\-(20|go|ma)|i230|iac( |\-|\/)|ibro|idea|ig01|ikom|im1k|inno|ipaq|iris|ja(t|v)a|jbro|jemu|jigs|kddi|keji|kgt( |\/)|klon|kpt |kwc\-|kyo(c|k)|le(no|xi)|lg( g|\/(k|l|u)|50|54|\-[a-w])|libw|lynx|m1\-w|m3ga|m50\/|ma(te|ui|xo)|mc(01|21|ca)|m\-cr|me(rc|ri)|mi(o8|oa|ts)|mmef|mo(01|02|bi|de|do|t(\-| |o|v)|zz)|mt(50|p1|v )|mwbp|mywa|n10[0-2]|n20[2-3]|n30(0|2)|n50(0|2|5)|n7(0(0|1)|10)|ne((c|m)\-|on|tf|wf|wg|wt)|nok(6|i)|nzph|o2im|op(ti|wv)|oran|owg1|p800|pan(a|d|t)|pdxg|pg(13|\-([1-8]|c))|phil|pire|pl(ay|uc)|pn\-2|po(ck|rt|se)|prox|psio|pt\-g|qa\-a|qc(07|12|21|32|60|\-[2-7]|i\-)|qtek|r380|r600|raks|rim9|ro(ve|zo)|s55\/|sa(ge|ma|mm|ms|ny|va)|sc(01|h\-|oo|p\-)|sdk\/|se(c(\-|0|1)|47|mc|nd|ri)|sgh\-|shar|sie(\-|m)|sk\-0|sl(45|id)|sm(al|ar|b3|it|t5)|so(ft|ny)|sp(01|h\-|v\-|v )|sy(01|mb)|t2(18|50)|t6(00|10|18)|ta(gt|lk)|tcl\-|tdg\-|tel(i|m)|tim\-|t\-mo|to(pl|sh)|ts(70|m\-|m3|m5)|tx\-9|up(\.b|g1|si)|utst|v400|v750|veri|vi(rg|te)|vk(40|5[0-3]|\-v)|vm40|voda|vulc|vx(52|53|60|61|70|80|81|83|85|98)|w3c(\-| )|webc|whit|wi(g |nc|nw)|wmlb|wonu|x700|yas\-|your|zeto|zte\-/i.test(a.substr(0,4))})(navigator.userAgent||navigator.vendor||window.opera);
\ No newline at end of file diff --git a/doc/build/static/docs.css b/doc/build/static/docs.css deleted file mode 100644 index e854d34c2..000000000 --- a/doc/build/static/docs.css +++ /dev/null @@ -1,673 +0,0 @@ -/* global */ - -.body-background { - background-color: #FDFBFC; -} - -body { - background-color: #FDFBFC; - margin:0 38px; - color:#333333; -} - -a { - font-weight:normal; - text-decoration:none; -} - -form { - display:inline; -} - -/* hyperlinks */ - -a:link, a:visited, a:active { - /*color:#0000FF;*/ - color: #990000; -} -a:hover { - color: #FF0000; - /*color:#700000;*/ - text-decoration:underline; -} - -/* paragraph links after sections. - These aren't visible until hovering - over the <h> tag, then have a - "reverse video" effect over the actual - link - */ - -a.headerlink { - font-size: 0.8em; - padding: 0 4px 0 4px; - text-decoration: none; - visibility: hidden; -} - -h1:hover > a.headerlink, -h2:hover > a.headerlink, -h3:hover > a.headerlink, -h4:hover > a.headerlink, -h5:hover > a.headerlink, -h6:hover > a.headerlink, -dt:hover > a.headerlink { - visibility: visible; -} - -a.headerlink:hover { - background-color: #990000; - color: white; -} - - -/* Container setup */ - -#docs-container { - max-width:1000px; - margin: 0 auto; - position: relative; -} - - -/* header/footer elements */ - -#docs-header h1 { - font-size:20px; - color: #222222; - margin: 0; - padding: 0; -} - -#docs-header { - font-family:Verdana,sans-serif; - - font-size:.9em; - position: relative; -} - -#docs-sidebar-popout, -#docs-bottom-navigation, -#index-nav { - font-family: Verdana, sans-serif; - background-color: #FBFBEE; - border: solid 1px #CCC; - font-size:.8em; -} - -#docs-bottom-navigation, -#index-nav { - padding:10px; -} - -#docs-sidebar-popout { - font-size:.75em; -} - -#docs-sidebar-popout p, -#docs-sidebar-popout form { - margin:5px 0 5px 0px; -} - -#docs-sidebar-popout h3 { - margin:0 0 10px 0; -} - - -#docs-version-header { - position: absolute; - right: 0; - bottom: 0; -} - -.docs-navigation-links { - font-family:Verdana,sans-serif; -} - -#docs-bottom-navigation { - float:right; - margin: 1em 0 1em 5px; -} - -#docs-copyright { - font-size:.85em; - padding:5px 0px; -} - -#docs-header h1, -#docs-top-navigation h1, -#docs-top-navigation h2 { - font-family:Tahoma,Geneva,sans-serif; - font-weight:normal; -} - -#docs-top-navigation h2 { - margin:16px 4px 7px 5px; - font-size:1.6em; -} - -#docs-top-page-control { - position: absolute; - right: 20px; - bottom: 14px; -} - -#docs-top-page-control ul { - padding:0; - margin:0; -} - -#docs-top-page-control li { - font-size:.9em; - list-style-type:none; - padding:1px 8px; -} - - -#docs-container .version-num { - font-weight: bold; -} - - -/* content container, sidebar */ - -#docs-body-container { -} - -#docs-body, -#docs-sidebar, -#index-nav - { - /*font-family: helvetica, arial, sans-serif; - font-size:.9em;*/ - - font-family: Verdana, sans-serif; - font-size:.85em; - line-height:1.5em; - -} - -#docs-body { - min-height: 700px; -} - -#docs-sidebar > ul { - font-size:.85em; -} - -#fixed-sidebar { - position: relative; -} - -#fixed-sidebar.withsidebar { - float: left; - width:224px; -} - -#fixed-sidebar.preautomated { - position: fixed; - float: none; - top:0; - bottom: 0; -} - -#fixed-sidebar.automated { - position: fixed; - float: none; - top: 120px; - min-height: 0; -} - - -#docs-sidebar { - font-size:.85em; - - border: solid 1px #CCC; - - z-index: 3; - background-color: #EFEFEF; -} - -#index-nav { - position: relative; - margin-top:10px; - padding:0 
10px; -} - -#index-nav form { - padding-top:10px; - float:right; -} - -#sidebar-paginate { - position: absolute; - bottom: 4.5em; - left: 10px; -} - -#sidebar-topnav { - position: absolute; - bottom: 3em; - left: 10px; -} - -#sidebar-search { - position: absolute; - bottom: 1em; - left: 10px; -} - -#docs-sidebar { - top: 132px; - bottom: 0; - min-height: 0; - overflow-y: auto; - margin-top:5px; - width:212px; - padding-left:10px; -} - -#docs-sidebar-popout { - height:120px; - max-height: 120px; - width:212px; - padding-left:10px; - padding-top:10px; - position: relative; -} - - -#fixed-sidebar.preautomated #docs-sidebar, -#fixed-sidebar.preautomated #docs-sidebar-popout { - position:absolute; -} - -#fixed-sidebar.preautomated #docs-sidebar:after { - content: " "; - display:block; - height: 150px; -} - - -#docs-sidebar.preautomated { - position: fixed; -} - -#docs-sidebar.automated { - position: fixed; - float: none; - top: 120px; - min-height: 0; -} - - -#docs-sidebar h3, #docs-sidebar h4 { - background-color: #DDDDDD; - color: #222222; - font-family: Verdana,sans-serif; - font-size: 1.1em; - font-weight: normal; - margin: 10px 0 0 -15px; - padding: 5px 10px 5px 15px; - text-shadow: 1px 1px 0 white; - /*width:210px;*/ -} - -#docs-sidebar h3:first-child { - margin-top: 0px; -} - -#docs-sidebar h3 a, #docs-sidebar h4 a { - color: #222222; -} -#docs-sidebar ul { - margin: 10px 10px 10px 0px; - padding: 0; - list-style: none outside none; -} - - -#docs-sidebar ul ul { - margin-bottom: 0; - margin-top: 0; - list-style: square outside none; - margin-left: 20px; -} - - - - -#docs-body { - background-color:#FFFFFF; - padding:1px 10px 10px 10px; - - border: solid 1px #CCC; - margin-top:10px; -} - -#docs-body.withsidebar { - margin-left: 230px; -} - - -#docs-body h1, -#docs-body h2, -#docs-body h3, -#docs-body h4 { - font-family:Helvetica, Arial, sans-serif; -} - -#docs-body #sqlalchemy-documentation h1 { - /* hide the <h1> for each content section. 
*/ - display:none; - font-size:2.0em; -} - - -#docs-body h2 { - font-size:1.8em; - border-top:1px solid; - /*border-bottom:1px solid;*/ - padding-top:20px; -} - -#sqlalchemy-documentation h2 { - border-top:none; - padding-top:0; -} -#docs-body h3 { - font-size:1.4em; -} - -/* SQL popup, code styles */ - -.highlight { - background:none; -} - -#docs-container pre { - font-size:1.2em; -} - -#docs-container .pre { - font-size:1.1em; -} - -#docs-container pre { - background-color: #f0f0f0; - border: solid 1px #ccc; - box-shadow: 2px 2px 3px #DFDFDF; - padding:10px; - margin: 5px 0px 5px 0px; - overflow:auto; - line-height:1.3em; -} - -.popup_sql, .show_sql -{ - background-color: #FBFBEE; - padding:5px 10px; - margin:10px -5px; - border:1px dashed; -} - -/* the [SQL] links used to display SQL */ -#docs-container .sql_link -{ - font-weight:normal; - font-family: arial, sans-serif; - font-size:.9em; - text-transform: uppercase; - color:#990000; - border:1px solid; - padding:1px 2px 1px 2px; - margin:0px 10px 0px 15px; - float:right; - line-height:1.2em; -} - -#docs-container a.sql_link, -#docs-container .sql_link -{ - text-decoration: none; - padding:1px 2px; -} - -#docs-container a.sql_link:hover { - text-decoration: none; - color:#fff; - border:1px solid #900; - background-color: #900; -} - -/* changeset stuff */ - -#docs-container a.changeset-link { - font-size: 0.8em; - padding: 0 4px 0 4px; - text-decoration: none; -} - -/* docutils-specific elements */ - -th.field-name { - text-align:right; -} - -div.section { -} - -div.note, div.warning, p.deprecated, div.topic, div.admonition { - background-color:#EEFFEF; -} - -.footnote { - font-size: .95em; -} - -div.faq { - background-color: #EFEFEF; -} - -div.faq ul { - list-style: square outside none; -} - -div.admonition, div.topic, .deprecated, .versionadded, .versionchanged { - border:1px solid #CCCCCC; - padding:5px 10px; - font-size:.9em; - margin-top:5px; - box-shadow: 2px 2px 3px #DFDFDF; -} - -div.sidebar { - background-color: #FFFFEE; - border: 1px solid #DDDDBB; - float: right; - margin: 10px 0 10px 1em; - padding: 7px 7px 0; - width: 40%; - font-size:.9em; -} - -p.sidebar-title { - font-weight: bold; -} - -/* grrr sphinx changing your document structures, removing classes.... 
*/ - -.versionadded .versionmodified, -.versionchanged .versionmodified, -.deprecated .versionmodified, -.versionadded > p:first-child > span:first-child, -.versionchanged > p:first-child > span:first-child, -.deprecated > p:first-child > span:first-child -{ - background-color: #ECF0F3; - color: #990000; - font-style: italic; -} - - -div.inherited-member { - border:1px solid #CCCCCC; - padding:5px 5px; - font-size:.9em; - box-shadow: 2px 2px 3px #DFDFDF; -} - -div.warning .admonition-title { - color:#FF0000; -} - -div.admonition .admonition-title, div.topic .topic-title { - font-weight:bold; -} - -.viewcode-back, .viewcode-link { - float:right; -} - -dl.function > dt, -dl.attribute > dt, -dl.classmethod > dt, -dl.method > dt, -dl.class > dt, -dl.exception > dt -{ - background-color: #EFEFEF; - margin:25px -10px 10px 10px; - padding: 0px 10px; -} - - -dl.glossary > dt { - font-weight:bold; - font-size:1.1em; - padding-top:10px; -} - - -dt:target, span.highlight { - background-color:#FBE54E; -} - -a.headerlink { - font-size: 0.8em; - padding: 0 4px 0 4px; - text-decoration: none; - visibility: hidden; -} - -h1:hover > a.headerlink, -h2:hover > a.headerlink, -h3:hover > a.headerlink, -h4:hover > a.headerlink, -h5:hover > a.headerlink, -h6:hover > a.headerlink, -dt:hover > a.headerlink { - visibility: visible; -} - -a.headerlink:hover { - background-color: #00f; - color: white; -} - -.clearboth { - clear:both; -} - -tt.descname { - background-color:transparent; - font-size:1.2em; - font-weight:bold; -} - -tt.descclassname { - background-color:transparent; -} - -tt { - background-color:#ECF0F3; - padding:0 1px; -} - -/* syntax highlighting overrides */ -.k, .kn {color:#0908CE;} -.o {color:#BF0005;} -.go {color:#804049;} - - -/* special "index page" sections - with specific formatting -*/ - -div#sqlalchemy-documentation { - font-size:.95em; -} -div#sqlalchemy-documentation em { - font-style:normal; -} -div#sqlalchemy-documentation .rubric{ - font-size:14px; - background-color:#EEFFEF; - padding:5px; - border:1px solid #BFBFBF; -} -div#sqlalchemy-documentation a, div#sqlalchemy-documentation li { - padding:5px 0px; -} - -div#getting-started { - border-bottom:1px solid; -} - -div#sqlalchemy-documentation div#sqlalchemy-orm { - float:left; - width:48%; -} - -div#sqlalchemy-documentation div#sqlalchemy-core { - float:left; - width:48%; - margin:0; - padding-left:10px; - border-left:1px solid; -} - -div#dialect-documentation { - border-top:1px solid; - /*clear:left;*/ -} - -div .versionwarning, -div .version-warning { - font-size:12px; - font-color:red; - border:1px solid; - padding:4px 4px; - margin:8px 0px 2px 0px; - background:#FFBBBB; -} - -/*div .event-signatures { - background-color:#F0F0FD; - padding:0 10px; - border:1px solid #BFBFBF; -}*/ - -/*dl div.floatything { - display:none; - position:fixed; - top:25px; - left:40px; - font-size:.95em; - font-weight: bold; - border:1px solid; - background-color: #FFF; -} -dl:hover div.floatything { - display:block; -}*/ diff --git a/doc/build/static/init.js b/doc/build/static/init.js deleted file mode 100644 index 4bcb4411d..000000000 --- a/doc/build/static/init.js +++ /dev/null @@ -1,44 +0,0 @@ - -function initSQLPopups() { - $('div.popup_sql').hide(); - $('a.sql_link').click(function() { - $(this).nextAll('div.popup_sql:first').toggle(); - return false; - }); -} - -var automatedBreakpoint = -1; - -function initFloatyThings() { - - automatedBreakpoint = $("#docs-container").position().top + $("#docs-top-navigation-container").height(); - - 
$("#fixed-sidebar.withsidebar").addClass("preautomated"); - - - function setScroll() { - - var scrolltop = $(window).scrollTop(); - if (scrolltop >= automatedBreakpoint) { - $("#fixed-sidebar.withsidebar").css("top", 5); - } - else { - $("#fixed-sidebar.withsidebar").css( - "top", $("#docs-body").offset().top - Math.max(scrolltop, 0)); - } - - - } - $(window).scroll(setScroll) - - setScroll(); -} - - -$(document).ready(function() { - initSQLPopups(); - if (!$.browser.mobile) { - initFloatyThings(); - } -}); - diff --git a/doc/build/templates/genindex.mako b/doc/build/templates/genindex.mako deleted file mode 100644 index 9ea6795bc..000000000 --- a/doc/build/templates/genindex.mako +++ /dev/null @@ -1,77 +0,0 @@ -<%inherit file="layout.mako"/> - -<%block name="show_title" filter="util.striptags"> - ${_('Index')} -</%block> - - <h1 id="index">${_('Index')}</h1> - - % for i, (key, dummy) in enumerate(genindexentries): - ${i != 0 and '| ' or ''}<a href="#${key}"><strong>${key}</strong></a> - % endfor - - <hr /> - - % for i, (key, entries) in enumerate(genindexentries): -<h2 id="${key}">${key}</h2> -<table width="100%" class="indextable genindextable"><tr><td width="33%" valign="top"> -<dl> - <% - breakat = genindexcounts[i] // 2 - numcols = 1 - numitems = 0 - %> -% for entryname, (links, subitems) in entries: - -<dt> - % if links: - <a href="${links[0][1]}">${entryname|h}</a> - % for unknown, link in links[1:]: - , <a href="${link}">[${i}]</a> - % endfor - % else: - ${entryname|h} - % endif -</dt> - - % if subitems: - <dd><dl> - % for subentryname, subentrylinks in subitems: - <dt><a href="${subentrylinks[0][1]}">${subentryname|h}</a> - % for j, (unknown, link) in enumerate(subentrylinks[1:]): - <a href="${link}">[${j}]</a> - % endfor - </dt> - % endfor - </dl></dd> - % endif - - <% - numitems = numitems + 1 + len(subitems) - %> - % if numcols <2 and numitems > breakat: - <% - numcols = numcols + 1 - %> - </dl></td><td width="33%" valign="top"><dl> - % endif - -% endfor -<dt></dt></dl> -</td></tr></table> -% endfor - -<%def name="sidebarrel()"> -% if split_index: - <h4>${_('Index')}</h4> - <p> - % for i, (key, dummy) in enumerate(genindexentries): - ${i > 0 and '| ' or ''} - <a href="${pathto('genindex-' + key)}"><strong>${key}</strong></a> - % endfor - </p> - - <p><a href="${pathto('genindex-all')}"><strong>${_('Full index on one page')}</strong></a></p> -% endif - ${parent.sidebarrel()} -</%def> diff --git a/doc/build/templates/layout.mako b/doc/build/templates/layout.mako deleted file mode 100644 index 23e57129b..000000000 --- a/doc/build/templates/layout.mako +++ /dev/null @@ -1,243 +0,0 @@ -## coding: utf-8 - -<%! - local_script_files = [] - - default_css_files = [ - '_static/pygments.css', - '_static/docs.css', - ] -%> - - -<%doc> - Structural elements are all prefixed with "docs-" - to prevent conflicts when the structure is integrated into the - main site. 
- - docs-container -> - docs-top-navigation-container -> - docs-header -> - docs-version-header - docs-top-navigation - docs-top-page-control - docs-navigation-banner - docs-body-container -> - docs-sidebar - docs-body - docs-bottom-navigation - docs-copyright -</%doc> - -<%inherit file="${context['base']}"/> - -<% - if builder == 'epub': - next.body() - return -%> - - -<% -withsidebar = bool(toc) and current_page_name != 'index' -%> - -<%block name="head_title"> - % if current_page_name != 'index': - ${capture(self.show_title) | util.striptags} — - % endif - ${docstitle|h} -</%block> - - -<div id="docs-container"> - - -<%block name="headers"> - - ${parent.headers()} - - <!-- begin layout.mako headers --> - - <script type="text/javascript"> - var DOCUMENTATION_OPTIONS = { - URL_ROOT: '${pathto("", 1)}', - VERSION: '${release|h}', - COLLAPSE_MODINDEX: false, - FILE_SUFFIX: '${file_suffix}' - }; - </script> - - <!-- begin iterate through sphinx environment script_files --> - % for scriptfile in script_files + self.attr.local_script_files: - <script type="text/javascript" src="${pathto(scriptfile, 1)}"></script> - % endfor - <!-- end iterate through sphinx environment script_files --> - - <script type="text/javascript" src="${pathto('_static/detectmobile.js', 1)}"></script> - <script type="text/javascript" src="${pathto('_static/init.js', 1)}"></script> - % if hasdoc('about'): - <link rel="author" title="${_('About these documents')}" href="${pathto('about')}" /> - % endif - <link rel="index" title="${_('Index')}" href="${pathto('genindex')}" /> - <link rel="search" title="${_('Search')}" href="${pathto('search')}" /> - % if hasdoc('copyright'): - <link rel="copyright" title="${_('Copyright')}" href="${pathto('copyright')}" /> - % endif - <link rel="top" title="${docstitle|h}" href="${pathto('index')}" /> - % if parents: - <link rel="up" title="${parents[-1]['title']|util.striptags}" href="${parents[-1]['link']|h}" /> - % endif - % if nexttopic: - <link rel="next" title="${nexttopic['title']|util.striptags}" href="${nexttopic['link']|h}" /> - % endif - % if prevtopic: - <link rel="prev" title="${prevtopic['title']|util.striptags}" href="${prevtopic['link']|h}" /> - % endif - <!-- end layout.mako headers --> - -</%block> - - -<div id="docs-top-navigation-container" class="body-background"> -<div id="docs-header"> - <div id="docs-version-header"> - Release: <span class="version-num">${release}</span> | Release Date: ${release_date} - </div> - - <h1>${docstitle|h}</h1> - -</div> -</div> - -<div id="docs-body-container"> - - <div id="fixed-sidebar" class="${'withsidebar' if withsidebar else ''}"> - - % if not withsidebar: - <div id="index-nav"> - <form class="search" action="${pathto('search')}" method="get"> - <input type="text" name="q" size="12" /> <input type="submit" value="${_('Search')}" /> - <input type="hidden" name="check_keywords" value="yes" /> - <input type="hidden" name="area" value="default" /> - </form> - - <p> - <a href="${pathto('index')}">Contents</a> | - <a href="${pathto('genindex')}">Index</a> - % if pdf_url: - | <a href="${pdf_url}">Download as PDF</a> - % endif - </p> - - </div> - % endif - - % if withsidebar: - <div id="docs-sidebar-popout"> - <h3><a href="${pathto('index')}">${docstitle|h}</a></h3> - - <p id="sidebar-paginate"> - % if parents: - <a href="${parents[-1]['link']|h}" title="${parents[-1]['title']}">Up</a> | - % else: - <a href="${pathto('index')}" title="${docstitle|h}">Up</a> | - % endif - - % if prevtopic: - <a href="${prevtopic['link']|h}" 
title="${prevtopic['title']}">Prev</a> | - % endif - % if nexttopic: - <a href="${nexttopic['link']|h}" title="${nexttopic['title']}">Next</a> - % endif - </p> - - <p id="sidebar-topnav"> - <a href="${pathto('index')}">Contents</a> | - <a href="${pathto('genindex')}">Index</a> - % if pdf_url: - | <a href="${pdf_url}">PDF</a> - % endif - </p> - - <div id="sidebar-search"> - <form class="search" action="${pathto('search')}" method="get"> - <input type="text" name="q" size="12" /> <input type="submit" value="${_('Search')}" /> - <input type="hidden" name="check_keywords" value="yes" /> - <input type="hidden" name="area" value="default" /> - </form> - </div> - - </div> - - <div id="docs-sidebar"> - - <h3><a href="#">\ - <%block name="show_title"> - ${title} - </%block> - </a></h3> - ${toc} - - % if rtd: - <h4>Project Versions</h4> - <ul class="version-listing"> - </ul> - % endif - - - </div> - % endif - - </div> - - <%doc> - <div id="docs-top-navigation"> - <a href="${pathto('index')}">${docstitle|h}</a> - % if parents: - % for parent in parents: - » <a href="${parent['link']|h}" title="${parent['title']}">${parent['title']}</a> - % endfor - % endif - % if current_page_name != 'index': - » ${self.show_title()} - % endif - - <h2> - <%block name="show_title"> - ${title} - </%block> - </h2> - - </div> - </%doc> - - <div id="docs-body" class="${'withsidebar' if withsidebar else ''}" > - ${next.body()} - </div> - -</div> - -<div id="docs-bottom-navigation" class="docs-navigation-links"> - % if prevtopic: - Previous: - <a href="${prevtopic['link']|h}" title="${_('previous chapter')}">${prevtopic['title']}</a> - % endif - % if nexttopic: - Next: - <a href="${nexttopic['link']|h}" title="${_('next chapter')}">${nexttopic['title']}</a> - % endif - - <div id="docs-copyright"> - % if hasdoc('copyright'): - © <a href="${pathto('copyright')}">Copyright</a> ${copyright|h}. - % else: - © Copyright ${copyright|h}. - % endif - % if show_sphinx: - Created using <a href="http://sphinx.pocoo.org/">Sphinx</a> ${sphinx_version|h}. - % endif - </div> -</div> - -</div> diff --git a/doc/build/templates/page.mako b/doc/build/templates/page.mako deleted file mode 100644 index e0f98cf64..000000000 --- a/doc/build/templates/page.mako +++ /dev/null @@ -1,2 +0,0 @@ -<%inherit file="layout.mako"/> -${body| util.strip_toplevel_anchors}
\ No newline at end of file diff --git a/doc/build/templates/search.mako b/doc/build/templates/search.mako deleted file mode 100644 index d0aa3d825..000000000 --- a/doc/build/templates/search.mako +++ /dev/null @@ -1,21 +0,0 @@ -<%inherit file="layout.mako"/> - -<%! - local_script_files = ['_static/searchtools.js'] -%> -<%block name="show_title"> - ${_('Search')} -</%block> - -<%block name="headers"> - ${parent.headers()} - <script type="text/javascript"> - jQuery(function() { Search.loadIndex("searchindex.js"); }); - </script> -</%block> - -<div id="search-results"></div> - -<%block name="footer"> - ${parent.footer()} -</%block> diff --git a/doc/build/templates/static_base.mako b/doc/build/templates/static_base.mako deleted file mode 100644 index 9eb5ec046..000000000 --- a/doc/build/templates/static_base.mako +++ /dev/null @@ -1,29 +0,0 @@ -<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> - -<html xmlns="http://www.w3.org/1999/xhtml"> - <head> - <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> - ${metatags and metatags or ''} - <title> - <%block name="head_title"> - </%block> - </title> - - <%block name="css"> - <!-- begin iterate through SQLA + sphinx environment css_files --> - % for cssfile in self.attr.default_css_files + css_files: - <link rel="stylesheet" href="${pathto(cssfile, 1)}" type="text/css" /> - % endfor - <!-- end iterate through SQLA + sphinx environment css_files --> - </%block> - - <%block name="headers"/> - </head> - <body> - ${next.body()} - <%block name="footer"/> - </body> -</html> - - |