Diffstat (limited to 'doc/build/faq/performance.rst')
-rw-r--r--    doc/build/faq/performance.rst    443
1 files changed, 443 insertions, 0 deletions
diff --git a/doc/build/faq/performance.rst b/doc/build/faq/performance.rst
new file mode 100644
index 000000000..8413cb5a2
--- /dev/null
+++ b/doc/build/faq/performance.rst
@@ -0,0 +1,443 @@
+.. _faq_performance:
+
+Performance
+===========
+
+.. contents::
+    :local:
+    :class: faq
+    :backlinks: none
+
+.. _faq_how_to_profile:
+
+How can I profile a SQLAlchemy powered application?
+---------------------------------------------------
+
+Looking for performance issues typically involves two strategies. One
+is query profiling, and the other is code profiling.
+
+Query Profiling
+^^^^^^^^^^^^^^^^
+
+Sometimes just plain SQL logging (enabled via Python's logging module
+or via the ``echo=True`` argument on :func:`.create_engine`) can give an
+idea of how long things are taking. For example, if you log something
+right after a SQL operation, you'd see something like this in your
+log::
+
+    17:37:48,325 INFO  [sqlalchemy.engine.base.Engine.0x...048c] SELECT ...
+    17:37:48,326 INFO  [sqlalchemy.engine.base.Engine.0x...048c] {<params>}
+    17:37:48,660 DEBUG [myapp.somemessage]
+
+If you logged ``myapp.somemessage`` right after the operation, you know
+it took 334ms to complete the SQL part of things.
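+
+A minimal sketch of turning this logging on; the in-memory SQLite URL here
+is just a placeholder::
+
+    import logging
+
+    from sqlalchemy import create_engine
+
+    logging.basicConfig()
+
+    # raise the level of the "sqlalchemy.engine" logger to INFO...
+    logging.getLogger("sqlalchemy.engine").setLevel(logging.INFO)
+
+    # ...or equivalently, pass echo=True when creating a particular engine
+    engine = create_engine("sqlite://", echo=True)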
+
+Logging SQL will also illustrate if dozens or hundreds of queries are
+being issued which could be better organized into much fewer queries.
+When using the SQLAlchemy ORM, the "eager loading"
+feature is provided to partially (:func:`.contains_eager()`) or fully
+(:func:`.joinedload()`, :func:`.subqueryload()`)
+automate this activity; without
+the ORM, "eager loading" typically means using joins so that results across
+multiple tables can be loaded in one result set, instead of multiplying the
+number of queries as more depth is added (i.e. ``r + r*r2 + r*r2*r3`` ...).
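+
+For example, with a hypothetical ``User`` mapping that has an ``addresses``
+relationship (the names are assumed for illustration), a naive loop emits
+one SELECT per user for the collection, whereas ``joinedload()`` loads the
+users and their collections in a single statement::
+
+    from sqlalchemy.orm import joinedload
+
+    # N+1 pattern: one query for the users, then one more query
+    # per user to load each .addresses collection
+    for user in session.query(User):
+        print(user.addresses)
+
+    # eager loading: users and addresses arrive in one joined query
+    for user in session.query(User).options(joinedload(User.addresses)):
+        print(user.addresses)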
+
+For more long-term profiling of queries, or to implement an application-side
+"slow query" monitor, events can be used to intercept cursor executions,
+using a recipe like the following::
+
+    from sqlalchemy import event
+    from sqlalchemy.engine import Engine
+    import time
+    import logging
+
+    logging.basicConfig()
+    logger = logging.getLogger("myapp.sqltime")
+    logger.setLevel(logging.DEBUG)
+
+    @event.listens_for(Engine, "before_cursor_execute")
+    def before_cursor_execute(conn, cursor, statement,
+                              parameters, context, executemany):
+        conn.info.setdefault('query_start_time', []).append(time.time())
+        logger.debug("Start Query: %s", statement)
+
+    @event.listens_for(Engine, "after_cursor_execute")
+    def after_cursor_execute(conn, cursor, statement,
+                             parameters, context, executemany):
+        total = time.time() - conn.info['query_start_time'].pop(-1)
+        logger.debug("Query Complete!")
+        logger.debug("Total Time: %f", total)
+
+Above, we use the :meth:`.ConnectionEvents.before_cursor_execute` and
+:meth:`.ConnectionEvents.after_cursor_execute` events to establish an interception
+point around when a statement is executed. We attach a timer onto the
+connection using the :attr:`.Connection.info` dictionary; we use a
+stack here for the occasional case where the cursor execute events may be nested.
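+
+To build the application-side "slow query" monitor mentioned earlier, the
+``after_cursor_execute`` listener could be rewritten to report only
+statements that exceed some threshold. A minimal sketch, intended to be
+used in place of the ``after_cursor_execute`` listener shown above (the
+0.5 second cutoff is an arbitrary choice)::
+
+    @event.listens_for(Engine, "after_cursor_execute")
+    def after_cursor_execute(conn, cursor, statement,
+                             parameters, context, executemany):
+        total = time.time() - conn.info['query_start_time'].pop(-1)
+        # report only the statements that exceeded the threshold
+        if total > 0.5:
+            logger.warning("Slow query (%f sec): %s", total, statement)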
+
+Code Profiling
+^^^^^^^^^^^^^^
+
+If logging reveals that individual queries are taking too long, you'd
+need a breakdown of how much time was spent within the database
+processing the query, sending results over the network, being handled
+by the :term:`DBAPI`, and finally being received by SQLAlchemy's result set
+and/or ORM layer. Each of these stages can present their own
+individual bottlenecks, depending on specifics.
+
+For that you need to use the
+`Python Profiling Module <https://docs.python.org/2/library/profile.html>`_.
+Below is a simple recipe which works profiling into a context manager::
+
+    import cProfile
+    import StringIO
+    import pstats
+    import contextlib
+
+    @contextlib.contextmanager
+    def profiled():
+        pr = cProfile.Profile()
+        pr.enable()
+        yield
+        pr.disable()
+        s = StringIO.StringIO()
+        ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
+        ps.print_stats()
+        # uncomment this to see who's calling what
+        # ps.print_callers()
+        print(s.getvalue())
+
+To profile a section of code::
+
+    with profiled():
+        Session.query(FooClass).filter(FooClass.somevalue == 8).all()
+
+The output of profiling can be used to give an idea where time is
+being spent. A section of profiling output looks like this::
+
+    13726 function calls (13042 primitive calls) in 0.014 seconds
+
+    Ordered by: cumulative time
+
+    ncalls  tottime  percall  cumtime  percall filename:lineno(function)
+    222/21    0.001    0.000    0.011    0.001 lib/sqlalchemy/orm/loading.py:26(instances)
+    220/20    0.002    0.000    0.010    0.001 lib/sqlalchemy/orm/loading.py:327(_instance)
+    220/20    0.000    0.000    0.010    0.000 lib/sqlalchemy/orm/loading.py:284(populate_state)
+        20    0.000    0.000    0.010    0.000 lib/sqlalchemy/orm/strategies.py:987(load_collection_from_subq)
+        20    0.000    0.000    0.009    0.000 lib/sqlalchemy/orm/strategies.py:935(get)
+         1    0.000    0.000    0.009    0.009 lib/sqlalchemy/orm/strategies.py:940(_load)
+        21    0.000    0.000    0.008    0.000 lib/sqlalchemy/orm/strategies.py:942(<genexpr>)
+         2    0.000    0.000    0.004    0.002 lib/sqlalchemy/orm/query.py:2400(__iter__)
+         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/orm/query.py:2414(_execute_and_instances)
+         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/engine/base.py:659(execute)
+         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/sql/elements.py:321(_execute_on_connection)
+         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/engine/base.py:788(_execute_clauseelement)
+
+    ...
+
+Above, we can see that the ``instances()`` SQLAlchemy function was called 222
+times in total (21 of them primitive, i.e. non-recursive, calls), taking a
+total of 0.011 seconds for all calls combined.
+
+Execution Slowness
+^^^^^^^^^^^^^^^^^^
+
+The specifics of these calls can tell us where the time is being spent.
+If, for example, you see time being spent within ``cursor.execute()``,
+e.g. against the DBAPI::
+
+    2    0.102    0.102    0.204    0.102 {method 'execute' of 'sqlite3.Cursor' objects}
+
+this would indicate that the database is taking a long time to start returning
+results, and it means your query should be optimized, either by adding indexes
+or restructuring the query and/or underlying schema. For that task,
+analysis of the query plan is warranted, using a system such as EXPLAIN,
+SHOW PLAN, etc. as is provided by the database backend.
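+
+A rough sketch of driving such an analysis from SQLAlchemy itself, reusing
+the hypothetical ``FooClass`` query from earlier; the ``EXPLAIN QUERY PLAN``
+syntax shown is SQLite-specific and other backends spell this differently::
+
+    query = session.query(FooClass).filter(FooClass.somevalue == 8)
+
+    # render the statement with bound values inlined so it can be EXPLAINed
+    # as-is; for backend-specific SQL, compile against the engine's dialect
+    sql = query.statement.compile(compile_kwargs={"literal_binds": True})
+
+    for row in session.connection().execute("EXPLAIN QUERY PLAN " + str(sql)):
+        print(row)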
+
+Result Fetching Slowness - Core
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+If on the other hand you see many thousands of calls related to fetching rows,
+or very long calls to ``fetchall()``, it may
+mean your query is returning more rows than expected, or that the fetching
+of rows itself is slow. The ORM itself typically uses ``fetchall()`` to fetch
+rows (or ``fetchmany()`` if the :meth:`.Query.yield_per` option is used).
+
+An inordinately large number of rows would be indicated
+by a very slow call to ``fetchall()`` at the DBAPI level::
+
+    2    0.300    0.600    0.300    0.600 {method 'fetchall' of 'sqlite3.Cursor' objects}
+
+An unexpectedly large number of rows, even if the ultimate result doesn't seem
+to have many rows, can be the result of a cartesian product - when multiple
+sets of rows are combined together without appropriately joining the tables
+together. It's often easy to produce this behavior with a SQLAlchemy Core or
+ORM query if the wrong :class:`.Column` objects are used in a complex query,
+pulling in additional FROM clauses that are unexpected.
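+
+A sketch using hypothetical ``User`` and ``Address`` mappings (the column
+names and join condition are assumed for illustration); the first query
+places both tables in the FROM clause with no join condition, multiplying
+the row counts together::
+
+    # cartesian product: every user is paired with every address
+    session.query(User.name, Address.email).all()
+
+    # joined properly: only related user/address pairs are returned
+    session.query(User.name, Address.email).\
+        join(Address, User.id == Address.user_id).all()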
+
+On the other hand, a fast call to ``fetchall()`` at the DBAPI level, but then
+slowness when SQLAlchemy's :class:`.ResultProxy` is asked to do a ``fetchall()``,
+may indicate slowness in processing of datatypes, such as unicode conversions
+and similar::
+
+    # the DBAPI cursor is fast...
+    2    0.020    0.040    0.020    0.040 {method 'fetchall' of 'sqlite3.Cursor' objects}
+
+    ...
+
+    # but SQLAlchemy's result proxy is slow, this is type-level processing
+    2    0.100    0.200    0.100    0.200 lib/sqlalchemy/engine/result.py:778(fetchall)
+
+In some cases, a backend might be doing type-level processing that isn't
+needed. More specifically, slow calls within the type API are a better
+indicator - below is what it looks like when we use a type like
+this::
+
+    from sqlalchemy import String, TypeDecorator
+    import time
+
+    class Foo(TypeDecorator):
+        impl = String
+
+        def process_result_value(self, value, dialect):
+            # intentionally add slowness for illustration purposes
+            time.sleep(.001)
+            return value
+
+the profiling output of this intentionally slow operation can be seen like this::
+
+    200    0.001    0.000    0.237    0.001 lib/sqlalchemy/sql/type_api.py:911(process)
+    200    0.001    0.000    0.236    0.001 test.py:28(process_result_value)
+    200    0.235    0.001    0.235    0.001 {time.sleep}
+
+that is, we see many expensive calls within the ``type_api`` system, and the actual
+time consuming thing is the ``time.sleep()`` call.
+
+Make sure to check the :doc:`Dialect documentation <dialects/index>`
+for notes on known performance tuning suggestions at this level, especially for
+databases like Oracle. There may be systems in place related to ensuring numeric
+accuracy or string processing that aren't needed in all cases.
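+
+As one hypothetical illustration - flag names and availability vary per
+dialect and release, so verify against the dialect documentation for your
+version - the cx_Oracle dialect has offered options to disable some of this
+per-row processing::
+
+    from sqlalchemy import create_engine
+
+    # assumption: the cx_oracle "coerce_to_decimal" flag; when False, plain
+    # numeric results are returned as floats rather than Decimal objects,
+    # skipping that conversion step (confirm in the dialect documentation)
+    engine = create_engine(
+        "oracle+cx_oracle://scott:tiger@hostname/dbname",
+        coerce_to_decimal=False
+    )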
+
+There also may be even more low-level points at which row-fetching performance is suffering;
+for example, if time spent seems to focus on a call like ``socket.recv()``,
+that could indicate that everything is fast except for the actual network connection,
+and too much time is spent with data moving over the network.
+
+Result Fetching Slowness - ORM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To detect slowness in ORM fetching of rows (which is the most common area
+of performance concern), calls like ``populate_state()`` and ``_instance()`` will
+illustrate individual ORM object populations::
+
+    # the ORM calls _instance for each ORM-loaded row it sees, and
+    # populate_state for each ORM-loaded row that results in the population
+    # of an object's attributes
+    220/20    0.001    0.000    0.010    0.000 lib/sqlalchemy/orm/loading.py:327(_instance)
+    220/20    0.000    0.000    0.009    0.000 lib/sqlalchemy/orm/loading.py:284(populate_state)
+
+The ORM's slowness in turning rows into ORM-mapped objects is a product
+of the complexity of this operation combined with the overhead of CPython.
+Common strategies to mitigate this include:
+
+* fetch individual columns instead of full entities, that is::
+
+      session.query(User.id, User.name)
+
+  instead of::
+
+      session.query(User)
+
+* Use :class:`.Bundle` objects to organize column-based results::
+
+      u_b = Bundle('user', User.id, User.name)
+      a_b = Bundle('address', Address.id, Address.email)
+
+      for user, address in session.query(u_b, a_b).join(User.addresses):
+          # ...
+
+* Use result caching - see :ref:`examples_caching` for an in-depth example
+  of this.
+
+* Consider a faster interpreter like that of PyPy.
+
+The output of a profile can be a little daunting, but after some
+practice it is very easy to read.
+
+.. seealso::
+
+    :ref:`examples_performance` - a suite of performance demonstrations
+    with bundled profiling capabilities.
+
+I'm inserting 400,000 rows with the ORM and it's really slow!
+--------------------------------------------------------------
+
+The SQLAlchemy ORM uses the :term:`unit of work` pattern when synchronizing
+changes to the database. This pattern goes far beyond simple "inserts"
+of data: attributes assigned on objects are received by an attribute
+instrumentation system that tracks changes as they are made; all rows
+inserted are tracked in an identity map, which means that for each row
+SQLAlchemy must retrieve its "last inserted id" if not already given;
+and rows to be inserted are scanned and sorted for dependencies as
+needed. Objects are also subject to a fair degree of bookkeeping in
+order to keep all of this running, which for a very large number of
+rows at once can mean an inordinate amount of time spent on large data
+structures, hence it's best to chunk these.
+
+Basically, the unit of work is a large degree of automation applied to the
+task of persisting a complex object graph into a relational database with
+no explicit persistence code, and this automation has a price.
+
+ORMs are basically not intended for high-performance bulk inserts -
+this is the whole reason SQLAlchemy offers the Core in addition to the
+ORM as a first-class component.
+
+For the use case of fast bulk inserts, the
+SQL generation and execution system that the ORM builds on top of
+is part of the :doc:`Core <core/tutorial>`. Using this system directly, we can produce an INSERT that
+is competitive with using the raw database API directly.
+
+Alternatively, the SQLAlchemy ORM offers the :ref:`bulk_operations`
+suite of methods, which provide hooks into subsections of the unit of
+work process in order to emit Core-level INSERT and UPDATE constructs with
+a small degree of ORM-based automation.
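+
+A minimal sketch of the mappings-based form, using the ``Customer`` class
+from the script below (``bulk_save_objects()`` is similar, but accepts ORM
+objects rather than plain dictionaries)::
+
+    session.bulk_insert_mappings(
+        Customer,
+        [dict(name="NAME " + str(i)) for i in xrange(10000)]
+    )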
+
+The example below illustrates time-based tests for several different
+methods of inserting rows, going from the most automated to the least.
+With CPython 2.7, the observed runtimes are::
+
+    classics-MacBook-Pro:sqlalchemy classic$ python test.py
+    SQLAlchemy ORM: Total time for 100000 records 12.0471920967 secs
+    SQLAlchemy ORM pk given: Total time for 100000 records 7.06283402443 secs
+    SQLAlchemy ORM bulk_insert_mappings(): Total time for 100000 records 0.856323003769 secs
+    SQLAlchemy Core: Total time for 100000 records 0.485800027847 secs
+    sqlite3: Total time for 100000 records 0.487842082977 sec
+
+We can cut the ORM-based times roughly in half using a recent version of
+`PyPy <http://pypy.org/>`_::
+
+    classics-MacBook-Pro:sqlalchemy classic$ /usr/local/src/pypy-2.1-beta2-osx64/bin/pypy test.py
+    SQLAlchemy ORM: Total time for 100000 records 5.88369488716 secs
+    SQLAlchemy ORM pk given: Total time for 100000 records 3.52294301987 secs
+    SQLAlchemy Core: Total time for 100000 records 0.613556146622 secs
+    sqlite3: Total time for 100000 records 0.442467927933 sec
+
+Script::
+
+    import time
+    import sqlite3
+
+    from sqlalchemy.ext.declarative import declarative_base
+    from sqlalchemy import Column, Integer, String, create_engine
+    from sqlalchemy.orm import scoped_session, sessionmaker
+
+    Base = declarative_base()
+    DBSession = scoped_session(sessionmaker())
+    engine = None
+
+
+    class Customer(Base):
+        __tablename__ = "customer"
+        id = Column(Integer, primary_key=True)
+        name = Column(String(255))
+
+
+    def init_sqlalchemy(dbname='sqlite:///sqlalchemy.db'):
+        global engine
+        engine = create_engine(dbname, echo=False)
+        DBSession.remove()
+        DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
+        Base.metadata.drop_all(engine)
+        Base.metadata.create_all(engine)
+
+
+    def test_sqlalchemy_orm(n=100000):
+        init_sqlalchemy()
+        t0 = time.time()
+        for i in xrange(n):
+            customer = Customer()
+            customer.name = 'NAME ' + str(i)
+            DBSession.add(customer)
+            if i % 1000 == 0:
+                DBSession.flush()
+        DBSession.commit()
+        print(
+            "SQLAlchemy ORM: Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " secs")
+
+
+    def test_sqlalchemy_orm_pk_given(n=100000):
+        init_sqlalchemy()
+        t0 = time.time()
+        for i in xrange(n):
+            customer = Customer(id=i+1, name="NAME " + str(i))
+            DBSession.add(customer)
+            if i % 1000 == 0:
+                DBSession.flush()
+        DBSession.commit()
+        print(
+            "SQLAlchemy ORM pk given: Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " secs")
+
+
+    def test_sqlalchemy_orm_bulk_insert(n=100000):
+        init_sqlalchemy()
+        t0 = time.time()
+        n1 = n
+        while n1 > 0:
+            # insert in chunks of at most 10000 mappings at a time
+            chunk = min(10000, n1)
+            DBSession.bulk_insert_mappings(
+                Customer,
+                [
+                    dict(name="NAME " + str(i))
+                    for i in xrange(chunk)
+                ]
+            )
+            n1 = n1 - chunk
+        DBSession.commit()
+        print(
+            "SQLAlchemy ORM bulk_insert_mappings(): Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " secs")
+
+
+    def test_sqlalchemy_core(n=100000):
+        init_sqlalchemy()
+        t0 = time.time()
+        engine.execute(
+            Customer.__table__.insert(),
+            [{"name": 'NAME ' + str(i)} for i in xrange(n)]
+        )
+        print(
+            "SQLAlchemy Core: Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " secs")
+
+
+    def init_sqlite3(dbname):
+        conn = sqlite3.connect(dbname)
+        c = conn.cursor()
+        c.execute("DROP TABLE IF EXISTS customer")
+        c.execute(
+            "CREATE TABLE customer (id INTEGER NOT NULL, "
+            "name VARCHAR(255), PRIMARY KEY(id))")
+        conn.commit()
+        return conn
+
+
+    def test_sqlite3(n=100000, dbname='sqlite3.db'):
+        conn = init_sqlite3(dbname)
+        c = conn.cursor()
+        t0 = time.time()
+        for i in xrange(n):
+            row = ('NAME ' + str(i),)
+            c.execute("INSERT INTO customer (name) VALUES (?)", row)
+        conn.commit()
+        print(
+            "sqlite3: Total time for " + str(n) +
+            " records " + str(time.time() - t0) + " sec")
+
+    if __name__ == '__main__':
+        test_sqlalchemy_orm(100000)
+        test_sqlalchemy_orm_pk_given(100000)
+        test_sqlalchemy_orm_bulk_insert(100000)
+        test_sqlalchemy_core(100000)
+        test_sqlite3(100000)
+