.. _faq_performance:

Performance
===========

.. contents::
    :local:
    :class: faq
    :backlinks: none

.. _faq_how_to_profile:

How can I profile a SQLAlchemy powered application?
---------------------------------------------------

Looking for performance issues typically involves two strategies.  One
is query profiling, and the other is code profiling.

Query Profiling
^^^^^^^^^^^^^^^

Sometimes just plain SQL logging (enabled via Python's logging module
or via the ``echo=True`` argument on :func:`.create_engine`) can give an
idea how long things are taking.  For example, if you log something
right after a SQL operation, you'd see something like this in your
log::

    17:37:48,325 INFO  [sqlalchemy.engine.base.Engine.0x...048c] SELECT ...
    17:37:48,326 INFO  [sqlalchemy.engine.base.Engine.0x...048c] {<params>}
    17:37:48,660 DEBUG [myapp.somemessage]

if you logged ``myapp.somemessage`` right after the operation, you know
it took 334ms to complete the SQL part of things.

Logging SQL will also illustrate if dozens/hundreds of queries are
being issued which could be better organized into much fewer queries.
When using the SQLAlchemy ORM, the "eager loading" feature is provided
to partially (:func:`.contains_eager()`) or fully (:func:`.joinedload()`,
:func:`.subqueryload()`) automate this activity.  Without the ORM,
"eager loading" typically means using joins, so that results across
multiple tables can be loaded in one result set instead of multiplying
the number of queries as more depth is added (i.e. ``r + r*r2 + r*r2*r3`` ...).
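As a brief illustration of the contrast, here is a minimal sketch assuming a
hypothetical ``User`` mapping with an ``addresses`` relationship (the names
are illustrative only, not part of any recipe in this document)::

    from sqlalchemy.orm import joinedload

    # lazy loading - one SELECT for the users, plus one additional
    # SELECT per user the first time .addresses is accessed
    for user in session.query(User):
        print user.name, user.addresses

    # eager loading - a single SELECT using LEFT OUTER JOIN loads the
    # users and their addresses in one round trip
    for user in session.query(User).options(joinedload(User.addresses)):
        print user.name, user.addresses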
For more long-term profiling of queries, or to implement an application-side
"slow query" monitor, events can be used to intercept cursor executions,
using a recipe like the following::

    from sqlalchemy import event
    from sqlalchemy.engine import Engine
    import time
    import logging

    logging.basicConfig()
    logger = logging.getLogger("myapp.sqltime")
    logger.setLevel(logging.DEBUG)

    @event.listens_for(Engine, "before_cursor_execute")
    def before_cursor_execute(conn, cursor, statement,
                              parameters, context, executemany):
        conn.info.setdefault('query_start_time', []).append(time.time())
        logger.debug("Start Query: %s", statement)

    @event.listens_for(Engine, "after_cursor_execute")
    def after_cursor_execute(conn, cursor, statement,
                             parameters, context, executemany):
        total = time.time() - conn.info['query_start_time'].pop(-1)
        logger.debug("Query Complete!")
        logger.debug("Total Time: %f", total)

Above, we use the :meth:`.ConnectionEvents.before_cursor_execute` and
:meth:`.ConnectionEvents.after_cursor_execute` events to establish an
interception point around when a statement is executed.  We attach a timer
onto the connection using the :attr:`.Connection.info` dictionary; we use a
stack here for the occasional case where the cursor execute events may be
nested.

Code Profiling
^^^^^^^^^^^^^^

If logging reveals that individual queries are taking too long, you'd
need a breakdown of how much time was spent within the database
processing the query, sending results over the network, being handled
by the :term:`DBAPI`, and finally being received by SQLAlchemy's result set
and/or ORM layer.  Each of these stages can present their own
individual bottlenecks, depending on specifics.

For that you need to use the
`Python Profiling Module <https://docs.python.org/2/library/profile.html>`_.
Below is a simple recipe which works profiling into a context manager::

    import cProfile
    import StringIO
    import pstats
    import contextlib

    @contextlib.contextmanager
    def profiled():
        pr = cProfile.Profile()
        pr.enable()
        yield
        pr.disable()
        s = StringIO.StringIO()
        ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
        ps.print_stats()
        # uncomment this to see who's calling what
        # ps.print_callers()
        print s.getvalue()

To profile a section of code::

    with profiled():
        Session.query(FooClass).filter(FooClass.somevalue == 8).all()

The output of profiling can be used to give an idea where time is
being spent.  A section of profiling output looks like this::

    13726 function calls (13042 primitive calls) in 0.014 seconds

    Ordered by: cumulative time

    ncalls  tottime  percall  cumtime  percall filename:lineno(function)
    222/21    0.001    0.000    0.011    0.001 lib/sqlalchemy/orm/loading.py:26(instances)
    220/20    0.002    0.000    0.010    0.001 lib/sqlalchemy/orm/loading.py:327(_instance)
    220/20    0.000    0.000    0.010    0.000 lib/sqlalchemy/orm/loading.py:284(populate_state)
        20    0.000    0.000    0.010    0.000 lib/sqlalchemy/orm/strategies.py:987(load_collection_from_subq)
        20    0.000    0.000    0.009    0.000 lib/sqlalchemy/orm/strategies.py:935(get)
         1    0.000    0.000    0.009    0.009 lib/sqlalchemy/orm/strategies.py:940(_load)
        21    0.000    0.000    0.008    0.000 lib/sqlalchemy/orm/strategies.py:942(<genexpr>)
         2    0.000    0.000    0.004    0.002 lib/sqlalchemy/orm/query.py:2400(__iter__)
         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/orm/query.py:2414(_execute_and_instances)
         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/engine/base.py:659(execute)
         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/sql/elements.py:321(_execute_on_connection)
         2    0.000    0.000    0.002    0.001 lib/sqlalchemy/engine/base.py:788(_execute_clauseelement)

    ...

Above, we can see that the ``instances()`` SQLAlchemy function was called 222
times (recursively, and 21 times from the outside), taking a total of .011
seconds for all calls combined.

Execution Slowness
^^^^^^^^^^^^^^^^^^

The specifics of these calls can tell us where the time is being spent.
If for example, you see time being spent within ``cursor.execute()``,
e.g. against the DBAPI::

    2    0.102    0.102    0.204    0.102 {method 'execute' of 'sqlite3.Cursor' objects}

this would indicate that the database is taking a long time to start returning
results, and it means your query should be optimized, either by adding indexes
or restructuring the query and/or underlying schema.  For that task,
analysis of the query plan is warranted, using a system such as EXPLAIN,
SHOW PLAN, etc. as is provided by the database backend.
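As a hypothetical sketch, the plan can often be retrieved by emitting the
backend's EXPLAIN variant over the same connection; SQLite's
``EXPLAIN QUERY PLAN`` is shown here, and both the statement and the output
format differ per backend (the table and parameter are illustrative only)::

    conn = engine.connect()  # an Engine from create_engine(), as above
    result = conn.execute(
        "EXPLAIN QUERY PLAN SELECT * FROM customer WHERE name = ?",
        ("NAME 50",))
    for row in result:
        print row

Consult your database's documentation for how to interpret the plan, e.g.
whether the expected indexes are actually being used.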
Result Fetching Slowness - Core
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

If on the other hand you see many thousands of calls related to fetching rows,
or very long calls to ``fetchall()``, it may
mean your query is returning more rows than expected, or that the fetching
of rows itself is slow.  The ORM itself typically uses ``fetchall()`` to fetch
rows (or ``fetchmany()`` if the :meth:`.Query.yield_per` option is used).

An inordinately large number of rows would be indicated
by a very slow call to ``fetchall()`` at the DBAPI level::

    2    0.300    0.600    0.300    0.600 {method 'fetchall' of 'sqlite3.Cursor' objects}

An unexpectedly large number of rows, even if the ultimate result doesn't seem
to have many rows, can be the result of a cartesian product - when multiple
sets of rows are combined together without appropriately joining the tables
together.  It's often easy to produce this behavior with a SQLAlchemy Core or
ORM query if the wrong :class:`.Column` objects are used in a complex query,
pulling in additional FROM clauses that are unexpected; an example is
sketched below.
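For a hypothetical sketch of how this can come about, assume two
:class:`.Table` objects ``users`` and ``addresses`` related by a
``user_id`` column (names illustrative only)::

    from sqlalchemy import select

    # no criteria relates the two tables, so both are placed in the
    # FROM clause and every combination of rows is returned:
    # SELECT users.name, addresses.email FROM users, addresses
    stmt = select([users.c.name, addresses.c.email])

    # relating the tables joins them, restoring one row per match:
    # SELECT ... FROM users, addresses WHERE users.id = addresses.user_id
    stmt = select([users.c.name, addresses.c.email]).where(
        users.c.id == addresses.c.user_id)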
On the other hand, a fast call to ``fetchall()`` at the DBAPI level, but then
slowness when SQLAlchemy's :class:`.ResultProxy` is asked to do a ``fetchall()``,
may indicate slowness in processing of datatypes, such as unicode conversions
and similar::

    # the DBAPI cursor is fast...
    2    0.020    0.040    0.020    0.040 {method 'fetchall' of 'sqlite3.Cursor' objects}

    ...

    # but SQLAlchemy's result proxy is slow, this is type-level processing
    2    0.100    0.200    0.100    0.200 lib/sqlalchemy/engine/result.py:778(fetchall)

In some cases, a backend might be doing type-level processing that isn't
needed.  More specifically, slow calls within the type API are a better
indicator - below is what it looks like when we use a type like this::

    from sqlalchemy import String, TypeDecorator
    import time

    class Foo(TypeDecorator):
        impl = String

        def process_result_value(self, value, dialect):
            # intentionally add slowness for illustration purposes
            time.sleep(.001)
            return value

the profiling output of this intentionally slow operation can be seen like this::

      200    0.001    0.000    0.237    0.001 lib/sqlalchemy/sql/type_api.py:911(process)
      200    0.001    0.000    0.236    0.001 test.py:28(process_result_value)
      200    0.235    0.001    0.235    0.001 {time.sleep}

that is, we see many expensive calls within the ``type_api`` system, and the
actual time-consuming thing is the ``time.sleep()`` call.

Make sure to check the :doc:`Dialect documentation <dialects/index>`
for notes on known performance tuning suggestions at this level, especially for
databases like Oracle.  There may be systems related to ensuring numeric accuracy
or string processing that may not be needed in all cases.

There also may be even more low-level points at which row-fetching performance
is suffering; for example, if time spent seems to focus on a call like
``socket.receive()``, that could indicate that everything is fast except for
the actual network connection, and too much time is spent with data moving
over the network.

Result Fetching Slowness - ORM
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

To detect slowness in ORM fetching of rows (which is the most common area
of performance concern), calls like ``populate_state()`` and ``_instance()`` will
illustrate individual ORM object populations::

    # the ORM calls _instance for each ORM-loaded row it sees, and
    # populate_state for each ORM-loaded row that results in the population
    # of an object's attributes
    220/20    0.001    0.000    0.010    0.000 lib/sqlalchemy/orm/loading.py:327(_instance)
    220/20    0.000    0.000    0.009    0.000 lib/sqlalchemy/orm/loading.py:284(populate_state)

The ORM's slowness in turning rows into ORM-mapped objects is a product
of the complexity of this operation combined with the overhead of CPython.
Common strategies to mitigate this include:

* fetch individual columns instead of full entities, that is::

      session.query(User.id, User.name)

  instead of::

      session.query(User)

* Use :class:`.Bundle` objects to organize column-based results::

      u_b = Bundle('user', User.id, User.name)
      a_b = Bundle('address', Address.id, Address.email)

      for user, address in session.query(u_b, a_b).join(User.addresses):
          # ...

* Use result caching - see :ref:`examples_caching` for an in-depth example
  of this.

* Consider a faster interpreter like that of PyPy.

The output of a profile can be a little daunting, but after some
practice it is very easy to read.

.. seealso::

    :ref:`examples_performance` - a suite of performance demonstrations
    with bundled profiling capabilities.

I'm inserting 400,000 rows with the ORM and it's really slow!
--------------------------------------------------------------

The SQLAlchemy ORM uses the :term:`unit of work` pattern when synchronizing
changes to the database.  This pattern goes far beyond simple "inserts"
of data.  It includes that attributes which are assigned on objects are
received using an attribute instrumentation system which tracks
changes on objects as they are made, that all rows inserted
are tracked in an identity map, which has the effect that for each row
SQLAlchemy must retrieve its "last inserted id" if not already given,
and that rows to be inserted are scanned and sorted for
dependencies as needed.  Objects are also subject to a fair degree of
bookkeeping in order to keep all of this running, which for a very
large number of rows at once can create an inordinate amount of time
spent with large data structures, hence it's best to chunk these.

Basically, unit of work is a large degree of automation in order to
automate the task of persisting a complex object graph into a
relational database with no explicit persistence code, and this
automation has a price.

ORMs are basically not intended for high-performance bulk inserts -
this is the whole reason SQLAlchemy offers the Core in addition to the
ORM as a first-class component.

For the use case of fast bulk inserts, the
SQL generation and execution system that the ORM builds on top of
is part of the :doc:`Core <core/tutorial>`.  Using this system directly, we
can produce an INSERT that is competitive with using the raw database API
directly.

Alternatively, the SQLAlchemy ORM offers the :ref:`bulk_operations`
suite of methods, which provide hooks into subsections of the unit of
work process in order to emit Core-level INSERT and UPDATE constructs with
a small degree of ORM-based automation; a short sketch of this is shown
below.
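A minimal sketch of a bulk INSERT using :meth:`.Session.bulk_insert_mappings`
with plain dictionaries (the ``Customer`` mapping here is the one defined in
the test script at the end of this section)::

    # each mapping is a plain dictionary of column values; no Customer
    # objects are constructed and no identity map bookkeeping occurs
    session.bulk_insert_mappings(
        Customer,
        [dict(name="NAME %d" % i) for i in xrange(10000)]
    )
    session.commit()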
The example below illustrates time-based tests for several different
methods of inserting rows, going from the most automated to the least.
With CPython 2.7, runtimes observed::

    classics-MacBook-Pro:sqlalchemy classic$ python test.py
    SQLAlchemy ORM: Total time for 100000 records 12.0471920967 secs
    SQLAlchemy ORM pk given: Total time for 100000 records 7.06283402443 secs
    SQLAlchemy ORM bulk_insert_mappings(): Total time for 100000 records 0.856323003769 secs
    SQLAlchemy Core: Total time for 100000 records 0.485800027847 secs
    sqlite3: Total time for 100000 records 0.487842082977 sec

We can reduce the time by a factor of three using recent versions of
`PyPy <http://pypy.org/>`_::

    classics-MacBook-Pro:sqlalchemy classic$ /usr/local/src/pypy-2.1-beta2-osx64/bin/pypy test.py
    SQLAlchemy ORM: Total time for 100000 records 5.88369488716 secs
    SQLAlchemy ORM pk given: Total time for 100000 records 3.52294301987 secs
    SQLAlchemy Core: Total time for 100000 records 0.613556146622 secs
    sqlite3: Total time for 100000 records 0.442467927933 sec

Script::

    import time
    import sqlite3

    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy import Column, Integer, String, create_engine
    from sqlalchemy.orm import scoped_session, sessionmaker

    Base = declarative_base()
    DBSession = scoped_session(sessionmaker())
    engine = None


    class Customer(Base):
        __tablename__ = "customer"
        id = Column(Integer, primary_key=True)
        name = Column(String(255))


    def init_sqlalchemy(dbname='sqlite:///sqlalchemy.db'):
        global engine
        engine = create_engine(dbname, echo=False)
        DBSession.remove()
        DBSession.configure(bind=engine, autoflush=False, expire_on_commit=False)
        Base.metadata.drop_all(engine)
        Base.metadata.create_all(engine)


    def test_sqlalchemy_orm(n=100000):
        init_sqlalchemy()
        t0 = time.time()
        for i in xrange(n):
            customer = Customer()
            customer.name = 'NAME ' + str(i)
            DBSession.add(customer)
            if i % 1000 == 0:
                DBSession.flush()
        DBSession.commit()
        print(
            "SQLAlchemy ORM: Total time for " + str(n) +
            " records " + str(time.time() - t0) + " secs")


    def test_sqlalchemy_orm_pk_given(n=100000):
        init_sqlalchemy()
        t0 = time.time()
        for i in xrange(n):
            customer = Customer(id=i + 1, name="NAME " + str(i))
            DBSession.add(customer)
            if i % 1000 == 0:
                DBSession.flush()
        DBSession.commit()
        print(
            "SQLAlchemy ORM pk given: Total time for " + str(n) +
            " records " + str(time.time() - t0) + " secs")


    def test_sqlalchemy_orm_bulk_insert(n=100000):
        init_sqlalchemy()
        t0 = time.time()
        n1 = n
        while n1 > 0:
            # insert in chunks of up to 10000 mappings at a time; the
            # chunk size is computed before n1 is decremented, so that
            # all n rows are inserted
            DBSession.bulk_insert_mappings(
                Customer,
                [
                    dict(name="NAME " + str(i))
                    for i in xrange(min(10000, n1))
                ]
            )
            n1 = n1 - 10000
        DBSession.commit()
        print(
            "SQLAlchemy ORM bulk_insert_mappings(): Total time for " + str(n) +
            " records " + str(time.time() - t0) + " secs")


    def test_sqlalchemy_core(n=100000):
        init_sqlalchemy()
        t0 = time.time()
        engine.execute(
            Customer.__table__.insert(),
            [{"name": 'NAME ' + str(i)} for i in xrange(n)]
        )
        print(
            "SQLAlchemy Core: Total time for " + str(n) +
            " records " + str(time.time() - t0) + " secs")


    def init_sqlite3(dbname):
        conn = sqlite3.connect(dbname)
        c = conn.cursor()
        c.execute("DROP TABLE IF EXISTS customer")
        c.execute(
            "CREATE TABLE customer (id INTEGER NOT NULL, "
            "name VARCHAR(255), PRIMARY KEY(id))")
        conn.commit()
        return conn
VALUES (?)", row) + conn.commit() + print( + "sqlite3: Total time for " + str(n) + + " records " + str(time.time() - t0) + " sec") + + if __name__ == '__main__': + test_sqlalchemy_orm(100000) + test_sqlalchemy_orm_pk_given(100000) + test_sqlalchemy_orm_bulk_insert(100000) + test_sqlalchemy_core(100000) + test_sqlite3(100000) + |