diff options
author | Sławek Ehlert <slafs@op.pl> | 2015-01-27 22:04:38 +0100 |
---|---|---|
committer | Sławek Ehlert <slafs@op.pl> | 2015-01-27 22:04:38 +0100 |
commit | 57b2bd5dcba6140b511c898c0f682234f13d5c51 (patch) | |
tree | a0899b2a35d27e177001b163054c3c9a8f7f1c06 /examples/performance/__init__.py | |
parent | 6a1f16d09958e549502a0991890d64964c71b357 (diff) | |
parent | 8aaa8dd6bdfb85fa481efa3115b9080d935d344c (diff) | |
download | sqlalchemy-pr/152.tar.gz |
Merge branch 'master' into oracle-servicename-optionpr/152
Diffstat (limited to 'examples/performance/__init__.py')
-rw-r--r-- | examples/performance/__init__.py | 421 |
1 files changed, 421 insertions, 0 deletions
diff --git a/examples/performance/__init__.py b/examples/performance/__init__.py new file mode 100644 index 000000000..a4edfce36 --- /dev/null +++ b/examples/performance/__init__.py @@ -0,0 +1,421 @@ +"""A performance profiling suite for a variety of SQLAlchemy use cases. + +Each suite focuses on a specific use case with a particular performance +profile and associated implications: + +* bulk inserts +* individual inserts, with or without transactions +* fetching large numbers of rows +* running lots of small queries (TODO) + +All suites include a variety of use patterns illustrating both Core +and ORM use, and are generally sorted in order of performance from worst +to greatest, inversely based on amount of functionality provided by SQLAlchemy, +greatest to least (these two things generally correspond perfectly). + +A command line tool is presented at the package level which allows +individual suites to be run:: + + $ python -m examples.performance --help + usage: python -m examples.performance [-h] [--test TEST] [--dburl DBURL] + [--num NUM] [--profile] [--dump] + [--runsnake] [--echo] + + {bulk_inserts,large_resultsets,single_inserts} + + positional arguments: + {bulk_inserts,large_resultsets,single_inserts} + suite to run + + optional arguments: + -h, --help show this help message and exit + --test TEST run specific test name + --dburl DBURL database URL, default sqlite:///profile.db + --num NUM Number of iterations/items/etc for tests; default is 0 + module-specific + --profile run profiling and dump call counts + --dump dump full call profile (implies --profile) + --runsnake invoke runsnakerun (implies --profile) + --echo Echo SQL output + +An example run looks like:: + + $ python -m examples.performance bulk_inserts + +Or with options:: + + $ python -m examples.performance bulk_inserts \\ + --dburl mysql+mysqldb://scott:tiger@localhost/test \\ + --profile --num 1000 + +.. seealso:: + + :ref:`faq_how_to_profile` + +File Listing +------------- + +.. autosource:: + + +Running all tests with time +--------------------------- + +This is the default form of run:: + + $ python -m examples.performance single_inserts + Tests to run: test_orm_commit, test_bulk_save, + test_bulk_insert_dictionaries, test_core, + test_core_query_caching, test_dbapi_raw_w_connect, + test_dbapi_raw_w_pool + + test_orm_commit : Individual INSERT/COMMIT pairs via the + ORM (10000 iterations); total time 13.690218 sec + test_bulk_save : Individual INSERT/COMMIT pairs using + the "bulk" API (10000 iterations); total time 11.290371 sec + test_bulk_insert_dictionaries : Individual INSERT/COMMIT pairs using + the "bulk" API with dictionaries (10000 iterations); + total time 10.814626 sec + test_core : Individual INSERT/COMMIT pairs using Core. + (10000 iterations); total time 9.665620 sec + test_core_query_caching : Individual INSERT/COMMIT pairs using Core + with query caching (10000 iterations); total time 9.209010 sec + test_dbapi_raw_w_connect : Individual INSERT/COMMIT pairs w/ DBAPI + + connection each time (10000 iterations); total time 9.551103 sec + test_dbapi_raw_w_pool : Individual INSERT/COMMIT pairs w/ DBAPI + + connection pool (10000 iterations); total time 8.001813 sec + +Dumping Profiles for Individual Tests +-------------------------------------- + +A Python profile output can be dumped for all tests, or more commonly +individual tests:: + + $ python -m examples.performance single_inserts --test test_core --num 1000 --dump + Tests to run: test_core + test_core : Individual INSERT/COMMIT pairs using Core. (1000 iterations); total fn calls 186109 + 186109 function calls (186102 primitive calls) in 1.089 seconds + + Ordered by: internal time, call count + + ncalls tottime percall cumtime percall filename:lineno(function) + 1000 0.634 0.001 0.634 0.001 {method 'commit' of 'sqlite3.Connection' objects} + 1000 0.154 0.000 0.154 0.000 {method 'execute' of 'sqlite3.Cursor' objects} + 1000 0.021 0.000 0.074 0.000 /Users/classic/dev/sqlalchemy/lib/sqlalchemy/sql/compiler.py:1950(_get_colparams) + 1000 0.015 0.000 0.034 0.000 /Users/classic/dev/sqlalchemy/lib/sqlalchemy/engine/default.py:503(_init_compiled) + 1 0.012 0.012 1.091 1.091 examples/performance/single_inserts.py:79(test_core) + + ... + +Using RunSnake +-------------- + +This option requires the `RunSnake <https://pypi.python.org/pypi/RunSnakeRun>`_ +command line tool be installed:: + + $ python -m examples.performance single_inserts --test test_core --num 1000 --runsnake + +A graphical RunSnake output will be displayed. + +.. _examples_profiling_writeyourown: + +Writing your Own Suites +----------------------- + +The profiler suite system is extensible, and can be applied to your own set +of tests. This is a valuable technique to use in deciding upon the proper +approach for some performance-critical set of routines. For example, +if we wanted to profile the difference between several kinds of loading, +we can create a file ``test_loads.py``, with the following content:: + + from examples.performance import Profiler + from sqlalchemy import Integer, Column, create_engine, ForeignKey + from sqlalchemy.orm import relationship, joinedload, subqueryload, Session + from sqlalchemy.ext.declarative import declarative_base + + Base = declarative_base() + engine = None + session = None + + + class Parent(Base): + __tablename__ = 'parent' + id = Column(Integer, primary_key=True) + children = relationship("Child") + + + class Child(Base): + __tablename__ = 'child' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('parent.id')) + + + # Init with name of file, default number of items + Profiler.init("test_loads", 1000) + + + @Profiler.setup_once + def setup_once(dburl, echo, num): + "setup once. create an engine, insert fixture data" + global engine + engine = create_engine(dburl, echo=echo) + Base.metadata.drop_all(engine) + Base.metadata.create_all(engine) + sess = Session(engine) + sess.add_all([ + Parent(children=[Child() for j in range(100)]) + for i in range(num) + ]) + sess.commit() + + + @Profiler.setup + def setup(dburl, echo, num): + "setup per test. create a new Session." + global session + session = Session(engine) + # pre-connect so this part isn't profiled (if we choose) + session.connection() + + + @Profiler.profile + def test_lazyload(n): + "load everything, no eager loading." + + for parent in session.query(Parent): + parent.children + + + @Profiler.profile + def test_joinedload(n): + "load everything, joined eager loading." + + for parent in session.query(Parent).options(joinedload("children")): + parent.children + + + @Profiler.profile + def test_subqueryload(n): + "load everything, subquery eager loading." + + for parent in session.query(Parent).options(subqueryload("children")): + parent.children + + if __name__ == '__main__': + Profiler.main() + +We can run our new script directly:: + + $ python test_loads.py --dburl postgresql+psycopg2://scott:tiger@localhost/test + Running setup once... + Tests to run: test_lazyload, test_joinedload, test_subqueryload + test_lazyload : load everything, no eager loading. (1000 iterations); total time 11.971159 sec + test_joinedload : load everything, joined eager loading. (1000 iterations); total time 2.754592 sec + test_subqueryload : load everything, subquery eager loading. (1000 iterations); total time 2.977696 sec + +As well as see RunSnake output for an individual test:: + + $ python test_loads.py --num 100 --runsnake --test test_joinedload + +""" +import argparse +import cProfile +import pstats +import os +import time +import re +import sys + + +class Profiler(object): + tests = [] + + _setup = None + _setup_once = None + name = None + num = 0 + + def __init__(self, options): + self.test = options.test + self.dburl = options.dburl + self.runsnake = options.runsnake + self.profile = options.profile + self.dump = options.dump + self.num = options.num + self.echo = options.echo + self.stats = [] + + @classmethod + def init(cls, name, num): + cls.name = name + cls.num = num + + @classmethod + def profile(cls, fn): + if cls.name is None: + raise ValueError( + "Need to call Profile.init(<suitename>, <default_num>) first.") + cls.tests.append(fn) + return fn + + @classmethod + def setup(cls, fn): + if cls._setup is not None: + raise ValueError("setup function already set to %s" % cls._setup) + cls._setup = staticmethod(fn) + return fn + + @classmethod + def setup_once(cls, fn): + if cls._setup_once is not None: + raise ValueError( + "setup_once function already set to %s" % cls._setup_once) + cls._setup_once = staticmethod(fn) + return fn + + def run(self): + if self.test: + tests = [fn for fn in self.tests if fn.__name__ == self.test] + if not tests: + raise ValueError("No such test: %s" % self.test) + else: + tests = self.tests + + if self._setup_once: + print("Running setup once...") + self._setup_once(self.dburl, self.echo, self.num) + print("Tests to run: %s" % ", ".join([t.__name__ for t in tests])) + for test in tests: + self._run_test(test) + self.stats[-1].report() + + def _run_with_profile(self, fn): + pr = cProfile.Profile() + pr.enable() + try: + result = fn(self.num) + finally: + pr.disable() + + stats = pstats.Stats(pr).sort_stats('cumulative') + + self.stats.append(TestResult(self, fn, stats=stats)) + return result + + def _run_with_time(self, fn): + now = time.time() + try: + return fn(self.num) + finally: + total = time.time() - now + self.stats.append(TestResult(self, fn, total_time=total)) + + def _run_test(self, fn): + if self._setup: + self._setup(self.dburl, self.echo, self.num) + if self.profile or self.runsnake or self.dump: + self._run_with_profile(fn) + else: + self._run_with_time(fn) + + @classmethod + def main(cls): + + parser = argparse.ArgumentParser("python -m examples.performance") + + if cls.name is None: + parser.add_argument( + "name", choices=cls._suite_names(), help="suite to run") + + if len(sys.argv) > 1: + potential_name = sys.argv[1] + try: + suite = __import__(__name__ + "." + potential_name) + except ImportError: + pass + + parser.add_argument( + "--test", type=str, + help="run specific test name" + ) + + parser.add_argument( + '--dburl', type=str, default="sqlite:///profile.db", + help="database URL, default sqlite:///profile.db" + ) + parser.add_argument( + '--num', type=int, default=cls.num, + help="Number of iterations/items/etc for tests; " + "default is %d module-specific" % cls.num + ) + parser.add_argument( + '--profile', action='store_true', + help='run profiling and dump call counts') + parser.add_argument( + '--dump', action='store_true', + help='dump full call profile (implies --profile)') + parser.add_argument( + '--runsnake', action='store_true', + help='invoke runsnakerun (implies --profile)') + parser.add_argument( + '--echo', action='store_true', + help="Echo SQL output") + args = parser.parse_args() + + args.profile = args.profile or args.dump or args.runsnake + + if cls.name is None: + suite = __import__(__name__ + "." + args.name) + + Profiler(args).run() + + @classmethod + def _suite_names(cls): + suites = [] + for file_ in os.listdir(os.path.dirname(__file__)): + match = re.match(r'^([a-z].*).py$', file_) + if match: + suites.append(match.group(1)) + return suites + + +class TestResult(object): + def __init__(self, profile, test, stats=None, total_time=None): + self.profile = profile + self.test = test + self.stats = stats + self.total_time = total_time + + def report(self): + print(self._summary()) + if self.profile.profile: + self.report_stats() + + def _summary(self): + summary = "%s : %s (%d iterations)" % ( + self.test.__name__, self.test.__doc__, self.profile.num) + if self.total_time: + summary += "; total time %f sec" % self.total_time + if self.stats: + summary += "; total fn calls %d" % self.stats.total_calls + return summary + + def report_stats(self): + if self.profile.runsnake: + self._runsnake() + elif self.profile.dump: + self._dump() + + def _dump(self): + self.stats.sort_stats('time', 'calls') + self.stats.print_stats() + + def _runsnake(self): + filename = "%s.profile" % self.test.__name__ + try: + self.stats.dump_stats(filename) + os.system("runsnake %s" % filename) + finally: + os.remove(filename) + + |