summaryrefslogtreecommitdiff
path: root/lib/sqlalchemy/orm/shard.py
blob: 6850a0bb07c2b745a4d465cf6d48a63a0855c40e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# shard.py
# Copyright (C) the SQLAlchemy authors and contributors
#
# This module is part of SQLAlchemy and is released under
# the MIT License: http://www.opensource.org/licenses/mit-license.php

"""Horizontal sharding support.

Defines a rudimental 'horizontal sharding' system which allows a Session to
distribute queries and persistence operations across multiple databases.

For a usage example, see the file ``examples/sharding/attribute_shard.py``
included in the source distrbution.

"""

import sqlalchemy.exceptions as sa_exc
from sqlalchemy import util
from sqlalchemy.orm.session import Session
from sqlalchemy.orm.query import Query

__all__ = ['ShardedSession', 'ShardedQuery']


class ShardedSession(Session):
    def __init__(self, shard_chooser, id_chooser, query_chooser, shards=None, **kwargs):
        """Construct a ShardedSession.

        shard_chooser
          A callable which, passed a Mapper, a mapped instance, and possibly a
          SQL clause, returns a shard ID.  This id may be based off of the
          attributes present within the object, or on some round-robin
          scheme. If the scheme is based on a selection, it should set
          whatever state on the instance to mark it in the future as
          participating in that shard.

        id_chooser
          A callable, passed a query and a tuple of identity values, which
          should return a list of shard ids where the ID might reside.  The
          databases will be queried in the order of this listing.

        query_chooser
          For a given Query, returns the list of shard_ids where the query
          should be issued.  Results from all shards returned will be combined
          together into a single listing.

        """
        super(ShardedSession, self).__init__(**kwargs)
        self.shard_chooser = shard_chooser
        self.id_chooser = id_chooser
        self.query_chooser = query_chooser
        self.__binds = {}
        self._mapper_flush_opts = {'connection_callable':self.connection}
        self._query_cls = ShardedQuery
        if shards is not None:
            for k in shards:
                self.bind_shard(k, shards[k])
        
    def connection(self, mapper=None, instance=None, shard_id=None, **kwargs):
        if shard_id is None:
            shard_id = self.shard_chooser(mapper, instance)

        if self.transaction is not None:
            return self.transaction.connection(mapper, shard_id=shard_id)
        else:
            return self.get_bind(mapper, shard_id=shard_id, instance=instance).contextual_connect(**kwargs)
    
    def get_bind(self, mapper, shard_id=None, instance=None, clause=None):
        if shard_id is None:
            shard_id = self.shard_chooser(mapper, instance, clause=clause)
        return self.__binds[shard_id]

    def bind_shard(self, shard_id, bind):
        self.__binds[shard_id] = bind

class ShardedQuery(Query):
    def __init__(self, *args, **kwargs):
        super(ShardedQuery, self).__init__(*args, **kwargs)
        self.id_chooser = self.session.id_chooser
        self.query_chooser = self.session.query_chooser
        self._shard_id = None
        
    def _clone(self):
        q = ShardedQuery.__new__(ShardedQuery)
        q.__dict__ = self.__dict__.copy()
        return q
    
    def set_shard(self, shard_id):
        """return a new query, limited to a single shard ID.
        
        all subsequent operations with the returned query will 
        be against the single shard regardless of other state.
        """
        
        q = self._clone()
        q._shard_id = shard_id
        return q
        
    def _execute_and_instances(self, context):
        if self._shard_id is not None:
            result = self.session.connection(mapper=self._mapper_zero(), shard_id=self._shard_id).execute(context.statement, **self._params)
            try:
                return iter(self.instances(result, context))
            finally:
                result.close()
        else:
            partial = []
            for shard_id in self.query_chooser(self):
                result = self.session.connection(mapper=self._mapper_zero(), shard_id=shard_id).execute(context.statement, **self._params)
                try:
                    partial = partial + list(self.instances(result, context))
                finally:
                    result.close()
            # if some kind of in memory 'sorting' were done, this is where it would happen
            return iter(partial)

    def get(self, ident, **kwargs):
        if self._shard_id is not None:
            return super(ShardedQuery, self).get(ident)
        else:
            ident = util.to_list(ident)
            for shard_id in self.id_chooser(self, ident):
                o = self.set_shard(shard_id).get(ident, **kwargs)
                if o is not None:
                    return o
            else:
                return None
    
    def load(self, ident, **kwargs):
        if self._shard_id is not None:
            return super(ShardedQuery, self).load(ident)
        else:
            for shard_id in self.id_chooser(self, ident):
                o = self.set_shard(shard_id).load(ident, raiseerr=False, **kwargs)
                if o is not None:
                    return o
            else:
                raise sa_exc.InvalidRequestError("No instance found for identity %s" % repr(ident))