1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
|
""" LRU caching class and decorator """
from __future__ import with_statement
import threading
import time
import uuid
try:
range = xrange
except NameError: # pragma: no cover
pass
_MARKER = object()
# By default, expire items after 2**60 seconds. This fits into 64 bit
# integers and is close enough to "never" for practical purposes.
_DEFAULT_TIMEOUT = 2 ** 60
class LRUCache(object):
""" Implements a pseudo-LRU algorithm (CLOCK)
The Clock algorithm is not kept strictly to improve performance, e.g. to
allow get() and invalidate() to work without acquiring the lock.
"""
def __init__(self, size):
size = int(size)
if size < 1:
raise ValueError('size must be >0')
self.size = size
self.lock = threading.Lock()
self.hand = 0
self.maxpos = size - 1
self.clock_keys = None
self.clock_refs = None
self.data = None
self.evictions = 0
self.hits = 0
self.misses = 0
self.lookups = 0
self.clear()
def clear(self):
"""Remove all entries from the cache"""
with self.lock:
# If really clear()ing a full cache, clean up self.data first to
# give garbage collection a chance to reduce memorey usage.
# Instantiating "[_MARKER] * size" will temporarily have 2 lists
# in memory -> high peak memory usage for tiny amount of time.
# With self.data already clear, that peak should not exceed what
# we normally use.
self.data = {}
size = self.size
self.clock_keys = [_MARKER] * size
self.clock_refs = [False] * size
self.hand = 0
self.evictions = 0
self.hits = 0
self.misses = 0
self.lookups = 0
def get(self, key, default=None):
"""Return value for key. If not in cache, return default"""
self.lookups += 1
try:
pos, val = self.data[key]
self.hits += 1
except KeyError:
self.misses += 1
return default
self.clock_refs[pos] = True
return val
def put(self, key, val):
"""Add key to the cache with value val"""
# These do not change or they are just references, no need for locking.
maxpos = self.maxpos
clock_refs = self.clock_refs
clock_keys = self.clock_keys
data = self.data
with self.lock:
entry = data.get(key)
if entry is not None:
# We already have key. Only make sure data is up to date and
# to remember that it was used.
pos, old_val = entry
if old_val is not val:
data[key] = (pos, val)
self.clock_refs[pos] = True
return
# else: key is not yet in cache. Search place to insert it.
hand = self.hand
count = 0
max_count = 107
while 1:
ref = clock_refs[hand]
if ref == True:
clock_refs[hand] = False
hand += 1
if hand > maxpos:
hand = 0
count += 1
if count >= max_count:
# We have been searching long enough. Force eviction of
# next entry, no matter what its status is.
clock_refs[hand] = False
else:
oldkey = clock_keys[hand]
# Maybe oldkey was not in self.data to begin with. If it
# was, self.invalidate() in another thread might have
# already removed it. del() would raise KeyError, so pop().
oldentry = data.pop(oldkey, _MARKER)
if oldentry is not _MARKER:
self.evictions += 1
clock_keys[hand] = key
clock_refs[hand] = True
data[key] = (hand, val)
hand += 1
if hand > maxpos:
hand = 0
self.hand = hand
break
def invalidate(self, key):
"""Remove key from the cache"""
# pop with default arg will not raise KeyError
entry = self.data.pop(key, _MARKER)
if entry is not _MARKER:
# We have no lock, but worst thing that can happen is that we
# set another key's entry to False.
self.clock_refs[entry[0]] = False
# else: key was not in cache. Nothing to do.
class ExpiringLRUCache(object):
""" Implements a pseudo-LRU algorithm (CLOCK) with expiration times
The Clock algorithm is not kept strictly to improve performance, e.g. to
allow get() and invalidate() to work without acquiring the lock.
"""
def __init__(self, size, default_timeout=_DEFAULT_TIMEOUT):
self.default_timeout = default_timeout
size = int(size)
if size < 1:
raise ValueError('size must be >0')
self.size = size
self.lock = threading.Lock()
self.hand = 0
self.maxpos = size - 1
self.clock_keys = None
self.clock_refs = None
self.data = None
self.evictions = 0
self.hits = 0
self.misses = 0
self.lookups = 0
self.clear()
def clear(self):
"""Remove all entries from the cache"""
with self.lock:
# If really clear()ing a full cache, clean up self.data first to
# give garbage collection a chance to reduce memorey usage.
# Instantiating "[_MARKER] * size" will temporarily have 2 lists
# in memory -> high peak memory usage for tiny amount of time.
# With self.data already clear, that peak should not exceed what
# we normally use.
# self.data contains (pos, val, expires) triplets
self.data = {}
size = self.size
self.clock_keys = [_MARKER] * size
self.clock_refs = [False] * size
self.hand = 0
self.evictions = 0
self.hits = 0
self.misses = 0
self.lookups = 0
def get(self, key, default=None):
"""Return value for key. If not in cache or expired, return default"""
self.lookups += 1
try:
pos, val, expires = self.data[key]
except KeyError:
self.misses += 1
return default
if expires > time.time():
# cache entry still valid
self.hits += 1
self.clock_refs[pos] = True
return val
else:
# cache entry has expired. Make sure the space in the cache can
# be recycled soon.
self.misses += 1
self.clock_refs[pos] = False
return default
def put(self, key, val, timeout=None):
"""Add key to the cache with value val
key will expire in $timeout seconds. If key is already in cache, val
and timeout will be updated.
"""
# These do not change or they are just references, no need for locking.
maxpos = self.maxpos
clock_refs = self.clock_refs
clock_keys = self.clock_keys
data = self.data
lock = self.lock
if timeout is None:
timeout = self.default_timeout
with self.lock:
entry = data.get(key)
if entry is not None:
# We already have key. Only make sure data is up to date and
# to remember that it was used.
pos = entry[0]
data[key] = (pos, val, time.time() + timeout)
clock_refs[pos] = True
return
# else: key is not yet in cache. Search place to insert it.
hand = self.hand
count = 0
max_count = 107
while 1:
ref = clock_refs[hand]
if ref == True:
clock_refs[hand] = False
hand += 1
if hand > maxpos:
hand = 0
count += 1
if count >= max_count:
# We have been searching long enough. Force eviction of
# next entry, no matter what its status is.
clock_refs[hand] = False
else:
oldkey = clock_keys[hand]
# Maybe oldkey was not in self.data to begin with. If it
# was, self.invalidate() in another thread might have
# already removed it. del() would raise KeyError, so pop().
oldentry = data.pop(oldkey, _MARKER)
if oldentry is not _MARKER:
self.evictions += 1
clock_keys[hand] = key
clock_refs[hand] = True
data[key] = (hand, val, time.time() + timeout)
hand += 1
if hand > maxpos:
hand = 0
self.hand = hand
break
def invalidate(self, key):
"""Remove key from the cache"""
# pop with default arg will not raise KeyError
entry = self.data.pop(key, _MARKER)
if entry is not _MARKER:
# We have no lock, but worst thing that can happen is that we
# set another key's entry to False.
self.clock_refs[entry[0]] = False
# else: key was not in cache. Nothing to do.
class lru_cache(object):
""" Decorator for LRU-cached function
timeout parameter specifies after how many seconds a cached entry should
be considered invalid.
"""
def __init__(self, maxsize, cache=None, timeout=None): # cache is an arg to serve tests
if cache is None:
if timeout is None:
cache = LRUCache(maxsize)
else:
cache = ExpiringLRUCache(maxsize, default_timeout=timeout)
self.cache = cache
def __call__(self, f):
cache = self.cache
marker = _MARKER
def lru_cached(*arg):
val = cache.get(arg, marker)
if val is marker:
val = f(*arg)
cache.put(arg, val)
return val
lru_cached.__module__ = f.__module__
lru_cached.__name__ = f.__name__
lru_cached.__doc__ = f.__doc__
return lru_cached
_SENTINEL=_MARKER
class CacheMaker(object):
"""Generates decorators that can be cleared later
"""
def __init__(self,**default):
"""
constructor is accepting named arguments :
* maxsize : the default size for the cache
* timeout : the defaut size for the cache if using expriring cache
"""
self._maxsize=default.get("maxsize",_SENTINEL)
self._timeout=default.get("timeout",_DEFAULT_TIMEOUT)
self._cache=dict()
def _resolve_setting(self,option):
name = option.get("name",_SENTINEL)
maxsize = option.get("maxsize",_SENTINEL)
maxsize = self._maxsize if maxsize is _SENTINEL else maxsize
if maxsize is _SENTINEL:
raise ValueError("Cache must have a maxsize set")
timeout = option.get("timeout",_SENTINEL)
timeout = self._timeout if timeout is _SENTINEL else timeout
if name is _SENTINEL:
_name= str(uuid.uuid4())
## the probability of collision is so low ....
while _name in self._cache.keys():
_name=str(uuid.uuid4())
else:
if name in self._cache:
raise KeyError("cache %s already in use" % name)
_name=name
return dict( name=_name,timeout=timeout, maxsize=maxsize)
def lrucache(self,**option):
"""named argument:
* name (optional) is a string, and should be unique amongst all
cache
* maxsize : if given will override any default value given at
the constructor"""
option=self._resolve_setting(option)
cache = self._cache[option["name"]] = LRUCache(option['maxsize'])
return lru_cache(option['maxsize'], cache)
def expiring_lrucache(self, **option):
"""named argument:
* name (optional) is a string, and should be unique amongst all
cache
* maxsize : if given will override any default value given at
the constructor
* timeout : if given will override any default value given at
constructur or the default value (%d seconds)
""" % _DEFAULT_TIMEOUT
option=self._resolve_setting(option)
cache = self._cache[option['name']]=ExpiringLRUCache(
option["maxsize"],option["timeout"]
)
return lru_cache(option["maxsize"], cache, option["timeout"])
def clear(self, name = _SENTINEL):
"""
clear all cache if no arguments, else clear cache with the given name"""
to_clear = self._cache.keys() if name is _SENTINEL else [ name ]
for cache_name in to_clear:
self._cache[cache_name].clear()
|