summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Thiel <byronimo@gmail.com>2015-01-07 17:08:03 +0100
committerSebastian Thiel <byronimo@gmail.com>2015-01-07 17:08:03 +0100
commitbe66773309e52e231156ad6a2c9e89dfd58251ed (patch)
tree6bc8f7657e50f65a7e0c43fc5c4de5f34abc6e41
parente0b0becd97afb9b7ac434c5fabdadd20070d643d (diff)
parentf071ffdcacbafff648cd29d6f75fe27c47f53210 (diff)
downloadsmmap-be66773309e52e231156ad6a2c9e89dfd58251ed.tar.gz
Merge branch 'resource-handling'v0.9.0
Should fix #22
-rw-r--r--doc/source/changes.rst6
-rw-r--r--smmap/__init__.py2
-rw-r--r--smmap/buf.py5
-rw-r--r--smmap/mman.py34
-rw-r--r--smmap/test/test_buf.py3
-rw-r--r--smmap/test/test_mman.py26
-rw-r--r--smmap/test/test_util.py9
-rw-r--r--smmap/util.py33
8 files changed, 65 insertions, 53 deletions
diff --git a/doc/source/changes.rst b/doc/source/changes.rst
index ec42369..f99e85f 100644
--- a/doc/source/changes.rst
+++ b/doc/source/changes.rst
@@ -3,6 +3,12 @@ Changelog
#########
**********
+v0.9.0
+**********
+- Fixed issue with resources never being freed as mmaps were never closed.
+- Client counting is now done manually, instead of relying on pyton's reference count
+
+**********
v0.8.5
**********
- Fixed Python 3.0-3.3 regression, which also causes smmap to become about 3 times slower depending on the code path. It's related to this bug (http://bugs.python.org/issue15958), which was fixed in python 3.4
diff --git a/smmap/__init__.py b/smmap/__init__.py
index 46b0002..e711bbb 100644
--- a/smmap/__init__.py
+++ b/smmap/__init__.py
@@ -3,7 +3,7 @@
__author__ = "Sebastian Thiel"
__contact__ = "byronimo@gmail.com"
__homepage__ = "https://github.com/Byron/smmap"
-version_info = (0, 8, 5)
+version_info = (0, 9, 0)
__version__ = '.'.join(str(i) for i in version_info)
# make everything available in root package for convenience
diff --git a/smmap/buf.py b/smmap/buf.py
index b3b71c4..e6f2434 100644
--- a/smmap/buf.py
+++ b/smmap/buf.py
@@ -93,6 +93,7 @@ class SlidingWindowMapBuffer(object):
l -= len(d)
# This is slower than the join ... but what can we do ...
out += d
+ del(d)
# END while there are bytes to read
return out
else:
@@ -103,6 +104,10 @@ class SlidingWindowMapBuffer(object):
d = c.buffer()[:l]
ofs += len(d)
l -= len(d)
+ # Make sure we don't keep references, as c.use_region() might attempt to free resources, but
+ # can't unless we use pure bytes
+ if hasattr(d, 'tobytes'):
+ d = d.tobytes()
md.append(d)
# END while there are bytes to read
return bytes().join(md)
diff --git a/smmap/mman.py b/smmap/mman.py
index c7a4595..7180c75 100644
--- a/smmap/mman.py
+++ b/smmap/mman.py
@@ -8,7 +8,6 @@ from .util import (
buffer,
)
-from weakref import ref
import sys
from functools import reduce
@@ -55,12 +54,11 @@ class WindowCursor(object):
# Actual client count, which doesn't include the reference kept by the manager, nor ours
# as we are about to be deleted
try:
- num_clients = self._rlist.client_count() - 2
- if num_clients == 0 and len(self._rlist) == 0:
+ if len(self._rlist) == 0:
# Free all resources associated with the mapped file
self._manager._fdict.pop(self._rlist.path_or_fd())
# END remove regions list from manager
- except TypeError:
+ except (TypeError, KeyError):
# sometimes, during shutdown, getrefcount is None. Its possible
# to re-import it, however, its probably better to just ignore
# this python problem (for now).
@@ -72,13 +70,16 @@ class WindowCursor(object):
def _copy_from(self, rhs):
"""Copy all data from rhs into this instance, handles usage count"""
self._manager = rhs._manager
- self._rlist = rhs._rlist
+ self._rlist = type(rhs._rlist)(rhs._rlist)
self._region = rhs._region
self._ofs = rhs._ofs
self._size = rhs._size
+ for region in self._rlist:
+ region.increment_client_count()
+
if self._region is not None:
- self._region.increment_usage_count()
+ self._region.increment_client_count()
# END handle regions
def __copy__(self):
@@ -126,20 +127,22 @@ class WindowCursor(object):
if need_region:
self._region = man._obtain_region(self._rlist, offset, size, flags, False)
+ self._region.increment_client_count()
# END need region handling
- self._region.increment_usage_count()
self._ofs = offset - self._region._b
self._size = min(size, self._region.ofs_end() - offset)
return self
def unuse_region(self):
- """Unuse the ucrrent region. Does nothing if we have no current region
+ """Unuse the current region. Does nothing if we have no current region
**Note:** the cursor unuses the region automatically upon destruction. It is recommended
to un-use the region once you are done reading from it in persistent cursors as it
helps to free up resource more quickly"""
+ if self._region is not None:
+ self._region.increment_client_count(-1)
self._region = None
# note: should reset ofs and size, but we spare that for performance. Its not
# allowed to query information if we are not valid !
@@ -184,12 +187,10 @@ class WindowCursor(object):
""":return: amount of bytes we point to"""
return self._size
- def region_ref(self):
- """:return: weak ref to our mapped region.
+ def region(self):
+ """:return: our mapped region, or None if nothing is mapped yet
:raise AssertionError: if we have no current region. This is only useful for debugging"""
- if self._region is None:
- raise AssertionError("region not set")
- return ref(self._region)
+ return self._region
def includes_ofs(self, ofs):
""":return: True if the given absolute offset is contained in the cursors
@@ -311,8 +312,8 @@ class StaticWindowMapManager(object):
lru_list = None
for regions in self._fdict.values():
for region in regions:
- # check client count - consider that we keep one reference ourselves !
- if (region.client_count() - 2 == 0 and
+ # check client count - if it's 1, it's just us
+ if (region.client_count() == 1 and
(lru_region is None or region._uc < lru_region._uc)):
lru_region = region
lru_list = regions
@@ -326,6 +327,7 @@ class StaticWindowMapManager(object):
num_found += 1
del(lru_list[lru_list.index(lru_region)])
+ lru_region.increment_client_count(-1)
self._memory_size -= lru_region.size()
self._handle_count -= 1
# END while there is more memory to free
@@ -449,7 +451,7 @@ class StaticWindowMapManager(object):
for path, rlist in self._fdict.items():
if path.startswith(base_path):
for region in rlist:
- region._mf.close()
+ region.release()
num_closed += 1
# END path matches
# END for each path
diff --git a/smmap/test/test_buf.py b/smmap/test/test_buf.py
index 0337715..984b432 100644
--- a/smmap/test/test_buf.py
+++ b/smmap/test/test_buf.py
@@ -12,7 +12,6 @@ from random import randint
from time import time
import sys
import os
-import logging
man_optimal = SlidingWindowMapManager()
@@ -104,6 +103,7 @@ class TestBuf(TestBase):
assert len(d) == ofs_end - ofs_start
assert d == data[ofs_start:ofs_end]
num_bytes += len(d)
+ del d
else:
pos = randint(0, fsize)
assert buf[pos] == data[pos]
@@ -122,6 +122,7 @@ class TestBuf(TestBase):
% (man_id, max_num_accesses, mode_str, type(item), num_bytes / mb, elapsed, (num_bytes / mb) / elapsed),
file=sys.stderr)
# END handle access mode
+ del buf
# END for each manager
# END for each input
os.close(fd)
diff --git a/smmap/test/test_mman.py b/smmap/test/test_mman.py
index b718b06..c8b9c70 100644
--- a/smmap/test/test_mman.py
+++ b/smmap/test/test_mman.py
@@ -119,21 +119,21 @@ class TestMMan(TestBase):
# window size is 0 for static managers, hence size will be 0. We take that into consideration
size = man.window_size() // 2
assert c.use_region(base_offset, size).is_valid()
- rr = c.region_ref()
- assert rr().client_count() == 2 # the manager and the cursor and us
+ rr = c.region()
+ assert rr.client_count() == 2 # the manager and the cursor and us
assert man.num_open_files() == 1
assert man.num_file_handles() == 1
- assert man.mapped_memory_size() == rr().size()
+ assert man.mapped_memory_size() == rr.size()
# assert c.size() == size # the cursor may overallocate in its static version
assert c.ofs_begin() == base_offset
- assert rr().ofs_begin() == 0 # it was aligned and expanded
+ assert rr.ofs_begin() == 0 # it was aligned and expanded
if man.window_size():
# but isn't larger than the max window (aligned)
- assert rr().size() == align_to_mmap(man.window_size(), True)
+ assert rr.size() == align_to_mmap(man.window_size(), True)
else:
- assert rr().size() == fc.size
+ assert rr.size() == fc.size
# END ignore static managers which dont use windows and are aligned to file boundaries
assert c.buffer()[:] == data[base_offset:base_offset + (size or c.size())]
@@ -141,7 +141,7 @@ class TestMMan(TestBase):
# obtain second window, which spans the first part of the file - it is a still the same window
nsize = (size or fc.size) - 10
assert c.use_region(0, nsize).is_valid()
- assert c.region_ref()() == rr()
+ assert c.region() == rr
assert man.num_file_handles() == 1
assert c.size() == nsize
assert c.ofs_begin() == 0
@@ -154,15 +154,15 @@ class TestMMan(TestBase):
if man.window_size():
assert man.num_file_handles() == 2
assert c.size() < size
- assert c.region_ref()() is not rr() # old region is still available, but has not curser ref anymore
- assert rr().client_count() == 1 # only held by manager
+ assert c.region() is not rr # old region is still available, but has not curser ref anymore
+ assert rr.client_count() == 1 # only held by manager
else:
assert c.size() < fc.size
# END ignore static managers which only have one handle per file
- rr = c.region_ref()
- assert rr().client_count() == 2 # manager + cursor
- assert rr().ofs_begin() < c.ofs_begin() # it should have extended itself to the left
- assert rr().ofs_end() <= fc.size # it cannot be larger than the file
+ rr = c.region()
+ assert rr.client_count() == 2 # manager + cursor
+ assert rr.ofs_begin() < c.ofs_begin() # it should have extended itself to the left
+ assert rr.ofs_end() <= fc.size # it cannot be larger than the file
assert c.buffer()[:] == data[base_offset:base_offset + (size or c.size())]
# unising a region makes the cursor invalid
diff --git a/smmap/test/test_util.py b/smmap/test/test_util.py
index 0bbf91b..0a16260 100644
--- a/smmap/test/test_util.py
+++ b/smmap/test/test_util.py
@@ -88,12 +88,7 @@ class TestMMan(TestBase):
# auto-refcount
assert rfull.client_count() == 1
rfull2 = rfull
- assert rfull.client_count() == 2
-
- # usage
- assert rfull.usage_count() == 0
- rfull.increment_usage_count()
- assert rfull.usage_count() == 1
+ assert rfull.client_count() == 1, "no auto-counting"
# window constructor
w = MapWindow.from_region(rfull)
@@ -106,8 +101,6 @@ class TestMMan(TestBase):
for item in (fc.path, fd):
ml = MapRegionList(item)
- assert ml.client_count() == 1
-
assert len(ml) == 0
assert ml.path_or_fd() == item
assert ml.file_size() == fc.size
diff --git a/smmap/util.py b/smmap/util.py
index 6daa1fa..1d20e50 100644
--- a/smmap/util.py
+++ b/smmap/util.py
@@ -177,6 +177,8 @@ class MapRegion(object):
os.close(fd)
# END only close it if we opened it
# END close file handle
+ # We assume the first one to use us keeps us around
+ self.increment_client_count()
def _read_into_memory(self, fd, offset, size):
""":return: string data as read from the given file descriptor, offset and size """
@@ -222,17 +224,25 @@ class MapRegion(object):
def client_count(self):
""":return: number of clients currently using this region"""
- from sys import getrefcount
- # -1: self on stack, -1 self in this method, -1 self in getrefcount
- return getrefcount(self) - 3
-
- def usage_count(self):
- """:return: amount of usages so far"""
return self._uc
- def increment_usage_count(self):
- """Adjust the usage count by the given positive or negative offset"""
- self._uc += 1
+ def increment_client_count(self, ofs = 1):
+ """Adjust the usage count by the given positive or negative offset.
+ If usage count equals 0, we will auto-release our resources
+ :return: True if we released resources, False otherwise. In the latter case, we can still be used"""
+ self._uc += ofs
+ assert self._uc > -1, "Increments must match decrements, usage counter negative: %i" % self._uc
+
+ if self.client_count() == 0:
+ self.release()
+ return True
+ else:
+ return False
+ # end handle release
+
+ def release(self):
+ """Release all resources this instance might hold. Must only be called if there usage_count() is zero"""
+ self._mf.close()
# re-define all methods which need offset adjustments in compatibility mode
if _need_compat_layer:
@@ -268,11 +278,6 @@ class MapRegionList(list):
self._path_or_fd = path_or_fd
self._file_size = None
- def client_count(self):
- """:return: amount of clients which hold a reference to this instance"""
- from sys import getrefcount
- return getrefcount(self) - 3
-
def path_or_fd(self):
""":return: path or file descriptor we are attached to"""
return self._path_or_fd