diff options
| author | Antoine Musso <hashar@free.fr> | 2014-11-16 22:26:49 +0100 |
|---|---|---|
| committer | Antoine Musso <hashar@free.fr> | 2014-11-16 22:33:36 +0100 |
| commit | 01dac15d2d2ad916083559747c3df497921cdec9 (patch) | |
| tree | 67083145d9dfcd605ef88956a84c0f0a90341431 | |
| parent | f4b6b2508cf164be237b2fdeaca01be7153efe8c (diff) | |
| download | smmap-01dac15d2d2ad916083559747c3df497921cdec9.tar.gz | |
pep8 linting
E201 whitespace after '('
E203 whitespace before ','
E221 multiple spaces before operator
E225 missing whitespace around operator
E227 missing whitespace around bitwise or shift operator
E231 missing whitespace after ','
E251 unexpected spaces around keyword / parameter equals
W291 trailing whitespace
W293 blank line contains whitespace
E302 expected 2 blank lines, found 1
E303 too many blank lines (3)
W391 blank line at end of file
| -rw-r--r-- | smmap/buf.py | 47 | ||||
| -rw-r--r-- | smmap/exc.py | 6 | ||||
| -rw-r--r-- | smmap/mman.py | 249 | ||||
| -rw-r--r-- | smmap/test/lib.py | 22 | ||||
| -rw-r--r-- | smmap/test/test_buf.py | 7 | ||||
| -rw-r--r-- | smmap/test/test_mman.py | 3 | ||||
| -rw-r--r-- | smmap/test/test_tutorial.py | 40 | ||||
| -rw-r--r-- | smmap/test/test_util.py | 51 | ||||
| -rw-r--r-- | smmap/util.py | 93 |
9 files changed, 258 insertions, 260 deletions
diff --git a/smmap/buf.py b/smmap/buf.py index ef9d49e..66029cb 100644 --- a/smmap/buf.py +++ b/smmap/buf.py @@ -10,12 +10,12 @@ except NameError: class SlidingWindowMapBuffer(object): - """A buffer like object which allows direct byte-wise object and slicing into + """A buffer like object which allows direct byte-wise object and slicing into memory of a mapped file. The mapping is controlled by the provided cursor. - - The buffer is relative, that is if you map an offset, index 0 will map to the + + The buffer is relative, that is if you map an offset, index 0 will map to the first byte at the offset you used during initialization or begin_access - + **Note:** Although this type effectively hides the fact that there are mapped windows underneath, it can unfortunately not be used in any non-pure python method which needs a buffer or string""" @@ -23,12 +23,11 @@ class SlidingWindowMapBuffer(object): '_c', # our cursor '_size', # our supposed size ) - - - def __init__(self, cursor = None, offset = 0, size = sys.maxsize, flags = 0): + + def __init__(self, cursor=None, offset=0, size=sys.maxsize, flags=0): """Initalize the instance to operate on the given cursor. :param cursor: if not None, the associated cursor to the file you want to access - If None, you have call begin_access before using the buffer and provide a cursor + If None, you have call begin_access before using the buffer and provide a cursor :param offset: absolute offset in bytes :param size: the total size of the mapping. Defaults to the maximum possible size From that point on, the __len__ of the buffer will be the given size or the file size. @@ -44,10 +43,10 @@ class SlidingWindowMapBuffer(object): def __del__(self): self.end_access() - + def __len__(self): return self._size - + def __getitem__(self, i): if isinstance(i, slice): return self.__getslice__(i.start or 0, i.stop or self._size) @@ -59,10 +58,10 @@ class SlidingWindowMapBuffer(object): c.use_region(i, 1) # END handle region usage return c.buffer()[i-c.ofs_begin()] - + def __getslice__(self, i, j): c = self._c - # fast path, slice fully included - safes a concatenate operation and + # fast path, slice fully included - safes a concatenate operation and # should be the default assert c.is_valid() if i < 0: @@ -91,18 +90,18 @@ class SlidingWindowMapBuffer(object): return bytes().join(md) # END fast or slow path #{ Interface - - def begin_access(self, cursor = None, offset = 0, size = sys.maxsize, flags = 0): + + def begin_access(self, cursor=None, offset=0, size=sys.maxsize, flags=0): """Call this before the first use of this instance. The method was already called by the constructor in case sufficient information was provided. - + For more information no the parameters, see the __init__ method - :param path: if cursor is None the existing one will be used. + :param path: if cursor is None the existing one will be used. :return: True if the buffer can be used""" if cursor: self._c = cursor #END update our cursor - + # reuse existing cursors if possible if self._c is not None and self._c.is_associated(): res = self._c.use_region(offset, size, flags).is_valid() @@ -114,27 +113,25 @@ class SlidingWindowMapBuffer(object): if size > self._c.file_size(): size = self._c.file_size() - offset #END handle size - self._size = size + self._size = size #END set size return res # END use our cursor return False - + def end_access(self): - """Call this method once you are done using the instance. It is automatically + """Call this method once you are done using the instance. It is automatically called on destruction, and should be called just in time to allow system resources to be freed. - + Once you called end_access, you must call begin access before reusing this instance!""" self._size = 0 if self._c is not None: self._c.unuse_region() #END unuse region - + def cursor(self): """:return: the currently set cursor which provides access to the data""" return self._c - - #}END interface - + #}END interface diff --git a/smmap/exc.py b/smmap/exc.py index f0ed7dc..5e90cf7 100644 --- a/smmap/exc.py +++ b/smmap/exc.py @@ -1,7 +1,9 @@ """Module with system exceptions""" + class MemoryManagerError(Exception): """Base class for all exceptions thrown by the memory manager""" - + + class RegionCollectionError(MemoryManagerError): - """Thrown if a memory region could not be collected, or if no region for collection was found""" + """Thrown if a memory region could not be collected, or if no region for collection was found""" diff --git a/smmap/mman.py b/smmap/mman.py index da6fd81..6663687 100644 --- a/smmap/mman.py +++ b/smmap/mman.py @@ -20,36 +20,36 @@ __all__ = ["StaticWindowMapManager", "SlidingWindowMapManager", "WindowCursor"] class WindowCursor(object): """ - Pointer into the mapped region of the memory manager, keeping the map + Pointer into the mapped region of the memory manager, keeping the map alive until it is destroyed and no other client uses it. Cursors should not be created manually, but are instead returned by the SlidingWindowMapManager - - **Note:**: The current implementation is suited for static and sliding window managers, but it also means - that it must be suited for the somewhat quite different sliding manager. It could be improved, but + + **Note:**: The current implementation is suited for static and sliding window managers, but it also means + that it must be suited for the somewhat quite different sliding manager. It could be improved, but I see no real need to do so.""" - __slots__ = ( + __slots__ = ( '_manager', # the manger keeping all file regions '_rlist', # a regions list with regions for our file '_region', # our current region or None '_ofs', # relative offset from the actually mapped area to our start area '_size' # maximum size we should provide ) - - def __init__(self, manager = None, regions = None): + + def __init__(self, manager=None, regions=None): self._manager = manager self._rlist = regions self._region = None self._ofs = 0 self._size = 0 - + def __del__(self): self._destroy() - + def _destroy(self): """Destruction code to decrement counters""" self.unuse_region() - + if self._rlist is not None: # Actual client count, which doesn't include the reference kept by the manager, nor ours # as we are about to be deleted @@ -67,7 +67,7 @@ class WindowCursor(object): pass #END exception handling #END handle regions - + def _copy_from(self, rhs): """Copy all data from rhs into this instance, handles usage count""" self._manager = rhs._manager @@ -75,41 +75,41 @@ class WindowCursor(object): self._region = rhs._region self._ofs = rhs._ofs self._size = rhs._size - + if self._region is not None: self._region.increment_usage_count() # END handle regions - + def __copy__(self): """copy module interface""" cpy = type(self)() cpy._copy_from(self) return cpy - + #{ Interface def assign(self, rhs): """Assign rhs to this instance. This is required in order to get a real copy. Alternativly, you can copy an existing instance using the copy module""" self._destroy() self._copy_from(rhs) - - def use_region(self, offset = 0, size = 0, flags = 0): + + def use_region(self, offset=0, size=0, flags=0): """Assure we point to a window which allows access to the given offset into the file - + :param offset: absolute offset in bytes into the file :param size: amount of bytes to map. If 0, all available bytes will be mapped :param flags: additional flags to be given to os.open in case a file handle is initially opened for mapping. Has no effect if a region can actually be reused. :return: this instance - it should be queried for whether it points to a valid memory region. This is not the case if the mapping failed because we reached the end of the file - + **Note:**: The size actually mapped may be smaller than the given size. If that is the case, either the file has reached its end, or the map was created between two existing regions""" need_region = True man = self._manager fsize = self._rlist.file_size() size = min(size or fsize, man.window_size() or fsize) # clamp size to window size - + if self._region is not None: if self._region.includes_ofs(offset): need_region = False @@ -117,91 +117,91 @@ class WindowCursor(object): self.unuse_region() # END handle existing region # END check existing region - + # offset too large ? if offset >= fsize: return self #END handle offset - + if need_region: self._region = man._obtain_region(self._rlist, offset, size, flags, False) #END need region handling - + self._region.increment_usage_count() self._ofs = offset - self._region._b self._size = min(size, self._region.ofs_end() - offset) - + return self - + def unuse_region(self): """Unuse the ucrrent region. Does nothing if we have no current region - + **Note:** the cursor unuses the region automatically upon destruction. It is recommended - to un-use the region once you are done reading from it in persistent cursors as it + to un-use the region once you are done reading from it in persistent cursors as it helps to free up resource more quickly""" self._region = None - # note: should reset ofs and size, but we spare that for performance. Its not + # note: should reset ofs and size, but we spare that for performance. Its not # allowed to query information if we are not valid ! def buffer(self): """Return a buffer object which allows access to our memory region from our offset to the window size. Please note that it might be smaller than you requested when calling use_region() - + **Note:** You can only obtain a buffer if this instance is_valid() ! - - **Note:** buffers should not be cached passed the duration of your access as it will + + **Note:** buffers should not be cached passed the duration of your access as it will prevent resources from being freed even though they might not be accounted for anymore !""" return buffer(self._region.buffer(), self._ofs, self._size) - + def map(self): """ :return: the underlying raw memory map. Please not that the offset and size is likely to be different to what you set as offset and size. Use it only if you are sure about the region it maps, which is the whole file in case of StaticWindowMapManager""" return self._region.map() - + def is_valid(self): """:return: True if we have a valid and usable region""" return self._region is not None - + def is_associated(self): """:return: True if we are associated with a specific file already""" return self._rlist is not None - + def ofs_begin(self): """:return: offset to the first byte pointed to by our cursor - + **Note:** only if is_valid() is True""" return self._region._b + self._ofs - + def ofs_end(self): """:return: offset to one past the last available byte""" # unroll method calls for performance ! return self._region._b + self._ofs + self._size - + def size(self): """:return: amount of bytes we point to""" return self._size - + def region_ref(self): """:return: weak ref to our mapped region. :raise AssertionError: if we have no current region. This is only useful for debugging""" if self._region is None: raise AssertionError("region not set") return ref(self._region) - + def includes_ofs(self, ofs): - """:return: True if the given absolute offset is contained in the cursors + """:return: True if the given absolute offset is contained in the cursors current region - + **Note:** cursor must be valid for this to work""" # unroll methods return (self._region._b + self._ofs) <= ofs < (self._region._b + self._ofs + self._size) - + def file_size(self): """:return: size of the underlying file""" return self._rlist.file_size() - + def path_or_fd(self): """:return: path or file descriptor of the underlying mapped file""" return self._rlist.path_or_fd() @@ -213,32 +213,32 @@ class WindowCursor(object): raise ValueError("Path queried although mapping was applied to a file descriptor") # END handle type return self._rlist.path_or_fd() - + def fd(self): """:return: file descriptor used to create the underlying mapping. - + **Note:** it is not required to be valid anymore :raise ValueError: if the mapping was not created by a file descriptor""" if isinstance(self._rlist.path_or_fd(), string_types()): raise ValueError("File descriptor queried although mapping was generated from path") #END handle type return self._rlist.path_or_fd() - + #} END interface - - + + class StaticWindowMapManager(object): """Provides a manager which will produce single size cursors that are allowed to always map the whole file. - + Clients must be written to specifically know that they are accessing their data through a StaticWindowMapManager, as they otherwise have to deal with their window size. - + These clients would have to use a SlidingWindowMapBuffer to hide this fact. - - This type will always use a maximum window size, and optimize certain methods to + + This type will always use a maximum window size, and optimize certain methods to accommodate this fact""" - + __slots__ = [ '_fdict', # mapping of path -> StorageHelper (of some kind '_window_size', # maximum size of a window @@ -247,26 +247,26 @@ class StaticWindowMapManager(object): '_memory_size', # currently allocated memory size '_handle_count', # amount of currently allocated file handles ] - + #{ Configuration MapRegionListCls = MapRegionList MapWindowCls = MapWindow MapRegionCls = MapRegion WindowCursorCls = WindowCursor #} END configuration - + _MB_in_bytes = 1024 * 1024 - - def __init__(self, window_size = 0, max_memory_size = 0, max_open_handles = sys.maxsize): + + def __init__(self, window_size=0, max_memory_size=0, max_open_handles=sys.maxsize): """initialize the manager with the given parameters. - :param window_size: if -1, a default window size will be chosen depending on + :param window_size: if -1, a default window size will be chosen depending on the operating system's architecture. It will internally be quantified to a multiple of the page size If 0, the window may have any size, which basically results in mapping the whole file at one :param max_memory_size: maximum amount of memory we may map at once before releasing mapped regions. If 0, a viable default will be set depending on the system's architecture. It is a soft limit that is tried to be kept, but nothing bad happens if we have to over-allocate :param max_open_handles: if not maxint, limit the amount of open file handles to the given number. - Otherwise the amount is only limited by the system itself. If a system or soft limit is hit, + Otherwise the amount is only limited by the system itself. If a system or soft limit is hit, the manager will free as many handles as possible""" self._fdict = dict() self._window_size = window_size @@ -274,7 +274,7 @@ class StaticWindowMapManager(object): self._max_handle_count = max_open_handles self._memory_size = 0 self._handle_count = 0 - + if window_size < 0: coeff = 64 if is_64_bit(): @@ -282,7 +282,7 @@ class StaticWindowMapManager(object): #END handle arch self._window_size = coeff * self._MB_in_bytes # END handle max window size - + if max_memory_size == 0: coeff = 1024 if is_64_bit(): @@ -290,18 +290,18 @@ class StaticWindowMapManager(object): #END handle arch self._max_memory_size = coeff * self._MB_in_bytes #END handle max memory size - + #{ Internal Methods - + def _collect_lru_region(self, size): """Unmap the region which was least-recently used and has no client :param size: size of the region we want to map next (assuming its not already mapped partially or full if 0, we try to free any available region :return: Amount of freed regions - + **Note:** We don't raise exceptions anymore, in order to keep the system working, allowing temporary overallocation. If the system runs out of memory, it will tell. - + **todo:** implement a case where all unusued regions are discarded efficiently. Currently its only brute force""" num_found = 0 while (size == 0) or (self._memory_size + size > self._max_memory_size): @@ -310,34 +310,34 @@ class StaticWindowMapManager(object): for regions in self._fdict.values(): for region in regions: # check client count - consider that we keep one reference ourselves ! - if (region.client_count()-2 == 0 and + if (region.client_count()-2 == 0 and (lru_region is None or region._uc < lru_region._uc)): lru_region = region lru_list = regions # END update lru_region #END for each region #END for each regions list - + if lru_region is None: break #END handle region not found - + num_found += 1 del(lru_list[lru_list.index(lru_region)]) self._memory_size -= lru_region.size() self._handle_count -= 1 #END while there is more memory to free return num_found - + def _obtain_region(self, a, offset, size, flags, is_recursive): - """Utilty to create a new region - for more information on the parameters, + """Utilty to create a new region - for more information on the parameters, see MapCursor.use_region. :param a: A regions (a)rray :return: The newly created region""" if self._memory_size + size > self._max_memory_size: self._collect_lru_region(size) #END handle collection - + r = None if a: assert len(a) == 1 @@ -351,40 +351,40 @@ class StaticWindowMapManager(object): # like reading a file from disk, etc) we free up as much as possible # As this invalidates our insert position, we have to recurse here if is_recursive: - # we already tried this, and still have no success in obtaining + # we already tried this, and still have no success in obtaining # a mapping. This is an exception, so we propagate it raise #END handle existing recursion self._collect_lru_region(0) - return self._obtain_region(a, offset, size, flags, True) + return self._obtain_region(a, offset, size, flags, True) #END handle exceptions - + self._handle_count += 1 self._memory_size += r.size() a.append(r) # END handle array - + assert r.includes_ofs(offset) return r #}END internal methods - - #{ Interface + + #{ Interface def make_cursor(self, path_or_fd): """ - :return: a cursor pointing to the given path or file descriptor. + :return: a cursor pointing to the given path or file descriptor. It can be used to map new regions of the file into memory - + **Note:** if a file descriptor is given, it is assumed to be open and valid, but may be closed afterwards. To refer to the same file, you may reuse your existing file descriptor, but keep in mind that new windows can only be mapped as long as it stays valid. This is why the using actual file paths are preferred unless you plan to keep the file descriptor open. - - **Note:** file descriptors are problematic as they are not necessarily unique, as two + + **Note:** file descriptors are problematic as they are not necessarily unique, as two different files opened and closed in succession might have the same file descriptor id. - - **Note:** Using file descriptors directly is faster once new windows are mapped as it + + **Note:** Using file descriptors directly is faster once new windows are mapped as it prevents the file to be opened again just for the purpose of mapping it.""" regions = self._fdict.get(path_or_fd) if regions is None: @@ -392,92 +392,91 @@ class StaticWindowMapManager(object): self._fdict[path_or_fd] = regions # END obtain region for path return self.WindowCursorCls(self, regions) - + def collect(self): """Collect all available free-to-collect mapped regions :return: Amount of freed handles""" return self._collect_lru_region(0) - + def num_file_handles(self): """:return: amount of file handles in use. Each mapped region uses one file handle""" return self._handle_count - + def num_open_files(self): """Amount of opened files in the system""" - return reduce(lambda x,y: x+y, (1 for rlist in self._fdict.values() if len(rlist) > 0), 0) - + return reduce(lambda x, y: x+y, (1 for rlist in self._fdict.values() if len(rlist) > 0), 0) + def window_size(self): """:return: size of each window when allocating new regions""" return self._window_size - + def mapped_memory_size(self): """:return: amount of bytes currently mapped in total""" return self._memory_size - + def max_file_handles(self): """:return: maximium amount of handles we may have opened""" return self._max_handle_count - + def max_mapped_memory_size(self): """:return: maximum amount of memory we may allocate""" return self._max_memory_size - + #} END interface - + #{ Special Purpose Interface - + def force_map_handle_removal_win(self, base_path): """ONLY AVAILABLE ON WINDOWS On windows removing files is not allowed if anybody still has it opened. If this process is ourselves, and if the whole process uses this memory manager (as far as the parent framework is concerned) we can enforce - closing all memory maps whose path matches the given base path to + closing all memory maps whose path matches the given base path to allow the respective operation after all. The respective system must NOT access the closed memory regions anymore ! - This really may only be used if you know that the items which keep + This really may only be used if you know that the items which keep the cursors alive will not be using it anymore. They need to be recreated ! :return: Amount of closed handles - + **Note:** does nothing on non-windows platforms""" if sys.platform != 'win32': return #END early bailout - + num_closed = 0 for path, rlist in self._fdict.items(): if path.startswith(base_path): for region in rlist: region._mf.close() num_closed += 1 - #END path matches + #END path matches #END for each path return num_closed #} END special purpose interface - - - + + class SlidingWindowMapManager(StaticWindowMapManager): - """Maintains a list of ranges of mapped memory regions in one or more files and allows to easily + """Maintains a list of ranges of mapped memory regions in one or more files and allows to easily obtain additional regions assuring there is no overlap. - Once a certain memory limit is reached globally, or if there cannot be more open file handles + Once a certain memory limit is reached globally, or if there cannot be more open file handles which result from each mmap call, the least recently used, and currently unused mapped regions are unloaded automatically. - + **Note:** currently not thread-safe ! - + **Note:** in the current implementation, we will automatically unload windows if we either cannot - create more memory maps (as the open file handles limit is hit) or if we have allocated more than + create more memory maps (as the open file handles limit is hit) or if we have allocated more than a safe amount of memory already, which would possibly cause memory allocations to fail as our address space is full.""" - + __slots__ = tuple() - - def __init__(self, window_size = -1, max_memory_size = 0, max_open_handles = sys.maxsize): + + def __init__(self, window_size=-1, max_memory_size=0, max_open_handles=sys.maxsize): """Adjusts the default window size to -1""" super(SlidingWindowMapManager, self).__init__(window_size, max_memory_size, max_open_handles) - + def _obtain_region(self, a, offset, size, flags, is_recursive): - # bisect to find an existing region. The c++ implementation cannot + # bisect to find an existing region. The c++ implementation cannot # do that as it uses a linked list for regions. r = None lo = 0 @@ -495,20 +494,20 @@ class SlidingWindowMapManager(StaticWindowMapManager): hi = mid #END handle position #END while bisecting - + if r is None: window_size = self._window_size left = self.MapWindowCls(0, 0) mid = self.MapWindowCls(offset, size) right = self.MapWindowCls(a.file_size(), 0) - + # we want to honor the max memory size, and assure we have anough # memory available # Save calls ! if self._memory_size + window_size > self._max_memory_size: self._collect_lru_region(window_size) #END handle collection - + # we assume the list remains sorted by offset insert_pos = 0 len_regions = len(a) @@ -526,29 +525,29 @@ class SlidingWindowMapManager(StaticWindowMapManager): #END if insert position is correct #END for each region # END obtain insert pos - - # adjust the actual offset and size values to create the largest + + # adjust the actual offset and size values to create the largest # possible mapping if insert_pos == 0: if len_regions: right = self.MapWindowCls.from_region(a[insert_pos]) - #END adjust right side + #END adjust right side else: if insert_pos != len_regions: right = self.MapWindowCls.from_region(a[insert_pos]) # END adjust right window left = self.MapWindowCls.from_region(a[insert_pos - 1]) #END adjust surrounding windows - + mid.extend_left_to(left, window_size) mid.extend_right_to(right, window_size) mid.align() - + # it can happen that we align beyond the end of the file if mid.ofs_end() > right.ofs: mid.size = right.ofs - mid.ofs #END readjust size - + # insert new region at the right offset to keep the order try: if self._handle_count >= self._max_handle_count: @@ -561,18 +560,16 @@ class SlidingWindowMapManager(StaticWindowMapManager): # like reading a file from disk, etc) we free up as much as possible # As this invalidates our insert position, we have to recurse here if is_recursive: - # we already tried this, and still have no success in obtaining + # we already tried this, and still have no success in obtaining # a mapping. This is an exception, so we propagate it raise #END handle existing recursion self._collect_lru_region(0) - return self._obtain_region(a, offset, size, flags, True) + return self._obtain_region(a, offset, size, flags, True) #END handle exceptions - + self._handle_count += 1 self._memory_size += r.size() a.insert(insert_pos, r) # END create new region return r - - diff --git a/smmap/test/lib.py b/smmap/test/lib.py index 01f6cc9..67aec63 100644 --- a/smmap/test/lib.py +++ b/smmap/test/lib.py @@ -13,18 +13,18 @@ class FileCreator(object): and provides this info to the user. Once it gets deleted, it will remove the temporary file as well.""" __slots__ = ("_size", "_path") - + def __init__(self, size, prefix=''): assert size, "Require size to be larger 0" - + self._path = tempfile.mktemp(prefix=prefix) self._size = size - + fp = open(self._path, "wb") fp.seek(size-1) fp.write(b'1') fp.close() - + assert os.path.getsize(self.path) == size def __del__(self): @@ -33,33 +33,33 @@ class FileCreator(object): except OSError: pass #END exception handling - @property def path(self): return self._path - + @property def size(self): return self._size #} END utilities + class TestBase(TestCase): """Foundation used by all tests""" - + #{ Configuration k_window_test_size = 1000 * 1000 * 8 + 5195 #} END configuration - + #{ Overrides @classmethod def setUpAll(cls): # nothing for now pass - + #END overrides - + #{ Interface - + #} END interface diff --git a/smmap/test/test_buf.py b/smmap/test/test_buf.py index d3e51e2..d07b7f4 100644 --- a/smmap/test/test_buf.py +++ b/smmap/test/test_buf.py @@ -3,7 +3,7 @@ from __future__ import print_function from .lib import TestBase, FileCreator from smmap.mman import ( - SlidingWindowMapManager, + SlidingWindowMapManager, StaticWindowMapManager ) from smmap.buf import SlidingWindowMapBuffer @@ -22,6 +22,7 @@ man_worst_case = SlidingWindowMapManager( max_open_handles=15) static_man = StaticWindowMapManager() + class TestBuf(TestBase): def test_basics(self): @@ -82,7 +83,7 @@ class TestBuf(TestBase): max_num_accesses = 100 fd = os.open(fc.path, os.O_RDONLY) for item in (fc.path, fd): - for manager, man_id in ( (man_optimal, 'optimal'), + for manager, man_id in ((man_optimal, 'optimal'), (man_worst_case, 'worst case'), (static_man, 'static optimal')): buf = SlidingWindowMapBuffer(manager.make_cursor(item)) @@ -114,7 +115,7 @@ class TestBuf(TestBase): assert manager.num_file_handles() assert manager.collect() assert manager.num_file_handles() == 0 - elapsed = max(time() - st, 0.001) # prevent zero division errors on windows + elapsed = max(time() - st, 0.001) # prevent zero division errors on windows mb = float(1000*1000) mode_str = (access_mode and "slice") or "single byte" print("%s: Made %i random %s accesses to buffer created from %s reading a total of %f mb in %f s (%f mb/s)" diff --git a/smmap/test/test_mman.py b/smmap/test/test_mman.py index cc5d914..d903af6 100644 --- a/smmap/test/test_mman.py +++ b/smmap/test/test_mman.py @@ -15,6 +15,7 @@ import os import sys from copy import copy + class TestMMan(TestBase): def test_cursor(self): @@ -101,7 +102,7 @@ class TestMMan(TestBase): fd = os.open(fc.path, os.O_RDONLY) max_num_handles = 15 #small_size = - for mtype, args in ( (StaticWindowMapManager, (0, fc.size // 3, max_num_handles)), + for mtype, args in ((StaticWindowMapManager, (0, fc.size // 3, max_num_handles)), (SlidingWindowMapManager, (fc.size // 100, fc.size // 3, max_num_handles)),): for item in (fc.path, fd): assert len(data) == fc.size diff --git a/smmap/test/test_tutorial.py b/smmap/test/test_tutorial.py index ccc113b..5c931de 100644 --- a/smmap/test/test_tutorial.py +++ b/smmap/test/test_tutorial.py @@ -1,7 +1,8 @@ from .lib import TestBase + class TestTutorial(TestBase): - + def test_example(self): # Memory Managers ################## @@ -9,76 +10,75 @@ class TestTutorial(TestBase): # This instance should be globally available in your application # It is configured to be well suitable for 32-bit or 64 bit applications. mman = smmap.SlidingWindowMapManager() - + # the manager provides much useful information about its current state # like the amount of open file handles or the amount of mapped memory assert mman.num_file_handles() == 0 assert mman.mapped_memory_size() == 0 # and many more ... - + # Cursors ########## import smmap.test.lib fc = smmap.test.lib.FileCreator(1024*1024*8, "test_file") - + # obtain a cursor to access some file. c = mman.make_cursor(fc.path) - + # the cursor is now associated with the file, but not yet usable assert c.is_associated() assert not c.is_valid() - - # before you can use the cursor, you have to specify a window you want to + + # before you can use the cursor, you have to specify a window you want to # access. The following just says you want as much data as possible starting # from offset 0. # To be sure your region could be mapped, query for validity assert c.use_region().is_valid() # use_region returns self - + # once a region was mapped, you must query its dimension regularly # to assure you don't try to access its buffer out of its bounds assert c.size() c.buffer()[0] # first byte c.buffer()[1:10] # first 9 bytes c.buffer()[c.size()-1] # last byte - + # its recommended not to create big slices when feeding the buffer - # into consumers (e.g. struct or zlib). + # into consumers (e.g. struct or zlib). # Instead, either give the buffer directly, or use pythons buffer command. from smmap.util import buffer buffer(c.buffer(), 1, 9) # first 9 bytes without copying them - + # you can query absolute offsets, and check whether an offset is included # in the cursor's data. assert c.ofs_begin() < c.ofs_end() assert c.includes_ofs(100) - - # If you are over out of bounds with one of your region requests, the + + # If you are over out of bounds with one of your region requests, the # cursor will be come invalid. It cannot be used in that state assert not c.use_region(fc.size, 100).is_valid() # map as much as possible after skipping the first 100 bytes assert c.use_region(100).is_valid() - + # You can explicitly free cursor resources by unusing the cursor's region c.unuse_region() assert not c.is_valid() - + # Buffers ######### # Create a default buffer which can operate on the whole file buf = smmap.SlidingWindowMapBuffer(mman.make_cursor(fc.path)) - + # you can use it right away assert buf.cursor().is_valid() - + buf[0] # access the first byte buf[-1] # access the last ten bytes on the file buf[-10:]# access the last ten bytes - + # If you want to keep the instance between different accesses, use the # dedicated methods buf.end_access() assert not buf.cursor().is_valid() # you cannot use the buffer anymore assert buf.begin_access(offset=10) # start using the buffer at an offset - + # it will stop using resources automatically once it goes out of scope - diff --git a/smmap/test/test_util.py b/smmap/test/test_util.py index 745da83..745fedf 100644 --- a/smmap/test/test_util.py +++ b/smmap/test/test_util.py @@ -12,18 +12,19 @@ from smmap.util import ( import os import sys + class TestMMan(TestBase): - + def test_window(self): wl = MapWindow(0, 1) # left wc = MapWindow(1, 1) # center wc2 = MapWindow(10, 5) # another center wr = MapWindow(8000, 50) # right - + assert wl.ofs_end() == 1 assert wc.ofs_end() == 2 assert wr.ofs_end() == 8050 - + # extension does nothing if already in place maxsize = 100 wc.extend_left_to(wl, maxsize) @@ -31,34 +32,33 @@ class TestMMan(TestBase): wl.extend_right_to(wc, maxsize) wl.extend_right_to(wc, maxsize) assert wl.ofs == 0 and wl.size == 1 - + # an actual left extension pofs_end = wc2.ofs_end() wc2.extend_left_to(wc, maxsize) - assert wc2.ofs == wc.ofs_end() and pofs_end == wc2.ofs_end() - - + assert wc2.ofs == wc.ofs_end() and pofs_end == wc2.ofs_end() + # respects maxsize wc.extend_right_to(wr, maxsize) assert wc.ofs == 1 and wc.size == maxsize wc.extend_right_to(wr, maxsize) assert wc.ofs == 1 and wc.size == maxsize - + # without maxsize wc.extend_right_to(wr, sys.maxsize) assert wc.ofs_end() == wr.ofs and wc.ofs == 1 - + # extend left wr.extend_left_to(wc2, maxsize) wr.extend_left_to(wc2, maxsize) assert wr.size == maxsize - + wr.extend_left_to(wc2, sys.maxsize) assert wr.ofs == wc2.ofs_end() - + wc.align() assert wc.ofs == 0 and wc.size == align_to_mmap(wc.size, True) - + def test_region(self): fc = FileCreator(self.k_window_test_size, "window_test") half_size = fc.size // 2 @@ -66,56 +66,55 @@ class TestMMan(TestBase): rfull = MapRegion(fc.path, 0, fc.size) rhalfofs = MapRegion(fc.path, rofs, fc.size) rhalfsize = MapRegion(fc.path, 0, half_size) - + # offsets assert rfull.ofs_begin() == 0 and rfull.size() == fc.size assert rfull.ofs_end() == fc.size # if this method works, it works always - + assert rhalfofs.ofs_begin() == rofs and rhalfofs.size() == fc.size - rofs assert rhalfsize.ofs_begin() == 0 and rhalfsize.size() == half_size - + assert rfull.includes_ofs(0) and rfull.includes_ofs(fc.size-1) and rfull.includes_ofs(half_size) assert not rfull.includes_ofs(-1) and not rfull.includes_ofs(sys.maxsize) - # with the values we have, this test only works on windows where an alignment + # with the values we have, this test only works on windows where an alignment # size of 4096 is assumed. - # We only test on linux as it is inconsitent between the python versions + # We only test on linux as it is inconsitent between the python versions # as they use different mapping techniques to circumvent the missing offset # argument of mmap. if sys.platform != 'win32': assert rhalfofs.includes_ofs(rofs) and not rhalfofs.includes_ofs(0) #END handle platforms - + # auto-refcount assert rfull.client_count() == 1 rfull2 = rfull assert rfull.client_count() == 2 - + # usage assert rfull.usage_count() == 0 rfull.increment_usage_count() assert rfull.usage_count() == 1 - + # window constructor w = MapWindow.from_region(rfull) assert w.ofs == rfull.ofs_begin() and w.ofs_end() == rfull.ofs_end() - + def test_region_list(self): fc = FileCreator(100, "sample_file") - + fd = os.open(fc.path, os.O_RDONLY) for item in (fc.path, fd): ml = MapRegionList(item) - + assert ml.client_count() == 1 - + assert len(ml) == 0 assert ml.path_or_fd() == item assert ml.file_size() == fc.size #END handle input os.close(fd) - + def test_util(self): assert isinstance(is_64_bit(), bool) # just call it assert align_to_mmap(1, False) == 0 assert align_to_mmap(1, True) == ALLOCATIONGRANULARITY - diff --git a/smmap/util.py b/smmap/util.py index 44e9412..137c19d 100644 --- a/smmap/util.py +++ b/smmap/util.py @@ -12,7 +12,7 @@ except ImportError: from mmap import PAGESIZE as ALLOCATIONGRANULARITY #END handle pythons missing quality assurance -__all__ = [ "align_to_mmap", "is_64_bit", "buffer", +__all__ = ["align_to_mmap", "is_64_bit", "buffer", "MapWindow", "MapRegion", "MapRegionList", "ALLOCATIONGRANULARITY"] #{ Utilities @@ -27,6 +27,7 @@ except NameError: # doing it directly is much faster ! return obj[offset:offset+size] + def string_types(): if sys.version_info[0] >= 3: return str @@ -37,7 +38,7 @@ def string_types(): def align_to_mmap(num, round_up): """ Align the given integer number to the closest page offset, which usually is 4096 bytes. - + :param round_up: if True, the next higher multiple of page size is used, otherwise the lower page_size will be used (i.e. if True, 1 becomes 4096, otherwise it becomes 0) :return: num rounded to closest page""" @@ -46,15 +47,16 @@ def align_to_mmap(num, round_up): res += ALLOCATIONGRANULARITY #END handle size return res; - + + def is_64_bit(): """:return: True if the system is 64 bit. Otherwise it can be assumed to be 32 bit""" - return sys.maxsize > (1<<32) - 1 + return sys.maxsize > (1 << 32) - 1 #}END utilities -#{ Utility Classes +#{ Utility Classes class MapWindow(object): """Utility type which is used to snap windows towards each other, and to adjust their size""" @@ -68,7 +70,7 @@ class MapWindow(object): self.size = size def __repr__(self): - return "MapWindow(%i, %i)" % (self.ofs, self.size) + return "MapWindow(%i, %i)" % (self.ofs, self.size) @classmethod def from_region(cls, region): @@ -86,7 +88,7 @@ class MapWindow(object): self.size = align_to_mmap(self.size, 1) def extend_left_to(self, window, max_size): - """Adjust the offset to start where the given window on our left ends if possible, + """Adjust the offset to start where the given window on our left ends if possible, but don't make yourself larger than max_size. The resize will assure that the new window still contains the old window area""" rofs = self.ofs - window.ofs_end() @@ -103,10 +105,10 @@ class MapWindow(object): class MapRegion(object): """Defines a mapped region of memory, aligned to pagesizes - + **Note:** deallocates used region automatically on destruction""" __slots__ = [ - '_b' , # beginning of mapping + '_b', # beginning of mapping '_mf', # mapped memory chunk (as returned by mmap) '_uc', # total amount of usages '_size', # cached size of our memory map @@ -117,32 +119,31 @@ class MapRegion(object): if _need_compat_layer: __slots__.append('_mfb') # mapped memory buffer to provide offset #END handle additional slot - + #{ Configuration # Used for testing only. If True, all data will be loaded into memory at once. # This makes sure no file handles will remain open. _test_read_into_memory = False #} END configuration - - - def __init__(self, path_or_fd, ofs, size, flags = 0): + + def __init__(self, path_or_fd, ofs, size, flags=0): """Initialize a region, allocate the memory map :param path_or_fd: path to the file to map, or the opened file descriptor - :param ofs: **aligned** offset into the file to be mapped + :param ofs: **aligned** offset into the file to be mapped :param size: if size is larger then the file on disk, the whole file will be allocated the the size automatically adjusted - :param flags: additional flags to be given when opening the file. + :param flags: additional flags to be given when opening the file. :raise Exception: if no memory can be allocated""" self._b = ofs self._size = 0 self._uc = 0 - + if isinstance(path_or_fd, int): fd = path_or_fd else: - fd = os.open(path_or_fd, os.O_RDONLY|getattr(os, 'O_BINARY', 0)|flags) + fd = os.open(path_or_fd, os.O_RDONLY | getattr(os, 'O_BINARY', 0) | flags) #END handle fd - + try: kwargs = dict(access=ACCESS_READ, offset=ofs) corrected_size = size @@ -152,8 +153,8 @@ class MapRegion(object): corrected_size += ofs sizeofs = 0 # END handle python not supporting offset ! Arg - - # have to correct size, otherwise (instead of the c version) it will + + # have to correct size, otherwise (instead of the c version) it will # bark that the size is too large ... many extra file accesses because # if this ... argh ! actual_size = min(os.fstat(fd).st_size - sizeofs, corrected_size) @@ -162,9 +163,9 @@ class MapRegion(object): else: self._mf = mmap(fd, actual_size, **kwargs) #END handle memory mode - + self._size = len(self._mf) - + if self._need_compat_layer: self._mfb = buffer(self._mf, ofs, self._size) #END handle buffer wrapping @@ -173,7 +174,7 @@ class MapRegion(object): os.close(fd) #END only close it if we opened it #END close file handle - + def _read_into_memory(self, fd, offset, size): """:return: string data as read from the given file descriptor, offset and size """ os.lseek(fd, offset, os.SEEK_SET) @@ -186,92 +187,92 @@ class MapRegion(object): mf += d #END loop copy items return mf - + def __repr__(self): return "MapRegion<%i, %i>" % (self._b, self.size()) - + #{ Interface def buffer(self): """:return: a buffer containing the memory""" return self._mf - + def map(self): """:return: a memory map containing the memory""" return self._mf - + def ofs_begin(self): """:return: absolute byte offset to the first byte of the mapping""" return self._b - + def size(self): """:return: total size of the mapped region in bytes""" return self._size - + def ofs_end(self): """:return: Absolute offset to one byte beyond the mapping into the file""" return self._b + self._size - + def includes_ofs(self, ofs): """:return: True if the given offset can be read in our mapped region""" return self._b <= ofs < self._b + self._size - + def client_count(self): """:return: number of clients currently using this region""" from sys import getrefcount # -1: self on stack, -1 self in this method, -1 self in getrefcount return getrefcount(self)-3 - + def usage_count(self): """:return: amount of usages so far""" return self._uc - + def increment_usage_count(self): """Adjust the usage count by the given positive or negative offset""" self._uc += 1 - + # re-define all methods which need offset adjustments in compatibility mode if _need_compat_layer: def size(self): return self._size - self._b - + def ofs_end(self): # always the size - we are as large as it gets return self._size - + def buffer(self): return self._mfb - + def includes_ofs(self, ofs): return self._b <= ofs < self._size #END handle compat layer - + #} END interface - - + + class MapRegionList(list): """List of MapRegion instances associating a path with a list of regions.""" __slots__ = ( '_path_or_fd', # path or file descriptor which is mapped by all our regions '_file_size' # total size of the file we map ) - + def __new__(cls, path): return super(MapRegionList, cls).__new__(cls) - + def __init__(self, path_or_fd): self._path_or_fd = path_or_fd self._file_size = None - + def client_count(self): """:return: amount of clients which hold a reference to this instance""" from sys import getrefcount return getrefcount(self)-3 - + def path_or_fd(self): """:return: path or file descriptor we are attached to""" return self._path_or_fd - + def file_size(self): """:return: size of file we manager""" if self._file_size is None: @@ -282,5 +283,5 @@ class MapRegionList(list): #END handle path type #END update file size return self._file_size - + #} END utility classes |
