diff options
author | Wouter Bolsterlee <uws@xs4all.nl> | 2014-01-25 21:39:49 +0100 |
---|---|---|
committer | Wouter Bolsterlee <uws@xs4all.nl> | 2014-01-25 21:39:49 +0100 |
commit | 8481d317804e060b12839b571ef22306074fba9c (patch) | |
tree | 8a2c356601626b3f729d9ec71d1f5edffa57916e | |
parent | 87275df09b240858bc5bb731c8c71ba7e935ca13 (diff) | |
download | happybase-8481d317804e060b12839b571ef22306074fba9c.tar.gz |
Allow batch_size=None in Table.scan() to avoid filter incompatibilities
Allow None as a valid value for the batch_size argument to Table.scan(),
since HBase does not support specifying a batch size when some scanner
filters are used.
Fixes issue #54.
-rw-r--r-- | NEWS.rst | 5 |
-rw-r--r-- | happybase/table.py | 31 |
-rw-r--r-- | tests/test_api.py | 9 |
3 files changed, 33 insertions, 12 deletions
@@ -14,6 +14,11 @@ Release date: *not yet released*
   to :py:meth:`Table.scan` (`issue #39
   <https://github.com/wbolster/happybase/issues/39>`_).
 
+* Allow `None` as a valid value for the `batch_size` argument to
+  :py:meth:`Table.scan`, since HBase does not support specifying a batch size
+  when some scanner filters are used. (`issue #54
+  <https://github.com/wbolster/happybase/issues/54>`_).
+
 
 HappyBase 0.7
 -------------
diff --git a/happybase/table.py b/happybase/table.py
index ec67092..c81f4ce 100644
--- a/happybase/table.py
+++ b/happybase/table.py
@@ -254,6 +254,15 @@ class Table(object):
         this to a low value (or even 1) if your data is large, since a low
         batch size results in added round-trips to the server.
 
+        .. warning::
+
+           Not all HBase filters can be used in combination with a batch
+           size. Explicitly specify `None` for the `batch_size` argument
+           in those cases to override the default value. Failure to do
+           so can result in hard to debug errors (not HappyBase's
+           fault), such as a non-responsive connection. The HBase logs
+           may contain more useful information in these situations.
+
         **Compatibility notes:**
 
         * The `filter` argument is only available when using HBase 0.92
@@ -280,11 +289,11 @@ class Table(object):
         :return: generator yielding the rows matching the scan
         :rtype: iterable of `(row_key, row_data)` tuples
         """
-        if batch_size < 1:
-            raise ValueError("'batch_size' must be >= 1")
+        if batch_size is not None and batch_size < 1:
+            raise ValueError("'batch_size' must be >= 1 (or None)")
 
         if limit is not None and limit < 1:
-            raise ValueError("'limit' must be >= 1")
+            raise ValueError("'limit' must be >= 1 (or None)")
 
         if sorted_columns and self.connection.compat < '0.96':
             raise NotImplementedError(
@@ -349,16 +358,16 @@ class Table(object):
         n_returned = n_fetched = 0
         try:
             while True:
-                if limit is None:
-                    how_many = batch_size
+                if batch_size is None:
+                    how_many = 1
                 else:
-                    how_many = min(batch_size, limit - n_returned)
+                    how_many = batch_size
 
-                if how_many == 1:
-                    items = self.connection.client.scannerGet(scan_id)
-                else:
-                    items = self.connection.client.scannerGetList(
-                        scan_id, how_many)
+                if limit is not None:
+                    how_many = min(how_many, limit - n_returned)
+
+                items = self.connection.client.scannerGetList(
+                    scan_id, how_many)
 
                 n_fetched += len(items)
 
diff --git a/tests/test_api.py b/tests/test_api.py
index 2d22717..ed78f81 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -357,7 +357,7 @@ def test_scan():
         list(table.scan(row_prefix='foobar', row_start='xyz'))
 
     with assert_raises(ValueError):
-        list(table.scan(batch_size=None))
+        list(table.scan(batch_size=0))
 
     if connection.compat == '0.90':
         with assert_raises(NotImplementedError):
@@ -446,6 +446,13 @@ def test_scan_sorting():
             row.items())
 
 
+def test_scan_filter_and_batch_size():
+    # See issue #54
+    filter = "SingleColumnValueFilter ('cf1', 'qual1', =, 'binary:val1')"
+    for k, v in table.scan(filter=filter, batch_size=None):
+        print v
+
+
 def test_delete():
     row_key = 'row-test-delete'
     data = {'cf1:col1': 'v1',