summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Wouter Bolsterlee <uws@xs4all.nl> 2014-01-25 21:39:49 +0100
committer Wouter Bolsterlee <uws@xs4all.nl> 2014-01-25 21:39:49 +0100
commit 8481d317804e060b12839b571ef22306074fba9c (patch)
tree 8a2c356601626b3f729d9ec71d1f5edffa57916e
parent 87275df09b240858bc5bb731c8c71ba7e935ca13 (diff)
download happybase-8481d317804e060b12839b571ef22306074fba9c.tar.gz
Allow batch_size=None in Table.scan() to avoid filter incompatibilities
Allow None as a valid value for the batch_size argument to Table.scan(), since HBase does not support specifying a batch size when some scanner filters are used. Fixes issue #54.
-rw-r--r-- NEWS.rst 5
-rw-r--r-- happybase/table.py 31
-rw-r--r-- tests/test_api.py 9
3 files changed, 33 insertions, 12 deletions
diff --git a/NEWS.rst b/NEWS.rst
index caac0a3..6fd875c 100644
--- a/NEWS.rst
+++ b/NEWS.rst
@@ -14,6 +14,11 @@ Release date: *not yet released*
to :py:meth:`Table.scan` (`issue #39
<https://github.com/wbolster/happybase/issues/39>`_).
+* Allow `None` as a valid value for the `batch_size` argument to
+ :py:meth:`Table.scan`, since HBase does not support specifying a batch size
+ when some scanner filters are used. (`issue #54
+ <https://github.com/wbolster/happybase/issues/54>`_).
+
HappyBase 0.7
-------------
diff --git a/happybase/table.py b/happybase/table.py
index ec67092..c81f4ce 100644
--- a/happybase/table.py
+++ b/happybase/table.py
@@ -254,6 +254,15 @@ class Table(object):
this to a low value (or even 1) if your data is large, since a low
batch size results in added round-trips to the server.
+ .. warning::
+
+ Not all HBase filters can be used in combination with a batch
+ size. Explicitly specify `None` for the `batch_size` argument
+ in those cases to override the default value. Failure to do
+ so can result in hard to debug errors (not HappyBase's
+ fault), such as a non-responsive connection. The HBase logs
+ may contain more useful information in these situations.
+
**Compatibility notes:**
* The `filter` argument is only available when using HBase 0.92
@@ -280,11 +289,11 @@ class Table(object):
:return: generator yielding the rows matching the scan
:rtype: iterable of `(row_key, row_data)` tuples
"""
- if batch_size < 1:
- raise ValueError("'batch_size' must be >= 1")
+ if batch_size is not None and batch_size < 1:
+ raise ValueError("'batch_size' must be >= 1 (or None)")
if limit is not None and limit < 1:
- raise ValueError("'limit' must be >= 1")
+ raise ValueError("'limit' must be >= 1 (or None)")
if sorted_columns and self.connection.compat < '0.96':
raise NotImplementedError(
@@ -349,16 +358,16 @@ class Table(object):
n_returned = n_fetched = 0
try:
while True:
- if limit is None:
- how_many = batch_size
+ if batch_size is None:
+ how_many = 1
else:
- how_many = min(batch_size, limit - n_returned)
+ how_many = batch_size
- if how_many == 1:
- items = self.connection.client.scannerGet(scan_id)
- else:
- items = self.connection.client.scannerGetList(
- scan_id, how_many)
+ if limit is not None:
+ how_many = min(how_many, limit - n_returned)
+
+ items = self.connection.client.scannerGetList(
+ scan_id, how_many)
n_fetched += len(items)
diff --git a/tests/test_api.py b/tests/test_api.py
index 2d22717..ed78f81 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -357,7 +357,7 @@ def test_scan():
list(table.scan(row_prefix='foobar', row_start='xyz'))
with assert_raises(ValueError):
- list(table.scan(batch_size=None))
+ list(table.scan(batch_size=0))
if connection.compat == '0.90':
with assert_raises(NotImplementedError):
@@ -446,6 +446,13 @@ def test_scan_sorting():
row.items())
+def test_scan_filter_and_batch_size():
+ # See issue #54
+ filter = "SingleColumnValueFilter ('cf1', 'qual1', =, 'binary:val1')"
+ for k, v in table.scan(filter=filter, batch_size=None):
+ print v
+
+
def test_delete():
row_key = 'row-test-delete'
data = {'cf1:col1': 'v1',