diff options
author | Wouter Bolsterlee <uws@xs4all.nl> | 2014-01-25 21:39:49 +0100 |
---|---|---|
committer | Wouter Bolsterlee <uws@xs4all.nl> | 2014-01-25 21:39:49 +0100 |
commit | 8481d317804e060b12839b571ef22306074fba9c (patch) | |
tree | 8a2c356601626b3f729d9ec71d1f5edffa57916e /happybase/table.py | |
parent | 87275df09b240858bc5bb731c8c71ba7e935ca13 (diff) | |
download | happybase-8481d317804e060b12839b571ef22306074fba9c.tar.gz |
Allow batch_size=None in Table.scan() to avoid filter incompatibilities
Allow None as a valid value for the batch_size argument to Table.scan(),
since HBase does not support specifying a batch size when some scanner
filters are used.
Fixes issue #54.
Diffstat (limited to 'happybase/table.py')
-rw-r--r-- | happybase/table.py | 31 |
1 files changed, 20 insertions, 11 deletions
```diff
diff --git a/happybase/table.py b/happybase/table.py
index ec67092..c81f4ce 100644
--- a/happybase/table.py
+++ b/happybase/table.py
@@ -254,6 +254,15 @@ class Table(object):
         this to a low value (or even 1) if your data is large, since a low
         batch size results in added round-trips to the server.
 
+        .. warning::
+
+           Not all HBase filters can be used in combination with a batch
+           size. Explicitly specify `None` for the `batch_size` argument
+           in those cases to override the default value. Failure to do
+           so can result in hard to debug errors (not HappyBase's
+           fault), such as a non-responsive connection. The HBase logs
+           may contain more useful information in these situations.
+
         **Compatibility notes:**
 
         * The `filter` argument is only available when using HBase 0.92
@@ -280,11 +289,11 @@ class Table(object):
         :return: generator yielding the rows matching the scan
         :rtype: iterable of `(row_key, row_data)` tuples
         """
-        if batch_size < 1:
-            raise ValueError("'batch_size' must be >= 1")
+        if batch_size is not None and batch_size < 1:
+            raise ValueError("'batch_size' must be >= 1 (or None)")
 
         if limit is not None and limit < 1:
-            raise ValueError("'limit' must be >= 1")
+            raise ValueError("'limit' must be >= 1 (or None)")
 
         if sorted_columns and self.connection.compat < '0.96':
             raise NotImplementedError(
@@ -349,16 +358,16 @@ class Table(object):
         n_returned = n_fetched = 0
         try:
             while True:
-                if limit is None:
-                    how_many = batch_size
+                if batch_size is None:
+                    how_many = 1
                 else:
-                    how_many = min(batch_size, limit - n_returned)
+                    how_many = batch_size
 
-                if how_many == 1:
-                    items = self.connection.client.scannerGet(scan_id)
-                else:
-                    items = self.connection.client.scannerGetList(
-                        scan_id, how_many)
+                if limit is not None:
+                    how_many = min(how_many, limit - n_returned)
+
+                items = self.connection.client.scannerGetList(
+                    scan_id, how_many)
 
                 n_fetched += len(items)
 
```