summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: dmosberger <davidm@egauge.net> 2020-12-04 01:10:02 -0700
committer: GitHub <noreply@github.com> 2020-12-04 10:10:02 +0200
commit: e8f54811c7128d6ee849d072f27459e9b9322034 (patch)
tree: 273d2b67f20021d10ebaa0dcc5d611d282dbe130
parent: e8774043ed6496ac9664d0b4cf5bb7305f92c007 (diff)
download: tablib-e8f54811c7128d6ee849d072f27459e9b9322034.tar.gz
Expose 'read_only' parameter for 'import_set' and 'import_book' (#483) [HEAD, master]
-rw-r--r-- HISTORY.md | 1
-rw-r--r-- docs/formats.rst | 9
-rw-r--r-- src/tablib/formats/_xlsx.py | 8
-rw-r--r-- tests/files/bad_dimensions.xlsx | bin 0 -> 9220 bytes
-rwxr-xr-x tests/test_tablib.py | 7
5 files changed, 21 insertions, 4 deletions
diff --git a/HISTORY.md b/HISTORY.md
index 00b849b..e260784 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -9,6 +9,7 @@
### Improvements
- Added Python 3.9 support
+- Added read_only option to xlsx file reader (#482).
### Bugfixes
diff --git a/docs/formats.rst b/docs/formats.rst
index 0c46733..2357efe 100644
--- a/docs/formats.rst
+++ b/docs/formats.rst
@@ -206,6 +206,15 @@ Import/export data in Excel 07+ Spreadsheet representation.
This format is optional, install Tablib with ``pip install "tablib[xlsx]"`` to
make the format available.
+The ``import_set()`` and ``import_book()`` methods accept keyword
+argument ``read_only``. If its value is ``True`` (the default), the
+XLSX data source is read lazily. Lazy reading generally reduces time
+and memory consumption, especially for large spreadsheets. However,
+it relies on the XLSX data source declaring correct dimensions. Some
+programs generate XLSX files with incorrect dimensions. Such files
+may need to be loaded with this optimization turned off by passing
+``read_only=False``.
+
.. note::
When reading an ``xlsx`` file containing formulas in its cells, Tablib will
diff --git a/src/tablib/formats/_xlsx.py b/src/tablib/formats/_xlsx.py
index e2a3fde..34911e9 100644
--- a/src/tablib/formats/_xlsx.py
+++ b/src/tablib/formats/_xlsx.py
@@ -59,12 +59,12 @@ class XLSXFormat:
return stream.getvalue()
@classmethod
- def import_set(cls, dset, in_stream, headers=True):
+ def import_set(cls, dset, in_stream, headers=True, read_only=True):
"""Returns databook from XLS stream."""
dset.wipe()
- xls_book = load_workbook(in_stream, read_only=True, data_only=True)
+ xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
sheet = xls_book.active
dset.title = sheet.title
@@ -77,12 +77,12 @@ class XLSXFormat:
dset.append(row_vals)
@classmethod
- def import_book(cls, dbook, in_stream, headers=True):
+ def import_book(cls, dbook, in_stream, headers=True, read_only=True):
"""Returns databook from XLS stream."""
dbook.wipe()
- xls_book = load_workbook(in_stream, read_only=True, data_only=True)
+ xls_book = load_workbook(in_stream, read_only=read_only, data_only=True)
for sheet in xls_book.worksheets:
data = tablib.Dataset()
diff --git a/tests/files/bad_dimensions.xlsx b/tests/files/bad_dimensions.xlsx
new file mode 100644
index 0000000..8493760
--- /dev/null
+++ b/tests/files/bad_dimensions.xlsx
Binary files differ
diff --git a/tests/test_tablib.py b/tests/test_tablib.py
index b13d17c..ccb28e1 100755
--- a/tests/test_tablib.py
+++ b/tests/test_tablib.py
@@ -1040,6 +1040,13 @@ class XLSXTests(BaseTestCase):
data = tablib.Dataset().load(fh)
self.assertEqual(data.headers[0], 'Hello World')
+ def test_xlsx_bad_dimensions(self):
+ """Test loading file with bad dimension. Must be done with
+ read_only=False."""
+ xls_source = Path(__file__).parent / 'files' / 'bad_dimensions.xlsx'
+ with xls_source.open('rb') as fh:
+ data = tablib.Dataset().load(fh, read_only=False)
+ self.assertEqual(data.height, 3)
class JSONTests(BaseTestCase):
def test_json_format_detect(self):