diff options
| -rw-r--r-- | .travis.yml | 5 | ||||
| -rw-r--r-- | AUTHORS | 2 | ||||
| -rw-r--r-- | HISTORY.rst | 5 | ||||
| -rw-r--r-- | NOTICE | 28 | ||||
| -rw-r--r-- | README.rst | 31 | ||||
| -rw-r--r-- | docs/index.rst | 13 | ||||
| -rw-r--r-- | docs/install.rst | 12 | ||||
| -rw-r--r-- | docs/intro.rst | 3 | ||||
| -rw-r--r-- | docs/tutorial.rst | 30 | ||||
| -rw-r--r-- | requirements.txt | 21 | ||||
| -rwxr-xr-x | setup.py | 17 | ||||
| -rw-r--r-- | tablib/compat.py | 22 | ||||
| -rw-r--r-- | tablib/core.py | 45 | ||||
| -rw-r--r-- | tablib/formats/__init__.py | 5 | ||||
| -rw-r--r-- | tablib/formats/_csv.py | 2 | ||||
| -rw-r--r-- | tablib/formats/_df.py | 49 | ||||
| -rw-r--r-- | tablib/formats/_jira.py | 39 | ||||
| -rw-r--r-- | tablib/formats/_json.py | 15 | ||||
| -rw-r--r-- | tablib/formats/_rst.py | 273 | ||||
| -rw-r--r-- | tablib/formats/_xlsx.py | 6 | ||||
| -rw-r--r-- | tablib/formats/_yaml.py | 2 | ||||
| -rw-r--r-- | tablib/packages/ordereddict.py | 127 | ||||
| -rw-r--r-- | tablib/packages/statistics.py | 24 | ||||
| -rwxr-xr-x | test_tablib.py | 59 | ||||
| -rw-r--r-- | tox.ini | 11 |
25 files changed, 599 insertions, 247 deletions
diff --git a/.travis.yml b/.travis.yml index 4e09b14..53af531 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,10 @@ language: python +cache: pip python: - - 2.6 - 2.7 - - 3.3 - 3.4 - 3.5 - 3.6 install: - - python setup.py install + - pip install -r requirements.txt script: python test_tablib.py @@ -34,3 +34,5 @@ Patches and Suggestions - Mathias Loesch - Tushar Makkar - Andrii Soldatenko +- Bruno Soares +- Tsuyoshi Hombashi diff --git a/HISTORY.rst b/HISTORY.rst index a90a21b..e848c1d 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -1,6 +1,11 @@ History ------- +0.11.5 (2017-06-13) ++++++++++++++++++++ + +- Use ``yaml.safe_load`` for importing yaml. + 0.11.4 (2017-01-23) +++++++++++++++++++ @@ -1,32 +1,6 @@ -Tablib includes some vendorized python libraries: ordereddict, markup. +Tablib includes some vendorized Python libraries: markup. Markup License ============== Markup is in the public domain. - - -OrderedDict License -=================== - -Copyright (c) 2009 Raymond Hettinger - -Permission is hereby granted, free of charge, to any person -obtaining a copy of this software and associated documentation files -(the "Software"), to deal in the Software without restriction, -including without limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of the Software, -and to permit persons to whom the Software is furnished to do so, -subject to the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES - OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. @@ -21,14 +21,18 @@ Output formats supported: - Excel (Sets + Books) - JSON (Sets + Books) - YAML (Sets + Books) +- Pandas DataFrames (Sets) - HTML (Sets) +- Jira (Sets) - TSV (Sets) -- OSD (Sets) +- ODS (Sets) - CSV (Sets) - DBF (Sets) Note that tablib *purposefully* excludes XML support. It always will. (Note: This is a joke. Pull requests are welcome.) +If you're interested in financially supporting Kenneth Reitz open source, consider `visiting this link <https://cash.me/$KennethReitz>`_. Your support helps tremendously with sustainability of motivation, as Open Source is no longer part of my day job. + Overview -------- @@ -64,13 +68,13 @@ Intelligently add new columns: :: Slice rows: :: - >>> print data[:2] + >>> print(data[:2]) [('John', 'Adams', 90), ('George', 'Washington', 67)] Slice columns by header: :: - >>> print data['first_name'] + >>> print(data['first_name']) ['John', 'George', 'Henry'] Easily delete rows: :: @@ -86,7 +90,7 @@ JSON! +++++ :: - >>> print data.json + >>> print(data.export('json')) [ { "last_name": "Adams", @@ -105,7 +109,7 @@ YAML! +++++ :: - >>> print data.yaml + >>> print(data.export('yaml')) - {age: 90, first_name: John, last_name: Adams} - {age: 83, first_name: Henry, last_name: Ford} @@ -113,7 +117,7 @@ CSV... ++++++ :: - >>> print data.csv + >>> print(data.export('csv')) first_name,last_name,age John,Adams,90 Henry,Ford,83 @@ -123,14 +127,23 @@ EXCEL! :: >>> with open('people.xls', 'wb') as f: - ... f.write(data.xls) + ... f.write(data.export('xls')) DBF! ++++ :: >>> with open('people.dbf', 'wb') as f: - ... f.write(data.dbf) + ... f.write(data.export('dbf')) + +Pandas DataFrame! ++++++++++++++++++ +:: + + >>> print(data.export('df')): + first_name last_name age + 0 John Adams 90 + 1 Henry Ford 83 It's that easy. @@ -140,7 +153,7 @@ Installation To install tablib, simply: :: - $ pip install tablib + $ pip install tablib[pandas] Make sure to check out `Tablib on PyPi <https://pypi.python.org/pypi/tablib/>`_! diff --git a/docs/index.rst b/docs/index.rst index 55e5679..90289e2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -29,18 +29,23 @@ Tablib is an :ref:`MIT Licensed <mit>` format-agnostic tabular dataset library, >>> data = tablib.Dataset(headers=['First Name', 'Last Name', 'Age']) >>> for i in [('Kenneth', 'Reitz', 22), ('Bessie', 'Monke', 21)]: ... data.append(i) - - >>> print data.json + + >>> print(data.export('json')) [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 21}] - >>> print data.yaml + >>> print(data.export('yaml')) - {Age: 22, First Name: Kenneth, Last Name: Reitz} - {Age: 21, First Name: Bessie, Last Name: Monke} - >>> data.xlsx + >>> data.export('xlsx') <censored binary data> + >>> data.export('df') + First Name Last Name Age + 0 Kenneth Reitz 22 + 1 Bessie Monke 21 + Testimonials ------------ diff --git a/docs/install.rst b/docs/install.rst index 365cca8..a236b87 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -16,7 +16,7 @@ Distribute & Pip Of course, the recommended way to install Tablib is with `pip <http://www.pip-installer.org/>`_:: - $ pip install tablib + $ pip install tablib[pandas] ------------------- @@ -40,16 +40,6 @@ To download the full source history from Git, see :ref:`Source Control <scm>`. .. _zipball: http://github.com/kennethreitz/tablib/zipball/master -.. _speed-extensions: -Speed Extensions ----------------- - -You can gain some speed improvement by optionally installing the ujson_ library. -Tablib will fallback to the standard `json` module if it doesn't find ``ujson``. - -.. _ujson: https://pypi.python.org/pypi/ujson - - .. _updates: Staying Updated --------------- diff --git a/docs/intro.rst b/docs/intro.rst index e3da4dc..6af436d 100644 --- a/docs/intro.rst +++ b/docs/intro.rst @@ -49,7 +49,7 @@ Tablib is released under terms of `The MIT License`_. Tablib License -------------- -Copyright 2016 Kenneth Reitz +Copyright 2017 Kenneth Reitz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -77,7 +77,6 @@ Pythons Supported At this time, the following Python platforms are officially supported: -* cPython 2.6 * cPython 2.7 * cPython 3.3 * cPython 3.4 diff --git a/docs/tutorial.rst b/docs/tutorial.rst index d552e21..1fe11ee 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -115,30 +115,38 @@ Tablib's killer feature is the ability to export your :class:`Dataset` objects i **Comma-Separated Values** :: - >>> data.csv + >>> data.export('csv') Last Name,First Name,Age Reitz,Kenneth,22 Monke,Bessie,20 **JavaScript Object Notation** :: - >>> data.json + >>> data.export('json') [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 20}] **YAML Ain't Markup Language** :: - >>> data.yaml + >>> data.export('yaml') - {Age: 22, First Name: Kenneth, Last Name: Reitz} - {Age: 20, First Name: Bessie, Last Name: Monke} **Microsoft Excel** :: - >>> data.xls + >>> data.export('xls') <censored binary data> +**Pandas DataFrame** :: + + >>> data.export('df') + First Name Last Name Age + 0 Kenneth Reitz 22 + 1 Bessie Monke 21 + + ------------------------ Selecting Rows & Columns ------------------------ @@ -216,7 +224,7 @@ Let's add a dynamic column to our :class:`Dataset` object. In this example, we h Let's have a look at our data. :: - >>> data.yaml + >>> data.export('yaml') - {Age: 22, First Name: Kenneth, Grade: 0.6, Last Name: Reitz} - {Age: 20, First Name: Bessie, Grade: 0.75, Last Name: Monke} @@ -246,7 +254,7 @@ For example, we can use the data available in the row to guess the gender of a s Adding this function to our dataset as a dynamic column would result in: :: - >>> data.yaml + >>> data.export('yaml') - {Age: 22, First Name: Kenneth, Gender: Male, Last Name: Reitz} - {Age: 20, First Name: Bessie, Gender: Female, Last Name: Monke} @@ -281,6 +289,14 @@ Now that we have extra meta-data on our rows, we can easily filter our :class:`D It's that simple. The original :class:`Dataset` is untouched. +Open an Excel Workbook and read first sheet +-------------------------------- + +To open an Excel 2007 and later workbook with a single sheet (or a workbook with multiple sheets but you just want the first sheet), use the following: + +data = tablib.Dataset() +data.xlsx = open('my_excel_file.xlsx', 'rb').read() +print(data) Excel Workbook With Multiple Sheets ------------------------------------ @@ -346,7 +362,7 @@ When, it's often useful to create a blank row containing information on the upco # Write spreadsheet to disk with open('grades.xls', 'wb') as f: - f.write(tests.xls) + f.write(tests.export('xls')) The resulting **tests.xls** will have the following layout: diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..2fab040 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,21 @@ +certifi==2017.7.27.1 +chardet==3.0.4 +et-xmlfile==1.0.1 +idna==2.6 +jdcal==1.3 +numpy==1.13.1 +odfpy==1.3.5 +openpyxl==2.4.8 +pandas==0.20.3 +pkginfo==1.4.1 +python-dateutil==2.6.1 +pytz==2017.2 +PyYAML==3.12 +requests==2.18.4 +requests-toolbelt==0.8.0 +six==1.10.0 +tqdm==4.15.0 +unicodecsv==0.14.1 +urllib3==1.22 +xlrd==1.1.0 +xlwt==1.3.0 @@ -14,15 +14,6 @@ if sys.argv[-1] == 'publish': os.system("python setup.py sdist upload") sys.exit() -if sys.argv[-1] == 'speedups': - try: - __import__('pip') - except ImportError: - print('Pip required.') - sys.exit(1) - - os.system('pip install ujson') - sys.exit() if sys.argv[-1] == 'test': try: @@ -43,13 +34,14 @@ packages = [ install = [ 'odfpy', - 'openpyxl', + 'openpyxl>=2.4.0', 'backports.csv', 'xlrd', 'xlwt', 'pyyaml', ] + with open('tablib/core.py', 'r') as fd: version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]', fd.read(), re.MULTILINE).group(1) @@ -71,13 +63,14 @@ setup( 'Natural Language :: English', 'License :: OSI Approved :: MIT License', 'Programming Language :: Python', - 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', ], tests_require=['pytest'], install_requires=install, + extras_require={ + 'pandas': ['pandas'], + }, ) diff --git a/tablib/compat.py b/tablib/compat.py index d18a781..660697d 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -13,34 +13,24 @@ import sys is_py3 = (sys.version_info[0] > 2) - -try: - from collections import OrderedDict -except ImportError: - from tablib.packages.ordereddict import OrderedDict - - if is_py3: from io import BytesIO + from io import StringIO from tablib.packages import markup3 as markup - import tablib.packages.dbfpy3 as dbfpy - + from statistics import median + from itertools import zip_longest as izip_longest import csv - from io import StringIO - # py3 mappings + import tablib.packages.dbfpy3 as dbfpy - ifilter = filter unicode = str - bytes = bytes - basestring = str xrange = range else: from cStringIO import StringIO as BytesIO from StringIO import StringIO from tablib.packages import markup - from itertools import ifilter - + from tablib.packages.statistics import median + from itertools import izip_longest from backports import csv import tablib.packages.dbfpy as dbfpy diff --git a/tablib/core.py b/tablib/core.py index b97da54..78c4dce 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -9,20 +9,21 @@ :license: MIT, see LICENSE for more details. """ +from collections import OrderedDict from copy import copy from operator import itemgetter from tablib import formats -from tablib.compat import OrderedDict, unicode +from tablib.compat import unicode __title__ = 'tablib' -__version__ = '0.11.4' -__build__ = 0x001104 +__version__ = '0.12.1' +__build__ = 0x001201 __author__ = 'Kenneth Reitz' __license__ = 'MIT' -__copyright__ = 'Copyright 2016 Kenneth Reitz' +__copyright__ = 'Copyright 2017 Kenneth Reitz' __docformat__ = 'restructuredtext' @@ -526,9 +527,9 @@ class Dataset(object): Import assumes (for now) that headers exist. - .. admonition:: Binary Warning + .. admonition:: Binary Warning for Python 2 - :class:`Dataset.csv` uses \\r\\n line endings by default, so make + :class:`Dataset.csv` uses \\r\\n line endings by default so, in Python 2, make sure to write in binary mode:: with open('output.csv', 'wb') as f: @@ -536,6 +537,18 @@ class Dataset(object): If you do not do this, and you export the file on Windows, your CSV file will open in Excel with a blank line between each row. + + .. admonition:: Line endings for Python 3 + + :class:`Dataset.csv` uses \\r\\n line endings by default so, in Python 3, make + sure to include newline='' otherwise you will get a blank line between each row + when you open the file in Excel:: + + with open('output.csv', 'w', newline='') as f: + f.write(data.csv) + + If you do not do this, and you export the file on Windows, your + CSV file will open in Excel with a blank line between each row. """ pass @@ -570,6 +583,18 @@ class Dataset(object): """ pass + @property + def df(): + """A DataFrame representation of the :class:`Dataset` object. + + A dataset object can also be imported by setting the :class:`Dataset.df` attribute: :: + + data = tablib.Dataset() + data.df = DataFrame(np.random.randn(6,4)) + + Import assumes (for now) that headers exist. + """ + pass @property def json(): @@ -619,7 +644,6 @@ class Dataset(object): """ pass - @property def latex(): """A LaTeX booktabs representation of the :class:`Dataset` object. If a @@ -629,6 +653,13 @@ class Dataset(object): """ pass + @property + def jira(): + """A Jira table representation of the :class:`Dataset` object. + + .. note:: This method can be used for export only. + """ + pass # ---- # Rows diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 5cca19f..418e607 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -13,5 +13,8 @@ from . import _xlsx as xlsx from . import _ods as ods from . import _dbf as dbf from . import _latex as latex +from . import _df as df +from . import _rst as rst +from . import _jira as jira -available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods) +available = (json, xls, yaml, csv, dbf, tsv, html, jira, latex, xlsx, ods, df, rst) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index b74afd7..8b536a7 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -39,7 +39,7 @@ def import_set(dset, in_stream, headers=True, **kwargs): if (i == 0) and (headers): dset.headers = row - else: + elif row: dset.append(row) diff --git a/tablib/formats/_df.py b/tablib/formats/_df.py new file mode 100644 index 0000000..44b967f --- /dev/null +++ b/tablib/formats/_df.py @@ -0,0 +1,49 @@ +""" Tablib - DataFrame Support. +""" + + +import sys + + +if sys.version_info[0] > 2: + from io import BytesIO +else: + from cStringIO import StringIO as BytesIO + +try: + from pandas import DataFrame +except ImportError: + DataFrame = None + +import tablib + +from tablib.compat import unicode + +title = 'df' +extensions = ('df', ) + +def detect(stream): + """Returns True if given stream is a DataFrame.""" + if DataFrame is None: + return False + try: + DataFrame(stream) + return True + except ValueError: + return False + + +def export_set(dset, index=None): + """Returns DataFrame representation of DataBook.""" + if DataFrame is None: + raise NotImplementedError( + 'DataFrame Format requires `pandas` to be installed.' + ' Try `pip install tablib[pandas]`.') + dataframe = DataFrame(dset.dict, columns=dset.headers) + return dataframe + + +def import_set(dset, in_stream): + """Returns dataset from DataFrame.""" + dset.wipe() + dset.dict = in_stream.to_dict(orient='records') diff --git a/tablib/formats/_jira.py b/tablib/formats/_jira.py new file mode 100644 index 0000000..55fce52 --- /dev/null +++ b/tablib/formats/_jira.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +"""Tablib - Jira table export support. + + Generates a Jira table from the dataset. +""" +from tablib.compat import unicode + +title = 'jira' + + +def export_set(dataset): + """Formats the dataset according to the Jira table syntax: + + ||heading 1||heading 2||heading 3|| + |col A1|col A2|col A3| + |col B1|col B2|col B3| + + :param dataset: dataset to serialize + :type dataset: tablib.core.Dataset + """ + + header = _get_header(dataset.headers) if dataset.headers else '' + body = _get_body(dataset) + return '%s\n%s' % (header, body) if header else body + + +def _get_body(dataset): + return '\n'.join([_serialize_row(row) for row in dataset]) + + +def _get_header(headers): + return _serialize_row(headers, delimiter='||') + + +def _serialize_row(row, delimiter='|'): + return '%s%s%s' % (delimiter, + delimiter.join([unicode(item) if item else ' ' for item in row]), + delimiter) diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py index a3d6cc3..bbd2c96 100644 --- a/tablib/formats/_json.py +++ b/tablib/formats/_json.py @@ -3,36 +3,33 @@ """ Tablib - JSON Support """ import decimal +import json +from uuid import UUID import tablib -try: - import ujson as json -except ImportError: - import json title = 'json' extensions = ('json', 'jsn') -def date_handler(obj): - if isinstance(obj, decimal.Decimal): +def serialize_objects_handler(obj): + if isinstance(obj, decimal.Decimal) or isinstance(obj, UUID): return str(obj) elif hasattr(obj, 'isoformat'): return obj.isoformat() else: return obj - # return obj.isoformat() if hasattr(obj, 'isoformat') else obj def export_set(dataset): """Returns JSON representation of Dataset.""" - return json.dumps(dataset.dict, default=date_handler) + return json.dumps(dataset.dict, default=serialize_objects_handler) def export_book(databook): """Returns JSON representation of Databook.""" - return json.dumps(databook._package(), default=date_handler) + return json.dumps(databook._package(), default=serialize_objects_handler) def import_set(dset, in_stream): diff --git a/tablib/formats/_rst.py b/tablib/formats/_rst.py new file mode 100644 index 0000000..4b53ad7 --- /dev/null +++ b/tablib/formats/_rst.py @@ -0,0 +1,273 @@ +# -*- coding: utf-8 -*- + +""" Tablib - reStructuredText Support +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from textwrap import TextWrapper + +from tablib.compat import ( + median, + unicode, + izip_longest, +) + + +title = 'rst' +extensions = ('rst',) + + +MAX_TABLE_WIDTH = 80 # Roughly. It may be wider to avoid breaking words. + + +JUSTIFY_LEFT = 'left' +JUSTIFY_CENTER = 'center' +JUSTIFY_RIGHT = 'right' +JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT) + + +def to_unicode(value): + if isinstance(value, bytes): + return value.decode('utf-8') + return unicode(value) + + +def _max_word_len(text): + """ + Return the length of the longest word in `text`. + + + >>> _max_word_len('Python Module for Tabular Datasets') + 8 + + """ + return max((len(word) for word in text.split())) + + +def _get_column_string_lengths(dataset): + """ + Returns a list of string lengths of each column, and a list of + maximum word lengths. + """ + if dataset.headers: + column_lengths = [[len(h)] for h in dataset.headers] + word_lens = [_max_word_len(h) for h in dataset.headers] + else: + column_lengths = [[] for _ in range(dataset.width)] + word_lens = [0 for _ in range(dataset.width)] + for row in dataset.dict: + values = iter(row.values() if hasattr(row, 'values') else row) + for i, val in enumerate(values): + text = to_unicode(val) + column_lengths[i].append(len(text)) + word_lens[i] = max(word_lens[i], _max_word_len(text)) + return column_lengths, word_lens + + +def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT): + """ + Returns a table row of wrapped values as a list of lines + """ + if justify not in JUSTIFY_VALUES: + raise ValueError('Value of "justify" must be one of "{}"'.format( + '", "'.join(JUSTIFY_VALUES) + )) + if justify == JUSTIFY_LEFT: + just = lambda text, width: text.ljust(width) + elif justify == JUSTIFY_CENTER: + just = lambda text, width: text.center(width) + else: + just = lambda text, width: text.rjust(width) + lpad = sep + ' ' if sep else '' + rpad = ' ' + sep if sep else '' + pad = ' ' + sep + ' ' + cells = [] + for value, width in zip(values, widths): + wrapper.width = width + text = to_unicode(value) + cell = wrapper.wrap(text) + cells.append(cell) + lines = izip_longest(*cells, fillvalue='') + lines = ( + (just(cell_line, widths[i]) for i, cell_line in enumerate(line)) + for line in lines + ) + lines = [''.join((lpad, pad.join(line), rpad)) for line in lines] + return lines + + +def _get_column_widths(dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3): + """ + Returns a list of column widths proportional to the median length + of the text in their cells. + """ + str_lens, word_lens = _get_column_string_lengths(dataset) + median_lens = [int(median(lens)) for lens in str_lens] + total = sum(median_lens) + if total > max_table_width - (pad_len * len(median_lens)): + column_widths = (max_table_width * l // total for l in median_lens) + else: + column_widths = (l for l in median_lens) + # Allow for separator and padding: + column_widths = (w - pad_len if w > pad_len else w for w in column_widths) + # Rather widen table than break words: + column_widths = [max(w, l) for w, l in zip(column_widths, word_lens)] + return column_widths + + +def export_set_as_simple_table(dataset, column_widths=None): + """ + Returns reStructuredText grid table representation of dataset. + """ + lines = [] + wrapper = TextWrapper() + if column_widths is None: + column_widths = _get_column_widths(dataset, pad_len=2) + border = ' '.join(['=' * w for w in column_widths]) + + lines.append(border) + if dataset.headers: + lines.extend(_row_to_lines( + dataset.headers, + column_widths, + wrapper, + sep='', + justify=JUSTIFY_CENTER, + )) + lines.append(border) + for row in dataset.dict: + values = iter(row.values() if hasattr(row, 'values') else row) + lines.extend(_row_to_lines(values, column_widths, wrapper, '')) + lines.append(border) + return '\n'.join(lines) + + +def export_set_as_grid_table(dataset, column_widths=None): + """ + Returns reStructuredText grid table representation of dataset. + + + >>> from tablib import Dataset + >>> from tablib.formats import rst + >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) + >>> data = Dataset() + >>> data.headers = ['A', 'B', 'A and B'] + >>> for a, b in bits: + ... data.append([bool(a), bool(b), bool(a * b)]) + >>> print(rst.export_set(data, force_grid=True)) + +-------+-------+-------+ + | A | B | A and | + | | | B | + +=======+=======+=======+ + | False | False | False | + +-------+-------+-------+ + | True | False | False | + +-------+-------+-------+ + | False | True | False | + +-------+-------+-------+ + | True | True | True | + +-------+-------+-------+ + + """ + lines = [] + wrapper = TextWrapper() + if column_widths is None: + column_widths = _get_column_widths(dataset) + header_sep = '+=' + '=+='.join(['=' * w for w in column_widths]) + '=+' + row_sep = '+-' + '-+-'.join(['-' * w for w in column_widths]) + '-+' + + lines.append(row_sep) + if dataset.headers: + lines.extend(_row_to_lines( + dataset.headers, + column_widths, + wrapper, + justify=JUSTIFY_CENTER, + )) + lines.append(header_sep) + for row in dataset.dict: + values = iter(row.values() if hasattr(row, 'values') else row) + lines.extend(_row_to_lines(values, column_widths, wrapper)) + lines.append(row_sep) + return '\n'.join(lines) + + +def _use_simple_table(head0, col0, width0): + """ + Use a simple table if the text in the first column is never wrapped + + + >>> _use_simple_table('menu', ['egg', 'bacon'], 10) + True + >>> _use_simple_table(None, ['lobster thermidor', 'spam'], 10) + False + + """ + if head0 is not None: + head0 = to_unicode(head0) + if len(head0) > width0: + return False + for cell in col0: + cell = to_unicode(cell) + if len(cell) > width0: + return False + return True + + +def export_set(dataset, **kwargs): + """ + Returns reStructuredText table representation of dataset. + + Returns a simple table if the text in the first column is never + wrapped, otherwise returns a grid table. + + + >>> from tablib import Dataset + >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) + >>> data = Dataset() + >>> data.headers = ['A', 'B', 'A and B'] + >>> for a, b in bits: + ... data.append([bool(a), bool(b), bool(a * b)]) + >>> table = data.rst + >>> table.split('\\n') == [ + ... '===== ===== =====', + ... ' A B A and', + ... ' B ', + ... '===== ===== =====', + ... 'False False False', + ... 'True False False', + ... 'False True False', + ... 'True True True ', + ... '===== ===== =====', + ... ] + True + + """ + if not dataset.dict: + return '' + force_grid = kwargs.get('force_grid', False) + max_table_width = kwargs.get('max_table_width', MAX_TABLE_WIDTH) + column_widths = _get_column_widths(dataset, max_table_width) + + use_simple_table = _use_simple_table( + dataset.headers[0] if dataset.headers else None, + dataset.get_col(0), + column_widths[0], + ) + if use_simple_table and not force_grid: + return export_set_as_simple_table(dataset, column_widths) + else: + return export_set_as_grid_table(dataset, column_widths) + + +def export_book(databook): + """ + reStructuredText representation of a Databook. + + Tables are separated by a blank line. All tables use the grid + format. + """ + return '\n\n'.join(export_set(dataset, force_grid=True) + for dataset in databook._datasets) diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py index 20f55df..816fd37 100644 --- a/tablib/formats/_xlsx.py +++ b/tablib/formats/_xlsx.py @@ -52,7 +52,7 @@ def export_book(databook, freeze_panes=True): wb = Workbook() for sheet in wb.worksheets: - wb.remove_sheet(sheet) + wb.remove(sheet) for i, dset in enumerate(databook._datasets): ws = wb.create_sheet() ws.title = dset.title if dset.title else 'Sheet%s' % (i) @@ -71,7 +71,7 @@ def import_set(dset, in_stream, headers=True): dset.wipe() xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream)) - sheet = xls_book.get_active_sheet() + sheet = xls_book.active dset.title = sheet.title @@ -119,7 +119,7 @@ def dset_sheet(dataset, ws, freeze_panes=True): row_number = i + 1 for j, col in enumerate(row): col_idx = get_column_letter(j + 1) - cell = ws.cell('%s%s' % (col_idx, row_number)) + cell = ws['%s%s' % (col_idx, row_number)] # bold headers if (row_number == 1) and dataset.headers: diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py index 5aecb42..3d17baf 100644 --- a/tablib/formats/_yaml.py +++ b/tablib/formats/_yaml.py @@ -33,7 +33,7 @@ def import_book(dbook, in_stream): dbook.wipe() - for sheet in yaml.load(in_stream): + for sheet in yaml.safe_load(in_stream): data = tablib.Dataset() data.title = sheet['title'] data.dict = sheet['data'] diff --git a/tablib/packages/ordereddict.py b/tablib/packages/ordereddict.py deleted file mode 100644 index a5b896d..0000000 --- a/tablib/packages/ordereddict.py +++ /dev/null @@ -1,127 +0,0 @@ -# Copyright (c) 2009 Raymond Hettinger -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, -# including without limitation the rights to use, copy, modify, merge, -# publish, distribute, sublicense, and/or sell copies of the Software, -# and to permit persons to whom the Software is furnished to do so, -# subject to the following conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. - -from UserDict import DictMixin - -class OrderedDict(dict, DictMixin): - - def __init__(self, *args, **kwds): - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__end - except AttributeError: - self.clear() - self.update(*args, **kwds) - - def clear(self): - self.__end = end = [] - end += [None, end, end] # sentinel node for doubly linked list - self.__map = {} # key --> [key, prev, next] - dict.clear(self) - - def __setitem__(self, key, value): - if key not in self: - end = self.__end - curr = end[1] - curr[2] = end[1] = self.__map[key] = [key, curr, end] - dict.__setitem__(self, key, value) - - def __delitem__(self, key): - dict.__delitem__(self, key) - key, prev, next = self.__map.pop(key) - prev[2] = next - next[1] = prev - - def __iter__(self): - end = self.__end - curr = end[2] - while curr is not end: - yield curr[0] - curr = curr[2] - - def __reversed__(self): - end = self.__end - curr = end[1] - while curr is not end: - yield curr[0] - curr = curr[1] - - def popitem(self, last=True): - if not self: - raise KeyError('dictionary is empty') - if last: - key = next(reversed(self)) - else: - key = next(iter(self)) - value = self.pop(key) - return key, value - - def __reduce__(self): - items = [[k, self[k]] for k in self] - tmp = self.__map, self.__end - del self.__map, self.__end - inst_dict = vars(self).copy() - self.__map, self.__end = tmp - if inst_dict: - return (self.__class__, (items,), inst_dict) - return self.__class__, (items,) - - def keys(self): - return list(self) - - setdefault = DictMixin.setdefault - update = DictMixin.update - pop = DictMixin.pop - values = DictMixin.values - items = DictMixin.items - iterkeys = DictMixin.iterkeys - itervalues = DictMixin.itervalues - iteritems = DictMixin.iteritems - - def __repr__(self): - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, list(self.items())) - - def copy(self): - return self.__class__(self) - - @classmethod - def fromkeys(cls, iterable, value=None): - d = cls() - for key in iterable: - d[key] = value - return d - - def __eq__(self, other): - if isinstance(other, OrderedDict): - if len(self) != len(other): - return False - for p, q in zip(list(self.items()), list(other.items())): - if p != q: - return False - return True - return dict.__eq__(self, other) - - def __ne__(self, other): - return not self == other diff --git a/tablib/packages/statistics.py b/tablib/packages/statistics.py new file mode 100644 index 0000000..e97a6c9 --- /dev/null +++ b/tablib/packages/statistics.py @@ -0,0 +1,24 @@ +from __future__ import division + + +def median(data): + """ + Return the median (middle value) of numeric data, using the common + "mean of middle two" method. If data is empty, ValueError is raised. + + Mimics the behaviour of Python3's statistics.median + + >>> median([1, 3, 5]) + 3 + >>> median([1, 3, 5, 7]) + 4.0 + + """ + data = sorted(data) + n = len(data) + if not n: + raise ValueError("No median for empty data") + i = n // 2 + if n % 2: + return data[i] + return (data[i - 1] + data[i]) / 2 diff --git a/test_tablib.py b/test_tablib.py index 96cd884..e7b7233 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -4,9 +4,11 @@ from __future__ import unicode_literals import datetime +import doctest import json import sys import unittest +from uuid import uuid4 import tablib from tablib.compat import markup, unicode, is_py3 @@ -227,6 +229,22 @@ class TablibTestCase(unittest.TestCase): # Delete from invalid index self.assertRaises(IndexError, self.founders.__delitem__, 3) + def test_json_export(self): + """Verify exporting dataset object as JSON""" + + address_id = uuid4() + headers = self.headers + ('address_id',) + founders = tablib.Dataset(headers=headers, title='Founders') + founders.append(('John', 'Adams', 90, address_id)) + founders_json = founders.export('json') + + expected_json = ( + '[{"first_name": "John", "last_name": "Adams", "gpa": 90, ' + '"address_id": "%s"}]' % str(address_id) + ) + + self.assertEqual(founders_json, expected_json) + def test_csv_export(self): """Verify exporting dataset object as CSV.""" @@ -299,6 +317,23 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(html, d.html) + def test_jira_export(self): + + expected = """||first_name||last_name||gpa|| +|John|Adams|90| +|George|Washington|67| +|Thomas|Jefferson|50|""" + self.assertEqual(expected, self.founders.jira) + + def test_jira_export_no_headers(self): + self.assertEqual('|a|b|c|', tablib.Dataset(['a', 'b', 'c']).jira) + + def test_jira_export_none_and_empty_values(self): + self.assertEqual('| | |c|', tablib.Dataset(['', None, 'c']).jira) + + def test_jira_export_empty_dataset(self): + self.assertTrue(tablib.Dataset().jira is not None) + def test_latex_export(self): """LaTeX export""" @@ -382,7 +417,10 @@ class TablibTestCase(unittest.TestCase): data.xlsx data.ods data.html + data.jira data.latex + data.df + data.rst def test_datetime_append(self): """Passes in a single datetime and a single date and exports.""" @@ -402,7 +440,9 @@ class TablibTestCase(unittest.TestCase): data.xlsx data.ods data.html + data.jira data.latex + data.rst def test_book_export_no_exceptions(self): """Test that various exports don't error out.""" @@ -416,6 +456,7 @@ class TablibTestCase(unittest.TestCase): book.xlsx book.ods book.html + data.rst def test_json_import_set(self): """Generate and import JSON set serialization.""" @@ -970,6 +1011,24 @@ class TablibTestCase(unittest.TestCase): self.founders.append(('First\nSecond', 'Name', 42)) self.founders.export('xlsx') + def test_rst_force_grid(self): + data.append(self.john) + data.append(self.george) + data.headers = self.headers + + simple = tablib.formats._rst.export_set(data) + grid = tablib.formats._rst.export_set(data, force_grid=True) + self.assertNotEqual(simple, grid) + self.assertNotIn('+', simple) + self.assertIn('+', grid) + + +class DocTests(unittest.TestCase): + + def test_rst_formatter_doctests(self): + results = doctest.testmod(tablib.formats._rst) + self.assertEqual(results.failed, 0) + if __name__ == '__main__': unittest.main() @@ -1,11 +1,8 @@ -# Tox (http://tox.testrun.org/) is a tool for running tests -# in multiple virtualenvs. This configuration file will run the -# test suite on all supported python versions. To use it, "pip install tox" -# and then run "tox" from this directory. - [tox] -envlist = py26, py27, py33, py34, py35, py36, pypy +minversion = 2.4 +envlist = py27, py34, py35, py36, pypy [testenv] -commands = python setup.py test deps = pytest +extras = pandas +commands = python setup.py test |
