summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.travis.yml5
-rw-r--r--AUTHORS2
-rw-r--r--HISTORY.rst5
-rw-r--r--NOTICE28
-rw-r--r--README.rst31
-rw-r--r--docs/index.rst13
-rw-r--r--docs/install.rst12
-rw-r--r--docs/intro.rst3
-rw-r--r--docs/tutorial.rst30
-rw-r--r--requirements.txt21
-rwxr-xr-xsetup.py17
-rw-r--r--tablib/compat.py22
-rw-r--r--tablib/core.py45
-rw-r--r--tablib/formats/__init__.py5
-rw-r--r--tablib/formats/_csv.py2
-rw-r--r--tablib/formats/_df.py49
-rw-r--r--tablib/formats/_jira.py39
-rw-r--r--tablib/formats/_json.py15
-rw-r--r--tablib/formats/_rst.py273
-rw-r--r--tablib/formats/_xlsx.py6
-rw-r--r--tablib/formats/_yaml.py2
-rw-r--r--tablib/packages/ordereddict.py127
-rw-r--r--tablib/packages/statistics.py24
-rwxr-xr-xtest_tablib.py59
-rw-r--r--tox.ini11
25 files changed, 599 insertions, 247 deletions
diff --git a/.travis.yml b/.travis.yml
index 4e09b14..53af531 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,11 +1,10 @@
language: python
+cache: pip
python:
- - 2.6
- 2.7
- - 3.3
- 3.4
- 3.5
- 3.6
install:
- - python setup.py install
+ - pip install -r requirements.txt
script: python test_tablib.py
diff --git a/AUTHORS b/AUTHORS
index 8afb539..e574b85 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -34,3 +34,5 @@ Patches and Suggestions
- Mathias Loesch
- Tushar Makkar
- Andrii Soldatenko
+- Bruno Soares
+- Tsuyoshi Hombashi
diff --git a/HISTORY.rst b/HISTORY.rst
index a90a21b..e848c1d 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -1,6 +1,11 @@
History
-------
+0.11.5 (2017-06-13)
++++++++++++++++++++
+
+- Use ``yaml.safe_load`` for importing yaml.
+
0.11.4 (2017-01-23)
+++++++++++++++++++
diff --git a/NOTICE b/NOTICE
index 4bdbb05..6966f47 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,32 +1,6 @@
-Tablib includes some vendorized python libraries: ordereddict, markup.
+Tablib includes a vendorized Python library: markup.
Markup License
==============
Markup is in the public domain.
-
-
-OrderedDict License
-===================
-
-Copyright (c) 2009 Raymond Hettinger
-
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation files
-(the "Software"), to deal in the Software without restriction,
-including without limitation the rights to use, copy, modify, merge,
-publish, distribute, sublicense, and/or sell copies of the Software,
-and to permit persons to whom the Software is furnished to do so,
-subject to the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.rst b/README.rst
index 3ba1138..d9190bb 100644
--- a/README.rst
+++ b/README.rst
@@ -21,14 +21,18 @@ Output formats supported:
- Excel (Sets + Books)
- JSON (Sets + Books)
- YAML (Sets + Books)
+- Pandas DataFrames (Sets)
- HTML (Sets)
+- Jira (Sets)
- TSV (Sets)
-- OSD (Sets)
+- ODS (Sets)
- CSV (Sets)
- DBF (Sets)
Note that tablib *purposefully* excludes XML support. It always will. (Note: This is a joke. Pull requests are welcome.)
+If you're interested in financially supporting Kenneth Reitz's open source work, consider `visiting this link <https://cash.me/$KennethReitz>`_. Your support helps tremendously with the sustainability of motivation, as Open Source is no longer part of my day job.
+
Overview
--------
@@ -64,13 +68,13 @@ Intelligently add new columns: ::
Slice rows: ::
- >>> print data[:2]
+ >>> print(data[:2])
[('John', 'Adams', 90), ('George', 'Washington', 67)]
Slice columns by header: ::
- >>> print data['first_name']
+ >>> print(data['first_name'])
['John', 'George', 'Henry']
Easily delete rows: ::
@@ -86,7 +90,7 @@ JSON!
+++++
::
- >>> print data.json
+ >>> print(data.export('json'))
[
{
"last_name": "Adams",
@@ -105,7 +109,7 @@ YAML!
+++++
::
- >>> print data.yaml
+ >>> print(data.export('yaml'))
- {age: 90, first_name: John, last_name: Adams}
- {age: 83, first_name: Henry, last_name: Ford}
@@ -113,7 +117,7 @@ CSV...
++++++
::
- >>> print data.csv
+ >>> print(data.export('csv'))
first_name,last_name,age
John,Adams,90
Henry,Ford,83
@@ -123,14 +127,23 @@ EXCEL!
::
>>> with open('people.xls', 'wb') as f:
- ... f.write(data.xls)
+ ... f.write(data.export('xls'))
DBF!
++++
::
>>> with open('people.dbf', 'wb') as f:
- ... f.write(data.dbf)
+ ... f.write(data.export('dbf'))
+
+Pandas DataFrame!
++++++++++++++++++
+::
+
+ >>> print(data.export('df'))
+ first_name last_name age
+ 0 John Adams 90
+ 1 Henry Ford 83
It's that easy.
@@ -140,7 +153,7 @@ Installation
To install tablib, simply: ::
- $ pip install tablib
+ $ pip install tablib[pandas]
Make sure to check out `Tablib on PyPi <https://pypi.python.org/pypi/tablib/>`_!
diff --git a/docs/index.rst b/docs/index.rst
index 55e5679..90289e2 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -29,18 +29,23 @@ Tablib is an :ref:`MIT Licensed <mit>` format-agnostic tabular dataset library,
>>> data = tablib.Dataset(headers=['First Name', 'Last Name', 'Age'])
>>> for i in [('Kenneth', 'Reitz', 22), ('Bessie', 'Monke', 21)]:
... data.append(i)
-
- >>> print data.json
+
+ >>> print(data.export('json'))
[{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 21}]
- >>> print data.yaml
+ >>> print(data.export('yaml'))
- {Age: 22, First Name: Kenneth, Last Name: Reitz}
- {Age: 21, First Name: Bessie, Last Name: Monke}
- >>> data.xlsx
+ >>> data.export('xlsx')
<censored binary data>
+ >>> data.export('df')
+ First Name Last Name Age
+ 0 Kenneth Reitz 22
+ 1 Bessie Monke 21
+
Testimonials
------------
diff --git a/docs/install.rst b/docs/install.rst
index 365cca8..a236b87 100644
--- a/docs/install.rst
+++ b/docs/install.rst
@@ -16,7 +16,7 @@ Distribute & Pip
Of course, the recommended way to install Tablib is with `pip <http://www.pip-installer.org/>`_::
- $ pip install tablib
+ $ pip install tablib[pandas]
-------------------
@@ -40,16 +40,6 @@ To download the full source history from Git, see :ref:`Source Control <scm>`.
.. _zipball: http://github.com/kennethreitz/tablib/zipball/master
-.. _speed-extensions:
-Speed Extensions
-----------------
-
-You can gain some speed improvement by optionally installing the ujson_ library.
-Tablib will fallback to the standard `json` module if it doesn't find ``ujson``.
-
-.. _ujson: https://pypi.python.org/pypi/ujson
-
-
.. _updates:
Staying Updated
---------------
diff --git a/docs/intro.rst b/docs/intro.rst
index e3da4dc..6af436d 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -49,7 +49,7 @@ Tablib is released under terms of `The MIT License`_.
Tablib License
--------------
-Copyright 2016 Kenneth Reitz
+Copyright 2017 Kenneth Reitz
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -77,7 +77,6 @@ Pythons Supported
At this time, the following Python platforms are officially supported:
-* cPython 2.6
* cPython 2.7
* cPython 3.3
* cPython 3.4
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index d552e21..1fe11ee 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -115,30 +115,38 @@ Tablib's killer feature is the ability to export your :class:`Dataset` objects i
**Comma-Separated Values** ::
- >>> data.csv
+ >>> data.export('csv')
Last Name,First Name,Age
Reitz,Kenneth,22
Monke,Bessie,20
**JavaScript Object Notation** ::
- >>> data.json
+ >>> data.export('json')
[{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 20}]
**YAML Ain't Markup Language** ::
- >>> data.yaml
+ >>> data.export('yaml')
- {Age: 22, First Name: Kenneth, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Last Name: Monke}
**Microsoft Excel** ::
- >>> data.xls
+ >>> data.export('xls')
<censored binary data>
+**Pandas DataFrame** ::
+
+ >>> data.export('df')
+ First Name Last Name Age
+ 0 Kenneth Reitz 22
+ 1 Bessie Monke 20
+
+
------------------------
Selecting Rows & Columns
------------------------
@@ -216,7 +224,7 @@ Let's add a dynamic column to our :class:`Dataset` object. In this example, we h
Let's have a look at our data. ::
- >>> data.yaml
+ >>> data.export('yaml')
- {Age: 22, First Name: Kenneth, Grade: 0.6, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Grade: 0.75, Last Name: Monke}
@@ -246,7 +254,7 @@ For example, we can use the data available in the row to guess the gender of a s
Adding this function to our dataset as a dynamic column would result in: ::
- >>> data.yaml
+ >>> data.export('yaml')
- {Age: 22, First Name: Kenneth, Gender: Male, Last Name: Reitz}
- {Age: 20, First Name: Bessie, Gender: Female, Last Name: Monke}
@@ -281,6 +289,14 @@ Now that we have extra meta-data on our rows, we can easily filter our :class:`D
It's that simple. The original :class:`Dataset` is untouched.
+Open an Excel Workbook and read first sheet
+-------------------------------------------
+
+To open an Excel 2007 and later workbook with a single sheet (or a workbook with multiple sheets but you just want the first sheet), use the following: ::
+
+    data = tablib.Dataset()
+    data.xlsx = open('my_excel_file.xlsx', 'rb').read()
+    print(data)
Excel Workbook With Multiple Sheets
------------------------------------
@@ -346,7 +362,7 @@ When, it's often useful to create a blank row containing information on the upco
# Write spreadsheet to disk
with open('grades.xls', 'wb') as f:
- f.write(tests.xls)
+ f.write(tests.export('xls'))
The resulting **tests.xls** will have the following layout:
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2fab040
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,21 @@
+certifi==2017.7.27.1
+chardet==3.0.4
+et-xmlfile==1.0.1
+idna==2.6
+jdcal==1.3
+numpy==1.13.1
+odfpy==1.3.5
+openpyxl==2.4.8
+pandas==0.20.3
+pkginfo==1.4.1
+python-dateutil==2.6.1
+pytz==2017.2
+PyYAML==3.12
+requests==2.18.4
+requests-toolbelt==0.8.0
+six==1.10.0
+tqdm==4.15.0
+unicodecsv==0.14.1
+urllib3==1.22
+xlrd==1.1.0
+xlwt==1.3.0
diff --git a/setup.py b/setup.py
index 6fc94ae..7b93754 100755
--- a/setup.py
+++ b/setup.py
@@ -14,15 +14,6 @@ if sys.argv[-1] == 'publish':
os.system("python setup.py sdist upload")
sys.exit()
-if sys.argv[-1] == 'speedups':
- try:
- __import__('pip')
- except ImportError:
- print('Pip required.')
- sys.exit(1)
-
- os.system('pip install ujson')
- sys.exit()
if sys.argv[-1] == 'test':
try:
@@ -43,13 +34,14 @@ packages = [
install = [
'odfpy',
- 'openpyxl',
+ 'openpyxl>=2.4.0',
'backports.csv',
'xlrd',
'xlwt',
'pyyaml',
]
+
with open('tablib/core.py', 'r') as fd:
version = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
fd.read(), re.MULTILINE).group(1)
@@ -71,13 +63,14 @@ setup(
'Natural Language :: English',
'License :: OSI Approved :: MIT License',
'Programming Language :: Python',
- 'Programming Language :: Python :: 2.6',
'Programming Language :: Python :: 2.7',
- 'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
],
tests_require=['pytest'],
install_requires=install,
+ extras_require={
+ 'pandas': ['pandas'],
+ },
)
diff --git a/tablib/compat.py b/tablib/compat.py
index d18a781..660697d 100644
--- a/tablib/compat.py
+++ b/tablib/compat.py
@@ -13,34 +13,24 @@ import sys
is_py3 = (sys.version_info[0] > 2)
-
-try:
- from collections import OrderedDict
-except ImportError:
- from tablib.packages.ordereddict import OrderedDict
-
-
if is_py3:
from io import BytesIO
+ from io import StringIO
from tablib.packages import markup3 as markup
- import tablib.packages.dbfpy3 as dbfpy
-
+ from statistics import median
+ from itertools import zip_longest as izip_longest
import csv
- from io import StringIO
- # py3 mappings
+ import tablib.packages.dbfpy3 as dbfpy
- ifilter = filter
unicode = str
- bytes = bytes
- basestring = str
xrange = range
else:
from cStringIO import StringIO as BytesIO
from StringIO import StringIO
from tablib.packages import markup
- from itertools import ifilter
-
+ from tablib.packages.statistics import median
+ from itertools import izip_longest
from backports import csv
import tablib.packages.dbfpy as dbfpy
diff --git a/tablib/core.py b/tablib/core.py
index b97da54..78c4dce 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -9,20 +9,21 @@
:license: MIT, see LICENSE for more details.
"""
+from collections import OrderedDict
from copy import copy
from operator import itemgetter
from tablib import formats
-from tablib.compat import OrderedDict, unicode
+from tablib.compat import unicode
__title__ = 'tablib'
-__version__ = '0.11.4'
-__build__ = 0x001104
+__version__ = '0.12.1'
+__build__ = 0x001201
__author__ = 'Kenneth Reitz'
__license__ = 'MIT'
-__copyright__ = 'Copyright 2016 Kenneth Reitz'
+__copyright__ = 'Copyright 2017 Kenneth Reitz'
__docformat__ = 'restructuredtext'
@@ -526,9 +527,9 @@ class Dataset(object):
Import assumes (for now) that headers exist.
- .. admonition:: Binary Warning
+ .. admonition:: Binary Warning for Python 2
- :class:`Dataset.csv` uses \\r\\n line endings by default, so make
+ :class:`Dataset.csv` uses \\r\\n line endings by default so, in Python 2, make
sure to write in binary mode::
with open('output.csv', 'wb') as f:
@@ -536,6 +537,18 @@ class Dataset(object):
If you do not do this, and you export the file on Windows, your
CSV file will open in Excel with a blank line between each row.
+
+ .. admonition:: Line endings for Python 3
+
+ :class:`Dataset.csv` uses \\r\\n line endings by default so, in Python 3, make
+ sure to include newline='' otherwise you will get a blank line between each row
+ when you open the file in Excel::
+
+ with open('output.csv', 'w', newline='') as f:
+ f.write(data.csv)
+
+ If you do not do this, and you export the file on Windows, your
+ CSV file will open in Excel with a blank line between each row.
"""
pass
@@ -570,6 +583,18 @@ class Dataset(object):
"""
pass
+ @property
+ def df():
+ """A DataFrame representation of the :class:`Dataset` object.
+
+ A dataset object can also be imported by setting the :class:`Dataset.df` attribute: ::
+
+ data = tablib.Dataset()
+ data.df = DataFrame(np.random.randn(6,4))
+
+ Import assumes (for now) that headers exist.
+ """
+ pass
@property
def json():
@@ -619,7 +644,6 @@ class Dataset(object):
"""
pass
-
@property
def latex():
"""A LaTeX booktabs representation of the :class:`Dataset` object. If a
@@ -629,6 +653,13 @@ class Dataset(object):
"""
pass
+ @property
+ def jira():
+ """A Jira table representation of the :class:`Dataset` object.
+
+ .. note:: This method can be used for export only.
+ """
+ pass
# ----
# Rows
diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py
index 5cca19f..418e607 100644
--- a/tablib/formats/__init__.py
+++ b/tablib/formats/__init__.py
@@ -13,5 +13,8 @@ from . import _xlsx as xlsx
from . import _ods as ods
from . import _dbf as dbf
from . import _latex as latex
+from . import _df as df
+from . import _rst as rst
+from . import _jira as jira
-available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods)
+available = (json, xls, yaml, csv, dbf, tsv, html, jira, latex, xlsx, ods, df, rst)
diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py
index b74afd7..8b536a7 100644
--- a/tablib/formats/_csv.py
+++ b/tablib/formats/_csv.py
@@ -39,7 +39,7 @@ def import_set(dset, in_stream, headers=True, **kwargs):
if (i == 0) and (headers):
dset.headers = row
- else:
+ elif row:
dset.append(row)
diff --git a/tablib/formats/_df.py b/tablib/formats/_df.py
new file mode 100644
index 0000000..44b967f
--- /dev/null
+++ b/tablib/formats/_df.py
@@ -0,0 +1,49 @@
+""" Tablib - DataFrame Support.
+"""
+
+
+import sys
+
+
+if sys.version_info[0] > 2:
+ from io import BytesIO
+else:
+ from cStringIO import StringIO as BytesIO
+
+try:
+ from pandas import DataFrame
+except ImportError:
+ DataFrame = None
+
+import tablib
+
+from tablib.compat import unicode
+
+title = 'df'
+extensions = ('df', )
+
+def detect(stream):
+ """Returns True if given stream is a DataFrame."""
+ if DataFrame is None:
+ return False
+ try:
+ DataFrame(stream)
+ return True
+ except ValueError:
+ return False
+
+
+def export_set(dset, index=None):
+ """Returns DataFrame representation of Dataset."""
+ if DataFrame is None:
+ raise NotImplementedError(
+ 'DataFrame Format requires `pandas` to be installed.'
+ ' Try `pip install tablib[pandas]`.')
+ dataframe = DataFrame(dset.dict, columns=dset.headers)
+ return dataframe
+
+
+def import_set(dset, in_stream):
+ """Returns dataset from DataFrame."""
+ dset.wipe()
+ dset.dict = in_stream.to_dict(orient='records')
diff --git a/tablib/formats/_jira.py b/tablib/formats/_jira.py
new file mode 100644
index 0000000..55fce52
--- /dev/null
+++ b/tablib/formats/_jira.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+"""Tablib - Jira table export support.
+
+ Generates a Jira table from the dataset.
+"""
+from tablib.compat import unicode
+
+title = 'jira'
+
+
+def export_set(dataset):
+ """Formats the dataset according to the Jira table syntax:
+
+ ||heading 1||heading 2||heading 3||
+ |col A1|col A2|col A3|
+ |col B1|col B2|col B3|
+
+ :param dataset: dataset to serialize
+ :type dataset: tablib.core.Dataset
+ """
+
+ header = _get_header(dataset.headers) if dataset.headers else ''
+ body = _get_body(dataset)
+ return '%s\n%s' % (header, body) if header else body
+
+
+def _get_body(dataset):
+ return '\n'.join([_serialize_row(row) for row in dataset])
+
+
+def _get_header(headers):
+ return _serialize_row(headers, delimiter='||')
+
+
+def _serialize_row(row, delimiter='|'):
+ return '%s%s%s' % (delimiter,
+ delimiter.join([unicode(item) if item else ' ' for item in row]),
+ delimiter)
diff --git a/tablib/formats/_json.py b/tablib/formats/_json.py
index a3d6cc3..bbd2c96 100644
--- a/tablib/formats/_json.py
+++ b/tablib/formats/_json.py
@@ -3,36 +3,33 @@
""" Tablib - JSON Support
"""
import decimal
+import json
+from uuid import UUID
import tablib
-try:
- import ujson as json
-except ImportError:
- import json
title = 'json'
extensions = ('json', 'jsn')
-def date_handler(obj):
- if isinstance(obj, decimal.Decimal):
+def serialize_objects_handler(obj):
+ if isinstance(obj, decimal.Decimal) or isinstance(obj, UUID):
return str(obj)
elif hasattr(obj, 'isoformat'):
return obj.isoformat()
else:
return obj
- # return obj.isoformat() if hasattr(obj, 'isoformat') else obj
def export_set(dataset):
"""Returns JSON representation of Dataset."""
- return json.dumps(dataset.dict, default=date_handler)
+ return json.dumps(dataset.dict, default=serialize_objects_handler)
def export_book(databook):
"""Returns JSON representation of Databook."""
- return json.dumps(databook._package(), default=date_handler)
+ return json.dumps(databook._package(), default=serialize_objects_handler)
def import_set(dset, in_stream):
diff --git a/tablib/formats/_rst.py b/tablib/formats/_rst.py
new file mode 100644
index 0000000..4b53ad7
--- /dev/null
+++ b/tablib/formats/_rst.py
@@ -0,0 +1,273 @@
+# -*- coding: utf-8 -*-
+
+""" Tablib - reStructuredText Support
+"""
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from textwrap import TextWrapper
+
+from tablib.compat import (
+ median,
+ unicode,
+ izip_longest,
+)
+
+
+title = 'rst'
+extensions = ('rst',)
+
+
+MAX_TABLE_WIDTH = 80 # Roughly. It may be wider to avoid breaking words.
+
+
+JUSTIFY_LEFT = 'left'
+JUSTIFY_CENTER = 'center'
+JUSTIFY_RIGHT = 'right'
+JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT)
+
+
+def to_unicode(value):
+ if isinstance(value, bytes):
+ return value.decode('utf-8')
+ return unicode(value)
+
+
+def _max_word_len(text):
+ """
+ Return the length of the longest word in `text`.
+
+
+ >>> _max_word_len('Python Module for Tabular Datasets')
+ 8
+
+ """
+ return max((len(word) for word in text.split()))
+
+
+def _get_column_string_lengths(dataset):
+ """
+ Returns a list of string lengths of each column, and a list of
+ maximum word lengths.
+ """
+ if dataset.headers:
+ column_lengths = [[len(h)] for h in dataset.headers]
+ word_lens = [_max_word_len(h) for h in dataset.headers]
+ else:
+ column_lengths = [[] for _ in range(dataset.width)]
+ word_lens = [0 for _ in range(dataset.width)]
+ for row in dataset.dict:
+ values = iter(row.values() if hasattr(row, 'values') else row)
+ for i, val in enumerate(values):
+ text = to_unicode(val)
+ column_lengths[i].append(len(text))
+ word_lens[i] = max(word_lens[i], _max_word_len(text))
+ return column_lengths, word_lens
+
+
+def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT):
+ """
+ Returns a table row of wrapped values as a list of lines
+ """
+ if justify not in JUSTIFY_VALUES:
+ raise ValueError('Value of "justify" must be one of "{}"'.format(
+ '", "'.join(JUSTIFY_VALUES)
+ ))
+ if justify == JUSTIFY_LEFT:
+ just = lambda text, width: text.ljust(width)
+ elif justify == JUSTIFY_CENTER:
+ just = lambda text, width: text.center(width)
+ else:
+ just = lambda text, width: text.rjust(width)
+ lpad = sep + ' ' if sep else ''
+ rpad = ' ' + sep if sep else ''
+ pad = ' ' + sep + ' '
+ cells = []
+ for value, width in zip(values, widths):
+ wrapper.width = width
+ text = to_unicode(value)
+ cell = wrapper.wrap(text)
+ cells.append(cell)
+ lines = izip_longest(*cells, fillvalue='')
+ lines = (
+ (just(cell_line, widths[i]) for i, cell_line in enumerate(line))
+ for line in lines
+ )
+ lines = [''.join((lpad, pad.join(line), rpad)) for line in lines]
+ return lines
+
+
+def _get_column_widths(dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3):
+ """
+ Returns a list of column widths proportional to the median length
+ of the text in their cells.
+ """
+ str_lens, word_lens = _get_column_string_lengths(dataset)
+ median_lens = [int(median(lens)) for lens in str_lens]
+ total = sum(median_lens)
+ if total > max_table_width - (pad_len * len(median_lens)):
+ column_widths = (max_table_width * l // total for l in median_lens)
+ else:
+ column_widths = (l for l in median_lens)
+ # Allow for separator and padding:
+ column_widths = (w - pad_len if w > pad_len else w for w in column_widths)
+ # Rather widen table than break words:
+ column_widths = [max(w, l) for w, l in zip(column_widths, word_lens)]
+ return column_widths
+
+
+def export_set_as_simple_table(dataset, column_widths=None):
+ """
+ Returns reStructuredText simple table representation of dataset.
+ """
+ lines = []
+ wrapper = TextWrapper()
+ if column_widths is None:
+ column_widths = _get_column_widths(dataset, pad_len=2)
+ border = ' '.join(['=' * w for w in column_widths])
+
+ lines.append(border)
+ if dataset.headers:
+ lines.extend(_row_to_lines(
+ dataset.headers,
+ column_widths,
+ wrapper,
+ sep='',
+ justify=JUSTIFY_CENTER,
+ ))
+ lines.append(border)
+ for row in dataset.dict:
+ values = iter(row.values() if hasattr(row, 'values') else row)
+ lines.extend(_row_to_lines(values, column_widths, wrapper, ''))
+ lines.append(border)
+ return '\n'.join(lines)
+
+
+def export_set_as_grid_table(dataset, column_widths=None):
+ """
+ Returns reStructuredText grid table representation of dataset.
+
+
+ >>> from tablib import Dataset
+ >>> from tablib.formats import rst
+ >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
+ >>> data = Dataset()
+ >>> data.headers = ['A', 'B', 'A and B']
+ >>> for a, b in bits:
+ ... data.append([bool(a), bool(b), bool(a * b)])
+ >>> print(rst.export_set(data, force_grid=True))
+ +-------+-------+-------+
+ | A | B | A and |
+ | | | B |
+ +=======+=======+=======+
+ | False | False | False |
+ +-------+-------+-------+
+ | True | False | False |
+ +-------+-------+-------+
+ | False | True | False |
+ +-------+-------+-------+
+ | True | True | True |
+ +-------+-------+-------+
+
+ """
+ lines = []
+ wrapper = TextWrapper()
+ if column_widths is None:
+ column_widths = _get_column_widths(dataset)
+ header_sep = '+=' + '=+='.join(['=' * w for w in column_widths]) + '=+'
+ row_sep = '+-' + '-+-'.join(['-' * w for w in column_widths]) + '-+'
+
+ lines.append(row_sep)
+ if dataset.headers:
+ lines.extend(_row_to_lines(
+ dataset.headers,
+ column_widths,
+ wrapper,
+ justify=JUSTIFY_CENTER,
+ ))
+ lines.append(header_sep)
+ for row in dataset.dict:
+ values = iter(row.values() if hasattr(row, 'values') else row)
+ lines.extend(_row_to_lines(values, column_widths, wrapper))
+ lines.append(row_sep)
+ return '\n'.join(lines)
+
+
+def _use_simple_table(head0, col0, width0):
+ """
+ Use a simple table if the text in the first column is never wrapped
+
+
+ >>> _use_simple_table('menu', ['egg', 'bacon'], 10)
+ True
+ >>> _use_simple_table(None, ['lobster thermidor', 'spam'], 10)
+ False
+
+ """
+ if head0 is not None:
+ head0 = to_unicode(head0)
+ if len(head0) > width0:
+ return False
+ for cell in col0:
+ cell = to_unicode(cell)
+ if len(cell) > width0:
+ return False
+ return True
+
+
+def export_set(dataset, **kwargs):
+ """
+ Returns reStructuredText table representation of dataset.
+
+ Returns a simple table if the text in the first column is never
+ wrapped, otherwise returns a grid table.
+
+
+ >>> from tablib import Dataset
+ >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1))
+ >>> data = Dataset()
+ >>> data.headers = ['A', 'B', 'A and B']
+ >>> for a, b in bits:
+ ... data.append([bool(a), bool(b), bool(a * b)])
+ >>> table = data.rst
+ >>> table.split('\\n') == [
+ ... '===== ===== =====',
+ ... ' A B A and',
+ ... ' B ',
+ ... '===== ===== =====',
+ ... 'False False False',
+ ... 'True False False',
+ ... 'False True False',
+ ... 'True True True ',
+ ... '===== ===== =====',
+ ... ]
+ True
+
+ """
+ if not dataset.dict:
+ return ''
+ force_grid = kwargs.get('force_grid', False)
+ max_table_width = kwargs.get('max_table_width', MAX_TABLE_WIDTH)
+ column_widths = _get_column_widths(dataset, max_table_width)
+
+ use_simple_table = _use_simple_table(
+ dataset.headers[0] if dataset.headers else None,
+ dataset.get_col(0),
+ column_widths[0],
+ )
+ if use_simple_table and not force_grid:
+ return export_set_as_simple_table(dataset, column_widths)
+ else:
+ return export_set_as_grid_table(dataset, column_widths)
+
+
+def export_book(databook):
+ """
+ reStructuredText representation of a Databook.
+
+ Tables are separated by a blank line. All tables use the grid
+ format.
+ """
+ return '\n\n'.join(export_set(dataset, force_grid=True)
+ for dataset in databook._datasets)
diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py
index 20f55df..816fd37 100644
--- a/tablib/formats/_xlsx.py
+++ b/tablib/formats/_xlsx.py
@@ -52,7 +52,7 @@ def export_book(databook, freeze_panes=True):
wb = Workbook()
for sheet in wb.worksheets:
- wb.remove_sheet(sheet)
+ wb.remove(sheet)
for i, dset in enumerate(databook._datasets):
ws = wb.create_sheet()
ws.title = dset.title if dset.title else 'Sheet%s' % (i)
@@ -71,7 +71,7 @@ def import_set(dset, in_stream, headers=True):
dset.wipe()
xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream))
- sheet = xls_book.get_active_sheet()
+ sheet = xls_book.active
dset.title = sheet.title
@@ -119,7 +119,7 @@ def dset_sheet(dataset, ws, freeze_panes=True):
row_number = i + 1
for j, col in enumerate(row):
col_idx = get_column_letter(j + 1)
- cell = ws.cell('%s%s' % (col_idx, row_number))
+ cell = ws['%s%s' % (col_idx, row_number)]
# bold headers
if (row_number == 1) and dataset.headers:
diff --git a/tablib/formats/_yaml.py b/tablib/formats/_yaml.py
index 5aecb42..3d17baf 100644
--- a/tablib/formats/_yaml.py
+++ b/tablib/formats/_yaml.py
@@ -33,7 +33,7 @@ def import_book(dbook, in_stream):
dbook.wipe()
- for sheet in yaml.load(in_stream):
+ for sheet in yaml.safe_load(in_stream):
data = tablib.Dataset()
data.title = sheet['title']
data.dict = sheet['data']
diff --git a/tablib/packages/ordereddict.py b/tablib/packages/ordereddict.py
deleted file mode 100644
index a5b896d..0000000
--- a/tablib/packages/ordereddict.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Copyright (c) 2009 Raymond Hettinger
-#
-# Permission is hereby granted, free of charge, to any person
-# obtaining a copy of this software and associated documentation files
-# (the "Software"), to deal in the Software without restriction,
-# including without limitation the rights to use, copy, modify, merge,
-# publish, distribute, sublicense, and/or sell copies of the Software,
-# and to permit persons to whom the Software is furnished to do so,
-# subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be
-# included in all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-# OTHER DEALINGS IN THE SOFTWARE.
-
-from UserDict import DictMixin
-
-class OrderedDict(dict, DictMixin):
-
- def __init__(self, *args, **kwds):
- if len(args) > 1:
- raise TypeError('expected at most 1 arguments, got %d' % len(args))
- try:
- self.__end
- except AttributeError:
- self.clear()
- self.update(*args, **kwds)
-
- def clear(self):
- self.__end = end = []
- end += [None, end, end] # sentinel node for doubly linked list
- self.__map = {} # key --> [key, prev, next]
- dict.clear(self)
-
- def __setitem__(self, key, value):
- if key not in self:
- end = self.__end
- curr = end[1]
- curr[2] = end[1] = self.__map[key] = [key, curr, end]
- dict.__setitem__(self, key, value)
-
- def __delitem__(self, key):
- dict.__delitem__(self, key)
- key, prev, next = self.__map.pop(key)
- prev[2] = next
- next[1] = prev
-
- def __iter__(self):
- end = self.__end
- curr = end[2]
- while curr is not end:
- yield curr[0]
- curr = curr[2]
-
- def __reversed__(self):
- end = self.__end
- curr = end[1]
- while curr is not end:
- yield curr[0]
- curr = curr[1]
-
- def popitem(self, last=True):
- if not self:
- raise KeyError('dictionary is empty')
- if last:
- key = next(reversed(self))
- else:
- key = next(iter(self))
- value = self.pop(key)
- return key, value
-
- def __reduce__(self):
- items = [[k, self[k]] for k in self]
- tmp = self.__map, self.__end
- del self.__map, self.__end
- inst_dict = vars(self).copy()
- self.__map, self.__end = tmp
- if inst_dict:
- return (self.__class__, (items,), inst_dict)
- return self.__class__, (items,)
-
- def keys(self):
- return list(self)
-
- setdefault = DictMixin.setdefault
- update = DictMixin.update
- pop = DictMixin.pop
- values = DictMixin.values
- items = DictMixin.items
- iterkeys = DictMixin.iterkeys
- itervalues = DictMixin.itervalues
- iteritems = DictMixin.iteritems
-
- def __repr__(self):
- if not self:
- return '%s()' % (self.__class__.__name__,)
- return '%s(%r)' % (self.__class__.__name__, list(self.items()))
-
- def copy(self):
- return self.__class__(self)
-
- @classmethod
- def fromkeys(cls, iterable, value=None):
- d = cls()
- for key in iterable:
- d[key] = value
- return d
-
- def __eq__(self, other):
- if isinstance(other, OrderedDict):
- if len(self) != len(other):
- return False
- for p, q in zip(list(self.items()), list(other.items())):
- if p != q:
- return False
- return True
- return dict.__eq__(self, other)
-
- def __ne__(self, other):
- return not self == other
diff --git a/tablib/packages/statistics.py b/tablib/packages/statistics.py
new file mode 100644
index 0000000..e97a6c9
--- /dev/null
+++ b/tablib/packages/statistics.py
@@ -0,0 +1,24 @@
+from __future__ import division
+
+
+def median(data):
+ """
+ Return the median (middle value) of numeric data, using the common
+ "mean of middle two" method. If data is empty, ValueError is raised.
+
+ Mimics the behaviour of Python3's statistics.median
+
+ >>> median([1, 3, 5])
+ 3
+ >>> median([1, 3, 5, 7])
+ 4.0
+
+ """
+ data = sorted(data)
+ n = len(data)
+ if not n:
+ raise ValueError("No median for empty data")
+ i = n // 2
+ if n % 2:
+ return data[i]
+ return (data[i - 1] + data[i]) / 2
diff --git a/test_tablib.py b/test_tablib.py
index 96cd884..e7b7233 100755
--- a/test_tablib.py
+++ b/test_tablib.py
@@ -4,9 +4,11 @@
from __future__ import unicode_literals
import datetime
+import doctest
import json
import sys
import unittest
+from uuid import uuid4
import tablib
from tablib.compat import markup, unicode, is_py3
@@ -227,6 +229,22 @@ class TablibTestCase(unittest.TestCase):
# Delete from invalid index
self.assertRaises(IndexError, self.founders.__delitem__, 3)
+ def test_json_export(self):
+ """Verify exporting dataset object as JSON"""
+
+ address_id = uuid4()
+ headers = self.headers + ('address_id',)
+ founders = tablib.Dataset(headers=headers, title='Founders')
+ founders.append(('John', 'Adams', 90, address_id))
+ founders_json = founders.export('json')
+
+ expected_json = (
+ '[{"first_name": "John", "last_name": "Adams", "gpa": 90, '
+ '"address_id": "%s"}]' % str(address_id)
+ )
+
+ self.assertEqual(founders_json, expected_json)
+
def test_csv_export(self):
"""Verify exporting dataset object as CSV."""
@@ -299,6 +317,23 @@ class TablibTestCase(unittest.TestCase):
self.assertEqual(html, d.html)
+ def test_jira_export(self):
+
+ expected = """||first_name||last_name||gpa||
+|John|Adams|90|
+|George|Washington|67|
+|Thomas|Jefferson|50|"""
+ self.assertEqual(expected, self.founders.jira)
+
+ def test_jira_export_no_headers(self):
+ self.assertEqual('|a|b|c|', tablib.Dataset(['a', 'b', 'c']).jira)
+
+ def test_jira_export_none_and_empty_values(self):
+ self.assertEqual('| | |c|', tablib.Dataset(['', None, 'c']).jira)
+
+ def test_jira_export_empty_dataset(self):
+ self.assertTrue(tablib.Dataset().jira is not None)
+
def test_latex_export(self):
"""LaTeX export"""
@@ -382,7 +417,10 @@ class TablibTestCase(unittest.TestCase):
data.xlsx
data.ods
data.html
+ data.jira
data.latex
+ data.df
+ data.rst
def test_datetime_append(self):
"""Passes in a single datetime and a single date and exports."""
@@ -402,7 +440,9 @@ class TablibTestCase(unittest.TestCase):
data.xlsx
data.ods
data.html
+ data.jira
data.latex
+ data.rst
def test_book_export_no_exceptions(self):
"""Test that various exports don't error out."""
@@ -416,6 +456,7 @@ class TablibTestCase(unittest.TestCase):
book.xlsx
book.ods
book.html
+ book.rst
def test_json_import_set(self):
"""Generate and import JSON set serialization."""
@@ -970,6 +1011,24 @@ class TablibTestCase(unittest.TestCase):
self.founders.append(('First\nSecond', 'Name', 42))
self.founders.export('xlsx')
+ def test_rst_force_grid(self):
+ data.append(self.john)
+ data.append(self.george)
+ data.headers = self.headers
+
+ simple = tablib.formats._rst.export_set(data)
+ grid = tablib.formats._rst.export_set(data, force_grid=True)
+ self.assertNotEqual(simple, grid)
+ self.assertNotIn('+', simple)
+ self.assertIn('+', grid)
+
+
+class DocTests(unittest.TestCase):
+
+ def test_rst_formatter_doctests(self):
+ results = doctest.testmod(tablib.formats._rst)
+ self.assertEqual(results.failed, 0)
+
if __name__ == '__main__':
unittest.main()
diff --git a/tox.ini b/tox.ini
index 3e1d6a2..4dabf0c 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,11 +1,8 @@
-# Tox (http://tox.testrun.org/) is a tool for running tests
-# in multiple virtualenvs. This configuration file will run the
-# test suite on all supported python versions. To use it, "pip install tox"
-# and then run "tox" from this directory.
-
[tox]
-envlist = py26, py27, py33, py34, py35, py36, pypy
+minversion = 2.4
+envlist = py27, py34, py35, py36, pypy
[testenv]
-commands = python setup.py test
deps = pytest
+extras = pandas
+commands = python setup.py test