diff options
| author | Norman Hooper <nhooper@dimagi.com> | 2018-09-12 20:27:10 +0200 |
|---|---|---|
| committer | Iuri de Silvio <iurisilvio@gmail.com> | 2018-09-12 15:27:10 -0300 |
| commit | 38486231cc4dc46d1ff055a38fe7c236cbe5c8aa (patch) | |
| tree | cfa105d916117d8b43d87cbd331be46c947b3805 /tablib | |
| parent | 75f1bafd69add49d969b0f8e579baf3659f16100 (diff) | |
| download | tablib-38486231cc4dc46d1ff055a38fe7c236cbe5c8aa.tar.gz | |
reStructuredText (#336)
* median for Python 2
* More compat
* Support reStructuredText
* Tests
Diffstat (limited to 'tablib')
| -rw-r--r-- | tablib/compat.py | 5 | ||||
| -rw-r--r-- | tablib/formats/__init__.py | 3 | ||||
| -rw-r--r-- | tablib/formats/_rst.py | 273 | ||||
| -rw-r--r-- | tablib/packages/statistics.py | 24 |
4 files changed, 303 insertions, 2 deletions
diff --git a/tablib/compat.py b/tablib/compat.py index 43e0bbc..d60ce72 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -22,6 +22,8 @@ except ImportError: if is_py3: from io import BytesIO + from itertools import zip_longest as izip_longest + from statistics import median from tablib.packages import markup3 as markup import tablib.packages.dbfpy3 as dbfpy @@ -39,7 +41,8 @@ else: from cStringIO import StringIO as BytesIO from cStringIO import StringIO from tablib.packages import markup - from itertools import ifilter + from tablib.packages.statistics import median + from itertools import ifilter, izip_longest import unicodecsv as csv import tablib.packages.dbfpy as dbfpy diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 94b5bc9..3eb747e 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -14,5 +14,6 @@ from . import _ods as ods from . import _dbf as dbf from . import _latex as latex from . import _df as df +from . import _rst as rst -available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, df) +available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods, df, rst) diff --git a/tablib/formats/_rst.py b/tablib/formats/_rst.py new file mode 100644 index 0000000..4b53ad7 --- /dev/null +++ b/tablib/formats/_rst.py @@ -0,0 +1,273 @@ +# -*- coding: utf-8 -*- + +""" Tablib - reStructuredText Support +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +from textwrap import TextWrapper + +from tablib.compat import ( + median, + unicode, + izip_longest, +) + + +title = 'rst' +extensions = ('rst',) + + +MAX_TABLE_WIDTH = 80 # Roughly. It may be wider to avoid breaking words. + + +JUSTIFY_LEFT = 'left' +JUSTIFY_CENTER = 'center' +JUSTIFY_RIGHT = 'right' +JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT) + + +def to_unicode(value): + if isinstance(value, bytes): + return value.decode('utf-8') + return unicode(value) + + +def _max_word_len(text): + """ + Return the length of the longest word in `text`. + + + >>> _max_word_len('Python Module for Tabular Datasets') + 8 + + """ + return max((len(word) for word in text.split())) + + +def _get_column_string_lengths(dataset): + """ + Returns a list of string lengths of each column, and a list of + maximum word lengths. + """ + if dataset.headers: + column_lengths = [[len(h)] for h in dataset.headers] + word_lens = [_max_word_len(h) for h in dataset.headers] + else: + column_lengths = [[] for _ in range(dataset.width)] + word_lens = [0 for _ in range(dataset.width)] + for row in dataset.dict: + values = iter(row.values() if hasattr(row, 'values') else row) + for i, val in enumerate(values): + text = to_unicode(val) + column_lengths[i].append(len(text)) + word_lens[i] = max(word_lens[i], _max_word_len(text)) + return column_lengths, word_lens + + +def _row_to_lines(values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT): + """ + Returns a table row of wrapped values as a list of lines + """ + if justify not in JUSTIFY_VALUES: + raise ValueError('Value of "justify" must be one of "{}"'.format( + '", "'.join(JUSTIFY_VALUES) + )) + if justify == JUSTIFY_LEFT: + just = lambda text, width: text.ljust(width) + elif justify == JUSTIFY_CENTER: + just = lambda text, width: text.center(width) + else: + just = lambda text, width: text.rjust(width) + lpad = sep + ' ' if sep else '' + rpad = ' ' + sep if sep else '' + pad = ' ' + sep + ' ' + cells = [] + for value, width in zip(values, widths): + wrapper.width = width + text = to_unicode(value) + cell = wrapper.wrap(text) + cells.append(cell) + lines = izip_longest(*cells, fillvalue='') + lines = ( + (just(cell_line, widths[i]) for i, cell_line in enumerate(line)) + for line in lines + ) + lines = [''.join((lpad, pad.join(line), rpad)) for line in lines] + return lines + + +def _get_column_widths(dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3): + """ + Returns a list of column widths proportional to the median length + of the text in their cells. + """ + str_lens, word_lens = _get_column_string_lengths(dataset) + median_lens = [int(median(lens)) for lens in str_lens] + total = sum(median_lens) + if total > max_table_width - (pad_len * len(median_lens)): + column_widths = (max_table_width * l // total for l in median_lens) + else: + column_widths = (l for l in median_lens) + # Allow for separator and padding: + column_widths = (w - pad_len if w > pad_len else w for w in column_widths) + # Rather widen table than break words: + column_widths = [max(w, l) for w, l in zip(column_widths, word_lens)] + return column_widths + + +def export_set_as_simple_table(dataset, column_widths=None): + """ + Returns reStructuredText grid table representation of dataset. + """ + lines = [] + wrapper = TextWrapper() + if column_widths is None: + column_widths = _get_column_widths(dataset, pad_len=2) + border = ' '.join(['=' * w for w in column_widths]) + + lines.append(border) + if dataset.headers: + lines.extend(_row_to_lines( + dataset.headers, + column_widths, + wrapper, + sep='', + justify=JUSTIFY_CENTER, + )) + lines.append(border) + for row in dataset.dict: + values = iter(row.values() if hasattr(row, 'values') else row) + lines.extend(_row_to_lines(values, column_widths, wrapper, '')) + lines.append(border) + return '\n'.join(lines) + + +def export_set_as_grid_table(dataset, column_widths=None): + """ + Returns reStructuredText grid table representation of dataset. + + + >>> from tablib import Dataset + >>> from tablib.formats import rst + >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) + >>> data = Dataset() + >>> data.headers = ['A', 'B', 'A and B'] + >>> for a, b in bits: + ... data.append([bool(a), bool(b), bool(a * b)]) + >>> print(rst.export_set(data, force_grid=True)) + +-------+-------+-------+ + | A | B | A and | + | | | B | + +=======+=======+=======+ + | False | False | False | + +-------+-------+-------+ + | True | False | False | + +-------+-------+-------+ + | False | True | False | + +-------+-------+-------+ + | True | True | True | + +-------+-------+-------+ + + """ + lines = [] + wrapper = TextWrapper() + if column_widths is None: + column_widths = _get_column_widths(dataset) + header_sep = '+=' + '=+='.join(['=' * w for w in column_widths]) + '=+' + row_sep = '+-' + '-+-'.join(['-' * w for w in column_widths]) + '-+' + + lines.append(row_sep) + if dataset.headers: + lines.extend(_row_to_lines( + dataset.headers, + column_widths, + wrapper, + justify=JUSTIFY_CENTER, + )) + lines.append(header_sep) + for row in dataset.dict: + values = iter(row.values() if hasattr(row, 'values') else row) + lines.extend(_row_to_lines(values, column_widths, wrapper)) + lines.append(row_sep) + return '\n'.join(lines) + + +def _use_simple_table(head0, col0, width0): + """ + Use a simple table if the text in the first column is never wrapped + + + >>> _use_simple_table('menu', ['egg', 'bacon'], 10) + True + >>> _use_simple_table(None, ['lobster thermidor', 'spam'], 10) + False + + """ + if head0 is not None: + head0 = to_unicode(head0) + if len(head0) > width0: + return False + for cell in col0: + cell = to_unicode(cell) + if len(cell) > width0: + return False + return True + + +def export_set(dataset, **kwargs): + """ + Returns reStructuredText table representation of dataset. + + Returns a simple table if the text in the first column is never + wrapped, otherwise returns a grid table. + + + >>> from tablib import Dataset + >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) + >>> data = Dataset() + >>> data.headers = ['A', 'B', 'A and B'] + >>> for a, b in bits: + ... data.append([bool(a), bool(b), bool(a * b)]) + >>> table = data.rst + >>> table.split('\\n') == [ + ... '===== ===== =====', + ... ' A B A and', + ... ' B ', + ... '===== ===== =====', + ... 'False False False', + ... 'True False False', + ... 'False True False', + ... 'True True True ', + ... '===== ===== =====', + ... ] + True + + """ + if not dataset.dict: + return '' + force_grid = kwargs.get('force_grid', False) + max_table_width = kwargs.get('max_table_width', MAX_TABLE_WIDTH) + column_widths = _get_column_widths(dataset, max_table_width) + + use_simple_table = _use_simple_table( + dataset.headers[0] if dataset.headers else None, + dataset.get_col(0), + column_widths[0], + ) + if use_simple_table and not force_grid: + return export_set_as_simple_table(dataset, column_widths) + else: + return export_set_as_grid_table(dataset, column_widths) + + +def export_book(databook): + """ + reStructuredText representation of a Databook. + + Tables are separated by a blank line. All tables use the grid + format. + """ + return '\n\n'.join(export_set(dataset, force_grid=True) + for dataset in databook._datasets) diff --git a/tablib/packages/statistics.py b/tablib/packages/statistics.py new file mode 100644 index 0000000..e97a6c9 --- /dev/null +++ b/tablib/packages/statistics.py @@ -0,0 +1,24 @@ +from __future__ import division + + +def median(data): + """ + Return the median (middle value) of numeric data, using the common + "mean of middle two" method. If data is empty, ValueError is raised. + + Mimics the behaviour of Python3's statistics.median + + >>> median([1, 3, 5]) + 3 + >>> median([1, 3, 5, 7]) + 4.0 + + """ + data = sorted(data) + n = len(data) + if not n: + raise ValueError("No median for empty data") + i = n // 2 + if n % 2: + return data[i] + return (data[i - 1] + data[i]) / 2 |
