tablib/formats/_latex.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

# -*- coding: utf-8 -*-

"""Tablib - LaTeX table export support.

   Generates a LaTeX booktabs-style table from the dataset.
"""
import re

from tablib.compat import unicode

# Format name and the file extensions associated with this export format.
title = 'latex'
extensions = ('tex',)

# %-style template for the complete table. Literal percent signs (TeX comment
# markers) are doubled so that they survive the string formatting step.
TABLE_TEMPLATE = """\
%% Note: add \\usepackage{booktabs} to your preamble
%%
\\begin{table}[!htbp]
  \\centering
  %(CAPTION)s
  \\begin{tabular}{%(COLSPEC)s}
    \\toprule
%(HEADER)s
    %(MIDRULE)s
%(BODY)s
    \\bottomrule
  \\end{tabular}
\\end{table}
"""

# Maps each TeX reserved symbol to the text that renders it literally.
TEX_RESERVED_SYMBOLS_MAP = {
    '\\': '\\textbackslash{}',
    '{': '\\{',
    '}': '\\}',
    '$': '\\$',
    '&': '\\&',
    '#': '\\#',
    '^': '\\textasciicircum{}',
    '_': '\\_',
    '~': '\\textasciitilde{}',
    '%': '\\%',
}

# Single-group alternation matching any one of the reserved symbols above.
TEX_RESERVED_SYMBOLS_RE = re.compile(
    '(%s)' % '|'.join(re.escape(symbol)
                      for symbol in TEX_RESERVED_SYMBOLS_MAP))


def export_set(dataset):
    """Renders the dataset as a LaTeX booktabs-style table.

    The dataset title (if any) becomes the table caption, the headers (if
    any) form the first row and every dataset row becomes one table row.

    :param dataset: dataset to serialize
    :type dataset: tablib.core.Dataset
    :returns: the filled-in table template as a string
    """

    if dataset.title:
        caption = '\\caption{%s}' % dataset.title
    else:
        # Without a title only a bare TeX comment marker is emitted.
        caption = '%'

    colspec = _colspec(dataset.width)

    header = ''
    if dataset.headers:
        header = _serialize_row(dataset.headers)

    midrule = _midrule(dataset.width)
    body_lines = [_serialize_row(row) for row in dataset]

    return TABLE_TEMPLATE % {
        'CAPTION': caption,
        'COLSPEC': colspec,
        'HEADER': header,
        'MIDRULE': midrule,
        'BODY': '\n'.join(body_lines),
    }


def _colspec(dataset_width):
    """Builds the column specification for the LaTeX `tabular` environment.

    The result always starts with one left-justified column (``l``); every
    additional column of the dataset is right-justified (``r``).

    .. note:: This is only a heuristic and most probably has to be fine-tuned
    post export. Column alignment should depend on the data type, e.g., textual
    content should usually be aligned to the left while numeric content almost
    always should be aligned to the right.

    :param dataset_width: width of the dataset
    """

    # Repeating a string a non-positive number of times yields '', so widths
    # of 0 and 1 both produce just the leading 'l'.
    trailing_columns = 'r' * (dataset_width - 1)
    return 'l' + trailing_columns


def _midrule(dataset_width):
    """Builds the rule that is placed between the header and the body.

    For an empty or single-column dataset this is a plain `midrule`; for
    wider datasets it is one `cmidrule` per column, separated by spaces.

    :param dataset_width: width of the dataset to serialize
    """

    if dataset_width and dataset_width != 1:
        # Column numbers are 1-based.
        column_rules = (_cmidrule(position, dataset_width)
                        for position in range(1, dataset_width + 1))
        return ' '.join(column_rules)
    return '\\midrule'


def _cmidrule(colindex, dataset_width):
    """Builds the `cmidrule` spanning exactly one column, trimmed according
    to where the column sits in the table.

    :param colindex: Column index (the first column is 1)
    :param dataset_width: width of the dataset
    """

    if colindex == 1:
        # First column: trim the rule on its right end only
        trim = 'r'
    elif colindex == dataset_width:
        # Last column: trim the rule on its left end only
        trim = 'l'
    else:
        # Inner column: trim both ends
        trim = 'lr'
    return '\\cmidrule(%s){%d-%d}' % (trim, colindex, colindex)


def _serialize_row(row):
    """Returns string representation of a single row.

    Each cell is converted to text and has its TeX reserved symbols escaped.
    The cells are then joined with the column separator, indented by six
    spaces and terminated with the LaTeX end-of-row marker.

    Only ``None`` cells are rendered as empty cells. Other falsy values such
    as ``0``, ``0.0`` or ``False`` are real data and are written out, instead
    of being silently dropped from the table.

    :param row: single dataset row
    :returns: the LaTeX source line for the row
    """

    # Test against None explicitly: a plain truthiness check would also blank
    # out legitimate zero values.
    new_row = ['' if item is None else
               _escape_tex_reserved_symbols(unicode(item)) for item in row]
    return 6 * ' ' + ' & '.join(new_row) + ' \\\\'


def _escape_tex_reserved_symbols(input):
    """Returns the string with every TeX reserved symbol ('_', '~', etc.)
    replaced by its escaped counterpart.

    :param input: String to escape
    """
    # Each matched symbol is looked up in the module-level replacement map.
    return TEX_RESERVED_SYMBOLS_RE.sub(
        lambda found: TEX_RESERVED_SYMBOLS_MAP[found.group()], input)