Add project release config and cleanup project setup. (#398)

* Add project release config and use Travis build stages. Refs #378. * Restructure project to use src/ and tests/ directories. * Fix testing. * Remove eggs. * More fixes. - isort and flake8 config - manifest template update - tox ini extension - docs build fixes - docs content fixes * Docs and license cleanup.
author: Jannis Leidel <jannis@leidel.info> 2019-10-18 15:57:13 +0200
committer: GitHub <noreply@github.com> 2019-10-18 15:57:13 +0200
commit: f6bf14afd22d8e5b706670590cc95f29d4483434 (patch)
tree: a65fbc8b6fd1112222beb6f66c31082ffe231460 /src/tablib/core.py
parent: f3d02aa3b088e2f9044fe5e4869e8c8cb91d2cdc (diff)
download: tablib-f6bf14afd22d8e5b706670590cc95f29d4483434.tar.gz
1 files changed, 1160 insertions, 0 deletions
diff --git a/src/tablib/core.py b/src/tablib/core.py
new file mode 100644
index 0000000..65dd901
--- /dev/null
+++ b/src/tablib/core.py
@@ -0,0 +1,1160 @@
+# -*- coding: utf-8 -*-
+"""
+    tablib.core
+    ~~~~~~~~~~~
+
+    This module implements the central Tablib objects.
+
+    :copyright: (c) 2016 by Kenneth Reitz. 2019 Jazzband.
+    :license: MIT, see LICENSE for more details.
+"""
+
+from collections import OrderedDict
+from copy import copy
+from operator import itemgetter
+
+from tablib import formats
+
+from tablib.compat import unicode
+
+
+__title__ = 'tablib'
+__author__ = 'Kenneth Reitz'
+__license__ = 'MIT'
+__copyright__ = 'Copyright 2017 Kenneth Reitz. 2019 Jazzband.'
+__docformat__ = 'restructuredtext'
+
+
+class Row(object):
+    """Internal Row object. Mainly used for filtering."""
+
+    __slots__ = ['_row', 'tags']
+
+    def __init__(self, row=list(), tags=list()):
+        self._row = list(row)
+        self.tags = list(tags)
+
+    def __iter__(self):
+        return (col for col in self._row)
+
+    def __len__(self):
+        return len(self._row)
+
+    def __repr__(self):
+        return repr(self._row)
+
+    def __getslice__(self, i, j):
+        return self._row[i:j]
+
+    def __getitem__(self, i):
+        return self._row[i]
+
+    def __setitem__(self, i, value):
+        self._row[i] = value
+
+    def __delitem__(self, i):
+        del self._row[i]
+
+    def __getstate__(self):
+
+        slots = dict()
+
+        for slot in self.__slots__:
+            attribute = getattr(self, slot)
+            slots[slot] = attribute
+
+        return slots
+
+    def __setstate__(self, state):
+        for (k, v) in list(state.items()): setattr(self, k, v)
+
+    def rpush(self, value):
+        self.insert(0, value)
+
+    def lpush(self, value):
+        self.insert(len(value), value)
+
+    def append(self, value):
+        self.rpush(value)
+
+    def insert(self, index, value):
+        self._row.insert(index, value)
+
+    def __contains__(self, item):
+        return (item in self._row)
+
+    @property
+    def tuple(self):
+        """Tuple representation of :class:`Row`."""
+        return tuple(self._row)
+
+    @property
+    def list(self):
+        """List representation of :class:`Row`."""
+        return list(self._row)
+
+    def has_tag(self, tag):
+        """Returns true if current row contains tag."""
+
+        if tag == None:
+            return False
+        elif isinstance(tag, str):
+            return (tag in self.tags)
+        else:
+            return bool(len(set(tag) & set(self.tags)))
+
+
+class Dataset(object):
+    """The :class:`Dataset` object is the heart of Tablib. It provides all core
+    functionality.
+
+    Usually you create a :class:`Dataset` instance in your main module, and append
+    rows as you collect data. ::
+
+        data = tablib.Dataset()
+        data.headers = ('name', 'age')
+
+        for (name, age) in some_collector():
+            data.append((name, age))
+
+
+    Setting columns is similar. The column data length must equal the
+    current height of the data and headers must be set ::
+
+        data = tablib.Dataset()
+        data.headers = ('first_name', 'last_name')
+
+        data.append(('John', 'Adams'))
+        data.append(('George', 'Washington'))
+
+        data.append_col((90, 67), header='age')
+
+
+    You can also set rows and headers upon instantiation. This is useful if
+    dealing with dozens or hundreds of :class:`Dataset` objects. ::
+
+        headers = ('first_name', 'last_name')
+        data = [('John', 'Adams'), ('George', 'Washington')]
+
+        data = tablib.Dataset(*data, headers=headers)
+
+    :param \\*args: (optional) list of rows to populate Dataset
+    :param headers: (optional) list strings for Dataset header row
+    :param title: (optional) string to use as title of the Dataset
+
+
+    .. admonition:: Format Attributes Definition
+
+     If you look at the code, the various output/import formats are not
+     defined within the :class:`Dataset` object. To add support for a new format, see
+     :ref:`Adding New Formats <newformats>`.
+
+    """
+
+    _formats = {}
+
+    def __init__(self, *args, **kwargs):
+        self._data = list(Row(arg) for arg in args)
+        self.__headers = None
+
+        # ('title', index) tuples
+        self._separators = []
+
+        # (column, callback) tuples
+        self._formatters = []
+
+        self.headers = kwargs.get('headers')
+
+        self.title = kwargs.get('title')
+
+        self._register_formats()
+
+    def __len__(self):
+        return self.height
+
+    def __getitem__(self, key):
+        if isinstance(key, (str, unicode)):
+            if key in self.headers:
+                pos = self.headers.index(key) # get 'key' index from each data
+                return [row[pos] for row in self._data]
+            else:
+                raise KeyError
+        else:
+            _results = self._data[key]
+            if isinstance(_results, Row):
+                return _results.tuple
+            else:
+                return [result.tuple for result in _results]
+
+    def __setitem__(self, key, value):
+        self._validate(value)
+        self._data[key] = Row(value)
+
+    def __delitem__(self, key):
+        if isinstance(key, (str, unicode)):
+
+            if key in self.headers:
+
+                pos = self.headers.index(key)
+                del self.headers[pos]
+
+                for i, row in enumerate(self._data):
+
+                    del row[pos]
+                    self._data[i] = row
+            else:
+                raise KeyError
+        else:
+            del self._data[key]
+
+    def __repr__(self):
+        try:
+            return '<%s dataset>' % (self.title.lower())
+        except AttributeError:
+            return '<dataset object>'
+
+    def __unicode__(self):
+        result = []
+
+        # Add unicode representation of headers.
+        if self.__headers:
+            result.append([unicode(h) for h in self.__headers])
+
+        # Add unicode representation of rows.
+        result.extend(list(map(unicode, row)) for row in self._data)
+
+        lens = [list(map(len, row)) for row in result]
+        field_lens = list(map(max, zip(*lens)))
+
+        # delimiter between header and data
+        if self.__headers:
+            result.insert(1, ['-' * length for length in field_lens])
+
+        format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens))
+
+        return '\n'.join(format_string.format(*row) for row in result)
+
+    def __str__(self):
+        return self.__unicode__()
+
+    # ---------
+    # Internals
+    # ---------
+
+    @classmethod
+    def _register_formats(cls):
+        """Adds format properties."""
+        for fmt in formats.available:
+            try:
+                try:
+                    setattr(cls, fmt.title, property(fmt.export_set, fmt.import_set))
+                    setattr(cls, 'get_%s' % fmt.title, fmt.export_set)
+                    setattr(cls, 'set_%s' % fmt.title, fmt.import_set)
+                    cls._formats[fmt.title] = (fmt.export_set, fmt.import_set)
+                except AttributeError:
+                    setattr(cls, fmt.title, property(fmt.export_set))
+                    setattr(cls, 'get_%s' % fmt.title, fmt.export_set)
+                    cls._formats[fmt.title] = (fmt.export_set, None)
+
+            except AttributeError:
+                cls._formats[fmt.title] = (None, None)
+
+    def _validate(self, row=None, col=None, safety=False):
+        """Assures size of every row in dataset is of proper proportions."""
+        if row:
+            is_valid = (len(row) == self.width) if self.width else True
+        elif col:
+            if len(col) < 1:
+                is_valid = True
+            else:
+                is_valid = (len(col) == self.height) if self.height else True
+        else:
+            is_valid = all((len(x) == self.width for x in self._data))
+
+        if is_valid:
+            return True
+        else:
+            if not safety:
+                raise InvalidDimensions
+            return False
+
+    def _package(self, dicts=True, ordered=True):
+        """Packages Dataset into lists of dictionaries for transmission."""
+        # TODO: Dicts default to false?
+
+        _data = list(self._data)
+
+        if ordered:
+            dict_pack = OrderedDict
+        else:
+            dict_pack = dict
+
+        # Execute formatters
+        if self._formatters:
+            for row_i, row in enumerate(_data):
+                for col, callback in self._formatters:
+                    try:
+                        if col is None:
+                            for j, c in enumerate(row):
+                                _data[row_i][j] = callback(c)
+                        else:
+                            _data[row_i][col] = callback(row[col])
+                    except IndexError:
+                        raise InvalidDatasetIndex
+
+        if self.headers:
+            if dicts:
+                data = [dict_pack(list(zip(self.headers, data_row))) for data_row in _data]
+            else:
+                data = [list(self.headers)] + list(_data)
+        else:
+            data = [list(row) for row in _data]
+
+        return data
+
+    def _get_headers(self):
+        """An *optional* list of strings to be used for header rows and attribute names.
+
+        This must be set manually. The given list length must equal :class:`Dataset.width`.
+
+        """
+        return self.__headers
+
+    def _set_headers(self, collection):
+        """Validating headers setter."""
+        self._validate(collection)
+        if collection:
+            try:
+                self.__headers = list(collection)
+            except TypeError:
+                raise TypeError
+        else:
+            self.__headers = None
+
+    headers = property(_get_headers, _set_headers)
+
+    def _get_dict(self):
+        """A native Python representation of the :class:`Dataset` object. If headers have
+        been set, a list of Python dictionaries will be returned. If no headers have been set,
+        a list of tuples (rows) will be returned instead.
+
+        A dataset object can also be imported by setting the `Dataset.dict` attribute: ::
+
+            data = tablib.Dataset()
+            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
+
+        """
+        return self._package()
+
+    def _set_dict(self, pickle):
+        """A native Python representation of the Dataset object. If headers have been
+        set, a list of Python dictionaries will be returned. If no headers have been
+        set, a list of tuples (rows) will be returned instead.
+
+        A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. ::
+
+            data = tablib.Dataset()
+            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
+
+        """
+
+        if not len(pickle):
+            return
+
+        # if list of rows
+        if isinstance(pickle[0], list):
+            self.wipe()
+            for row in pickle:
+                self.append(Row(row))
+
+        # if list of objects
+        elif isinstance(pickle[0], dict):
+            self.wipe()
+            self.headers = list(pickle[0].keys())
+            for row in pickle:
+                self.append(Row(list(row.values())))
+        else:
+            raise UnsupportedFormat
+
+    dict = property(_get_dict, _set_dict)
+
+    def _clean_col(self, col):
+        """Prepares the given column for insert/append."""
+
+        col = list(col)
+
+        if self.headers:
+            header = [col.pop(0)]
+        else:
+            header = []
+
+        if len(col) == 1 and hasattr(col[0], '__call__'):
+
+            col = list(map(col[0], self._data))
+        col = tuple(header + col)
+
+        return col
+
+    @property
+    def height(self):
+        """The number of rows currently in the :class:`Dataset`.
+           Cannot be directly modified.
+        """
+        return len(self._data)
+
+    @property
+    def width(self):
+        """The number of columns currently in the :class:`Dataset`.
+           Cannot be directly modified.
+        """
+
+        try:
+            return len(self._data[0])
+        except IndexError:
+            try:
+                return len(self.headers)
+            except TypeError:
+                return 0
+
+    def load(self, in_stream, format=None, **kwargs):
+        """
+        Import `in_stream` to the :class:`Dataset` object using the `format`.
+
+        :param \\*\\*kwargs: (optional) custom configuration to the format `import_set`.
+        """
+
+        if not format:
+            format = detect_format(in_stream)
+
+        export_set, import_set = self._formats.get(format, (None, None))
+        if not import_set:
+            raise UnsupportedFormat('Format {0} cannot be imported.'.format(format))
+
+        import_set(self, in_stream, **kwargs)
+        return self
+
+    def export(self, format, **kwargs):
+        """
+        Export :class:`Dataset` object to `format`.
+
+        :param \\*\\*kwargs: (optional) custom configuration to the format `export_set`.
+        """
+        export_set, import_set = self._formats.get(format, (None, None))
+        if not export_set:
+            raise UnsupportedFormat('Format {0} cannot be exported.'.format(format))
+
+        return export_set(self, **kwargs)
+
+    # -------
+    # Formats
+    # -------
+
+    @property
+    def xls():
+        """A Legacy Excel Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.
+
+        .. note::
+
+            XLS files are limited to a maximum of 65,000 rows. Use :class:`Dataset.xlsx` to avoid this limitation.
+
+         .. admonition:: Binary Warning
+
+             :class:`Dataset.xls` contains binary data, so make sure to write in binary mode::
+
+                with open('output.xls', 'wb') as f:
+                    f.write(data.xls)
+        """
+        pass
+
+    @property
+    def xlsx():
+        """An Excel '07+ Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.
+
+         .. admonition:: Binary Warning
+
+             :class:`Dataset.xlsx` contains binary data, so make sure to write in binary mode::
+
+                with open('output.xlsx', 'wb') as f:
+                    f.write(data.xlsx)
+        """
+        pass
+
+    @property
+    def ods():
+        """An OpenDocument Spreadsheet representation of the :class:`Dataset` object, with :ref:`separators`. Cannot be set.
+
+         .. admonition:: Binary Warning
+
+             :class:`Dataset.ods` contains binary data, so make sure to write in binary mode::
+
+                with open('output.ods', 'wb') as f:
+                    f.write(data.ods)
+        """
+        pass
+
+    @property
+    def csv():
+        """A CSV representation of the :class:`Dataset` object. The top row will contain
+        headers, if they have been set. Otherwise, the top row will contain
+        the first row of the dataset.
+
+        A dataset object can also be imported by setting the :class:`Dataset.csv` attribute. ::
+
+            data = tablib.Dataset()
+            data.csv = 'age, first_name, last_name\\n90, John, Adams'
+
+        Import assumes (for now) that headers exist.
+
+        .. admonition:: Binary Warning for Python 2
+
+             :class:`Dataset.csv` uses \\r\\n line endings by default so, in Python 2, make
+             sure to write in binary mode::
+
+                 with open('output.csv', 'wb') as f:
+                     f.write(data.csv)
+
+             If you do not do this, and you export the file on Windows, your
+             CSV file will open in Excel with a blank line between each row.
+
+        .. admonition:: Line endings for Python 3
+
+             :class:`Dataset.csv` uses \\r\\n line endings by default so, in Python 3, make
+             sure to include newline='' otherwise you will get a blank line between each row
+             when you open the file in Excel::
+
+                 with open('output.csv', 'w', newline='') as f:
+                     f.write(data.csv)
+
+             If you do not do this, and you export the file on Windows, your
+             CSV file will open in Excel with a blank line between each row.
+        """
+        pass
+
+    @property
+    def tsv():
+        """A TSV representation of the :class:`Dataset` object. The top row will contain
+        headers, if they have been set. Otherwise, the top row will contain
+        the first row of the dataset.
+
+        A dataset object can also be imported by setting the :class:`Dataset.tsv` attribute. ::
+
+            data = tablib.Dataset()
+            data.tsv = 'age\tfirst_name\tlast_name\\n90\tJohn\tAdams'
+
+        Import assumes (for now) that headers exist.
+        """
+        pass
+
+    @property
+    def yaml():
+        """A YAML representation of the :class:`Dataset` object. If headers have been
+        set, a YAML list of objects will be returned. If no headers have
+        been set, a YAML list of lists (rows) will be returned instead.
+
+        A dataset object can also be imported by setting the :class:`Dataset.yaml` attribute: ::
+
+            data = tablib.Dataset()
+            data.yaml = '- {age: 90, first_name: John, last_name: Adams}'
+
+        Import assumes (for now) that headers exist.
+        """
+        pass
+
+    @property
+    def df():
+        """A DataFrame representation of the :class:`Dataset` object.
+
+        A dataset object can also be imported by setting the :class:`Dataset.df` attribute: ::
+
+            data = tablib.Dataset()
+            data.df = DataFrame(np.random.randn(6,4))
+
+        Import assumes (for now) that headers exist.
+        """
+        pass
+
+    @property
+    def json():
+        """A JSON representation of the :class:`Dataset` object. If headers have been
+        set, a JSON list of objects will be returned. If no headers have
+        been set, a JSON list of lists (rows) will be returned instead.
+
+        A dataset object can also be imported by setting the :class:`Dataset.json` attribute: ::
+
+            data = tablib.Dataset()
+            data.json = '[{"age": 90, "first_name": "John", "last_name": "Adams"}]'
+
+        Import assumes (for now) that headers exist.
+        """
+        pass
+
+    @property
+    def html():
+        """A HTML table representation of the :class:`Dataset` object. If
+        headers have been set, they will be used as table headers.
+
+        ..notice:: This method can be used for export only.
+        """
+        pass
+
+    @property
+    def dbf():
+        """A dBASE representation of the :class:`Dataset` object.
+
+        A dataset object can also be imported by setting the
+        :class:`Dataset.dbf` attribute. ::
+
+            # To import data from an existing DBF file:
+            data = tablib.Dataset()
+            data.dbf = open('existing_table.dbf', mode='rb').read()
+
+            # to import data from an ASCII-encoded bytestring:
+            data = tablib.Dataset()
+            data.dbf = '<bytestring of tabular data>'
+
+        .. admonition:: Binary Warning
+
+            :class:`Dataset.dbf` contains binary data, so make sure to write in binary mode::
+
+                with open('output.dbf', 'wb') as f:
+                    f.write(data.dbf)
+        """
+        pass
+
+    @property
+    def latex():
+        """A LaTeX booktabs representation of the :class:`Dataset` object. If a
+        title has been set, it will be exported as the table caption.
+
+        .. note:: This method can be used for export only.
+        """
+        pass
+
+    @property
+    def jira():
+        """A Jira table representation of the :class:`Dataset` object.
+
+        .. note:: This method can be used for export only.
+        """
+        pass
+
+    # ----
+    # Rows
+    # ----
+
+    def insert(self, index, row, tags=list()):
+        """Inserts a row to the :class:`Dataset` at the given index.
+
+        Rows inserted must be the correct size (height or width).
+
+        The default behaviour is to insert the given row to the :class:`Dataset`
+        object at the given index.
+       """
+
+        self._validate(row)
+        self._data.insert(index, Row(row, tags=tags))
+
+    def rpush(self, row, tags=list()):
+        """Adds a row to the end of the :class:`Dataset`.
+        See :class:`Dataset.insert` for additional documentation.
+        """
+
+        self.insert(self.height, row=row, tags=tags)
+
+    def lpush(self, row, tags=list()):
+        """Adds a row to the top of the :class:`Dataset`.
+        See :class:`Dataset.insert` for additional documentation.
+        """
+
+        self.insert(0, row=row, tags=tags)
+
+    def append(self, row, tags=list()):
+        """Adds a row to the :class:`Dataset`.
+        See :class:`Dataset.insert` for additional documentation.
+        """
+
+        self.rpush(row, tags)
+
+    def extend(self, rows, tags=list()):
+        """Adds a list of rows to the :class:`Dataset` using
+        :class:`Dataset.append`
+        """
+
+        for row in rows:
+            self.append(row, tags)
+
+    def lpop(self):
+        """Removes and returns the first row of the :class:`Dataset`."""
+
+        cache = self[0]
+        del self[0]
+
+        return cache
+
+    def rpop(self):
+        """Removes and returns the last row of the :class:`Dataset`."""
+
+        cache = self[-1]
+        del self[-1]
+
+        return cache
+
+    def pop(self):
+        """Removes and returns the last row of the :class:`Dataset`."""
+
+        return self.rpop()
+
+    # -------
+    # Columns
+    # -------
+
+    def insert_col(self, index, col=None, header=None):
+        """Inserts a column to the :class:`Dataset` at the given index.
+
+        Columns inserted must be the correct height.
+
+        You can also insert a column of a single callable object, which will
+        add a new column with the return values of the callable each as an
+        item in the column. ::
+
+            data.append_col(col=random.randint)
+
+        If inserting a column, and :class:`Dataset.headers` is set, the
+        header attribute must be set, and will be considered the header for
+        that row.
+
+        See :ref:`dyncols` for an in-depth example.
+
+        .. versionchanged:: 0.9.0
+           If inserting a column, and :class:`Dataset.headers` is set, the
+           header attribute must be set, and will be considered the header for
+           that row.
+
+        .. versionadded:: 0.9.0
+           If inserting a row, you can add :ref:`tags <tags>` to the row you are inserting.
+           This gives you the ability to :class:`filter <Dataset.filter>` your
+           :class:`Dataset` later.
+
+        """
+
+        if col is None:
+            col = []
+
+        # Callable Columns...
+        if hasattr(col, '__call__'):
+            col = list(map(col, self._data))
+
+        col = self._clean_col(col)
+        self._validate(col=col)
+
+        if self.headers:
+            # pop the first item off, add to headers
+            if not header:
+                raise HeadersNeeded()
+
+            # corner case - if header is set without data
+            elif header and self.height == 0 and len(col):
+                raise InvalidDimensions
+
+            self.headers.insert(index, header)
+
+        if self.height and self.width:
+
+            for i, row in enumerate(self._data):
+
+                row.insert(index, col[i])
+                self._data[i] = row
+        else:
+            self._data = [Row([row]) for row in col]
+
+    def rpush_col(self, col, header=None):
+        """Adds a column to the end of the :class:`Dataset`.
+        See :class:`Dataset.insert` for additional documentation.
+        """
+
+        self.insert_col(self.width, col, header=header)
+
+    def lpush_col(self, col, header=None):
+        """Adds a column to the top of the :class:`Dataset`.
+        See :class:`Dataset.insert` for additional documentation.
+        """
+
+        self.insert_col(0, col, header=header)
+
+    def insert_separator(self, index, text='-'):
+        """Adds a separator to :class:`Dataset` at given index."""
+
+        sep = (index, text)
+        self._separators.append(sep)
+
+    def append_separator(self, text='-'):
+        """Adds a :ref:`separator <separators>` to the :class:`Dataset`."""
+
+        # change offsets if headers are or aren't defined
+        if not self.headers:
+            index = self.height if self.height else 0
+        else:
+            index = (self.height + 1) if self.height else 1
+
+        self.insert_separator(index, text)
+
+    def append_col(self, col, header=None):
+        """Adds a column to the :class:`Dataset`.
+        See :class:`Dataset.insert_col` for additional documentation.
+        """
+
+        self.rpush_col(col, header)
+
+    def get_col(self, index):
+        """Returns the column from the :class:`Dataset` at the given index."""
+
+        return [row[index] for row in self._data]
+
+    # ----
+    # Misc
+    # ----
+
+    def add_formatter(self, col, handler):
+        """Adds a formatter to the :class:`Dataset`.
+
+        .. versionadded:: 0.9.5
+
+        :param col: column to. Accepts index int or header str.
+        :param handler: reference to callback function to execute against
+                        each cell value.
+        """
+
+        if isinstance(col, unicode):
+            if col in self.headers:
+                col = self.headers.index(col) # get 'key' index from each data
+            else:
+                raise KeyError
+
+        if not col > self.width:
+            self._formatters.append((col, handler))
+        else:
+            raise InvalidDatasetIndex
+
+        return True
+
+    def filter(self, tag):
+        """Returns a new instance of the :class:`Dataset`, excluding any rows
+        that do not contain the given :ref:`tags <tags>`.
+        """
+        _dset = copy(self)
+        _dset._data = [row for row in _dset._data if row.has_tag(tag)]
+
+        return _dset
+
+    def sort(self, col, reverse=False):
+        """Sort a :class:`Dataset` by a specific column, given string (for
+        header) or integer (for column index). The order can be reversed by
+        setting ``reverse`` to ``True``.
+
+        Returns a new :class:`Dataset` instance where columns have been
+        sorted.
+        """
+
+        if isinstance(col, (str, unicode)):
+
+            if not self.headers:
+                raise HeadersNeeded
+
+            _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
+            _dset = Dataset(headers=self.headers, title=self.title)
+
+            for item in _sorted:
+                row = [item[key] for key in self.headers]
+                _dset.append(row=row)
+
+        else:
+            if self.headers:
+                col = self.headers[col]
+
+            _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse)
+            _dset = Dataset(headers=self.headers, title=self.title)
+
+            for item in _sorted:
+                if self.headers:
+                    row = [item[key] for key in self.headers]
+                else:
+                    row = item
+                _dset.append(row=row)
+
+        return _dset
+
+    def transpose(self):
+        """Transpose a :class:`Dataset`, turning rows into columns and vice
+        versa, returning a new ``Dataset`` instance. The first row of the
+        original instance becomes the new header row."""
+
+        # Don't transpose if there is no data
+        if not self:
+            return
+
+        _dset = Dataset()
+        # The first element of the headers stays in the headers,
+        # it is our "hinge" on which we rotate the data
+        new_headers = [self.headers[0]] + self[self.headers[0]]
+
+        _dset.headers = new_headers
+        for index, column in enumerate(self.headers):
+
+            if column == self.headers[0]:
+                # It's in the headers, so skip it
+                continue
+
+            # Adding the column name as now they're a regular column
+            # Use `get_col(index)` in case there are repeated values
+            row_data = [column] + self.get_col(index)
+            row_data = Row(row_data)
+            _dset.append(row=row_data)
+        return _dset
+
+    def stack(self, other):
+        """Stack two :class:`Dataset` instances together by
+        joining at the row level, and return new combined
+        ``Dataset`` instance."""
+
+        if not isinstance(other, Dataset):
+            return
+
+        if self.width != other.width:
+            raise InvalidDimensions
+
+        # Copy the source data
+        _dset = copy(self)
+
+        rows_to_stack = [row for row in _dset._data]
+        other_rows = [row for row in other._data]
+
+        rows_to_stack.extend(other_rows)
+        _dset._data = rows_to_stack
+
+        return _dset
+
+    def stack_cols(self, other):
+        """Stack two :class:`Dataset` instances together by
+        joining at the column level, and return a new
+        combined ``Dataset`` instance. If either ``Dataset``
+        has headers set, than the other must as well."""
+
+        if not isinstance(other, Dataset):
+            return
+
+        if self.headers or other.headers:
+            if not self.headers or not other.headers:
+                raise HeadersNeeded
+
+        if self.height != other.height:
+            raise InvalidDimensions
+
+        try:
+            new_headers = self.headers + other.headers
+        except TypeError:
+            new_headers = None
+
+        _dset = Dataset()
+
+        for column in self.headers:
+            _dset.append_col(col=self[column])
+
+        for column in other.headers:
+            _dset.append_col(col=other[column])
+
+        _dset.headers = new_headers
+
+        return _dset
+
+    def remove_duplicates(self):
+        """Removes all duplicate rows from the :class:`Dataset` object
+        while maintaining the original order."""
+        seen = set()
+        self._data[:] = [row for row in self._data if not (tuple(row) in seen or seen.add(tuple(row)))]
+
+    def wipe(self):
+        """Removes all content and headers from the :class:`Dataset` object."""
+        self._data = list()
+        self.__headers = None
+
+    def subset(self, rows=None, cols=None):
+        """Returns a new instance of the :class:`Dataset`,
+        including only specified rows and columns.
+        """
+
+        # Don't return if no data
+        if not self:
+            return
+
+        if rows is None:
+            rows = list(range(self.height))
+
+        if cols is None:
+            cols = list(self.headers)
+
+        #filter out impossible rows and columns
+        rows = [row for row in rows if row in range(self.height)]
+        cols = [header for header in cols if header in self.headers]
+
+        _dset = Dataset()
+
+        #filtering rows and columns
+        _dset.headers = list(cols)
+
+        _dset._data = []
+        for row_no, row in enumerate(self._data):
+            data_row = []
+            for key in _dset.headers:
+                if key in self.headers:
+                    pos = self.headers.index(key)
+                    data_row.append(row[pos])
+                else:
+                    raise KeyError
+
+            if row_no in rows:
+                _dset.append(row=Row(data_row))
+
+        return _dset
+
+
+class Databook(object):
+    """A book of :class:`Dataset` objects.
+    """
+
+    _formats = {}
+
+    def __init__(self, sets=None):
+
+        if sets is None:
+            self._datasets = list()
+        else:
+            self._datasets = sets
+
+        self._register_formats()
+
+    def __repr__(self):
+        try:
+            return '<%s databook>' % (self.title.lower())
+        except AttributeError:
+            return '<databook object>'
+
+    def wipe(self):
+        """Removes all :class:`Dataset` objects from the :class:`Databook`."""
+        self._datasets = []
+
+    @classmethod
+    def _register_formats(cls):
+        """Adds format properties."""
+        for fmt in formats.available:
+            try:
+                try:
+                    setattr(cls, fmt.title, property(fmt.export_book, fmt.import_book))
+                    cls._formats[fmt.title] = (fmt.export_book, fmt.import_book)
+                except AttributeError:
+                    setattr(cls, fmt.title, property(fmt.export_book))
+                    cls._formats[fmt.title] = (fmt.export_book, None)
+
+            except AttributeError:
+                cls._formats[fmt.title] = (None, None)
+
+    def sheets(self):
+        return self._datasets
+
+    def add_sheet(self, dataset):
+        """Adds given :class:`Dataset` to the :class:`Databook`."""
+        if isinstance(dataset, Dataset):
+            self._datasets.append(dataset)
+        else:
+            raise InvalidDatasetType
+
+    def _package(self, ordered=True):
+        """Packages :class:`Databook` for delivery."""
+        collector = []
+
+        if ordered:
+            dict_pack = OrderedDict
+        else:
+            dict_pack = dict
+
+        for dset in self._datasets:
+            collector.append(dict_pack(
+                title = dset.title,
+                data = dset._package(ordered=ordered)
+            ))
+        return collector
+
+    @property
+    def size(self):
+        """The number of the :class:`Dataset` objects within :class:`Databook`."""
+        return len(self._datasets)
+
+    def load(self, in_stream, format, **kwargs):
+        """
+        Import `in_stream` to the :class:`Databook` object using the `format`.
+
+        :param \\*\\*kwargs: (optional) custom configuration to the format `import_book`.
+        """
+
+        if not format:
+            format = detect_format(in_stream)
+
+        export_book, import_book = self._formats.get(format, (None, None))
+        if not import_book:
+            raise UnsupportedFormat('Format {0} cannot be loaded.'.format(format))
+
+        import_book(self, in_stream, **kwargs)
+        return self
+
+    def export(self, format, **kwargs):
+        """
+        Export :class:`Databook` object to `format`.
+
+        :param \\*\\*kwargs: (optional) custom configuration to the format `export_book`.
+        """
+        export_book, import_book = self._formats.get(format, (None, None))
+        if not export_book:
+            raise UnsupportedFormat('Format {0} cannot be exported.'.format(format))
+
+        return export_book(self, **kwargs)
+
+
+def detect_format(stream):
+    """Return format name of given stream."""
+    for fmt in formats.available:
+        try:
+            if fmt.detect(stream):
+                return fmt.title
+        except AttributeError:
+            pass
+
+
+def import_set(stream, format=None, **kwargs):
+    """Return dataset of given stream."""
+
+    return Dataset().load(stream, format, **kwargs)
+
+
+def import_book(stream, format=None, **kwargs):
+    """Return dataset of given stream."""
+
+    return Databook().load(stream, format, **kwargs)
+
+
+class InvalidDatasetType(Exception):
+    "Only Datasets can be added to a DataBook"
+
+
+class InvalidDimensions(Exception):
+    "Invalid size"
+
+
+class InvalidDatasetIndex(Exception):
+    "Outside of Dataset size"
+
+
+class HeadersNeeded(Exception):
+    "Header parameter must be given when appending a column in this Dataset."
+
+
+class UnsupportedFormat(NotImplementedError):
+    "Format is not supported"
author	Jannis Leidel <jannis@leidel.info>	2019-10-18 15:57:13 +0200
committer	GitHub <noreply@github.com>	2019-10-18 15:57:13 +0200
commit	f6bf14afd22d8e5b706670590cc95f29d4483434 (patch)
tree	a65fbc8b6fd1112222beb6f66c31082ffe231460 /src/tablib/core.py
parent	f3d02aa3b088e2f9044fe5e4869e8c8cb91d2cdc (diff)
download	tablib-f6bf14afd22d8e5b706670590cc95f29d4483434.tar.gz