summaryrefslogtreecommitdiff
path: root/tablib/packages/openpyxl/reader
diff options
context:
space:
mode:
authorClaude Paroz <claude@2xlibre.net>2017-02-20 16:41:33 +0100
committerIuri de Silvio <iurisilvio@gmail.com>2017-02-20 12:41:33 -0300
commite66eb4a18967dd4d35e6e9da265622e9e4015103 (patch)
tree6e006e65b5daa060c26d6293662b4e3a5b9eebc3 /tablib/packages/openpyxl/reader
parent0e720d78ca7e8c4466b8f13970ebae8e12700d00 (diff)
downloadtablib-e66eb4a18967dd4d35e6e9da265622e9e4015103.tar.gz
Replaced vendored openpyxl by a dependency (#221)
It is time to make it happen. * Dropped Python 3.2 support Recent dependencies are dropping Python 3.2 too. * Replaced vendored openpyxl by a dependency Thanks Tommy Anthony for the initial patch.
Diffstat (limited to 'tablib/packages/openpyxl/reader')
-rw-r--r--tablib/packages/openpyxl/reader/__init__.py33
-rw-r--r--tablib/packages/openpyxl/reader/excel.py109
-rw-r--r--tablib/packages/openpyxl/reader/iter_worksheet.py348
-rw-r--r--tablib/packages/openpyxl/reader/strings.py64
-rw-r--r--tablib/packages/openpyxl/reader/style.py69
-rw-r--r--tablib/packages/openpyxl/reader/workbook.py156
-rw-r--r--tablib/packages/openpyxl/reader/worksheet.py114
7 files changed, 0 insertions, 893 deletions
diff --git a/tablib/packages/openpyxl/reader/__init__.py b/tablib/packages/openpyxl/reader/__init__.py
deleted file mode 100644
index 9b0ee2f..0000000
--- a/tablib/packages/openpyxl/reader/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# file openpyxl/reader/__init__.py
-
-# Copyright (c) 2010 openpyxl
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# @license: http://www.opensource.org/licenses/mit-license.php
-# @author: Eric Gazoni
-
-"""Imports for the openpyxl.reader namespace."""
-
-# package imports
-from ..reader import excel
-from ..reader import strings
-from ..reader import style
-from ..reader import workbook
-from ..reader import worksheet
diff --git a/tablib/packages/openpyxl/reader/excel.py b/tablib/packages/openpyxl/reader/excel.py
deleted file mode 100644
index 16c3f91..0000000
--- a/tablib/packages/openpyxl/reader/excel.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# file openpyxl/reader/excel.py
-
-# Copyright (c) 2010 openpyxl
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# @license: http://www.opensource.org/licenses/mit-license.php
-# @author: Eric Gazoni
-
-"""Read an xlsx file into Python"""
-
-# Python stdlib imports
-from zipfile import ZipFile, ZIP_DEFLATED, BadZipfile
-
-# package imports
-from ..shared.exc import OpenModeError, InvalidFileException
-from ..shared.ooxml import ARC_SHARED_STRINGS, ARC_CORE, ARC_APP, \
- ARC_WORKBOOK, PACKAGE_WORKSHEETS, ARC_STYLE
-from ..workbook import Workbook
-from ..reader.strings import read_string_table
-from ..reader.style import read_style_table
-from ..reader.workbook import read_sheets_titles, read_named_ranges, \
- read_properties_core, get_sheet_ids
-from ..reader.worksheet import read_worksheet
-from ..reader.iter_worksheet import unpack_worksheet
-
-def load_workbook(filename, use_iterators = False):
- """Open the given filename and return the workbook
-
- :param filename: the path to open
- :type filename: string
-
- :param use_iterators: use lazy load for cells
- :type use_iterators: bool
-
- :rtype: :class:`openpyxl.workbook.Workbook`
-
- .. note::
-
- When using lazy load, all worksheets will be :class:`openpyxl.reader.iter_worksheet.IterableWorksheet`
- and the returned workbook will be read-only.
-
- """
-
- if isinstance(filename, file):
- # fileobject must have been opened with 'rb' flag
- # it is required by zipfile
- if 'b' not in filename.mode:
- raise OpenModeError("File-object must be opened in binary mode")
-
- try:
- archive = ZipFile(filename, 'r', ZIP_DEFLATED)
- except (BadZipfile, RuntimeError, IOError, ValueError):
- raise InvalidFileException()
- wb = Workbook()
-
- if use_iterators:
- wb._set_optimized_read()
-
- try:
- _load_workbook(wb, archive, filename, use_iterators)
- except KeyError:
- raise InvalidFileException()
- finally:
- archive.close()
- return wb
-
-def _load_workbook(wb, archive, filename, use_iterators):
-
- # get workbook-level information
- wb.properties = read_properties_core(archive.read(ARC_CORE))
- try:
- string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
- except KeyError:
- string_table = {}
- style_table = read_style_table(archive.read(ARC_STYLE))
-
- # get worksheets
- wb.worksheets = [] # remove preset worksheet
- sheet_names = read_sheets_titles(archive.read(ARC_APP))
- for i, sheet_name in enumerate(sheet_names):
- sheet_codename = 'sheet%d.xml' % (i + 1)
- worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename)
-
- if not use_iterators:
- new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table)
- else:
- xml_source = unpack_worksheet(archive, worksheet_path)
- new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, filename, sheet_codename)
- #new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, filename, sheet_codename)
- wb.add_sheet(new_ws, index = i)
-
- wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb)
diff --git a/tablib/packages/openpyxl/reader/iter_worksheet.py b/tablib/packages/openpyxl/reader/iter_worksheet.py
deleted file mode 100644
index 46ee318..0000000
--- a/tablib/packages/openpyxl/reader/iter_worksheet.py
+++ /dev/null
@@ -1,348 +0,0 @@
-# file openpyxl/reader/iter_worksheet.py
-
-# Copyright (c) 2010 openpyxl
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# @license: http://www.opensource.org/licenses/mit-license.php
-# @author: Eric Gazoni
-
-""" Iterators-based worksheet reader
-*Still very raw*
-"""
-
-from ....compat import BytesIO as StringIO
-import warnings
-import operator
-from functools import partial
-from itertools import groupby, ifilter
-from ..worksheet import Worksheet
-from ..cell import coordinate_from_string, get_column_letter, Cell
-from ..reader.excel import get_sheet_ids
-from ..reader.strings import read_string_table
-from ..reader.style import read_style_table, NumberFormat
-from ..shared.date_time import SharedDate
-from ..reader.worksheet import read_dimension
-from ..shared.ooxml import (MIN_COLUMN, MAX_COLUMN, PACKAGE_WORKSHEETS,
- MAX_ROW, MIN_ROW, ARC_SHARED_STRINGS, ARC_APP, ARC_STYLE)
-try:
- from xml.etree.cElementTree import iterparse
-except ImportError:
- from xml.etree.ElementTree import iterparse
-
-
-from zipfile import ZipFile
-from .. import cell
-import re
-import tempfile
-import zlib
-import zipfile
-import struct
-
-TYPE_NULL = Cell.TYPE_NULL
-MISSING_VALUE = None
-
-RE_COORDINATE = re.compile('^([A-Z]+)([0-9]+)$')
-
-SHARED_DATE = SharedDate()
-
-_COL_CONVERSION_CACHE = dict((get_column_letter(i), i) for i in xrange(1, 18279))
-def column_index_from_string(str_col, _col_conversion_cache=_COL_CONVERSION_CACHE):
- # we use a function argument to get indexed name lookup
- return _col_conversion_cache[str_col]
-del _COL_CONVERSION_CACHE
-
-RAW_ATTRIBUTES = ['row', 'column', 'coordinate', 'internal_value', 'data_type', 'style_id', 'number_format']
-
-try:
- from collections import namedtuple
- BaseRawCell = namedtuple('RawCell', RAW_ATTRIBUTES)
-except ImportError:
-
- # warnings.warn("""Unable to import 'namedtuple' module, this may cause memory issues when using optimized reader. Please upgrade your Python installation to 2.6+""")
-
- class BaseRawCell(object):
-
- def __init__(self, *args):
- assert len(args)==len(RAW_ATTRIBUTES)
-
- for attr, val in zip(RAW_ATTRIBUTES, args):
- setattr(self, attr, val)
-
- def _replace(self, **kwargs):
-
- self.__dict__.update(kwargs)
-
- return self
-
-
-class RawCell(BaseRawCell):
- """Optimized version of the :class:`openpyxl.cell.Cell`, using named tuples.
-
- Useful attributes are:
-
- * row
- * column
- * coordinate
- * internal_value
-
- You can also access if needed:
-
- * data_type
- * number_format
-
- """
-
- @property
- def is_date(self):
- res = (self.data_type == Cell.TYPE_NUMERIC
- and self.number_format is not None
- and ('d' in self.number_format
- or 'm' in self.number_format
- or 'y' in self.number_format
- or 'h' in self.number_format
- or 's' in self.number_format
- ))
-
- return res
-
-def iter_rows(workbook_name, sheet_name, xml_source, range_string = '', row_offset = 0, column_offset = 0):
-
- archive = get_archive_file(workbook_name)
-
- source = xml_source
-
- if range_string:
- min_col, min_row, max_col, max_row = get_range_boundaries(range_string, row_offset, column_offset)
- else:
- min_col, min_row, max_col, max_row = read_dimension(xml_source = source)
- min_col = column_index_from_string(min_col)
- max_col = column_index_from_string(max_col) + 1
- max_row += 6
-
- try:
- string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
- except KeyError:
- string_table = {}
-
- style_table = read_style_table(archive.read(ARC_STYLE))
-
- source.seek(0)
- p = iterparse(source)
-
- return get_squared_range(p, min_col, min_row, max_col, max_row, string_table, style_table)
-
-
-def get_rows(p, min_column = MIN_COLUMN, min_row = MIN_ROW, max_column = MAX_COLUMN, max_row = MAX_ROW):
-
- return groupby(get_cells(p, min_row, min_column, max_row, max_column), operator.attrgetter('row'))
-
-def get_cells(p, min_row, min_col, max_row, max_col, _re_coordinate=RE_COORDINATE):
-
- for _event, element in p:
-
- if element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}c':
- coord = element.get('r')
- column_str, row = _re_coordinate.match(coord).groups()
-
- row = int(row)
- column = column_index_from_string(column_str)
-
- if min_col <= column <= max_col and min_row <= row <= max_row:
- data_type = element.get('t', 'n')
- style_id = element.get('s')
- value = element.findtext('{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v')
- yield RawCell(row, column_str, coord, value, data_type, style_id, None)
-
- if element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v':
- continue
- element.clear()
-
-
-
-def get_range_boundaries(range_string, row = 0, column = 0):
-
- if ':' in range_string:
- min_range, max_range = range_string.split(':')
- min_col, min_row = coordinate_from_string(min_range)
- max_col, max_row = coordinate_from_string(max_range)
-
- min_col = column_index_from_string(min_col) + column
- max_col = column_index_from_string(max_col) + column
- min_row += row
- max_row += row
-
- else:
- min_col, min_row = coordinate_from_string(range_string)
- min_col = column_index_from_string(min_col)
- max_col = min_col + 1
- max_row = min_row
-
- return (min_col, min_row, max_col, max_row)
-
-def get_archive_file(archive_name):
-
- return ZipFile(archive_name, 'r')
-
-def get_xml_source(archive_file, sheet_name):
-
- return archive_file.read('%s/%s' % (PACKAGE_WORKSHEETS, sheet_name))
-
-def get_missing_cells(row, columns):
-
- return dict([(column, RawCell(row, column, '%s%s' % (column, row), MISSING_VALUE, TYPE_NULL, None, None)) for column in columns])
-
-def get_squared_range(p, min_col, min_row, max_col, max_row, string_table, style_table):
-
- expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
-
- current_row = min_row
- for row, cells in get_rows(p, min_row = min_row, max_row = max_row, min_column = min_col, max_column = max_col):
- full_row = []
- if current_row < row:
-
- for gap_row in xrange(current_row, row):
-
- dummy_cells = get_missing_cells(gap_row, expected_columns)
-
- yield tuple([dummy_cells[column] for column in expected_columns])
-
- current_row = row
-
- temp_cells = list(cells)
-
- retrieved_columns = dict([(c.column, c) for c in temp_cells])
-
- missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
-
- replacement_columns = get_missing_cells(row, missing_columns)
-
- for column in expected_columns:
-
- if column in retrieved_columns:
- cell = retrieved_columns[column]
-
- if cell.style_id is not None:
- style = style_table[int(cell.style_id)]
- cell = cell._replace(number_format = style.number_format.format_code) #pylint: disable-msg=W0212
- if cell.internal_value is not None:
- if cell.data_type == Cell.TYPE_STRING:
- cell = cell._replace(internal_value = string_table[int(cell.internal_value)]) #pylint: disable-msg=W0212
- elif cell.data_type == Cell.TYPE_BOOL:
- cell = cell._replace(internal_value = cell.internal_value == 'True')
- elif cell.is_date:
- cell = cell._replace(internal_value = SHARED_DATE.from_julian(float(cell.internal_value)))
- elif cell.data_type == Cell.TYPE_NUMERIC:
- cell = cell._replace(internal_value = float(cell.internal_value))
- full_row.append(cell)
-
- else:
- full_row.append(replacement_columns[column])
-
- current_row = row + 1
-
- yield tuple(full_row)
-
-#------------------------------------------------------------------------------
-
-class IterableWorksheet(Worksheet):
-
- def __init__(self, parent_workbook, title, workbook_name,
- sheet_codename, xml_source):
-
- Worksheet.__init__(self, parent_workbook, title)
- self._workbook_name = workbook_name
- self._sheet_codename = sheet_codename
- self._xml_source = xml_source
-
- def iter_rows(self, range_string = '', row_offset = 0, column_offset = 0):
- """ Returns a squared range based on the `range_string` parameter,
- using generators.
-
- :param range_string: range of cells (e.g. 'A1:C4')
- :type range_string: string
-
- :param row: row index of the cell (e.g. 4)
- :type row: int
-
- :param column: column index of the cell (e.g. 3)
- :type column: int
-
- :rtype: generator
-
- """
-
- return iter_rows(workbook_name = self._workbook_name,
- sheet_name = self._sheet_codename,
- xml_source = self._xml_source,
- range_string = range_string,
- row_offset = row_offset,
- column_offset = column_offset)
-
- def cell(self, *args, **kwargs):
-
- raise NotImplementedError("use 'iter_rows()' instead")
-
- def range(self, *args, **kwargs):
-
- raise NotImplementedError("use 'iter_rows()' instead")
-
-def unpack_worksheet(archive, filename):
-
- temp_file = tempfile.TemporaryFile(mode='r+', prefix='openpyxl.', suffix='.unpack.temp')
-
- zinfo = archive.getinfo(filename)
-
- if zinfo.compress_type == zipfile.ZIP_STORED:
- decoder = None
- elif zinfo.compress_type == zipfile.ZIP_DEFLATED:
- decoder = zlib.decompressobj(-zlib.MAX_WBITS)
- else:
- raise zipfile.BadZipFile("Unrecognized compression method")
-
- archive.fp.seek(_get_file_offset(archive, zinfo))
- bytes_to_read = zinfo.compress_size
-
- while True:
- buff = archive.fp.read(min(bytes_to_read, 102400))
- if not buff:
- break
- bytes_to_read -= len(buff)
- if decoder:
- buff = decoder.decompress(buff)
- temp_file.write(buff)
-
- if decoder:
- temp_file.write(decoder.decompress('Z'))
-
- return temp_file
-
-def _get_file_offset(archive, zinfo):
-
- try:
- return zinfo.file_offset
- except AttributeError:
- # From http://stackoverflow.com/questions/3781261/how-to-simulate-zipfile-open-in-python-2-5
-
- # Seek over the fixed size fields to the "file name length" field in
- # the file header (26 bytes). Unpack this and the "extra field length"
- # field ourselves as info.extra doesn't seem to be the correct length.
- archive.fp.seek(zinfo.header_offset + 26)
- file_name_len, extra_len = struct.unpack("<HH", archive.fp.read(4))
- return zinfo.header_offset + 30 + file_name_len + extra_len
diff --git a/tablib/packages/openpyxl/reader/strings.py b/tablib/packages/openpyxl/reader/strings.py
deleted file mode 100644
index e19e291..0000000
--- a/tablib/packages/openpyxl/reader/strings.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# file openpyxl/reader/strings.py
-
-# Copyright (c) 2010 openpyxl
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# @license: http://www.opensource.org/licenses/mit-license.php
-# @author: Eric Gazoni
-
-"""Read the shared strings table."""
-
-# package imports
-from ..shared.xmltools import fromstring, QName
-from ..shared.ooxml import NAMESPACES
-
-
-def read_string_table(xml_source):
- """Read in all shared strings in the table"""
- table = {}
- xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
- root = fromstring(text=xml_source)
- string_index_nodes = root.findall(QName(xmlns, 'si').text)
- for index, string_index_node in enumerate(string_index_nodes):
- table[index] = get_string(xmlns, string_index_node)
- return table
-
-
-def get_string(xmlns, string_index_node):
- """Read the contents of a specific string index"""
- rich_nodes = string_index_node.findall(QName(xmlns, 'r').text)
- if rich_nodes:
- reconstructed_text = []
- for rich_node in rich_nodes:
- partial_text = get_text(xmlns, rich_node)
- reconstructed_text.append(partial_text)
- return ''.join(reconstructed_text)
- else:
- return get_text(xmlns, string_index_node)
-
-
-def get_text(xmlns, rich_node):
- """Read rich text, discarding formatting if not disallowed"""
- text_node = rich_node.find(QName(xmlns, 't').text)
- partial_text = text_node.text or ''
-
- if text_node.get(QName(NAMESPACES['xml'], 'space').text) != 'preserve':
- partial_text = partial_text.strip()
- return unicode(partial_text)
diff --git a/tablib/packages/openpyxl/reader/style.py b/tablib/packages/openpyxl/reader/style.py
deleted file mode 100644
index f773070..0000000
--- a/tablib/packages/openpyxl/reader/style.py
+++ /dev/null
@@ -1,69 +0,0 @@
-# file openpyxl/reader/style.py
-
-# Copyright (c) 2010 openpyxl
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# @license: http://www.opensource.org/licenses/mit-license.php
-# @author: Eric Gazoni
-
-"""Read shared style definitions"""
-
-# package imports
-from ..shared.xmltools import fromstring, QName
-from ..shared.exc import MissingNumberFormat
-from ..style import Style, NumberFormat
-
-
-def read_style_table(xml_source):
- """Read styles from the shared style table"""
- table = {}
- xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
- root = fromstring(xml_source)
- custom_num_formats = parse_custom_num_formats(root, xmlns)
- builtin_formats = NumberFormat._BUILTIN_FORMATS
- cell_xfs = root.find(QName(xmlns, 'cellXfs').text)
- cell_xfs_nodes = cell_xfs.findall(QName(xmlns, 'xf').text)
- for index, cell_xfs_node in enumerate(cell_xfs_nodes):
- new_style = Style()
- number_format_id = int(cell_xfs_node.get('numFmtId'))
- if number_format_id < 164:
- new_style.number_format.format_code = \
- builtin_formats.get(number_format_id, 'General')
- else:
-
- if number_format_id in custom_num_formats:
- new_style.number_format.format_code = \
- custom_num_formats[number_format_id]
- else:
- raise MissingNumberFormat('%s' % number_format_id)
- table[index] = new_style
- return table
-
-
-def parse_custom_num_formats(root, xmlns):
- """Read in custom numeric formatting rules from the shared style table"""
- custom_formats = {}
- num_fmts = root.find(QName(xmlns, 'numFmts').text)
- if num_fmts is not None:
- num_fmt_nodes = num_fmts.findall(QName(xmlns, 'numFmt').text)
- for num_fmt_node in num_fmt_nodes:
- custom_formats[int(num_fmt_node.get('numFmtId'))] = \
- num_fmt_node.get('formatCode')
- return custom_formats
diff --git a/tablib/packages/openpyxl/reader/workbook.py b/tablib/packages/openpyxl/reader/workbook.py
deleted file mode 100644
index d9bc161..0000000
--- a/tablib/packages/openpyxl/reader/workbook.py
+++ /dev/null
@@ -1,156 +0,0 @@
-# file openpyxl/reader/workbook.py
-
-# Copyright (c) 2010 openpyxl
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# @license: http://www.opensource.org/licenses/mit-license.php
-# @author: Eric Gazoni
-
-"""Read in global settings to be maintained by the workbook object."""
-
-# package imports
-from ..shared.xmltools import fromstring, QName
-from ..shared.ooxml import NAMESPACES
-from ..workbook import DocumentProperties
-from ..shared.date_time import W3CDTF_to_datetime
-from ..namedrange import NamedRange, split_named_range
-
-import datetime
-
-# constants
-BUGGY_NAMED_RANGES = ['NA()', '#REF!']
-DISCARDED_RANGES = ['Excel_BuiltIn', 'Print_Area']
-
-def get_sheet_ids(xml_source):
-
- sheet_names = read_sheets_titles(xml_source)
-
- return dict((sheet, 'sheet%d.xml' % (i + 1)) for i, sheet in enumerate(sheet_names))
-
-
-def read_properties_core(xml_source):
- """Read assorted file properties."""
- properties = DocumentProperties()
- root = fromstring(xml_source)
- creator_node = root.find(QName(NAMESPACES['dc'], 'creator').text)
- if creator_node is not None:
- properties.creator = creator_node.text
- else:
- properties.creator = ''
- last_modified_by_node = root.find(
- QName(NAMESPACES['cp'], 'lastModifiedBy').text)
- if last_modified_by_node is not None:
- properties.last_modified_by = last_modified_by_node.text
- else:
- properties.last_modified_by = ''
-
- created_node = root.find(QName(NAMESPACES['dcterms'], 'created').text)
- if created_node is not None:
- properties.created = W3CDTF_to_datetime(created_node.text)
- else:
- properties.created = datetime.datetime.now()
-
- modified_node = root.find(QName(NAMESPACES['dcterms'], 'modified').text)
- if modified_node is not None:
- properties.modified = W3CDTF_to_datetime(modified_node.text)
- else:
- properties.modified = properties.created
-
- return properties
-
-
-def get_number_of_parts(xml_source):
- """Get a list of contents of the workbook."""
- parts_size = {}
- parts_names = []
- root = fromstring(xml_source)
- heading_pairs = root.find(QName('http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
- 'HeadingPairs').text)
- vector = heading_pairs.find(QName(NAMESPACES['vt'], 'vector').text)
- children = vector.getchildren()
- for child_id in range(0, len(children), 2):
- part_name = children[child_id].find(QName(NAMESPACES['vt'],
- 'lpstr').text).text
- if not part_name in parts_names:
- parts_names.append(part_name)
- part_size = int(children[child_id + 1].find(QName(
- NAMESPACES['vt'], 'i4').text).text)
- parts_size[part_name] = part_size
- return parts_size, parts_names
-
-
-def read_sheets_titles(xml_source):
- """Read titles for all sheets."""
- root = fromstring(xml_source)
- titles_root = root.find(QName('http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
- 'TitlesOfParts').text)
- vector = titles_root.find(QName(NAMESPACES['vt'], 'vector').text)
- parts, names = get_number_of_parts(xml_source)
-
- # we can't assume 'Worksheets' to be written in english,
- # but it's always the first item of the parts list (see bug #22)
- size = parts[names[0]]
- children = [c.text for c in vector.getchildren()]
- return children[:size]
-
-
-def read_named_ranges(xml_source, workbook):
- """Read named ranges, excluding poorly defined ranges."""
- named_ranges = []
- root = fromstring(xml_source)
- names_root = root.find(QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main',
- 'definedNames').text)
- if names_root is not None:
-
- for name_node in names_root.getchildren():
- range_name = name_node.get('name')
-
- if name_node.get("hidden", '0') == '1':
- continue
-
- valid = True
-
- for discarded_range in DISCARDED_RANGES:
- if discarded_range in range_name:
- valid = False
-
- for bad_range in BUGGY_NAMED_RANGES:
- if bad_range in name_node.text:
- valid = False
-
- if valid:
- destinations = split_named_range(name_node.text)
-
- new_destinations = []
- for worksheet, cells_range in destinations:
-
- # it can happen that a valid named range references
- # a missing worksheet, when Excel didn't properly maintain
- # the named range list
- #
- # we just ignore them here
- worksheet = workbook.get_sheet_by_name(worksheet)
- if worksheet:
- new_destinations.append((worksheet, cells_range))
-
- named_range = NamedRange(range_name, new_destinations)
- named_ranges.append(named_range)
-
- return named_ranges
diff --git a/tablib/packages/openpyxl/reader/worksheet.py b/tablib/packages/openpyxl/reader/worksheet.py
deleted file mode 100644
index a14c4a8..0000000
--- a/tablib/packages/openpyxl/reader/worksheet.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# file openpyxl/reader/worksheet.py
-
-# Copyright (c) 2010 openpyxl
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# @license: http://www.opensource.org/licenses/mit-license.php
-# @author: Eric Gazoni
-
-"""Reader for a single worksheet."""
-
-# Python stdlib imports
-try:
- from xml.etree.cElementTree import iterparse
-except ImportError:
- from xml.etree.ElementTree import iterparse
-
-from ....compat import ifilter
-from ....compat import BytesIO as StringIO
-
-# package imports
-from ..cell import Cell, coordinate_from_string
-from ..worksheet import Worksheet
-
-def _get_xml_iter(xml_source):
-
- if not hasattr(xml_source, 'name'):
- return StringIO(xml_source)
- else:
- xml_source.seek(0)
- return xml_source
-
-def read_dimension(xml_source):
-
- source = _get_xml_iter(xml_source)
-
- it = iterparse(source)
-
- for event, element in it:
-
- if element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}dimension':
- ref = element.get('ref')
-
- min_range, max_range = ref.split(':')
- min_col, min_row = coordinate_from_string(min_range)
- max_col, max_row = coordinate_from_string(max_range)
-
- return min_col, min_row, max_col, max_row
-
- else:
- element.clear()
-
- return None
-
-def filter_cells(x):
- (event, element) = x
-
- return element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}c'
-
-def fast_parse(ws, xml_source, string_table, style_table):
-
- source = _get_xml_iter(xml_source)
-
- it = iterparse(source)
-
- for event, element in ifilter(filter_cells, it):
-
- value = element.findtext('{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v')
-
- if value is not None:
-
- coordinate = element.get('r')
- data_type = element.get('t', 'n')
- style_id = element.get('s')
-
- if data_type == Cell.TYPE_STRING:
- value = string_table.get(int(value))
-
- ws.cell(coordinate).value = value
-
- if style_id is not None:
- ws._styles[coordinate] = style_table.get(int(style_id))
-
- # to avoid memory exhaustion, clear the item after use
- element.clear()
-
-from ..reader.iter_worksheet import IterableWorksheet
-
-def read_worksheet(xml_source, parent, preset_title, string_table,
- style_table, workbook_name = None, sheet_codename = None):
- """Read an xml worksheet"""
- if workbook_name and sheet_codename:
- ws = IterableWorksheet(parent, preset_title, workbook_name,
- sheet_codename, xml_source)
- else:
- ws = Worksheet(parent, preset_title)
- fast_parse(ws, xml_source, string_table, style_table)
- return ws