diff options
| author | Claude Paroz <claude@2xlibre.net> | 2017-02-20 16:41:33 +0100 |
|---|---|---|
| committer | Iuri de Silvio <iurisilvio@gmail.com> | 2017-02-20 12:41:33 -0300 |
| commit | e66eb4a18967dd4d35e6e9da265622e9e4015103 (patch) | |
| tree | 6e006e65b5daa060c26d6293662b4e3a5b9eebc3 /tablib/packages/openpyxl/reader | |
| parent | 0e720d78ca7e8c4466b8f13970ebae8e12700d00 (diff) | |
| download | tablib-e66eb4a18967dd4d35e6e9da265622e9e4015103.tar.gz | |
Replaced vendored openpyxl by a dependency (#221)
It is time to make it happen.
* Dropped Python 3.2 support
Recent dependencies are dropping Python 3.2 too.
* Replaced vendored openpyxl by a dependency
Thanks Tommy Anthony for the initial patch.
Diffstat (limited to 'tablib/packages/openpyxl/reader')
| -rw-r--r-- | tablib/packages/openpyxl/reader/__init__.py | 33 | ||||
| -rw-r--r-- | tablib/packages/openpyxl/reader/excel.py | 109 | ||||
| -rw-r--r-- | tablib/packages/openpyxl/reader/iter_worksheet.py | 348 | ||||
| -rw-r--r-- | tablib/packages/openpyxl/reader/strings.py | 64 | ||||
| -rw-r--r-- | tablib/packages/openpyxl/reader/style.py | 69 | ||||
| -rw-r--r-- | tablib/packages/openpyxl/reader/workbook.py | 156 | ||||
| -rw-r--r-- | tablib/packages/openpyxl/reader/worksheet.py | 114 |
7 files changed, 0 insertions, 893 deletions
diff --git a/tablib/packages/openpyxl/reader/__init__.py b/tablib/packages/openpyxl/reader/__init__.py deleted file mode 100644 index 9b0ee2f..0000000 --- a/tablib/packages/openpyxl/reader/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# file openpyxl/reader/__init__.py - -# Copyright (c) 2010 openpyxl -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# -# @license: http://www.opensource.org/licenses/mit-license.php -# @author: Eric Gazoni - -"""Imports for the openpyxl.reader namespace.""" - -# package imports -from ..reader import excel -from ..reader import strings -from ..reader import style -from ..reader import workbook -from ..reader import worksheet diff --git a/tablib/packages/openpyxl/reader/excel.py b/tablib/packages/openpyxl/reader/excel.py deleted file mode 100644 index 16c3f91..0000000 --- a/tablib/packages/openpyxl/reader/excel.py +++ /dev/null @@ -1,109 +0,0 @@ -# file openpyxl/reader/excel.py - -# Copyright (c) 2010 openpyxl -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# -# @license: http://www.opensource.org/licenses/mit-license.php -# @author: Eric Gazoni - -"""Read an xlsx file into Python""" - -# Python stdlib imports -from zipfile import ZipFile, ZIP_DEFLATED, BadZipfile - -# package imports -from ..shared.exc import OpenModeError, InvalidFileException -from ..shared.ooxml import ARC_SHARED_STRINGS, ARC_CORE, ARC_APP, \ - ARC_WORKBOOK, PACKAGE_WORKSHEETS, ARC_STYLE -from ..workbook import Workbook -from ..reader.strings import read_string_table -from ..reader.style import read_style_table -from ..reader.workbook import read_sheets_titles, read_named_ranges, \ - read_properties_core, get_sheet_ids -from ..reader.worksheet import read_worksheet -from ..reader.iter_worksheet import unpack_worksheet - -def load_workbook(filename, use_iterators = False): - """Open the given filename and return the workbook - - :param filename: the path to open - :type filename: string - - :param use_iterators: use lazy load for cells - :type use_iterators: bool - - :rtype: :class:`openpyxl.workbook.Workbook` - - .. note:: - - When using lazy load, all worksheets will be :class:`openpyxl.reader.iter_worksheet.IterableWorksheet` - and the returned workbook will be read-only. - - """ - - if isinstance(filename, file): - # fileobject must have been opened with 'rb' flag - # it is required by zipfile - if 'b' not in filename.mode: - raise OpenModeError("File-object must be opened in binary mode") - - try: - archive = ZipFile(filename, 'r', ZIP_DEFLATED) - except (BadZipfile, RuntimeError, IOError, ValueError): - raise InvalidFileException() - wb = Workbook() - - if use_iterators: - wb._set_optimized_read() - - try: - _load_workbook(wb, archive, filename, use_iterators) - except KeyError: - raise InvalidFileException() - finally: - archive.close() - return wb - -def _load_workbook(wb, archive, filename, use_iterators): - - # get workbook-level information - wb.properties = read_properties_core(archive.read(ARC_CORE)) - try: - string_table = read_string_table(archive.read(ARC_SHARED_STRINGS)) - except KeyError: - string_table = {} - style_table = read_style_table(archive.read(ARC_STYLE)) - - # get worksheets - wb.worksheets = [] # remove preset worksheet - sheet_names = read_sheets_titles(archive.read(ARC_APP)) - for i, sheet_name in enumerate(sheet_names): - sheet_codename = 'sheet%d.xml' % (i + 1) - worksheet_path = '%s/%s' % (PACKAGE_WORKSHEETS, sheet_codename) - - if not use_iterators: - new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table) - else: - xml_source = unpack_worksheet(archive, worksheet_path) - new_ws = read_worksheet(xml_source, wb, sheet_name, string_table, style_table, filename, sheet_codename) - #new_ws = read_worksheet(archive.read(worksheet_path), wb, sheet_name, string_table, style_table, filename, sheet_codename) - wb.add_sheet(new_ws, index = i) - - wb._named_ranges = read_named_ranges(archive.read(ARC_WORKBOOK), wb) diff --git a/tablib/packages/openpyxl/reader/iter_worksheet.py b/tablib/packages/openpyxl/reader/iter_worksheet.py deleted file mode 100644 index 46ee318..0000000 --- a/tablib/packages/openpyxl/reader/iter_worksheet.py +++ /dev/null @@ -1,348 +0,0 @@ -# file openpyxl/reader/iter_worksheet.py
-
-# Copyright (c) 2010 openpyxl
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# @license: http://www.opensource.org/licenses/mit-license.php
-# @author: Eric Gazoni
-
-""" Iterators-based worksheet reader
-*Still very raw*
-"""
-
-from ....compat import BytesIO as StringIO
-import warnings
-import operator
-from functools import partial
-from itertools import groupby, ifilter
-from ..worksheet import Worksheet
-from ..cell import coordinate_from_string, get_column_letter, Cell
-from ..reader.excel import get_sheet_ids
-from ..reader.strings import read_string_table
-from ..reader.style import read_style_table, NumberFormat
-from ..shared.date_time import SharedDate
-from ..reader.worksheet import read_dimension
-from ..shared.ooxml import (MIN_COLUMN, MAX_COLUMN, PACKAGE_WORKSHEETS,
- MAX_ROW, MIN_ROW, ARC_SHARED_STRINGS, ARC_APP, ARC_STYLE)
-try:
- from xml.etree.cElementTree import iterparse
-except ImportError:
- from xml.etree.ElementTree import iterparse
-
-
-from zipfile import ZipFile
-from .. import cell
-import re
-import tempfile
-import zlib
-import zipfile
-import struct
-
-TYPE_NULL = Cell.TYPE_NULL
-MISSING_VALUE = None
-
-RE_COORDINATE = re.compile('^([A-Z]+)([0-9]+)$')
-
-SHARED_DATE = SharedDate()
-
-_COL_CONVERSION_CACHE = dict((get_column_letter(i), i) for i in xrange(1, 18279))
-def column_index_from_string(str_col, _col_conversion_cache=_COL_CONVERSION_CACHE):
- # we use a function argument to get indexed name lookup
- return _col_conversion_cache[str_col]
-del _COL_CONVERSION_CACHE
-
-RAW_ATTRIBUTES = ['row', 'column', 'coordinate', 'internal_value', 'data_type', 'style_id', 'number_format']
-
-try:
- from collections import namedtuple
- BaseRawCell = namedtuple('RawCell', RAW_ATTRIBUTES)
-except ImportError:
-
- # warnings.warn("""Unable to import 'namedtuple' module, this may cause memory issues when using optimized reader. Please upgrade your Python installation to 2.6+""")
-
- class BaseRawCell(object):
-
- def __init__(self, *args):
- assert len(args)==len(RAW_ATTRIBUTES)
-
- for attr, val in zip(RAW_ATTRIBUTES, args):
- setattr(self, attr, val)
-
- def _replace(self, **kwargs):
-
- self.__dict__.update(kwargs)
-
- return self
-
-
-class RawCell(BaseRawCell):
- """Optimized version of the :class:`openpyxl.cell.Cell`, using named tuples.
-
- Useful attributes are:
-
- * row
- * column
- * coordinate
- * internal_value
-
- You can also access if needed:
-
- * data_type
- * number_format
-
- """
-
- @property
- def is_date(self):
- res = (self.data_type == Cell.TYPE_NUMERIC
- and self.number_format is not None
- and ('d' in self.number_format
- or 'm' in self.number_format
- or 'y' in self.number_format
- or 'h' in self.number_format
- or 's' in self.number_format
- ))
-
- return res
-
-def iter_rows(workbook_name, sheet_name, xml_source, range_string = '', row_offset = 0, column_offset = 0):
-
- archive = get_archive_file(workbook_name)
-
- source = xml_source
-
- if range_string:
- min_col, min_row, max_col, max_row = get_range_boundaries(range_string, row_offset, column_offset)
- else:
- min_col, min_row, max_col, max_row = read_dimension(xml_source = source)
- min_col = column_index_from_string(min_col)
- max_col = column_index_from_string(max_col) + 1
- max_row += 6
-
- try:
- string_table = read_string_table(archive.read(ARC_SHARED_STRINGS))
- except KeyError:
- string_table = {}
-
- style_table = read_style_table(archive.read(ARC_STYLE))
-
- source.seek(0)
- p = iterparse(source)
-
- return get_squared_range(p, min_col, min_row, max_col, max_row, string_table, style_table)
-
-
-def get_rows(p, min_column = MIN_COLUMN, min_row = MIN_ROW, max_column = MAX_COLUMN, max_row = MAX_ROW):
-
- return groupby(get_cells(p, min_row, min_column, max_row, max_column), operator.attrgetter('row'))
-
-def get_cells(p, min_row, min_col, max_row, max_col, _re_coordinate=RE_COORDINATE):
-
- for _event, element in p:
-
- if element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}c':
- coord = element.get('r')
- column_str, row = _re_coordinate.match(coord).groups()
-
- row = int(row)
- column = column_index_from_string(column_str)
-
- if min_col <= column <= max_col and min_row <= row <= max_row:
- data_type = element.get('t', 'n')
- style_id = element.get('s')
- value = element.findtext('{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v')
- yield RawCell(row, column_str, coord, value, data_type, style_id, None)
-
- if element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v':
- continue
- element.clear()
-
-
-
-def get_range_boundaries(range_string, row = 0, column = 0):
-
- if ':' in range_string:
- min_range, max_range = range_string.split(':')
- min_col, min_row = coordinate_from_string(min_range)
- max_col, max_row = coordinate_from_string(max_range)
-
- min_col = column_index_from_string(min_col) + column
- max_col = column_index_from_string(max_col) + column
- min_row += row
- max_row += row
-
- else:
- min_col, min_row = coordinate_from_string(range_string)
- min_col = column_index_from_string(min_col)
- max_col = min_col + 1
- max_row = min_row
-
- return (min_col, min_row, max_col, max_row)
-
-def get_archive_file(archive_name):
-
- return ZipFile(archive_name, 'r')
-
-def get_xml_source(archive_file, sheet_name):
-
- return archive_file.read('%s/%s' % (PACKAGE_WORKSHEETS, sheet_name))
-
-def get_missing_cells(row, columns):
-
- return dict([(column, RawCell(row, column, '%s%s' % (column, row), MISSING_VALUE, TYPE_NULL, None, None)) for column in columns])
-
-def get_squared_range(p, min_col, min_row, max_col, max_row, string_table, style_table):
-
- expected_columns = [get_column_letter(ci) for ci in xrange(min_col, max_col)]
-
- current_row = min_row
- for row, cells in get_rows(p, min_row = min_row, max_row = max_row, min_column = min_col, max_column = max_col):
- full_row = []
- if current_row < row:
-
- for gap_row in xrange(current_row, row):
-
- dummy_cells = get_missing_cells(gap_row, expected_columns)
-
- yield tuple([dummy_cells[column] for column in expected_columns])
-
- current_row = row
-
- temp_cells = list(cells)
-
- retrieved_columns = dict([(c.column, c) for c in temp_cells])
-
- missing_columns = list(set(expected_columns) - set(retrieved_columns.keys()))
-
- replacement_columns = get_missing_cells(row, missing_columns)
-
- for column in expected_columns:
-
- if column in retrieved_columns:
- cell = retrieved_columns[column]
-
- if cell.style_id is not None:
- style = style_table[int(cell.style_id)]
- cell = cell._replace(number_format = style.number_format.format_code) #pylint: disable-msg=W0212
- if cell.internal_value is not None:
- if cell.data_type == Cell.TYPE_STRING:
- cell = cell._replace(internal_value = string_table[int(cell.internal_value)]) #pylint: disable-msg=W0212
- elif cell.data_type == Cell.TYPE_BOOL:
- cell = cell._replace(internal_value = cell.internal_value == 'True')
- elif cell.is_date:
- cell = cell._replace(internal_value = SHARED_DATE.from_julian(float(cell.internal_value)))
- elif cell.data_type == Cell.TYPE_NUMERIC:
- cell = cell._replace(internal_value = float(cell.internal_value))
- full_row.append(cell)
-
- else:
- full_row.append(replacement_columns[column])
-
- current_row = row + 1
-
- yield tuple(full_row)
-
-#------------------------------------------------------------------------------
-
-class IterableWorksheet(Worksheet):
-
- def __init__(self, parent_workbook, title, workbook_name,
- sheet_codename, xml_source):
-
- Worksheet.__init__(self, parent_workbook, title)
- self._workbook_name = workbook_name
- self._sheet_codename = sheet_codename
- self._xml_source = xml_source
-
- def iter_rows(self, range_string = '', row_offset = 0, column_offset = 0):
- """ Returns a squared range based on the `range_string` parameter,
- using generators.
-
- :param range_string: range of cells (e.g. 'A1:C4')
- :type range_string: string
-
- :param row: row index of the cell (e.g. 4)
- :type row: int
-
- :param column: column index of the cell (e.g. 3)
- :type column: int
-
- :rtype: generator
-
- """
-
- return iter_rows(workbook_name = self._workbook_name,
- sheet_name = self._sheet_codename,
- xml_source = self._xml_source,
- range_string = range_string,
- row_offset = row_offset,
- column_offset = column_offset)
-
- def cell(self, *args, **kwargs):
-
- raise NotImplementedError("use 'iter_rows()' instead")
-
- def range(self, *args, **kwargs):
-
- raise NotImplementedError("use 'iter_rows()' instead")
-
-def unpack_worksheet(archive, filename):
-
- temp_file = tempfile.TemporaryFile(mode='r+', prefix='openpyxl.', suffix='.unpack.temp')
-
- zinfo = archive.getinfo(filename)
-
- if zinfo.compress_type == zipfile.ZIP_STORED:
- decoder = None
- elif zinfo.compress_type == zipfile.ZIP_DEFLATED:
- decoder = zlib.decompressobj(-zlib.MAX_WBITS)
- else:
- raise zipfile.BadZipFile("Unrecognized compression method")
-
- archive.fp.seek(_get_file_offset(archive, zinfo))
- bytes_to_read = zinfo.compress_size
-
- while True:
- buff = archive.fp.read(min(bytes_to_read, 102400))
- if not buff:
- break
- bytes_to_read -= len(buff)
- if decoder:
- buff = decoder.decompress(buff)
- temp_file.write(buff)
-
- if decoder:
- temp_file.write(decoder.decompress('Z'))
-
- return temp_file
-
-def _get_file_offset(archive, zinfo):
-
- try:
- return zinfo.file_offset
- except AttributeError:
- # From http://stackoverflow.com/questions/3781261/how-to-simulate-zipfile-open-in-python-2-5
-
- # Seek over the fixed size fields to the "file name length" field in
- # the file header (26 bytes). Unpack this and the "extra field length"
- # field ourselves as info.extra doesn't seem to be the correct length.
- archive.fp.seek(zinfo.header_offset + 26)
- file_name_len, extra_len = struct.unpack("<HH", archive.fp.read(4))
- return zinfo.header_offset + 30 + file_name_len + extra_len
diff --git a/tablib/packages/openpyxl/reader/strings.py b/tablib/packages/openpyxl/reader/strings.py deleted file mode 100644 index e19e291..0000000 --- a/tablib/packages/openpyxl/reader/strings.py +++ /dev/null @@ -1,64 +0,0 @@ -# file openpyxl/reader/strings.py - -# Copyright (c) 2010 openpyxl -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# -# @license: http://www.opensource.org/licenses/mit-license.php -# @author: Eric Gazoni - -"""Read the shared strings table.""" - -# package imports -from ..shared.xmltools import fromstring, QName -from ..shared.ooxml import NAMESPACES - - -def read_string_table(xml_source): - """Read in all shared strings in the table""" - table = {} - xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' - root = fromstring(text=xml_source) - string_index_nodes = root.findall(QName(xmlns, 'si').text) - for index, string_index_node in enumerate(string_index_nodes): - table[index] = get_string(xmlns, string_index_node) - return table - - -def get_string(xmlns, string_index_node): - """Read the contents of a specific string index""" - rich_nodes = string_index_node.findall(QName(xmlns, 'r').text) - if rich_nodes: - reconstructed_text = [] - for rich_node in rich_nodes: - partial_text = get_text(xmlns, rich_node) - reconstructed_text.append(partial_text) - return ''.join(reconstructed_text) - else: - return get_text(xmlns, string_index_node) - - -def get_text(xmlns, rich_node): - """Read rich text, discarding formatting if not disallowed""" - text_node = rich_node.find(QName(xmlns, 't').text) - partial_text = text_node.text or '' - - if text_node.get(QName(NAMESPACES['xml'], 'space').text) != 'preserve': - partial_text = partial_text.strip() - return unicode(partial_text) diff --git a/tablib/packages/openpyxl/reader/style.py b/tablib/packages/openpyxl/reader/style.py deleted file mode 100644 index f773070..0000000 --- a/tablib/packages/openpyxl/reader/style.py +++ /dev/null @@ -1,69 +0,0 @@ -# file openpyxl/reader/style.py - -# Copyright (c) 2010 openpyxl -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# -# @license: http://www.opensource.org/licenses/mit-license.php -# @author: Eric Gazoni - -"""Read shared style definitions""" - -# package imports -from ..shared.xmltools import fromstring, QName -from ..shared.exc import MissingNumberFormat -from ..style import Style, NumberFormat - - -def read_style_table(xml_source): - """Read styles from the shared style table""" - table = {} - xmlns = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main' - root = fromstring(xml_source) - custom_num_formats = parse_custom_num_formats(root, xmlns) - builtin_formats = NumberFormat._BUILTIN_FORMATS - cell_xfs = root.find(QName(xmlns, 'cellXfs').text) - cell_xfs_nodes = cell_xfs.findall(QName(xmlns, 'xf').text) - for index, cell_xfs_node in enumerate(cell_xfs_nodes): - new_style = Style() - number_format_id = int(cell_xfs_node.get('numFmtId')) - if number_format_id < 164: - new_style.number_format.format_code = \ - builtin_formats.get(number_format_id, 'General') - else: - - if number_format_id in custom_num_formats: - new_style.number_format.format_code = \ - custom_num_formats[number_format_id] - else: - raise MissingNumberFormat('%s' % number_format_id) - table[index] = new_style - return table - - -def parse_custom_num_formats(root, xmlns): - """Read in custom numeric formatting rules from the shared style table""" - custom_formats = {} - num_fmts = root.find(QName(xmlns, 'numFmts').text) - if num_fmts is not None: - num_fmt_nodes = num_fmts.findall(QName(xmlns, 'numFmt').text) - for num_fmt_node in num_fmt_nodes: - custom_formats[int(num_fmt_node.get('numFmtId'))] = \ - num_fmt_node.get('formatCode') - return custom_formats diff --git a/tablib/packages/openpyxl/reader/workbook.py b/tablib/packages/openpyxl/reader/workbook.py deleted file mode 100644 index d9bc161..0000000 --- a/tablib/packages/openpyxl/reader/workbook.py +++ /dev/null @@ -1,156 +0,0 @@ -# file openpyxl/reader/workbook.py - -# Copyright (c) 2010 openpyxl -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# -# @license: http://www.opensource.org/licenses/mit-license.php -# @author: Eric Gazoni - -"""Read in global settings to be maintained by the workbook object.""" - -# package imports -from ..shared.xmltools import fromstring, QName -from ..shared.ooxml import NAMESPACES -from ..workbook import DocumentProperties -from ..shared.date_time import W3CDTF_to_datetime -from ..namedrange import NamedRange, split_named_range - -import datetime - -# constants -BUGGY_NAMED_RANGES = ['NA()', '#REF!'] -DISCARDED_RANGES = ['Excel_BuiltIn', 'Print_Area'] - -def get_sheet_ids(xml_source): - - sheet_names = read_sheets_titles(xml_source) - - return dict((sheet, 'sheet%d.xml' % (i + 1)) for i, sheet in enumerate(sheet_names)) - - -def read_properties_core(xml_source): - """Read assorted file properties.""" - properties = DocumentProperties() - root = fromstring(xml_source) - creator_node = root.find(QName(NAMESPACES['dc'], 'creator').text) - if creator_node is not None: - properties.creator = creator_node.text - else: - properties.creator = '' - last_modified_by_node = root.find( - QName(NAMESPACES['cp'], 'lastModifiedBy').text) - if last_modified_by_node is not None: - properties.last_modified_by = last_modified_by_node.text - else: - properties.last_modified_by = '' - - created_node = root.find(QName(NAMESPACES['dcterms'], 'created').text) - if created_node is not None: - properties.created = W3CDTF_to_datetime(created_node.text) - else: - properties.created = datetime.datetime.now() - - modified_node = root.find(QName(NAMESPACES['dcterms'], 'modified').text) - if modified_node is not None: - properties.modified = W3CDTF_to_datetime(modified_node.text) - else: - properties.modified = properties.created - - return properties - - -def get_number_of_parts(xml_source): - """Get a list of contents of the workbook.""" - parts_size = {} - parts_names = [] - root = fromstring(xml_source) - heading_pairs = root.find(QName('http://schemas.openxmlformats.org/officeDocument/2006/extended-properties', - 'HeadingPairs').text) - vector = heading_pairs.find(QName(NAMESPACES['vt'], 'vector').text) - children = vector.getchildren() - for child_id in range(0, len(children), 2): - part_name = children[child_id].find(QName(NAMESPACES['vt'], - 'lpstr').text).text - if not part_name in parts_names: - parts_names.append(part_name) - part_size = int(children[child_id + 1].find(QName( - NAMESPACES['vt'], 'i4').text).text) - parts_size[part_name] = part_size - return parts_size, parts_names - - -def read_sheets_titles(xml_source): - """Read titles for all sheets.""" - root = fromstring(xml_source) - titles_root = root.find(QName('http://schemas.openxmlformats.org/officeDocument/2006/extended-properties', - 'TitlesOfParts').text) - vector = titles_root.find(QName(NAMESPACES['vt'], 'vector').text) - parts, names = get_number_of_parts(xml_source) - - # we can't assume 'Worksheets' to be written in english, - # but it's always the first item of the parts list (see bug #22) - size = parts[names[0]] - children = [c.text for c in vector.getchildren()] - return children[:size] - - -def read_named_ranges(xml_source, workbook): - """Read named ranges, excluding poorly defined ranges.""" - named_ranges = [] - root = fromstring(xml_source) - names_root = root.find(QName('http://schemas.openxmlformats.org/spreadsheetml/2006/main', - 'definedNames').text) - if names_root is not None: - - for name_node in names_root.getchildren(): - range_name = name_node.get('name') - - if name_node.get("hidden", '0') == '1': - continue - - valid = True - - for discarded_range in DISCARDED_RANGES: - if discarded_range in range_name: - valid = False - - for bad_range in BUGGY_NAMED_RANGES: - if bad_range in name_node.text: - valid = False - - if valid: - destinations = split_named_range(name_node.text) - - new_destinations = [] - for worksheet, cells_range in destinations: - - # it can happen that a valid named range references - # a missing worksheet, when Excel didn't properly maintain - # the named range list - # - # we just ignore them here - worksheet = workbook.get_sheet_by_name(worksheet) - if worksheet: - new_destinations.append((worksheet, cells_range)) - - named_range = NamedRange(range_name, new_destinations) - named_ranges.append(named_range) - - return named_ranges diff --git a/tablib/packages/openpyxl/reader/worksheet.py b/tablib/packages/openpyxl/reader/worksheet.py deleted file mode 100644 index a14c4a8..0000000 --- a/tablib/packages/openpyxl/reader/worksheet.py +++ /dev/null @@ -1,114 +0,0 @@ -# file openpyxl/reader/worksheet.py - -# Copyright (c) 2010 openpyxl -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# -# @license: http://www.opensource.org/licenses/mit-license.php -# @author: Eric Gazoni - -"""Reader for a single worksheet.""" - -# Python stdlib imports -try: - from xml.etree.cElementTree import iterparse -except ImportError: - from xml.etree.ElementTree import iterparse - -from ....compat import ifilter -from ....compat import BytesIO as StringIO - -# package imports -from ..cell import Cell, coordinate_from_string -from ..worksheet import Worksheet - -def _get_xml_iter(xml_source): - - if not hasattr(xml_source, 'name'): - return StringIO(xml_source) - else: - xml_source.seek(0) - return xml_source - -def read_dimension(xml_source): - - source = _get_xml_iter(xml_source) - - it = iterparse(source) - - for event, element in it: - - if element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}dimension': - ref = element.get('ref') - - min_range, max_range = ref.split(':') - min_col, min_row = coordinate_from_string(min_range) - max_col, max_row = coordinate_from_string(max_range) - - return min_col, min_row, max_col, max_row - - else: - element.clear() - - return None - -def filter_cells(x): - (event, element) = x - - return element.tag == '{http://schemas.openxmlformats.org/spreadsheetml/2006/main}c' - -def fast_parse(ws, xml_source, string_table, style_table): - - source = _get_xml_iter(xml_source) - - it = iterparse(source) - - for event, element in ifilter(filter_cells, it): - - value = element.findtext('{http://schemas.openxmlformats.org/spreadsheetml/2006/main}v') - - if value is not None: - - coordinate = element.get('r') - data_type = element.get('t', 'n') - style_id = element.get('s') - - if data_type == Cell.TYPE_STRING: - value = string_table.get(int(value)) - - ws.cell(coordinate).value = value - - if style_id is not None: - ws._styles[coordinate] = style_table.get(int(style_id)) - - # to avoid memory exhaustion, clear the item after use - element.clear() - -from ..reader.iter_worksheet import IterableWorksheet - -def read_worksheet(xml_source, parent, preset_title, string_table, - style_table, workbook_name = None, sheet_codename = None): - """Read an xml worksheet""" - if workbook_name and sheet_codename: - ws = IterableWorksheet(parent, preset_title, workbook_name, - sheet_codename, xml_source) - else: - ws = Worksheet(parent, preset_title) - fast_parse(ws, xml_source, string_table, style_table) - return ws |
