summaryrefslogtreecommitdiff
path: root/tablib
diff options
context:
space:
mode:
authorGreg Thornton <gthornton@cjadvertising.com>2011-07-14 13:08:06 -0500
committerGreg Thornton <gthornton@cjadvertising.com>2011-07-14 13:08:06 -0500
commiteaa2b9b8ea778e7fbe1fe5c659fe1b267d52d4b1 (patch)
treed3652fea9e91dbb26deee8382b2005c6792d6470 /tablib
parentcd5aa4fc06d9af90b076c02a9f596bb84db1d09f (diff)
downloadtablib-eaa2b9b8ea778e7fbe1fe5c659fe1b267d52d4b1.tar.gz
Added XLS import support
Diffstat (limited to 'tablib')
-rw-r--r--tablib/compat.py1
-rw-r--r--tablib/formats/_xls.py36
-rw-r--r--tablib/packages/xlrd/__init__.py1720
-rw-r--r--tablib/packages/xlrd/biffh.py639
-rw-r--r--tablib/packages/xlrd/compdoc.py358
-rw-r--r--tablib/packages/xlrd/doc/compdoc.html69
-rw-r--r--tablib/packages/xlrd/doc/xlrd.html1845
-rw-r--r--tablib/packages/xlrd/examples/namesdemo.xlsbin0 -> 22528 bytes
-rw-r--r--tablib/packages/xlrd/examples/xlrdnameAPIdemo.py178
-rw-r--r--tablib/packages/xlrd/formatting.py1256
-rw-r--r--tablib/packages/xlrd/formula.py2092
-rw-r--r--tablib/packages/xlrd/licences.py77
-rw-r--r--tablib/packages/xlrd/sheet.py1768
-rw-r--r--tablib/packages/xlrd/timemachine.py44
-rw-r--r--tablib/packages/xlrd/xldate.py171
15 files changed, 10252 insertions, 2 deletions
diff --git a/tablib/compat.py b/tablib/compat.py
index 0881369..f6bcf8d 100644
--- a/tablib/compat.py
+++ b/tablib/compat.py
@@ -39,6 +39,7 @@ else:
from cStringIO import StringIO as BytesIO
from cStringIO import StringIO
import tablib.packages.xlwt as xlwt
+ import tablib.packages.xlrd as xlrd
from tablib.packages import markup
from itertools import ifilter
from tablib.packages import openpyxl
diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py
index 48dcc0b..1282a43 100644
--- a/tablib/formats/_xls.py
+++ b/tablib/formats/_xls.py
@@ -5,8 +5,8 @@
import sys
-from tablib.compat import BytesIO, xlwt
-
+from tablib.compat import BytesIO, xlwt, xlrd
+import tablib
title = 'xls'
extentions = ('xls',)
@@ -16,6 +16,38 @@ wrap = xlwt.easyxf("alignment: wrap on")
bold = xlwt.easyxf("font: bold on")
+def _fill_dataset(dset, ws, headers):
+    """Copy every row of the xlrd sheet ``ws`` into ``dset``.
+
+    When ``headers`` is true, row 0 of the sheet is assigned to
+    ``dset.headers`` and the remaining rows are appended as data;
+    otherwise every row (including row 0) is appended as data.
+    """
+    for i in range(ws.nrows):
+        if (i == 0) and (headers):
+            dset.headers = ws.row_values(i)
+        else:
+            dset.append(ws.row_values(i))
+
+
+def import_set(dset, in_stream, headers=True):
+    """Fill ``dset`` in place from the first sheet of an XLS workbook.
+
+    ``in_stream`` is handed to ``xlrd.open_workbook`` as ``file_contents``,
+    i.e. it is the content of the .xls file, not a path.  Any data already
+    held by ``dset`` is wiped first.  Nothing is returned.
+    """
+
+    dset.wipe()
+
+    wb = xlrd.open_workbook(file_contents=in_stream)
+    _fill_dataset(dset, wb.sheet_by_index(0), headers)
+
+
+def import_book(dbook, in_stream, headers=True):
+    """Fill ``dbook`` in place with one Dataset per sheet of an XLS workbook.
+
+    ``in_stream`` is handed to ``xlrd.open_workbook`` as ``file_contents``.
+    Each sheet becomes a ``tablib.Dataset`` titled with the sheet name;
+    ``headers`` is applied to every sheet.  Any sheets already held by
+    ``dbook`` are wiped first.  Nothing is returned.
+    """
+
+    dbook.wipe()
+
+    wb = xlrd.open_workbook(file_contents=in_stream)
+    for ws in wb.sheets():
+        data = tablib.Dataset()
+        data.title = ws.name
+        _fill_dataset(data, ws, headers)
+        dbook.add_sheet(data)
+
+
def export_set(dataset):
"""Returns XLS representation of Dataset."""
diff --git a/tablib/packages/xlrd/__init__.py b/tablib/packages/xlrd/__init__.py
new file mode 100644
index 0000000..9097d9d
--- /dev/null
+++ b/tablib/packages/xlrd/__init__.py
@@ -0,0 +1,1720 @@
+# -*- coding: cp1252 -*-
+
+__VERSION__ = "0.7.1" # 2009-05-31
+
+# <p>Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under a
+# BSD-style licence.</p>
+
+import licences
+
+##
+# <p><b>A Python module for extracting data from MS Excel ™ spreadsheet files.
+# <br /><br />
+# Version 0.7.1 -- 2009-05-31
+# </b></p>
+#
+# <h2>General information</h2>
+#
+# <h3>Acknowledgements</h3>
+#
+# <p>
+# Development of this module would not have been possible without the document
+# "OpenOffice.org's Documentation of the Microsoft Excel File Format"
+# ("OOo docs" for short).
+# The latest version is available from OpenOffice.org in
+# <a href=http://sc.openoffice.org/excelfileformat.pdf> PDF format</a>
+# and
+# <a href=http://sc.openoffice.org/excelfileformat.odt> ODT format.</a>
+# Small portions of the OOo docs are reproduced in this
+# document. A study of the OOo docs is recommended for those who wish a
+# deeper understanding of the Excel file layout than the xlrd docs can provide.
+# </p>
+#
+# <p>Backporting to Python 2.1 was partially funded by
+# <a href=http://journyx.com/>
+# Journyx - provider of timesheet and project accounting solutions.
+# </a>
+# </p>
+#
+# <p>Provision of formatting information in version 0.6.1 was funded by
+# <a href=http://www.simplistix.co.uk>
+# Simplistix Ltd.
+# </a>
+# </p>
+#
+# <h3>Unicode</h3>
+#
+# <p>This module presents all text strings as Python unicode objects.
+# From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode.
+# Older files (Excel 95 and earlier) don't keep strings in Unicode;
+# a CODEPAGE record provides a codepage number (for example, 1252) which is
+# used by xlrd to derive the encoding (for same example: "cp1252") which is
+# used to translate to Unicode.</p>
+# <small>
+# <p>If the CODEPAGE record is missing (possible if the file was created
+# by third-party software), xlrd will assume that the encoding is ascii, and keep going.
+# If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and
+# you will need to determine the encoding yourself, and tell xlrd:
+# <pre>
+# book = xlrd.open_workbook(..., encoding_override="cp1252")
+# </pre></p>
+# <p>If the CODEPAGE record exists but is wrong (for example, the codepage
+# number is 1251, but the strings are actually encoded in koi8_r),
+# it can be overridden using the same mechanism.
+# The supplied runxlrd.py has a corresponding command-line argument, which
+# may be used for experimentation:
+# <pre>
+# runxlrd.py -e koi8_r 3rows myfile.xls
+# </pre></p>
+# <p>The first place to look for an encoding ("codec name") is
+# <a href=http://docs.python.org/lib/standard-encodings.html>
+# the Python documentation</a>.
+# </p>
+# </small>
+#
+# <h3>Dates in Excel spreadsheets</h3>
+#
+# <p>In reality, there are no such things. What you have are floating point
+# numbers and pious hope.
+# There are several problems with Excel dates:</p>
+#
+# <p>(1) Dates are not stored as a separate data type; they are stored as
+# floating point numbers and you have to rely on
+# (a) the "number format" applied to them in Excel and/or
+# (b) knowing which cells are supposed to have dates in them.
+# This module helps with (a) by inspecting the
+# format that has been applied to each number cell;
+# if it appears to be a date format, the cell
+# is classified as a date rather than a number. Feedback on this feature,
+# especially from non-English-speaking locales, would be appreciated.</p>
+#
+# <p>(2) Excel for Windows stores dates by default as the number of
+# days (or fraction thereof) since 1899-12-31T00:00:00. Excel for
+# Macintosh uses a default start date of 1904-01-01T00:00:00. The date
+# system can be changed in Excel on a per-workbook basis (for example:
+# Tools -> Options -> Calculation, tick the "1904 date system" box).
+# This is of course a bad idea if there are already dates in the
+# workbook. There is no good reason to change it even if there are no
+# dates in the workbook. Which date system is in use is recorded in the
+# workbook. A workbook transported from Windows to Macintosh (or vice
+# versa) will work correctly with the host Excel. When using this
+# module's xldate_as_tuple function to convert numbers from a workbook,
+# you must use the datemode attribute of the Book object. If you guess,
+# or make a judgement depending on where you believe the workbook was
+# created, you run the risk of being 1462 days out of kilter.</p>
+#
+# <p>Reference:
+# http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162</p>
+#
+#
+# <p>(3) The Excel implementation of the Windows-default 1900-based date system works on the
+# incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29,
+# which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the
+# result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc
+# program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored.
+# Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed.</p>
+#
+# <p>Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326</p>
+#
+# <p>(4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero.
+# Thus any number such that (0.0 <= number < 1.0) is ambiguous. Is 0.625 a time of day (15:00:00),
+# independent of the calendar,
+# or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)?
+# The xldate_* functions in this module
+# take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both
+# date systems. This is consistent with more recent Microsoft documentation
+# (for example, the help file for Excel 2002 which says that the first day
+# in the 1904 date system is 1904-01-02).</p>
+#
+# <p>(5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file,
+# in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive),
+# Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)."
+# This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era,
+# means that DATE(1899, 12, 31) is interpreted as 3799-12-31.</p>
+#
+# <p>For further information, please refer to the documentation for the xldate_* functions.</p>
+#
+# <h3> Named references, constants, formulas, and macros</h3>
+#
+# <p>
+# A name is used to refer to a cell, a group of cells, a constant
+# value, a formula, or a macro. Usually the scope of a name is global
+# across the whole workbook. However it can be local to a worksheet.
+# For example, if the sales figures are in different cells in
+# different sheets, the user may define the name "Sales" in each
+# sheet. There are built-in names, like "Print_Area" and
+# "Print_Titles"; these two are naturally local to a sheet.
+# </p><p>
+# To inspect the names with a user interface like MS Excel, OOo Calc,
+# or Gnumeric, click on Insert/Names/Define. This will show the global
+# names, plus those local to the currently selected sheet.
+# </p><p>
+# A Book object provides two dictionaries (name_map and
+# name_and_scope_map) and a list (name_obj_list) which allow various
+# ways of accessing the Name objects. There is one Name object for
+# each NAME record found in the workbook. Name objects have many
+# attributes, several of which are relevant only when obj.macro is 1.
+# </p><p>
+# In the examples directory you will find namesdemo.xls which
+# showcases the many different ways that names can be used, and
+# xlrdnameAPIdemo.py which offers 3 different queries for inspecting
+# the names in your files, and shows how to extract whatever a name is
+# referring to. There is currently one "convenience method",
+# Name.cell(), which extracts the value in the case where the name
+# refers to a single cell. More convenience methods are planned. The
+# source code for Name.cell (in __init__.py) is an extra source of
+# information on how the Name attributes hang together.
+# </p>
+#
+# <p><i>Name information is <b>not</b> extracted from files older than
+# Excel 5.0 (Book.biff_version < 50)</i></p>
+#
+# <h3>Formatting</h3>
+#
+# <h4>Introduction</h4>
+#
+# <p>This collection of features, new in xlrd version 0.6.1, is intended
+# to provide the information needed to (1) display/render spreadsheet contents
+# (say) on a screen or in a PDF file, and (2) copy spreadsheet data to another
+# file without losing the ability to display/render it.</p>
+#
+# <h4>The Palette; Colour Indexes</h4>
+#
+# <p>A colour is represented in Excel as a (red, green, blue) ("RGB") tuple
+# with each component in range(256). However it is not possible to access an
+# unlimited number of colours; each spreadsheet is limited to a palette of 64 different
+# colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index
+# ("colour index") into this palette.
+#
+# Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue,
+# yellow, magenta, and cyan.</p>
+#
+# The remaining colours in the palette (8 to 63 in Excel 5.0 and later)
+# can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette
+# of 7 rows of 8 colours. The last two rows are reserved for use in charts.<br />
+# The correspondence between this grid and the assigned
+# colour indexes is NOT left-to-right top-to-bottom.<br />
+# Indexes 8 to 15 correspond to changeable
+# parallels of the 8 fixed colours -- for example, index 7 is forever cyan;
+# index 15 starts off being cyan but can be changed by the user.<br />
+#
+# The default colour for each index depends on the file version; tables of the defaults
+# are available in the source code. If the user changes one or more colours,
+# a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable
+# indexes.<br />
+# Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer
+# to colour index 7; "[COLOR16]...." will produce cyan
+# unless the user changes colour index 15 to something else.<br />
+#
+# <p>In addition, there are several "magic" colour indexes used by Excel:<br />
+# 0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines
+# (used in XF, CF, and WINDOW2 records)<br />
+# 0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background
+# (used in XF and CF records )<br />
+# 0x43: System face colour (dialogue background colour)<br />
+# 0x4D: System window text colour for chart border lines<br />
+# 0x4E: System window background colour for chart areas<br />
+# 0x4F: Automatic colour for chart border lines (seems to be always Black)<br />
+# 0x50: System ToolTip background colour (used in note objects)<br />
+# 0x51: System ToolTip text colour (used in note objects)<br />
+# 0x7FFF: System window text colour for fonts (used in FONT and CF records)<br />
+# Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT
+# records.<br />
+#
+# <h4>Default Formatting</h4>
+#
+# Default formatting is applied to all empty cells (those not described by a cell record).
+# Firstly row default information (ROW record, Rowinfo class) is used if available.
+# Failing that, column default information (COLINFO record, Colinfo class) is used if available.
+# As a last resort the worksheet/workbook default cell format will be used; this
+# should always be present in an Excel file,
+# described by the XF record with the fixed index 15 (0-based). By default, it uses the
+# worksheet/workbook default cell style, described by the very first XF record (index 0).
+#
+# <h4> Formatting features not included in xlrd version 0.6.1</h4>
+# <ul>
+# <li>Rich text i.e. strings containing partial <b>bold</b> <i>italic</i>
+# and <u>underlined</u> text, change of font inside a string, etc.
+# See OOo docs s3.4 and s3.2</li>
+# <li>Asian phonetic text (known as "ruby"), used for Japanese furigana. See OOo docs
+# s3.4.2 (p15)</li>
+# <li>Conditional formatting. See OOo docs
+# s5.12, s6.21 (CONDFMT record), s6.16 (CF record)</li>
+# <li>Miscellaneous sheet-level and book-level items e.g. printing layout, screen panes. </li>
+# <li>Modern Excel file versions don't keep most of the built-in
+# "number formats" in the file; Excel loads formats according to the
+# user's locale. Currently xlrd's emulation of this is limited to
+# a hard-wired table that applies to the US English locale. This may mean
+# that currency symbols, date order, thousands separator, decimals separator, etc
+# are inappropriate. Note that this does not affect users who are copying XLS
+# files, only those who are visually rendering cells.</li>
+# </ul>
+#
+# <h3>Loading worksheets on demand</h3>
+#
+# <p>This feature, new in version 0.7.1, is governed by the on_demand argument
+# to the open_workbook() function and allows saving memory and time by loading
+# only those sheets that the caller is interested in, and releasing sheets
+# when no longer required.</p>
+#
+# <p>on_demand=False (default): No change. open_workbook() loads global data
+# and all sheets, releases resources no longer required (principally the
+# str or mmap object containing the Workbook stream), and returns.</p>
+#
+# <p>on_demand=True and BIFF version < 5.0: A warning message is emitted,
+# on_demand is recorded as False, and the old process is followed.</p>
+#
+# <p>on_demand=True and BIFF version >= 5.0: open_workbook() loads global
+# data and returns without releasing resources. At this stage, the only
+# information available about sheets is Book.nsheets and Book.sheet_names().</p>
+#
+# <p>Book.sheet_by_name() and Book.sheet_by_index() will load the requested
+# sheet if it is not already loaded.</p>
+#
+# <p>Book.sheets() will load all/any unloaded sheets.</p>
+#
+# <p>The caller may save memory by calling
+# Book.unload_sheet(sheet_name_or_index) when finished with the sheet.
+# This applies irrespective of the state of on_demand.</p>
+#
+# <p>The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx()
+# -- except if those required resources have been released (which will
+# have happened automatically when on_demand is false). This is the only
+# case where an exception will be raised.</p>
+#
+# <p>The caller may query the state of a sheet:
+# Book.sheet_loaded(sheet_name_or_index) -> a bool</p>
+#
+##
+
+# 2009-04-27 SJM Integrated on_demand patch by Armando Serrano Lombillo
+# 2008-11-23 SJM Support dumping FILEPASS and EXTERNNAME records; extra info from SUPBOOK records
+# 2008-11-23 SJM colname utility function now supports more than 256 columns
+# 2008-04-24 SJM Recovery code for file with out-of-order/missing/wrong CODEPAGE record needed to be called for EXTERNSHEET/BOUNDSHEET/NAME/SHEETHDR records.
+# 2008-02-08 SJM Preparation for Excel 2.0 support
+# 2008-02-03 SJM Minor tweaks for IronPython support
+# 2008-02-02 SJM Previous change stopped dump() and count_records() ... fixed
+# 2007-12-25 SJM Decouple Book initialisation & loading -- to allow for multiple loaders.
+# 2007-12-20 SJM Better error message for unsupported file format.
+# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
+# 2007-11-20 SJM Wasn't handling EXTERNSHEET record that needed CONTINUE record(s)
+# 2007-07-07 SJM Version changed to 0.7.0 (alpha 1)
+# 2007-07-07 SJM Logfile arg wasn't being passed from open_workbook to compdoc.CompDoc
+# 2007-05-21 SJM If no CODEPAGE record in pre-8.0 file, assume ascii and keep going.
+# 2007-04-22 SJM Removed antique undocumented Book.get_name_dict method.
+
+from timemachine import *
+from biffh import *
+from struct import unpack
+import sys
+import time
+import sheet
+import compdoc
+from xldate import xldate_as_tuple, XLDateError
+from formula import *
+import formatting
+if sys.version.startswith("IronPython"):
+ # print >> sys.stderr, "...importing encodings"
+ import encodings
+
+empty_cell = sheet.empty_cell # for exposure to the world ...
+
+# Module-level switches and constants.
+# NOTE(review): DEBUG, USE_FANCY_CD and MY_EOF are not referenced in the
+# part of the module visible here -- confirm their use further down.
+DEBUG = 0
+
+USE_FANCY_CD = 1
+
+# When true, open_workbook() disables the garbage collector (if it was
+# enabled) while loading and re-enables it afterwards.
+TOGGLE_GC = 0
+import gc
+# gc.set_debug(gc.DEBUG_STATS)
+
+# mmap is optional: record whether it could be imported.  USE_MMAP is the
+# default value of open_workbook()'s use_mmap argument.
+try:
+ import mmap
+ MMAP_AVAILABLE = 1
+except ImportError:
+ MMAP_AVAILABLE = 0
+USE_MMAP = MMAP_AVAILABLE
+
+MY_EOF = 0xF00BAAA # not a 16-bit number
+
+# Consecutive integer codes 0..4, in the order listed.
+SUPBOOK_UNK, SUPBOOK_INTERNAL, SUPBOOK_EXTERNAL, SUPBOOK_ADDIN, SUPBOOK_DDEOLE = range(5)
+
+# BIFF version numbers accepted by open_workbook(); any other detected
+# version makes it raise XLRDError.
+SUPPORTED_VERSIONS = (80, 70, 50, 45, 40, 30, 21, 20)
+
+# Built-in name -> one-character code.
+code_from_builtin_name = {
+    u"Consolidate_Area": u"\x00",
+    u"Auto_Open": u"\x01",
+    u"Auto_Close": u"\x02",
+    u"Extract": u"\x03",
+    u"Database": u"\x04",
+    u"Criteria": u"\x05",
+    u"Print_Area": u"\x06",
+    u"Print_Titles": u"\x07",
+    u"Recorder": u"\x08",
+    u"Data_Form": u"\x09",
+    u"Auto_Activate": u"\x0A",
+    u"Auto_Deactivate": u"\x0B",
+    u"Sheet_Title": u"\x0C",
+    u"_FilterDatabase": u"\x0D",
+    }
+# Inverse table: one-character code -> built-in name.
+builtin_name_from_code = {}
+for _name in code_from_builtin_name.keys():
+    builtin_name_from_code[code_from_builtin_name[_name]] = _name
+# Do not leave the loop variable behind in the module namespace.
+del _name
+
+##
+#
+# Open a spreadsheet file for data extraction.
+#
+# @param filename The path to the spreadsheet file to be opened.
+#
+# @param logfile An open file to which messages and diagnostics are written.
+#
+# @param verbosity Increases the volume of trace material written to the logfile.
+#
+# @param pickleable Default is true. In Python 2.4 or earlier, setting to false
+# will cause use of array.array objects which save some memory but can't be pickled.
+# In Python 2.5, array.arrays are used unconditionally. Note: if you have large files that
+# you need to read multiple times, it can be much faster to cPickle.dump() the xlrd.Book object
+# once, and use cPickle.load() multiple times.
+# @param use_mmap Whether to use the mmap module is determined heuristically.
+# Use this arg to override the result. Current heuristic: mmap is used if it exists.
+#
+# @param file_contents ... as a string or an mmap.mmap object or some other behave-alike object.
+# If file_contents is supplied, filename will not be used, except (possibly) in messages.
+#
+# @param encoding_override Used to overcome missing or bad codepage information
+# in older-version files. Refer to discussion in the <b>Unicode</b> section above.
+# <br /> -- New in version 0.6.0
+#
+# @param formatting_info Governs provision of a reference to an XF (eXtended Format) object
+# for each cell in the worksheet.
+# <br /> Default is <i>False</i>. This is backwards compatible and saves memory.
+# "Blank" cells (those with their own formatting information but no data) are treated as empty
+# (by ignoring the file's BLANK and MULBLANK records).
+# It cuts off any bottom "margin" of rows of empty (and blank) cells and
+# any right "margin" of columns of empty (and blank) cells.
+# Only cell_value and cell_type are available.
+# <br /> <i>True</i> provides all cells, including empty and blank cells.
+# XF information is available for each cell.
+# <br /> -- New in version 0.6.1
+#
+# @param on_demand Governs whether sheets are all loaded initially or when demanded
+# by the caller. Please refer back to the section "Loading worksheets on demand" for details.
+# -- New in version 0.7.1
+#
+# @return An instance of the Book class.
+
+def open_workbook(filename=None,
+    logfile=sys.stdout, verbosity=0, pickleable=True, use_mmap=USE_MMAP,
+    file_contents=None,
+    encoding_override=None,
+    formatting_info=False, on_demand=False,
+    ):
+    # Stage 1 loads the raw stream into a Book; stage 2 parses the workbook
+    # globals and (unless on_demand) the sheets.  The time taken by each
+    # stage is recorded on the Book as load_time_stage_1 / load_time_stage_2.
+    t0 = time.clock()
+    # With TOGGLE_GC set, the garbage collector is switched off while loading.
+    # The try/finally makes sure it is switched back on even if loading raises;
+    # without it a failed load would leave gc disabled for the whole process.
+    orig_gc_enabled = TOGGLE_GC and gc.isenabled()
+    if orig_gc_enabled:
+        gc.disable()
+    try:
+        bk = Book()
+        bk.biff2_8_load(
+            filename=filename, file_contents=file_contents,
+            logfile=logfile, verbosity=verbosity, pickleable=pickleable, use_mmap=use_mmap,
+            encoding_override=encoding_override,
+            formatting_info=formatting_info,
+            on_demand=on_demand,
+            )
+        t1 = time.clock()
+        bk.load_time_stage_1 = t1 - t0
+        biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
+        if not biff_version:
+            raise XLRDError("Can't determine file's BIFF version")
+        if biff_version not in SUPPORTED_VERSIONS:
+            raise XLRDError(
+                "BIFF version %s is not supported"
+                % biff_text_from_num[biff_version]
+                )
+        bk.biff_version = biff_version
+        if biff_version <= 40:
+            # no workbook globals, only 1 worksheet
+            if on_demand:
+                fprintf(bk.logfile,
+                    "*** WARNING: on_demand is not supported for this Excel version.\n"
+                    "*** Setting on_demand to False.\n")
+                bk.on_demand = on_demand = False
+            bk.fake_globals_get_sheet()
+        elif biff_version == 45:
+            # worksheet(s) embedded in global stream
+            bk.parse_globals()
+            if on_demand:
+                fprintf(bk.logfile, "*** WARNING: on_demand is not supported for this Excel version.\n"
+                    "*** Setting on_demand to False.\n")
+                bk.on_demand = on_demand = False
+        else:
+            bk.parse_globals()
+            # One placeholder per sheet name; sheets are filled in when loaded.
+            bk._sheet_list = [None for sh in bk._sheet_names]
+            if not on_demand:
+                bk.get_sheets()
+        bk.nsheets = len(bk._sheet_list)
+        if biff_version == 45 and bk.nsheets > 1:
+            fprintf(bk.logfile,
+                "*** WARNING: Excel 4.0 workbook (.XLW) file contains %d worksheets.\n"
+                "*** Book-level data will be that of the last worksheet.\n",
+                bk.nsheets
+                )
+        # With on_demand the resources are kept so sheets can be loaded later.
+        if not on_demand:
+            bk.release_resources()
+    finally:
+        if orig_gc_enabled:
+            gc.enable()
+    t2 = time.clock()
+    bk.load_time_stage_2 = t2 - t1
+    return bk
+
+##
+# For debugging: dump the file's BIFF records in char & hex.
+# @param filename The path to the file to be dumped.
+# @param outfile An open file, to which the dump is written.
+# @param unnumbered If true, omit offsets (for meaningful diffs).
+
+def dump(filename, outfile=sys.stdout, unnumbered=False):
+    # Load the raw stream only (no parsing), then hand the loaded memory,
+    # its base offset and its length to biff_dump, which writes to outfile.
+    book = Book()
+    book.biff2_8_load(filename=filename, logfile=outfile)
+    biff_dump(book.mem, book.base, book.stream_len, 0, outfile, unnumbered)
+
+##
+# For debugging and analysis: summarise the file's BIFF records.
+# I.e. produce a sorted file of (record_name, count).
+# @param filename The path to the file to be summarised.
+# @param outfile An open file, to which the summary is written.
+
+def count_records(filename, outfile=sys.stdout):
+    # Load the raw stream only (no parsing), then let biff_count_records
+    # write its summary of the stream to outfile.
+    book = Book()
+    book.biff2_8_load(filename=filename, logfile=outfile)
+    biff_count_records(book.mem, book.base, book.stream_len, outfile)
+
+##
+# Information relating to a named reference, formula, macro, etc.
+# <br /> -- New in version 0.6.0
+# <br /> -- <i>Name information is <b>not</b> extracted from files older than
+# Excel 5.0 (Book.biff_version < 50)</i>
+
+class Name(BaseObject):
+
+    _repr_these = ['stack']
+    book = None # parent
+
+    ##
+    # 0 = Visible; 1 = Hidden
+    hidden = 0
+
+    ##
+    # 0 = Command macro; 1 = Function macro. Relevant only if macro == 1
+    func = 0
+
+    ##
+    # 0 = Sheet macro; 1 = VisualBasic macro. Relevant only if macro == 1
+    vbasic = 0
+
+    ##
+    # 0 = Standard name; 1 = Macro name
+    macro = 0
+
+    ##
+    # 0 = Simple formula; 1 = Complex formula (array formula or user defined)<br />
+    # <i>No examples have been sighted.</i>
+    complex = 0
+
+    ##
+    # 0 = User-defined name; 1 = Built-in name
+    # (common examples: Print_Area, Print_Titles; see OOo docs for full list)
+    builtin = 0
+
+    ##
+    # Function group. Relevant only if macro == 1; see OOo docs for values.
+    funcgroup = 0
+
+    ##
+    # 0 = Formula definition; 1 = Binary data<br /> <i>No examples have been sighted.</i>
+    binary = 0
+
+    ##
+    # The index of this object in book.name_obj_list
+    name_index = 0
+
+    ##
+    # A Unicode string. If builtin, decoded as per OOo docs.
+    name = u""
+
+    ##
+    # An 8-bit string.
+    raw_formula = ""
+
+    ##
+    # -1: The name is global (visible in all calculation sheets).<br />
+    # -2: The name belongs to a macro sheet or VBA sheet.<br />
+    # -3: The name is invalid.<br />
+    # 0 <= scope < book.nsheets: The name is local to the sheet whose index is scope.
+    scope = -1
+
+    ##
+    # The result of evaluating the formula, if any.
+    # If no formula, or evaluation of the formula encountered problems,
+    # the result is None. Otherwise the result is a single instance of the
+    # Operand class.
+    #
+    result = None
+
+    ##
+    # Convenience method for the common case of a name that refers to
+    # exactly one cell.
+    # @return An instance of the Cell class.
+    # @throws XLRDError The name is not a constant absolute reference
+    # to a single cell. The Name object is dumped to book.logfile first.
+    def cell(self):
+        operand = self.result
+        if operand:
+            # result should be an instance of the Operand class
+            op_kind, refs = operand.kind, operand.value
+            if op_kind == oREF and len(refs) == 1:
+                ref = refs[0]
+                # Each "hi" bound is exclusive, so hi - 1 == lo means a span of one.
+                one_sheet = 0 <= ref.shtxlo == ref.shtxhi - 1
+                one_row = ref.rowxlo == ref.rowxhi - 1
+                one_col = ref.colxlo == ref.colxhi - 1
+                if one_sheet and one_row and one_col:
+                    target = self.book.sheet_by_index(ref.shtxlo)
+                    return target.cell(ref.rowxlo, ref.colxlo)
+        # Anything else: show the offending Name, then complain.
+        self.dump(self.book.logfile,
+            header="=== Dump of Name object ===",
+            footer="======= End of dump =======",
+            )
+        raise XLRDError("Not a constant absolute reference to a single cell")
+
+    ##
+    # Convenience method for a name that refers to one rectangular area
+    # in one worksheet.
+    # @param clipped If true (the default), the returned rectangle is clipped
+    # to fit in (0, sheet.nrows, 0, sheet.ncols) -- it is guaranteed that
+    # 0 <= rowxlo <= rowxhi <= sheet.nrows and that the number of usable rows
+    # in the area (which may be zero) is rowxhi - rowxlo; likewise for columns.
+    # @return a tuple (sheet_object, rowxlo, rowxhi, colxlo, colxhi).
+    # @throws XLRDError The name is not a constant absolute reference
+    # to a single area in a single sheet. The Name object is dumped to
+    # book.logfile first.
+    def area2d(self, clipped=True):
+        operand = self.result
+        if operand:
+            # result should be an instance of the Operand class
+            op_kind, refs = operand.kind, operand.value
+            if op_kind == oREF and len(refs) == 1: # only 1 reference
+                ref = refs[0]
+                if 0 <= ref.shtxlo == ref.shtxhi - 1: # only 1 usable sheet
+                    sh = self.book.sheet_by_index(ref.shtxlo)
+                    if clipped:
+                        nrows, ncols = sh.nrows, sh.ncols
+                        rowxlo = min(ref.rowxlo, nrows)
+                        rowxhi = max(rowxlo, min(ref.rowxhi, nrows))
+                        colxlo = min(ref.colxlo, ncols)
+                        colxhi = max(colxlo, min(ref.colxhi, ncols))
+                        assert 0 <= rowxlo <= rowxhi <= nrows
+                        assert 0 <= colxlo <= colxhi <= ncols
+                        return sh, rowxlo, rowxhi, colxlo, colxhi
+                    # Unclipped: report the reference bounds exactly as stored.
+                    return sh, ref.rowxlo, ref.rowxhi, ref.colxlo, ref.colxhi
+        # Anything else: show the offending Name, then complain.
+        self.dump(self.book.logfile,
+            header="=== Dump of Name object ===",
+            footer="======= End of dump =======",
+            )
+        raise XLRDError("Not a constant absolute reference to a single area in a single sheet")
+
+##
+# Contents of a "workbook".
+# <p>WARNING: You don't call this class yourself. You use the Book object that
+# was returned when you called xlrd.open_workbook("myfile.xls").</p>
+
+class Book(BaseObject):
+
+ ##
+ # The number of worksheets present in the workbook file.
+ # This information is available even when no sheets have yet been loaded.
+ nsheets = 0
+
+ ##
+ # Which date system was in force when this file was last saved.<br />
+ # 0 => 1900 system (the Excel for Windows default).<br />
+ # 1 => 1904 system (the Excel for Macintosh default).<br />
+ datemode = 0 # In case it's not specified in the file.
+
+ ##
+ # Version of BIFF (Binary Interchange File Format) used to create the file.
+ # Latest is 8.0 (represented here as 80), introduced with Excel 97.
+ # Earliest supported by this module: 2.0 (represented as 20).
+ biff_version = 0
+
+ ##
+ # List containing a Name object for each NAME record in the workbook.
+ # <br /> -- New in version 0.6.0
+ name_obj_list = []
+
+ ##
+ # An integer denoting the character set used for strings in this file.
+ # For BIFF 8 and later, this will be 1200, meaning Unicode; more precisely, UTF_16_LE.
+ # For earlier versions, this is used to derive the appropriate Python encoding
+ # to be used to convert to Unicode.
+ # Examples: 1252 -> 'cp1252', 10000 -> 'mac_roman'
+ codepage = None
+
+ ##
+ # The encoding that was derived from the codepage.
+ encoding = None
+
+ ##
+ # A tuple containing the (telephone system) country code for:<br />
+ # [0]: the user-interface setting when the file was created.<br />
+ # [1]: the regional settings.<br />
+ # Example: (1, 61) meaning (USA, Australia).
+ # This information may give a clue to the correct encoding for an unknown codepage.
+ # For a long list of observed values, refer to the OpenOffice.org documentation for
+ # the COUNTRY record.
+ countries = (0, 0)
+
+ ##
+ # What (if anything) is recorded as the name of the last user to save the file.
+ user_name = u''
+
+ ##
+ # A list of Font class instances, each corresponding to a FONT record.
+ # <br /> -- New in version 0.6.1
+ font_list = []
+
+ ##
+ # A list of XF class instances, each corresponding to an XF record.
+ # <br /> -- New in version 0.6.1
+ xf_list = []
+
+ ##
+ # A list of Format objects, each corresponding to a FORMAT record, in
+ # the order that they appear in the input file.
+ # It does <i>not</i> contain builtin formats.
+ # If you are creating an output file using (for example) pyExcelerator,
+ # use this list.
+ # The collection to be used for all visual rendering purposes is format_map.
+ # <br /> -- New in version 0.6.1
+ format_list = []
+
+ ##
+ # The mapping from XF.format_key to Format object.
+ # <br /> -- New in version 0.6.1
+ format_map = {}
+
+ ##
+ # This provides access via name to the extended format information for
+ # both built-in styles and user-defined styles.<br />
+ # It maps <i>name</i> to (<i>built_in</i>, <i>xf_index</i>), where:<br />
+ # <i>name</i> is either the name of a user-defined style,
+ # or the name of one of the built-in styles. Known built-in names are
+ # Normal, RowLevel_1 to RowLevel_7,
+ # ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]",
+ # "Currency [0]", Hyperlink, and "Followed Hyperlink".<br />
+ # <i>built_in</i> 1 = built-in style, 0 = user-defined<br />
+ # <i>xf_index</i> is an index into Book.xf_list.<br />
+ # References: OOo docs s6.99 (STYLE record); Excel UI Format/Style
+ # <br /> -- New in version 0.6.1
+ style_name_map = {}
+
+ ##
+ # This provides definitions for colour indexes. Please refer to the
+ # above section "The Palette; Colour Indexes" for an explanation
+ # of how colours are represented in Excel.<br />
+ # Colour indexes into the palette map into (red, green, blue) tuples.
+ # "Magic" indexes e.g. 0x7FFF map to None.
+ # <i>colour_map</i> is what you need if you want to render cells on screen or in a PDF
+ # file. If you are writing an output XLS file, use <i>palette_record</i>.
+ # <br /> -- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+ colour_map = {}
+
+ ##
+ # If the user has changed any of the colours in the standard palette, the XLS
+ # file will contain a PALETTE record with 56 (16 for Excel 4.0 and earlier)
+ # RGB values in it, and this list will be e.g. [(r0, g0, b0), ..., (r55, g55, b55)].
+ # Otherwise this list will be empty. This is what you need if you are
+ # writing an output XLS file. If you want to render cells on screen or in a PDF
+ # file, use colour_map.
+ # <br /> -- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+ palette_record = []
+
+ ##
+ # Time in seconds to extract the XLS image as a contiguous string (or mmap equivalent).
+ load_time_stage_1 = -1.0
+
+ ##
+ # Time in seconds to parse the data from the contiguous string (or mmap equivalent).
+ load_time_stage_2 = -1.0
+
+ ##
+ # @return A list of all sheets in the book.
+ # All sheets not already loaded will be loaded.
+ def sheets(self):  # Return a list of every sheet in the book, loading any that are not loaded yet.
+ for sheetx in xrange(self.nsheets):
+ if not self._sheet_list[sheetx]:  # falsy slot (e.g. None) means "not loaded"
+ self.get_sheet(sheetx)  # get_sheet() stores the loaded sheet back into _sheet_list
+ return self._sheet_list[:]  # shallow copy, so callers cannot disturb the internal list
+
+ ##
+ # @param sheetx Sheet index in range(nsheets)
+ # @return An object of the Sheet class
+ def sheet_by_index(self, sheetx):  # Return the sheet at index sheetx, loading it on first access.
+ return self._sheet_list[sheetx] or self.get_sheet(sheetx)  # use the cached sheet if the slot is truthy, otherwise load it now
+
+ ##
+ # @param sheet_name Name of sheet required
+ # @return An object of the Sheet class
+ def sheet_by_name(self, sheet_name):  # Return the sheet called sheet_name; raises XLRDError when there is no such name.
+ try:
+ sheetx = self._sheet_names.index(sheet_name)  # exact-match lookup; list.index returns the first occurrence
+ except ValueError:
+ raise XLRDError('No sheet named <%r>' % sheet_name)  # translate the lookup failure into the package's own error type
+ return self.sheet_by_index(sheetx)
+
+ ##
+ # @return A list of the names of all the worksheets in the workbook file.
+ # This information is available even when no sheets have yet been loaded.
+ def sheet_names(self):  # Return the worksheet names as a new list.
+ return self._sheet_names[:]  # copy, so callers cannot modify the internal list
+
+ ##
+ # @param sheet_name_or_index Name or index of sheet enquired upon
+ # @return true if sheet is loaded, false otherwise
+ # <br /> -- New in version 0.7.1
+ def sheet_loaded(self, sheet_name_or_index):  # Report whether the sheet, given by name or by integer index, is currently loaded.
+ # using type(1) because int won't work with Python 2.1
+ if isinstance(sheet_name_or_index, type(1)):
+ sheetx = sheet_name_or_index  # integer argument is used directly as the index
+ else:
+ try:
+ sheetx = self._sheet_names.index(sheet_name_or_index)
+ except ValueError:
+ raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
+ return self._sheet_list[sheetx] and True or False # Python 2.1 again: the and/or idiom turns the slot's truthiness into True/False
+
+ ##
+ # @param sheet_name_or_index Name or index of sheet to be unloaded.
+ # <br /> -- New in version 0.7.1
+ def unload_sheet(self, sheet_name_or_index):  # Forget a loaded sheet (given by name or integer index) by clearing its slot.
+ # using type(1) because int won't work with Python 2.1
+ if isinstance(sheet_name_or_index, type(1)):
+ sheetx = sheet_name_or_index  # integer argument is used directly as the index
+ else:
+ try:
+ sheetx = self._sheet_names.index(sheet_name_or_index)
+ except ValueError:
+ raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
+ self._sheet_list[sheetx] = None  # a None slot is what sheets()/sheet_by_index() treat as "not loaded"
+
+ ##
+ # A mapping from (lower_case_name, scope) to a single Name object.
+ # <br /> -- New in version 0.6.0
+ name_and_scope_map = {}
+
+ ##
+ # A mapping from lower_case_name to a list of Name objects. The list is
+ # sorted in scope order. Typically there will be one item (of global scope)
+ # in the list.
+ # <br /> -- New in version 0.6.0
+ name_map = {}
+
+ def __init__(self):  # Set up empty per-instance parsing state; takes no arguments and opens no file.
+ self._sheet_list = []  # one slot per worksheet: a Sheet object, or None when not loaded
+ self._sheet_names = []
+ self._sheet_visibility = [] # from BOUNDSHEET record
+ self.nsheets = 0
+ self._sh_abs_posn = [] # sheet's absolute position in the stream
+ self._sharedstrings = []  # replaced by handle_sst() with the decoded shared-string table
+ self.raw_user_name = False  # set True by handle_writeaccess() when the user name must be decoded later
+ self._sheethdr_count = 0 # BIFF 4W only
+ self.builtinfmtcount = -1 # unknown as yet. BIFF 3, 4S, 4W
+ self.initialise_format_info()  # creates format_map, format_list, xf_list, font_list and related counters
+ self._all_sheets_count = 0 # includes macro & VBA sheets
+ self._supbook_count = 0
+ self._supbook_locals_inx = None
+ self._supbook_addins_inx = None
+ self._all_sheets_map = [] # maps an all_sheets index to a calc-sheets index (or -1)
+ self._externsheet_info = []
+ self._externsheet_type_b57 = []
+ self._extnsht_name_from_num = {}
+ self._sheet_num_from_name = {}
+ self._extnsht_count = 0
+ self._supbook_types = []
+ self._resources_released = 0  # set to 1 by release_resources(); checked by get_sheet()
+ self.addin_func_names = []
+ self.name_obj_list = []  # the containers below give each instance its own object instead of the class-level default
+ self.colour_map = {}
+ self.palette_record = []
+ self.xf_list = []  # already reset by initialise_format_info() above; assigned again here
+ self.style_name_map = {}
+
+ def biff2_8_load(self, filename=None, file_contents=None,  # Store the load options, obtain the file image, and locate the workbook stream (sets mem, base, stream_len, _position).
+ logfile=sys.stdout, verbosity=0, pickleable=True, use_mmap=USE_MMAP,
+ encoding_override=None,
+ formatting_info=False,
+ on_demand=False,
+ ):
+ # DEBUG = 0
+ self.logfile = logfile
+ self.verbosity = verbosity
+ self.pickleable = pickleable
+ self.use_mmap = use_mmap and MMAP_AVAILABLE  # mmap is used only if requested AND available
+ self.encoding_override = encoding_override
+ self.formatting_info = formatting_info
+ self.on_demand = on_demand
+
+ need_close_filestr = 0  # becomes 1 when filestr is an mmap object that must be closed explicitly
+ if not file_contents:  # no in-memory image supplied (an empty string also lands here): read from filename
+ if python_version < (2, 2) and self.use_mmap:
+ # need to open for update
+ open_mode = "r+b"
+ else:
+ open_mode = "rb"
+ retry = False
+ try:
+ f = open(filename, open_mode)
+ except IOError:
+ e, v = sys.exc_info()[:2]  # fetch the exception via sys.exc_info() instead of "except ... as"
+ if open_mode == "r+b" \
+ and (v.errno == 13 or v.strerror == "Permission denied"):
+ # Maybe the file is read-only
+ retry = True
+ self.use_mmap = False  # fall back to a plain read when update-mode open was refused
+ else:
+ raise
+ if retry:
+ f = open(filename, "rb")
+ if self.use_mmap:
+ f.seek(0, 2) # EOF
+ size = f.tell()  # file size, needed as the mmap length
+ f.seek(0, 0) # BOF
+ if python_version < (2, 2):
+ filestr = mmap.mmap(f.fileno(), size)
+ else:
+ filestr = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
+ need_close_filestr = 1
+ self.stream_len = size
+ else:
+ filestr = f.read()
+ self.stream_len = len(filestr)
+ f.close()
+ else:
+ filestr = file_contents  # caller supplied the whole file image
+ self.stream_len = len(file_contents)
+
+ self.base = 0
+ if filestr[:8] != compdoc.SIGNATURE:  # no compound-document signature: the image itself is used as the record stream
+ # got this one at the antique store
+ self.mem = filestr
+ else:
+ cd = compdoc.CompDoc(filestr, logfile=self.logfile)
+ if USE_FANCY_CD:
+ for qname in [u'Workbook', u'Book']:  # try both stream names in this order; first non-empty result wins
+ self.mem, self.base, self.stream_len = cd.locate_named_stream(qname)
+ if self.mem: break
+ else:  # for/else: reached only when neither name produced a stream
+ raise XLRDError("Can't find workbook in OLE2 compound document")
+ else:
+ for qname in [u'Workbook', u'Book']:
+ self.mem = cd.get_named_stream(qname)
+ if self.mem: break
+ else:  # for/else: reached only when neither name produced a stream
+ raise XLRDError("Can't find workbook in OLE2 compound document")
+ self.stream_len = len(self.mem)
+ del cd
+ if self.mem is not filestr:  # the image is released only when mem is a different object from it
+ if need_close_filestr:
+ filestr.close()
+ del filestr
+ self._position = self.base  # reading starts at the beginning of the located stream
+ if DEBUG:
+ print >> self.logfile, "mem: %s, base: %d, len: %d" % (type(self.mem), self.base, self.stream_len)
+
+ def initialise_format_info(self):  # Reset all FORMAT/XF/FONT bookkeeping to its empty starting state.
+ # needs to be done once per sheet for BIFF 4W :-(
+ self.format_map = {}
+ self.format_list = []
+ self.xfcount = 0
+ self.actualfmtcount = 0 # number of FORMAT records seen so far
+ self._xf_index_to_xl_type_map = {}
+ self._xf_epilogue_done = 0
+ self.xf_list = []
+ self.font_list = []
+
+ def release_resources(self):  # Drop the stream image and shared-string table; get_sheet() refuses to run afterwards.
+ self._resources_released = 1  # flag checked at the top of get_sheet()
+ del self.mem
+ del self._sharedstrings
+
+ def get2bytes(self):  # Read a little-endian 16-bit value at the current position and advance; returns MY_EOF on a short read.
+ pos = self._position
+ buff_two = self.mem[pos:pos+2]
+ lenbuff = len(buff_two)
+ self._position += lenbuff  # advances by however many bytes were actually available, even on a short read
+ if lenbuff < 2:
+ return MY_EOF
+ lo, hi = buff_two  # assumes slicing mem yields 1-character items that ord() accepts (Python 2 str / mmap) -- TODO confirm
+ return (ord(hi) << 8) | ord(lo)  # low byte comes first in the stream
+
+ def get_record_parts(self):  # Read the record at the current position; return (code, length, data) and advance past it.
+ pos = self._position
+ mem = self.mem
+ code, length = unpack('<HH', mem[pos:pos+4])  # 4-byte header: two little-endian unsigned 16-bit fields
+ pos += 4
+ data = mem[pos:pos+length]
+ self._position = pos + length  # position now points at the next record header
+ return (code, length, data)
+
+ def get_record_parts_conditional(self, reqd_record):  # Like get_record_parts(), but consumes the record only when its code equals reqd_record.
+ pos = self._position
+ mem = self.mem
+ code, length = unpack('<HH', mem[pos:pos+4])  # peek at the header without moving the position
+ if code != reqd_record:
+ return (None, 0, '')  # not the wanted record: position is left unchanged
+ pos += 4
+ data = mem[pos:pos+length]
+ self._position = pos + length
+ return (code, length, data)
+
+ def get_sheet(self, sh_number, update_pos=True):  # Load sheet number sh_number, cache it in _sheet_list and return it; raises XLRDError after release_resources().
+ if self._resources_released:
+ raise XLRDError("Can't load sheets after releasing resources.")
+ if update_pos:  # callers pass update_pos=False when the stream is already positioned at the sheet's BOF
+ self._position = self._sh_abs_posn[sh_number]
+ _unused_biff_version = self.getbof(XL_WORKSHEET)  # consumes the sheet's BOF record; the version it reports is deliberately ignored
+ # assert biff_version == self.biff_version ### FAILS
+ # Have an example where book is v7 but sheet reports v8!!!
+ # It appears to work OK if the sheet version is ignored.
+ # Confirmed by Daniel Rentz: happens when Excel does "save as"
+ # creating an old version file; ignore version details on sheet BOF.
+ sh = sheet.Sheet(self,
+ self._position,
+ self._sheet_names[sh_number],
+ sh_number,
+ )
+ sh.read(self)
+ self._sheet_list[sh_number] = sh  # the slot must already exist; callers append a None placeholder beforehand
+ return sh
+
+ def get_sheets(self):  # Load every sheet listed in _sheet_names, in index order.
+ # DEBUG = 0
+ if DEBUG: print >> self.logfile, "GET_SHEETS:", self._sheet_names, self._sh_abs_posn
+ for sheetno in xrange(len(self._sheet_names)):
+ if DEBUG: print >> self.logfile, "GET_SHEETS: sheetno =", sheetno, self._sheet_names, self._sh_abs_posn
+ self.get_sheet(sheetno)
+
+ def fake_globals_get_sheet(self): # for BIFF 4.0 and earlier: fabricate the bookkeeping for one sheet at stream position 0, then load it
+ formatting.initialise_book(self)
+ fake_sheet_name = u'Sheet 1'
+ self._sheet_names = [fake_sheet_name]
+ self._sh_abs_posn = [0]
+ self._sheet_visibility = [0] # one sheet, visible
+ self._sheet_list.append(None) # get_sheet updates _sheet_list but needs a None beforehand
+ self.get_sheets()
+
+ def handle_boundsheet(self, data):  # Process a BOUNDSHEET record: decode name/position/visibility/type and register the sheet.
+ # DEBUG = 1
+ bv = self.biff_version
+ self.derive_encoding()  # make sure self.encoding is set before any byte-string name is decoded
+ if DEBUG:
+ fprintf(self.logfile, "BOUNDSHEET: bv=%d data %r\n", bv, data);
+ if bv == 45: # BIFF4W
+ #### Not documented in OOo docs ...
+ # In fact, the *only* data is the name of the sheet.
+ sheet_name = unpack_string(data, 0, self.encoding, lenlen=1)
+ visibility = 0
+ sheet_type = XL_BOUNDSHEET_WORKSHEET # guess, patch later
+ if len(self._sh_abs_posn) == 0:
+ abs_posn = self._sheetsoffset + self.base  # _sheetsoffset is set by handle_sheetsoffset()
+ # Note (a) this won't be used
+ # (b) it's the position of the SHEETHDR record
+ # (c) add 11 to get to the worksheet BOF record
+ else:
+ abs_posn = -1 # unknown
+ else:
+ offset, visibility, sheet_type = unpack('<iBB', data[0:6])  # signed 32-bit offset, then two unsigned bytes
+ abs_posn = offset + self.base # because global BOF is always at posn 0 in the stream
+ if bv < BIFF_FIRST_UNICODE:
+ sheet_name = unpack_string(data, 6, self.encoding, lenlen=1)
+ else:
+ sheet_name = unpack_unicode(data, 6, lenlen=1)
+
+ if DEBUG or self.verbosity >= 2:
+ fprintf(self.logfile,
+ "BOUNDSHEET: inx=%d vis=%r sheet_name=%r abs_posn=%d sheet_type=0x%02x\n",
+ self._all_sheets_count, visibility, sheet_name, abs_posn, sheet_type)
+ self._all_sheets_count += 1  # counts every BOUNDSHEET record, worksheet or not
+ if sheet_type != XL_BOUNDSHEET_WORKSHEET:
+ self._all_sheets_map.append(-1)  # -1 marks an entry with no corresponding worksheet index
+ descr = {
+ 1: 'Macro sheet',
+ 2: 'Chart',
+ 6: 'Visual Basic module',
+ }.get(sheet_type, 'UNKNOWN')
+
+ fprintf(self.logfile,
+ "NOTE *** Ignoring non-worksheet data named %r (type 0x%02x = %s)\n",
+ sheet_name, sheet_type, descr)
+ else:
+ snum = len(self._sheet_names)  # index this worksheet will have in the per-worksheet lists
+ self._all_sheets_map.append(snum)
+ self._sheet_names.append(sheet_name)
+ self._sh_abs_posn.append(abs_posn)
+ self._sheet_visibility.append(visibility)
+ self._sheet_num_from_name[sheet_name] = snum
+
+ def handle_builtinfmtcount(self, data):  # Store the 16-bit count from a BUILTINFMTCOUNT record in self.builtinfmtcount.
+ ### N.B. This count appears to be utterly useless.
+ # DEBUG = 1
+ builtinfmtcount = unpack('<H', data[0:2])[0]  # little-endian unsigned 16-bit
+ if DEBUG: fprintf(self.logfile, "BUILTINFMTCOUNT: %r\n", builtinfmtcount)
+ self.builtinfmtcount = builtinfmtcount
+
+ def derive_encoding(self):  # Work out self.encoding from encoding_override or the codepage, decode any pending raw user name, and return self.encoding.
+ if self.encoding_override:  # an explicit override always wins over the file's codepage
+ self.encoding = self.encoding_override
+ elif self.codepage is None:
+ if self.biff_version < 80:
+ fprintf(self.logfile,
+ "*** No CODEPAGE record, no encoding_override: will use 'ascii'\n")
+ self.encoding = 'ascii'
+ else:
+ self.codepage = 1200 # utf16le -- NOTE(review): no assignment to self.encoding is visible in this branch; confirm whether that is intended
+ if self.verbosity >= 2:
+ fprintf(self.logfile, "*** No CODEPAGE record; assuming 1200 (utf_16_le)\n")
+ else:
+ codepage = self.codepage
+ if encoding_from_codepage.has_key(codepage):  # known codepage: use the table entry
+ encoding = encoding_from_codepage[codepage]
+ elif 300 <= codepage <= 1999:  # otherwise build a 'cpNNNN' codec name for codepages in this range
+ encoding = 'cp' + str(codepage)
+ else:
+ encoding = 'unknown_codepage_' + str(codepage)  # placeholder name built from the codepage number
+ if DEBUG or (self.verbosity and encoding != self.encoding) :
+ fprintf(self.logfile, "CODEPAGE: codepage %r -> encoding %r\n", codepage, encoding)
+ self.encoding = encoding
+ if self.codepage != 1200: # utf_16_le
+ # If we don't have a codec that can decode ASCII into Unicode,
+ # we're well & truly stuffed -- let the punter know ASAP.
+ try:
+ _unused = unicode('trial', self.encoding)  # trial decode; only success or failure matters
+ except:  # bare except, but the exception is logged and then re-raised below
+ ei = sys.exc_info()[:2]
+ fprintf(self.logfile,
+ "ERROR *** codepage %r -> encoding %r -> %s: %s\n",
+ self.codepage, self.encoding, ei[0].__name__.split(".")[-1], ei[1])
+ raise
+ if self.raw_user_name:  # handle_writeaccess() stored undecoded bytes because no encoding was known then
+ strg = unpack_string(self.user_name, 0, self.encoding, lenlen=1)
+ strg = strg.rstrip()
+ # if DEBUG:
+ # print "CODEPAGE: user name decoded from %r to %r" % (self.user_name, strg)
+ self.user_name = strg
+ self.raw_user_name = False
+ return self.encoding
+
+ def handle_codepage(self, data):  # Store the codepage number from a CODEPAGE record and re-derive the encoding from it.
+ # DEBUG = 0
+ codepage = unpack('<H', data[0:2])[0]  # little-endian unsigned 16-bit
+ self.codepage = codepage
+ self.derive_encoding()
+
+ def handle_country(self, data):  # Store the pair of country codes from a COUNTRY record in self.countries.
+ countries = unpack('<HH', data[0:4])  # tuple of two little-endian unsigned 16-bit values
+ if self.verbosity: print >> self.logfile, "Countries:", countries
+ # Note: in BIFF7 and earlier, country record was put (redundantly?) in each worksheet.
+ assert self.countries == (0, 0) or self.countries == countries  # a repeated record must agree with what is already stored
+ self.countries = countries
+
+ def handle_datemode(self, data):  # Store the date-system flag (0 or 1) from a DATEMODE record in self.datemode.
+ datemode = unpack('<H', data[0:2])[0]  # little-endian unsigned 16-bit
+ if DEBUG or self.verbosity:
+ fprintf(self.logfile, "DATEMODE: datemode %r\n", datemode)
+ assert datemode in (0, 1)  # any other value fails the assertion
+ self.datemode = datemode
+
+ def handle_externname(self, data):  # Process an EXTERNNAME record; the parsing shown is guarded by biff_version >= 80.
+ blah = DEBUG or self.verbosity >= 2  # verbose-logging switch
+ if self.biff_version >= 80:
+ option_flags, other_info =unpack("<HI", data[:6])  # 16-bit flags then a 32-bit field, little-endian
+ pos = 6
+ name, pos = unpack_unicode_update_pos(data, pos, lenlen=1)
+ extra = data[pos:]  # whatever follows the name; only used in the log line
+ if self._supbook_types[-1] == SUPBOOK_ADDIN:  # the most recently seen SUPBOOK decides how this name is treated
+ self.addin_func_names.append(name)
+ if blah:
+ fprintf(self.logfile,
+ "EXTERNNAME: sbktype=%d oflags=0x%04x oinfo=0x%08x name=%r extra=%r\n",
+ self._supbook_types[-1], option_flags, other_info, name, extra)
+
+ def handle_externsheet(self, data):  # Process an EXTERNSHEET record: BIFF 8 reference triples, or the older one-sheet-per-record form.
+ self.derive_encoding() # in case CODEPAGE record missing/out of order/wrong
+ self._extnsht_count += 1 # for use as a 1-based index
+ blah1 = DEBUG or self.verbosity >= 1
+ blah2 = DEBUG or self.verbosity >= 2
+ if self.biff_version >= 80:
+ num_refs = unpack("<H", data[0:2])[0]
+ bytes_reqd = num_refs * 6 + 2  # 2-byte count plus 6 bytes per reference
+ while len(data) < bytes_reqd:  # record is shorter than its count implies: pull in following CONTINUE records
+ if blah1:
+ fprintf(
+ self.logfile,
+ "INFO: EXTERNSHEET needs %d bytes, have %d\n",
+ bytes_reqd, len(data),
+ )
+ code2, length2, data2 = self.get_record_parts()
+ if code2 != XL_CONTINUE:
+ raise XLRDError("Missing CONTINUE after EXTERNSHEET record")
+ data += data2
+ pos = 2
+ for k in xrange(num_refs):
+ info = unpack("<HHH", data[pos:pos+6])  # three little-endian unsigned 16-bit fields per reference
+ ref_recordx, ref_first_sheetx, ref_last_sheetx = info
+ self._externsheet_info.append(info)
+ pos += 6
+ if blah2:
+ fprintf(
+ self.logfile,
+ "EXTERNSHEET(b8): k = %2d, record = %2d, first_sheet = %5d, last sheet = %5d\n",
+ k, ref_recordx, ref_first_sheetx, ref_last_sheetx,
+ )
+ else:
+ nc, ty = unpack("<BB", data[:2])  # two single-byte fields: character count and reference type
+ if blah2:
+ print "EXTERNSHEET(b7-):"
+ hex_char_dump(data, 0, len(data))
+ msg = {
+ 1: "Encoded URL",
+ 2: "Current sheet!!",
+ 3: "Specific sheet in own doc't",
+ 4: "Nonspecific sheet in own doc't!!",
+ }.get(ty, "Not encoded")
+ print " %3d chars, type is %d (%s)" % (nc, ty, msg)
+ if ty == 3:
+ sheet_name = unicode(data[2:nc+2], self.encoding)
+ self._extnsht_name_from_num[self._extnsht_count] = sheet_name  # keyed by the 1-based record count
+ if blah2: print self._extnsht_name_from_num
+ if not (1 <= ty <= 4):
+ ty = 0  # anything outside 1..4 is recorded as 0
+ self._externsheet_type_b57.append(ty)
+
+ def handle_filepass(self, data):  # Handle a FILEPASS record: log encryption details at verbosity >= 2, then raise XLRDError.
+ if self.verbosity >= 2:
+ logf = self.logfile
+ fprintf(logf, "FILEPASS:\n")
+ hex_char_dump(data, 0, len(data), base=0, fout=logf)
+ if self.biff_version >= 80:
+ kind1, = unpack('<H', data[:2])  # first 16-bit field distinguishes the encryption kinds handled below
+ if kind1 == 0: # weak XOR encryption
+ key, hash_value = unpack('<HH', data[2:])
+ fprintf(logf,
+ 'weak XOR: key=0x%04x hash=0x%04x\n',
+ key, hash_value)
+ elif kind1 == 1:
+ kind2, = unpack('<H', data[4:6])
+ if kind2 == 1: # BIFF8 standard encryption
+ caption = "BIFF8 std"
+ elif kind2 == 2:
+ caption = "BIFF8 strong"
+ else:
+ caption = "** UNKNOWN ENCRYPTION METHOD **"
+ fprintf(logf, "%s\n", caption)
+ raise XLRDError("Workbook is encrypted")  # no decryption is attempted in this method
+
+ def handle_name(self, data):  # Process a NAME record: build a Name object, decode its flags and name, and append it to name_obj_list.
+ blah = DEBUG or self.verbosity >= 2  # verbose-logging switch
+ bv = self.biff_version
+ if bv < 50:  # NAME records are not processed for BIFF versions below 5.0
+ return
+ self.derive_encoding()
+ # print
+ # hex_char_dump(data, 0, len(data))
+ (
+ option_flags, kb_shortcut, name_len, fmla_len, extsht_index, sheet_index,
+ menu_text_len, description_text_len, help_topic_text_len, status_bar_text_len,
+ ) = unpack("<HBBHHH4B", data[0:14])  # fixed 14-byte header; the variable-length name starts at offset 14
+ nobj = Name()
+ nobj.book = self ### CIRCULAR ###
+ name_index = len(self.name_obj_list)
+ nobj.name_index = name_index
+ self.name_obj_list.append(nobj)
+ nobj.option_flags = option_flags
+ for attr, mask, nshift in (  # each row: attribute name, bit mask within option_flags, right-shift to normalise the value
+ ('hidden', 1, 0),
+ ('func', 2, 1),
+ ('vbasic', 4, 2),
+ ('macro', 8, 3),
+ ('complex', 0x10, 4),
+ ('builtin', 0x20, 5),
+ ('funcgroup', 0xFC0, 6),
+ ('binary', 0x1000, 12),
+ ):
+ setattr(nobj, attr, (option_flags & mask) >> nshift)
+
+ macro_flag = " M"[nobj.macro]  # "M" when the macro bit is set, otherwise a space; used only in the log line
+ if bv < 80:
+ internal_name, pos = unpack_string_update_pos(data, 14, self.encoding, known_len=name_len)
+ else:
+ internal_name, pos = unpack_unicode_update_pos(data, 14, known_len=name_len)
+ nobj.extn_sheet_num = extsht_index
+ nobj.excel_sheet_index = sheet_index
+ nobj.scope = None # patched up in the names_epilogue() method
+ if blah:
+ print "NAME[%d]:%s oflags=%d, name_len=%d, fmla_len=%d, extsht_index=%d, sheet_index=%d, name=%r" \
+ % (name_index, macro_flag, option_flags, name_len,
+ fmla_len, extsht_index, sheet_index, internal_name)
+ name = internal_name
+ if nobj.builtin:
+ name = builtin_name_from_code.get(name, "??Unknown??")  # built-in names are stored as codes; map to a readable name
+ if blah: print " builtin: %s" % name
+ nobj.name = name
+ nobj.raw_formula = data[pos:]  # everything after the name is kept undecoded
+ nobj.basic_formula_len = fmla_len
+ nobj.evaluated = 0  # formula not parsed yet; names_epilogue() checks this flag
+ if blah:
+ nobj.dump(
+ self.logfile,
+ header="--- handle_name: name[%d] ---" % name_index,
+ footer="-------------------",
+ )
+
+ def names_epilogue(self):  # After all global records: resolve each Name's scope, evaluate formulas, and build name_and_scope_map / name_map.
+ blah = self.verbosity >= 2
+ f = self.logfile
+ if blah:
+ print >> f, "+++++ names_epilogue +++++"
+ print >> f, "_all_sheets_map", self._all_sheets_map
+ print >> f, "_extnsht_name_from_num", self._extnsht_name_from_num
+ print >> f, "_sheet_num_from_name", self._sheet_num_from_name
+ num_names = len(self.name_obj_list)
+ for namex in range(num_names):  # pass 1: compute nobj.scope
+ nobj = self.name_obj_list[namex]
+ # Convert from excel_sheet_index to scope.
+ # This is done here because in BIFF7 and earlier, the
+ # BOUNDSHEET records (from which _all_sheets_map is derived)
+ # come after the NAME records.
+ if self.biff_version >= 80:
+ sheet_index = nobj.excel_sheet_index
+ if sheet_index == 0:
+ intl_sheet_index = -1 # global
+ elif 1 <= sheet_index <= len(self._all_sheets_map):  # sheet_index is 1-based here
+ intl_sheet_index = self._all_sheets_map[sheet_index-1]
+ if intl_sheet_index == -1: # maps to a macro or VBA sheet
+ intl_sheet_index = -2 # valid sheet reference but not useful
+ else:
+ # huh?
+ intl_sheet_index = -3 # invalid
+ elif 50 <= self.biff_version <= 70:
+ sheet_index = nobj.extn_sheet_num
+ if sheet_index == 0:
+ intl_sheet_index = -1 # global
+ else:
+ sheet_name = self._extnsht_name_from_num[sheet_index]  # filled in by handle_externsheet()
+ intl_sheet_index = self._sheet_num_from_name.get(sheet_name, -2)  # -2 when the name is not a known worksheet
+ nobj.scope = intl_sheet_index
+
+ for namex in range(num_names):  # pass 2: evaluate formulas
+ nobj = self.name_obj_list[namex]
+ # Parse the formula ...
+ if nobj.macro or nobj.binary: continue  # these kinds are not evaluated
+ if nobj.evaluated: continue  # skip names that are already marked evaluated
+ evaluate_name_formula(self, nobj, namex, blah=blah)
+
+ if self.verbosity >= 2:
+ print >> f, "---------- name object dump ----------"
+ for namex in range(num_names):
+ nobj = self.name_obj_list[namex]
+ nobj.dump(f, header="--- name[%d] ---" % namex)
+ print >> f, "--------------------------------------"
+ #
+ # Build some dicts for access to the name objects
+ #
+ name_and_scope_map = {} # (name.lower(), scope): Name_object
+ name_map = {} # name.lower() : list of Name_objects (sorted in scope order)
+ for namex in range(num_names):
+ nobj = self.name_obj_list[namex]
+ name_lcase = nobj.name.lower()
+ key = (name_lcase, nobj.scope)
+ if name_and_scope_map.has_key(key):
+ msg = 'Duplicate entry %r in name_and_scope_map' % (key, )
+ if 0:  # raising on duplicates is disabled; they are only reported when verbosity is on
+ raise XLRDError(msg)
+ else:
+ if self.verbosity:
+ print >> f, msg
+ name_and_scope_map[key] = nobj  # for a duplicate key, the later Name replaces the earlier one
+ if name_map.has_key(name_lcase):
+ name_map[name_lcase].append((nobj.scope, nobj))
+ else:
+ name_map[name_lcase] = [(nobj.scope, nobj)]
+ for key in name_map.keys():
+ alist = name_map[key]
+ alist.sort()  # sorts the (scope, Name) tuples, i.e. primarily by scope
+ name_map[key] = [x[1] for x in alist]  # keep only the Name objects, now in sorted order
+ self.name_and_scope_map = name_and_scope_map
+ self.name_map = name_map
+
+ def handle_obj(self, data):  # Unpack the type and id from an OBJ record; the values are not stored or returned.
+ # Not doing much handling at all.
+ # Worrying about embedded (BOF ... EOF) substreams is done elsewhere.
+ # DEBUG = 1
+ obj_type, obj_id = unpack('<HI', data[4:10])  # 16-bit type then 32-bit id, little-endian; raises struct.error if data is too short
+ # if DEBUG: print "---> handle_obj type=%d id=0x%08x" % (obj_type, obj_id)
+
+ def handle_supbook(self, data):  # Process a SUPBOOK record: classify it and record its type in _supbook_types.
+ self._supbook_types.append(None)  # placeholder, overwritten below once the type is known
+ blah = DEBUG or self.verbosity >= 2
+ if 0:  # disabled debugging dump
+ print "SUPBOOK:"
+ hex_char_dump(data, 0, len(data))
+ num_sheets = unpack("<H", data[0:2])[0]
+ sbn = self._supbook_count  # zero-based index of this SUPBOOK record
+ self._supbook_count += 1
+ if data[2:4] == "\x01\x04":  # byte pattern this code treats as "internal references"
+ self._supbook_types[-1] = SUPBOOK_INTERNAL
+ self._supbook_locals_inx = self._supbook_count - 1
+ if blah:
+ print "SUPBOOK[%d]: internal 3D refs; %d sheets" % (sbn, num_sheets)
+ print " _all_sheets_map", self._all_sheets_map
+ return
+ if data[0:4] == "\x01\x00\x01\x3A":  # byte pattern this code treats as "add-in functions"
+ self._supbook_types[-1] = SUPBOOK_ADDIN
+ self._supbook_addins_inx = self._supbook_count - 1
+ if blah: print "SUPBOOK[%d]: add-in functions" % sbn
+ return
+ url, pos = unpack_unicode_update_pos(data, 2, lenlen=2)
+ if num_sheets == 0:
+ self._supbook_types[-1] = SUPBOOK_DDEOLE
+ if blah: print "SUPBOOK[%d]: DDE/OLE document = %r" % (sbn, url)
+ return
+ self._supbook_types[-1] = SUPBOOK_EXTERNAL
+ if blah: print "SUPBOOK[%d]: url = %r" % (sbn, url)
+ sheet_names = []  # collected locally; not stored on self in the code shown
+ for x in range(num_sheets):
+ shname, pos = unpack_unicode_update_pos(data, pos, lenlen=2)
+ sheet_names.append(shname)
+ if blah: print " sheet %d: %r" % (x, shname)
+
+ def handle_sheethdr(self, data):  # Process a SHEETHDR record: load the worksheet substream that follows it, then skip past that substream.
+ # This is a BIFF 4W special.
+ # The SHEETHDR record is followed by a (BOF ... EOF) substream containing
+ # a worksheet.
+ # DEBUG = 1
+ self.derive_encoding()
+ sheet_len = unpack('<i', data[:4])[0]  # signed 32-bit length, used below to compute where the substream ends
+ sheet_name = unpack_string(data, 4, self.encoding, lenlen=1)
+ sheetno = self._sheethdr_count
+ assert sheet_name == self._sheet_names[sheetno]  # the name must match the one already registered at this index
+ self._sheethdr_count += 1
+ BOF_posn = self._position  # current position is just past this record, where the substream's BOF is expected
+ posn = BOF_posn - 4 - len(data)  # start of this SHEETHDR record (4-byte header + data); used only for logging
+ if DEBUG: print >> self.logfile, 'SHEETHDR %d at posn %d: len=%d name=%r' % (sheetno, posn, sheet_len, sheet_name)
+ self.initialise_format_info()
+ if DEBUG: print >> self.logfile, 'SHEETHDR: xf epilogue flag is %d' % self._xf_epilogue_done
+ self._sheet_list.append(None) # get_sheet updates _sheet_list but needs a None beforehand
+ self.get_sheet(sheetno, update_pos=False)  # already positioned at the BOF, so no seek
+ if DEBUG: print >> self.logfile, 'SHEETHDR: posn after get_sheet() =', self._position
+ self._position = BOF_posn + sheet_len  # jump to the end of the substream regardless of where get_sheet() stopped
+
+ def handle_sheetsoffset(self, data):  # Store the signed 32-bit offset from a SHEETSOFFSET record in self._sheetsoffset.
+ # DEBUG = 0
+ posn = unpack('<i', data)[0]  # unpacks the whole of data, so data must be exactly 4 bytes
+ if DEBUG: print >> self.logfile, 'SHEETSOFFSET:', posn
+ self._sheetsoffset = posn  # read by handle_boundsheet() for BIFF 4W
+
+ def handle_sst(self, data):  # Process an SST record plus its CONTINUE records and store the decoded strings in self._sharedstrings.
+ # DEBUG = 1
+ if DEBUG:
+ print >> self.logfile, "SST Processing"
+ t0 = time.time()
+ nbt = len(data)  # running total of bytes gathered; used only for debug output
+ strlist = [data]  # record payloads are kept as separate chunks for unpack_SST_table()
+ uniquestrings = unpack('<i', data[4:8])[0]  # signed 32-bit count taken from offset 4
+ if DEBUG or self.verbosity >= 2:
+ fprintf(self.logfile, "SST: unique strings: %d\n", uniquestrings)
+ while 1:
+ code, nb, data = self.get_record_parts_conditional(XL_CONTINUE)  # consumes the next record only if it is a CONTINUE
+ if code is None:  # next record is something else: the SST data is complete
+ break
+ nbt += nb
+ if DEBUG >= 2:
+ fprintf(self.logfile, "CONTINUE: adding %d bytes to SST -> %d\n", nb, nbt)
+ strlist.append(data)
+ self._sharedstrings = unpack_SST_table(strlist, uniquestrings)
+ if DEBUG:
+ t1 = time.time()
+ print >> self.logfile, "SST processing took %.2f seconds" % (t1 - t0, )
+
+ def handle_writeaccess(self, data):  # Process a WRITEACCESS record: decode the user name into self.user_name, or defer decoding if no encoding is known.
+ # DEBUG = 0
+ if self.biff_version < 80:
+ if not self.encoding:  # encoding not derived yet: keep the raw bytes for derive_encoding() to decode later
+ self.raw_user_name = True
+ self.user_name = data
+ return
+ strg = unpack_string(data, 0, self.encoding, lenlen=1)
+ else:
+ strg = unpack_unicode(data, 0, lenlen=2)
+ if DEBUG: print >> self.logfile, "WRITEACCESS: %d bytes; raw=%d %r" % (len(data), self.raw_user_name, strg)
+ strg = strg.rstrip()  # strip trailing whitespace from the decoded name
+ self.user_name = strg
+
+ def parse_globals(self):  # Read global records one at a time and dispatch each to its handler; returns when the EOF record is reached.
+ # DEBUG = 0
+ # no need to position, just start reading (after the BOF)
+ formatting.initialise_book(self)
+ while 1:
+ rc, length, data = self.get_record_parts()  # rc is the record code that selects the handler below
+ if DEBUG: print "parse_globals: record code is 0x%04x" % rc
+ if rc == XL_SST:
+ self.handle_sst(data)
+ elif rc == XL_FONT or rc == XL_FONT_B3B4:
+ self.handle_font(data)
+ elif rc == XL_FORMAT: # XL_FORMAT2 is BIFF <= 3.0, can't appear in globals
+ self.handle_format(data)
+ elif rc == XL_XF:
+ self.handle_xf(data)
+ elif rc == XL_BOUNDSHEET:
+ self.handle_boundsheet(data)
+ elif rc == XL_DATEMODE:
+ self.handle_datemode(data)
+ elif rc == XL_CODEPAGE:
+ self.handle_codepage(data)
+ elif rc == XL_COUNTRY:
+ self.handle_country(data)
+ elif rc == XL_EXTERNNAME:
+ self.handle_externname(data)
+ elif rc == XL_EXTERNSHEET:
+ self.handle_externsheet(data)
+ elif rc == XL_FILEPASS:
+ self.handle_filepass(data)
+ elif rc == XL_WRITEACCESS:
+ self.handle_writeaccess(data)
+ elif rc == XL_SHEETSOFFSET:
+ self.handle_sheetsoffset(data)
+ elif rc == XL_SHEETHDR:
+ self.handle_sheethdr(data)
+ elif rc == XL_SUPBOOK:
+ self.handle_supbook(data)
+ elif rc == XL_NAME:
+ self.handle_name(data)
+ elif rc == XL_PALETTE:
+ self.handle_palette(data)
+ elif rc == XL_STYLE:
+ self.handle_style(data)
+ elif rc & 0xff == 9:  # parsed as (rc & 0xff) == 9; such a code is reported as an unexpected BOF and otherwise skipped
+ print >> self.logfile, "*** Unexpected BOF at posn %d: 0x%04x len=%d data=%r" \
+ % (self._position - length - 4, rc, length, data)
+ elif rc == XL_EOF:  # end of the globals: run the epilogue steps, then return
+ self.xf_epilogue()
+ self.names_epilogue()
+ self.palette_epilogue()
+ if not self.encoding:  # no handler has derived an encoding yet
+ self.derive_encoding()
+ if self.biff_version == 45:
+ # DEBUG = 0
+ if DEBUG: print "global EOF: position", self._position
+ # if DEBUG:
+ # pos = self._position - 4
+ # print repr(self.mem[pos:pos+40])
+ return
+ else:
+ # if DEBUG:
+ # print "parse_globals: ignoring record code 0x%04x" % rc
+ pass  # any other record code is ignored
+
+ def read(self, pos, length):  # Return up to length bytes of the stream starting at pos, leaving the position just after them.
+ data = self.mem[pos:pos+length]  # slicing truncates silently near the end of the stream
+ self._position = pos + len(data)  # based on the bytes actually obtained, not on the requested length
+ return data
+
+ def getbof(self, rqd_stream):  # Read and validate a BOF record at the current position; return the derived BIFF version number, or raise XLRDError.
+ # DEBUG = 1
+ # if DEBUG: print >> self.logfile, "getbof(): position", self._position
+ if DEBUG: print >> self.logfile, "reqd: 0x%04x" % rqd_stream
+ def bof_error(msg):  # local helper: every failure is raised as XLRDError with a common message prefix
+ raise XLRDError('Unsupported format, or corrupt file: ' + msg)
+ savpos = self._position  # remembered so error/debug messages can show where the record started
+ opcode = self.get2bytes()
+ if opcode == MY_EOF:
+ bof_error('Expected BOF record; met end of file')
+ if opcode not in bofcodes:
+ bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
+ length = self.get2bytes()
+ if length == MY_EOF:
+ bof_error('Incomplete BOF record[1]; met end of file')
+ if length < boflen[opcode] or length > 20:  # boflen gives the minimum length per opcode; 20 is the accepted maximum
+ bof_error(
+ 'Invalid length (%d) for BOF record type 0x%04x'
+ % (length, opcode))
+ data = self.read(self._position, length);
+ if DEBUG: print >> self.logfile, "\ngetbof(): data=%r" % data
+ if len(data) < length:  # read() returned fewer bytes than the record claims to have
+ bof_error('Incomplete BOF record[2]; met end of file')
+ version1 = opcode >> 8  # high byte of the opcode; selects the version-detection branch below
+ version2, streamtype = unpack('<HH', data[0:4])
+ if DEBUG:
+ print >> self.logfile, "getbof(): op=0x%04x version2=0x%04x streamtype=0x%04x" \
+ % (opcode, version2, streamtype)
+ bof_offset = self._position - 4 - length  # start of the BOF record; used only in the debug output
+ if DEBUG:
+ print >> self.logfile, "getbof(): BOF found at offset %d; savpos=%d" \
+ % (bof_offset, savpos)
+ version = build = year = 0  # defaults used when a field is absent or the version is not recognised
+ if version1 == 0x08:
+ build, year = unpack('<HH', data[4:8])
+ if version2 == 0x0600:
+ version = 80
+ elif version2 == 0x0500:
+ if year < 1994 or build in (2412, 3218, 3321):  # build year / build ids used to tell version 50 from 70
+ version = 50
+ else:
+ version = 70
+ else:
+ # dodgy one, created by a 3rd-party tool
+ version = {
+ 0x0000: 21,
+ 0x0007: 21,
+ 0x0200: 21,
+ 0x0300: 30,
+ 0x0400: 40,
+ }.get(version2, 0)
+ elif version1 in (0x04, 0x02, 0x00):
+ version = {0x04: 40, 0x02: 30, 0x00: 21}[version1]
+
+ if version == 40 and streamtype == XL_WORKBOOK_GLOBALS_4W:
+ version = 45 # i.e. 4W
+
+ if DEBUG or self.verbosity >= 2:
+ print >> self.logfile, \
+ "BOF: op=0x%04x vers=0x%04x stream=0x%04x buildid=%d buildyr=%d -> BIFF%d" \
+ % (opcode, version2, streamtype, build, year, version)
+ got_globals = streamtype == XL_WORKBOOK_GLOBALS or (
+ version == 45 and streamtype == XL_WORKBOOK_GLOBALS_4W)
+ if (rqd_stream == XL_WORKBOOK_GLOBALS and got_globals) or streamtype == rqd_stream:  # found the kind of stream the caller asked for
+ return version
+ if version < 50 and streamtype == XL_WORKSHEET:  # a worksheet stream is also accepted for versions below 50, whatever was requested
+ return version
+ if version >= 50 and streamtype == 0x0100:
+ bof_error("Workspace file -- no spreadsheet data")
+ bof_error(
+ 'BOF not workbook/worksheet: op=0x%04x vers=0x%04x strm=0x%04x build=%d year=%d -> BIFF%d' \
+ % (opcode, version2, streamtype, build, year, version)
+ )
+
+# === helper functions
+
+def expand_cell_address(inrow, incol):  # Decode an encoded (row, column) pair; returns (outrow, outcol, relrow, relcol).
+ # Ref : OOo docs, "4.3.4 Cell Addresses in BIFF8"
+ outrow = inrow
+ if incol & 0x8000:  # bit 15 of incol flags the row as relative
+ if outrow >= 32768:
+ outrow -= 65536  # reinterpret the 16-bit value as signed
+ relrow = 1
+ else:
+ relrow = 0
+ outcol = incol & 0xFF  # the column number is the low 8 bits
+ if incol & 0x4000:  # bit 14 of incol flags the column as relative
+ if outcol >= 128:
+ outcol -= 256  # reinterpret the 8-bit value as signed
+ relcol = 1
+ else:
+ relcol = 0
+ return outrow, outcol, relrow, relcol
+
+def colname(colx, _A2Z="ABCDEFGHIJKLMNOPQRSTUVWXYZ"):  # Convert a 0-based column index to letters: 0 -> 'A', 25 -> 'Z', 26 -> 'AA'.
+ assert colx >= 0
+ name = ''
+ while 1:
+ quot, rem = divmod(colx, 26)
+ name = _A2Z[rem] + name  # letters are produced least-significant first, so prepend
+ if not quot:
+ return name
+ colx = quot - 1  # the -1 is needed because there is no "zero" letter: 'AA' directly follows 'Z'
+
+def display_cell_address(rowx, colx, relrow, relcol):  # Format a cell address as text: "$A$1" style for absolute parts, "(*+n)" / "(*-n)" for relative parts.
+ if relrow:
+ rowpart = "(*%s%d)" % ("+-"[rowx < 0], abs(rowx))  # indexing "+-" with a boolean picks the sign character
+ else:
+ rowpart = "$%d" % (rowx+1,)  # absolute rows are shown 1-based
+ if relcol:
+ colpart = "(*%s%d)" % ("+-"[colx < 0], abs(colx))
+ else:
+ colpart = "$" + colname(colx)
+ return colpart + rowpart  # column part first, then row part
+
+def unpack_SST_table(datatab, nstrings):  # Decode nstrings strings from datatab (a list of record payloads: the SST record, then its CONTINUE records).
+ "Return list of strings"
+ datainx = 0  # index of the payload chunk currently being read
+ ndatas = len(datatab)
+ data = datatab[0]
+ datalen = len(data)
+ pos = 8  # string data starts at offset 8 of the first chunk
+ strings = []
+ strappend = strings.append  # frequently used callables are bound to local names
+ local_unpack = unpack
+ local_min = min
+ local_ord = ord
+ latin_1 = "latin_1"
+ for _unused_i in xrange(nstrings):
+ nchars = local_unpack('<H', data[pos:pos+2])[0]  # character count of this string
+ pos += 2
+ options = local_ord(data[pos])  # option-flag byte: 0x01 = 16-bit characters, 0x04 = phonetic, 0x08 = rich text
+ pos += 1
+ rtsz = 0  # number of trailing bytes to skip after the characters
+ if options & 0x08: # richtext
+ rtsz = 4 * local_unpack('<H', data[pos:pos+2])[0]  # 4 bytes are skipped per counted item
+ pos += 2
+ if options & 0x04: # phonetic
+ rtsz += local_unpack('<i', data[pos:pos+4])[0]
+ pos += 4
+ accstrg = u''
+ charsgot = 0
+ while 1:  # a string may be split across chunks: gather pieces until nchars characters are decoded
+ charsneed = nchars - charsgot
+ if options & 0x01:
+ # Uncompressed UTF-16
+ charsavail = local_min((datalen - pos) >> 1, charsneed)  # whole 2-byte characters left in this chunk
+ rawstrg = data[pos:pos+2*charsavail]
+ # if DEBUG: print "SST U16: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
+ try:
+ accstrg += unicode(rawstrg, "utf_16_le")
+ except:
+ # print "SST U16: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
+ # Probable cause: dodgy data e.g. unfinished surrogate pair.
+ # E.g. file unicode2.xls in pyExcelerator's examples has cells containing
+ # unichr(i) for i in range(0x100000)
+ # so this will include 0xD800 etc
+ raise
+ pos += 2*charsavail
+ else:
+ # Note: this is COMPRESSED (not ASCII!) encoding!!!
+ charsavail = local_min(datalen - pos, charsneed)  # one byte per character
+ rawstrg = data[pos:pos+charsavail]
+ # if DEBUG: print "SST CMPRSD: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
+ accstrg += unicode(rawstrg, latin_1)
+ pos += charsavail
+ charsgot += charsavail
+ if charsgot == nchars:
+ break
+ datainx += 1  # chunk exhausted mid-string: move on to the next chunk
+ data = datatab[datainx]
+ datalen = len(data)
+ options = local_ord(data[0])  # the continuation chunk starts with a fresh option byte
+ pos = 1
+ pos += rtsz # size of richtext & phonetic stuff to skip
+ # also allow for the rich text etc being split ...
+ if pos >= datalen:
+ # adjust to correct position in next record
+ pos = pos - datalen
+ datainx += 1
+ if datainx < ndatas:
+ data = datatab[datainx]
+ datalen = len(data)
+ else:
+ assert _unused_i == nstrings - 1  # running out of chunks is only acceptable after the final string
+ strappend(accstrg)
+ return strings
diff --git a/tablib/packages/xlrd/biffh.py b/tablib/packages/xlrd/biffh.py
new file mode 100644
index 0000000..ba3b26c
--- /dev/null
+++ b/tablib/packages/xlrd/biffh.py
@@ -0,0 +1,639 @@
+# -*- coding: cp1252 -*-
+
+##
+# Support module for the xlrd package.
+#
+# <p>Portions copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
+##
+
+# 2008-02-10 SJM BIFF2 BLANK record
+# 2008-02-08 SJM Preparation for Excel 2.0 support
+# 2008-02-02 SJM Added suffixes (_B2, _B2_ONLY, etc) on record names for biff_dump & biff_count
+# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
+# 2007-09-08 SJM Avoid crash when zero-length Unicode string missing options byte.
+# 2007-04-22 SJM Remove experimental "trimming" facility.
+
+DEBUG = 0
+
+from struct import unpack
+import sys
+from timemachine import *
+
+class XLRDError(Exception):
+    """Exception type defined by this package; adds nothing to Exception."""
+
+##
+# Parent of almost all other classes in the package. Defines a common "dump" method
+# for debugging.
+
+class BaseObject(object):
+    """Base class supplying a recursive, attribute-by-attribute debugging dump."""
+
+    # Names of list/dict attributes that dump() shows in full (via repr)
+    # instead of summarising them as "type, len = N".
+    _repr_these = []
+
+    ##
+    # @param f open file object, to which the dump is written
+    # @param header text to write before the dump
+    # @param footer text to write after the dump
+    # @param indent number of leading spaces (for recursive calls)
+
+    def dump(self, f=None, header=None, footer=None, indent=0):
+        """Write each instance attribute, in name order, to f (default sys.stderr).
+
+        Attributes that themselves have a truthy "dump" attribute are dumped
+        recursively with the indent increased by 4, except for the attribute
+        called "book", which is shown with repr like any plain value.
+        """
+        out = f
+        if out is None:
+            out = sys.stderr
+        pairs = self.__dict__.items()
+        pairs.sort()
+        margin = " " * indent
+        if header is not None:
+            print >> out, header
+        for name, val in pairs:
+            if getattr(val, 'dump', None) and name != 'book':
+                val.dump(
+                    out,
+                    header="%s%s (%s object):" % (margin, name, val.__class__.__name__),
+                    indent=indent+4)
+                continue
+            summarise = name not in self._repr_these and (
+                isinstance(val, type([])) or isinstance(val, type({}))
+                )
+            if summarise:
+                # Containers can be huge: show only the type and the length.
+                print >> out, "%s%s: %s, len = %d" % (margin, name, type(val), len(val))
+            else:
+                print >> out, "%s%s: %r" % (margin, name, val)
+        if footer is not None:
+            print >> out, footer
+
+FUN, FDT, FNU, FGE, FTX = range(5) # unknown, date, number, general, text
+DATEFORMAT = FDT
+NUMBERFORMAT = FNU
+
+(
+ XL_CELL_EMPTY,
+ XL_CELL_TEXT,
+ XL_CELL_NUMBER,
+ XL_CELL_DATE,
+ XL_CELL_BOOLEAN,
+ XL_CELL_ERROR,
+ XL_CELL_BLANK, # for use in debugging, gathering stats, etc
+) = range(7)
+
+biff_text_from_num = {
+ 0: "(not BIFF)",
+ 20: "2.0",
+ 21: "2.1",
+ 30: "3",
+ 40: "4S",
+ 45: "4W",
+ 50: "5",
+ 70: "7",
+ 80: "8",
+ 85: "8X",
+ }
+
+##
+# <p>This dictionary can be used to produce a text version of the internal codes
+# that Excel uses for error cells. Here are its contents:
+# <pre>
+# 0x00: '#NULL!',  # Intersection of two cell ranges is empty
+# 0x07: '#DIV/0!', # Division by zero
+# 0x0F: '#VALUE!', # Wrong type of operand
+# 0x17: '#REF!',   # Illegal or deleted cell reference
+# 0x1D: '#NAME?',  # Wrong function or range name
+# 0x24: '#NUM!',   # Value range overflow
+# 0x2A: '#N/A',    # Argument or function not available
+# </pre></p>
+
+# Keys are the one-byte error codes; values are the error texts.
+# 0x2A is '#N/A' with no trailing '!'.
+error_text_from_code = {
+    0x00: '#NULL!',  # Intersection of two cell ranges is empty
+    0x07: '#DIV/0!', # Division by zero
+    0x0F: '#VALUE!', # Wrong type of operand
+    0x17: '#REF!',   # Illegal or deleted cell reference
+    0x1D: '#NAME?',  # Wrong function or range name
+    0x24: '#NUM!',   # Value range overflow
+    0x2A: '#N/A',    # Argument or function not available
+}
+
+BIFF_FIRST_UNICODE = 80
+
+XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x5
+XL_WORKBOOK_GLOBALS_4W = 0x100
+XL_WORKSHEET = WRKSHEET = 0x10
+
+XL_BOUNDSHEET_WORKSHEET = 0x00
+XL_BOUNDSHEET_CHART = 0x02
+XL_BOUNDSHEET_VB_MODULE = 0x06
+
+# XL_RK2 = 0x7e
+XL_ARRAY = 0x0221
+XL_ARRAY2 = 0x0021
+XL_BLANK = 0x0201
+XL_BLANK_B2 = 0x01
+XL_BOF = 0x809
+XL_BOOLERR = 0x205
+XL_BOOLERR_B2 = 0x5
+XL_BOUNDSHEET = 0x85
+XL_BUILTINFMTCOUNT = 0x56
+XL_CF = 0x01B1
+XL_CODEPAGE = 0x42
+XL_COLINFO = 0x7D
+XL_COLUMNDEFAULT = 0x20 # BIFF2 only
+XL_COLWIDTH = 0x24 # BIFF2 only
+XL_CONDFMT = 0x01B0
+XL_CONTINUE = 0x3c
+XL_COUNTRY = 0x8C
+XL_DATEMODE = 0x22
+XL_DEFAULTROWHEIGHT = 0x0225
+XL_DEFCOLWIDTH = 0x55
+XL_DIMENSION = 0x200
+XL_DIMENSION2 = 0x0
+XL_EFONT = 0x45
+XL_EOF = 0x0a
+XL_EXTERNNAME = 0x23
+XL_EXTERNSHEET = 0x17
+XL_EXTSST = 0xff
+XL_FEAT11 = 0x872
+XL_FILEPASS = 0x2f
+XL_FONT = 0x31
+XL_FONT_B3B4 = 0x231
+XL_FORMAT = 0x41e
+XL_FORMAT2 = 0x1E # BIFF2, BIFF3
+XL_FORMULA = 0x6
+XL_FORMULA3 = 0x206
+XL_FORMULA4 = 0x406
+XL_GCW = 0xab
+XL_INDEX = 0x20b
+XL_INTEGER = 0x2 # BIFF2 only
+XL_IXFE = 0x44 # BIFF2 only
+XL_LABEL = 0x204
+XL_LABEL_B2 = 0x04
+XL_LABELRANGES = 0x15f
+XL_LABELSST = 0xfd
+XL_MERGEDCELLS = 0xE5
+XL_MSO_DRAWING = 0x00EC
+XL_MSO_DRAWING_GROUP = 0x00EB
+XL_MSO_DRAWING_SELECTION = 0x00ED
+XL_MULRK = 0xbd
+XL_MULBLANK = 0xbe
+XL_NAME = 0x18
+XL_NOTE = 0x1c
+XL_NUMBER = 0x203
+XL_NUMBER_B2 = 0x3
+XL_OBJ = 0x5D
+XL_PALETTE = 0x92
+XL_RK = 0x27e
+XL_ROW = 0x208
+XL_ROW_B2 = 0x08
+XL_RSTRING = 0xd6
+XL_SHEETHDR = 0x8F # BIFF4W only
+XL_SHEETSOFFSET = 0x8E # BIFF4W only
+XL_SHRFMLA = 0x04bc
+XL_SST = 0xfc
+XL_STANDARDWIDTH = 0x99
+XL_STRING = 0x207
+XL_STRING_B2 = 0x7
+XL_STYLE = 0x293
+XL_SUPBOOK = 0x1AE
+XL_TABLEOP = 0x236
+XL_TABLEOP2 = 0x37
+XL_TABLEOP_B2 = 0x36
+XL_TXO = 0x1b6
+XL_UNCALCED = 0x5e
+XL_UNKNOWN = 0xffff
+XL_WINDOW2 = 0x023E
+XL_WRITEACCESS = 0x5C
+XL_XF = 0xe0
+XL_XF2 = 0x0043 # BIFF2 version of XF record
+XL_XF3 = 0x0243 # BIFF3 version of XF record
+XL_XF4 = 0x0443 # BIFF4 version of XF record
+
+boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4}
+bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)
+
+XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
+
+# Record codes for which is_cell_opcode() (defined below) returns true.
+_cell_opcode_list = [
+ XL_BOOLERR,
+ XL_FORMULA,
+ XL_FORMULA3,
+ XL_FORMULA4,
+ XL_LABEL,
+ XL_LABELSST,
+ XL_MULRK,
+ XL_NUMBER,
+ XL_RK,
+ XL_RSTRING,
+ ]
+# A dict used as a set: the keys are the codes above, the values are a dummy 1.
+_cell_opcode_dict = {}
+for _cell_opcode in _cell_opcode_list:
+ _cell_opcode_dict[_cell_opcode] = 1
+# is_cell_opcode(code) is the dict's bound has_key method, i.e. a membership test.
+is_cell_opcode = _cell_opcode_dict.has_key
+
+# def fprintf(f, fmt, *vargs): f.write(fmt % vargs)
+
+def fprintf(f, fmt, *vargs):
+    """Format fmt % vargs and write the result to f using the print statement.
+
+    A format ending in a newline produces exactly one line; any other format
+    is printed with a trailing comma, so no newline is written.
+    """
+    if not fmt.endswith('\n'):
+        print >> f, fmt % vargs,
+        return
+    # print supplies the newline itself, so strip the one in the format.
+    print >> f, fmt[:-1] % vargs
+
+def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
+    """Unpack bit fields of src into attributes of tgt_obj.
+
+    manifest is a sequence of (shift, mask, attribute_name) triples; each
+    attribute is set to (src & mask) >> shift.
+    """
+    for shift, bitmask, name in manifest:
+        field = (src & bitmask) >> shift
+        local_setattr(tgt_obj, name, field)
+
+def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
+    """Like upkbits, but each extracted field is passed through local_int first.
+
+    manifest is a sequence of (shift, mask, attribute_name) triples.
+    """
+    for shift, bitmask, name in manifest:
+        field = local_int((src & bitmask) >> shift)
+        local_setattr(tgt_obj, name, field)
+
+def unpack_string(data, pos, encoding, lenlen=1):
+    """Decode the length-prefixed byte string at data[pos:] using encoding.
+
+    The prefix is a little-endian unsigned count occupying lenlen bytes
+    (1 or 2); the count is the number of bytes that follow.
+    """
+    len_fmt = '<' + 'BH'[lenlen-1]
+    (nchars,) = unpack(len_fmt, data[pos:pos+lenlen])
+    start = pos + lenlen
+    return unicode(data[start:start+nchars], encoding)
+
+def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
+    """Decode a byte string at data[pos:]; return (unicode_strg, position after it).
+
+    When known_len is given, no length prefix is read and the string starts
+    at pos; otherwise a little-endian count of lenlen bytes (1 or 2) is read
+    first and skipped.
+    """
+    if known_len is None:
+        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
+        pos += lenlen
+    else:
+        # On a NAME record, the length byte is detached from the front of the string.
+        nchars = known_len
+    end = pos + nchars
+    return (unicode(data[pos:end], encoding), end)
+
+def unpack_unicode(data, pos, lenlen=2):
+    """Return the unicode string stored at data[pos:].
+
+    Layout read here: a little-endian character count of lenlen bytes (1 or 2),
+    an options byte, an optional 2-byte field (options & 0x08, rich text), an
+    optional 4-byte field (options & 0x04, phonetic), then the characters:
+    UTF-16-LE when options & 0x01, otherwise one byte per character decoded
+    as latin_1. The two optional fields are skipped without being read.
+    """
+    (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
+    if not nchars:
+        # Ambiguous whether 0-length string should have an "options" byte.
+        # Avoid crash if missing.
+        return u""
+    options = ord(data[pos + lenlen])
+    cursor = pos + lenlen + 1
+    if options & 0x08:
+        cursor += 2  # rich-text run count, not needed here
+    if options & 0x04:
+        cursor += 4  # phonetic block size, not needed here
+    if options & 0x01:
+        # Uncompressed UTF-16-LE
+        return unicode(data[cursor:cursor+2*nchars], 'utf_16_le')
+    # Note: this is COMPRESSED (not ASCII!) encoding!!!
+    # Returning the raw bytes would only be right for a cp1252 local codepage,
+    # so always hand back Unicode.
+    return unicode(data[cursor:cursor+nchars], "latin_1")
+
+def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
+    """Return (unicode_strg, updated value of pos).
+
+    Reads the same layout as unpack_unicode, but also skips the trailing
+    rich-text (4 bytes per run) and phonetic data, so the returned position
+    is just past the whole string structure. When known_len is given, no
+    length prefix is read.
+    """
+    if known_len is None:
+        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
+        pos += lenlen
+    else:
+        # On a NAME record, the length byte is detached from the front of the string.
+        nchars = known_len
+    if not nchars and not data[pos:]:
+        # Zero-length string with no options byte
+        return (u"", pos)
+    options = ord(data[pos])
+    pos += 1
+    trailer = 0  # bytes of rich-text / phonetic data following the characters
+    if options & 0x08:
+        trailer += 4 * unpack('<H', data[pos:pos+2])[0]
+        pos += 2
+    if options & 0x04:
+        trailer += unpack('<i', data[pos:pos+4])[0]
+        pos += 4
+    if options & 0x01:
+        # Uncompressed UTF-16-LE
+        nbytes = 2 * nchars
+        codec = 'utf_16_le'
+    else:
+        # Note: this is COMPRESSED (not ASCII!) encoding!!!
+        nbytes = nchars
+        codec = "latin_1"
+    strg = unicode(data[pos:pos+nbytes], codec)
+    return (strg, pos + nbytes + trailer)
+
+def unpack_cell_range_address_list_update_pos(
+    output_list, data, pos, biff_version, addr_size=6):
+    """Append the cell ranges stored at data[pos:] to output_list; return new pos.
+
+    The data holds a 2-byte count followed by that many addresses of
+    addr_size bytes each (first row, last row, first col, last col). Each is
+    appended as (first_row, last_row + 1, first_col, last_col + 1).
+    output_list is updated in situ.
+    """
+    if biff_version < 80:
+        assert addr_size == 6
+    else:
+        assert addr_size in (6, 8)
+    (count,) = unpack("<H", data[pos:pos+2])
+    pos += 2
+    if not count:
+        return pos
+    # 6-byte addresses use 1-byte column numbers, otherwise 2-byte ones.
+    fmt = "<HHHH"
+    if addr_size == 6:
+        fmt = "<HHBB"
+    stop = pos + count * addr_size
+    while pos < stop:
+        row_lo, row_hi, col_lo, col_hi = unpack(fmt, data[pos:pos+addr_size])
+        output_list.append((row_lo, row_hi+1, col_lo, col_hi+1))
+        pos += addr_size
+    return pos
+
+_brecstrg = """\
+0000 DIMENSIONS_B2
+0001 BLANK_B2
+0002 INTEGER_B2_ONLY
+0003 NUMBER_B2
+0004 LABEL_B2
+0005 BOOLERR_B2
+0006 FORMULA
+0007 STRING_B2
+0008 ROW_B2
+0009 BOF_B2
+000A EOF
+000B INDEX_B2_ONLY
+000C CALCCOUNT
+000D CALCMODE
+000E PRECISION
+000F REFMODE
+0010 DELTA
+0011 ITERATION
+0012 PROTECT
+0013 PASSWORD
+0014 HEADER
+0015 FOOTER
+0016 EXTERNCOUNT
+0017 EXTERNSHEET
+0018 NAME_B2,5+
+0019 WINDOWPROTECT
+001A VERTICALPAGEBREAKS
+001B HORIZONTALPAGEBREAKS
+001C NOTE
+001D SELECTION
+001E FORMAT_B2-3
+001F BUILTINFMTCOUNT_B2
+0020 COLUMNDEFAULT_B2_ONLY
+0021 ARRAY_B2_ONLY
+0022 DATEMODE
+0023 EXTERNNAME
+0024 COLWIDTH_B2_ONLY
+0025 DEFAULTROWHEIGHT_B2_ONLY
+0026 LEFTMARGIN
+0027 RIGHTMARGIN
+0028 TOPMARGIN
+0029 BOTTOMMARGIN
+002A PRINTHEADERS
+002B PRINTGRIDLINES
+002F FILEPASS
+0031 FONT
+0032 FONT2_B2_ONLY
+0036 TABLEOP_B2
+0037 TABLEOP2_B2
+003C CONTINUE
+003D WINDOW1
+003E WINDOW2_B2
+0040 BACKUP
+0041 PANE
+0042 CODEPAGE
+0043 XF_B2
+0044 IXFE_B2_ONLY
+0045 EFONT_B2_ONLY
+004D PLS
+0051 DCONREF
+0055 DEFCOLWIDTH
+0056 BUILTINFMTCOUNT_B3-4
+0059 XCT
+005A CRN
+005B FILESHARING
+005C WRITEACCESS
+005D OBJECT
+005E UNCALCED
+005F SAVERECALC
+0063 OBJECTPROTECT
+007D COLINFO
+007E RK2_mythical_?
+0080 GUTS
+0081 WSBOOL
+0082 GRIDSET
+0083 HCENTER
+0084 VCENTER
+0085 BOUNDSHEET
+0086 WRITEPROT
+008C COUNTRY
+008D HIDEOBJ
+008E SHEETSOFFSET
+008F SHEETHDR
+0090 SORT
+0092 PALETTE
+0099 STANDARDWIDTH
+009B FILTERMODE
+009C FNGROUPCOUNT
+009D AUTOFILTERINFO
+009E AUTOFILTER
+00A0 SCL
+00A1 SETUP
+00AB GCW
+00BD MULRK
+00BE MULBLANK
+00C1 MMS
+00D6 RSTRING
+00D7 DBCELL
+00DA BOOKBOOL
+00DD SCENPROTECT
+00E0 XF
+00E1 INTERFACEHDR
+00E2 INTERFACEEND
+00E5 MERGEDCELLS
+00E9 BITMAP
+00EB MSO_DRAWING_GROUP
+00EC MSO_DRAWING
+00ED MSO_DRAWING_SELECTION
+00EF PHONETIC
+00FC SST
+00FD LABELSST
+00FF EXTSST
+013D TABID
+015F LABELRANGES
+0160 USESELFS
+0161 DSF
+01AE SUPBOOK
+01AF PROTECTIONREV4
+01B0 CONDFMT
+01B1 CF
+01B2 DVAL
+01B6 TXO
+01B7 REFRESHALL
+01B8 HLINK
+01BC PASSWORDREV4
+01BE DV
+01C0 XL9FILE
+01C1 RECALCID
+0200 DIMENSIONS
+0201 BLANK
+0203 NUMBER
+0204 LABEL
+0205 BOOLERR
+0206 FORMULA_B3
+0207 STRING
+0208 ROW
+0209 BOF
+020B INDEX_B3+
+0218 NAME
+0221 ARRAY
+0223 EXTERNNAME_B3-4
+0225 DEFAULTROWHEIGHT
+0231 FONT_B3B4
+0236 TABLEOP
+023E WINDOW2
+0243 XF_B3
+027E RK
+0293 STYLE
+0406 FORMULA_B4
+0409 BOF
+041E FORMAT
+0443 XF_B4
+04BC SHRFMLA
+0800 QUICKTIP
+0809 BOF
+0862 SHEETLAYOUT
+0867 SHEETPROTECTION
+0868 RANGEPROTECTION
+"""
+
+# Maps a record code (int) to its record name, parsed from the _brecstrg table.
+biff_rec_name_dict = {}
+for _buff in _brecstrg.splitlines():
+ # Each table line is "<hex code> <name>"; the unpacking needs exactly two fields.
+ _numh, _name = _buff.split()
+ biff_rec_name_dict[int(_numh, 16)] = _name
+# Discard the scratch names and the source table (_numh is left defined).
+del _buff, _name, _brecstrg
+
+def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
+    """Write a hex and character dump of strg[ofs:ofs+dlen] to fout.
+
+    Up to 16 bytes are shown per output line: the hex digits, then the same
+    bytes as characters, with NUL shown as '~' and anything outside the
+    range ' '..'~' shown as '?'. Unless unnumbered is true, each line is
+    prefixed with base plus the byte's distance from ofs. The dump stops
+    early at the end of strg.
+    """
+    endpos = min(ofs + dlen, len(strg))
+    pos = ofs
+    numbered = not unnumbered
+    num_prefix = ''
+    while pos < endpos:
+        endsub = min(pos + 16, endpos)
+        substrg = strg[pos:endsub]
+        lensub = endsub - pos
+        if lensub <= 0 or lensub != len(substrg):
+            # Internal inconsistency. Report it on the caller's stream (it
+            # used to go to sys.stdout regardless of fout) and give up.
+            fprintf(
+                fout,
+                '??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
+                ofs, dlen, base, endpos, pos, endsub, substrg)
+            break
+        hexd = ''.join(["%02x " % ord(c) for c in substrg])
+        chars = []
+        for c in substrg:
+            if c == '\0':
+                c = '~'
+            elif not (' ' <= c <= '~'):
+                c = '?'
+            chars.append(c)
+        chard = ''.join(chars)
+        if numbered:
+            num_prefix = "%5d: " % (base+pos-ofs)
+        fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard)
+        pos = endsub
+
+def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False):
+    """Write a record-by-record dump of a BIFF stream held in mem to fout.
+
+    The stream occupies mem[stream_offset:stream_offset+stream_len]. For each
+    record, a header line (code, name, length) is written, followed by a hex
+    dump of the record data. Runs of all-zero 4-byte headers are collapsed
+    into a single "zero bytes skipped" line. Unless unnumbered is true,
+    lines are prefixed with an offset computed relative to base.
+    """
+    pos = stream_offset
+    stream_end = stream_offset + stream_len
+    adj = base - stream_offset
+    dummies = 0
+    numbered = not unnumbered
+    num_prefix = ''
+    while stream_end - pos >= 4:
+        rc, length = unpack('<HH', mem[pos:pos+4])
+        if rc == 0 and length == 0:
+            # Compare only the rest of the stream: mem may extend beyond
+            # stream_end, and an open-ended slice could then never match.
+            if mem[pos:stream_end] == '\0' * (stream_end - pos):
+                dummies = stream_end - pos
+                savpos = pos
+                pos = stream_end
+                break
+            if dummies:
+                dummies += 4
+            else:
+                savpos = pos
+                dummies = 4
+            pos += 4
+        else:
+            if dummies:
+                if numbered:
+                    num_prefix = "%5d: " % (adj + savpos)
+                fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
+                dummies = 0
+            recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
+            if numbered:
+                num_prefix = "%5d: " % (adj + pos)
+            fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length)
+            pos += 4
+            hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
+            pos += length
+    if dummies:
+        if numbered:
+            num_prefix = "%5d: " % (adj + savpos)
+        fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
+    if pos < stream_end:
+        # Fewer than 4 bytes left: not enough for a record header.
+        if numbered:
+            num_prefix = "%5d: " % (adj + pos)
+        fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
+        hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
+    elif pos > stream_end:
+        fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
+
+def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
+    """Count the records in a BIFF stream by name and print the tally to fout.
+
+    The stream occupies mem[stream_offset:stream_offset+stream_len]. Records
+    with an unrecognised code are tallied as "Unknown_0xNNNN" and all-zero
+    headers as "<Dummy (zero)>". Counting stops when the rest of the stream
+    is all zero bytes. One "count name" line is printed per name, sorted by
+    name.
+    """
+    pos = stream_offset
+    stream_end = stream_offset + stream_len
+    tally = {}
+    while stream_end - pos >= 4:
+        rc, length = unpack('<HH', mem[pos:pos+4])
+        if rc == 0 and length == 0:
+            # Compare only the rest of the stream: mem may extend beyond
+            # stream_end, and an open-ended slice could then never match.
+            if mem[pos:stream_end] == '\0' * (stream_end - pos):
+                break
+            recname = "<Dummy (zero)>"
+        else:
+            recname = biff_rec_name_dict.get(rc, None)
+            if recname is None:
+                recname = "Unknown_0x%04X" % rc
+        tally[recname] = tally.get(recname, 0) + 1
+        pos += length + 4
+    slist = tally.items()
+    slist.sort()
+    for recname, count in slist:
+        print >> fout, "%8d %s" % (count, recname)
+
+encoding_from_codepage = {
+ 1200 : 'utf_16_le',
+ 10000: 'mac_roman',
+ 10006: 'mac_greek', # guess
+ 10007: 'mac_cyrillic', # guess
+ 10029: 'mac_latin2', # guess
+ 10079: 'mac_iceland', # guess
+ 10081: 'mac_turkish', # guess
+ 32768: 'mac_roman',
+ 32769: 'cp1252',
+ }
+# some more guessing, for Indic scripts
+# codepage 57000 range:
+# 2 Devanagari [0]
+# 3 Bengali [1]
+# 4 Tamil [5]
+# 5 Telugu [6]
+# 6 Assamese [1] c.f. Bengali
+# 7 Oriya [4]
+# 8 Kannada [7]
+# 9 Malayalam [8]
+# 10 Gujarati [3]
+# 11 Gurmukhi [2]
diff --git a/tablib/packages/xlrd/compdoc.py b/tablib/packages/xlrd/compdoc.py
new file mode 100644
index 0000000..3abb7a6
--- /dev/null
+++ b/tablib/packages/xlrd/compdoc.py
@@ -0,0 +1,358 @@
+# -*- coding: cp1252 -*-
+
+##
+# Implements the minimal functionality required
+# to extract a "Workbook" or "Book" stream (as one big string)
+# from an OLE2 Compound Document file.
+# <p>Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
+##
+
+# No part of the content of this file was derived from the works of David Giffin.
+
+# 2008-11-04 SJM Avoid assertion error when -1 used instead of -2 for first_SID of empty SCSS [Frank Hoffsuemmer]
+# 2007-09-08 SJM Warning message if sector sizes are extremely large.
+# 2007-05-07 SJM Meaningful exception instead of IndexError if a SAT (sector allocation table) is corrupted.
+# 2007-04-22 SJM Missing "<" in a struct.unpack call => can't open files on bigendian platforms.
+
+
+import sys
+from struct import unpack
+from timemachine import *
+
+##
+# Magic cookie that should appear in the first 8 bytes of the file.
+SIGNATURE = "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1"
+
+EOCSID = -2
+FREESID = -1
+SATSID = -3
+MSATSID = -4
+
+class CompDocError(Exception):
+    """Exception type raised by this module; adds nothing to Exception."""
+
+class DirNode(object):
+    """One entry of the compound document's directory."""
+
+    def __init__(self, DID, dent, DEBUG=0):
+        """Decode dent, the 128-byte directory entry with directory id DID.
+
+        Byte ranges read: 0..cbufsize-2 is the UTF-16-LE name, 64..80 holds
+        the name buffer size, entry type, colour and the left/right/root
+        DIDs, 100..116 the timestamp words, 116..124 the first SID and the
+        total size. A truthy DEBUG dumps the node once it is built.
+        """
+        self.DID = DID
+        header = unpack('<HBBiii', dent[64:80])
+        cbufsize = header[0]
+        (self.etype, self.colour, self.left_DID, self.right_DID,
+            self.root_DID) = header[1:]
+        self.first_SID, self.tot_size = unpack('<ii', dent[116:124])
+        if cbufsize != 0:
+            # omit the trailing U+0000
+            self.name = unicode(dent[0:cbufsize-2], 'utf_16_le')
+        else:
+            self.name = u''
+        self.children = []  # filled in later
+        self.parent = -1  # indicates orphan; fixed up later
+        self.tsinfo = unpack('<IIII', dent[100:116])
+        if DEBUG:
+            self.dump(DEBUG)
+
+    def dump(self, DEBUG=1):
+        """Print a one-line summary of this node; DEBUG == 2 adds the timestamps."""
+        summary = (
+            self.DID, self.name, self.etype, self.left_DID,
+            self.right_DID, self.root_DID, self.parent, self.children,
+            self.first_SID, self.tot_size)
+        print "DID=%d name=%r etype=%d DIDs(left=%d right=%d root=%d parent=%d kids=%r) first_SID=%d tot_size=%d" \
+            % summary
+        if DEBUG == 2:
+            # cre_lo, cre_hi, mod_lo, mod_hi = tsinfo
+            print "timestamp info", self.tsinfo
+
+def _build_family_tree(dirlist, parent_DID, child_DID):
+    """Record child_DID and all of its siblings as children of parent_DID.
+
+    Siblings are linked through left_DID/right_DID and are visited in order
+    (left subtree, node, right subtree). A node of etype 1 (storage) then has
+    its own children attached, starting from its root_DID. A negative
+    child_DID means "no node" and ends the recursion.
+    """
+    if child_DID < 0:
+        return
+    node = dirlist[child_DID]
+    _build_family_tree(dirlist, parent_DID, node.left_DID)
+    dirlist[parent_DID].children.append(child_DID)
+    node.parent = parent_DID
+    _build_family_tree(dirlist, parent_DID, node.right_DID)
+    if node.etype == 1: # storage
+        _build_family_tree(dirlist, child_DID, node.root_DID)
+
+##
+# Compound document handler.
+# @param mem The raw contents of the file, as a string, or as an mmap.mmap() object. The
+# only operation it needs to support is slicing.
+
+class CompDoc(object):
+
+    def __init__(self, mem, logfile=sys.stdout, DEBUG=0):
+        """Parse the structure of the OLE2 compound document held in mem.
+
+        Steps: check the signature and byte-order marker; read the sector
+        sizes and table locations from the 512-byte header; build the MSAT
+        and the SAT; read the directory and link it into a tree; fetch the
+        short-stream container stream (SSCS); build the SSAT.
+
+        mem     -- raw file contents; only slicing and len() are used
+        logfile -- file object that receives warnings and debug output
+        DEBUG   -- truthy for debug output (also passed to DirNode.dump)
+
+        Raises CompDocError when the signature or byte-order marker is wrong,
+        or when an MSAT extension refers to a sector beyond the end of mem.
+        """
+        self.logfile = logfile
+        if mem[0:8] != SIGNATURE:
+            raise CompDocError('Not an OLE2 compound document')
+        if mem[28:30] != '\xFE\xFF':
+            raise CompDocError('Expected "little-endian" marker, found %r' % mem[28:30])
+        revision, version = unpack('<HH', mem[24:28])
+        if DEBUG:
+            print >> logfile, "\nCompDoc format: version=0x%04x revision=0x%04x" % (version, revision)
+        self.mem = mem
+        # Sector sizes are stored as powers of two.
+        ssz, sssz = unpack('<HH', mem[30:34])
+        if ssz > 20: # allows for 2**20 bytes i.e. 1MB
+            print >> logfile, \
+                "WARNING: sector size (2**%d) is preposterous; assuming 512 and continuing ..." \
+                % ssz
+            ssz = 9
+        if sssz > ssz:
+            print >> logfile, \
+                "WARNING: short stream sector size (2**%d) is preposterous; assuming 64 and continuing ..." \
+                % sssz
+            sssz = 6
+        self.sec_size = sec_size = 1 << ssz
+        self.short_sec_size = 1 << sssz
+        (
+            SAT_tot_secs, self.dir_first_sec_sid, _unused, self.min_size_std_stream,
+            SSAT_first_sec_sid, SSAT_tot_secs,
+            MSAT_first_sec_sid, MSAT_tot_secs,
+        # ) = unpack('<ii4xiiiii', mem[44:76])
+        ) = unpack('<iiiiiiii', mem[44:76])
+        # Everything after the 512-byte header is sector data.
+        mem_data_len = len(mem) - 512
+        mem_data_secs, left_over = divmod(mem_data_len, sec_size)
+        if left_over:
+            #### raise CompDocError("Not a whole number of sectors")
+            print >> logfile, \
+                "WARNING *** file size (%d) not 512 + multiple of sector size (%d)" \
+                % (len(mem), sec_size)
+        if DEBUG:
+            print >> logfile, 'sec sizes', ssz, sssz, sec_size, self.short_sec_size
+            print >> logfile, "mem data: %d bytes == %d sectors" % (mem_data_len, mem_data_secs)
+            print >> logfile, "SAT_tot_secs=%d, dir_first_sec_sid=%d, min_size_std_stream=%d" \
+                % (SAT_tot_secs, self.dir_first_sec_sid, self.min_size_std_stream,)
+            print >> logfile, "SSAT_first_sec_sid=%d, SSAT_tot_secs=%d" % (SSAT_first_sec_sid, SSAT_tot_secs,)
+            print >> logfile, "MSAT_first_sec_sid=%d, MSAT_tot_secs=%d" % (MSAT_first_sec_sid, MSAT_tot_secs,)
+        nent = int_floor_div(sec_size, 4) # number of SID entries in a sector
+        fmt = "<%di" % nent
+        trunc_warned = 0
+        #
+        # === build the MSAT ===
+        #
+        # The first 109 entries live in the header; each extension sector
+        # holds more entries, with its last entry pointing to the next one.
+        MSAT = list(unpack('<109i', mem[76:512]))
+        sid = MSAT_first_sec_sid
+        while sid >= 0:
+            if sid >= mem_data_secs:
+                raise CompDocError(
+                    "MSAT extension: accessing sector %d but only %d in file" % (sid, mem_data_secs)
+                    )
+            offset = 512 + sec_size * sid
+            news = list(unpack(fmt, mem[offset:offset+sec_size]))
+            sid = news.pop()
+            MSAT.extend(news)
+        if DEBUG:
+            print >> logfile, "MSAT: len =", len(MSAT)
+            print >> logfile, MSAT
+        #
+        # === build the SAT ===
+        #
+        self.SAT = []
+        for msid in MSAT:
+            if msid == FREESID: continue
+            if msid >= mem_data_secs:
+                # Warn once only, then skip every out-of-range sector.
+                if not trunc_warned:
+                    print >> logfile, "WARNING *** File is truncated, or OLE2 MSAT is corrupt!!"
+                    print >> logfile, \
+                        "INFO: Trying to access sector %d but only %d available" \
+                        % (msid, mem_data_secs)
+                    trunc_warned = 1
+                continue
+            offset = 512 + sec_size * msid
+            news = list(unpack(fmt, mem[offset:offset+sec_size]))
+            self.SAT.extend(news)
+        if DEBUG:
+            print >> logfile, "SAT: len =", len(self.SAT)
+            print >> logfile, self.SAT
+            # print >> logfile, "SAT ",
+            # for i, s in enumerate(self.SAT):
+                # print >> logfile, "entry: %4d offset: %6d, next entry: %4d" % (i, 512 + sec_size * i, s)
+                # print >> logfile, "%d:%d " % (i, s),
+            # Blank separator line; sent to logfile like the rest of the
+            # debug output (it used to go to stdout).
+            print >> logfile
+
+        # === build the directory ===
+        #
+        dbytes = self._get_stream(
+            self.mem, 512, self.SAT, self.sec_size, self.dir_first_sec_sid,
+            name="directory")
+        dirlist = []
+        did = -1
+        for pos in xrange(0, len(dbytes), 128):
+            did += 1
+            dirlist.append(DirNode(did, dbytes[pos:pos+128], 0))
+        self.dirlist = dirlist
+        _build_family_tree(dirlist, 0, dirlist[0].root_DID) # and stand well back ...
+        if DEBUG:
+            for d in dirlist:
+                d.dump(DEBUG)
+        #
+        # === get the SSCS ===
+        #
+        sscs_dir = self.dirlist[0]
+        assert sscs_dir.etype == 5 # root entry
+        if sscs_dir.first_SID < 0 and sscs_dir.tot_size == 0:
+            # Problem reported by Frank Hoffsuemmer: some software was
+            # writing -1 instead of -2 (EOCSID) for the first_SID
+            # when the SSCS was empty. Not having EOCSID caused assertion
+            # failure in _get_stream.
+            # Solution: avoid calling _get_stream in any case when the
+            # SSCS appears to be empty.
+            self.SSCS = ""
+        else:
+            self.SSCS = self._get_stream(
+                self.mem, 512, self.SAT, sec_size, sscs_dir.first_SID,
+                sscs_dir.tot_size, name="SSCS")
+        # if DEBUG: print >> logfile, "SSCS", repr(self.SSCS)
+        #
+        # === build the SSAT ===
+        #
+        self.SSAT = []
+        if SSAT_tot_secs > 0 and sscs_dir.tot_size == 0:
+            print >> logfile, \
+                "WARNING *** OLE2 inconsistency: SSCS size is 0 but SSAT size is non-zero"
+        if sscs_dir.tot_size > 0:
+            sid = SSAT_first_sec_sid
+            nsecs = SSAT_tot_secs
+            while sid >= 0 and nsecs > 0:
+                nsecs -= 1
+                start_pos = 512 + sid * sec_size
+                news = list(unpack(fmt, mem[start_pos:start_pos+sec_size]))
+                self.SSAT.extend(news)
+                sid = self.SAT[sid]
+            # assert SSAT_tot_secs == 0 or sid == EOCSID
+            if DEBUG: print >> logfile, "SSAT last sid %d; remaining sectors %d" % (sid, nsecs)
+            assert nsecs == 0 and sid == EOCSID
+        if DEBUG: print >> logfile, "SSAT", self.SSAT
+
+    def _get_stream(self, mem, base, sat, sec_size, start_sid, size=None, name=''):
+        """Return, as one string, the stream whose sector chain starts at start_sid.
+
+        Sector s is mem[base + s*sec_size : ... + sec_size]; sat[s] gives the
+        next sector, and a negative value ends the chain (it must be EOCSID).
+        With size given, at most size bytes are collected and a warning is
+        written to self.logfile if the chain supplies fewer. name is used
+        only in messages.
+
+        Raises CompDocError if the chain leads outside sat.
+        """
+        def next_sid(cur, table=sat, label=name):
+            # Step along the chain; an out-of-range index means a corrupt table.
+            try:
+                return table[cur]
+            except IndexError:
+                raise CompDocError(
+                    "OLE2 stream %r: sector allocation table invalid entry (%d)" %
+                    (label, cur)
+                    )
+
+        # print >> self.logfile, "_get_stream", base, sec_size, start_sid, size
+        pieces = []
+        sid = start_sid
+        if size is not None:
+            remaining = size
+            while sid >= 0:
+                offset = base + sid * sec_size
+                take = min(sec_size, remaining)
+                remaining -= take
+                pieces.append(mem[offset:offset+take])
+                sid = next_sid(sid)
+            assert sid == EOCSID
+            if remaining != 0:
+                print >> self.logfile, \
+                    "WARNING *** OLE2 stream %r: expected size %d, actual size %d" \
+                    % (name, size, size - remaining)
+        else:
+            # nothing to check against
+            while sid >= 0:
+                offset = base + sid * sec_size
+                pieces.append(mem[offset:offset+sec_size])
+                sid = next_sid(sid)
+            assert sid == EOCSID
+        return ''.join(pieces)
+
+    def _dir_search(self, path, storage_DID=0):
+        """Return the DirNode of the stream named by path, or None if absent.
+
+        path is a list of name components, compared case-insensitively
+        against the children of the storage storage_DID. A matching storage
+        (etype 1) is searched recursively with the remaining components; a
+        matching user stream (etype 2) is returned.
+
+        Raises CompDocError if the last component names a storage, or if the
+        match is neither a storage nor a user stream.
+        """
+        wanted = path[0]
+        rest = path[1:]
+        nodes = self.dirlist
+        for kid in nodes[storage_DID].children:
+            entry = nodes[kid]
+            if entry.name.lower() != wanted.lower():
+                continue
+            kind = entry.etype
+            if kind == 2:
+                return entry
+            if kind == 1:
+                if not rest:
+                    raise CompDocError("Requested component is a 'storage'")
+                return self._dir_search(rest, kid)
+            entry.dump(1)
+            raise CompDocError("Requested stream is not a 'user stream'")
+        return None
+
+ ##
+ # Interrogate the compound document's directory; return the stream as a string if found, otherwise
+ # return None.
+ # @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
+
+    def get_named_stream(self, qname):
+        """Return the contents of the stream qname as a string, or None if absent.
+
+        qname may contain "/" separators. Streams smaller than
+        self.min_size_std_stream are read from the SSCS through the SSAT; the
+        others are read from the file's own sectors through the SAT.
+        """
+        node = self._dir_search(qname.split("/"))
+        if node is None:
+            return None
+        if node.tot_size < self.min_size_std_stream:
+            return self._get_stream(
+                self.SSCS, 0, self.SSAT, self.short_sec_size, node.first_SID,
+                node.tot_size, name=qname + " (from SSCS)")
+        return self._get_stream(
+            self.mem, 512, self.SAT, self.sec_size, node.first_SID,
+            node.tot_size, name=qname)
+
+ ##
+ # Interrogate the compound document's directory.
+ # If the named stream is not found, (None, 0, 0) will be returned.
+ # If the named stream is found and is contiguous within the original byte sequence ("mem")
+ # used when the document was opened,
+ # then (mem, offset_to_start_of_stream, length_of_stream) is returned.
+ # Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.
+ # @param qname Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.
+
+    def locate_named_stream(self, qname):
+        """Locate the stream qname; return (string, offset, length).
+
+        Returns (None, 0, 0) when the stream is not found. A stream of at
+        least self.min_size_std_stream bytes is handed to _locate_stream.
+        A smaller one is rebuilt from the SSCS and returned as
+        (new_string, 0, length).
+        """
+        d = self._dir_search(qname.split("/"))
+        if d is None:
+            return (None, 0, 0)
+        if d.tot_size >= self.min_size_std_stream:
+            return self._locate_stream(self.mem, 512, self.SAT, self.sec_size, d.first_SID, d.tot_size)
+        # Short stream. (An unreachable third return used to follow here.)
+        return (
+            self._get_stream(
+                self.SSCS, 0, self.SSAT, self.short_sec_size, d.first_SID,
+                d.tot_size, qname + " (from SSCS)"),
+            0,
+            d.tot_size
+            )
+
+    def _locate_stream(self, mem, base, sat, sec_size, start_sid, size):
+        """Find a stream in mem without copying it when its sectors are contiguous.
+
+        Follows the sector chain from start_sid through sat, merging
+        consecutive sectors into slices. Returns (mem, start_offset, size)
+        for a single slice; otherwise joins the slices into a new string and
+        returns (new_string, 0, size).
+
+        Raises CompDocError if start_sid is negative or the chain leads
+        outside sat (the latter used to escape as a bare IndexError).
+        """
+        # print >> self.logfile, "_locate_stream", base, sec_size, start_sid, size
+        s = start_sid
+        if s < 0:
+            raise CompDocError("_locate_stream: start_sid (%d) is -ve" % start_sid)
+        p = -99 # dummy previous SID
+        start_pos = -9999
+        end_pos = -8888
+        slices = []
+        while s >= 0:
+            if s == p+1:
+                # contiguous sectors
+                end_pos += sec_size
+            else:
+                # start new slice
+                if p >= 0:
+                    # not first time
+                    slices.append((start_pos, end_pos))
+                start_pos = base + s * sec_size
+                end_pos = start_pos + sec_size
+            p = s
+            try:
+                s = sat[s]
+            except IndexError:
+                # Same treatment of a corrupt table as in _get_stream.
+                raise CompDocError(
+                    "OLE2 stream: sector allocation table invalid entry (%d)" % s
+                    )
+        assert s == EOCSID
+        # print >> self.logfile, len(slices) + 1, "slices"
+        if not slices:
+            # The stream is contiguous ... just what we like!
+            return (mem, start_pos, size)
+        slices.append((start_pos, end_pos))
+        return (''.join([mem[start_pos:end_pos] for start_pos, end_pos in slices]), 0, size)
+
+# ==========================================================================================
diff --git a/tablib/packages/xlrd/doc/compdoc.html b/tablib/packages/xlrd/doc/compdoc.html
new file mode 100644
index 0000000..c55a194
--- /dev/null
+++ b/tablib/packages/xlrd/doc/compdoc.html
@@ -0,0 +1,69 @@
+<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
+<html>
+<head>
+<meta http-equiv='Content-Type' content='text/html; charset=us-ascii' />
+<title>The compdoc Module</title>
+</head>
+<body>
+<h1>The compdoc Module</h1>
+<p>Implements the minimal functionality required
+to extract a "Workbook" or "Book" stream (as one big string)
+from an OLE2 Compound Document file.
+</p><p>Copyright &#169; 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
+<p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
+<h2>Module Contents</h2>
+<dl>
+<dt><b>CompDoc(mem, logfile=sys.stdout, DEBUG=0)</b> (class) [<a href='#compdoc.CompDoc-class'>#</a>]</dt>
+<dd>
+<p>Compound document handler.</p>
+<dl>
+<dt><i>mem</i></dt>
+<dd>
+The raw contents of the file, as a string, or as an mmap.mmap() object. The
+only operation it needs to support is slicing.</dd>
+</dl><br />
+<p>For more information about this class, see <a href='#compdoc.CompDoc-class'><i>The CompDoc Class</i></a>.</p>
+</dd>
+<dt><a id='compdoc.SIGNATURE-variable' name='compdoc.SIGNATURE-variable'><b>SIGNATURE</b></a> (variable) [<a href='#compdoc.SIGNATURE-variable'>#</a>]</dt>
+<dd>
+<p>Magic cookie that should appear in the first 8 bytes of the file.</p>
+</dd>
+</dl>
+<h2><a id='compdoc.CompDoc-class' name='compdoc.CompDoc-class'>The CompDoc Class</a></h2>
+<dl>
+<dt><b>CompDoc(mem, logfile=sys.stdout, DEBUG=0)</b> (class) [<a href='#compdoc.CompDoc-class'>#</a>]</dt>
+<dd>
+<p>Compound document handler.</p>
+<dl>
+<dt><i>mem</i></dt>
+<dd>
+The raw contents of the file, as a string, or as an mmap.mmap() object. The
+only operation it needs to support is slicing.</dd>
+</dl><br />
+</dd>
+<dt><a id='compdoc.CompDoc.get_named_stream-method' name='compdoc.CompDoc.get_named_stream-method'><b>get_named_stream(qname)</b></a> [<a href='#compdoc.CompDoc.get_named_stream-method'>#</a>]</dt>
+<dd>
+<p>Interrogate the compound document's directory; return the stream as a string if found, otherwise
+return None.</p>
+<dl>
+<dt><i>qname</i></dt>
+<dd>
+Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.</dd>
+</dl><br />
+</dd>
+<dt><a id='compdoc.CompDoc.locate_named_stream-method' name='compdoc.CompDoc.locate_named_stream-method'><b>locate_named_stream(qname)</b></a> [<a href='#compdoc.CompDoc.locate_named_stream-method'>#</a>]</dt>
+<dd>
+<p>Interrogate the compound document's directory.
+If the named stream is not found, (None, 0, 0) will be returned.
+If the named stream is found and is contiguous within the original byte sequence (&quot;mem&quot;)
+used when the document was opened,
+then (mem, offset_to_start_of_stream, length_of_stream) is returned.
+Otherwise a new string is built from the fragments and (new_string, 0, length_of_stream) is returned.</p>
+<dl>
+<dt><i>qname</i></dt>
+<dd>
+Name of the desired stream e.g. u'Workbook'. Should be in Unicode or convertible thereto.</dd>
+</dl><br />
+</dd>
+</dl>
+</body></html>
diff --git a/tablib/packages/xlrd/doc/xlrd.html b/tablib/packages/xlrd/doc/xlrd.html
new file mode 100644
index 0000000..f982e0e
--- /dev/null
+++ b/tablib/packages/xlrd/doc/xlrd.html
@@ -0,0 +1,1845 @@
+<!DOCTYPE html PUBLIC '-//W3C//DTD XHTML 1.0 Strict//EN' 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>
+<html>
+<head>
+<meta http-equiv='Content-Type' content='text/html; charset=us-ascii' />
+<title>The xlrd Module</title>
+</head>
+<body>
+<h1>The xlrd Module</h1>
+<p /><p><b>A Python module for extracting data from MS Excel &#8482; spreadsheet files.
+<br /><br />
+Version 0.7.1 -- 2009-05-31
+</b></p>
+
+<h2>General information</h2>
+
+<h3>Acknowledgements</h3>
+
+<p>
+Development of this module would not have been possible without the document
+"OpenOffice.org's Documentation of the Microsoft Excel File Format"
+("OOo docs" for short).
+The latest version is available from OpenOffice.org in
+<a href="http://sc.openoffice.org/excelfileformat.pdf"> PDF format</a>
+and
+<a href="http://sc.openoffice.org/excelfileformat.odt"> ODT format.</a>
+Small portions of the OOo docs are reproduced in this
+document. A study of the OOo docs is recommended for those who wish a
+deeper understanding of the Excel file layout than the xlrd docs can provide.
+</p>
+
+<p>Backporting to Python 2.1 was partially funded by
+ <a href="http://journyx.com/">
+ Journyx - provider of timesheet and project accounting solutions.
+ </a>
+</p>
+
+<p>Provision of formatting information in version 0.6.1 was funded by
+ <a href="http://www.simplistix.co.uk">
+ Simplistix Ltd.
+ </a>
+</p>
+
+<h3>Unicode</h3>
+
+<p>This module presents all text strings as Python unicode objects.
+From Excel 97 onwards, text in Excel spreadsheets has been stored as Unicode.
+Older files (Excel 95 and earlier) don't keep strings in Unicode;
+a CODEPAGE record provides a codepage number (for example, 1252) which is
+used by xlrd to derive the encoding (for the same example: "cp1252") which is
+used to translate to Unicode.</p>
+<small>
+<p>If the CODEPAGE record is missing (possible if the file was created
+by third-party software), xlrd will assume that the encoding is ascii, and keep going.
+If the actual encoding is not ascii, a UnicodeDecodeError exception will be raised and
+you will need to determine the encoding yourself, and tell xlrd:
+<pre>
+ book = xlrd.open_workbook(..., encoding_override="cp1252")
+</pre></p>
+<p>If the CODEPAGE record exists but is wrong (for example, the codepage
+number is 1251, but the strings are actually encoded in koi8_r),
+it can be overridden using the same mechanism.
+The supplied runxlrd.py has a corresponding command-line argument, which
+may be used for experimentation:
+<pre>
+ runxlrd.py -e koi8_r 3rows myfile.xls
+</pre></p>
+<p>The first place to look for an encoding ("codec name") is
+<a href="http://docs.python.org/lib/standard-encodings.html">
+the Python documentation</a>.
+</p>
+</small>
+
+<h3>Dates in Excel spreadsheets</h3>
+
+<p>In reality, there are no such things. What you have are floating point
+numbers and pious hope.
+There are several problems with Excel dates:</p>
+
+<p>(1) Dates are not stored as a separate data type; they are stored as
+floating point numbers and you have to rely on
+(a) the "number format" applied to them in Excel and/or
+(b) knowing which cells are supposed to have dates in them.
+This module helps with (a) by inspecting the
+format that has been applied to each number cell;
+if it appears to be a date format, the cell
+is classified as a date rather than a number. Feedback on this feature,
+especially from non-English-speaking locales, would be appreciated.</p>
+
+<p>(2) Excel for Windows stores dates by default as the number of
+days (or fraction thereof) since 1899-12-31T00:00:00. Excel for
+Macintosh uses a default start date of 1904-01-01T00:00:00. The date
+system can be changed in Excel on a per-workbook basis (for example:
+Tools -&gt; Options -&gt; Calculation, tick the "1904 date system" box).
+This is of course a bad idea if there are already dates in the
+workbook. There is no good reason to change it even if there are no
+dates in the workbook. Which date system is in use is recorded in the
+workbook. A workbook transported from Windows to Macintosh (or vice
+versa) will work correctly with the host Excel. When using this
+module's xldate_as_tuple function to convert numbers from a workbook,
+you must use the datemode attribute of the Book object. If you guess,
+or make a judgement depending on where you believe the workbook was
+created, you run the risk of being 1462 days out of kilter.</p>
+
+<p>Reference:
+http://support.microsoft.com/default.aspx?scid=KB;EN-US;q180162</p>
+
+
+<p>(3) The Excel implementation of the Windows-default 1900-based date system works on the
+incorrect premise that 1900 was a leap year. It interprets the number 60 as meaning 1900-02-29,
+which is not a valid date. Consequently any number less than 61 is ambiguous. Example: is 59 the
+result of 1900-02-28 entered directly, or is it 1900-03-01 minus 2 days? The OpenOffice.org Calc
+program "corrects" the Microsoft problem; entering 1900-02-27 causes the number 59 to be stored.
+Save as an XLS file, then open the file with Excel -- you'll see 1900-02-28 displayed.</p>
+
+<p>Reference: http://support.microsoft.com/default.aspx?scid=kb;en-us;214326</p>
+
+<p>(4) The Macintosh-default 1904-based date system counts 1904-01-02 as day 1 and 1904-01-01 as day zero.
+Thus any number such that (0.0 &lt;= number &lt; 1.0) is ambiguous. Is 0.625 a time of day (15:00:00),
+independent of the calendar,
+or should it be interpreted as an instant on a particular day (1904-01-01T15:00:00)?
+The xldate_* functions in this module
+take the view that such a number is a calendar-independent time of day (like Python's datetime.time type) for both
+date systems. This is consistent with more recent Microsoft documentation
+(for example, the help file for Excel 2002 which says that the first day
+in the 1904 date system is 1904-01-02).
+
+</p><p>(5) Usage of the Excel DATE() function may leave strange dates in a spreadsheet. Quoting the help file,
+in respect of the 1900 date system: "If year is between 0 (zero) and 1899 (inclusive),
+Excel adds that value to 1900 to calculate the year. For example, DATE(108,1,2) returns January 2, 2008 (1900+108)."
+This gimmick, semi-defensible only for arguments up to 99 and only in the pre-Y2K-awareness era,
+means that DATE(1899, 12, 31) is interpreted as 3799-12-31.</p>
+
+<p>For further information, please refer to the documentation for the xldate_* functions.</p>
+
+<h3> Named references, constants, formulas, and macros</h3>
+
+<p>
+A name is used to refer to a cell, a group of cells, a constant
+value, a formula, or a macro. Usually the scope of a name is global
+across the whole workbook. However it can be local to a worksheet.
+For example, if the sales figures are in different cells in
+different sheets, the user may define the name "Sales" in each
+sheet. There are built-in names, like "Print_Area" and
+"Print_Titles"; these two are naturally local to a sheet.
+</p><p>
+To inspect the names with a user interface like MS Excel, OOo Calc,
+or Gnumeric, click on Insert/Names/Define. This will show the global
+names, plus those local to the currently selected sheet.
+</p><p>
+A Book object provides two dictionaries (name_map and
+name_and_scope_map) and a list (name_obj_list) which allow various
+ways of accessing the Name objects. There is one Name object for
+each NAME record found in the workbook. Name objects have many
+attributes, several of which are relevant only when obj.macro is 1.
+</p><p>
+In the examples directory you will find namesdemo.xls which
+showcases the many different ways that names can be used, and
+xlrdnameAPIdemo.py which offers 3 different queries for inspecting
+the names in your files, and shows how to extract whatever a name is
+referring to. There is currently one "convenience method",
+Name.cell(), which extracts the value in the case where the name
+refers to a single cell. More convenience methods are planned. The
+source code for Name.cell (in __init__.py) is an extra source of
+information on how the Name attributes hang together.
+</p>
+
+<p><i>Name information is <b>not</b> extracted from files older than
+Excel 5.0 (Book.biff_version &lt; 50)</i></p>
+
+<h3>Formatting</h3>
+
+<h4>Introduction</h4>
+
+<p>This collection of features, new in xlrd version 0.6.1, is intended
+to provide the information needed to (1) display/render spreadsheet contents
+(say) on a screen or in a PDF file, and (2) copy spreadsheet data to another
+file without losing the ability to display/render it.</p>
+
+<h4>The Palette; Colour Indexes</h4>
+
+<p>A colour is represented in Excel as a (red, green, blue) ("RGB") tuple
+with each component in range(256). However it is not possible to access an
+unlimited number of colours; each spreadsheet is limited to a palette of 64 different
+colours (24 in Excel 3.0 and 4.0, 8 in Excel 2.0). Colours are referenced by an index
+("colour index") into this palette.
+
+Colour indexes 0 to 7 represent 8 fixed built-in colours: black, white, red, green, blue,
+yellow, magenta, and cyan.</p><p>
+
+The remaining colours in the palette (8 to 63 in Excel 5.0 and later)
+can be changed by the user. In the Excel 2003 UI, Tools/Options/Color presents a palette
+of 7 rows of 8 colours. The last two rows are reserved for use in charts.<br />
+The correspondence between this grid and the assigned
+colour indexes is NOT left-to-right top-to-bottom.<br />
+Indexes 8 to 15 correspond to changeable
+parallels of the 8 fixed colours -- for example, index 7 is forever cyan;
+index 15 starts off being cyan but can be changed by the user.<br />
+
+The default colour for each index depends on the file version; tables of the defaults
+are available in the source code. If the user changes one or more colours,
+a PALETTE record appears in the XLS file -- it gives the RGB values for *all* changeable
+indexes.<br />
+Note that colours can be used in "number formats": "[CYAN]...." and "[COLOR8]...." refer
+to colour index 7; "[COLOR16]...." will produce cyan
+unless the user changes colour index 15 to something else.<br />
+
+</p><p>In addition, there are several "magic" colour indexes used by Excel:<br />
+0x18 (BIFF3-BIFF4), 0x40 (BIFF5-BIFF8): System window text colour for border lines
+(used in XF, CF, and WINDOW2 records)<br />
+0x19 (BIFF3-BIFF4), 0x41 (BIFF5-BIFF8): System window background colour for pattern background
+(used in XF and CF records)<br />
+0x43: System face colour (dialogue background colour)<br />
+0x4D: System window text colour for chart border lines<br />
+0x4E: System window background colour for chart areas<br />
+0x4F: Automatic colour for chart border lines (seems to be always Black)<br />
+0x50: System ToolTip background colour (used in note objects)<br />
+0x51: System ToolTip text colour (used in note objects)<br />
+0x7FFF: System window text colour for fonts (used in FONT and CF records)<br />
+Note 0x7FFF appears to be the *default* colour index. It appears quite often in FONT
+records.</p>
+
+<h4>Default Formatting</h4>
+
+<p>Default formatting is applied to all empty cells (those not described by a cell record).
+Firstly row default information (ROW record, Rowinfo class) is used if available.
+Failing that, column default information (COLINFO record, Colinfo class) is used if available.
+As a last resort the worksheet/workbook default cell format will be used; this
+should always be present in an Excel file,
+described by the XF record with the fixed index 15 (0-based). By default, it uses the
+worksheet/workbook default cell style, described by the very first XF record (index 0).</p>
+
+<h4> Formatting features not included in xlrd version 0.6.1</h4>
+<ul>
+ <li>Rich text i.e. strings containing partial <b>bold</b> <i>italic</i>
+ and <u>underlined</u> text, change of font inside a string, etc.
+ See OOo docs s3.4 and s3.2</li>
+ <li>Asian phonetic text (known as "ruby"), used for Japanese furigana. See OOo docs
+ s3.4.2 (p15)</li>
+ <li>Conditional formatting. See OOo docs
+ s5.12, s6.21 (CONDFMT record), s6.16 (CF record)</li>
+ <li>Miscellaneous sheet-level and book-level items e.g. printing layout, screen panes. </li>
+ <li>Modern Excel file versions don't keep most of the built-in
+ "number formats" in the file; Excel loads formats according to the
+ user's locale. Currently xlrd's emulation of this is limited to
+ a hard-wired table that applies to the US English locale. This may mean
+ that currency symbols, date order, thousands separator, decimals separator, etc
+ are inappropriate. Note that this does not affect users who are copying XLS
+ files, only those who are visually rendering cells.</li>
+</ul>
+
+<h3>Loading worksheets on demand</h3>
+
+<p>This feature, new in version 0.7.1, is governed by the on_demand argument
+to the open_workbook() function and allows saving memory and time by loading
+only those sheets that the caller is interested in, and releasing sheets
+when no longer required.</p>
+
+<p>on_demand=False (default): No change. open_workbook() loads global data
+and all sheets, releases resources no longer required (principally the
+str or mmap object containing the Workbook stream), and returns.</p>
+
+<p>on_demand=True and BIFF version &lt; 5.0: A warning message is emitted,
+on_demand is recorded as False, and the old process is followed.</p>
+
+<p>on_demand=True and BIFF version &gt;= 5.0: open_workbook() loads global
+data and returns without releasing resources. At this stage, the only
+information available about sheets is Book.nsheets and Book.sheet_names().</p>
+
+<p>Book.sheet_by_name() and Book.sheet_by_index() will load the requested
+sheet if it is not already loaded.</p>
+
+<p>Book.sheets() will load all/any unloaded sheets.</p>
+
+<p>The caller may save memory by calling
+Book.unload_sheet(sheet_name_or_index) when finished with the sheet.
+This applies irrespective of the state of on_demand.</p>
+
+<p>The caller may re-load an unloaded sheet by calling Book.sheet_by_xxxx()
+ -- except if the required resources have been released (which will
+have happened automatically when on_demand is false). This is the only
+case where an exception will be raised.</p>
+
+<p>The caller may query the state of a sheet:
+Book.sheet_loaded(sheet_name_or_index) -&gt; a bool</p>
+
+<h2>Module Contents</h2>
+<dl>
+<dt><b>BaseObject</b> (class) [<a href='#biffh.BaseObject-class'>#</a>]</dt>
+<dd>
+<p>Parent of almost all other classes in the package.</p>
+<p>For more information about this class, see <a href='#biffh.BaseObject-class'><i>The BaseObject Class</i></a>.</p>
+</dd>
+<dt><b>Book()</b> (class) [<a href='#__init__.Book-class'>#</a>]</dt>
+<dd>
+<p>Contents of a &quot;workbook&quot;.</p>
+<p>For more information about this class, see <a href='#__init__.Book-class'><i>The Book Class</i></a>.</p>
+</dd>
+<dt><b>Cell(ctype, value, xf_index=None)</b> (class) [<a href='#sheet.Cell-class'>#</a>]</dt>
+<dd>
+<p>Contains the data for one cell.</p>
+<p>For more information about this class, see <a href='#sheet.Cell-class'><i>The Cell Class</i></a>.</p>
+</dd>
+<dt><a id='formula.cellname-function' name='formula.cellname-function'><b>cellname(rowx, colx)</b></a> [<a href='#formula.cellname-function'>#</a>]</dt>
+<dd>
+<p>Utility function: (5, 7) =&gt; 'H6'</p>
+</dd>
+<dt><a id='formula.cellnameabs-function' name='formula.cellnameabs-function'><b>cellnameabs(rowx, colx)</b></a> [<a href='#formula.cellnameabs-function'>#</a>]</dt>
+<dd>
+<p>Utility function: (5, 7) =&gt; '$H$6'</p>
+</dd>
+<dt><b>Colinfo</b> (class) [<a href='#sheet.Colinfo-class'>#</a>]</dt>
+<dd>
+<p>Width and default formatting information that applies to one or
+more columns in a sheet.</p>
+<p>For more information about this class, see <a href='#sheet.Colinfo-class'><i>The Colinfo Class</i></a>.</p>
+</dd>
+<dt><a id='formula.colname-function' name='formula.colname-function'><b>colname(colx)</b></a> [<a href='#formula.colname-function'>#</a>]</dt>
+<dd>
+<p>Utility function: 7 =&gt; 'H', 27 =&gt; 'AB'</p>
+</dd>
+<dt><a id='__init__.count_records-function' name='__init__.count_records-function'><b>count_records(filename, outfile=sys.stdout)</b></a> [<a href='#__init__.count_records-function'>#</a>]</dt>
+<dd>
+<p>For debugging and analysis: summarise the file's BIFF records.
+I.e. produce a sorted file of (record_name, count).</p>
+<dl>
+<dt><i>filename</i></dt>
+<dd>
+The path to the file to be summarised.</dd>
+<dt><i>outfile</i></dt>
+<dd>
+An open file, to which the summary is written.</dd>
+</dl><br />
+</dd>
+<dt><a id='__init__.dump-function' name='__init__.dump-function'><b>dump(filename, outfile=sys.stdout, unnumbered=False)</b></a> [<a href='#__init__.dump-function'>#</a>]</dt>
+<dd>
+<p>For debugging: dump the file's BIFF records in char &amp; hex.
+</p><dl>
+<dt><i>filename</i></dt>
+<dd>
+The path to the file to be dumped.</dd>
+<dt><i>outfile</i></dt>
+<dd>
+An open file, to which the dump is written.</dd>
+<dt><i>unnumbered</i></dt>
+<dd>
+If true, omit offsets (for meaningful diffs).</dd>
+</dl><br />
+</dd>
+<dt><a id='sheet.empty_cell-variable' name='sheet.empty_cell-variable'><b>empty_cell</b></a> (variable) [<a href='#sheet.empty_cell-variable'>#</a>]</dt>
+<dd>
+<p>There is one and only one instance of an empty cell -- it's a singleton. This is it.
+You may use a test like &quot;acell is empty_cell&quot;.</p>
+</dd>
+<dt><b>EqNeAttrs</b> (class) [<a href='#formatting.EqNeAttrs-class'>#</a>]</dt>
+<dd>
+<p>This mixin class exists solely so that Format, Font, and XF....</p>
+<p>For more information about this class, see <a href='#formatting.EqNeAttrs-class'><i>The EqNeAttrs Class</i></a>.</p>
+</dd>
+<dt><a id='biffh.error_text_from_code-variable' name='biffh.error_text_from_code-variable'><b>error_text_from_code</b></a> (variable) [<a href='#biffh.error_text_from_code-variable'>#</a>]</dt>
+<dd>
+<p /><p>This dictionary can be used to produce a text version of the internal codes
+that Excel uses for error cells. Here are its contents:
+<pre>
+0x00: '#NULL!', # Intersection of two cell ranges is empty
+0x07: '#DIV/0!', # Division by zero
+0x0F: '#VALUE!', # Wrong type of operand
+0x17: '#REF!', # Illegal or deleted cell reference
+0x1D: '#NAME?', # Wrong function or range name
+0x24: '#NUM!', # Value range overflow
+0x2A: '#N/A!', # Argument or function not available
+</pre></p>
+</dd>
+<dt><b>Font</b> (class) [<a href='#formatting.Font-class'>#</a>]</dt>
+<dd>
+<p>An Excel &quot;font&quot; contains the details of not only what is normally
+considered a font, but also several other display attributes.</p>
+<p>For more information about this class, see <a href='#formatting.Font-class'><i>The Font Class</i></a>.</p>
+</dd>
+<dt><b>Format(format_key, ty, format_str)</b> (class) [<a href='#formatting.Format-class'>#</a>]</dt>
+<dd>
+<p>&quot;Number format&quot; information from a FORMAT record.</p>
+<p>For more information about this class, see <a href='#formatting.Format-class'><i>The Format Class</i></a>.</p>
+</dd>
+<dt><b>Name</b> (class) [<a href='#__init__.Name-class'>#</a>]</dt>
+<dd>
+<p>Information relating to a named reference, formula, macro, etc.</p>
+<p>For more information about this class, see <a href='#__init__.Name-class'><i>The Name Class</i></a>.</p>
+</dd>
+<dt><a id='__init__.open_workbook-function' name='__init__.open_workbook-function'><b>open_workbook(filename=None,
+logfile=sys.stdout, verbosity=0, pickleable=True, use_mmap=USE_MMAP,
+file_contents=None,
+encoding_override=None,
+formatting_info=False, on_demand=False,
+)</b></a> [<a href='#__init__.open_workbook-function'>#</a>]</dt>
+<dd>
+<p>Open a spreadsheet file for data extraction.</p>
+<dl>
+<dt><i>filename</i></dt>
+<dd>
+The path to the spreadsheet file to be opened.</dd>
+<dt><i>logfile</i></dt>
+<dd>
+An open file to which messages and diagnostics are written.</dd>
+<dt><i>verbosity</i></dt>
+<dd>
+Increases the volume of trace material written to the logfile.</dd>
+<dt><i>pickleable</i></dt>
+<dd>
+Default is true. In Python 2.4 or earlier, setting to false
+will cause use of array.array objects which save some memory but can't be pickled.
+In Python 2.5, array.arrays are used unconditionally. Note: if you have large files that
+you need to read multiple times, it can be much faster to cPickle.dump() the xlrd.Book object
+once, and use cPickle.load() multiple times.</dd>
+<dt><i>use_mmap</i></dt>
+<dd>
+Whether to use the mmap module is determined heuristically.
+Use this arg to override the result. Current heuristic: mmap is used if it exists.</dd>
+<dt><i>file_contents</i></dt>
+<dd>
+The contents of the file, as a string or an mmap.mmap object or some other object that behaves like one.
+If file_contents is supplied, filename will not be used, except (possibly) in messages.</dd>
+<dt><i>encoding_override</i></dt>
+<dd>
+Used to overcome missing or bad codepage information
+in older-version files. Refer to discussion in the <b>Unicode</b> section above.
+<br /> -- New in version 0.6.0
+
+</dd>
+<dt><i>formatting_info</i></dt>
+<dd>
+Governs provision of a reference to an XF (eXtended Format) object
+for each cell in the worksheet.
+<br /> Default is <i>False</i>. This is backwards compatible and saves memory.
+"Blank" cells (those with their own formatting information but no data) are treated as empty
+(by ignoring the file's BLANK and MULBLANK records).
+It cuts off any bottom "margin" of rows of empty (and blank) cells and
+any right "margin" of columns of empty (and blank) cells.
+Only cell_value and cell_type are available.
+<br /> <i>True</i> provides all cells, including empty and blank cells.
+XF information is available for each cell.
+<br /> -- New in version 0.6.1
+
+</dd>
+<dt><i>on_demand</i></dt>
+<dd>
+Governs whether sheets are all loaded initially or when demanded
+by the caller. Please refer back to the section &quot;Loading worksheets on demand&quot; for details.
+<br /> -- New in version 0.7.1</dd>
+<dt>Returns:</dt>
+<dd>
+An instance of the Book class.</dd>
+</dl><br />
+</dd>
+<dt><b>Operand(akind=None, avalue=None, arank=0, atext='?')</b> (class) [<a href='#formula.Operand-class'>#</a>]</dt>
+<dd>
+<p>Used in evaluating formulas.</p>
+<p>For more information about this class, see <a href='#formula.Operand-class'><i>The Operand Class</i></a>.</p>
+</dd>
+<dt><a id='formula.rangename3d-function' name='formula.rangename3d-function'><b>rangename3d(book, ref3d)</b></a> [<a href='#formula.rangename3d-function'>#</a>]</dt>
+<dd>
+<p>Utility function:
+<br /> Ref3D((1, 4, 5, 20, 7, 10)) =&gt; 'Sheet2:Sheet3!$H$6:$J$20'
+</p></dd>
+<dt><a id='formula.rangename3drel-function' name='formula.rangename3drel-function'><b>rangename3drel(book, ref3d)</b></a> [<a href='#formula.rangename3drel-function'>#</a>]</dt>
+<dd>
+<p>Utility function:
+<br /> Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1))
+=&gt; 'Sheet1![@-13,#-32]:[@+12,#-23]'
+where '@' refers to the current or base column and '#'
+refers to the current or base row.
+</p></dd>
+<dt><b>Ref3D(atuple)</b> (class) [<a href='#formula.Ref3D-class'>#</a>]</dt>
+<dd>
+<p>Represents an absolute or relative 3-dimensional reference to a box
+of one or more cells.</p>
+<p>For more information about this class, see <a href='#formula.Ref3D-class'><i>The Ref3D Class</i></a>.</p>
+</dd>
+<dt><b>Rowinfo</b> (class) [<a href='#sheet.Rowinfo-class'>#</a>]</dt>
+<dd>
+<p>Height and default formatting information that applies to a row in a sheet.</p>
+<p>For more information about this class, see <a href='#sheet.Rowinfo-class'><i>The Rowinfo Class</i></a>.</p>
+</dd>
+<dt><b>Sheet(book, position, name, number)</b> (class) [<a href='#sheet.Sheet-class'>#</a>]</dt>
+<dd>
+<p>Contains the data for one worksheet.</p>
+<p>For more information about this class, see <a href='#sheet.Sheet-class'><i>The Sheet Class</i></a>.</p>
+</dd>
+<dt><b>XF</b> (class) [<a href='#formatting.XF-class'>#</a>]</dt>
+<dd>
+<p>eXtended Formatting information for cells, rows, columns and styles.</p>
+<p>For more information about this class, see <a href='#formatting.XF-class'><i>The XF Class</i></a>.</p>
+</dd>
+<dt><b>XFAlignment</b> (class) [<a href='#formatting.XFAlignment-class'>#</a>]</dt>
+<dd>
+<p>A collection of the alignment and similar attributes of an XF record.</p>
+<p>For more information about this class, see <a href='#formatting.XFAlignment-class'><i>The XFAlignment Class</i></a>.</p>
+</dd>
+<dt><b>XFBackground</b> (class) [<a href='#formatting.XFBackground-class'>#</a>]</dt>
+<dd>
+<p>A collection of the background-related attributes of an XF record.</p>
+<p>For more information about this class, see <a href='#formatting.XFBackground-class'><i>The XFBackground Class</i></a>.</p>
+</dd>
+<dt><b>XFBorder</b> (class) [<a href='#formatting.XFBorder-class'>#</a>]</dt>
+<dd>
+<p>A collection of the border-related attributes of an XF record.</p>
+<p>For more information about this class, see <a href='#formatting.XFBorder-class'><i>The XFBorder Class</i></a>.</p>
+</dd>
+<dt><b>XFProtection</b> (class) [<a href='#formatting.XFProtection-class'>#</a>]</dt>
+<dd>
+<p>A collection of the protection-related attributes of an XF record.</p>
+<p>For more information about this class, see <a href='#formatting.XFProtection-class'><i>The XFProtection Class</i></a>.</p>
+</dd>
+<dt><a id='xldate.xldate_as_tuple-function' name='xldate.xldate_as_tuple-function'><b>xldate_as_tuple(xldate, datemode)</b></a> [<a href='#xldate.xldate_as_tuple-function'>#</a>]</dt>
+<dd>
+<p>Convert an Excel number (presumed to represent a date, a datetime or a time) into
+a tuple suitable for feeding to datetime or mx.DateTime constructors.</p>
+<dl>
+<dt><i>xldate</i></dt>
+<dd>
+The Excel number</dd>
+<dt><i>datemode</i></dt>
+<dd>
+0: 1900-based, 1: 1904-based.
+<br />WARNING: when using this function to
+interpret the contents of a workbook, you should pass in the Book.datemode
+attribute of that workbook. Whether
+the workbook has ever been anywhere near a Macintosh is irrelevant.
+</dd>
+<dt>Returns:</dt>
+<dd>
+Gregorian (year, month, day, hour, minute, nearest_second).
+<br />Special case: if 0.0 &lt;= xldate &lt; 1.0, it is assumed to represent a time;
+(0, 0, 0, hour, minute, second) will be returned.
+<br />Note: 1904-01-01 is not regarded as a valid date in the datemode 1 system; its "serial number"
+is zero.
+</dd>
+<dt>Raises <b>XLDateNegative</b>:</dt><dd>
+xldate &lt; 0.00
+</dd>
+<dt>Raises <b>XLDateAmbiguous</b>:</dt><dd>
+The 1900 leap-year problem (datemode == 0 and 1.0 &lt;= xldate &lt; 61.0)
+</dd>
+<dt>Raises <b>XLDateTooLarge</b>:</dt><dd>
+Gregorian year 10000 or later</dd>
+<dt>Raises <b>XLDateBadDatemode</b>:</dt><dd>
+datemode arg is neither 0 nor 1</dd>
+<dt>Raises <b>XLDateError</b>:</dt><dd>
+Covers the 4 specific errors</dd>
+</dl><br />
+</dd>
+<dt><a id='xldate.xldate_from_date_tuple-function' name='xldate.xldate_from_date_tuple-function'><b>xldate_from_date_tuple((year, month, day), datemode)</b></a> [<a href='#xldate.xldate_from_date_tuple-function'>#</a>]</dt>
+<dd>
+<p>Convert a date tuple (year, month, day) to an Excel date.</p>
+<dl>
+<dt><i>year</i></dt>
+<dd>
+Gregorian year.</dd>
+<dt><i>month</i></dt>
+<dd>
+1 &lt;= month &lt;= 12
+</dd>
+<dt><i>day</i></dt>
+<dd>
+1 &lt;= day &lt;= last day of that (year, month)
+</dd>
+<dt><i>datemode</i></dt>
+<dd>
+0: 1900-based, 1: 1904-based.</dd>
+<dt>Raises <b>XLDateAmbiguous</b>:</dt><dd>
+The 1900 leap-year problem (datemode == 0 and 1.0 &lt;= xldate &lt; 61.0)
+</dd>
+<dt>Raises <b>XLDateBadDatemode</b>:</dt><dd>
+datemode arg is neither 0 nor 1</dd>
+<dt>Raises <b>XLDateBadTuple</b>:</dt><dd>
+(year, month, day) is too early/late or has invalid component(s)</dd>
+<dt>Raises <b>XLDateError</b>:</dt><dd>
+Covers the specific errors</dd>
+</dl><br />
+</dd>
+<dt><a id='xldate.xldate_from_datetime_tuple-function' name='xldate.xldate_from_datetime_tuple-function'><b>xldate_from_datetime_tuple(datetime_tuple, datemode)</b></a> [<a href='#xldate.xldate_from_datetime_tuple-function'>#</a>]</dt>
+<dd>
+<p>Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value.
+For more details, refer to other xldate_from_*_tuple functions.</p>
+<dl>
+<dt><i>datetime_tuple</i></dt>
+<dd>
+(year, month, day, hour, minute, second)</dd>
+<dt><i>datemode</i></dt>
+<dd>
+0: 1900-based, 1: 1904-based.</dd>
+</dl><br />
+</dd>
+<dt><a id='xldate.xldate_from_time_tuple-function' name='xldate.xldate_from_time_tuple-function'><b>xldate_from_time_tuple((hour, minute, second))</b></a> [<a href='#xldate.xldate_from_time_tuple-function'>#</a>]</dt>
+<dd>
+<p>Convert a time tuple (hour, minute, second) to an Excel &quot;date&quot; value (fraction of a day).</p>
+<dl>
+<dt><i>hour</i></dt>
+<dd>
+0 &lt;= hour &lt; 24
+</dd>
+<dt><i>minute</i></dt>
+<dd>
+0 &lt;= minute &lt; 60
+</dd>
+<dt><i>second</i></dt>
+<dd>
+0 &lt;= second &lt; 60
+</dd>
+<dt>Raises <b>XLDateBadTuple</b>:</dt><dd>
+Out-of-range hour, minute, or second</dd>
+</dl><br />
+</dd>
+</dl>
+<h2><a id='biffh.BaseObject-class' name='biffh.BaseObject-class'>The BaseObject Class</a></h2>
+<dl>
+<dt><b>BaseObject</b> (class) [<a href='#biffh.BaseObject-class'>#</a>]</dt>
+<dd>
+<p>Parent of almost all other classes in the package. Defines a common &quot;dump&quot; method
+for debugging.</p>
+</dd>
+<dt><a id='biffh.BaseObject.dump-method' name='biffh.BaseObject.dump-method'><b>dump(f=None, header=None, footer=None, indent=0)</b></a> [<a href='#biffh.BaseObject.dump-method'>#</a>]</dt>
+<dd>
+<dl>
+<dt><i>f</i></dt>
+<dd>
+open file object, to which the dump is written</dd>
+<dt><i>header</i></dt>
+<dd>
+text to write before the dump</dd>
+<dt><i>footer</i></dt>
+<dd>
+text to write after the dump</dd>
+<dt><i>indent</i></dt>
+<dd>
+number of leading spaces (for recursive calls)</dd>
+</dl><br />
+</dd>
+</dl>
+<h2><a id='__init__.Book-class' name='__init__.Book-class'>The Book Class</a></h2>
+<dl>
+<dt><b>Book()</b> (class) [<a href='#__init__.Book-class'>#</a>]</dt>
+<dd>
+<p>Contents of a "workbook".
+</p><p>WARNING: You don't call this class yourself. You use the Book object that
+was returned when you called xlrd.open_workbook("myfile.xls").</p>
+</dd>
+<dt><a id='__init__.Book.biff_version-attribute' name='__init__.Book.biff_version-attribute'><b>biff_version</b></a> [<a href='#__init__.Book.biff_version-attribute'>#</a>]</dt>
+<dd>
+<p>Version of BIFF (Binary Interchange File Format) used to create the file.
+Latest is 8.0 (represented here as 80), introduced with Excel 97.
+Earliest supported by this module: 2.0 (represented as 20).</p>
+</dd>
+<dt><a id='__init__.Book.codepage-attribute' name='__init__.Book.codepage-attribute'><b>codepage</b></a> [<a href='#__init__.Book.codepage-attribute'>#</a>]</dt>
+<dd>
+<p>An integer denoting the character set used for strings in this file.
+For BIFF 8 and later, this will be 1200, meaning Unicode; more precisely, UTF_16_LE.
+For earlier versions, this is used to derive the appropriate Python encoding
+to be used to convert to Unicode.
+Examples: 1252 -&gt; 'cp1252', 10000 -&gt; 'mac_roman'</p>
+</dd>
+<dt><a id='__init__.Book.colour_map-attribute' name='__init__.Book.colour_map-attribute'><b>colour_map</b></a> [<a href='#__init__.Book.colour_map-attribute'>#</a>]</dt>
+<dd>
+<p>This provides definitions for colour indexes. Please refer to the
+above section "The Palette; Colour Indexes" for an explanation
+of how colours are represented in Excel.<br />
+Colour indexes into the palette map into (red, green, blue) tuples.
+"Magic" indexes e.g. 0x7FFF map to None.
+<i>colour_map</i> is what you need if you want to render cells on screen or in a PDF
+file. If you are writing an output XLS file, use <i>palette_record</i>.
+<br /> -- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+</p></dd>
+<dt><a id='__init__.Book.countries-attribute' name='__init__.Book.countries-attribute'><b>countries</b></a> [<a href='#__init__.Book.countries-attribute'>#</a>]</dt>
+<dd>
+<p>A tuple containing the (telephone system) country code for:<br />
+ [0]: the user-interface setting when the file was created.<br />
+ [1]: the regional settings.<br />
+Example: (1, 61) meaning (USA, Australia).
+This information may give a clue to the correct encoding for an unknown codepage.
+For a long list of observed values, refer to the OpenOffice.org documentation for
+the COUNTRY record.
+</p></dd>
+<dt><a id='__init__.Book.datemode-attribute' name='__init__.Book.datemode-attribute'><b>datemode</b></a> [<a href='#__init__.Book.datemode-attribute'>#</a>]</dt>
+<dd>
+<p>Which date system was in force when this file was last saved.<br />
+ 0 =&gt; 1900 system (the Excel for Windows default).<br />
+ 1 =&gt; 1904 system (the Excel for Macintosh default).<br />
+</p></dd>
+<dt><a id='__init__.Book.encoding-attribute' name='__init__.Book.encoding-attribute'><b>encoding</b></a> [<a href='#__init__.Book.encoding-attribute'>#</a>]</dt>
+<dd>
+<p>The encoding that was derived from the codepage.</p>
+</dd>
+<dt><a id='__init__.Book.font_list-attribute' name='__init__.Book.font_list-attribute'><b>font_list</b></a> [<a href='#__init__.Book.font_list-attribute'>#</a>]</dt>
+<dd>
+<p>A list of Font class instances, each corresponding to a FONT record.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='__init__.Book.format_list-attribute' name='__init__.Book.format_list-attribute'><b>format_list</b></a> [<a href='#__init__.Book.format_list-attribute'>#</a>]</dt>
+<dd>
+<p>A list of Format objects, each corresponding to a FORMAT record, in
+the order that they appear in the input file.
+It does <i>not</i> contain builtin formats.
+If you are creating an output file using (for example) pyExcelerator,
+use this list.
+The collection to be used for all visual rendering purposes is format_map.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='__init__.Book.format_map-attribute' name='__init__.Book.format_map-attribute'><b>format_map</b></a> [<a href='#__init__.Book.format_map-attribute'>#</a>]</dt>
+<dd>
+<p>The mapping from XF.format_key to Format object.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='__init__.Book.load_time_stage_1-attribute' name='__init__.Book.load_time_stage_1-attribute'><b>load_time_stage_1</b></a> [<a href='#__init__.Book.load_time_stage_1-attribute'>#</a>]</dt>
+<dd>
+<p>Time in seconds to extract the XLS image as a contiguous string (or mmap equivalent).</p>
+</dd>
+<dt><a id='__init__.Book.load_time_stage_2-attribute' name='__init__.Book.load_time_stage_2-attribute'><b>load_time_stage_2</b></a> [<a href='#__init__.Book.load_time_stage_2-attribute'>#</a>]</dt>
+<dd>
+<p>Time in seconds to parse the data from the contiguous string (or mmap equivalent).</p>
+</dd>
+<dt><a id='__init__.Book.name_and_scope_map-attribute' name='__init__.Book.name_and_scope_map-attribute'><b>name_and_scope_map</b></a> [<a href='#__init__.Book.name_and_scope_map-attribute'>#</a>]</dt>
+<dd>
+<p>A mapping from (lower_case_name, scope) to a single Name object.
+<br /> -- New in version 0.6.0
+</p></dd>
+<dt><a id='__init__.Book.name_map-attribute' name='__init__.Book.name_map-attribute'><b>name_map</b></a> [<a href='#__init__.Book.name_map-attribute'>#</a>]</dt>
+<dd>
+<p>A mapping from lower_case_name to a list of Name objects. The list is
+sorted in scope order. Typically there will be one item (of global scope)
+in the list.
+<br /> -- New in version 0.6.0
+</p></dd>
+<dt><a id='__init__.Book.name_obj_list-attribute' name='__init__.Book.name_obj_list-attribute'><b>name_obj_list</b></a> [<a href='#__init__.Book.name_obj_list-attribute'>#</a>]</dt>
+<dd>
+<p>List containing a Name object for each NAME record in the workbook.
+<br /> -- New in version 0.6.0
+</p></dd>
+<dt><a id='__init__.Book.nsheets-attribute' name='__init__.Book.nsheets-attribute'><b>nsheets</b></a> [<a href='#__init__.Book.nsheets-attribute'>#</a>]</dt>
+<dd>
+<p>The number of worksheets present in the workbook file.
+This information is available even when no sheets have yet been loaded.</p>
+</dd>
+<dt><a id='__init__.Book.palette_record-attribute' name='__init__.Book.palette_record-attribute'><b>palette_record</b></a> [<a href='#__init__.Book.palette_record-attribute'>#</a>]</dt>
+<dd>
+<p>If the user has changed any of the colours in the standard palette, the XLS
+file will contain a PALETTE record with 56 (16 for Excel 4.0 and earlier)
+RGB values in it, and this list will be e.g. [(r0, g0, b0), ..., (r55, g55, b55)].
+Otherwise this list will be empty. This is what you need if you are
+writing an output XLS file. If you want to render cells on screen or in a PDF
+file, use colour_map.
+<br /> -- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+</p></dd>
+<dt><a id='__init__.Book.sheet_by_index-method' name='__init__.Book.sheet_by_index-method'><b>sheet_by_index(sheetx)</b></a> [<a href='#__init__.Book.sheet_by_index-method'>#</a>]</dt>
+<dd>
+<dl>
+<dt><i>sheetx</i></dt>
+<dd>
+Sheet index in range(nsheets)</dd>
+<dt>Returns:</dt>
+<dd>
+An object of the Sheet class</dd>
+</dl><br />
+</dd>
+<dt><a id='__init__.Book.sheet_by_name-method' name='__init__.Book.sheet_by_name-method'><b>sheet_by_name(sheet_name)</b></a> [<a href='#__init__.Book.sheet_by_name-method'>#</a>]</dt>
+<dd>
+<dl>
+<dt><i>sheet_name</i></dt>
+<dd>
+Name of sheet required</dd>
+<dt>Returns:</dt>
+<dd>
+An object of the Sheet class</dd>
+</dl><br />
+</dd>
+<dt><a id='__init__.Book.sheet_loaded-method' name='__init__.Book.sheet_loaded-method'><b>sheet_loaded(sheet_name_or_index)</b></a> [<a href='#__init__.Book.sheet_loaded-method'>#</a>]</dt>
+<dd>
+<dl>
+<dt><i>sheet_name_or_index</i></dt>
+<dd>
+Name or index of sheet enquired upon</dd>
+<dt>Returns:</dt>
+<dd>
+true if sheet is loaded, false otherwise
+<br /> -- New in version 0.7.1
+</dd>
+</dl><br />
+</dd>
+<dt><a id='__init__.Book.sheet_names-method' name='__init__.Book.sheet_names-method'><b>sheet_names()</b></a> [<a href='#__init__.Book.sheet_names-method'>#</a>]</dt>
+<dd>
+<dl>
+<dt>Returns:</dt>
+<dd>
+A list of the names of all the worksheets in the workbook file.
+This information is available even when no sheets have yet been loaded.</dd>
+</dl><br />
+</dd>
+<dt><a id='__init__.Book.sheets-method' name='__init__.Book.sheets-method'><b>sheets()</b></a> [<a href='#__init__.Book.sheets-method'>#</a>]</dt>
+<dd>
+<dl>
+<dt>Returns:</dt>
+<dd>
+A list of all sheets in the book.
+All sheets not already loaded will be loaded.</dd>
+</dl><br />
+</dd>
+<dt><a id='__init__.Book.style_name_map-attribute' name='__init__.Book.style_name_map-attribute'><b>style_name_map</b></a> [<a href='#__init__.Book.style_name_map-attribute'>#</a>]</dt>
+<dd>
+<p>This provides access via name to the extended format information for
+both built-in styles and user-defined styles.<br />
+It maps <i>name</i> to (<i>built_in</i>, <i>xf_index</i>), where:<br />
+<i>name</i> is either the name of a user-defined style,
+or the name of one of the built-in styles. Known built-in names are
+Normal, RowLevel_1 to RowLevel_7,
+ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]",
+"Currency [0]", Hyperlink, and "Followed Hyperlink".<br />
+<i>built_in</i> 1 = built-in style, 0 = user-defined<br />
+<i>xf_index</i> is an index into Book.xf_list.<br />
+References: OOo docs s6.99 (STYLE record); Excel UI Format/Style
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='__init__.Book.unload_sheet-method' name='__init__.Book.unload_sheet-method'><b>unload_sheet(sheet_name_or_index)</b></a> [<a href='#__init__.Book.unload_sheet-method'>#</a>]</dt>
+<dd>
+<dl>
+<dt><i>sheet_name_or_index</i></dt>
+<dd>
+Name or index of sheet to be unloaded.
+<br /> -- New in version 0.7.1
+</dd>
+</dl><br />
+</dd>
+<dt><a id='__init__.Book.user_name-attribute' name='__init__.Book.user_name-attribute'><b>user_name</b></a> [<a href='#__init__.Book.user_name-attribute'>#</a>]</dt>
+<dd>
+<p>What (if anything) is recorded as the name of the last user to save the file.</p>
+</dd>
+<dt><a id='__init__.Book.xf_list-attribute' name='__init__.Book.xf_list-attribute'><b>xf_list</b></a> [<a href='#__init__.Book.xf_list-attribute'>#</a>]</dt>
+<dd>
+<p>A list of XF class instances, each corresponding to an XF record.
+<br /> -- New in version 0.6.1
+</p></dd>
+</dl>
+<h2><a id='sheet.Cell-class' name='sheet.Cell-class'>The Cell Class</a></h2>
+<dl>
+<dt><b>Cell(ctype, value, xf_index=None)</b> (class) [<a href='#sheet.Cell-class'>#</a>]</dt>
+<dd>
+<p /><p>Contains the data for one cell.</p>
+
+<p>WARNING: You don't call this class yourself. You access Cell objects
+via methods of the Sheet object(s) that you found in the Book object that
+was returned when you called xlrd.open_workbook("myfile.xls").</p>
+<p> Cell objects have three attributes: <i>ctype</i> (an int), <i>value</i>
+(whose type depends on <i>ctype</i>) and <i>xf_index</i>.
+If "formatting_info" is not enabled when the workbook is opened, xf_index will be None.
+The following table describes the types of cells and how their values
+are represented in Python.</p>
+
+<table border="1" cellpadding="7">
+<tr>
+<th>Type symbol</th>
+<th>Type number</th>
+<th>Python value</th>
+</tr>
+<tr>
+<td>XL_CELL_EMPTY</td>
+<td align="center">0</td>
+<td>empty string u''</td>
+</tr>
+<tr>
+<td>XL_CELL_TEXT</td>
+<td align="center">1</td>
+<td>a Unicode string</td>
+</tr>
+<tr>
+<td>XL_CELL_NUMBER</td>
+<td align="center">2</td>
+<td>float</td>
+</tr>
+<tr>
+<td>XL_CELL_DATE</td>
+<td align="center">3</td>
+<td>float</td>
+</tr>
+<tr>
+<td>XL_CELL_BOOLEAN</td>
+<td align="center">4</td>
+<td>int; 1 means TRUE, 0 means FALSE</td>
+</tr>
+<tr>
+<td>XL_CELL_ERROR</td>
+<td align="center">5</td>
+<td>int representing internal Excel codes; for a text representation,
+refer to the supplied dictionary error_text_from_code</td>
+</tr>
+<tr>
+<td>XL_CELL_BLANK</td>
+<td align="center">6</td>
+<td>empty string u''. Note: this type will appear only when
+open_workbook(..., formatting_info=True) is used.</td>
+</tr>
+</table>
+<p />
+</dd>
+</dl>
+<h2><a id='sheet.Colinfo-class' name='sheet.Colinfo-class'>The Colinfo Class</a></h2>
+<dl>
+<dt><b>Colinfo</b> (class) [<a href='#sheet.Colinfo-class'>#</a>]</dt>
+<dd>
+<p>Width and default formatting information that applies to one or
+more columns in a sheet. Derived from COLINFO records.
+
+</p><p> Here is the default hierarchy for width, according to the OOo docs:
+
+<br />"""In BIFF3, if a COLINFO record is missing for a column,
+the width specified in the record DEFCOLWIDTH is used instead.
+
+<br />In BIFF4-BIFF7, the width set in this [COLINFO] record is only used,
+if the corresponding bit for this column is cleared in the GCW
+record, otherwise the column width set in the DEFCOLWIDTH record
+is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]).
+
+<br />In BIFF8, if a COLINFO record is missing for a column,
+the width specified in the record STANDARDWIDTH is used.
+If this [STANDARDWIDTH] record is also missing,
+the column width of the record DEFCOLWIDTH is used instead."""
+<br />
+
+Footnote: The docs on the GCW record say this:
+"""<br />
+If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH
+record. If a bit is cleared, the corresponding column uses the width set in the
+COLINFO record for this column.
+<br />If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if
+the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH
+record of the worksheet will be used instead.
+<br />"""<br />
+At the moment (2007-01-17) xlrd is going with the GCW version of the story.
+Reference to the source may be useful: see the computed_column_width(colx) method
+of the Sheet class.
+<br />-- New in version 0.6.1
+</p>
+</dd>
+<dt><a id='sheet.Colinfo.bit1_flag-attribute' name='sheet.Colinfo.bit1_flag-attribute'><b>bit1_flag</b></a> [<a href='#sheet.Colinfo.bit1_flag-attribute'>#</a>]</dt>
+<dd>
+<p>Value of a 1-bit flag whose purpose is unknown
+but is often seen set to 1</p>
+</dd>
+<dt><a id='sheet.Colinfo.collapsed-attribute' name='sheet.Colinfo.collapsed-attribute'><b>collapsed</b></a> [<a href='#sheet.Colinfo.collapsed-attribute'>#</a>]</dt>
+<dd>
+<p>1 = column is collapsed</p>
+</dd>
+<dt><a id='sheet.Colinfo.hidden-attribute' name='sheet.Colinfo.hidden-attribute'><b>hidden</b></a> [<a href='#sheet.Colinfo.hidden-attribute'>#</a>]</dt>
+<dd>
+<p>1 = column is hidden</p>
+</dd>
+<dt><a id='sheet.Colinfo.outline_level-attribute' name='sheet.Colinfo.outline_level-attribute'><b>outline_level</b></a> [<a href='#sheet.Colinfo.outline_level-attribute'>#</a>]</dt>
+<dd>
+<p>Outline level of the column, in range(7).
+(0 = no outline)</p>
+</dd>
+<dt><a id='sheet.Colinfo.width-attribute' name='sheet.Colinfo.width-attribute'><b>width</b></a> [<a href='#sheet.Colinfo.width-attribute'>#</a>]</dt>
+<dd>
+<p>Width of the column in 1/256 of the width of the zero character,
+using default font (first FONT record in the file).</p>
+</dd>
+<dt><a id='sheet.Colinfo.xf_index-attribute' name='sheet.Colinfo.xf_index-attribute'><b>xf_index</b></a> [<a href='#sheet.Colinfo.xf_index-attribute'>#</a>]</dt>
+<dd>
+<p>XF index to be used for formatting empty cells.</p>
+</dd>
+</dl>
+<h2><a id='formatting.EqNeAttrs-class' name='formatting.EqNeAttrs-class'>The EqNeAttrs Class</a></h2>
+<dl>
+<dt><b>EqNeAttrs</b> (class) [<a href='#formatting.EqNeAttrs-class'>#</a>]</dt>
+<dd>
+<p>This mixin class exists solely so that Format, Font, and XF.... objects
+can be compared by value of their attributes.</p>
+</dd>
+</dl>
+<h2><a id='formatting.Font-class' name='formatting.Font-class'>The Font Class</a></h2>
+<dl>
+<dt><b>Font</b> (class) [<a href='#formatting.Font-class'>#</a>]</dt>
+<dd>
+<p>An Excel "font" contains the details of not only what is normally
+considered a font, but also several other display attributes.
+Items correspond to those in the Excel UI's Format/Cells/Font tab.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='formatting.Font.bold-attribute' name='formatting.Font.bold-attribute'><b>bold</b></a> [<a href='#formatting.Font.bold-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Characters are bold. Redundant; see &quot;weight&quot; attribute.</p>
+</dd>
+<dt><a id='formatting.Font.character_set-attribute' name='formatting.Font.character_set-attribute'><b>character_set</b></a> [<a href='#formatting.Font.character_set-attribute'>#</a>]</dt>
+<dd>
+<p>Values: 0 = ANSI Latin, 1 = System default, 2 = Symbol,
+77 = Apple Roman,
+128 = ANSI Japanese Shift-JIS,
+129 = ANSI Korean (Hangul),
+130 = ANSI Korean (Johab),
+134 = ANSI Chinese Simplified GBK,
+136 = ANSI Chinese Traditional BIG5,
+161 = ANSI Greek,
+162 = ANSI Turkish,
+163 = ANSI Vietnamese,
+177 = ANSI Hebrew,
+178 = ANSI Arabic,
+186 = ANSI Baltic,
+204 = ANSI Cyrillic,
+222 = ANSI Thai,
+238 = ANSI Latin II (Central European),
+255 = OEM Latin I</p>
+</dd>
+<dt><a id='formatting.Font.colour_index-attribute' name='formatting.Font.colour_index-attribute'><b>colour_index</b></a> [<a href='#formatting.Font.colour_index-attribute'>#</a>]</dt>
+<dd>
+<p>An explanation of &quot;colour index&quot; is given in the Formatting
+section at the start of this document.</p>
+</dd>
+<dt><a id='formatting.Font.escapement-attribute' name='formatting.Font.escapement-attribute'><b>escapement</b></a> [<a href='#formatting.Font.escapement-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Superscript, 2 = Subscript.</p>
+</dd>
+<dt><a id='formatting.Font.family-attribute' name='formatting.Font.family-attribute'><b>family</b></a> [<a href='#formatting.Font.family-attribute'>#</a>]</dt>
+<dd>
+<p>0 = None (unknown or don't care)<br />
+1 = Roman (variable width, serifed)<br />
+2 = Swiss (variable width, sans-serifed)<br />
+3 = Modern (fixed width, serifed or sans-serifed)<br />
+4 = Script (cursive)<br />
+5 = Decorative (specialised, for example Old English, Fraktur)
+</p></dd>
+<dt><a id='formatting.Font.font_index-attribute' name='formatting.Font.font_index-attribute'><b>font_index</b></a> [<a href='#formatting.Font.font_index-attribute'>#</a>]</dt>
+<dd>
+<p>The 0-based index used to refer to this Font() instance.
+Note that index 4 is never used; xlrd supplies a dummy place-holder.</p>
+</dd>
+<dt><a id='formatting.Font.height-attribute' name='formatting.Font.height-attribute'><b>height</b></a> [<a href='#formatting.Font.height-attribute'>#</a>]</dt>
+<dd>
+<p>Height of the font (in twips). A twip = 1/20 of a point.</p>
+</dd>
+<dt><a id='formatting.Font.italic-attribute' name='formatting.Font.italic-attribute'><b>italic</b></a> [<a href='#formatting.Font.italic-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Characters are italic.</p>
+</dd>
+<dt><a id='formatting.Font.name-attribute' name='formatting.Font.name-attribute'><b>name</b></a> [<a href='#formatting.Font.name-attribute'>#</a>]</dt>
+<dd>
+<p>The name of the font. Example: u&quot;Arial&quot;</p>
+</dd>
+<dt><a id='formatting.Font.outline-attribute' name='formatting.Font.outline-attribute'><b>outline</b></a> [<a href='#formatting.Font.outline-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Font is outline style (Macintosh only)</p>
+</dd>
+<dt><a id='formatting.Font.shadow-attribute' name='formatting.Font.shadow-attribute'><b>shadow</b></a> [<a href='#formatting.Font.shadow-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Font is shadow style (Macintosh only)</p>
+</dd>
+<dt><a id='formatting.Font.struck_out-attribute' name='formatting.Font.struck_out-attribute'><b>struck_out</b></a> [<a href='#formatting.Font.struck_out-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Characters are struck out.</p>
+</dd>
+<dt><a id='formatting.Font.underline_type-attribute' name='formatting.Font.underline_type-attribute'><b>underline_type</b></a> [<a href='#formatting.Font.underline_type-attribute'>#</a>]</dt>
+<dd>
+<p>0 = None<br />
+1 = Single; 0x21 (33) = Single accounting<br />
+2 = Double; 0x22 (34) = Double accounting
+</p></dd>
+<dt><a id='formatting.Font.underlined-attribute' name='formatting.Font.underlined-attribute'><b>underlined</b></a> [<a href='#formatting.Font.underlined-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Characters are underlined. Redundant; see &quot;underline_type&quot; attribute.</p>
+</dd>
+<dt><a id='formatting.Font.weight-attribute' name='formatting.Font.weight-attribute'><b>weight</b></a> [<a href='#formatting.Font.weight-attribute'>#</a>]</dt>
+<dd>
+<p>Font weight (100-1000). Standard values are 400 for normal text
+and 700 for bold text.</p>
+</dd>
+</dl>
+<h2><a id='formatting.Format-class' name='formatting.Format-class'>The Format Class</a></h2>
+<dl>
+<dt><b>Format(format_key, ty, format_str)</b> (class) [<a href='#formatting.Format-class'>#</a>]</dt>
+<dd>
+<p>"Number format" information from a FORMAT record.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='formatting.Format.format_key-attribute' name='formatting.Format.format_key-attribute'><b>format_key</b></a> [<a href='#formatting.Format.format_key-attribute'>#</a>]</dt>
+<dd>
+<p>The key into Book.format_map</p>
+</dd>
+<dt><a id='formatting.Format.format_str-attribute' name='formatting.Format.format_str-attribute'><b>format_str</b></a> [<a href='#formatting.Format.format_str-attribute'>#</a>]</dt>
+<dd>
+<p>The format string</p>
+</dd>
+<dt><a id='formatting.Format.type-attribute' name='formatting.Format.type-attribute'><b>type</b></a> [<a href='#formatting.Format.type-attribute'>#</a>]</dt>
+<dd>
+<p>A classification that has been inferred from the format string.
+Currently, this is used only to distinguish between numbers and dates.
+<br />Values:
+<br />FUN = 0 # unknown
+<br />FDT = 1 # date
+<br />FNU = 2 # number
+<br />FGE = 3 # general
+<br />FTX = 4 # text
+</p></dd>
+</dl>
+<h2><a id='__init__.Name-class' name='__init__.Name-class'>The Name Class</a></h2>
+<dl>
+<dt><b>Name</b> (class) [<a href='#__init__.Name-class'>#</a>]</dt>
+<dd>
+<p>Information relating to a named reference, formula, macro, etc.
+<br /> -- New in version 0.6.0
+<br /> -- <i>Name information is <b>not</b> extracted from files older than
+Excel 5.0 (Book.biff_version &lt; 50)</i>
+</p></dd>
+<dt><a id='__init__.Name.area2d-method' name='__init__.Name.area2d-method'><b>area2d(clipped=True)</b></a> [<a href='#__init__.Name.area2d-method'>#</a>]</dt>
+<dd>
+<p>This is a convenience method for the use case where the name
+refers to one rectangular area in one worksheet.</p>
+<dl>
+<dt><i>clipped</i></dt>
+<dd>
+If true (the default), the returned rectangle is clipped
+to fit in (0, sheet.nrows, 0, sheet.ncols) -- it is guaranteed that
+0 &lt;= rowxlo &lt;= rowxhi &lt;= sheet.nrows and that the number of usable rows
+in the area (which may be zero) is rowxhi - rowxlo; likewise for columns.
+</dd>
+<dt>Returns:</dt>
+<dd>
+a tuple (sheet_object, rowxlo, rowxhi, colxlo, colxhi).</dd>
+<dt>Raises <b>XLRDError</b>:</dt><dd>
+The name is not a constant absolute reference
+to a single area in a single sheet.</dd>
+</dl><br />
+</dd>
+<dt><a id='__init__.Name.binary-attribute' name='__init__.Name.binary-attribute'><b>binary</b></a> [<a href='#__init__.Name.binary-attribute'>#</a>]</dt>
+<dd>
+<p>0 = Formula definition; 1 = Binary data<br /> <i>No examples have been sighted.</i>
+</p></dd>
+<dt><a id='__init__.Name.builtin-attribute' name='__init__.Name.builtin-attribute'><b>builtin</b></a> [<a href='#__init__.Name.builtin-attribute'>#</a>]</dt>
+<dd>
+<p>0 = User-defined name; 1 = Built-in name
+(common examples: Print_Area, Print_Titles; see OOo docs for full list)</p>
+</dd>
+<dt><a id='__init__.Name.cell-method' name='__init__.Name.cell-method'><b>cell()</b></a> [<a href='#__init__.Name.cell-method'>#</a>]</dt>
+<dd>
+<p>This is a convenience method for the frequent use case where the name
+refers to a single cell.</p>
+<dl>
+<dt>Returns:</dt>
+<dd>
+An instance of the Cell class.</dd>
+<dt>Raises <b>XLRDError</b>:</dt><dd>
+The name is not a constant absolute reference
+to a single cell.</dd>
+</dl><br />
+</dd>
+<dt><a id='__init__.Name.complex-attribute' name='__init__.Name.complex-attribute'><b>complex</b></a> [<a href='#__init__.Name.complex-attribute'>#</a>]</dt>
+<dd>
+<p>0 = Simple formula; 1 = Complex formula (array formula or user defined)<br />
+<i>No examples have been sighted.</i>
+</p></dd>
+<dt><a id='__init__.Name.func-attribute' name='__init__.Name.func-attribute'><b>func</b></a> [<a href='#__init__.Name.func-attribute'>#</a>]</dt>
+<dd>
+<p>0 = Command macro; 1 = Function macro. Relevant only if macro == 1</p>
+</dd>
+<dt><a id='__init__.Name.funcgroup-attribute' name='__init__.Name.funcgroup-attribute'><b>funcgroup</b></a> [<a href='#__init__.Name.funcgroup-attribute'>#</a>]</dt>
+<dd>
+<p>Function group. Relevant only if macro == 1; see OOo docs for values.</p>
+</dd>
+<dt><a id='__init__.Name.hidden-attribute' name='__init__.Name.hidden-attribute'><b>hidden</b></a> [<a href='#__init__.Name.hidden-attribute'>#</a>]</dt>
+<dd>
+<p>0 = Visible; 1 = Hidden</p>
+</dd>
+<dt><a id='__init__.Name.macro-attribute' name='__init__.Name.macro-attribute'><b>macro</b></a> [<a href='#__init__.Name.macro-attribute'>#</a>]</dt>
+<dd>
+<p>0 = Standard name; 1 = Macro name</p>
+</dd>
+<dt><a id='__init__.Name.name-attribute' name='__init__.Name.name-attribute'><b>name</b></a> [<a href='#__init__.Name.name-attribute'>#</a>]</dt>
+<dd>
+<p>A Unicode string. If builtin, decoded as per OOo docs.</p>
+</dd>
+<dt><a id='__init__.Name.name_index-attribute' name='__init__.Name.name_index-attribute'><b>name_index</b></a> [<a href='#__init__.Name.name_index-attribute'>#</a>]</dt>
+<dd>
+<p>The index of this object in book.name_obj_list</p>
+</dd>
+<dt><a id='__init__.Name.raw_formula-attribute' name='__init__.Name.raw_formula-attribute'><b>raw_formula</b></a> [<a href='#__init__.Name.raw_formula-attribute'>#</a>]</dt>
+<dd>
+<p>An 8-bit string.</p>
+</dd>
+<dt><a id='__init__.Name.result-attribute' name='__init__.Name.result-attribute'><b>result</b></a> [<a href='#__init__.Name.result-attribute'>#</a>]</dt>
+<dd>
+<p>The result of evaluating the formula, if any.
+If no formula, or evaluation of the formula encountered problems,
+the result is None. Otherwise the result is a single instance of the
+Operand class.</p>
+</dd>
+<dt><a id='__init__.Name.scope-attribute' name='__init__.Name.scope-attribute'><b>scope</b></a> [<a href='#__init__.Name.scope-attribute'>#</a>]</dt>
+<dd>
+<p>-1: The name is global (visible in all calculation sheets).<br />
+-2: The name belongs to a macro sheet or VBA sheet.<br />
+-3: The name is invalid.<br />
+0 &lt;= scope &lt; book.nsheets: The name is local to the sheet whose index is scope.
+</p></dd>
+<dt><a id='__init__.Name.vbasic-attribute' name='__init__.Name.vbasic-attribute'><b>vbasic</b></a> [<a href='#__init__.Name.vbasic-attribute'>#</a>]</dt>
+<dd>
+<p>0 = Sheet macro; 1 = VisualBasic macro. Relevant only if macro == 1</p>
+</dd>
+</dl>
+<h2><a id='formula.Operand-class' name='formula.Operand-class'>The Operand Class</a></h2>
+<dl>
+<dt><b>Operand(akind=None, avalue=None, arank=0, atext='?')</b> (class) [<a href='#formula.Operand-class'>#</a>]</dt>
+<dd>
+<p>Used in evaluating formulas.
+The following table describes the kinds and how their values
+are represented.</p>
+
+<table border="1" cellpadding="7">
+<tr>
+<th>Kind symbol</th>
+<th>Kind number</th>
+<th>Value representation</th>
+</tr>
+<tr>
+<td>oBOOL</td>
+<td align="center">3</td>
+<td>integer: 0 =&gt; False; 1 =&gt; True</td>
+</tr>
+<tr>
+<td>oERR</td>
+<td align="center">4</td>
+<td>None, or an int error code (same as XL_CELL_ERROR in the Cell class).
+</td>
+</tr>
+<tr>
+<td>oMSNG</td>
+<td align="center">5</td>
+<td>Used by Excel as a placeholder for a missing (not supplied) function
+argument. Should *not* appear as a final formula result. Value is None.</td>
+</tr>
+<tr>
+<td>oNUM</td>
+<td align="center">2</td>
+<td>A float. Note that there is no way of distinguishing dates.</td>
+</tr>
+<tr>
+<td>oREF</td>
+<td align="center">-1</td>
+<td>The value is either None or a non-empty list of
+absolute Ref3D instances.<br />
+</td>
+</tr>
+<tr>
+<td>oREL</td>
+<td align="center">-2</td>
+<td>The value is None or a non-empty list of
+fully or partially relative Ref3D instances.
+</td>
+</tr>
+<tr>
+<td>oSTRG</td>
+<td align="center">1</td>
+<td>A Unicode string.</td>
+</tr>
+<tr>
+<td>oUNK</td>
+<td align="center">0</td>
+<td>The kind is unknown or ambiguous. The value is None</td>
+</tr>
+</table>
+<p />
+</dd>
+<dt><a id='formula.Operand.kind-attribute' name='formula.Operand.kind-attribute'><b>kind</b></a> [<a href='#formula.Operand.kind-attribute'>#</a>]</dt>
+<dd>
+<p>oUNK means that the kind of operand is not known unambiguously.</p>
+</dd>
+<dt><a id='formula.Operand.text-attribute' name='formula.Operand.text-attribute'><b>text</b></a> [<a href='#formula.Operand.text-attribute'>#</a>]</dt>
+<dd>
+<p>The reconstituted text of the original formula. Function names will be
+in English irrespective of the original language, which doesn't seem
+to be recorded anywhere. The separator is &quot;,&quot;, not &quot;;&quot; or whatever else
+might be more appropriate for the end-user's locale; patches welcome.</p>
+</dd>
+<dt><a id='formula.Operand.value-attribute' name='formula.Operand.value-attribute'><b>value</b></a> [<a href='#formula.Operand.value-attribute'>#</a>]</dt>
+<dd>
+<p>None means that the actual value of the operand is a variable
+(depends on cell data), not a constant.</p>
+</dd>
+</dl>
+<h2><a id='formula.Ref3D-class' name='formula.Ref3D-class'>The Ref3D Class</a></h2>
+<dl>
+<dt><b>Ref3D(atuple)</b> (class) [<a href='#formula.Ref3D-class'>#</a>]</dt>
+<dd>
+<p /><p>Represents an absolute or relative 3-dimensional reference to a box
+of one or more cells.<br />
+-- New in version 0.6.0
+</p>
+
+<p>The <i>coords</i> attribute is a tuple of the form:<br />
+(shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)<br />
+where 0 &lt;= thingxlo &lt;= thingx &lt; thingxhi.<br />
+Note that it is quite possible to have thingx &gt; nthings; for example
+Print_Titles could have colxhi == 256 and/or rowxhi == 65536
+irrespective of how many columns/rows are actually used in the worksheet.
+The caller will need to decide how to handle this situation.
+Keyword: IndexError :-)
+</p>
+
+<p>The components of the coords attribute are also available as individual
+attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi.</p>
+
+<p>The <i>relflags</i> attribute is a 6-tuple of flags which indicate whether
+the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0).<br />
+Note that there is necessarily no information available as to what cell(s)
+the reference could possibly be relative to. The caller must decide what if
+any use to make of oREL operands. Note also that a partially relative
+reference may well be a typo.
+For example, define name A1Z10 as $a$1:$z10 (missing $ after z)
+while the cursor is on cell Sheet3!A27.<br />
+The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26)
+and relflags = (0, 0, 0, 1, 0, 0).<br />
+So far, only one possibility of a sheet-relative component in
+a reference has been noticed: a 2D reference located in the "current sheet".
+<br /> This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...).
+</p></dd>
+</dl>
+<h2><a id='sheet.Rowinfo-class' name='sheet.Rowinfo-class'>The Rowinfo Class</a></h2>
+<dl>
+<dt><b>Rowinfo</b> (class) [<a href='#sheet.Rowinfo-class'>#</a>]</dt>
+<dd>
+<p>Height and default formatting information that applies to a row in a sheet.
+Derived from ROW records.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='sheet.Rowinfo.additional_space_above-attribute' name='sheet.Rowinfo.additional_space_above-attribute'><b>additional_space_above</b></a> [<a href='#sheet.Rowinfo.additional_space_above-attribute'>#</a>]</dt>
+<dd>
+<p>This flag is set if the upper border of at least one cell in this row,
+or the lower border of at least one cell in the row above, is
+formatted with a thick line style. Thin and medium line styles are not
+taken into account.</p>
+</dd>
+<dt><a id='sheet.Rowinfo.additional_space_below-attribute' name='sheet.Rowinfo.additional_space_below-attribute'><b>additional_space_below</b></a> [<a href='#sheet.Rowinfo.additional_space_below-attribute'>#</a>]</dt>
+<dd>
+<p>This flag is set if the lower border of at least one cell in this row,
+or the upper border of at least one cell in the row below, is
+formatted with a medium or thick line style. Thin line styles are not
+taken into account.</p>
+</dd>
+<dt><a id='sheet.Rowinfo.has_default_height-attribute' name='sheet.Rowinfo.has_default_height-attribute'><b>has_default_height</b></a> [<a href='#sheet.Rowinfo.has_default_height-attribute'>#</a>]</dt>
+<dd>
+<p>0 = Row has custom height; 1 = Row has default height</p>
+</dd>
+<dt><a id='sheet.Rowinfo.has_default_xf_index-attribute' name='sheet.Rowinfo.has_default_xf_index-attribute'><b>has_default_xf_index</b></a> [<a href='#sheet.Rowinfo.has_default_xf_index-attribute'>#</a>]</dt>
+<dd>
+<p>1 = the xf_index attribute is usable; 0 = ignore it</p>
+</dd>
+<dt><a id='sheet.Rowinfo.height-attribute' name='sheet.Rowinfo.height-attribute'><b>height</b></a> [<a href='#sheet.Rowinfo.height-attribute'>#</a>]</dt>
+<dd>
+<p>Height of the row, in twips. One twip == 1/20 of a point</p>
+</dd>
+<dt><a id='sheet.Rowinfo.height_mismatch-attribute' name='sheet.Rowinfo.height_mismatch-attribute'><b>height_mismatch</b></a> [<a href='#sheet.Rowinfo.height_mismatch-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Row height and default font height do not match</p>
+</dd>
+<dt><a id='sheet.Rowinfo.hidden-attribute' name='sheet.Rowinfo.hidden-attribute'><b>hidden</b></a> [<a href='#sheet.Rowinfo.hidden-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Row is hidden (manually, or by a filter or outline group)</p>
+</dd>
+<dt><a id='sheet.Rowinfo.outline_group_starts_ends-attribute' name='sheet.Rowinfo.outline_group_starts_ends-attribute'><b>outline_group_starts_ends</b></a> [<a href='#sheet.Rowinfo.outline_group_starts_ends-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Outline group starts or ends here (depending on where the
+outline buttons are located, see WSBOOL record [TODO ??]),
+<i>and</i> is collapsed
+</p></dd>
+<dt><a id='sheet.Rowinfo.outline_level-attribute' name='sheet.Rowinfo.outline_level-attribute'><b>outline_level</b></a> [<a href='#sheet.Rowinfo.outline_level-attribute'>#</a>]</dt>
+<dd>
+<p>Outline level of the row</p>
+</dd>
+<dt><a id='sheet.Rowinfo.xf_index-attribute' name='sheet.Rowinfo.xf_index-attribute'><b>xf_index</b></a> [<a href='#sheet.Rowinfo.xf_index-attribute'>#</a>]</dt>
+<dd>
+<p>Index to default XF record for empty cells in this row.
+Don't use this if has_default_xf_index == 0.</p>
+</dd>
+</dl>
+<h2><a id='sheet.Sheet-class' name='sheet.Sheet-class'>The Sheet Class</a></h2>
+<dl>
+<dt><b>Sheet(book, position, name, number)</b> (class) [<a href='#sheet.Sheet-class'>#</a>]</dt>
+<dd>
+<p /><p>Contains the data for one worksheet.</p>
+
+<p>In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a
+column index, counting from zero.
+Negative values for row/column indexes and slice positions are supported in the expected fashion.</p>
+
+<p>For information about cell types and cell values, refer to the documentation of the Cell class.</p>
+
+<p>WARNING: You don't call this class yourself. You access Sheet objects via the Book object that
+was returned when you called xlrd.open_workbook("myfile.xls").</p>
+</dd>
+<dt><a id='sheet.Sheet.cell-method' name='sheet.Sheet.cell-method'><b>cell(rowx, colx)</b></a> [<a href='#sheet.Sheet.cell-method'>#</a>]</dt>
+<dd>
+<p>Cell object in the given row and column.</p>
+</dd>
+<dt><a id='sheet.Sheet.cell_type-method' name='sheet.Sheet.cell_type-method'><b>cell_type(rowx, colx)</b></a> [<a href='#sheet.Sheet.cell_type-method'>#</a>]</dt>
+<dd>
+<p>Type of the cell in the given row and column.
+Refer to the documentation of the Cell class.</p>
+</dd>
+<dt><a id='sheet.Sheet.cell_value-method' name='sheet.Sheet.cell_value-method'><b>cell_value(rowx, colx)</b></a> [<a href='#sheet.Sheet.cell_value-method'>#</a>]</dt>
+<dd>
+<p>Value of the cell in the given row and column.</p>
+</dd>
+<dt><a id='sheet.Sheet.cell_xf_index-method' name='sheet.Sheet.cell_xf_index-method'><b>cell_xf_index(rowx, colx)</b></a> [<a href='#sheet.Sheet.cell_xf_index-method'>#</a>]</dt>
+<dd>
+<p>XF index of the cell in the given row and column.
+This is an index into Book.xf_list.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='sheet.Sheet.col-method' name='sheet.Sheet.col-method'><b>col(colx)</b></a> [<a href='#sheet.Sheet.col-method'>#</a>]</dt>
+<dd>
+<p>Returns a sequence of the Cell objects in the given column.</p>
+</dd>
+<dt><a id='sheet.Sheet.col_label_ranges-attribute' name='sheet.Sheet.col_label_ranges-attribute'><b>col_label_ranges</b></a> [<a href='#sheet.Sheet.col_label_ranges-attribute'>#</a>]</dt>
+<dd>
+<p>List of address ranges of cells containing column labels.
+These are set up in Excel by Insert &gt; Name &gt; Labels &gt; Columns.
+<br /> -- New in version 0.6.0
+<br />How to deconstruct the list:
+<pre>
+for crange in thesheet.col_label_ranges:
+ rlo, rhi, clo, chi = crange
+ for rx in xrange(rlo, rhi):
+ for cx in xrange(clo, chi):
+ print "Column label at (rowx=%d, colx=%d) is %r" \
+ % (rx, cx, thesheet.cell_value(rx, cx))
+</pre>
+</p></dd>
+<dt><a id='sheet.Sheet.col_slice-method' name='sheet.Sheet.col_slice-method'><b>col_slice(colx, start_rowx=0, end_rowx=None)</b></a> [<a href='#sheet.Sheet.col_slice-method'>#</a>]</dt>
+<dd>
+<p>Returns a slice of the Cell objects in the given column.</p>
+</dd>
+<dt><a id='sheet.Sheet.col_types-method' name='sheet.Sheet.col_types-method'><b>col_types(colx, start_rowx=0, end_rowx=None)</b></a> [<a href='#sheet.Sheet.col_types-method'>#</a>]</dt>
+<dd>
+<p>Returns a slice of the types of the cells in the given column.</p>
+</dd>
+<dt><a id='sheet.Sheet.col_values-method' name='sheet.Sheet.col_values-method'><b>col_values(colx, start_rowx=0, end_rowx=None)</b></a> [<a href='#sheet.Sheet.col_values-method'>#</a>]</dt>
+<dd>
+<p>Returns a slice of the values of the cells in the given column.</p>
+</dd>
+<dt><a id='sheet.Sheet.colinfo_map-attribute' name='sheet.Sheet.colinfo_map-attribute'><b>colinfo_map</b></a> [<a href='#sheet.Sheet.colinfo_map-attribute'>#</a>]</dt>
+<dd>
+<p>The map from a column index to a Colinfo object. Often there is an entry
+in COLINFO records for all column indexes in range(257).
+Note that xlrd ignores the entry for the non-existent
+257th column. On the other hand, there may be no entry for unused columns.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='sheet.Sheet.computed_column_width-method' name='sheet.Sheet.computed_column_width-method'><b>computed_column_width(colx)</b></a> [<a href='#sheet.Sheet.computed_column_width-method'>#</a>]</dt>
+<dd>
+<p>Determine column display width.
+<br /> -- New in version 0.6.1
+<br />
+</p><dl>
+<dt><i>colx</i></dt>
+<dd>
+Index of the queried column, range 0 to 255.
+Note that it is possible to find out the width that will be used to display
+columns with no cell information e.g. column IV (colx=255).</dd>
+<dt>Returns:</dt>
+<dd>
+The column width that will be used for displaying
+the given column by Excel, in units of 1/256th of the width of a
+standard character (the digit zero in the first font).</dd>
+</dl><br />
+</dd>
+<dt><a id='sheet.Sheet.default_additional_space_above-attribute' name='sheet.Sheet.default_additional_space_above-attribute'><b>default_additional_space_above</b></a> [<a href='#sheet.Sheet.default_additional_space_above-attribute'>#</a>]</dt>
+<dd>
+<p>Default value to be used for a row if there is
+no ROW record for that row.
+From the <i>optional</i> DEFAULTROWHEIGHT record.
+</p></dd>
+<dt><a id='sheet.Sheet.default_additional_space_below-attribute' name='sheet.Sheet.default_additional_space_below-attribute'><b>default_additional_space_below</b></a> [<a href='#sheet.Sheet.default_additional_space_below-attribute'>#</a>]</dt>
+<dd>
+<p>Default value to be used for a row if there is
+no ROW record for that row.
+From the <i>optional</i> DEFAULTROWHEIGHT record.
+</p></dd>
+<dt><a id='sheet.Sheet.default_row_height-attribute' name='sheet.Sheet.default_row_height-attribute'><b>default_row_height</b></a> [<a href='#sheet.Sheet.default_row_height-attribute'>#</a>]</dt>
+<dd>
+<p>Default value to be used for a row if there is
+no ROW record for that row.
+From the <i>optional</i> DEFAULTROWHEIGHT record.
+</p></dd>
+<dt><a id='sheet.Sheet.default_row_height_mismatch-attribute' name='sheet.Sheet.default_row_height_mismatch-attribute'><b>default_row_height_mismatch</b></a> [<a href='#sheet.Sheet.default_row_height_mismatch-attribute'>#</a>]</dt>
+<dd>
+<p>Default value to be used for a row if there is
+no ROW record for that row.
+From the <i>optional</i> DEFAULTROWHEIGHT record.
+</p></dd>
+<dt><a id='sheet.Sheet.default_row_hidden-attribute' name='sheet.Sheet.default_row_hidden-attribute'><b>default_row_hidden</b></a> [<a href='#sheet.Sheet.default_row_hidden-attribute'>#</a>]</dt>
+<dd>
+<p>Default value to be used for a row if there is
+no ROW record for that row.
+From the <i>optional</i> DEFAULTROWHEIGHT record.
+</p></dd>
+<dt><a id='sheet.Sheet.defcolwidth-attribute' name='sheet.Sheet.defcolwidth-attribute'><b>defcolwidth</b></a> [<a href='#sheet.Sheet.defcolwidth-attribute'>#</a>]</dt>
+<dd>
+<p>Default column width from DEFCOLWIDTH record, else None.
+From the OOo docs:<br />
+"""Column width in characters, using the width of the zero character
+from default font (first FONT record in the file). Excel adds some
+extra space to the default width, depending on the default font and
+default font size. The algorithm how to exactly calculate the resulting
+column width is not known.<br />
+Example: The default width of 8 set in this record results in a column
+width of 8.43 using Arial font with a size of 10 points."""<br />
+For the default hierarchy, refer to the Colinfo class above.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='sheet.Sheet.gcw-attribute' name='sheet.Sheet.gcw-attribute'><b>gcw</b></a> [<a href='#sheet.Sheet.gcw-attribute'>#</a>]</dt>
+<dd>
+<p>A 256-element tuple corresponding to the contents of the GCW record for this sheet.
+If no such record, treat as all bits zero.
+Applies to BIFF4-7 only. See docs of Colinfo class for discussion.</p>
+</dd>
+<dt><a id='sheet.Sheet.merged_cells-attribute' name='sheet.Sheet.merged_cells-attribute'><b>merged_cells</b></a> [<a href='#sheet.Sheet.merged_cells-attribute'>#</a>]</dt>
+<dd>
+<p>List of address ranges of cells which have been merged.
+These are set up in Excel by Format &gt; Cells &gt; Alignment, then ticking
+the "Merge cells" box.
+<br /> -- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+<br />How to deconstruct the list:
+<pre>
+for crange in thesheet.merged_cells:
+ rlo, rhi, clo, chi = crange
+ for rowx in xrange(rlo, rhi):
+ for colx in xrange(clo, chi):
+ # cell (rlo, clo) (the top left one) will carry the data
+ # and formatting info; the remainder will be recorded as
+ # blank cells, but a renderer will apply the formatting info
+ # for the top left cell (e.g. border, pattern) to all cells in
+ # the range.
+</pre>
+</p></dd>
+<dt><a id='sheet.Sheet.name-attribute' name='sheet.Sheet.name-attribute'><b>name</b></a> [<a href='#sheet.Sheet.name-attribute'>#</a>]</dt>
+<dd>
+<p>Name of sheet.</p>
+</dd>
+<dt><a id='sheet.Sheet.ncols-attribute' name='sheet.Sheet.ncols-attribute'><b>ncols</b></a> [<a href='#sheet.Sheet.ncols-attribute'>#</a>]</dt>
+<dd>
+<p>Number of columns in sheet. A column index is in range(thesheet.ncols).</p>
+</dd>
+<dt><a id='sheet.Sheet.nrows-attribute' name='sheet.Sheet.nrows-attribute'><b>nrows</b></a> [<a href='#sheet.Sheet.nrows-attribute'>#</a>]</dt>
+<dd>
+<p>Number of rows in sheet. A row index is in range(thesheet.nrows).</p>
+</dd>
+<dt><a id='sheet.Sheet.row-method' name='sheet.Sheet.row-method'><b>row(rowx)</b></a> [<a href='#sheet.Sheet.row-method'>#</a>]</dt>
+<dd>
+<p>Returns a sequence of the Cell objects in the given row.</p>
+</dd>
+<dt><a id='sheet.Sheet.row_label_ranges-attribute' name='sheet.Sheet.row_label_ranges-attribute'><b>row_label_ranges</b></a> [<a href='#sheet.Sheet.row_label_ranges-attribute'>#</a>]</dt>
+<dd>
+<p>List of address ranges of cells containing row labels.
+For more details, see <i>col_label_ranges</i> above.
+<br /> -- New in version 0.6.0
+</p></dd>
+<dt><a id='sheet.Sheet.row_slice-method' name='sheet.Sheet.row_slice-method'><b>row_slice(rowx, start_colx=0, end_colx=None)</b></a> [<a href='#sheet.Sheet.row_slice-method'>#</a>]</dt>
+<dd>
+<p>Returns a slice of the Cell objects in the given row.</p>
+</dd>
+<dt><a id='sheet.Sheet.row_types-method' name='sheet.Sheet.row_types-method'><b>row_types(rowx, start_colx=0, end_colx=None)</b></a> [<a href='#sheet.Sheet.row_types-method'>#</a>]</dt>
+<dd>
+<p>Returns a slice of the types
+of the cells in the given row.</p>
+</dd>
+<dt><a id='sheet.Sheet.row_values-method' name='sheet.Sheet.row_values-method'><b>row_values(rowx, start_colx=0, end_colx=None)</b></a> [<a href='#sheet.Sheet.row_values-method'>#</a>]</dt>
+<dd>
+<p>Returns a slice of the values
+of the cells in the given row.</p>
+</dd>
+<dt><a id='sheet.Sheet.rowinfo_map-attribute' name='sheet.Sheet.rowinfo_map-attribute'><b>rowinfo_map</b></a> [<a href='#sheet.Sheet.rowinfo_map-attribute'>#</a>]</dt>
+<dd>
+<p>The map from a row index to a Rowinfo object. Note that it is possible
+to have missing entries -- at least one source of XLS files doesn't
+bother writing ROW records.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='sheet.Sheet.standardwidth-attribute' name='sheet.Sheet.standardwidth-attribute'><b>standardwidth</b></a> [<a href='#sheet.Sheet.standardwidth-attribute'>#</a>]</dt>
+<dd>
+<p>Default column width from STANDARDWIDTH record, else None.
+From the OOo docs:<br />
+"""Default width of the columns in 1/256 of the width of the zero
+character, using default font (first FONT record in the file)."""<br />
+For the default hierarchy, refer to the Colinfo class above.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='sheet.Sheet.visibility-attribute' name='sheet.Sheet.visibility-attribute'><b>visibility</b></a> [<a href='#sheet.Sheet.visibility-attribute'>#</a>]</dt>
+<dd>
+<p>Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden
+by user -- Format/Sheet/Unhide), 2 = &quot;very hidden&quot; (can be unhidden
+only by VBA macro).</p>
+</dd>
+</dl>
+<h2><a id='formatting.XF-class' name='formatting.XF-class'>The XF Class</a></h2>
+<dl>
+<dt><b>XF</b> (class) [<a href='#formatting.XF-class'>#</a>]</dt>
+<dd>
+<p>eXtended Formatting information for cells, rows, columns and styles.
+<br /> -- New in version 0.6.1
+
+</p><p>Each of the 6 flags below describes the validity of
+a specific group of attributes.
+<br />
+In cell XFs, flag==0 means the attributes of the parent style XF are used,
+(but only if the attributes are valid there); flag==1 means the attributes
+of this XF are used.<br />
+In style XFs, flag==0 means the attribute setting is valid; flag==1 means
+the attribute should be ignored.<br />
+Note that the API
+provides both "raw" XFs and "computed" XFs -- in the latter case, cell XFs
+have had the above inheritance mechanism applied.
+</p>
+</dd>
+<dt><a id='formatting.XF._alignment_flag-attribute' name='formatting.XF._alignment_flag-attribute'><b>_alignment_flag</b></a> [<a href='#formatting.XF._alignment_flag-attribute'>#</a>]</dt>
+<dd>
+</dd>
+<dt><a id='formatting.XF._background_flag-attribute' name='formatting.XF._background_flag-attribute'><b>_background_flag</b></a> [<a href='#formatting.XF._background_flag-attribute'>#</a>]</dt>
+<dd>
+</dd>
+<dt><a id='formatting.XF._border_flag-attribute' name='formatting.XF._border_flag-attribute'><b>_border_flag</b></a> [<a href='#formatting.XF._border_flag-attribute'>#</a>]</dt>
+<dd>
+</dd>
+<dt><a id='formatting.XF._font_flag-attribute' name='formatting.XF._font_flag-attribute'><b>_font_flag</b></a> [<a href='#formatting.XF._font_flag-attribute'>#</a>]</dt>
+<dd>
+</dd>
+<dt><a id='formatting.XF._format_flag-attribute' name='formatting.XF._format_flag-attribute'><b>_format_flag</b></a> [<a href='#formatting.XF._format_flag-attribute'>#</a>]</dt>
+<dd>
+</dd>
+<dt><a id='formatting.XF._protection_flag-attribute' name='formatting.XF._protection_flag-attribute'><b>_protection_flag</b></a> [<a href='#formatting.XF._protection_flag-attribute'>#</a>]</dt>
+<dd>
+<p>&#160;
+</p></dd>
+<dt><a id='formatting.XF.alignment-attribute' name='formatting.XF.alignment-attribute'><b>alignment</b></a> [<a href='#formatting.XF.alignment-attribute'>#</a>]</dt>
+<dd>
+<p>An instance of an XFAlignment object.</p>
+</dd>
+<dt><a id='formatting.XF.background-attribute' name='formatting.XF.background-attribute'><b>background</b></a> [<a href='#formatting.XF.background-attribute'>#</a>]</dt>
+<dd>
+<p>An instance of an XFBackground object.</p>
+</dd>
+<dt><a id='formatting.XF.border-attribute' name='formatting.XF.border-attribute'><b>border</b></a> [<a href='#formatting.XF.border-attribute'>#</a>]</dt>
+<dd>
+<p>An instance of an XFBorder object.</p>
+</dd>
+<dt><a id='formatting.XF.font_index-attribute' name='formatting.XF.font_index-attribute'><b>font_index</b></a> [<a href='#formatting.XF.font_index-attribute'>#</a>]</dt>
+<dd>
+<p>Index into Book.font_list</p>
+</dd>
+<dt><a id='formatting.XF.format_key-attribute' name='formatting.XF.format_key-attribute'><b>format_key</b></a> [<a href='#formatting.XF.format_key-attribute'>#</a>]</dt>
+<dd>
+<p>Key into Book.format_map
+</p><p>
+Warning: OOo docs on the XF record call this "Index to FORMAT record".
+It is not an index in the Python sense. It is a key to a map.
+It is true <i>only</i> for Excel 4.0 and earlier files
+that the key into format_map from an XF instance
+is the same as the index into format_list, and <i>only</i>
+if the index is less than 164.
+</p>
+</dd>
+<dt><a id='formatting.XF.is_style-attribute' name='formatting.XF.is_style-attribute'><b>is_style</b></a> [<a href='#formatting.XF.is_style-attribute'>#</a>]</dt>
+<dd>
+<p>0 = cell XF, 1 = style XF</p>
+</dd>
+<dt><a id='formatting.XF.parent_style_index-attribute' name='formatting.XF.parent_style_index-attribute'><b>parent_style_index</b></a> [<a href='#formatting.XF.parent_style_index-attribute'>#</a>]</dt>
+<dd>
+<p>cell XF: Index into Book.xf_list
+of this XF's style XF<br />
+style XF: 0xFFF
+</p></dd>
+<dt><a id='formatting.XF.protection-attribute' name='formatting.XF.protection-attribute'><b>protection</b></a> [<a href='#formatting.XF.protection-attribute'>#</a>]</dt>
+<dd>
+<p>An instance of an XFProtection object.</p>
+</dd>
+<dt><a id='formatting.XF.xf_index-attribute' name='formatting.XF.xf_index-attribute'><b>xf_index</b></a> [<a href='#formatting.XF.xf_index-attribute'>#</a>]</dt>
+<dd>
+<p>Index into Book.xf_list</p>
+</dd>
+</dl>
+<h2><a id='formatting.XFAlignment-class' name='formatting.XFAlignment-class'>The XFAlignment Class</a></h2>
+<dl>
+<dt><b>XFAlignment</b> (class) [<a href='#formatting.XFAlignment-class'>#</a>]</dt>
+<dd>
+<p>A collection of the alignment and similar attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Alignment tab.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='formatting.XFAlignment.hor_align-attribute' name='formatting.XFAlignment.hor_align-attribute'><b>hor_align</b></a> [<a href='#formatting.XFAlignment.hor_align-attribute'>#</a>]</dt>
+<dd>
+<p>Values: section 6.115 (p 214) of OOo docs</p>
+</dd>
+<dt><a id='formatting.XFAlignment.indent_level-attribute' name='formatting.XFAlignment.indent_level-attribute'><b>indent_level</b></a> [<a href='#formatting.XFAlignment.indent_level-attribute'>#</a>]</dt>
+<dd>
+<p>A number in range(15).</p>
+</dd>
+<dt><a id='formatting.XFAlignment.rotation-attribute' name='formatting.XFAlignment.rotation-attribute'><b>rotation</b></a> [<a href='#formatting.XFAlignment.rotation-attribute'>#</a>]</dt>
+<dd>
+<p>Values: section 6.115 (p 215) of OOo docs.<br />
+Note: file versions BIFF7 and earlier use the documented
+"orientation" attribute; this will be mapped (without loss)
+into "rotation".
+</p></dd>
+<dt><a id='formatting.XFAlignment.shrink_to_fit-attribute' name='formatting.XFAlignment.shrink_to_fit-attribute'><b>shrink_to_fit</b></a> [<a href='#formatting.XFAlignment.shrink_to_fit-attribute'>#</a>]</dt>
+<dd>
+<p>1 = shrink font size to fit text into cell.</p>
+</dd>
+<dt><a id='formatting.XFAlignment.text_direction-attribute' name='formatting.XFAlignment.text_direction-attribute'><b>text_direction</b></a> [<a href='#formatting.XFAlignment.text_direction-attribute'>#</a>]</dt>
+<dd>
+<p>0 = according to context; 1 = left-to-right; 2 = right-to-left</p>
+</dd>
+<dt><a id='formatting.XFAlignment.text_wrapped-attribute' name='formatting.XFAlignment.text_wrapped-attribute'><b>text_wrapped</b></a> [<a href='#formatting.XFAlignment.text_wrapped-attribute'>#</a>]</dt>
+<dd>
+<p>1 = text is wrapped at right margin</p>
+</dd>
+<dt><a id='formatting.XFAlignment.vert_align-attribute' name='formatting.XFAlignment.vert_align-attribute'><b>vert_align</b></a> [<a href='#formatting.XFAlignment.vert_align-attribute'>#</a>]</dt>
+<dd>
+<p>Values: section 6.115 (p 215) of OOo docs</p>
+</dd>
+</dl>
+<h2><a id='formatting.XFBackground-class' name='formatting.XFBackground-class'>The XFBackground Class</a></h2>
+<dl>
+<dt><b>XFBackground</b> (class) [<a href='#formatting.XFBackground-class'>#</a>]</dt>
+<dd>
+<p>A collection of the background-related attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Patterns tab.
+An explanation of "colour index" is given in the Formatting
+section at the start of this document.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='formatting.XFBackground.background_colour_index-attribute' name='formatting.XFBackground.background_colour_index-attribute'><b>background_colour_index</b></a> [<a href='#formatting.XFBackground.background_colour_index-attribute'>#</a>]</dt>
+<dd>
+<p>See section 3.11 of the OOo docs.</p>
+</dd>
+<dt><a id='formatting.XFBackground.fill_pattern-attribute' name='formatting.XFBackground.fill_pattern-attribute'><b>fill_pattern</b></a> [<a href='#formatting.XFBackground.fill_pattern-attribute'>#</a>]</dt>
+<dd>
+<p>See section 3.11 of the OOo docs.</p>
+</dd>
+<dt><a id='formatting.XFBackground.pattern_colour_index-attribute' name='formatting.XFBackground.pattern_colour_index-attribute'><b>pattern_colour_index</b></a> [<a href='#formatting.XFBackground.pattern_colour_index-attribute'>#</a>]</dt>
+<dd>
+<p>See section 3.11 of the OOo docs.</p>
+</dd>
+</dl>
+<h2><a id='formatting.XFBorder-class' name='formatting.XFBorder-class'>The XFBorder Class</a></h2>
+<dl>
+<dt><b>XFBorder</b> (class) [<a href='#formatting.XFBorder-class'>#</a>]</dt>
+<dd>
+<p /><p>A collection of the border-related attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Border tab.</p>
+<p> An explanation of &quot;colour index&quot; is given in the Formatting
+section at the start of this document.
+There are five line style attributes; possible values and the
+associated meanings are:
+0&#160;=&#160;No line,
+1&#160;=&#160;Thin,
+2&#160;=&#160;Medium,
+3&#160;=&#160;Dashed,
+4&#160;=&#160;Dotted,
+5&#160;=&#160;Thick,
+6&#160;=&#160;Double,
+7&#160;=&#160;Hair,
+8&#160;=&#160;Medium dashed,
+9&#160;=&#160;Thin dash-dotted,
+10&#160;=&#160;Medium dash-dotted,
+11&#160;=&#160;Thin dash-dot-dotted,
+12&#160;=&#160;Medium dash-dot-dotted,
+13&#160;=&#160;Slanted medium dash-dotted.
+The line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only.
+For pictures of the line styles, refer to OOo docs s3.10 (p22)
+&quot;Line Styles for Cell Borders (BIFF3-BIFF8)&quot;.</p>
+<br /> -- New in version 0.6.1
+</dd>
+<dt><a id='formatting.XFBorder.bottom_colour_index-attribute' name='formatting.XFBorder.bottom_colour_index-attribute'><b>bottom_colour_index</b></a> [<a href='#formatting.XFBorder.bottom_colour_index-attribute'>#</a>]</dt>
+<dd>
+<p>The colour index for the cell's bottom line</p>
+</dd>
+<dt><a id='formatting.XFBorder.bottom_line_style-attribute' name='formatting.XFBorder.bottom_line_style-attribute'><b>bottom_line_style</b></a> [<a href='#formatting.XFBorder.bottom_line_style-attribute'>#</a>]</dt>
+<dd>
+<p>The line style for the cell's bottom line</p>
+</dd>
+<dt><a id='formatting.XFBorder.diag_colour_index-attribute' name='formatting.XFBorder.diag_colour_index-attribute'><b>diag_colour_index</b></a> [<a href='#formatting.XFBorder.diag_colour_index-attribute'>#</a>]</dt>
+<dd>
+<p>The colour index for the cell's diagonal lines, if any</p>
+</dd>
+<dt><a id='formatting.XFBorder.diag_down-attribute' name='formatting.XFBorder.diag_down-attribute'><b>diag_down</b></a> [<a href='#formatting.XFBorder.diag_down-attribute'>#</a>]</dt>
+<dd>
+<p>1 = draw a diagonal from top left to bottom right</p>
+</dd>
+<dt><a id='formatting.XFBorder.diag_line_style-attribute' name='formatting.XFBorder.diag_line_style-attribute'><b>diag_line_style</b></a> [<a href='#formatting.XFBorder.diag_line_style-attribute'>#</a>]</dt>
+<dd>
+<p>The line style for the cell's diagonal lines, if any</p>
+</dd>
+<dt><a id='formatting.XFBorder.diag_up-attribute' name='formatting.XFBorder.diag_up-attribute'><b>diag_up</b></a> [<a href='#formatting.XFBorder.diag_up-attribute'>#</a>]</dt>
+<dd>
+<p>1 = draw a diagonal from bottom left to top right</p>
+</dd>
+<dt><a id='formatting.XFBorder.left_colour_index-attribute' name='formatting.XFBorder.left_colour_index-attribute'><b>left_colour_index</b></a> [<a href='#formatting.XFBorder.left_colour_index-attribute'>#</a>]</dt>
+<dd>
+<p>The colour index for the cell's left line</p>
+</dd>
+<dt><a id='formatting.XFBorder.left_line_style-attribute' name='formatting.XFBorder.left_line_style-attribute'><b>left_line_style</b></a> [<a href='#formatting.XFBorder.left_line_style-attribute'>#</a>]</dt>
+<dd>
+<p>The line style for the cell's left line</p>
+</dd>
+<dt><a id='formatting.XFBorder.right_colour_index-attribute' name='formatting.XFBorder.right_colour_index-attribute'><b>right_colour_index</b></a> [<a href='#formatting.XFBorder.right_colour_index-attribute'>#</a>]</dt>
+<dd>
+<p>The colour index for the cell's right line</p>
+</dd>
+<dt><a id='formatting.XFBorder.right_line_style-attribute' name='formatting.XFBorder.right_line_style-attribute'><b>right_line_style</b></a> [<a href='#formatting.XFBorder.right_line_style-attribute'>#</a>]</dt>
+<dd>
+<p>The line style for the cell's right line</p>
+</dd>
+<dt><a id='formatting.XFBorder.top_colour_index-attribute' name='formatting.XFBorder.top_colour_index-attribute'><b>top_colour_index</b></a> [<a href='#formatting.XFBorder.top_colour_index-attribute'>#</a>]</dt>
+<dd>
+<p>The colour index for the cell's top line</p>
+</dd>
+<dt><a id='formatting.XFBorder.top_line_style-attribute' name='formatting.XFBorder.top_line_style-attribute'><b>top_line_style</b></a> [<a href='#formatting.XFBorder.top_line_style-attribute'>#</a>]</dt>
+<dd>
+<p>The line style for the cell's top line</p>
+</dd>
+</dl>
+<h2><a id='formatting.XFProtection-class' name='formatting.XFProtection-class'>The XFProtection Class</a></h2>
+<dl>
+<dt><b>XFProtection</b> (class) [<a href='#formatting.XFProtection-class'>#</a>]</dt>
+<dd>
+<p>A collection of the protection-related attributes of an XF record.
+Items correspond to those in the Excel UI's Format/Cells/Protection tab.
+Note the OOo docs include the "cell or style" bit
+in this bundle of attributes.
+This is incorrect; the bit is used in determining which bundles to use.
+<br /> -- New in version 0.6.1
+</p></dd>
+<dt><a id='formatting.XFProtection.cell_locked-attribute' name='formatting.XFProtection.cell_locked-attribute'><b>cell_locked</b></a> [<a href='#formatting.XFProtection.cell_locked-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Cell is prevented from being changed, moved, resized, or deleted
+(only if the sheet is protected).</p>
+</dd>
+<dt><a id='formatting.XFProtection.formula_hidden-attribute' name='formatting.XFProtection.formula_hidden-attribute'><b>formula_hidden</b></a> [<a href='#formatting.XFProtection.formula_hidden-attribute'>#</a>]</dt>
+<dd>
+<p>1 = Hide formula so that it doesn't appear in the formula bar when
+the cell is selected (only if the sheet is protected).</p>
+</dd>
+</dl>
+</body></html>
diff --git a/tablib/packages/xlrd/examples/namesdemo.xls b/tablib/packages/xlrd/examples/namesdemo.xls
new file mode 100644
index 0000000..8a16865
--- /dev/null
+++ b/tablib/packages/xlrd/examples/namesdemo.xls
Binary files differ
diff --git a/tablib/packages/xlrd/examples/xlrdnameAPIdemo.py b/tablib/packages/xlrd/examples/xlrdnameAPIdemo.py
new file mode 100644
index 0000000..6cd60df
--- /dev/null
+++ b/tablib/packages/xlrd/examples/xlrdnameAPIdemo.py
@@ -0,0 +1,178 @@
+# -*- coding: cp1252 -*-
+
+##
+# Module/script example of the xlrd API for extracting information
+# about named references, named constants, etc.
+#
+# <p>Copyright © 2006 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
+##
+
+import xlrd
+import sys
+import glob
+
+def scope_as_string(book, scope):
+ if 0 <= scope < book.nsheets:
+ return "sheet #%d (%r)" % (scope, book.sheet_names()[scope])
+ if scope == -1:
+ return "Global"
+ if scope == -2:
+ return "Macro/VBA"
+ return "Unknown scope value (%r)" % scope
+
+def do_scope_query(book, scope_strg, show_contents=0, f=sys.stdout):
+ try:
+ qscope = int(scope_strg)
+ except ValueError:
+ if scope_strg == "*":
+ qscope = None # means "all'
+ else:
+ # so assume it's a sheet name ...
+ qscope = book.sheet_names().index(scope_strg)
+ print >> f, "%r => %d" % (scope_strg, qscope)
+ for nobj in book.name_obj_list:
+ if qscope is None or nobj.scope == qscope:
+ show_name_object(book, nobj, show_contents, f)
+
+def show_name_details(book, name, show_contents=0, f=sys.stdout):
+ """
+ book -- Book object obtained from xlrd.open_workbook().
+ name -- The name that's being investigated.
+ show_contents -- 0: Don't; 1: Non-empty cells only; 2: All cells
+ f -- Open output file handle.
+ """
+ name_lcase = name.lower() # Excel names are case-insensitive.
+ nobj_list = book.name_map.get(name_lcase)
+ if not nobj_list:
+ print >> f, "%r: unknown name" % name
+ return
+ for nobj in nobj_list:
+ show_name_object(book, nobj, show_contents, f)
+
+def show_name_details_in_scope(
+ book, name, scope_strg, show_contents=0, f=sys.stdout,
+ ):
+ try:
+ scope = int(scope_strg)
+ except ValueError:
+ # so assume it's a sheet name ...
+ scope = book.sheet_names().index(scope_strg)
+ print >> f, "%r => %d" % (scope_strg, scope)
+ name_lcase = name.lower() # Excel names are case-insensitive.
+ while 1:
+ nobj = book.name_and_scope_map.get((name_lcase, scope))
+ if nobj:
+ break
+ print >> f, "Name %r not found in scope %d" % (name, scope)
+ if scope == -1:
+ return
+ scope = -1 # Try again with global scope
+ print >> f, "Name %r found in scope %d" % (name, scope)
+ show_name_object(book, nobj, show_contents, f)
+
+def showable_cell_value(celltype, cellvalue, datemode):
+ if celltype == xlrd.XL_CELL_DATE:
+ try:
+ showval = xlrd.xldate_as_tuple(cellvalue, datemode)
+ except xlrd.XLDateError:
+ e1, e2 = sys.exc_info()[:2]
+ showval = "%s:%s" % (e1.__name__, e2)
+ elif celltype == xlrd.XL_CELL_ERROR:
+ showval = xlrd.error_text_from_code.get(
+ cellvalue, '<Unknown error code 0x%02x>' % cellvalue)
+ else:
+ showval = cellvalue
+ return showval
+
+def show_name_object(book, nobj, show_contents=0, f=sys.stdout):
+ print >> f, "\nName: %r, scope: %r (%s)" \
+ % (nobj.name, nobj.scope, scope_as_string(book, nobj.scope))
+ res = nobj.result
+ print >> f, "Formula eval result: %r" % res
+ if res is None:
+ return
+ # result should be an instance of the Operand class
+ kind = res.kind
+ value = res.value
+ if kind >= 0:
+ # A scalar, or unknown ... you've seen all there is to see.
+ pass
+ elif kind == xlrd.oREL:
+ # A list of Ref3D objects representing *relative* ranges
+ for i in xrange(len(value)):
+ ref3d = value[i]
+ print >> f, "Range %d: %r ==> %s"% (i, ref3d.coords, xlrd.rangename3drel(book, ref3d))
+ elif kind == xlrd.oREF:
+ # A list of Ref3D objects
+ for i in xrange(len(value)):
+ ref3d = value[i]
+ print >> f, "Range %d: %r ==> %s"% (i, ref3d.coords, xlrd.rangename3d(book, ref3d))
+ if not show_contents:
+ continue
+ datemode = book.datemode
+ for shx in xrange(ref3d.shtxlo, ref3d.shtxhi):
+ sh = book.sheet_by_index(shx)
+ print >> f, " Sheet #%d (%s)" % (shx, sh.name)
+ rowlim = min(ref3d.rowxhi, sh.nrows)
+ collim = min(ref3d.colxhi, sh.ncols)
+ for rowx in xrange(ref3d.rowxlo, rowlim):
+ for colx in xrange(ref3d.colxlo, collim):
+ cty = sh.cell_type(rowx, colx)
+ if cty == xlrd.XL_CELL_EMPTY and show_contents == 1:
+ continue
+ cval = sh.cell_value(rowx, colx)
+ sval = showable_cell_value(cty, cval, datemode)
+ print >> f, " (%3d,%3d) %-5s: %r" \
+ % (rowx, colx, xlrd.cellname(rowx, colx), sval)
+
+if __name__ == "__main__":
+ def usage():
+ text = """
+usage: xlrdnameAIPdemo.py glob_pattern name scope show_contents
+
+where:
+ "glob_pattern" designates a set of files
+ "name" is a name or '*' (all names)
+ "scope" is -1 (global) or a sheet number
+ or a sheet name or * (all scopes)
+ "show_contents" is one of 0 (no show),
+ 1 (only non-empty cells), or 2 (all cells)
+
+Examples (script name and glob_pattern arg omitted for brevity)
+ [Searching through book.name_obj_list]
+ * * 0 lists all names
+ * * 1 lists all names, showing referenced non-empty cells
+ * 1 0 lists all names local to the 2nd sheet
+ * Northern 0 lists all names local to the 'Northern' sheet
+ * -1 0 lists all names with global scope
+ [Initial direct access through book.name_map]
+ Sales * 0 lists all occurrences of "Sales" in any scope
+ [Direct access through book.name_and_scope_map]
+ Revenue -1 0 checks if "Revenue" exists in global scope
+
+"""
+ sys.stdout.write(text)
+
+ if len(sys.argv) != 5:
+ usage()
+ sys.exit(0)
+ arg_pattern = sys.argv[1] # glob pattern e.g. "foo*.xls"
+ arg_name = sys.argv[2] # see below
+ arg_scope = sys.argv[3] # see below
+ arg_show_contents = int(sys.argv[4]) # 0: no show, 1: only non-empty cells,
+ # 2: all cells
+ for fname in glob.glob(arg_pattern):
+ book = xlrd.open_workbook(fname)
+ if arg_name == "*":
+ # Examine book.name_obj_list to find all names
+ # in a given scope ("*" => all scopes)
+ do_scope_query(book, arg_scope, arg_show_contents)
+ elif arg_scope == "*":
+ # Using book.name_map to find all usage of a name.
+ show_name_details(book, arg_name, arg_show_contents)
+ else:
+ # Using book.name_and_scope_map to find which if any instances
+ # of a name are visible in the given scope, which can be supplied
+ # as -1 (global) or a sheet number or a sheet name.
+ show_name_details_in_scope(book, arg_name, arg_scope, arg_show_contents)
diff --git a/tablib/packages/xlrd/formatting.py b/tablib/packages/xlrd/formatting.py
new file mode 100644
index 0000000..302764b
--- /dev/null
+++ b/tablib/packages/xlrd/formatting.py
@@ -0,0 +1,1256 @@
+# -*- coding:cp1252 -*-
+
+##
+# Module for formatting information.
+#
+# <p>Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under
+# a BSD-style licence.</p>
+##
+
+# No part of the content of this file was derived from the works of David Giffin.
+
+# 2009-05-31 SJM Fixed problem with non-zero reserved bits in some STYLE records in Mac Excel files
+# 2008-08-03 SJM Ignore PALETTE record when Book.formatting_info is false
+# 2008-08-03 SJM Tolerate up to 4 bytes trailing junk on PALETTE record
+# 2008-05-10 SJM Do some XF checks only when Book.formatting_info is true
+# 2008-02-08 SJM Preparation for Excel 2.0 support
+# 2008-02-03 SJM Another tweak to is_date_format_string()
+# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
+# 2007-10-13 SJM Warning: style XF whose parent XF index != 0xFFF
+# 2007-09-08 SJM Work around corrupt STYLE record
+# 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file
+
+# Module-wide debug switch: when true, the record handlers in this module
+# take their verbose logging paths regardless of the book's verbosity.
+DEBUG = 0
+import copy, re
+from timemachine import *
+from biffh import BaseObject, unpack_unicode, unpack_string, \
+ upkbits, upkbitsL, fprintf, \
+ FUN, FDT, FNU, FGE, FTX, XL_CELL_NUMBER, XL_CELL_DATE, \
+ XL_FORMAT, XL_FORMAT2, \
+ XLRDError
+from struct import unpack
+
+# Default colour palette as (red, green, blue) tuples, 56 entries.
+# default_palette selects this table for BIFF versions 50 and 70.
+excel_default_palette_b5 = (
+    ( 0, 0, 0), (255, 255, 255), (255, 0, 0), ( 0, 255, 0),
+    ( 0, 0, 255), (255, 255, 0), (255, 0, 255), ( 0, 255, 255),
+    (128, 0, 0), ( 0, 128, 0), ( 0, 0, 128), (128, 128, 0),
+    (128, 0, 128), ( 0, 128, 128), (192, 192, 192), (128, 128, 128),
+    (153, 153, 255), (153, 51, 102), (255, 255, 204), (204, 255, 255),
+    (102, 0, 102), (255, 128, 128), ( 0, 102, 204), (204, 204, 255),
+    ( 0, 0, 128), (255, 0, 255), (255, 255, 0), ( 0, 255, 255),
+    (128, 0, 128), (128, 0, 0), ( 0, 128, 128), ( 0, 0, 255),
+    ( 0, 204, 255), (204, 255, 255), (204, 255, 204), (255, 255, 153),
+    (153, 204, 255), (255, 153, 204), (204, 153, 255), (227, 227, 227),
+    ( 51, 102, 255), ( 51, 204, 204), (153, 204, 0), (255, 204, 0),
+    (255, 153, 0), (255, 102, 0), (102, 102, 153), (150, 150, 150),
+    ( 0, 51, 102), ( 51, 153, 102), ( 0, 51, 0), ( 51, 51, 0),
+    (153, 51, 0), (153, 51, 102), ( 51, 51, 153), ( 51, 51, 51),
+    )
+
+# The first 16 entries of the table above; default_palette selects this
+# shorter table for BIFF versions 20 through 45.
+excel_default_palette_b2 = excel_default_palette_b5[:16]
+
+# Following two tables borrowed from Gnumeric 1.4 source.
+# The first one is kept for reference only: the default_palette mapping
+# in this module does not refer to it.
+excel_default_palette_b5_gnumeric_14 = (
+    #### dodgy; didn't match Excel results
+    ( 0, 0, 0), (255,255,255), (255, 0, 0), ( 0,255, 0),
+    ( 0, 0,255), (255,255, 0), (255, 0,255), ( 0,255,255),
+    (128, 0, 0), ( 0,128, 0), ( 0, 0,128), (128,128, 0),
+    (128, 0,128), ( 0,128,128), (192,192,192), (128,128,128),
+    (128,128,255), (128, 32, 96), (255,255,192), (160,224,224),
+    ( 96, 0,128), (255,128,128), ( 0,128,192), (192,192,255),
+    ( 0, 0,128), (255, 0,255), (255,255, 0), ( 0,255,255),
+    (128, 0,128), (128, 0, 0), ( 0,128,128), ( 0, 0,255),
+    ( 0,204,255), (105,255,255), (204,255,204), (255,255,153),
+    (166,202,240), (204,156,204), (204,153,255), (227,227,227),
+    ( 51,102,255), ( 51,204,204), ( 51,153, 51), (153,153, 51),
+    (153,102, 51), (153,102,102), (102,102,153), (150,150,150),
+    ( 51, 51,204), ( 51,102,102), ( 0, 51, 0), ( 51, 51, 0),
+    (102, 51, 0), (153, 51,102), ( 51, 51,153), ( 66, 66, 66),
+    )
+# Default palette selected by default_palette for BIFF version 80.
+# initialise_colour_map() also takes the 8 invariant colours (colour
+# indexes 0-7) from the start of this table.
+excel_default_palette_b8 = ( # (red, green, blue)
+    ( 0, 0, 0), (255,255,255), (255, 0, 0), ( 0,255, 0),
+    ( 0, 0,255), (255,255, 0), (255, 0,255), ( 0,255,255),
+    (128, 0, 0), ( 0,128, 0), ( 0, 0,128), (128,128, 0),
+    (128, 0,128), ( 0,128,128), (192,192,192), (128,128,128),
+    (153,153,255), (153, 51,102), (255,255,204), (204,255,255),
+    (102, 0,102), (255,128,128), ( 0,102,204), (204,204,255),
+    ( 0, 0,128), (255, 0,255), (255,255, 0), ( 0,255,255),
+    (128, 0,128), (128, 0, 0), ( 0,128,128), ( 0, 0,255),
+    ( 0,204,255), (204,255,255), (204,255,204), (255,255,153),
+    (153,204,255), (255,153,204), (204,153,255), (255,204,153),
+    ( 51,102,255), ( 51,204,204), (153,204, 0), (255,204, 0),
+    (255,153, 0), (255,102, 0), (102,102,153), (150,150,150),
+    ( 0, 51,102), ( 51,153,102), ( 0, 51, 0), ( 51, 51, 0),
+    (153, 51, 0), (153, 51,102), ( 51, 51,153), ( 51, 51, 51),
+    )
+
+# Maps a BIFF version number to the default palette table for that
+# version.  initialise_colour_map() indexes this with book.biff_version,
+# so a version that is not listed here raises KeyError there.
+default_palette = {
+    80: excel_default_palette_b8,
+    70: excel_default_palette_b5,
+    50: excel_default_palette_b5,
+    45: excel_default_palette_b2,
+    40: excel_default_palette_b2,
+    30: excel_default_palette_b2,
+    21: excel_default_palette_b2,
+    20: excel_default_palette_b2,
+    }
+
+"""
+00H = Normal
+01H = RowLevel_lv (see next field)
+02H = ColLevel_lv (see next field)
+03H = Comma
+04H = Currency
+05H = Percent
+06H = Comma [0] (BIFF4-BIFF8)
+07H = Currency [0] (BIFF4-BIFF8)
+08H = Hyperlink (BIFF8)
+09H = Followed Hyperlink (BIFF8)
+"""
+# Names of the built-in styles, indexed by the built-in style id byte of
+# a STYLE record (00H-09H, as listed above).  handle_style() appends the
+# outline level + 1 to the "RowLevel_" and "ColLevel_" entries.
+built_in_style_names = [
+    "Normal",
+    "RowLevel_",
+    "ColLevel_",
+    "Comma",
+    "Currency",
+    "Percent",
+    "Comma [0]",
+    "Currency [0]",
+    "Hyperlink",
+    "Followed Hyperlink",
+    ]
+
+##
+# Reset book.colour_map and book.colour_indexes_used.
+# When book.formatting_info is false both are simply left empty.
+# Otherwise the map is seeded with the 8 invariant colours (indexes 0-7),
+# the default palette for book.biff_version (from index 8 upwards) and a
+# few "special" indexes whose RGB value is not known (stored as None).
+def initialise_colour_map(book):
+    colour_map = {}
+    book.colour_map = colour_map
+    book.colour_indexes_used = {}
+    if not book.formatting_info:
+        return
+    # The 8 invariant colours
+    for index, rgb in zip(range(8), excel_default_palette_b8[:8]):
+        colour_map[index] = rgb
+    # The version-dependent default palette follows immediately
+    next_index = 8
+    for rgb in default_palette[book.biff_version]:
+        colour_map[next_index] = rgb
+        next_index += 1
+    # The specials -- None means the RGB value is not known
+    specials = (
+        next_index,     # System window text colour for border lines
+        next_index + 1, # System window background colour for pattern background
+        0x51,           # System ToolTip text colour (used in note objects)
+        0x7FFF,         # 32767, system window text colour for fonts
+        )
+    for index in specials:
+        colour_map[index] = None
+
+##
+# Return the key of the colour_map entry whose RGB value is nearest to
+# rgb, using squared Euclidean distance.  Entries whose value is None are
+# ignored; 0 is returned when no entry qualifies.  The search stops at the
+# first exact match.  With debug true, the outcome is also printed.
+# So far used only for pre-BIFF8 WINDOW2 record; speed is not a concern.
+def nearest_colour_index(colour_map, rgb, debug=0):
+    closest_index = 0
+    smallest = 3 * 256 * 256
+    for index, candidate in colour_map.items():
+        if candidate is None:
+            continue
+        distance = 0
+        for wanted, have in zip(rgb, candidate):
+            delta = wanted - have
+            distance += delta * delta
+        if distance >= smallest:
+            continue
+        smallest = distance
+        closest_index = index
+        if not distance:
+            # exact match, cannot do better
+            break
+    if debug:
+        print("nearest_colour_index for %r is %r -> %r; best_metric is %d"
+            % (rgb, closest_index, colour_map[closest_index], smallest))
+    return closest_index
+
+##
+# This mixin class exists solely so that Format, Font, and XF.... objects
+# can be compared by value of their attributes.
+# Two objects are equal when their instance dictionaries are equal.
+# Comparing with an object that has no instance dictionary (None, a
+# number, a string, ...) is not an error: NotImplemented is returned so
+# that Python falls back to its default comparison, i.e. "not equal".
+class EqNeAttrs(object):
+
+    def __eq__(self, other):
+        other_attrs = getattr(other, '__dict__', None)
+        if other_attrs is None:
+            return NotImplemented
+        return self.__dict__ == other_attrs
+
+    def __ne__(self, other):
+        other_attrs = getattr(other, '__dict__', None)
+        if other_attrs is None:
+            return NotImplemented
+        return self.__dict__ != other_attrs
+
+##
+# An Excel "font" contains the details of not only what is normally
+# considered a font, but also several other display attributes.
+# Items correspond to those in the Excel UI's Format/Cells/Font tab.
+# <br /> -- New in version 0.6.1
+class Font(BaseObject, EqNeAttrs):
+    # The class-level values below are only defaults; handle_font()
+    # assigns the values read from each FONT record to the instance.
+    ##
+    # 1 = Characters are bold. Redundant; see "weight" attribute.
+    bold = 0
+    ##
+    # Values: 0 = ANSI Latin, 1 = System default, 2 = Symbol,
+    # 77 = Apple Roman,
+    # 128 = ANSI Japanese Shift-JIS,
+    # 129 = ANSI Korean (Hangul),
+    # 130 = ANSI Korean (Johab),
+    # 134 = ANSI Chinese Simplified GBK,
+    # 136 = ANSI Chinese Traditional BIG5,
+    # 161 = ANSI Greek,
+    # 162 = ANSI Turkish,
+    # 163 = ANSI Vietnamese,
+    # 177 = ANSI Hebrew,
+    # 178 = ANSI Arabic,
+    # 186 = ANSI Baltic,
+    # 204 = ANSI Cyrillic,
+    # 222 = ANSI Thai,
+    # 238 = ANSI Latin II (Central European),
+    # 255 = OEM Latin I
+    character_set = 0
+    ##
+    # An explanation of "colour index" is given in the Formatting
+    # section at the start of this document.
+    colour_index = 0
+    ##
+    # 1 = Superscript, 2 = Subscript.
+    escapement = 0
+    ##
+    # 0 = None (unknown or don't care)<br />
+    # 1 = Roman (variable width, serifed)<br />
+    # 2 = Swiss (variable width, sans-serifed)<br />
+    # 3 = Modern (fixed width, serifed or sans-serifed)<br />
+    # 4 = Script (cursive)<br />
+    # 5 = Decorative (specialised, for example Old English, Fraktur)
+    family = 0
+    ##
+    # The 0-based index used to refer to this Font() instance.
+    # Note that index 4 is never used; xlrd supplies a dummy place-holder.
+    font_index = 0
+    ##
+    # Height of the font (in twips). A twip = 1/20 of a point.
+    height = 0
+    ##
+    # 1 = Characters are italic.
+    italic = 0
+    ##
+    # The name of the font. Example: u"Arial"
+    name = u""
+    ##
+    # 1 = Characters are struck out.
+    struck_out = 0
+    ##
+    # 0 = None<br />
+    # 1 = Single; 0x21 (33) = Single accounting<br />
+    # 2 = Double; 0x22 (34) = Double accounting
+    underline_type = 0
+    ##
+    # 1 = Characters are underlined. Redundant; see "underline_type" attribute.
+    underlined = 0
+    ##
+    # Font weight (100-1000). Standard values are 400 for normal text
+    # and 700 for bold text.
+    weight = 400
+    ##
+    # 1 = Font is outline style (Macintosh only)
+    outline = 0
+    ##
+    # 1 = Font is shadow style (Macintosh only)
+    shadow = 0
+
+    # No methods ...
+
+##
+# Handle an EFONT record (BIFF2 only): its 16-bit little-endian value
+# becomes the colour index of the font most recently added to
+# book.font_list.  Nothing is done unless book.formatting_info is true.
+def handle_efont(book, data): # BIFF2 only
+    if book.formatting_info:
+        (colour_index,) = unpack('<H', data)
+        latest_font = book.font_list[-1]
+        latest_font.colour_index = colour_index
+
+##
+# Handle a FONT record: build a Font object from the record data and
+# append it to book.font_list.  Nothing is done unless
+# book.formatting_info is true.
+# <br />The record layout depends on book.biff_version:
+# BIFF5 and later carry all attributes; BIFF3/4 carry height, option
+# flags and colour index; BIFF2 carries height and option flags only.
+# Attributes that an older record does not carry are given fixed values.
+# <br />The escapement value is stored in the documented "escapement"
+# attribute; "escapement_type" is kept as an alias with the same value
+# for code written against the older attribute name.
+def handle_font(book, data):
+    if not book.formatting_info:
+        return
+    if not book.encoding:
+        book.derive_encoding()
+    blah = DEBUG or book.verbosity >= 2
+    bv = book.biff_version
+    k = len(book.font_list)
+    if k == 4:
+        # Font index 4 is never used: insert a place-holder so that
+        # positions in font_list stay equal to font indexes.
+        f = Font()
+        f.name = u'Dummy Font'
+        f.font_index = k
+        book.font_list.append(f)
+        k += 1
+    f = Font()
+    f.font_index = k
+    book.font_list.append(f)
+    # Unpack the fixed-size part; name_pos is where the font name starts.
+    if bv >= 50:
+        (
+            f.height, option_flags, f.colour_index, f.weight,
+            f.escapement, f.underline_type, f.family,
+            f.character_set,
+            ) = unpack('<HHHHHBBB', data[0:13])
+        name_pos = 14
+    elif bv >= 30:
+        f.height, option_flags, f.colour_index = unpack('<HHH', data[0:6])
+        name_pos = 6
+    else: # BIFF2
+        f.height, option_flags = unpack('<HH', data[0:4])
+        f.colour_index = 0x7FFF # "system window text colour"
+        name_pos = 4
+    # Option flags common to all versions
+    f.bold = option_flags & 1
+    f.italic = (option_flags & 2) >> 1
+    f.underlined = (option_flags & 4) >> 2
+    f.struck_out = (option_flags & 8) >> 3
+    if bv >= 30:
+        f.outline = (option_flags & 16) >> 4
+        f.shadow = (option_flags & 32) >> 5
+    else:
+        # BIFF2 has no outline/shadow bits
+        f.outline = 0
+        f.shadow = 0
+    if bv >= 80:
+        f.name = unpack_unicode(data, name_pos, lenlen=1)
+    else:
+        f.name = unpack_string(data, name_pos, book.encoding, lenlen=1)
+    if bv < 50:
+        # Now cook up the remaining attributes ...
+        f.weight = [400, 700][f.bold]
+        f.escapement = 0 # None
+        f.underline_type = f.underlined # None or Single
+        f.family = 0 # Unknown / don't care
+        f.character_set = 1 # System default (0 means "ANSI Latin")
+    # Backward-compatible alias for the attribute name used previously.
+    f.escapement_type = f.escapement
+    if blah:
+        f.dump(
+            book.logfile,
+            header="--- handle_font: font[%d] ---" % f.font_index,
+            footer="-------------------",
+            )
+
+# === "Number formats" ===
+
+##
+# "Number format" information from a FORMAT record.
+# <br /> -- New in version 0.6.1
+class Format(BaseObject, EqNeAttrs):
+    ##
+    # The key into Book.format_map
+    format_key = 0
+    ##
+    # A classification that has been inferred from the format string.
+    # Currently, this is used only to distinguish between numbers and dates.
+    # <br />Values:
+    # <br />FUN = 0 # unknown
+    # <br />FDT = 1 # date
+    # <br />FNU = 2 # number
+    # <br />FGE = 3 # general
+    # <br />FTX = 4 # text
+    type = FUN
+    ##
+    # The format string
+    format_str = u''
+
+    # Store the key, the inferred classification (ty) and the format
+    # string on the instance, overriding the class-level defaults.
+    def __init__(self, format_key, ty, format_str):
+        (self.format_key,
+         self.type,
+         self.format_str) = (format_key, ty, format_str)
+
+# Maps a built-in ("standard") format code to its format string.
+# handle_xf() uses these strings to create Format objects for standard
+# codes that were not defined by a FORMAT record in the file.
+std_format_strings = {
+    # "std" == "standard for US English locale"
+    # #### TODO ... a lot of work to tailor these to the user's locale.
+    # See e.g. gnumeric-1.x.y/src/formats.c
+    0x00: "General",
+    0x01: "0",
+    0x02: "0.00",
+    0x03: "#,##0",
+    0x04: "#,##0.00",
+    0x05: "$#,##0_);($#,##0)",
+    0x06: "$#,##0_);[Red]($#,##0)",
+    0x07: "$#,##0.00_);($#,##0.00)",
+    0x08: "$#,##0.00_);[Red]($#,##0.00)",
+    0x09: "0%",
+    0x0a: "0.00%",
+    0x0b: "0.00E+00",
+    0x0c: "# ?/?",
+    0x0d: "# ??/??",
+    0x0e: "m/d/yy",
+    0x0f: "d-mmm-yy",
+    0x10: "d-mmm",
+    0x11: "mmm-yy",
+    0x12: "h:mm AM/PM",
+    0x13: "h:mm:ss AM/PM",
+    0x14: "h:mm",
+    0x15: "h:mm:ss",
+    0x16: "m/d/yy h:mm",
+    0x25: "#,##0_);(#,##0)",
+    0x26: "#,##0_);[Red](#,##0)",
+    0x27: "#,##0.00_);(#,##0.00)",
+    0x28: "#,##0.00_);[Red](#,##0.00)",
+    0x29: "_(* #,##0_);_(* (#,##0);_(* \"-\"_);_(@_)",
+    0x2a: "_($* #,##0_);_($* (#,##0);_($* \"-\"_);_(@_)",
+    0x2b: "_(* #,##0.00_);_(* (#,##0.00);_(* \"-\"??_);_(@_)",
+    0x2c: "_($* #,##0.00_);_($* (#,##0.00);_($* \"-\"??_);_(@_)",
+    0x2d: "mm:ss",
+    0x2e: "[h]:mm:ss",
+    0x2f: "mm:ss.0",
+    0x30: "##0.0E+0",
+    0x31: "@",
+    }
+
+# Each entry is (first code, last code, format type); both ends of a
+# range are inclusive.
+fmt_code_ranges = [ # both-inclusive ranges of "standard" format codes
+    # Source: the openoffice.org doc't
+    ( 0, 0, FGE),
+    ( 1, 13, FNU),
+    (14, 22, FDT),
+    #### (27, 36, FDT), # Japanese dates -- not sure of reliability of this
+    (37, 44, FNU),
+    (45, 47, FDT),
+    (48, 48, FNU),
+    (49, 49, FTX),
+    ####(50, 58, FDT), # Japanese dates -- but Gnumeric assumes
+    # built-in formats finish at 49, not at 163
+    ]
+
+# Expand the ranges into a direct lookup: standard format code -> type.
+std_format_code_types = {}
+for lo, hi, ty in fmt_code_ranges:
+    x = lo
+    while x <= hi:
+        std_format_code_types[x] = ty
+        x += 1
+# Do not leave the loop variables behind in the module namespace.
+del lo, hi, ty, x
+
+# Characters that mark a date format, in either case:
+# year, month/minute, day, hour, second.  Each one scores 5.
+date_char_dict = {}
+for _c in u'ymdhs' + u'YMDHS':
+    date_char_dict[_c] = 5
+
+# Characters that is_date_format_string() drops from a format string
+# before classifying it.
+skip_char_dict = {}
+for _c in u'$-+/(): ':
+    skip_char_dict[_c] = 1
+
+# Characters that mark a numeric format.  Each one scores 5.
+num_char_dict = {u'0': 5, u'#': 5, u'?': 5}
+
+# Reduced format strings that are known not to be date formats.
+non_date_formats = {
+    u'0.00E+00': 1,
+    u'##0.0E+0': 1,
+    u'General': 1,
+    u'GENERAL': 1, # OOo Calc 1.1.4 does this.
+    u'general': 1, # pyExcelerator 0.6.3 does this.
+    u'@': 1,
+    }
+
+# Call as fmt_bracketed_sub(replacement, text): substitutes every
+# [bracketed] section of text.
+_bracketed_section = re.compile(r'\[[^]]*\]')
+fmt_bracketed_sub = _bracketed_section.sub
+
+# Boolean format strings (actual cases)
+# u'"Yes";"Yes";"No"'
+# u'"True";"True";"False"'
+# u'"On";"On";"Off"'
+
+##
+# Guess whether the number format string fmt denotes a date/time format.
+# Returns a true value for a date format, a false value otherwise.
+# <br />book supplies the verbosity level and the logfile that receives
+# the diagnostics (including the verbosity >= 4 trace of the reduced
+# format string).
+def is_date_format_string(book, fmt):
+    # Heuristics:
+    # Ignore "text" and [stuff in square brackets (aarrgghh -- see below)].
+    # Handle backslashed-escaped chars properly.
+    # E.g. hh\hmm\mss\s should produce a display like 23h59m59s
+    # Date formats have one or more of ymdhs (caseless) in them.
+    # Numeric formats have # and 0.
+    # N.B. u'General"."' hence get rid of "text" first.
+    # TODO: Find where formats are interpreted in Gnumeric
+    # TODO: u'[h]\\ \\h\\o\\u\\r\\s' ([h] means don't care about hours > 23)
+    #
+    # Pass 1: reduce the format string.
+    # state 0 = normal, 1 = inside "quoted text",
+    # 2 = skip the next char (it follows a backslash, underscore or asterisk)
+    state = 0
+    kept = []
+    for c in fmt:
+        if state == 0:
+            if c == u'"':
+                state = 1
+            elif c in u"\\_*":
+                state = 2
+            elif c in skip_char_dict:
+                pass
+            else:
+                kept.append(c)
+        elif state == 1:
+            if c == u'"':
+                state = 0
+        else:
+            # Ignore char after backslash, underscore or asterisk
+            state = 0
+    s = ''.join(kept)
+    if book.verbosity >= 4:
+        print >> book.logfile, "is_date_format_string: reduced format is %r" % s
+    s = fmt_bracketed_sub('', s)
+    if s in non_date_formats:
+        return False
+    # Pass 2: score the date characters against the numeric characters.
+    got_sep = 0
+    date_count = num_count = 0
+    for c in s:
+        if c in date_char_dict:
+            date_count += date_char_dict[c]
+        elif c in num_char_dict:
+            num_count += num_char_dict[c]
+        elif c == ";":
+            got_sep = 1
+    # print num_count, date_count, repr(fmt)
+    if date_count and not num_count:
+        return True
+    if num_count and not date_count:
+        return False
+    if date_count:
+        # both kinds of character are present
+        fprintf(book.logfile,
+            'WARNING *** is_date_format: ambiguous d=%d n=%d fmt=%r\n',
+            date_count, num_count, fmt)
+    elif not got_sep:
+        # neither kind is present and there is no section separator
+        fprintf(book.logfile,
+            "WARNING *** format %r produces constant result\n",
+            fmt)
+    return date_count > num_count
+
+##
+# Handle a FORMAT record (self is a Book instance): decode the format
+# key and format string, classify the string as date or general, and
+# record the resulting Format object in self.format_map and
+# self.format_list.  A rectype of XL_FORMAT2 caps the effective BIFF
+# version at 30.  Before BIFF5 the record carries no key, so the running
+# count of FORMAT records is used as the key.
+def handle_format(self, data, rectype=XL_FORMAT):
+    bv = self.biff_version
+    if rectype == XL_FORMAT2:
+        bv = min(bv, 30)
+    if not self.encoding:
+        self.derive_encoding()
+    if bv >= 50:
+        fmtkey, = unpack('<H', data[0:2])
+        strpos = 2
+    elif bv <= 30:
+        fmtkey = self.actualfmtcount
+        strpos = 0
+    else:
+        fmtkey = self.actualfmtcount
+        strpos = 2
+    self.actualfmtcount += 1
+    if bv >= 80:
+        unistrg = unpack_unicode(data, 2)
+    else:
+        unistrg = unpack_string(data, strpos, self.encoding, lenlen=1)
+    verbose = self.verbosity >= 3
+    if verbose:
+        fprintf(self.logfile,
+            "FORMAT: count=%d fmtkey=0x%04x (%d) s=%r\n",
+            self.actualfmtcount, fmtkey, fmtkey, unistrg)
+    is_date_s = self.is_date_format_string(unistrg)
+    if is_date_s:
+        ty = FDT
+    else:
+        ty = FGE
+    if bv >= 50 and fmtkey <= 163:
+        # user_defined if fmtkey > 163
+        # N.B. Gnumeric incorrectly starts these at 50 instead of 164 :-(
+        # if earlier than BIFF 5, standard info is useless
+        is_date_c = std_format_code_types.get(fmtkey, FUN) == FDT
+        if 0 < fmtkey < 50 and (is_date_c ^ is_date_s):
+            # The standard code and the string disagree: report both.
+            fprintf(self.logfile,
+                "WARNING *** Conflict between "
+                "std format key %d and its format string %r\n",
+                fmtkey, unistrg)
+            fprintf(self.logfile,
+                "ty: %d; is_date_c: %r; is_date_s: %r; fmt_strg: %r",
+                ty, is_date_c, is_date_s, unistrg)
+    fmtobj = Format(fmtkey, ty, unistrg)
+    if verbose:
+        fmtobj.dump(self.logfile,
+            header="--- handle_format [%d] ---" % (self.actualfmtcount-1, ))
+    self.format_map[fmtkey] = fmtobj
+    self.format_list.append(fmtobj)
+
+# =============================================================================
+
+##
+# Handle a PALETTE record: replace the default colours in
+# book.colour_map (from index 8 upwards) by the colours in the record
+# and remember them in book.palette_record.  Nothing is done unless
+# book.formatting_info is true.
+# <br />Raises XLRDError when the record size does not match its colour
+# count (up to 4 bytes of trailing junk are tolerated).
+# <br />A record holding more colours than the default palette has
+# entries is accepted: the extra colours are added to the map.
+def handle_palette(book, data):
+    if not book.formatting_info:
+        return
+    blah = DEBUG or book.verbosity >= 2
+    n_colours, = unpack('<H', data[:2])
+    expected_n_colours = (16, 56)[book.biff_version >= 50]
+    if ((DEBUG or book.verbosity >= 1)
+    and n_colours != expected_n_colours):
+        fprintf(book.logfile,
+            "NOTE *** Expected %d colours in PALETTE record, found %d\n",
+            expected_n_colours, n_colours)
+    elif blah:
+        fprintf(book.logfile,
+            "PALETTE record with %d colours\n", n_colours)
+    fmt = '<xx%di' % n_colours # use i to avoid long integers
+    expected_size = 4 * n_colours + 2
+    actual_size = len(data)
+    tolerance = 4
+    if not expected_size <= actual_size <= expected_size + tolerance:
+        raise XLRDError('PALETTE record: expected size %d, actual size %d' % (expected_size, actual_size))
+    colours = unpack(fmt, data[:expected_size])
+    assert book.palette_record == [] # There should be only 1 PALETTE record
+    # a colour will be 0xbbggrr
+    # IOW, red is at the little end
+    for i in xrange(n_colours):
+        c = colours[i]
+        red = c & 0xff
+        green = (c >> 8) & 0xff
+        blue = (c >> 16) & 0xff
+        # The map may have no default entry at this index when the record
+        # has more colours than expected; treat that as "previously unknown".
+        old_rgb = book.colour_map.get(8+i)
+        new_rgb = (red, green, blue)
+        book.palette_record.append(new_rgb)
+        book.colour_map[8+i] = new_rgb
+        if blah:
+            if new_rgb != old_rgb:
+                print >> book.logfile, "%2d: %r -> %r" % (i, old_rgb, new_rgb)
+
+##
+# Check the colour index of every font against book.colour_map, marking
+# known indexes in book.colour_indexes_used and reporting unknown ones.
+# All diagnostics go to book.logfile.  With verbosity >= 1 the sorted
+# list of colour indexes in use is logged as well.
+def palette_epilogue(book):
+    # Check colour indexes in fonts etc.
+    # This must be done here as FONT records
+    # come *before* the PALETTE record :-(
+    for font in book.font_list:
+        if font.font_index == 4: # the missing font record
+            continue
+        cx = font.colour_index
+        if cx == 0x7fff: # system window text colour
+            continue
+        if cx in book.colour_map:
+            book.colour_indexes_used[cx] = 1
+        else:
+            # Both lines belong together, so both go to the logfile.
+            print >> book.logfile, \
+                "Size of colour table:", len(book.colour_map)
+            print >> book.logfile, \
+                "*** Font #%d (%r): colour index 0x%04x is unknown" \
+                % (font.font_index, font.name, cx)
+    if book.verbosity >= 1:
+        used = list(book.colour_indexes_used.keys())
+        used.sort()
+        print >> book.logfile, "\nColour indexes used:\n%r\n" % used
+
+##
+# Handle a STYLE record: work out the style name and record
+# (built_in, xf_index) under that name in book.style_name_map.
+# Built-in styles take their name from built_in_style_names (with the
+# level number appended for the row/column level styles); user-defined
+# styles carry their name in the record.
+def handle_style(book, data):
+    blah = DEBUG or book.verbosity >= 2
+    bv = book.biff_version
+    flag_and_xfx, built_in_id, level = unpack('<HBB', data[:4])
+    xf_index = flag_and_xfx & 0x0fff
+    if data == "\0\0\0\0" and "Normal" not in book.style_name_map:
+        # Erroneous record (doesn't have built-in bit set).
+        # Example file supplied by Jeff Bell.
+        built_in, built_in_id, xf_index = 1, 0, 0
+        name = "Normal"
+        level = 255
+    elif flag_and_xfx & 0x8000:
+        # built-in style
+        built_in = 1
+        name = built_in_style_names[built_in_id]
+        if built_in_id >= 1 and built_in_id <= 2:
+            name = name + str(level + 1)
+    else:
+        # user-defined style
+        if bv >= 80:
+            name = unpack_unicode(data, 2, lenlen=2)
+        else:
+            name = unpack_string(data, 2, book.encoding, lenlen=1)
+        if blah and not name:
+            print >> book.logfile, \
+                "WARNING *** A user-defined style has a zero-length name"
+        built_in = built_in_id = level = 0
+    book.style_name_map[name] = (built_in, xf_index)
+    if blah:
+        print >> book.logfile, \
+            "STYLE: built_in=%d xf_index=%d built_in_id=%d level=%d name=%r" \
+            % (built_in, xf_index, built_in_id, level, name)
+
+##
+# Walk the attributes of obj in name order.  Attributes that have a
+# "dump" attribute themselves are searched recursively.  Any other
+# attribute whose name contains "colour_index" is looked up in
+# book.colour_map: a known index is marked in book.colour_indexes_used,
+# an unknown one is reported to book.logfile together with orig_index,
+# the index of the XF being checked.
+def check_colour_indexes_in_obj(book, obj, orig_index):
+    attr_names = list(obj.__dict__.keys())
+    attr_names.sort()
+    for attr in attr_names:
+        value = obj.__dict__[attr]
+        if hasattr(value, 'dump'):
+            check_colour_indexes_in_obj(book, value, orig_index)
+            continue
+        if attr.find('colour_index') < 0:
+            continue
+        if value in book.colour_map:
+            book.colour_indexes_used[value] = 1
+            continue
+        print >> book.logfile, \
+            "*** xf #%d : %s.%s = 0x%04x (unknown)" \
+            % (orig_index, obj.__class__.__name__, attr, value)
+
+# The six "attribute group is used" flags of an XF record, in bit order
+# (least significant first).  Each becomes an XF attribute "_<name>_flag".
+_XF_USED_FLAG_STEMS = \
+    "format font alignment border background protection".split()
+
+# Store one "_<name>_flag" attribute on xf per entry of
+# _XF_USED_FLAG_STEMS, taking successive bits of reg (lowest bit first).
+def _set_xf_used_flags(xf, reg):
+    for stem in _XF_USED_FLAG_STEMS:
+        setattr(xf, "_" + stem + "_flag", reg & 1)
+        reg >>= 1
+
+# Unpack the combined "type / protection / parent style" field that
+# BIFF4 and later XF records share.
+def _unpack_xf_type_par(xf, pkd_type_par):
+    upkbits(xf.protection, pkd_type_par, (
+        (0, 0x01, 'cell_locked'),
+        (1, 0x02, 'formula_hidden'),
+        ))
+    upkbits(xf, pkd_type_par, (
+        (2, 0x0004, 'is_style'),
+        # Following is not in OOo docs, but is mentioned
+        # in Gnumeric source and also in (deep breath)
+        # org.apache.poi.hssf.record.ExtendedFormatRecord.java
+        (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known.
+        (4, 0xFFF0, 'parent_style_index'),
+        ))
+
+# Unpack the background and border fields, which have the same layout
+# in BIFF3 and BIFF4 XF records.
+def _unpack_xf_bkg_brd_34(xf, pkd_bkg_34, pkd_brd_34):
+    upkbits(xf.background, pkd_bkg_34, (
+        ( 0, 0x003F, 'fill_pattern'),
+        ( 6, 0x07C0, 'pattern_colour_index'),
+        (11, 0xF800, 'background_colour_index'),
+        ))
+    upkbitsL(xf.border, pkd_brd_34, (
+        ( 0, 0x00000007, 'top_line_style'),
+        ( 3, 0x000000F8, 'top_colour_index'),
+        ( 8, 0x00000700, 'left_line_style'),
+        (11, 0x0000F800, 'left_colour_index'),
+        (16, 0x00070000, 'bottom_line_style'),
+        (19, 0x00F80000, 'bottom_colour_index'),
+        (24, 0x07000000, 'right_line_style'),
+        (27, 0xF8000000L, 'right_colour_index'),
+        ))
+
+##
+# Handle an XF record (self is a Book instance): decode the record
+# according to self.biff_version into an XF object with its alignment,
+# border, background and protection sub-objects, append it to
+# self.xf_list and increment self.xfcount.
+# <br />Before the first XF record of a BIFF5+ file is processed, Format
+# objects are created for all standard format codes that the file did
+# not define itself.
+# <br />Raises XLRDError for a BIFF version that is not catered for.
+def handle_xf(self, data):
+    verbose = DEBUG or self.verbosity >= 3
+    bv = self.biff_version
+    xf = XF()
+    alignment = xf.alignment = XFAlignment()
+    alignment.indent_level = 0
+    alignment.shrink_to_fit = 0
+    alignment.text_direction = 0
+    border = xf.border = XFBorder()
+    border.diag_up = 0
+    border.diag_down = 0
+    border.diag_colour_index = 0
+    border.diag_line_style = 0 # no line
+    background = xf.background = XFBackground()
+    xf.protection = XFProtection()
+    # fill in the known standard formats
+    if bv >= 50 and not self.xfcount:
+        # i.e. do this once before we process the first XF record
+        for code in std_format_code_types.keys():
+            if code in self.format_map:
+                continue
+            self.format_map[code] = Format(
+                code, std_format_code_types[code], std_format_strings[code])
+    if bv >= 80:
+        (xf.font_index, xf.format_key, pkd_type_par,
+         pkd_align1, alignment.rotation, pkd_align2,
+         pkd_used, pkd_brdbkg1, pkd_brdbkg2, pkd_brdbkg3,
+         ) = unpack('<HHHBBBBIiH', data[0:20])
+        _unpack_xf_type_par(xf, pkd_type_par)
+        upkbits(alignment, pkd_align1, (
+            (0, 0x07, 'hor_align'),
+            (3, 0x08, 'text_wrapped'),
+            (4, 0x70, 'vert_align'),
+            ))
+        upkbits(alignment, pkd_align2, (
+            (0, 0x0f, 'indent_level'),
+            (4, 0x10, 'shrink_to_fit'),
+            (6, 0xC0, 'text_direction'),
+            ))
+        _set_xf_used_flags(xf, pkd_used >> 2)
+        upkbitsL(border, pkd_brdbkg1, (
+            (0, 0x0000000f, 'left_line_style'),
+            (4, 0x000000f0, 'right_line_style'),
+            (8, 0x00000f00, 'top_line_style'),
+            (12, 0x0000f000, 'bottom_line_style'),
+            (16, 0x007f0000, 'left_colour_index'),
+            (23, 0x3f800000, 'right_colour_index'),
+            (30, 0x40000000, 'diag_down'),
+            (31, 0x80000000L, 'diag_up'),
+            ))
+        upkbits(border, pkd_brdbkg2, (
+            (0, 0x0000007F, 'top_colour_index'),
+            (7, 0x00003F80, 'bottom_colour_index'),
+            (14, 0x001FC000, 'diag_colour_index'),
+            (21, 0x01E00000, 'diag_line_style'),
+            ))
+        upkbitsL(background, pkd_brdbkg2, (
+            (26, 0xFC000000L, 'fill_pattern'),
+            ))
+        upkbits(background, pkd_brdbkg3, (
+            (0, 0x007F, 'pattern_colour_index'),
+            (7, 0x3F80, 'background_colour_index'),
+            ))
+    elif bv >= 50:
+        (xf.font_index, xf.format_key, pkd_type_par,
+         pkd_align1, pkd_orient_used,
+         pkd_brdbkg1, pkd_brdbkg2,
+         ) = unpack('<HHHBBIi', data[0:16])
+        _unpack_xf_type_par(xf, pkd_type_par)
+        upkbits(alignment, pkd_align1, (
+            (0, 0x07, 'hor_align'),
+            (3, 0x08, 'text_wrapped'),
+            (4, 0x70, 'vert_align'),
+            ))
+        # low 2 bits: orientation; remaining bits: the "used" flags
+        alignment.rotation = [0, 255, 90, 180][pkd_orient_used & 0x03]
+        _set_xf_used_flags(xf, pkd_orient_used >> 2)
+        upkbitsL(background, pkd_brdbkg1, (
+            ( 0, 0x0000007F, 'pattern_colour_index'),
+            ( 7, 0x00003F80, 'background_colour_index'),
+            (16, 0x003F0000, 'fill_pattern'),
+            ))
+        upkbitsL(border, pkd_brdbkg1, (
+            (22, 0x01C00000, 'bottom_line_style'),
+            (25, 0xFE000000L, 'bottom_colour_index'),
+            ))
+        upkbits(border, pkd_brdbkg2, (
+            ( 0, 0x00000007, 'top_line_style'),
+            ( 3, 0x00000038, 'left_line_style'),
+            ( 6, 0x000001C0, 'right_line_style'),
+            ( 9, 0x0000FE00, 'top_colour_index'),
+            (16, 0x007F0000, 'left_colour_index'),
+            (23, 0x3F800000, 'right_colour_index'),
+            ))
+    elif bv >= 40:
+        (xf.font_index, xf.format_key, pkd_type_par,
+         pkd_align_orient, pkd_used,
+         pkd_bkg_34, pkd_brd_34,
+         ) = unpack('<BBHBBHI', data[0:12])
+        _unpack_xf_type_par(xf, pkd_type_par)
+        upkbits(alignment, pkd_align_orient, (
+            (0, 0x07, 'hor_align'),
+            (3, 0x08, 'text_wrapped'),
+            (4, 0x30, 'vert_align'),
+            ))
+        # top 2 bits: orientation
+        alignment.rotation = \
+            [0, 255, 90, 180][(pkd_align_orient & 0xC0) >> 6]
+        _set_xf_used_flags(xf, pkd_used >> 2)
+        _unpack_xf_bkg_brd_34(xf, pkd_bkg_34, pkd_brd_34)
+    elif bv == 30:
+        (xf.font_index, xf.format_key, pkd_type_prot,
+         pkd_used, pkd_align_par,
+         pkd_bkg_34, pkd_brd_34,
+         ) = unpack('<BBBBHHI', data[0:12])
+        upkbits(xf.protection, pkd_type_prot, (
+            (0, 0x01, 'cell_locked'),
+            (1, 0x02, 'formula_hidden'),
+            ))
+        upkbits(xf, pkd_type_prot, (
+            (2, 0x0004, 'is_style'),
+            (3, 0x0008, 'lotus_123_prefix'), # Meaning is not known.
+            ))
+        upkbits(alignment, pkd_align_par, (
+            (0, 0x07, 'hor_align'),
+            (3, 0x08, 'text_wrapped'),
+            ))
+        upkbits(xf, pkd_align_par, (
+            (4, 0xFFF0, 'parent_style_index'),
+            ))
+        _set_xf_used_flags(xf, pkd_used >> 2)
+        _unpack_xf_bkg_brd_34(xf, pkd_bkg_34, pkd_brd_34)
+        alignment.vert_align = 2 # bottom
+        alignment.rotation = 0
+    elif bv == 21:
+        #### Warning: incomplete treatment; formatting_info not fully supported.
+        #### Probably need to offset incoming BIFF2 XF[n] to BIFF8-like XF[n+16],
+        #### and create XF[0:16] like the standard ones in BIFF8
+        #### *AND* add 16 to all XF references in cell records :-(
+        (xf.font_index, format_etc, halign_etc) = unpack('<BxBB', data)
+        xf.format_key = format_etc & 0x3F
+        upkbits(xf.protection, format_etc, (
+            (6, 0x40, 'cell_locked'),
+            (7, 0x80, 'formula_hidden'),
+            ))
+        upkbits(alignment, halign_etc, (
+            (0, 0x07, 'hor_align'),
+            ))
+        sides = ((0x08, 'left'), (0x10, 'right'), (0x20, 'top'), (0x40, 'bottom'))
+        for mask, side in sides:
+            if halign_etc & mask:
+                colour_index, line_style = 8, 1 # black, thin
+            else:
+                colour_index, line_style = 0, 0 # none, none
+            setattr(border, side + '_colour_index', colour_index)
+            setattr(border, side + '_line_style', line_style)
+        if halign_etc & 0x80:
+            background.fill_pattern = 17
+        else:
+            background.fill_pattern = 0
+        background.background_colour_index = 9 # white
+        background.pattern_colour_index = 8 # black
+        xf.parent_style_index = 0 # ???????????
+        alignment.vert_align = 2 # bottom
+        alignment.rotation = 0
+        # every attribute group counts as "used": all six flags are 1
+        _set_xf_used_flags(xf, 0x3F)
+    else:
+        raise XLRDError('programmer stuff-up: bv=%d' % bv)
+
+    xf.xf_index = len(self.xf_list)
+    self.xf_list.append(xf)
+    self.xfcount += 1
+    if verbose:
+        xf.dump(
+            self.logfile,
+            header="--- handle_xf: xf[%d] ---" % xf.xf_index,
+            footer=" ",
+            )
+    # Now for some assertions ...
+    if self.formatting_info:
+        if xf.is_style and xf.parent_style_index != 0x0FFF:
+            fprintf(self.logfile,
+                "WARNING *** XF[%d] is a style XF but parent_style_index is 0x%04x, not 0x0fff\n",
+                xf.xf_index, xf.parent_style_index)
+        check_colour_indexes_in_obj(self, xf, xf.xf_index)
+    if xf.format_key not in self.format_map:
+        fprintf(self.logfile,
+            "WARNING *** XF[%d] unknown (raw) format key (%d, 0x%04x)\n",
+            xf.xf_index, xf.format_key, xf.format_key)
+        xf.format_key = 0
+
+##
+# Finish XF processing once all XF records have been read (self is a
+# Book instance).  For every XF, an unknown format key is reset to 0 and
+# the cell type implied by the XF's format is recorded in
+# self._xf_index_to_xl_type_map.
+# <br />When self.formatting_info is true, the relationship between each
+# cell XF and its parent style XF is checked as well.  Oddities in the
+# file (out-of-range, self-referencing or non-style parents, attribute
+# groups that differ from the parent) are reported to self.logfile
+# rather than aborting the load.
+def xf_epilogue(self):
+    # self is a Book instance.
+    self._xf_epilogue_done = 1
+    num_xfs = len(self.xf_list)
+    blah = DEBUG or self.verbosity >= 3
+    blah1 = DEBUG or self.verbosity >= 1
+    if blah:
+        fprintf(self.logfile, "xf_epilogue called ...\n")
+
+    def check_same(book_arg, xf_arg, parent_arg, attr):
+        # the _arg caper is to avoid a Warning msg from Python 2.1 :-(
+        if getattr(xf_arg, attr) != getattr(parent_arg, attr):
+            fprintf(book_arg.logfile,
+                "NOTE !!! XF[%d] parent[%d] %s different\n",
+                xf_arg.xf_index, parent_arg.xf_index, attr)
+
+    # Format type -> cell type; the same for every XF, so built once.
+    cellty_from_fmtty = {
+        FNU: XL_CELL_NUMBER,
+        FUN: XL_CELL_NUMBER,
+        FGE: XL_CELL_NUMBER,
+        FDT: XL_CELL_DATE,
+        FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text.
+        }
+
+    for xfx in xrange(num_xfs):
+        xf = self.xf_list[xfx]
+        if xf.format_key not in self.format_map:
+            msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n"
+            fprintf(self.logfile, msg,
+                xf.xf_index, xf.format_key, xf.format_key)
+            xf.format_key = 0
+        fmt = self.format_map[xf.format_key]
+        cellty = cellty_from_fmtty[fmt.type]
+        self._xf_index_to_xl_type_map[xf.xf_index] = cellty
+        # Now for some consistency checks etc
+        if not self.formatting_info:
+            continue
+        if xf.is_style:
+            continue
+        if not(0 <= xf.parent_style_index < num_xfs):
+            fprintf(self.logfile,
+                "WARNING *** XF[%d]: is_style=%d but parent_style_index=%d\n",
+                xf.xf_index, xf.is_style, xf.parent_style_index)
+            # make it conform
+            xf.parent_style_index = 0
+        if self.biff_version >= 30:
+            # These conditions depend on the file's contents, so they
+            # are reported, not asserted.
+            if blah1:
+                if xf.parent_style_index == xf.xf_index:
+                    fprintf(self.logfile,
+                        "NOTE !!! XF[%d]: parent_style_index is also %d\n",
+                        xf.xf_index, xf.parent_style_index)
+                elif not self.xf_list[xf.parent_style_index].is_style:
+                    fprintf(self.logfile,
+                        "NOTE !!! XF[%d]: parent_style_index is %d; style flag not set\n",
+                        xf.xf_index, xf.parent_style_index)
+            if blah1 and xf.parent_style_index > xf.xf_index:
+                fprintf(self.logfile,
+                    "NOTE !!! XF[%d]: parent_style_index is %d; out of order?\n",
+                    xf.xf_index, xf.parent_style_index)
+            parent = self.xf_list[xf.parent_style_index]
+            if not xf._alignment_flag and not parent._alignment_flag:
+                if blah1: check_same(self, xf, parent, 'alignment')
+            if not xf._background_flag and not parent._background_flag:
+                if blah1: check_same(self, xf, parent, 'background')
+            if not xf._border_flag and not parent._border_flag:
+                if blah1: check_same(self, xf, parent, 'border')
+            if not xf._protection_flag and not parent._protection_flag:
+                if blah1: check_same(self, xf, parent, 'protection')
+            if not xf._format_flag and not parent._format_flag:
+                if blah1 and xf.format_key != parent.format_key:
+                    fprintf(self.logfile,
+                        "NOTE !!! XF[%d] fmtk=%d, parent[%d] fmtk=%r\n%r / %r\n",
+                        xf.xf_index, xf.format_key, parent.xf_index, parent.format_key,
+                        self.format_map[xf.format_key].format_str,
+                        self.format_map[parent.format_key].format_str)
+            if not xf._font_flag and not parent._font_flag:
+                if blah1 and xf.font_index != parent.font_index:
+                    fprintf(self.logfile,
+                        "NOTE !!! XF[%d] fontx=%d, parent[%d] fontx=%r\n",
+                        xf.xf_index, xf.font_index, parent.xf_index, parent.font_index)
+
+def initialise_book(book):
+    """Prepare a Book instance for format/XF record processing.
+
+    Initialises the colour map, clears the "XF epilogue done" marker and
+    attaches this module's handler functions to the book's class, each
+    under its own function name.
+    """
+    initialise_colour_map(book)
+    book._xf_epilogue_done = 0
+    book_class = book.__class__
+    for handler in (
+        handle_font,
+        handle_efont,
+        handle_format,
+        is_date_format_string,
+        handle_palette,
+        palette_epilogue,
+        handle_style,
+        handle_xf,
+        xf_epilogue,
+        ):
+        # Attached to the class, not the instance, so each becomes a method.
+        setattr(book_class, handler.__name__, handler)
+
+##
+# <p>A collection of the border-related attributes of an XF record.
+# Items correspond to those in the Excel UI's Format/Cells/Border tab.</p>
+# <p> An explanation of "colour index" is given in the Formatting
+# section at the start of this document.
+# There are five line style attributes; possible values and the
+# associated meanings are:
+# 0&nbsp;=&nbsp;No line,
+# 1&nbsp;=&nbsp;Thin,
+# 2&nbsp;=&nbsp;Medium,
+# 3&nbsp;=&nbsp;Dashed,
+# 4&nbsp;=&nbsp;Dotted,
+# 5&nbsp;=&nbsp;Thick,
+# 6&nbsp;=&nbsp;Double,
+# 7&nbsp;=&nbsp;Hair,
+# 8&nbsp;=&nbsp;Medium dashed,
+# 9&nbsp;=&nbsp;Thin dash-dotted,
+# 10&nbsp;=&nbsp;Medium dash-dotted,
+# 11&nbsp;=&nbsp;Thin dash-dot-dotted,
+# 12&nbsp;=&nbsp;Medium dash-dot-dotted,
+# 13&nbsp;=&nbsp;Slanted medium dash-dotted.
+# The line styles 8 to 13 appear in BIFF8 files (Excel 97 and later) only.
+# For pictures of the line styles, refer to OOo docs s3.10 (p22)
+# "Line Styles for Cell Borders (BIFF3-BIFF8)".</p>
+# <br /> -- New in version 0.6.1
+class XFBorder(BaseObject, EqNeAttrs):
+ # Every attribute below has a class-level default of 0.
+
+ ##
+ # The colour index for the cell's top line
+ top_colour_index = 0
+ ##
+ # The colour index for the cell's bottom line
+ bottom_colour_index = 0
+ ##
+ # The colour index for the cell's left line
+ left_colour_index = 0
+ ##
+ # The colour index for the cell's right line
+ right_colour_index = 0
+ ##
+ # The colour index for the cell's diagonal lines, if any
+ diag_colour_index = 0
+ ##
+ # The line style for the cell's top line (one of the codes 0 to 13 listed above)
+ top_line_style = 0
+ ##
+ # The line style for the cell's bottom line (one of the codes 0 to 13 listed above)
+ bottom_line_style = 0
+ ##
+ # The line style for the cell's left line (one of the codes 0 to 13 listed above)
+ left_line_style = 0
+ ##
+ # The line style for the cell's right line (one of the codes 0 to 13 listed above)
+ right_line_style = 0
+ ##
+ # The line style for the cell's diagonal lines, if any (one of the codes 0 to 13 listed above)
+ diag_line_style = 0
+ ##
+ # 1 = draw a diagonal from top left to bottom right
+ diag_down = 0
+ ##
+ # 1 = draw a diagonal from bottom left to top right
+ diag_up = 0
+
+##
+# A collection of the background-related attributes of an XF record.
+# Items correspond to those in the Excel UI's Format/Cells/Patterns tab.
+# An explanation of "colour index" is given in the Formatting
+# section at the start of this document.
+# <br /> -- New in version 0.6.1
+class XFBackground(BaseObject, EqNeAttrs):
+
+ ##
+ # Fill pattern code. See section 3.11 of the OOo docs.
+ fill_pattern = 0
+ ##
+ # Colour index for the background. See section 3.11 of the OOo docs.
+ background_colour_index = 0
+ ##
+ # Colour index for the pattern. See section 3.11 of the OOo docs.
+ pattern_colour_index = 0
+
+##
+# A collection of the alignment and similar attributes of an XF record.
+# Items correspond to those in the Excel UI's Format/Cells/Alignment tab.
+# <br /> -- New in version 0.6.1
+
+class XFAlignment(BaseObject, EqNeAttrs):
+
+ ##
+ # Horizontal alignment code. Values: section 6.115 (p 214) of OOo docs
+ hor_align = 0
+ ##
+ # Vertical alignment code. Values: section 6.115 (p 215) of OOo docs
+ vert_align = 0
+ ##
+ # Values: section 6.115 (p 215) of OOo docs.<br />
+ # Note: file versions BIFF7 and earlier use the documented
+ # "orientation" attribute; this will be mapped (without loss)
+ # into "rotation".
+ rotation = 0
+ ##
+ # 1 = text is wrapped at right margin
+ text_wrapped = 0
+ ##
+ # A number in range(15), i.e. 0 to 14.
+ indent_level = 0
+ ##
+ # 1 = shrink font size to fit text into cell.
+ shrink_to_fit = 0
+ ##
+ # 0 = according to context; 1 = left-to-right; 2 = right-to-left
+ text_direction = 0
+
+##
+# A collection of the protection-related attributes of an XF record.
+# Items correspond to those in the Excel UI's Format/Cells/Protection tab.
+# Note the OOo docs include the "cell or style" bit
+# in this bundle of attributes.
+# This is incorrect; the bit is used in determining which bundles to use.
+# <br /> -- New in version 0.6.1
+
+class XFProtection(BaseObject, EqNeAttrs):
+
+ ##
+ # 1 = Cell is prevented from being changed, moved, resized, or deleted
+ # (only if the sheet is protected). Class-level default is 0.
+ cell_locked = 0
+ ##
+ # 1 = Hide formula so that it doesn't appear in the formula bar when
+ # the cell is selected (only if the sheet is protected). Class-level default is 0.
+ formula_hidden = 0
+
+##
+# eXtended Formatting information for cells, rows, columns and styles.
+# <br /> -- New in version 0.6.1
+#
+# <p>Each of the 6 flags below describes the validity of
+# a specific group of attributes.
+# <br />
+# In cell XFs, flag==0 means the attributes of the parent style XF are used,
+# (but only if the attributes are valid there); flag==1 means the attributes
+# of this XF are used.<br />
+# In style XFs, flag==0 means the attribute setting is valid; flag==1 means
+# the attribute should be ignored.<br />
+# Note that the API
+# provides both "raw" XFs and "computed" XFs -- in the latter case, cell XFs
+# have had the above inheritance mechanism applied.
+# </p>
+
+class XF(BaseObject):
+
+ ##
+ # 0 = cell XF, 1 = style XF
+ is_style = 0
+ ##
+ # cell XF: Index into Book.xf_list
+ # of this XF's style XF<br />
+ # style XF: 0x0FFF
+ parent_style_index = 0
+ ##
+ # Flag for the format_key attribute; its meaning depends on is_style (see the class comment).
+ _format_flag = 0
+ ##
+ # Flag for the font_index attribute; its meaning depends on is_style (see the class comment).
+ _font_flag = 0
+ ##
+ # Flag for the alignment attributes; its meaning depends on is_style (see the class comment).
+ _alignment_flag = 0
+ ##
+ # Flag for the border attributes; its meaning depends on is_style (see the class comment).
+ _border_flag = 0
+ ##
+ # Flag for the background attributes; its meaning depends on is_style (see the class comment).
+ _background_flag = 0
+ ##
+ # Flag for the protection attributes; its meaning depends on is_style (see the class comment).
+ _protection_flag = 0
+ ##
+ # Index into Book.xf_list
+ xf_index = 0
+ ##
+ # Index into Book.font_list
+ font_index = 0
+ ##
+ # Key into Book.format_map
+ # <p>
+ # Warning: OOo docs on the XF record call this "Index to FORMAT record".
+ # It is not an index in the Python sense. It is a key to a map.
+ # It is true <i>only</i> for Excel 4.0 and earlier files
+ # that the key into format_map from an XF instance
+ # is the same as the index into format_list, and <i>only</i>
+ # if the index is less than 164.
+ # </p>
+ format_key = 0
+ ##
+ # An instance of an XFProtection object (None until one is assigned).
+ protection = None
+ ##
+ # An instance of an XFBackground object (None until one is assigned).
+ background = None
+ ##
+ # An instance of an XFAlignment object (None until one is assigned).
+ alignment = None
+ ##
+ # An instance of an XFBorder object (None until one is assigned).
+ border = None
diff --git a/tablib/packages/xlrd/formula.py b/tablib/packages/xlrd/formula.py
new file mode 100644
index 0000000..4edbc29
--- /dev/null
+++ b/tablib/packages/xlrd/formula.py
@@ -0,0 +1,2092 @@
+# -*- coding: cp1252 -*-
+
+##
+# Module for parsing/evaluating Microsoft Excel formulas.
+#
+# <p>Copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under
+# a BSD-style licence.</p>
+##
+
+# No part of the content of this file was derived from the works of David Giffin.
+
+import copy
+from struct import unpack
+from timemachine import *
+from biffh import unpack_unicode_update_pos, unpack_string_update_pos, \
+ XLRDError, hex_char_dump, error_text_from_code, BaseObject
+
+__all__ = [
+ 'oBOOL', 'oERR', 'oNUM', 'oREF', 'oREL', 'oSTRG', 'oUNK',
+ 'decompile_formula',
+ 'dump_formula',
+ 'evaluate_name_formula',
+ 'okind_dict',
+ 'rangename3d', 'rangename3drel', 'cellname', 'cellnameabs', 'colname',
+ ]
+
+# sztabN[opcode] -> the number of bytes to consume.
+# -1 means variable
+# -2 means this opcode not implemented in this version.
+# Which N to use? Depends on biff_version; see szdict.
+sztab0 = [-2, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 8, 4, 2, 2, 3, 9, 8, 2, 3, 8, 4, 7, 5, 5, 5, 2, 4, 7, 4, 7, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2]
+sztab1 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 2, 3, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, 3, -2, -2, -2, -2, -2, -2, -2]
+sztab2 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, 11, 5, 2, 2, 3, 9, 9, 3, 4, 11, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2]
+sztab3 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -2, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 15, 4, 7, 7, 7, 7, 3, 4, 7, 4, 7, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 25, 18, 21, 18, 21, -2, -2]
+sztab4 = [-2, 5, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -2, -2, 2, 2, 3, 9, 9, 3, 4, 5, 5, 9, 7, 7, 7, 3, 5, 9, 5, 9, 3, 3, -2, -2, -2, -2, -2, -2, -2, -2, -2, 7, 7, 11, 7, 11, -2, -2]
+
+szdict = {
+ 20 : sztab0,
+ 30 : sztab1,
+ 40 : sztab2,
+ 45 : sztab2,
+ 50 : sztab3,
+ 70 : sztab3,
+ 80 : sztab4,
+ }
+
+# For debugging purposes ... the name for each opcode
+# (without the prefix "t" used in the OOo docs)
+onames = ['Unk00', 'Exp', 'Tbl', 'Add', 'Sub', 'Mul', 'Div', 'Power', 'Concat', 'LT', 'LE', 'EQ', 'GE', 'GT', 'NE', 'Isect', 'List', 'Range', 'Uplus', 'Uminus', 'Percent', 'Paren', 'MissArg', 'Str', 'Extended', 'Attr', 'Sheet', 'EndSheet', 'Err', 'Bool', 'Int', 'Num', 'Array', 'Func', 'FuncVar', 'Name', 'Ref', 'Area', 'MemArea', 'MemErr', 'MemNoMem', 'MemFunc', 'RefErr', 'AreaErr', 'RefN', 'AreaN', 'MemAreaN', 'MemNoMemN', '', '', '', '', '', '', '', '', 'FuncCE', 'NameX', 'Ref3d', 'Area3d', 'RefErr3d', 'AreaErr3d', '', '']
+
+func_defs = {
+ # index: (name, min#args, max#args, flags, #known_args, return_type, kargs)
+ 0 : ('COUNT', 0, 30, 0x04, 1, 'V', 'R'),
+ 1 : ('IF', 2, 3, 0x04, 3, 'V', 'VRR'),
+ 2 : ('ISNA', 1, 1, 0x02, 1, 'V', 'V'),
+ 3 : ('ISERROR', 1, 1, 0x02, 1, 'V', 'V'),
+ 4 : ('SUM', 0, 30, 0x04, 1, 'V', 'R'),
+ 5 : ('AVERAGE', 1, 30, 0x04, 1, 'V', 'R'),
+ 6 : ('MIN', 1, 30, 0x04, 1, 'V', 'R'),
+ 7 : ('MAX', 1, 30, 0x04, 1, 'V', 'R'),
+ 8 : ('ROW', 0, 1, 0x04, 1, 'V', 'R'),
+ 9 : ('COLUMN', 0, 1, 0x04, 1, 'V', 'R'),
+ 10 : ('NA', 0, 0, 0x02, 0, 'V', ''),
+ 11 : ('NPV', 2, 30, 0x04, 2, 'V', 'VR'),
+ 12 : ('STDEV', 1, 30, 0x04, 1, 'V', 'R'),
+ 13 : ('DOLLAR', 1, 2, 0x04, 1, 'V', 'V'),
+ 14 : ('FIXED', 2, 3, 0x04, 3, 'V', 'VVV'),
+ 15 : ('SIN', 1, 1, 0x02, 1, 'V', 'V'),
+ 16 : ('COS', 1, 1, 0x02, 1, 'V', 'V'),
+ 17 : ('TAN', 1, 1, 0x02, 1, 'V', 'V'),
+ 18 : ('ATAN', 1, 1, 0x02, 1, 'V', 'V'),
+ 19 : ('PI', 0, 0, 0x02, 0, 'V', ''),
+ 20 : ('SQRT', 1, 1, 0x02, 1, 'V', 'V'),
+ 21 : ('EXP', 1, 1, 0x02, 1, 'V', 'V'),
+ 22 : ('LN', 1, 1, 0x02, 1, 'V', 'V'),
+ 23 : ('LOG10', 1, 1, 0x02, 1, 'V', 'V'),
+ 24 : ('ABS', 1, 1, 0x02, 1, 'V', 'V'),
+ 25 : ('INT', 1, 1, 0x02, 1, 'V', 'V'),
+ 26 : ('SIGN', 1, 1, 0x02, 1, 'V', 'V'),
+ 27 : ('ROUND', 2, 2, 0x02, 2, 'V', 'VV'),
+ 28 : ('LOOKUP', 2, 3, 0x04, 2, 'V', 'VR'),
+ 29 : ('INDEX', 2, 4, 0x0c, 4, 'R', 'RVVV'),
+ 30 : ('REPT', 2, 2, 0x02, 2, 'V', 'VV'),
+ 31 : ('MID', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 32 : ('LEN', 1, 1, 0x02, 1, 'V', 'V'),
+ 33 : ('VALUE', 1, 1, 0x02, 1, 'V', 'V'),
+ 34 : ('TRUE', 0, 0, 0x02, 0, 'V', ''),
+ 35 : ('FALSE', 0, 0, 0x02, 0, 'V', ''),
+ 36 : ('AND', 1, 30, 0x04, 1, 'V', 'R'),
+ 37 : ('OR', 1, 30, 0x04, 1, 'V', 'R'),
+ 38 : ('NOT', 1, 1, 0x02, 1, 'V', 'V'),
+ 39 : ('MOD', 2, 2, 0x02, 2, 'V', 'VV'),
+ 40 : ('DCOUNT', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 41 : ('DSUM', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 42 : ('DAVERAGE', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 43 : ('DMIN', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 44 : ('DMAX', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 45 : ('DSTDEV', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 46 : ('VAR', 1, 30, 0x04, 1, 'V', 'R'),
+ 47 : ('DVAR', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 48 : ('TEXT', 2, 2, 0x02, 2, 'V', 'VV'),
+ 49 : ('LINEST', 1, 4, 0x04, 4, 'A', 'RRVV'),
+ 50 : ('TREND', 1, 4, 0x04, 4, 'A', 'RRRV'),
+ 51 : ('LOGEST', 1, 4, 0x04, 4, 'A', 'RRVV'),
+ 52 : ('GROWTH', 1, 4, 0x04, 4, 'A', 'RRRV'),
+ 56 : ('PV', 3, 5, 0x04, 5, 'V', 'VVVVV'),
+ 57 : ('FV', 3, 5, 0x04, 5, 'V', 'VVVVV'),
+ 58 : ('NPER', 3, 5, 0x04, 5, 'V', 'VVVVV'),
+ 59 : ('PMT', 3, 5, 0x04, 5, 'V', 'VVVVV'),
+ 60 : ('RATE', 3, 6, 0x04, 6, 'V', 'VVVVVV'),
+ 61 : ('MIRR', 3, 3, 0x02, 3, 'V', 'RVV'),
+ 62 : ('IRR', 1, 2, 0x04, 2, 'V', 'RV'),
+ 63 : ('RAND', 0, 0, 0x0a, 0, 'V', ''),
+ 64 : ('MATCH', 2, 3, 0x04, 3, 'V', 'VRR'),
+ 65 : ('DATE', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 66 : ('TIME', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 67 : ('DAY', 1, 1, 0x02, 1, 'V', 'V'),
+ 68 : ('MONTH', 1, 1, 0x02, 1, 'V', 'V'),
+ 69 : ('YEAR', 1, 1, 0x02, 1, 'V', 'V'),
+ 70 : ('WEEKDAY', 1, 2, 0x04, 2, 'V', 'VV'),
+ 71 : ('HOUR', 1, 1, 0x02, 1, 'V', 'V'),
+ 72 : ('MINUTE', 1, 1, 0x02, 1, 'V', 'V'),
+ 73 : ('SECOND', 1, 1, 0x02, 1, 'V', 'V'),
+ 74 : ('NOW', 0, 0, 0x0a, 0, 'V', ''),
+ 75 : ('AREAS', 1, 1, 0x02, 1, 'V', 'R'),
+ 76 : ('ROWS', 1, 1, 0x02, 1, 'V', 'R'),
+ 77 : ('COLUMNS', 1, 1, 0x02, 1, 'V', 'R'),
+ 78 : ('OFFSET', 3, 5, 0x04, 5, 'R', 'RVVVV'),
+ 82 : ('SEARCH', 2, 3, 0x04, 3, 'V', 'VVV'),
+ 83 : ('TRANSPOSE', 1, 1, 0x02, 1, 'A', 'A'),
+ 86 : ('TYPE', 1, 1, 0x02, 1, 'V', 'V'),
+ 92 : ('SERIESSUM', 4, 4, 0x02, 4, 'V', 'VVVA'),
+ 97 : ('ATAN2', 2, 2, 0x02, 2, 'V', 'VV'),
+ 98 : ('ASIN', 1, 1, 0x02, 1, 'V', 'V'),
+ 99 : ('ACOS', 1, 1, 0x02, 1, 'V', 'V'),
+ 100: ('CHOOSE', 2, 30, 0x04, 2, 'V', 'VR'),
+ 101: ('HLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'),
+ 102: ('VLOOKUP', 3, 4, 0x04, 4, 'V', 'VRRV'),
+ 105: ('ISREF', 1, 1, 0x02, 1, 'V', 'R'),
+ 109: ('LOG', 1, 2, 0x04, 2, 'V', 'VV'),
+ 111: ('CHAR', 1, 1, 0x02, 1, 'V', 'V'),
+ 112: ('LOWER', 1, 1, 0x02, 1, 'V', 'V'),
+ 113: ('UPPER', 1, 1, 0x02, 1, 'V', 'V'),
+ 114: ('PROPER', 1, 1, 0x02, 1, 'V', 'V'),
+ 115: ('LEFT', 1, 2, 0x04, 2, 'V', 'VV'),
+ 116: ('RIGHT', 1, 2, 0x04, 2, 'V', 'VV'),
+ 117: ('EXACT', 2, 2, 0x02, 2, 'V', 'VV'),
+ 118: ('TRIM', 1, 1, 0x02, 1, 'V', 'V'),
+ 119: ('REPLACE', 4, 4, 0x02, 4, 'V', 'VVVV'),
+ 120: ('SUBSTITUTE', 3, 4, 0x04, 4, 'V', 'VVVV'),
+ 121: ('CODE', 1, 1, 0x02, 1, 'V', 'V'),
+ 124: ('FIND', 2, 3, 0x04, 3, 'V', 'VVV'),
+ 125: ('CELL', 1, 2, 0x0c, 2, 'V', 'VR'),
+ 126: ('ISERR', 1, 1, 0x02, 1, 'V', 'V'),
+ 127: ('ISTEXT', 1, 1, 0x02, 1, 'V', 'V'),
+ 128: ('ISNUMBER', 1, 1, 0x02, 1, 'V', 'V'),
+ 129: ('ISBLANK', 1, 1, 0x02, 1, 'V', 'V'),
+ 130: ('T', 1, 1, 0x02, 1, 'V', 'R'),
+ 131: ('N', 1, 1, 0x02, 1, 'V', 'R'),
+ 140: ('DATEVALUE', 1, 1, 0x02, 1, 'V', 'V'),
+ 141: ('TIMEVALUE', 1, 1, 0x02, 1, 'V', 'V'),
+ 142: ('SLN', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 143: ('SYD', 4, 4, 0x02, 4, 'V', 'VVVV'),
+ 144: ('DDB', 4, 5, 0x04, 5, 'V', 'VVVVV'),
+ 148: ('INDIRECT', 1, 2, 0x0c, 2, 'R', 'VV'),
+ 162: ('CLEAN', 1, 1, 0x02, 1, 'V', 'V'),
+ 163: ('MDETERM', 1, 1, 0x02, 1, 'V', 'A'),
+ 164: ('MINVERSE', 1, 1, 0x02, 1, 'A', 'A'),
+ 165: ('MMULT', 2, 2, 0x02, 2, 'A', 'AA'),
+ 167: ('IPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'),
+ 168: ('PPMT', 4, 6, 0x04, 6, 'V', 'VVVVVV'),
+ 169: ('COUNTA', 0, 30, 0x04, 1, 'V', 'R'),
+ 183: ('PRODUCT', 0, 30, 0x04, 1, 'V', 'R'),
+ 184: ('FACT', 1, 1, 0x02, 1, 'V', 'V'),
+ 189: ('DPRODUCT', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 190: ('ISNONTEXT', 1, 1, 0x02, 1, 'V', 'V'),
+ 193: ('STDEVP', 1, 30, 0x04, 1, 'V', 'R'),
+ 194: ('VARP', 1, 30, 0x04, 1, 'V', 'R'),
+ 195: ('DSTDEVP', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 196: ('DVARP', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 197: ('TRUNC', 1, 2, 0x04, 2, 'V', 'VV'),
+ 198: ('ISLOGICAL', 1, 1, 0x02, 1, 'V', 'V'),
+ 199: ('DCOUNTA', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 204: ('USDOLLAR', 1, 2, 0x04, 2, 'V', 'VV'),
+ 205: ('FINDB', 2, 3, 0x04, 3, 'V', 'VVV'),
+ 206: ('SEARCHB', 2, 3, 0x04, 3, 'V', 'VVV'),
+ 207: ('REPLACEB', 4, 4, 0x02, 4, 'V', 'VVVV'),
+ 208: ('LEFTB', 1, 2, 0x04, 2, 'V', 'VV'),
+ 209: ('RIGHTB', 1, 2, 0x04, 2, 'V', 'VV'),
+ 210: ('MIDB', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 211: ('LENB', 1, 1, 0x02, 1, 'V', 'V'),
+ 212: ('ROUNDUP', 2, 2, 0x02, 2, 'V', 'VV'),
+ 213: ('ROUNDDOWN', 2, 2, 0x02, 2, 'V', 'VV'),
+ 214: ('ASC', 1, 1, 0x02, 1, 'V', 'V'),
+ 215: ('DBCS', 1, 1, 0x02, 1, 'V', 'V'),
+ 216: ('RANK', 2, 3, 0x04, 3, 'V', 'VRV'),
+ 219: ('ADDRESS', 2, 5, 0x04, 5, 'V', 'VVVVV'),
+ 220: ('DAYS360', 2, 3, 0x04, 3, 'V', 'VVV'),
+ 221: ('TODAY', 0, 0, 0x0a, 0, 'V', ''),
+ 222: ('VDB', 5, 7, 0x04, 7, 'V', 'VVVVVVV'),
+ 227: ('MEDIAN', 1, 30, 0x04, 1, 'V', 'R'),
+ 228: ('SUMPRODUCT', 1, 30, 0x04, 1, 'V', 'A'),
+ 229: ('SINH', 1, 1, 0x02, 1, 'V', 'V'),
+ 230: ('COSH', 1, 1, 0x02, 1, 'V', 'V'),
+ 231: ('TANH', 1, 1, 0x02, 1, 'V', 'V'),
+ 232: ('ASINH', 1, 1, 0x02, 1, 'V', 'V'),
+ 233: ('ACOSH', 1, 1, 0x02, 1, 'V', 'V'),
+ 234: ('ATANH', 1, 1, 0x02, 1, 'V', 'V'),
+ 235: ('DGET', 3, 3, 0x02, 3, 'V', 'RRR'),
+ 244: ('INFO', 1, 1, 0x02, 1, 'V', 'V'),
+ 247: ('DB', 4, 5, 0x04, 5, 'V', 'VVVVV'),
+ 252: ('FREQUENCY', 2, 2, 0x02, 2, 'A', 'RR'),
+ 261: ('ERROR.TYPE', 1, 1, 0x02, 1, 'V', 'V'),
+ 269: ('AVEDEV', 1, 30, 0x04, 1, 'V', 'R'),
+ 270: ('BETADIST', 3, 5, 0x04, 1, 'V', 'V'),
+ 271: ('GAMMALN', 1, 1, 0x02, 1, 'V', 'V'),
+ 272: ('BETAINV', 3, 5, 0x04, 1, 'V', 'V'),
+ 273: ('BINOMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'),
+ 274: ('CHIDIST', 2, 2, 0x02, 2, 'V', 'VV'),
+ 275: ('CHIINV', 2, 2, 0x02, 2, 'V', 'VV'),
+ 276: ('COMBIN', 2, 2, 0x02, 2, 'V', 'VV'),
+ 277: ('CONFIDENCE', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 278: ('CRITBINOM', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 279: ('EVEN', 1, 1, 0x02, 1, 'V', 'V'),
+ 280: ('EXPONDIST', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 281: ('FDIST', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 282: ('FINV', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 283: ('FISHER', 1, 1, 0x02, 1, 'V', 'V'),
+ 284: ('FISHERINV', 1, 1, 0x02, 1, 'V', 'V'),
+ 285: ('FLOOR', 2, 2, 0x02, 2, 'V', 'VV'),
+ 286: ('GAMMADIST', 4, 4, 0x02, 4, 'V', 'VVVV'),
+ 287: ('GAMMAINV', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 288: ('CEILING', 2, 2, 0x02, 2, 'V', 'VV'),
+ 289: ('HYPGEOMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'),
+ 290: ('LOGNORMDIST', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 291: ('LOGINV', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 292: ('NEGBINOMDIST', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 293: ('NORMDIST', 4, 4, 0x02, 4, 'V', 'VVVV'),
+ 294: ('NORMSDIST', 1, 1, 0x02, 1, 'V', 'V'),
+ 295: ('NORMINV', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 296: ('NORMSINV', 1, 1, 0x02, 1, 'V', 'V'),
+ 297: ('STANDARDIZE', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 298: ('ODD', 1, 1, 0x02, 1, 'V', 'V'),
+ 299: ('PERMUT', 2, 2, 0x02, 2, 'V', 'VV'),
+ 300: ('POISSON', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 301: ('TDIST', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 302: ('WEIBULL', 4, 4, 0x02, 4, 'V', 'VVVV'),
+ 303: ('SUMXMY2', 2, 2, 0x02, 2, 'V', 'AA'),
+ 304: ('SUMX2MY2', 2, 2, 0x02, 2, 'V', 'AA'),
+ 305: ('SUMX2PY2', 2, 2, 0x02, 2, 'V', 'AA'),
+ 306: ('CHITEST', 2, 2, 0x02, 2, 'V', 'AA'),
+ 307: ('CORREL', 2, 2, 0x02, 2, 'V', 'AA'),
+ 308: ('COVAR', 2, 2, 0x02, 2, 'V', 'AA'),
+ 309: ('FORECAST', 3, 3, 0x02, 3, 'V', 'VAA'),
+ 310: ('FTEST', 2, 2, 0x02, 2, 'V', 'AA'),
+ 311: ('INTERCEPT', 2, 2, 0x02, 2, 'V', 'AA'),
+ 312: ('PEARSON', 2, 2, 0x02, 2, 'V', 'AA'),
+ 313: ('RSQ', 2, 2, 0x02, 2, 'V', 'AA'),
+ 314: ('STEYX', 2, 2, 0x02, 2, 'V', 'AA'),
+ 315: ('SLOPE', 2, 2, 0x02, 2, 'V', 'AA'),
+ 316: ('TTEST', 4, 4, 0x02, 4, 'V', 'AAVV'),
+ 317: ('PROB', 3, 4, 0x04, 3, 'V', 'AAV'),
+ 318: ('DEVSQ', 1, 30, 0x04, 1, 'V', 'R'),
+ 319: ('GEOMEAN', 1, 30, 0x04, 1, 'V', 'R'),
+ 320: ('HARMEAN', 1, 30, 0x04, 1, 'V', 'R'),
+ 321: ('SUMSQ', 0, 30, 0x04, 1, 'V', 'R'),
+ 322: ('KURT', 1, 30, 0x04, 1, 'V', 'R'),
+ 323: ('SKEW', 1, 30, 0x04, 1, 'V', 'R'),
+ 324: ('ZTEST', 2, 3, 0x04, 2, 'V', 'RV'),
+ 325: ('LARGE', 2, 2, 0x02, 2, 'V', 'RV'),
+ 326: ('SMALL', 2, 2, 0x02, 2, 'V', 'RV'),
+ 327: ('QUARTILE', 2, 2, 0x02, 2, 'V', 'RV'),
+ 328: ('PERCENTILE', 2, 2, 0x02, 2, 'V', 'RV'),
+ 329: ('PERCENTRANK', 2, 3, 0x04, 2, 'V', 'RV'),
+ 330: ('MODE', 1, 30, 0x04, 1, 'V', 'A'),
+ 331: ('TRIMMEAN', 2, 2, 0x02, 2, 'V', 'RV'),
+ 332: ('TINV', 2, 2, 0x02, 2, 'V', 'VV'),
+ 336: ('CONCATENATE', 0, 30, 0x04, 1, 'V', 'V'),
+ 337: ('POWER', 2, 2, 0x02, 2, 'V', 'VV'),
+ 342: ('RADIANS', 1, 1, 0x02, 1, 'V', 'V'),
+ 343: ('DEGREES', 1, 1, 0x02, 1, 'V', 'V'),
+ 344: ('SUBTOTAL', 2, 30, 0x04, 2, 'V', 'VR'),
+ 345: ('SUMIF', 2, 3, 0x04, 3, 'V', 'RVR'),
+ 346: ('COUNTIF', 2, 2, 0x02, 2, 'V', 'RV'),
+ 347: ('COUNTBLANK', 1, 1, 0x02, 1, 'V', 'R'),
+ 350: ('ISPMT', 4, 4, 0x02, 4, 'V', 'VVVV'),
+ 351: ('DATEDIF', 3, 3, 0x02, 3, 'V', 'VVV'),
+ 352: ('DATESTRING', 1, 1, 0x02, 1, 'V', 'V'),
+ 353: ('NUMBERSTRING', 2, 2, 0x02, 2, 'V', 'VV'),
+ 354: ('ROMAN', 1, 2, 0x04, 2, 'V', 'VV'),
+ 358: ('GETPIVOTDATA', 2, 2, 0x02, 2, 'V', 'RV'),
+ 359: ('HYPERLINK', 1, 2, 0x04, 2, 'V', 'VV'),
+ 360: ('PHONETIC', 1, 1, 0x02, 1, 'V', 'V'),
+ 361: ('AVERAGEA', 1, 30, 0x04, 1, 'V', 'R'),
+ 362: ('MAXA', 1, 30, 0x04, 1, 'V', 'R'),
+ 363: ('MINA', 1, 30, 0x04, 1, 'V', 'R'),
+ 364: ('STDEVPA', 1, 30, 0x04, 1, 'V', 'R'),
+ 365: ('VARPA', 1, 30, 0x04, 1, 'V', 'R'),
+ 366: ('STDEVA', 1, 30, 0x04, 1, 'V', 'R'),
+ 367: ('VARA', 1, 30, 0x04, 1, 'V', 'R'),
+ 368: ('BAHTTEXT', 1, 1, 0x02, 1, 'V', 'V'),
+ 369: ('THAIDAYOFWEEK', 1, 1, 0x02, 1, 'V', 'V'),
+ 370: ('THAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'),
+ 371: ('THAIMONTHOFYEAR', 1, 1, 0x02, 1, 'V', 'V'),
+ 372: ('THAINUMSOUND', 1, 1, 0x02, 1, 'V', 'V'),
+ 373: ('THAINUMSTRING', 1, 1, 0x02, 1, 'V', 'V'),
+ 374: ('THAISTRINGLENGTH', 1, 1, 0x02, 1, 'V', 'V'),
+ 375: ('ISTHAIDIGIT', 1, 1, 0x02, 1, 'V', 'V'),
+ 376: ('ROUNDBAHTDOWN', 1, 1, 0x02, 1, 'V', 'V'),
+ 377: ('ROUNDBAHTUP', 1, 1, 0x02, 1, 'V', 'V'),
+ 378: ('THAIYEAR', 1, 1, 0x02, 1, 'V', 'V'),
+ 379: ('RTD', 2, 5, 0x04, 1, 'V', 'V'),
+ }
+
+tAttrNames = {
+ 0x00: "Skip??", # seen in SAMPLES.XLS which shipped with Excel 5.0
+ 0x01: "Volatile",
+ 0x02: "If",
+ 0x04: "Choose",
+ 0x08: "Skip",
+ 0x10: "Sum",
+ 0x20: "Assign",
+ 0x40: "Space",
+ 0x41: "SpaceVolatile",
+ }
+
+_error_opcodes = {} # used as a set: opcode -> 1
+for _x in [0x07, 0x08, 0x0A, 0x0B, 0x1C, 0x1D, 0x2F]:
+ _error_opcodes[_x] = 1
+is_error_opcode = _error_opcodes.has_key # is_error_opcode(opcode) -> true if opcode is listed above
+
+tRangeFuncs = (min, max, min, max, min, max) # min of each "lo", max of each "hi"; presumably applied via do_box_funcs to Ref3D coords -- confirm
+tIsectFuncs = (max, min, max, min, max, min) # max of each "lo", min of each "hi"; presumably applied via do_box_funcs to Ref3D coords -- confirm
+
+def do_box_funcs(box_funcs, boxa, boxb):
+    """Combine the coords of two boxes, one coordinate at a time.
+
+    Each function in box_funcs is called with the matching coordinate of
+    boxa.coords and boxb.coords; the results are returned as a tuple.
+    """
+    combined = []
+    for combine, coord_a, coord_b in zip(box_funcs, boxa.coords, boxb.coords):
+        combined.append(combine(coord_a, coord_b))
+    return tuple(combined)
+
+def adjust_cell_addr_biff8(rowval, colval, reldelta, browx=None, bcolx=None):
+    """Decode a BIFF8 (row word, column word) cell address.
+
+    Bits 15 and 14 of colval are read as the row-relative and
+    column-relative flags; the column number is the low 8 bits of colval.
+    For a relative component: when reldelta is true the stored number is
+    re-read as a signed offset (16-bit for rows, 8-bit for columns);
+    otherwise the base position browx / bcolx is subtracted from it.
+
+    Returns (rowx, colx, row_rel, col_rel).
+    """
+    row_rel = (colval >> 15) & 1
+    col_rel = (colval >> 14) & 1
+    rowx, colx = rowval, colval & 0xff
+    if not reldelta:
+        # Relative to a known base cell: convert to an offset from it.
+        if row_rel:
+            rowx = rowx - browx
+        if col_rel:
+            colx = colx - bcolx
+        return rowx, colx, row_rel, col_rel
+    # Stored value is already a delta: fold the unsigned value into a signed one.
+    if row_rel and rowx >= 32768:
+        rowx = rowx - 65536
+    if col_rel and colx >= 128:
+        colx = colx - 256
+    return rowx, colx, row_rel, col_rel
+
+def adjust_cell_addr_biff_le7(
+        rowval, colval, reldelta, browx=None, bcolx=None):
+    """Decode a BIFF7-or-earlier (row word, column byte) cell address.
+
+    Bits 15 and 14 of rowval are read as the row-relative and
+    column-relative flags; the row number is the low 14 bits of rowval.
+    For a relative component: when reldelta is true the stored number is
+    re-read as a signed offset (14-bit for rows, 8-bit for columns);
+    otherwise the base position browx / bcolx is subtracted from it.
+
+    Returns (rowx, colx, row_rel, col_rel).
+    """
+    row_rel = (rowval >> 15) & 1
+    col_rel = (rowval >> 14) & 1
+    rowx, colx = rowval & 0x3fff, colval
+    if not reldelta:
+        # Relative to a known base cell: convert to an offset from it.
+        if row_rel:
+            rowx = rowx - browx
+        if col_rel:
+            colx = colx - bcolx
+        return rowx, colx, row_rel, col_rel
+    # Stored value is already a delta: fold the unsigned value into a signed one.
+    if row_rel and rowx >= 8192:
+        rowx = rowx - 16384
+    if col_rel and colx >= 128:
+        colx = colx - 256
+    return rowx, colx, row_rel, col_rel
+
+def get_cell_addr(data, pos, bv, reldelta, browx=None, bcolx=None):
+    """Unpack one cell address from data at pos and decode it.
+
+    For bv >= 80 the address is two little-endian 16-bit words (4 bytes);
+    for earlier versions it is a 16-bit word plus one byte (3 bytes).
+    Returns the (rowx, colx, row_rel, col_rel) tuple produced by the
+    matching adjust_cell_addr_* function.
+    """
+    if bv < 80:
+        rowval, colval = unpack("<HB", data[pos:pos+3])
+        return adjust_cell_addr_biff_le7(
+            rowval, colval, reldelta, browx, bcolx)
+    rowval, colval = unpack("<HH", data[pos:pos+4])
+    return adjust_cell_addr_biff8(rowval, colval, reldelta, browx, bcolx)
+
+def get_cell_range_addr(data, pos, bv, reldelta, browx=None, bcolx=None):
+    """Unpack a two-corner cell range from data at pos and decode it.
+
+    The record holds row1, row2, col1, col2: four 16-bit words (8 bytes)
+    for bv >= 80, otherwise two 16-bit words and two bytes (6 bytes).
+    Returns a pair of (rowx, colx, row_rel, col_rel) tuples, the
+    (row1, col1) corner first and the (row2, col2) corner second.
+    """
+    if bv >= 80:
+        fmt, nbytes, adjust = "<HHHH", 8, adjust_cell_addr_biff8
+    else:
+        fmt, nbytes, adjust = "<HHBB", 6, adjust_cell_addr_biff_le7
+    row1val, row2val, col1val, col2val = unpack(fmt, data[pos:pos+nbytes])
+    corner1 = adjust(row1val, col1val, reldelta, browx, bcolx)
+    corner2 = adjust(row2val, col2val, reldelta, browx, bcolx)
+    return corner1, corner2
+
+def get_externsheet_local_range(bk, refx, blah=0):
+    """Map an index into bk._externsheet_info to a pair of xlrd sheet indexes.
+
+    Returns (first_sheetx, last_sheetx) for a usable internal reference,
+    otherwise a pair of equal negative codes:
+        -1    internal reference, any sheet
+        -2    internal reference, deleted sheet(s)
+        -3    internal reference, but to a macro sheet
+        -4    external reference
+        -5    reference whose record is the add-ins one
+        -101  refx is not a valid index into bk._externsheet_info
+        -102  first/last sheet numbers are out of range
+    When blah is true, a diagnostic line is printed for most of the
+    negative results; the bad-refx message is always printed.
+    """
+    try:
+        info = bk._externsheet_info[refx]
+    except IndexError:
+        print("!!! get_externsheet_local_range: refx=%d, not in range(%d)" \
+            % (refx, len(bk._externsheet_info)))
+        return (-101, -101)
+    ref_recordx, ref_first_sheetx, ref_last_sheetx = info
+    sheet_pair = (ref_first_sheetx, ref_last_sheetx)
+    if ref_recordx == bk._supbook_addins_inx:
+        if blah:
+            print("/// get_externsheet_local_range(refx=%d) -> addins %r" % (refx, info))
+        assert ref_first_sheetx == 0xFFFE == ref_last_sheetx
+        return (-5, -5)
+    if ref_recordx != bk._supbook_locals_inx:
+        if blah:
+            print("/// get_externsheet_local_range(refx=%d) -> external %r" % (refx, info))
+        return (-4, -4) # external reference
+    if sheet_pair == (0xFFFE, 0xFFFE):
+        if blah:
+            print("/// get_externsheet_local_range(refx=%d) -> unspecified sheet %r" % (refx, info))
+        return (-1, -1) # internal reference, any sheet
+    if sheet_pair == (0xFFFF, 0xFFFF):
+        if blah:
+            print("/// get_externsheet_local_range(refx=%d) -> deleted sheet(s)" % (refx, ))
+        return (-2, -2) # internal reference, deleted sheet(s)
+    nsheets = len(bk._all_sheets_map)
+    if (ref_first_sheetx < 0
+        or ref_first_sheetx > ref_last_sheetx
+        or ref_last_sheetx >= nsheets):
+        if blah:
+            print("/// get_externsheet_local_range(refx=%d) -> %r" % (refx, info))
+            print("--- first/last sheet not in range(%d)" % nsheets)
+        return (-102, -102) # stuffed up somewhere :-(
+    xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx]
+    xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx]
+    if xlrd_sheetx1 < 0 or xlrd_sheetx1 > xlrd_sheetx2:
+        return (-3, -3) # internal reference, but to a macro sheet
+    return xlrd_sheetx1, xlrd_sheetx2
+
+def get_externsheet_local_range_b57(
+        bk, raw_extshtx, ref_first_sheetx, ref_last_sheetx, blah=0):
+    """Map a raw sheet reference to a pair of xlrd sheet indexes.
+
+    Returns (first_sheetx, last_sheetx) for a usable internal reference,
+    otherwise a pair of equal negative codes:
+        -2    internal reference, deleted sheet(s)
+        -3    internal reference, but to a macro sheet
+        -4    external reference (raw_extshtx > 0)
+        -103  first/last sheet numbers are out of range
+    When blah is true, a diagnostic line is printed for the -4 and -103
+    results.
+    """
+    if raw_extshtx > 0:
+        if blah:
+            print("/// get_externsheet_local_range_b57(raw_extshtx=%d) -> external" % raw_extshtx)
+        return (-4, -4) # external reference
+    if (ref_first_sheetx, ref_last_sheetx) == (-1, -1):
+        return (-2, -2) # internal reference, deleted sheet(s)
+    nsheets = len(bk._all_sheets_map)
+    if (ref_first_sheetx < 0
+        or ref_first_sheetx > ref_last_sheetx
+        or ref_last_sheetx >= nsheets):
+        if blah:
+            print("/// get_externsheet_local_range_b57(%d, %d, %d) -> ???" \
+                % (raw_extshtx, ref_first_sheetx, ref_last_sheetx))
+            print("--- first/last sheet not in range(%d)" % nsheets)
+        return (-103, -103) # stuffed up somewhere :-(
+    xlrd_sheetx1 = bk._all_sheets_map[ref_first_sheetx]
+    xlrd_sheetx2 = bk._all_sheets_map[ref_last_sheetx]
+    if xlrd_sheetx1 < 0 or xlrd_sheetx1 > xlrd_sheetx2:
+        return (-3, -3) # internal reference, but to a macro sheet
+    return xlrd_sheetx1, xlrd_sheetx2
+
+class FormulaError(Exception):
+    """Exception type defined by the formula module."""
+
+oBOOL = 3 # value is an integer: 0 => False; 1 => True
+oERR = 4 # value is None or an int error code
+oMSNG = 5 # tMissArg
+oNUM = 2 # value is a float
+oREF = -1 # value is None or a non-empty list of absolute Ref3D instances
+oREL = -2 # value is None or a non-empty list of relative Ref3D instances
+oSTRG = 1 # value is a Unicode string
+oUNK = 0 # kind unknown or ambiguous; value is None
+
+okind_dict = { # kind number -> symbol name; used by Operand.__repr__
+ -2: "oREL",
+ -1: "oREF",
+ 0 : "oUNK",
+ 1 : "oSTRG",
+ 2 : "oNUM",
+ 3 : "oBOOL",
+ 4 : "oERR",
+ 5 : "oMSNG",
+ }
+
+listsep = ',' #### probably should depend on locale
+
+##
+# <p>Used in evaluating formulas.
+# The following table describes the kinds and how their values
+# are represented.</p>
+#
+# <table border="1" cellpadding="7">
+# <tr>
+# <th>Kind symbol</th>
+# <th>Kind number</th>
+# <th>Value representation</th>
+# </tr>
+# <tr>
+# <td>oBOOL</td>
+# <td align="center">3</td>
+# <td>integer: 0 => False; 1 => True</td>
+# </tr>
+# <tr>
+# <td>oERR</td>
+# <td align="center">4</td>
+# <td>None, or an int error code (same as XL_CELL_ERROR in the Cell class).
+# </td>
+# </tr>
+# <tr>
+# <td>oMSNG</td>
+# <td align="center">5</td>
+# <td>Used by Excel as a placeholder for a missing (not supplied) function
+# argument. Should *not* appear as a final formula result. Value is None.</td>
+# </tr>
+# <tr>
+# <td>oNUM</td>
+# <td align="center">2</td>
+# <td>A float. Note that there is no way of distinguishing dates.</td>
+# </tr>
+# <tr>
+# <td>oREF</td>
+# <td align="center">-1</td>
+# <td>The value is either None or a non-empty list of
+# absolute Ref3D instances.<br>
+# </td>
+# </tr>
+# <tr>
+# <td>oREL</td>
+# <td align="center">-2</td>
+# <td>The value is None or a non-empty list of
+# fully or partially relative Ref3D instances.
+# </td>
+# </tr>
+# <tr>
+# <td>oSTRG</td>
+# <td align="center">1</td>
+# <td>A Unicode string.</td>
+# </tr>
+# <tr>
+# <td>oUNK</td>
+# <td align="center">0</td>
+# <td>The kind is unknown or ambiguous. The value is None</td>
+# </tr>
+# </table>
+#<p></p>
+
+class Operand(object):
+
+    ##
+    # The operand's value. None means the actual value is a variable
+    # (it depends on cell data), not a constant.
+    value = None
+    ##
+    # The operand's kind. oUNK means the kind is not known unambiguously.
+    kind = oUNK
+    ##
+    # The reconstituted text of the original formula. Function names will be
+    # in English irrespective of the original language, which doesn't seem
+    # to be recorded anywhere. The separator is ",", not ";" or whatever else
+    # might be more appropriate for the end-user's locale; patches welcome.
+    text = '?'
+
+    def __init__(self, akind=None, avalue=None, arank=0, atext='?'):
+        # kind and value keep their class-level defaults unless supplied.
+        for attr_name, supplied in (('kind', akind), ('value', avalue)):
+            if supplied is not None:
+                setattr(self, attr_name, supplied)
+        # rank is an internal gizmo (operator precedence);
+        # it's used in reconstructing formula text.
+        self.rank = arank
+        self.text = atext
+
+    def __repr__(self):
+        kind_name = okind_dict.get(self.kind, "?Unknown kind?")
+        details = (kind_name, self.value, self.text)
+        return "Operand(kind=%s, value=%r, text=%r)" % details
+
+# Base class for Ref3D: tuple when built-ins can be subclassed, else object.
+_ref3d_base = object
+if CAN_SUBCLASS_BUILTIN:
+    _ref3d_base = tuple
+
+##
+# <p>Represents an absolute or relative 3-dimensional reference to a box
+# of one or more cells.<br />
+# -- New in version 0.6.0
+# </p>
+#
+# <p>The <i>coords</i> attribute is a tuple of the form:<br />
+# (shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi)<br />
+# where 0 &lt;= thingxlo &lt;= thingx &lt; thingxhi.<br />
+# Note that it is quite possible to have thingx &gt; nthings; for example
+# Print_Titles could have colxhi == 256 and/or rowxhi == 65536
+# irrespective of how many columns/rows are actually used in the worksheet.
+# The caller will need to decide how to handle this situation.
+# Keyword: IndexError :-)
+# </p>
+#
+# <p>The components of the coords attribute are also available as individual
+# attributes: shtxlo, shtxhi, rowxlo, rowxhi, colxlo, and colxhi.</p>
+#
+# <p>The <i>relflags</i> attribute is a 6-tuple of flags which indicate whether
+# the corresponding (sheet|row|col)(lo|hi) is relative (1) or absolute (0).<br>
+# Note that there is necessarily no information available as to what cell(s)
+# the reference could possibly be relative to. The caller must decide what if
+# any use to make of oREL operands. Note also that a partially relative
+# reference may well be a typo.
+# For example, define name A1Z10 as $a$1:$z10 (missing $ after z)
+# while the cursor is on cell Sheet3!A27.<br>
+# The resulting Ref3D instance will have coords = (2, 3, 0, -16, 0, 26)
+# and relflags = (0, 0, 0, 1, 0, 0).<br>
+# So far, only one possibility of a sheet-relative component in
+# a reference has been noticed: a 2D reference located in the "current sheet".
+# <br /> This will appear as coords = (0, 1, ...) and relflags = (1, 1, ...).
+
+class Ref3D(_ref3d_base):
+    """A 3-D box of cells: sheet, row and column ranges plus relative flags.
+
+    Built from a 6-tuple (coords only) or a 12-tuple (coords followed by
+    the six relflags).
+    """
+
+    def __init__(self, atuple):
+        coords = atuple[0:6]
+        relflags = atuple[6:12]
+        if not relflags:
+            # Only coords were supplied: every component is absolute.
+            relflags = (0, 0, 0, 0, 0, 0)
+        self.coords = coords
+        self.relflags = relflags
+        # Expose the six coords components as individual attributes.
+        shtxlo, shtxhi, rowxlo, rowxhi, colxlo, colxhi = self.coords
+        self.shtxlo = shtxlo
+        self.shtxhi = shtxhi
+        self.rowxlo = rowxlo
+        self.rowxhi = rowxhi
+        self.colxlo = colxlo
+        self.colxhi = colxhi
+
+    def __repr__(self):
+        # relflags are only shown when at least one component is relative.
+        all_absolute = (
+            not self.relflags or self.relflags == (0, 0, 0, 0, 0, 0))
+        if all_absolute:
+            return "Ref3D(coords=%r)" % (self.coords, )
+        return "Ref3D(coords=%r, relflags=%r)" \
+            % (self.coords, self.relflags)
+
+# Token codes of the binary operators, 0x03 through 0x0E.
+# Arithmetic and string concatenation:
+tAdd, tSub, tMul, tDiv, tPower, tConcat = range(0x03, 0x09)
+# Relational operators:
+tLT, tLE, tEQ, tGE, tGT, tNE = range(0x09, 0x0F)
+
+import operator as opr
+
+def nop(x):
+    """Identity conversion: hand the value back untouched."""
+    return x
+
+def _opr_pow(x, y):
+    """Return x raised to the power y."""
+    return x ** y
+
+# Plain-function forms of the six relational operators.
+
+def _opr_lt(x, y):
+    return x < y
+
+def _opr_le(x, y):
+    return x <= y
+
+def _opr_eq(x, y):
+    return x == y
+
+def _opr_ge(x, y):
+    return x >= y
+
+def _opr_gt(x, y):
+    return x > y
+
+def _opr_ne(x, y):
+    return x != y
+
+def num2strg(num):
+    """Attempt to emulate Excel's default conversion
+    from number to string.
+
+    The number goes through str(); a trailing ".0" is dropped so that
+    whole-number floats read like integers.
+    """
+    text = str(num)
+    if text[-2:] == ".0":
+        return text[:-2]
+    return text
+
+# Argument converters for binary operators, keyed by operand kind.
+# do_binop looks up each operand's kind here; a kind that is absent means
+# the operation cannot be evaluated and only its text is produced.
+_arith_argdict = {
+    oNUM: nop,
+    oSTRG: float,
+    }
+# Seems no conversions done on relops; in Excel, "1" > 9 produces TRUE.
+_cmp_argdict = {
+    oNUM: nop,
+    oSTRG: nop,
+    }
+_strg_argdict = {
+    oNUM: num2strg,
+    oSTRG: nop,
+    }
+
+# token code -> (argument converters, result kind, function, rank, symbol)
+# rank is the operator precedence used when rebuilding formula text.
+binop_rules = {
+    # relational operators
+    tLT:     (_cmp_argdict,   oBOOL, _opr_lt,  10, '<'),
+    tLE:     (_cmp_argdict,   oBOOL, _opr_le,  10, '<='),
+    tEQ:     (_cmp_argdict,   oBOOL, _opr_eq,  10, '='),
+    tGE:     (_cmp_argdict,   oBOOL, _opr_ge,  10, '>='),
+    tGT:     (_cmp_argdict,   oBOOL, _opr_gt,  10, '>'),
+    tNE:     (_cmp_argdict,   oBOOL, _opr_ne,  10, '<>'),
+    # string concatenation
+    tConcat: (_strg_argdict,  oSTRG, opr.add,  20, '&'),
+    # arithmetic
+    tAdd:    (_arith_argdict, oNUM,  opr.add,  30, '+'),
+    tSub:    (_arith_argdict, oNUM,  opr.sub,  30, '-'),
+    tMul:    (_arith_argdict, oNUM,  opr.mul,  40, '*'),
+    tDiv:    (_arith_argdict, oNUM,  opr.div,  40, '/'),
+    tPower:  (_arith_argdict, oNUM,  _opr_pow, 50, '^'),
+    }
+
+def _unop_identity(x):
+    return x
+
+def _unop_negate(x):
+    return -x
+
+def _unop_percent(x):
+    return x / 100.0
+
+# token code -> (function, rank, prefix symbol, suffix symbol)
+unop_rules = {
+    0x12: (_unop_identity, 70, '+', ''), # unary plus
+    0x13: (_unop_negate, 70, '-', ''), # unary minus
+    0x14: (_unop_percent, 60, '', '%'), # percent
+    }
+
+# Rank (operator precedence) given to leaf operands and to function-call
+# results when formula text is rebuilt.  It is higher than every operator
+# rank used in this part of the file, so such operands are never wrapped
+# in parentheses.
+LEAF_RANK = 90
+FUNC_RANK = 90
+
+# Recursion-depth thresholds for the formula evaluators: above the alarm
+# level debug output is forced on; above the panic level XLRDError is raised.
+STACK_ALARM_LEVEL = 5
+STACK_PANIC_LEVEL = 10
+
+def _infix_text(aop, bop, rank, sym):
+    """Return the formula text for the infix expression "aop sym bop".
+
+    An operand is wrapped in parentheses when its own rank (operator
+    precedence) is lower than the rank of the operator being applied.
+    """
+    atext = aop.text
+    if aop.rank < rank:
+        atext = '(' + atext + ')'
+    btext = bop.text
+    if bop.rank < rank:
+        btext = '(' + btext + ')'
+    return ''.join([atext, sym, btext])
+
+def evaluate_name_formula(bk, nobj, namex, blah=0, level=0):
+    """Evaluate the token stream of a defined-NAME formula.
+
+    The tokens in nobj.raw_formula (the first nobj.basic_formula_len bytes)
+    are processed with an operand stack.  Constant sub-expressions are
+    evaluated, references become Ref3D-valued operands, and the formula
+    text is reconstructed alongside.
+
+    bk -- the workbook object; supplies biff_version, encoding, logfile,
+        name_obj_list, _sheet_names and _externsheet_type_b57.
+    nobj -- the NAME object to evaluate.  On return its stack, result,
+        any_rel, any_err, any_external and evaluated attributes are set;
+        result is the sole stack entry, or None when the stack does not
+        hold exactly one operand.
+    namex -- index of nobj; used to detect a self-referential tNameX and
+        in debug output.
+    blah -- true to emit debug output; forced on when
+        level > STACK_ALARM_LEVEL.
+    level -- recursion depth; names referenced by tName/tNameX that are
+        not yet evaluated are evaluated recursively with level+1.
+
+    Raises XLRDError when level > STACK_PANIC_LEVEL, and FormulaError for
+    tokens that are unexpected or not implemented.
+    """
+    if level > STACK_ALARM_LEVEL:
+        blah = 1
+    data = nobj.raw_formula
+    fmlalen = nobj.basic_formula_len
+    bv = bk.biff_version
+    reldelta = 1 # All defined name formulas use "Method B" [OOo docs]
+    if blah:
+        print "::: evaluate_name_formula %r %r %d %d %r level=%d" \
+            % (namex, nobj.name, fmlalen, bv, data, level)
+        hex_char_dump(data, 0, fmlalen)
+    if level > STACK_PANIC_LEVEL:
+        raise XLRDError("Excessive indirect references in NAME formula")
+    sztab = szdict[bv]
+    pos = 0
+    stack = []
+    any_rel = 0
+    any_err = 0
+    any_external = 0
+    unk_opnd = Operand(oUNK, None)
+    error_opnd = Operand(oERR, None)
+    spush = stack.append
+
+    def do_binop(opcd, stk):
+        # Pop two operands, push the result of the binary operator opcd.
+        # The value is computed only when both operand kinds are
+        # convertible and both values are known; the text always is.
+        assert len(stk) >= 2
+        bop = stk.pop()
+        aop = stk.pop()
+        argdict, result_kind, func, rank, sym = binop_rules[opcd]
+        otext = _infix_text(aop, bop, rank, sym)
+        resop = Operand(result_kind, None, rank, otext)
+        try:
+            bconv = argdict[bop.kind]
+            aconv = argdict[aop.kind]
+        except KeyError:
+            stk.append(resop)
+            return
+        if bop.value is None or aop.value is None:
+            stk.append(resop)
+            return
+        bval = bconv(bop.value)
+        aval = aconv(aop.value)
+        result = func(aval, bval)
+        if result_kind == oBOOL:
+            result = intbool(result) # -> 1 or 0
+        resop.value = result
+        stk.append(resop)
+
+    def do_unaryop(opcode, arglist, result_kind, stk):
+        # Pop one operand, push the result of the unary operator opcode.
+        assert len(stk) >= 1
+        aop = stk.pop()
+        assert aop.kind in arglist
+        val = aop.value
+        func, rank, sym1, sym2 = unop_rules[opcode]
+        otext = ''.join([
+            sym1,
+            '('[:aop.rank < rank],
+            aop.text,
+            ')'[:aop.rank < rank],
+            sym2,
+            ])
+        if val is not None:
+            val = func(val)
+        stk.append(Operand(result_kind, val, rank, otext))
+
+    def not_in_name_formula(op_arg, oname_arg):
+        msg = "ERROR *** Token 0x%02x (%s) found in NAME formula" \
+            % (op_arg, oname_arg)
+        raise FormulaError(msg)
+
+    if fmlalen == 0:
+        stack = [unk_opnd]
+
+    while 0 <= pos < fmlalen:
+        op = ord(data[pos])
+        # Low 5 bits: base token code; bits 5-6: token class (0 = none).
+        opcode = op & 0x1f
+        optype = (op & 0x60) >> 5
+        if optype:
+            opx = opcode + 32
+        else:
+            opx = opcode
+        oname = onames[opx] # + [" RVA"][optype]
+        sz = sztab[opx]
+        if blah:
+            print "Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \
+                % (pos, op, oname, sz, opcode, optype)
+            print "Stack =", stack
+        if sz == -2:
+            msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \
+                % (op, oname, bv)
+            raise FormulaError(msg)
+        if not optype:
+            if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl
+                not_in_name_formula(op, oname)
+            elif 0x03 <= opcode <= 0x0E:
+                # Add, Sub, Mul, Div, Power
+                # tConcat
+                # tLT, ..., tNE
+                do_binop(opcode, stack)
+            elif opcode == 0x0F: # tIsect
+                if blah: print >> bk.logfile, "tIsect pre", stack
+                assert len(stack) >= 2
+                bop = stack.pop()
+                aop = stack.pop()
+                sym = ' '
+                rank = 80 ########## check #######
+                otext = _infix_text(aop, bop, rank, sym)
+                res = Operand(oREF)
+                res.text = otext
+                if bop.kind == oERR or aop.kind == oERR:
+                    res.kind = oERR
+                elif bop.kind == oUNK or aop.kind == oUNK:
+                    # This can happen with undefined
+                    # (go search in the current sheet) labels.
+                    # For example =Bob Sales
+                    # Each label gets a NAME record with an empty formula (!)
+                    # Evaluation of the tName token classifies it as oUNK
+                    # res.kind = oREF
+                    pass
+                elif bop.kind == oREF == aop.kind:
+                    if aop.value is not None and bop.value is not None:
+                        assert len(aop.value) == 1
+                        assert len(bop.value) == 1
+                        coords = do_box_funcs(
+                            tIsectFuncs, aop.value[0], bop.value[0])
+                        res.value = [Ref3D(coords)]
+                elif bop.kind == oREL == aop.kind:
+                    res.kind = oREL
+                    if aop.value is not None and bop.value is not None:
+                        assert len(aop.value) == 1
+                        assert len(bop.value) == 1
+                        coords = do_box_funcs(
+                            tIsectFuncs, aop.value[0], bop.value[0])
+                        relfa = aop.value[0].relflags
+                        relfb = bop.value[0].relflags
+                        if relfa == relfb:
+                            res.value = [Ref3D(coords + relfa)]
+                else:
+                    pass
+                spush(res)
+                if blah: print >> bk.logfile, "tIsect post", stack
+            elif opcode == 0x10: # tList
+                if blah: print >> bk.logfile, "tList pre", stack
+                assert len(stack) >= 2
+                bop = stack.pop()
+                aop = stack.pop()
+                sym = ','
+                rank = 80 ########## check #######
+                otext = _infix_text(aop, bop, rank, sym)
+                res = Operand(oREF, None, rank, otext)
+                if bop.kind == oERR or aop.kind == oERR:
+                    res.kind = oERR
+                elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL):
+                    res.kind = oREF
+                    if aop.kind == oREL or bop.kind == oREL:
+                        res.kind = oREL
+                    if aop.value is not None and bop.value is not None:
+                        assert len(aop.value) >= 1
+                        assert len(bop.value) == 1
+                        res.value = aop.value + bop.value
+                else:
+                    pass
+                spush(res)
+                if blah: print >> bk.logfile, "tList post", stack
+            elif opcode == 0x11: # tRange
+                if blah: print >> bk.logfile, "tRange pre", stack
+                assert len(stack) >= 2
+                bop = stack.pop()
+                aop = stack.pop()
+                sym = ':'
+                rank = 80 ########## check #######
+                otext = _infix_text(aop, bop, rank, sym)
+                res = Operand(oREF, None, rank, otext)
+                if bop.kind == oERR or aop.kind == oERR:
+                    # Mark the result operand as an error.  (Rebinding res
+                    # to the bare kind constant would push a non-Operand
+                    # onto the stack.)
+                    res.kind = oERR
+                elif bop.kind == oREF == aop.kind:
+                    if aop.value is not None and bop.value is not None:
+                        assert len(aop.value) == 1
+                        assert len(bop.value) == 1
+                        coords = do_box_funcs(
+                            tRangeFuncs, aop.value[0], bop.value[0])
+                        res.value = [Ref3D(coords)]
+                elif bop.kind == oREL == aop.kind:
+                    res.kind = oREL
+                    if aop.value is not None and bop.value is not None:
+                        assert len(aop.value) == 1
+                        assert len(bop.value) == 1
+                        coords = do_box_funcs(
+                            tRangeFuncs, aop.value[0], bop.value[0])
+                        relfa = aop.value[0].relflags
+                        relfb = bop.value[0].relflags
+                        if relfa == relfb:
+                            res.value = [Ref3D(coords + relfa)]
+                else:
+                    pass
+                spush(res)
+                if blah: print >> bk.logfile, "tRange post", stack
+            elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent
+                do_unaryop(opcode, (oUNK, oNUM,), oNUM, stack)
+            elif opcode == 0x15: # tParen
+                # source cosmetics
+                pass
+            elif opcode == 0x16: # tMissArg
+                spush(Operand(oMSNG, None, LEAF_RANK, ''))
+            elif opcode == 0x17: # tStr
+                if bv <= 70:
+                    strg, newpos = unpack_string_update_pos(
+                        data, pos+1, bk.encoding, lenlen=1)
+                else:
+                    strg, newpos = unpack_unicode_update_pos(
+                        data, pos+1, lenlen=1)
+                # The token size depends on the string length.
+                sz = newpos - pos
+                if blah: print >> bk.logfile, " sz=%d strg=%r" % (sz, strg)
+                text = '"' + strg.replace('"', '""') + '"'
+                spush(Operand(oSTRG, strg, LEAF_RANK, text))
+            elif opcode == 0x18: # tExtended
+                # new with BIFF 8
+                assert bv >= 80
+                # not in OOo docs
+                raise FormulaError("tExtended token not implemented")
+            elif opcode == 0x19: # tAttr
+                subop, nc = unpack("<BH", data[pos+1:pos+4])
+                subname = tAttrNames.get(subop, "??Unknown??")
+                if subop == 0x04: # Choose
+                    sz = nc * 2 + 6
+                elif subop == 0x10: # Sum (single arg)
+                    sz = 4
+                    if blah: print >> bk.logfile, "tAttrSum", stack
+                    assert len(stack) >= 1
+                    aop = stack[-1]
+                    otext = 'SUM(%s)' % aop.text
+                    stack[-1] = Operand(oNUM, None, FUNC_RANK, otext)
+                else:
+                    sz = 4
+                if blah:
+                    print " subop=%02xh subname=t%s sz=%d nc=%02xh" \
+                        % (subop, subname, sz, nc)
+            elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet
+                assert bv < 50
+                raise FormulaError("tSheet & tEndsheet tokens not implemented")
+            elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum
+                inx = opcode - 0x1C
+                nb = [1, 1, 2, 8][inx]
+                kind = [oERR, oBOOL, oNUM, oNUM][inx]
+                value, = unpack("<" + "BBHd"[inx], data[pos+1:pos+1+nb])
+                if inx == 2: # tInt
+                    value = float(value)
+                    text = str(value)
+                elif inx == 3: # tNum
+                    text = str(value)
+                elif inx == 1: # tBool
+                    text = ('FALSE', 'TRUE')[value]
+                else:
+                    text = '"' +error_text_from_code[value] + '"'
+                spush(Operand(kind, value, LEAF_RANK, text))
+            else:
+                raise FormulaError("Unhandled opcode: 0x%02x" % opcode)
+            if sz <= 0:
+                raise FormulaError("Size not set for opcode 0x%02x" % opcode)
+            pos += sz
+            continue
+        if opcode == 0x00: # tArray
+            spush(unk_opnd)
+        elif opcode == 0x01: # tFunc
+            nb = 1 + int(bv >= 40)
+            funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb])[0]
+            func_attrs = func_defs.get(funcx, None)
+            if not func_attrs:
+                print >> bk.logfile, "*** formula/tFunc unknown FuncID:%d" \
+                    % funcx
+                spush(unk_opnd)
+            else:
+                func_name, nargs = func_attrs[:2]
+                if blah:
+                    print " FuncID=%d name=%s nargs=%d" \
+                        % (funcx, func_name, nargs)
+                assert len(stack) >= nargs
+                # Use an explicit start index: with nargs == 0 the slice
+                # stack[-0:] would be the WHOLE stack, so a zero-argument
+                # function would swallow every pending operand.
+                argx = len(stack) - nargs
+                argtext = listsep.join([arg.text for arg in stack[argx:]])
+                otext = "%s(%s)" % (func_name, argtext)
+                del stack[argx:]
+                res = Operand(oUNK, None, FUNC_RANK, otext)
+                spush(res)
+        elif opcode == 0x02: #tFuncVar
+            nb = 1 + int(bv >= 40)
+            nargs, funcx = unpack("<B" + " BH"[nb], data[pos+1:pos+2+nb])
+            # The top bit of each field is a flag, not part of the number.
+            prompt, nargs = divmod(nargs, 128)
+            macro, funcx = divmod(funcx, 32768)
+            if blah:
+                print " FuncID=%d nargs=%d macro=%d prompt=%d" \
+                    % (funcx, nargs, macro, prompt)
+            func_attrs = func_defs.get(funcx, None)
+            if not func_attrs:
+                print >> bk.logfile, "*** formula/tFuncVar unknown FuncID:%d" \
+                    % funcx
+                spush(unk_opnd)
+            else:
+                func_name, minargs, maxargs = func_attrs[:3]
+                if blah:
+                    print " name: %r, min~max args: %d~%d" \
+                        % (func_name, minargs, maxargs)
+                assert minargs <= nargs <= maxargs
+                assert len(stack) >= nargs
+                # Explicit start index; stack[-0:] would be the whole stack
+                # when the function is called with no arguments.
+                argx = len(stack) - nargs
+                argtext = listsep.join([arg.text for arg in stack[argx:]])
+                otext = "%s(%s)" % (func_name, argtext)
+                res = Operand(oUNK, None, FUNC_RANK, otext)
+                if funcx == 1: # IF
+                    testarg = stack[-nargs]
+                    if testarg.kind not in (oNUM, oBOOL):
+                        if blah and testarg.kind != oUNK:
+                            print "IF testarg kind?"
+                    elif testarg.value not in (0, 1):
+                        if blah and testarg.value is not None:
+                            print "IF testarg value?"
+                    else:
+                        if nargs == 2 and not testarg.value:
+                            # IF(FALSE, tv) => FALSE
+                            res.kind, res.value = oBOOL, 0
+                        else:
+                            # Pick the "true" or the "false" argument.
+                            respos = -nargs + 2 - int(testarg.value)
+                            chosen = stack[respos]
+                            if chosen.kind == oMSNG:
+                                res.kind, res.value = oNUM, 0
+                            else:
+                                res.kind, res.value = chosen.kind, chosen.value
+                        if blah:
+                            print "$$$$$$ IF => constant"
+                elif funcx == 100: # CHOOSE
+                    testarg = stack[-nargs]
+                    if testarg.kind == oNUM:
+                        if 1 <= testarg.value < nargs:
+                            chosen = stack[-nargs + int(testarg.value)]
+                            if chosen.kind == oMSNG:
+                                res.kind, res.value = oNUM, 0
+                            else:
+                                res.kind, res.value = chosen.kind, chosen.value
+                del stack[argx:]
+                spush(res)
+        elif opcode == 0x03: #tName
+            tgtnamex = unpack("<H", data[pos+1:pos+3])[0] - 1
+            # Only change with BIFF version is number of trailing UNUSED bytes!
+            if blah: print >> bk.logfile, " tgtnamex=%d" % tgtnamex
+            tgtobj = bk.name_obj_list[tgtnamex]
+            if not tgtobj.evaluated:
+                ### recursive ###
+                evaluate_name_formula(bk, tgtobj, tgtnamex, blah, level+1)
+            if tgtobj.macro or tgtobj.binary \
+            or tgtobj.any_err:
+                if blah:
+                    tgtobj.dump(
+                        bk.logfile,
+                        header="!!! tgtobj has problems!!!",
+                        footer="----------- --------",
+                        )
+                res = Operand(oUNK, None)
+                any_err = any_err or tgtobj.macro or tgtobj.binary or tgtobj.any_err
+                any_rel = any_rel or tgtobj.any_rel
+            else:
+                assert len(tgtobj.stack) == 1
+                # Copy, so that changing rank/text below cannot alter the
+                # target name's own result.
+                res = copy.deepcopy(tgtobj.stack[0])
+            res.rank = LEAF_RANK
+            if tgtobj.scope == -1:
+                res.text = tgtobj.name
+            else:
+                res.text = "%s!%s" \
+                    % (bk._sheet_names[tgtobj.scope], tgtobj.name)
+            if blah:
+                print >> bk.logfile, " tName: setting text to", repr(res.text)
+            spush(res)
+        elif opcode == 0x04: # tRef
+            # not_in_name_formula(op, oname)
+            res = get_cell_addr(data, pos+1, bv, reldelta)
+            if blah: print >> bk.logfile, " ", res
+            rowx, colx, row_rel, col_rel = res
+            shx1 = shx2 = 0 ####### N.B. relative to the CURRENT SHEET
+            any_rel = 1
+            coords = (shx1, shx2+1, rowx, rowx+1, colx, colx+1)
+            if blah: print >> bk.logfile, " ", coords
+            res = Operand(oUNK, None)
+            if optype == 1:
+                relflags = (1, 1, row_rel, row_rel, col_rel, col_rel)
+                res = Operand(oREL, [Ref3D(coords + relflags)])
+            spush(res)
+        elif opcode == 0x05: # tArea
+            # not_in_name_formula(op, oname)
+            res1, res2 = get_cell_range_addr(data, pos+1, bv, reldelta)
+            if blah: print >> bk.logfile, " ", res1, res2
+            rowx1, colx1, row_rel1, col_rel1 = res1
+            rowx2, colx2, row_rel2, col_rel2 = res2
+            shx1 = shx2 = 0 ####### N.B. relative to the CURRENT SHEET
+            any_rel = 1
+            coords = (shx1, shx2+1, rowx1, rowx2+1, colx1, colx2+1)
+            if blah: print >> bk.logfile, " ", coords
+            res = Operand(oUNK, None)
+            if optype == 1:
+                relflags = (1, 1, row_rel1, row_rel2, col_rel1, col_rel2)
+                res = Operand(oREL, [Ref3D(coords + relflags)])
+            spush(res)
+        elif opcode == 0x06: # tMemArea
+            not_in_name_formula(op, oname)
+        elif opcode == 0x09: # tMemFunc
+            nb = unpack("<H", data[pos+1:pos+3])[0]
+            if blah: print >> bk.logfile, " %d bytes of cell ref formula" % nb
+            # no effect on stack
+        elif opcode == 0x0C: #tRefN
+            not_in_name_formula(op, oname)
+            # res = get_cell_addr(data, pos+1, bv, reldelta=1)
+            # # note *ALL* tRefN usage has signed offset for relative addresses
+            # any_rel = 1
+            # if blah: print >> bk.logfile, " ", res
+            # spush(res)
+        elif opcode == 0x0D: #tAreaN
+            not_in_name_formula(op, oname)
+            # res = get_cell_range_addr(data, pos+1, bv, reldelta=1)
+            # # note *ALL* tAreaN usage has signed offset for relative addresses
+            # any_rel = 1
+            # if blah: print >> bk.logfile, " ", res
+        elif opcode == 0x1A: # tRef3d
+            if bv >= 80:
+                res = get_cell_addr(data, pos+3, bv, reldelta)
+                refx = unpack("<H", data[pos+1:pos+3])[0]
+                shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
+            else:
+                res = get_cell_addr(data, pos+15, bv, reldelta)
+                raw_extshtx, raw_shx1, raw_shx2 = \
+                    unpack("<hxxxxxxxxhh", data[pos+1:pos+15])
+                if blah:
+                    print >> bk.logfile, "tRef3d", raw_extshtx, raw_shx1, raw_shx2
+                shx1, shx2 = get_externsheet_local_range_b57(
+                    bk, raw_extshtx, raw_shx1, raw_shx2, blah)
+            rowx, colx, row_rel, col_rel = res
+            is_rel = row_rel or col_rel
+            any_rel = any_rel or is_rel
+            coords = (shx1, shx2+1, rowx, rowx+1, colx, colx+1)
+            any_err |= shx1 < -1
+            if blah: print >> bk.logfile, " ", coords
+            res = Operand(oUNK, None)
+            if is_rel:
+                relflags = (0, 0, row_rel, row_rel, col_rel, col_rel)
+                ref3d = Ref3D(coords + relflags)
+                res.kind = oREL
+                res.text = rangename3drel(bk, ref3d)
+            else:
+                ref3d = Ref3D(coords)
+                res.kind = oREF
+                res.text = rangename3d(bk, ref3d)
+            res.rank = LEAF_RANK
+            if optype == 1:
+                res.value = [ref3d]
+            spush(res)
+        elif opcode == 0x1B: # tArea3d
+            if bv >= 80:
+                res1, res2 = get_cell_range_addr(data, pos+3, bv, reldelta)
+                refx = unpack("<H", data[pos+1:pos+3])[0]
+                shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
+            else:
+                res1, res2 = get_cell_range_addr(data, pos+15, bv, reldelta)
+                raw_extshtx, raw_shx1, raw_shx2 = \
+                    unpack("<hxxxxxxxxhh", data[pos+1:pos+15])
+                if blah:
+                    print >> bk.logfile, "tArea3d", raw_extshtx, raw_shx1, raw_shx2
+                shx1, shx2 = get_externsheet_local_range_b57(
+                    bk, raw_extshtx, raw_shx1, raw_shx2, blah)
+            any_err |= shx1 < -1
+            rowx1, colx1, row_rel1, col_rel1 = res1
+            rowx2, colx2, row_rel2, col_rel2 = res2
+            is_rel = row_rel1 or col_rel1 or row_rel2 or col_rel2
+            any_rel = any_rel or is_rel
+            coords = (shx1, shx2+1, rowx1, rowx2+1, colx1, colx2+1)
+            if blah: print >> bk.logfile, " ", coords
+            res = Operand(oUNK, None)
+            if is_rel:
+                relflags = (0, 0, row_rel1, row_rel2, col_rel1, col_rel2)
+                ref3d = Ref3D(coords + relflags)
+                res.kind = oREL
+                res.text = rangename3drel(bk, ref3d)
+            else:
+                ref3d = Ref3D(coords)
+                res.kind = oREF
+                res.text = rangename3d(bk, ref3d)
+            res.rank = LEAF_RANK
+            if optype == 1:
+                res.value = [ref3d]
+
+            spush(res)
+        elif opcode == 0x19: # tNameX
+            dodgy = 0
+            res = Operand(oUNK, None)
+            if bv >= 80:
+                refx, tgtnamex = unpack("<HH", data[pos+1:pos+5])
+                tgtnamex -= 1
+                origrefx = refx
+            else:
+                refx, tgtnamex = unpack("<hxxxxxxxxH", data[pos+1:pos+13])
+                tgtnamex -= 1
+                origrefx = refx
+                if refx > 0:
+                    refx -= 1
+                elif refx < 0:
+                    refx = -refx - 1
+                else:
+                    dodgy = 1
+            if blah:
+                print >> bk.logfile, \
+                    " origrefx=%d refx=%d tgtnamex=%d dodgy=%d" \
+                    % (origrefx, refx, tgtnamex, dodgy)
+            if tgtnamex == namex:
+                if blah: print >> bk.logfile, "!!!! Self-referential !!!!"
+                dodgy = any_err = 1
+            if not dodgy:
+                if bv >= 80:
+                    shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
+                elif origrefx > 0:
+                    shx1, shx2 = (-4, -4) # external ref
+                else:
+                    exty = bk._externsheet_type_b57[refx]
+                    if exty == 4: # non-specific sheet in own doc't
+                        shx1, shx2 = (-1, -1) # internal, any sheet
+                    else:
+                        shx1, shx2 = (-666, -666)
+            if dodgy or shx1 < -1:
+                otext = "<<Name #%d in external(?) file #%d>>" \
+                    % (tgtnamex, origrefx)
+                res = Operand(oUNK, None, LEAF_RANK, otext)
+            else:
+                tgtobj = bk.name_obj_list[tgtnamex]
+                if not tgtobj.evaluated:
+                    ### recursive ###
+                    evaluate_name_formula(bk, tgtobj, tgtnamex, blah, level+1)
+                if tgtobj.macro or tgtobj.binary \
+                or tgtobj.any_err:
+                    if blah:
+                        tgtobj.dump(
+                            bk.logfile,
+                            header="!!! bad tgtobj !!!",
+                            footer="------------------",
+                            )
+                    res = Operand(oUNK, None)
+                    any_err = any_err or tgtobj.macro or tgtobj.binary or tgtobj.any_err
+                    any_rel = any_rel or tgtobj.any_rel
+                else:
+                    assert len(tgtobj.stack) == 1
+                    res = copy.deepcopy(tgtobj.stack[0])
+                res.rank = LEAF_RANK
+                if tgtobj.scope == -1:
+                    res.text = tgtobj.name
+                else:
+                    res.text = "%s!%s" \
+                        % (bk._sheet_names[tgtobj.scope], tgtobj.name)
+                if blah:
+                    print >> bk.logfile, " tNameX: setting text to", repr(res.text)
+            spush(res)
+        elif is_error_opcode(opcode):
+            any_err = 1
+            spush(error_opnd)
+        else:
+            if blah:
+                print >> bk.logfile, "FORMULA: /// Not handled yet: t" + oname
+            any_err = 1
+        if sz <= 0:
+            raise FormulaError("Fatal: token size is not positive")
+        pos += sz
+    # Normalise to a strict 0/1 truth value.
+    any_rel = not not any_rel
+    if blah:
+        print "End of formula. level=%d any_rel=%d any_err=%d stack=%r" % \
+            (level, not not any_rel, any_err, stack)
+        if len(stack) >= 2:
+            print "*** Stack has unprocessed args"
+        print
+    nobj.stack = stack
+    if len(stack) != 1:
+        nobj.result = None
+    else:
+        nobj.result = stack[0]
+    nobj.any_rel = any_rel
+    nobj.any_err = any_err
+    nobj.any_external = any_external
+    nobj.evaluated = 1
+
+#### under construction ####
+def decompile_formula(bk, fmla, fmlalen,
+ reldelta, browx=None, bcolx=None,
+ # browx & bcolx are required when reldelta == 0
+ blah=0, level=0):
+ if level > STACK_ALARM_LEVEL:
+ blah = 1
+ data = fmla
+ bv = bk.biff_version
+ if blah:
+ print "::: decompile_formula len=%d reldelta=%d %r level=%d" \
+ % (fmlalen, reldelta, data, level)
+ hex_char_dump(data, 0, fmlalen)
+ if level > STACK_PANIC_LEVEL:
+ raise XLRDError("Excessive indirect references in formula")
+ sztab = szdict[bv]
+ pos = 0
+ stack = []
+ any_rel = 0
+ any_err = 0
+ any_external = 0
+ unk_opnd = Operand(oUNK, None)
+ error_opnd = Operand(oERR, None)
+ spush = stack.append
+
+ def do_binop(opcd, stk):
+ assert len(stk) >= 2
+ bop = stk.pop()
+ aop = stk.pop()
+ argdict, result_kind, func, rank, sym = binop_rules[opcd]
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ resop = Operand(result_kind, None, rank, otext)
+ stk.append(resop)
+
+ def do_unaryop(opcode, arglist, result_kind, stk):
+ assert len(stk) >= 1
+ aop = stk.pop()
+ assert aop.kind in arglist
+ func, rank, sym1, sym2 = unop_rules[opcode]
+ otext = ''.join([
+ sym1,
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym2,
+ ])
+ stk.append(Operand(result_kind, None, rank, otext))
+
+ def not_in_name_formula(op_arg, oname_arg):
+ msg = "ERROR *** Unexpected token 0x%02x (%s) found in formula" \
+ % (op_arg, oname_arg)
+ # print msg
+ raise FormulaError(msg)
+
+ if fmlalen == 0:
+ stack = [unk_opnd]
+
+ while 0 <= pos < fmlalen:
+ op = ord(data[pos])
+ opcode = op & 0x1f
+ optype = (op & 0x60) >> 5
+ if optype:
+ opx = opcode + 32
+ else:
+ opx = opcode
+ oname = onames[opx] # + [" RVA"][optype]
+ sz = sztab[opx]
+ if blah:
+ print "Pos:%d Op:0x%02x opname:t%s Sz:%d opcode:%02xh optype:%02xh" \
+ % (pos, op, oname, sz, opcode, optype)
+ print "Stack =", stack
+ if sz == -2:
+ msg = 'ERROR *** Unexpected token 0x%02x ("%s"); biff_version=%d' \
+ % (op, oname, bv)
+ raise FormulaError(msg)
+ if not optype:
+ if 0x00 <= opcode <= 0x02: # unk_opnd, tExp, tTbl
+ not_in_name_formula(op, oname)
+ elif 0x03 <= opcode <= 0x0E:
+ # Add, Sub, Mul, Div, Power
+ # tConcat
+ # tLT, ..., tNE
+ do_binop(opcode, stack)
+ elif opcode == 0x0F: # tIsect
+ if blah: print >> bk.logfile, "tIsect pre", stack
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ sym = ' '
+ rank = 80 ########## check #######
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ res = Operand(oREF)
+ res.text = otext
+ if bop.kind == oERR or aop.kind == oERR:
+ res.kind = oERR
+ elif bop.kind == oUNK or aop.kind == oUNK:
+ # This can happen with undefined
+ # (go search in the current sheet) labels.
+ # For example =Bob Sales
+ # Each label gets a NAME record with an empty formula (!)
+ # Evaluation of the tName token classifies it as oUNK
+ # res.kind = oREF
+ pass
+ elif bop.kind == oREF == aop.kind:
+ pass
+ elif bop.kind == oREL == aop.kind:
+ res.kind = oREL
+ else:
+ pass
+ spush(res)
+ if blah: print >> bk.logfile, "tIsect post", stack
+ elif opcode == 0x10: # tList
+ if blah: print >> bk.logfile, "tList pre", stack
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ sym = ','
+ rank = 80 ########## check #######
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ res = Operand(oREF, None, rank, otext)
+ if bop.kind == oERR or aop.kind == oERR:
+ res.kind = oERR
+ elif bop.kind in (oREF, oREL) and aop.kind in (oREF, oREL):
+ res.kind = oREF
+ if aop.kind == oREL or bop.kind == oREL:
+ res.kind = oREL
+ else:
+ pass
+ spush(res)
+ if blah: print >> bk.logfile, "tList post", stack
+ elif opcode == 0x11: # tRange
+ if blah: print >> bk.logfile, "tRange pre", stack
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ sym = ':'
+ rank = 80 ########## check #######
+ otext = ''.join([
+ '('[:aop.rank < rank],
+ aop.text,
+ ')'[:aop.rank < rank],
+ sym,
+ '('[:bop.rank < rank],
+ bop.text,
+ ')'[:bop.rank < rank],
+ ])
+ res = Operand(oREF, None, rank, otext)
+ if bop.kind == oERR or aop.kind == oERR:
+ res = oERR
+ elif bop.kind == oREF == aop.kind:
+ pass
+ else:
+ pass
+ spush(res)
+ if blah: print >> bk.logfile, "tRange post", stack
+ elif 0x12 <= opcode <= 0x14: # tUplus, tUminus, tPercent
+ do_unaryop(opcode, (oUNK, oNUM,), oNUM, stack)
+ elif opcode == 0x15: # tParen
+ # source cosmetics
+ pass
+ elif opcode == 0x16: # tMissArg
+ spush(Operand(oMSNG, None, LEAF_RANK, ''))
+ elif opcode == 0x17: # tStr
+ if bv <= 70:
+ strg, newpos = unpack_string_update_pos(
+ data, pos+1, bk.encoding, lenlen=1)
+ else:
+ strg, newpos = unpack_unicode_update_pos(
+ data, pos+1, lenlen=1)
+ sz = newpos - pos
+ if blah: print >> bk.logfile, " sz=%d strg=%r" % (sz, strg)
+ text = '"' + strg.replace('"', '""') + '"'
+ spush(Operand(oSTRG, None, LEAF_RANK, text))
+ elif opcode == 0x18: # tExtended
+ # new with BIFF 8
+ assert bv >= 80
+ # not in OOo docs
+ raise FormulaError("tExtended token not implemented")
+ elif opcode == 0x19: # tAttr
+ subop, nc = unpack("<BH", data[pos+1:pos+4])
+ subname = tAttrNames.get(subop, "??Unknown??")
+ if subop == 0x04: # Choose
+ sz = nc * 2 + 6
+ elif subop == 0x10: # Sum (single arg)
+ sz = 4
+ if blah: print >> bk.logfile, "tAttrSum", stack
+ assert len(stack) >= 1
+ aop = stack[-1]
+ otext = 'SUM(%s)' % aop.text
+ stack[-1] = Operand(oNUM, None, FUNC_RANK, otext)
+ else:
+ sz = 4
+ if blah:
+ print " subop=%02xh subname=t%s sz=%d nc=%02xh" \
+ % (subop, subname, sz, nc)
+ elif 0x1A <= opcode <= 0x1B: # tSheet, tEndSheet
+ assert bv < 50
+ raise FormulaError("tSheet & tEndsheet tokens not implemented")
+ elif 0x1C <= opcode <= 0x1F: # tErr, tBool, tInt, tNum
+ inx = opcode - 0x1C
+ nb = [1, 1, 2, 8][inx]
+ kind = [oERR, oBOOL, oNUM, oNUM][inx]
+ value, = unpack("<" + "BBHd"[inx], data[pos+1:pos+1+nb])
+ if inx == 2: # tInt
+ value = float(value)
+ text = str(value)
+ elif inx == 3: # tNum
+ text = str(value)
+ elif inx == 1: # tBool
+ text = ('FALSE', 'TRUE')[value]
+ else:
+ text = '"' +error_text_from_code[value] + '"'
+ spush(Operand(kind, None, LEAF_RANK, text))
+ else:
+ raise FormulaError("Unhandled opcode: 0x%02x" % opcode)
+ if sz <= 0:
+ raise FormulaError("Size not set for opcode 0x%02x" % opcode)
+ pos += sz
+ continue
+ if opcode == 0x00: # tArray
+ spush(unk_opnd)
+ elif opcode == 0x01: # tFunc
+ nb = 1 + int(bv >= 40)
+ funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb])[0]
+ func_attrs = func_defs.get(funcx, None)
+ if not func_attrs:
+ print >> bk.logfile, "*** formula/tFunc unknown FuncID:%d" % funcx
+ spush(unk_opnd)
+ else:
+ func_name, nargs = func_attrs[:2]
+ if blah:
+ print " FuncID=%d name=%s nargs=%d" \
+ % (funcx, func_name, nargs)
+ assert len(stack) >= nargs
+ argtext = listsep.join([arg.text for arg in stack[-nargs:]])
+ otext = "%s(%s)" % (func_name, argtext)
+ del stack[-nargs:]
+ res = Operand(oUNK, None, FUNC_RANK, otext)
+ spush(res)
+ elif opcode == 0x02: #tFuncVar
+ nb = 1 + int(bv >= 40)
+ nargs, funcx = unpack("<B" + " BH"[nb], data[pos+1:pos+2+nb])
+ prompt, nargs = divmod(nargs, 128)
+ macro, funcx = divmod(funcx, 32768)
+ if blah:
+ print " FuncID=%d nargs=%d macro=%d prompt=%d" \
+ % (funcx, nargs, macro, prompt)
+ #### TODO #### if funcx == 255: # call add-in function
+ if funcx == 255:
+ func_attrs = ("CALL_ADDIN", 1, 30)
+ else:
+ func_attrs = func_defs.get(funcx, None)
+ if not func_attrs:
+ print >> bk.logfile, "*** formula/tFuncVar unknown FuncID:%d" \
+ % funcx
+ spush(unk_opnd)
+ else:
+ func_name, minargs, maxargs = func_attrs[:3]
+ if blah:
+ print " name: %r, min~max args: %d~%d" \
+ % (func_name, minargs, maxargs)
+ assert minargs <= nargs <= maxargs
+ assert len(stack) >= nargs
+ assert len(stack) >= nargs
+ argtext = listsep.join([arg.text for arg in stack[-nargs:]])
+ otext = "%s(%s)" % (func_name, argtext)
+ res = Operand(oUNK, None, FUNC_RANK, otext)
+ del stack[-nargs:]
+ spush(res)
+ elif opcode == 0x03: #tName
+ tgtnamex = unpack("<H", data[pos+1:pos+3])[0] - 1
+ # Only change with BIFF version is number of trailing UNUSED bytes!
+ if blah: print >> bk.logfile, " tgtnamex=%d" % tgtnamex
+ tgtobj = bk.name_obj_list[tgtnamex]
+ if tgtobj.scope == -1:
+ otext = tgtobj.name
+ else:
+ otext = "%s!%s" % (bk._sheet_names[tgtobj.scope], tgtobj.name)
+ if blah:
+ print >> bk.logfile, " tName: setting text to", repr(otext)
+ res = Operand(oUNK, None, LEAF_RANK, otext)
+ spush(res)
+ elif opcode == 0x04: # tRef
+ res = get_cell_addr(data, pos+1, bv, reldelta, browx, bcolx)
+ if blah: print >> bk.logfile, " ", res
+ rowx, colx, row_rel, col_rel = res
+ is_rel = row_rel or col_rel
+ if is_rel:
+ okind = oREL
+ else:
+ okind = oREF
+ otext = cellnamerel(rowx, colx, row_rel, col_rel)
+ res = Operand(okind, None, LEAF_RANK, otext)
+ spush(res)
+ elif opcode == 0x05: # tArea
+ res1, res2 = get_cell_range_addr(
+ data, pos+1, bv, reldelta, browx, bcolx)
+ if blah: print >> bk.logfile, " ", res1, res2
+ rowx1, colx1, row_rel1, col_rel1 = res1
+ rowx2, colx2, row_rel2, col_rel2 = res2
+ coords = (rowx1, rowx2+1, colx1, colx2+1)
+ relflags = (row_rel1, row_rel2, col_rel1, col_rel2)
+ is_rel = intbool(sum(relflags))
+ if is_rel:
+ okind = oREL
+ else:
+ okind = oREF
+ if blah: print >> bk.logfile, " ", coords, relflags
+ otext = rangename2drel(coords, relflags)
+ res = Operand(okind, None, LEAF_RANK, otext)
+ spush(res)
+ elif opcode == 0x06: # tMemArea
+ not_in_name_formula(op, oname)
+ elif opcode == 0x09: # tMemFunc
+ nb = unpack("<H", data[pos+1:pos+3])[0]
+ if blah: print >> bk.logfile, " %d bytes of cell ref formula" % nb
+ # no effect on stack
+ elif opcode == 0x0C: #tRefN
+ not_in_name_formula(op, oname)
+ # res = get_cell_addr(data, pos+1, bv, reldelta=1)
+ # # note *ALL* tRefN usage has signed offset for relative addresses
+ # any_rel = 1
+ # if blah: print >> bk.logfile, " ", res
+ # spush(res)
+ elif opcode == 0x0D: #tAreaN
+ not_in_name_formula(op, oname)
+ # res = get_cell_range_addr(data, pos+1, bv, reldelta=1)
+ # # note *ALL* tAreaN usage has signed offset for relative addresses
+ # any_rel = 1
+ # if blah: print >> bk.logfile, " ", res
+ elif opcode == 0x1A: # tRef3d
+ if bv >= 80:
+ res = get_cell_addr(data, pos+3, bv, reldelta, browx, bcolx)
+ refx = unpack("<H", data[pos+1:pos+3])[0]
+ shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
+ else:
+ res = get_cell_addr(data, pos+15, bv, reldelta, browx, bcolx)
+ raw_extshtx, raw_shx1, raw_shx2 = \
+ unpack("<hxxxxxxxxhh", data[pos+1:pos+15])
+ if blah:
+ print >> bk.logfile, "tRef3d", raw_extshtx, raw_shx1, raw_shx2
+ shx1, shx2 = get_externsheet_local_range_b57(
+ bk, raw_extshtx, raw_shx1, raw_shx2, blah)
+ rowx, colx, row_rel, col_rel = res
+ is_rel = row_rel or col_rel
+ any_rel = any_rel or is_rel
+ coords = (shx1, shx2+1, rowx, rowx+1, colx, colx+1)
+ any_err |= shx1 < -1
+ if blah: print >> bk.logfile, " ", coords
+ res = Operand(oUNK, None)
+ if is_rel:
+ relflags = (0, 0, row_rel, row_rel, col_rel, col_rel)
+ ref3d = Ref3D(coords + relflags)
+ res.kind = oREL
+ res.text = rangename3drel(bk, ref3d)
+ else:
+ ref3d = Ref3D(coords)
+ res.kind = oREF
+ res.text = rangename3d(bk, ref3d)
+ res.rank = LEAF_RANK
+ res.value = None
+ spush(res)
+ elif opcode == 0x1B: # tArea3d
+ if bv >= 80:
+ res1, res2 = get_cell_range_addr(data, pos+3, bv, reldelta)
+ refx = unpack("<H", data[pos+1:pos+3])[0]
+ shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
+ else:
+ res1, res2 = get_cell_range_addr(data, pos+15, bv, reldelta)
+ raw_extshtx, raw_shx1, raw_shx2 = \
+ unpack("<hxxxxxxxxhh", data[pos+1:pos+15])
+ if blah:
+ print >> bk.logfile, "tArea3d", raw_extshtx, raw_shx1, raw_shx2
+ shx1, shx2 = get_externsheet_local_range_b57(
+ bk, raw_extshtx, raw_shx1, raw_shx2, blah)
+ any_err |= shx1 < -1
+ rowx1, colx1, row_rel1, col_rel1 = res1
+ rowx2, colx2, row_rel2, col_rel2 = res2
+ is_rel = row_rel1 or col_rel1 or row_rel2 or col_rel2
+ any_rel = any_rel or is_rel
+ coords = (shx1, shx2+1, rowx1, rowx2+1, colx1, colx2+1)
+ if blah: print >> bk.logfile, " ", coords
+ res = Operand(oUNK, None)
+ if is_rel:
+ relflags = (0, 0, row_rel1, row_rel2, col_rel1, col_rel2)
+ ref3d = Ref3D(coords + relflags)
+ res.kind = oREL
+ res.text = rangename3drel(bk, ref3d)
+ else:
+ ref3d = Ref3D(coords)
+ res.kind = oREF
+ res.text = rangename3d(bk, ref3d)
+ res.rank = LEAF_RANK
+ spush(res)
+ elif opcode == 0x19: # tNameX
+ dodgy = 0
+ res = Operand(oUNK, None)
+ if bv >= 80:
+ refx, tgtnamex = unpack("<HH", data[pos+1:pos+5])
+ tgtnamex -= 1
+ origrefx = refx
+ else:
+ refx, tgtnamex = unpack("<hxxxxxxxxH", data[pos+1:pos+13])
+ tgtnamex -= 1
+ origrefx = refx
+ if refx > 0:
+ refx -= 1
+ elif refx < 0:
+ refx = -refx - 1
+ else:
+ dodgy = 1
+ if blah:
+ print >> bk.logfile, \
+ " origrefx=%d refx=%d tgtnamex=%d dodgy=%d" \
+ % (origrefx, refx, tgtnamex, dodgy)
+ # if tgtnamex == namex:
+ # if blah: print >> bk.logfile, "!!!! Self-referential !!!!"
+ # dodgy = any_err = 1
+ if not dodgy:
+ if bv >= 80:
+ shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
+ elif origrefx > 0:
+ shx1, shx2 = (-4, -4) # external ref
+ else:
+ exty = bk._externsheet_type_b57[refx]
+ if exty == 4: # non-specific sheet in own doc't
+ shx1, shx2 = (-1, -1) # internal, any sheet
+ else:
+ shx1, shx2 = (-666, -666)
+ okind = oUNK
+ ovalue = None
+ if shx1 == -5: # addin func name
+ okind = oSTRG
+ ovalue = bk.addin_func_names[tgtnamex]
+ otext = '"' + ovalue.replace('"', '""') + '"'
+ elif dodgy or shx1 < -1:
+ otext = "<<Name #%d in external(?) file #%d>>" \
+ % (tgtnamex, origrefx)
+ else:
+ tgtobj = bk.name_obj_list[tgtnamex]
+ if tgtobj.scope == -1:
+ otext = tgtobj.name
+ else:
+ otext = "%s!%s" \
+ % (bk._sheet_names[tgtobj.scope], tgtobj.name)
+ if blah:
+ print >> bk.logfile, " tNameX: setting text to", repr(res.text)
+ res = Operand(okind, ovalue, LEAF_RANK, otext)
+ spush(res)
+ elif is_error_opcode(opcode):
+ any_err = 1
+ spush(error_opnd)
+ else:
+ if blah:
+ print >> bk.logfile, "FORMULA: /// Not handled yet: t" + oname
+ any_err = 1
+ if sz <= 0:
+ raise FormulaError("Fatal: token size is not positive")
+ pos += sz
+ any_rel = not not any_rel
+ if blah:
+ print "End of formula. level=%d any_rel=%d any_err=%d stack=%r" % \
+ (level, not not any_rel, any_err, stack)
+ if len(stack) >= 2:
+ print "*** Stack has unprocessed args"
+ print
+
+ if len(stack) != 1:
+ result = None
+ else:
+ result = stack[0].text
+ return result
+
+#### under deconstruction ###
+def dump_formula(bk, data, fmlalen, bv, reldelta, blah=0, isname=0):
+ # Debugging aid: walk the tokens in data[0:fmlalen] and, when blah is true,
+ # print a description of each token. No value is returned (the only
+ # return statements are bare). The isname argument is not used in the body.
+ # bk supplies logfile and is passed to get_externsheet_local_range().
+ # bv is presumably the BIFF version * 10; only bv >= 80 is accepted -- TODO confirm.
+ # NOTE(review): some messages go to stdout (bare print), others to bk.logfile.
+ if blah:
+ print "dump_formula", fmlalen, bv, len(data)
+ hex_char_dump(data, 0, fmlalen)
+ assert bv >= 80 #### this function needs updating ####
+ sztab = szdict[bv]
+ pos = 0
+ stack = []
+ any_rel = 0
+ any_err = 0
+ spush = stack.append
+ while 0 <= pos < fmlalen:
+ # Split the token byte: low 5 bits -> opcode, bits 5-6 -> optype.
+ op = ord(data[pos])
+ opcode = op & 0x1f
+ optype = (op & 0x60) >> 5
+ # Tokens with a non-zero optype use the entries at offset 32 in onames/sztab.
+ if optype:
+ opx = opcode + 32
+ else:
+ opx = opcode
+ oname = onames[opx] # + [" RVA"][optype]
+
+ sz = sztab[opx]
+ if blah:
+ print "Pos:%d Op:0x%02x Name:t%s Sz:%d opcode:%02xh optype:%02xh" \
+ % (pos, op, oname, sz, opcode, optype)
+ # Tokens with optype == 0 are handled in this branch, which ends with "continue".
+ if not optype:
+ if 0x01 <= opcode <= 0x02: # tExp, tTbl
+ # reference to a shared formula or table record
+ rowx, colx = unpack("<HH", data[pos+1:pos+5])
+ if blah: print >> bk.logfile, " ", (rowx, colx)
+ elif opcode == 0x10: # tList
+ if blah: print >> bk.logfile, "tList pre", stack
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ spush(aop + bop)
+ if blah: print >> bk.logfile, "tlist post", stack
+ elif opcode == 0x11: # tRange
+ if blah: print >> bk.logfile, "tRange pre", stack
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ assert len(aop) == 1
+ assert len(bop) == 1
+ result = do_box_funcs(tRangeFuncs, aop[0], bop[0])
+ spush(result)
+ if blah: print >> bk.logfile, "tRange post", stack
+ elif opcode == 0x0F: # tIsect
+ if blah: print >> bk.logfile, "tIsect pre", stack
+ assert len(stack) >= 2
+ bop = stack.pop()
+ aop = stack.pop()
+ assert len(aop) == 1
+ assert len(bop) == 1
+ result = do_box_funcs(tIsectFuncs, aop[0], bop[0])
+ spush(result)
+ if blah: print >> bk.logfile, "tIsect post", stack
+ elif opcode == 0x19: # tAttr
+ subop, nc = unpack("<BH", data[pos+1:pos+4])
+ subname = tAttrNames.get(subop, "??Unknown??")
+ # The size from sztab is overridden here: it depends on the sub-opcode.
+ if subop == 0x04: # Choose
+ sz = nc * 2 + 6
+ else:
+ sz = 4
+ if blah: print >> bk.logfile, " subop=%02xh subname=t%s sz=%d nc=%02xh" % (subop, subname, sz, nc)
+ elif opcode == 0x17: # tStr
+ # The size from sztab is overridden here: it depends on the string length.
+ if bv <= 70:
+ nc = ord(data[pos+1])
+ strg = data[pos+2:pos+2+nc] # left in 8-bit encoding
+ sz = nc + 2
+ else:
+ strg, newpos = unpack_unicode_update_pos(data, pos+1, lenlen=1)
+ sz = newpos - pos
+ if blah: print >> bk.logfile, " sz=%d strg=%r" % (sz, strg)
+ else:
+ if sz <= 0:
+ print "**** Dud size; exiting ****"
+ return
+ pos += sz
+ continue
+ # From here on optype is non-zero.
+ if opcode == 0x00: # tArray
+ pass
+ elif opcode == 0x01: # tFunc
+ nb = 1 + int(bv >= 40)
+ # funcx is a 1-tuple here; that is what the "%d" formatting below consumes.
+ funcx = unpack("<" + " BH"[nb], data[pos+1:pos+1+nb])
+ if blah: print >> bk.logfile, " FuncID=%d" % funcx
+ elif opcode == 0x02: #tFuncVar
+ nb = 1 + int(bv >= 40)
+ nargs, funcx = unpack("<B" + " BH"[nb], data[pos+1:pos+2+nb])
+ # The top bit of each field is split off into a separate flag.
+ prompt, nargs = divmod(nargs, 128)
+ macro, funcx = divmod(funcx, 32768)
+ if blah: print >> bk.logfile, " FuncID=%d nargs=%d macro=%d prompt=%d" % (funcx, nargs, macro, prompt)
+ elif opcode == 0x03: #tName
+ namex = unpack("<H", data[pos+1:pos+3])
+ # Only change with BIFF version is the number of trailing UNUSED bytes!!!
+ if blah: print >> bk.logfile, " namex=%d" % namex
+ elif opcode == 0x04: # tRef
+ res = get_cell_addr(data, pos+1, bv, reldelta)
+ if blah: print >> bk.logfile, " ", res
+ elif opcode == 0x05: # tArea
+ res = get_cell_range_addr(data, pos+1, bv, reldelta)
+ if blah: print >> bk.logfile, " ", res
+ elif opcode == 0x09: # tMemFunc
+ nb = unpack("<H", data[pos+1:pos+3])[0]
+ if blah: print >> bk.logfile, " %d bytes of cell ref formula" % nb
+ elif opcode == 0x0C: #tRefN
+ res = get_cell_addr(data, pos+1, bv, reldelta=1)
+ # note *ALL* tRefN usage has signed offset for relative addresses
+ any_rel = 1
+ if blah: print >> bk.logfile, " ", res
+ elif opcode == 0x0D: #tAreaN
+ res = get_cell_range_addr(data, pos+1, bv, reldelta=1)
+ # note *ALL* tAreaN usage has signed offset for relative addresses
+ any_rel = 1
+ if blah: print >> bk.logfile, " ", res
+ elif opcode == 0x1A: # tRef3d
+ refx = unpack("<H", data[pos+1:pos+3])[0]
+ res = get_cell_addr(data, pos+3, bv, reldelta)
+ if blah: print >> bk.logfile, " ", refx, res
+ rowx, colx, row_rel, col_rel = res
+ any_rel = any_rel or row_rel or col_rel
+ shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
+ any_err |= shx1 < -1
+ # Upper bounds are stored exclusive, hence the +1s.
+ coords = (shx1, shx2+1, rowx, rowx+1, colx, colx+1)
+ if blah: print >> bk.logfile, " ", coords
+ # Only optype 1 results are pushed on the stack.
+ if optype == 1: spush([coords])
+ elif opcode == 0x1B: # tArea3d
+ refx = unpack("<H", data[pos+1:pos+3])[0]
+ res1, res2 = get_cell_range_addr(data, pos+3, bv, reldelta)
+ if blah: print >> bk.logfile, " ", refx, res1, res2
+ rowx1, colx1, row_rel1, col_rel1 = res1
+ rowx2, colx2, row_rel2, col_rel2 = res2
+ any_rel = any_rel or row_rel1 or col_rel1 or row_rel2 or col_rel2
+ shx1, shx2 = get_externsheet_local_range(bk, refx, blah)
+ any_err |= shx1 < -1
+ coords = (shx1, shx2+1, rowx1, rowx2+1, colx1, colx2+1)
+ if blah: print >> bk.logfile, " ", coords
+ if optype == 1: spush([coords])
+ elif opcode == 0x19: # tNameX
+ refx, namex = unpack("<HH", data[pos+1:pos+5])
+ if blah: print >> bk.logfile, " refx=%d namex=%d" % (refx, namex)
+ elif is_error_opcode(opcode):
+ any_err = 1
+ else:
+ if blah: print >> bk.logfile, "FORMULA: /// Not handled yet: t" + oname
+ any_err = 1
+ # A non-positive size would stop pos from advancing, so give up.
+ if sz <= 0:
+ print "**** Dud size; exiting ****"
+ return
+ pos += sz
+ if blah:
+ print >> bk.logfile, "End of formula. any_rel=%d any_err=%d stack=%r" % \
+ (not not any_rel, any_err, stack)
+ if len(stack) >= 2:
+ print >> bk.logfile, "*** Stack has unprocessed args"
+
+# === Some helper functions for displaying cell references ===
+
+# Note that a "non-standard" syntax is used in row and column
+# components in relative references.
+# For example, consider a relative reference: up two rows, right 3 columns.
+# On screen, with cursor in cell D10, this would appear as G8.
+# On screen, with cursor in cell Z100, this would appear as AC98.
+# On screen, with cursor in cell A1, this would appear as D65535.
+# These functions will display such a reference as [@+3,#-2].
+# "@" refers to the unknown base column.
+# "#" refers to the unknown base row.
+#
+# I'm aware of only one possibility of a sheet-relative component in
+# a reference: a 2D reference located in the "current sheet".
+# xlrd stores this internally with bounds of (0, 1, ...) and
+# relative flags of (1, 1, ...). These functions display the
+# sheet component as empty, just like Excel etc.
+
+def rownamerel(rowx, rowxrel):
+    """ Row component of a possibly-relative reference.
+
+    rowx is a 0-based row index when rowxrel is false, otherwise a signed
+    offset from the unknown base row (displayed as "#").
+    (5, 0) => '$6', (3, 1) => '#+3', (-2, 1) => '#-2', (0, 1) => '#'
+    """
+    if not rowxrel:
+        # Absolute rows are displayed 1-based, exactly as cellnameabs() does;
+        # printing the raw 0-based index would name the row above.
+        return "$%d" % (rowx + 1)
+    if rowx > 0:
+        return "#+%d" % rowx
+    if rowx < 0:
+        return "#-%d" % (-rowx)
+    return "#"
+
+def colnamerel(colx, colxrel):
+    """ Column component of a possibly-relative reference.
+
+    Absolute columns are shown as "$" plus the column letters; relative
+    ones as a signed offset from the unknown base column ("@").
+    """
+    if colxrel:
+        if colx == 0:
+            return "@"
+        if colx < 0:
+            return "@-%d" % (-colx)
+        return "@+%d" % colx
+    return "$" + colname(colx)
+##
+# Utility function: (5, 7) => 'H6'
+def cellname(rowx, colx):
+    """ (5, 7) => 'H6' """
+    column_letters = colname(colx)
+    # Row indexes are 0-based internally, 1-based on display.
+    return column_letters + "%d" % (rowx + 1)
+
+##
+# Utility function: (5, 7) => '$H$6'
+def cellnameabs(rowx, colx):
+    """ (5, 7) => '$H$6' """
+    column_letters = colname(colx)
+    # Row indexes are 0-based internally, 1-based on display.
+    return "$" + column_letters + "$%d" % (rowx + 1)
+
+def cellnamerel(rowx, colx, rowxrel, colxrel):
+    """ Display text for one cell of a possibly-relative reference.
+
+    Fully absolute cells use the '$H$6' form; anything with a relative
+    component is shown as '[column,row]'.
+    """
+    if rowxrel or colxrel:
+        col_text = colnamerel(colx, colxrel)
+        row_text = rownamerel(rowx, rowxrel)
+        return "[" + col_text + "," + row_text + "]"
+    return cellnameabs(rowx, colx)
+##
+# Utility function: 7 => 'H', 27 => 'AB'
+def colname(colx):
+    """ 7 => 'H', 27 => 'AB', 702 => 'AAA'
+
+    Converts a 0-based column index to spreadsheet column letters.
+    Previously indexes of 702 and above raised IndexError; any width is
+    now supported. Results for indexes up to 701 are unchanged.
+    """
+    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    if colx <= 25:
+        # Single-letter fast path, left exactly as it was.
+        return alphabet[colx]
+    # Column letters are "bijective base 26" (there is no zero digit), so
+    # after peeling off each letter the remaining value is reduced by one.
+    name = ""
+    remaining = colx
+    while remaining >= 0:
+        remaining, letterx = divmod(remaining, 26)
+        name = alphabet[letterx] + name
+        remaining -= 1
+    return name
+
+def rangename2d(rlo, rhi, clo, chi):
+    """ (5, 20, 7, 10) => '$H$6:$J$20'
+
+    rhi and chi are exclusive upper bounds; a one-cell range is shown as
+    just that cell.
+    """
+    top_left = cellnameabs(rlo, clo)
+    if rhi == rlo + 1 and chi == clo + 1:
+        return top_left
+    return top_left + ":" + cellnameabs(rhi - 1, chi - 1)
+
+def rangename2drel((rlo, rhi, clo, chi), (rlorel, rhirel, clorel, chirel)):
+    """ Display text for a 2D range that may have relative components.
+
+    First argument: (rlo, rhi, clo, chi) with exclusive upper bounds.
+    Second argument: the matching relative flags. Always 'first:last'.
+    """
+    first_corner = cellnamerel(rlo, clo, rlorel, clorel)
+    last_corner = cellnamerel(rhi - 1, chi - 1, rhirel, chirel)
+    return first_corner + ":" + last_corner
+##
+# Utility function:
+# <br /> Ref3D((1, 3, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20'
+def rangename3d(book, ref3d):
+    """ Ref3D((1, 3, 5, 20, 7, 10)) => 'Sheet2:Sheet3!$H$6:$J$20'
+    (assuming Excel's default sheetnames).
+    All upper bounds in ref3d.coords are exclusive. """
+    coords = ref3d.coords
+    sheet_part = sheetrange(book, *coords[:2])
+    cell_part = rangename2d(*coords[2:6])
+    return sheet_part + "!" + cell_part
+
+##
+# Utility function:
+# <br /> Ref3D(coords=(0, 1, -32, -22, -13, 13), relflags=(0, 0, 1, 1, 1, 1))
+# => 'Sheet1![@-13,#-32]:[@+12,#-23]'
+# where '@' refers to the current or base column and '#'
+# refers to the current or base row.
+def rangename3drel(book, ref3d):
+    """ Display text for a 3D reference that may have relative components.
+
+    The sheet part is omitted (no '!' either) when sheetrangerel() returns
+    an empty description.
+    """
+    coords, relflags = ref3d.coords, ref3d.relflags
+    sheet_part = sheetrangerel(book, coords[:2], relflags[:2])
+    cell_part = rangename2drel(coords[2:6], relflags[2:6])
+    if sheet_part:
+        return sheet_part + "!" + cell_part
+    return cell_part
+
+def quotedsheetname(shnames, shx):
+    """ Sheet name for index shx, wrapped in single quotes when needed.
+
+    Negative indexes are special codes and map to fixed descriptive text.
+    Names containing an apostrophe or a space are quoted, with embedded
+    apostrophes doubled.
+    """
+    if shx < 0:
+        special_names = {
+            -1: "?internal; any sheet?",
+            -2: "internal; deleted sheet",
+            -3: "internal; macro sheet",
+            -4: "<<external>>",
+            }
+        shname = special_names.get(shx, "?error %d?" % shx)
+    else:
+        shname = shnames[shx]
+    if "'" in shname or " " in shname:
+        # replace() is a no-op when only a space triggered the quoting.
+        return "'" + shname.replace("'", "''") + "'"
+    return shname
+
+def sheetrange(book, slo, shi):
+    """ Display text for sheets slo .. shi-1 (shi is exclusive).
+
+    A single sheet gives just its (quoted) name; otherwise 'first:last'.
+    """
+    names = book.sheet_names()
+    last_shx = shi - 1
+    first_text = quotedsheetname(names, slo)
+    if slo == last_shx:
+        return first_text
+    return first_text + ":" + quotedsheetname(names, last_shx)
+
+def sheetrangerel(book, (slo, shi), (slorel, shirel)):
+    """ Sheet part of a reference whose sheet bounds may be relative.
+
+    With no relative flag set this is sheetrange(). Otherwise both flags
+    must be set with bounds (0, 1), and the result is the empty string.
+    """
+    if slorel or shirel:
+        assert (slo == 0 == shi-1) and slorel and shirel
+        return ""
+    return sheetrange(book, slo, shi)
+
+# ==============================================================
diff --git a/tablib/packages/xlrd/licences.py b/tablib/packages/xlrd/licences.py
new file mode 100644
index 0000000..1e262a9
--- /dev/null
+++ b/tablib/packages/xlrd/licences.py
@@ -0,0 +1,77 @@
+# -*- coding: cp1252 -*-
+
+"""
+Portions copyright © 2005-2009, Stephen John Machin, Lingfo Pty Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+this list of conditions and the following disclaimer in the documentation
+and/or other materials provided with the distribution.
+
+3. None of the names of Stephen John Machin, Lingfo Pty Ltd and any
+contributors may be used to endorse or promote products derived from this
+software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+"""
+/*-
+ * Copyright (c) 2001 David Giffin.
+ * All rights reserved.
+ *
+ * Based on the the Java version: Andrew Khan Copyright (c) 2000.
+ *
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by
+ * David Giffin <david@giffin.org>."
+ *
+ * 4. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by
+ * David Giffin <david@giffin.org>."
+ *
+ * THIS SOFTWARE IS PROVIDED BY DAVID GIFFIN ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DAVID GIFFIN OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+"""
diff --git a/tablib/packages/xlrd/sheet.py b/tablib/packages/xlrd/sheet.py
new file mode 100644
index 0000000..70f7779
--- /dev/null
+++ b/tablib/packages/xlrd/sheet.py
@@ -0,0 +1,1768 @@
+# -*- coding: cp1252 -*-
+
+##
+# <p> Portions copyright © 2005-2009 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
+##
+
+# 2009-05-31 SJM Fixed problem with no CODEPAGE record on extremely minimal BIFF2.x 3rd-party file
+# 2009-04-27 SJM Integrated on_demand patch by Armando Serrano Lombillo
+# 2008-02-09 SJM Excel 2.0: build XFs on the fly from cell attributes
+# 2007-12-04 SJM Added support for Excel 2.x (BIFF2) files.
+# 2007-10-11 SJM Added missing entry for blank cell type to ctype_text
+# 2007-07-11 SJM Allow for BIFF2/3-style FORMAT record in BIFF4/8 file
+# 2007-04-22 SJM Remove experimental "trimming" facility.
+
+from biffh import *
+from timemachine import *
+from struct import unpack
+from formula import dump_formula, decompile_formula, rangename2d
+from formatting import nearest_colour_index, Format
+import time
+
+DEBUG = 0
+OBJ_MSO_DEBUG = 0
+
+# Table of (attribute name, default value) pairs. Sheet.__init__ loops over
+# it and calls setattr() so every new Sheet starts with these defaults.
+_WINDOW2_options = (
+ # Attribute names and initial values to use in case
+ # a WINDOW2 record is not written.
+ ("show_formulas", 0),
+ ("show_grid_lines", 1),
+ ("show_sheet_headers", 1),
+ ("panes_are_frozen", 0),
+ ("show_zero_values", 1),
+ ("automatic_grid_line_colour", 1),
+ ("columns_from_right_to_left", 0),
+ ("show_outline_symbols", 1),
+ ("remove_splits_if_pane_freeze_is_removed", 0),
+ ("sheet_selected", 0),
+ # "sheet_visible" appears to be merely a clone of "sheet_selected".
+ # The real thing is the visibility attribute from the BOUNDSHEET record.
+ ("sheet_visible", 0),
+ ("show_in_page_break_preview", 0),
+ )
+
+##
+# <p>Contains the data for one worksheet.</p>
+#
+# <p>In the cell access functions, "rowx" is a row index, counting from zero, and "colx" is a
+# column index, counting from zero.
+# Negative values for row/column indexes and slice positions are supported in the expected fashion.</p>
+#
+# <p>For information about cell types and cell values, refer to the documentation of the Cell class.</p>
+#
+# <p>WARNING: You don't call this class yourself. You access Sheet objects via the Book object that
+# was returned when you called xlrd.open_workbook("myfile.xls").</p>
+
+
+class Sheet(BaseObject):
+ ##
+ # Name of sheet.
+ name = ''
+
+ ##
+ # Number of rows in sheet. A row index is in range(thesheet.nrows).
+ nrows = 0
+
+ ##
+ # Number of columns in sheet. A column index is in range(thesheet.ncols).
+ ncols = 0
+
+ ##
+ # The map from a column index to a Colinfo object. Often there is an entry
+ # in COLINFO records for all column indexes in range(257).
+ # Note that xlrd ignores the entry for the non-existent
+ # 257th column. On the other hand, there may be no entry for unused columns.
+ # <br /> -- New in version 0.6.1
+ colinfo_map = {}
+
+ ##
+ # The map from a row index to a Rowinfo object. Note that it is possible
+ # to have missing entries -- at least one source of XLS files doesn't
+ # bother writing ROW records.
+ # <br /> -- New in version 0.6.1
+ rowinfo_map = {}
+
+ ##
+ # List of address ranges of cells containing column labels.
+ # These are set up in Excel by Insert > Name > Labels > Columns.
+ # <br> -- New in version 0.6.0
+ # <br>How to deconstruct the list:
+ # <pre>
+ # for crange in thesheet.col_label_ranges:
+ # rlo, rhi, clo, chi = crange
+ # for rx in xrange(rlo, rhi):
+ # for cx in xrange(clo, chi):
+ # print "Column label at (rowx=%d, colx=%d) is %r" \
+ # % (rx, cx, thesheet.cell_value(rx, cx))
+ # </pre>
+ col_label_ranges = []
+
+ ##
+ # List of address ranges of cells containing row labels.
+ # For more details, see <i>col_label_ranges</i> above.
+ # <br> -- New in version 0.6.0
+ row_label_ranges = []
+
+ ##
+ # List of address ranges of cells which have been merged.
+ # These are set up in Excel by Format > Cells > Alignment, then ticking
+ # the "Merge cells" box.
+ # <br> -- New in version 0.6.1. Extracted only if open_workbook(..., formatting_info=True)
+ # <br>How to deconstruct the list:
+ # <pre>
+ # for crange in thesheet.merged_cells:
+ # rlo, rhi, clo, chi = crange
+ # for rowx in xrange(rlo, rhi):
+ # for colx in xrange(clo, chi):
+ # # cell (rlo, clo) (the top left one) will carry the data
+ # # and formatting info; the remainder will be recorded as
+ # # blank cells, but a renderer will apply the formatting info
+ # # for the top left cell (e.g. border, pattern) to all cells in
+ # # the range.
+ # </pre>
+ merged_cells = []
+
+ ##
+ # Default column width from DEFCOLWIDTH record, else None.
+ # From the OOo docs:<br />
+ # """Column width in characters, using the width of the zero character
+ # from default font (first FONT record in the file). Excel adds some
+ # extra space to the default width, depending on the default font and
+ # default font size. The algorithm how to exactly calculate the resulting
+ # column width is not known.<br />
+ # Example: The default width of 8 set in this record results in a column
+ # width of 8.43 using Arial font with a size of 10 points."""<br />
+ # For the default hierarchy, refer to the Colinfo class above.
+ # <br /> -- New in version 0.6.1
+ defcolwidth = None
+
+ ##
+ # Default column width from STANDARDWIDTH record, else None.
+ # From the OOo docs:<br />
+ # """Default width of the columns in 1/256 of the width of the zero
+ # character, using default font (first FONT record in the file)."""<br />
+ # For the default hierarchy, refer to the Colinfo class above.
+ # <br /> -- New in version 0.6.1
+ standardwidth = None
+
+ ##
+ # Default value to be used for a row if there is
+ # no ROW record for that row.
+ # From the <i>optional</i> DEFAULTROWHEIGHT record.
+ default_row_height = None
+
+ ##
+ # Flag from the <i>optional</i> DEFAULTROWHEIGHT record; apparently
+ # 1 = the default row height and the default font height do not match
+ # (per the OOo docs for that record -- to be verified).
+ default_row_height_mismatch = None
+
+ ##
+ # Flag from the <i>optional</i> DEFAULTROWHEIGHT record; apparently
+ # 1 = rows without a ROW record are hidden by default
+ # (per the OOo docs for that record -- to be verified).
+ default_row_hidden = None
+
+ ##
+ # Flag from the <i>optional</i> DEFAULTROWHEIGHT record; apparently
+ # 1 = rows without a ROW record get additional space above
+ # (per the OOo docs for that record -- to be verified).
+ default_additional_space_above = None
+
+ ##
+ # Flag from the <i>optional</i> DEFAULTROWHEIGHT record; apparently
+ # 1 = rows without a ROW record get additional space below
+ # (per the OOo docs for that record -- to be verified).
+ default_additional_space_below = None
+
+ ##
+ # Visibility of the sheet. 0 = visible, 1 = hidden (can be unhidden
+ # by user -- Format/Sheet/Unhide), 2 = "very hidden" (can be unhidden
+ # only by VBA macro).
+ visibility = 0
+
+ ##
+ # A 256-element tuple corresponding to the contents of the GCW record for this sheet.
+ # If no such record, treat as all bits zero.
+ # Applies to BIFF4-7 only. See docs of Colinfo class for discussion.
+ gcw = (0, ) * 256
+
+ def __init__(self, book, position, name, number):
+ # Set up an empty sheet. Several settings are copied from the owning
+ # book; cell storage starts empty and is grown by extend_cells().
+ # number is used to index book._sheet_visibility.
+ self.book = book
+ self.biff_version = book.biff_version
+ self._position = position
+ self.logfile = book.logfile
+ self.pickleable = book.pickleable
+ # Plain lists are used instead of array.array when arrays are
+ # unavailable, or when the book must be pickleable and arrays cannot be pickled.
+ self.dont_use_array = not(array_array and (CAN_PICKLE_ARRAY or not book.pickleable))
+ self.name = name
+ self.number = number
+ self.verbosity = book.verbosity
+ self.formatting_info = book.formatting_info
+ self._xf_index_to_xl_type_map = book._xf_index_to_xl_type_map
+ self.nrows = 0 # actual, including possibly empty cells
+ self.ncols = 0
+ self._maxdatarowx = -1 # highest rowx containing a non-empty cell
+ self._maxdatacolx = -1 # highest colx containing a non-empty cell
+ self._dimnrows = 0 # as per DIMENSIONS record
+ self._dimncols = 0
+ # Parallel per-row storage, indexed [rowx][colx].
+ self._cell_values = []
+ self._cell_types = []
+ self._cell_xf_indexes = []
+ self._need_fix_ragged_rows = 0
+ self.defcolwidth = None
+ self.standardwidth = None
+ self.default_row_height = None
+ self.default_row_height_mismatch = 0
+ self.default_row_hidden = 0
+ self.default_additional_space_above = 0
+ self.default_additional_space_below = 0
+ # Fresh containers per instance, so the class-level defaults are never shared.
+ self.colinfo_map = {}
+ self.rowinfo_map = {}
+ self.col_label_ranges = []
+ self.row_label_ranges = []
+ self.merged_cells = []
+ # Counters bumped by cell_xf_index(): [cell, row, column, global default].
+ self._xf_index_stats = [0, 0, 0, 0]
+ self.visibility = book._sheet_visibility[number] # from BOUNDSHEET record
+ # Install the defaults used when no WINDOW2 record is written.
+ for attr, defval in _WINDOW2_options:
+ setattr(self, attr, defval)
+ self.first_visible_rowx = 0
+ self.first_visible_colx = 0
+ self.gridline_colour_index = 0x40
+ self.gridline_colour_rgb = None # pre-BIFF8
+ self.cached_page_break_preview_mag_factor = 0
+ self.cached_normal_view_mag_factor = 0
+ self._ixfe = None # BIFF2 only
+ self._cell_attr_to_xfx = {} # BIFF2.0 only
+
+ #### Don't initialise this here, use class attribute initialisation.
+ #### self.gcw = (0, ) * 256 ####
+
+ # Hard limits checked by extend_cells() and tidy_dimensions().
+ if self.biff_version >= 80:
+ self.utter_max_rows = 65536
+ else:
+ self.utter_max_rows = 16384
+ self.utter_max_cols = 256
+
+ ##
+ # Cell object in the given row and column.
+ def cell(self, rowx, colx):
+     """Build a Cell for (rowx, colx).
+
+     The XF index is looked up only when formatting info was requested;
+     otherwise the Cell gets None for it.
+     """
+     xfx = None
+     if self.formatting_info:
+         xfx = self.cell_xf_index(rowx, colx)
+     ctype = self._cell_types[rowx][colx]
+     cvalue = self._cell_values[rowx][colx]
+     return Cell(ctype, cvalue, xfx)
+
+ ##
+ # Value of the cell in the given row and column.
+ def cell_value(self, rowx, colx):
+     """Return the stored value of the cell at (rowx, colx)."""
+     row_values = self._cell_values[rowx]
+     return row_values[colx]
+
+ ##
+ # Type of the cell in the given row and column.
+ # Refer to the documentation of the Cell class.
+ def cell_type(self, rowx, colx):
+     """Return the stored type code of the cell at (rowx, colx)."""
+     row_types = self._cell_types[rowx]
+     return row_types[colx]
+
+ ##
+ # XF index of the cell in the given row and column.
+ # This is an index into Book.xf_list.
+ # <br /> -- New in version 0.6.1
+ def cell_xf_index(self, rowx, colx):
+     """Return the XF index that applies to the cell at (rowx, colx).
+
+     Lookup order: the cell's own index, then the row's, then the
+     column's, and finally the hardwired default of 15. Each outcome
+     bumps the matching counter in self._xf_index_stats.
+     """
+     self.req_fmt_info()
+     stats = self._xf_index_stats
+     xfx = self._cell_xf_indexes[rowx][colx]
+     if xfx > -1:
+         stats[0] += 1
+         return xfx
+     # Check for a row xf_index
+     try:
+         rowinfo = self.rowinfo_map[rowx]
+     except KeyError:
+         pass
+     else:
+         xfx = rowinfo.xf_index
+         if xfx > -1:
+             stats[1] += 1
+             return xfx
+     # Check for a column xf_index
+     try:
+         colinfo = self.colinfo_map[colx]
+     except KeyError:
+         # If all else fails, 15 is used as hardwired global default xf_index.
+         stats[3] += 1
+         return 15
+     xfx = colinfo.xf_index
+     assert xfx > -1
+     stats[2] += 1
+     return xfx
+
+ ##
+ # Returns a sequence of the Cell objects in the given row.
+ def row(self, rowx):
+     """Return a list of Cell objects, one per column, for row rowx."""
+     cells = []
+     for colx in xrange(self.ncols):
+         cells.append(self.cell(rowx, colx))
+     return cells
+
+ ##
+ # Returns a slice of the types
+ # of the cells in the given row.
+ def row_types(self, rowx, start_colx=0, end_colx=None):
+     """Return the cell types of row rowx from start_colx up to end_colx.
+
+     With end_colx left as None the slice runs to the end of the row.
+     """
+     type_row = self._cell_types[rowx]
+     if end_colx is not None:
+         return type_row[start_colx:end_colx]
+     return type_row[start_colx:]
+
+ ##
+ # Returns a slice of the values
+ # of the cells in the given row.
+ def row_values(self, rowx, start_colx=0, end_colx=None):
+     """Return the cell values of row rowx from start_colx up to end_colx.
+
+     With end_colx left as None the slice runs to the end of the row.
+     """
+     value_row = self._cell_values[rowx]
+     if end_colx is not None:
+         return value_row[start_colx:end_colx]
+     return value_row[start_colx:]
+
+ ##
+ # Returns a slice of the Cell objects in the given row.
+ def row_slice(self, rowx, start_colx=0, end_colx=None):
+     """Return Cell objects for columns start_colx .. end_colx-1 of row rowx.
+
+     Negative positions count from self.ncols. The start is clamped at 0
+     and the end at self.ncols.
+     """
+     ncols = self.ncols
+     if start_colx < 0:
+         start_colx = max(start_colx + ncols, 0)
+     if end_colx is None or end_colx > ncols:
+         end_colx = ncols
+     elif end_colx < 0:
+         end_colx = end_colx + ncols
+     cells = []
+     for colx in xrange(start_colx, end_colx):
+         cells.append(self.cell(rowx, colx))
+     return cells
+
+ ##
+ # Returns a slice of the Cell objects in the given column.
+ def col_slice(self, colx, start_rowx=0, end_rowx=None):
+     """Return Cell objects for rows start_rowx .. end_rowx-1 of column colx.
+
+     Negative positions count from self.nrows. The start is clamped at 0
+     and the end at self.nrows.
+     """
+     nrows = self.nrows
+     if start_rowx < 0:
+         start_rowx = max(start_rowx + nrows, 0)
+     if end_rowx is None or end_rowx > nrows:
+         end_rowx = nrows
+     elif end_rowx < 0:
+         end_rowx = end_rowx + nrows
+     cells = []
+     for rowx in xrange(start_rowx, end_rowx):
+         cells.append(self.cell(rowx, colx))
+     return cells
+
+ ##
+ # Returns a slice of the values of the cells in the given column.
+ def col_values(self, colx, start_rowx=0, end_rowx=None):
+     """Return the values of rows start_rowx .. end_rowx-1 of column colx.
+
+     Negative positions count from self.nrows. The start is clamped at 0
+     and the end at self.nrows.
+     """
+     nrows = self.nrows
+     if start_rowx < 0:
+         start_rowx = max(start_rowx + nrows, 0)
+     if end_rowx is None or end_rowx > nrows:
+         end_rowx = nrows
+     elif end_rowx < 0:
+         end_rowx = end_rowx + nrows
+     all_values = self._cell_values
+     values = []
+     for rowx in xrange(start_rowx, end_rowx):
+         values.append(all_values[rowx][colx])
+     return values
+
+ ##
+ # Returns a slice of the types of the cells in the given column.
+ def col_types(self, colx, start_rowx=0, end_rowx=None):
+     """Return the types of rows start_rowx .. end_rowx-1 of column colx.
+
+     Negative positions count from self.nrows. The start is clamped at 0
+     and the end at self.nrows.
+     """
+     nrows = self.nrows
+     if start_rowx < 0:
+         start_rowx = max(start_rowx + nrows, 0)
+     if end_rowx is None or end_rowx > nrows:
+         end_rowx = nrows
+     elif end_rowx < 0:
+         end_rowx = end_rowx + nrows
+     all_types = self._cell_types
+     types = []
+     for rowx in xrange(start_rowx, end_rowx):
+         types.append(all_types[rowx][colx])
+     return types
+
+ ##
+ # Returns a sequence of the Cell objects in the given column.
+ def col(self, colx):
+ return self.col_slice(colx)
+ # Above two lines just for the docs. Here's the real McCoy:
+ # The rebinding below replaces the def above, so col is the very same
+ # function as col_slice and also accepts start_rowx / end_rowx.
+ col = col_slice
+
+ # === Following methods are used in building the worksheet.
+ # === They are not part of the API.
+
+ def extend_cells(self, nr, nc):
+ # Grow the cell storage so that the cell at (nr-1, nc-1) exists.
+ # nr and nc are counts (1-based), checked against the sheet's hard limits.
+ # New cells are typed XL_CELL_EMPTY with value '' and, when formatting
+ # info is kept, XF index -1.
+ # print "extend_cells_2", self.nrows, self.ncols, nr, nc
+ assert 1 <= nc <= self.utter_max_cols
+ assert 1 <= nr <= self.utter_max_rows
+ if nr <= self.nrows:
+ # New cell is in an existing row, so extend that row (if necessary).
+ # Note that nr < self.nrows means that the cell data
+ # is not in ascending row order!!
+ self._need_fix_ragged_rows = 1
+ nrx = nr - 1
+ trow = self._cell_types[nrx]
+ tlen = len(trow)
+ # Pad this one row out to the wider of the requested and current widths.
+ nextra = max(nc, self.ncols) - tlen
+ if nextra > 0:
+ xce = XL_CELL_EMPTY
+ if self.dont_use_array:
+ trow.extend([xce] * nextra)
+ if self.formatting_info:
+ self._cell_xf_indexes[nrx].extend([-1] * nextra)
+ else:
+ aa = array_array
+ trow.extend(aa('B', [xce]) * nextra)
+ if self.formatting_info:
+ self._cell_xf_indexes[nrx].extend(aa('h', [-1]) * nextra)
+ self._cell_values[nrx].extend([''] * nextra)
+ if nc > self.ncols:
+ # Rows that already exist are now too short; fix_ragged_rows() pads them later.
+ self.ncols = nc
+ self._need_fix_ragged_rows = 1
+ if nr > self.nrows:
+ scta = self._cell_types.append
+ scva = self._cell_values.append
+ scxa = self._cell_xf_indexes.append
+ fmt_info = self.formatting_info
+ xce = XL_CELL_EMPTY
+ # nc is rebound here: new rows are created at the full (possibly just updated) sheet width.
+ nc = self.ncols
+ if self.dont_use_array:
+ for _unused in xrange(self.nrows, nr):
+ scta([xce] * nc)
+ scva([''] * nc)
+ if fmt_info:
+ scxa([-1] * nc)
+ else:
+ aa = array_array
+ for _unused in xrange(self.nrows, nr):
+ scta(aa('B', [xce]) * nc)
+ scva([''] * nc)
+ if fmt_info:
+ scxa(aa('h', [-1]) * nc)
+ self.nrows = nr
+
+ def fix_ragged_rows(self):
+ # Pad every row that is shorter than self.ncols with empty cells so all
+ # rows end up the same length. The elapsed time is recorded in
+ # self._fix_ragged_rows_time.
+ t0 = time.time()
+ ncols = self.ncols
+ xce = XL_CELL_EMPTY
+ aa = array_array
+ s_cell_types = self._cell_types
+ s_cell_values = self._cell_values
+ s_cell_xf_indexes = self._cell_xf_indexes
+ s_dont_use_array = self.dont_use_array
+ s_fmt_info = self.formatting_info
+ totrowlen = 0
+ for rowx in xrange(self.nrows):
+ trow = s_cell_types[rowx]
+ # The length of the types row is used as the pad point for the
+ # values and xf_index rows too.
+ rlen = len(trow)
+ totrowlen += rlen
+ nextra = ncols - rlen
+ if nextra > 0:
+ s_cell_values[rowx][rlen:] = [''] * nextra
+ if s_dont_use_array:
+ trow[rlen:] = [xce] * nextra
+ if s_fmt_info:
+ s_cell_xf_indexes[rowx][rlen:] = [-1] * nextra
+ else:
+ trow.extend(aa('B', [xce]) * nextra)
+ if s_fmt_info:
+ s_cell_xf_indexes[rowx][rlen:] = aa('h', [-1]) * nextra
+ self._fix_ragged_rows_time = time.time() - t0
+ # Disabled ("if 0") statistics report, kept for debugging.
+ if 0 and self.nrows:
+ avgrowlen = float(totrowlen) / self.nrows
+ print >> self.logfile, \
+ "sheet %d: avg row len %.1f; max row len %d" \
+ % (self.number, avgrowlen, self.ncols)
+
+ def tidy_dimensions(self):
+     """Final clean-up of the sheet's dimensions.
+
+     Extends the cell storage to cover every merged-cell range (logging a
+     warning for ranges outside the sheet limits), optionally reports a
+     mismatch with the DIMENSIONS record, then pads ragged rows if needed.
+     """
+     log = self.logfile
+     if self.verbosity >= 3:
+         fprintf(log,
+             "tidy_dimensions: nrows=%d ncols=%d _need_fix_ragged_rows=%d\n",
+             self.nrows, self.ncols, self._need_fix_ragged_rows,
+             )
+     if self.merged_cells:
+         max_rhi = max_chi = 0
+         row_limit = self.utter_max_rows
+         col_limit = self.utter_max_cols
+         for crange in self.merged_cells:
+             rlo, rhi, clo, chi = crange
+             rows_ok = 0 <= rlo < rhi <= row_limit
+             cols_ok = 0 <= clo < chi <= col_limit
+             if not (rows_ok and cols_ok):
+                 fprintf(log,
+                     "*** WARNING: sheet #%d (%r), MERGEDCELLS bad range %r\n",
+                     self.number, self.name, crange)
+             if rhi > max_rhi:
+                 max_rhi = rhi
+             if chi > max_chi:
+                 max_chi = chi
+         self.extend_cells(max_rhi, max_chi)
+     if self.verbosity >= 1:
+         dims_differ = (self.nrows != self._dimnrows
+             or self.ncols != self._dimncols)
+         if dims_differ:
+             fprintf(log,
+                 "NOTE *** sheet %d (%r): DIMENSIONS R,C = %d,%d should be %d,%d\n",
+                 self.number,
+                 self.name,
+                 self._dimnrows,
+                 self._dimncols,
+                 self.nrows,
+                 self.ncols,
+                 )
+     if self._need_fix_ragged_rows:
+         self.fix_ragged_rows()
+
+    def put_cell(self, rowx, colx, ctype, value, xf_index):
+        """Store a cell's type, value and (with formatting_info) XF index.
+
+        An IndexError on the first attempt means the grid is too small:
+        extend_cells(rowx+1, colx+1) is called and the store is retried
+        once.  Any other failure, or a failure of the retry, is noted on
+        self.logfile and re-raised.
+        """
+        try:
+            self._cell_types[rowx][colx] = ctype
+            self._cell_values[rowx][colx] = value
+            if self.formatting_info:
+                self._cell_xf_indexes[rowx][colx] = xf_index
+            return
+        except IndexError:
+            # Fall through to the grow-and-retry path below.
+            pass
+        except:
+            print >> self.logfile, "put_cell", rowx, colx
+            raise
+        # print >> self.logfile, "put_cell extending", rowx, colx
+        self.extend_cells(rowx+1, colx+1)
+        try:
+            self._cell_types[rowx][colx] = ctype
+            self._cell_values[rowx][colx] = value
+            if self.formatting_info:
+                self._cell_xf_indexes[rowx][colx] = xf_index
+        except:
+            print >> self.logfile, "put_cell", rowx, colx
+            raise
+
+    def put_blank_cell(self, rowx, colx, xf_index):
+        """Store a blank cell (from BLANK and MULBLANK records).
+
+        The cell gets type XL_CELL_BLANK, value '' and the given XF index.
+        Unlike put_cell, the XF index is written unconditionally, so the
+        XF index grid must exist; the callers in read() only get here when
+        formatting_info is on.
+
+        An IndexError on the first attempt triggers
+        extend_cells(rowx+1, colx+1) and a single retry.  Any other failure,
+        or a failure of the retry, is noted on self.logfile and re-raised.
+        """
+        ctype = XL_CELL_BLANK
+        value = ''
+        try:
+            self._cell_types[rowx][colx] = ctype
+            self._cell_values[rowx][colx] = value
+            self._cell_xf_indexes[rowx][colx] = xf_index
+        except IndexError:
+            # print >> self.logfile, "put_blank_cell extending", rowx, colx
+            self.extend_cells(rowx+1, colx+1)
+            try:
+                self._cell_types[rowx][colx] = ctype
+                self._cell_values[rowx][colx] = value
+                self._cell_xf_indexes[rowx][colx] = xf_index
+            except:
+                # Label names this method (was the copy-pasted "put_cell").
+                print >> self.logfile, "put_blank_cell", rowx, colx
+                raise
+        except:
+            print >> self.logfile, "put_blank_cell", rowx, colx
+            raise
+
+    def put_number_cell(self, rowx, colx, value, xf_index):
+        """Store a numeric cell, deriving its cell type from the XF index.
+
+        The type is looked up in self._xf_index_to_xl_type_map before any
+        store is attempted.  An IndexError on the first store triggers
+        extend_cells(rowx+1, colx+1) and a single retry.  Any other store
+        failure, or a failure of the retry, is noted on self.logfile and
+        re-raised.
+        """
+        ctype = self._xf_index_to_xl_type_map[xf_index]
+        try:
+            self._cell_types[rowx][colx] = ctype
+            self._cell_values[rowx][colx] = value
+            if self.formatting_info:
+                self._cell_xf_indexes[rowx][colx] = xf_index
+            return
+        except IndexError:
+            # Fall through to the grow-and-retry path below.
+            pass
+        except:
+            print >> self.logfile, "put_number_cell", rowx, colx
+            raise
+        # print >> self.logfile, "put_number_cell extending", rowx, colx
+        self.extend_cells(rowx+1, colx+1)
+        try:
+            self._cell_types[rowx][colx] = ctype
+            self._cell_values[rowx][colx] = value
+            if self.formatting_info:
+                self._cell_xf_indexes[rowx][colx] = xf_index
+        except:
+            print >> self.logfile, "put_number_cell", rowx, colx
+            raise
+
+ # === Methods after this line neither know nor care about how cells are stored.
+
+    def read(self, bk):
+        """Parse this sheet's BIFF records from workbook ``bk``.
+
+        bk._position is set to self._position, then records are fetched with
+        bk.get_record_parts() and dispatched on their record code until an
+        EOF record is seen.  Cell records are stored through put_cell,
+        put_number_cell and put_blank_cell; several layout records (ROW,
+        COLINFO, GCW, BLANK, MULBLANK, CONDFMT, CF, MERGEDCELLS, ...) are
+        skipped unless self.formatting_info is true.  Afterwards
+        tidy_dimensions() is called and bk._position is restored.
+
+        Returns 1.  Raises XLRDError when a string-valued FORMULA record is
+        not followed by the expected record, or when a FORMULA result has an
+        unexpected special-case marker.
+        """
+        global rc_stats
+        DEBUG = 0
+        blah = DEBUG or self.verbosity >= 2
+        blah_rows = DEBUG or self.verbosity >= 4
+        blah_formulas = 1 and blah
+        oldpos = bk._position
+        bk._position = self._position
+        XL_SHRFMLA_ETC_ETC = (
+            XL_SHRFMLA, XL_ARRAY, XL_TABLEOP, XL_TABLEOP2,
+            XL_ARRAY2, XL_TABLEOP_B2,
+            )
+        # Bind frequently used callables and attributes to locals once,
+        # ahead of the per-record loop.
+        self_put_number_cell = self.put_number_cell
+        self_put_cell = self.put_cell
+        self_put_blank_cell = self.put_blank_cell
+        local_unpack = unpack
+        bk_get_record_parts = bk.get_record_parts
+        bv = self.biff_version
+        fmt_info = self.formatting_info
+        eof_found = 0
+        while 1:
+            # if DEBUG: print "SHEET.READ: about to read from position %d" % bk._position
+            rc, data_len, data = bk_get_record_parts()
+            # if rc in rc_stats:
+            #     rc_stats[rc] += 1
+            # else:
+            #     rc_stats[rc] = 1
+            # if DEBUG: print "SHEET.READ: op 0x%04x, %d bytes %r" % (rc, data_len, data)
+            if rc == XL_NUMBER:
+                rowx, colx, xf_index, d = local_unpack('<HHHd', data)
+                # if xf_index == 0:
+                #     fprintf(self.logfile,
+                #         "NUMBER: r=%d c=%d xfx=%d %f\n", rowx, colx, xf_index, d)
+                self_put_number_cell(rowx, colx, d, xf_index)
+            elif rc == XL_LABELSST:
+                rowx, colx, xf_index, sstindex = local_unpack('<HHHi', data)
+                # print "LABELSST", rowx, colx, sstindex, bk._sharedstrings[sstindex]
+                self_put_cell(rowx, colx, XL_CELL_TEXT, bk._sharedstrings[sstindex], xf_index)
+            elif rc == XL_LABEL or rc == XL_RSTRING:
+                # RSTRING has extra richtext info at the end, but we ignore it.
+                rowx, colx, xf_index = local_unpack('<HHH', data[0:6])
+                if bv < BIFF_FIRST_UNICODE:
+                    # derive_encoding must be *called*; passing the bound
+                    # method itself would hand unpack_string a non-string.
+                    strg = unpack_string(data, 6, bk.encoding or bk.derive_encoding(), lenlen=2)
+                else:
+                    strg = unpack_unicode(data, 6, lenlen=2)
+                self_put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index)
+            elif rc == XL_RK:
+                rowx, colx, xf_index = local_unpack('<HHH', data[:6])
+                d = unpack_RK(data[6:10])
+                self_put_number_cell(rowx, colx, d, xf_index)
+            elif rc == XL_MULRK:
+                mulrk_row, mulrk_first = local_unpack('<HH', data[0:4])
+                mulrk_last, = local_unpack('<H', data[-2:])
+                pos = 4
+                for colx in xrange(mulrk_first, mulrk_last+1):
+                    xf_index, = local_unpack('<H', data[pos:pos+2])
+                    d = unpack_RK(data[pos+2:pos+6])
+                    pos += 6
+                    self_put_number_cell(mulrk_row, colx, d, xf_index)
+            elif rc == XL_ROW:
+                # Version 0.6.0a3: ROW records are just not worth using (for memory allocation).
+                # Version 0.6.1: now used for formatting info.
+                if not fmt_info: continue
+                rowx, bits1, bits2 = local_unpack('<H4xH4xi', data[0:16])
+                if not(0 <= rowx < self.utter_max_rows):
+                    print >> self.logfile, \
+                        "*** NOTE: ROW record has row index %d; " \
+                        "should have 0 <= rowx < %d -- record ignored!" \
+                        % (rowx, self.utter_max_rows)
+                    continue
+                r = Rowinfo()
+                # Using upkbits() is far too slow on a file
+                # with 30 sheets each with 10K rows :-(
+                #    upkbits(r, bits1, (
+                #        ( 0, 0x7FFF, 'height'),
+                #        (15, 0x8000, 'has_default_height'),
+                #        ))
+                #    upkbits(r, bits2, (
+                #        ( 0, 0x00000007, 'outline_level'),
+                #        ( 4, 0x00000010, 'outline_group_starts_ends'),
+                #        ( 5, 0x00000020, 'hidden'),
+                #        ( 6, 0x00000040, 'height_mismatch'),
+                #        ( 7, 0x00000080, 'has_default_xf_index'),
+                #        (16, 0x0FFF0000, 'xf_index'),
+                #        (28, 0x10000000, 'additional_space_above'),
+                #        (29, 0x20000000, 'additional_space_below'),
+                #        ))
+                # So:
+                r.height = bits1 & 0x7fff
+                r.has_default_height = (bits1 >> 15) & 1
+                r.outline_level = bits2 & 7
+                r.outline_group_starts_ends = (bits2 >> 4) & 1
+                r.hidden = (bits2 >> 5) & 1
+                r.height_mismatch = (bits2 >> 6) & 1
+                r.has_default_xf_index = (bits2 >> 7) & 1
+                r.xf_index = (bits2 >> 16) & 0xfff
+                r.additional_space_above = (bits2 >> 28) & 1
+                r.additional_space_below = (bits2 >> 29) & 1
+                if not r.has_default_xf_index:
+                    r.xf_index = -1
+                self.rowinfo_map[rowx] = r
+                if 0 and r.xf_index > -1:
+                    fprintf(self.logfile,
+                        "**ROW %d %d %d\n",
+                        self.number, rowx, r.xf_index)
+                if blah_rows:
+                    print >> self.logfile, 'ROW', rowx, bits1, bits2
+                    r.dump(self.logfile,
+                        header="--- sh #%d, rowx=%d ---" % (self.number, rowx))
+            elif rc in XL_FORMULA_OPCODES: # 06, 0206, 0406
+                # DEBUG = 1
+                # if DEBUG: print "FORMULA: rc: 0x%04x data: %r" % (rc, data)
+                if bv >= 50:
+                    rowx, colx, xf_index, result_str, flags = local_unpack('<HHH8sH', data[0:16])
+                    lenlen = 2
+                    tkarr_offset = 20
+                elif bv >= 30:
+                    rowx, colx, xf_index, result_str, flags = local_unpack('<HHH8sH', data[0:16])
+                    lenlen = 2
+                    tkarr_offset = 16
+                else: # BIFF2
+                    rowx, colx, cell_attr, result_str, flags = local_unpack('<HH3s8sB', data[0:16])
+                    xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx, colx)
+                    lenlen = 1
+                    tkarr_offset = 16
+                if blah_formulas: # testing formula dumper
+                    #### XXXX FIXME
+                    # NOTE(review): offsets 20/22 are hard-coded here and do
+                    # not use lenlen/tkarr_offset computed above.
+                    fprintf(self.logfile, "FORMULA: rowx=%d colx=%d\n", rowx, colx)
+                    fmlalen = local_unpack("<H", data[20:22])[0]
+                    decompile_formula(bk, data[22:], fmlalen,
+                        reldelta=0, browx=rowx, bcolx=colx, blah=1)
+                if result_str[6:8] == "\xFF\xFF":
+                    if result_str[0] == '\x00':
+                        # need to read next record (STRING)
+                        gotstring = 0
+                        # if flags & 8:
+                        if 1: # "flags & 8" applies only to SHRFMLA
+                            # actually there's an optional SHRFMLA or ARRAY etc record to skip over
+                            rc2, data2_len, data2 = bk.get_record_parts()
+                            if rc2 == XL_STRING or rc2 == XL_STRING_B2:
+                                gotstring = 1
+                            elif rc2 == XL_ARRAY:
+                                row1x, rownx, col1x, colnx, array_flags, tokslen = \
+                                    local_unpack("<HHBBBxxxxxH", data2[:14])
+                                if blah_formulas:
+                                    fprintf(self.logfile, "ARRAY: %d %d %d %d %d\n",
+                                        row1x, rownx, col1x, colnx, array_flags)
+                                    dump_formula(bk, data2[14:], tokslen, bv, reldelta=0, blah=1)
+                            elif rc2 == XL_SHRFMLA:
+                                row1x, rownx, col1x, colnx, nfmlas, tokslen = \
+                                    local_unpack("<HHBBxBH", data2[:10])
+                                if blah_formulas:
+                                    fprintf(self.logfile, "SHRFMLA (sub): %d %d %d %d %d\n",
+                                        row1x, rownx, col1x, colnx, nfmlas)
+                                    decompile_formula(bk, data2[10:], tokslen, reldelta=1, blah=1)
+                            elif rc2 not in XL_SHRFMLA_ETC_ETC:
+                                raise XLRDError(
+                                    "Expected SHRFMLA, ARRAY, TABLEOP* or STRING record; found 0x%04x" % rc2)
+                            # if DEBUG: print "gotstring:", gotstring
+                        # now for the STRING record
+                        if not gotstring:
+                            rc2, _unused_len, data2 = bk.get_record_parts()
+                            if rc2 not in (XL_STRING, XL_STRING_B2):
+                                raise XLRDError("Expected STRING record; found 0x%04x" % rc2)
+                        # if DEBUG: print "STRING: data=%r BIFF=%d cp=%d" % (data2, self.biff_version, bk.encoding)
+                        if self.biff_version < BIFF_FIRST_UNICODE:
+                            # derive_encoding must be called here as well.
+                            strg = unpack_string(data2, 0, bk.encoding or bk.derive_encoding(), lenlen=1 + int(bv > 20))
+                        else:
+                            strg = unpack_unicode(data2, 0, lenlen=2)
+                        self.put_cell(rowx, colx, XL_CELL_TEXT, strg, xf_index)
+                        # if DEBUG: print "FORMULA strg %r" % strg
+                    elif result_str[0] == '\x01':
+                        # boolean formula result
+                        value = ord(result_str[2])
+                        self.put_cell(rowx, colx, XL_CELL_BOOLEAN, value, xf_index)
+                    elif result_str[0] == '\x02':
+                        # Error in cell
+                        value = ord(result_str[2])
+                        self.put_cell(rowx, colx, XL_CELL_ERROR, value, xf_index)
+                    elif result_str[0] == '\x03':
+                        # empty ... i.e. empty (zero-length) string, NOT an empty cell.
+                        self.put_cell(rowx, colx, XL_CELL_TEXT, u"", xf_index)
+                    else:
+                        raise XLRDError("unexpected special case (0x%02x) in FORMULA" % ord(result_str[0]))
+                else:
+                    # it is a number
+                    d = local_unpack('<d', result_str)[0]
+                    self_put_number_cell(rowx, colx, d, xf_index)
+            elif rc == XL_BOOLERR:
+                rowx, colx, xf_index, value, is_err = local_unpack('<HHHBB', data[:8])
+                # Note OOo Calc 2.0 writes 9-byte BOOLERR records.
+                # OOo docs say 8. Excel writes 8.
+                cellty = (XL_CELL_BOOLEAN, XL_CELL_ERROR)[is_err]
+                # if DEBUG: print "XL_BOOLERR", rowx, colx, xf_index, value, is_err
+                self.put_cell(rowx, colx, cellty, value, xf_index)
+            elif rc == XL_COLINFO:
+                if not fmt_info: continue
+                c = Colinfo()
+                first_colx, last_colx, c.width, c.xf_index, flags \
+                    = local_unpack("<HHHHH", data[:10])
+                #### Colinfo.width is denominated in 256ths of a character,
+                #### *not* in characters.
+                if not(0 <= first_colx <= last_colx <= 256):
+                    # Note: 256 instead of 255 is a common mistake.
+                    # We silently ignore the non-existing 257th column in that case.
+                    print >> self.logfile, \
+                        "*** NOTE: COLINFO record has first col index %d, last %d; " \
+                        "should have 0 <= first <= last <= 255 -- record ignored!" \
+                        % (first_colx, last_colx)
+                    del c
+                    continue
+                upkbits(c, flags, (
+                    ( 0, 0x0001, 'hidden'),
+                    ( 1, 0x0002, 'bit1_flag'),
+                    # *ALL* colinfos created by Excel in "default" cases are 0x0002!!
+                    # Maybe it's "locked" by analogy with XFProtection data.
+                    ( 8, 0x0700, 'outline_level'),
+                    (12, 0x1000, 'collapsed'),
+                    ))
+                for colx in xrange(first_colx, last_colx+1):
+                    if colx > 255: break # Excel does 0 to 256 inclusive
+                    self.colinfo_map[colx] = c
+                    if 0:
+                        fprintf(self.logfile,
+                            "**COL %d %d %d\n",
+                            self.number, colx, c.xf_index)
+                if blah:
+                    fprintf(
+                        self.logfile,
+                        "COLINFO sheet #%d cols %d-%d: wid=%d xf_index=%d flags=0x%04x\n",
+                        self.number, first_colx, last_colx, c.width, c.xf_index, flags,
+                        )
+                    c.dump(self.logfile, header='===')
+            elif rc == XL_DEFCOLWIDTH:
+                self.defcolwidth, = local_unpack("<H", data[:2])
+                if 0: print >> self.logfile, 'DEFCOLWIDTH', self.defcolwidth
+            elif rc == XL_STANDARDWIDTH:
+                if data_len != 2:
+                    print >> self.logfile, '*** ERROR *** STANDARDWIDTH', data_len, repr(data)
+                self.standardwidth, = local_unpack("<H", data[:2])
+                if 0: print >> self.logfile, 'STANDARDWIDTH', self.standardwidth
+            elif rc == XL_GCW:
+                if not fmt_info: continue # useless w/o COLINFO
+                assert data_len == 34
+                assert data[0:2] == "\x20\x00"
+                iguff = unpack("<8i", data[2:34])
+                gcw = []
+                # Expand the eight 32-bit words into one flag per bit,
+                # least-significant bit first.
+                for bits in iguff:
+                    for j in xrange(32):
+                        gcw.append(bits & 1)
+                        bits >>= 1
+                self.gcw = tuple(gcw)
+                if 0:
+                    showgcw = "".join(map(lambda x: "F "[x], gcw)).rstrip().replace(' ', '.')
+                    print "GCW:", showgcw
+            elif rc == XL_BLANK:
+                if not fmt_info: continue
+                rowx, colx, xf_index = local_unpack('<HHH', data[:6])
+                if 0: print >> self.logfile, "BLANK", rowx, colx, xf_index
+                self_put_blank_cell(rowx, colx, xf_index)
+            elif rc == XL_MULBLANK: # 00BE
+                if not fmt_info: continue
+                mul_row, mul_first = local_unpack('<HH', data[0:4])
+                mul_last, = local_unpack('<H', data[-2:])
+                if 0:
+                    print >> self.logfile, "MULBLANK", mul_row, mul_first, mul_last
+                pos = 4
+                for colx in xrange(mul_first, mul_last+1):
+                    xf_index, = local_unpack('<H', data[pos:pos+2])
+                    pos += 2
+                    self_put_blank_cell(mul_row, colx, xf_index)
+            elif rc == XL_DIMENSION or rc == XL_DIMENSION2:
+                # if data_len == 10:
+                # Was crashing on BIFF 4.0 file w/o the two trailing unused bytes.
+                # Reported by Ralph Heimburger.
+                if bv < 80:
+                    dim_tuple = local_unpack('<HxxH', data[2:8])
+                else:
+                    dim_tuple = local_unpack('<ixxH', data[4:12])
+                self.nrows, self.ncols = 0, 0
+                self._dimnrows, self._dimncols = dim_tuple
+                if not self.book._xf_epilogue_done:
+                    # Needed for bv <= 40
+                    self.book.xf_epilogue()
+                if blah:
+                    fprintf(self.logfile,
+                        "sheet %d(%r) DIMENSIONS: ncols=%d nrows=%d\n",
+                        self.number, self.name, self._dimncols, self._dimnrows
+                        )
+            elif rc == XL_EOF:
+                DEBUG = 0
+                if DEBUG: print >> self.logfile, "SHEET.READ: EOF"
+                eof_found = 1
+                break
+            elif rc == XL_OBJ:
+                # handle SHEET-level objects; note there's a separate Book.handle_obj
+                self.handle_obj(data)
+            elif rc == XL_MSO_DRAWING:
+                self.handle_msodrawingetc(rc, data_len, data)
+            elif rc == XL_TXO:
+                self.handle_txo(data)
+            elif rc == XL_NOTE:
+                self.handle_note(data)
+            elif rc == XL_FEAT11:
+                self.handle_feat11(data)
+            elif rc in bofcodes: ##### EMBEDDED BOF #####
+                version, boftype = local_unpack('<HH', data[0:4])
+                if boftype != 0x20: # embedded chart
+                    print >> self.logfile, \
+                        "*** Unexpected embedded BOF (0x%04x) at offset %d: version=0x%04x type=0x%04x" \
+                        % (rc, bk._position - data_len - 4, version, boftype)
+                # Skip every record of the embedded substream up to its EOF.
+                while 1:
+                    code, data_len, data = bk.get_record_parts()
+                    if code == XL_EOF:
+                        break
+                if DEBUG: print >> self.logfile, "---> found EOF"
+            elif rc == XL_COUNTRY:
+                bk.handle_country(data)
+            elif rc == XL_LABELRANGES:
+                pos = 0
+                pos = unpack_cell_range_address_list_update_pos(
+                    self.row_label_ranges, data, pos, bv, addr_size=8,
+                    )
+                pos = unpack_cell_range_address_list_update_pos(
+                    self.col_label_ranges, data, pos, bv, addr_size=8,
+                    )
+                assert pos == data_len
+            elif rc == XL_ARRAY:
+                row1x, rownx, col1x, colnx, array_flags, tokslen = \
+                    local_unpack("<HHBBBxxxxxH", data[:14])
+                if blah_formulas:
+                    print "ARRAY:", row1x, rownx, col1x, colnx, array_flags
+                    dump_formula(bk, data[14:], tokslen, bv, reldelta=0, blah=1)
+            elif rc == XL_SHRFMLA:
+                row1x, rownx, col1x, colnx, nfmlas, tokslen = \
+                    local_unpack("<HHBBxBH", data[:10])
+                if blah_formulas:
+                    print "SHRFMLA (main):", row1x, rownx, col1x, colnx, nfmlas
+                    decompile_formula(bk, data[10:], tokslen, reldelta=0, blah=1)
+            elif rc == XL_CONDFMT:
+                if not fmt_info: continue
+                assert bv >= 80
+                num_CFs, needs_recalc, browx1, browx2, bcolx1, bcolx2 = \
+                    unpack("<6H", data[0:12])
+                if self.verbosity >= 1:
+                    fprintf(self.logfile,
+                        "\n*** WARNING: Ignoring CONDFMT (conditional formatting) record\n" \
+                        "*** in Sheet %d (%r).\n" \
+                        "*** %d CF record(s); needs_recalc_or_redraw = %d\n" \
+                        "*** Bounding box is %s\n",
+                        self.number, self.name, num_CFs, needs_recalc,
+                        rangename2d(browx1, browx2+1, bcolx1, bcolx2+1),
+                        )
+                olist = [] # updated by the function
+                pos = unpack_cell_range_address_list_update_pos(
+                    olist, data, 12, bv, addr_size=8)
+                # print >> self.logfile, repr(result), len(result)
+                if self.verbosity >= 1:
+                    fprintf(self.logfile,
+                        "*** %d individual range(s):\n" \
+                        "*** %s\n",
+                        len(olist),
+                        ", ".join([rangename2d(*coords) for coords in olist]),
+                        )
+            elif rc == XL_CF:
+                if not fmt_info: continue
+                cf_type, cmp_op, sz1, sz2, flags = unpack("<BBHHi", data[0:10])
+                font_block = (flags >> 26) & 1
+                bord_block = (flags >> 28) & 1
+                patt_block = (flags >> 29) & 1
+                if self.verbosity >= 1:
+                    fprintf(self.logfile,
+                        "\n*** WARNING: Ignoring CF (conditional formatting) sub-record.\n" \
+                        "*** cf_type=%d, cmp_op=%d, sz1=%d, sz2=%d, flags=0x%08x\n" \
+                        "*** optional data blocks: font=%d, border=%d, pattern=%d\n",
+                        cf_type, cmp_op, sz1, sz2, flags,
+                        font_block, bord_block, patt_block,
+                        )
+                # hex_char_dump(data, 0, data_len)
+                pos = 12
+                if font_block:
+                    (font_height, font_options, weight, escapement, underline,
+                    font_colour_index, two_bits, font_esc, font_underl) = \
+                    unpack("<64x i i H H B 3x i 4x i i i 18x", data[pos:pos+118])
+                    # Extract single flag bits with a shift; the previous
+                    # code compared with ">" and so yielded 0/1 from a
+                    # magnitude test rather than from bit 1 / bit 7.
+                    font_style = (two_bits >> 1) & 1
+                    posture = (font_options >> 1) & 1
+                    font_canc = (two_bits >> 7) & 1
+                    cancellation = (font_options >> 7) & 1
+                    if self.verbosity >= 1:
+                        fprintf(self.logfile,
+                            "*** Font info: height=%d, weight=%d, escapement=%d,\n" \
+                            "*** underline=%d, colour_index=%d, esc=%d, underl=%d,\n" \
+                            "*** style=%d, posture=%d, canc=%d, cancellation=%d\n",
+                            font_height, weight, escapement, underline,
+                            font_colour_index, font_esc, font_underl,
+                            font_style, posture, font_canc, cancellation,
+                            )
+                    pos += 118
+                if bord_block:
+                    pos += 8
+                if patt_block:
+                    pos += 4
+                fmla1 = data[pos:pos+sz1]
+                pos += sz1
+                if blah and sz1:
+                    fprintf(self.logfile,
+                        "*** formula 1:\n",
+                        )
+                    dump_formula(bk, fmla1, sz1, bv, reldelta=0, blah=1)
+                fmla2 = data[pos:pos+sz2]
+                pos += sz2
+                assert pos == data_len
+                if blah and sz2:
+                    fprintf(self.logfile,
+                        "*** formula 2:\n",
+                        )
+                    dump_formula(bk, fmla2, sz2, bv, reldelta=0, blah=1)
+            elif rc == XL_DEFAULTROWHEIGHT:
+                if data_len == 4:
+                    bits, self.default_row_height = unpack("<HH", data[:4])
+                elif data_len == 2:
+                    self.default_row_height, = unpack("<H", data)
+                    bits = 0
+                    fprintf(self.logfile,
+                        "*** WARNING: DEFAULTROWHEIGHT record len is 2, " \
+                        "should be 4; assuming BIFF2 format\n")
+                else:
+                    bits = 0
+                    fprintf(self.logfile,
+                        "*** WARNING: DEFAULTROWHEIGHT record len is %d, " \
+                        "should be 4; ignoring this record\n",
+                        data_len)
+                self.default_row_height_mismatch = bits & 1
+                self.default_row_hidden = (bits >> 1) & 1
+                self.default_additional_space_above = (bits >> 2) & 1
+                self.default_additional_space_below = (bits >> 3) & 1
+            elif rc == XL_MERGEDCELLS:
+                if not fmt_info: continue
+                pos = unpack_cell_range_address_list_update_pos(
+                    self.merged_cells, data, 0, bv, addr_size=8)
+                if blah:
+                    fprintf(self.logfile,
+                        "MERGEDCELLS: %d ranges\n", int_floor_div(pos - 2, 8))
+                assert pos == data_len, \
+                    "MERGEDCELLS: pos=%d data_len=%d" % (pos, data_len)
+            elif rc == XL_WINDOW2:
+                if bv >= 80:
+                    (options,
+                    self.first_visible_rowx, self.first_visible_colx,
+                    self.gridline_colour_index,
+                    self.cached_page_break_preview_mag_factor,
+                    self.cached_normal_view_mag_factor
+                    ) = unpack("<HHHHxxHH", data[:14])
+                else: # BIFF3-7
+                    (options,
+                    self.first_visible_rowx, self.first_visible_colx,
+                    ) = unpack("<HHH", data[:6])
+                    self.gridline_colour_rgb = unpack("<BBB", data[6:9])
+                    self.gridline_colour_index = \
+                        nearest_colour_index(
+                            self.book.colour_map,
+                            self.gridline_colour_rgb,
+                            debug=0)
+                    self.cached_page_break_preview_mag_factor = 0 # default (60%)
+                    self.cached_normal_view_mag_factor = 0 # default (100%)
+                # options -- Bit, Mask, Contents:
+                # 0 0001H 0 = Show formula results 1 = Show formulas
+                # 1 0002H 0 = Do not show grid lines 1 = Show grid lines
+                # 2 0004H 0 = Do not show sheet headers 1 = Show sheet headers
+                # 3 0008H 0 = Panes are not frozen 1 = Panes are frozen (freeze)
+                # 4 0010H 0 = Show zero values as empty cells 1 = Show zero values
+                # 5 0020H 0 = Manual grid line colour 1 = Automatic grid line colour
+                # 6 0040H 0 = Columns from left to right 1 = Columns from right to left
+                # 7 0080H 0 = Do not show outline symbols 1 = Show outline symbols
+                # 8 0100H 0 = Keep splits if pane freeze is removed 1 = Remove splits if pane freeze is removed
+                # 9 0200H 0 = Sheet not selected 1 = Sheet selected (BIFF5-BIFF8)
+                # 10 0400H 0 = Sheet not visible 1 = Sheet visible (BIFF5-BIFF8)
+                # 11 0800H 0 = Show in normal view 1 = Show in page break preview (BIFF8)
+                # The freeze flag specifies, if a following PANE record (6.71) describes unfrozen or frozen panes.
+                for attr, _unused_defval in _WINDOW2_options:
+                    setattr(self, attr, options & 1)
+                    options >>= 1
+                # print "WINDOW2: visible=%d selected=%d" \
+                #     % (self.sheet_visible, self.sheet_selected)
+            #### all of the following are for BIFF <= 4W
+            elif bv <= 45:
+                if rc == XL_FORMAT or rc == XL_FORMAT2:
+                    bk.handle_format(data, rc)
+                elif rc == XL_FONT or rc == XL_FONT_B3B4:
+                    bk.handle_font(data)
+                elif rc == XL_STYLE:
+                    if not self.book._xf_epilogue_done:
+                        self.book.xf_epilogue()
+                    bk.handle_style(data)
+                elif rc == XL_PALETTE:
+                    bk.handle_palette(data)
+                elif rc == XL_BUILTINFMTCOUNT:
+                    bk.handle_builtinfmtcount(data)
+                elif rc == XL_XF4 or rc == XL_XF3 or rc == XL_XF2: #### N.B. not XL_XF
+                    bk.handle_xf(data)
+                elif rc == XL_DATEMODE:
+                    bk.handle_datemode(data)
+                elif rc == XL_CODEPAGE:
+                    bk.handle_codepage(data)
+                elif rc == XL_FILEPASS:
+                    bk.handle_filepass(data)
+                elif rc == XL_WRITEACCESS:
+                    bk.handle_writeaccess(data)
+                elif rc == XL_IXFE:
+                    self._ixfe = local_unpack('<H', data)[0]
+                elif rc == XL_NUMBER_B2:
+                    rowx, colx, cell_attr, d = local_unpack('<HH3sd', data)
+                    self_put_number_cell(rowx, colx, d, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
+                elif rc == XL_INTEGER:
+                    rowx, colx, cell_attr, d = local_unpack('<HH3sH', data)
+                    self_put_number_cell(rowx, colx, float(d), self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
+                elif rc == XL_LABEL_B2:
+                    rowx, colx, cell_attr = local_unpack('<HH3s', data[0:7])
+                    strg = unpack_string(data, 7, bk.encoding or bk.derive_encoding(), lenlen=1)
+                    self_put_cell(rowx, colx, XL_CELL_TEXT, strg, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
+                elif rc == XL_BOOLERR_B2:
+                    rowx, colx, cell_attr, value, is_err = local_unpack('<HH3sBB', data)
+                    cellty = (XL_CELL_BOOLEAN, XL_CELL_ERROR)[is_err]
+                    # if DEBUG: print "XL_BOOLERR_B2", rowx, colx, cell_attr, value, is_err
+                    self.put_cell(rowx, colx, cellty, value, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
+                elif rc == XL_BLANK_B2:
+                    if not fmt_info: continue
+                    rowx, colx, cell_attr = local_unpack('<HH3s', data[:7])
+                    self_put_blank_cell(rowx, colx, self.fixed_BIFF2_xfindex(cell_attr, rowx, colx))
+                elif rc == XL_EFONT:
+                    bk.handle_efont(data)
+                elif rc == XL_ROW_B2:
+                    if not fmt_info: continue
+                    rowx, bits1, has_defaults = local_unpack('<H4xH2xB', data[0:11])
+                    if not(0 <= rowx < self.utter_max_rows):
+                        print >> self.logfile, \
+                            "*** NOTE: ROW_B2 record has row index %d; " \
+                            "should have 0 <= rowx < %d -- record ignored!" \
+                            % (rowx, self.utter_max_rows)
+                        continue
+                    r = Rowinfo()
+                    r.height = bits1 & 0x7fff
+                    r.has_default_height = (bits1 >> 15) & 1
+                    r.outline_level = 0
+                    r.outline_group_starts_ends = 0
+                    r.hidden = 0
+                    r.height_mismatch = 0
+                    r.has_default_xf_index = has_defaults & 1
+                    r.additional_space_above = 0
+                    r.additional_space_below = 0
+                    if not r.has_default_xf_index:
+                        r.xf_index = -1
+                    elif data_len == 18:
+                        # Seems the XF index in the cell_attr is dodgy
+                        xfx = local_unpack('<H', data[16:18])[0]
+                        r.xf_index = self.fixed_BIFF2_xfindex(cell_attr=None, rowx=rowx, colx=-1, true_xfx=xfx)
+                    else:
+                        cell_attr = data[13:16]
+                        r.xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx, colx=-1)
+                    self.rowinfo_map[rowx] = r
+                    if 0 and r.xf_index > -1:
+                        fprintf(self.logfile,
+                            "**ROW %d %d %d\n",
+                            self.number, rowx, r.xf_index)
+                    if blah_rows:
+                        print >> self.logfile, 'ROW_B2', rowx, bits1, has_defaults
+                        r.dump(self.logfile,
+                            header="--- sh #%d, rowx=%d ---" % (self.number, rowx))
+                elif rc == XL_COLWIDTH: # BIFF2 only
+                    if not fmt_info: continue
+                    first_colx, last_colx, width\
+                        = local_unpack("<BBH", data[:4])
+                    if not(first_colx <= last_colx):
+                        print >> self.logfile, \
+                            "*** NOTE: COLWIDTH record has first col index %d, last %d; " \
+                            "should have first <= last -- record ignored!" \
+                            % (first_colx, last_colx)
+                        continue
+                    for colx in xrange(first_colx, last_colx+1):
+                        if self.colinfo_map.has_key(colx):
+                            c = self.colinfo_map[colx]
+                        else:
+                            c = Colinfo()
+                            self.colinfo_map[colx] = c
+                        c.width = width
+                    if blah:
+                        fprintf(
+                            self.logfile,
+                            "COLWIDTH sheet #%d cols %d-%d: wid=%d\n",
+                            self.number, first_colx, last_colx, width
+                            )
+                elif rc == XL_COLUMNDEFAULT: # BIFF2 only
+                    if not fmt_info: continue
+                    first_colx, last_colx = local_unpack("<HH", data[:4])
+                    #### Warning OOo docs wrong; first_colx <= colx < last_colx
+                    if blah:
+                        fprintf(
+                            self.logfile,
+                            "COLUMNDEFAULT sheet #%d cols in range(%d, %d)\n",
+                            self.number, first_colx, last_colx
+                            )
+                    if not(0 <= first_colx < last_colx <= 256):
+                        print >> self.logfile, \
+                            "*** NOTE: COLUMNDEFAULT record has first col index %d, last %d; " \
+                            "should have 0 <= first < last <= 256" \
+                            % (first_colx, last_colx)
+                        last_colx = min(last_colx, 256)
+                    for colx in xrange(first_colx, last_colx):
+                        offset = 4 + 3 * (colx - first_colx)
+                        cell_attr = data[offset:offset+3]
+                        xf_index = self.fixed_BIFF2_xfindex(cell_attr, rowx=-1, colx=colx)
+                        if self.colinfo_map.has_key(colx):
+                            c = self.colinfo_map[colx]
+                        else:
+                            c = Colinfo()
+                            self.colinfo_map[colx] = c
+                        c.xf_index = xf_index
+            else:
+                # if DEBUG: print "SHEET.READ: Unhandled record type %02x %d bytes %r" % (rc, data_len, data)
+                pass
+        if not eof_found:
+            raise XLRDError("Sheet %d (%r) missing EOF record" \
+                % (self.number, self.name))
+        self.tidy_dimensions()
+        bk._position = oldpos
+        return 1
+
+    def fixed_BIFF2_xfindex(self, cell_attr, rowx, colx, true_xfx=None):
+        """Return the XF index to use for a BIFF2 cell attribute.
+
+        With biff_version 21 and a non-empty XF type map, the index is
+        true_xfx if given, else the low 6 bits of the first cell_attr byte;
+        the value 63 is replaced by the last IXFE value (XLRDError if none
+        has been seen).  Otherwise the file is treated as BIFF 2.0 (both the
+        sheet and the book are switched to version 20) and an XF is faked
+        from cell_attr, cached per distinct cell_attr in
+        self._cell_attr_to_xfx.  rowx and colx are used only for logging.
+        """
+        DEBUG = 0
+        blah = DEBUG or self.verbosity >= 2
+        if self.biff_version == 21:
+            if self._xf_index_to_xl_type_map:
+                if true_xfx is None:
+                    xfx = ord(cell_attr[0]) & 0x3F
+                else:
+                    xfx = true_xfx
+                if xfx != 0x3F:
+                    return xfx
+                if self._ixfe is None:
+                    raise XLRDError("BIFF2 cell record has XF index 63 but no preceding IXFE record.")
+                # OOo docs are capable of interpretation that each
+                # cell record is preceded immediately by its own IXFE record.
+                # Empirical evidence is that (sensibly) an IXFE record applies to all
+                # following cell records until another IXFE comes along.
+                return self._ixfe
+            # Have either Excel 2.0, or broken 2.1 w/o XF records -- same effect.
+            self.biff_version = self.book.biff_version = 20
+        #### check that XF slot in cell_attr is zero
+        xfx_slot = ord(cell_attr[0]) & 0x3F
+        assert xfx_slot == 0
+        known_xfx = self._cell_attr_to_xfx.get(cell_attr)
+        if known_xfx is not None:
+            return known_xfx
+        if blah:
+            fprintf(self.logfile, "New cell_attr %r at (%r, %r)\n", cell_attr, rowx, colx)
+        book = self.book
+        xf = self.fake_XF_from_BIFF20_cell_attr(cell_attr)
+        new_xfx = len(book.xf_list)
+        xf.xf_index = new_xfx
+        book.xf_list.append(xf)
+        if blah:
+            xf.dump(self.logfile, header="=== Faked XF %d ===" % new_xfx, footer="======")
+        fmt_key = xf.format_key
+        if not book.format_map.has_key(fmt_key):
+            # Unknown format key: report it and register a "General" format.
+            msg = "ERROR *** XF[%d] unknown format key (%d, 0x%04x)\n"
+            fprintf(self.logfile, msg,
+                xf.xf_index, fmt_key, fmt_key)
+            filler_fmt = Format(fmt_key, FUN, u"General")
+            book.format_map[fmt_key] = filler_fmt
+            while len(book.format_list) <= fmt_key:
+                book.format_list.append(filler_fmt)
+        cellty_from_fmtty = {
+            FNU: XL_CELL_NUMBER,
+            FUN: XL_CELL_NUMBER,
+            FGE: XL_CELL_NUMBER,
+            FDT: XL_CELL_DATE,
+            FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text.
+            }
+        fmt_type = book.format_map[fmt_key].type
+        self._xf_index_to_xl_type_map[xf.xf_index] = cellty_from_fmtty[fmt_type]
+        self._cell_attr_to_xfx[cell_attr] = new_xfx
+        return new_xfx
+
+    def fake_XF_from_BIFF20_cell_attr(self, cell_attr):
+        """Build an XF object from a 3-byte BIFF 2.0 cell attribute string.
+
+        The three bytes are unpacked as protection bits, a combined
+        font/format byte and a combined alignment/border/shading byte.
+        Fields that the cell attribute does not carry are set to the fixed
+        values below.  Returns the new XF; nothing is registered in the book.
+        """
+        from formatting import XF, XFAlignment, XFBorder, XFBackground, XFProtection
+        xf = XF()
+        align = xf.alignment = XFAlignment()
+        align.indent_level = 0
+        align.shrink_to_fit = 0
+        align.text_direction = 0
+        border = xf.border = XFBorder()
+        border.diag_up = 0
+        border.diag_down = 0
+        border.diag_colour_index = 0
+        border.diag_line_style = 0 # no line
+        bg = xf.background = XFBackground()
+        xf.protection = XFProtection()
+        prot_bits, font_and_format, halign_etc = unpack('<BBB', cell_attr)
+        # Low 6 bits select the format, top 2 bits the font.
+        xf.format_key = font_and_format & 0x3F
+        xf.font_index = (font_and_format & 0xC0) >> 6
+        upkbits(xf.protection, prot_bits, (
+            (6, 0x40, 'cell_locked'),
+            (7, 0x80, 'formula_hidden'),
+            ))
+        align.hor_align = halign_etc & 0x07
+        # One bit per border side: set means a thin black line.
+        for mask, side in ((0x08, 'left'), (0x10, 'right'), (0x20, 'top'), (0x40, 'bottom')):
+            if halign_etc & mask:
+                side_look = (8, 1) # black, thin
+            else:
+                side_look = (0, 0) # none, none
+            setattr(border, side + '_colour_index', side_look[0])
+            setattr(border, side + '_line_style', side_look[1])
+        bg.fill_pattern = 0
+        if halign_etc & 0x80:
+            bg.fill_pattern = 17
+        bg.background_colour_index = 9 # white
+        bg.pattern_colour_index = 8 # black
+        xf.parent_style_index = 0 # ???????????
+        align.vert_align = 2 # bottom
+        align.rotation = 0
+        for attr_stem in \
+            "format font alignment border background protection".split():
+            setattr(xf, "_" + attr_stem + "_flag", 1)
+        return xf
+
+    def req_fmt_info(self):
+        """Raise XLRDError unless this sheet was loaded with formatting_info."""
+        if self.formatting_info:
+            return
+        raise XLRDError("Feature requires open_workbook(..., formatting_info=True)")
+
+    ##
+    # Determine column display width.
+    # <br /> -- New in version 0.6.1
+    # <br />
+    # @param colx Index of the queried column, range 0 to 255.
+    # Note that it is possible to find out the width that will be used to display
+    # columns with no cell information e.g. column IV (colx=255).
+    # @return The column width that will be used for displaying
+    # the given column by Excel, in units of 1/256th of the width of a
+    # standard character (the digit zero in the first font).
+
+    def computed_column_width(self, colx):
+        self.req_fmt_info()
+        bv = self.biff_version
+        # Decide which sources apply for this BIFF version; they are then
+        # consulted in the order COLINFO, STANDARDWIDTH, DEFCOLWIDTH.
+        use_colinfo = 0
+        use_standardwidth = 0
+        if bv >= 80:
+            use_colinfo = 1
+            use_standardwidth = 1
+        elif bv >= 40:
+            # The GCW flag picks exactly one of the two sources.
+            if self.gcw[colx]:
+                use_standardwidth = 1
+            else:
+                use_colinfo = 1
+        elif bv == 30:
+            use_colinfo = 1
+        if use_colinfo:
+            colinfo = self.colinfo_map.get(colx, None)
+            if colinfo is not None:
+                return colinfo.width
+        if use_standardwidth and self.standardwidth is not None:
+            return self.standardwidth
+        # All roads lead to Rome and the DEFCOLWIDTH ...
+        if self.defcolwidth is not None:
+            return self.defcolwidth * 256
+        return 8 * 256 # 8 is what Excel puts in a DEFCOLWIDTH record
+
+    def handle_msodrawingetc(self, recid, data_len, data):
+        """Debug-only walk over the sub-records of an MSODRAWING record.
+
+        Does nothing unless OBJ_MSO_DEBUG is set and biff_version >= 80.
+        Each 8-byte sub-record header is dumped to self.logfile; a Client
+        Anchor (0xF010) has its fields copied onto an MSODrawing object,
+        which is dumped at the end.  recid is not used.
+        """
+        if not OBJ_MSO_DEBUG:
+            return
+        DEBUG = 1
+        if self.biff_version < 80:
+            return
+        o = MSODrawing()
+        pos = 0
+        while pos < data_len:
+            tmp, fbt, cb = unpack('<HHI', data[pos:pos+8])
+            ver = tmp & 0xF
+            inst = (tmp >> 4) & 0xFFF
+            # A container (ver 0xF) is stepped into, so none of its
+            # payload bytes are skipped.
+            ndb = cb
+            if ver == 0xF:
+                ndb = 0 # container
+            if DEBUG:
+                hex_char_dump(data, pos, ndb + 8, base=0, fout=self.logfile)
+                fprintf(self.logfile,
+                    "fbt:0x%04X inst:%d ver:0x%X cb:%d (0x%04X)\n",
+                    fbt, inst, ver, cb, cb)
+            if fbt == 0xF010: # Client Anchor
+                assert ndb == 18
+                payload = data[pos+8:pos+8+ndb]
+                (o.anchor_unk,
+                o.anchor_colx_lo, o.anchor_rowx_lo,
+                o.anchor_colx_hi, o.anchor_rowx_hi) = unpack('<Hiiii', payload)
+            elif fbt == 0xF011: # Client Data
+                # must be followed by an OBJ record
+                assert cb == 0
+                assert pos + 8 == data_len
+            pos += ndb + 8
+        # The loop has no break, so this check always runs.
+        assert pos == data_len
+        if DEBUG:
+            o.dump(self.logfile, header="=== MSODrawing ===", footer= " ")
+
+
+ def handle_obj(self, data):
+ # Debug-only decoder for an OBJ record: walks its sub-records, copies
+ # the fields it recognises onto an MSObj object and dumps the result to
+ # self.logfile.  Returns immediately unless OBJ_MSO_DEBUG is true and
+ # the BIFF version is at least 80.
+ if not OBJ_MSO_DEBUG:
+ return
+ DEBUG = 1
+ if self.biff_version < 80:
+ return
+ o = MSObj()
+ data_len = len(data)
+ pos = 0
+ if DEBUG:
+ fprintf(self.logfile, "... OBJ record ...\n")
+ while pos < data_len:
+ # 4-byte sub-record header: type (ft) and byte count (cb)
+ ft, cb = unpack('<HH', data[pos:pos+4])
+ if DEBUG:
+ hex_char_dump(data, pos, cb, base=0, fout=self.logfile)
+ if ft == 0x15: # ftCmo ... s/b first
+ assert pos == 0
+ o.type, o.id, option_flags = unpack('<HHH', data[pos+4:pos+10])
+ # presumably upkbits stores each (shift, mask, name) flag as an
+ # attribute of o -- the helper is defined elsewhere; confirm there
+ upkbits(o, option_flags, (
+ ( 0, 0x0001, 'locked'),
+ ( 4, 0x0010, 'printable'),
+ ( 8, 0x0100, 'autofilter'), # not documented in Excel 97 dev kit
+ ( 9, 0x0200, 'scrollbar_flag'), # not documented in Excel 97 dev kit
+ (13, 0x2000, 'autofill'),
+ (14, 0x4000, 'autoline'),
+ ))
+ elif ft == 0x00:
+ # terminating sub-record: empty and last in the record
+ assert cb == 0
+ assert pos + 4 == data_len
+ elif ft == 0x0C: # Scrollbar
+ # five 16-bit values starting 8 bytes into the sub-record
+ values = unpack('<5H', data[pos+8:pos+18])
+ for value, tag in zip(values, ('value', 'min', 'max', 'inc', 'page')):
+ setattr(o, 'scrollbar_' + tag, value)
+ elif ft == 0x0D: # "Notes structure" [used for cell comments]
+ pass ############## not documented in Excel 97 dev kit
+ elif ft == 0x13: # list box data
+ if o.autofilter: # non standard exit. NOT documented
+ break
+ else:
+ pass
+ pos += cb + 4
+ else:
+ # didn't break out of while loop
+ assert pos == data_len
+ if DEBUG:
+ o.dump(self.logfile, header="=== MSOBj ===", footer= " ")
+
+ def handle_note(self, data):
+ # Debug-only decoder for a NOTE record: unpacks the cell position,
+ # flags, object id and author string into an MSNote object and dumps it
+ # to self.logfile.  Returns immediately unless OBJ_MSO_DEBUG is true and
+ # the BIFF version is at least 80.
+ if not OBJ_MSO_DEBUG:
+ return
+ DEBUG = 1
+ if self.biff_version < 80:
+ return
+ if DEBUG:
+ fprintf(self.logfile, '... NOTE record ...\n')
+ hex_char_dump(data, 0, len(data), base=0, fout=self.logfile)
+ o = MSNote()
+ data_len = len(data)
+ # first 8 bytes: four unsigned 16-bit fields
+ o.rowx, o.colx, option_flags, o.object_id = unpack('<4H', data[:8])
+ # "show" is bit 1 of the option flags
+ o.show = (option_flags >> 1) & 1
+ # Docs say NULL [sic] bytes padding between string count and string data
+ # to ensure that string is word-aligned. Appears to be nonsense.
+ # There also seems to be a random(?) byte after the string (not counted in the
+ # string length).
+ o.original_author, endpos = unpack_unicode_update_pos(data, 8, lenlen=2)
+ # exactly one trailing byte is expected after the author string
+ assert endpos == data_len - 1
+ o.last_byte = data[-1]
+ if DEBUG:
+ o.dump(self.logfile, header="=== MSNote ===", footer= " ")
+
+ def handle_txo(self, data):
+ # Debug-only decoder for a TXO record and the two CONTINUE records that
+ # follow it; the text is stored on an MSTxo object and dumped to
+ # self.logfile.  Returns immediately unless OBJ_MSO_DEBUG is true and
+ # the BIFF version is at least 80.
+ # NOTE: this consumes two further records from self.book.
+ if not OBJ_MSO_DEBUG:
+ return
+ DEBUG = 1
+ if self.biff_version < 80:
+ return
+ o = MSTxo()
+ data_len = len(data) # not used below
+ # 18-byte layout: flags, rotation, 6 skipped bytes, text length,
+ # formatting-run byte count, 4 skipped bytes
+ option_flags, o.rot, cchText, cbRuns = unpack('<HH6xHH4x', data)
+ upkbits(o, option_flags, (
+ (3, 0x000E, 'horz_align'),
+ (6, 0x0070, 'vert_align'),
+ (9, 0x0200, 'lock_text'),
+ ))
+ # first following record must be a CONTINUE holding the text itself
+ rc2, data2_len, data2 = self.book.get_record_parts()
+ assert rc2 == XL_CONTINUE
+ o.text, endpos = unpack_unicode_update_pos(data2, 0, known_len=cchText)
+ assert endpos == data2_len
+ # second following record must also be a CONTINUE (formatting runs)
+ rc3, data3_len, data3 = self.book.get_record_parts()
+ assert rc3 == XL_CONTINUE
+ # ignore the formatting runs for the moment
+ if DEBUG:
+ o.dump(self.logfile, header="=== MSTxo ===", footer= " ")
+
+    def handle_feat11(self, data):
+        # Debug-only decoder for a FEAT11 (Table feature) record.  Unpacks
+        # the 35-byte header and the 66 bytes of fixed Table data that follow
+        # it, sanity-checks the header with assertions and writes the decoded
+        # fields to self.logfile (like the other handle_* debug methods,
+        # rather than printing to stdout).
+        # Returns immediately unless OBJ_MSO_DEBUG is true.
+        if not OBJ_MSO_DEBUG:
+            return
+        # rt: Record type; this matches the BIFF rt in the first two bytes of the record; =0872h
+        # grbitFrt: FRT cell reference flag (see table below for details)
+        # Ref0: Range reference to a worksheet cell region if grbitFrt=1 (bitFrtRef). Otherwise blank.
+        # isf: Shared feature type index =5 for Table
+        # fHdr: =0 since this is for feat not feat header
+        # reserved0: Reserved for future use =0 for Table
+        # cref: Count of ref ranges this feature is on
+        # cbFeatData: Count of byte for the current feature data.
+        # reserved1: =0 currently not used
+        # Ref1: Repeat of Ref0. UNDOCUMENTED
+        rt, grbitFrt, Ref0, isf, fHdr, reserved0, cref, cbFeatData, reserved1, Ref1 = unpack('<HH8sHBiHiH8s', data[0:35])
+        assert reserved0 == 0
+        assert reserved1 == 0
+        assert isf == 5
+        assert rt == 0x872
+        assert fHdr == 0
+        assert Ref1 == Ref0
+        fprintf(self.logfile,
+            "FEAT11: grbitFrt=%d Ref0=%r cref=%d cbFeatData=%d\n",
+            grbitFrt, Ref0, cref, cbFeatData)
+        # lt: Table data source type:
+        # =0 for Excel Worksheet Table =1 for read-write SharePoint linked List
+        # =2 for XML mapper Table =3 for Query Table
+        # idList: The ID of the Table (unique per worksheet)
+        # crwHeader: How many header/title rows the Table has at the top
+        # crwTotals: How many total rows the Table has at the bottom
+        # idFieldNext: Next id to try when assigning a unique id to a new field
+        # cbFSData: The size of the Fixed Data portion of the Table data structure.
+        # rupBuild: the rupBuild that generated the record
+        # unusedShort: UNUSED short that can be used later. The value is reserved during round-tripping.
+        # listFlags: Collection of bit flags: (see listFlags' bit setting table below for detail.)
+        # lPosStmCache: Table data stream position of cached data
+        # cbStmCache: Count of bytes of cached data
+        # cchStmCache: Count of characters of uncompressed cached data in the stream
+        # lem: Table edit mode (see List (Table) Editing Mode (lem) setting table below for details.)
+        # rgbHashParam: Hash value for SharePoint Table
+        # cchName: Count of characters in the Table name string rgbName
+        (lt, idList, crwHeader, crwTotals, idFieldNext, cbFSData,
+        rupBuild, unusedShort, listFlags, lPosStmCache, cbStmCache,
+        cchStmCache, lem, rgbHashParam, cchName) = unpack('<iiiiiiHHiiiii16sH', data[35:35+66])
+        fprintf(self.logfile,
+            "lt=%d idList=%d crwHeader=%d crwTotals=%d idFieldNext=%d cbFSData=%d\n"
+            "rupBuild=%d unusedShort=%d listFlags=%04X lPosStmCache=%d cbStmCache=%d\n"
+            "cchStmCache=%d lem=%d rgbHashParam=%r cchName=%d\n",
+            lt, idList, crwHeader, crwTotals, idFieldNext, cbFSData,
+            rupBuild, unusedShort, listFlags, lPosStmCache, cbStmCache,
+            cchStmCache, lem, rgbHashParam, cchName)
+
+# Plain attribute holders used by the debug-only handle_* methods; each
+# handler creates one, sets fields on it and calls its dump() method.
+
+# Filled in by handle_msodrawingetc (Client Anchor fields).
+class MSODrawing(BaseObject):
+ pass
+
+# Filled in by handle_obj (object type/id, option flags, scrollbar values).
+class MSObj(BaseObject):
+ pass
+
+# Filled in by handle_txo (rotation, alignment flags, text).
+class MSTxo(BaseObject):
+ pass
+
+# Filled in by handle_note (cell position, show flag, author).
+class MSNote(BaseObject):
+ pass
+
+# === helpers ===
+
+def unpack_RK(rk_str):
+    # Decode a 4-byte RK value into a float.  The two low bits of the first
+    # byte are flags: bit 1 set means the remaining 30 bits are a signed
+    # integer, otherwise they are the most significant 30 bits of an
+    # IEEE 754 64-bit float; bit 0 set means the result is divided by 100.
+    flag_byte = ord(rk_str[0])
+    holds_integer = flag_byte & 2
+    scaled_by_100 = flag_byte & 1
+    if holds_integer:
+        # There's a SIGNED 30-bit integer in there!
+        (raw,) = unpack('<i', rk_str)
+        # arithmetic shift drops the 2 flag bits and keeps the sign
+        number = float(raw >> 2)
+    else:
+        # rebuild the 8-byte double: zero low half, flag bits masked off
+        packed = '\0\0\0\0' + chr(flag_byte & 252) + rk_str[1:4]
+        (number,) = unpack('<d', packed)
+    if scaled_by_100:
+        return number / 100.0
+    return number
+
+##### =============== Cell ======================================== #####
+
+# Maps a number-format type constant (the F* names, defined elsewhere) to the
+# XL_CELL_* type given to a numeric cell that carries that kind of format.
+# Only FDT yields a date; every other format type yields a plain number.
+cellty_from_fmtty = {
+ FNU: XL_CELL_NUMBER,
+ FUN: XL_CELL_NUMBER,
+ FGE: XL_CELL_NUMBER,
+ FDT: XL_CELL_DATE,
+ FTX: XL_CELL_NUMBER, # Yes, a number can be formatted as text.
+ }
+
+# Short human-readable name for each XL_CELL_* type; used by Cell.__repr__.
+ctype_text = {
+ XL_CELL_EMPTY: 'empty',
+ XL_CELL_TEXT: 'text',
+ XL_CELL_NUMBER: 'number',
+ XL_CELL_DATE: 'xldate',
+ XL_CELL_BOOLEAN: 'bool',
+ XL_CELL_ERROR: 'error',
+ XL_CELL_BLANK: 'blank',
+ }
+
+##
+# <p>Contains the data for one cell.</p>
+#
+# <p>WARNING: You don't call this class yourself. You access Cell objects
+# via methods of the Sheet object(s) that you found in the Book object that
+# was returned when you called xlrd.open_workbook("myfile.xls").</p>
+# <p> Cell objects have three attributes: <i>ctype</i> is an int, <i>value</i>
+# (which depends on <i>ctype</i>) and <i>xf_index</i>.
+# If "formatting_info" is not enabled when the workbook is opened, xf_index will be None.
+# The following table describes the types of cells and how their values
+# are represented in Python.</p>
+#
+# <table border="1" cellpadding="7">
+# <tr>
+# <th>Type symbol</th>
+# <th>Type number</th>
+# <th>Python value</th>
+# </tr>
+# <tr>
+# <td>XL_CELL_EMPTY</td>
+# <td align="center">0</td>
+# <td>empty string u''</td>
+# </tr>
+# <tr>
+# <td>XL_CELL_TEXT</td>
+# <td align="center">1</td>
+# <td>a Unicode string</td>
+# </tr>
+# <tr>
+# <td>XL_CELL_NUMBER</td>
+# <td align="center">2</td>
+# <td>float</td>
+# </tr>
+# <tr>
+# <td>XL_CELL_DATE</td>
+# <td align="center">3</td>
+# <td>float</td>
+# </tr>
+# <tr>
+# <td>XL_CELL_BOOLEAN</td>
+# <td align="center">4</td>
+# <td>int; 1 means TRUE, 0 means FALSE</td>
+# </tr>
+# <tr>
+# <td>XL_CELL_ERROR</td>
+# <td align="center">5</td>
+# <td>int representing internal Excel codes; for a text representation,
+# refer to the supplied dictionary error_text_from_code</td>
+# </tr>
+# <tr>
+# <td>XL_CELL_BLANK</td>
+# <td align="center">6</td>
+# <td>empty string u''. Note: this type will appear only when
+# open_workbook(..., formatting_info=True) is used.</td>
+# </tr>
+# </table>
+#<p></p>
+
+class Cell(BaseObject):
+    # Holds one cell: its type code (ctype), its value, and the XF index
+    # (xf_index, None when not supplied).
+
+    __slots__ = ['ctype', 'value', 'xf_index']
+
+    def __init__(self, ctype, value, xf_index=None):
+        self.ctype, self.value, self.xf_index = ctype, value, xf_index
+
+    def __repr__(self):
+        # "<type name>:<value repr>", plus the XF index when there is one
+        text = "%s:%r" % (ctype_text[self.ctype], self.value)
+        if self.xf_index is not None:
+            text += " (XF:%r)" % (self.xf_index,)
+        return text
+
+##
+# There is one and only one instance of an empty cell -- it's a singleton. This is it.
+# You may use a test like "acell is empty_cell".
+empty_cell = Cell(XL_CELL_EMPTY, '')
+
+##### =============== Colinfo and Rowinfo ============================== #####
+
+##
+# Width and default formatting information that applies to one or
+# more columns in a sheet. Derived from COLINFO records.
+#
+# <p> Here is the default hierarchy for width, according to the OOo docs:
+#
+# <br />"""In BIFF3, if a COLINFO record is missing for a column,
+# the width specified in the record DEFCOLWIDTH is used instead.
+#
+# <br />In BIFF4-BIFF7, the width set in this [COLINFO] record is only used,
+# if the corresponding bit for this column is cleared in the GCW
+# record, otherwise the column width set in the DEFCOLWIDTH record
+# is used (the STANDARDWIDTH record is always ignored in this case [see footnote!]).
+#
+# <br />In BIFF8, if a COLINFO record is missing for a column,
+# the width specified in the record STANDARDWIDTH is used.
+# If this [STANDARDWIDTH] record is also missing,
+# the column width of the record DEFCOLWIDTH is used instead."""
+# <br />
+#
+# Footnote: The docs on the GCW record say this:
+# """<br />
+# If a bit is set, the corresponding column uses the width set in the STANDARDWIDTH
+# record. If a bit is cleared, the corresponding column uses the width set in the
+# COLINFO record for this column.
+# <br />If a bit is set, and the worksheet does not contain the STANDARDWIDTH record, or if
+# the bit is cleared, and the worksheet does not contain the COLINFO record, the DEFCOLWIDTH
+# record of the worksheet will be used instead.
+# <br />"""<br />
+# At the moment (2007-01-17) xlrd is going with the GCW version of the story.
+# Reference to the source may be useful: see the computed_column_width(colx) method
+# of the Sheet class.
+# <br />-- New in version 0.6.1
+# </p>
+
+class Colinfo(BaseObject):
+ # The values below are class-level defaults; presumably the COLINFO
+ # record parser overrides them per instance -- confirm in the parser.
+ ##
+ # Width of the column in 1/256 of the width of the zero character,
+ # using default font (first FONT record in the file).
+ width = 0
+ ##
+ # XF index to be used for formatting empty cells.
+ xf_index = -1
+ ##
+ # 1 = column is hidden
+ hidden = 0
+ ##
+ # Value of a 1-bit flag whose purpose is unknown
+ # but is often seen set to 1
+ bit1_flag = 0
+ ##
+ # Outline level of the column, in range(7).
+ # (0 = no outline)
+ outline_level = 0
+ ##
+ # 1 = column is collapsed
+ collapsed = 0
+
+##
+# Height and default formatting information that applies to a row in a sheet.
+# Derived from ROW records.
+# <br /> -- New in version 0.6.1
+
+class Rowinfo(BaseObject):
+ # The values below are class-level defaults; presumably the ROW record
+ # parser overrides them per instance -- confirm in the parser.
+ ##
+ # Height of the row, in twips. One twip == 1/20 of a point
+ height = 0
+ ##
+ # 0 = Row has custom height; 1 = Row has default height
+ has_default_height = 0
+ ##
+ # Outline level of the row
+ outline_level = 0
+ ##
+ # 1 = Outline group starts or ends here (depending on where the
+ # outline buttons are located, see WSBOOL record [TODO ??]),
+ # <i>and</i> is collapsed
+ outline_group_starts_ends = 0
+ ##
+ # 1 = Row is hidden (manually, or by a filter or outline group)
+ hidden = 0
+ ##
+ # 1 = Row height and default font height do not match
+ height_mismatch = 0
+ ##
+ # 1 = the xf_index attribute is usable; 0 = ignore it
+ has_default_xf_index = 0
+ ##
+ # Index to default XF record for empty cells in this row.
+ # Don't use this if has_default_xf_index == 0.
+ xf_index = -9999
+ ##
+ # This flag is set, if the upper border of at least one cell in this row
+ # or if the lower border of at least one cell in the row above is
+ # formatted with a thick line style. Thin and medium line styles are not
+ # taken into account.
+ additional_space_above = 0
+ ##
+ # This flag is set, if the lower border of at least one cell in this row
+ # or if the upper border of at least one cell in the row below is
+ # formatted with a medium or thick line style. Thin line styles are not
+ # taken into account.
+ additional_space_below = 0
diff --git a/tablib/packages/xlrd/timemachine.py b/tablib/packages/xlrd/timemachine.py
new file mode 100644
index 0000000..1718c03
--- /dev/null
+++ b/tablib/packages/xlrd/timemachine.py
@@ -0,0 +1,44 @@
+# -*- coding: cp1252 -*-
+
+##
+# <p>Copyright © 2006-2008 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
+##
+
+# timemachine.py -- adaptation for earlier Pythons e.g. 2.1
+# usage: from timemachine import *
+
+# 2008-02-08 SJM Generalised method of detecting IronPython
+
+import sys
+
+# (major, minor) of the running interpreter; drives the shims below.
+python_version = sys.version_info[:2] # e.g. version 2.4 -> (2, 4)
+
+# Feature flags derived purely from the interpreter version.
+CAN_PICKLE_ARRAY = python_version >= (2, 5)
+CAN_SUBCLASS_BUILTIN = python_version >= (2, 2)
+
+# On IronPython array_array is None instead of the array.array type, so
+# users of this name must be prepared for None.
+if sys.version.find("IronPython") >= 0:
+ array_array = None
+else:
+ from array import array as array_array
+
+# Before Python 2.2: supply stand-ins for `object`, `False` and `True`.
+if python_version < (2, 2):
+ class object:
+ pass
+ False = 0
+ True = 1
+
+def int_floor_div(x, y):
+    # Return the quotient part of divmod(x, y), i.e. x divided by y
+    # rounded towards negative infinity.
+    quotient, _remainder = divmod(x, y)
+    return quotient
+
+def intbool(x):
+    # Collapse the truth value of x to the integer 1 (true) or 0 (false).
+    result = 0
+    if x:
+        result = 1
+    return result
+
+if python_version < (2, 3):
+    def sum(sequence, start=0):
+        # Stand-in for sum(), defined only when the interpreter is older
+        # than 2.3: adds every item of `sequence` to `start` and returns
+        # the total.
+        total = start
+        # iterate the `sequence` parameter (the original looped over an
+        # undefined name, which raised NameError on first use)
+        for item in sequence:
+            total += item
+        return total
diff --git a/tablib/packages/xlrd/xldate.py b/tablib/packages/xlrd/xldate.py
new file mode 100644
index 0000000..e5f7591
--- /dev/null
+++ b/tablib/packages/xlrd/xldate.py
@@ -0,0 +1,171 @@
+# -*- coding: cp1252 -*-
+
+# No part of the content of this file was derived from the works of David Giffin.
+
+##
+# <p>Copyright © 2005-2008 Stephen John Machin, Lingfo Pty Ltd</p>
+# <p>This module is part of the xlrd package, which is released under a BSD-style licence.</p>
+#
+# <p>Provides function(s) for dealing with Microsoft Excel ™ dates.</p>
+##
+
+# 2008-10-18 SJM Fix bug in xldate_from_date_tuple (affected some years after 2099)
+
+# The conversion from days to (year, month, day) starts with
+# an integral "julian day number" aka JDN.
+# FWIW, JDN 0 corresponds to noon on Monday November 24 in Gregorian year -4713.
+# More importantly:
+# Noon on Gregorian 1900-03-01 (day 61 in the 1900-based system) is JDN 2415080.0
+# Noon on Gregorian 1904-01-02 (day 1 in the 1904-based system) is JDN 2416482.0
+
+from timemachine import int_floor_div as ifd
+
+# Indexed by datemode (0: 1900-based, 1: 1904-based): the offset added to an
+# Excel day number to obtain the julian day number.
+_JDN_delta = (2415080 - 61, 2416482 - 1)
+# the two date systems are 1462 days apart
+assert _JDN_delta[1] - _JDN_delta[0] == 1462
+
+# Base class for every error raised by this module.
+class XLDateError(ValueError): pass
+
+# xldate < 0.00
+class XLDateNegative(XLDateError): pass
+# The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
+class XLDateAmbiguous(XLDateError): pass
+# Gregorian year 10000 or later
+class XLDateTooLarge(XLDateError): pass
+# datemode arg is neither 0 nor 1
+class XLDateBadDatemode(XLDateError): pass
+# (year, month, day) or (hour, minute, second) has an invalid component
+class XLDateBadTuple(XLDateError): pass
+
+# Indexed by datemode: first day number that is too large to convert.
+_XLDAYS_TOO_LARGE = (2958466, 2958466 - 1462) # This is equivalent to 10000-01-01
+
+##
+# Convert an Excel number (presumed to represent a date, a datetime or a time) into
+# a tuple suitable for feeding to datetime or mx.DateTime constructors.
+# @param xldate The Excel number
+# @param datemode 0: 1900-based, 1: 1904-based.
+# <br>WARNING: when using this function to
+# interpret the contents of a workbook, you should pass in the Book.datemode
+# attribute of that workbook. Whether
+# the workbook has ever been anywhere near a Macintosh is irrelevant.
+# @return Gregorian (year, month, day, hour, minute, nearest_second).
+# <br>Special case: if 0.0 <= xldate < 1.0, it is assumed to represent a time;
+# (0, 0, 0, hour, minute, second) will be returned.
+# <br>Note: 1904-01-01 is not regarded as a valid date in the datemode 1 system; its "serial number"
+# is zero.
+# @throws XLDateNegative xldate < 0.00
+# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
+# @throws XLDateTooLarge Gregorian year 10000 or later
+# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
+# @throws XLDateError Covers the 4 specific errors
+
+def xldate_as_tuple(xldate, datemode):
+ # Returns (year, month, day, hour, minute, second); the date part is
+ # (0, 0, 0) when xldate has no whole-day component.
+ if datemode not in (0, 1):
+ raise XLDateBadDatemode(datemode)
+ if xldate == 0.00:
+ return (0, 0, 0, 0, 0, 0)
+ if xldate < 0.00:
+ raise XLDateNegative(xldate)
+ # split into whole days and the fraction of a day
+ xldays = int(xldate)
+ frac = xldate - xldays
+ # fraction of a day -> nearest whole second
+ seconds = int(round(frac * 86400.0))
+ assert 0 <= seconds <= 86400
+ if seconds == 86400:
+ # rounding reached a full day: carry into the day count
+ hour = minute = second = 0
+ xldays += 1
+ else:
+ # second = seconds % 60; minutes = seconds // 60
+ minutes, second = divmod(seconds, 60)
+ # minute = minutes % 60; hour = minutes // 60
+ hour, minute = divmod(minutes, 60)
+ if xldays >= _XLDAYS_TOO_LARGE[datemode]:
+ raise XLDateTooLarge(xldate)
+
+ # no whole days: a pure time value
+ if xldays == 0:
+ return (0, 0, 0, hour, minute, second)
+
+ if xldays < 61 and datemode == 0:
+ raise XLDateAmbiguous(xldate)
+
+ # day number -> julian day number -> (year, month, day), all in
+ # integer arithmetic (ifd is floor division)
+ jdn = xldays + _JDN_delta[datemode]
+ yreg = (ifd(ifd(jdn * 4 + 274277, 146097) * 3, 4) + jdn + 1363) * 4 + 3
+ mp = ifd(yreg % 1461, 4) * 535 + 333
+ d = ifd(mp % 16384, 535) + 1
+ # mp /= 16384
+ mp >>= 14
+ # mp >= 10 maps to month 1 or 2 of the following year (mp - 9);
+ # otherwise the month is mp + 3
+ if mp >= 10:
+ return (ifd(yreg, 1461) - 4715, mp - 9, d, hour, minute, second)
+ else:
+ return (ifd(yreg, 1461) - 4716, mp + 3, d, hour, minute, second)
+
+# === conversions from date/time to xl numbers
+
+def _leap(y):
+    # Return 1 if y is a Gregorian leap year, else 0.
+    if y % 400 == 0:
+        return 1
+    if y % 100 == 0:
+        return 0
+    if y % 4 == 0:
+        return 1
+    return 0
+
+_days_in_month = (None, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
+
+##
+# Convert a date tuple (year, month, day) to an Excel date.
+# @param year Gregorian year.
+# @param month 1 <= month <= 12
+# @param day 1 <= day <= last day of that (year, month)
+# @param datemode 0: 1900-based, 1: 1904-based.
+# @throws XLDateAmbiguous The 1900 leap-year problem (datemode == 0 and 1.0 <= xldate < 61.0)
+# @throws XLDateBadDatemode datemode arg is neither 0 nor 1
+# @throws XLDateBadTuple (year, month, day) is too early/late or has invalid component(s)
+# @throws XLDateError Covers the specific errors
+
+def xldate_from_date_tuple(date_tuple, datemode):
+    # Convert a (year, month, day) sequence to an Excel day number (float).
+    # date_tuple: (year, month, day); (0, 0, 0) converts to 0.00.
+    # datemode: 0 for the 1900-based system, 1 for the 1904-based system.
+    # Raises XLDateBadDatemode, XLDateBadTuple or XLDateAmbiguous.
+    #
+    # The tuple is unpacked here rather than in the signature: tuple
+    # parameters are Python-2-only syntax, and callers passing the tuple
+    # positionally are unaffected.
+    year, month, day = date_tuple
+
+    if datemode not in (0, 1):
+        raise XLDateBadDatemode(datemode)
+
+    if year == 0 and month == 0 and day == 0:
+        return 0.00
+
+    if not (1900 <= year <= 9999):
+        raise XLDateBadTuple("Invalid year: %r" % ((year, month, day),))
+    if not (1 <= month <= 12):
+        raise XLDateBadTuple("Invalid month: %r" % ((year, month, day),))
+    # 29 February is accepted only in leap years
+    if day < 1 \
+    or (day > _days_in_month[month] and not(day == 29 and month == 2 and _leap(year))):
+        raise XLDateBadTuple("Invalid day: %r" % ((year, month, day),))
+
+    # Shift to a year that starts in March so that the leap day falls at
+    # the end of the (shifted) year: Mp is 0 for March ... 11 for February.
+    Yp = year + 4716
+    M = month
+    if M <= 2:
+        Yp = Yp - 1
+        Mp = M + 9
+    else:
+        Mp = M - 3
+    jdn = ifd(1461 * Yp, 4) + ifd(979 * Mp + 16, 32) + \
+        day - 1364 - ifd(ifd(Yp + 184, 100) * 3, 4)
+    xldays = jdn - _JDN_delta[datemode]
+    if xldays <= 0:
+        raise XLDateBadTuple("Invalid (year, month, day): %r" % ((year, month, day),))
+    if xldays < 61 and datemode == 0:
+        raise XLDateAmbiguous("Before 1900-03-01: %r" % ((year, month, day),))
+    return float(xldays)
+
+##
+# Convert a time tuple (hour, minute, second) to an Excel "date" value (fraction of a day).
+# @param hour 0 <= hour < 24
+# @param minute 0 <= minute < 60
+# @param second 0 <= second < 60
+# @throws XLDateBadTuple Out-of-range hour, minute, or second
+
+def xldate_from_time_tuple(time_tuple):
+    # Convert an (hour, minute, second) sequence to a fraction of a day.
+    # Raises XLDateBadTuple unless 0 <= hour < 24, 0 <= minute < 60 and
+    # 0 <= second < 60.
+    #
+    # The tuple is unpacked here rather than in the signature: tuple
+    # parameters are Python-2-only syntax, and callers passing the tuple
+    # positionally are unaffected.
+    hour, minute, second = time_tuple
+    if 0 <= hour < 24 and 0 <= minute < 60 and 0 <= second < 60:
+        return ((second / 60.0 + minute) / 60.0 + hour) / 24.0
+    raise XLDateBadTuple("Invalid (hour, minute, second): %r" % ((hour, minute, second),))
+
+##
+# Convert a datetime tuple (year, month, day, hour, minute, second) to an Excel date value.
+# For more details, refer to other xldate_from_*_tuple functions.
+# @param datetime_tuple (year, month, day, hour, minute, second)
+# @param datemode 0: 1900-based, 1: 1904-based.
+
+def xldate_from_datetime_tuple(datetime_tuple, datemode):
+    # The first three items go through the date conversion, the rest
+    # through the time conversion; the result is the sum of the two.
+    day_number = xldate_from_date_tuple(datetime_tuple[:3], datemode)
+    day_fraction = xldate_from_time_tuple(datetime_tuple[3:])
+    return day_number + day_fraction