diff options
| author | Kenneth Reitz <me@kennethreitz.org> | 2016-02-07 07:00:55 -0500 |
|---|---|---|
| committer | Kenneth Reitz <me@kennethreitz.org> | 2016-02-07 07:00:55 -0500 |
| commit | 8debeb26aca7cc9ef427c70ee9d577eb169f9560 (patch) | |
| tree | 58316fe5669e86cbea9a1044e4978ffd3b4cc308 /tablib | |
| parent | 66d9e50984e7c66fd36fd86f2b469c2dae3636ba (diff) | |
| parent | a774789252d41522d4ec8b0e2c212aff4a33904d (diff) | |
| download | tablib-8debeb26aca7cc9ef427c70ee9d577eb169f9560.tar.gz | |
Merge branch 'develop' into import_export
# Conflicts:
# tablib/core.py
# tablib/formats/_csv.py
# tablib/formats/_xlsx.py
Diffstat (limited to 'tablib')
25 files changed, 3813 insertions, 137 deletions
diff --git a/tablib/compat.py b/tablib/compat.py index 919f464..e03526d 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -28,6 +28,7 @@ if is_py3: from tablib.packages import markup3 as markup from tablib.packages import openpyxl3 as openpyxl from tablib.packages.odf3 import opendocument, style, text, table + import tablib.packages.dbfpy3 as dbfpy import csv from io import StringIO @@ -36,6 +37,7 @@ if is_py3: unicode = str bytes = bytes basestring = str + xrange = range else: from cStringIO import StringIO as BytesIO @@ -49,5 +51,7 @@ else: from tablib.packages.odf import opendocument, style, text, table from tablib.packages import unicodecsv as csv + import tablib.packages.dbfpy as dbfpy unicode = unicode + xrange = xrange diff --git a/tablib/core.py b/tablib/core.py index b1de323..cf60967 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -45,7 +45,7 @@ class Row(object): return repr(self._row) def __getslice__(self, i, j): - return self._row[i,j] + return self._row[i:j] def __getitem__(self, i): return self._row[i] @@ -165,15 +165,9 @@ class Dataset(object): # (column, callback) tuples self._formatters = [] - try: - self.headers = kwargs['headers'] - except KeyError: - self.headers = None + self.headers = kwargs.get('headers') - try: - self.title = kwargs['title'] - except KeyError: - self.title = None + self.title = kwargs.get('title') self._register_formats() @@ -260,6 +254,7 @@ class Dataset(object): except AttributeError: setattr(cls, fmt.title, property(fmt.export_set)) cls._formats[fmt.title] = (fmt.export_set, None) + setattr(cls, 'get_%s' % fmt.title, fmt.export_set) except AttributeError: cls._formats[fmt.title] = (None, None) @@ -353,7 +348,7 @@ class Dataset(object): A dataset object can also be imported by setting the `Dataset.dict` attribute: :: data = tablib.Dataset() - data.json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' + data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] """ return self._package() @@ -570,7 +565,7 @@ class Dataset(object): A dataset object can also be imported by setting the :class:`Dataset.json` attribute: :: data = tablib.Dataset() - data.json = '[{age: 90, first_name: "John", liast_name: "Adams"}]' + data.json = '[{"age": 90, "first_name": "John", "last_name": "Adams"}]' Import assumes (for now) that headers exist. """ @@ -585,6 +580,40 @@ class Dataset(object): """ pass + @property + def dbf(): + """A dBASE representation of the :class:`Dataset` object. + + A dataset object can also be imported by setting the + :class:`Dataset.dbf` attribute. :: + + # To import data from an existing DBF file: + data = tablib.Dataset() + data.dbf = open('existing_table.dbf').read() + + # to import data from an ASCII-encoded bytestring: + data = tablib.Dataset() + data.dbf = '<bytestring of tabular data>' + + .. admonition:: Binary Warning + + :class:`Dataset.dbf` contains binary data, so make sure to write in binary mode:: + + with open('output.dbf', 'wb') as f: + f.write(data.dbf) + """ + pass + + + @property + def latex(): + """A LaTeX booktabs representation of the :class:`Dataset` object. If a + title has been set, it will be exported as the table caption. + + .. note:: This method can be used for export only. + """ + pass + # ---- # Rows @@ -936,12 +965,59 @@ class Dataset(object): return _dset + def remove_duplicates(self): + """Removes all duplicate rows from the :class:`Dataset` object + while maintaining the original order.""" + seen = set() + self._data[:] = [row for row in self._data if not (tuple(row) in seen or seen.add(tuple(row)))] + + def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" self._data = list() self.__headers = None + def subset(self, rows=None, cols=None): + """Returns a new instance of the :class:`Dataset`, + including only specified rows and columns. + """ + + # Don't return if no data + if not self: + return + + if rows is None: + rows = list(range(self.height)) + + if cols is None: + cols = list(self.headers) + + #filter out impossible rows and columns + rows = [row for row in rows if row in range(self.height)] + cols = [header for header in cols if header in self.headers] + + _dset = Dataset() + + #filtering rows and columns + _dset.headers = list(cols) + + _dset._data = [] + for row_no, row in enumerate(self._data): + data_row = [] + for key in _dset.headers: + if key in self.headers: + pos = self.headers.index(key) + data_row.append(row[pos]) + else: + raise KeyError + + if row_no in rows: + _dset.append(row=Row(data_row)) + + return _dset + + class Databook(object): """A book of :class:`Dataset` objects. diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py index 5fdf279..5cca19f 100644 --- a/tablib/formats/__init__.py +++ b/tablib/formats/__init__.py @@ -11,5 +11,7 @@ from . import _tsv as tsv from . import _html as html from . import _xlsx as xlsx from . import _ods as ods +from . import _dbf as dbf +from . import _latex as latex -available = (json, xls, yaml, csv, tsv, html, xlsx, ods) +available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods) diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 7d29318..4c00809 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -""" Tablib - CSV Support. +""" Tablib - *SV Support. """ from tablib.compat import is_py3, csv, StringIO @@ -11,13 +11,14 @@ extensions = ('csv',) DEFAULT_ENCODING = 'utf-8' - +DEFAULT_DELIMITER = ',' def export_set(dataset, **kwargs): """Returns CSV representation of Dataset.""" stream = StringIO() + kwargs.setdefault('delimeter', DEFAULT_DELIMITER) if not is_py3: kwargs.setdefault('encoding', DEFAULT_ENCODING) @@ -34,6 +35,7 @@ def import_set(dset, in_stream, headers=True, **kwargs): dset.wipe() + kwargs.setdefault('delimeter', DEFAULT_DELIMITER) if not is_py3: kwargs.setdefault('encoding', DEFAULT_ENCODING) @@ -46,10 +48,10 @@ def import_set(dset, in_stream, headers=True, **kwargs): dset.append(row) -def detect(stream): +def detect(stream, delimiter=DEFAULT_DELIMITER): """Returns True if given stream is valid CSV.""" try: - csv.Sniffer().sniff(stream, delimiters=',') + csv.Sniffer().sniff(stream, delimiters=delimiter) return True except (csv.Error, TypeError): return False diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py new file mode 100644 index 0000000..41c2ef4 --- /dev/null +++ b/tablib/formats/_dbf.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- + +""" Tablib - DBF Support. +""" +import tempfile +import struct +import os + +from tablib.compat import StringIO +from tablib.compat import dbfpy +from tablib.compat import is_py3 + +if is_py3: + from tablib.packages.dbfpy3 import dbf + from tablib.packages.dbfpy3 import dbfnew + from tablib.packages.dbfpy3 import record as dbfrecord + import io +else: + from tablib.packages.dbfpy import dbf + from tablib.packages.dbfpy import dbfnew + from tablib.packages.dbfpy import record as dbfrecord + + +title = 'dbf' +extensions = ('csv',) + +DEFAULT_ENCODING = 'utf-8' + +def export_set(dataset): + """Returns DBF representation of a Dataset""" + new_dbf = dbfnew.dbf_new() + temp_file, temp_uri = tempfile.mkstemp() + + # create the appropriate fields based on the contents of the first row + first_row = dataset[0] + for fieldname, field_value in zip(dataset.headers, first_row): + if type(field_value) in [int, float]: + new_dbf.add_field(fieldname, 'N', 10, 8) + else: + new_dbf.add_field(fieldname, 'C', 80) + + new_dbf.write(temp_uri) + + dbf_file = dbf.Dbf(temp_uri, readOnly=0) + for row in dataset: + record = dbfrecord.DbfRecord(dbf_file) + for fieldname, field_value in zip(dataset.headers, row): + record[fieldname] = field_value + record.store() + + dbf_file.close() + dbf_stream = open(temp_uri, 'rb') + if is_py3: + stream = io.BytesIO(dbf_stream.read()) + else: + stream = StringIO(dbf_stream.read()) + dbf_stream.close() + os.remove(temp_uri) + return stream.getvalue() + +def import_set(dset, in_stream, headers=True): + """Returns a dataset from a DBF stream.""" + + dset.wipe() + if is_py3: + _dbf = dbf.Dbf(io.BytesIO(in_stream)) + else: + _dbf = dbf.Dbf(StringIO(in_stream)) + dset.headers = _dbf.fieldNames + for record in range(_dbf.recordCount): + row = [_dbf[record][f] for f in _dbf.fieldNames] + dset.append(row) + +def detect(stream): + """Returns True if the given stream is valid DBF""" + #_dbf = dbf.Table(StringIO(stream)) + try: + if is_py3: + if type(stream) is not bytes: + stream = bytes(stream, 'utf-8') + _dbf = dbf.Dbf(io.BytesIO(stream), readOnly=True) + else: + _dbf = dbf.Dbf(StringIO(stream), readOnly=True) + return True + except (ValueError, struct.error): + # When we try to open up a file that's not a DBF, dbfpy raises a + # ValueError. + # When unpacking a string argument with less than 8 chars, struct.error is + # raised. + return False + + + diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py index 7bb77f0..0b45f14 100644 --- a/tablib/formats/_html.py +++ b/tablib/formats/_html.py @@ -23,45 +23,45 @@ extensions = ('html', ) def export_set(dataset): - """HTML representation of a Dataset.""" + """HTML representation of a Dataset.""" - stream = StringIO() + stream = StringIO() - page = markup.page() - page.table.open() + page = markup.page() + page.table.open() - if dataset.headers is not None: - new_header = [item if item is not None else '' for item in dataset.headers] + if dataset.headers is not None: + new_header = [item if item is not None else '' for item in dataset.headers] - page.thead.open() - headers = markup.oneliner.th(new_header) - page.tr(headers) - page.thead.close() + page.thead.open() + headers = markup.oneliner.th(new_header) + page.tr(headers) + page.thead.close() - for row in dataset: - new_row = [item if item is not None else '' for item in row] + for row in dataset: + new_row = [item if item is not None else '' for item in row] - html_row = markup.oneliner.td(new_row) - page.tr(html_row) + html_row = markup.oneliner.td(new_row) + page.tr(html_row) - page.table.close() + page.table.close() # Allow unicode characters in output - wrapper = codecs.getwriter("utf8")(stream) - wrapper.writelines(unicode(page)) + wrapper = codecs.getwriter("utf8")(stream) + wrapper.writelines(unicode(page)) - return stream.getvalue().decode('utf-8') + return stream.getvalue().decode('utf-8') def export_book(databook): - """HTML representation of a Databook.""" + """HTML representation of a Databook.""" - stream = StringIO() + stream = StringIO() - for i, dset in enumerate(databook._datasets): - title = (dset.title if dset.title else 'Set %s' % (i)) - stream.write('<%s>%s</%s>\n' % (BOOK_ENDINGS, title, BOOK_ENDINGS)) - stream.write(dset.html) - stream.write('\n') + for i, dset in enumerate(databook._datasets): + title = (dset.title if dset.title else 'Set %s' % (i)) + stream.write('<%s>%s</%s>\n' % (BOOK_ENDINGS, title, BOOK_ENDINGS)) + stream.write(dset.html) + stream.write('\n') - return stream.getvalue() + return stream.getvalue() diff --git a/tablib/formats/_latex.py b/tablib/formats/_latex.py new file mode 100644 index 0000000..44ee101 --- /dev/null +++ b/tablib/formats/_latex.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- + +"""Tablib - LaTeX table export support. + + Generates a LaTeX booktabs-style table from the dataset. +""" +import re + +from tablib.compat import unicode + +title = 'latex' +extensions = ('tex',) + +TABLE_TEMPLATE = """\ +%% Note: add \\usepackage{booktabs} to your preamble +%% +\\begin{table}[!htbp] + \\centering + %(CAPTION)s + \\begin{tabular}{%(COLSPEC)s} + \\toprule +%(HEADER)s + %(MIDRULE)s +%(BODY)s + \\bottomrule + \\end{tabular} +\\end{table} +""" + +TEX_RESERVED_SYMBOLS_MAP = dict([ + ('\\', '\\textbackslash{}'), + ('{', '\\{'), + ('}', '\\}'), + ('$', '\\$'), + ('&', '\\&'), + ('#', '\\#'), + ('^', '\\textasciicircum{}'), + ('_', '\\_'), + ('~', '\\textasciitilde{}'), + ('%', '\\%'), +]) + +TEX_RESERVED_SYMBOLS_RE = re.compile( + '(%s)' % '|'.join(map(re.escape, TEX_RESERVED_SYMBOLS_MAP.keys()))) + + +def export_set(dataset): + """Returns LaTeX representation of dataset + + :param dataset: dataset to serialize + :type dataset: tablib.core.Dataset + """ + + caption = '\\caption{%s}' % dataset.title if dataset.title else '%' + colspec = _colspec(dataset.width) + header = _serialize_row(dataset.headers) if dataset.headers else '' + midrule = _midrule(dataset.width) + body = '\n'.join([_serialize_row(row) for row in dataset]) + return TABLE_TEMPLATE % dict(CAPTION=caption, COLSPEC=colspec, + HEADER=header, MIDRULE=midrule, BODY=body) + + +def _colspec(dataset_width): + """Generates the column specification for the LaTeX `tabular` environment + based on the dataset width. + + The first column is justified to the left, all further columns are aligned + to the right. + + .. note:: This is only a heuristic and most probably has to be fine-tuned + post export. Column alignment should depend on the data type, e.g., textual + content should usually be aligned to the left while numeric content almost + always should be aligned to the right. + + :param dataset_width: width of the dataset + """ + + spec = 'l' + for _ in range(1, dataset_width): + spec += 'r' + return spec + + +def _midrule(dataset_width): + """Generates the table `midrule`, which may be composed of several + `cmidrules`. + + :param dataset_width: width of the dataset to serialize + """ + + if not dataset_width or dataset_width == 1: + return '\\midrule' + return ' '.join([_cmidrule(colindex, dataset_width) for colindex in + range(1, dataset_width + 1)]) + + +def _cmidrule(colindex, dataset_width): + """Generates the `cmidrule` for a single column with appropriate trimming + based on the column position. + + :param colindex: Column index + :param dataset_width: width of the dataset + """ + + rule = '\\cmidrule(%s){%d-%d}' + if colindex == 1: + # Rule of first column is trimmed on the right + return rule % ('r', colindex, colindex) + if colindex == dataset_width: + # Rule of last column is trimmed on the left + return rule % ('l', colindex, colindex) + # Inner columns are trimmed on the left and right + return rule % ('lr', colindex, colindex) + + +def _serialize_row(row): + """Returns string representation of a single row. + + :param row: single dataset row + """ + + new_row = [_escape_tex_reserved_symbols(unicode(item)) if item else '' for + item in row] + return 6 * ' ' + ' & '.join(new_row) + ' \\\\' + + +def _escape_tex_reserved_symbols(input): + """Escapes all TeX reserved symbols ('_', '~', etc.) in a string. + + :param input: String to escape + """ + def replace(match): + return TEX_RESERVED_SYMBOLS_MAP[match.group()] + return TEX_RESERVED_SYMBOLS_RE.sub(replace, input) diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index 8ef2b67..9380b3b 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -3,57 +3,28 @@ """ Tablib - TSV (Tab Separated Values) Support. """ -from tablib.compat import is_py3, csv, StringIO - - +from tablib.formats._csv import ( + export_set as export_set_wrapper, + import_set as import_set_wrapper, + detect as detect_wrapper, +) title = 'tsv' extensions = ('tsv',) DEFAULT_ENCODING = 'utf-8' +DELIMITER = '\t' def export_set(dataset): - """Returns a TSV representation of Dataset.""" - - stream = StringIO() - - if is_py3: - _tsv = csv.writer(stream, delimiter='\t') - else: - _tsv = csv.writer(stream, encoding=DEFAULT_ENCODING, delimiter='\t') - - for row in dataset._package(dicts=False): - _tsv.writerow(row) - - return stream.getvalue() + """Returns TSV representation of Dataset.""" + return export_set_wrapper(dataset, delimiter=DELIMITER) def import_set(dset, in_stream, headers=True): """Returns dataset from TSV stream.""" - - dset.wipe() - - if is_py3: - rows = csv.reader(in_stream.splitlines(), delimiter='\t') - else: - rows = csv.reader(in_stream.splitlines(), delimiter='\t', - encoding=DEFAULT_ENCODING) - - for i, row in enumerate(rows): - # Skip empty rows - if not row: - continue - - if (i == 0) and (headers): - dset.headers = row - else: - dset.append(row) + return import_set_wrapper(dset, in_stream, headers=headers, delimiter=DELIMITER) def detect(stream): """Returns True if given stream is valid TSV.""" - try: - csv.Sniffer().sniff(stream, delimiters='\t') - return True - except (csv.Error, TypeError): - return False + return detect_wrapper(stream, delimiter=DELIMITER) diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py index 67b87ea..787907a 100644 --- a/tablib/formats/_xls.py +++ b/tablib/formats/_xls.py @@ -5,7 +5,7 @@ import sys -from tablib.compat import BytesIO, xlwt, xlrd, XLRDError +from tablib.compat import BytesIO, xlwt, xlrd, XLRDError, xrange import tablib title = 'xls' diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py index 0cd8500..d9d3d57 100644 --- a/tablib/formats/_xlsx.py +++ b/tablib/formats/_xlsx.py @@ -69,7 +69,7 @@ def import_set(dset, in_stream, headers=True): dset.wipe() - xls_book = openpyxl.reader.excel.load_workbook(in_stream) + xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream)) sheet = xls_book.get_active_sheet() dset.title = sheet.title @@ -87,7 +87,7 @@ def import_book(dbook, in_stream, headers=True): dbook.wipe() - xls_book = openpyxl.reader.excel.load_workbook(in_stream) + xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream)) for sheet in xls_book.worksheets: data = tablib.Dataset() @@ -115,8 +115,6 @@ def dset_sheet(dataset, ws, freeze_panes=True): row_number = i + 1 for j, col in enumerate(row): col_idx = get_column_letter(j + 1) - # We want to freeze the column after the last column - frzn_col_idx = get_column_letter(j + 2) # bold headers if (row_number == 1) and dataset.headers: diff --git a/tablib/packages/dbfpy/__init__.py b/tablib/packages/dbfpy/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tablib/packages/dbfpy/__init__.py diff --git a/tablib/packages/dbfpy/dbf.py b/tablib/packages/dbfpy/dbf.py new file mode 100644 index 0000000..b3d2e21 --- /dev/null +++ b/tablib/packages/dbfpy/dbf.py @@ -0,0 +1,292 @@ +#! /usr/bin/env python +"""DBF accessing helpers. + +FIXME: more documentation needed + +Examples: + + Create new table, setup structure, add records: + + dbf = Dbf(filename, new=True) + dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (n, s, i, b) in ( + ("John", "Miller", "YC", (1980, 10, 11)), + ("Andy", "Larkin", "", (1980, 4, 11)), + ): + rec = dbf.newRecord() + rec["NAME"] = n + rec["SURNAME"] = s + rec["INITIALS"] = i + rec["BIRTHDATE"] = b + rec.store() + dbf.close() + + Open existed dbf, read some data: + + dbf = Dbf(filename, True) + for rec in dbf: + for fldName in dbf.fieldNames: + print '%s:\t %s (%s)' % (fldName, rec[fldName], + type(rec[fldName])) + print + dbf.close() + +""" +"""History (most recent first): +11-feb-2007 [als] export INVALID_VALUE; + Dbf: added .ignoreErrors, .INVALID_VALUE +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] removed fromStream and newDbf methods: + use argument of __init__ call must be used instead; + added class fields pointing to the header and + record classes. +17-dec-2005 [yc] split to several modules; reimplemented +13-dec-2005 [yc] adapted to the changes of the `strutil` module. +13-sep-2002 [als] support FoxPro Timestamp datatype +15-nov-1999 [jjk] documentation updates, add demo +24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks +08-jun-1998 [jjk] fix problems, add more features +20-feb-1998 [jjk] fix problems, add more features +19-feb-1998 [jjk] add create/write capabilities +18-feb-1998 [jjk] from dbfload.py +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2] +__author__ = "Jeff Kunce <kuncej@mail.conservation.state.mo.us>" + +__all__ = ["Dbf"] + +from . import header +from .import record +from utils import INVALID_VALUE + +class Dbf(object): + """DBF accessor. + + FIXME: + docs and examples needed (dont' forget to tell + about problems adding new fields on the fly) + + Implementation notes: + ``_new`` field is used to indicate whether this is + a new data table. `addField` could be used only for + the new tables! If at least one record was appended + to the table it's structure couldn't be changed. + + """ + + __slots__ = ("name", "header", "stream", + "_changed", "_new", "_ignore_errors") + + HeaderClass = header.DbfHeader + RecordClass = record.DbfRecord + INVALID_VALUE = INVALID_VALUE + + ## initialization and creation helpers + + def __init__(self, f, readOnly=False, new=False, ignoreErrors=False): + """Initialize instance. + + Arguments: + f: + Filename or file-like object. + new: + True if new data table must be created. Assume + data table exists if this argument is False. + readOnly: + if ``f`` argument is a string file will + be opend in read-only mode; in other cases + this argument is ignored. This argument is ignored + even if ``new`` argument is True. + headerObj: + `header.DbfHeader` instance or None. If this argument + is None, new empty header will be used with the + all fields set by default. + ignoreErrors: + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """ + if isinstance(f, basestring): + # a filename + self.name = f + if new: + # new table (table file must be + # created or opened and truncated) + self.stream = file(f, "w+b") + else: + # tabe file must exist + self.stream = file(f, ("r+b", "rb")[bool(readOnly)]) + else: + # a stream + self.name = getattr(f, "name", "") + self.stream = f + if new: + # if this is a new table, header will be empty + self.header = self.HeaderClass() + else: + # or instantiated using stream + self.header = self.HeaderClass.fromStream(self.stream) + self.ignoreErrors = ignoreErrors + self._new = bool(new) + self._changed = False + + ## properties + + closed = property(lambda self: self.stream.closed) + recordCount = property(lambda self: self.header.recordCount) + fieldNames = property( + lambda self: [_fld.name for _fld in self.header.fields]) + fieldDefs = property(lambda self: self.header.fields) + changed = property(lambda self: self._changed or self.header.changed) + + def ignoreErrors(self, value): + """Update `ignoreErrors` flag on the header object and self""" + self.header.ignoreErrors = self._ignore_errors = bool(value) + ignoreErrors = property( + lambda self: self._ignore_errors, + ignoreErrors, + doc="""Error processing mode for DBF field value conversion + + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """) + + ## protected methods + + def _fixIndex(self, index): + """Return fixed index. + + This method fails if index isn't a numeric object + (long or int). Or index isn't in a valid range + (less or equal to the number of records in the db). + + If ``index`` is a negative number, it will be + treated as a negative indexes for list objects. + + Return: + Return value is numeric object maning valid index. + + """ + if not isinstance(index, (int, long)): + raise TypeError("Index must be a numeric object") + if index < 0: + # index from the right side + # fix it to the left-side index + index += len(self) + 1 + if index >= len(self): + raise IndexError("Record index out of range") + return index + + ## iterface methods + + def close(self): + self.flush() + self.stream.close() + + def flush(self): + """Flush data to the associated stream.""" + if self.changed: + self.header.setCurrentDate() + self.header.write(self.stream) + self.stream.flush() + self._changed = False + + def indexOfFieldName(self, name): + """Index of field named ``name``.""" + # FIXME: move this to header class + return self.header.fields.index(name) + + def newRecord(self): + """Return new record, which belong to this table.""" + return self.RecordClass(self) + + def append(self, record): + """Append ``record`` to the database.""" + record.index = self.header.recordCount + record._write() + self.header.recordCount += 1 + self._changed = True + self._new = False + + def addField(self, *defs): + """Add field definitions. + + For more information see `header.DbfHeader.addField`. + + """ + if self._new: + self.header.addField(*defs) + else: + raise TypeError("At least one record was added, " + "structure can't be changed") + + ## 'magic' methods (representation and sequence interface) + + def __repr__(self): + return "Dbf stream '%s'\n" % self.stream + repr(self.header) + + def __len__(self): + """Return number of records.""" + return self.recordCount + + def __getitem__(self, index): + """Return `DbfRecord` instance.""" + return self.RecordClass.fromStream(self, self._fixIndex(index)) + + def __setitem__(self, index, record): + """Write `DbfRecord` instance to the stream.""" + record.index = self._fixIndex(index) + record._write() + self._changed = True + self._new = False + + #def __del__(self): + # """Flush stream upon deletion of the object.""" + # self.flush() + + +def demoRead(filename): + _dbf = Dbf(filename, True) + for _rec in _dbf: + print + print(repr(_rec)) + _dbf.close() + +def demoCreate(filename): + _dbf = Dbf(filename, new=True) + _dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (_n, _s, _i, _b) in ( + ("John", "Miller", "YC", (1981, 1, 2)), + ("Andy", "Larkin", "AL", (1982, 3, 4)), + ("Bill", "Clinth", "", (1983, 5, 6)), + ("Bobb", "McNail", "", (1984, 7, 8)), + ): + _rec = _dbf.newRecord() + _rec["NAME"] = _n + _rec["SURNAME"] = _s + _rec["INITIALS"] = _i + _rec["BIRTHDATE"] = _b + _rec.store() + print(repr(_dbf)) + _dbf.close() + +if (__name__=='__main__'): + import sys + _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf" + demoCreate(_name) + demoRead(_name) + +# vim: set et sw=4 sts=4 : diff --git a/tablib/packages/dbfpy/dbfnew.py b/tablib/packages/dbfpy/dbfnew.py new file mode 100644 index 0000000..dea7e52 --- /dev/null +++ b/tablib/packages/dbfpy/dbfnew.py @@ -0,0 +1,188 @@ +#!/usr/bin/python +""".DBF creation helpers. + +Note: this is a legacy interface. New code should use Dbf class + for table creation (see examples in dbf.py) + +TODO: + - handle Memo fields. + - check length of the fields accoring to the + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + +""" +"""History (most recent first) +04-jul-2006 [als] added export declaration; + updated for dbfpy 2.0 +15-dec-2005 [yc] define dbf_new.__slots__ +14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings; + dbf_new now is a new class (inherited from object) +??-jun-2000 [--] added by Hans Fiby +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] + +__all__ = ["dbf_new"] + +from dbf import * +from fields import * +from header import * +from record import * + +class _FieldDefinition(object): + """Field definition. + + This is a simple structure, which contains ``name``, ``type``, + ``len``, ``dec`` and ``cls`` fields. + + Objects also implement get/setitem magic functions, so fields + could be accessed via sequence iterface, where 'name' has + index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and + 'cls' could be located at index 4. + + """ + + __slots__ = "name", "type", "len", "dec", "cls" + + # WARNING: be attentive - dictionaries are mutable! + FLD_TYPES = { + # type: (cls, len) + "C": (DbfCharacterFieldDef, None), + "N": (DbfNumericFieldDef, None), + "L": (DbfLogicalFieldDef, 1), + # FIXME: support memos + # "M": (DbfMemoFieldDef), + "D": (DbfDateFieldDef, 8), + # FIXME: I'm not sure length should be 14 characters! + # but temporary I use it, cuz date is 8 characters + # and time 6 (hhmmss) + "T": (DbfDateTimeFieldDef, 14), + } + + def __init__(self, name, type, len=None, dec=0): + _cls, _len = self.FLD_TYPES[type] + if _len is None: + if len is None: + raise ValueError("Field length must be defined") + _len = len + self.name = name + self.type = type + self.len = _len + self.dec = dec + self.cls = _cls + + def getDbfField(self): + "Return `DbfFieldDef` instance from the current definition." + return self.cls(self.name, self.len, self.dec) + + def appendToHeader(self, dbfh): + """Create a `DbfFieldDef` instance and append it to the dbf header. + + Arguments: + dbfh: `DbfHeader` instance. + + """ + _dbff = self.getDbfField() + dbfh.addField(_dbff) + + +class dbf_new(object): + """New .DBF creation helper. + + Example Usage: + + dbfn = dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + + Note: + This module cannot handle Memo-fields, + they are special. + + """ + + __slots__ = ("fields",) + + FieldDefinitionClass = _FieldDefinition + + def __init__(self): + self.fields = [] + + def add_field(self, name, typ, len, dec=0): + """Add field definition. + + Arguments: + name: + field name (str object). field name must not + contain ASCII NULs and it's length shouldn't + exceed 10 characters. + typ: + type of the field. this must be a single character + from the "CNLMDT" set meaning character, numeric, + logical, memo, date and date/time respectively. + len: + length of the field. this argument is used only for + the character and numeric fields. all other fields + have fixed length. + FIXME: use None as a default for this argument? + dec: + decimal precision. used only for the numric fields. + + """ + self.fields.append(self.FieldDefinitionClass(name, typ, len, dec)) + + def write(self, filename): + """Create empty .DBF file using current structure.""" + _dbfh = DbfHeader() + _dbfh.setCurrentDate() + for _fldDef in self.fields: + _fldDef.appendToHeader(_dbfh) + _dbfStream = file(filename, "wb") + _dbfh.write(_dbfStream) + _dbfStream.close() + + def write_stream(self, stream): + _dbfh = DbfHeader() + _dbfh.setCurrentDate() + for _fldDef in self.fields: + _fldDef.appendToHeader(_dbfh) + _dbfh.write(stream) + + +if (__name__=='__main__'): + # create a new DBF-File + dbfn=dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + # test new dbf + print "*** created tst.dbf: ***" + dbft = Dbf('tst.dbf', readOnly=0) + print repr(dbft) + # add a record + rec=DbfRecord(dbft) + rec['name']='something' + rec['price']=10.5 + rec['date']=(2000,1,12) + rec.store() + # add another record + rec=DbfRecord(dbft) + rec['name']='foo and bar' + rec['price']=12234 + rec['date']=(1992,7,15) + rec.store() + + # show the records + print "*** inserted 2 records into tst.dbf: ***" + print repr(dbft) + for i1 in range(len(dbft)): + rec = dbft[i1] + for fldName in dbft.fieldNames: + print '%s:\t %s'%(fldName, rec[fldName]) + print + dbft.close() + +# vim: set et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/fields.py b/tablib/packages/dbfpy/fields.py new file mode 100644 index 0000000..69cd436 --- /dev/null +++ b/tablib/packages/dbfpy/fields.py @@ -0,0 +1,466 @@ +"""DBF fields definitions. + +TODO: + - make memos work +""" +"""History (most recent first): +26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes +05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date +16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point + in the value to select float or integer return type +13-mar-2008 [als] check field name length in constructor +11-feb-2007 [als] handle value conversion errors +10-feb-2007 [als] DbfFieldDef: added .rawFromRecord() +01-dec-2006 [als] Timestamp columns use None for empty values +31-oct-2006 [als] support field types 'F' (float), 'I' (integer) + and 'Y' (currency); + automate export and registration of field classes +04-jul-2006 [als] added export declaration +10-mar-2006 [als] decode empty values for Date and Logical fields; + show field name in errors +10-mar-2006 [als] fix Numeric value decoding: according to spec, + value always is string representation of the number; + ensure that encoded Numeric value fits into the field +20-dec-2005 [yc] use field names in upper case +15-dec-2005 [yc] field definitions moved from `dbf`. +""" + +__version__ = "$Revision: 1.14 $"[11:-2] +__date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2] + +__all__ = ["lookupFor",] # field classes added at the end of the module + +import datetime +import struct +import sys + +from . import utils + +## abstract definitions + +class DbfFieldDef(object): + """Abstract field definition. + + Child classes must override ``type`` class attribute to provide datatype + infromation of the field definition. For more info about types visit + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + + Also child classes must override ``defaultValue`` field to provide + default value for the field value. + + If child class has fixed length ``length`` class attribute must be + overriden and set to the valid value. None value means, that field + isn't of fixed length. + + Note: ``name`` field must not be changed after instantiation. + + """ + + __slots__ = ("name", "length", "decimalCount", + "start", "end", "ignoreErrors") + + # length of the field, None in case of variable-length field, + # or a number if this field is a fixed-length field + length = None + + # field type. for more information about fields types visit + # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + # must be overriden in child classes + typeCode = None + + # default value for the field. this field must be + # overriden in child classes + defaultValue = None + + def __init__(self, name, length=None, decimalCount=None, + start=None, stop=None, ignoreErrors=False, + ): + """Initialize instance.""" + assert self.typeCode is not None, "Type code must be overriden" + assert self.defaultValue is not None, "Default value must be overriden" + ## fix arguments + if len(name) >10: + raise ValueError("Field name \"%s\" is too long" % name) + name = str(name).upper() + if self.__class__.length is None: + if length is None: + raise ValueError("[%s] Length isn't specified" % name) + length = int(length) + if length <= 0: + raise ValueError("[%s] Length must be a positive integer" + % name) + else: + length = self.length + if decimalCount is None: + decimalCount = 0 + ## set fields + self.name = name + # FIXME: validate length according to the specification at + # http://www.clicketyclick.dk/databases/xbase/format/data_types.html + self.length = length + self.decimalCount = decimalCount + self.ignoreErrors = ignoreErrors + self.start = start + self.end = stop + + def __cmp__(self, other): + return cmp(self.name, str(other).upper()) + + def __hash__(self): + return hash(self.name) + + def fromString(cls, string, start, ignoreErrors=False): + """Decode dbf field definition from the string data. + + Arguments: + string: + a string, dbf definition is decoded from. length of + the string must be 32 bytes. + start: + position in the database file. + ignoreErrors: + initial error processing mode for the new field (boolean) + + """ + assert len(string) == 32 + _length = ord(string[16]) + return cls(utils.unzfill(string)[:11], _length, ord(string[17]), + start, start + _length, ignoreErrors=ignoreErrors) + fromString = classmethod(fromString) + + def toString(self): + """Return encoded field definition. + + Return: + Return value is a string object containing encoded + definition of this field. + + """ + if sys.version_info < (2, 4): + # earlier versions did not support padding character + _name = self.name[:11] + "\0" * (11 - len(self.name)) + else: + _name = self.name.ljust(11, '\0') + return ( + _name + + self.typeCode + + #data address + chr(0) * 4 + + chr(self.length) + + chr(self.decimalCount) + + chr(0) * 14 + ) + + def __repr__(self): + return "%-10s %1s %3d %3d" % self.fieldInfo() + + def fieldInfo(self): + """Return field information. + + Return: + Return value is a (name, type, length, decimals) tuple. + + """ + return (self.name, self.typeCode, self.length, self.decimalCount) + + def rawFromRecord(self, record): + """Return a "raw" field value from the record string.""" + return record[self.start:self.end] + + def decodeFromRecord(self, record): + """Return decoded field value from the record string.""" + try: + return self.decodeValue(self.rawFromRecord(record)) + except: + if self.ignoreErrors: + return utils.INVALID_VALUE + else: + raise + + def decodeValue(self, value): + """Return decoded value from string value. + + This method shouldn't be used publicly. It's called from the + `decodeFromRecord` method. + + This is an abstract method and it must be overridden in child classes. + """ + raise NotImplementedError + + def encodeValue(self, value): + """Return str object containing encoded field value. + + This is an abstract method and it must be overriden in child classes. + """ + raise NotImplementedError + +## real classes + +class DbfCharacterFieldDef(DbfFieldDef): + """Definition of the character field.""" + + typeCode = "C" + defaultValue = "" + + def decodeValue(self, value): + """Return string object. + + Return value is a ``value`` argument with stripped right spaces. + + """ + return value.rstrip(" ") + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``.""" + return str(value)[:self.length].ljust(self.length) + + +class DbfNumericFieldDef(DbfFieldDef): + """Definition of the numeric field.""" + + typeCode = "N" + # XXX: now I'm not sure it was a good idea to make a class field + # `defaultValue` instead of a generic method as it was implemented + # previously -- it's ok with all types except number, cuz + # if self.decimalCount is 0, we should return 0 and 0.0 otherwise. + defaultValue = 0 + + def decodeValue(self, value): + """Return a number decoded from ``value``. + + If decimals is zero, value will be decoded as an integer; + or as a float otherwise. + + Return: + Return value is a int (long) or float instance. + + """ + value = value.strip(" \0") + if "." in value: + # a float (has decimal separator) + return float(value) + elif value: + # must be an integer + return int(value) + else: + return 0 + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + _rv = ("%*.*f" % (self.length, self.decimalCount, value)) + if len(_rv) > self.length: + _ppos = _rv.find(".") + if 0 <= _ppos <= self.length: + _rv = _rv[:self.length] + else: + raise ValueError("[%s] Numeric overflow: %s (field width: %i)" + % (self.name, _rv, self.length)) + return _rv + +class DbfFloatFieldDef(DbfNumericFieldDef): + """Definition of the float field - same as numeric.""" + + typeCode = "F" + +class DbfIntegerFieldDef(DbfFieldDef): + """Definition of the integer field.""" + + typeCode = "I" + length = 4 + defaultValue = 0 + + def decodeValue(self, value): + """Return an integer number decoded from ``value``.""" + return struct.unpack("<i", value)[0] + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + return struct.pack("<i", int(value)) + +class DbfCurrencyFieldDef(DbfFieldDef): + """Definition of the currency field.""" + + typeCode = "Y" + length = 8 + defaultValue = 0.0 + + def decodeValue(self, value): + """Return float number decoded from ``value``.""" + return struct.unpack("<q", value)[0] / 10000. + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + return struct.pack("<q", round(value * 10000)) + +class DbfLogicalFieldDef(DbfFieldDef): + """Definition of the logical field.""" + + typeCode = "L" + defaultValue = -1 + length = 1 + + def decodeValue(self, value): + """Return True, False or -1 decoded from ``value``.""" + # Note: value always is 1-char string + if value == "?": + return -1 + if value in "NnFf ": + return False + if value in "YyTt": + return True + raise ValueError("[%s] Invalid logical value %r" % (self.name, value)) + + def encodeValue(self, value): + """Return a character from the "TF?" set. + + Return: + Return value is "T" if ``value`` is True + "?" if value is -1 or False otherwise. + + """ + if value is True: + return "T" + if value == -1: + return "?" + return "F" + + +class DbfMemoFieldDef(DbfFieldDef): + """Definition of the memo field. + + Note: memos aren't currenly completely supported. + + """ + + typeCode = "M" + defaultValue = " " * 10 + length = 10 + + def decodeValue(self, value): + """Return int .dbt block number decoded from the string object.""" + #return int(value) + raise NotImplementedError + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``. + + Note: this is an internal method. + + """ + #return str(value)[:self.length].ljust(self.length) + raise NotImplementedError + + +class DbfDateFieldDef(DbfFieldDef): + """Definition of the date field.""" + + typeCode = "D" + defaultValue = utils.classproperty(lambda cls: datetime.date.today()) + # "yyyymmdd" gives us 8 characters + length = 8 + + def decodeValue(self, value): + """Return a ``datetime.date`` instance decoded from ``value``.""" + if value.strip(): + return utils.getDate(value) + else: + return None + + def encodeValue(self, value): + """Return a string-encoded value. + + ``value`` argument should be a value suitable for the + `utils.getDate` call. + + Return: + Return value is a string in format "yyyymmdd". + + """ + if value: + return utils.getDate(value).strftime("%Y%m%d") + else: + return " " * self.length + + +class DbfDateTimeFieldDef(DbfFieldDef): + """Definition of the timestamp field.""" + + # a difference between JDN (Julian Day Number) + # and GDN (Gregorian Day Number). note, that GDN < JDN + JDN_GDN_DIFF = 1721425 + typeCode = "T" + defaultValue = utils.classproperty(lambda cls: datetime.datetime.now()) + # two 32-bits integers representing JDN and amount of + # milliseconds respectively gives us 8 bytes. + # note, that values must be encoded in LE byteorder. + length = 8 + + def decodeValue(self, value): + """Return a `datetime.datetime` instance.""" + assert len(value) == self.length + # LE byteorder + _jdn, _msecs = struct.unpack("<2I", value) + if _jdn >= 1: + _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF) + _rv += datetime.timedelta(0, _msecs / 1000.0) + else: + # empty date + _rv = None + return _rv + + def encodeValue(self, value): + """Return a string-encoded ``value``.""" + if value: + value = utils.getDateTime(value) + # LE byteorder + _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF, + (value.hour * 3600 + value.minute * 60 + value.second) * 1000) + else: + _rv = "\0" * self.length + assert len(_rv) == self.length + return _rv + + +_fieldsRegistry = {} + +def registerField(fieldCls): + """Register field definition class. + + ``fieldCls`` should be subclass of the `DbfFieldDef`. + + Use `lookupFor` to retrieve field definition class + by the type code. + + """ + assert fieldCls.typeCode is not None, "Type code isn't defined" + # XXX: use fieldCls.typeCode.upper()? in case of any decign + # don't forget to look to the same comment in ``lookupFor`` method + _fieldsRegistry[fieldCls.typeCode] = fieldCls + + +def lookupFor(typeCode): + """Return field definition class for the given type code. + + ``typeCode`` must be a single character. That type should be + previously registered. + + Use `registerField` to register new field class. + + Return: + Return value is a subclass of the `DbfFieldDef`. + + """ + # XXX: use typeCode.upper()? in case of any decign don't + # forget to look to the same comment in ``registerField`` + return _fieldsRegistry[typeCode] + +## register generic types + +for (_name, _val) in globals().items(): + if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ + and (_name != "DbfFieldDef"): + __all__.append(_name) + registerField(_val) +del _name, _val + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/header.py b/tablib/packages/dbfpy/header.py new file mode 100644 index 0000000..03a877c --- /dev/null +++ b/tablib/packages/dbfpy/header.py @@ -0,0 +1,275 @@ +"""DBF header definition. + +TODO: + - handle encoding of the character fields + (encoding information stored in the DBF header) + +""" +"""History (most recent first): +16-sep-2010 [als] fromStream: fix century of the last update field +11-feb-2007 [als] added .ignoreErrors +10-feb-2007 [als] added __getitem__: return field definitions + by field name or field number (zero-based) +04-jul-2006 [als] added export declaration +15-dec-2005 [yc] created +""" + +__version__ = "$Revision: 1.6 $"[11:-2] +__date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] + +__all__ = ["DbfHeader"] + +try: + import cStringIO +except ImportError: + # when we're in python3, we cStringIO has been replaced by io.StringIO + import io as cStringIO +import datetime +import struct +import time + +from . import fields +from . import utils + + +class DbfHeader(object): + """Dbf header definition. + + For more information about dbf header format visit + `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT` + + Examples: + Create an empty dbf header and add some field definitions: + dbfh = DbfHeader() + dbfh.addField(("name", "C", 10)) + dbfh.addField(("date", "D")) + dbfh.addField(DbfNumericFieldDef("price", 5, 2)) + Create a dbf header with field definitions: + dbfh = DbfHeader([ + ("name", "C", 10), + ("date", "D"), + DbfNumericFieldDef("price", 5, 2), + ]) + + """ + + __slots__ = ("signature", "fields", "lastUpdate", "recordLength", + "recordCount", "headerLength", "changed", "_ignore_errors") + + ## instance construction and initialization methods + + def __init__(self, fields=None, headerLength=0, recordLength=0, + recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False, + ): + """Initialize instance. + + Arguments: + fields: + a list of field definitions; + recordLength: + size of the records; + headerLength: + size of the header; + recordCount: + number of records stored in DBF; + signature: + version number (aka signature). using 0x03 as a default meaning + "File without DBT". for more information about this field visit + ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET`` + lastUpdate: + date of the DBF's update. this could be a string ('yymmdd' or + 'yyyymmdd'), timestamp (int or float), datetime/date value, + a sequence (assuming (yyyy, mm, dd, ...)) or an object having + callable ``ticks`` field. + ignoreErrors: + error processing mode for DBF fields (boolean) + + """ + self.signature = signature + if fields is None: + self.fields = [] + else: + self.fields = list(fields) + self.lastUpdate = utils.getDate(lastUpdate) + self.recordLength = recordLength + self.headerLength = headerLength + self.recordCount = recordCount + self.ignoreErrors = ignoreErrors + # XXX: I'm not sure this is safe to + # initialize `self.changed` in this way + self.changed = bool(self.fields) + + # @classmethod + def fromString(cls, string): + """Return header instance from the string object.""" + return cls.fromStream(cStringIO.StringIO(str(string))) + fromString = classmethod(fromString) + + # @classmethod + def fromStream(cls, stream): + """Return header object from the stream.""" + stream.seek(0) + _data = stream.read(32) + (_cnt, _hdrLen, _recLen) = struct.unpack("<I2H", _data[4:12]) + #reserved = _data[12:32] + _year = ord(_data[1]) + if _year < 80: + # dBase II started at 1980. It is quite unlikely + # that actual last update date is before that year. + _year += 2000 + else: + _year += 1900 + ## create header object + _obj = cls(None, _hdrLen, _recLen, _cnt, ord(_data[0]), + (_year, ord(_data[2]), ord(_data[3]))) + ## append field definitions + # position 0 is for the deletion flag + _pos = 1 + _data = stream.read(1) + + # The field definitions are ended either by \x0D OR a newline + # character, so we need to handle both when reading from a stream. + # When writing, dbfpy appears to write newlines instead of \x0D. + while _data[0] not in ["\x0D", "\n"]: + _data += stream.read(31) + _fld = fields.lookupFor(_data[11]).fromString(_data, _pos) + _obj._addField(_fld) + _pos = _fld.end + _data = stream.read(1) + return _obj + fromStream = classmethod(fromStream) + + ## properties + + year = property(lambda self: self.lastUpdate.year) + month = property(lambda self: self.lastUpdate.month) + day = property(lambda self: self.lastUpdate.day) + + def ignoreErrors(self, value): + """Update `ignoreErrors` flag on self and all fields""" + self._ignore_errors = value = bool(value) + for _field in self.fields: + _field.ignoreErrors = value + ignoreErrors = property( + lambda self: self._ignore_errors, + ignoreErrors, + doc="""Error processing mode for DBF field value conversion + + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """) + + ## object representation + + def __repr__(self): + _rv = """\ +Version (signature): 0x%02x + Last update: %s + Header length: %d + Record length: %d + Record count: %d + FieldName Type Len Dec +""" % (self.signature, self.lastUpdate, self.headerLength, + self.recordLength, self.recordCount) + _rv += "\n".join( + ["%10s %4s %3s %3s" % _fld.fieldInfo() for _fld in self.fields] + ) + return _rv + + ## internal methods + + def _addField(self, *defs): + """Internal variant of the `addField` method. + + This method doesn't set `self.changed` field to True. + + Return value is a length of the appended records. + Note: this method doesn't modify ``recordLength`` and + ``headerLength`` fields. Use `addField` instead of this + method if you don't exactly know what you're doing. + + """ + # insure we have dbf.DbfFieldDef instances first (instantiation + # from the tuple could raise an error, in such a case I don't + # wanna add any of the definitions -- all will be ignored) + _defs = [] + _recordLength = 0 + for _def in defs: + if isinstance(_def, fields.DbfFieldDef): + _obj = _def + else: + (_name, _type, _len, _dec) = (tuple(_def) + (None,) * 4)[:4] + _cls = fields.lookupFor(_type) + _obj = _cls(_name, _len, _dec, + ignoreErrors=self._ignore_errors) + _recordLength += _obj.length + _defs.append(_obj) + # and now extend field definitions and + # update record length + self.fields += _defs + return _recordLength + + ## interface methods + + def addField(self, *defs): + """Add field definition to the header. + + Examples: + dbfh.addField( + ("name", "C", 20), + dbf.DbfCharacterFieldDef("surname", 20), + dbf.DbfDateFieldDef("birthdate"), + ("member", "L"), + ) + dbfh.addField(("price", "N", 5, 2)) + dbfh.addField(dbf.DbfNumericFieldDef("origprice", 5, 2)) + + """ + _oldLen = self.recordLength + self.recordLength += self._addField(*defs) + if not _oldLen: + self.recordLength += 1 + # XXX: may be just use: + # self.recordeLength += self._addField(*defs) + bool(not _oldLen) + # recalculate headerLength + self.headerLength = 32 + (32 * len(self.fields)) + 1 + self.changed = True + + def write(self, stream): + """Encode and write header to the stream.""" + stream.seek(0) + stream.write(self.toString()) + stream.write("".join([_fld.toString() for _fld in self.fields])) + stream.write(chr(0x0D)) # cr at end of all hdr data + self.changed = False + + def toString(self): + """Returned 32 chars length string with encoded header.""" + return struct.pack("<4BI2H", + self.signature, + self.year - 1900, + self.month, + self.day, + self.recordCount, + self.headerLength, + self.recordLength) + "\0" * 20 + + def setCurrentDate(self): + """Update ``self.lastUpdate`` field with current date value.""" + self.lastUpdate = datetime.date.today() + + def __getitem__(self, item): + """Return a field definition by numeric index or name string""" + if isinstance(item, basestring): + _name = item.upper() + for _field in self.fields: + if _field.name == _name: + return _field + else: + raise KeyError(item) + else: + # item must be field index + return self.fields[item] + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/record.py b/tablib/packages/dbfpy/record.py new file mode 100644 index 0000000..97bbfb3 --- /dev/null +++ b/tablib/packages/dbfpy/record.py @@ -0,0 +1,262 @@ +"""DBF record definition. + +""" +"""History (most recent first): +11-feb-2007 [als] __repr__: added special case for invalid field values +10-feb-2007 [als] added .rawFromStream() +30-oct-2006 [als] fix record length in .fromStream() +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] DbfRecord.write() -> DbfRecord._write(); + added delete() method. +16-dec-2005 [yc] record definition moved from `dbf`. +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:05:49 $"[7:-2] + +__all__ = ["DbfRecord"] + +from itertools import izip + +import utils + +class DbfRecord(object): + """DBF record. + + Instances of this class shouldn't be created manualy, + use `dbf.Dbf.newRecord` instead. + + Class implements mapping/sequence interface, so + fields could be accessed via their names or indexes + (names is a preffered way to access fields). + + Hint: + Use `store` method to save modified record. + + Examples: + Add new record to the database: + db = Dbf(filename) + rec = db.newRecord() + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + Or the same, but modify existed + (second in this case) record: + db = Dbf(filename) + rec = db[2] + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + + """ + + __slots__ = "dbf", "index", "deleted", "fieldData" + + ## creation and initialization + + def __init__(self, dbf, index=None, deleted=False, data=None): + """Instance initialiation. + + Arguments: + dbf: + A `Dbf.Dbf` instance this record belonogs to. + index: + An integer record index or None. If this value is + None, record will be appended to the DBF. + deleted: + Boolean flag indicating whether this record + is a deleted record. + data: + A sequence or None. This is a data of the fields. + If this argument is None, default values will be used. + + """ + self.dbf = dbf + # XXX: I'm not sure ``index`` is necessary + self.index = index + self.deleted = deleted + if data is None: + self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields] + else: + self.fieldData = list(data) + + # XXX: validate self.index before calculating position? + position = property(lambda self: self.dbf.header.headerLength + \ + self.index * self.dbf.header.recordLength) + + def rawFromStream(cls, dbf, index): + """Return raw record contents read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance containing the record. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is a string containing record data in DBF format. + + """ + # XXX: may be write smth assuming, that current stream + # position is the required one? it could save some + # time required to calculate where to seek in the file + dbf.stream.seek(dbf.header.headerLength + + index * dbf.header.recordLength) + return dbf.stream.read(dbf.header.recordLength) + rawFromStream = classmethod(rawFromStream) + + def fromStream(cls, dbf, index): + """Return a record read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is an instance of the current class. + + """ + return cls.fromString(dbf, cls.rawFromStream(dbf, index), index) + fromStream = classmethod(fromStream) + + def fromString(cls, dbf, string, index=None): + """Return record read from the string object. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + string: + A string new record should be created from. + index: + Index of the record in the container. If this + argument is None, record will be appended. + + Return value is an instance of the current class. + + """ + return cls(dbf, index, string[0]=="*", + [_fd.decodeFromRecord(string) for _fd in dbf.header.fields]) + fromString = classmethod(fromString) + + ## object representation + + def __repr__(self): + _template = "%%%ds: %%s (%%s)" % max([len(_fld) + for _fld in self.dbf.fieldNames]) + _rv = [] + for _fld in self.dbf.fieldNames: + _val = self[_fld] + if _val is utils.INVALID_VALUE: + _rv.append(_template % + (_fld, "None", "value cannot be decoded")) + else: + _rv.append(_template % (_fld, _val, type(_val))) + return "\n".join(_rv) + + ## protected methods + + def _write(self): + """Write data to the dbf stream. + + Note: + This isn't a public method, it's better to + use 'store' instead publically. + Be design ``_write`` method should be called + only from the `Dbf` instance. + + + """ + self._validateIndex(False) + self.dbf.stream.seek(self.position) + self.dbf.stream.write(self.toString()) + # FIXME: may be move this write somewhere else? + # why we should check this condition for each record? + if self.index == len(self.dbf): + # this is the last record, + # we should write SUB (ASCII 26) + self.dbf.stream.write("\x1A") + + ## utility methods + + def _validateIndex(self, allowUndefined=True, checkRange=False): + """Valid ``self.index`` value. + + If ``allowUndefined`` argument is True functions does nothing + in case of ``self.index`` pointing to None object. + + """ + if self.index is None: + if not allowUndefined: + raise ValueError("Index is undefined") + elif self.index < 0: + raise ValueError("Index can't be negative (%s)" % self.index) + elif checkRange and self.index <= self.dbf.header.recordCount: + raise ValueError("There are only %d records in the DBF" % + self.dbf.header.recordCount) + + ## interface methods + + def store(self): + """Store current record in the DBF. + + If ``self.index`` is None, this record will be appended to the + records of the DBF this records belongs to; or replaced otherwise. + + """ + self._validateIndex() + if self.index is None: + self.index = len(self.dbf) + self.dbf.append(self) + else: + self.dbf[self.index] = self + + def delete(self): + """Mark method as deleted.""" + self.deleted = True + + def toString(self): + """Return string packed record values.""" + return "".join([" *"[self.deleted]] + [ + _def.encodeValue(_dat) + for (_def, _dat) in izip(self.dbf.header.fields, self.fieldData) + ]) + + def asList(self): + """Return a flat list of fields. + + Note: + Change of the list's values won't change + real values stored in this object. + + """ + return self.fieldData[:] + + def asDict(self): + """Return a dictionary of fields. + + Note: + Change of the dicts's values won't change + real values stored in this object. + + """ + return dict([_i for _i in izip(self.dbf.fieldNames, self.fieldData)]) + + def __getitem__(self, key): + """Return value by field name or field index.""" + if isinstance(key, (long, int)): + # integer index of the field + return self.fieldData[key] + # assuming string field name + return self.fieldData[self.dbf.indexOfFieldName(key)] + + def __setitem__(self, key, value): + """Set field value by integer index of the field or string name.""" + if isinstance(key, (int, long)): + # integer index of the field + return self.fieldData[key] + # assuming string field name + self.fieldData[self.dbf.indexOfFieldName(key)] = value + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy/utils.py b/tablib/packages/dbfpy/utils.py new file mode 100644 index 0000000..cef8aa5 --- /dev/null +++ b/tablib/packages/dbfpy/utils.py @@ -0,0 +1,170 @@ +"""String utilities. + +TODO: + - allow strings in getDateTime routine; +""" +"""History (most recent first): +11-feb-2007 [als] added INVALID_VALUE +10-feb-2007 [als] allow date strings padded with spaces instead of zeroes +20-dec-2005 [yc] handle long objects in getDate/getDateTime +16-dec-2005 [yc] created from ``strutil`` module. +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2] + +import datetime +import time + + +def unzfill(str): + """Return a string without ASCII NULs. + + This function searchers for the first NUL (ASCII 0) occurance + and truncates string till that position. + + """ + try: + return str[:str.index('\0')] + except ValueError: + return str + + +def getDate(date=None): + """Return `datetime.date` instance. + + Type of the ``date`` argument could be one of the following: + None: + use current date value; + datetime.date: + this value will be returned; + datetime.datetime: + the result of the date.date() will be returned; + string: + assuming "%Y%m%d" or "%y%m%dd" format; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``date`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. + + """ + if date is None: + # use current value + return datetime.date.today() + if isinstance(date, datetime.date): + return date + if isinstance(date, datetime.datetime): + return date.date() + if isinstance(date, (int, long, float)): + # date is a timestamp + return datetime.date.fromtimestamp(date) + if isinstance(date, basestring): + date = date.replace(" ", "0") + if len(date) == 6: + # yymmdd + return datetime.date(*time.strptime(date, "%y%m%d")[:3]) + # yyyymmdd + return datetime.date(*time.strptime(date, "%Y%m%d")[:3]) + if hasattr(date, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.date(*date[:3]) + return datetime.date.fromtimestamp(date.ticks()) + + +def getDateTime(value=None): + """Return `datetime.datetime` instance. + + Type of the ``value`` argument could be one of the following: + None: + use current date value; + datetime.date: + result will be converted to the `datetime.datetime` instance + using midnight; + datetime.datetime: + ``value`` will be returned as is; + string: + *** CURRENTLY NOT SUPPORTED ***; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``value`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. + + """ + if value is None: + # use current value + return datetime.datetime.today() + if isinstance(value, datetime.datetime): + return value + if isinstance(value, datetime.date): + return datetime.datetime.fromordinal(value.toordinal()) + if isinstance(value, (int, long, float)): + # value is a timestamp + return datetime.datetime.fromtimestamp(value) + if isinstance(value, basestring): + raise NotImplementedError("Strings aren't currently implemented") + if hasattr(value, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.datetime(*tuple(value)[:6]) + return datetime.datetime.fromtimestamp(value.ticks()) + + +class classproperty(property): + """Works in the same way as a ``property``, but for the classes.""" + + def __get__(self, obj, cls): + return self.fget(cls) + + +class _InvalidValue(object): + + """Value returned from DBF records when field validation fails + + The value is not equal to anything except for itself + and equal to all empty values: None, 0, empty string etc. + In other words, invalid value is equal to None and not equal + to None at the same time. + + This value yields zero upon explicit conversion to a number type, + empty string for string types, and False for boolean. + + """ + + def __eq__(self, other): + return not other + + def __ne__(self, other): + return not (other is self) + + def __nonzero__(self): + return False + + def __int__(self): + return 0 + __long__ = __int__ + + def __float__(self): + return 0.0 + + def __str__(self): + return "" + + def __unicode__(self): + return u"" + + def __repr__(self): + return "<INVALID>" + +# invalid value is a constant singleton +INVALID_VALUE = _InvalidValue() + +# vim: set et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/__init__.py b/tablib/packages/dbfpy3/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/tablib/packages/dbfpy3/__init__.py diff --git a/tablib/packages/dbfpy3/dbf.py b/tablib/packages/dbfpy3/dbf.py new file mode 100644 index 0000000..42de8a4 --- /dev/null +++ b/tablib/packages/dbfpy3/dbf.py @@ -0,0 +1,293 @@ +#! /usr/bin/env python +"""DBF accessing helpers. + +FIXME: more documentation needed + +Examples: + + Create new table, setup structure, add records: + + dbf = Dbf(filename, new=True) + dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (n, s, i, b) in ( + ("John", "Miller", "YC", (1980, 10, 11)), + ("Andy", "Larkin", "", (1980, 4, 11)), + ): + rec = dbf.newRecord() + rec["NAME"] = n + rec["SURNAME"] = s + rec["INITIALS"] = i + rec["BIRTHDATE"] = b + rec.store() + dbf.close() + + Open existed dbf, read some data: + + dbf = Dbf(filename, True) + for rec in dbf: + for fldName in dbf.fieldNames: + print '%s:\t %s (%s)' % (fldName, rec[fldName], + type(rec[fldName])) + print + dbf.close() + +""" +"""History (most recent first): +11-feb-2007 [als] export INVALID_VALUE; + Dbf: added .ignoreErrors, .INVALID_VALUE +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] removed fromStream and newDbf methods: + use argument of __init__ call must be used instead; + added class fields pointing to the header and + record classes. +17-dec-2005 [yc] split to several modules; reimplemented +13-dec-2005 [yc] adapted to the changes of the `strutil` module. +13-sep-2002 [als] support FoxPro Timestamp datatype +15-nov-1999 [jjk] documentation updates, add demo +24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks +08-jun-1998 [jjk] fix problems, add more features +20-feb-1998 [jjk] fix problems, add more features +19-feb-1998 [jjk] add create/write capabilities +18-feb-1998 [jjk] from dbfload.py +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2] +__author__ = "Jeff Kunce <kuncej@mail.conservation.state.mo.us>" + +__all__ = ["Dbf"] + +from . import header +from . import record +from .utils import INVALID_VALUE + +class Dbf(object): + """DBF accessor. + + FIXME: + docs and examples needed (dont' forget to tell + about problems adding new fields on the fly) + + Implementation notes: + ``_new`` field is used to indicate whether this is + a new data table. `addField` could be used only for + the new tables! If at least one record was appended + to the table it's structure couldn't be changed. + + """ + + __slots__ = ("name", "header", "stream", + "_changed", "_new", "_ignore_errors") + + HeaderClass = header.DbfHeader + RecordClass = record.DbfRecord + INVALID_VALUE = INVALID_VALUE + + ## initialization and creation helpers + + def __init__(self, f, readOnly=False, new=False, ignoreErrors=False): + """Initialize instance. + + Arguments: + f: + Filename or file-like object. + new: + True if new data table must be created. Assume + data table exists if this argument is False. + readOnly: + if ``f`` argument is a string file will + be opend in read-only mode; in other cases + this argument is ignored. This argument is ignored + even if ``new`` argument is True. + headerObj: + `header.DbfHeader` instance or None. If this argument + is None, new empty header will be used with the + all fields set by default. + ignoreErrors: + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """ + if isinstance(f, str): + # a filename + self.name = f + if new: + # new table (table file must be + # created or opened and truncated) + self.stream = open(f, "w+b") + else: + # tabe file must exist + self.stream = open(f, ("r+b", "rb")[bool(readOnly)]) + else: + # a stream + self.name = getattr(f, "name", "") + self.stream = f + if new: + # if this is a new table, header will be empty + self.header = self.HeaderClass() + else: + # or instantiated using stream + self.header = self.HeaderClass.fromStream(self.stream) + self.ignoreErrors = ignoreErrors + self._new = bool(new) + self._changed = False + + ## properties + + closed = property(lambda self: self.stream.closed) + recordCount = property(lambda self: self.header.recordCount) + fieldNames = property( + lambda self: [_fld.name for _fld in self.header.fields]) + fieldDefs = property(lambda self: self.header.fields) + changed = property(lambda self: self._changed or self.header.changed) + + def ignoreErrors(self, value): + """Update `ignoreErrors` flag on the header object and self""" + self.header.ignoreErrors = self._ignore_errors = bool(value) + ignoreErrors = property( + lambda self: self._ignore_errors, + ignoreErrors, + doc="""Error processing mode for DBF field value conversion + + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """) + + ## protected methods + + def _fixIndex(self, index): + """Return fixed index. + + This method fails if index isn't a numeric object + (long or int). Or index isn't in a valid range + (less or equal to the number of records in the db). + + If ``index`` is a negative number, it will be + treated as a negative indexes for list objects. + + Return: + Return value is numeric object maning valid index. + + """ + if not isinstance(index, int): + raise TypeError("Index must be a numeric object") + if index < 0: + # index from the right side + # fix it to the left-side index + index += len(self) + 1 + if index >= len(self): + raise IndexError("Record index out of range") + return index + + ## iterface methods + + def close(self): + self.flush() + self.stream.close() + + def flush(self): + """Flush data to the associated stream.""" + if self.changed: + self.header.setCurrentDate() + self.header.write(self.stream) + self.stream.flush() + self._changed = False + + def indexOfFieldName(self, name): + """Index of field named ``name``.""" + # FIXME: move this to header class + names = [f.name for f in self.header.fields] + return names.index(name.upper()) + + def newRecord(self): + """Return new record, which belong to this table.""" + return self.RecordClass(self) + + def append(self, record): + """Append ``record`` to the database.""" + record.index = self.header.recordCount + record._write() + self.header.recordCount += 1 + self._changed = True + self._new = False + + def addField(self, *defs): + """Add field definitions. + + For more information see `header.DbfHeader.addField`. + + """ + if self._new: + self.header.addField(*defs) + else: + raise TypeError("At least one record was added, " + "structure can't be changed") + + ## 'magic' methods (representation and sequence interface) + + def __repr__(self): + return "Dbf stream '%s'\n" % self.stream + repr(self.header) + + def __len__(self): + """Return number of records.""" + return self.recordCount + + def __getitem__(self, index): + """Return `DbfRecord` instance.""" + return self.RecordClass.fromStream(self, self._fixIndex(index)) + + def __setitem__(self, index, record): + """Write `DbfRecord` instance to the stream.""" + record.index = self._fixIndex(index) + record._write() + self._changed = True + self._new = False + + #def __del__(self): + # """Flush stream upon deletion of the object.""" + # self.flush() + + +def demoRead(filename): + _dbf = Dbf(filename, True) + for _rec in _dbf: + print() + print(repr(_rec)) + _dbf.close() + +def demoCreate(filename): + _dbf = Dbf(filename, new=True) + _dbf.addField( + ("NAME", "C", 15), + ("SURNAME", "C", 25), + ("INITIALS", "C", 10), + ("BIRTHDATE", "D"), + ) + for (_n, _s, _i, _b) in ( + ("John", "Miller", "YC", (1981, 1, 2)), + ("Andy", "Larkin", "AL", (1982, 3, 4)), + ("Bill", "Clinth", "", (1983, 5, 6)), + ("Bobb", "McNail", "", (1984, 7, 8)), + ): + _rec = _dbf.newRecord() + _rec["NAME"] = _n + _rec["SURNAME"] = _s + _rec["INITIALS"] = _i + _rec["BIRTHDATE"] = _b + _rec.store() + print(repr(_dbf)) + _dbf.close() + +if (__name__=='__main__'): + import sys + _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf" + demoCreate(_name) + demoRead(_name) + +# vim: set et sw=4 sts=4 : diff --git a/tablib/packages/dbfpy3/dbfnew.py b/tablib/packages/dbfpy3/dbfnew.py new file mode 100644 index 0000000..4051bc6 --- /dev/null +++ b/tablib/packages/dbfpy3/dbfnew.py @@ -0,0 +1,182 @@ +#!/usr/bin/python +""".DBF creation helpers. + +Note: this is a legacy interface. New code should use Dbf class + for table creation (see examples in dbf.py) + +TODO: + - handle Memo fields. + - check length of the fields accoring to the + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + +""" +"""History (most recent first) +04-jul-2006 [als] added export declaration; + updated for dbfpy 2.0 +15-dec-2005 [yc] define dbf_new.__slots__ +14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings; + dbf_new now is a new class (inherited from object) +??-jun-2000 [--] added by Hans Fiby +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] + +__all__ = ["dbf_new"] + +from .dbf import * +from .fields import * +from .header import * +from .record import * + +class _FieldDefinition(object): + """Field definition. + + This is a simple structure, which contains ``name``, ``type``, + ``len``, ``dec`` and ``cls`` fields. + + Objects also implement get/setitem magic functions, so fields + could be accessed via sequence iterface, where 'name' has + index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and + 'cls' could be located at index 4. + + """ + + __slots__ = "name", "type", "len", "dec", "cls" + + # WARNING: be attentive - dictionaries are mutable! + FLD_TYPES = { + # type: (cls, len) + "C": (DbfCharacterFieldDef, None), + "N": (DbfNumericFieldDef, None), + "L": (DbfLogicalFieldDef, 1), + # FIXME: support memos + # "M": (DbfMemoFieldDef), + "D": (DbfDateFieldDef, 8), + # FIXME: I'm not sure length should be 14 characters! + # but temporary I use it, cuz date is 8 characters + # and time 6 (hhmmss) + "T": (DbfDateTimeFieldDef, 14), + } + + def __init__(self, name, type, len=None, dec=0): + _cls, _len = self.FLD_TYPES[type] + if _len is None: + if len is None: + raise ValueError("Field length must be defined") + _len = len + self.name = name + self.type = type + self.len = _len + self.dec = dec + self.cls = _cls + + def getDbfField(self): + "Return `DbfFieldDef` instance from the current definition." + return self.cls(self.name, self.len, self.dec) + + def appendToHeader(self, dbfh): + """Create a `DbfFieldDef` instance and append it to the dbf header. + + Arguments: + dbfh: `DbfHeader` instance. + + """ + _dbff = self.getDbfField() + dbfh.addField(_dbff) + + +class dbf_new(object): + """New .DBF creation helper. + + Example Usage: + + dbfn = dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + + Note: + This module cannot handle Memo-fields, + they are special. + + """ + + __slots__ = ("fields",) + + FieldDefinitionClass = _FieldDefinition + + def __init__(self): + self.fields = [] + + def add_field(self, name, typ, len, dec=0): + """Add field definition. + + Arguments: + name: + field name (str object). field name must not + contain ASCII NULs and it's length shouldn't + exceed 10 characters. + typ: + type of the field. this must be a single character + from the "CNLMDT" set meaning character, numeric, + logical, memo, date and date/time respectively. + len: + length of the field. this argument is used only for + the character and numeric fields. all other fields + have fixed length. + FIXME: use None as a default for this argument? + dec: + decimal precision. used only for the numric fields. + + """ + self.fields.append(self.FieldDefinitionClass(name, typ, len, dec)) + + def write(self, filename): + """Create empty .DBF file using current structure.""" + _dbfh = DbfHeader() + _dbfh.setCurrentDate() + for _fldDef in self.fields: + _fldDef.appendToHeader(_dbfh) + + _dbfStream = open(filename, "wb") + _dbfh.write(_dbfStream) + _dbfStream.close() + + +if (__name__=='__main__'): + # create a new DBF-File + dbfn=dbf_new() + dbfn.add_field("name",'C',80) + dbfn.add_field("price",'N',10,2) + dbfn.add_field("date",'D',8) + dbfn.write("tst.dbf") + # test new dbf + print("*** created tst.dbf: ***") + dbft = Dbf('tst.dbf', readOnly=0) + print(repr(dbft)) + # add a record + rec=DbfRecord(dbft) + rec['name']='something' + rec['price']=10.5 + rec['date']=(2000,1,12) + rec.store() + # add another record + rec=DbfRecord(dbft) + rec['name']='foo and bar' + rec['price']=12234 + rec['date']=(1992,7,15) + rec.store() + + # show the records + print("*** inserted 2 records into tst.dbf: ***") + print(repr(dbft)) + for i1 in range(len(dbft)): + rec = dbft[i1] + for fldName in dbft.fieldNames: + print('%s:\t %s'%(fldName, rec[fldName])) + print() + dbft.close() + +# vim: set et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/fields.py b/tablib/packages/dbfpy3/fields.py new file mode 100644 index 0000000..883d035 --- /dev/null +++ b/tablib/packages/dbfpy3/fields.py @@ -0,0 +1,467 @@ +"""DBF fields definitions. + +TODO: + - make memos work +""" +"""History (most recent first): +26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes +05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date +16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point + in the value to select float or integer return type +13-mar-2008 [als] check field name length in constructor +11-feb-2007 [als] handle value conversion errors +10-feb-2007 [als] DbfFieldDef: added .rawFromRecord() +01-dec-2006 [als] Timestamp columns use None for empty values +31-oct-2006 [als] support field types 'F' (float), 'I' (integer) + and 'Y' (currency); + automate export and registration of field classes +04-jul-2006 [als] added export declaration +10-mar-2006 [als] decode empty values for Date and Logical fields; + show field name in errors +10-mar-2006 [als] fix Numeric value decoding: according to spec, + value always is string representation of the number; + ensure that encoded Numeric value fits into the field +20-dec-2005 [yc] use field names in upper case +15-dec-2005 [yc] field definitions moved from `dbf`. +""" + +__version__ = "$Revision: 1.14 $"[11:-2] +__date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2] + +__all__ = ["lookupFor",] # field classes added at the end of the module + +import datetime +import struct +import sys + +from . import utils + +## abstract definitions + +class DbfFieldDef(object): + """Abstract field definition. + + Child classes must override ``type`` class attribute to provide datatype + infromation of the field definition. For more info about types visit + `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + + Also child classes must override ``defaultValue`` field to provide + default value for the field value. + + If child class has fixed length ``length`` class attribute must be + overriden and set to the valid value. None value means, that field + isn't of fixed length. + + Note: ``name`` field must not be changed after instantiation. + + """ + + + __slots__ = ("name", "decimalCount", + "start", "end", "ignoreErrors") + + # length of the field, None in case of variable-length field, + # or a number if this field is a fixed-length field + length = None + + # field type. for more information about fields types visit + # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` + # must be overriden in child classes + typeCode = None + + # default value for the field. this field must be + # overriden in child classes + defaultValue = None + + def __init__(self, name, length=None, decimalCount=None, + start=None, stop=None, ignoreErrors=False, + ): + """Initialize instance.""" + assert self.typeCode is not None, "Type code must be overriden" + assert self.defaultValue is not None, "Default value must be overriden" + ## fix arguments + if len(name) >10: + raise ValueError("Field name \"%s\" is too long" % name) + name = str(name).upper() + if self.__class__.length is None: + if length is None: + raise ValueError("[%s] Length isn't specified" % name) + length = int(length) + if length <= 0: + raise ValueError("[%s] Length must be a positive integer" + % name) + else: + length = self.length + if decimalCount is None: + decimalCount = 0 + ## set fields + self.name = name + # FIXME: validate length according to the specification at + # http://www.clicketyclick.dk/databases/xbase/format/data_types.html + self.length = length + self.decimalCount = decimalCount + self.ignoreErrors = ignoreErrors + self.start = start + self.end = stop + + def __cmp__(self, other): + return cmp(self.name, str(other).upper()) + + def __hash__(self): + return hash(self.name) + + def fromString(cls, string, start, ignoreErrors=False): + """Decode dbf field definition from the string data. + + Arguments: + string: + a string, dbf definition is decoded from. length of + the string must be 32 bytes. + start: + position in the database file. + ignoreErrors: + initial error processing mode for the new field (boolean) + + """ + assert len(string) == 32 + _length = string[16] + return cls(utils.unzfill(string)[:11].decode('utf-8'), _length, + string[17], start, start + _length, ignoreErrors=ignoreErrors) + fromString = classmethod(fromString) + + def toString(self): + """Return encoded field definition. + + Return: + Return value is a string object containing encoded + definition of this field. + + """ + if sys.version_info < (2, 4): + # earlier versions did not support padding character + _name = self.name[:11] + "\0" * (11 - len(self.name)) + else: + _name = self.name.ljust(11, '\0') + return ( + _name + + self.typeCode + + #data address + chr(0) * 4 + + chr(self.length) + + chr(self.decimalCount) + + chr(0) * 14 + ) + + def __repr__(self): + return "%-10s %1s %3d %3d" % self.fieldInfo() + + def fieldInfo(self): + """Return field information. + + Return: + Return value is a (name, type, length, decimals) tuple. + + """ + return (self.name, self.typeCode, self.length, self.decimalCount) + + def rawFromRecord(self, record): + """Return a "raw" field value from the record string.""" + return record[self.start:self.end] + + def decodeFromRecord(self, record): + """Return decoded field value from the record string.""" + try: + return self.decodeValue(self.rawFromRecord(record)) + except: + if self.ignoreErrors: + return utils.INVALID_VALUE + else: + raise + + def decodeValue(self, value): + """Return decoded value from string value. + + This method shouldn't be used publicly. It's called from the + `decodeFromRecord` method. + + This is an abstract method and it must be overridden in child classes. + """ + raise NotImplementedError + + def encodeValue(self, value): + """Return str object containing encoded field value. + + This is an abstract method and it must be overriden in child classes. + """ + raise NotImplementedError + +## real classes + +class DbfCharacterFieldDef(DbfFieldDef): + """Definition of the character field.""" + + typeCode = "C" + defaultValue = b'' + + def decodeValue(self, value): + """Return string object. + + Return value is a ``value`` argument with stripped right spaces. + + """ + return value.rstrip(b' ').decode('utf-8') + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``.""" + return str(value)[:self.length].ljust(self.length) + + +class DbfNumericFieldDef(DbfFieldDef): + """Definition of the numeric field.""" + + typeCode = "N" + # XXX: now I'm not sure it was a good idea to make a class field + # `defaultValue` instead of a generic method as it was implemented + # previously -- it's ok with all types except number, cuz + # if self.decimalCount is 0, we should return 0 and 0.0 otherwise. + defaultValue = 0 + + def decodeValue(self, value): + """Return a number decoded from ``value``. + + If decimals is zero, value will be decoded as an integer; + or as a float otherwise. + + Return: + Return value is a int (long) or float instance. + + """ + value = value.strip(b' \0') + if b'.' in value: + # a float (has decimal separator) + return float(value) + elif value: + # must be an integer + return int(value) + else: + return 0 + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + _rv = ("%*.*f" % (self.length, self.decimalCount, value)) + if len(_rv) > self.length: + _ppos = _rv.find(".") + if 0 <= _ppos <= self.length: + _rv = _rv[:self.length] + else: + raise ValueError("[%s] Numeric overflow: %s (field width: %i)" + % (self.name, _rv, self.length)) + return _rv + +class DbfFloatFieldDef(DbfNumericFieldDef): + """Definition of the float field - same as numeric.""" + + typeCode = "F" + +class DbfIntegerFieldDef(DbfFieldDef): + """Definition of the integer field.""" + + typeCode = "I" + length = 4 + defaultValue = 0 + + def decodeValue(self, value): + """Return an integer number decoded from ``value``.""" + return struct.unpack("<i", value)[0] + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + return struct.pack("<i", int(value)) + +class DbfCurrencyFieldDef(DbfFieldDef): + """Definition of the currency field.""" + + typeCode = "Y" + length = 8 + defaultValue = 0.0 + + def decodeValue(self, value): + """Return float number decoded from ``value``.""" + return struct.unpack("<q", value)[0] / 10000. + + def encodeValue(self, value): + """Return string containing encoded ``value``.""" + return struct.pack("<q", round(value * 10000)) + +class DbfLogicalFieldDef(DbfFieldDef): + """Definition of the logical field.""" + + typeCode = "L" + defaultValue = -1 + length = 1 + + def decodeValue(self, value): + """Return True, False or -1 decoded from ``value``.""" + # Note: value always is 1-char string + if value == "?": + return -1 + if value in "NnFf ": + return False + if value in "YyTt": + return True + raise ValueError("[%s] Invalid logical value %r" % (self.name, value)) + + def encodeValue(self, value): + """Return a character from the "TF?" set. + + Return: + Return value is "T" if ``value`` is True + "?" if value is -1 or False otherwise. + + """ + if value is True: + return "T" + if value == -1: + return "?" + return "F" + + +class DbfMemoFieldDef(DbfFieldDef): + """Definition of the memo field. + + Note: memos aren't currenly completely supported. + + """ + + typeCode = "M" + defaultValue = " " * 10 + length = 10 + + def decodeValue(self, value): + """Return int .dbt block number decoded from the string object.""" + #return int(value) + raise NotImplementedError + + def encodeValue(self, value): + """Return raw data string encoded from a ``value``. + + Note: this is an internal method. + + """ + #return str(value)[:self.length].ljust(self.length) + raise NotImplementedError + + +class DbfDateFieldDef(DbfFieldDef): + """Definition of the date field.""" + + typeCode = "D" + defaultValue = utils.classproperty(lambda cls: datetime.date.today()) + # "yyyymmdd" gives us 8 characters + length = 8 + + def decodeValue(self, value): + """Return a ``datetime.date`` instance decoded from ``value``.""" + if value.strip(): + return utils.getDate(value) + else: + return None + + def encodeValue(self, value): + """Return a string-encoded value. + + ``value`` argument should be a value suitable for the + `utils.getDate` call. + + Return: + Return value is a string in format "yyyymmdd". + + """ + if value: + return utils.getDate(value).strftime("%Y%m%d") + else: + return " " * self.length + + +class DbfDateTimeFieldDef(DbfFieldDef): + """Definition of the timestamp field.""" + + # a difference between JDN (Julian Day Number) + # and GDN (Gregorian Day Number). note, that GDN < JDN + JDN_GDN_DIFF = 1721425 + typeCode = "T" + defaultValue = utils.classproperty(lambda cls: datetime.datetime.now()) + # two 32-bits integers representing JDN and amount of + # milliseconds respectively gives us 8 bytes. + # note, that values must be encoded in LE byteorder. + length = 8 + + def decodeValue(self, value): + """Return a `datetime.datetime` instance.""" + assert len(value) == self.length + # LE byteorder + _jdn, _msecs = struct.unpack("<2I", value) + if _jdn >= 1: + _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF) + _rv += datetime.timedelta(0, _msecs / 1000.0) + else: + # empty date + _rv = None + return _rv + + def encodeValue(self, value): + """Return a string-encoded ``value``.""" + if value: + value = utils.getDateTime(value) + # LE byteorder + _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF, + (value.hour * 3600 + value.minute * 60 + value.second) * 1000) + else: + _rv = "\0" * self.length + assert len(_rv) == self.length + return _rv + + +_fieldsRegistry = {} + +def registerField(fieldCls): + """Register field definition class. + + ``fieldCls`` should be subclass of the `DbfFieldDef`. + + Use `lookupFor` to retrieve field definition class + by the type code. + + """ + assert fieldCls.typeCode is not None, "Type code isn't defined" + # XXX: use fieldCls.typeCode.upper()? in case of any decign + # don't forget to look to the same comment in ``lookupFor`` method + _fieldsRegistry[fieldCls.typeCode] = fieldCls + + +def lookupFor(typeCode): + """Return field definition class for the given type code. + + ``typeCode`` must be a single character. That type should be + previously registered. + + Use `registerField` to register new field class. + + Return: + Return value is a subclass of the `DbfFieldDef`. + + """ + # XXX: use typeCode.upper()? in case of any decign don't + # forget to look to the same comment in ``registerField`` + return _fieldsRegistry[chr(typeCode)] + +## register generic types + +for (_name, _val) in list(globals().items()): + if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ + and (_name != "DbfFieldDef"): + __all__.append(_name) + registerField(_val) +del _name, _val + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/header.py b/tablib/packages/dbfpy3/header.py new file mode 100644 index 0000000..6c0dc4f --- /dev/null +++ b/tablib/packages/dbfpy3/header.py @@ -0,0 +1,273 @@ +"""DBF header definition. + +TODO: + - handle encoding of the character fields + (encoding information stored in the DBF header) + +""" +"""History (most recent first): +16-sep-2010 [als] fromStream: fix century of the last update field +11-feb-2007 [als] added .ignoreErrors +10-feb-2007 [als] added __getitem__: return field definitions + by field name or field number (zero-based) +04-jul-2006 [als] added export declaration +15-dec-2005 [yc] created +""" + +__version__ = "$Revision: 1.6 $"[11:-2] +__date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] + +__all__ = ["DbfHeader"] + +import io +import datetime +import struct +import time +import sys + +from . import fields +from .utils import getDate + + +class DbfHeader(object): + """Dbf header definition. + + For more information about dbf header format visit + `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT` + + Examples: + Create an empty dbf header and add some field definitions: + dbfh = DbfHeader() + dbfh.addField(("name", "C", 10)) + dbfh.addField(("date", "D")) + dbfh.addField(DbfNumericFieldDef("price", 5, 2)) + Create a dbf header with field definitions: + dbfh = DbfHeader([ + ("name", "C", 10), + ("date", "D"), + DbfNumericFieldDef("price", 5, 2), + ]) + + """ + + __slots__ = ("signature", "fields", "lastUpdate", "recordLength", + "recordCount", "headerLength", "changed", "_ignore_errors") + + ## instance construction and initialization methods + + def __init__(self, fields=None, headerLength=0, recordLength=0, + recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False, + ): + """Initialize instance. + + Arguments: + fields: + a list of field definitions; + recordLength: + size of the records; + headerLength: + size of the header; + recordCount: + number of records stored in DBF; + signature: + version number (aka signature). using 0x03 as a default meaning + "File without DBT". for more information about this field visit + ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET`` + lastUpdate: + date of the DBF's update. this could be a string ('yymmdd' or + 'yyyymmdd'), timestamp (int or float), datetime/date value, + a sequence (assuming (yyyy, mm, dd, ...)) or an object having + callable ``ticks`` field. + ignoreErrors: + error processing mode for DBF fields (boolean) + + """ + self.signature = signature + if fields is None: + self.fields = [] + else: + self.fields = list(fields) + self.lastUpdate = getDate(lastUpdate) + self.recordLength = recordLength + self.headerLength = headerLength + self.recordCount = recordCount + self.ignoreErrors = ignoreErrors + # XXX: I'm not sure this is safe to + # initialize `self.changed` in this way + self.changed = bool(self.fields) + + # @classmethod + def fromString(cls, string): + """Return header instance from the string object.""" + return cls.fromStream(io.StringIO(str(string))) + fromString = classmethod(fromString) + + # @classmethod + def fromStream(cls, stream): + """Return header object from the stream.""" + stream.seek(0) + first_32 = stream.read(32) + if type(first_32) != bytes: + _data = bytes(first_32, sys.getfilesystemencoding()) + _data = first_32 + (_cnt, _hdrLen, _recLen) = struct.unpack("<I2H", _data[4:12]) + #reserved = _data[12:32] + _year = _data[1] + if _year < 80: + # dBase II started at 1980. It is quite unlikely + # that actual last update date is before that year. + _year += 2000 + else: + _year += 1900 + ## create header object + _obj = cls(None, _hdrLen, _recLen, _cnt, _data[0], + (_year, _data[2], _data[3])) + ## append field definitions + # position 0 is for the deletion flag + _pos = 1 + _data = stream.read(1) + while _data != b'\r': + _data += stream.read(31) + _fld = fields.lookupFor(_data[11]).fromString(_data, _pos) + _obj._addField(_fld) + _pos = _fld.end + _data = stream.read(1) + return _obj + fromStream = classmethod(fromStream) + + ## properties + + year = property(lambda self: self.lastUpdate.year) + month = property(lambda self: self.lastUpdate.month) + day = property(lambda self: self.lastUpdate.day) + + def ignoreErrors(self, value): + """Update `ignoreErrors` flag on self and all fields""" + self._ignore_errors = value = bool(value) + for _field in self.fields: + _field.ignoreErrors = value + ignoreErrors = property( + lambda self: self._ignore_errors, + ignoreErrors, + doc="""Error processing mode for DBF field value conversion + + if set, failing field value conversion will return + ``INVALID_VALUE`` instead of raising conversion error. + + """) + + ## object representation + + def __repr__(self): + _rv = """\ +Version (signature): 0x%02x + Last update: %s + Header length: %d + Record length: %d + Record count: %d + FieldName Type Len Dec +""" % (self.signature, self.lastUpdate, self.headerLength, + self.recordLength, self.recordCount) + _rv += "\n".join( + ["%10s %4s %3s %3s" % _fld.fieldInfo() for _fld in self.fields] + ) + return _rv + + ## internal methods + + def _addField(self, *defs): + """Internal variant of the `addField` method. + + This method doesn't set `self.changed` field to True. + + Return value is a length of the appended records. + Note: this method doesn't modify ``recordLength`` and + ``headerLength`` fields. Use `addField` instead of this + method if you don't exactly know what you're doing. + + """ + # insure we have dbf.DbfFieldDef instances first (instantiation + # from the tuple could raise an error, in such a case I don't + # wanna add any of the definitions -- all will be ignored) + _defs = [] + _recordLength = 0 + for _def in defs: + if isinstance(_def, fields.DbfFieldDef): + _obj = _def + else: + (_name, _type, _len, _dec) = (tuple(_def) + (None,) * 4)[:4] + _cls = fields.lookupFor(_type) + _obj = _cls(_name, _len, _dec, + ignoreErrors=self._ignore_errors) + _recordLength += _obj.length + _defs.append(_obj) + # and now extend field definitions and + # update record length + self.fields += _defs + return _recordLength + + ## interface methods + + def addField(self, *defs): + """Add field definition to the header. + + Examples: + dbfh.addField( + ("name", "C", 20), + dbf.DbfCharacterFieldDef("surname", 20), + dbf.DbfDateFieldDef("birthdate"), + ("member", "L"), + ) + dbfh.addField(("price", "N", 5, 2)) + dbfh.addField(dbf.DbfNumericFieldDef("origprice", 5, 2)) + + """ + _oldLen = self.recordLength + self.recordLength += self._addField(*defs) + if not _oldLen: + self.recordLength += 1 + # XXX: may be just use: + # self.recordeLength += self._addField(*defs) + bool(not _oldLen) + # recalculate headerLength + self.headerLength = 32 + (32 * len(self.fields)) + 1 + self.changed = True + + def write(self, stream): + """Encode and write header to the stream.""" + stream.seek(0) + stream.write(self.toString()) + fields = [_fld.toString() for _fld in self.fields] + stream.write(''.join(fields).encode(sys.getfilesystemencoding())) + stream.write(b'\x0D') # cr at end of all header data + self.changed = False + + def toString(self): + """Returned 32 chars length string with encoded header.""" + return struct.pack("<4BI2H", + self.signature, + self.year - 1900, + self.month, + self.day, + self.recordCount, + self.headerLength, + self.recordLength) + (b'\x00' * 20) + #TODO: figure out if bytes(utf-8) is correct here. + + def setCurrentDate(self): + """Update ``self.lastUpdate`` field with current date value.""" + self.lastUpdate = datetime.date.today() + + def __getitem__(self, item): + """Return a field definition by numeric index or name string""" + if isinstance(item, str): + _name = item.upper() + for _field in self.fields: + if _field.name == _name: + return _field + else: + raise KeyError(item) + else: + # item must be field index + return self.fields[item] + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/record.py b/tablib/packages/dbfpy3/record.py new file mode 100644 index 0000000..73b6952 --- /dev/null +++ b/tablib/packages/dbfpy3/record.py @@ -0,0 +1,266 @@ +"""DBF record definition. + +""" +"""History (most recent first): +11-feb-2007 [als] __repr__: added special case for invalid field values +10-feb-2007 [als] added .rawFromStream() +30-oct-2006 [als] fix record length in .fromStream() +04-jul-2006 [als] added export declaration +20-dec-2005 [yc] DbfRecord.write() -> DbfRecord._write(); + added delete() method. +16-dec-2005 [yc] record definition moved from `dbf`. +""" + +__version__ = "$Revision: 1.7 $"[11:-2] +__date__ = "$Date: 2007/02/11 09:05:49 $"[7:-2] + +__all__ = ["DbfRecord"] + +import sys + +from . import utils + +class DbfRecord(object): + """DBF record. + + Instances of this class shouldn't be created manualy, + use `dbf.Dbf.newRecord` instead. + + Class implements mapping/sequence interface, so + fields could be accessed via their names or indexes + (names is a preffered way to access fields). + + Hint: + Use `store` method to save modified record. + + Examples: + Add new record to the database: + db = Dbf(filename) + rec = db.newRecord() + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + Or the same, but modify existed + (second in this case) record: + db = Dbf(filename) + rec = db[2] + rec["FIELD1"] = value1 + rec["FIELD2"] = value2 + rec.store() + + """ + + __slots__ = "dbf", "index", "deleted", "fieldData" + + ## creation and initialization + + def __init__(self, dbf, index=None, deleted=False, data=None): + """Instance initialiation. + + Arguments: + dbf: + A `Dbf.Dbf` instance this record belonogs to. + index: + An integer record index or None. If this value is + None, record will be appended to the DBF. + deleted: + Boolean flag indicating whether this record + is a deleted record. + data: + A sequence or None. This is a data of the fields. + If this argument is None, default values will be used. + + """ + self.dbf = dbf + # XXX: I'm not sure ``index`` is necessary + self.index = index + self.deleted = deleted + if data is None: + self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields] + else: + self.fieldData = list(data) + + # XXX: validate self.index before calculating position? + position = property(lambda self: self.dbf.header.headerLength + \ + self.index * self.dbf.header.recordLength) + + def rawFromStream(cls, dbf, index): + """Return raw record contents read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance containing the record. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is a string containing record data in DBF format. + + """ + # XXX: may be write smth assuming, that current stream + # position is the required one? it could save some + # time required to calculate where to seek in the file + dbf.stream.seek(dbf.header.headerLength + + index * dbf.header.recordLength) + return dbf.stream.read(dbf.header.recordLength) + rawFromStream = classmethod(rawFromStream) + + def fromStream(cls, dbf, index): + """Return a record read from the stream. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + index: + Index of the record in the records' container. + This argument can't be None in this call. + + Return value is an instance of the current class. + + """ + return cls.fromString(dbf, cls.rawFromStream(dbf, index), index) + fromStream = classmethod(fromStream) + + def fromString(cls, dbf, string, index=None): + """Return record read from the string object. + + Arguments: + dbf: + A `Dbf.Dbf` instance new record should belong to. + string: + A string new record should be created from. + index: + Index of the record in the container. If this + argument is None, record will be appended. + + Return value is an instance of the current class. + + """ + return cls(dbf, index, string[0]=="*", + [_fd.decodeFromRecord(string) for _fd in dbf.header.fields]) + fromString = classmethod(fromString) + + ## object representation + + def __repr__(self): + _template = "%%%ds: %%s (%%s)" % max([len(_fld) + for _fld in self.dbf.fieldNames]) + _rv = [] + for _fld in self.dbf.fieldNames: + _val = self[_fld] + if _val is utils.INVALID_VALUE: + _rv.append(_template % + (_fld, "None", "value cannot be decoded")) + else: + _rv.append(_template % (_fld, _val, type(_val))) + return "\n".join(_rv) + + ## protected methods + + def _write(self): + """Write data to the dbf stream. + + Note: + This isn't a public method, it's better to + use 'store' instead publically. + Be design ``_write`` method should be called + only from the `Dbf` instance. + + + """ + self._validateIndex(False) + self.dbf.stream.seek(self.position) + self.dbf.stream.write(bytes(self.toString(), + sys.getfilesystemencoding())) + # FIXME: may be move this write somewhere else? + # why we should check this condition for each record? + if self.index == len(self.dbf): + # this is the last record, + # we should write SUB (ASCII 26) + self.dbf.stream.write(b"\x1A") + + ## utility methods + + def _validateIndex(self, allowUndefined=True, checkRange=False): + """Valid ``self.index`` value. + + If ``allowUndefined`` argument is True functions does nothing + in case of ``self.index`` pointing to None object. + + """ + if self.index is None: + if not allowUndefined: + raise ValueError("Index is undefined") + elif self.index < 0: + raise ValueError("Index can't be negative (%s)" % self.index) + elif checkRange and self.index <= self.dbf.header.recordCount: + raise ValueError("There are only %d records in the DBF" % + self.dbf.header.recordCount) + + ## interface methods + + def store(self): + """Store current record in the DBF. + + If ``self.index`` is None, this record will be appended to the + records of the DBF this records belongs to; or replaced otherwise. + + """ + self._validateIndex() + if self.index is None: + self.index = len(self.dbf) + self.dbf.append(self) + else: + self.dbf[self.index] = self + + def delete(self): + """Mark method as deleted.""" + self.deleted = True + + def toString(self): + """Return string packed record values.""" +# for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData): +# + + return "".join([" *"[self.deleted]] + [ + _def.encodeValue(_dat) + for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData) + ]) + + def asList(self): + """Return a flat list of fields. + + Note: + Change of the list's values won't change + real values stored in this object. + + """ + return self.fieldData[:] + + def asDict(self): + """Return a dictionary of fields. + + Note: + Change of the dicts's values won't change + real values stored in this object. + + """ + return dict([_i for _i in zip(self.dbf.fieldNames, self.fieldData)]) + + def __getitem__(self, key): + """Return value by field name or field index.""" + if isinstance(key, int): + # integer index of the field + return self.fieldData[key] + # assuming string field name + return self.fieldData[self.dbf.indexOfFieldName(key)] + + def __setitem__(self, key, value): + """Set field value by integer index of the field or string name.""" + if isinstance(key, int): + # integer index of the field + return self.fieldData[key] + # assuming string field name + self.fieldData[self.dbf.indexOfFieldName(key)] = value + +# vim: et sts=4 sw=4 : diff --git a/tablib/packages/dbfpy3/utils.py b/tablib/packages/dbfpy3/utils.py new file mode 100644 index 0000000..856ade8 --- /dev/null +++ b/tablib/packages/dbfpy3/utils.py @@ -0,0 +1,170 @@ +"""String utilities. + +TODO: + - allow strings in getDateTime routine; +""" +"""History (most recent first): +11-feb-2007 [als] added INVALID_VALUE +10-feb-2007 [als] allow date strings padded with spaces instead of zeroes +20-dec-2005 [yc] handle long objects in getDate/getDateTime +16-dec-2005 [yc] created from ``strutil`` module. +""" + +__version__ = "$Revision: 1.4 $"[11:-2] +__date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2] + +import datetime +import time + + +def unzfill(str): + """Return a string without ASCII NULs. + + This function searchers for the first NUL (ASCII 0) occurance + and truncates string till that position. + + """ + try: + return str[:str.index(b'\0')] + except ValueError: + return str + + +def getDate(date=None): + """Return `datetime.date` instance. + + Type of the ``date`` argument could be one of the following: + None: + use current date value; + datetime.date: + this value will be returned; + datetime.datetime: + the result of the date.date() will be returned; + string: + assuming "%Y%m%d" or "%y%m%dd" format; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``date`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. + + """ + if date is None: + # use current value + return datetime.date.today() + if isinstance(date, datetime.date): + return date + if isinstance(date, datetime.datetime): + return date.date() + if isinstance(date, (int, float)): + # date is a timestamp + return datetime.date.fromtimestamp(date) + if isinstance(date, str): + date = date.replace(" ", "0") + if len(date) == 6: + # yymmdd + return datetime.date(*time.strptime(date, "%y%m%d")[:3]) + # yyyymmdd + return datetime.date(*time.strptime(date, "%Y%m%d")[:3]) + if hasattr(date, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.date(*date[:3]) + return datetime.date.fromtimestamp(date.ticks()) + + +def getDateTime(value=None): + """Return `datetime.datetime` instance. + + Type of the ``value`` argument could be one of the following: + None: + use current date value; + datetime.date: + result will be converted to the `datetime.datetime` instance + using midnight; + datetime.datetime: + ``value`` will be returned as is; + string: + *** CURRENTLY NOT SUPPORTED ***; + number: + assuming it's a timestamp (returned for example + by the time.time() call; + sequence: + assuming (year, month, day, ...) sequence; + + Additionaly, if ``value`` has callable ``ticks`` attribute, + it will be used and result of the called would be treated + as a timestamp value. + + """ + if value is None: + # use current value + return datetime.datetime.today() + if isinstance(value, datetime.datetime): + return value + if isinstance(value, datetime.date): + return datetime.datetime.fromordinal(value.toordinal()) + if isinstance(value, (int, float)): + # value is a timestamp + return datetime.datetime.fromtimestamp(value) + if isinstance(value, str): + raise NotImplementedError("Strings aren't currently implemented") + if hasattr(value, "__getitem__"): + # a sequence (assuming date/time tuple) + return datetime.datetime(*tuple(value)[:6]) + return datetime.datetime.fromtimestamp(value.ticks()) + + +class classproperty(property): + """Works in the same way as a ``property``, but for the classes.""" + + def __get__(self, obj, cls): + return self.fget(cls) + + +class _InvalidValue(object): + + """Value returned from DBF records when field validation fails + + The value is not equal to anything except for itself + and equal to all empty values: None, 0, empty string etc. + In other words, invalid value is equal to None and not equal + to None at the same time. + + This value yields zero upon explicit conversion to a number type, + empty string for string types, and False for boolean. + + """ + + def __eq__(self, other): + return not other + + def __ne__(self, other): + return not (other is self) + + def __bool__(self): + return False + + def __int__(self): + return 0 + __long__ = __int__ + + def __float__(self): + return 0.0 + + def __str__(self): + return "" + + def __unicode__(self): + return "" + + def __repr__(self): + return "<INVALID>" + +# invalid value is a constant singleton +INVALID_VALUE = _InvalidValue() + +# vim: set et sts=4 sw=4 : diff --git a/tablib/packages/unicodecsv/__init__.py b/tablib/packages/unicodecsv/__init__.py index e640987..6a20118 100644 --- a/tablib/packages/unicodecsv/__init__.py +++ b/tablib/packages/unicodecsv/__init__.py @@ -1,22 +1,65 @@ # -*- coding: utf-8 -*- import csv -from csv import * +try: + from itertools import izip +except ImportError: + izip = zip #http://semver.org/ -VERSION = (0, 8, 0) +VERSION = (0, 10, 1) __version__ = ".".join(map(str,VERSION)) -def _stringify(s, encoding): - if type(s)==unicode: - return s.encode(encoding) +pass_throughs = [ + 'register_dialect', + 'unregister_dialect', + 'get_dialect', + 'list_dialects', + 'field_size_limit', + 'Dialect', + 'excel', + 'excel_tab', + 'Sniffer', + 'QUOTE_ALL', + 'QUOTE_MINIMAL', + 'QUOTE_NONNUMERIC', + 'QUOTE_NONE', + 'Error' +] +__all__ = [ + 'reader', + 'writer', + 'DictReader', + 'DictWriter', +] + pass_throughs + +for prop in pass_throughs: + globals()[prop]=getattr(csv, prop) + +def _stringify(s, encoding, errors): + if s is None: + return '' + if isinstance(s, unicode): + return s.encode(encoding, errors) elif isinstance(s, (int , float)): pass #let csv.QUOTE_NONNUMERIC do its thing. - elif type(s) != str: + elif not isinstance(s, str): s=str(s) return s -def _stringify_list(l, encoding): - return [_stringify(s, encoding) for s in l] +def _stringify_list(l, encoding, errors='strict'): + try: + return [_stringify(s, encoding, errors) for s in iter(l)] + except TypeError as e: + raise csv.Error(str(e)) + +def _unicodify(s, encoding): + if s is None: + return None + if isinstance(s, (unicode, int, float)): + return s + elif isinstance(s, str): + return s.decode(encoding) + return s class UnicodeWriter(object): """ @@ -28,78 +71,127 @@ class UnicodeWriter(object): >>> f.seek(0) >>> r = unicodecsv.reader(f, encoding='utf-8') >>> row = r.next() - >>> print row[0], row[1] - é ñ + >>> row[0] == u'é' + True + >>> row[1] == u'ñ' + True """ - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - self.writer = csv.writer(f) - self.dialect = dialect + def __init__(self, f, dialect=csv.excel, encoding='utf-8', errors='strict', + *args, **kwds): self.encoding = encoding - self.writer = csv.writer(f, dialect=dialect, **kwds) + self.writer = csv.writer(f, dialect, *args, **kwds) + self.encoding_errors = errors def writerow(self, row): - self.writer.writerow(_stringify_list(row, self.encoding)) + self.writer.writerow(_stringify_list(row, self.encoding, self.encoding_errors)) def writerows(self, rows): for row in rows: self.writerow(row) + + @property + def dialect(self): + return self.writer.dialect writer = UnicodeWriter class UnicodeReader(object): - def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): - self.reader = csv.reader(f, dialect=dialect, **kwds) + def __init__(self, f, dialect=None, encoding='utf-8', errors='strict', + **kwds): + format_params = ['delimiter', 'doublequote', 'escapechar', 'lineterminator', 'quotechar', 'quoting', 'skipinitialspace'] + if dialect is None: + if not any([kwd_name in format_params for kwd_name in kwds.keys()]): + dialect = csv.excel + self.reader = csv.reader(f, dialect, **kwds) self.encoding = encoding + self.encoding_errors = errors def next(self): row = self.reader.next() - return [unicode(s, self.encoding) for s in row] + encoding = self.encoding + encoding_errors = self.encoding_errors + float_ = float + unicode_ = unicode + return [(value if isinstance(value, float_) else + unicode_(value, encoding, encoding_errors)) for value in row] def __iter__(self): return self + + @property + def dialect(self): + return self.reader.dialect + + @property + def line_num(self): + return self.reader.line_num reader = UnicodeReader class DictWriter(csv.DictWriter): """ >>> from cStringIO import StringIO >>> f = StringIO() - >>> w = DictWriter(f, ['a', 'b'], restval=u'î') - >>> w.writerow({'a':'1'}) - >>> w.writerow({'a':'1', 'b':u'ø'}) - >>> w.writerow({'a':u'é'}) + >>> w = DictWriter(f, ['a', u'ñ', 'b'], restval=u'î') + >>> w.writerow({'a':'1', u'ñ':'2'}) + >>> w.writerow({'a':'1', u'ñ':'2', 'b':u'ø'}) + >>> w.writerow({'a':u'é', u'ñ':'2'}) >>> f.seek(0) - >>> r = DictReader(f, fieldnames=['a'], restkey='r') - >>> r.next() == {'a':u'1', 'r':[u"î"]} + >>> r = DictReader(f, fieldnames=['a', u'ñ'], restkey='r') + >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'î']} True - >>> r.next() == {'a':u'1', 'r':[u"ø"]} + >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'\xc3\xb8']} + True + >>> r.next() == {'a': u'\xc3\xa9', u'ñ':'2', 'r': [u'\xc3\xae']} True - >>> r.next() == {'a':u'é', 'r':[u"î"]} """ - def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', *args, **kwds): - self.fieldnames = fieldnames + def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', errors='strict', *args, **kwds): self.encoding = encoding - self.restval = restval - self.writer = csv.DictWriter(csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds) - def writerow(self, d): - for fieldname in self.fieldnames: - if fieldname in d: - d[fieldname] = _stringify(d[fieldname], self.encoding) - else: - d[fieldname] = _stringify(self.restval, self.encoding) - self.writer.writerow(d) + csv.DictWriter.__init__(self, csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds) + self.writer = UnicodeWriter(csvfile, dialect, encoding=encoding, errors=errors, *args, **kwds) + self.encoding_errors = errors + + def writeheader(self): + fieldnames = _stringify_list(self.fieldnames, self.encoding, self.encoding_errors) + header = dict(zip(self.fieldnames, self.fieldnames)) + self.writerow(header) class DictReader(csv.DictReader): - def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, dialect='excel', encoding='utf-8', *args, **kwds): - self.restkey = restkey - self.encoding = encoding - self.reader = csv.DictReader(csvfile, fieldnames, restkey, restval, dialect, *args, **kwds) + """ + >>> from cStringIO import StringIO + >>> f = StringIO() + >>> w = DictWriter(f, fieldnames=['name', 'place']) + >>> w.writerow({'name': 'Cary Grant', 'place': 'hollywood'}) + >>> w.writerow({'name': 'Nathan Brillstone', 'place': u'øLand'}) + >>> w.writerow({'name': u'Willam ø. Unicoder', 'place': u'éSpandland'}) + >>> f.seek(0) + >>> r = DictReader(f, fieldnames=['name', 'place']) + >>> print r.next() == {'name': 'Cary Grant', 'place': 'hollywood'} + True + >>> print r.next() == {'name': 'Nathan Brillstone', 'place': u'øLand'} + True + >>> print r.next() == {'name': u'Willam ø. Unicoder', 'place': u'éSpandland'} + True + """ + def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, + dialect='excel', encoding='utf-8', errors='strict', *args, + **kwds): + if fieldnames is not None: + fieldnames = _stringify_list(fieldnames, encoding) + csv.DictReader.__init__(self, csvfile, fieldnames, restkey, restval, dialect, *args, **kwds) + self.reader = UnicodeReader(csvfile, dialect, encoding=encoding, + errors=errors, *args, **kwds) + if fieldnames is None and not hasattr(csv.DictReader, 'fieldnames'): + # Python 2.5 fieldnames workaround. (http://bugs.python.org/issue3436) + reader = UnicodeReader(csvfile, dialect, encoding=encoding, *args, **kwds) + self.fieldnames = _stringify_list(reader.next(), reader.encoding) + self.unicode_fieldnames = [_unicodify(f, encoding) for f in + self.fieldnames] + self.unicode_restkey = _unicodify(restkey, encoding) + def next(self): - d = self.reader.next() - for k, v in d.items(): - if k == self.restkey: - rest = v - if rest: - d[self.restkey] = [unicode(v, self.encoding) for v in rest] - else: - if v is not None: - d[k] = unicode(v, self.encoding) - return d + row = csv.DictReader.next(self) + result = dict((uni_key, row[str_key]) for (str_key, uni_key) in + izip(self.fieldnames, self.unicode_fieldnames)) + rest = row.get(self.restkey) + if rest: + result[self.unicode_restkey] = rest + return result |
