author    Kenneth Reitz <me@kennethreitz.org>  2016-02-07 07:00:55 -0500
committer Kenneth Reitz <me@kennethreitz.org>  2016-02-07 07:00:55 -0500
commit    8debeb26aca7cc9ef427c70ee9d577eb169f9560 (patch)
tree      58316fe5669e86cbea9a1044e4978ffd3b4cc308 /tablib
parent    66d9e50984e7c66fd36fd86f2b469c2dae3636ba (diff)
parent    a774789252d41522d4ec8b0e2c212aff4a33904d (diff)
download  tablib-8debeb26aca7cc9ef427c70ee9d577eb169f9560.tar.gz
Merge branch 'develop' into import_export
# Conflicts:
#	tablib/core.py
#	tablib/formats/_csv.py
#	tablib/formats/_xlsx.py
Diffstat (limited to 'tablib')
-rw-r--r--  tablib/compat.py                        |    4
-rw-r--r--  tablib/core.py                          |   98
-rw-r--r--  tablib/formats/__init__.py              |    4
-rw-r--r--  tablib/formats/_csv.py                  |   10
-rw-r--r--  tablib/formats/_dbf.py                  |   93
-rw-r--r--  tablib/formats/_html.py                 |   52
-rw-r--r--  tablib/formats/_latex.py                |  134
-rw-r--r--  tablib/formats/_tsv.py                  |   49
-rw-r--r--  tablib/formats/_xls.py                  |    2
-rw-r--r--  tablib/formats/_xlsx.py                 |    6
-rw-r--r--  tablib/packages/dbfpy/__init__.py       |    0
-rw-r--r--  tablib/packages/dbfpy/dbf.py            |  292
-rw-r--r--  tablib/packages/dbfpy/dbfnew.py         |  188
-rw-r--r--  tablib/packages/dbfpy/fields.py         |  466
-rw-r--r--  tablib/packages/dbfpy/header.py         |  275
-rw-r--r--  tablib/packages/dbfpy/record.py         |  262
-rw-r--r--  tablib/packages/dbfpy/utils.py          |  170
-rw-r--r--  tablib/packages/dbfpy3/__init__.py      |    0
-rw-r--r--  tablib/packages/dbfpy3/dbf.py           |  293
-rw-r--r--  tablib/packages/dbfpy3/dbfnew.py        |  182
-rw-r--r--  tablib/packages/dbfpy3/fields.py        |  467
-rw-r--r--  tablib/packages/dbfpy3/header.py        |  273
-rw-r--r--  tablib/packages/dbfpy3/record.py        |  266
-rw-r--r--  tablib/packages/dbfpy3/utils.py         |  170
-rw-r--r--  tablib/packages/unicodecsv/__init__.py  |  194
25 files changed, 3813 insertions(+), 137 deletions(-)
diff --git a/tablib/compat.py b/tablib/compat.py
index 919f464..e03526d 100644
--- a/tablib/compat.py
+++ b/tablib/compat.py
@@ -28,6 +28,7 @@ if is_py3:
from tablib.packages import markup3 as markup
from tablib.packages import openpyxl3 as openpyxl
from tablib.packages.odf3 import opendocument, style, text, table
+ import tablib.packages.dbfpy3 as dbfpy
import csv
from io import StringIO
@@ -36,6 +37,7 @@ if is_py3:
unicode = str
bytes = bytes
basestring = str
+ xrange = range
else:
from cStringIO import StringIO as BytesIO
@@ -49,5 +51,7 @@ else:
from tablib.packages.odf import opendocument, style, text, table
from tablib.packages import unicodecsv as csv
+ import tablib.packages.dbfpy as dbfpy
unicode = unicode
+ xrange = xrange
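The compat shims above let format modules stay version-agnostic. A minimal sketch of how shared code is expected to consume them (illustrative, not part of the diff)::

    from tablib.compat import xrange

    # xrange is the Python 2 builtin there and an alias for range on
    # Python 3, so loops in modules like _xls.py run unchanged on both.
    for i in xrange(3):
        print(i)

The dbfpy alias works the same way: Python 2 code resolves to tablib.packages.dbfpy, Python 3 code to the dbfpy3 port.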
diff --git a/tablib/core.py b/tablib/core.py
index b1de323..cf60967 100644
--- a/tablib/core.py
+++ b/tablib/core.py
@@ -45,7 +45,7 @@ class Row(object):
return repr(self._row)
def __getslice__(self, i, j):
- return self._row[i,j]
+ return self._row[i:j]
def __getitem__(self, i):
return self._row[i]
@@ -165,15 +165,9 @@ class Dataset(object):
# (column, callback) tuples
self._formatters = []
- try:
- self.headers = kwargs['headers']
- except KeyError:
- self.headers = None
+ self.headers = kwargs.get('headers')
- try:
- self.title = kwargs['title']
- except KeyError:
- self.title = None
+ self.title = kwargs.get('title')
self._register_formats()
@@ -260,6 +254,7 @@ class Dataset(object):
except AttributeError:
setattr(cls, fmt.title, property(fmt.export_set))
cls._formats[fmt.title] = (fmt.export_set, None)
+ setattr(cls, 'get_%s' % fmt.title, fmt.export_set)
except AttributeError:
cls._formats[fmt.title] = (None, None)
@@ -353,7 +348,7 @@ class Dataset(object):
A dataset object can also be imported by setting the `Dataset.dict` attribute: ::
data = tablib.Dataset()
- data.json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]'
+ data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]
"""
return self._package()
@@ -570,7 +565,7 @@ class Dataset(object):
A dataset object can also be imported by setting the :class:`Dataset.json` attribute: ::
data = tablib.Dataset()
- data.json = '[{age: 90, first_name: "John", liast_name: "Adams"}]'
+ data.json = '[{"age": 90, "first_name": "John", "last_name": "Adams"}]'
Import assumes (for now) that headers exist.
"""
@@ -585,6 +580,40 @@ class Dataset(object):
"""
pass
+ @property
+ def dbf():
+ """A dBASE representation of the :class:`Dataset` object.
+
+ A dataset object can also be imported by setting the
+ :class:`Dataset.dbf` attribute. ::
+
+ # To import data from an existing DBF file:
+ data = tablib.Dataset()
+ data.dbf = open('existing_table.dbf').read()
+
+ # To import data from an ASCII-encoded bytestring:
+ data = tablib.Dataset()
+ data.dbf = '<bytestring of tabular data>'
+
+ .. admonition:: Binary Warning
+
+ :class:`Dataset.dbf` contains binary data, so make sure to write in binary mode::
+
+ with open('output.dbf', 'wb') as f:
+ f.write(data.dbf)
+ """
+ pass
+
+
+ @property
+ def latex():
+ """A LaTeX booktabs representation of the :class:`Dataset` object. If a
+ title has been set, it will be exported as the table caption.
+
+ .. note:: This method can be used for export only.
+ """
+ pass
+
# ----
# Rows
@@ -936,12 +965,59 @@ class Dataset(object):
return _dset
+ def remove_duplicates(self):
+ """Removes all duplicate rows from the :class:`Dataset` object
+ while maintaining the original order."""
+ seen = set()
+ self._data[:] = [row for row in self._data if not (tuple(row) in seen or seen.add(tuple(row)))]
+
+
def wipe(self):
"""Removes all content and headers from the :class:`Dataset` object."""
self._data = list()
self.__headers = None
+ def subset(self, rows=None, cols=None):
+ """Returns a new instance of the :class:`Dataset`,
+ including only specified rows and columns.
+ """
+
+ # Don't return if no data
+ if not self:
+ return
+
+ if rows is None:
+ rows = list(range(self.height))
+
+ if cols is None:
+ cols = list(self.headers)
+
+ #filter out impossible rows and columns
+ rows = [row for row in rows if row in range(self.height)]
+ cols = [header for header in cols if header in self.headers]
+
+ _dset = Dataset()
+
+ #filtering rows and columns
+ _dset.headers = list(cols)
+
+ _dset._data = []
+ for row_no, row in enumerate(self._data):
+ data_row = []
+ for key in _dset.headers:
+ if key in self.headers:
+ pos = self.headers.index(key)
+ data_row.append(row[pos])
+ else:
+ raise KeyError
+
+ if row_no in rows:
+ _dset.append(row=Row(data_row))
+
+ return _dset
+
+
class Databook(object):
"""A book of :class:`Dataset` objects.
diff --git a/tablib/formats/__init__.py b/tablib/formats/__init__.py
index 5fdf279..5cca19f 100644
--- a/tablib/formats/__init__.py
+++ b/tablib/formats/__init__.py
@@ -11,5 +11,7 @@ from . import _tsv as tsv
from . import _html as html
from . import _xlsx as xlsx
from . import _ods as ods
+from . import _dbf as dbf
+from . import _latex as latex
-available = (json, xls, yaml, csv, tsv, html, xlsx, ods)
+available = (json, xls, yaml, csv, dbf, tsv, html, latex, xlsx, ods)
diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py
index 7d29318..4c00809 100644
--- a/tablib/formats/_csv.py
+++ b/tablib/formats/_csv.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
-""" Tablib - CSV Support.
+""" Tablib - *SV Support.
"""
from tablib.compat import is_py3, csv, StringIO
@@ -11,13 +11,14 @@ extensions = ('csv',)
DEFAULT_ENCODING = 'utf-8'
-
+DEFAULT_DELIMITER = ','
def export_set(dataset, **kwargs):
"""Returns CSV representation of Dataset."""
stream = StringIO()
+ kwargs.setdefault('delimiter', DEFAULT_DELIMITER)
if not is_py3:
kwargs.setdefault('encoding', DEFAULT_ENCODING)
@@ -34,6 +35,7 @@ def import_set(dset, in_stream, headers=True, **kwargs):
dset.wipe()
+ kwargs.setdefault('delimiter', DEFAULT_DELIMITER)
if not is_py3:
kwargs.setdefault('encoding', DEFAULT_ENCODING)
@@ -46,10 +48,10 @@ def import_set(dset, in_stream, headers=True, **kwargs):
dset.append(row)
-def detect(stream):
+def detect(stream, delimiter=DEFAULT_DELIMITER):
"""Returns True if given stream is valid CSV."""
try:
- csv.Sniffer().sniff(stream, delimiters=',')
+ csv.Sniffer().sniff(stream, delimiters=delimiter)
return True
except (csv.Error, TypeError):
return False
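With the delimiter parameterized, any single-character separator flows through the existing kwargs path. A sketch, assuming the get_csv accessor registered in the core.py hunk above::

    import tablib

    data = tablib.Dataset(headers=['a', 'b'])
    data.append((1, 2))

    print(data.csv)                     # default comma-separated output
    print(data.get_csv(delimiter=';'))  # explicit separator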
diff --git a/tablib/formats/_dbf.py b/tablib/formats/_dbf.py
new file mode 100644
index 0000000..41c2ef4
--- /dev/null
+++ b/tablib/formats/_dbf.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+
+""" Tablib - DBF Support.
+"""
+import tempfile
+import struct
+import os
+
+from tablib.compat import StringIO
+from tablib.compat import dbfpy
+from tablib.compat import is_py3
+
+if is_py3:
+ from tablib.packages.dbfpy3 import dbf
+ from tablib.packages.dbfpy3 import dbfnew
+ from tablib.packages.dbfpy3 import record as dbfrecord
+ import io
+else:
+ from tablib.packages.dbfpy import dbf
+ from tablib.packages.dbfpy import dbfnew
+ from tablib.packages.dbfpy import record as dbfrecord
+
+
+title = 'dbf'
+extensions = ('dbf',)
+
+DEFAULT_ENCODING = 'utf-8'
+
+def export_set(dataset):
+ """Returns DBF representation of a Dataset"""
+ new_dbf = dbfnew.dbf_new()
+ temp_file, temp_uri = tempfile.mkstemp()
+
+ # create the appropriate fields based on the contents of the first row
+ first_row = dataset[0]
+ for fieldname, field_value in zip(dataset.headers, first_row):
+ if type(field_value) in [int, float]:
+ new_dbf.add_field(fieldname, 'N', 10, 8)
+ else:
+ new_dbf.add_field(fieldname, 'C', 80)
+
+ new_dbf.write(temp_uri)
+
+ dbf_file = dbf.Dbf(temp_uri, readOnly=0)
+ for row in dataset:
+ record = dbfrecord.DbfRecord(dbf_file)
+ for fieldname, field_value in zip(dataset.headers, row):
+ record[fieldname] = field_value
+ record.store()
+
+ dbf_file.close()
+ dbf_stream = open(temp_uri, 'rb')
+ if is_py3:
+ stream = io.BytesIO(dbf_stream.read())
+ else:
+ stream = StringIO(dbf_stream.read())
+ dbf_stream.close()
+ os.remove(temp_uri)
+ return stream.getvalue()
+
+def import_set(dset, in_stream, headers=True):
+ """Returns a dataset from a DBF stream."""
+
+ dset.wipe()
+ if is_py3:
+ _dbf = dbf.Dbf(io.BytesIO(in_stream))
+ else:
+ _dbf = dbf.Dbf(StringIO(in_stream))
+ dset.headers = _dbf.fieldNames
+ for record in range(_dbf.recordCount):
+ row = [_dbf[record][f] for f in _dbf.fieldNames]
+ dset.append(row)
+
+def detect(stream):
+ """Returns True if the given stream is valid DBF"""
+ #_dbf = dbf.Table(StringIO(stream))
+ try:
+ if is_py3:
+ if type(stream) is not bytes:
+ stream = bytes(stream, 'utf-8')
+ _dbf = dbf.Dbf(io.BytesIO(stream), readOnly=True)
+ else:
+ _dbf = dbf.Dbf(StringIO(stream), readOnly=True)
+ return True
+ except (ValueError, struct.error):
+ # When we try to open up a file that's not a DBF, dbfpy raises a
+ # ValueError.
+ # When unpacking a string argument with less than 8 chars, struct.error is
+ # raised.
+ return False
+
+
+
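export_set writes through a temp file and returns raw bytes, so a typical DBF round trip looks like this (a sketch; values illustrative)::

    import tablib

    data = tablib.Dataset(headers=['NAME', 'PRICE'])
    data.append(('widget', 10.5))

    raw = data.dbf                      # bytes on Python 3, str on Python 2

    clone = tablib.Dataset()
    clone.dbf = raw                     # import_set rebuilds headers and rows

    with open('output.dbf', 'wb') as f:
        f.write(raw)                    # binary mode, per the docstring warning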
diff --git a/tablib/formats/_html.py b/tablib/formats/_html.py
index 7bb77f0..0b45f14 100644
--- a/tablib/formats/_html.py
+++ b/tablib/formats/_html.py
@@ -23,45 +23,45 @@ extensions = ('html', )
def export_set(dataset):
- """HTML representation of a Dataset."""
+ """HTML representation of a Dataset."""
- stream = StringIO()
+ stream = StringIO()
- page = markup.page()
- page.table.open()
+ page = markup.page()
+ page.table.open()
- if dataset.headers is not None:
- new_header = [item if item is not None else '' for item in dataset.headers]
+ if dataset.headers is not None:
+ new_header = [item if item is not None else '' for item in dataset.headers]
- page.thead.open()
- headers = markup.oneliner.th(new_header)
- page.tr(headers)
- page.thead.close()
+ page.thead.open()
+ headers = markup.oneliner.th(new_header)
+ page.tr(headers)
+ page.thead.close()
- for row in dataset:
- new_row = [item if item is not None else '' for item in row]
+ for row in dataset:
+ new_row = [item if item is not None else '' for item in row]
- html_row = markup.oneliner.td(new_row)
- page.tr(html_row)
+ html_row = markup.oneliner.td(new_row)
+ page.tr(html_row)
- page.table.close()
+ page.table.close()
# Allow unicode characters in output
- wrapper = codecs.getwriter("utf8")(stream)
- wrapper.writelines(unicode(page))
+ wrapper = codecs.getwriter("utf8")(stream)
+ wrapper.writelines(unicode(page))
- return stream.getvalue().decode('utf-8')
+ return stream.getvalue().decode('utf-8')
def export_book(databook):
- """HTML representation of a Databook."""
+ """HTML representation of a Databook."""
- stream = StringIO()
+ stream = StringIO()
- for i, dset in enumerate(databook._datasets):
- title = (dset.title if dset.title else 'Set %s' % (i))
- stream.write('<%s>%s</%s>\n' % (BOOK_ENDINGS, title, BOOK_ENDINGS))
- stream.write(dset.html)
- stream.write('\n')
+ for i, dset in enumerate(databook._datasets):
+ title = (dset.title if dset.title else 'Set %s' % (i))
+ stream.write('<%s>%s</%s>\n' % (BOOK_ENDINGS, title, BOOK_ENDINGS))
+ stream.write(dset.html)
+ stream.write('\n')
- return stream.getvalue()
+ return stream.getvalue()
diff --git a/tablib/formats/_latex.py b/tablib/formats/_latex.py
new file mode 100644
index 0000000..44ee101
--- /dev/null
+++ b/tablib/formats/_latex.py
@@ -0,0 +1,134 @@
+# -*- coding: utf-8 -*-
+
+"""Tablib - LaTeX table export support.
+
+ Generates a LaTeX booktabs-style table from the dataset.
+"""
+import re
+
+from tablib.compat import unicode
+
+title = 'latex'
+extensions = ('tex',)
+
+TABLE_TEMPLATE = """\
+%% Note: add \\usepackage{booktabs} to your preamble
+%%
+\\begin{table}[!htbp]
+ \\centering
+ %(CAPTION)s
+ \\begin{tabular}{%(COLSPEC)s}
+ \\toprule
+%(HEADER)s
+ %(MIDRULE)s
+%(BODY)s
+ \\bottomrule
+ \\end{tabular}
+\\end{table}
+"""
+
+TEX_RESERVED_SYMBOLS_MAP = dict([
+ ('\\', '\\textbackslash{}'),
+ ('{', '\\{'),
+ ('}', '\\}'),
+ ('$', '\\$'),
+ ('&', '\\&'),
+ ('#', '\\#'),
+ ('^', '\\textasciicircum{}'),
+ ('_', '\\_'),
+ ('~', '\\textasciitilde{}'),
+ ('%', '\\%'),
+])
+
+TEX_RESERVED_SYMBOLS_RE = re.compile(
+ '(%s)' % '|'.join(map(re.escape, TEX_RESERVED_SYMBOLS_MAP.keys())))
+
+
+def export_set(dataset):
+ """Returns LaTeX representation of dataset
+
+ :param dataset: dataset to serialize
+ :type dataset: tablib.core.Dataset
+ """
+
+ caption = '\\caption{%s}' % dataset.title if dataset.title else '%'
+ colspec = _colspec(dataset.width)
+ header = _serialize_row(dataset.headers) if dataset.headers else ''
+ midrule = _midrule(dataset.width)
+ body = '\n'.join([_serialize_row(row) for row in dataset])
+ return TABLE_TEMPLATE % dict(CAPTION=caption, COLSPEC=colspec,
+ HEADER=header, MIDRULE=midrule, BODY=body)
+
+
+def _colspec(dataset_width):
+ """Generates the column specification for the LaTeX `tabular` environment
+ based on the dataset width.
+
+ The first column is justified to the left, all further columns are aligned
+ to the right.
+
+ .. note:: This is only a heuristic and most probably has to be fine-tuned
+ post export. Column alignment should depend on the data type, e.g., textual
+ content should usually be aligned to the left while numeric content almost
+ always should be aligned to the right.
+
+ :param dataset_width: width of the dataset
+ """
+
+ spec = 'l'
+ for _ in range(1, dataset_width):
+ spec += 'r'
+ return spec
+
+
+def _midrule(dataset_width):
+ """Generates the table `midrule`, which may be composed of several
+ `cmidrules`.
+
+ :param dataset_width: width of the dataset to serialize
+ """
+
+ if not dataset_width or dataset_width == 1:
+ return '\\midrule'
+ return ' '.join([_cmidrule(colindex, dataset_width) for colindex in
+ range(1, dataset_width + 1)])
+
+
+def _cmidrule(colindex, dataset_width):
+ """Generates the `cmidrule` for a single column with appropriate trimming
+ based on the column position.
+
+ :param colindex: Column index
+ :param dataset_width: width of the dataset
+ """
+
+ rule = '\\cmidrule(%s){%d-%d}'
+ if colindex == 1:
+ # Rule of first column is trimmed on the right
+ return rule % ('r', colindex, colindex)
+ if colindex == dataset_width:
+ # Rule of last column is trimmed on the left
+ return rule % ('l', colindex, colindex)
+ # Inner columns are trimmed on the left and right
+ return rule % ('lr', colindex, colindex)
+
+
+def _serialize_row(row):
+ """Returns string representation of a single row.
+
+ :param row: single dataset row
+ """
+
+ new_row = [_escape_tex_reserved_symbols(unicode(item)) if item else '' for
+ item in row]
+ return 6 * ' ' + ' & '.join(new_row) + ' \\\\'
+
+
+def _escape_tex_reserved_symbols(input):
+ """Escapes all TeX reserved symbols ('_', '~', etc.) in a string.
+
+ :param input: String to escape
+ """
+ def replace(match):
+ return TEX_RESERVED_SYMBOLS_MAP[match.group()]
+ return TEX_RESERVED_SYMBOLS_RE.sub(replace, input)
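A usage sketch for the new exporter (export-only, per the core.py docstring above; values illustrative)::

    import tablib

    data = tablib.Dataset(headers=['name', 'price'], title='Prices')
    data.append(('widget', 10.5))
    data.append(('50% off', 5.25))      # '%' is escaped to '\%' on export

    print(data.latex)                   # booktabs table with \caption{Prices}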
diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py
index 8ef2b67..9380b3b 100644
--- a/tablib/formats/_tsv.py
+++ b/tablib/formats/_tsv.py
@@ -3,57 +3,28 @@
""" Tablib - TSV (Tab Separated Values) Support.
"""
-from tablib.compat import is_py3, csv, StringIO
-
-
+from tablib.formats._csv import (
+ export_set as export_set_wrapper,
+ import_set as import_set_wrapper,
+ detect as detect_wrapper,
+)
title = 'tsv'
extensions = ('tsv',)
DEFAULT_ENCODING = 'utf-8'
+DELIMITER = '\t'
def export_set(dataset):
- """Returns a TSV representation of Dataset."""
-
- stream = StringIO()
-
- if is_py3:
- _tsv = csv.writer(stream, delimiter='\t')
- else:
- _tsv = csv.writer(stream, encoding=DEFAULT_ENCODING, delimiter='\t')
-
- for row in dataset._package(dicts=False):
- _tsv.writerow(row)
-
- return stream.getvalue()
+ """Returns TSV representation of Dataset."""
+ return export_set_wrapper(dataset, delimiter=DELIMITER)
def import_set(dset, in_stream, headers=True):
"""Returns dataset from TSV stream."""
-
- dset.wipe()
-
- if is_py3:
- rows = csv.reader(in_stream.splitlines(), delimiter='\t')
- else:
- rows = csv.reader(in_stream.splitlines(), delimiter='\t',
- encoding=DEFAULT_ENCODING)
-
- for i, row in enumerate(rows):
- # Skip empty rows
- if not row:
- continue
-
- if (i == 0) and (headers):
- dset.headers = row
- else:
- dset.append(row)
+ return import_set_wrapper(dset, in_stream, headers=headers, delimiter=DELIMITER)
def detect(stream):
"""Returns True if given stream is valid TSV."""
- try:
- csv.Sniffer().sniff(stream, delimiters='\t')
- return True
- except (csv.Error, TypeError):
- return False
+ return detect_wrapper(stream, delimiter=DELIMITER)
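The TSV module is now a thin wrapper over the CSV implementation. The same pattern would support any single-character delimiter; a hypothetical pipe-separated variant (module name and constants are illustrative only)::

    from tablib.formats._csv import (
        export_set as export_set_wrapper,
        import_set as import_set_wrapper,
        detect as detect_wrapper,
    )

    title = 'psv'            # hypothetical pipe-separated format
    extensions = ('psv',)
    DELIMITER = '|'

    def export_set(dataset):
        return export_set_wrapper(dataset, delimiter=DELIMITER)

    def import_set(dset, in_stream, headers=True):
        return import_set_wrapper(dset, in_stream, headers=headers,
                                  delimiter=DELIMITER)

    def detect(stream):
        return detect_wrapper(stream, delimiter=DELIMITER)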
diff --git a/tablib/formats/_xls.py b/tablib/formats/_xls.py
index 67b87ea..787907a 100644
--- a/tablib/formats/_xls.py
+++ b/tablib/formats/_xls.py
@@ -5,7 +5,7 @@
import sys
-from tablib.compat import BytesIO, xlwt, xlrd, XLRDError
+from tablib.compat import BytesIO, xlwt, xlrd, XLRDError, xrange
import tablib
title = 'xls'
diff --git a/tablib/formats/_xlsx.py b/tablib/formats/_xlsx.py
index 0cd8500..d9d3d57 100644
--- a/tablib/formats/_xlsx.py
+++ b/tablib/formats/_xlsx.py
@@ -69,7 +69,7 @@ def import_set(dset, in_stream, headers=True):
dset.wipe()
- xls_book = openpyxl.reader.excel.load_workbook(in_stream)
+ xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream))
sheet = xls_book.get_active_sheet()
dset.title = sheet.title
@@ -87,7 +87,7 @@ def import_book(dbook, in_stream, headers=True):
dbook.wipe()
- xls_book = openpyxl.reader.excel.load_workbook(in_stream)
+ xls_book = openpyxl.reader.excel.load_workbook(BytesIO(in_stream))
for sheet in xls_book.worksheets:
data = tablib.Dataset()
@@ -115,8 +115,6 @@ def dset_sheet(dataset, ws, freeze_panes=True):
row_number = i + 1
for j, col in enumerate(row):
col_idx = get_column_letter(j + 1)
- # We want to freeze the column after the last column
- frzn_col_idx = get_column_letter(j + 2)
# bold headers
if (row_number == 1) and dataset.headers:
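Since load_workbook is now fed a BytesIO, the importer expects raw bytes rather than a file object, so callers should read in binary mode. A sketch (filename illustrative)::

    import tablib

    with open('report.xlsx', 'rb') as f:
        data = tablib.Dataset()
        data.xlsx = f.read()            # wrapped in BytesIO internally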
diff --git a/tablib/packages/dbfpy/__init__.py b/tablib/packages/dbfpy/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tablib/packages/dbfpy/__init__.py
diff --git a/tablib/packages/dbfpy/dbf.py b/tablib/packages/dbfpy/dbf.py
new file mode 100644
index 0000000..b3d2e21
--- /dev/null
+++ b/tablib/packages/dbfpy/dbf.py
@@ -0,0 +1,292 @@
+#! /usr/bin/env python
+"""DBF accessing helpers.
+
+FIXME: more documentation needed
+
+Examples:
+
+ Create new table, setup structure, add records:
+
+ dbf = Dbf(filename, new=True)
+ dbf.addField(
+ ("NAME", "C", 15),
+ ("SURNAME", "C", 25),
+ ("INITIALS", "C", 10),
+ ("BIRTHDATE", "D"),
+ )
+ for (n, s, i, b) in (
+ ("John", "Miller", "YC", (1980, 10, 11)),
+ ("Andy", "Larkin", "", (1980, 4, 11)),
+ ):
+ rec = dbf.newRecord()
+ rec["NAME"] = n
+ rec["SURNAME"] = s
+ rec["INITIALS"] = i
+ rec["BIRTHDATE"] = b
+ rec.store()
+ dbf.close()
+
+ Open existed dbf, read some data:
+
+ dbf = Dbf(filename, True)
+ for rec in dbf:
+ for fldName in dbf.fieldNames:
+ print '%s:\t %s (%s)' % (fldName, rec[fldName],
+ type(rec[fldName]))
+ print
+ dbf.close()
+
+"""
+"""History (most recent first):
+11-feb-2007 [als] export INVALID_VALUE;
+ Dbf: added .ignoreErrors, .INVALID_VALUE
+04-jul-2006 [als] added export declaration
+20-dec-2005 [yc] removed fromStream and newDbf methods:
+ use argument of __init__ call must be used instead;
+ added class fields pointing to the header and
+ record classes.
+17-dec-2005 [yc] split to several modules; reimplemented
+13-dec-2005 [yc] adapted to the changes of the `strutil` module.
+13-sep-2002 [als] support FoxPro Timestamp datatype
+15-nov-1999 [jjk] documentation updates, add demo
+24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks
+08-jun-1998 [jjk] fix problems, add more features
+20-feb-1998 [jjk] fix problems, add more features
+19-feb-1998 [jjk] add create/write capabilities
+18-feb-1998 [jjk] from dbfload.py
+"""
+
+__version__ = "$Revision: 1.7 $"[11:-2]
+__date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2]
+__author__ = "Jeff Kunce <kuncej@mail.conservation.state.mo.us>"
+
+__all__ = ["Dbf"]
+
+from . import header
+from . import record
+from .utils import INVALID_VALUE
+
+class Dbf(object):
+ """DBF accessor.
+
+ FIXME:
+ docs and examples needed (don't forget to tell
+ about problems adding new fields on the fly)
+
+ Implementation notes:
+ ``_new`` field is used to indicate whether this is
+ a new data table. `addField` could be used only for
+ the new tables! If at least one record was appended
+ to the table, its structure can't be changed.
+
+ """
+
+ __slots__ = ("name", "header", "stream",
+ "_changed", "_new", "_ignore_errors")
+
+ HeaderClass = header.DbfHeader
+ RecordClass = record.DbfRecord
+ INVALID_VALUE = INVALID_VALUE
+
+ ## initialization and creation helpers
+
+ def __init__(self, f, readOnly=False, new=False, ignoreErrors=False):
+ """Initialize instance.
+
+ Arguments:
+ f:
+ Filename or file-like object.
+ new:
+ True if new data table must be created. Assume
+ data table exists if this argument is False.
+ readOnly:
+ if the ``f`` argument is a string, the file will
+ be opened in read-only mode; in other cases
+ this argument is ignored. It is also ignored
+ if the ``new`` argument is True.
+ headerObj:
+ `header.DbfHeader` instance or None. If this argument
+ is None, new empty header will be used with the
+ all fields set by default.
+ ignoreErrors:
+ if set, failing field value conversion will return
+ ``INVALID_VALUE`` instead of raising conversion error.
+
+ """
+ if isinstance(f, basestring):
+ # a filename
+ self.name = f
+ if new:
+ # new table (table file must be
+ # created or opened and truncated)
+ self.stream = file(f, "w+b")
+ else:
+ # table file must exist
+ self.stream = file(f, ("r+b", "rb")[bool(readOnly)])
+ else:
+ # a stream
+ self.name = getattr(f, "name", "")
+ self.stream = f
+ if new:
+ # if this is a new table, header will be empty
+ self.header = self.HeaderClass()
+ else:
+ # or instantiated using stream
+ self.header = self.HeaderClass.fromStream(self.stream)
+ self.ignoreErrors = ignoreErrors
+ self._new = bool(new)
+ self._changed = False
+
+ ## properties
+
+ closed = property(lambda self: self.stream.closed)
+ recordCount = property(lambda self: self.header.recordCount)
+ fieldNames = property(
+ lambda self: [_fld.name for _fld in self.header.fields])
+ fieldDefs = property(lambda self: self.header.fields)
+ changed = property(lambda self: self._changed or self.header.changed)
+
+ def ignoreErrors(self, value):
+ """Update `ignoreErrors` flag on the header object and self"""
+ self.header.ignoreErrors = self._ignore_errors = bool(value)
+ ignoreErrors = property(
+ lambda self: self._ignore_errors,
+ ignoreErrors,
+ doc="""Error processing mode for DBF field value conversion
+
+ if set, failing field value conversion will return
+ ``INVALID_VALUE`` instead of raising conversion error.
+
+ """)
+
+ ## protected methods
+
+ def _fixIndex(self, index):
+ """Return fixed index.
+
+ This method fails if index isn't a numeric object
+ (long or int). Or index isn't in a valid range
+ (less or equal to the number of records in the db).
+
+ If ``index`` is a negative number, it will be
+ treated as a negative indexes for list objects.
+
+ Return:
+ Return value is a numeric object meaning a valid index.
+
+ """
+ if not isinstance(index, (int, long)):
+ raise TypeError("Index must be a numeric object")
+ if index < 0:
+ # index from the right side
+ # fix it to the left-side index
+ index += len(self) + 1
+ if index >= len(self):
+ raise IndexError("Record index out of range")
+ return index
+
+ ## interface methods
+
+ def close(self):
+ self.flush()
+ self.stream.close()
+
+ def flush(self):
+ """Flush data to the associated stream."""
+ if self.changed:
+ self.header.setCurrentDate()
+ self.header.write(self.stream)
+ self.stream.flush()
+ self._changed = False
+
+ def indexOfFieldName(self, name):
+ """Index of field named ``name``."""
+ # FIXME: move this to header class
+ return self.header.fields.index(name)
+
+ def newRecord(self):
+ """Return new record, which belong to this table."""
+ return self.RecordClass(self)
+
+ def append(self, record):
+ """Append ``record`` to the database."""
+ record.index = self.header.recordCount
+ record._write()
+ self.header.recordCount += 1
+ self._changed = True
+ self._new = False
+
+ def addField(self, *defs):
+ """Add field definitions.
+
+ For more information see `header.DbfHeader.addField`.
+
+ """
+ if self._new:
+ self.header.addField(*defs)
+ else:
+ raise TypeError("At least one record was added, "
+ "structure can't be changed")
+
+ ## 'magic' methods (representation and sequence interface)
+
+ def __repr__(self):
+ return "Dbf stream '%s'\n" % self.stream + repr(self.header)
+
+ def __len__(self):
+ """Return number of records."""
+ return self.recordCount
+
+ def __getitem__(self, index):
+ """Return `DbfRecord` instance."""
+ return self.RecordClass.fromStream(self, self._fixIndex(index))
+
+ def __setitem__(self, index, record):
+ """Write `DbfRecord` instance to the stream."""
+ record.index = self._fixIndex(index)
+ record._write()
+ self._changed = True
+ self._new = False
+
+ #def __del__(self):
+ # """Flush stream upon deletion of the object."""
+ # self.flush()
+
+
+def demoRead(filename):
+ _dbf = Dbf(filename, True)
+ for _rec in _dbf:
+ print
+ print(repr(_rec))
+ _dbf.close()
+
+def demoCreate(filename):
+ _dbf = Dbf(filename, new=True)
+ _dbf.addField(
+ ("NAME", "C", 15),
+ ("SURNAME", "C", 25),
+ ("INITIALS", "C", 10),
+ ("BIRTHDATE", "D"),
+ )
+ for (_n, _s, _i, _b) in (
+ ("John", "Miller", "YC", (1981, 1, 2)),
+ ("Andy", "Larkin", "AL", (1982, 3, 4)),
+ ("Bill", "Clinth", "", (1983, 5, 6)),
+ ("Bobb", "McNail", "", (1984, 7, 8)),
+ ):
+ _rec = _dbf.newRecord()
+ _rec["NAME"] = _n
+ _rec["SURNAME"] = _s
+ _rec["INITIALS"] = _i
+ _rec["BIRTHDATE"] = _b
+ _rec.store()
+ print(repr(_dbf))
+ _dbf.close()
+
+if (__name__=='__main__'):
+ import sys
+ _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf"
+ demoCreate(_name)
+ demoRead(_name)
+
+# vim: set et sw=4 sts=4 :
diff --git a/tablib/packages/dbfpy/dbfnew.py b/tablib/packages/dbfpy/dbfnew.py
new file mode 100644
index 0000000..dea7e52
--- /dev/null
+++ b/tablib/packages/dbfpy/dbfnew.py
@@ -0,0 +1,188 @@
+#!/usr/bin/python
+""".DBF creation helpers.
+
+Note: this is a legacy interface. New code should use Dbf class
+ for table creation (see examples in dbf.py)
+
+TODO:
+ - handle Memo fields.
+ - check length of the fields according to the
+ `http://www.clicketyclick.dk/databases/xbase/format/data_types.html`
+
+"""
+"""History (most recent first)
+04-jul-2006 [als] added export declaration;
+ updated for dbfpy 2.0
+15-dec-2005 [yc] define dbf_new.__slots__
+14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings;
+ dbf_new now is a new class (inherited from object)
+??-jun-2000 [--] added by Hans Fiby
+"""
+
+__version__ = "$Revision: 1.4 $"[11:-2]
+__date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2]
+
+__all__ = ["dbf_new"]
+
+from dbf import *
+from fields import *
+from header import *
+from record import *
+
+class _FieldDefinition(object):
+ """Field definition.
+
+ This is a simple structure, which contains ``name``, ``type``,
+ ``len``, ``dec`` and ``cls`` fields.
+
+ Objects also implement get/setitem magic functions, so fields
+ could be accessed via the sequence interface, where 'name' has
+ index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and
+ 'cls' could be located at index 4.
+
+ """
+
+ __slots__ = "name", "type", "len", "dec", "cls"
+
+ # WARNING: be attentive - dictionaries are mutable!
+ FLD_TYPES = {
+ # type: (cls, len)
+ "C": (DbfCharacterFieldDef, None),
+ "N": (DbfNumericFieldDef, None),
+ "L": (DbfLogicalFieldDef, 1),
+ # FIXME: support memos
+ # "M": (DbfMemoFieldDef),
+ "D": (DbfDateFieldDef, 8),
+ # FIXME: I'm not sure length should be 14 characters!
+ # but temporary I use it, cuz date is 8 characters
+ # and time 6 (hhmmss)
+ "T": (DbfDateTimeFieldDef, 14),
+ }
+
+ def __init__(self, name, type, len=None, dec=0):
+ _cls, _len = self.FLD_TYPES[type]
+ if _len is None:
+ if len is None:
+ raise ValueError("Field length must be defined")
+ _len = len
+ self.name = name
+ self.type = type
+ self.len = _len
+ self.dec = dec
+ self.cls = _cls
+
+ def getDbfField(self):
+ "Return `DbfFieldDef` instance from the current definition."
+ return self.cls(self.name, self.len, self.dec)
+
+ def appendToHeader(self, dbfh):
+ """Create a `DbfFieldDef` instance and append it to the dbf header.
+
+ Arguments:
+ dbfh: `DbfHeader` instance.
+
+ """
+ _dbff = self.getDbfField()
+ dbfh.addField(_dbff)
+
+
+class dbf_new(object):
+ """New .DBF creation helper.
+
+ Example Usage:
+
+ dbfn = dbf_new()
+ dbfn.add_field("name",'C',80)
+ dbfn.add_field("price",'N',10,2)
+ dbfn.add_field("date",'D',8)
+ dbfn.write("tst.dbf")
+
+ Note:
+ This module cannot handle Memo-fields,
+ they are special.
+
+ """
+
+ __slots__ = ("fields",)
+
+ FieldDefinitionClass = _FieldDefinition
+
+ def __init__(self):
+ self.fields = []
+
+ def add_field(self, name, typ, len, dec=0):
+ """Add field definition.
+
+ Arguments:
+ name:
+ field name (str object). field name must not
+ contain ASCII NULs and its length shouldn't
+ exceed 10 characters.
+ typ:
+ type of the field. this must be a single character
+ from the "CNLMDT" set meaning character, numeric,
+ logical, memo, date and date/time respectively.
+ len:
+ length of the field. this argument is used only for
+ the character and numeric fields. all other fields
+ have fixed length.
+ FIXME: use None as a default for this argument?
+ dec:
+ decimal precision. used only for the numeric fields.
+
+ """
+ self.fields.append(self.FieldDefinitionClass(name, typ, len, dec))
+
+ def write(self, filename):
+ """Create empty .DBF file using current structure."""
+ _dbfh = DbfHeader()
+ _dbfh.setCurrentDate()
+ for _fldDef in self.fields:
+ _fldDef.appendToHeader(_dbfh)
+ _dbfStream = file(filename, "wb")
+ _dbfh.write(_dbfStream)
+ _dbfStream.close()
+
+ def write_stream(self, stream):
+ _dbfh = DbfHeader()
+ _dbfh.setCurrentDate()
+ for _fldDef in self.fields:
+ _fldDef.appendToHeader(_dbfh)
+ _dbfh.write(stream)
+
+
+if (__name__=='__main__'):
+ # create a new DBF-File
+ dbfn=dbf_new()
+ dbfn.add_field("name",'C',80)
+ dbfn.add_field("price",'N',10,2)
+ dbfn.add_field("date",'D',8)
+ dbfn.write("tst.dbf")
+ # test new dbf
+ print "*** created tst.dbf: ***"
+ dbft = Dbf('tst.dbf', readOnly=0)
+ print repr(dbft)
+ # add a record
+ rec=DbfRecord(dbft)
+ rec['name']='something'
+ rec['price']=10.5
+ rec['date']=(2000,1,12)
+ rec.store()
+ # add another record
+ rec=DbfRecord(dbft)
+ rec['name']='foo and bar'
+ rec['price']=12234
+ rec['date']=(1992,7,15)
+ rec.store()
+
+ # show the records
+ print "*** inserted 2 records into tst.dbf: ***"
+ print repr(dbft)
+ for i1 in range(len(dbft)):
+ rec = dbft[i1]
+ for fldName in dbft.fieldNames:
+ print '%s:\t %s'%(fldName, rec[fldName])
+ print
+ dbft.close()
+
+# vim: set et sts=4 sw=4 :
diff --git a/tablib/packages/dbfpy/fields.py b/tablib/packages/dbfpy/fields.py
new file mode 100644
index 0000000..69cd436
--- /dev/null
+++ b/tablib/packages/dbfpy/fields.py
@@ -0,0 +1,466 @@
+"""DBF fields definitions.
+
+TODO:
+ - make memos work
+"""
+"""History (most recent first):
+26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes
+05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date
+16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point
+ in the value to select float or integer return type
+13-mar-2008 [als] check field name length in constructor
+11-feb-2007 [als] handle value conversion errors
+10-feb-2007 [als] DbfFieldDef: added .rawFromRecord()
+01-dec-2006 [als] Timestamp columns use None for empty values
+31-oct-2006 [als] support field types 'F' (float), 'I' (integer)
+ and 'Y' (currency);
+ automate export and registration of field classes
+04-jul-2006 [als] added export declaration
+10-mar-2006 [als] decode empty values for Date and Logical fields;
+ show field name in errors
+10-mar-2006 [als] fix Numeric value decoding: according to spec,
+ value always is string representation of the number;
+ ensure that encoded Numeric value fits into the field
+20-dec-2005 [yc] use field names in upper case
+15-dec-2005 [yc] field definitions moved from `dbf`.
+"""
+
+__version__ = "$Revision: 1.14 $"[11:-2]
+__date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2]
+
+__all__ = ["lookupFor",] # field classes added at the end of the module
+
+import datetime
+import struct
+import sys
+
+from . import utils
+
+## abstract definitions
+
+class DbfFieldDef(object):
+ """Abstract field definition.
+
+ Child classes must override ``type`` class attribute to provide datatype
+ information of the field definition. For more info about types visit
+ `http://www.clicketyclick.dk/databases/xbase/format/data_types.html`
+
+ Also child classes must override ``defaultValue`` field to provide
+ default value for the field value.
+
+ If a child class has fixed length, the ``length`` class attribute must be
+ overridden and set to a valid value. A None value means the field
+ isn't of fixed length.
+
+ Note: ``name`` field must not be changed after instantiation.
+
+ """
+
+ __slots__ = ("name", "length", "decimalCount",
+ "start", "end", "ignoreErrors")
+
+ # length of the field, None in case of variable-length field,
+ # or a number if this field is a fixed-length field
+ length = None
+
+ # field type. for more information about fields types visit
+ # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html`
+ # must be overridden in child classes
+ typeCode = None
+
+ # default value for the field. this field must be
+ # overriden in child classes
+ defaultValue = None
+
+ def __init__(self, name, length=None, decimalCount=None,
+ start=None, stop=None, ignoreErrors=False,
+ ):
+ """Initialize instance."""
+ assert self.typeCode is not None, "Type code must be overridden"
+ assert self.defaultValue is not None, "Default value must be overridden"
+ ## fix arguments
+ if len(name) > 10:
+ raise ValueError("Field name \"%s\" is too long" % name)
+ name = str(name).upper()
+ if self.__class__.length is None:
+ if length is None:
+ raise ValueError("[%s] Length isn't specified" % name)
+ length = int(length)
+ if length <= 0:
+ raise ValueError("[%s] Length must be a positive integer"
+ % name)
+ else:
+ length = self.length
+ if decimalCount is None:
+ decimalCount = 0
+ ## set fields
+ self.name = name
+ # FIXME: validate length according to the specification at
+ # http://www.clicketyclick.dk/databases/xbase/format/data_types.html
+ self.length = length
+ self.decimalCount = decimalCount
+ self.ignoreErrors = ignoreErrors
+ self.start = start
+ self.end = stop
+
+ def __cmp__(self, other):
+ return cmp(self.name, str(other).upper())
+
+ def __hash__(self):
+ return hash(self.name)
+
+ def fromString(cls, string, start, ignoreErrors=False):
+ """Decode dbf field definition from the string data.
+
+ Arguments:
+ string:
+ a string, dbf definition is decoded from. length of
+ the string must be 32 bytes.
+ start:
+ position in the database file.
+ ignoreErrors:
+ initial error processing mode for the new field (boolean)
+
+ """
+ assert len(string) == 32
+ _length = ord(string[16])
+ return cls(utils.unzfill(string)[:11], _length, ord(string[17]),
+ start, start + _length, ignoreErrors=ignoreErrors)
+ fromString = classmethod(fromString)
+
+ def toString(self):
+ """Return encoded field definition.
+
+ Return:
+ Return value is a string object containing encoded
+ definition of this field.
+
+ """
+ if sys.version_info < (2, 4):
+ # earlier versions did not support padding character
+ _name = self.name[:11] + "\0" * (11 - len(self.name))
+ else:
+ _name = self.name.ljust(11, '\0')
+ return (
+ _name +
+ self.typeCode +
+ #data address
+ chr(0) * 4 +
+ chr(self.length) +
+ chr(self.decimalCount) +
+ chr(0) * 14
+ )
+
+ def __repr__(self):
+ return "%-10s %1s %3d %3d" % self.fieldInfo()
+
+ def fieldInfo(self):
+ """Return field information.
+
+ Return:
+ Return value is a (name, type, length, decimals) tuple.
+
+ """
+ return (self.name, self.typeCode, self.length, self.decimalCount)
+
+ def rawFromRecord(self, record):
+ """Return a "raw" field value from the record string."""
+ return record[self.start:self.end]
+
+ def decodeFromRecord(self, record):
+ """Return decoded field value from the record string."""
+ try:
+ return self.decodeValue(self.rawFromRecord(record))
+ except:
+ if self.ignoreErrors:
+ return utils.INVALID_VALUE
+ else:
+ raise
+
+ def decodeValue(self, value):
+ """Return decoded value from string value.
+
+ This method shouldn't be used publicly. It's called from the
+ `decodeFromRecord` method.
+
+ This is an abstract method and it must be overridden in child classes.
+ """
+ raise NotImplementedError
+
+ def encodeValue(self, value):
+ """Return str object containing encoded field value.
+
+ This is an abstract method and it must be overridden in child classes.
+ """
+ raise NotImplementedError
+
+## real classes
+
+class DbfCharacterFieldDef(DbfFieldDef):
+ """Definition of the character field."""
+
+ typeCode = "C"
+ defaultValue = ""
+
+ def decodeValue(self, value):
+ """Return string object.
+
+ Return value is a ``value`` argument with stripped right spaces.
+
+ """
+ return value.rstrip(" ")
+
+ def encodeValue(self, value):
+ """Return raw data string encoded from a ``value``."""
+ return str(value)[:self.length].ljust(self.length)
+
+
+class DbfNumericFieldDef(DbfFieldDef):
+ """Definition of the numeric field."""
+
+ typeCode = "N"
+ # XXX: now I'm not sure it was a good idea to make a class field
+ # `defaultValue` instead of a generic method as it was implemented
+ # previously -- it's ok with all types except number, cuz
+ # if self.decimalCount is 0, we should return 0 and 0.0 otherwise.
+ defaultValue = 0
+
+ def decodeValue(self, value):
+ """Return a number decoded from ``value``.
+
+ If decimals is zero, value will be decoded as an integer;
+ or as a float otherwise.
+
+ Return:
+ Return value is a int (long) or float instance.
+
+ """
+ value = value.strip(" \0")
+ if "." in value:
+ # a float (has decimal separator)
+ return float(value)
+ elif value:
+ # must be an integer
+ return int(value)
+ else:
+ return 0
+
+ def encodeValue(self, value):
+ """Return string containing encoded ``value``."""
+ _rv = ("%*.*f" % (self.length, self.decimalCount, value))
+ if len(_rv) > self.length:
+ _ppos = _rv.find(".")
+ if 0 <= _ppos <= self.length:
+ _rv = _rv[:self.length]
+ else:
+ raise ValueError("[%s] Numeric overflow: %s (field width: %i)"
+ % (self.name, _rv, self.length))
+ return _rv
+
+class DbfFloatFieldDef(DbfNumericFieldDef):
+ """Definition of the float field - same as numeric."""
+
+ typeCode = "F"
+
+class DbfIntegerFieldDef(DbfFieldDef):
+ """Definition of the integer field."""
+
+ typeCode = "I"
+ length = 4
+ defaultValue = 0
+
+ def decodeValue(self, value):
+ """Return an integer number decoded from ``value``."""
+ return struct.unpack("<i", value)[0]
+
+ def encodeValue(self, value):
+ """Return string containing encoded ``value``."""
+ return struct.pack("<i", int(value))
+
+class DbfCurrencyFieldDef(DbfFieldDef):
+ """Definition of the currency field."""
+
+ typeCode = "Y"
+ length = 8
+ defaultValue = 0.0
+
+ def decodeValue(self, value):
+ """Return float number decoded from ``value``."""
+ return struct.unpack("<q", value)[0] / 10000.
+
+ def encodeValue(self, value):
+ """Return string containing encoded ``value``."""
+ return struct.pack("<q", round(value * 10000))
+
+class DbfLogicalFieldDef(DbfFieldDef):
+ """Definition of the logical field."""
+
+ typeCode = "L"
+ defaultValue = -1
+ length = 1
+
+ def decodeValue(self, value):
+ """Return True, False or -1 decoded from ``value``."""
+ # Note: value always is 1-char string
+ if value == "?":
+ return -1
+ if value in "NnFf ":
+ return False
+ if value in "YyTt":
+ return True
+ raise ValueError("[%s] Invalid logical value %r" % (self.name, value))
+
+ def encodeValue(self, value):
+ """Return a character from the "TF?" set.
+
+ Return:
+ Return value is "T" if ``value`` is True
+ "?" if value is -1 or False otherwise.
+
+ """
+ if value is True:
+ return "T"
+ if value == -1:
+ return "?"
+ return "F"
+
+
+class DbfMemoFieldDef(DbfFieldDef):
+ """Definition of the memo field.
+
+ Note: memos aren't currently completely supported.
+
+ """
+
+ typeCode = "M"
+ defaultValue = " " * 10
+ length = 10
+
+ def decodeValue(self, value):
+ """Return int .dbt block number decoded from the string object."""
+ #return int(value)
+ raise NotImplementedError
+
+ def encodeValue(self, value):
+ """Return raw data string encoded from a ``value``.
+
+ Note: this is an internal method.
+
+ """
+ #return str(value)[:self.length].ljust(self.length)
+ raise NotImplementedError
+
+
+class DbfDateFieldDef(DbfFieldDef):
+ """Definition of the date field."""
+
+ typeCode = "D"
+ defaultValue = utils.classproperty(lambda cls: datetime.date.today())
+ # "yyyymmdd" gives us 8 characters
+ length = 8
+
+ def decodeValue(self, value):
+ """Return a ``datetime.date`` instance decoded from ``value``."""
+ if value.strip():
+ return utils.getDate(value)
+ else:
+ return None
+
+ def encodeValue(self, value):
+ """Return a string-encoded value.
+
+ ``value`` argument should be a value suitable for the
+ `utils.getDate` call.
+
+ Return:
+ Return value is a string in format "yyyymmdd".
+
+ """
+ if value:
+ return utils.getDate(value).strftime("%Y%m%d")
+ else:
+ return " " * self.length
+
+
+class DbfDateTimeFieldDef(DbfFieldDef):
+ """Definition of the timestamp field."""
+
+ # a difference between JDN (Julian Day Number)
+ # and GDN (Gregorian Day Number). note, that GDN < JDN
+ JDN_GDN_DIFF = 1721425
+ typeCode = "T"
+ defaultValue = utils.classproperty(lambda cls: datetime.datetime.now())
+ # two 32-bits integers representing JDN and amount of
+ # milliseconds respectively gives us 8 bytes.
+ # note, that values must be encoded in LE byteorder.
+ length = 8
+
+ def decodeValue(self, value):
+ """Return a `datetime.datetime` instance."""
+ assert len(value) == self.length
+ # LE byteorder
+ _jdn, _msecs = struct.unpack("<2I", value)
+ if _jdn >= 1:
+ _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF)
+ _rv += datetime.timedelta(0, _msecs / 1000.0)
+ else:
+ # empty date
+ _rv = None
+ return _rv
+
+ def encodeValue(self, value):
+ """Return a string-encoded ``value``."""
+ if value:
+ value = utils.getDateTime(value)
+ # LE byteorder
+ _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF,
+ (value.hour * 3600 + value.minute * 60 + value.second) * 1000)
+ else:
+ _rv = "\0" * self.length
+ assert len(_rv) == self.length
+ return _rv
+
+
+_fieldsRegistry = {}
+
+def registerField(fieldCls):
+ """Register field definition class.
+
+ ``fieldCls`` should be subclass of the `DbfFieldDef`.
+
+ Use `lookupFor` to retrieve field definition class
+ by the type code.
+
+ """
+ assert fieldCls.typeCode is not None, "Type code isn't defined"
+ # XXX: use fieldCls.typeCode.upper()? in case of any design change
+ # don't forget to look at the same comment in the ``lookupFor`` method
+ _fieldsRegistry[fieldCls.typeCode] = fieldCls
+
+
+def lookupFor(typeCode):
+ """Return field definition class for the given type code.
+
+ ``typeCode`` must be a single character. That type should be
+ previously registered.
+
+ Use `registerField` to register new field class.
+
+ Return:
+ Return value is a subclass of the `DbfFieldDef`.
+
+ """
+ # XXX: use typeCode.upper()? in case of any design change don't
+ # forget to look at the same comment in ``registerField``
+ return _fieldsRegistry[typeCode]
+
+## register generic types
+
+for (_name, _val) in globals().items():
+ if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \
+ and (_name != "DbfFieldDef"):
+ __all__.append(_name)
+ registerField(_val)
+del _name, _val
+
+# vim: et sts=4 sw=4 :
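The registry maps one-character type codes to field classes. A resolution sketch against the Python 2 package (dbfpy3 mirrors it)::

    from tablib.packages.dbfpy import fields

    cls = fields.lookupFor("C")              # DbfCharacterFieldDef
    fld = cls("NAME", length=20)
    print(repr(fld.encodeValue("John")))     # 'John' right-padded to 20 chars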
diff --git a/tablib/packages/dbfpy/header.py b/tablib/packages/dbfpy/header.py
new file mode 100644
index 0000000..03a877c
--- /dev/null
+++ b/tablib/packages/dbfpy/header.py
@@ -0,0 +1,275 @@
+"""DBF header definition.
+
+TODO:
+ - handle encoding of the character fields
+ (encoding information stored in the DBF header)
+
+"""
+"""History (most recent first):
+16-sep-2010 [als] fromStream: fix century of the last update field
+11-feb-2007 [als] added .ignoreErrors
+10-feb-2007 [als] added __getitem__: return field definitions
+ by field name or field number (zero-based)
+04-jul-2006 [als] added export declaration
+15-dec-2005 [yc] created
+"""
+
+__version__ = "$Revision: 1.6 $"[11:-2]
+__date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2]
+
+__all__ = ["DbfHeader"]
+
+try:
+ import cStringIO
+except ImportError:
+ # in Python 3, cStringIO has been replaced by io.StringIO
+ import io as cStringIO
+import datetime
+import struct
+import time
+
+from . import fields
+from . import utils
+
+
+class DbfHeader(object):
+ """Dbf header definition.
+
+ For more information about dbf header format visit
+ `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT`
+
+ Examples:
+ Create an empty dbf header and add some field definitions:
+ dbfh = DbfHeader()
+ dbfh.addField(("name", "C", 10))
+ dbfh.addField(("date", "D"))
+ dbfh.addField(DbfNumericFieldDef("price", 5, 2))
+ Create a dbf header with field definitions:
+ dbfh = DbfHeader([
+ ("name", "C", 10),
+ ("date", "D"),
+ DbfNumericFieldDef("price", 5, 2),
+ ])
+
+ """
+
+ __slots__ = ("signature", "fields", "lastUpdate", "recordLength",
+ "recordCount", "headerLength", "changed", "_ignore_errors")
+
+ ## instance construction and initialization methods
+
+ def __init__(self, fields=None, headerLength=0, recordLength=0,
+ recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False,
+ ):
+ """Initialize instance.
+
+ Arguments:
+ fields:
+ a list of field definitions;
+ recordLength:
+ size of the records;
+ headerLength:
+ size of the header;
+ recordCount:
+ number of records stored in DBF;
+ signature:
+ version number (aka signature). using 0x03 as a default meaning
+ "File without DBT". for more information about this field visit
+ ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET``
+ lastUpdate:
+ date of the DBF's update. this could be a string ('yymmdd' or
+ 'yyyymmdd'), timestamp (int or float), datetime/date value,
+ a sequence (assuming (yyyy, mm, dd, ...)) or an object having
+ callable ``ticks`` field.
+ ignoreErrors:
+ error processing mode for DBF fields (boolean)
+
+ """
+ self.signature = signature
+ if fields is None:
+ self.fields = []
+ else:
+ self.fields = list(fields)
+ self.lastUpdate = utils.getDate(lastUpdate)
+ self.recordLength = recordLength
+ self.headerLength = headerLength
+ self.recordCount = recordCount
+ self.ignoreErrors = ignoreErrors
+ # XXX: I'm not sure this is safe to
+ # initialize `self.changed` in this way
+ self.changed = bool(self.fields)
+
+ # @classmethod
+ def fromString(cls, string):
+ """Return header instance from the string object."""
+ return cls.fromStream(cStringIO.StringIO(str(string)))
+ fromString = classmethod(fromString)
+
+ # @classmethod
+ def fromStream(cls, stream):
+ """Return header object from the stream."""
+ stream.seek(0)
+ _data = stream.read(32)
+ (_cnt, _hdrLen, _recLen) = struct.unpack("<I2H", _data[4:12])
+ #reserved = _data[12:32]
+ _year = ord(_data[1])
+ if _year < 80:
+ # dBase II started at 1980. It is quite unlikely
+ # that actual last update date is before that year.
+ _year += 2000
+ else:
+ _year += 1900
+ ## create header object
+ _obj = cls(None, _hdrLen, _recLen, _cnt, ord(_data[0]),
+ (_year, ord(_data[2]), ord(_data[3])))
+ ## append field definitions
+ # position 0 is for the deletion flag
+ _pos = 1
+ _data = stream.read(1)
+
+ # The field definitions are ended either by \x0D OR a newline
+ # character, so we need to handle both when reading from a stream.
+ # When writing, dbfpy appears to write newlines instead of \x0D.
+ while _data[0] not in ["\x0D", "\n"]:
+ _data += stream.read(31)
+ _fld = fields.lookupFor(_data[11]).fromString(_data, _pos)
+ _obj._addField(_fld)
+ _pos = _fld.end
+ _data = stream.read(1)
+ return _obj
+ fromStream = classmethod(fromStream)
+
+ ## properties
+
+ year = property(lambda self: self.lastUpdate.year)
+ month = property(lambda self: self.lastUpdate.month)
+ day = property(lambda self: self.lastUpdate.day)
+
+ def ignoreErrors(self, value):
+ """Update `ignoreErrors` flag on self and all fields"""
+ self._ignore_errors = value = bool(value)
+ for _field in self.fields:
+ _field.ignoreErrors = value
+ ignoreErrors = property(
+ lambda self: self._ignore_errors,
+ ignoreErrors,
+ doc="""Error processing mode for DBF field value conversion
+
+ if set, failing field value conversion will return
+ ``INVALID_VALUE`` instead of raising conversion error.
+
+ """)
+
+ ## object representation
+
+ def __repr__(self):
+ _rv = """\
+Version (signature): 0x%02x
+ Last update: %s
+ Header length: %d
+ Record length: %d
+ Record count: %d
+ FieldName Type Len Dec
+""" % (self.signature, self.lastUpdate, self.headerLength,
+ self.recordLength, self.recordCount)
+ _rv += "\n".join(
+ ["%10s %4s %3s %3s" % _fld.fieldInfo() for _fld in self.fields]
+ )
+ return _rv
+
+ ## internal methods
+
+ def _addField(self, *defs):
+ """Internal variant of the `addField` method.
+
+ This method doesn't set `self.changed` field to True.
+
+ Return value is a length of the appended records.
+ Note: this method doesn't modify ``recordLength`` and
+ ``headerLength`` fields. Use `addField` instead of this
+ method if you don't exactly know what you're doing.
+
+ """
+ # ensure we have dbf.DbfFieldDef instances first (instantiation
+ # from the tuple could raise an error, in such a case I don't
+ # want to add any of the definitions -- all will be ignored)
+ _defs = []
+ _recordLength = 0
+ for _def in defs:
+ if isinstance(_def, fields.DbfFieldDef):
+ _obj = _def
+ else:
+ (_name, _type, _len, _dec) = (tuple(_def) + (None,) * 4)[:4]
+ _cls = fields.lookupFor(_type)
+ _obj = _cls(_name, _len, _dec,
+ ignoreErrors=self._ignore_errors)
+ _recordLength += _obj.length
+ _defs.append(_obj)
+ # and now extend field definitions and
+ # update record length
+ self.fields += _defs
+ return _recordLength
+
+ ## interface methods
+
+ def addField(self, *defs):
+ """Add field definition to the header.
+
+ Examples:
+ dbfh.addField(
+ ("name", "C", 20),
+ dbf.DbfCharacterFieldDef("surname", 20),
+ dbf.DbfDateFieldDef("birthdate"),
+ ("member", "L"),
+ )
+ dbfh.addField(("price", "N", 5, 2))
+ dbfh.addField(dbf.DbfNumericFieldDef("origprice", 5, 2))
+
+ """
+ _oldLen = self.recordLength
+ self.recordLength += self._addField(*defs)
+ if not _oldLen:
+ self.recordLength += 1
+ # XXX: may be just use:
+ # self.recordLength += self._addField(*defs) + bool(not _oldLen)
+ # recalculate headerLength
+ self.headerLength = 32 + (32 * len(self.fields)) + 1
+ self.changed = True
+
+ def write(self, stream):
+ """Encode and write header to the stream."""
+ stream.seek(0)
+ stream.write(self.toString())
+ stream.write("".join([_fld.toString() for _fld in self.fields]))
+ stream.write(chr(0x0D)) # cr at end of all hdr data
+ self.changed = False
+
+ def toString(self):
+ """Returned 32 chars length string with encoded header."""
+ return struct.pack("<4BI2H",
+ self.signature,
+ self.year - 1900,
+ self.month,
+ self.day,
+ self.recordCount,
+ self.headerLength,
+ self.recordLength) + "\0" * 20
+
+ def setCurrentDate(self):
+ """Update ``self.lastUpdate`` field with current date value."""
+ self.lastUpdate = datetime.date.today()
+
+ def __getitem__(self, item):
+ """Return a field definition by numeric index or name string"""
+ if isinstance(item, basestring):
+ _name = item.upper()
+ for _field in self.fields:
+ if _field.name == _name:
+ return _field
+ else:
+ raise KeyError(item)
+ else:
+ # item must be field index
+ return self.fields[item]
+
+# vim: et sts=4 sw=4 :
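A quick sketch of the header arithmetic implemented above (Python 2 package; dbfpy3 mirrors it)::

    from tablib.packages.dbfpy.header import DbfHeader

    hdr = DbfHeader()
    hdr.addField(("NAME", "C", 20), ("BIRTHDATE", "D"))
    print(hdr.recordLength)    # 29 = 1 deletion flag + 20 + 8
    print(hdr.headerLength)    # 97 = 32 + 32 * 2 fields + 1 terminator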
diff --git a/tablib/packages/dbfpy/record.py b/tablib/packages/dbfpy/record.py
new file mode 100644
index 0000000..97bbfb3
--- /dev/null
+++ b/tablib/packages/dbfpy/record.py
@@ -0,0 +1,262 @@
+"""DBF record definition.
+
+"""
+"""History (most recent first):
+11-feb-2007 [als] __repr__: added special case for invalid field values
+10-feb-2007 [als] added .rawFromStream()
+30-oct-2006 [als] fix record length in .fromStream()
+04-jul-2006 [als] added export declaration
+20-dec-2005 [yc] DbfRecord.write() -> DbfRecord._write();
+ added delete() method.
+16-dec-2005 [yc] record definition moved from `dbf`.
+"""
+
+__version__ = "$Revision: 1.7 $"[11:-2]
+__date__ = "$Date: 2007/02/11 09:05:49 $"[7:-2]
+
+__all__ = ["DbfRecord"]
+
+from itertools import izip
+
+import utils
+
+class DbfRecord(object):
+ """DBF record.
+
+ Instances of this class shouldn't be created manually,
+ use `dbf.Dbf.newRecord` instead.
+
+ Class implements mapping/sequence interface, so
+ fields could be accessed via their names or indexes
+ (names are the preferred way to access fields).
+
+ Hint:
+ Use `store` method to save modified record.
+
+ Examples:
+ Add new record to the database:
+ db = Dbf(filename)
+ rec = db.newRecord()
+ rec["FIELD1"] = value1
+ rec["FIELD2"] = value2
+ rec.store()
+ Or the same, but modify an existing
+ (second in this case) record:
+ db = Dbf(filename)
+ rec = db[2]
+ rec["FIELD1"] = value1
+ rec["FIELD2"] = value2
+ rec.store()
+
+ """
+
+ __slots__ = "dbf", "index", "deleted", "fieldData"
+
+ ## creation and initialization
+
+ def __init__(self, dbf, index=None, deleted=False, data=None):
+ """Instance initialiation.
+
+ Arguments:
+ dbf:
+ A `Dbf.Dbf` instance this record belongs to.
+ index:
+ An integer record index or None. If this value is
+ None, record will be appended to the DBF.
+ deleted:
+ Boolean flag indicating whether this record
+ is a deleted record.
+ data:
+ A sequence or None. This is a data of the fields.
+ If this argument is None, default values will be used.
+
+ """
+ self.dbf = dbf
+ # XXX: I'm not sure ``index`` is necessary
+ self.index = index
+ self.deleted = deleted
+ if data is None:
+ self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields]
+ else:
+ self.fieldData = list(data)
+
+ # XXX: validate self.index before calculating position?
+ position = property(lambda self: self.dbf.header.headerLength + \
+ self.index * self.dbf.header.recordLength)
+
+ def rawFromStream(cls, dbf, index):
+ """Return raw record contents read from the stream.
+
+ Arguments:
+ dbf:
+ A `Dbf.Dbf` instance containing the record.
+ index:
+ Index of the record in the records' container.
+ This argument can't be None in this call.
+
+ Return value is a string containing record data in DBF format.
+
+ """
+ # XXX: maybe write something assuming that the current stream
+ # position is the required one? it could save some
+ # time required to calculate where to seek in the file
+ dbf.stream.seek(dbf.header.headerLength +
+ index * dbf.header.recordLength)
+ return dbf.stream.read(dbf.header.recordLength)
+ rawFromStream = classmethod(rawFromStream)
+
+ def fromStream(cls, dbf, index):
+ """Return a record read from the stream.
+
+ Arguments:
+ dbf:
+ A `Dbf.Dbf` instance new record should belong to.
+ index:
+ Index of the record in the records' container.
+ This argument can't be None in this call.
+
+ Return value is an instance of the current class.
+
+ """
+ return cls.fromString(dbf, cls.rawFromStream(dbf, index), index)
+ fromStream = classmethod(fromStream)
+
+ def fromString(cls, dbf, string, index=None):
+ """Return record read from the string object.
+
+ Arguments:
+ dbf:
+ A `Dbf.Dbf` instance new record should belong to.
+ string:
+ A string new record should be created from.
+ index:
+ Index of the record in the container. If this
+ argument is None, record will be appended.
+
+ Return value is an instance of the current class.
+
+ """
+ return cls(dbf, index, string[0]=="*",
+ [_fd.decodeFromRecord(string) for _fd in dbf.header.fields])
+ fromString = classmethod(fromString)
+
+ ## object representation
+
+ def __repr__(self):
+ _template = "%%%ds: %%s (%%s)" % max([len(_fld)
+ for _fld in self.dbf.fieldNames])
+ _rv = []
+ for _fld in self.dbf.fieldNames:
+ _val = self[_fld]
+ if _val is utils.INVALID_VALUE:
+ _rv.append(_template %
+ (_fld, "None", "value cannot be decoded"))
+ else:
+ _rv.append(_template % (_fld, _val, type(_val)))
+ return "\n".join(_rv)
+
+ ## protected methods
+
+ def _write(self):
+ """Write data to the dbf stream.
+
+        Note:
+            This isn't a public method; it's better to
+            use 'store' from public code.
+            By design, the ``_write`` method should be called
+            only from the `Dbf` instance.
+
+
+ """
+ self._validateIndex(False)
+ self.dbf.stream.seek(self.position)
+ self.dbf.stream.write(self.toString())
+ # FIXME: may be move this write somewhere else?
+ # why we should check this condition for each record?
+ if self.index == len(self.dbf):
+ # this is the last record,
+ # we should write SUB (ASCII 26)
+ self.dbf.stream.write("\x1A")
+
+ ## utility methods
+
+ def _validateIndex(self, allowUndefined=True, checkRange=False):
+        """Validate the ``self.index`` value.
+
+        If the ``allowUndefined`` argument is True, the function
+        does nothing when ``self.index`` is None.
+
+ """
+ if self.index is None:
+ if not allowUndefined:
+ raise ValueError("Index is undefined")
+ elif self.index < 0:
+ raise ValueError("Index can't be negative (%s)" % self.index)
+        elif checkRange and self.index >= self.dbf.header.recordCount:
+ raise ValueError("There are only %d records in the DBF" %
+ self.dbf.header.recordCount)
+
+ ## interface methods
+
+ def store(self):
+ """Store current record in the DBF.
+
+        If ``self.index`` is None, this record will be appended to the
+        records of the DBF this record belongs to; otherwise the record
+        with that index will be replaced.
+
+ """
+ self._validateIndex()
+ if self.index is None:
+ self.index = len(self.dbf)
+ self.dbf.append(self)
+ else:
+ self.dbf[self.index] = self
+
+ def delete(self):
+        """Mark record as deleted."""
+ self.deleted = True
+
+ def toString(self):
+ """Return string packed record values."""
+ return "".join([" *"[self.deleted]] + [
+ _def.encodeValue(_dat)
+ for (_def, _dat) in izip(self.dbf.header.fields, self.fieldData)
+ ])
+
+ def asList(self):
+ """Return a flat list of fields.
+
+ Note:
+            Changes to the list's values won't change
+            the real values stored in this object.
+
+ """
+ return self.fieldData[:]
+
+ def asDict(self):
+ """Return a dictionary of fields.
+
+ Note:
+            Changes to the dict's values won't change
+            the real values stored in this object.
+
+ """
+ return dict([_i for _i in izip(self.dbf.fieldNames, self.fieldData)])
+
+ def __getitem__(self, key):
+ """Return value by field name or field index."""
+ if isinstance(key, (long, int)):
+ # integer index of the field
+ return self.fieldData[key]
+ # assuming string field name
+ return self.fieldData[self.dbf.indexOfFieldName(key)]
+
+ def __setitem__(self, key, value):
+ """Set field value by integer index of the field or string name."""
+        if isinstance(key, (int, long)):
+            # integer index of the field
+            self.fieldData[key] = value
+            return
+ # assuming string field name
+ self.fieldData[self.dbf.indexOfFieldName(key)] = value
+
+# vim: et sts=4 sw=4 :
diff --git a/tablib/packages/dbfpy/utils.py b/tablib/packages/dbfpy/utils.py
new file mode 100644
index 0000000..cef8aa5
--- /dev/null
+++ b/tablib/packages/dbfpy/utils.py
@@ -0,0 +1,170 @@
+"""String utilities.
+
+TODO:
+ - allow strings in getDateTime routine;
+"""
+"""History (most recent first):
+11-feb-2007 [als] added INVALID_VALUE
+10-feb-2007 [als] allow date strings padded with spaces instead of zeroes
+20-dec-2005 [yc] handle long objects in getDate/getDateTime
+16-dec-2005 [yc] created from ``strutil`` module.
+"""
+
+__version__ = "$Revision: 1.4 $"[11:-2]
+__date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2]
+
+import datetime
+import time
+
+
+def unzfill(s):
+    """Return a string without ASCII NULs.
+
+    This function searches for the first NUL (ASCII 0) occurrence
+    and truncates the string at that position.
+
+    """
+    try:
+        return s[:s.index('\0')]
+    except ValueError:
+        return s
+
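+# A minimal usage sketch (illustrative only; the sample values are assumed):
+#
+#     >>> unzfill("NAME\0\0\0")
+#     'NAME'
+#     >>> unzfill("NO NULS")
+#     'NO NULS'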
+
+def getDate(date=None):
+ """Return `datetime.date` instance.
+
+ Type of the ``date`` argument could be one of the following:
+ None:
+ use current date value;
+ datetime.date:
+ this value will be returned;
+ datetime.datetime:
+ the result of the date.date() will be returned;
+ string:
+        assuming "%Y%m%d" or "%y%m%d" format;
+    number:
+        assuming it's a timestamp (returned, for example,
+        by the time.time() call);
+    sequence:
+        assuming (year, month, day, ...) sequence;
+
+    Additionally, if ``date`` has a callable ``ticks`` attribute,
+    it will be called and the result treated as a timestamp value.
+
+ """
+ if date is None:
+ # use current value
+ return datetime.date.today()
+ if isinstance(date, datetime.date):
+ return date
+ if isinstance(date, datetime.datetime):
+ return date.date()
+ if isinstance(date, (int, long, float)):
+ # date is a timestamp
+ return datetime.date.fromtimestamp(date)
+ if isinstance(date, basestring):
+ date = date.replace(" ", "0")
+ if len(date) == 6:
+ # yymmdd
+ return datetime.date(*time.strptime(date, "%y%m%d")[:3])
+ # yyyymmdd
+ return datetime.date(*time.strptime(date, "%Y%m%d")[:3])
+ if hasattr(date, "__getitem__"):
+ # a sequence (assuming date/time tuple)
+ return datetime.date(*date[:3])
+ return datetime.date.fromtimestamp(date.ticks())
+
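+# A quick sketch of the accepted argument types (illustrative only):
+#
+#     >>> getDate(datetime.date(2007, 2, 11))
+#     datetime.date(2007, 2, 11)
+#     >>> getDate("20070211")             # "%Y%m%d" string
+#     datetime.date(2007, 2, 11)
+#     >>> getDate((2007, 2, 11))          # (year, month, day, ...) sequence
+#     datetime.date(2007, 2, 11)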
+
+def getDateTime(value=None):
+ """Return `datetime.datetime` instance.
+
+ Type of the ``value`` argument could be one of the following:
+ None:
+ use current date value;
+ datetime.date:
+ result will be converted to the `datetime.datetime` instance
+ using midnight;
+ datetime.datetime:
+ ``value`` will be returned as is;
+ string:
+ *** CURRENTLY NOT SUPPORTED ***;
+ number:
+        assuming it's a timestamp (returned, for example,
+        by the time.time() call);
+    sequence:
+        assuming (year, month, day, ...) sequence;
+
+    Additionally, if ``value`` has a callable ``ticks`` attribute,
+    it will be called and the result treated as a timestamp value.
+
+ """
+ if value is None:
+ # use current value
+ return datetime.datetime.today()
+ if isinstance(value, datetime.datetime):
+ return value
+ if isinstance(value, datetime.date):
+ return datetime.datetime.fromordinal(value.toordinal())
+ if isinstance(value, (int, long, float)):
+ # value is a timestamp
+ return datetime.datetime.fromtimestamp(value)
+ if isinstance(value, basestring):
+ raise NotImplementedError("Strings aren't currently implemented")
+ if hasattr(value, "__getitem__"):
+ # a sequence (assuming date/time tuple)
+ return datetime.datetime(*tuple(value)[:6])
+ return datetime.datetime.fromtimestamp(value.ticks())
+
+
+class classproperty(property):
+    """Works in the same way as a ``property``, but on the class rather than the instance."""
+
+ def __get__(self, obj, cls):
+ return self.fget(cls)
+
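+# Sketch: reading the attribute through the class invokes the getter with
+# the class object itself (the names below are illustrative):
+#
+#     >>> class Config(object):
+#     ...     label = classproperty(lambda cls: cls.__name__.upper())
+#     >>> Config.label
+#     'CONFIG'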
+
+class _InvalidValue(object):
+
+ """Value returned from DBF records when field validation fails
+
+ The value is not equal to anything except for itself
+ and equal to all empty values: None, 0, empty string etc.
+ In other words, invalid value is equal to None and not equal
+ to None at the same time.
+
+ This value yields zero upon explicit conversion to a number type,
+ empty string for string types, and False for boolean.
+
+ """
+
+ def __eq__(self, other):
+ return not other
+
+ def __ne__(self, other):
+ return not (other is self)
+
+ def __nonzero__(self):
+ return False
+
+ def __int__(self):
+ return 0
+ __long__ = __int__
+
+ def __float__(self):
+ return 0.0
+
+ def __str__(self):
+ return ""
+
+ def __unicode__(self):
+ return u""
+
+ def __repr__(self):
+ return "<INVALID>"
+
+# invalid value is a constant singleton
+INVALID_VALUE = _InvalidValue()
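+
+# Sketch of the paradoxical equality described above (illustrative only):
+#
+#     >>> INVALID_VALUE == None, INVALID_VALUE != None
+#     (True, True)
+#     >>> INVALID_VALUE == 0, INVALID_VALUE == "x"
+#     (True, False)
+#     >>> bool(INVALID_VALUE), int(INVALID_VALUE), str(INVALID_VALUE)
+#     (False, 0, '')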
+
+# vim: set et sts=4 sw=4 :
diff --git a/tablib/packages/dbfpy3/__init__.py b/tablib/packages/dbfpy3/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/tablib/packages/dbfpy3/__init__.py
diff --git a/tablib/packages/dbfpy3/dbf.py b/tablib/packages/dbfpy3/dbf.py
new file mode 100644
index 0000000..42de8a4
--- /dev/null
+++ b/tablib/packages/dbfpy3/dbf.py
@@ -0,0 +1,293 @@
+#! /usr/bin/env python
+"""DBF accessing helpers.
+
+FIXME: more documentation needed
+
+Examples:
+
+ Create new table, setup structure, add records:
+
+ dbf = Dbf(filename, new=True)
+ dbf.addField(
+ ("NAME", "C", 15),
+ ("SURNAME", "C", 25),
+ ("INITIALS", "C", 10),
+ ("BIRTHDATE", "D"),
+ )
+ for (n, s, i, b) in (
+ ("John", "Miller", "YC", (1980, 10, 11)),
+ ("Andy", "Larkin", "", (1980, 4, 11)),
+ ):
+ rec = dbf.newRecord()
+ rec["NAME"] = n
+ rec["SURNAME"] = s
+ rec["INITIALS"] = i
+ rec["BIRTHDATE"] = b
+ rec.store()
+ dbf.close()
+
+    Open an existing dbf, read some data:
+
+        dbf = Dbf(filename, True)
+        for rec in dbf:
+            for fldName in dbf.fieldNames:
+                print('%s:\t %s (%s)' % (fldName, rec[fldName],
+                    type(rec[fldName])))
+            print()
+ dbf.close()
+
+"""
+"""History (most recent first):
+11-feb-2007 [als] export INVALID_VALUE;
+ Dbf: added .ignoreErrors, .INVALID_VALUE
+04-jul-2006 [als] added export declaration
+20-dec-2005 [yc] removed fromStream and newDbf methods:
+    the argument of the __init__ call must be used instead;
+ added class fields pointing to the header and
+ record classes.
+17-dec-2005 [yc] split to several modules; reimplemented
+13-dec-2005 [yc] adapted to the changes of the `strutil` module.
+13-sep-2002 [als] support FoxPro Timestamp datatype
+15-nov-1999 [jjk] documentation updates, add demo
+24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks
+08-jun-1998 [jjk] fix problems, add more features
+20-feb-1998 [jjk] fix problems, add more features
+19-feb-1998 [jjk] add create/write capabilities
+18-feb-1998 [jjk] from dbfload.py
+"""
+
+__version__ = "$Revision: 1.7 $"[11:-2]
+__date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2]
+__author__ = "Jeff Kunce <kuncej@mail.conservation.state.mo.us>"
+
+__all__ = ["Dbf"]
+
+from . import header
+from . import record
+from .utils import INVALID_VALUE
+
+class Dbf(object):
+ """DBF accessor.
+
+ FIXME:
+        docs and examples needed (don't forget to mention
+        the problems with adding new fields on the fly)
+
+    Implementation notes:
+        ``_new`` field is used to indicate whether this is
+        a new data table. `addField` can be used only for
+        new tables! Once at least one record has been appended
+        to the table, its structure can't be changed.
+
+ """
+
+ __slots__ = ("name", "header", "stream",
+ "_changed", "_new", "_ignore_errors")
+
+ HeaderClass = header.DbfHeader
+ RecordClass = record.DbfRecord
+ INVALID_VALUE = INVALID_VALUE
+
+ ## initialization and creation helpers
+
+ def __init__(self, f, readOnly=False, new=False, ignoreErrors=False):
+ """Initialize instance.
+
+ Arguments:
+ f:
+ Filename or file-like object.
+ new:
+ True if new data table must be created. Assume
+ data table exists if this argument is False.
+ readOnly:
+                if the ``f`` argument is a string, the file will
+                be opened in read-only mode; in other cases this
+                argument is ignored. It is also ignored when the
+                ``new`` argument is True.
+ ignoreErrors:
+ if set, failing field value conversion will return
+ ``INVALID_VALUE`` instead of raising conversion error.
+
+ """
+ if isinstance(f, str):
+ # a filename
+ self.name = f
+ if new:
+ # new table (table file must be
+ # created or opened and truncated)
+ self.stream = open(f, "w+b")
+ else:
+                # table file must exist
+ self.stream = open(f, ("r+b", "rb")[bool(readOnly)])
+ else:
+ # a stream
+ self.name = getattr(f, "name", "")
+ self.stream = f
+ if new:
+ # if this is a new table, header will be empty
+ self.header = self.HeaderClass()
+ else:
+ # or instantiated using stream
+ self.header = self.HeaderClass.fromStream(self.stream)
+ self.ignoreErrors = ignoreErrors
+ self._new = bool(new)
+ self._changed = False
+
+ ## properties
+
+ closed = property(lambda self: self.stream.closed)
+ recordCount = property(lambda self: self.header.recordCount)
+ fieldNames = property(
+ lambda self: [_fld.name for _fld in self.header.fields])
+ fieldDefs = property(lambda self: self.header.fields)
+ changed = property(lambda self: self._changed or self.header.changed)
+
+ def ignoreErrors(self, value):
+ """Update `ignoreErrors` flag on the header object and self"""
+ self.header.ignoreErrors = self._ignore_errors = bool(value)
+ ignoreErrors = property(
+ lambda self: self._ignore_errors,
+ ignoreErrors,
+ doc="""Error processing mode for DBF field value conversion
+
+ if set, failing field value conversion will return
+ ``INVALID_VALUE`` instead of raising conversion error.
+
+ """)
+
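+    # A short usage sketch of the read/write property above (the file
+    # name and field name are assumed):
+    #
+    #     db = Dbf("sample.dbf", readOnly=True, ignoreErrors=False)
+    #     db.ignoreErrors = True   # propagates to db.header and its fields
+    #     db[0]["BADFLD"]          # yields INVALID_VALUE instead of raising
+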
+ ## protected methods
+
+ def _fixIndex(self, index):
+ """Return fixed index.
+
+        This method fails if the index isn't an int,
+        or if the index isn't in the valid range
+        (less than the number of records in the db).
+
+        If ``index`` is a negative number, it will be
+        treated like a negative index of a list object.
+
+        Return:
+            Return value is a numeric object holding a valid index.
+
+ """
+ if not isinstance(index, int):
+ raise TypeError("Index must be a numeric object")
+ if index < 0:
+ # index from the right side
+ # fix it to the left-side index
+            index += len(self)
+ if index >= len(self):
+ raise IndexError("Record index out of range")
+ return index
+
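+    # For example (sketch; ``db`` is an assumed open Dbf with 10 records):
+    #
+    #     db._fixIndex(3)      # -> 3
+    #     db._fixIndex(-1)     # -> 9, the last record
+    #     db._fixIndex(10)     # raises IndexError
+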
+    ## interface methods
+
+ def close(self):
+ self.flush()
+ self.stream.close()
+
+ def flush(self):
+ """Flush data to the associated stream."""
+ if self.changed:
+ self.header.setCurrentDate()
+ self.header.write(self.stream)
+ self.stream.flush()
+ self._changed = False
+
+ def indexOfFieldName(self, name):
+ """Index of field named ``name``."""
+ # FIXME: move this to header class
+ names = [f.name for f in self.header.fields]
+ return names.index(name.upper())
+
+ def newRecord(self):
+        """Return a new record, which belongs to this table."""
+ return self.RecordClass(self)
+
+ def append(self, record):
+ """Append ``record`` to the database."""
+ record.index = self.header.recordCount
+ record._write()
+ self.header.recordCount += 1
+ self._changed = True
+ self._new = False
+
+ def addField(self, *defs):
+ """Add field definitions.
+
+ For more information see `header.DbfHeader.addField`.
+
+ """
+ if self._new:
+ self.header.addField(*defs)
+ else:
+ raise TypeError("At least one record was added, "
+ "structure can't be changed")
+
+ ## 'magic' methods (representation and sequence interface)
+
+ def __repr__(self):
+ return "Dbf stream '%s'\n" % self.stream + repr(self.header)
+
+ def __len__(self):
+ """Return number of records."""
+ return self.recordCount
+
+ def __getitem__(self, index):
+ """Return `DbfRecord` instance."""
+ return self.RecordClass.fromStream(self, self._fixIndex(index))
+
+ def __setitem__(self, index, record):
+ """Write `DbfRecord` instance to the stream."""
+ record.index = self._fixIndex(index)
+ record._write()
+ self._changed = True
+ self._new = False
+
+ #def __del__(self):
+ # """Flush stream upon deletion of the object."""
+ # self.flush()
+
+
+def demoRead(filename):
+ _dbf = Dbf(filename, True)
+ for _rec in _dbf:
+ print()
+ print(repr(_rec))
+ _dbf.close()
+
+def demoCreate(filename):
+ _dbf = Dbf(filename, new=True)
+ _dbf.addField(
+ ("NAME", "C", 15),
+ ("SURNAME", "C", 25),
+ ("INITIALS", "C", 10),
+ ("BIRTHDATE", "D"),
+ )
+ for (_n, _s, _i, _b) in (
+ ("John", "Miller", "YC", (1981, 1, 2)),
+ ("Andy", "Larkin", "AL", (1982, 3, 4)),
+ ("Bill", "Clinth", "", (1983, 5, 6)),
+ ("Bobb", "McNail", "", (1984, 7, 8)),
+ ):
+ _rec = _dbf.newRecord()
+ _rec["NAME"] = _n
+ _rec["SURNAME"] = _s
+ _rec["INITIALS"] = _i
+ _rec["BIRTHDATE"] = _b
+ _rec.store()
+ print(repr(_dbf))
+ _dbf.close()
+
+if (__name__=='__main__'):
+ import sys
+    _name = sys.argv[1] if len(sys.argv) > 1 else "county.dbf"
+ demoCreate(_name)
+ demoRead(_name)
+
+# vim: set et sw=4 sts=4 :
diff --git a/tablib/packages/dbfpy3/dbfnew.py b/tablib/packages/dbfpy3/dbfnew.py
new file mode 100644
index 0000000..4051bc6
--- /dev/null
+++ b/tablib/packages/dbfpy3/dbfnew.py
@@ -0,0 +1,182 @@
+#!/usr/bin/python
+""".DBF creation helpers.
+
+Note: this is a legacy interface. New code should use Dbf class
+ for table creation (see examples in dbf.py)
+
+TODO:
+ - handle Memo fields.
+    - check the length of the fields according to
+ `http://www.clicketyclick.dk/databases/xbase/format/data_types.html`
+
+"""
+"""History (most recent first)
+04-jul-2006 [als] added export declaration;
+ updated for dbfpy 2.0
+15-dec-2005 [yc] define dbf_new.__slots__
+14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings;
+ dbf_new now is a new class (inherited from object)
+??-jun-2000 [--] added by Hans Fiby
+"""
+
+__version__ = "$Revision: 1.4 $"[11:-2]
+__date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2]
+
+__all__ = ["dbf_new"]
+
+from .dbf import *
+from .fields import *
+from .header import *
+from .record import *
+
+class _FieldDefinition(object):
+ """Field definition.
+
+ This is a simple structure, which contains ``name``, ``type``,
+ ``len``, ``dec`` and ``cls`` fields.
+
+    Objects also implement the get/setitem magic functions, so fields
+    can be accessed via the sequence interface, where 'name' has
+    index 0, 'type' index 1, 'len' index 2, 'dec' index 3, and
+    'cls' index 4.
+
+ """
+
+ __slots__ = "name", "type", "len", "dec", "cls"
+
+ # WARNING: be attentive - dictionaries are mutable!
+ FLD_TYPES = {
+ # type: (cls, len)
+ "C": (DbfCharacterFieldDef, None),
+ "N": (DbfNumericFieldDef, None),
+ "L": (DbfLogicalFieldDef, 1),
+ # FIXME: support memos
+ # "M": (DbfMemoFieldDef),
+ "D": (DbfDateFieldDef, 8),
+        # FIXME: I'm not sure the length should be 14 characters!
+        # but I use it temporarily, because the date takes 8 characters
+        # and the time 6 (hhmmss)
+ "T": (DbfDateTimeFieldDef, 14),
+ }
+
+ def __init__(self, name, type, len=None, dec=0):
+ _cls, _len = self.FLD_TYPES[type]
+ if _len is None:
+ if len is None:
+ raise ValueError("Field length must be defined")
+ _len = len
+ self.name = name
+ self.type = type
+ self.len = _len
+ self.dec = dec
+ self.cls = _cls
+
+ def getDbfField(self):
+ "Return `DbfFieldDef` instance from the current definition."
+ return self.cls(self.name, self.len, self.dec)
+
+ def appendToHeader(self, dbfh):
+ """Create a `DbfFieldDef` instance and append it to the dbf header.
+
+ Arguments:
+ dbfh: `DbfHeader` instance.
+
+ """
+ _dbff = self.getDbfField()
+ dbfh.addField(_dbff)
+
+
+class dbf_new(object):
+ """New .DBF creation helper.
+
+ Example Usage:
+
+ dbfn = dbf_new()
+ dbfn.add_field("name",'C',80)
+ dbfn.add_field("price",'N',10,2)
+ dbfn.add_field("date",'D',8)
+ dbfn.write("tst.dbf")
+
+ Note:
+ This module cannot handle Memo-fields,
+ they are special.
+
+ """
+
+ __slots__ = ("fields",)
+
+ FieldDefinitionClass = _FieldDefinition
+
+ def __init__(self):
+ self.fields = []
+
+ def add_field(self, name, typ, len, dec=0):
+ """Add field definition.
+
+ Arguments:
+ name:
+ field name (str object). field name must not
+                contain ASCII NULs, and its length shouldn't
+ exceed 10 characters.
+ typ:
+ type of the field. this must be a single character
+ from the "CNLMDT" set meaning character, numeric,
+ logical, memo, date and date/time respectively.
+ len:
+ length of the field. this argument is used only for
+ the character and numeric fields. all other fields
+ have fixed length.
+ FIXME: use None as a default for this argument?
+ dec:
+                decimal precision. used only for the numeric fields.
+
+ """
+ self.fields.append(self.FieldDefinitionClass(name, typ, len, dec))
+
+ def write(self, filename):
+ """Create empty .DBF file using current structure."""
+ _dbfh = DbfHeader()
+ _dbfh.setCurrentDate()
+ for _fldDef in self.fields:
+ _fldDef.appendToHeader(_dbfh)
+
+ _dbfStream = open(filename, "wb")
+ _dbfh.write(_dbfStream)
+ _dbfStream.close()
+
+
+if (__name__=='__main__'):
+ # create a new DBF-File
+ dbfn=dbf_new()
+ dbfn.add_field("name",'C',80)
+ dbfn.add_field("price",'N',10,2)
+ dbfn.add_field("date",'D',8)
+ dbfn.write("tst.dbf")
+ # test new dbf
+ print("*** created tst.dbf: ***")
+ dbft = Dbf('tst.dbf', readOnly=0)
+ print(repr(dbft))
+ # add a record
+ rec=DbfRecord(dbft)
+ rec['name']='something'
+ rec['price']=10.5
+ rec['date']=(2000,1,12)
+ rec.store()
+ # add another record
+ rec=DbfRecord(dbft)
+ rec['name']='foo and bar'
+ rec['price']=12234
+ rec['date']=(1992,7,15)
+ rec.store()
+
+ # show the records
+ print("*** inserted 2 records into tst.dbf: ***")
+ print(repr(dbft))
+ for i1 in range(len(dbft)):
+ rec = dbft[i1]
+ for fldName in dbft.fieldNames:
+ print('%s:\t %s'%(fldName, rec[fldName]))
+ print()
+ dbft.close()
+
+# vim: set et sts=4 sw=4 :
diff --git a/tablib/packages/dbfpy3/fields.py b/tablib/packages/dbfpy3/fields.py
new file mode 100644
index 0000000..883d035
--- /dev/null
+++ b/tablib/packages/dbfpy3/fields.py
@@ -0,0 +1,467 @@
+"""DBF fields definitions.
+
+TODO:
+ - make memos work
+"""
+"""History (most recent first):
+26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes
+05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date
+16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point
+ in the value to select float or integer return type
+13-mar-2008 [als] check field name length in constructor
+11-feb-2007 [als] handle value conversion errors
+10-feb-2007 [als] DbfFieldDef: added .rawFromRecord()
+01-dec-2006 [als] Timestamp columns use None for empty values
+31-oct-2006 [als] support field types 'F' (float), 'I' (integer)
+ and 'Y' (currency);
+ automate export and registration of field classes
+04-jul-2006 [als] added export declaration
+10-mar-2006 [als] decode empty values for Date and Logical fields;
+ show field name in errors
+10-mar-2006 [als] fix Numeric value decoding: according to spec,
+ value always is string representation of the number;
+ ensure that encoded Numeric value fits into the field
+20-dec-2005 [yc] use field names in upper case
+15-dec-2005 [yc] field definitions moved from `dbf`.
+"""
+
+__version__ = "$Revision: 1.14 $"[11:-2]
+__date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2]
+
+__all__ = ["lookupFor",] # field classes added at the end of the module
+
+import datetime
+import struct
+import sys
+
+from . import utils
+
+## abstract definitions
+
+class DbfFieldDef(object):
+ """Abstract field definition.
+
+    Child classes must override the ``typeCode`` class attribute to provide
+    datatype information for the field definition. For more info about types
+    visit `http://www.clicketyclick.dk/databases/xbase/format/data_types.html`
+
+    Child classes must also override the ``defaultValue`` attribute to
+    provide a default value for the field.
+
+    If a child class has fixed length, the ``length`` class attribute must
+    be overridden and set to a valid value. A value of None means the
+    field isn't of fixed length.
+
+    Note: the ``name`` field must not be changed after instantiation.
+
+ """
+
+
+ __slots__ = ("name", "decimalCount",
+ "start", "end", "ignoreErrors")
+
+ # length of the field, None in case of variable-length field,
+ # or a number if this field is a fixed-length field
+ length = None
+
+ # field type. for more information about fields types visit
+ # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html`
+    # must be overridden in child classes
+ typeCode = None
+
+ # default value for the field. this field must be
+ # overriden in child classes
+    # overridden in child classes
+
+ def __init__(self, name, length=None, decimalCount=None,
+ start=None, stop=None, ignoreErrors=False,
+ ):
+ """Initialize instance."""
+        assert self.typeCode is not None, "Type code must be overridden"
+        assert self.defaultValue is not None, "Default value must be overridden"
+        ## fix arguments
+        if len(name) > 10:
+ raise ValueError("Field name \"%s\" is too long" % name)
+ name = str(name).upper()
+ if self.__class__.length is None:
+ if length is None:
+ raise ValueError("[%s] Length isn't specified" % name)
+ length = int(length)
+ if length <= 0:
+ raise ValueError("[%s] Length must be a positive integer"
+ % name)
+ else:
+ length = self.length
+ if decimalCount is None:
+ decimalCount = 0
+ ## set fields
+ self.name = name
+ # FIXME: validate length according to the specification at
+ # http://www.clicketyclick.dk/databases/xbase/format/data_types.html
+ self.length = length
+ self.decimalCount = decimalCount
+ self.ignoreErrors = ignoreErrors
+ self.start = start
+ self.end = stop
+
+    def __eq__(self, other):
+        # Python 3 has no __cmp__/cmp(); compare by upper-cased name.
+        return self.name == str(other).upper()
+
+ def __hash__(self):
+ return hash(self.name)
+
+ def fromString(cls, string, start, ignoreErrors=False):
+ """Decode dbf field definition from the string data.
+
+ Arguments:
+ string:
+                a 32-byte string the dbf field definition
+                is decoded from.
+ start:
+ position in the database file.
+ ignoreErrors:
+ initial error processing mode for the new field (boolean)
+
+ """
+ assert len(string) == 32
+ _length = string[16]
+ return cls(utils.unzfill(string)[:11].decode('utf-8'), _length,
+ string[17], start, start + _length, ignoreErrors=ignoreErrors)
+ fromString = classmethod(fromString)
+
+ def toString(self):
+ """Return encoded field definition.
+
+ Return:
+ Return value is a string object containing encoded
+ definition of this field.
+
+ """
+        # str.ljust with a padding character is always available here
+        _name = self.name.ljust(11, '\0')
+ return (
+ _name +
+ self.typeCode +
+ #data address
+ chr(0) * 4 +
+ chr(self.length) +
+ chr(self.decimalCount) +
+ chr(0) * 14
+ )
+
+ def __repr__(self):
+ return "%-10s %1s %3d %3d" % self.fieldInfo()
+
+ def fieldInfo(self):
+ """Return field information.
+
+ Return:
+ Return value is a (name, type, length, decimals) tuple.
+
+ """
+ return (self.name, self.typeCode, self.length, self.decimalCount)
+
+ def rawFromRecord(self, record):
+ """Return a "raw" field value from the record string."""
+ return record[self.start:self.end]
+
+ def decodeFromRecord(self, record):
+ """Return decoded field value from the record string."""
+ try:
+ return self.decodeValue(self.rawFromRecord(record))
+        except Exception:
+ if self.ignoreErrors:
+ return utils.INVALID_VALUE
+ else:
+ raise
+
+ def decodeValue(self, value):
+ """Return decoded value from string value.
+
+ This method shouldn't be used publicly. It's called from the
+ `decodeFromRecord` method.
+
+ This is an abstract method and it must be overridden in child classes.
+ """
+ raise NotImplementedError
+
+ def encodeValue(self, value):
+ """Return str object containing encoded field value.
+
+        This is an abstract method and it must be overridden in child classes.
+ """
+ raise NotImplementedError
+
+## real classes
+
+class DbfCharacterFieldDef(DbfFieldDef):
+ """Definition of the character field."""
+
+ typeCode = "C"
+ defaultValue = b''
+
+ def decodeValue(self, value):
+ """Return string object.
+
+ Return value is a ``value`` argument with stripped right spaces.
+
+ """
+ return value.rstrip(b' ').decode('utf-8')
+
+ def encodeValue(self, value):
+ """Return raw data string encoded from a ``value``."""
+ return str(value)[:self.length].ljust(self.length)
+
+
+class DbfNumericFieldDef(DbfFieldDef):
+ """Definition of the numeric field."""
+
+ typeCode = "N"
+    # XXX: now I'm not sure it was a good idea to make a class field
+    # `defaultValue` instead of a generic method as it was implemented
+    # previously -- it's OK with all types except numbers, because
+    # we should return 0 if self.decimalCount is 0, and 0.0 otherwise.
+ defaultValue = 0
+
+ def decodeValue(self, value):
+ """Return a number decoded from ``value``.
+
+ If decimals is zero, value will be decoded as an integer;
+ or as a float otherwise.
+
+ Return:
+ Return value is a int (long) or float instance.
+
+ """
+ value = value.strip(b' \0')
+ if b'.' in value:
+ # a float (has decimal separator)
+ return float(value)
+ elif value:
+ # must be an integer
+ return int(value)
+ else:
+ return 0
+
+ def encodeValue(self, value):
+ """Return string containing encoded ``value``."""
+ _rv = ("%*.*f" % (self.length, self.decimalCount, value))
+ if len(_rv) > self.length:
+ _ppos = _rv.find(".")
+ if 0 <= _ppos <= self.length:
+ _rv = _rv[:self.length]
+ else:
+ raise ValueError("[%s] Numeric overflow: %s (field width: %i)"
+ % (self.name, _rv, self.length))
+ return _rv
+
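+# Encoding/decoding sketch for the numeric field (values illustrative):
+#
+#     fld = DbfNumericFieldDef("PRICE", 8, 2)
+#     fld.encodeValue(3.5)          # -> '    3.50' (right-justified, width 8)
+#     fld.decodeValue(b'    3.50')  # -> 3.5
+#     fld.decodeValue(b'      42')  # -> 42 (no decimal point, so an int)
+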
+class DbfFloatFieldDef(DbfNumericFieldDef):
+ """Definition of the float field - same as numeric."""
+
+ typeCode = "F"
+
+class DbfIntegerFieldDef(DbfFieldDef):
+ """Definition of the integer field."""
+
+ typeCode = "I"
+ length = 4
+ defaultValue = 0
+
+ def decodeValue(self, value):
+ """Return an integer number decoded from ``value``."""
+ return struct.unpack("<i", value)[0]
+
+ def encodeValue(self, value):
+ """Return string containing encoded ``value``."""
+ return struct.pack("<i", int(value))
+
+class DbfCurrencyFieldDef(DbfFieldDef):
+ """Definition of the currency field."""
+
+ typeCode = "Y"
+ length = 8
+ defaultValue = 0.0
+
+ def decodeValue(self, value):
+ """Return float number decoded from ``value``."""
+ return struct.unpack("<q", value)[0] / 10000.
+
+ def encodeValue(self, value):
+ """Return string containing encoded ``value``."""
+ return struct.pack("<q", round(value * 10000))
+
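+# Currency values are stored as a little-endian int64 scaled by 10**4
+# (fixed-point). Round-trip sketch (values illustrative):
+#
+#     fld = DbfCurrencyFieldDef("COST")
+#     raw = fld.encodeValue(12.3456)   # 8 packed bytes
+#     fld.decodeValue(raw)             # -> 12.3456
+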
+class DbfLogicalFieldDef(DbfFieldDef):
+ """Definition of the logical field."""
+
+ typeCode = "L"
+ defaultValue = -1
+ length = 1
+
+    def decodeValue(self, value):
+        """Return True, False or -1 decoded from ``value``."""
+        # Note: value always is a 1-char bytes string
+        if value == b"?":
+            return -1
+        if value in b"NnFf ":
+            return False
+        if value in b"YyTt":
+            return True
+        raise ValueError("[%s] Invalid logical value %r" % (self.name, value))
+
+ def encodeValue(self, value):
+ """Return a character from the "TF?" set.
+
+ Return:
+            Return value is "T" if ``value`` is True,
+            "?" if ``value`` is -1, and "F" otherwise.
+
+ """
+ if value is True:
+ return "T"
+ if value == -1:
+ return "?"
+ return "F"
+
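+# Decoding/encoding sketch for the logical field (bytes assumed from a record):
+#
+#     fld = DbfLogicalFieldDef("FLAG")
+#     fld.decodeValue(b"Y")    # -> True
+#     fld.decodeValue(b" ")    # -> False
+#     fld.decodeValue(b"?")    # -> -1 ("unknown")
+#     fld.encodeValue(True)    # -> "T"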
+
+class DbfMemoFieldDef(DbfFieldDef):
+ """Definition of the memo field.
+
+    Note: memos aren't currently completely supported.
+
+ """
+
+ typeCode = "M"
+ defaultValue = " " * 10
+ length = 10
+
+ def decodeValue(self, value):
+ """Return int .dbt block number decoded from the string object."""
+ #return int(value)
+ raise NotImplementedError
+
+ def encodeValue(self, value):
+ """Return raw data string encoded from a ``value``.
+
+ Note: this is an internal method.
+
+ """
+ #return str(value)[:self.length].ljust(self.length)
+ raise NotImplementedError
+
+
+class DbfDateFieldDef(DbfFieldDef):
+ """Definition of the date field."""
+
+ typeCode = "D"
+ defaultValue = utils.classproperty(lambda cls: datetime.date.today())
+ # "yyyymmdd" gives us 8 characters
+ length = 8
+
+ def decodeValue(self, value):
+ """Return a ``datetime.date`` instance decoded from ``value``."""
+ if value.strip():
+ return utils.getDate(value)
+ else:
+ return None
+
+ def encodeValue(self, value):
+ """Return a string-encoded value.
+
+ ``value`` argument should be a value suitable for the
+ `utils.getDate` call.
+
+ Return:
+ Return value is a string in format "yyyymmdd".
+
+ """
+ if value:
+ return utils.getDate(value).strftime("%Y%m%d")
+ else:
+ return " " * self.length
+
+
+class DbfDateTimeFieldDef(DbfFieldDef):
+ """Definition of the timestamp field."""
+
+ # a difference between JDN (Julian Day Number)
+ # and GDN (Gregorian Day Number). note, that GDN < JDN
+ JDN_GDN_DIFF = 1721425
+ typeCode = "T"
+ defaultValue = utils.classproperty(lambda cls: datetime.datetime.now())
+ # two 32-bits integers representing JDN and amount of
+ # milliseconds respectively gives us 8 bytes.
+ # note, that values must be encoded in LE byteorder.
+ length = 8
+
+ def decodeValue(self, value):
+ """Return a `datetime.datetime` instance."""
+ assert len(value) == self.length
+ # LE byteorder
+ _jdn, _msecs = struct.unpack("<2I", value)
+ if _jdn >= 1:
+ _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF)
+ _rv += datetime.timedelta(0, _msecs / 1000.0)
+ else:
+ # empty date
+ _rv = None
+ return _rv
+
+ def encodeValue(self, value):
+ """Return a string-encoded ``value``."""
+ if value:
+ value = utils.getDateTime(value)
+ # LE byteorder
+ _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF,
+ (value.hour * 3600 + value.minute * 60 + value.second) * 1000)
+ else:
+ _rv = "\0" * self.length
+ assert len(_rv) == self.length
+ return _rv
+
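+# Round-trip sketch for the timestamp encoding (JDN day number plus
+# milliseconds since midnight; the value is illustrative):
+#
+#     fld = DbfDateTimeFieldDef("STAMP")
+#     raw = fld.encodeValue(datetime.datetime(2007, 2, 11, 9, 5, 49))
+#     fld.decodeValue(raw)    # -> datetime.datetime(2007, 2, 11, 9, 5, 49)
+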
+
+_fieldsRegistry = {}
+
+def registerField(fieldCls):
+ """Register field definition class.
+
+ ``fieldCls`` should be subclass of the `DbfFieldDef`.
+
+ Use `lookupFor` to retrieve field definition class
+ by the type code.
+
+ """
+ assert fieldCls.typeCode is not None, "Type code isn't defined"
+    # XXX: use fieldCls.typeCode.upper()? whatever the decision,
+    # don't forget to update the matching comment in ``lookupFor``
+ _fieldsRegistry[fieldCls.typeCode] = fieldCls
+
+
+def lookupFor(typeCode):
+ """Return field definition class for the given type code.
+
+ ``typeCode`` must be a single character. That type should be
+ previously registered.
+
+ Use `registerField` to register new field class.
+
+ Return:
+ Return value is a subclass of the `DbfFieldDef`.
+
+ """
+    # XXX: use typeCode.upper()? whatever the decision, don't
+    # forget to update the matching comment in ``registerField``
+ return _fieldsRegistry[chr(typeCode)]
+
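+# Lookup sketch: type codes arrive as raw byte values from the header,
+# hence the chr() call (0x43 is ASCII "C", 0x4E is "N"):
+#
+#     lookupFor(0x43)    # -> DbfCharacterFieldDef
+#     lookupFor(0x4E)    # -> DbfNumericFieldDef
+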
+## register generic types
+
+for (_name, _val) in list(globals().items()):
+ if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \
+ and (_name != "DbfFieldDef"):
+ __all__.append(_name)
+ registerField(_val)
+del _name, _val
+
+# vim: et sts=4 sw=4 :
diff --git a/tablib/packages/dbfpy3/header.py b/tablib/packages/dbfpy3/header.py
new file mode 100644
index 0000000..6c0dc4f
--- /dev/null
+++ b/tablib/packages/dbfpy3/header.py
@@ -0,0 +1,273 @@
+"""DBF header definition.
+
+TODO:
+ - handle encoding of the character fields
+ (encoding information stored in the DBF header)
+
+"""
+"""History (most recent first):
+16-sep-2010 [als] fromStream: fix century of the last update field
+11-feb-2007 [als] added .ignoreErrors
+10-feb-2007 [als] added __getitem__: return field definitions
+ by field name or field number (zero-based)
+04-jul-2006 [als] added export declaration
+15-dec-2005 [yc] created
+"""
+
+__version__ = "$Revision: 1.6 $"[11:-2]
+__date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2]
+
+__all__ = ["DbfHeader"]
+
+import io
+import datetime
+import struct
+import time
+import sys
+
+from . import fields
+from .utils import getDate
+
+
+class DbfHeader(object):
+ """Dbf header definition.
+
+ For more information about dbf header format visit
+ `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT`
+
+ Examples:
+ Create an empty dbf header and add some field definitions:
+ dbfh = DbfHeader()
+ dbfh.addField(("name", "C", 10))
+ dbfh.addField(("date", "D"))
+ dbfh.addField(DbfNumericFieldDef("price", 5, 2))
+ Create a dbf header with field definitions:
+ dbfh = DbfHeader([
+ ("name", "C", 10),
+ ("date", "D"),
+ DbfNumericFieldDef("price", 5, 2),
+ ])
+
+ """
+
+ __slots__ = ("signature", "fields", "lastUpdate", "recordLength",
+ "recordCount", "headerLength", "changed", "_ignore_errors")
+
+ ## instance construction and initialization methods
+
+ def __init__(self, fields=None, headerLength=0, recordLength=0,
+ recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False,
+ ):
+ """Initialize instance.
+
+ Arguments:
+ fields:
+ a list of field definitions;
+ recordLength:
+ size of the records;
+ headerLength:
+ size of the header;
+ recordCount:
+ number of records stored in DBF;
+ signature:
+ version number (aka signature). using 0x03 as a default meaning
+ "File without DBT". for more information about this field visit
+ ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET``
+ lastUpdate:
+ date of the DBF's update. this could be a string ('yymmdd' or
+ 'yyyymmdd'), timestamp (int or float), datetime/date value,
+ a sequence (assuming (yyyy, mm, dd, ...)) or an object having
+ callable ``ticks`` field.
+ ignoreErrors:
+ error processing mode for DBF fields (boolean)
+
+ """
+ self.signature = signature
+ if fields is None:
+ self.fields = []
+ else:
+ self.fields = list(fields)
+ self.lastUpdate = getDate(lastUpdate)
+ self.recordLength = recordLength
+ self.headerLength = headerLength
+ self.recordCount = recordCount
+ self.ignoreErrors = ignoreErrors
+        # XXX: I'm not sure it is safe to
+        # initialize `self.changed` this way
+ self.changed = bool(self.fields)
+
+ # @classmethod
+ def fromString(cls, string):
+        """Return header instance from a bytes object."""
+        return cls.fromStream(io.BytesIO(string))
+ fromString = classmethod(fromString)
+
+ # @classmethod
+ def fromStream(cls, stream):
+ """Return header object from the stream."""
+ stream.seek(0)
+ first_32 = stream.read(32)
+        if not isinstance(first_32, bytes):
+            first_32 = bytes(first_32, sys.getfilesystemencoding())
+        _data = first_32
+ (_cnt, _hdrLen, _recLen) = struct.unpack("<I2H", _data[4:12])
+ #reserved = _data[12:32]
+ _year = _data[1]
+ if _year < 80:
+ # dBase II started at 1980. It is quite unlikely
+ # that actual last update date is before that year.
+ _year += 2000
+ else:
+ _year += 1900
+ ## create header object
+ _obj = cls(None, _hdrLen, _recLen, _cnt, _data[0],
+ (_year, _data[2], _data[3]))
+ ## append field definitions
+ # position 0 is for the deletion flag
+ _pos = 1
+ _data = stream.read(1)
+ while _data != b'\r':
+ _data += stream.read(31)
+ _fld = fields.lookupFor(_data[11]).fromString(_data, _pos)
+ _obj._addField(_fld)
+ _pos = _fld.end
+ _data = stream.read(1)
+ return _obj
+ fromStream = classmethod(fromStream)
+
+ ## properties
+
+ year = property(lambda self: self.lastUpdate.year)
+ month = property(lambda self: self.lastUpdate.month)
+ day = property(lambda self: self.lastUpdate.day)
+
+ def ignoreErrors(self, value):
+ """Update `ignoreErrors` flag on self and all fields"""
+ self._ignore_errors = value = bool(value)
+ for _field in self.fields:
+ _field.ignoreErrors = value
+ ignoreErrors = property(
+ lambda self: self._ignore_errors,
+ ignoreErrors,
+ doc="""Error processing mode for DBF field value conversion
+
+ if set, failing field value conversion will return
+ ``INVALID_VALUE`` instead of raising conversion error.
+
+ """)
+
+ ## object representation
+
+ def __repr__(self):
+ _rv = """\
+Version (signature): 0x%02x
+ Last update: %s
+ Header length: %d
+ Record length: %d
+ Record count: %d
+ FieldName Type Len Dec
+""" % (self.signature, self.lastUpdate, self.headerLength,
+ self.recordLength, self.recordCount)
+ _rv += "\n".join(
+ ["%10s %4s %3s %3s" % _fld.fieldInfo() for _fld in self.fields]
+ )
+ return _rv
+
+ ## internal methods
+
+ def _addField(self, *defs):
+ """Internal variant of the `addField` method.
+
+ This method doesn't set `self.changed` field to True.
+
+ Return value is a length of the appended records.
+ Note: this method doesn't modify ``recordLength`` and
+ ``headerLength`` fields. Use `addField` instead of this
+ method if you don't exactly know what you're doing.
+
+ """
+        # ensure we have dbf.DbfFieldDef instances first (instantiation
+        # from a tuple could raise an error, in which case we don't
+        # want to add any of the definitions -- all will be ignored)
+ _defs = []
+ _recordLength = 0
+ for _def in defs:
+ if isinstance(_def, fields.DbfFieldDef):
+ _obj = _def
+ else:
+ (_name, _type, _len, _dec) = (tuple(_def) + (None,) * 4)[:4]
+ _cls = fields.lookupFor(_type)
+ _obj = _cls(_name, _len, _dec,
+ ignoreErrors=self._ignore_errors)
+ _recordLength += _obj.length
+ _defs.append(_obj)
+ # and now extend field definitions and
+ # update record length
+ self.fields += _defs
+ return _recordLength
+
+ ## interface methods
+
+ def addField(self, *defs):
+ """Add field definition to the header.
+
+ Examples:
+ dbfh.addField(
+ ("name", "C", 20),
+ dbf.DbfCharacterFieldDef("surname", 20),
+ dbf.DbfDateFieldDef("birthdate"),
+ ("member", "L"),
+ )
+ dbfh.addField(("price", "N", 5, 2))
+ dbfh.addField(dbf.DbfNumericFieldDef("origprice", 5, 2))
+
+ """
+ _oldLen = self.recordLength
+ self.recordLength += self._addField(*defs)
+ if not _oldLen:
+ self.recordLength += 1
+        # XXX: maybe just use:
+        #   self.recordLength += self._addField(*defs) + bool(not _oldLen)
+ # recalculate headerLength
+ self.headerLength = 32 + (32 * len(self.fields)) + 1
+ self.changed = True
+
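+    # Size bookkeeping sketch: with two fields the header grows to
+    # 32 (header) + 2 * 32 (descriptors) + 1 (terminator) bytes:
+    #
+    #     hdr = DbfHeader()
+    #     hdr.addField(("NAME", "C", 20), ("BIRTHDATE", "D"))
+    #     hdr.headerLength    # -> 97
+    #     hdr.recordLength    # -> 29 (1 deletion flag + 20 + 8)
+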
+ def write(self, stream):
+ """Encode and write header to the stream."""
+ stream.seek(0)
+ stream.write(self.toString())
+ fields = [_fld.toString() for _fld in self.fields]
+ stream.write(''.join(fields).encode(sys.getfilesystemencoding()))
+ stream.write(b'\x0D') # cr at end of all header data
+ self.changed = False
+
+ def toString(self):
+        """Return a 32-byte string with the encoded header."""
+ return struct.pack("<4BI2H",
+ self.signature,
+ self.year - 1900,
+ self.month,
+ self.day,
+ self.recordCount,
+ self.headerLength,
+ self.recordLength) + (b'\x00' * 20)
+ #TODO: figure out if bytes(utf-8) is correct here.
+
+ def setCurrentDate(self):
+ """Update ``self.lastUpdate`` field with current date value."""
+ self.lastUpdate = datetime.date.today()
+
+ def __getitem__(self, item):
+ """Return a field definition by numeric index or name string"""
+ if isinstance(item, str):
+ _name = item.upper()
+ for _field in self.fields:
+ if _field.name == _name:
+ return _field
+ else:
+ raise KeyError(item)
+ else:
+ # item must be field index
+ return self.fields[item]
+
+# vim: et sts=4 sw=4 :
diff --git a/tablib/packages/dbfpy3/record.py b/tablib/packages/dbfpy3/record.py
new file mode 100644
index 0000000..73b6952
--- /dev/null
+++ b/tablib/packages/dbfpy3/record.py
@@ -0,0 +1,266 @@
+"""DBF record definition.
+
+"""
+"""History (most recent first):
+11-feb-2007 [als] __repr__: added special case for invalid field values
+10-feb-2007 [als] added .rawFromStream()
+30-oct-2006 [als] fix record length in .fromStream()
+04-jul-2006 [als] added export declaration
+20-dec-2005 [yc] DbfRecord.write() -> DbfRecord._write();
+ added delete() method.
+16-dec-2005 [yc] record definition moved from `dbf`.
+"""
+
+__version__ = "$Revision: 1.7 $"[11:-2]
+__date__ = "$Date: 2007/02/11 09:05:49 $"[7:-2]
+
+__all__ = ["DbfRecord"]
+
+import sys
+
+from . import utils
+
+class DbfRecord(object):
+ """DBF record.
+
+    Instances of this class shouldn't be created manually;
+    use `dbf.Dbf.newRecord` instead.
+
+    The class implements the mapping/sequence interface, so
+    fields can be accessed via their names or indexes
+    (names are the preferred way to access fields).
+
+ Hint:
+ Use `store` method to save modified record.
+
+ Examples:
+ Add new record to the database:
+ db = Dbf(filename)
+ rec = db.newRecord()
+ rec["FIELD1"] = value1
+ rec["FIELD2"] = value2
+ rec.store()
+        Or the same, but modifying an existing
+        (here, the second) record:
+ db = Dbf(filename)
+ rec = db[2]
+ rec["FIELD1"] = value1
+ rec["FIELD2"] = value2
+ rec.store()
+
+ """
+
+ __slots__ = "dbf", "index", "deleted", "fieldData"
+
+ ## creation and initialization
+
+ def __init__(self, dbf, index=None, deleted=False, data=None):
+        """Instance initialization.
+
+ Arguments:
+ dbf:
+                A `Dbf.Dbf` instance this record belongs to.
+ index:
+ An integer record index or None. If this value is
+ None, record will be appended to the DBF.
+ deleted:
+ Boolean flag indicating whether this record
+ is a deleted record.
+ data:
+                A sequence or None. This is the data for the fields.
+ If this argument is None, default values will be used.
+
+ """
+ self.dbf = dbf
+ # XXX: I'm not sure ``index`` is necessary
+ self.index = index
+ self.deleted = deleted
+ if data is None:
+ self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields]
+ else:
+ self.fieldData = list(data)
+
+ # XXX: validate self.index before calculating position?
+ position = property(lambda self: self.dbf.header.headerLength + \
+ self.index * self.dbf.header.recordLength)
+
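+    # Offset sketch: a record's byte position follows from the header
+    # size and the fixed record length (the numbers are assumed):
+    #
+    #     headerLength=97, recordLength=29, index=2
+    #     position == 97 + 2 * 29 == 155
+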
+ def rawFromStream(cls, dbf, index):
+ """Return raw record contents read from the stream.
+
+ Arguments:
+ dbf:
+ A `Dbf.Dbf` instance containing the record.
+ index:
+ Index of the record in the records' container.
+ This argument can't be None in this call.
+
+ Return value is a string containing record data in DBF format.
+
+ """
+        # XXX: maybe assume that the current stream position
+        # is already the required one? That could save the time
+        # needed to calculate where to seek in the file.
+ dbf.stream.seek(dbf.header.headerLength +
+ index * dbf.header.recordLength)
+ return dbf.stream.read(dbf.header.recordLength)
+ rawFromStream = classmethod(rawFromStream)
+
+ def fromStream(cls, dbf, index):
+ """Return a record read from the stream.
+
+ Arguments:
+ dbf:
+ A `Dbf.Dbf` instance new record should belong to.
+ index:
+ Index of the record in the records' container.
+ This argument can't be None in this call.
+
+ Return value is an instance of the current class.
+
+ """
+ return cls.fromString(dbf, cls.rawFromStream(dbf, index), index)
+ fromStream = classmethod(fromStream)
+
+ def fromString(cls, dbf, string, index=None):
+ """Return record read from the string object.
+
+ Arguments:
+ dbf:
+ A `Dbf.Dbf` instance new record should belong to.
+ string:
+ A string new record should be created from.
+ index:
+ Index of the record in the container. If this
+ argument is None, record will be appended.
+
+ Return value is an instance of the current class.
+
+ """
+        return cls(dbf, index, string[0:1] == b"*",
+ [_fd.decodeFromRecord(string) for _fd in dbf.header.fields])
+ fromString = classmethod(fromString)
+
+ ## object representation
+
+ def __repr__(self):
+ _template = "%%%ds: %%s (%%s)" % max([len(_fld)
+ for _fld in self.dbf.fieldNames])
+ _rv = []
+ for _fld in self.dbf.fieldNames:
+ _val = self[_fld]
+ if _val is utils.INVALID_VALUE:
+ _rv.append(_template %
+ (_fld, "None", "value cannot be decoded"))
+ else:
+ _rv.append(_template % (_fld, _val, type(_val)))
+ return "\n".join(_rv)
+
+ ## protected methods
+
+ def _write(self):
+ """Write data to the dbf stream.
+
+        Note:
+            This isn't a public method; it's better to
+            use 'store' from public code.
+            By design, the ``_write`` method should be called
+            only from the `Dbf` instance.
+
+
+ """
+ self._validateIndex(False)
+ self.dbf.stream.seek(self.position)
+ self.dbf.stream.write(bytes(self.toString(),
+ sys.getfilesystemencoding()))
+ # FIXME: may be move this write somewhere else?
+ # why we should check this condition for each record?
+ if self.index == len(self.dbf):
+ # this is the last record,
+ # we should write SUB (ASCII 26)
+ self.dbf.stream.write(b"\x1A")
+
+ ## utility methods
+
+ def _validateIndex(self, allowUndefined=True, checkRange=False):
+        """Validate the ``self.index`` value.
+
+        If the ``allowUndefined`` argument is True, the function
+        does nothing when ``self.index`` is None.
+
+ """
+ if self.index is None:
+ if not allowUndefined:
+ raise ValueError("Index is undefined")
+ elif self.index < 0:
+ raise ValueError("Index can't be negative (%s)" % self.index)
+        elif checkRange and self.index >= self.dbf.header.recordCount:
+ raise ValueError("There are only %d records in the DBF" %
+ self.dbf.header.recordCount)
+
+ ## interface methods
+
+ def store(self):
+ """Store current record in the DBF.
+
+        If ``self.index`` is None, this record will be appended to the
+        records of the DBF this record belongs to; otherwise the record
+        with that index will be replaced.
+
+ """
+ self._validateIndex()
+ if self.index is None:
+ self.index = len(self.dbf)
+ self.dbf.append(self)
+ else:
+ self.dbf[self.index] = self
+
+ def delete(self):
+        """Mark record as deleted."""
+ self.deleted = True
+
+ def toString(self):
+ """Return string packed record values."""
+ return "".join([" *"[self.deleted]] + [
+ _def.encodeValue(_dat)
+ for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData)
+ ])
+
+ def asList(self):
+ """Return a flat list of fields.
+
+ Note:
+            Changes to the list's values won't change
+            the real values stored in this object.
+
+ """
+ return self.fieldData[:]
+
+ def asDict(self):
+ """Return a dictionary of fields.
+
+ Note:
+            Changes to the dict's values won't change
+            the real values stored in this object.
+
+ """
+ return dict([_i for _i in zip(self.dbf.fieldNames, self.fieldData)])
+
+ def __getitem__(self, key):
+ """Return value by field name or field index."""
+ if isinstance(key, int):
+ # integer index of the field
+ return self.fieldData[key]
+ # assuming string field name
+ return self.fieldData[self.dbf.indexOfFieldName(key)]
+
+ def __setitem__(self, key, value):
+ """Set field value by integer index of the field or string name."""
+        if isinstance(key, int):
+            # integer index of the field
+            self.fieldData[key] = value
+            return
+ # assuming string field name
+ self.fieldData[self.dbf.indexOfFieldName(key)] = value
+
+# vim: et sts=4 sw=4 :
diff --git a/tablib/packages/dbfpy3/utils.py b/tablib/packages/dbfpy3/utils.py
new file mode 100644
index 0000000..856ade8
--- /dev/null
+++ b/tablib/packages/dbfpy3/utils.py
@@ -0,0 +1,170 @@
+"""String utilities.
+
+TODO:
+ - allow strings in getDateTime routine;
+"""
+"""History (most recent first):
+11-feb-2007 [als] added INVALID_VALUE
+10-feb-2007 [als] allow date strings padded with spaces instead of zeroes
+20-dec-2005 [yc] handle long objects in getDate/getDateTime
+16-dec-2005 [yc] created from ``strutil`` module.
+"""
+
+__version__ = "$Revision: 1.4 $"[11:-2]
+__date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2]
+
+import datetime
+import time
+
+
+def unzfill(s):
+    """Return a string without ASCII NULs.
+
+    This function searches for the first NUL (ASCII 0) occurrence
+    and truncates the string at that position.
+
+    """
+    try:
+        return s[:s.index(b'\0')]
+    except ValueError:
+        return s
+
+
+def getDate(date=None):
+ """Return `datetime.date` instance.
+
+ Type of the ``date`` argument could be one of the following:
+ None:
+ use current date value;
+ datetime.date:
+ this value will be returned;
+ datetime.datetime:
+ the result of the date.date() will be returned;
+ string:
+        assuming "%Y%m%d" or "%y%m%d" format;
+    number:
+        assuming it's a timestamp (returned, for example,
+        by the time.time() call);
+    sequence:
+        assuming (year, month, day, ...) sequence;
+
+    Additionally, if ``date`` has a callable ``ticks`` attribute,
+    it will be called and the result treated as a timestamp value.
+
+ """
+ if date is None:
+ # use current value
+ return datetime.date.today()
+ if isinstance(date, datetime.date):
+ return date
+ if isinstance(date, datetime.datetime):
+ return date.date()
+ if isinstance(date, (int, float)):
+ # date is a timestamp
+ return datetime.date.fromtimestamp(date)
+ if isinstance(date, str):
+ date = date.replace(" ", "0")
+ if len(date) == 6:
+ # yymmdd
+ return datetime.date(*time.strptime(date, "%y%m%d")[:3])
+ # yyyymmdd
+ return datetime.date(*time.strptime(date, "%Y%m%d")[:3])
+ if hasattr(date, "__getitem__"):
+ # a sequence (assuming date/time tuple)
+ return datetime.date(*date[:3])
+ return datetime.date.fromtimestamp(date.ticks())
+
+
+def getDateTime(value=None):
+ """Return `datetime.datetime` instance.
+
+ Type of the ``value`` argument could be one of the following:
+ None:
+ use current date value;
+ datetime.date:
+ result will be converted to the `datetime.datetime` instance
+ using midnight;
+ datetime.datetime:
+ ``value`` will be returned as is;
+ string:
+ *** CURRENTLY NOT SUPPORTED ***;
+ number:
+        assuming it's a timestamp (returned, for example,
+        by the time.time() call);
+    sequence:
+        assuming (year, month, day, ...) sequence;
+
+    Additionally, if ``value`` has a callable ``ticks`` attribute,
+    it will be called and the result treated as a timestamp value.
+
+ """
+ if value is None:
+ # use current value
+ return datetime.datetime.today()
+ if isinstance(value, datetime.datetime):
+ return value
+ if isinstance(value, datetime.date):
+ return datetime.datetime.fromordinal(value.toordinal())
+ if isinstance(value, (int, float)):
+ # value is a timestamp
+ return datetime.datetime.fromtimestamp(value)
+ if isinstance(value, str):
+ raise NotImplementedError("Strings aren't currently implemented")
+ if hasattr(value, "__getitem__"):
+ # a sequence (assuming date/time tuple)
+ return datetime.datetime(*tuple(value)[:6])
+ return datetime.datetime.fromtimestamp(value.ticks())
+
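+# A quick sketch of the accepted argument types (illustrative only):
+#
+#     >>> getDateTime(datetime.date(2007, 2, 11))
+#     datetime.datetime(2007, 2, 11, 0, 0)
+#     >>> getDateTime((2007, 2, 11, 9, 5, 49))
+#     datetime.datetime(2007, 2, 11, 9, 5, 49)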
+
+class classproperty(property):
+    """Works in the same way as a ``property``, but on the class rather than the instance."""
+
+ def __get__(self, obj, cls):
+ return self.fget(cls)
+
+
+class _InvalidValue(object):
+
+ """Value returned from DBF records when field validation fails
+
+ The value is not equal to anything except for itself
+ and equal to all empty values: None, 0, empty string etc.
+ In other words, invalid value is equal to None and not equal
+ to None at the same time.
+
+ This value yields zero upon explicit conversion to a number type,
+ empty string for string types, and False for boolean.
+
+ """
+
+ def __eq__(self, other):
+ return not other
+
+ def __ne__(self, other):
+ return not (other is self)
+
+ def __bool__(self):
+ return False
+
+ def __int__(self):
+ return 0
+ __long__ = __int__
+
+ def __float__(self):
+ return 0.0
+
+ def __str__(self):
+ return ""
+
+ def __unicode__(self):
+ return ""
+
+ def __repr__(self):
+ return "<INVALID>"
+
+# invalid value is a constant singleton
+INVALID_VALUE = _InvalidValue()
+
+# vim: set et sts=4 sw=4 :
diff --git a/tablib/packages/unicodecsv/__init__.py b/tablib/packages/unicodecsv/__init__.py
index e640987..6a20118 100644
--- a/tablib/packages/unicodecsv/__init__.py
+++ b/tablib/packages/unicodecsv/__init__.py
@@ -1,22 +1,65 @@
# -*- coding: utf-8 -*-
import csv
-from csv import *
+try:
+ from itertools import izip
+except ImportError:
+ izip = zip
#http://semver.org/
-VERSION = (0, 8, 0)
+VERSION = (0, 10, 1)
__version__ = ".".join(map(str,VERSION))
-def _stringify(s, encoding):
- if type(s)==unicode:
- return s.encode(encoding)
+pass_throughs = [
+ 'register_dialect',
+ 'unregister_dialect',
+ 'get_dialect',
+ 'list_dialects',
+ 'field_size_limit',
+ 'Dialect',
+ 'excel',
+ 'excel_tab',
+ 'Sniffer',
+ 'QUOTE_ALL',
+ 'QUOTE_MINIMAL',
+ 'QUOTE_NONNUMERIC',
+ 'QUOTE_NONE',
+ 'Error'
+]
+__all__ = [
+ 'reader',
+ 'writer',
+ 'DictReader',
+ 'DictWriter',
+] + pass_throughs
+
+for prop in pass_throughs:
+    globals()[prop] = getattr(csv, prop)
+
+def _stringify(s, encoding, errors):
+ if s is None:
+ return ''
+ if isinstance(s, unicode):
+ return s.encode(encoding, errors)
elif isinstance(s, (int , float)):
pass #let csv.QUOTE_NONNUMERIC do its thing.
- elif type(s) != str:
+ elif not isinstance(s, str):
s=str(s)
return s
-def _stringify_list(l, encoding):
- return [_stringify(s, encoding) for s in l]
+def _stringify_list(l, encoding, errors='strict'):
+ try:
+ return [_stringify(s, encoding, errors) for s in iter(l)]
+ except TypeError as e:
+ raise csv.Error(str(e))
+
+def _unicodify(s, encoding):
+ if s is None:
+ return None
+ if isinstance(s, (unicode, int, float)):
+ return s
+ elif isinstance(s, str):
+ return s.decode(encoding)
+ return s
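+# Encoding round-trip sketch (Python 2 semantics; the values are assumed):
+#
+#     >>> _stringify(u'\xe9', 'utf-8', 'strict')
+#     '\xc3\xa9'
+#     >>> _unicodify('\xc3\xa9', 'utf-8')
+#     u'\xe9'
+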
class UnicodeWriter(object):
"""
@@ -28,78 +71,127 @@ class UnicodeWriter(object):
>>> f.seek(0)
>>> r = unicodecsv.reader(f, encoding='utf-8')
>>> row = r.next()
- >>> print row[0], row[1]
- é ñ
+ >>> row[0] == u'é'
+ True
+ >>> row[1] == u'ñ'
+ True
"""
- def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
- self.writer = csv.writer(f)
- self.dialect = dialect
+ def __init__(self, f, dialect=csv.excel, encoding='utf-8', errors='strict',
+ *args, **kwds):
self.encoding = encoding
- self.writer = csv.writer(f, dialect=dialect, **kwds)
+ self.writer = csv.writer(f, dialect, *args, **kwds)
+ self.encoding_errors = errors
def writerow(self, row):
- self.writer.writerow(_stringify_list(row, self.encoding))
+        self.writer.writerow(_stringify_list(row, self.encoding,
+                                             self.encoding_errors))
def writerows(self, rows):
for row in rows:
self.writerow(row)
+
+ @property
+ def dialect(self):
+ return self.writer.dialect
writer = UnicodeWriter
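+
+# Usage sketch (file name hypothetical): the extra ``errors`` argument
+# is forwarded to unicode.encode(), so unencodable cells can be
+# replaced rather than raising UnicodeEncodeError:
+#
+#     w = UnicodeWriter(open('out.csv', 'wb'), encoding='ascii',
+#                       errors='replace')
+#     w.writerow([u'caf\xe9', 42])   # row is written as 'caf?,42'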
class UnicodeReader(object):
- def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
- self.reader = csv.reader(f, dialect=dialect, **kwds)
+ def __init__(self, f, dialect=None, encoding='utf-8', errors='strict',
+ **kwds):
+        format_params = ['delimiter', 'doublequote', 'escapechar',
+                         'lineterminator', 'quotechar', 'quoting',
+                         'skipinitialspace']
+        if dialect is None:
+            if not any(kwd_name in format_params for kwd_name in kwds):
+ dialect = csv.excel
+ self.reader = csv.reader(f, dialect, **kwds)
self.encoding = encoding
+ self.encoding_errors = errors
def next(self):
row = self.reader.next()
- return [unicode(s, self.encoding) for s in row]
+ encoding = self.encoding
+ encoding_errors = self.encoding_errors
+ float_ = float
+ unicode_ = unicode
+ return [(value if isinstance(value, float_) else
+ unicode_(value, encoding, encoding_errors)) for value in row]
def __iter__(self):
return self
+
+ @property
+ def dialect(self):
+ return self.reader.dialect
+
+ @property
+ def line_num(self):
+ return self.reader.line_num
reader = UnicodeReader
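+
+# Note on the dialect handling in UnicodeReader.__init__: when no
+# dialect is supplied but formatting keywords are, ``dialect`` stays
+# None and the underlying csv.reader builds its dialect from those
+# keywords alone; csv.excel is assumed only when neither is given.
+# Sketch (file name hypothetical):
+#
+#     r = UnicodeReader(open('data.tsv', 'rb'), delimiter='\t',
+#                       encoding='utf-8')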
class DictWriter(csv.DictWriter):
"""
>>> from cStringIO import StringIO
>>> f = StringIO()
- >>> w = DictWriter(f, ['a', 'b'], restval=u'î')
- >>> w.writerow({'a':'1'})
- >>> w.writerow({'a':'1', 'b':u'ø'})
- >>> w.writerow({'a':u'é'})
+ >>> w = DictWriter(f, ['a', u'ñ', 'b'], restval=u'î')
+ >>> w.writerow({'a':'1', u'ñ':'2'})
+ >>> w.writerow({'a':'1', u'ñ':'2', 'b':u'ø'})
+ >>> w.writerow({'a':u'é', u'ñ':'2'})
>>> f.seek(0)
- >>> r = DictReader(f, fieldnames=['a'], restkey='r')
- >>> r.next() == {'a':u'1', 'r':[u"î"]}
+ >>> r = DictReader(f, fieldnames=['a', u'ñ'], restkey='r')
+ >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'î']}
True
- >>> r.next() == {'a':u'1', 'r':[u"ø"]}
+ >>> r.next() == {'a': u'1', u'ñ':'2', 'r': [u'\xc3\xb8']}
+ True
+ >>> r.next() == {'a': u'\xc3\xa9', u'ñ':'2', 'r': [u'\xc3\xae']}
True
- >>> r.next() == {'a':u'é', 'r':[u"î"]}
"""
- def __init__(self, csvfile, fieldnames, restval='', extrasaction='raise', dialect='excel', encoding='utf-8', *args, **kwds):
- self.fieldnames = fieldnames
+    def __init__(self, csvfile, fieldnames, restval='',
+                 extrasaction='raise', dialect='excel',
+                 encoding='utf-8', errors='strict', *args, **kwds):
self.encoding = encoding
- self.restval = restval
- self.writer = csv.DictWriter(csvfile, fieldnames, restval, extrasaction, dialect, *args, **kwds)
- def writerow(self, d):
- for fieldname in self.fieldnames:
- if fieldname in d:
- d[fieldname] = _stringify(d[fieldname], self.encoding)
- else:
- d[fieldname] = _stringify(self.restval, self.encoding)
- self.writer.writerow(d)
+        csv.DictWriter.__init__(self, csvfile, fieldnames, restval,
+                                extrasaction, dialect, *args, **kwds)
+        self.writer = UnicodeWriter(csvfile, dialect, encoding=encoding,
+                                    errors=errors, *args, **kwds)
+ self.encoding_errors = errors
+
+    def writeheader(self):
+        # The header cells go through writerow(), which already encodes
+        # each value; no separate stringification pass is needed.
+        header = dict(zip(self.fieldnames, self.fieldnames))
+        self.writerow(header)
class DictReader(csv.DictReader):
- def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None, dialect='excel', encoding='utf-8', *args, **kwds):
- self.restkey = restkey
- self.encoding = encoding
- self.reader = csv.DictReader(csvfile, fieldnames, restkey, restval, dialect, *args, **kwds)
+ """
+ >>> from cStringIO import StringIO
+ >>> f = StringIO()
+ >>> w = DictWriter(f, fieldnames=['name', 'place'])
+ >>> w.writerow({'name': 'Cary Grant', 'place': 'hollywood'})
+ >>> w.writerow({'name': 'Nathan Brillstone', 'place': u'øLand'})
+ >>> w.writerow({'name': u'Willam ø. Unicoder', 'place': u'éSpandland'})
+ >>> f.seek(0)
+ >>> r = DictReader(f, fieldnames=['name', 'place'])
+ >>> print r.next() == {'name': 'Cary Grant', 'place': 'hollywood'}
+ True
+ >>> print r.next() == {'name': 'Nathan Brillstone', 'place': u'øLand'}
+ True
+ >>> print r.next() == {'name': u'Willam ø. Unicoder', 'place': u'éSpandland'}
+ True
+ """
+ def __init__(self, csvfile, fieldnames=None, restkey=None, restval=None,
+ dialect='excel', encoding='utf-8', errors='strict', *args,
+ **kwds):
+ if fieldnames is not None:
+ fieldnames = _stringify_list(fieldnames, encoding)
+        csv.DictReader.__init__(self, csvfile, fieldnames, restkey,
+                                restval, dialect, *args, **kwds)
+ self.reader = UnicodeReader(csvfile, dialect, encoding=encoding,
+ errors=errors, *args, **kwds)
+ if fieldnames is None and not hasattr(csv.DictReader, 'fieldnames'):
+ # Python 2.5 fieldnames workaround. (http://bugs.python.org/issue3436)
+ reader = UnicodeReader(csvfile, dialect, encoding=encoding, *args, **kwds)
+ self.fieldnames = _stringify_list(reader.next(), reader.encoding)
+ self.unicode_fieldnames = [_unicodify(f, encoding) for f in
+ self.fieldnames]
+ self.unicode_restkey = _unicodify(restkey, encoding)
+
def next(self):
- d = self.reader.next()
- for k, v in d.items():
- if k == self.restkey:
- rest = v
- if rest:
- d[self.restkey] = [unicode(v, self.encoding) for v in rest]
- else:
- if v is not None:
- d[k] = unicode(v, self.encoding)
- return d
+ row = csv.DictReader.next(self)
+ result = dict((uni_key, row[str_key]) for (str_key, uni_key) in
+ izip(self.fieldnames, self.unicode_fieldnames))
+ rest = row.get(self.restkey)
+ if rest:
+ result[self.unicode_restkey] = rest
+ return result
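+
+# End-to-end sketch (Python 2, file name hypothetical): stream a UTF-8
+# CSV into dicts with unicode keys and values.
+#
+#     with open('people.csv', 'rb') as f:
+#         for row in DictReader(f, encoding='utf-8'):
+#             print row['name']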