diff options
| author | Timo Furrer <tuxtimo@gmail.com> | 2019-03-02 15:06:21 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-03-02 15:06:21 +0100 |
| commit | a10327a2833da8eee952d8b832afc33c3c55b282 (patch) | |
| tree | cf50b2c819fc562b90158264c7a05eb4727382f8 | |
| parent | dc24fda41505d9961cd43939893a1cea3598ad18 (diff) | |
| parent | e0de42ef061d90e92463964202d2807ce861f5c0 (diff) | |
| download | tablib-a10327a2833da8eee952d8b832afc33c3c55b282.tar.gz | |
Merge pull request #350 from browniebroke/bugfix/invalid-ascii-csv
Import ascii characters not valid with unicode literals - updated
| -rw-r--r-- | requirements.txt | 1 | ||||
| -rwxr-xr-x | setup.py | 2 | ||||
| -rw-r--r-- | tablib/compat.py | 4 | ||||
| -rw-r--r-- | tablib/core.py | 2 | ||||
| -rw-r--r-- | tablib/formats/_csv.py | 9 | ||||
| -rw-r--r-- | tablib/formats/_tsv.py | 4 | ||||
| -rwxr-xr-x | test_tablib.py | 24 |
7 files changed, 26 insertions, 20 deletions
diff --git a/requirements.txt b/requirements.txt index 2fab040..05db063 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +backports.csv==1.0.6 certifi==2017.7.27.1 chardet==3.0.4 et-xmlfile==1.0.1 @@ -35,7 +35,7 @@ packages = [ install = [ 'odfpy', 'openpyxl>=2.4.0', - 'unicodecsv', + 'backports.csv', 'xlrd', 'xlwt', 'pyyaml', diff --git a/tablib/compat.py b/tablib/compat.py index f054ebb..660697d 100644 --- a/tablib/compat.py +++ b/tablib/compat.py @@ -27,11 +27,11 @@ if is_py3: else: from cStringIO import StringIO as BytesIO - from cStringIO import StringIO + from StringIO import StringIO from tablib.packages import markup from tablib.packages.statistics import median from itertools import izip_longest - import unicodecsv as csv + from backports import csv import tablib.packages.dbfpy as dbfpy unicode = unicode diff --git a/tablib/core.py b/tablib/core.py index 809bb6e..78c4dce 100644 --- a/tablib/core.py +++ b/tablib/core.py @@ -862,7 +862,7 @@ class Dataset(object): against each cell value. """ - if isinstance(col, str): + if isinstance(col, unicode): if col in self.headers: col = self.headers.index(col) # get 'key' index from each data else: diff --git a/tablib/formats/_csv.py b/tablib/formats/_csv.py index 06e7830..8b536a7 100644 --- a/tablib/formats/_csv.py +++ b/tablib/formats/_csv.py @@ -3,15 +3,14 @@ """ Tablib - *SV Support. """ -from tablib.compat import is_py3, csv, StringIO +from tablib.compat import csv, StringIO, unicode title = 'csv' extensions = ('csv',) -DEFAULT_ENCODING = 'utf-8' -DEFAULT_DELIMITER = ',' +DEFAULT_DELIMITER = unicode(',') def export_set(dataset, **kwargs): @@ -19,8 +18,6 @@ def export_set(dataset, **kwargs): stream = StringIO() kwargs.setdefault('delimiter', DEFAULT_DELIMITER) - if not is_py3: - kwargs.setdefault('encoding', DEFAULT_ENCODING) _csv = csv.writer(stream, **kwargs) @@ -36,8 +33,6 @@ def import_set(dset, in_stream, headers=True, **kwargs): dset.wipe() kwargs.setdefault('delimiter', DEFAULT_DELIMITER) - if not is_py3: - kwargs.setdefault('encoding', DEFAULT_ENCODING) rows = csv.reader(StringIO(in_stream), **kwargs) for i, row in enumerate(rows): diff --git a/tablib/formats/_tsv.py b/tablib/formats/_tsv.py index 9380b3b..1c6d6a1 100644 --- a/tablib/formats/_tsv.py +++ b/tablib/formats/_tsv.py @@ -3,6 +3,7 @@ """ Tablib - TSV (Tab Separated Values) Support. """ +from tablib.compat import unicode from tablib.formats._csv import ( export_set as export_set_wrapper, import_set as import_set_wrapper, @@ -12,8 +13,7 @@ from tablib.formats._csv import ( title = 'tsv' extensions = ('tsv',) -DEFAULT_ENCODING = 'utf-8' -DELIMITER = '\t' +DELIMITER = unicode('\t') def export_set(dataset): """Returns TSV representation of Dataset.""" diff --git a/test_tablib.py b/test_tablib.py index 57b1b39..e7b7233 100755 --- a/test_tablib.py +++ b/test_tablib.py @@ -1,18 +1,19 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- """Tests for Tablib.""" +from __future__ import unicode_literals +import datetime import doctest import json -import unittest import sys +import unittest from uuid import uuid4 -import datetime - import tablib from tablib.compat import markup, unicode, is_py3 from tablib.core import Row +from tablib.formats import csv as csv_format class TablibTestCase(unittest.TestCase): @@ -227,21 +228,21 @@ class TablibTestCase(unittest.TestCase): # Delete from invalid index self.assertRaises(IndexError, self.founders.__delitem__, 3) - + def test_json_export(self): """Verify exporting dataset object as JSON""" - + address_id = uuid4() headers = self.headers + ('address_id',) founders = tablib.Dataset(headers=headers, title='Founders') founders.append(('John', 'Adams', 90, address_id)) founders_json = founders.export('json') - + expected_json = ( '[{"first_name": "John", "last_name": "Adams", "gpa": 90, ' '"address_id": "%s"}]' % str(address_id) ) - + self.assertEqual(founders_json, expected_json) def test_csv_export(self): @@ -571,6 +572,15 @@ class TablibTestCase(unittest.TestCase): self.assertEqual(_csv, data.csv) + def test_csv_import_set_with_unicode_str(self): + """Import CSV set with non-ascii characters in unicode literal""" + csv_text = ( + "id,givenname,surname,loginname,email,pref_firstname,pref_lastname\n" + "13765,Ævar,Arnfjörð,testing,test@example.com,Ævar,Arnfjörð" + ) + data.csv = csv_text + self.assertEqual(data.width, 7) + def test_tsv_import_set(self): """Generate and import TSV set serialization.""" data.append(self.john) |
