summary | refs | log | tree | commit | diff
path: root/Lib/csv.py
diff options
context:
space:
mode:
author: Skip Montanaro <skip@pobox.com> 2003-05-19 15:33:36 +0000
committer: Skip Montanaro <skip@pobox.com> 2003-05-19 15:33:36 +0000
commit: 77892373313b7195e16755abe0604bd734df4736 (patch)
tree: 71fe6dd39510d42b94b9f6a5831efb5ce9210af7 /Lib/csv.py
parent: c626658a2803bb48a25b6c845ab60d72be1ed5d4 (diff)
download: cpython-git-77892373313b7195e16755abe0604bd734df4736.tar.gz
* Correct Sniffer doc to correspond to the implementation.
* Add optional delimiters arg to Sniffer.sniff() which restricts the set of candidate field delimiters.
Diffstat (limited to 'Lib/csv.py')
-rw-r--r-- Lib/csv.py 16
1 file changed, 9 insertions, 7 deletions
diff --git a/Lib/csv.py b/Lib/csv.py
index 7e297b69e7..83b8aa447d 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -159,15 +159,16 @@ class Sniffer:
self.preferred = [',', '\t', ';', ' ', ':']
- def sniff(self, sample):
+ def sniff(self, sample, delimiters=None):
"""
Returns a dialect (or None) corresponding to the sample
"""
quotechar, delimiter, skipinitialspace = \
- self._guess_quote_and_delimiter(sample)
+ self._guess_quote_and_delimiter(sample, delimiters)
if delimiter is None:
- delimiter, skipinitialspace = self._guess_delimiter(sample)
+ delimiter, skipinitialspace = self._guess_delimiter(sample,
+ delimiters)
class dialect(Dialect):
_name = "sniffed"
@@ -184,7 +185,7 @@ class Sniffer:
return dialect
- def _guess_quote_and_delimiter(self, data):
+ def _guess_quote_and_delimiter(self, data, delimiters):
"""
Looks for text enclosed between two identical quotes
(the probable quotechar) which are preceded and followed
@@ -222,7 +223,7 @@ class Sniffer:
key = m[n]
except KeyError:
continue
- if key:
+ if key and (delimiters is None or key in delimiters):
delims[key] = delims.get(key, 0) + 1
try:
n = regexp.groupindex['space'] - 1
@@ -248,7 +249,7 @@ class Sniffer:
return (quotechar, delim, skipinitialspace)
- def _guess_delimiter(self, data):
+ def _guess_delimiter(self, data, delimiters):
"""
The delimiter /should/ occur the same number of times on
each row. However, due to malformed data, it may not. We don't want
@@ -316,7 +317,8 @@ class Sniffer:
while len(delims) == 0 and consistency >= threshold:
for k, v in modeList:
if v[0] > 0 and v[1] > 0:
- if (v[1]/total) >= consistency:
+ if ((v[1]/total) >= consistency and
+ (delimiters is None or k in delimiters)):
delims[k] = v
consistency -= 0.01