diff options
| author | Skip Montanaro <skip@pobox.com> | 2003-05-19 15:33:36 +0000 |
|---|---|---|
| committer | Skip Montanaro <skip@pobox.com> | 2003-05-19 15:33:36 +0000 |
| commit | 77892373313b7195e16755abe0604bd734df4736 (patch) | |
| tree | 71fe6dd39510d42b94b9f6a5831efb5ce9210af7 /Lib/csv.py | |
| parent | c626658a2803bb48a25b6c845ab60d72be1ed5d4 (diff) | |
| download | cpython-git-77892373313b7195e16755abe0604bd734df4736.tar.gz | |
* Correct Sniffer doc to correspond to the implementation.
* Add optional delimiters arg to Sniffer.sniff() which restricts the set of
candidate field delimiters.
Diffstat (limited to 'Lib/csv.py')
| -rw-r--r-- | Lib/csv.py | 16 |
1 file changed, 9 insertions, 7 deletions
```diff
diff --git a/Lib/csv.py b/Lib/csv.py
index 7e297b69e7..83b8aa447d 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -159,15 +159,16 @@ class Sniffer:
         self.preferred = [',', '\t', ';', ' ', ':']
 
 
-    def sniff(self, sample):
+    def sniff(self, sample, delimiters=None):
         """
         Returns a dialect (or None) corresponding to the sample
         """
 
         quotechar, delimiter, skipinitialspace = \
-                   self._guess_quote_and_delimiter(sample)
+                   self._guess_quote_and_delimiter(sample, delimiters)
         if delimiter is None:
-            delimiter, skipinitialspace = self._guess_delimiter(sample)
+            delimiter, skipinitialspace = self._guess_delimiter(sample,
+                                                                delimiters)
 
         class dialect(Dialect):
             _name = "sniffed"
@@ -184,7 +185,7 @@ class Sniffer:
         return dialect
 
 
-    def _guess_quote_and_delimiter(self, data):
+    def _guess_quote_and_delimiter(self, data, delimiters):
         """
         Looks for text enclosed between two identical quotes
         (the probable quotechar) which are preceded and followed
@@ -222,7 +223,7 @@ class Sniffer:
                 key = m[n]
             except KeyError:
                 continue
-            if key:
+            if key and (delimiters is None or key in delimiters):
                 delims[key] = delims.get(key, 0) + 1
             try:
                 n = regexp.groupindex['space'] - 1
@@ -248,7 +249,7 @@ class Sniffer:
         return (quotechar, delim, skipinitialspace)
 
 
-    def _guess_delimiter(self, data):
+    def _guess_delimiter(self, data, delimiters):
         """
         The delimiter /should/ occur the same number of times on
         each row. However, due to malformed data, it may not. We don't want
@@ -316,7 +317,8 @@ class Sniffer:
         while len(delims) == 0 and consistency >= threshold:
             for k, v in modeList:
                 if v[0] > 0 and v[1] > 0:
-                    if (v[1]/total) >= consistency:
+                    if ((v[1]/total) >= consistency and
+                        (delimiters is None or k in delimiters)):
                         delims[k] = v
             consistency -= 0.01
 
```
