summary | refs | log | tree | commit | diff
path: root/Lib/csv.py
diff options
context:
space:
mode:
author: Skip Montanaro <skip@pobox.com> 2005-12-30 05:09:48 +0000
committer: Skip Montanaro <skip@pobox.com> 2005-12-30 05:09:48 +0000
commit: 9a40c68b694721508f34b3c2d4bccce6bb502087 (patch)
tree: 385febeb17112ca6f4bb25154a83658221b37183 /Lib/csv.py
parent: 58066f86256236385a0e2e47f3761c80e260b060 (diff)
download: cpython-9a40c68b694721508f34b3c2d4bccce6bb502087.tar.gz
Fix a delimiter detection problem in the sniffer. Sniffing "a|b|c\r\n" was
returning 'a' as the delimiter. It now returns '|', but not because I better understood what the code was supposed to do. Would someone who understands the idea behind _guess_delimiter() (see its docstring) check whether my fallback choice is genuinely better than the previous one, or whether it is just serendipity that it picked the proper delimiter?
Diffstat (limited to 'Lib/csv.py')
-rw-r--r-- Lib/csv.py 13
1 file changed, 10 insertions, 3 deletions
diff --git a/Lib/csv.py b/Lib/csv.py
index 7516380083..f213854783 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -152,10 +152,13 @@ class Sniffer:
quotechar, delimiter, skipinitialspace = \
self._guess_quote_and_delimiter(sample, delimiters)
- if delimiter is None:
+ if not delimiter:
delimiter, skipinitialspace = self._guess_delimiter(sample,
delimiters)
+ if not delimiter:
+ raise Error, "Could not determine delimiter"
+
class dialect(Dialect):
_name = "sniffed"
lineterminator = '\r\n'
@@ -329,8 +332,12 @@ class Sniffer:
data[0].count("%c " % d))
return (d, skipinitialspace)
- # finally, just return the first damn character in the list
- delim = delims.keys()[0]
+ # nothing else indicates a preference, pick the character that
+ # dominates(?)
+ items = [(v,k) for (k,v) in delims.items()]
+ items.sort()
+ delim = items[-1][1]
+
skipinitialspace = (data[0].count(delim) ==
data[0].count("%c " % delim))
return (delim, skipinitialspace)