summaryrefslogtreecommitdiff
path: root/Lib/statistics.py
diff options
context:
space:
mode:
authorRaymond Hettinger <rhettinger@users.noreply.github.com>2019-03-12 00:43:27 -0700
committerGitHub <noreply@github.com>2019-03-12 00:43:27 -0700
commitfc06a192fdc44225ef1cc879f615a81931ad0a85 (patch)
tree3a6eb28aac5ebf320cb665fa53a9a08cf75fbcc0 /Lib/statistics.py
parent3e936431e23b424b1e4665e8165c245924f0ab02 (diff)
downloadcpython-git-fc06a192fdc44225ef1cc879f615a81931ad0a85.tar.gz
bpo-35892: Fix mode() and add multimode() (#12089)
Diffstat (limited to 'Lib/statistics.py')
-rw-r--r--Lib/statistics.py72
1 files changed, 40 insertions, 32 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index e85aaa996c..97f154373d 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -17,6 +17,7 @@ median_low Low median of data.
median_high High median of data.
median_grouped Median, or 50th percentile, of grouped data.
mode Mode (most common value) of data.
+multimode List of modes (most common values) of data.
================== =============================================
Calculate the arithmetic mean ("the average") of data:
@@ -79,10 +80,9 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
__all__ = [ 'StatisticsError', 'NormalDist',
'pstdev', 'pvariance', 'stdev', 'variance',
'median', 'median_low', 'median_high', 'median_grouped',
- 'mean', 'mode', 'harmonic_mean', 'fmean',
+ 'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
]
-import collections
import math
import numbers
import random
@@ -92,8 +92,8 @@ from decimal import Decimal
from itertools import groupby
from bisect import bisect_left, bisect_right
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
-
-
+from operator import itemgetter
+from collections import Counter
# === Exceptions ===
@@ -249,20 +249,6 @@ def _convert(value, T):
raise
-def _counts(data):
- # Generate a table of sorted (value, frequency) pairs.
- table = collections.Counter(iter(data)).most_common()
- if not table:
- return table
- # Extract the values with the highest frequency.
- maxfreq = table[0][1]
- for i in range(1, len(table)):
- if table[i][1] != maxfreq:
- table = table[:i]
- break
- return table
-
-
def _find_lteq(a, x):
'Locate the leftmost value exactly equal to x'
i = bisect_left(a, x)
@@ -334,9 +320,9 @@ def fmean(data):
nonlocal n
n += 1
return x
- total = math.fsum(map(count, data))
+ total = fsum(map(count, data))
else:
- total = math.fsum(data)
+ total = fsum(data)
try:
return total / n
except ZeroDivisionError:
@@ -523,19 +509,38 @@ def mode(data):
>>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
'red'
- If there is not exactly one most common value, ``mode`` will raise
- StatisticsError.
+ If there are multiple modes, return the first one encountered.
+
+ >>> mode(['red', 'red', 'green', 'blue', 'blue'])
+ 'red'
+
+ If *data* is empty, ``mode`` raises StatisticsError.
+
"""
- # Generate a table of sorted (value, frequency) pairs.
- table = _counts(data)
- if len(table) == 1:
- return table[0][0]
- elif table:
- raise StatisticsError(
- 'no unique mode; found %d equally common values' % len(table)
- )
- else:
- raise StatisticsError('no mode for empty data')
+ data = iter(data)
+ try:
+ return Counter(data).most_common(1)[0][0]
+ except IndexError:
+ raise StatisticsError('no mode for empty data') from None
+
+
+def multimode(data):
+ """ Return a list of the most frequently occurring values.
+
+ Will return more than one result if there are multiple modes,
+ or an empty list if *data* is empty.
+
+ >>> multimode('aabbbbbbbbcc')
+ ['b']
+ >>> multimode('aabbbbccddddeeffffgg')
+ ['b', 'd', 'f']
+ >>> multimode('')
+ []
+
+ """
+ counts = Counter(iter(data)).most_common()
+ maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, []))
+ return list(map(itemgetter(0), mode_items))
# === Measures of spread ===
@@ -836,6 +841,7 @@ if __name__ == '__main__':
from math import isclose
from operator import add, sub, mul, truediv
from itertools import repeat
+ import doctest
g1 = NormalDist(10, 20)
g2 = NormalDist(-5, 25)
@@ -893,3 +899,5 @@ if __name__ == '__main__':
S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n),
Y.samples(n))])
assert_close(X - Y, S)
+
+ print(doctest.testmod())