bpo-35892: Fix mode() and add multimode() (#12089)

author: Raymond Hettinger <rhettinger@users.noreply.github.com> 2019-03-12 00:43:27 -0700
committer: GitHub <noreply@github.com> 2019-03-12 00:43:27 -0700
commit: fc06a192fdc44225ef1cc879f615a81931ad0a85 (patch)
tree: 3a6eb28aac5ebf320cb665fa53a9a08cf75fbcc0 /Lib/statistics.py
parent: 3e936431e23b424b1e4665e8165c245924f0ab02 (diff)
download: cpython-git-fc06a192fdc44225ef1cc879f615a81931ad0a85.tar.gz
1 files changed, 40 insertions, 32 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py
index e85aaa996c..97f154373d 100644
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@@ -17,6 +17,7 @@ median_low          Low median of data.
 median_high         High median of data.
 median_grouped      Median, or 50th percentile, of grouped data.
 mode                Mode (most common value) of data.
+multimode           List of modes (most common values of data)
 ==================  =============================================
 
 Calculate the arithmetic mean ("the average") of data:
@@ -79,10 +80,9 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
 __all__ = [ 'StatisticsError', 'NormalDist',
             'pstdev', 'pvariance', 'stdev', 'variance',
             'median',  'median_low', 'median_high', 'median_grouped',
-            'mean', 'mode', 'harmonic_mean', 'fmean',
+            'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
           ]
 
-import collections
 import math
 import numbers
 import random
@@ -92,8 +92,8 @@ from decimal import Decimal
 from itertools import groupby
 from bisect import bisect_left, bisect_right
 from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
-
-
+from operator import itemgetter
+from collections import Counter
 
 # === Exceptions ===
 
@@ -249,20 +249,6 @@ def _convert(value, T):
             raise
 
 
-def _counts(data):
-    # Generate a table of sorted (value, frequency) pairs.
-    table = collections.Counter(iter(data)).most_common()
-    if not table:
-        return table
-    # Extract the values with the highest frequency.
-    maxfreq = table[0][1]
-    for i in range(1, len(table)):
-        if table[i][1] != maxfreq:
-            table = table[:i]
-            break
-    return table
-
-
 def _find_lteq(a, x):
     'Locate the leftmost value exactly equal to x'
     i = bisect_left(a, x)
@@ -334,9 +320,9 @@ def fmean(data):
             nonlocal n
             n += 1
             return x
-        total = math.fsum(map(count, data))
+        total = fsum(map(count, data))
     else:
-        total = math.fsum(data)
+        total = fsum(data)
     try:
         return total / n
     except ZeroDivisionError:
@@ -523,19 +509,38 @@ def mode(data):
     >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
     'red'
 
-    If there is not exactly one most common value, ``mode`` will raise
-    StatisticsError.
+    If there are multiple modes, return the first one encountered.
+
+        >>> mode(['red', 'red', 'green', 'blue', 'blue'])
+        'red'
+
+    If *data* is empty, ``mode``, raises StatisticsError.
+
     """
-    # Generate a table of sorted (value, frequency) pairs.
-    table = _counts(data)
-    if len(table) == 1:
-        return table[0][0]
-    elif table:
-        raise StatisticsError(
-                'no unique mode; found %d equally common values' % len(table)
-                )
-    else:
-        raise StatisticsError('no mode for empty data')
+    data = iter(data)
+    try:
+        return Counter(data).most_common(1)[0][0]
+    except IndexError:
+        raise StatisticsError('no mode for empty data') from None
+
+
+def multimode(data):
+    """ Return a list of the most frequently occurring values.
+
+        Will return more than one result if there are multiple modes
+        or an empty list if *data* is empty.
+
+        >>> multimode('aabbbbbbbbcc')
+        ['b']
+        >>> multimode('aabbbbccddddeeffffgg')
+        ['b', 'd', 'f']
+        >>> multimode('')
+        []
+
+    """
+    counts = Counter(iter(data)).most_common()
+    maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, []))
+    return list(map(itemgetter(0), mode_items))
 
 
 # === Measures of spread ===
@@ -836,6 +841,7 @@ if __name__ == '__main__':
     from math import isclose
     from operator import add, sub, mul, truediv
     from itertools import repeat
+    import doctest
 
     g1 = NormalDist(10, 20)
     g2 = NormalDist(-5, 25)
@@ -893,3 +899,5 @@ if __name__ == '__main__':
     S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n),
                                                        Y.samples(n))])
     assert_close(X - Y, S)
+
+    print(doctest.testmod())
author	Raymond Hettinger <rhettinger@users.noreply.github.com>	2019-03-12 00:43:27 -0700
committer	GitHub <noreply@github.com>	2019-03-12 00:43:27 -0700
commit	fc06a192fdc44225ef1cc879f615a81931ad0a85 (patch)
tree	3a6eb28aac5ebf320cb665fa53a9a08cf75fbcc0 /Lib/statistics.py
parent	3e936431e23b424b1e4665e8165c245924f0ab02 (diff)
download	cpython-git-fc06a192fdc44225ef1cc879f615a81931ad0a85.tar.gz