diff options
| author | Raymond Hettinger <rhettinger@users.noreply.github.com> | 2019-03-12 00:43:27 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2019-03-12 00:43:27 -0700 |
| commit | fc06a192fdc44225ef1cc879f615a81931ad0a85 (patch) | |
| tree | 3a6eb28aac5ebf320cb665fa53a9a08cf75fbcc0 /Lib/statistics.py | |
| parent | 3e936431e23b424b1e4665e8165c245924f0ab02 (diff) | |
| download | cpython-git-fc06a192fdc44225ef1cc879f615a81931ad0a85.tar.gz | |
bpo-35892: Fix mode() and add multimode() (#12089)
Diffstat (limited to 'Lib/statistics.py')
| -rw-r--r-- | Lib/statistics.py | 72 |
1 file changed, 40 insertions, 32 deletions
diff --git a/Lib/statistics.py b/Lib/statistics.py index e85aaa996c..97f154373d 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -17,6 +17,7 @@ median_low Low median of data. median_high High median of data. median_grouped Median, or 50th percentile, of grouped data. mode Mode (most common value) of data. +multimode List of modes (most common values of data) ================== ============================================= Calculate the arithmetic mean ("the average") of data: @@ -79,10 +80,9 @@ A single exception is defined: StatisticsError is a subclass of ValueError. __all__ = [ 'StatisticsError', 'NormalDist', 'pstdev', 'pvariance', 'stdev', 'variance', 'median', 'median_low', 'median_high', 'median_grouped', - 'mean', 'mode', 'harmonic_mean', 'fmean', + 'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean', ] -import collections import math import numbers import random @@ -92,8 +92,8 @@ from decimal import Decimal from itertools import groupby from bisect import bisect_left, bisect_right from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum - - +from operator import itemgetter +from collections import Counter # === Exceptions === @@ -249,20 +249,6 @@ def _convert(value, T): raise -def _counts(data): - # Generate a table of sorted (value, frequency) pairs. - table = collections.Counter(iter(data)).most_common() - if not table: - return table - # Extract the values with the highest frequency. 
- maxfreq = table[0][1] - for i in range(1, len(table)): - if table[i][1] != maxfreq: - table = table[:i] - break - return table - - def _find_lteq(a, x): 'Locate the leftmost value exactly equal to x' i = bisect_left(a, x) @@ -334,9 +320,9 @@ def fmean(data): nonlocal n n += 1 return x - total = math.fsum(map(count, data)) + total = fsum(map(count, data)) else: - total = math.fsum(data) + total = fsum(data) try: return total / n except ZeroDivisionError: @@ -523,19 +509,38 @@ def mode(data): >>> mode(["red", "blue", "blue", "red", "green", "red", "red"]) 'red' - If there is not exactly one most common value, ``mode`` will raise - StatisticsError. + If there are multiple modes, return the first one encountered. + + >>> mode(['red', 'red', 'green', 'blue', 'blue']) + 'red' + + If *data* is empty, ``mode``, raises StatisticsError. + """ - # Generate a table of sorted (value, frequency) pairs. - table = _counts(data) - if len(table) == 1: - return table[0][0] - elif table: - raise StatisticsError( - 'no unique mode; found %d equally common values' % len(table) - ) - else: - raise StatisticsError('no mode for empty data') + data = iter(data) + try: + return Counter(data).most_common(1)[0][0] + except IndexError: + raise StatisticsError('no mode for empty data') from None + + +def multimode(data): + """ Return a list of the most frequently occurring values. + + Will return more than one result if there are multiple modes + or an empty list if *data* is empty. 
+ + >>> multimode('aabbbbbbbbcc') + ['b'] + >>> multimode('aabbbbccddddeeffffgg') + ['b', 'd', 'f'] + >>> multimode('') + [] + + """ + counts = Counter(iter(data)).most_common() + maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, [])) + return list(map(itemgetter(0), mode_items)) # === Measures of spread === @@ -836,6 +841,7 @@ if __name__ == '__main__': from math import isclose from operator import add, sub, mul, truediv from itertools import repeat + import doctest g1 = NormalDist(10, 20) g2 = NormalDist(-5, 25) @@ -893,3 +899,5 @@ if __name__ == '__main__': S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n), Y.samples(n))]) assert_close(X - Y, S) + + print(doctest.testmod()) |
