diff options
| author | Steven D'Aprano <steve@pearwood.info> | 2016-10-05 03:24:45 +1100 | 
|---|---|---|
| committer | Steven D'Aprano <steve@pearwood.info> | 2016-10-05 03:24:45 +1100 | 
| commit | fb315dbe82853be833cf2d5ca1ecbaed7dab8d8e (patch) | |
| tree | abf309a040780841d89a1aff1b057090cbda60e4 /Lib/statistics.py | |
| parent | bafe2e33b72aee3aa6493ee483175dfe754f6ad5 (diff) | |
| download | cpython-git-fb315dbe82853be833cf2d5ca1ecbaed7dab8d8e.tar.gz | |
Issue #27181 remove geometric_mean and defer for 3.7.
Diffstat (limited to 'Lib/statistics.py')
| -rw-r--r-- | Lib/statistics.py | 269 | 
1 files changed, 1 insertions, 268 deletions
| diff --git a/Lib/statistics.py b/Lib/statistics.py index 7d53e0c0e2..30fe55c86a 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -11,7 +11,6 @@ Calculating averages  Function            Description  ==================  =============================================  mean                Arithmetic mean (average) of data. -geometric_mean      Geometric mean of data.  harmonic_mean       Harmonic mean of data.  median              Median (middle value) of data.  median_low          Low median of data. @@ -80,7 +79,7 @@ A single exception is defined: StatisticsError is a subclass of ValueError.  __all__ = [ 'StatisticsError',              'pstdev', 'pvariance', 'stdev', 'variance',              'median',  'median_low', 'median_high', 'median_grouped', -            'mean', 'mode', 'geometric_mean', 'harmonic_mean', +            'mean', 'mode', 'harmonic_mean',            ]  import collections @@ -287,229 +286,6 @@ def _fail_neg(values, errmsg='negative value'):          yield x -class _nroot_NS: -    """Hands off! Don't touch! - -    Everything inside this namespace (class) is an even-more-private -    implementation detail of the private _nth_root function. -    """ -    # This class exists only to be used as a namespace, for convenience -    # of being able to keep the related functions together, and to -    # collapse the group in an editor. If this were C# or C++, I would -    # use a Namespace, but the closest Python has is a class. -    # -    # FIXME possibly move this out into a separate module? -    # That feels like overkill, and may encourage people to treat it as -    # a public feature. -    def __init__(self): -        raise TypeError('namespace only, do not instantiate') - -    def nth_root(x, n): -        """Return the positive nth root of numeric x. - -        This may be more accurate than ** or pow(): - -        >>> math.pow(1000, 1.0/3)  #doctest:+SKIP -        9.999999999999998 - -        >>> _nth_root(1000, 3) -        10.0 -        >>> _nth_root(11**5, 5) -        11.0 -        >>> _nth_root(2, 12) -        1.0594630943592953 - -        """ -        if not isinstance(n, int): -            raise TypeError('degree n must be an int') -        if n < 2: -            raise ValueError('degree n must be 2 or more') -        if isinstance(x, decimal.Decimal): -            return _nroot_NS.decimal_nroot(x, n) -        elif isinstance(x, numbers.Real): -            return _nroot_NS.float_nroot(x, n) -        else: -            raise TypeError('expected a number, got %s') % type(x).__name__ - -    def float_nroot(x, n): -        """Handle nth root of Reals, treated as a float.""" -        assert isinstance(n, int) and n > 1 -        if x < 0: -            raise ValueError('domain error: root of negative number') -        elif x == 0: -            return math.copysign(0.0, x) -        elif x > 0: -            try: -                isinfinity = math.isinf(x) -            except OverflowError: -                return _nroot_NS.bignum_nroot(x, n) -            else: -                if isinfinity: -                    return float('inf') -                else: -                    return _nroot_NS.nroot(x, n) -        else: -            assert math.isnan(x) -            return float('nan') - -    def nroot(x, n): -        """Calculate x**(1/n), then improve the answer.""" -        # This uses math.pow() to calculate an initial guess for the root, -        # then uses the iterated nroot algorithm to improve it. -        # -        # By my testing, about 8% of the time the iterated algorithm ends -        # up converging to a result which is less accurate than the initial -        # guess. [FIXME: is this still true?] In that case, we use the -        # guess instead of the "improved" value. This way, we're never -        # less accurate than math.pow(). -        r1 = math.pow(x, 1.0/n) -        eps1 = abs(r1**n - x) -        if eps1 == 0.0: -            # r1 is the exact root, so we're done. By my testing, this -            # occurs about 80% of the time for x < 1 and 30% of the -            # time for x > 1. -            return r1 -        else: -            try: -                r2 = _nroot_NS.iterated_nroot(x, n, r1) -            except RuntimeError: -                return r1 -            else: -                eps2 = abs(r2**n - x) -                if eps1 < eps2: -                    return r1 -                return r2 - -    def iterated_nroot(a, n, g): -        """Return the nth root of a, starting with guess g. - -        This is a special case of Newton's Method. -        https://en.wikipedia.org/wiki/Nth_root_algorithm -        """ -        np = n - 1 -        def iterate(r): -            try: -                return (np*r + a/math.pow(r, np))/n -            except OverflowError: -                # If r is large enough, r**np may overflow. If that -                # happens, r**-np will be small, but not necessarily zero. -                return (np*r + a*math.pow(r, -np))/n -        # With a good guess, such as g = a**(1/n), this will converge in -        # only a few iterations. However a poor guess can take thousands -        # of iterations to converge, if at all. We guard against poor -        # guesses by setting an upper limit to the number of iterations. -        r1 = g -        r2 = iterate(g) -        for i in range(1000): -            if r1 == r2: -                break -            # Use Floyd's cycle-finding algorithm to avoid being trapped -            # in a cycle. -            # https://en.wikipedia.org/wiki/Cycle_detection#Tortoise_and_hare -            r1 = iterate(r1) -            r2 = iterate(iterate(r2)) -        else: -            # If the guess is particularly bad, the above may fail to -            # converge in any reasonable time. -            raise RuntimeError('nth-root failed to converge') -        return r2 - -    def decimal_nroot(x, n): -        """Handle nth root of Decimals.""" -        assert isinstance(x, decimal.Decimal) -        assert isinstance(n, int) -        if x.is_snan(): -            # Signalling NANs always raise. -            raise decimal.InvalidOperation('nth-root of snan') -        if x.is_qnan(): -            # Quiet NANs only raise if the context is set to raise, -            # otherwise return a NAN. -            ctx = decimal.getcontext() -            if ctx.traps[decimal.InvalidOperation]: -                raise decimal.InvalidOperation('nth-root of nan') -            else: -                # Preserve the input NAN. -                return x -        if x < 0: -            raise ValueError('domain error: root of negative number') -        if x.is_infinite(): -            return x -        # FIXME this hasn't had the extensive testing of the float -        # version _iterated_nroot so there's possibly some buggy -        # corner cases buried in here. Can it overflow? Fail to -        # converge or get trapped in a cycle? Converge to a less -        # accurate root? -        np = n - 1 -        def iterate(r): -            return (np*r + x/r**np)/n -        r0 = x**(decimal.Decimal(1)/n) -        assert isinstance(r0, decimal.Decimal) -        r1 = iterate(r0) -        while True: -            if r1 == r0: -                return r1 -            r0, r1 = r1, iterate(r1) - -    def bignum_nroot(x, n): -        """Return the nth root of a positive huge number.""" -        assert x > 0 -        # I state without proof that ⁿ√x ≈ ⁿ√2·ⁿ√(x//2) -        # and that for sufficiently big x the error is acceptable. -        # We now halve x until it is small enough to get the root. -        m = 0 -        while True: -            x //= 2 -            m += 1 -            try: -                y = float(x) -            except OverflowError: -                continue -            break -        a = _nroot_NS.nroot(y, n) -        # At this point, we want the nth-root of 2**m, or 2**(m/n). -        # We can write that as 2**(q + r/n) = 2**q * ⁿ√2**r where q = m//n. -        q, r = divmod(m, n) -        b = 2**q * _nroot_NS.nroot(2**r, n) -        return a * b - - -# This is the (private) function for calculating nth roots: -_nth_root = _nroot_NS.nth_root -assert type(_nth_root) is type(lambda: None) - - -def _product(values): -    """Return product of values as (exponent, mantissa).""" -    errmsg = 'mixed Decimal and float is not supported' -    prod = 1 -    for x in values: -        if isinstance(x, float): -            break -        prod *= x -    else: -        return (0, prod) -    if isinstance(prod, Decimal): -        raise TypeError(errmsg) -    # Since floats can overflow easily, we calculate the product as a -    # sort of poor-man's BigFloat. Given that: -    # -    #   x = 2**p * m  # p == power or exponent (scale), m = mantissa -    # -    # we can calculate the product of two (or more) x values as: -    # -    #   x1*x2 = 2**p1*m1 * 2**p2*m2 = 2**(p1+p2)*(m1*m2) -    # -    mant, scale = 1, 0  #math.frexp(prod)  # FIXME -    for y in chain([x], values): -        if isinstance(y, Decimal): -            raise TypeError(errmsg) -        m1, e1 = math.frexp(y) -        m2, e2 = math.frexp(mant) -        scale += (e1 + e2) -        mant = m1*m2 -    return (scale, mant) - -  # === Measures of central tendency (averages) ===  def mean(data): @@ -538,49 +314,6 @@ def mean(data):      return _convert(total/n, T) -def geometric_mean(data): -    """Return the geometric mean of data. - -    The geometric mean is appropriate when averaging quantities which -    are multiplied together rather than added, for example growth rates. -    Suppose an investment grows by 10% in the first year, falls by 5% in -    the second, then grows by 12% in the third, what is the average rate -    of growth over the three years? - -    >>> geometric_mean([1.10, 0.95, 1.12]) -    1.0538483123382172 - -    giving an average growth of 5.385%. Using the arithmetic mean will -    give approximately 5.667%, which is too high. - -    ``StatisticsError`` will be raised if ``data`` is empty, or any -    element is less than zero. -    """ -    if iter(data) is data: -        data = list(data) -    errmsg = 'geometric mean does not support negative values' -    n = len(data) -    if n < 1: -        raise StatisticsError('geometric_mean requires at least one data point') -    elif n == 1: -        x = data[0] -        if isinstance(g, (numbers.Real, Decimal)): -            if x < 0: -                raise StatisticsError(errmsg) -            return x -        else: -            raise TypeError('unsupported type') -    else: -        scale, prod = _product(_fail_neg(data, errmsg)) -        r = _nth_root(prod, n) -        if scale: -            p, q = divmod(scale, n) -            s = 2**p * _nth_root(2**q, n) -        else: -            s = 1 -        return s*r - -  def harmonic_mean(data):      """Return the harmonic mean of data. | 
