diff options
Diffstat (limited to 'Lib/statistics.py')
| -rw-r--r-- | Lib/statistics.py | 127 | 
1 files changed, 74 insertions, 53 deletions
| diff --git a/Lib/statistics.py b/Lib/statistics.py index ff07dc4a6b..8a6be7c759 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -80,12 +80,25 @@ A single exception is defined: StatisticsError is a subclass of ValueError.  """ -__all__ = [ 'StatisticsError', 'NormalDist', 'quantiles', -            'pstdev', 'pvariance', 'stdev', 'variance', -            'median',  'median_low', 'median_high', 'median_grouped', -            'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean', -            'geometric_mean', -          ] +__all__ = [ +    'NormalDist', +    'StatisticsError', +    'fmean', +    'geometric_mean', +    'harmonic_mean', +    'mean', +    'median', +    'median_grouped', +    'median_high', +    'median_low', +    'mode', +    'multimode', +    'pstdev', +    'pvariance', +    'quantiles', +    'stdev', +    'variance', +]  import math  import numbers @@ -304,8 +317,9 @@ def mean(data):      assert count == n      return _convert(total/n, T) +  def fmean(data): -    """ Convert data to floats and compute the arithmetic mean. +    """Convert data to floats and compute the arithmetic mean.      This runs faster than the mean() function and it always returns a float.      The result is highly accurate but not as perfect as mean(). @@ -313,7 +327,6 @@ def fmean(data):      >>> fmean([3.5, 4.0, 5.25])      4.25 -      """      try:          n = len(data) @@ -332,6 +345,7 @@ def fmean(data):      except ZeroDivisionError:          raise StatisticsError('fmean requires at least one data point') from None +  def geometric_mean(data):      """Convert data to floats and compute the geometric mean. @@ -350,6 +364,7 @@ def geometric_mean(data):          raise StatisticsError('geometric mean requires a non-empty dataset '                                ' containing positive numbers') from None +  def harmonic_mean(data):      """Return the harmonic mean of data. @@ -547,23 +562,23 @@ def mode(data):  def multimode(data): -    """ Return a list of the most frequently occurring values. - -        Will return more than one result if there are multiple modes -        or an empty list if *data* is empty. +    """Return a list of the most frequently occurring values. -        >>> multimode('aabbbbbbbbcc') -        ['b'] -        >>> multimode('aabbbbccddddeeffffgg') -        ['b', 'd', 'f'] -        >>> multimode('') -        [] +    Will return more than one result if there are multiple modes +    or an empty list if *data* is empty. +    >>> multimode('aabbbbbbbbcc') +    ['b'] +    >>> multimode('aabbbbccddddeeffffgg') +    ['b', 'd', 'f'] +    >>> multimode('') +    []      """      counts = Counter(iter(data)).most_common()      maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, []))      return list(map(itemgetter(0), mode_items)) +  # Notes on methods for computing quantiles  # ----------------------------------------  # @@ -601,7 +616,7 @@ def multimode(data):  # external packages can be used for anything more advanced.  def quantiles(dist, /, *, n=4, method='exclusive'): -    '''Divide *dist* into *n* continuous intervals with equal probability. +    """Divide *dist* into *n* continuous intervals with equal probability.      Returns a list of (n - 1) cut points separating the intervals. @@ -616,7 +631,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):      If *method* is set to *inclusive*, *dist* is treated as population      data.  The minimum value is treated as the 0th percentile and the      maximum value is treated as the 100th percentile. -    ''' +    """      if n < 1:          raise StatisticsError('n must be at least 1')      if hasattr(dist, 'inv_cdf'): @@ -646,6 +661,7 @@ def quantiles(dist, /, *, n=4, method='exclusive'):          return result      raise ValueError(f'Unknown method: {method!r}') +  # === Measures of spread ===  # See http://mathworld.wolfram.com/Variance.html @@ -805,18 +821,21 @@ def pstdev(data, mu=None):      except AttributeError:          return math.sqrt(var) +  ## Normal Distribution #####################################################  class NormalDist: -    'Normal distribution of a random variable' +    "Normal distribution of a random variable"      # https://en.wikipedia.org/wiki/Normal_distribution      # https://en.wikipedia.org/wiki/Variance#Properties -    __slots__ = {'_mu': 'Arithmetic mean of a normal distribution', -                 '_sigma': 'Standard deviation of a normal distribution'} +    __slots__ = { +        '_mu': 'Arithmetic mean of a normal distribution', +        '_sigma': 'Standard deviation of a normal distribution', +    }      def __init__(self, mu=0.0, sigma=1.0): -        'NormalDist where mu is the mean and sigma is the standard deviation.' +        "NormalDist where mu is the mean and sigma is the standard deviation."          if sigma < 0.0:              raise StatisticsError('sigma must be non-negative')          self._mu = mu @@ -824,40 +843,42 @@ class NormalDist:      @classmethod      def from_samples(cls, data): -        'Make a normal distribution instance from sample data.' +        "Make a normal distribution instance from sample data."          if not isinstance(data, (list, tuple)):              data = list(data)          xbar = fmean(data)          return cls(xbar, stdev(data, xbar))      def samples(self, n, *, seed=None): -        'Generate *n* samples for a given mean and standard deviation.' +        "Generate *n* samples for a given mean and standard deviation."          gauss = random.gauss if seed is None else random.Random(seed).gauss          mu, sigma = self._mu, self._sigma          return [gauss(mu, sigma) for i in range(n)]      def pdf(self, x): -        'Probability density function.  P(x <= X < x+dx) / dx' +        "Probability density function.  P(x <= X < x+dx) / dx"          variance = self._sigma ** 2.0          if not variance:              raise StatisticsError('pdf() not defined when sigma is zero') -        return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau * variance) +        return exp((x - self._mu)**2.0 / (-2.0*variance)) / sqrt(tau*variance)      def cdf(self, x): -        'Cumulative distribution function.  P(X <= x)' +        "Cumulative distribution function.  P(X <= x)"          if not self._sigma:              raise StatisticsError('cdf() not defined when sigma is zero')          return 0.5 * (1.0 + erf((x - self._mu) / (self._sigma * sqrt(2.0))))      def inv_cdf(self, p): -        '''Inverse cumulative distribution function.  x : P(X <= x) = p +        """Inverse cumulative distribution function.  x : P(X <= x) = p -        Finds the value of the random variable such that the probability of the -        variable being less than or equal to that value equals the given probability. +        Finds the value of the random variable such that the probability of +        the variable being less than or equal to that value equals the given +        probability. -        This function is also called the percent point function or quantile function. -        ''' -        if (p <= 0.0 or p >= 1.0): +        This function is also called the percent point function or quantile +        function. +        """ +        if p <= 0.0 or p >= 1.0:              raise StatisticsError('p must be in the range 0.0 < p < 1.0')          if self._sigma <= 0.0:              raise StatisticsError('cdf() not defined when sigma at or below zero') @@ -933,7 +954,7 @@ class NormalDist:          return self._mu + (x * self._sigma)      def overlap(self, other): -        '''Compute the overlapping coefficient (OVL) between two normal distributions. +        """Compute the overlapping coefficient (OVL) between two normal distributions.          Measures the agreement between two normal probability distributions.          Returns a value between 0.0 and 1.0 giving the overlapping area in @@ -943,7 +964,7 @@ class NormalDist:              >>> N2 = NormalDist(3.2, 2.0)              >>> N1.overlap(N2)              0.8035050657330205 -        ''' +        """          # See: "The overlapping coefficient as a measure of agreement between          # probability distributions and point estimation of the overlap of two          # normal densities" -- Henry F. Inman and Edwin L. Bradley Jr @@ -968,21 +989,21 @@ class NormalDist:      @property      def mean(self): -        'Arithmetic mean of the normal distribution.' +        "Arithmetic mean of the normal distribution."          return self._mu      @property      def stdev(self): -        'Standard deviation of the normal distribution.' +        "Standard deviation of the normal distribution."          return self._sigma      @property      def variance(self): -        'Square of the standard deviation.' +        "Square of the standard deviation."          return self._sigma ** 2.0      def __add__(x1, x2): -        '''Add a constant or another NormalDist instance. +        """Add a constant or another NormalDist instance.          If *other* is a constant, translate mu by the constant,          leaving sigma unchanged. @@ -990,13 +1011,13 @@ class NormalDist:          If *other* is a NormalDist, add both the means and the variances.          Mathematically, this works only if the two distributions are          independent or if they are jointly normally distributed. -        ''' +        """          if isinstance(x2, NormalDist):              return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma))          return NormalDist(x1._mu + x2, x1._sigma)      def __sub__(x1, x2): -        '''Subtract a constant or another NormalDist instance. +        """Subtract a constant or another NormalDist instance.          If *other* is a constant, translate by the constant mu,          leaving sigma unchanged. @@ -1004,51 +1025,51 @@ class NormalDist:          If *other* is a NormalDist, subtract the means and add the variances.          Mathematically, this works only if the two distributions are          independent or if they are jointly normally distributed. -        ''' +        """          if isinstance(x2, NormalDist):              return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma))          return NormalDist(x1._mu - x2, x1._sigma)      def __mul__(x1, x2): -        '''Multiply both mu and sigma by a constant. +        """Multiply both mu and sigma by a constant.          Used for rescaling, perhaps to change measurement units.          Sigma is scaled with the absolute value of the constant. -        ''' +        """          return NormalDist(x1._mu * x2, x1._sigma * fabs(x2))      def __truediv__(x1, x2): -        '''Divide both mu and sigma by a constant. +        """Divide both mu and sigma by a constant.          Used for rescaling, perhaps to change measurement units.          Sigma is scaled with the absolute value of the constant. -        ''' +        """          return NormalDist(x1._mu / x2, x1._sigma / fabs(x2))      def __pos__(x1): -        'Return a copy of the instance.' +        "Return a copy of the instance."          return NormalDist(x1._mu, x1._sigma)      def __neg__(x1): -        'Negates mu while keeping sigma the same.' +        "Negates mu while keeping sigma the same."          return NormalDist(-x1._mu, x1._sigma)      __radd__ = __add__      def __rsub__(x1, x2): -        'Subtract a NormalDist from a constant or another NormalDist.' +        "Subtract a NormalDist from a constant or another NormalDist."          return -(x1 - x2)      __rmul__ = __mul__      def __eq__(x1, x2): -        'Two NormalDist objects are equal if their mu and sigma are both equal.' +        "Two NormalDist objects are equal if their mu and sigma are both equal."          if not isinstance(x2, NormalDist):              return NotImplemented          return (x1._mu, x2._sigma) == (x2._mu, x2._sigma)      def __hash__(self): -        'NormalDist objects hash equal if their mu and sigma are both equal.' +        "NormalDist objects hash equal if their mu and sigma are both equal."          return hash((self._mu, self._sigma))      def __repr__(self): | 
