gh-67790: Support float-style formatting for Fraction instances (#100161)

This PR adds support for float-style formatting for `Fraction` objects: it supports the `"e"`, `"E"`, `"f"`, `"F"`, `"g"`, `"G"` and `"%"` presentation types, and all the various bells and whistles of the formatting mini-language for those presentation types. The behaviour almost exactly matches that of `float`, but the implementation works with the exact `Fraction` value and does not do an intermediate conversion to `float`, and so avoids loss of precision or issues with numbers that are outside the dynamic range of the `float` type.

Note that the `"n"` presentation type is _not_ supported. That support could be added later if people have a need for it.

There's one corner-case where the behaviour differs from that of `float`: for the `float` type, if explicit alignment is specified with a fill character of `'0'` and alignment type `'='`, then thousands separators (if specified) are inserted into the padding string:

```python
>>> format(3.14, '0=11,.2f')
'0,000,003.14'
```

The exact same effect can be achieved by using the `'0'` flag:

```python
>>> format(3.14, '011,.2f')
'0,000,003.14'
```

For `Fraction`, only the `'0'` flag has the above behaviour with respect to thousands separators: there's no special-casing of the particular `'0='` fill-character/alignment combination. Instead, we treat the fill character `'0'` just like any other:

```python
>>> format(Fraction('3.14'), '0=11,.2f')
'00000003.14'
>>> format(Fraction('3.14'), '011,.2f')
'0,000,003.14'
```

The `Fraction` formatter is also stricter about combining these two things: it's not permitted to use both the `'0'` flag _and_ explicit alignment, on the basis that we should refuse the temptation to guess in the face of ambiguity. `float` is less picky:

```python
>>> format(3.14, '0<011,.2f')
'3.140000000'
>>> format(Fraction('3.14'), '0<011,.2f')
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users/mdickinson/Repositories/python/cpython/Lib/fractions.py", line 414, in __format__
    raise ValueError(
ValueError: Invalid format specifier '0<011,.2f' for object of type 'Fraction'; can't use explicit alignment when zero-padding
```
author: Mark Dickinson <dickinsm@gmail.com> 2023-01-22 18:44:49 +0000
committer: GitHub <noreply@github.com> 2023-01-22 18:44:49 +0000
commit: 3e09f3152e518cdc8779b52943b86812114ce071 (patch)
tree: e0305c7963c297e1c36734bd41bd28aa3ee26244 /Lib/fractions.py
parent: b53bad6dd08db78f5b5e2f41bea69c4b657fca13 (diff)
download: cpython-git-3e09f3152e518cdc8779b52943b86812114ce071.tar.gz
1 files changed, 206 insertions, 0 deletions
diff --git a/Lib/fractions.py b/Lib/fractions.py
index bdba6c3395..49a3f2841a 100644
--- a/Lib/fractions.py
+++ b/Lib/fractions.py
@@ -69,6 +69,96 @@ _RATIONAL_FORMAT = re.compile(r"""
 """, re.VERBOSE | re.IGNORECASE)
 
 
+# Helpers for formatting
+
+def _round_to_exponent(n, d, exponent, no_neg_zero=False):
+    """Round a rational number to the nearest multiple of a given power of 10.
+
+    Rounds the rational number n/d to the nearest integer multiple of
+    10**exponent, rounding to the nearest even integer multiple in the case of
+    a tie. Returns a pair (sign: bool, significand: int) representing the
+    rounded value (-1)**sign * significand * 10**exponent.
+
+    If no_neg_zero is true, then the returned sign will always be False when
+    the significand is zero. Otherwise, the sign reflects the sign of the
+    input.
+
+    d must be positive, but n and d need not be relatively prime.
+    """
+    if exponent >= 0:
+        d *= 10**exponent
+    else:
+        n *= 10**-exponent
+
+    # The divmod quotient is correct for round-ties-towards-positive-infinity;
+    # in the case of a tie, we zero out the least significant bit of q.
+    q, r = divmod(n + (d >> 1), d)
+    if r == 0 and d & 1 == 0:
+        q &= -2
+
+    sign = q < 0 if no_neg_zero else n < 0
+    return sign, abs(q)
+
+
+def _round_to_figures(n, d, figures):
+    """Round a rational number to a given number of significant figures.
+
+    Rounds the rational number n/d to the given number of significant figures
+    using the round-ties-to-even rule, and returns a triple
+    (sign: bool, significand: int, exponent: int) representing the rounded
+    value (-1)**sign * significand * 10**exponent.
+
+    In the special case where n = 0, returns a significand of zero and
+    an exponent of 1 - figures, for compatibility with formatting.
+    Otherwise, the returned significand satisfies
+    10**(figures - 1) <= significand < 10**figures.
+
+    d must be positive, but n and d need not be relatively prime.
+    figures must be positive.
+    """
+    # Special case for n == 0.
+    if n == 0:
+        return False, 0, 1 - figures
+
+    # Find integer m satisfying 10**(m - 1) <= abs(n)/d <= 10**m. (If abs(n)/d
+    # is a power of 10, either of the two possible values for m is fine.)
+    str_n, str_d = str(abs(n)), str(d)
+    m = len(str_n) - len(str_d) + (str_d <= str_n)
+
+    # Round to a multiple of 10**(m - figures). The significand we get
+    # satisfies 10**(figures - 1) <= significand <= 10**figures.
+    exponent = m - figures
+    sign, significand = _round_to_exponent(n, d, exponent)
+
+    # Adjust in the case where significand == 10**figures, to ensure that
+    # 10**(figures - 1) <= significand < 10**figures.
+    if len(str(significand)) == figures + 1:
+        significand //= 10
+        exponent += 1
+
+    return sign, significand, exponent
+
+
+# Pattern for matching float-style format specifications;
+# supports 'e', 'E', 'f', 'F', 'g', 'G' and '%' presentation types.
+_FLOAT_FORMAT_SPECIFICATION_MATCHER = re.compile(r"""
+    (?:
+        (?P<fill>.)?
+        (?P<align>[<>=^])
+    )?
+    (?P<sign>[-+ ]?)
+    (?P<no_neg_zero>z)?
+    (?P<alt>\#)?
+    # A '0' that's *not* followed by another digit is parsed as a minimum width
+    # rather than a zeropad flag.
+    (?P<zeropad>0(?=[0-9]))?
+    (?P<minimumwidth>0|[1-9][0-9]*)?
+    (?P<thousands_sep>[,_])?
+    (?:\.(?P<precision>0|[1-9][0-9]*))?
+    (?P<presentation_type>[eEfFgG%])
+""", re.DOTALL | re.VERBOSE).fullmatch
+
+
 class Fraction(numbers.Rational):
     """This class implements rational numbers.
 
@@ -314,6 +404,122 @@ class Fraction(numbers.Rational):
         else:
             return '%s/%s' % (self._numerator, self._denominator)
 
+    def __format__(self, format_spec, /):
+        """Format this fraction according to the given format specification."""
+
+        # Backwards compatibility with existing formatting.
+        if not format_spec:
+            return str(self)
+
+        # Validate and parse the format specifier.
+        match = _FLOAT_FORMAT_SPECIFICATION_MATCHER(format_spec)
+        if match is None:
+            raise ValueError(
+                f"Invalid format specifier {format_spec!r} "
+                f"for object of type {type(self).__name__!r}"
+            )
+        elif match["align"] is not None and match["zeropad"] is not None:
+            # Avoid the temptation to guess.
+            raise ValueError(
+                f"Invalid format specifier {format_spec!r} "
+                f"for object of type {type(self).__name__!r}; "
+                "can't use explicit alignment when zero-padding"
+            )
+        fill = match["fill"] or " "
+        align = match["align"] or ">"
+        pos_sign = "" if match["sign"] == "-" else match["sign"]
+        no_neg_zero = bool(match["no_neg_zero"])
+        alternate_form = bool(match["alt"])
+        zeropad = bool(match["zeropad"])
+        minimumwidth = int(match["minimumwidth"] or "0")
+        thousands_sep = match["thousands_sep"]
+        precision = int(match["precision"] or "6")
+        presentation_type = match["presentation_type"]
+        trim_zeros = presentation_type in "gG" and not alternate_form
+        trim_point = not alternate_form
+        exponent_indicator = "E" if presentation_type in "EFG" else "e"
+
+        # Round to get the digits we need, figure out where to place the point,
+        # and decide whether to use scientific notation. 'point_pos' is the
+        # position of the point relative to the _end_ of the digit string: that
+        # is, it's the number of digits that should follow the point.
+        if presentation_type in "fF%":
+            exponent = -precision
+            if presentation_type == "%":
+                exponent -= 2
+            negative, significand = _round_to_exponent(
+                self._numerator, self._denominator, exponent, no_neg_zero)
+            scientific = False
+            point_pos = precision
+        else:  # presentation_type in "eEgG"
+            figures = (
+                max(precision, 1)
+                if presentation_type in "gG"
+                else precision + 1
+            )
+            negative, significand, exponent = _round_to_figures(
+                self._numerator, self._denominator, figures)
+            scientific = (
+                presentation_type in "eE"
+                or exponent > 0
+                or exponent + figures <= -4
+            )
+            point_pos = figures - 1 if scientific else -exponent
+
+        # Get the suffix - the part following the digits, if any.
+        if presentation_type == "%":
+            suffix = "%"
+        elif scientific:
+            suffix = f"{exponent_indicator}{exponent + point_pos:+03d}"
+        else:
+            suffix = ""
+
+        # String of output digits, padded sufficiently with zeros on the left
+        # so that we'll have at least one digit before the decimal point.
+        digits = f"{significand:0{point_pos + 1}d}"
+
+        # Before padding, the output has the form f"{sign}{leading}{trailing}",
+        # where `leading` includes thousands separators if necessary and
+        # `trailing` includes the decimal separator where appropriate.
+        sign = "-" if negative else pos_sign
+        leading = digits[: len(digits) - point_pos]
+        frac_part = digits[len(digits) - point_pos :]
+        if trim_zeros:
+            frac_part = frac_part.rstrip("0")
+        separator = "" if trim_point and not frac_part else "."
+        trailing = separator + frac_part + suffix
+
+        # Do zero padding if required.
+        if zeropad:
+            min_leading = minimumwidth - len(sign) - len(trailing)
+            # When adding thousands separators, they'll be added to the
+            # zero-padded portion too, so we need to compensate.
+            leading = leading.zfill(
+                3 * min_leading // 4 + 1 if thousands_sep else min_leading
+            )
+
+        # Insert thousands separators if required.
+        if thousands_sep:
+            first_pos = 1 + (len(leading) - 1) % 3
+            leading = leading[:first_pos] + "".join(
+                thousands_sep + leading[pos : pos + 3]
+                for pos in range(first_pos, len(leading), 3)
+            )
+
+        # We now have a sign and a body. Pad with fill character if necessary
+        # and return.
+        body = leading + trailing
+        padding = fill * (minimumwidth - len(sign) - len(body))
+        if align == ">":
+            return padding + sign + body
+        elif align == "<":
+            return sign + body + padding
+        elif align == "^":
+            half = len(padding) // 2
+            return padding[:half] + sign + body + padding[half:]
+        else:  # align == "="
+            return sign + padding + body
+
     def _operator_fallbacks(monomorphic_operator, fallback_operator):
         """Generates forward and reverse operators given a purely-rational
         operator and a function from the operator module.
author	Mark Dickinson <dickinsm@gmail.com>	2023-01-22 18:44:49 +0000
committer	GitHub <noreply@github.com>	2023-01-22 18:44:49 +0000
commit	3e09f3152e518cdc8779b52943b86812114ce071 (patch)
tree	e0305c7963c297e1c36734bd41bd28aa3ee26244 /Lib/fractions.py
parent	b53bad6dd08db78f5b5e2f41bea69c4b657fca13 (diff)
download	cpython-git-3e09f3152e518cdc8779b52943b86812114ce071.tar.gz