libc/src/math/generic/tanf.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

//===-- Single-precision tan function -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/tanf.h"
#include "sincosf_utils.h"
#include "src/__support/FPUtil/FEnvImpl.h"
#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
#include "src/__support/FPUtil/except_value_utils.h"
#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/common.h"
#include "src/__support/macros/optimization.h"            // LIBC_UNLIKELY
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA

#include <errno.h>

namespace __llvm_libc {

// Inputs for which tanf returns a pre-computed result instead of evaluating
// its general formula. tanf only consults this table for
// |x| > 0x1.ada6a8p+27.
constexpr size_t N_EXCEPTS = 6;

constexpr fputil::ExceptValues<float, N_EXCEPTS> TANF_EXCEPTS{{
    // Entry layout:
    //   {bits of x, bits of tan(x) rounded toward zero (RZ),
    //    RU offset, RD offset, RN offset}

    // tan(0x1.ada6aap27) = 0x1.e80304p-3 (RZ)
    {0x4d56'd355U, 0x3e74'0182U, 1, 0, 0},
    // tan(0x1.862064p33) = -0x1.8dee56p-3 (RZ)
    {0x5043'1032U, 0xbe46'f72bU, 0, 1, 1},
    // tan(0x1.af61dap48) = 0x1.60d1c6p-2 (RZ)
    {0x57d7'b0edU, 0x3eb0'68e3U, 1, 0, 1},
    // tan(0x1.0088bcp52) = 0x1.ca1edp0 (RZ)
    {0x5980'445eU, 0x3fe5'0f68U, 1, 0, 0},
    // tan(0x1.f90dfcp72) = 0x1.597f9cp-1 (RZ)
    {0x63fc'86feU, 0x3f2c'bfceU, 1, 0, 0},
    // tan(0x1.a6ce12p86) = -0x1.c5612ep-1 (RZ)
    {0x6ad3'6709U, 0xbf62'b097U, 0, 1, 0},
}};

// Single-precision tan(x).
//
// The input is classified by the bit pattern of |x| and handled by one of
// these paths, checked in this order:
//   1. |x| <= pi/32:
//        a. x == +-0:    x is returned unchanged (keeps the sign of zero).
//        b. |x| < 2^-12: tan(x) ~ x; x + 2^-25 * x is returned so that the
//                        result is correct in directed rounding modes too.
//        c. otherwise:   odd polynomial approximation evaluated in double.
//   2. |x| == 0x1.143ec4p0: a single hard-coded exceptional value.
//   3. |x| > 0x1.ada6a8p+27:
//        a. inf or NaN:  x + quiet NaN is returned; for infinities EDOM and
//                        FE_INVALID are additionally reported through the
//                        fputil "if_required" helpers.
//        b. otherwise:   TANF_EXCEPTS is consulted; on a hit its value is
//                        returned, on a miss execution falls through to 4.
//   4. General case: tan(x) = sin(x) / cos(x), assembled in double from the
//      four values produced by sincosf_eval.
LLVM_LIBC_FUNCTION(float, tanf, (float x)) {
  using FPBits = typename fputil::FPBits<float>;
  FPBits xbits(x);
  // Top bit of the encoding: true when x is negative (including -0).
  bool x_sign = xbits.uintval() >> 31;
  // Encoding of |x|; all range checks below compare this integer directly.
  uint32_t x_abs = xbits.uintval() & 0x7fff'ffffU;

  // |x| <= pi/32 (0x3dc9'0fdb is the float encoding of pi/32).
  if (LIBC_UNLIKELY(x_abs <= 0x3dc9'0fdbU)) {
    double xd = static_cast<double>(x);

    // |x| < 0x1.0p-12f
    if (LIBC_UNLIKELY(x_abs < 0x3980'0000U)) {
      if (LIBC_UNLIKELY(x_abs == 0U)) {
        // For signed zeros.
        return x;
      }
      // When |x| < 2^-12, the relative error of the approximation tan(x) ~ x
      // is:
      //   |tan(x) - x| / |tan(x)| < |x^3| / (3|x|)
      //                           = x^2 / 3
      //                           < 2^-25
      //                           < epsilon(1)/2.
      // So the correctly rounded values of tan(x) are:
      //   = x + sign(x)*eps(x) if rounding mode = FE_UPWARD and x is positive,
      //                        or (rounding mode = FE_DOWNWARD and x is
      //                        negative),
      //   = x otherwise.
      // To simplify the rounding decision and make it more efficient, we use
      //   fma(x, 2^-25, x) instead.
      // Note: to use the formula x + 2^-25*x to decide the correct rounding, we
      // do need fma(x, 2^-25, x) to prevent underflow caused by 2^-25*x when
      // |x| < 2^-125. For targets without FMA instructions, we simply use
      // double for intermediate results as it is more efficient than using an
      // emulated version of FMA.
#if defined(LIBC_TARGET_CPU_HAS_FMA)
      return fputil::multiply_add(x, 0x1.0p-25f, x);
#else
      return static_cast<float>(fputil::multiply_add(xd, 0x1.0p-25, xd));
#endif // LIBC_TARGET_CPU_HAS_FMA
    }

    // 2^-12 <= |x| <= pi/32: evaluate tan(x) = x * P(x^2) in double.
    double xsq = xd * xd;

    // Degree-9 minimax odd polynomial of tan(x) generated by Sollya with:
    // > P = fpminimax(tan(x)/x, [|0, 2, 4, 6, 8|], [|1, D...|], [0, pi/32]);
    // The coefficients below are those of P, listed from the constant term up,
    // and P is evaluated in the variable x^2.
    double result =
        fputil::polyeval(xsq, 1.0, 0x1.555555553d022p-2, 0x1.111111ce442c1p-3,
                         0x1.ba180a6bbdecdp-5, 0x1.69c0a88a0b71fp-6);
    return static_cast<float>(xd * result);
  }

  // Check for exceptional values
  if (LIBC_UNLIKELY(x_abs == 0x3f8a1f62U)) {
    // |x| = 0x1.143ec4p0
    float sign = x_sign ? -1.0f : 1.0f;

    // The result is sign * (0x1.ddf9f4p0 + 0x1.1p-24), computed at run time so
    // that the final rounding follows the current rounding mode.
    // volatile is used to prevent compiler (gcc) from optimizing the
    // computation, making the results incorrect in different rounding modes.
    volatile float tmp = 0x1.ddf9f4p0f;
    tmp = fputil::multiply_add(sign, tmp, sign * 0x1.1p-24f);

    return tmp;
  }

  // |x| > 0x1.ada6a8p+27f
  if (LIBC_UNLIKELY(x_abs > 0x4d56'd354U)) {
    // Inf or NaN
    if (LIBC_UNLIKELY(x_abs >= 0x7f80'0000U)) {
      // Only infinities (exactly 0x7f80'0000) report an error; NaN inputs
      // skip this and are just propagated by the addition below.
      if (x_abs == 0x7f80'0000U) {
        fputil::set_errno_if_required(EDOM);
        fputil::raise_except_if_required(FE_INVALID);
      }
      return x + FPBits::build_quiet_nan(0);
    }
    // Other large exceptional values. On a miss we fall through to the
    // general computation below.
    if (auto r = TANF_EXCEPTS.lookup_odd(x_abs, x_sign);
        LIBC_UNLIKELY(r.has_value()))
      return r.value();
  }

  // For |x| >= pi/32, we use the definition of tan(x) function:
  //   tan(x) = sin(x) / cos(x)
  // Then we follow the same computations of sin(x) and cos(x) as sinf, cosf,
  // and sincosf.

  double xd = static_cast<double>(x);
  // Outputs of sincosf_eval. NOTE(review): from the formula below, sin_k/cos_k
  // and sin_y look like the sine/cosine of the two parts of the reduced
  // argument, and cosm1_y like cos(y) - 1 -- confirm against sincosf_utils.h.
  double sin_k, cos_k, sin_y, cosm1_y;

  sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
  // tan(x) = sin(x) / cos(x)
  //        = (sin_y * cos_k + cos_y * sin_k) / (cos_y * cos_k - sin_y * sin_k)
  // Each cos_y * t term is formed as cosm1_y * t + t, i.e. with
  // cos_y = cosm1_y + 1 folded into a multiply_add.
  using fputil::multiply_add;
  return static_cast<float>(
      multiply_add(sin_y, cos_k, multiply_add(cosm1_y, sin_k, sin_k)) /
      multiply_add(sin_y, -sin_k, multiply_add(cosm1_y, cos_k, cos_k)));
}

} // namespace __llvm_libc