blob: 2829ebb6d0047f5e29582a57021f42cdc1674d09 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
//===-- Common header for multiply-add implementations ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H
#include "src/__support/common.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA
namespace __llvm_libc {
namespace fputil {
// Simple wrapper for the multiply-add operation:
//   multiply_add(x, y, z) = x * y + z
// This primary template takes its arguments by value and evaluates the
// expression directly with T's own operator* and operator+. When
// LIBC_TARGET_CPU_HAS_FMA is defined, the float and double specializations
// further down in this header call fma(x, y, z) instead, so those types use
// FMA instructions where available.
// NOTE(review): the body is intentionally left as one expression; whether the
// compiler fuses it for floating-point T depends on build flags - confirm
// before restructuring.
template <typename T> LIBC_INLINE T multiply_add(T x, T y, T z) {
return x * y + z;
}
} // namespace fputil
} // namespace __llvm_libc
#if defined(LIBC_TARGET_CPU_HAS_FMA)
// FMA instructions are available.
#include "FMA.h"
namespace __llvm_libc {
namespace fputil {
// float specialization, only compiled when LIBC_TARGET_CPU_HAS_FMA is defined.
// Computes the multiply-add by forwarding to fma(x, y, z) rather than
// evaluating x * y + z as the primary template does.
// NOTE(review): fma is presumably the fputil wrapper declared in "FMA.h" -
// confirm against that header.
template <> LIBC_INLINE float multiply_add<float>(float x, float y, float z) {
return fma(x, y, z);
}
// double specialization, only compiled when LIBC_TARGET_CPU_HAS_FMA is
// defined. Computes the multiply-add by forwarding to fma(x, y, z) rather than
// evaluating x * y + z as the primary template does.
// NOTE(review): fma is presumably the fputil wrapper declared in "FMA.h" -
// confirm against that header.
template <>
LIBC_INLINE double multiply_add<double>(double x, double y, double z) {
return fma(x, y, z);
}
} // namespace fputil
} // namespace __llvm_libc
#endif // LIBC_TARGET_CPU_HAS_FMA
#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_MULTIPLY_ADD_H
|