//===-- Generic implementation of memory function building blocks ---------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file provides generic C++ building blocks. // Depending on the requested size, the block operation uses unsigned integral // types, vector types or an array of the type with the maximum size. // // The maximum size is passed as a template argument. For instance, on x86 // platforms that only supports integral types the maximum size would be 8 // (corresponding to uint64_t). On this platform if we request the size 32, this // would be treated as a cpp::array. // // On the other hand, if the platform is x86 with support for AVX the maximum // size is 32 and the operation can be handled with a single native operation. // //===----------------------------------------------------------------------===// #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H #include "src/__support/CPP/array.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/common.h" #include "src/__support/endian.h" #include "src/__support/macros/optimization.h" #include "src/string/memory_utils/op_builtin.h" #include "src/string/memory_utils/utils.h" #include namespace __llvm_libc { // Compiler types using the vector attributes. using uint8x1_t = uint8_t __attribute__((__vector_size__(1))); using uint8x2_t = uint8_t __attribute__((__vector_size__(2))); using uint8x4_t = uint8_t __attribute__((__vector_size__(4))); using uint8x8_t = uint8_t __attribute__((__vector_size__(8))); using uint8x16_t = uint8_t __attribute__((__vector_size__(16))); using uint8x32_t = uint8_t __attribute__((__vector_size__(32))); using uint8x64_t = uint8_t __attribute__((__vector_size__(64))); } // namespace __llvm_libc namespace __llvm_libc::generic { // We accept three types of values as elements for generic operations: // - scalar : unsigned integral types // - vector : compiler types using the vector attributes // - array : a cpp::array where T is itself either a scalar or a vector. // The following traits help discriminate between these cases. template constexpr bool is_scalar_v = cpp::is_integral_v && cpp::is_unsigned_v; template constexpr bool is_vector_v = cpp::details::is_unqualified_any_of(); template struct is_array : cpp::false_type {}; template struct is_array> { static constexpr bool value = is_scalar_v || is_vector_v; }; template constexpr bool is_array_v = is_array::value; template constexpr bool is_element_type_v = is_scalar_v || is_vector_v || is_array_v; // template struct array_size {}; template struct array_size> : cpp::integral_constant {}; template constexpr size_t array_size_v = array_size::value; // Generic operations for the above type categories. template T load(CPtr src) { static_assert(is_element_type_v); if constexpr (is_scalar_v || is_vector_v) { return ::__llvm_libc::load(src); } else if constexpr (is_array_v) { using value_type = typename T::value_type; T Value; for (size_t I = 0; I < array_size_v; ++I) Value[I] = load(src + (I * sizeof(value_type))); return Value; } } template void store(Ptr dst, T value) { static_assert(is_element_type_v); if constexpr (is_scalar_v || is_vector_v) { ::__llvm_libc::store(dst, value); } else if constexpr (is_array_v) { using value_type = typename T::value_type; for (size_t I = 0; I < array_size_v; ++I) store(dst + (I * sizeof(value_type)), value[I]); } } template T splat(uint8_t value) { static_assert(is_scalar_v || is_vector_v); if constexpr (is_scalar_v) return T(~0) / T(0xFF) * T(value); else if constexpr (is_vector_v) { T Out; // This for loop is optimized out for vector types. for (size_t i = 0; i < sizeof(T); ++i) Out[i] = value; return Out; } } static_assert((UINTPTR_MAX == 4294967295U) || (UINTPTR_MAX == 18446744073709551615UL), "We currently only support 32- or 64-bit platforms"); #if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64) #define LLVM_LIBC_HAS_UINT64 #endif namespace details { // Checks that each type is sorted in strictly decreasing order of size. // i.e. sizeof(First) > sizeof(Second) > ... > sizeof(Last) template constexpr bool is_decreasing_size() { return sizeof(First) == 1; } template constexpr bool is_decreasing_size() { if constexpr (sizeof...(Next) > 0) return sizeof(First) > sizeof(Second) && is_decreasing_size(); else return sizeof(First) > sizeof(Second) && is_decreasing_size(); } template struct Largest; template struct Largest : cpp::type_identity {}; template struct Largest { using next = Largest; using type = cpp::conditional_t<(Size >= sizeof(T)), T, typename next::type>; }; } // namespace details // 'SupportedTypes' holds a list of natively supported types. // The types are instanciations of ScalarType or VectorType. // They should be ordered in strictly decreasing order. // The 'TypeFor' type retrieves is the largest supported type that can // handle 'Size' bytes. e.g. // // using ST = SupportedTypes, ScalarType>; // using Type = ST::TypeFor<10>; // static_assert(cpp:is_same_v>); template struct SupportedTypes { static_assert(details::is_decreasing_size()); using MaxType = First; template using TypeFor = typename details::Largest::type; }; // Map from sizes to structures offering static load, store and splat methods. // Note: On platforms lacking vector support, we use the ArrayType below and // decompose the operation in smaller pieces. // Lists a generic native types to use for Memset and Memmove operations. // TODO: Inject the native types within Memset and Memmove depending on the // target architectures and derive MaxSize from it. using NativeTypeMap = SupportedTypes; namespace details { // Helper to test if a type is void. template inline constexpr bool is_void_v = cpp::is_same_v; // In case the 'Size' is not supported we can fall back to a sequence of smaller // operations using the largest natively supported type. template static constexpr bool useArrayType() { return (Size > MaxSize) && ((Size % MaxSize) == 0) && !details::is_void_v>; } // Compute the type to handle an operation of 'Size' bytes knowing that the // underlying platform only support native types up to MaxSize bytes. template using getTypeFor = cpp::conditional_t< useArrayType(), cpp::array, Size / MaxSize>, NativeTypeMap::TypeFor>; } // namespace details /////////////////////////////////////////////////////////////////////////////// // Memset /////////////////////////////////////////////////////////////////////////////// template struct Memset { static constexpr size_t SIZE = sizeof(T); LIBC_INLINE static void block(Ptr dst, uint8_t value) { static_assert(is_element_type_v); if constexpr (is_scalar_v || is_vector_v) { store(dst, splat(value)); } else if constexpr (is_array_v) { using value_type = typename T::value_type; const auto Splat = splat(value); for (size_t I = 0; I < array_size_v; ++I) store(dst + (I * sizeof(value_type)), Splat); } } LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) { block(dst + count - SIZE, value); } LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) { block(dst, value); tail(dst, value, count); } LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) { static_assert(SIZE > 1, "a loop of size 1 does not need tail"); size_t offset = 0; do { block(dst + offset, value); offset += SIZE; } while (offset < count - SIZE); tail(dst, value, count); } }; template struct MemsetSequence { static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS)); LIBC_INLINE static void block(Ptr dst, uint8_t value) { Memset::block(dst, value); if constexpr (sizeof...(TS) > 0) { return MemsetSequence::block(dst + sizeof(T), value); } } }; /////////////////////////////////////////////////////////////////////////////// // Memmove /////////////////////////////////////////////////////////////////////////////// template struct Memmove { static constexpr size_t SIZE = sizeof(T); LIBC_INLINE static void block(Ptr dst, CPtr src) { store(dst, load(src)); } LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) { const size_t offset = count - SIZE; // The load and store operations can be performed in any order as long as // they are not interleaved. More investigations are needed to determine // the best order. const auto head = load(src); const auto tail = load(src + offset); store(dst, head); store(dst + offset, tail); } // Align forward suitable when dst < src. The alignment is performed with // an HeadTail operation of count ∈ [Alignment, 2 x Alignment]. // // e.g. Moving two bytes forward, we make sure src is aligned. // [ | | | | ] // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] // [____LLLLLLLL_____________________] // [___________LLLLLLLA______________] // [_SSSSSSSS________________________] // [________SSSSSSSS_________________] // // e.g. Moving two bytes forward, we make sure dst is aligned. // [ | | | | ] // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] // [____LLLLLLLL_____________________] // [______LLLLLLLL___________________] // [_SSSSSSSS________________________] // [___SSSSSSSA______________________] template LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) { Ptr prev_dst = dst; CPtr prev_src = src; size_t prev_count = count; align_to_next_boundary(dst, src, count); adjust(SIZE, dst, src, count); head_tail(prev_dst, prev_src, prev_count - count); } // Align backward suitable when dst > src. The alignment is performed with // an HeadTail operation of count ∈ [Alignment, 2 x Alignment]. // // e.g. Moving two bytes backward, we make sure src is aligned. // [ | | | | ] // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] // [ _________________ALLLLLLL_______] // [ ___________________LLLLLLLL_____] // [____________________SSSSSSSS_____] // [______________________SSSSSSSS___] // // e.g. Moving two bytes backward, we make sure dst is aligned. // [ | | | | ] // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] // [ _______________LLLLLLLL_________] // [ ___________________LLLLLLLL_____] // [__________________ASSSSSSS_______] // [______________________SSSSSSSS___] template LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) { Ptr headtail_dst = dst + count; CPtr headtail_src = src + count; size_t headtail_size = 0; align_to_next_boundary(headtail_dst, headtail_src, headtail_size); adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size); head_tail(headtail_dst, headtail_src, headtail_size); count -= headtail_size; } // Move forward suitable when dst < src. We load the tail bytes before // handling the loop. // // e.g. Moving two bytes // [ | | | | |] // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] // [_________________________LLLLLLLL___] // [___LLLLLLLL_________________________] // [_SSSSSSSS___________________________] // [___________LLLLLLLL_________________] // [_________SSSSSSSS___________________] // [___________________LLLLLLLL_________] // [_________________SSSSSSSS___________] // [_______________________SSSSSSSS_____] LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src, size_t count) { static_assert(SIZE > 1, "a loop of size 1 does not need tail"); const size_t tail_offset = count - SIZE; const auto tail_value = load(src + tail_offset); size_t offset = 0; LIBC_LOOP_NOUNROLL do { block(dst + offset, src + offset); offset += SIZE; } while (offset < count - SIZE); store(dst + tail_offset, tail_value); } // Move backward suitable when dst > src. We load the head bytes before // handling the loop. // // e.g. Moving two bytes // [ | | | | |] // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] // [___LLLLLLLL_________________________] // [_________________________LLLLLLLL___] // [___________________________SSSSSSSS_] // [_________________LLLLLLLL___________] // [___________________SSSSSSSS_________] // [_________LLLLLLLL___________________] // [___________SSSSSSSS_________________] // [_____SSSSSSSS_______________________] LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src, size_t count) { static_assert(SIZE > 1, "a loop of size 1 does not need tail"); const auto head_value = load(src); ptrdiff_t offset = count - SIZE; LIBC_LOOP_NOUNROLL do { block(dst + offset, src + offset); offset -= SIZE; } while (offset >= 0); store(dst, head_value); } }; /////////////////////////////////////////////////////////////////////////////// // Bcmp /////////////////////////////////////////////////////////////////////////////// template struct Bcmp { static constexpr size_t SIZE = Size; static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64) ? sizeof(uint64_t) : sizeof(uint32_t); template LIBC_INLINE static uint32_t load_xor(CPtr p1, CPtr p2) { static_assert(sizeof(T) <= sizeof(uint32_t)); return load(p1) ^ load(p2); } template LIBC_INLINE static uint32_t load_not_equal(CPtr p1, CPtr p2) { return load(p1) != load(p2); } LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) { if constexpr (Size == 1) { return load_xor(p1, p2); } else if constexpr (Size == 2) { return load_xor(p1, p2); } else if constexpr (Size == 4) { return load_xor(p1, p2); } else if constexpr (Size == 8) { return load_not_equal(p1, p2); } else if constexpr (details::useArrayType()) { for (size_t offset = 0; offset < Size; offset += MaxSize) if (auto value = Bcmp::block(p1 + offset, p2 + offset)) return value; } else { deferred_static_assert("Unimplemented Size"); } return BcmpReturnType::ZERO(); } LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) { return block(p1 + count - SIZE, p2 + count - SIZE); } LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) { return block(p1, p2) | tail(p1, p2, count); } LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2, size_t count) { static_assert(Size > 1, "a loop of size 1 does not need tail"); size_t offset = 0; do { if (auto value = block(p1 + offset, p2 + offset)) return value; offset += SIZE; } while (offset < count - SIZE); return tail(p1, p2, count); } }; /////////////////////////////////////////////////////////////////////////////// // Memcmp /////////////////////////////////////////////////////////////////////////////// template struct Memcmp { static constexpr size_t SIZE = Size; static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64) ? sizeof(uint64_t) : sizeof(uint32_t); template LIBC_INLINE static T load_be(CPtr ptr) { return Endian::to_big_endian(load(ptr)); } template LIBC_INLINE static MemcmpReturnType load_be_diff(CPtr p1, CPtr p2) { return load_be(p1) - load_be(p2); } template LIBC_INLINE static MemcmpReturnType load_be_cmp(CPtr p1, CPtr p2) { const auto la = load_be(p1); const auto lb = load_be(p2); return la > lb ? 1 : la < lb ? -1 : 0; } LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) { if constexpr (Size == 1) { return load_be_diff(p1, p2); } else if constexpr (Size == 2) { return load_be_diff(p1, p2); } else if constexpr (Size == 4) { return load_be_cmp(p1, p2); } else if constexpr (Size == 8) { return load_be_cmp(p1, p2); } else if constexpr (details::useArrayType()) { for (size_t offset = 0; offset < Size; offset += MaxSize) if (Bcmp::block(p1 + offset, p2 + offset)) return Memcmp::block(p1 + offset, p2 + offset); return MemcmpReturnType::ZERO(); } else if constexpr (Size == 3) { if (auto value = Memcmp<2>::block(p1, p2)) return value; return Memcmp<1>::block(p1 + 2, p2 + 2); } else { deferred_static_assert("Unimplemented Size"); } } LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) { return block(p1 + count - SIZE, p2 + count - SIZE); } LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) { if (auto value = block(p1, p2)) return value; return tail(p1, p2, count); } LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2, size_t count) { static_assert(Size > 1, "a loop of size 1 does not need tail"); size_t offset = 0; do { if (auto value = block(p1 + offset, p2 + offset)) return value; offset += SIZE; } while (offset < count - SIZE); return tail(p1, p2, count); } }; } // namespace __llvm_libc::generic #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H