diff options
Diffstat (limited to 'third_party/highway/hwy/targets.h')
-rw-r--r-- | third_party/highway/hwy/targets.h | 326 |
1 files changed, 326 insertions, 0 deletions
diff --git a/third_party/highway/hwy/targets.h b/third_party/highway/hwy/targets.h new file mode 100644 index 0000000000..5dba12ae96 --- /dev/null +++ b/third_party/highway/hwy/targets.h @@ -0,0 +1,326 @@ +// Copyright 2020 Google LLC +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef HIGHWAY_HWY_TARGETS_H_ +#define HIGHWAY_HWY_TARGETS_H_ + +// Allows opting out of C++ standard library usage, which is not available in +// some Compiler Explorer environments. +#ifndef HWY_NO_LIBCXX +#include <vector> +#endif + +// For SIMD module implementations and their callers. Defines which targets to +// generate and call. + +#include "hwy/base.h" +#include "hwy/detect_targets.h" +#include "hwy/highway_export.h" + +#if !HWY_ARCH_RVV && !defined(HWY_NO_LIBCXX) +#include <atomic> +#endif + +namespace hwy { + +// Returns bitfield of enabled targets that are supported on this CPU; there is +// always at least one such target, hence the return value is never 0. The +// targets returned may change after calling DisableTargets. This function is +// always defined, but the HWY_SUPPORTED_TARGETS wrapper may allow eliding +// calls to it if there is only a single target enabled. +HWY_DLLEXPORT int64_t SupportedTargets(); + +// Evaluates to a function call, or literal if there is a single target. +#if (HWY_TARGETS & (HWY_TARGETS - 1)) == 0 +#define HWY_SUPPORTED_TARGETS HWY_TARGETS +#else +#define HWY_SUPPORTED_TARGETS hwy::SupportedTargets() +#endif + +// Subsequent SupportedTargets will not return targets whose bit(s) are set in +// `disabled_targets`. Exception: if SupportedTargets would return 0, it will +// instead return HWY_STATIC_TARGET (there must always be one target to call). +// +// This function is useful for disabling targets known to be buggy, or if the +// best available target is undesirable (perhaps due to throttling or memory +// bandwidth limitations). Use SetSupportedTargetsForTest instead of this +// function for iteratively enabling specific targets for testing. +HWY_DLLEXPORT void DisableTargets(int64_t disabled_targets); + +// Subsequent SupportedTargets will return the given set of targets, except +// those disabled via DisableTargets. Call with a mask of 0 to disable the mock +// and return to the normal SupportedTargets behavior. Used to run tests for +// all targets. +HWY_DLLEXPORT void SetSupportedTargetsForTest(int64_t targets); + +#ifndef HWY_NO_LIBCXX + +// Return the list of targets in HWY_TARGETS supported by the CPU as a list of +// individual HWY_* target macros such as HWY_SCALAR or HWY_NEON. This list +// is affected by the current SetSupportedTargetsForTest() mock if any. +HWY_INLINE std::vector<int64_t> SupportedAndGeneratedTargets() { + std::vector<int64_t> ret; + for (int64_t targets = SupportedTargets() & HWY_TARGETS; targets != 0; + targets = targets & (targets - 1)) { + int64_t current_target = targets & ~(targets - 1); + ret.push_back(current_target); + } + return ret; +} + +#endif // HWY_NO_LIBCXX + +static inline HWY_MAYBE_UNUSED const char* TargetName(int64_t target) { + switch (target) { +#if HWY_ARCH_X86 + case HWY_SSSE3: + return "SSSE3"; + case HWY_SSE4: + return "SSE4"; + case HWY_AVX2: + return "AVX2"; + case HWY_AVX3: + return "AVX3"; + case HWY_AVX3_DL: + return "AVX3_DL"; +#endif + +#if HWY_ARCH_ARM + case HWY_SVE2_128: + return "SVE2_128"; + case HWY_SVE_256: + return "SVE_256"; + case HWY_SVE2: + return "SVE2"; + case HWY_SVE: + return "SVE"; + case HWY_NEON: + return "NEON"; +#endif + +#if HWY_ARCH_PPC + case HWY_PPC8: + return "PPC8"; +#endif + +#if HWY_ARCH_WASM + case HWY_WASM: + return "WASM"; + case HWY_WASM_EMU256: + return "WASM_EMU256"; +#endif + +#if HWY_ARCH_RVV + case HWY_RVV: + return "RVV"; +#endif + + case HWY_EMU128: + return "EMU128"; + case HWY_SCALAR: + return "SCALAR"; + + default: + return "Unknown"; // must satisfy gtest IsValidParamName() + } +} + +// The maximum number of dynamic targets on any architecture is defined by +// HWY_MAX_DYNAMIC_TARGETS and depends on the arch. + +// For the ChosenTarget mask and index we use a different bit arrangement than +// in the HWY_TARGETS mask. Only the targets involved in the current +// architecture are used in this mask, and therefore only the least significant +// (HWY_MAX_DYNAMIC_TARGETS + 2) bits of the int64_t mask are used. The least +// significant bit is set when the mask is not initialized, the next +// HWY_MAX_DYNAMIC_TARGETS more significant bits are a range of bits from the +// HWY_TARGETS or SupportedTargets() mask for the given architecture shifted to +// that position and the next more significant bit is used for HWY_SCALAR (if +// HWY_COMPILE_ONLY_SCALAR is defined) or HWY_EMU128. Because of this we need to +// define equivalent values for HWY_TARGETS in this representation. +// This mask representation allows to use ctz() on this mask and obtain a small +// number that's used as an index of the table for dynamic dispatch. In this +// way the first entry is used when the mask is uninitialized, the following +// HWY_MAX_DYNAMIC_TARGETS are for dynamic dispatch and the last one is for +// scalar. + +// The HWY_SCALAR/HWY_EMU128 bit in the ChosenTarget mask format. +#define HWY_CHOSEN_TARGET_MASK_SCALAR (1LL << (HWY_MAX_DYNAMIC_TARGETS + 1)) + +// Converts from a HWY_TARGETS mask to a ChosenTarget mask format for the +// current architecture. +#define HWY_CHOSEN_TARGET_SHIFT(X) \ + ((((X) >> (HWY_HIGHEST_TARGET_BIT + 1 - HWY_MAX_DYNAMIC_TARGETS)) & \ + ((1LL << HWY_MAX_DYNAMIC_TARGETS) - 1)) \ + << 1) + +// The HWY_TARGETS mask in the ChosenTarget mask format. +#define HWY_CHOSEN_TARGET_MASK_TARGETS \ + (HWY_CHOSEN_TARGET_SHIFT(HWY_TARGETS) | HWY_CHOSEN_TARGET_MASK_SCALAR | 1LL) + +#if HWY_ARCH_X86 +// Maximum number of dynamic targets, changing this value is an ABI incompatible +// change +#define HWY_MAX_DYNAMIC_TARGETS 15 +#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_X86 +// These must match the order in which the HWY_TARGETS are defined +// starting by the least significant (HWY_HIGHEST_TARGET_BIT + 1 - +// HWY_MAX_DYNAMIC_TARGETS) bit. This list must contain exactly +// HWY_MAX_DYNAMIC_TARGETS elements and does not include SCALAR. The first entry +// corresponds to the best target. Don't include a "," at the end of the list. +#define HWY_CHOOSE_TARGET_LIST(func_name) \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + HWY_CHOOSE_AVX3_DL(func_name), /* AVX3_DL */ \ + HWY_CHOOSE_AVX3(func_name), /* AVX3 */ \ + HWY_CHOOSE_AVX2(func_name), /* AVX2 */ \ + nullptr, /* AVX */ \ + HWY_CHOOSE_SSE4(func_name), /* SSE4 */ \ + HWY_CHOOSE_SSSE3(func_name), /* SSSE3 */ \ + nullptr , /* reserved - SSE3? */ \ + nullptr /* reserved - SSE2? */ + +#elif HWY_ARCH_ARM +// See HWY_ARCH_X86 above for details. +#define HWY_MAX_DYNAMIC_TARGETS 15 +#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_ARM +#define HWY_CHOOSE_TARGET_LIST(func_name) \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + HWY_CHOOSE_SVE2_128(func_name), /* SVE2 128-bit */ \ + HWY_CHOOSE_SVE_256(func_name), /* SVE 256-bit */ \ + HWY_CHOOSE_SVE2(func_name), /* SVE2 */ \ + HWY_CHOOSE_SVE(func_name), /* SVE */ \ + HWY_CHOOSE_NEON(func_name), /* NEON */ \ + nullptr /* reserved - Helium? */ + +#elif HWY_ARCH_RVV +// See HWY_ARCH_X86 above for details. +#define HWY_MAX_DYNAMIC_TARGETS 9 +#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_RVV +#define HWY_CHOOSE_TARGET_LIST(func_name) \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + HWY_CHOOSE_RVV(func_name), /* RVV */ \ + nullptr /* reserved */ + +#elif HWY_ARCH_PPC +// See HWY_ARCH_X86 above for details. +#define HWY_MAX_DYNAMIC_TARGETS 9 +#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_PPC +#define HWY_CHOOSE_TARGET_LIST(func_name) \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + HWY_CHOOSE_PPC8(func_name), /* PPC8 */ \ + nullptr, /* reserved (VSX or AltiVec) */ \ + nullptr /* reserved (VSX or AltiVec) */ + +#elif HWY_ARCH_WASM +// See HWY_ARCH_X86 above for details. +#define HWY_MAX_DYNAMIC_TARGETS 9 +#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_WASM +#define HWY_CHOOSE_TARGET_LIST(func_name) \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + nullptr, /* reserved */ \ + HWY_CHOOSE_WASM_EMU256(func_name), /* WASM_EMU256 */ \ + HWY_CHOOSE_WASM(func_name), /* WASM */ \ + nullptr /* reserved */ + +#else +// Unknown architecture, will use HWY_SCALAR without dynamic dispatch, though +// still creating single-entry tables in HWY_EXPORT to ensure portability. +#define HWY_MAX_DYNAMIC_TARGETS 1 +#define HWY_HIGHEST_TARGET_BIT HWY_HIGHEST_TARGET_BIT_SCALAR +#endif + +// Bitfield of supported and enabled targets. The format differs from that of +// HWY_TARGETS; the lowest bit governs the first function pointer (which is +// special in that it calls FunctionCache, then Update, then dispatches to the +// actual implementation) in the tables created by HWY_EXPORT. Monostate (see +// GetChosenTarget), thread-safe except on RVV. +struct ChosenTarget { + public: + // Reset bits according to `targets` (typically the return value of + // SupportedTargets()). Postcondition: IsInitialized() == true. + void Update(int64_t targets) { + // These are `targets` shifted downwards, see above. Also include SCALAR + // (corresponds to the last entry in the function table) as fallback. + StoreMask(HWY_CHOSEN_TARGET_SHIFT(targets) | HWY_CHOSEN_TARGET_MASK_SCALAR); + } + + // Reset to the uninitialized state, so that FunctionCache will call Update + // during the next HWY_DYNAMIC_DISPATCH, and IsInitialized returns false. + void DeInit() { StoreMask(1); } + + // Whether Update was called. This indicates whether any HWY_DYNAMIC_DISPATCH + // function was called, which we check in tests. + bool IsInitialized() const { return LoadMask() != 1; } + + // Return the index in the dynamic dispatch table to be used by the current + // CPU. Note that this method must be in the header file so it uses the value + // of HWY_CHOSEN_TARGET_MASK_TARGETS defined in the translation unit that + // calls it, which may be different from others. This means we only enable + // those targets that were actually compiled in this module. + size_t HWY_INLINE GetIndex() const { + return hwy::Num0BitsBelowLS1Bit_Nonzero64( + static_cast<uint64_t>(LoadMask() & HWY_CHOSEN_TARGET_MASK_TARGETS)); + } + + private: + // TODO(janwas): remove RVV once <atomic> is available +#if HWY_ARCH_RVV || defined(HWY_NO_LIBCXX) + int64_t LoadMask() const { return mask_; } + void StoreMask(int64_t mask) { mask_ = mask; } + + int64_t mask_{1}; // Initialized to 1 so GetIndex() returns 0. +#else + int64_t LoadMask() const { return mask_.load(); } + void StoreMask(int64_t mask) { mask_.store(mask); } + + std::atomic<int64_t> mask_{1}; // Initialized to 1 so GetIndex() returns 0. +#endif // HWY_ARCH_RVV +}; + +// For internal use (e.g. by FunctionCache and DisableTargets). +HWY_DLLEXPORT ChosenTarget& GetChosenTarget(); + +} // namespace hwy + +#endif // HIGHWAY_HWY_TARGETS_H_ |