diff --git a/.github/scripts/utils.zsh/setup_ubuntu b/.github/scripts/utils.zsh/setup_ubuntu index a4e7a6515ebfc7..a013ccd3ff2104 100644 --- a/.github/scripts/utils.zsh/setup_ubuntu +++ b/.github/scripts/utils.zsh/setup_ubuntu @@ -75,6 +75,7 @@ sudo apt-get install -y --no-install-recommends \ libgl1-mesa-dev \ libjansson-dev \ uthash-dev \ + libsimde-dev \ libluajit-5.1-dev python3-dev \ libx11-dev libxcb-randr0-dev libxcb-shm0-dev libxcb-xinerama0-dev \ libxcb-composite0-dev libxinerama-dev libxcb1-dev libx11-xcb-dev libxcb-xfixes0-dev \ diff --git a/build-aux/com.obsproject.Studio.json b/build-aux/com.obsproject.Studio.json index 9f578ab6c49c31..39d32325adb231 100644 --- a/build-aux/com.obsproject.Studio.json +++ b/build-aux/com.obsproject.Studio.json @@ -65,6 +65,7 @@ "modules/50-vpl-gpu-rt.json", "modules/90-asio.json", "modules/90-nlohmann-json.json", + "modules/90-simde.json", "modules/90-uthash.json", "modules/90-websocketpp.json", "modules/99-cef.json", diff --git a/build-aux/modules/90-simde.json b/build-aux/modules/90-simde.json new file mode 100644 index 00000000000000..78c19f13f96a94 --- /dev/null +++ b/build-aux/modules/90-simde.json @@ -0,0 +1,16 @@ +{ + "name": "simde", + "buildsystem": "meson", + "builddir": true, + "config-opts": [ + "-Dtests=false" + ], + "sources": [ + { + "type": "git", + "url": "https://github.com/simd-everywhere/simde.git", + "tag": "v0.8.2", + "commit": "71fd833d9666141edcd1d3c109a80e228303d8d7" + } + ] +} diff --git a/cmake/common/helpers_common.cmake b/cmake/common/helpers_common.cmake index f5534ed6660ff8..6a21e321ef11e9 100644 --- a/cmake/common/helpers_common.cmake +++ b/cmake/common/helpers_common.cmake @@ -399,6 +399,15 @@ function(target_export target) COMPONENT Development ${exclude_variant} ) + + if(target STREQUAL libobs) + install( + FILES "${CMAKE_SOURCE_DIR}/cmake/finders/FindSIMDE.cmake" + DESTINATION "${package_destination}/finders" + COMPONENT Development + ${exclude_variant} + ) + endif() endfunction() # check_uuid: Helper function to check for valid UUID diff --git a/libobs/CMakeLists.txt b/libobs/CMakeLists.txt index f55917da422f56..55fafde5e27d26 100644 --- a/libobs/CMakeLists.txt +++ b/libobs/CMakeLists.txt @@ -7,6 +7,7 @@ if(OS_WINDOWS AND NOT OBS_PARENT_ARCHITECTURE STREQUAL CMAKE_VS_PLATFORM_NAME) return() endif() +find_package(SIMDE REQUIRED) find_package(Threads REQUIRED) find_package(FFmpeg 6.1 REQUIRED avformat avutil swscale swresample OPTIONAL_COMPONENTS avcodec) @@ -135,25 +136,6 @@ target_sources( util/util_uint64.h ) -target_sources( - libobs - PRIVATE - util/simde/check.h - util/simde/debug-trap.h - util/simde/hedley.h - util/simde/simde-align.h - util/simde/simde-arch.h - util/simde/simde-common.h - util/simde/simde-constify.h - util/simde/simde-detect-clang.h - util/simde/simde-diagnostic.h - util/simde/simde-features.h - util/simde/simde-math.h - util/simde/x86/mmx.h - util/simde/x86/sse.h - util/simde/x86/sse2.h -) - target_sources( libobs PRIVATE @@ -260,7 +242,7 @@ target_link_libraries( jansson::jansson Uthash::Uthash ZLIB::ZLIB - PUBLIC Threads::Threads + PUBLIC SIMDE::SIMDE Threads::Threads ) if(OS_WINDOWS) @@ -357,20 +339,6 @@ set( util/profiler.h util/profiler.hpp util/serializer.h - util/simde/check.h - util/simde/debug-trap.h - util/simde/hedley.h - util/simde/simde-align.h - util/simde/simde-arch.h - util/simde/simde-common.h - util/simde/simde-constify.h - util/simde/simde-detect-clang.h - util/simde/simde-diagnostic.h - util/simde/simde-features.h - util/simde/simde-math.h - util/simde/x86/mmx.h - util/simde/x86/sse.h - util/simde/x86/sse2.h util/sse-intrin.h util/task.h util/text-lookup.h diff --git a/libobs/cmake/libobsConfig.cmake.in b/libobs/cmake/libobsConfig.cmake.in index 44e17220d30355..31929e96850947 100644 --- a/libobs/cmake/libobsConfig.cmake.in +++ b/libobs/cmake/libobsConfig.cmake.in @@ -2,9 +2,12 @@ include(CMakeFindDependencyMacro) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/finders") + if(MSVC) find_dependency(w32-pthreads REQUIRED) endif() +find_dependency(SIMDE REQUIRED) find_dependency(Threads REQUIRED) include("${CMAKE_CURRENT_LIST_DIR}/@TARGETS_EXPORT_NAME@.cmake") diff --git a/libobs/util/simde/.clang-format b/libobs/util/simde/.clang-format deleted file mode 100644 index 6420a46881e054..00000000000000 --- a/libobs/util/simde/.clang-format +++ /dev/null @@ -1,3 +0,0 @@ -Language: Cpp -SortIncludes: false -DisableFormat: true diff --git a/libobs/util/simde/LICENSE.simde b/libobs/util/simde/LICENSE.simde deleted file mode 100644 index 78d482e755eaa6..00000000000000 --- a/libobs/util/simde/LICENSE.simde +++ /dev/null @@ -1,40 +0,0 @@ -simde is licensed as a combination of MIT and CC0 code. - -License notices for both are reproduced below: - -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* Portable Snippets - https://gitub.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ diff --git a/libobs/util/simde/README.libobs b/libobs/util/simde/README.libobs deleted file mode 100644 index e41a32290002a6..00000000000000 --- a/libobs/util/simde/README.libobs +++ /dev/null @@ -1,6 +0,0 @@ -This is a slightly modified version of the simde directory in -https://github.com/simd-everywhere/simde/commit/c3d7abfaba6729a8b11d09a314b34a4db628911d - -Modifications: -1. Unused files have removed. -2. The code was reformatted using the "formatcode.sh" script in the root of this repository. diff --git a/libobs/util/simde/check.h b/libobs/util/simde/check.h deleted file mode 100644 index 7ffa08a6424bc6..00000000000000 --- a/libobs/util/simde/check.h +++ /dev/null @@ -1,285 +0,0 @@ -/* Check (assertions) - * Portable Snippets - https://gitub.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_CHECK_H) -#define SIMDE_CHECK_H - -#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) -#define SIMDE_NDEBUG 1 -#endif - -#include "hedley.h" -#include "simde-diagnostic.h" -#include - -#if !defined(_WIN32) -#define SIMDE_SIZE_MODIFIER "z" -#define SIMDE_CHAR_MODIFIER "hh" -#define SIMDE_SHORT_MODIFIER "h" -#else -#if defined(_M_X64) || defined(__amd64__) -#define SIMDE_SIZE_MODIFIER "I64" -#else -#define SIMDE_SIZE_MODIFIER "" -#endif -#define SIMDE_CHAR_MODIFIER "" -#define SIMDE_SHORT_MODIFIER "" -#endif - -#if defined(_MSC_VER) && (_MSC_VER >= 1500) -#define SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - __pragma(warning(push)) __pragma(warning(disable : 4127)) -#define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) -#else -#define SIMDE_PUSH_DISABLE_MSVC_C4127_ -#define SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#if !defined(simde_errorf) -#if defined(__has_include) -#if __has_include() -#include -#endif -#elif defined(SIMDE_STDC_HOSTED) -#if SIMDE_STDC_HOSTED == 1 -#include -#endif -#elif defined(__STDC_HOSTED__) -#if __STDC_HOSTETD__ == 1 -#include -#endif -#endif - -#include "debug-trap.h" - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#if defined(EOF) -#define simde_errorf(format, ...) \ - (fprintf(stderr, format, __VA_ARGS__), abort()) -#else -#define simde_errorf(format, ...) (simde_trap()) -#endif -HEDLEY_DIAGNOSTIC_POP -#endif - -#define simde_error(msg) simde_errorf("%s", msg) - -#if defined(SIMDE_NDEBUG) || \ - (defined(__cplusplus) && (__cplusplus < 201103L)) || \ - (defined(__STDC__) && (__STDC__ < 199901L)) -#if defined(SIMDE_CHECK_FAIL_DEFINED) -#define simde_assert(expr) -#else -#if defined(HEDLEY_ASSUME) -#define simde_assert(expr) HEDLEY_ASSUME(expr) -#elif HEDLEY_GCC_VERSION_CHECK(4, 5, 0) -#define simde_assert(expr) ((void)(!!(expr) ? 1 : (__builtin_unreachable(), 1))) -#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) -#define simde_assert(expr) __assume(expr) -#else -#define simde_assert(expr) -#endif -#endif -#define simde_assert_true(expr) simde_assert(expr) -#define simde_assert_false(expr) simde_assert(!(expr)) -#define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - simde_assert(((a)op(b))) -#define simde_assert_double_equal(a, b, precision) -#define simde_assert_string_equal(a, b) -#define simde_assert_string_not_equal(a, b) -#define simde_assert_memory_equal(size, a, b) -#define simde_assert_memory_not_equal(size, a, b) -#else -#define simde_assert(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr "\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) SIMDE_POP_DISABLE_MSVC_C4127_ - -#define simde_assert_true(expr) \ - do { \ - if (!HEDLEY_LIKELY(expr)) { \ - simde_error("assertion failed: " #expr \ - " is not true\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) SIMDE_POP_DISABLE_MSVC_C4127_ - -#define simde_assert_false(expr) \ - do { \ - if (!HEDLEY_LIKELY(!(expr))) { \ - simde_error("assertion failed: " #expr \ - " is not false\n"); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) SIMDE_POP_DISABLE_MSVC_C4127_ - -#define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ - do { \ - T simde_tmp_a_ = (a); \ - T simde_tmp_b_ = (b); \ - if (!(simde_tmp_a_ op simde_tmp_b_)) { \ - simde_errorf("assertion failed: %s %s %s (" prefix \ - "%" fmt suffix " %s " prefix \ - "%" fmt suffix ")\n", \ - #a, #op, #b, simde_tmp_a_, #op, \ - simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) SIMDE_POP_DISABLE_MSVC_C4127_ - -#define simde_assert_double_equal(a, b, precision) \ - do { \ - const double simde_tmp_a_ = (a); \ - const double simde_tmp_b_ = (b); \ - const double simde_tmp_diff_ = \ - ((simde_tmp_a_ - simde_tmp_b_) < 0) \ - ? -(simde_tmp_a_ - simde_tmp_b_) \ - : (simde_tmp_a_ - simde_tmp_b_); \ - if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ - simde_errorf( \ - "assertion failed: %s == %s (%0." #precision \ - "g == %0." #precision "g)\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) SIMDE_POP_DISABLE_MSVC_C4127_ - -#include -#define simde_assert_string_equal(a, b) \ - do { \ - const char *simde_tmp_a_ = a; \ - const char *simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != \ - 0)) { \ - simde_errorf( \ - "assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) SIMDE_POP_DISABLE_MSVC_C4127_ - -#define simde_assert_string_not_equal(a, b) \ - do { \ - const char *simde_tmp_a_ = a; \ - const char *simde_tmp_b_ = b; \ - if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == \ - 0)) { \ - simde_errorf( \ - "assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ - #a, #b, simde_tmp_a_, simde_tmp_b_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) SIMDE_POP_DISABLE_MSVC_C4127_ - -#define simde_assert_memory_equal(size, a, b) \ - do { \ - const unsigned char *simde_tmp_a_ = \ - (const unsigned char *)(a); \ - const unsigned char *simde_tmp_b_ = \ - (const unsigned char *)(b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, \ - simde_tmp_size_)) != 0) { \ - size_t simde_tmp_pos_; \ - for (simde_tmp_pos_ = 0; \ - simde_tmp_pos_ < simde_tmp_size_; \ - simde_tmp_pos_++) { \ - if (simde_tmp_a_[simde_tmp_pos_] != \ - simde_tmp_b_[simde_tmp_pos_]) { \ - simde_errorf( \ - "assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER \ - "u\n", \ - #a, #b, simde_tmp_pos_); \ - break; \ - } \ - } \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) SIMDE_POP_DISABLE_MSVC_C4127_ - -#define simde_assert_memory_not_equal(size, a, b) \ - do { \ - const unsigned char *simde_tmp_a_ = \ - (const unsigned char *)(a); \ - const unsigned char *simde_tmp_b_ = \ - (const unsigned char *)(b); \ - const size_t simde_tmp_size_ = (size); \ - if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, \ - simde_tmp_size_)) == 0) { \ - simde_errorf( \ - "assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER \ - "u bytes)\n", \ - #a, #b, simde_tmp_size_); \ - } \ - SIMDE_PUSH_DISABLE_MSVC_C4127_ \ - } while (0) SIMDE_POP_DISABLE_MSVC_C4127_ -#endif - -#define simde_assert_type(T, fmt, a, op, b) \ - simde_assert_type_full("", "", T, fmt, a, op, b) - -#define simde_assert_char(a, op, b) \ - simde_assert_type_full("'\\x", "'", char, \ - "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_uchar(a, op, b) \ - simde_assert_type_full("'\\x", "'", unsigned char, \ - "02" SIMDE_CHAR_MODIFIER "x", a, op, b) -#define simde_assert_short(a, op, b) \ - simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) -#define simde_assert_ushort(a, op, b) \ - simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) -#define simde_assert_int(a, op, b) simde_assert_type(int, "d", a, op, b) -#define simde_assert_uint(a, op, b) \ - simde_assert_type(unsigned int, "u", a, op, b) -#define simde_assert_long(a, op, b) simde_assert_type(long int, "ld", a, op, b) -#define simde_assert_ulong(a, op, b) \ - simde_assert_type(unsigned long int, "lu", a, op, b) -#define simde_assert_llong(a, op, b) \ - simde_assert_type(long long int, "lld", a, op, b) -#define simde_assert_ullong(a, op, b) \ - simde_assert_type(unsigned long long int, "llu", a, op, b) - -#define simde_assert_size(a, op, b) \ - simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) - -#define simde_assert_float(a, op, b) simde_assert_type(float, "f", a, op, b) -#define simde_assert_double(a, op, b) simde_assert_type(double, "g", a, op, b) -#define simde_assert_ptr(a, op, b) \ - simde_assert_type(const void *, "p", a, op, b) - -#define simde_assert_int8(a, op, b) simde_assert_type(int8_t, PRIi8, a, op, b) -#define simde_assert_uint8(a, op, b) simde_assert_type(uint8_t, PRIu8, a, op, b) -#define simde_assert_int16(a, op, b) \ - simde_assert_type(int16_t, PRIi16, a, op, b) -#define simde_assert_uint16(a, op, b) \ - simde_assert_type(uint16_t, PRIu16, a, op, b) -#define simde_assert_int32(a, op, b) \ - simde_assert_type(int32_t, PRIi32, a, op, b) -#define simde_assert_uint32(a, op, b) \ - simde_assert_type(uint32_t, PRIu32, a, op, b) -#define simde_assert_int64(a, op, b) \ - simde_assert_type(int64_t, PRIi64, a, op, b) -#define simde_assert_uint64(a, op, b) \ - simde_assert_type(uint64_t, PRIu64, a, op, b) - -#define simde_assert_ptr_equal(a, b) simde_assert_ptr(a, ==, b) -#define simde_assert_ptr_not_equal(a, b) simde_assert_ptr(a, !=, b) -#define simde_assert_null(ptr) simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_not_null(ptr) simde_assert_ptr(ptr, !=, NULL) -#define simde_assert_ptr_null(ptr) simde_assert_ptr(ptr, ==, NULL) -#define simde_assert_ptr_not_null(ptr) simde_assert_ptr(ptr, !=, NULL) - -#endif /* !defined(SIMDE_CHECK_H) */ diff --git a/libobs/util/simde/debug-trap.h b/libobs/util/simde/debug-trap.h deleted file mode 100644 index be901fafa18c20..00000000000000 --- a/libobs/util/simde/debug-trap.h +++ /dev/null @@ -1,117 +0,0 @@ -/* Debugging assertions and traps - * Portable Snippets - https://gitub.com/nemequ/portable-snippets - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * https://creativecommons.org/publicdomain/zero/1.0/ - * - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(SIMDE_DEBUG_TRAP_H) -#define SIMDE_DEBUG_TRAP_H - -#if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) -#define SIMDE_NDEBUG 1 -#endif - -#if defined(__has_builtin) && !defined(__ibmxl__) -#if __has_builtin(__builtin_debugtrap) -#define simde_trap() __builtin_debugtrap() -#elif __has_builtin(__debugbreak) -#define simde_trap() __debugbreak() -#endif -#endif -#if !defined(simde_trap) -#if defined(_MSC_VER) || defined(__INTEL_COMPILER) -#define simde_trap() __debugbreak() -#elif defined(__ARMCC_VERSION) -#define simde_trap() __breakpoint(42) -#elif defined(__ibmxl__) || defined(__xlC__) -#include -#define simde_trap() __trap(42) -#elif defined(__DMC__) && defined(_M_IX86) -static inline void simde_trap(void) -{ - __asm int 3h; -} -#elif defined(__i386__) || defined(__x86_64__) -static inline void simde_trap(void) -{ - __asm__ __volatile__("int $03"); -} -#elif defined(__thumb__) -static inline void simde_trap(void) -{ - __asm__ __volatile__(".inst 0xde01"); -} -#elif defined(__aarch64__) -static inline void simde_trap(void) -{ - __asm__ __volatile__(".inst 0xd4200000"); -} -#elif defined(__arm__) -static inline void simde_trap(void) -{ - __asm__ __volatile__(".inst 0xe7f001f0"); -} -#elif defined(__alpha__) && !defined(__osf__) -static inline void simde_trap(void) -{ - __asm__ __volatile__("bpt"); -} -#elif defined(_54_) -static inline void simde_trap(void) -{ - __asm__ __volatile__("ESTOP"); -} -#elif defined(_55_) -static inline void simde_trap(void) -{ - __asm__ __volatile__( - ";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); -} -#elif defined(_64P_) -static inline void simde_trap(void) -{ - __asm__ __volatile__("SWBP 0"); -} -#elif defined(_6x_) -static inline void simde_trap(void) -{ - __asm__ __volatile__("NOP\n .word 0x10000000"); -} -#elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && defined(__GNUC__) -#define simde_trap() __builtin_trap() -#else -#include -#if defined(SIGTRAP) -#define simde_trap() raise(SIGTRAP) -#else -#define simde_trap() raise(SIGABRT) -#endif -#endif -#endif - -#if defined(HEDLEY_LIKELY) -#define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) -#elif defined(__GNUC__) && (__GNUC__ >= 3) -#define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) -#else -#define SIMDE_DBG_LIKELY(expr) (!!(expr)) -#endif - -#if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) -#define simde_dbg_assert(expr) \ - do { \ - if (!SIMDE_DBG_LIKELY(expr)) { \ - simde_trap(); \ - } \ - } while (0) -#else -#define simde_dbg_assert(expr) -#endif - -#endif /* !defined(SIMDE_DEBUG_TRAP_H) */ diff --git a/libobs/util/simde/hedley.h b/libobs/util/simde/hedley.h deleted file mode 100644 index 6d9995a48c9853..00000000000000 --- a/libobs/util/simde/hedley.h +++ /dev/null @@ -1,2123 +0,0 @@ -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -#if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 14) -#if defined(HEDLEY_VERSION) -#undef HEDLEY_VERSION -#endif -#define HEDLEY_VERSION 14 - -#if defined(HEDLEY_STRINGIFY_EX) -#undef HEDLEY_STRINGIFY_EX -#endif -#define HEDLEY_STRINGIFY_EX(x) #x - -#if defined(HEDLEY_STRINGIFY) -#undef HEDLEY_STRINGIFY -#endif -#define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) - -#if defined(HEDLEY_CONCAT_EX) -#undef HEDLEY_CONCAT_EX -#endif -#define HEDLEY_CONCAT_EX(a, b) a##b - -#if defined(HEDLEY_CONCAT) -#undef HEDLEY_CONCAT -#endif -#define HEDLEY_CONCAT(a, b) HEDLEY_CONCAT_EX(a, b) - -#if defined(HEDLEY_CONCAT3_EX) -#undef HEDLEY_CONCAT3_EX -#endif -#define HEDLEY_CONCAT3_EX(a, b, c) a##b##c - -#if defined(HEDLEY_CONCAT3) -#undef HEDLEY_CONCAT3 -#endif -#define HEDLEY_CONCAT3(a, b, c) HEDLEY_CONCAT3_EX(a, b, c) - -#if defined(HEDLEY_VERSION_ENCODE) -#undef HEDLEY_VERSION_ENCODE -#endif -#define HEDLEY_VERSION_ENCODE(major, minor, revision) \ - (((major)*1000000) + ((minor)*1000) + (revision)) - -#if defined(HEDLEY_VERSION_DECODE_MAJOR) -#undef HEDLEY_VERSION_DECODE_MAJOR -#endif -#define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(HEDLEY_VERSION_DECODE_MINOR) -#undef HEDLEY_VERSION_DECODE_MINOR -#endif -#define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(HEDLEY_VERSION_DECODE_REVISION) -#undef HEDLEY_VERSION_DECODE_REVISION -#endif -#define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(HEDLEY_GNUC_VERSION) -#undef HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) -#define HEDLEY_GNUC_VERSION \ - HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) -#define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(HEDLEY_GNUC_VERSION_CHECK) -#undef HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(HEDLEY_GNUC_VERSION) -#define HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_GNUC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_MSVC_VERSION) -#undef HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) -#define HEDLEY_MSVC_VERSION \ - HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, \ - (_MSC_FULL_VER % 10000000) / 100000, \ - (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) -#define HEDLEY_MSVC_VERSION \ - HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, \ - (_MSC_FULL_VER % 1000000) / 10000, \ - (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) -#define HEDLEY_MSVC_VERSION \ - HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(HEDLEY_MSVC_VERSION_CHECK) -#undef HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(HEDLEY_MSVC_VERSION) -#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) \ - (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) -#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) \ - (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else -#define HEDLEY_MSVC_VERSION_CHECK(major, minor, patch) \ - (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(HEDLEY_INTEL_VERSION) -#undef HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && \ - !defined(__ICL) -#define HEDLEY_INTEL_VERSION \ - HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, \ - __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) -#define HEDLEY_INTEL_VERSION \ - HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(HEDLEY_INTEL_VERSION_CHECK) -#undef HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_VERSION) -#define HEDLEY_INTEL_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_INTEL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_INTEL_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION) -#undef HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && \ - defined(__ICL) -#define HEDLEY_INTEL_CL_VERSION \ - HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(HEDLEY_INTEL_CL_VERSION_CHECK) -#undef HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(HEDLEY_INTEL_CL_VERSION) -#define HEDLEY_INTEL_CL_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_INTEL_CL_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_PGI_VERSION) -#undef HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && \ - defined(__PGIC_PATCHLEVEL__) -#define HEDLEY_PGI_VERSION \ - HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(HEDLEY_PGI_VERSION_CHECK) -#undef HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(HEDLEY_PGI_VERSION) -#define HEDLEY_PGI_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_PGI_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION) -#undef HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) -#define HEDLEY_SUNPRO_VERSION \ - HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + \ - ((__SUNPRO_C >> 12) & 0xf), \ - (((__SUNPRO_C >> 8) & 0xf) * 10) + \ - ((__SUNPRO_C >> 4) & 0xf), \ - (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) -#define HEDLEY_SUNPRO_VERSION \ - HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, \ - (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C)&0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) -#define HEDLEY_SUNPRO_VERSION \ - HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + \ - ((__SUNPRO_CC >> 12) & 0xf), \ - (((__SUNPRO_CC >> 8) & 0xf) * 10) + \ - ((__SUNPRO_CC >> 4) & 0xf), \ - (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) -#define HEDLEY_SUNPRO_VERSION \ - HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, \ - (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC)&0xf) -#endif - -#if defined(HEDLEY_SUNPRO_VERSION_CHECK) -#undef HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(HEDLEY_SUNPRO_VERSION) -#define HEDLEY_SUNPRO_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_SUNPRO_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -#undef HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) -#define HEDLEY_EMSCRIPTEN_VERSION \ - HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, \ - __EMSCRIPTEN_tiny__) -#endif - -#if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) -#undef HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(HEDLEY_EMSCRIPTEN_VERSION) -#define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_EMSCRIPTEN_VERSION >= \ - HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_ARM_VERSION) -#undef HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) -#define HEDLEY_ARM_VERSION \ - HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, \ - (__ARMCOMPILER_VERSION % 1000000) / 10000, \ - (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) -#define HEDLEY_ARM_VERSION \ - HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, \ - (__ARMCC_VERSION % 1000000) / 10000, \ - (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(HEDLEY_ARM_VERSION_CHECK) -#undef HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(HEDLEY_ARM_VERSION) -#define HEDLEY_ARM_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_ARM_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_IBM_VERSION) -#undef HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) -#define HEDLEY_IBM_VERSION \ - HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, \ - __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) -#define HEDLEY_IBM_VERSION \ - HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, \ - (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) -#define HEDLEY_IBM_VERSION \ - HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(HEDLEY_IBM_VERSION_CHECK) -#undef HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(HEDLEY_IBM_VERSION) -#define HEDLEY_IBM_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_IBM_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_TI_VERSION) -#undef HEDLEY_TI_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && \ - (defined(__TMS470__) || defined(__TI_ARM__) || defined(__MSP430__) || \ - defined(__TMS320C2000__)) -#if (__TI_COMPILER_VERSION__ >= 16000000) -#define HEDLEY_TI_VERSION \ - HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, \ - (__TI_COMPILER_VERSION__ % 1000000) / 1000, \ - (__TI_COMPILER_VERSION__ % 1000)) -#endif -#endif - -#if defined(HEDLEY_TI_VERSION_CHECK) -#undef HEDLEY_TI_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_VERSION) -#define HEDLEY_TI_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_TI_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION) -#undef HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) -#define HEDLEY_TI_CL2000_VERSION \ - HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, \ - (__TI_COMPILER_VERSION__ % 1000000) / 1000, \ - (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL2000_VERSION_CHECK) -#undef HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL2000_VERSION) -#define HEDLEY_TI_CL2000_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_TI_CL2000_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION) -#undef HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) -#define HEDLEY_TI_CL430_VERSION \ - HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, \ - (__TI_COMPILER_VERSION__ % 1000000) / 1000, \ - (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL430_VERSION_CHECK) -#undef HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL430_VERSION) -#define HEDLEY_TI_CL430_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_TI_CL430_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION) -#undef HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && \ - (defined(__TMS470__) || defined(__TI_ARM__)) -#define HEDLEY_TI_ARMCL_VERSION \ - HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, \ - (__TI_COMPILER_VERSION__ % 1000000) / 1000, \ - (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) -#undef HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_ARMCL_VERSION) -#define HEDLEY_TI_ARMCL_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_TI_ARMCL_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION) -#undef HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) -#define HEDLEY_TI_CL6X_VERSION \ - HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, \ - (__TI_COMPILER_VERSION__ % 1000000) / 1000, \ - (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL6X_VERSION_CHECK) -#undef HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL6X_VERSION) -#define HEDLEY_TI_CL6X_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_TI_CL6X_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION) -#undef HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) -#define HEDLEY_TI_CL7X_VERSION \ - HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, \ - (__TI_COMPILER_VERSION__ % 1000000) / 1000, \ - (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CL7X_VERSION_CHECK) -#undef HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CL7X_VERSION) -#define HEDLEY_TI_CL7X_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_TI_CL7X_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION) -#undef HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) -#define HEDLEY_TI_CLPRU_VERSION \ - HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, \ - (__TI_COMPILER_VERSION__ % 1000000) / 1000, \ - (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) -#undef HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(HEDLEY_TI_CLPRU_VERSION) -#define HEDLEY_TI_CLPRU_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_TI_CLPRU_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_CRAY_VERSION) -#undef HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) -#if defined(_RELEASE_PATCHLEVEL) -#define HEDLEY_CRAY_VERSION \ - HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, \ - _RELEASE_PATCHLEVEL) -#else -#define HEDLEY_CRAY_VERSION \ - HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) -#endif -#endif - -#if defined(HEDLEY_CRAY_VERSION_CHECK) -#undef HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(HEDLEY_CRAY_VERSION) -#define HEDLEY_CRAY_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_CRAY_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_IAR_VERSION) -#undef HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) -#if __VER__ > 1000 -#define HEDLEY_IAR_VERSION \ - HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), \ - (__VER__ % 1000)) -#else -#define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(VER / 100, __VER__ % 100, 0) -#endif -#endif - -#if defined(HEDLEY_IAR_VERSION_CHECK) -#undef HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(HEDLEY_IAR_VERSION) -#define HEDLEY_IAR_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_IAR_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_TINYC_VERSION) -#undef HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) -#define HEDLEY_TINYC_VERSION \ - HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, \ - __TINYC__ % 100) -#endif - -#if defined(HEDLEY_TINYC_VERSION_CHECK) -#undef HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(HEDLEY_TINYC_VERSION) -#define HEDLEY_TINYC_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_TINYC_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_DMC_VERSION) -#undef HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) -#define HEDLEY_DMC_VERSION \ - HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(HEDLEY_DMC_VERSION_CHECK) -#undef HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(HEDLEY_DMC_VERSION) -#define HEDLEY_DMC_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_DMC_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION) -#undef HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) -#define HEDLEY_COMPCERT_VERSION \ - HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, \ - (__COMPCERT_VERSION__ / 100) % 100, \ - __COMPCERT_VERSION__ % 100) -#endif - -#if defined(HEDLEY_COMPCERT_VERSION_CHECK) -#undef HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(HEDLEY_COMPCERT_VERSION) -#define HEDLEY_COMPCERT_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_COMPCERT_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_PELLES_VERSION) -#undef HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) -#define HEDLEY_PELLES_VERSION \ - HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(HEDLEY_PELLES_VERSION_CHECK) -#undef HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(HEDLEY_PELLES_VERSION) -#define HEDLEY_PELLES_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_PELLES_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_GCC_VERSION) -#undef HEDLEY_GCC_VERSION -#endif -#if defined(HEDLEY_GNUC_VERSION) && !defined(__clang__) && \ - !defined(HEDLEY_INTEL_VERSION) && !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_ARM_VERSION) && !defined(HEDLEY_TI_VERSION) && \ - !defined(HEDLEY_TI_ARMCL_VERSION) && \ - !defined(HEDLEY_TI_CL430_VERSION) && \ - !defined(HEDLEY_TI_CL2000_VERSION) && \ - !defined(HEDLEY_TI_CL6X_VERSION) && \ - !defined(HEDLEY_TI_CL7X_VERSION) && \ - !defined(HEDLEY_TI_CLPRU_VERSION) && !defined(__COMPCERT__) -#define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION -#endif - -#if defined(HEDLEY_GCC_VERSION_CHECK) -#undef HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(HEDLEY_GCC_VERSION) -#define HEDLEY_GCC_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define HEDLEY_GCC_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(HEDLEY_HAS_ATTRIBUTE) -#undef HEDLEY_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -#define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -#define HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) -#undef HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -#define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \ - __has_attribute(attribute) -#else -#define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute, major, minor, patch) \ - HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_GCC_HAS_ATTRIBUTE) -#undef HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) -#define HEDLEY_GCC_HAS_ATTRIBUTE(attribute, major, minor, patch) \ - __has_attribute(attribute) -#else -#define HEDLEY_GCC_HAS_ATTRIBUTE(attribute, major, minor, patch) \ - HEDLEY_GCC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE) -#undef HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0)) -#define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else -#define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) -#undef HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) -#define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns, attribute) (0) -#elif !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_IAR_VERSION) && \ - (!defined(HEDLEY_SUNPRO_VERSION) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0)) && \ - (!defined(HEDLEY_MSVC_VERSION) || \ - HEDLEY_MSVC_VERSION_CHECK(19, 20, 0)) -#define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns, attribute) \ - HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else -#define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns, attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) -#undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -#define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute, major, minor, patch) \ - __has_cpp_attribute(attribute) -#else -#define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute, major, minor, patch) \ - HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) -#undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) -#define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute, major, minor, patch) \ - __has_cpp_attribute(attribute) -#else -#define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute, major, minor, patch) \ - HEDLEY_GCC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_HAS_BUILTIN) -#undef HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) -#define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else -#define HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_BUILTIN) -#undef HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -#define HEDLEY_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \ - __has_builtin(builtin) -#else -#define HEDLEY_GNUC_HAS_BUILTIN(builtin, major, minor, patch) \ - HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_GCC_HAS_BUILTIN) -#undef HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) -#define HEDLEY_GCC_HAS_BUILTIN(builtin, major, minor, patch) \ - __has_builtin(builtin) -#else -#define HEDLEY_GCC_HAS_BUILTIN(builtin, major, minor, patch) \ - HEDLEY_GCC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_HAS_FEATURE) -#undef HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) -#define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else -#define HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_FEATURE) -#undef HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) -#define HEDLEY_GNUC_HAS_FEATURE(feature, major, minor, patch) \ - __has_feature(feature) -#else -#define HEDLEY_GNUC_HAS_FEATURE(feature, major, minor, patch) \ - HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_GCC_HAS_FEATURE) -#undef HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) -#define HEDLEY_GCC_HAS_FEATURE(feature, major, minor, patch) \ - __has_feature(feature) -#else -#define HEDLEY_GCC_HAS_FEATURE(feature, major, minor, patch) \ - HEDLEY_GCC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_HAS_EXTENSION) -#undef HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) -#define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else -#define HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_EXTENSION) -#undef HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) -#define HEDLEY_GNUC_HAS_EXTENSION(extension, major, minor, patch) \ - __has_extension(extension) -#else -#define HEDLEY_GNUC_HAS_EXTENSION(extension, major, minor, patch) \ - HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_GCC_HAS_EXTENSION) -#undef HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) -#define HEDLEY_GCC_HAS_EXTENSION(extension, major, minor, patch) \ - __has_extension(extension) -#else -#define HEDLEY_GCC_HAS_EXTENSION(extension, major, minor, patch) \ - HEDLEY_GCC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) -#undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -#define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) \ - __has_declspec_attribute(attribute) -#else -#define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) -#undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -#define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute, major, minor, patch) \ - __has_declspec_attribute(attribute) -#else -#define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute, major, minor, patch) \ - HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) -#undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) -#define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute, major, minor, patch) \ - __has_declspec_attribute(attribute) -#else -#define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute, major, minor, patch) \ - HEDLEY_GCC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_HAS_WARNING) -#undef HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) -#define HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else -#define HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(HEDLEY_GNUC_HAS_WARNING) -#undef HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) -#define HEDLEY_GNUC_HAS_WARNING(warning, major, minor, patch) \ - __has_warning(warning) -#else -#define HEDLEY_GNUC_HAS_WARNING(warning, major, minor, patch) \ - HEDLEY_GNUC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_GCC_HAS_WARNING) -#undef HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) -#define HEDLEY_GCC_HAS_WARNING(warning, major, minor, patch) \ - __has_warning(warning) -#else -#define HEDLEY_GCC_HAS_WARNING(warning, major, minor, patch) \ - HEDLEY_GCC_VERSION_CHECK(major, minor, patch) -#endif - -#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_IAR_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_PGI_VERSION_CHECK(18, 4, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4, 7, 0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(2, 0, 1) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 1, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 0, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) || \ - HEDLEY_CRAY_VERSION_CHECK(5, 0, 0) || \ - HEDLEY_TINYC_VERSION_CHECK(0, 9, 17) || \ - HEDLEY_SUNPRO_VERSION_CHECK(8, 0, 0) || \ - (HEDLEY_IBM_VERSION_CHECK(10, 1, 0) && defined(__C99_PRAGMA_OPERATOR)) -#define HEDLEY_PRAGMA(value) _Pragma(#value) -#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) -#define HEDLEY_PRAGMA(value) __pragma(value) -#else -#define HEDLEY_PRAGMA(value) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_PUSH) -#undef HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(HEDLEY_DIAGNOSTIC_POP) -#undef HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) -#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") -#define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif HEDLEY_GCC_VERSION_CHECK(4, 6, 0) -#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") -#define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) -#define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif HEDLEY_ARM_VERSION_CHECK(5, 6, 0) -#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") -#define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 4, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8, 1, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) -#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") -#define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif HEDLEY_PELLES_VERSION_CHECK(2, 90, 0) -#define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") -#define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else -#define HEDLEY_DIAGNOSTIC_PUSH -#define HEDLEY_DIAGNOSTIC_POP -#endif - -/* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -#undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -#if HEDLEY_HAS_WARNING("-Wc++98-compat") -#if HEDLEY_HAS_WARNING("-Wc++17-extensions") -#if HEDLEY_HAS_WARNING("-Wc++1z-extensions") -#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") _Pragma( \ - "clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr HEDLEY_DIAGNOSTIC_POP -#else -#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr HEDLEY_DIAGNOSTIC_POP -#endif -#else -#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr HEDLEY_DIAGNOSTIC_POP -#endif -#endif -#endif -#if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) -#define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(HEDLEY_CONST_CAST) -#undef HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -#define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) -#elif HEDLEY_HAS_WARNING("-Wcast-qual") || \ - HEDLEY_GCC_VERSION_CHECK(4, 6, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_CONST_CAST(T, expr) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL((T)(expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -#define HEDLEY_CONST_CAST(T, expr) ((T)(expr)) -#endif - -#if defined(HEDLEY_REINTERPRET_CAST) -#undef HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) -#define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) -#else -#define HEDLEY_REINTERPRET_CAST(T, expr) ((T)(expr)) -#endif - -#if defined(HEDLEY_STATIC_CAST) -#undef HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) -#define HEDLEY_STATIC_CAST(T, expr) (static_cast(expr)) -#else -#define HEDLEY_STATIC_CAST(T, expr) ((T)(expr)) -#endif - -#if defined(HEDLEY_CPP_CAST) -#undef HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -#if HEDLEY_HAS_WARNING("-Wold-style-cast") -#define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"")((T)(expr)) \ - HEDLEY_DIAGNOSTIC_POP -#elif HEDLEY_IAR_VERSION_CHECK(8, 3, 0) -#define HEDLEY_CPP_CAST(T, expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") HEDLEY_DIAGNOSTIC_POP -#else -#define HEDLEY_CPP_CAST(T, expr) ((T)(expr)) -#endif -#else -#define HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) -#undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \ - _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \ - _Pragma("warning(disable:1478 1786)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \ - __pragma(warning(disable : 1478 1786)) -#elif HEDLEY_PGI_VERSION_CHECK(20, 7, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \ - _Pragma("diag_suppress 1215,1216,1444,1445") -#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \ - _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable : 4996)) -#elif HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) && !defined(__cplusplus) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \ - _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) && defined(__cplusplus) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \ - _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED \ - _Pragma("diag_suppress=Pe1444,Pe1215") -#elif HEDLEY_PELLES_VERSION_CHECK(2, 90, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else -#define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) -#undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - _Pragma("warning(disable:161)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - __pragma(warning(disable : 161)) -#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - __pragma(warning(disable : 4068)) -#elif HEDLEY_TI_VERSION_CHECK(16, 9, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#else -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) -#undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-attributes") -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif HEDLEY_GCC_VERSION_CHECK(4, 6, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif HEDLEY_INTEL_VERSION_CHECK(17, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - _Pragma("warning(disable:1292)") -#elif HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - __pragma(warning(disable : 1292)) -#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - __pragma(warning(disable : 5030)) -#elif HEDLEY_PGI_VERSION_CHECK(20, 7, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - _Pragma("diag_suppress 1097,1098") -#elif HEDLEY_PGI_VERSION_CHECK(17, 10, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - _Pragma("diag_suppress 1097") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 14, 0) && defined(__cplusplus) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - _Pragma("error_messages(off,attrskipunsup)") -#elif HEDLEY_TI_VERSION_CHECK(18, 1, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8, 3, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - _Pragma("diag_suppress 1173") -#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES \ - _Pragma("diag_suppress=Pe1097") -#else -#define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) -#undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if HEDLEY_HAS_WARNING("-Wcast-qual") -#define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _Pragma("warning(disable:2203 2331)") -#elif HEDLEY_GCC_VERSION_CHECK(3, 0, 0) -#define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else -#define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(HEDLEY_DEPRECATED) -#undef HEDLEY_DEPRECATED -#endif -#if defined(HEDLEY_DEPRECATED_FOR) -#undef HEDLEY_DEPRECATED_FOR -#endif -#if HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " #since)) -#define HEDLEY_DEPRECATED_FOR(since, replacement) \ - __declspec(deprecated("Since " #since "; use " #replacement)) -#elif HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) || \ - HEDLEY_GCC_VERSION_CHECK(4, 5, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(5, 6, 0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) || \ - HEDLEY_PGI_VERSION_CHECK(17, 10, 0) || \ - HEDLEY_TI_VERSION_CHECK(18, 1, 0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(18, 1, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8, 3, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 0) -#define HEDLEY_DEPRECATED(since) \ - __attribute__((__deprecated__("Since " #since))) -#define HEDLEY_DEPRECATED_FOR(since, replacement) \ - __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) -#define HEDLEY_DEPRECATED(since) \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \ - [[deprecated("Since " #since)]]) -#define HEDLEY_DEPRECATED_FOR(since, replacement) \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \ - [[deprecated("Since " #since "; use " #replacement)]]) -#elif HEDLEY_HAS_ATTRIBUTE(deprecated) || HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) -#define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) -#define HEDLEY_DEPRECATED_FOR(since, replacement) \ - __attribute__((__deprecated__)) -#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \ - HEDLEY_PELLES_VERSION_CHECK(6, 50, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_DEPRECATED(since) __declspec(deprecated) -#define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0) -#define HEDLEY_DEPRECATED(since) _Pragma("deprecated") -#define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") -#else -#define HEDLEY_DEPRECATED(since) -#define HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(HEDLEY_UNAVAILABLE) -#undef HEDLEY_UNAVAILABLE -#endif -#if HEDLEY_HAS_ATTRIBUTE(warning) || HEDLEY_GCC_VERSION_CHECK(4, 3, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_UNAVAILABLE(available_since) \ - __attribute__((__warning__("Not available until " #available_since))) -#else -#define HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(HEDLEY_WARN_UNUSED_RESULT) -#undef HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) -#undef HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0) && defined(__cplusplus)) || \ - HEDLEY_PGI_VERSION_CHECK(17, 10, 0) -#define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) -#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \ - __attribute__((__warn_unused_result__)) -#elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) -#define HEDLEY_WARN_UNUSED_RESULT \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) -#define HEDLEY_WARN_UNUSED_RESULT \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ -#define HEDLEY_WARN_UNUSED_RESULT _Check_return_ -#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else -#define HEDLEY_WARN_UNUSED_RESULT -#define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(HEDLEY_SENTINEL) -#undef HEDLEY_SENTINEL -#endif -#if HEDLEY_HAS_ATTRIBUTE(sentinel) || HEDLEY_GCC_VERSION_CHECK(4, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(5, 4, 0) -#define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else -#define HEDLEY_SENTINEL(position) -#endif - -#if defined(HEDLEY_NO_RETURN) -#undef HEDLEY_NO_RETURN -#endif -#if HEDLEY_IAR_VERSION_CHECK(8, 0, 0) -#define HEDLEY_NO_RETURN __noreturn -#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L -#define HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) -#define HEDLEY_NO_RETURN \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif HEDLEY_HAS_ATTRIBUTE(noreturn) || HEDLEY_GCC_VERSION_CHECK(3, 2, 0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) -#define HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0) -#define HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_NO_RETURN __declspec(noreturn) -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6, 0, 0) && defined(__cplusplus) -#define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3, 2, 0) -#define HEDLEY_NO_RETURN __attribute((noreturn)) -#elif HEDLEY_PELLES_VERSION_CHECK(9, 0, 0) -#define HEDLEY_NO_RETURN __declspec(noreturn) -#else -#define HEDLEY_NO_RETURN -#endif - -#if defined(HEDLEY_NO_ESCAPE) -#undef HEDLEY_NO_ESCAPE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noescape) -#define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else -#define HEDLEY_NO_ESCAPE -#endif - -#if defined(HEDLEY_UNREACHABLE) -#undef HEDLEY_UNREACHABLE -#endif -#if defined(HEDLEY_UNREACHABLE_RETURN) -#undef HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(HEDLEY_ASSUME) -#undef HEDLEY_ASSUME -#endif -#if HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_ASSUME(expr) __assume(expr) -#elif HEDLEY_HAS_BUILTIN(__builtin_assume) -#define HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4, 0, 0) -#if defined(__cplusplus) -#define HEDLEY_ASSUME(expr) std::_nassert(expr) -#else -#define HEDLEY_ASSUME(expr) _nassert(expr) -#endif -#endif -#if (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && \ - (!defined(HEDLEY_ARM_VERSION))) || \ - HEDLEY_GCC_VERSION_CHECK(4, 5, 0) || \ - HEDLEY_PGI_VERSION_CHECK(18, 10, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_IBM_VERSION_CHECK(13, 1, 5) -#define HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(HEDLEY_ASSUME) -#define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif -#if !defined(HEDLEY_ASSUME) -#if defined(HEDLEY_UNREACHABLE) -#define HEDLEY_ASSUME(expr) \ - HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (HEDLEY_UNREACHABLE(), 1))) -#else -#define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) -#endif -#endif -#if defined(HEDLEY_UNREACHABLE) -#if HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4, 0, 0) -#define HEDLEY_UNREACHABLE_RETURN(value) \ - return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) -#else -#define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() -#endif -#else -#define HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(HEDLEY_UNREACHABLE) -#define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) -#endif - -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wpedantic") -#pragma clang diagnostic ignored "-Wpedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros", 4, 0, 0) -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wvariadic-macros" -#elif defined(HEDLEY_GCC_VERSION) -#pragma GCC diagnostic ignored "-Wvariadic-macros" -#endif -#endif -#if defined(HEDLEY_NON_NULL) -#undef HEDLEY_NON_NULL -#endif -#if HEDLEY_HAS_ATTRIBUTE(nonnull) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) -#define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else -#define HEDLEY_NON_NULL(...) -#endif -HEDLEY_DIAGNOSTIC_POP - -#if defined(HEDLEY_PRINTF_FORMAT) -#undef HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format, 4, 4, 0) && \ - !defined(__USE_MINGW_ANSI_STDIO) -#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check) \ - __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format, 4, 4, 0) && \ - defined(__USE_MINGW_ANSI_STDIO) -#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check) \ - __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif HEDLEY_HAS_ATTRIBUTE(format) || HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(5, 6, 0) || \ - HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) -#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check) \ - __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif HEDLEY_PELLES_VERSION_CHECK(6, 0, 0) -#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check) \ - __declspec(vaformat(printf, string_idx, first_to_check)) -#else -#define HEDLEY_PRINTF_FORMAT(string_idx, first_to_check) -#endif - -#if defined(HEDLEY_CONSTEXPR) -#undef HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) -#if __cplusplus >= 201103L -#define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) -#endif -#endif -#if !defined(HEDLEY_CONSTEXPR) -#define HEDLEY_CONSTEXPR -#endif - -#if defined(HEDLEY_PREDICT) -#undef HEDLEY_PREDICT -#endif -#if defined(HEDLEY_LIKELY) -#undef HEDLEY_LIKELY -#endif -#if defined(HEDLEY_UNLIKELY) -#undef HEDLEY_UNLIKELY -#endif -#if defined(HEDLEY_UNPREDICTABLE) -#undef HEDLEY_UNPREDICTABLE -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) -#define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && \ - !defined(HEDLEY_PGI_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(9, 0, 0) -#define HEDLEY_PREDICT(expr, value, probability) \ - __builtin_expect_with_probability((expr), (value), (probability)) -#define HEDLEY_PREDICT_TRUE(expr, probability) \ - __builtin_expect_with_probability(!!(expr), 1, (probability)) -#define HEDLEY_PREDICT_FALSE(expr, probability) \ - __builtin_expect_with_probability(!!(expr), 0, (probability)) -#define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -#define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#elif (HEDLEY_HAS_BUILTIN(__builtin_expect) && \ - !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5, 15, 0) && defined(__cplusplus)) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(4, 7, 0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3, 1, 0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 1, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6, 1, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) || \ - HEDLEY_TINYC_VERSION_CHECK(0, 9, 27) || \ - HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) -#define HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) \ - ? __builtin_expect((expr), (expected)) \ - : (HEDLEY_STATIC_CAST(void, expected), (expr))) -#define HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) \ - ? __builtin_expect(!!(expr), 1) \ - : ((hedley_probability_ <= 0.1) \ - ? __builtin_expect(!!(expr), 0) \ - : !!(expr))); \ - })) -#define HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) \ - ? __builtin_expect(!!(expr), 0) \ - : ((hedley_probability_ <= 0.1) \ - ? __builtin_expect(!!(expr), 1) \ - : !!(expr))); \ - })) -#define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -#define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -#define HEDLEY_PREDICT(expr, expected, probability) \ - (HEDLEY_STATIC_CAST(void, expected), (expr)) -#define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -#define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -#define HEDLEY_LIKELY(expr) (!!(expr)) -#define HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(HEDLEY_UNPREDICTABLE) -#define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(HEDLEY_MALLOC) -#undef HEDLEY_MALLOC -#endif -#if HEDLEY_HAS_ATTRIBUTE(malloc) || HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(12, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) -#define HEDLEY_MALLOC __attribute__((__malloc__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0) -#define HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_MALLOC __declspec(restrict) -#else -#define HEDLEY_MALLOC -#endif - -#if defined(HEDLEY_PURE) -#undef HEDLEY_PURE -#endif -#if HEDLEY_HAS_ATTRIBUTE(pure) || HEDLEY_GCC_VERSION_CHECK(2, 96, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) || \ - HEDLEY_PGI_VERSION_CHECK(17, 10, 0) -#define HEDLEY_PURE __attribute__((__pure__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0) -#define HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && (HEDLEY_TI_CL430_VERSION_CHECK(2, 0, 1) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(4, 0, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0)) -#define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -#define HEDLEY_PURE -#endif - -#if defined(HEDLEY_CONST) -#undef HEDLEY_CONST -#endif -#if HEDLEY_HAS_ATTRIBUTE(const) || HEDLEY_GCC_VERSION_CHECK(2, 5, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) || \ - HEDLEY_PGI_VERSION_CHECK(17, 10, 0) -#define HEDLEY_CONST __attribute__((__const__)) -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0) -#define HEDLEY_CONST _Pragma("no_side_effect") -#else -#define HEDLEY_CONST HEDLEY_PURE -#endif - -#if defined(HEDLEY_RESTRICT) -#undef HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__cplusplus) -#define HEDLEY_RESTRICT restrict -#elif HEDLEY_GCC_VERSION_CHECK(3, 1, 0) || \ - HEDLEY_MSVC_VERSION_CHECK(14, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \ - HEDLEY_PGI_VERSION_CHECK(17, 10, 0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 4) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8, 1, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5, 14, 0) && defined(__cplusplus)) || \ - HEDLEY_IAR_VERSION_CHECK(8, 0, 0) || defined(__clang__) -#define HEDLEY_RESTRICT __restrict -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 3, 0) && !defined(__cplusplus) -#define HEDLEY_RESTRICT _Restrict -#else -#define HEDLEY_RESTRICT -#endif - -#if defined(HEDLEY_INLINE) -#undef HEDLEY_INLINE -#endif -#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) -#define HEDLEY_INLINE inline -#elif defined(HEDLEY_GCC_VERSION) || HEDLEY_ARM_VERSION_CHECK(6, 2, 0) -#define HEDLEY_INLINE __inline__ -#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 1, 0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(3, 1, 0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 2, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) -#define HEDLEY_INLINE __inline -#else -#define HEDLEY_INLINE -#endif - -#if defined(HEDLEY_ALWAYS_INLINE) -#undef HEDLEY_ALWAYS_INLINE -#endif -#if HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - HEDLEY_GCC_VERSION_CHECK(4, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) -#define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE -#elif HEDLEY_MSVC_VERSION_CHECK(12, 0, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && (HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6, 1, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0)) -#define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0) -#define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -#define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE -#endif - -#if defined(HEDLEY_NEVER_INLINE) -#undef HEDLEY_NEVER_INLINE -#endif -#if HEDLEY_HAS_ATTRIBUTE(noinline) || HEDLEY_GCC_VERSION_CHECK(4, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(10, 1, 0) || \ - HEDLEY_TI_VERSION_CHECK(15, 12, 0) || \ - (HEDLEY_TI_ARMCL_VERSION_CHECK(4, 8, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(5, 2, 0) || \ - (HEDLEY_TI_CL2000_VERSION_CHECK(6, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(6, 4, 0) || \ - (HEDLEY_TI_CL430_VERSION_CHECK(4, 0, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL430_VERSION_CHECK(4, 3, 0) || \ - (HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 1, 0) -#define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_NEVER_INLINE __declspec(noinline) -#elif HEDLEY_PGI_VERSION_CHECK(10, 2, 0) -#define HEDLEY_NEVER_INLINE _Pragma("noinline") -#elif HEDLEY_TI_CL6X_VERSION_CHECK(6, 0, 0) && defined(__cplusplus) -#define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0) -#define HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif HEDLEY_COMPCERT_VERSION_CHECK(3, 2, 0) -#define HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif HEDLEY_PELLES_VERSION_CHECK(9, 0, 0) -#define HEDLEY_NEVER_INLINE __declspec(noinline) -#else -#define HEDLEY_NEVER_INLINE -#endif - -#if defined(HEDLEY_PRIVATE) -#undef HEDLEY_PRIVATE -#endif -#if defined(HEDLEY_PUBLIC) -#undef HEDLEY_PUBLIC -#endif -#if defined(HEDLEY_IMPORT) -#undef HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -#define HEDLEY_PRIVATE -#define HEDLEY_PUBLIC __declspec(dllexport) -#define HEDLEY_IMPORT __declspec(dllimport) -#else -#if HEDLEY_HAS_ATTRIBUTE(visibility) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 11, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(13, 1, 0) || \ - (defined(__TI_EABI__) && ((HEDLEY_TI_CL6X_VERSION_CHECK(7, 2, 0) && \ - defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(7, 5, 0))) -#define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -#define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -#else -#define HEDLEY_PRIVATE -#define HEDLEY_PUBLIC -#endif -#define HEDLEY_IMPORT extern -#endif - -#if defined(HEDLEY_NO_THROW) -#undef HEDLEY_NO_THROW -#endif -#if HEDLEY_HAS_ATTRIBUTE(nothrow) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif HEDLEY_MSVC_VERSION_CHECK(13, 1, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) -#define HEDLEY_NO_THROW __declspec(nothrow) -#else -#define HEDLEY_NO_THROW -#endif - -#if defined(HEDLEY_FALL_THROUGH) -#undef HEDLEY_FALL_THROUGH -#endif -#if HEDLEY_HAS_ATTRIBUTE(fallthrough) || HEDLEY_GCC_VERSION_CHECK(7, 0, 0) -#define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang, fallthrough) -#define HEDLEY_FALL_THROUGH \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) -#define HEDLEY_FALL_THROUGH \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ -#define HEDLEY_FALL_THROUGH __fallthrough -#else -#define HEDLEY_FALL_THROUGH -#endif - -#if defined(HEDLEY_RETURNS_NON_NULL) -#undef HEDLEY_RETURNS_NON_NULL -#endif -#if HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || HEDLEY_GCC_VERSION_CHECK(4, 9, 0) -#define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ -#define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else -#define HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(HEDLEY_ARRAY_PARAM) -#undef HEDLEY_ARRAY_PARAM -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - !defined(__STDC_NO_VLA__) && !defined(__cplusplus) && \ - !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_TINYC_VERSION) -#define HEDLEY_ARRAY_PARAM(name) (name) -#else -#define HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(HEDLEY_IS_CONSTANT) -#undef HEDLEY_IS_CONSTANT -#endif -#if defined(HEDLEY_REQUIRE_CONSTEXPR) -#undef HEDLEY_REQUIRE_CONSTEXPR -#endif -/* HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(HEDLEY_IS_CONSTEXPR_) -#undef HEDLEY_IS_CONSTEXPR_ -#endif -#if HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_TINYC_VERSION_CHECK(0, 9, 19) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(13, 1, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6, 1, 0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) -#define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -#if HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_IBM_VERSION_CHECK(13, 1, 0) || \ - HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) || \ - HEDLEY_ARM_VERSION_CHECK(5, 4, 0) || \ - HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) -#if defined(__INTPTR_TYPE__) -#define HEDLEY_IS_CONSTEXPR_(expr) \ - __builtin_types_compatible_p( \ - __typeof__((1 ? (void *)((__INTPTR_TYPE__)((expr)*0)) \ - : (int *)0)), \ - int *) -#else -#include -#define HEDLEY_IS_CONSTEXPR_(expr) \ - __builtin_types_compatible_p( \ - __typeof__((1 ? (void *)((intptr_t)((expr)*0)) : (int *)0)), \ - int *) -#endif -#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(HEDLEY_SUNPRO_VERSION) && !defined(HEDLEY_PGI_VERSION) && \ - !defined(HEDLEY_IAR_VERSION)) || \ - HEDLEY_HAS_EXTENSION(c_generic_selections) || \ - HEDLEY_GCC_VERSION_CHECK(4, 9, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(17, 0, 0) || \ - HEDLEY_IBM_VERSION_CHECK(12, 1, 0) || \ - HEDLEY_ARM_VERSION_CHECK(5, 3, 0) -#if defined(__INTPTR_TYPE__) -#define HEDLEY_IS_CONSTEXPR_(expr) \ - _Generic((1 ? (void *)((__INTPTR_TYPE__)((expr)*0)) : (int *)0), \ - int * : 1, void * : 0) -#else -#include -#define HEDLEY_IS_CONSTEXPR_(expr) \ - _Generic((1 ? (void *)((intptr_t)*0) : (int *)0), int * : 1, void * : 0) -#endif -#elif defined(HEDLEY_GCC_VERSION) || defined(HEDLEY_INTEL_VERSION) || \ - defined(HEDLEY_TINYC_VERSION) || defined(HEDLEY_TI_ARMCL_VERSION) || \ - HEDLEY_TI_CL430_VERSION_CHECK(18, 12, 0) || \ - defined(HEDLEY_TI_CL2000_VERSION) || \ - defined(HEDLEY_TI_CL6X_VERSION) || defined(HEDLEY_TI_CL7X_VERSION) || \ - defined(HEDLEY_TI_CLPRU_VERSION) || defined(__clang__) -#define HEDLEY_IS_CONSTEXPR_(expr) \ - (sizeof(void) != sizeof(*(1 ? ((void *)((expr)*0L)) : ((struct { \ - char v[sizeof(void) * 2]; \ - } *)1)))) -#endif -#endif -#if defined(HEDLEY_IS_CONSTEXPR_) -#if !defined(HEDLEY_IS_CONSTANT) -#define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) -#endif -#define HEDLEY_REQUIRE_CONSTEXPR(expr) \ - (HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1)) -#else -#if !defined(HEDLEY_IS_CONSTANT) -#define HEDLEY_IS_CONSTANT(expr) (0) -#endif -#define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(HEDLEY_BEGIN_C_DECLS) -#undef HEDLEY_BEGIN_C_DECLS -#endif -#if defined(HEDLEY_END_C_DECLS) -#undef HEDLEY_END_C_DECLS -#endif -#if defined(HEDLEY_C_DECL) -#undef HEDLEY_C_DECL -#endif -#if defined(__cplusplus) -#define HEDLEY_BEGIN_C_DECLS extern "C" { -#define HEDLEY_END_C_DECLS } -#define HEDLEY_C_DECL extern "C" -#else -#define HEDLEY_BEGIN_C_DECLS -#define HEDLEY_END_C_DECLS -#define HEDLEY_C_DECL -#endif - -#if defined(HEDLEY_STATIC_ASSERT) -#undef HEDLEY_STATIC_ASSERT -#endif -#if !defined(__cplusplus) && \ - ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (HEDLEY_HAS_FEATURE(c_static_assert) && \ - !defined(HEDLEY_INTEL_CL_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(6, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || defined(_Static_assert)) -#define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16, 0, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_STATIC_ASSERT(expr, message) \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \ - static_assert(expr, message)) -#else -#define HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(HEDLEY_NULL) -#undef HEDLEY_NULL -#endif -#if defined(__cplusplus) -#if __cplusplus >= 201103L -#define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) -#elif defined(NULL) -#define HEDLEY_NULL NULL -#else -#define HEDLEY_NULL HEDLEY_STATIC_CAST(void *, 0) -#endif -#elif defined(NULL) -#define HEDLEY_NULL NULL -#else -#define HEDLEY_NULL ((void *)0) -#endif - -#if defined(HEDLEY_MESSAGE) -#undef HEDLEY_MESSAGE -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -#define HEDLEY_MESSAGE(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(message msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif HEDLEY_GCC_VERSION_CHECK(4, 4, 0) || HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) -#elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0) -#define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) -#elif HEDLEY_IAR_VERSION_CHECK(8, 0, 0) -#define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#elif HEDLEY_PELLES_VERSION_CHECK(2, 0, 0) -#define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) -#else -#define HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_WARNING) -#undef HEDLEY_WARNING -#endif -#if HEDLEY_HAS_WARNING("-Wunknown-pragmas") -#define HEDLEY_WARNING(msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - HEDLEY_PRAGMA(clang warning msg) \ - HEDLEY_DIAGNOSTIC_POP -#elif HEDLEY_GCC_VERSION_CHECK(4, 8, 0) || \ - HEDLEY_PGI_VERSION_CHECK(18, 4, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) -#elif HEDLEY_MSVC_VERSION_CHECK(15, 0, 0) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) -#else -#define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) -#endif - -#if defined(HEDLEY_REQUIRE) -#undef HEDLEY_REQUIRE -#endif -#if defined(HEDLEY_REQUIRE_MSG) -#undef HEDLEY_REQUIRE_MSG -#endif -#if HEDLEY_HAS_ATTRIBUTE(diagnose_if) -#if HEDLEY_HAS_WARNING("-Wgcc-compat") -#define HEDLEY_REQUIRE(expr) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), #expr, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -#define HEDLEY_REQUIRE_MSG(expr, msg) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - HEDLEY_DIAGNOSTIC_POP -#else -#define HEDLEY_REQUIRE(expr) \ - __attribute__((diagnose_if(!(expr), #expr, "error"))) -#define HEDLEY_REQUIRE_MSG(expr, msg) \ - __attribute__((diagnose_if(!(expr), msg, "error"))) -#endif -#else -#define HEDLEY_REQUIRE(expr) -#define HEDLEY_REQUIRE_MSG(expr, msg) -#endif - -#if defined(HEDLEY_FLAGS) -#undef HEDLEY_FLAGS -#endif -#if HEDLEY_HAS_ATTRIBUTE(flag_enum) -#define HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else -#define HEDLEY_FLAGS -#endif - -#if defined(HEDLEY_FLAGS_CAST) -#undef HEDLEY_FLAGS_CAST -#endif -#if HEDLEY_INTEL_VERSION_CHECK(19, 0, 0) -#define HEDLEY_FLAGS_CAST(T, expr) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)")((T)(expr)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -#define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(HEDLEY_EMPTY_BASES) -#undef HEDLEY_EMPTY_BASES -#endif -#if (HEDLEY_MSVC_VERSION_CHECK(19, 0, 23918) && \ - !HEDLEY_MSVC_VERSION_CHECK(20, 0, 0)) || \ - HEDLEY_INTEL_CL_VERSION_CHECK(2021, 1, 0) -#define HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else -#define HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. */ - -#if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) -#undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) -#define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major, minor, patch) (0) -#else -#define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major, minor, patch) \ - HEDLEY_GCC_VERSION_CHECK(major, minor, patch) -#endif - -#if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) -#undef HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) -#undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) \ - HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_BUILTIN) -#undef HEDLEY_CLANG_HAS_BUILTIN -#endif -#define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) - -#if defined(HEDLEY_CLANG_HAS_FEATURE) -#undef HEDLEY_CLANG_HAS_FEATURE -#endif -#define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) - -#if defined(HEDLEY_CLANG_HAS_EXTENSION) -#undef HEDLEY_CLANG_HAS_EXTENSION -#endif -#define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) - -#if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) -#undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) \ - HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(HEDLEY_CLANG_HAS_WARNING) -#undef HEDLEY_CLANG_HAS_WARNING -#endif -#define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ diff --git a/libobs/util/simde/simde-align.h b/libobs/util/simde/simde-align.h deleted file mode 100644 index 7f9b5eae54f85a..00000000000000 --- a/libobs/util/simde/simde-align.h +++ /dev/null @@ -1,481 +0,0 @@ -/* Alignment - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - ********************************************************************** - * - * This is portability layer which should help iron out some - * differences across various compilers, as well as various verisons of - * C and C++. - * - * It was originally developed for SIMD Everywhere - * (), but since its only - * dependency is Hedley (, also CC0) - * it can easily be used in other projects, so please feel free to do - * so. - * - * If you do use this in your project, please keep a link to SIMDe in - * your code to remind you where to report any bugs and/or check for - * updated versions. - * - * # API Overview - * - * The API has several parts, and most macros have a few variations. - * There are APIs for declaring aligned fields/variables, optimization - * hints, and run-time alignment checks. - * - * Briefly, macros ending with "_TO" take numeric values and are great - * when you know the value you would like to use. Macros ending with - * "_LIKE", on the other hand, accept a type and are used when you want - * to use the alignment of a type instead of hardcoding a value. - * - * Documentation for each section of the API is inline. - * - * True to form, MSVC is the main problem and imposes several - * limitations on the effectiveness of the APIs. Detailed descriptions - * of the limitations of each macro are inline, but in general: - * - * * On C11+ or C++11+ code written using this API will work. The - * ASSUME macros may or may not generate a hint to the compiler, but - * that is only an optimization issue and will not actually cause - * failures. - * * If you're using pretty much any compiler other than MSVC, - * everything should basically work as well as in C11/C++11. - */ - -#if !defined(SIMDE_ALIGN_H) -#define SIMDE_ALIGN_H - -#include "hedley.h" - -/* I know this seems a little silly, but some non-hosted compilers - * don't have stddef.h, so we try to accomodate them. */ -#if !defined(SIMDE_ALIGN_SIZE_T_) -#if defined(__SIZE_TYPE__) -#define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ -#elif defined(__SIZE_T_TYPE__) -#define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ -#elif defined(__cplusplus) -#include -#define SIMDE_ALIGN_SIZE_T_ size_t -#else -#include -#define SIMDE_ALIGN_SIZE_T_ size_t -#endif -#endif - -#if !defined(SIMDE_ALIGN_INTPTR_T_) -#if defined(__INTPTR_TYPE__) -#define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ -#elif defined(__PTRDIFF_TYPE__) -#define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ -#elif defined(__PTRDIFF_T_TYPE__) -#define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ -#elif defined(__cplusplus) -#include -#define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t -#else -#include -#define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t -#endif -#endif - -#if defined(SIMDE_ALIGN_DEBUG) -#if defined(__cplusplus) -#include -#else -#include -#endif -#endif - -/* SIMDE_ALIGN_OF(Type) - * - * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or - * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. - * It isn't defined everywhere (only when the compiler has some alignof- - * like feature we can use to implement it), but it should work in most - * modern compilers, as well as C11 and C++11. - * - * If we can't find an implementation for SIMDE_ALIGN_OF then the macro - * will not be defined, so if you can handle that situation sensibly - * you may need to sprinkle some ifdefs into your code. - */ -#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (0 && HEDLEY_HAS_FEATURE(c_alignof)) -#define SIMDE_ALIGN_OF(Type) _Alignof(Type) -#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) -#define SIMDE_ALIGN_OF(Type) alignof(Type) -#elif HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_SUNPRO_VERSION_CHECK(5, 13, 0) || \ - HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \ - HEDLEY_PGI_VERSION_CHECK(19, 10, 0) || \ - HEDLEY_CRAY_VERSION_CHECK(10, 0, 0) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16, 9, 0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16, 9, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16, 9, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2) || defined(__IBM__ALIGNOF__) || \ - defined(__clang__) -#define SIMDE_ALIGN_OF(Type) __alignof__(Type) -#elif HEDLEY_IAR_VERSION_CHECK(8, 40, 0) -#define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) -#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) -/* Probably goes back much further, but MS takes down their old docs. - * If you can verify that this works in earlier versions please let - * me know! */ -#define SIMDE_ALIGN_OF(Type) __alignof(Type) -#endif - -/* SIMDE_ALIGN_MAXIMUM: - * - * This is the maximum alignment that the compiler supports. You can - * define the value prior to including SIMDe if necessary, but in that - * case *please* submit an issue so we can add the platform to the - * detection code. - * - * Most compilers are okay with types which are aligned beyond what - * they think is the maximum, as long as the alignment is a power - * of two. MSVC is the exception (of course), so we need to cap the - * alignment requests at values that the implementation supports. - * - * XL C/C++ will accept values larger than 16 (which is the alignment - * of an AltiVec vector), but will not reliably align to the larger - * value, so so we cap the value at 16 there. - * - * If the compiler accepts any power-of-two value within reason then - * this macro should be left undefined, and the SIMDE_ALIGN_CAP - * macro will just return the value passed to it. */ -#if !defined(SIMDE_ALIGN_MAXIMUM) -#if defined(HEDLEY_MSVC_VERSION) -#if defined(_M_IX86) || defined(_M_AMD64) -#if HEDLEY_MSVC_VERSION_CHECK(19, 14, 0) -#define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 -#elif HEDLEY_MSVC_VERSION_CHECK(16, 0, 0) -/* VS 2010 is really a guess based on Wikipedia; if anyone can - * test with old VS versions I'd really appreciate it. */ -#define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 -#else -#define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 -#endif -#elif defined(_M_ARM) || defined(_M_ARM64) -#define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 -#endif -#elif defined(HEDLEY_IBM_VERSION) -#define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 -#endif -#endif - -/* You can mostly ignore these; they're intended for internal use. - * If you do need to use them please let me know; if they fulfill - * a common use case I'll probably drop the trailing underscore - * and make them part of the public API. */ -#if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) -#if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 -#define SIMDE_ALIGN_64_ 64 -#define SIMDE_ALIGN_32_ 32 -#define SIMDE_ALIGN_16_ 16 -#define SIMDE_ALIGN_8_ 8 -#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 -#define SIMDE_ALIGN_64_ 32 -#define SIMDE_ALIGN_32_ 32 -#define SIMDE_ALIGN_16_ 16 -#define SIMDE_ALIGN_8_ 8 -#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 -#define SIMDE_ALIGN_64_ 16 -#define SIMDE_ALIGN_32_ 16 -#define SIMDE_ALIGN_16_ 16 -#define SIMDE_ALIGN_8_ 8 -#elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 -#define SIMDE_ALIGN_64_ 8 -#define SIMDE_ALIGN_32_ 8 -#define SIMDE_ALIGN_16_ 8 -#define SIMDE_ALIGN_8_ 8 -#else -#error Max alignment expected to be >= 8 -#endif -#else -#define SIMDE_ALIGN_64_ 64 -#define SIMDE_ALIGN_32_ 32 -#define SIMDE_ALIGN_16_ 16 -#define SIMDE_ALIGN_8_ 8 -#endif - -/** - * SIMDE_ALIGN_CAP(Alignment) - * - * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. - */ -#if defined(SIMDE_ALIGN_MAXIMUM) -#define SIMDE_ALIGN_CAP(Alignment) \ - (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) \ - ? (Alignment) \ - : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) -#else -#define SIMDE_ALIGN_CAP(Alignment) (Alignment) -#endif - -/* SIMDE_ALIGN_TO(Alignment) - * - * SIMDE_ALIGN_TO is used to declare types or variables. It basically - * maps to the align attribute in most compilers, the align declspec - * in MSVC, or _Alignas/alignas in C11/C++11. - * - * Example: - * - * struct i32x4 { - * SIMDE_ALIGN_TO(16) int32_t values[4]; - * } - * - * Limitations: - * - * MSVC requires that the Alignment parameter be numeric; you can't do - * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is - * unfortunate because that's really how the LIKE macros are - * implemented, and I am not aware of a way to get anything like this - * to work without using the C11/C++11 keywords. - * - * It also means that we can't use SIMDE_ALIGN_CAP to limit the - * alignment to the value specified, which MSVC also requires, so on - * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. - * They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, - * but should be safe to use on MSVC. - * - * All this is to say that, if you want your code to work on MSVC, you - * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of - * SIMDE_ALIGN_TO(8/16/32/64). - */ -#if HEDLEY_HAS_ATTRIBUTE(aligned) || HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \ - HEDLEY_CRAY_VERSION_CHECK(8, 4, 0) || \ - HEDLEY_IBM_VERSION_CHECK(11, 1, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_PGI_VERSION_CHECK(19, 4, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \ - HEDLEY_TI_ARMCL_VERSION_CHECK(16, 9, 0) || \ - HEDLEY_TI_CL2000_VERSION_CHECK(16, 9, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CL430_VERSION_CHECK(16, 9, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2) -#define SIMDE_ALIGN_TO(Alignment) \ - __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) -#elif (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) -#define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) -#define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) -#elif defined(HEDLEY_MSVC_VERSION) -#define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) -/* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); - * the alignment passed to the declspec has to be an integer. */ -#define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE -#endif -#define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) -#define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) -#define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) -#define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) - -/* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) - * - * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's - * std::assume_aligned, or __builtin_assume_aligned. It tells the - * compiler to assume that the provided pointer is aligned to an - * `Alignment`-byte boundary. - * - * If you define SIMDE_ALIGN_DEBUG prior to including this header then - * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. We don't - * integrate with NDEBUG in this header, but it may be a good idea to - * put something like this in your code: - * - * #if !defined(NDEBUG) - * #define SIMDE_ALIGN_DEBUG - * #endif - * #include <.../simde-align.h> - */ -#if HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ - HEDLEY_GCC_VERSION_CHECK(4, 7, 0) -#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - HEDLEY_REINTERPRET_CAST( \ - __typeof__(Pointer), \ - __builtin_assume_aligned( \ - HEDLEY_CONST_CAST( \ - void *, HEDLEY_REINTERPRET_CAST(const void *, \ - Pointer)), \ - Alignment)) -#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - (__extension__({ \ - __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ - __assume_aligned(simde_assume_aligned_t_, Alignment); \ - simde_assume_aligned_t_; \ - })) -#elif defined(__cplusplus) && (__cplusplus > 201703L) -#include -#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - std::assume_aligned(Pointer) -#else -#if defined(__cplusplus) -template -HEDLEY_ALWAYS_INLINE static T * -simde_align_assume_to_unchecked(T *ptr, const size_t alignment) -#else -HEDLEY_ALWAYS_INLINE static void * -simde_align_assume_to_unchecked(void *ptr, const size_t alignment) -#endif -{ - HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % - SIMDE_ALIGN_CAP(alignment)) == 0); - return ptr; -} -#if defined(__cplusplus) -#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - simde_align_assume_to_unchecked((Pointer), (Alignment)) -#else -#define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ - simde_align_assume_to_unchecked( \ - HEDLEY_CONST_CAST(void *, HEDLEY_REINTERPRET_CAST( \ - const void *, Pointer)), \ - (Alignment)) -#endif -#endif - -#if !defined(SIMDE_ALIGN_DEBUG) -#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \ - SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) -#else -#include -#if defined(__cplusplus) -template -static HEDLEY_ALWAYS_INLINE T * -simde_align_assume_to_checked_uncapped(T *ptr, const size_t alignment, - const char *file, int line, - const char *ptrname) -#else -static HEDLEY_ALWAYS_INLINE void * -simde_align_assume_to_checked_uncapped(void *ptr, const size_t alignment, - const char *file, int line, - const char *ptrname) -#endif -{ - if (HEDLEY_UNLIKELY( - (HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % - HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, - SIMDE_ALIGN_CAP(alignment))) != 0)) { - fprintf(stderr, - "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", - file, line, ptrname, - HEDLEY_REINTERPRET_CAST(const void *, ptr), - HEDLEY_STATIC_CAST(unsigned int, - SIMDE_ALIGN_CAP(alignment)), - HEDLEY_STATIC_CAST( - unsigned int, - HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, - (ptr)) % - HEDLEY_STATIC_CAST( - SIMDE_ALIGN_INTPTR_T_, - SIMDE_ALIGN_CAP(alignment)))); - } - - return ptr; -} - -#if defined(__cplusplus) -#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \ - simde_align_assume_to_checked_uncapped((Pointer), (Alignment), \ - __FILE__, __LINE__, #Pointer) -#else -#define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) \ - simde_align_assume_to_checked_uncapped( \ - HEDLEY_CONST_CAST(void *, HEDLEY_REINTERPRET_CAST( \ - const void *, Pointer)), \ - (Alignment), __FILE__, __LINE__, #Pointer) -#endif -#endif - -/* SIMDE_ALIGN_LIKE(Type) - * SIMDE_ALIGN_LIKE_#(Type) - * - * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros - * except instead of an integer they take a type; basically, it's just - * a more convenient way to do something like: - * - * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) - * - * The versions with a numeric suffix will fall back on using a numeric - * value in the event we can't use SIMDE_ALIGN_OF(Type). This is - * mainly for MSVC, where __declspec(align()) can't handle anything - * other than hard-coded numeric values. - */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && \ - !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) -#define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) -#define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) -#define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) -#define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) -#define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) -#else -#define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 -#define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 -#define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 -#define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 -#endif - -/* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) - * - * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a - * type instead of a numeric value. */ -#if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) -#define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) \ - SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) -#endif - -/* SIMDE_ALIGN_CAST(Type, Pointer) - * - * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try - * to silence warnings that some compilers may produce if you try - * to assign to a type with increased alignment requirements. - * - * Note that it does *not* actually attempt to tell the compiler that - * the pointer is aligned like the destination should be; that's the - * job of the next macro. This macro is necessary for stupid APIs - * like _mm_loadu_si128 where the input is a __m128i* but the function - * is specifically for data which isn't necessarily aligned to - * _Alignof(__m128i). - */ -#if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || \ - HEDLEY_GCC_VERSION_CHECK(3, 4, 0) -#define SIMDE_ALIGN_CAST(Type, Pointer) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ - Type simde_r_ = \ - HEDLEY_REINTERPRET_CAST(Type, Pointer); \ - HEDLEY_DIAGNOSTIC_POP \ - simde_r_; \ - })) -#else -#define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) -#endif - -/* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) - * - * This is sort of like a combination of a reinterpret_cast and a - * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell - * the compiler that the pointer is aligned like the specified type - * and casts the pointer to the specified type while suppressing any - * warnings from the compiler about casting to a type with greater - * alignment requirements. - */ -#define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) \ - SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) - -#endif /* !defined(SIMDE_ALIGN_H) */ diff --git a/libobs/util/simde/simde-arch.h b/libobs/util/simde/simde-arch.h deleted file mode 100644 index afafeba723d438..00000000000000 --- a/libobs/util/simde/simde-arch.h +++ /dev/null @@ -1,537 +0,0 @@ -/* Architecture detection - * Created by Evan Nemerson - * - * To the extent possible under law, the authors have waived all - * copyright and related or neighboring rights to this code. For - * details, see the Creative Commons Zero 1.0 Universal license at - * - * - * SPDX-License-Identifier: CC0-1.0 - * - * Different compilers define different preprocessor macros for the - * same architecture. This is an attempt to provide a single - * interface which is usable on any compiler. - * - * In general, a macro named SIMDE_ARCH_* is defined for each - * architecture the CPU supports. When there are multiple possible - * versions, we try to define the macro to the target version. For - * example, if you want to check for i586+, you could do something - * like: - * - * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) - * ... - * #endif - * - * You could also just check that SIMDE_ARCH_X86 >= 5 without checking - * if it's defined first, but some compilers may emit a warning about - * an undefined macro being used (e.g., GCC with -Wundef). - * - * This was originally created for SIMDe - * (hence the prefix), but this - * header has no dependencies and may be used anywhere. It is - * originally based on information from - * , though it - * has been enhanced with additional information. - * - * If you improve this file, or find a bug, please file the issue at - * . If you copy this into - * your project, even if you change the prefix, please keep the links - * to SIMDe intact so others know where to report issues, submit - * enhancements, and find the latest version. */ - -#if !defined(SIMDE_ARCH_H) -#define SIMDE_ARCH_H - -/* Alpha - */ -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) -#if defined(__alpha_ev6__) -#define SIMDE_ARCH_ALPHA 6 -#elif defined(__alpha_ev5__) -#define SIMDE_ARCH_ALPHA 5 -#elif defined(__alpha_ev4__) -#define SIMDE_ARCH_ALPHA 4 -#else -#define SIMDE_ARCH_ALPHA 1 -#endif -#endif -#if defined(SIMDE_ARCH_ALPHA) -#define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) -#else -#define SIMDE_ARCH_ALPHA_CHECK(version) (0) -#endif - -/* Atmel AVR - */ -#if defined(__AVR_ARCH__) -#define SIMDE_ARCH_AVR __AVR_ARCH__ -#endif - -/* AMD64 / x86_64 - */ -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \ - defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) -#define SIMDE_ARCH_AMD64 1000 -#endif - -/* ARM - */ -#if defined(__ARM_ARCH_8A__) -#define SIMDE_ARCH_ARM 82 -#elif defined(__ARM_ARCH_8R__) -#define SIMDE_ARCH_ARM 81 -#elif defined(__ARM_ARCH_8__) -#define SIMDE_ARCH_ARM 80 -#elif defined(__ARM_ARCH_7S__) -#define SIMDE_ARCH_ARM 74 -#elif defined(__ARM_ARCH_7M__) -#define SIMDE_ARCH_ARM 73 -#elif defined(__ARM_ARCH_7R__) -#define SIMDE_ARCH_ARM 72 -#elif defined(__ARM_ARCH_7A__) -#define SIMDE_ARCH_ARM 71 -#elif defined(__ARM_ARCH_7__) -#define SIMDE_ARCH_ARM 70 -#elif defined(__ARM_ARCH) -#define SIMDE_ARCH_ARM (__ARM_ARCH * 10) -#elif defined(_M_ARM) -#define SIMDE_ARCH_ARM (_M_ARM * 10) -#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || \ - defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) -#define SIMDE_ARCH_ARM 1 -#endif -#if defined(SIMDE_ARCH_ARM) -#define SIMDE_ARCH_ARM_CHECK(version) ((version) <= SIMDE_ARCH_ARM) -#else -#define SIMDE_ARCH_ARM_CHECK(version) (0) -#endif - -/* AArch64 - */ -#if defined(__aarch64__) || defined(_M_ARM64) -#define SIMDE_ARCH_AARCH64 1000 -#endif -#if defined(SIMDE_ARCH_AARCH64) -#define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) -#else -#define SIMDE_ARCH_AARCH64_CHECK(version) (0) -#endif - -/* ARM SIMD ISA extensions */ -#if defined(__ARM_NEON) -#if defined(SIMDE_ARCH_AARCH64) -#define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 -#elif defined(SIMDE_ARCH_ARM) -#define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM -#endif -#endif -#if defined(__ARM_FEATURE_SVE) -#define SIMDE_ARCH_ARM_SVE -#endif - -/* Blackfin - */ -#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) -#define SIMDE_ARCH_BLACKFIN 1 -#endif - -/* CRIS - */ -#if defined(__CRIS_arch_version) -#define SIMDE_ARCH_CRIS __CRIS_arch_version -#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || \ - defined(__CRIS__) -#define SIMDE_ARCH_CRIS 1 -#endif - -/* Convex - */ -#if defined(__convex_c38__) -#define SIMDE_ARCH_CONVEX 38 -#elif defined(__convex_c34__) -#define SIMDE_ARCH_CONVEX 34 -#elif defined(__convex_c32__) -#define SIMDE_ARCH_CONVEX 32 -#elif defined(__convex_c2__) -#define SIMDE_ARCH_CONVEX 2 -#elif defined(__convex__) -#define SIMDE_ARCH_CONVEX 1 -#endif -#if defined(SIMDE_ARCH_CONVEX) -#define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) -#else -#define SIMDE_ARCH_CONVEX_CHECK(version) (0) -#endif - -/* Adapteva Epiphany - */ -#if defined(__epiphany__) -#define SIMDE_ARCH_EPIPHANY 1 -#endif - -/* Fujitsu FR-V - */ -#if defined(__frv__) -#define SIMDE_ARCH_FRV 1 -#endif - -/* H8/300 - */ -#if defined(__H8300__) -#define SIMDE_ARCH_H8300 -#endif - -/* HP/PA / PA-RISC - */ -#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || \ - defined(_PA_RISC2_0) -#define SIMDE_ARCH_HPPA 20 -#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) -#define SIMDE_ARCH_HPPA 11 -#elif defined(_PA_RISC1_0) -#define SIMDE_ARCH_HPPA 10 -#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) -#define SIMDE_ARCH_HPPA 1 -#endif -#if defined(SIMDE_ARCH_HPPA) -#define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) -#else -#define SIMDE_ARCH_HPPA_CHECK(version) (0) -#endif - -/* x86 - */ -#if defined(_M_IX86) -#define SIMDE_ARCH_X86 (_M_IX86 / 100) -#elif defined(__I86__) -#define SIMDE_ARCH_X86 __I86__ -#elif defined(i686) || defined(__i686) || defined(__i686__) -#define SIMDE_ARCH_X86 6 -#elif defined(i586) || defined(__i586) || defined(__i586__) -#define SIMDE_ARCH_X86 5 -#elif defined(i486) || defined(__i486) || defined(__i486__) -#define SIMDE_ARCH_X86 4 -#elif defined(i386) || defined(__i386) || defined(__i386__) -#define SIMDE_ARCH_X86 3 -#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) -#define SIMDE_ARCH_X86 3 -#endif -#if defined(SIMDE_ARCH_X86) -#define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) -#else -#define SIMDE_ARCH_X86_CHECK(version) (0) -#endif - -/* SIMD ISA extensions for x86/x86_64 */ -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -#if defined(_M_IX86_FP) -#define SIMDE_ARCH_X86_MMX -#if (_M_IX86_FP >= 1) -#define SIMDE_ARCH_X86_SSE 1 -#endif -#if (_M_IX86_FP >= 2) -#define SIMDE_ARCH_X86_SSE2 1 -#endif -#elif defined(_M_X64) -#define SIMDE_ARCH_X86_SSE 1 -#define SIMDE_ARCH_X86_SSE2 1 -#else -#if defined(__MMX__) -#define SIMDE_ARCH_X86_MMX 1 -#endif -#if defined(__SSE__) -#define SIMDE_ARCH_X86_SSE 1 -#endif -#if defined(__SSE2__) -#define SIMDE_ARCH_X86_SSE2 1 -#endif -#endif -#if defined(__SSE3__) -#define SIMDE_ARCH_X86_SSE3 1 -#endif -#if defined(__SSSE3__) -#define SIMDE_ARCH_X86_SSSE3 1 -#endif -#if defined(__SSE4_1__) -#define SIMDE_ARCH_X86_SSE4_1 1 -#endif -#if defined(__SSE4_2__) -#define SIMDE_ARCH_X86_SSE4_2 1 -#endif -#if defined(__AVX__) -#define SIMDE_ARCH_X86_AVX 1 -#if !defined(SIMDE_ARCH_X86_SSE3) -#define SIMDE_ARCH_X86_SSE3 1 -#endif -#if !defined(SIMDE_ARCH_X86_SSE4_1) -#define SIMDE_ARCH_X86_SSE4_1 1 -#endif -#if !defined(SIMDE_ARCH_X86_SSE4_1) -#define SIMDE_ARCH_X86_SSE4_2 1 -#endif -#endif -#if defined(__AVX2__) -#define SIMDE_ARCH_X86_AVX2 1 -#endif -#if defined(__FMA__) -#define SIMDE_ARCH_X86_FMA 1 -#if !defined(SIMDE_ARCH_X86_AVX) -#define SIMDE_ARCH_X86_AVX 1 -#endif -#endif -#if defined(__AVX512VP2INTERSECT__) -#define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 -#endif -#if defined(__AVX512VBMI__) -#define SIMDE_ARCH_X86_AVX512VBMI 1 -#endif -#if defined(__AVX512BW__) -#define SIMDE_ARCH_X86_AVX512BW 1 -#endif -#if defined(__AVX512CD__) -#define SIMDE_ARCH_X86_AVX512CD 1 -#endif -#if defined(__AVX512DQ__) -#define SIMDE_ARCH_X86_AVX512DQ 1 -#endif -#if defined(__AVX512F__) -#define SIMDE_ARCH_X86_AVX512F 1 -#endif -#if defined(__AVX512VL__) -#define SIMDE_ARCH_X86_AVX512VL 1 -#endif -#if defined(__GFNI__) -#define SIMDE_ARCH_X86_GFNI 1 -#endif -#if defined(__PCLMUL__) -#define SIMDE_ARCH_X86_PCLMUL 1 -#endif -#if defined(__VPCLMULQDQ__) -#define SIMDE_ARCH_X86_VPCLMULQDQ 1 -#endif -#endif - -/* Itanium - */ -#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || \ - defined(__ia64) || defined(_M_IA64) || defined(__itanium__) -#define SIMDE_ARCH_IA64 1 -#endif - -/* Renesas M32R - */ -#if defined(__m32r__) || defined(__M32R__) -#define SIMDE_ARCH_M32R -#endif - -/* Motorola 68000 - */ -#if defined(__mc68060__) || defined(__MC68060__) -#define SIMDE_ARCH_M68K 68060 -#elif defined(__mc68040__) || defined(__MC68040__) -#define SIMDE_ARCH_M68K 68040 -#elif defined(__mc68030__) || defined(__MC68030__) -#define SIMDE_ARCH_M68K 68030 -#elif defined(__mc68020__) || defined(__MC68020__) -#define SIMDE_ARCH_M68K 68020 -#elif defined(__mc68010__) || defined(__MC68010__) -#define SIMDE_ARCH_M68K 68010 -#elif defined(__mc68000__) || defined(__MC68000__) -#define SIMDE_ARCH_M68K 68000 -#endif -#if defined(SIMDE_ARCH_M68K) -#define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) -#else -#define SIMDE_ARCH_M68K_CHECK(version) (0) -#endif - -/* Xilinx MicroBlaze - */ -#if defined(__MICROBLAZE__) || defined(__microblaze__) -#define SIMDE_ARCH_MICROBLAZE -#endif - -/* MIPS - */ -#if defined(_MIPS_ISA_MIPS64R2) -#define SIMDE_ARCH_MIPS 642 -#elif defined(_MIPS_ISA_MIPS64) -#define SIMDE_ARCH_MIPS 640 -#elif defined(_MIPS_ISA_MIPS32R2) -#define SIMDE_ARCH_MIPS 322 -#elif defined(_MIPS_ISA_MIPS32) -#define SIMDE_ARCH_MIPS 320 -#elif defined(_MIPS_ISA_MIPS4) -#define SIMDE_ARCH_MIPS 4 -#elif defined(_MIPS_ISA_MIPS3) -#define SIMDE_ARCH_MIPS 3 -#elif defined(_MIPS_ISA_MIPS2) -#define SIMDE_ARCH_MIPS 2 -#elif defined(_MIPS_ISA_MIPS1) -#define SIMDE_ARCH_MIPS 1 -#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) -#define SIMDE_ARCH_MIPS 1 -#endif -#if defined(SIMDE_ARCH_MIPS) -#define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) -#else -#define SIMDE_ARCH_MIPS_CHECK(version) (0) -#endif - -#if defined(__mips_loongson_mmi) -#define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 -#endif - -/* Matsushita MN10300 - */ -#if defined(__MN10300__) || defined(__mn10300__) -#define SIMDE_ARCH_MN10300 1 -#endif - -/* POWER - */ -#if defined(_M_PPC) -#define SIMDE_ARCH_POWER _M_PPC -#elif defined(_ARCH_PWR9) -#define SIMDE_ARCH_POWER 900 -#elif defined(_ARCH_PWR8) -#define SIMDE_ARCH_POWER 800 -#elif defined(_ARCH_PWR7) -#define SIMDE_ARCH_POWER 700 -#elif defined(_ARCH_PWR6) -#define SIMDE_ARCH_POWER 600 -#elif defined(_ARCH_PWR5) -#define SIMDE_ARCH_POWER 500 -#elif defined(_ARCH_PWR4) -#define SIMDE_ARCH_POWER 400 -#elif defined(_ARCH_440) || defined(__ppc440__) -#define SIMDE_ARCH_POWER 440 -#elif defined(_ARCH_450) || defined(__ppc450__) -#define SIMDE_ARCH_POWER 450 -#elif defined(_ARCH_601) || defined(__ppc601__) -#define SIMDE_ARCH_POWER 601 -#elif defined(_ARCH_603) || defined(__ppc603__) -#define SIMDE_ARCH_POWER 603 -#elif defined(_ARCH_604) || defined(__ppc604__) -#define SIMDE_ARCH_POWER 604 -#elif defined(_ARCH_605) || defined(__ppc605__) -#define SIMDE_ARCH_POWER 605 -#elif defined(_ARCH_620) || defined(__ppc620__) -#define SIMDE_ARCH_POWER 620 -#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || \ - defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || \ - defined(__ppc) -#define SIMDE_ARCH_POWER 1 -#endif -#if defined(SIMDE_ARCH_POWER) -#define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else -#define SIMDE_ARCH_POWER_CHECK(version) (0) -#endif - -#if defined(__ALTIVEC__) -#define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER -#endif -#if defined(SIMDE_ARCH_POWER) -#define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) -#else -#define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) -#endif - -/* SPARC - */ -#if defined(__sparc_v9__) || defined(__sparcv9) -#define SIMDE_ARCH_SPARC 9 -#elif defined(__sparc_v8__) || defined(__sparcv8) -#define SIMDE_ARCH_SPARC 8 -#elif defined(__sparc_v7__) || defined(__sparcv7) -#define SIMDE_ARCH_SPARC 7 -#elif defined(__sparc_v6__) || defined(__sparcv6) -#define SIMDE_ARCH_SPARC 6 -#elif defined(__sparc_v5__) || defined(__sparcv5) -#define SIMDE_ARCH_SPARC 5 -#elif defined(__sparc_v4__) || defined(__sparcv4) -#define SIMDE_ARCH_SPARC 4 -#elif defined(__sparc_v3__) || defined(__sparcv3) -#define SIMDE_ARCH_SPARC 3 -#elif defined(__sparc_v2__) || defined(__sparcv2) -#define SIMDE_ARCH_SPARC 2 -#elif defined(__sparc_v1__) || defined(__sparcv1) -#define SIMDE_ARCH_SPARC 1 -#elif defined(__sparc__) || defined(__sparc) -#define SIMDE_ARCH_SPARC 1 -#endif -#if defined(SIMDE_ARCH_SPARC) -#define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) -#else -#define SIMDE_ARCH_SPARC_CHECK(version) (0) -#endif - -/* SuperH - */ -#if defined(__sh5__) || defined(__SH5__) -#define SIMDE_ARCH_SUPERH 5 -#elif defined(__sh4__) || defined(__SH4__) -#define SIMDE_ARCH_SUPERH 4 -#elif defined(__sh3__) || defined(__SH3__) -#define SIMDE_ARCH_SUPERH 3 -#elif defined(__sh2__) || defined(__SH2__) -#define SIMDE_ARCH_SUPERH 2 -#elif defined(__sh1__) || defined(__SH1__) -#define SIMDE_ARCH_SUPERH 1 -#elif defined(__sh__) || defined(__SH__) -#define SIMDE_ARCH_SUPERH 1 -#endif - -/* IBM System z - */ -#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || \ - defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -#define SIMDE_ARCH_SYSTEMZ -#endif - -/* TMS320 DSP - */ -#if defined(_TMS320C6740) || defined(__TMS320C6740__) -#define SIMDE_ARCH_TMS320 6740 -#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) -#define SIMDE_ARCH_TMS320 6701 -#elif defined(_TMS320C6700) || defined(__TMS320C6700__) -#define SIMDE_ARCH_TMS320 6700 -#elif defined(_TMS320C6600) || defined(__TMS320C6600__) -#define SIMDE_ARCH_TMS320 6600 -#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) -#define SIMDE_ARCH_TMS320 6401 -#elif defined(_TMS320C6400) || defined(__TMS320C6400__) -#define SIMDE_ARCH_TMS320 6400 -#elif defined(_TMS320C6200) || defined(__TMS320C6200__) -#define SIMDE_ARCH_TMS320 6200 -#elif defined(_TMS320C55X) || defined(__TMS320C55X__) -#define SIMDE_ARCH_TMS320 550 -#elif defined(_TMS320C54X) || defined(__TMS320C54X__) -#define SIMDE_ARCH_TMS320 540 -#elif defined(_TMS320C28X) || defined(__TMS320C28X__) -#define SIMDE_ARCH_TMS320 280 -#endif -#if defined(SIMDE_ARCH_TMS320) -#define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) -#else -#define SIMDE_ARCH_TMS320_CHECK(version) (0) -#endif - -/* WebAssembly */ -#if defined(__wasm__) -#define SIMDE_ARCH_WASM 1 -#endif - -#if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) -#define SIMDE_ARCH_WASM_SIMD128 -#endif - -/* Xtensa - */ -#if defined(__xtensa__) || defined(__XTENSA__) -#define SIMDE_ARCH_XTENSA 1 -#endif - -#endif /* !defined(SIMDE_ARCH_H) */ diff --git a/libobs/util/simde/simde-common.h b/libobs/util/simde/simde-common.h deleted file mode 100644 index 42ef56893522b7..00000000000000 --- a/libobs/util/simde/simde-common.h +++ /dev/null @@ -1,918 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_COMMON_H) -#define SIMDE_COMMON_H - -#include "hedley.h" - -#define SIMDE_VERSION_MAJOR 0 -#define SIMDE_VERSION_MINOR 7 -#define SIMDE_VERSION_MICRO 1 -#define SIMDE_VERSION \ - HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, \ - SIMDE_VERSION_MICRO) - -#include -#include - -#include "simde-detect-clang.h" -#include "simde-arch.h" -#include "simde-features.h" -#include "simde-diagnostic.h" -#include "simde-math.h" -#include "simde-constify.h" -#include "simde-align.h" - -/* In some situations, SIMDe has to make large performance sacrifices - * for small increases in how faithfully it reproduces an API, but - * only a relatively small number of users will actually need the API - * to be completely accurate. The SIMDE_FAST_* options can be used to - * disable these trade-offs. - * - * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or - * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to - * enable some optimizations. Using -ffast-math and/or - * -ffinite-math-only will also enable the relevant options. If you - * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. */ - -/* Most programs avoid NaNs by never passing values which can result in - * a NaN; for example, if you only pass non-negative values to the sqrt - * functions, it won't generate a NaN. On some platforms, similar - * functions handle NaNs differently; for example, the _mm_min_ps SSE - * function will return 0.0 if you pass it (0.0, NaN), but the NEON - * vminq_f32 function will return NaN. Making them behave like one - * another is expensive; it requires generating a mask of all lanes - * with NaNs, then performing the operation (e.g., vminq_f32), then - * blending together the result with another vector using the mask. - * - * If you don't want SIMDe to worry about the differences between how - * NaNs are handled on the two platforms, define this (or pass - * -ffinite-math-only) */ -#if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && \ - defined(__FAST_MATH__) -#define SIMDE_FAST_MATH -#endif - -#if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) -#if defined(SIMDE_FAST_MATH) -#define SIMDE_FAST_NANS -#elif defined(__FINITE_MATH_ONLY__) -#if __FINITE_MATH_ONLY__ -#define SIMDE_FAST_NANS -#endif -#endif -#endif - -/* Many functions are defined as using the current rounding mode - * (i.e., the SIMD version of fegetround()) when converting to - * an integer. For example, _mm_cvtpd_epi32. Unfortunately, - * on some platforms (such as ARMv8+ where round-to-nearest is - * always used, regardless of the FPSCR register) this means we - * have to first query the current rounding mode, then choose - * the proper function (rounnd - , ceil, floor, etc.) */ -#if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && \ - defined(SIMDE_FAST_MATH) -#define SIMDE_FAST_ROUND_MODE -#endif - -/* This controls how ties are rounded. For example, does 10.5 round to - * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for - * example) doesn't support it and it must be emulated (which is rather - * slow). If you're okay with just using the default for whatever arch - * you're on, you should definitely define this. - * - * Note that we don't use this macro to avoid correct implementations - * in functions which are explicitly about rounding (such as vrnd* on - * NEON, _mm_round_* on x86, etc.); it is only used for code where - * rounding is a component in another function, and even then it isn't - * usually a problem since such functions will use the current rounding - * mode. */ -#if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && \ - defined(SIMDE_FAST_MATH) -#define SIMDE_FAST_ROUND_TIES -#endif - -/* For functions which convert from one type to another (mostly from - * floating point to integer types), sometimes we need to do a range - * check and potentially return a different result if the value - * falls outside that range. Skipping this check can provide a - * performance boost, at the expense of faithfulness to the API we're - * emulating. */ -#if !defined(SIMDE_FAST_CONVERSION_RANGE) && \ - !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) -#define SIMDE_FAST_CONVERSION_RANGE -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_TINYC_VERSION_CHECK(0, 9, 19) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(13, 1, 0) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(6, 1, 0) || \ - (HEDLEY_SUNPRO_VERSION_CHECK(5, 10, 0) && !defined(__cplusplus)) || \ - HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) -#define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) -#elif defined(__cplusplus) && (__cplusplus > 201703L) -#include -#define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) -#endif - -#if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) -#if defined(SIMDE_CHECK_CONSTANT_) && \ - SIMDE_DETECT_CLANG_VERSION_CHECK(9, 0, 0) && \ - (!defined(__apple_build_version__) || \ - ((__apple_build_version__ < 11000000) || \ - (__apple_build_version__ >= 12000000))) -#define SIMDE_REQUIRE_CONSTANT(arg) \ - HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), \ - "`" #arg "' must be constant") -#else -#define SIMDE_REQUIRE_CONSTANT(arg) -#endif -#else -#define SIMDE_REQUIRE_CONSTANT(arg) -#endif - -#define SIMDE_REQUIRE_RANGE(arg, min, max) \ - HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), \ - "'" #arg "' must be in [" #min ", " #max "]") - -#define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ - SIMDE_REQUIRE_CONSTANT(arg) \ - SIMDE_REQUIRE_RANGE(arg, min, max) - -/* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty - * fallback if we can't find an implementation; instead we have to - * check if SIMDE_STATIC_ASSERT is defined before using it. */ -#if !defined(__cplusplus) && \ - ((defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - HEDLEY_HAS_FEATURE(c_static_assert) || \ - HEDLEY_GCC_VERSION_CHECK(6, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || defined(_Static_assert)) -#define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - HEDLEY_MSVC_VERSION_CHECK(16, 0, 0) -#define SIMDE_STATIC_ASSERT(expr, message) \ - HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_( \ - static_assert(expr, message)) -#endif - -#if (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ - HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_IBM_VERSION_CHECK(13, 1, 0) -#define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) -#else -#define SIMDE_MAY_ALIAS -#endif - -/* Lots of compilers support GCC-style vector extensions, but many - don't support all the features. Define different macros depending - on support for - - * SIMDE_VECTOR - Declaring a vector. - * SIMDE_VECTOR_OPS - basic operations (binary and unary). - * SIMDE_VECTOR_NEGATE - negating a vector - * SIMDE_VECTOR_SCALAR - For binary operators, the second argument - can be a scalar, in which case the result is as if that scalar - had been broadcast to all lanes of a vector. - * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for - extracting/inserting a single element.= - - SIMDE_VECTOR can be assumed if any others are defined, the - others are independent. */ -#if !defined(SIMDE_NO_VECTOR) -#if HEDLEY_GCC_VERSION_CHECK(4, 8, 0) -#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -#define SIMDE_VECTOR_OPS -#define SIMDE_VECTOR_NEGATE -#define SIMDE_VECTOR_SCALAR -#define SIMDE_VECTOR_SUBSCRIPT -#elif HEDLEY_INTEL_VERSION_CHECK(16, 0, 0) -#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -#define SIMDE_VECTOR_OPS -#define SIMDE_VECTOR_NEGATE -/* ICC only supports SIMDE_VECTOR_SCALAR for constants */ -#define SIMDE_VECTOR_SUBSCRIPT -#elif HEDLEY_GCC_VERSION_CHECK(4, 1, 0) || HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -#define SIMDE_VECTOR_OPS -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 12, 0) -#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -#elif HEDLEY_HAS_ATTRIBUTE(vector_size) -#define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) -#define SIMDE_VECTOR_OPS -#define SIMDE_VECTOR_NEGATE -#define SIMDE_VECTOR_SUBSCRIPT -#if SIMDE_DETECT_CLANG_VERSION_CHECK(5, 0, 0) -#define SIMDE_VECTOR_SCALAR -#endif -#endif - -/* GCC and clang have built-in functions to handle shuffling and - converting of vectors, but the implementations are slightly - different. This macro is just an abstraction over them. Note that - elem_size is in bits but vec_size is in bytes. */ -#if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -HEDLEY_DIAGNOSTIC_PUSH -/* We don't care about -Wvariadic-macros; all compilers that support - * shufflevector/shuffle support them. */ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -#pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4, 0, 0) -#pragma GCC diagnostic ignored "-Wvariadic-macros" -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) -#define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) \ - __builtin_shufflevector(a, b, __VA_ARGS__) -#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle, 4, 7, 0) && \ - !defined(__INTEL_COMPILER) -#define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) \ - (__extension__({ \ - int##elem_size##_t SIMDE_VECTOR(vec_size) \ - simde_shuffle_ = {__VA_ARGS__}; \ - __builtin_shuffle(a, b, simde_shuffle_); \ - })) -#endif -HEDLEY_DIAGNOSTIC_POP -#endif - -/* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT - but the code needs to be refactored a bit to take advantage. */ -#if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) -#if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || \ - HEDLEY_GCC_VERSION_CHECK(9, 0, 0) -#if HEDLEY_GCC_VERSION_CHECK(9, 0, 0) && !HEDLEY_GCC_VERSION_CHECK(9, 3, 0) -/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ -#define SIMDE_CONVERT_VECTOR_(to, from) \ - ((to) = (__extension__({ \ - __typeof__(from) from_ = (from); \ - ((void)from_); \ - __builtin_convertvector(from_, __typeof__(to)); \ - }))) -#else -#define SIMDE_CONVERT_VECTOR_(to, from) \ - ((to) = __builtin_convertvector((from), __typeof__(to))) -#endif -#endif -#endif -#endif - -/* Since we currently require SUBSCRIPT before using a vector in a - union, we define these as dependencies of SUBSCRIPT. They are - likely to disappear in the future, once SIMDe learns how to make - use of vectors without using the union members. Do not use them - in your code unless you're okay with it breaking when SIMDe - changes. */ -#if defined(SIMDE_VECTOR_SUBSCRIPT) -#if defined(SIMDE_VECTOR_OPS) -#define SIMDE_VECTOR_SUBSCRIPT_OPS -#endif -#if defined(SIMDE_VECTOR_SCALAR) -#define SIMDE_VECTOR_SUBSCRIPT_SCALAR -#endif -#endif - -#if !defined(SIMDE_ENABLE_OPENMP) && \ - ((defined(_OPENMP) && (_OPENMP >= 201307L)) || \ - (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) -#define SIMDE_ENABLE_OPENMP -#endif - -#if !defined(SIMDE_ENABLE_CILKPLUS) && \ - (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) -#define SIMDE_ENABLE_CILKPLUS -#endif - -#if defined(SIMDE_ENABLE_OPENMP) -#define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) -#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) -#if defined(__clang__) -#define SIMDE_VECTORIZE_REDUCTION(r) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ - HEDLEY_PRAGMA(omp simd reduction(r)) HEDLEY_DIAGNOSTIC_POP -#else -#define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) -#endif -#define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) -#elif defined(SIMDE_ENABLE_CILKPLUS) -#define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) -#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) -#define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) -#define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) -#elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) -#define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) -#define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) -#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -#define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_GCC_VERSION_CHECK(4, 9, 0) -#define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) -#define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -#define SIMDE_VECTORIZE_ALIGNED(a) -#elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0) -#define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) -#define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE -#define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE -#define SIMDE_VECTORIZE_ALIGNED(a) -#else -#define SIMDE_VECTORIZE -#define SIMDE_VECTORIZE_SAFELEN(l) -#define SIMDE_VECTORIZE_REDUCTION(r) -#define SIMDE_VECTORIZE_ALIGNED(a) -#endif - -#define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) - -/* Intended for checking coverage, you should never use this in - production. */ -#if defined(SIMDE_NO_INLINE) -#define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static -#else -#define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static -#endif - -#if HEDLEY_HAS_ATTRIBUTE(unused) || HEDLEY_GCC_VERSION_CHECK(2, 95, 0) -#define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) -#else -#define SIMDE_FUNCTION_POSSIBLY_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") -#define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED \ - _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED -#endif - -#if defined(_MSC_VER) -#define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable : 4996 4204)) \ - HEDLEY_BEGIN_C_DECLS -#define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS -#else -#define SIMDE_BEGIN_DECLS_ \ - HEDLEY_DIAGNOSTIC_PUSH \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED \ - HEDLEY_BEGIN_C_DECLS -#define SIMDE_END_DECLS_ \ - HEDLEY_END_C_DECLS \ - HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(__SIZEOF_INT128__) -#define SIMDE_HAVE_INT128_ -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -typedef __int128 simde_int128; -typedef unsigned __int128 simde_uint128; -HEDLEY_DIAGNOSTIC_POP -#endif - -#if !defined(SIMDE_ENDIAN_LITTLE) -#define SIMDE_ENDIAN_LITTLE 1234 -#endif -#if !defined(SIMDE_ENDIAN_BIG) -#define SIMDE_ENDIAN_BIG 4321 -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -/* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ -#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ - (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ - (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ -#elif defined(_BIG_ENDIAN) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -#elif defined(_LITTLE_ENDIAN) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -/* We know the endianness of some common architectures. Common - * architectures not listed (ARM, POWER, MIPS, etc.) here are - * bi-endian. */ -#elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -#elif defined(__s390x__) || defined(__zarch__) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -/* Looks like we'll have to rely on the platform. If we're missing a - * platform, please let us know. */ -#elif defined(_WIN32) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -#elif defined(sun) || defined(__sun) /* Solaris */ -#include -#if defined(_LITTLE_ENDIAN) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -#elif defined(_BIG_ENDIAN) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -#endif -#elif defined(__APPLE__) -#include -#if defined(__LITTLE_ENDIAN__) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -#elif defined(__BIG_ENDIAN__) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -#endif -#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) -#include -#if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -#elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -#endif -#elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) -#include -#if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \ - (__BYTE_ORDER == __LITTLE_ENDIAN) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE -#elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \ - (__BYTE_ORDER == __BIG_ENDIAN) -#define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG -#endif -#endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ - HEDLEY_GCC_VERSION_CHECK(4, 3, 0) || \ - HEDLEY_IBM_VERSION_CHECK(13, 1, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define simde_bswap64(v) __builtin_bswap64(v) -#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0) -#define simde_bswap64(v) _byteswap_uint64(v) -#else -SIMDE_FUNCTION_ATTRIBUTES -uint64_t simde_bswap64(uint64_t v) -{ - return ((v & (((uint64_t)0xff) << 56)) >> 56) | - ((v & (((uint64_t)0xff) << 48)) >> 40) | - ((v & (((uint64_t)0xff) << 40)) >> 24) | - ((v & (((uint64_t)0xff) << 32)) >> 8) | - ((v & (((uint64_t)0xff) << 24)) << 8) | - ((v & (((uint64_t)0xff) << 16)) << 24) | - ((v & (((uint64_t)0xff) << 8)) << 40) | - ((v & (((uint64_t)0xff))) << 56); -} -#endif - -#if !defined(SIMDE_ENDIAN_ORDER) -#error Unknown byte order; please file a bug -#else -#if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE -#define simde_endian_bswap64_be(value) simde_bswap64(value) -#define simde_endian_bswap64_le(value) (value) -#elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG -#define simde_endian_bswap64_be(value) (value) -#define simde_endian_bswap64_le(value) simde_bswap64(value) -#endif -#endif - -/* TODO: we should at least make an attempt to detect the correct - types for simde_float32/float64 instead of just assuming float and - double. */ - -#if !defined(SIMDE_FLOAT32_TYPE) -#define SIMDE_FLOAT32_TYPE float -#define SIMDE_FLOAT32_C(value) value##f -#else -#define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE)value) -#endif -typedef SIMDE_FLOAT32_TYPE simde_float32; - -#if !defined(SIMDE_FLOAT64_TYPE) -#define SIMDE_FLOAT64_TYPE double -#define SIMDE_FLOAT64_C(value) value -#else -#define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT64_TYPE)value) -#endif -typedef SIMDE_FLOAT64_TYPE simde_float64; - -#if HEDLEY_HAS_WARNING("-Wbad-function-cast") -#define SIMDE_CONVERT_FTOI(T, v) \ - HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ - HEDLEY_STATIC_CAST(T, (v)) HEDLEY_DIAGNOSTIC_POP -#else -#define SIMDE_CONVERT_FTOI(T, v) ((T)(v)) -#endif - -/* TODO: detect compilers which support this outside of C11 mode */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) -#define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \ - _Generic((value), to \ - : (value), default \ - : (_Generic((value), from \ - : ((to)(value))))) -#define SIMDE_CHECKED_STATIC_CAST(to, from, value) \ - _Generic((value), to \ - : (value), default \ - : (_Generic((value), from \ - : ((to)(value))))) -#else -#define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) \ - HEDLEY_REINTERPRET_CAST(to, value) -#define SIMDE_CHECKED_STATIC_CAST(to, from, value) HEDLEY_STATIC_CAST(to, value) -#endif - -#if HEDLEY_HAS_WARNING("-Wfloat-equal") -#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL \ - _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") -#elif HEDLEY_GCC_VERSION_CHECK(3, 0, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL \ - _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL -#endif - -/* Some functions can trade accuracy for speed. For those functions - you can control the trade-off using this macro. Possible values: - - 0: prefer speed - 1: reasonable trade-offs - 2: prefer accuracy */ -#if !defined(SIMDE_ACCURACY_PREFERENCE) -#define SIMDE_ACCURACY_PREFERENCE 1 -#endif - -#if defined(__STDC_HOSTED__) -#define SIMDE_STDC_HOSTED __STDC_HOSTED__ -#else -#if defined(HEDLEY_PGI_VERSION) || defined(HEDLEY_MSVC_VERSION) -#define SIMDE_STDC_HOSTED 1 -#else -#define SIMDE_STDC_HOSTED 0 -#endif -#endif - -/* Try to deal with environments without a standard library. */ -#if !defined(simde_memcpy) -#if HEDLEY_HAS_BUILTIN(__builtin_memcpy) -#define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) -#endif -#endif -#if !defined(simde_memset) -#if HEDLEY_HAS_BUILTIN(__builtin_memset) -#define simde_memset(s, c, n) __builtin_memset(s, c, n) -#endif -#endif -#if !defined(simde_memcmp) -#if HEDLEY_HAS_BUILTIN(__builtin_memcmp) -#define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) -#endif -#endif - -#if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) -#if !defined(SIMDE_NO_STRING_H) -#if defined(__has_include) -#if !__has_include() -#define SIMDE_NO_STRING_H -#endif -#elif (SIMDE_STDC_HOSTED == 0) -#define SIMDE_NO_STRING_H -#endif -#endif - -#if !defined(SIMDE_NO_STRING_H) -#include -#if !defined(simde_memcpy) -#define simde_memcpy(dest, src, n) memcpy(dest, src, n) -#endif -#if !defined(simde_memset) -#define simde_memset(s, c, n) memset(s, c, n) -#endif -#if !defined(simde_memcmp) -#define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) -#endif -#else -/* These are meant to be portable, not fast. If you're hitting them you - * should think about providing your own (by defining the simde_memcpy - * macro prior to including any SIMDe files) or submitting a patch to - * SIMDe so we can detect your system-provided memcpy/memset, like by - * adding your compiler to the checks for __builtin_memcpy and/or - * __builtin_memset. */ -#if !defined(simde_memcpy) -SIMDE_FUNCTION_ATTRIBUTES -void simde_memcpy_(void *dest, const void *src, size_t len) -{ - char *dest_ = HEDLEY_STATIC_CAST(char *, dest); - char *src_ = HEDLEY_STATIC_CAST(const char *, src); - for (size_t i = 0; i < len; i++) { - dest_[i] = src_[i]; - } -} -#define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) -#endif - -#if !defined(simde_memset) -SIMDE_FUNCTION_ATTRIBUTES -void simde_memset_(void *s, int c, size_t len) -{ - char *s_ = HEDLEY_STATIC_CAST(char *, s); - char c_ = HEDLEY_STATIC_CAST(char, c); - for (size_t i = 0; i < len; i++) { - s_[i] = c_[i]; - } -} -#define simde_memset(s, c, n) simde_memset_(s, c, n) -#endif - -#if !defined(simde_memcmp) -SIMDE_FUCTION_ATTRIBUTES -int simde_memcmp_(const void *s1, const void *s2, size_t n) -{ - unsigned char *s1_ = HEDLEY_STATIC_CAST(unsigned char *, s1); - unsigned char *s2_ = HEDLEY_STATIC_CAST(unsigned char *, s2); - for (size_t i = 0; i < len; i++) { - if (s1_[i] != s2_[i]) { - return (int)(s1_[i] - s2_[i]); - } - } - return 0; -} -#define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) -#endif -#endif -#endif - -#if defined(FE_ALL_EXCEPT) -#define SIMDE_HAVE_FENV_H -#elif defined(__has_include) -#if __has_include() -#include -#define SIMDE_HAVE_FENV_H -#endif -#elif SIMDE_STDC_HOSTED == 1 -#include -#define SIMDE_HAVE_FENV_H -#endif - -#if defined(EXIT_FAILURE) -#define SIMDE_HAVE_STDLIB_H -#elif defined(__has_include) -#if __has_include() -#include -#define SIMDE_HAVE_STDLIB_H -#endif -#elif SIMDE_STDC_HOSTED == 1 -#include -#define SIMDE_HAVE_STDLIB_H -#endif - -#if defined(__has_include) -#if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -#include -#elif __has_include() -#include -#endif -#if __has_include() -#include -#endif -#elif SIMDE_STDC_HOSTED == 1 -#include -#include -#endif - -#include "check.h" - -/* GCC/clang have a bunch of functionality in builtins which we would - * like to access, but the suffixes indicate whether the operate on - * int, long, or long long, not fixed width types (e.g., int32_t). - * we use these macros to attempt to map from fixed-width to the - * names GCC uses. Note that you should still cast the input(s) and - * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if - * types are the same size they may not be compatible according to the - * compiler. For example, on x86 long and long lonsg are generally - * both 64 bits, but platforms vary on whether an int64_t is mapped - * to a long or long long. */ - -#include - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) -#define SIMDE_BUILTIN_SUFFIX_8_ -#define SIMDE_BUILTIN_TYPE_8_ int -#elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) -#define SIMDE_BUILTIN_SUFFIX_8_ l -#define SIMDE_BUILTIN_TYPE_8_ long -#elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) -#define SIMDE_BUILTIN_SUFFIX_8_ ll -#define SIMDE_BUILTIN_TYPE_8_ long long -#endif - -#if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) -#define SIMDE_BUILTIN_SUFFIX_16_ -#define SIMDE_BUILTIN_TYPE_16_ int -#elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) -#define SIMDE_BUILTIN_SUFFIX_16_ l -#define SIMDE_BUILTIN_TYPE_16_ long -#elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) -#define SIMDE_BUILTIN_SUFFIX_16_ ll -#define SIMDE_BUILTIN_TYPE_16_ long long -#endif - -#if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) -#define SIMDE_BUILTIN_SUFFIX_32_ -#define SIMDE_BUILTIN_TYPE_32_ int -#elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) -#define SIMDE_BUILTIN_SUFFIX_32_ l -#define SIMDE_BUILTIN_TYPE_32_ long -#elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) -#define SIMDE_BUILTIN_SUFFIX_32_ ll -#define SIMDE_BUILTIN_TYPE_32_ long long -#endif - -#if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) -#define SIMDE_BUILTIN_SUFFIX_64_ -#define SIMDE_BUILTIN_TYPE_64_ int -#elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) -#define SIMDE_BUILTIN_SUFFIX_64_ l -#define SIMDE_BUILTIN_TYPE_64_ long -#elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) -#define SIMDE_BUILTIN_SUFFIX_64_ ll -#define SIMDE_BUILTIN_TYPE_64_ long long -#endif - -#if defined(SIMDE_BUILTIN_SUFFIX_8_) -#define SIMDE_BUILTIN_8_(name) \ - HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) -#define SIMDE_BUILTIN_HAS_8_(name) \ - HEDLEY_HAS_BUILTIN( \ - HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) -#else -#define SIMDE_BUILTIN_HAS_8_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_16_) -#define SIMDE_BUILTIN_16_(name) \ - HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) -#define SIMDE_BUILTIN_HAS_16_(name) \ - HEDLEY_HAS_BUILTIN( \ - HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) -#else -#define SIMDE_BUILTIN_HAS_16_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_32_) -#define SIMDE_BUILTIN_32_(name) \ - HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) -#define SIMDE_BUILTIN_HAS_32_(name) \ - HEDLEY_HAS_BUILTIN( \ - HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) -#else -#define SIMDE_BUILTIN_HAS_32_(name) 0 -#endif -#if defined(SIMDE_BUILTIN_SUFFIX_64_) -#define SIMDE_BUILTIN_64_(name) \ - HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) -#define SIMDE_BUILTIN_HAS_64_(name) \ - HEDLEY_HAS_BUILTIN( \ - HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) -#else -#define SIMDE_BUILTIN_HAS_64_(name) 0 -#endif - -HEDLEY_DIAGNOSTIC_POP - -/* Sometimes we run into problems with specific versions of compilers - which make the native versions unusable for us. Often this is due - to missing functions, sometimes buggy implementations, etc. These - macros are how we check for specific bugs. As they are fixed we'll - start only defining them for problematic compiler versions. */ - -#if !defined(SIMDE_IGNORE_COMPILER_BUGS) -#if defined(HEDLEY_GCC_VERSION) -#if !HEDLEY_GCC_VERSION_CHECK(4, 9, 0) -#define SIMDE_BUG_GCC_REV_208793 -#endif -#if !HEDLEY_GCC_VERSION_CHECK(5, 0, 0) -#define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ -#endif -#if !HEDLEY_GCC_VERSION_CHECK(4, 6, 0) -#define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ -#endif -#if !HEDLEY_GCC_VERSION_CHECK(8, 0, 0) -#define SIMDE_BUG_GCC_REV_247851 -#endif -#if !HEDLEY_GCC_VERSION_CHECK(10, 0, 0) -#define SIMDE_BUG_GCC_REV_274313 -#define SIMDE_BUG_GCC_91341 -#endif -#if !HEDLEY_GCC_VERSION_CHECK(9, 0, 0) && defined(SIMDE_ARCH_AARCH64) -#define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR -#endif -#if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) -#define SIMDE_BUG_GCC_94482 -#endif -#if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || \ - defined(SIMDE_ARCH_SYSTEMZ) -#define SIMDE_BUG_GCC_53784 -#endif -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -#if HEDLEY_GCC_VERSION_CHECK(4, 3, 0) /* -Wsign-conversion */ -#define SIMDE_BUG_GCC_95144 -#endif -#endif -#if !HEDLEY_GCC_VERSION_CHECK(9, 4, 0) && defined(SIMDE_ARCH_AARCH64) -#define SIMDE_BUG_GCC_94488 -#endif -#if defined(SIMDE_ARCH_ARM) -#define SIMDE_BUG_GCC_95399 -#define SIMDE_BUG_GCC_95471 -#elif defined(SIMDE_ARCH_POWER) -#define SIMDE_BUG_GCC_95227 -#define SIMDE_BUG_GCC_95782 -#elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -#if !HEDLEY_GCC_VERSION_CHECK(10, 2, 0) && !defined(__OPTIMIZE__) -#define SIMDE_BUG_GCC_96174 -#endif -#endif -#define SIMDE_BUG_GCC_95399 -#elif defined(__clang__) -#if defined(SIMDE_ARCH_AARCH64) -#define SIMDE_BUG_CLANG_45541 -#define SIMDE_BUG_CLANG_46844 -#define SIMDE_BUG_CLANG_48257 -#if SIMDE_DETECT_CLANG_VERSION_CHECK(10, 0, 0) && \ - SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0) -#define SIMDE_BUG_CLANG_BAD_VI64_OPS -#endif -#endif -#if defined(SIMDE_ARCH_POWER) -#define SIMDE_BUG_CLANG_46770 -#endif -#if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12, 0, 0) && \ - !defined(__OPTIMIZE__) -#define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT -#endif -#if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) -#if HEDLEY_HAS_WARNING("-Wsign-conversion") && \ - SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0) -#define SIMDE_BUG_CLANG_45931 -#endif -#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \ - SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0) -#define SIMDE_BUG_CLANG_44589 -#endif -#endif -#define SIMDE_BUG_CLANG_45959 -#elif defined(HEDLEY_MSVC_VERSION) -#if defined(SIMDE_ARCH_X86) -#define SIMDE_BUG_MSVC_ROUND_EXTRACT -#endif -#elif defined(HEDLEY_INTEL_VERSION) -#define SIMDE_BUG_INTEL_857088 -#endif -#endif - -/* GCC and Clang both have the same issue: - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 - * https://bugs.llvm.org/show_bug.cgi?id=45931 - * This is just an easy way to work around it. - */ -#if (HEDLEY_HAS_WARNING("-Wsign-conversion") && \ - SIMDE_DETECT_CLANG_VERSION_NOT(11, 0, 0)) || \ - HEDLEY_GCC_VERSION_CHECK(4, 3, 0) -#define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_POP \ - _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") __typeof__(expr) \ - simde_bug_ignore_sign_conversion_v_ = (expr); \ - HEDLEY_DIAGNOSTIC_PUSH \ - simde_bug_ignore_sign_conversion_v_; \ - })) -#else -#define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) -#endif - -#endif /* !defined(SIMDE_COMMON_H) */ diff --git a/libobs/util/simde/simde-constify.h b/libobs/util/simde/simde-constify.h deleted file mode 100644 index 9dc069bd984b08..00000000000000 --- a/libobs/util/simde/simde-constify.h +++ /dev/null @@ -1,925 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* Constify macros. For internal use only. - * - * These are used to make it possible to call a function which takes - * an Integer Constant Expression (ICE) using a compile time constant. - * Technically it would also be possible to use a value not trivially - * known by the compiler, but there would be a siginficant performance - * hit (a switch switch is used). - * - * The basic idea is pretty simple; we just emit a do while loop which - * contains a switch with a case for every possible value of the - * constant. - * - * As long as the value you pass to the function in constant, pretty - * much any copmiler shouldn't have a problem generating exactly the - * same code as if you had used an ICE. - * - * This is intended to be used in the SIMDe implementations of - * functions the compilers require to be an ICE, but the other benefit - * is that if we also disable the warnings from - * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests - * to use non-ICE parameters - */ - -#if !defined(SIMDE_CONSTIFY_H) -#define SIMDE_CONSTIFY_H - -#include "simde-diagnostic.h" - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ - -#define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - result = func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - result = func_name(__VA_ARGS__, 1); \ - break; \ - default: \ - result = default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - result = func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - result = func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - result = func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - result = func_name(__VA_ARGS__, 3); \ - break; \ - default: \ - result = default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - result = func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - result = func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - result = func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - result = func_name(__VA_ARGS__, 3); \ - break; \ - case 4: \ - result = func_name(__VA_ARGS__, 4); \ - break; \ - case 5: \ - result = func_name(__VA_ARGS__, 5); \ - break; \ - case 6: \ - result = func_name(__VA_ARGS__, 6); \ - break; \ - case 7: \ - result = func_name(__VA_ARGS__, 7); \ - break; \ - default: \ - result = default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - result = func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - result = func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - result = func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - result = func_name(__VA_ARGS__, 3); \ - break; \ - case 4: \ - result = func_name(__VA_ARGS__, 4); \ - break; \ - case 5: \ - result = func_name(__VA_ARGS__, 5); \ - break; \ - case 6: \ - result = func_name(__VA_ARGS__, 6); \ - break; \ - case 7: \ - result = func_name(__VA_ARGS__, 7); \ - break; \ - case 8: \ - result = func_name(__VA_ARGS__, 8); \ - break; \ - case 9: \ - result = func_name(__VA_ARGS__, 9); \ - break; \ - case 10: \ - result = func_name(__VA_ARGS__, 10); \ - break; \ - case 11: \ - result = func_name(__VA_ARGS__, 11); \ - break; \ - case 12: \ - result = func_name(__VA_ARGS__, 12); \ - break; \ - case 13: \ - result = func_name(__VA_ARGS__, 13); \ - break; \ - case 14: \ - result = func_name(__VA_ARGS__, 14); \ - break; \ - case 15: \ - result = func_name(__VA_ARGS__, 15); \ - break; \ - default: \ - result = default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - result = func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - result = func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - result = func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - result = func_name(__VA_ARGS__, 3); \ - break; \ - case 4: \ - result = func_name(__VA_ARGS__, 4); \ - break; \ - case 5: \ - result = func_name(__VA_ARGS__, 5); \ - break; \ - case 6: \ - result = func_name(__VA_ARGS__, 6); \ - break; \ - case 7: \ - result = func_name(__VA_ARGS__, 7); \ - break; \ - case 8: \ - result = func_name(__VA_ARGS__, 8); \ - break; \ - case 9: \ - result = func_name(__VA_ARGS__, 9); \ - break; \ - case 10: \ - result = func_name(__VA_ARGS__, 10); \ - break; \ - case 11: \ - result = func_name(__VA_ARGS__, 11); \ - break; \ - case 12: \ - result = func_name(__VA_ARGS__, 12); \ - break; \ - case 13: \ - result = func_name(__VA_ARGS__, 13); \ - break; \ - case 14: \ - result = func_name(__VA_ARGS__, 14); \ - break; \ - case 15: \ - result = func_name(__VA_ARGS__, 15); \ - break; \ - case 16: \ - result = func_name(__VA_ARGS__, 16); \ - break; \ - case 17: \ - result = func_name(__VA_ARGS__, 17); \ - break; \ - case 18: \ - result = func_name(__VA_ARGS__, 18); \ - break; \ - case 19: \ - result = func_name(__VA_ARGS__, 19); \ - break; \ - case 20: \ - result = func_name(__VA_ARGS__, 20); \ - break; \ - case 21: \ - result = func_name(__VA_ARGS__, 21); \ - break; \ - case 22: \ - result = func_name(__VA_ARGS__, 22); \ - break; \ - case 23: \ - result = func_name(__VA_ARGS__, 23); \ - break; \ - case 24: \ - result = func_name(__VA_ARGS__, 24); \ - break; \ - case 25: \ - result = func_name(__VA_ARGS__, 25); \ - break; \ - case 26: \ - result = func_name(__VA_ARGS__, 26); \ - break; \ - case 27: \ - result = func_name(__VA_ARGS__, 27); \ - break; \ - case 28: \ - result = func_name(__VA_ARGS__, 28); \ - break; \ - case 29: \ - result = func_name(__VA_ARGS__, 29); \ - break; \ - case 30: \ - result = func_name(__VA_ARGS__, 30); \ - break; \ - case 31: \ - result = func_name(__VA_ARGS__, 31); \ - break; \ - default: \ - result = default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - result = func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - result = func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - result = func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - result = func_name(__VA_ARGS__, 3); \ - break; \ - case 4: \ - result = func_name(__VA_ARGS__, 4); \ - break; \ - case 5: \ - result = func_name(__VA_ARGS__, 5); \ - break; \ - case 6: \ - result = func_name(__VA_ARGS__, 6); \ - break; \ - case 7: \ - result = func_name(__VA_ARGS__, 7); \ - break; \ - case 8: \ - result = func_name(__VA_ARGS__, 8); \ - break; \ - case 9: \ - result = func_name(__VA_ARGS__, 9); \ - break; \ - case 10: \ - result = func_name(__VA_ARGS__, 10); \ - break; \ - case 11: \ - result = func_name(__VA_ARGS__, 11); \ - break; \ - case 12: \ - result = func_name(__VA_ARGS__, 12); \ - break; \ - case 13: \ - result = func_name(__VA_ARGS__, 13); \ - break; \ - case 14: \ - result = func_name(__VA_ARGS__, 14); \ - break; \ - case 15: \ - result = func_name(__VA_ARGS__, 15); \ - break; \ - case 16: \ - result = func_name(__VA_ARGS__, 16); \ - break; \ - case 17: \ - result = func_name(__VA_ARGS__, 17); \ - break; \ - case 18: \ - result = func_name(__VA_ARGS__, 18); \ - break; \ - case 19: \ - result = func_name(__VA_ARGS__, 19); \ - break; \ - case 20: \ - result = func_name(__VA_ARGS__, 20); \ - break; \ - case 21: \ - result = func_name(__VA_ARGS__, 21); \ - break; \ - case 22: \ - result = func_name(__VA_ARGS__, 22); \ - break; \ - case 23: \ - result = func_name(__VA_ARGS__, 23); \ - break; \ - case 24: \ - result = func_name(__VA_ARGS__, 24); \ - break; \ - case 25: \ - result = func_name(__VA_ARGS__, 25); \ - break; \ - case 26: \ - result = func_name(__VA_ARGS__, 26); \ - break; \ - case 27: \ - result = func_name(__VA_ARGS__, 27); \ - break; \ - case 28: \ - result = func_name(__VA_ARGS__, 28); \ - break; \ - case 29: \ - result = func_name(__VA_ARGS__, 29); \ - break; \ - case 30: \ - result = func_name(__VA_ARGS__, 30); \ - break; \ - case 31: \ - result = func_name(__VA_ARGS__, 31); \ - break; \ - case 32: \ - result = func_name(__VA_ARGS__, 32); \ - break; \ - case 33: \ - result = func_name(__VA_ARGS__, 33); \ - break; \ - case 34: \ - result = func_name(__VA_ARGS__, 34); \ - break; \ - case 35: \ - result = func_name(__VA_ARGS__, 35); \ - break; \ - case 36: \ - result = func_name(__VA_ARGS__, 36); \ - break; \ - case 37: \ - result = func_name(__VA_ARGS__, 37); \ - break; \ - case 38: \ - result = func_name(__VA_ARGS__, 38); \ - break; \ - case 39: \ - result = func_name(__VA_ARGS__, 39); \ - break; \ - case 40: \ - result = func_name(__VA_ARGS__, 40); \ - break; \ - case 41: \ - result = func_name(__VA_ARGS__, 41); \ - break; \ - case 42: \ - result = func_name(__VA_ARGS__, 42); \ - break; \ - case 43: \ - result = func_name(__VA_ARGS__, 43); \ - break; \ - case 44: \ - result = func_name(__VA_ARGS__, 44); \ - break; \ - case 45: \ - result = func_name(__VA_ARGS__, 45); \ - break; \ - case 46: \ - result = func_name(__VA_ARGS__, 46); \ - break; \ - case 47: \ - result = func_name(__VA_ARGS__, 47); \ - break; \ - case 48: \ - result = func_name(__VA_ARGS__, 48); \ - break; \ - case 49: \ - result = func_name(__VA_ARGS__, 49); \ - break; \ - case 50: \ - result = func_name(__VA_ARGS__, 50); \ - break; \ - case 51: \ - result = func_name(__VA_ARGS__, 51); \ - break; \ - case 52: \ - result = func_name(__VA_ARGS__, 52); \ - break; \ - case 53: \ - result = func_name(__VA_ARGS__, 53); \ - break; \ - case 54: \ - result = func_name(__VA_ARGS__, 54); \ - break; \ - case 55: \ - result = func_name(__VA_ARGS__, 55); \ - break; \ - case 56: \ - result = func_name(__VA_ARGS__, 56); \ - break; \ - case 57: \ - result = func_name(__VA_ARGS__, 57); \ - break; \ - case 58: \ - result = func_name(__VA_ARGS__, 58); \ - break; \ - case 59: \ - result = func_name(__VA_ARGS__, 59); \ - break; \ - case 60: \ - result = func_name(__VA_ARGS__, 60); \ - break; \ - case 61: \ - result = func_name(__VA_ARGS__, 61); \ - break; \ - case 62: \ - result = func_name(__VA_ARGS__, 62); \ - break; \ - case 63: \ - result = func_name(__VA_ARGS__, 63); \ - break; \ - default: \ - result = default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - func_name(__VA_ARGS__, 1); \ - break; \ - default: \ - default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - func_name(__VA_ARGS__, 3); \ - break; \ - default: \ - default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - func_name(__VA_ARGS__, 3); \ - break; \ - case 4: \ - func_name(__VA_ARGS__, 4); \ - break; \ - case 5: \ - func_name(__VA_ARGS__, 5); \ - break; \ - case 6: \ - func_name(__VA_ARGS__, 6); \ - break; \ - case 7: \ - func_name(__VA_ARGS__, 7); \ - break; \ - default: \ - default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - func_name(__VA_ARGS__, 3); \ - break; \ - case 4: \ - func_name(__VA_ARGS__, 4); \ - break; \ - case 5: \ - func_name(__VA_ARGS__, 5); \ - break; \ - case 6: \ - func_name(__VA_ARGS__, 6); \ - break; \ - case 7: \ - func_name(__VA_ARGS__, 7); \ - break; \ - case 8: \ - func_name(__VA_ARGS__, 8); \ - break; \ - case 9: \ - func_name(__VA_ARGS__, 9); \ - break; \ - case 10: \ - func_name(__VA_ARGS__, 10); \ - break; \ - case 11: \ - func_name(__VA_ARGS__, 11); \ - break; \ - case 12: \ - func_name(__VA_ARGS__, 12); \ - break; \ - case 13: \ - func_name(__VA_ARGS__, 13); \ - break; \ - case 14: \ - func_name(__VA_ARGS__, 14); \ - break; \ - case 15: \ - func_name(__VA_ARGS__, 15); \ - break; \ - default: \ - default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - func_name(__VA_ARGS__, 3); \ - break; \ - case 4: \ - func_name(__VA_ARGS__, 4); \ - break; \ - case 5: \ - func_name(__VA_ARGS__, 5); \ - break; \ - case 6: \ - func_name(__VA_ARGS__, 6); \ - break; \ - case 7: \ - func_name(__VA_ARGS__, 7); \ - break; \ - case 8: \ - func_name(__VA_ARGS__, 8); \ - break; \ - case 9: \ - func_name(__VA_ARGS__, 9); \ - break; \ - case 10: \ - func_name(__VA_ARGS__, 10); \ - break; \ - case 11: \ - func_name(__VA_ARGS__, 11); \ - break; \ - case 12: \ - func_name(__VA_ARGS__, 12); \ - break; \ - case 13: \ - func_name(__VA_ARGS__, 13); \ - break; \ - case 14: \ - func_name(__VA_ARGS__, 14); \ - break; \ - case 15: \ - func_name(__VA_ARGS__, 15); \ - break; \ - case 16: \ - func_name(__VA_ARGS__, 16); \ - break; \ - case 17: \ - func_name(__VA_ARGS__, 17); \ - break; \ - case 18: \ - func_name(__VA_ARGS__, 18); \ - break; \ - case 19: \ - func_name(__VA_ARGS__, 19); \ - break; \ - case 20: \ - func_name(__VA_ARGS__, 20); \ - break; \ - case 21: \ - func_name(__VA_ARGS__, 21); \ - break; \ - case 22: \ - func_name(__VA_ARGS__, 22); \ - break; \ - case 23: \ - func_name(__VA_ARGS__, 23); \ - break; \ - case 24: \ - func_name(__VA_ARGS__, 24); \ - break; \ - case 25: \ - func_name(__VA_ARGS__, 25); \ - break; \ - case 26: \ - func_name(__VA_ARGS__, 26); \ - break; \ - case 27: \ - func_name(__VA_ARGS__, 27); \ - break; \ - case 28: \ - func_name(__VA_ARGS__, 28); \ - break; \ - case 29: \ - func_name(__VA_ARGS__, 29); \ - break; \ - case 30: \ - func_name(__VA_ARGS__, 30); \ - break; \ - case 31: \ - func_name(__VA_ARGS__, 31); \ - break; \ - default: \ - default_case; \ - break; \ - } \ - } while (0) - -#define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) \ - do { \ - switch (imm) { \ - case 0: \ - func_name(__VA_ARGS__, 0); \ - break; \ - case 1: \ - func_name(__VA_ARGS__, 1); \ - break; \ - case 2: \ - func_name(__VA_ARGS__, 2); \ - break; \ - case 3: \ - func_name(__VA_ARGS__, 3); \ - break; \ - case 4: \ - func_name(__VA_ARGS__, 4); \ - break; \ - case 5: \ - func_name(__VA_ARGS__, 5); \ - break; \ - case 6: \ - func_name(__VA_ARGS__, 6); \ - break; \ - case 7: \ - func_name(__VA_ARGS__, 7); \ - break; \ - case 8: \ - func_name(__VA_ARGS__, 8); \ - break; \ - case 9: \ - func_name(__VA_ARGS__, 9); \ - break; \ - case 10: \ - func_name(__VA_ARGS__, 10); \ - break; \ - case 11: \ - func_name(__VA_ARGS__, 11); \ - break; \ - case 12: \ - func_name(__VA_ARGS__, 12); \ - break; \ - case 13: \ - func_name(__VA_ARGS__, 13); \ - break; \ - case 14: \ - func_name(__VA_ARGS__, 14); \ - break; \ - case 15: \ - func_name(__VA_ARGS__, 15); \ - break; \ - case 16: \ - func_name(__VA_ARGS__, 16); \ - break; \ - case 17: \ - func_name(__VA_ARGS__, 17); \ - break; \ - case 18: \ - func_name(__VA_ARGS__, 18); \ - break; \ - case 19: \ - func_name(__VA_ARGS__, 19); \ - break; \ - case 20: \ - func_name(__VA_ARGS__, 20); \ - break; \ - case 21: \ - func_name(__VA_ARGS__, 21); \ - break; \ - case 22: \ - func_name(__VA_ARGS__, 22); \ - break; \ - case 23: \ - func_name(__VA_ARGS__, 23); \ - break; \ - case 24: \ - func_name(__VA_ARGS__, 24); \ - break; \ - case 25: \ - func_name(__VA_ARGS__, 25); \ - break; \ - case 26: \ - func_name(__VA_ARGS__, 26); \ - break; \ - case 27: \ - func_name(__VA_ARGS__, 27); \ - break; \ - case 28: \ - func_name(__VA_ARGS__, 28); \ - break; \ - case 29: \ - func_name(__VA_ARGS__, 29); \ - break; \ - case 30: \ - func_name(__VA_ARGS__, 30); \ - break; \ - case 31: \ - func_name(__VA_ARGS__, 31); \ - break; \ - case 32: \ - func_name(__VA_ARGS__, 32); \ - break; \ - case 33: \ - func_name(__VA_ARGS__, 33); \ - break; \ - case 34: \ - func_name(__VA_ARGS__, 34); \ - break; \ - case 35: \ - func_name(__VA_ARGS__, 35); \ - break; \ - case 36: \ - func_name(__VA_ARGS__, 36); \ - break; \ - case 37: \ - func_name(__VA_ARGS__, 37); \ - break; \ - case 38: \ - func_name(__VA_ARGS__, 38); \ - break; \ - case 39: \ - func_name(__VA_ARGS__, 39); \ - break; \ - case 40: \ - func_name(__VA_ARGS__, 40); \ - break; \ - case 41: \ - func_name(__VA_ARGS__, 41); \ - break; \ - case 42: \ - func_name(__VA_ARGS__, 42); \ - break; \ - case 43: \ - func_name(__VA_ARGS__, 43); \ - break; \ - case 44: \ - func_name(__VA_ARGS__, 44); \ - break; \ - case 45: \ - func_name(__VA_ARGS__, 45); \ - break; \ - case 46: \ - func_name(__VA_ARGS__, 46); \ - break; \ - case 47: \ - func_name(__VA_ARGS__, 47); \ - break; \ - case 48: \ - func_name(__VA_ARGS__, 48); \ - break; \ - case 49: \ - func_name(__VA_ARGS__, 49); \ - break; \ - case 50: \ - func_name(__VA_ARGS__, 50); \ - break; \ - case 51: \ - func_name(__VA_ARGS__, 51); \ - break; \ - case 52: \ - func_name(__VA_ARGS__, 52); \ - break; \ - case 53: \ - func_name(__VA_ARGS__, 53); \ - break; \ - case 54: \ - func_name(__VA_ARGS__, 54); \ - break; \ - case 55: \ - func_name(__VA_ARGS__, 55); \ - break; \ - case 56: \ - func_name(__VA_ARGS__, 56); \ - break; \ - case 57: \ - func_name(__VA_ARGS__, 57); \ - break; \ - case 58: \ - func_name(__VA_ARGS__, 58); \ - break; \ - case 59: \ - func_name(__VA_ARGS__, 59); \ - break; \ - case 60: \ - func_name(__VA_ARGS__, 60); \ - break; \ - case 61: \ - func_name(__VA_ARGS__, 61); \ - break; \ - case 62: \ - func_name(__VA_ARGS__, 62); \ - break; \ - case 63: \ - func_name(__VA_ARGS__, 63); \ - break; \ - default: \ - default_case; \ - break; \ - } \ - } while (0) - -HEDLEY_DIAGNOSTIC_POP - -#endif diff --git a/libobs/util/simde/simde-detect-clang.h b/libobs/util/simde/simde-detect-clang.h deleted file mode 100644 index 86dee7567d9f7c..00000000000000 --- a/libobs/util/simde/simde-detect-clang.h +++ /dev/null @@ -1,114 +0,0 @@ -/* Detect Clang Version - * Created by Evan Nemerson - * - * To the extent possible under law, the author(s) have dedicated all - * copyright and related and neighboring rights to this software to - * the public domain worldwide. This software is distributed without - * any warranty. - * - * For details, see . - * SPDX-License-Identifier: CC0-1.0 - */ - -/* This file was originally part of SIMDe - * (). You're free to do with it as - * you please, but I do have a few small requests: - * - * * If you make improvements, please submit them back to SIMDe - * (at ) so others can - * benefit from them. - * * Please keep a link to SIMDe intact so people know where to submit - * improvements. - * * If you expose it publicly, please change the SIMDE_ prefix to - * something specific to your project. - * - * The version numbers clang exposes (in the ___clang_major__, - * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. - * Vendors such as Apple will define these values to their version - * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but - * __clang_major__ and __clang_minor__ are defined to 4 and 0 - * respectively, instead of 3 and 1. - * - * The solution is *usually* to use clang's feature detection macros - * () - * to determine if the feature you're interested in is available. This - * generally works well, and it should probably be the first thing you - * try. Unfortunately, it's not possible to check for everything. In - * particular, compiler bugs. - * - * This file just uses the feature checking macros to detect features - * added in specific versions of clang to identify which version of - * clang the compiler is based on. - * - * Right now it only goes back to 3.6, but I'm happy to accept patches - * to go back further. And, of course, newer versions are welcome if - * they're not already present, and if you find a way to detect a point - * release that would be great, too! - */ - -#if !defined(SIMDE_DETECT_CLANG_H) -#define SIMDE_DETECT_CLANG_H 1 - -/* Attempt to detect the upstream clang version number. I usually only - * worry about major version numbers (at least for 4.0+), but if you - * need more resolution I'm happy to accept patches that are able to - * detect minor versions as well. That said, you'll probably have a - * hard time with detection since AFAIK most minor releases don't add - * anything we can detect. */ - -#if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) -#if __has_warning("-Wformat-insufficient-args") -#define SIMDE_DETECT_CLANG_VERSION 120000 -#elif __has_warning("-Wimplicit-const-int-float-conversion") -#define SIMDE_DETECT_CLANG_VERSION 110000 -#elif __has_warning("-Wmisleading-indentation") -#define SIMDE_DETECT_CLANG_VERSION 100000 -#elif defined(__FILE_NAME__) -#define SIMDE_DETECT_CLANG_VERSION 90000 -#elif __has_warning("-Wextra-semi-stmt") || \ - __has_builtin(__builtin_rotateleft32) -#define SIMDE_DETECT_CLANG_VERSION 80000 -#elif __has_warning("-Wc++98-compat-extra-semi") -#define SIMDE_DETECT_CLANG_VERSION 70000 -#elif __has_warning("-Wpragma-pack") -#define SIMDE_DETECT_CLANG_VERSION 60000 -#elif __has_warning("-Wbitfield-enum-conversion") -#define SIMDE_DETECT_CLANG_VERSION 50000 -#elif __has_attribute(diagnose_if) -#define SIMDE_DETECT_CLANG_VERSION 40000 -#elif __has_warning("-Wcast-calling-convention") -#define SIMDE_DETECT_CLANG_VERSION 30900 -#elif __has_warning("-WCL4") -#define SIMDE_DETECT_CLANG_VERSION 30800 -#elif __has_warning("-WIndependentClass-attribute") -#define SIMDE_DETECT_CLANG_VERSION 30700 -#elif __has_warning("-Wambiguous-ellipsis") -#define SIMDE_DETECT_CLANG_VERSION 30600 -#else -#define SIMDE_DETECT_CLANG_VERSION 1 -#endif -#endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ - -/* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty - * straightforward; it returns true if the compiler is a derivative - * of clang >= the specified version. - * - * Since this file is often (primarily?) useful for working around bugs - * it is also helpful to have a macro which returns true if only if the - * compiler is a version of clang *older* than the specified version to - * make it a bit easier to ifdef regions to add code for older versions, - * such as pragmas to disable a specific warning. */ - -#if defined(SIMDE_DETECT_CLANG_VERSION) -#define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) \ - (SIMDE_DETECT_CLANG_VERSION >= \ - ((major * 10000) + (minor * 1000) + (revision))) -#define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) \ - (SIMDE_DETECT_CLANG_VERSION < \ - ((major * 10000) + (minor * 1000) + (revision))) -#else -#define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) -#define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (1) -#endif - -#endif /* !defined(SIMDE_DETECT_CLANG_H) */ diff --git a/libobs/util/simde/simde-diagnostic.h b/libobs/util/simde/simde-diagnostic.h deleted file mode 100644 index 1d10aaf3ffc94d..00000000000000 --- a/libobs/util/simde/simde-diagnostic.h +++ /dev/null @@ -1,447 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* SIMDe targets a very wide range of standards and compilers, and our - * goal is to compile cleanly even with extremely aggressive warnings - * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) - * treated as errors. - * - * While our preference is to resolve the underlying issue a given - * diagnostic is warning us about, sometimes that's not possible. - * Fixing a warning in one compiler may cause problems in another. - * Sometimes a warning doesn't really apply to us (false positives), - * and sometimes adhering to a warning would mean dropping a feature - * we *know* the compiler supports since we have tested specifically - * for the compiler or feature. - * - * When practical, warnings are only disabled for specific code. For - * a list of warnings which are enabled by default in all SIMDe code, - * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the - * warning stack when SIMDe is done parsing, so code which includes - * SIMDe is not deprived of these warnings. - */ - -#if !defined(SIMDE_DIAGNOSTIC_H) -#define SIMDE_DIAGNOSTIC_H - -#include "hedley.h" -#include "simde-detect-clang.h" - -/* This is only to help us implement functions like _mm_undefined_ps. */ -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -#undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif -#if HEDLEY_HAS_WARNING("-Wuninitialized") -#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \ - _Pragma("clang diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_GCC_VERSION_CHECK(4, 2, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \ - _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") -#elif HEDLEY_PGI_VERSION_CHECK(19, 10, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 14, 0) && defined(__cplusplus) -#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \ - _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 14, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \ - _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") -#elif HEDLEY_SUNPRO_VERSION_CHECK(5, 12, 0) && defined(__cplusplus) -#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \ - _Pragma("error_messages(off,unassigned)") -#elif HEDLEY_TI_VERSION_CHECK(16, 9, 9) || \ - HEDLEY_TI_CL6X_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_TI_CL7X_VERSION_CHECK(1, 2, 0) || \ - HEDLEY_TI_CLPRU_VERSION_CHECK(2, 3, 2) -#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") -#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") -#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) && !defined(__MSVC_RUNTIME_CHECKS) -#define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ \ - __pragma(warning(disable : 4700)) -#endif - -/* GCC emits a lot of "notes" about the ABI being different for things - * in newer versions of GCC. We don't really care because all our - * functions are inlined and don't generate ABI. */ -#if HEDLEY_GCC_VERSION_CHECK(7, 0, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - _Pragma("GCC diagnostic ignored \"-Wpsabi\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ -#endif - -/* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() - * after each MMX function before any floating point instructions. - * Some compilers warn about functions which use MMX functions but - * don't call _mm_empty(). However, since SIMDe is implementyng the - * MMX API we shouldn't be calling _mm_empty(); we leave it to the - * caller to invoke simde_mm_empty(). */ -#if HEDLEY_INTEL_VERSION_CHECK(19, 0, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - _Pragma("warning(disable:13200 13203)") -#elif defined(HEDLEY_MSVC_VERSION) -#define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - __pragma(warning(disable : 4799)) -#else -#define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ -#endif - -/* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they - * emit a diagnostic if you use #pragma simd instead of - * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to - * compile with -qopenmp or -qopenmp-simd and define - * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ -#if HEDLEY_INTEL_VERSION_CHECK(18, 0, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - _Pragma("warning(disable:3948)") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ -#endif - -/* MSVC emits a diagnostic when we call a function (like - * simde_mm_set_epi32) while initializing a struct. We currently do - * this a *lot* in the tests. */ -#if defined(HEDLEY_MSVC_VERSION) -#define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - __pragma(warning(disable : 4204)) -#else -#define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ -#endif - -/* This warning needs a lot of work. It is triggered if all you do is - * pass the value to memcpy/__builtin_memcpy, or if you initialize a - * member of the union, even if that member takes up the entire union. - * Last tested with clang-10, hopefully things will improve in the - * future; if clang fixes this I'd love to enable it. */ -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -#define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ -#endif - -/* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which - * will is false. However, SIMDe uses these operations exclusively - * for things like _mm_cmpeq_ps, for which we really do want to check - * for equality (or inequality). - * - * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro - * which just wraps a check in some code do disable this diagnostic I'd - * be happy to accept it. */ -#if HEDLEY_HAS_WARNING("-Wfloat-equal") || HEDLEY_GCC_VERSION_CHECK(3, 0, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ -#endif - -/* This is because we use HEDLEY_STATIC_ASSERT for static assertions. - * If Hedley can't find an implementation it will preprocess to - * nothing, which means there will be a trailing semi-colon. */ -#if HEDLEY_HAS_WARNING("-Wextra-semi") -#define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - _Pragma("clang diagnostic ignored \"-Wextra-semi\"") -#elif HEDLEY_GCC_VERSION_CHECK(8, 1, 0) && defined(__cplusplus) -#define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ -#endif - -/* We do use a few variadic macros, which technically aren't available - * until C99 and C++11, but every compiler I'm aware of has supported - * them for much longer. That said, usage is isolated to the test - * suite and compilers known to support them. */ -#if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4, 0, 0) -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -#define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") _Pragma( \ - "clang diagnostic ignored \"-Wc++98-compat-pedantic\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ - _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") -#endif -#else -#define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ -#endif - -/* emscripten requires us to use a __wasm_unimplemented_simd128__ macro - * before we can access certain SIMD intrinsics, but this diagnostic - * warns about it being a reserved name. It is a reserved name, but - * it's reserved for the compiler and we are using it to convey - * information to the compiler. - * - * This is also used when enabling native aliases since we don't get to - * choose the macro names. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") -#define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ \ - _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#endif - -/* clang 3.8 warns about the packed attribute being unnecessary when - * used in the _mm_loadu_* functions. That *may* be true for version - * 3.8, but for later versions it is crucial in order to make unaligned - * access safe. */ -#if HEDLEY_HAS_WARNING("-Wpacked") -#define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ \ - _Pragma("clang diagnostic ignored \"-Wpacked\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ -#endif - -/* Triggered when assigning a float to a double implicitly. We use - * explicit casts in SIMDe, this is only used in the test suite. */ -#if HEDLEY_HAS_WARNING("-Wdouble-promotion") -#define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ \ - _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ -#endif - -/* Several compilers treat conformant array parameters as VLAs. We - * test to make sure we're in C mode (C++ doesn't support CAPs), and - * that the version of the standard supports CAPs. We also reject - * some buggy compilers like MSVC (the logic is in Hedley if you want - * to take a look), but with certain warnings enabled some compilers - * still like to emit a diagnostic. */ -#if HEDLEY_HAS_WARNING("-Wvla") -#define SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - _Pragma("clang diagnostic ignored \"-Wvla\"") -#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - _Pragma("GCC diagnostic ignored \"-Wvla\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_VLA_ -#endif - -#if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") -#define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wunused-function") -#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \ - _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_GCC_VERSION_CHECK(3, 4, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \ - _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) /* Likely goes back further */ -#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \ - __pragma(warning(disable : 4505)) -#else -#define SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpass-failed") -#define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - _Pragma("clang diagnostic ignored \"-Wpass-failed\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wpadded") -#define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ \ - _Pragma("clang diagnostic ignored \"-Wpadded\"") -#elif HEDLEY_MSVC_VERSION_CHECK(19, 0, 0) /* Likely goes back further */ -#define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable : 4324)) -#else -#define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ -#endif - -#if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") -#define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ \ - _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ -#endif - -#if HEDLEY_HAS_WARNING("-Wold-style-cast") -#define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ -#endif - -#if HEDLEY_HAS_WARNING("-Wcast-function-type") || \ - HEDLEY_GCC_VERSION_CHECK(8, 0, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ \ - _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ -#endif - -/* clang will emit this warning when we use C99 extensions whan not in - * C99 mode, even though it does support this. In such cases we check - * the compiler and version first, so we know it's not a problem. */ -#if HEDLEY_HAS_WARNING("-Wc99-extensions") -#define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ \ - _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -#endif - -/* https://github.com/simd-everywhere/simde/issues/277 */ -#if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4, 6, 0) && \ - !HEDLEY_GCC_VERSION_CHECK(6, 4, 0) && defined(__cplusplus) -#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ -#endif - -/* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS - * to silence, but you have to do that before including anything and - * that would require reordering includes. */ -#if defined(_MSC_VER) -#define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable : 4996)) -#else -#define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ -#endif - -/* Some compilers, such as clang, may use `long long` for 64-bit - * integers, but `long long` triggers a diagnostic with - * -Wc++98-compat-pedantic which says 'long long' is incompatible with - * C++98. */ -#if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") -#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ -#endif - -/* Some problem as above */ -#if HEDLEY_HAS_WARNING("-Wc++11-long-long") -#define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ -#endif - -/* emscripten emits this whenever stdin/stdout/stderr is used in a - * macro. */ -#if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") -#define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ \ - _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ -#endif - -/* Clang uses C11 generic selections to implement some AltiVec - * functions, which triggers this diagnostic when not compiling - * in C11 mode */ -#if HEDLEY_HAS_WARNING("-Wc11-extensions") -#define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ \ - _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ -#endif - -/* Clang sometimes triggers this warning in macros in the AltiVec and - * NEON headers, or due to missing functions. */ -#if HEDLEY_HAS_WARNING("-Wvector-conversion") -#define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ \ - _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") -/* For NEON, the situation with -Wvector-conversion in clang < 10 is - * bad enough that we just disable the warning altogether. */ -#if defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#else -#define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ -#endif -#if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) -#define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ -#endif - -/* SLEEF triggers this a *lot* in their headers */ -#if HEDLEY_HAS_WARNING("-Wignored-qualifiers") -#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ \ - _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") -#elif HEDLEY_GCC_VERSION_CHECK(4, 3, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ \ - _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#endif - -/* GCC emits this under some circumstances when using __int128 */ -#if HEDLEY_GCC_VERSION_CHECK(4, 8, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ \ - _Pragma("GCC diagnostic ignored \"-Wpedantic\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ -#endif - -/* MSVC doesn't like (__assume(0), code) and will warn about code being - * unreachable, but we want it there because not all compilers - * understand the unreachable macro and will complain if it is missing. - * I'm planning on adding a new macro to Hedley to handle this a bit - * more elegantly, but until then... */ -#if defined(HEDLEY_MSVC_VERSION) -#define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable : 4702)) -#else -#define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ -#endif - -/* This is a false positive from GCC in a few places. */ -#if HEDLEY_GCC_VERSION_CHECK(4, 7, 0) -#define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ \ - _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -#else -#define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ -#endif - -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#else -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ -#endif - -#define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ - SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ - SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ - SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ - SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ - SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ - SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ - SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ - SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ - SIMDE_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION_ \ - SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ - SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ - SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ - -#endif /* !defined(SIMDE_DIAGNOSTIC_H) */ diff --git a/libobs/util/simde/simde-features.h b/libobs/util/simde/simde-features.h deleted file mode 100644 index f6129129aa639a..00000000000000 --- a/libobs/util/simde/simde-features.h +++ /dev/null @@ -1,550 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2020 Evan Nemerson - */ - -/* simde-arch.h is used to determine which features are available according - to the compiler. However, we want to make it possible to forcibly enable - or disable APIs */ - -#if !defined(SIMDE_FEATURES_H) -#define SIMDE_FEATURES_H - -#include "simde-arch.h" -#include "simde-diagnostic.h" - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_SVML) -#define SIMDE_X86_SVML_NATIVE -#endif -#endif -#if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) -#define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && \ - !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) -#define SIMDE_X86_AVX512VP2INTERSECT_NATIVE -#endif -#endif -#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && \ - !defined(SIMDE_X86_AVX512F_NATIVE) -#define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && \ - !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_AVX512VBMI) -#define SIMDE_X86_AVX512VBMI_NATIVE -#endif -#endif -#if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) -#define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512CD_NATIVE) && \ - !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_AVX512CD) -#define SIMDE_X86_AVX512CD_NATIVE -#endif -#endif -#if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) -#define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) && \ - !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_AVX512DQ) -#define SIMDE_X86_AVX512DQ_NATIVE -#endif -#endif -#if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) -#define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512VL_NATIVE) && \ - !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_AVX512VL) -#define SIMDE_X86_AVX512VL_NATIVE -#endif -#endif -#if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) -#define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512BW_NATIVE) && \ - !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_AVX512BW) -#define SIMDE_X86_AVX512BW_NATIVE -#endif -#endif -#if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) -#define SIMDE_X86_AVX512F_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX512F_NATIVE) && \ - !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_AVX512F) -#define SIMDE_X86_AVX512F_NATIVE -#endif -#endif -#if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) -#define SIMDE_X86_AVX2_NATIVE -#endif - -#if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_FMA) -#define SIMDE_X86_FMA_NATIVE -#endif -#endif -#if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) -#define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_AVX2) -#define SIMDE_X86_AVX2_NATIVE -#endif -#endif -#if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) -#define SIMDE_X86_AVX_NATIVE -#endif - -#if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_AVX) -#define SIMDE_X86_AVX_NATIVE -#endif -#endif -#if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) -#define SIMDE_X86_SSE4_2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_2_NATIVE) && \ - !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_SSE4_2) -#define SIMDE_X86_SSE4_2_NATIVE -#endif -#endif -#if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) -#define SIMDE_X86_SSE4_1_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE4_1_NATIVE) && \ - !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_SSE4_1) -#define SIMDE_X86_SSE4_1_NATIVE -#endif -#endif -#if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) -#define SIMDE_X86_SSSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_SSSE3) -#define SIMDE_X86_SSSE3_NATIVE -#endif -#endif -#if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) -#define SIMDE_X86_SSE3_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_SSE3) -#define SIMDE_X86_SSE3_NATIVE -#endif -#endif -#if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) -#define SIMDE_X86_SSE2_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_SSE2) -#define SIMDE_X86_SSE2_NATIVE -#endif -#endif -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) -#define SIMDE_X86_SSE_NATIVE -#endif - -#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_SSE) -#define SIMDE_X86_SSE_NATIVE -#endif -#endif - -#if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_MMX) -#define SIMDE_X86_MMX_NATIVE -#endif -#endif - -#if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_GFNI) -#define SIMDE_X86_GFNI_NATIVE -#endif -#endif - -#if !defined(SIMDE_X86_PCLMUL_NATIVE) && \ - !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_PCLMUL) -#define SIMDE_X86_PCLMUL_NATIVE -#endif -#endif - -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && \ - !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_X86_VPCLMULQDQ) -#define SIMDE_X86_VPCLMULQDQ_NATIVE -#endif -#endif - -#if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(__INTEL_COMPILER) -#define SIMDE_X86_SVML_NATIVE -#endif -#endif - -#if defined(HEDLEY_MSVC_VERSION) -#pragma warning(push) -#pragma warning(disable : 4799) -#endif - -#if defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) -#include -#elif defined(SIMDE_X86_SSE4_2_NATIVE) -#include -#elif defined(SIMDE_X86_SSE4_1_NATIVE) -#include -#elif defined(SIMDE_X86_SSSE3_NATIVE) -#include -#elif defined(SIMDE_X86_SSE3_NATIVE) -#include -#elif defined(SIMDE_X86_SSE2_NATIVE) -#include -#elif defined(SIMDE_X86_SSE_NATIVE) -#include -#elif defined(SIMDE_X86_MMX_NATIVE) -#include -#endif - -#if defined(HEDLEY_MSVC_VERSION) -#pragma warning(pop) -#endif - -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \ - !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && \ - SIMDE_ARCH_ARM_CHECK(80) -#define SIMDE_ARM_NEON_A64V8_NATIVE -#endif -#endif -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && \ - !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -#define SIMDE_ARM_NEON_A32V8_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(80) && \ - (__ARM_NEON_FP & 0x02) -#define SIMDE_ARM_NEON_A32V8_NATIVE -#endif -#endif -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - !defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define SIMDE_ARM_NEON_A32V7_NATIVE -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(70) -#define SIMDE_ARM_NEON_A32V7_NATIVE -#endif -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#include -#endif - -#if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_ARM_SVE) -#define SIMDE_ARM_SVE_NATIVE -#include -#endif -#endif - -#if !defined(SIMDE_WASM_SIMD128_NATIVE) && \ - !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_WASM_SIMD128) -#define SIMDE_WASM_SIMD128_NATIVE -#endif -#endif -#if defined(SIMDE_WASM_SIMD128_NATIVE) -#if !defined(__wasm_unimplemented_simd128__) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ -#define __wasm_unimplemented_simd128__ -HEDLEY_DIAGNOSTIC_POP -#endif -#include -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && \ - !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) -#define SIMDE_POWER_ALTIVEC_P9_NATIVE -#endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) -#define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) -#define SIMDE_POWER_ALTIVEC_P8_NATIVE -#endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) -#define SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && \ - !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) -#define SIMDE_POWER_ALTIVEC_P7_NATIVE -#endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) -#define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && \ - !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) -#define SIMDE_POWER_ALTIVEC_P6_NATIVE -#endif -#endif -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) -#define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif - -#if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && \ - !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) -#define SIMDE_POWER_ALTIVEC_P5_NATIVE -#endif -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -/* AltiVec conflicts with lots of stuff. The bool keyword conflicts - * with the bool keyword in C++ and the bool macro in C99+ (defined - * in stdbool.h). The vector keyword conflicts with std::vector in - * C++ if you are `using std;`. - * - * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` - * instead, but altivec.h will unconditionally define - * `vector`/`bool`/`pixel` so we need to work around that. - * - * Unfortunately this means that if your code uses AltiVec directly - * it may break. If this is the case you'll want to define - * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even - * better, port your code to use the double-underscore versions. */ -#if defined(bool) -#undef bool -#endif - -#include - -#if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) -#if defined(vector) -#undef vector -#endif -#if defined(pixel) -#undef pixel -#endif -#if defined(bool) -#undef bool -#endif -#endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ - -/* Use these intsead of vector/pixel/bool in SIMDe. */ -#define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T -#define SIMDE_POWER_ALTIVEC_PIXEL __pixel -#define SIMDE_POWER_ALTIVEC_BOOL __bool - -/* Re-define bool if we're using stdbool.h */ -#if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && \ - !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) -#define bool _Bool -#endif -#endif - -#if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && \ - !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && \ - !defined(SIMDE_NO_NATIVE) -#if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) -#define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 -#endif -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) -#include -#endif - -/* This is used to determine whether or not to fall back on a vector - * function in an earlier ISA extensions, as well as whether - * we expected any attempts at vectorization to be fruitful or if we - * expect to always be running serial code. */ - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) -#if defined(SIMDE_X86_AVX512F_NATIVE) -#define SIMDE_NATURAL_VECTOR_SIZE (512) -#elif defined(SIMDE_X86_AVX_NATIVE) -#define SIMDE_NATURAL_VECTOR_SIZE (256) -#elif defined(SIMDE_X86_SSE_NATIVE) || defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ - defined(SIMDE_WASM_SIMD128_NATIVE) || \ - defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) -#define SIMDE_NATURAL_VECTOR_SIZE (128) -#endif - -#if !defined(SIMDE_NATURAL_VECTOR_SIZE) -#define SIMDE_NATURAL_VECTOR_SIZE (0) -#endif -#endif - -#define SIMDE_NATURAL_VECTOR_SIZE_LE(x) \ - ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) -#define SIMDE_NATURAL_VECTOR_SIZE_GE(x) \ - ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) - -/* Native aliases */ -#if defined(SIMDE_ENABLE_NATIVE_ALIASES) -#if !defined(SIMDE_X86_MMX_NATIVE) -#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_SSE_NATIVE) -#define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_SSE2_NATIVE) -#define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_SSE3_NATIVE) -#define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_SSSE3_NATIVE) -#define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_SSE4_1_NATIVE) -#define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_SSE4_2_NATIVE) -#define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_AVX_NATIVE) -#define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_AVX2_NATIVE) -#define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_FMA_NATIVE) -#define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_AVX512F_NATIVE) -#define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_AVX512VL_NATIVE) -#define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_AVX512BW_NATIVE) -#define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_AVX512DQ_NATIVE) -#define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_AVX512CD_NATIVE) -#define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_GFNI_NATIVE) -#define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_PCLMUL_NATIVE) -#define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) -#define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES -#endif - -#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) -#define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES -#endif -#if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) -#define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES -#endif -#endif - -/* Are floating point values stored using IEEE 754? Knowing - * this at during preprocessing is a bit tricky, mostly because what - * we're curious about is how values are stored and not whether the - * implementation is fully conformant in terms of rounding, NaN - * handling, etc. - * - * For example, if you use -ffast-math or -Ofast on - * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 - * support is not advertised (by defining __STDC_IEC_559__). - * - * However, what we care about is whether it is safe to assume that - * floating point values are stored in IEEE 754 format, in which case - * we can provide faster implementations of some functions. - * - * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- - * so we just assume IEEE 754 for now. There is a test which verifies - * this, if that test fails sowewhere please let us know and we'll add - * an exception for that platform. Meanwhile, you can define - * SIMDE_NO_IEEE754_STORAGE. */ -#if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) -#define SIMDE_IEEE754_STORAGE -#endif - -#endif /* !defined(SIMDE_FEATURES_H) */ diff --git a/libobs/util/simde/simde-math.h b/libobs/util/simde/simde-math.h deleted file mode 100644 index 5e3ed5e8398e36..00000000000000 --- a/libobs/util/simde/simde-math.h +++ /dev/null @@ -1,1858 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -/* Attempt to find math functions. Functions may be in , - * , compiler built-ins/intrinsics, or platform/architecture - * specific headers. In some cases, especially those not built in to - * libm, we may need to define our own implementations. */ - -#if !defined(SIMDE_MATH_H) - -#include "hedley.h" -#include "simde-features.h" - -#include -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) -#include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -/* SLEEF support - * https://sleef.org/ - * - * If you include prior to including SIMDe, SIMDe will use - * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to - * including SIMDe to force the issue. - * - * Note that SLEEF does requires linking to libsleef. - * - * By default, SIMDe will use the 1 ULP functions, but if you use - * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. This is - * only the case for the simde_math_* functions; for code in other - * SIMDe headers which calls SLEEF directly we may use functions with - * greater error if the API we're implementing is less precise (for - * example, SVML guarantees 4 ULP, so we will generally use the 3.5 - * ULP functions from SLEEF). */ -#if !defined(SIMDE_MATH_SLEEF_DISABLE) -#if defined(__SLEEF_H__) -#define SIMDE_MATH_SLEEF_ENABLE -#endif -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ -#include -HEDLEY_DIAGNOSTIC_POP -#endif - -#if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) -#if defined(SLEEF_VERSION_MAJOR) -#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, \ - SLEEF_VERSION_PATCHLEVEL) >= \ - HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else -#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) \ - (HEDLEY_VERSION_ENCODE(3, 0, 0) >= \ - HEDLEY_VERSION_ENCODE(major, minor, patch)) -#endif -#else -#define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) -#endif - -#if defined(__has_builtin) -#define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) -#elif HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || HEDLEY_GCC_VERSION_CHECK(4, 4, 0) -#define SIMDE_MATH_BUILTIN_LIBM(func) (1) -#else -#define SIMDE_MATH_BUILTIN_LIBM(func) (0) -#endif - -#if defined(HUGE_VAL) -/* Looks like or has already been included. */ - -/* The math.h from libc++ (yes, the C header from the C++ standard - * library) will define an isnan function, but not an isnan macro - * like the C standard requires. So we detect the header guards - * macro libc++ uses. */ -#if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) -#define SIMDE_MATH_HAVE_MATH_H -#elif defined(__cplusplus) -#define SIMDE_MATH_HAVE_CMATH -#endif -#elif defined(__has_include) -#if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() -#define SIMDE_MATH_HAVE_CMATH -#include -#elif __has_include() -#define SIMDE_MATH_HAVE_MATH_H -#include -#elif !defined(SIMDE_MATH_NO_LIBM) -#define SIMDE_MATH_NO_LIBM -#endif -#elif !defined(SIMDE_MATH_NO_LIBM) -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define SIMDE_MATH_HAVE_CMATH -HEDLEY_DIAGNOSTIC_PUSH -#if defined(HEDLEY_MSVC_VERSION) -/* VS 14 emits this diagnostic about noexcept being used on a - * function, which we can't do anything about. */ -#pragma warning(disable : 4996) -#endif -#include -HEDLEY_DIAGNOSTIC_POP -#else -#define SIMDE_MATH_HAVE_MATH_H -#include -#endif -#endif - -/* Try to avoid including since it pulls in a *lot* of code. */ -#if HEDLEY_HAS_BUILTIN(__builtin_creal) || \ - HEDLEY_GCC_VERSION_CHECK(4, 7, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ -typedef __complex__ float simde_cfloat32; -typedef __complex__ double simde_cfloat64; -HEDLEY_DIAGNOSTIC_POP -#define SIMDE_MATH_CMPLX(x, y) \ - (HEDLEY_STATIC_CAST(double, x) + \ - HEDLEY_STATIC_CAST(double, y) * (__extension__ 1.0j)) -#define SIMDE_MATH_CMPLXF(x, y) \ - (HEDLEY_STATIC_CAST(float, x) + \ - HEDLEY_STATIC_CAST(float, y) * (__extension__ 1.0fj)) - -#if !defined(simde_math_creal) -#define simde_math_crealf(z) __builtin_crealf(z) -#endif -#if !defined(simde_math_crealf) -#define simde_math_creal(z) __builtin_creal(z) -#endif -#if !defined(simde_math_cimag) -#define simde_math_cimagf(z) __builtin_cimagf(z) -#endif -#if !defined(simde_math_cimagf) -#define simde_math_cimag(z) __builtin_cimag(z) -#endif -#elif !defined(__cplusplus) -#include - -#if !defined(HEDLEY_MSVC_VERSION) -typedef float _Complex simde_cfloat32; -typedef double _Complex simde_cfloat64; -#else -typedef _Fcomplex simde_cfloat32; -typedef _Dcomplex simde_cfloat64; -#endif - -#if defined(HEDLEY_MSVC_VERSION) -#define SIMDE_MATH_CMPLX(x, y) ((simde_cfloat64){(x), (y)}) -#define SIMDE_MATH_CMPLXF(x, y) ((simde_cfloat32){(x), (y)}) -#elif defined(CMPLX) && defined(CMPLXF) -#define SIMDE_MATH_CMPLX(x, y) CMPLX(x, y) -#define SIMDE_MATH_CMPLXF(x, y) CMPLXF(x, y) -#else -#define SIMDE_MATH_CMPLX(x, y) \ - (HEDLEY_STATIC_CAST(double, x) + HEDLEY_STATIC_CAST(double, y) * I) -#define SIMDE_MATH_CMPLXF(x, y) \ - (HEDLEY_STATIC_CAST(float, x) + HEDLEY_STATIC_CAST(float, y) * I) -#endif - -#if !defined(simde_math_creal) -#define simde_math_creal(z) creal(z) -#endif -#if !defined(simde_math_crealf) -#define simde_math_crealf(z) crealf(z) -#endif -#if !defined(simde_math_cimag) -#define simde_math_cimag(z) cimag(z) -#endif -#if !defined(simde_math_cimagf) -#define simde_math_cimagf(z) cimagf(z) -#endif -#else -HEDLEY_DIAGNOSTIC_PUSH -#if defined(HEDLEY_MSVC_VERSION) -#pragma warning(disable : 4530) -#endif -#include -HEDLEY_DIAGNOSTIC_POP - -typedef std::complex simde_cfloat32; -typedef std::complex simde_cfloat64; -#define SIMDE_MATH_CMPLX(x, y) (std::complex(x, y)) -#define SIMDE_MATH_CMPLXF(x, y) (std::complex(x, y)) - -#if !defined(simde_math_creal) -#define simde_math_creal(z) ((z).real()) -#endif -#if !defined(simde_math_crealf) -#define simde_math_crealf(z) ((z).real()) -#endif -#if !defined(simde_math_cimag) -#define simde_math_cimag(z) ((z).imag()) -#endif -#if !defined(simde_math_cimagf) -#define simde_math_cimagf(z) ((z).imag()) -#endif -#endif - -#if !defined(SIMDE_MATH_INFINITY) -#if HEDLEY_HAS_BUILTIN(__builtin_inf) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) -#define SIMDE_MATH_INFINITY (__builtin_inf()) -#elif defined(INFINITY) -#define SIMDE_MATH_INFINITY INFINITY -#endif -#endif - -#if !defined(SIMDE_INFINITYF) -#if HEDLEY_HAS_BUILTIN(__builtin_inff) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(13, 1, 0) -#define SIMDE_MATH_INFINITYF (__builtin_inff()) -#elif defined(INFINITYF) -#define SIMDE_MATH_INFINITYF INFINITYF -#elif defined(SIMDE_MATH_INFINITY) -#define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) -#endif -#endif - -#if !defined(SIMDE_MATH_NAN) -#if HEDLEY_HAS_BUILTIN(__builtin_nan) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) || \ - HEDLEY_IBM_VERSION_CHECK(13, 1, 0) -#define SIMDE_MATH_NAN (__builtin_nan("")) -#elif defined(NAN) -#define SIMDE_MATH_NAN NAN -#endif -#endif - -#if !defined(SIMDE_NANF) -#if HEDLEY_HAS_BUILTIN(__builtin_nanf) || HEDLEY_GCC_VERSION_CHECK(3, 3, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_CRAY_VERSION_CHECK(8, 1, 0) -#define SIMDE_MATH_NANF (__builtin_nanf("")) -#elif defined(NANF) -#define SIMDE_MATH_NANF NANF -#elif defined(SIMDE_MATH_NAN) -#define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) -#endif -#endif - -#if !defined(SIMDE_MATH_PI) -#if defined(M_PI) -#define SIMDE_MATH_PI M_PI -#else -#define SIMDE_MATH_PI 3.14159265358979323846 -#endif -#endif - -#if !defined(SIMDE_MATH_PIF) -#if defined(M_PI) -#define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) -#else -#define SIMDE_MATH_PIF 3.14159265358979323846f -#endif -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180) -#define SIMDE_MATH_PI_OVER_180 \ - 0.0174532925199432957692369076848861271344287188854172545609719144 -#endif - -#if !defined(SIMDE_MATH_PI_OVER_180F) -#define SIMDE_MATH_PI_OVER_180F \ - 0.0174532925199432957692369076848861271344287188854172545609719144f -#endif - -#if !defined(SIMDE_MATH_180_OVER_PI) -#define SIMDE_MATH_180_OVER_PI \ - 57.295779513082320876798154814105170332405472466564321549160243861 -#endif - -#if !defined(SIMDE_MATH_180_OVER_PIF) -#define SIMDE_MATH_180_OVER_PIF \ - 57.295779513082320876798154814105170332405472466564321549160243861f -#endif - -#if !defined(SIMDE_MATH_FLT_MIN) -#if defined(FLT_MIN) -#define SIMDE_MATH_FLT_MIN FLT_MIN -#elif defined(__FLT_MIN__) -#define SIMDE_MATH_FLT_MIN __FLT_MIN__ -#elif defined(__cplusplus) -#include -#define SIMDE_MATH_FLT_MIN FLT_MIN -#else -#include -#define SIMDE_MATH_FLT_MIN FLT_MIN -#endif -#endif - -#if !defined(SIMDE_MATH_DBL_MIN) -#if defined(DBL_MIN) -#define SIMDE_MATH_DBL_MIN DBL_MIN -#elif defined(__DBL_MIN__) -#define SIMDE_MATH_DBL_MIN __DBL_MIN__ -#elif defined(__cplusplus) -#include -#define SIMDE_MATH_DBL_MIN DBL_MIN -#else -#include -#define SIMDE_MATH_DBL_MIN DBL_MIN -#endif -#endif - -/*** Classification macros from C99 ***/ - -#if !defined(simde_math_isinf) -#if SIMDE_MATH_BUILTIN_LIBM(isinf) -#define simde_math_isinf(v) __builtin_isinf(v) -#elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_isinf(v) isinf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_isinf(v) std::isinf(v) -#endif -#endif - -#if !defined(simde_math_isinff) -#if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) -#define simde_math_isinff(v) __builtin_isinff(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_isinff(v) std::isinf(v) -#elif defined(simde_math_isinf) -#define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) -#endif -#endif - -#if !defined(simde_math_isnan) -#if SIMDE_MATH_BUILTIN_LIBM(isnan) -#define simde_math_isnan(v) __builtin_isnan(v) -#elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_isnan(v) isnan(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_isnan(v) std::isnan(v) -#endif -#endif - -#if !defined(simde_math_isnanf) -#if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) -/* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ -#define simde_math_isnanf(v) __builtin_isnanf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_isnanf(v) std::isnan(v) -#elif defined(simde_math_isnan) -#define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) -#endif -#endif - -#if !defined(simde_math_isnormal) -#if SIMDE_MATH_BUILTIN_LIBM(isnormal) -#define simde_math_isnormal(v) __builtin_isnormal(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_isnormal(v) isnormal(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_isnormal(v) std::isnormal(v) -#endif -#endif - -#if !defined(simde_math_isnormalf) -#if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) -#define simde_math_isnormalf(v) __builtin_isnormalf(v) -#elif SIMDE_MATH_BUILTIN_LIBM(isnormal) -#define simde_math_isnormalf(v) __builtin_isnormal(v) -#elif defined(isnormalf) -#define simde_math_isnormalf(v) isnormalf(v) -#elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_isnormalf(v) isnormal(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_isnormalf(v) std::isnormal(v) -#elif defined(simde_math_isnormal) -#define simde_math_isnormalf(v) simde_math_isnormal(v) -#endif -#endif - -/*** Manipulation functions ***/ - -#if !defined(simde_math_nextafter) -#if (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && \ - !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define simde_math_nextafter(x, y) __builtin_nextafter(x, y) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_nextafter(x, y) std::nextafter(x, y) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_nextafter(x, y) nextafter(x, y) -#endif -#endif - -#if !defined(simde_math_nextafterf) -#if (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && \ - !defined(HEDLEY_IBM_VERSION)) || \ - HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \ - HEDLEY_GCC_VERSION_CHECK(3, 4, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) -#define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_nextafterf(x, y) std::nextafter(x, y) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_nextafterf(x, y) nextafterf(x, y) -#endif -#endif - -/*** Functions from C99 ***/ - -#if !defined(simde_math_abs) -#if SIMDE_MATH_BUILTIN_LIBM(abs) -#define simde_math_abs(v) __builtin_abs(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_abs(v) std::abs(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_abs(v) abs(v) -#endif -#endif - -#if !defined(simde_math_fabsf) -#if SIMDE_MATH_BUILTIN_LIBM(fabsf) -#define simde_math_fabsf(v) __builtin_fabsf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_fabsf(v) std::abs(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_fabsf(v) fabsf(v) -#endif -#endif - -#if !defined(simde_math_acos) -#if SIMDE_MATH_BUILTIN_LIBM(acos) -#define simde_math_acos(v) __builtin_acos(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_acos(v) std::acos(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_acos(v) acos(v) -#endif -#endif - -#if !defined(simde_math_acosf) -#if SIMDE_MATH_BUILTIN_LIBM(acosf) -#define simde_math_acosf(v) __builtin_acosf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_acosf(v) std::acos(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_acosf(v) acosf(v) -#endif -#endif - -#if !defined(simde_math_acosh) -#if SIMDE_MATH_BUILTIN_LIBM(acosh) -#define simde_math_acosh(v) __builtin_acosh(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_acosh(v) std::acosh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_acosh(v) acosh(v) -#endif -#endif - -#if !defined(simde_math_acoshf) -#if SIMDE_MATH_BUILTIN_LIBM(acoshf) -#define simde_math_acoshf(v) __builtin_acoshf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_acoshf(v) std::acosh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_acoshf(v) acoshf(v) -#endif -#endif - -#if !defined(simde_math_asin) -#if SIMDE_MATH_BUILTIN_LIBM(asin) -#define simde_math_asin(v) __builtin_asin(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_asin(v) std::asin(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_asin(v) asin(v) -#endif -#endif - -#if !defined(simde_math_asinf) -#if SIMDE_MATH_BUILTIN_LIBM(asinf) -#define simde_math_asinf(v) __builtin_asinf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_asinf(v) std::asin(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_asinf(v) asinf(v) -#endif -#endif - -#if !defined(simde_math_asinh) -#if SIMDE_MATH_BUILTIN_LIBM(asinh) -#define simde_math_asinh(v) __builtin_asinh(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_asinh(v) std::asinh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_asinh(v) asinh(v) -#endif -#endif - -#if !defined(simde_math_asinhf) -#if SIMDE_MATH_BUILTIN_LIBM(asinhf) -#define simde_math_asinhf(v) __builtin_asinhf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_asinhf(v) std::asinh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_asinhf(v) asinhf(v) -#endif -#endif - -#if !defined(simde_math_atan) -#if SIMDE_MATH_BUILTIN_LIBM(atan) -#define simde_math_atan(v) __builtin_atan(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_atan(v) std::atan(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_atan(v) atan(v) -#endif -#endif - -#if !defined(simde_math_atan2) -#if SIMDE_MATH_BUILTIN_LIBM(atan2) -#define simde_math_atan2(y, x) __builtin_atan2(y, x) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_atan2(y, x) std::atan2(y, x) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_atan2(y, x) atan2(y, x) -#endif -#endif - -#if !defined(simde_math_atan2f) -#if SIMDE_MATH_BUILTIN_LIBM(atan2f) -#define simde_math_atan2f(y, x) __builtin_atan2f(y, x) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_atan2f(y, x) std::atan2(y, x) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_atan2f(y, x) atan2f(y, x) -#endif -#endif - -#if !defined(simde_math_atanf) -#if SIMDE_MATH_BUILTIN_LIBM(atanf) -#define simde_math_atanf(v) __builtin_atanf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_atanf(v) std::atan(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_atanf(v) atanf(v) -#endif -#endif - -#if !defined(simde_math_atanh) -#if SIMDE_MATH_BUILTIN_LIBM(atanh) -#define simde_math_atanh(v) __builtin_atanh(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_atanh(v) std::atanh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_atanh(v) atanh(v) -#endif -#endif - -#if !defined(simde_math_atanhf) -#if SIMDE_MATH_BUILTIN_LIBM(atanhf) -#define simde_math_atanhf(v) __builtin_atanhf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_atanhf(v) std::atanh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_atanhf(v) atanhf(v) -#endif -#endif - -#if !defined(simde_math_cbrt) -#if SIMDE_MATH_BUILTIN_LIBM(cbrt) -#define simde_math_cbrt(v) __builtin_cbrt(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_cbrt(v) std::cbrt(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_cbrt(v) cbrt(v) -#endif -#endif - -#if !defined(simde_math_cbrtf) -#if SIMDE_MATH_BUILTIN_LIBM(cbrtf) -#define simde_math_cbrtf(v) __builtin_cbrtf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_cbrtf(v) std::cbrt(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_cbrtf(v) cbrtf(v) -#endif -#endif - -#if !defined(simde_math_ceil) -#if SIMDE_MATH_BUILTIN_LIBM(ceil) -#define simde_math_ceil(v) __builtin_ceil(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_ceil(v) std::ceil(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_ceil(v) ceil(v) -#endif -#endif - -#if !defined(simde_math_ceilf) -#if SIMDE_MATH_BUILTIN_LIBM(ceilf) -#define simde_math_ceilf(v) __builtin_ceilf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_ceilf(v) std::ceil(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_ceilf(v) ceilf(v) -#endif -#endif - -#if !defined(simde_math_copysign) -#if SIMDE_MATH_BUILTIN_LIBM(copysign) -#define simde_math_copysign(x, y) __builtin_copysign(x, y) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_copysign(x, y) std::copysign(x, y) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_copysign(x, y) copysign(x, y) -#endif -#endif - -#if !defined(simde_math_copysignf) -#if SIMDE_MATH_BUILTIN_LIBM(copysignf) -#define simde_math_copysignf(x, y) __builtin_copysignf(x, y) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_copysignf(x, y) std::copysignf(x, y) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_copysignf(x, y) copysignf(x, y) -#endif -#endif - -#if !defined(simde_math_cos) -#if SIMDE_MATH_BUILTIN_LIBM(cos) -#define simde_math_cos(v) __builtin_cos(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_cos(v) std::cos(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_cos(v) cos(v) -#endif -#endif - -#if !defined(simde_math_cosf) -#if defined(SIMDE_MATH_SLEEF_ENABLE) -#if SIMDE_ACCURACY_PREFERENCE < 1 -#define simde_math_cosf(v) Sleef_cosf_u35(v) -#else -#define simde_math_cosf(v) Sleef_cosf_u10(v) -#endif -#elif SIMDE_MATH_BUILTIN_LIBM(cosf) -#define simde_math_cosf(v) __builtin_cosf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_cosf(v) std::cos(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_cosf(v) cosf(v) -#endif -#endif - -#if !defined(simde_math_cosh) -#if SIMDE_MATH_BUILTIN_LIBM(cosh) -#define simde_math_cosh(v) __builtin_cosh(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_cosh(v) std::cosh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_cosh(v) cosh(v) -#endif -#endif - -#if !defined(simde_math_coshf) -#if SIMDE_MATH_BUILTIN_LIBM(coshf) -#define simde_math_coshf(v) __builtin_coshf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_coshf(v) std::cosh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_coshf(v) coshf(v) -#endif -#endif - -#if !defined(simde_math_erf) -#if SIMDE_MATH_BUILTIN_LIBM(erf) -#define simde_math_erf(v) __builtin_erf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_erf(v) std::erf(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_erf(v) erf(v) -#endif -#endif - -#if !defined(simde_math_erff) -#if SIMDE_MATH_BUILTIN_LIBM(erff) -#define simde_math_erff(v) __builtin_erff(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_erff(v) std::erf(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_erff(v) erff(v) -#endif -#endif - -#if !defined(simde_math_erfc) -#if SIMDE_MATH_BUILTIN_LIBM(erfc) -#define simde_math_erfc(v) __builtin_erfc(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_erfc(v) std::erfc(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_erfc(v) erfc(v) -#endif -#endif - -#if !defined(simde_math_erfcf) -#if SIMDE_MATH_BUILTIN_LIBM(erfcf) -#define simde_math_erfcf(v) __builtin_erfcf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_erfcf(v) std::erfc(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_erfcf(v) erfcf(v) -#endif -#endif - -#if !defined(simde_math_exp) -#if SIMDE_MATH_BUILTIN_LIBM(exp) -#define simde_math_exp(v) __builtin_exp(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_exp(v) std::exp(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_exp(v) exp(v) -#endif -#endif - -#if !defined(simde_math_expf) -#if SIMDE_MATH_BUILTIN_LIBM(expf) -#define simde_math_expf(v) __builtin_expf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_expf(v) std::exp(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_expf(v) expf(v) -#endif -#endif - -#if !defined(simde_math_expm1) -#if SIMDE_MATH_BUILTIN_LIBM(expm1) -#define simde_math_expm1(v) __builtin_expm1(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_expm1(v) std::expm1(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_expm1(v) expm1(v) -#endif -#endif - -#if !defined(simde_math_expm1f) -#if SIMDE_MATH_BUILTIN_LIBM(expm1f) -#define simde_math_expm1f(v) __builtin_expm1f(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_expm1f(v) std::expm1(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_expm1f(v) expm1f(v) -#endif -#endif - -#if !defined(simde_math_exp2) -#if SIMDE_MATH_BUILTIN_LIBM(exp2) -#define simde_math_exp2(v) __builtin_exp2(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_exp2(v) std::exp2(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_exp2(v) exp2(v) -#endif -#endif - -#if !defined(simde_math_exp2f) -#if SIMDE_MATH_BUILTIN_LIBM(exp2f) -#define simde_math_exp2f(v) __builtin_exp2f(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_exp2f(v) std::exp2(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_exp2f(v) exp2f(v) -#endif -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3, 4, 0) -#define simde_math_exp10(v) __builtin_exp10(v) -#else -#define simde_math_exp10(v) pow(10.0, (v)) -#endif - -#if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3, 4, 0) -#define simde_math_exp10f(v) __builtin_exp10f(v) -#else -#define simde_math_exp10f(v) powf(10.0f, (v)) -#endif - -#if !defined(simde_math_fabs) -#if SIMDE_MATH_BUILTIN_LIBM(fabs) -#define simde_math_fabs(v) __builtin_fabs(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_fabs(v) std::fabs(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_fabs(v) fabs(v) -#endif -#endif - -#if !defined(simde_math_fabsf) -#if SIMDE_MATH_BUILTIN_LIBM(fabsf) -#define simde_math_fabsf(v) __builtin_fabsf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_fabsf(v) std::fabs(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_fabsf(v) fabsf(v) -#endif -#endif - -#if !defined(simde_math_floor) -#if SIMDE_MATH_BUILTIN_LIBM(floor) -#define simde_math_floor(v) __builtin_floor(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_floor(v) std::floor(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_floor(v) floor(v) -#endif -#endif - -#if !defined(simde_math_floorf) -#if SIMDE_MATH_BUILTIN_LIBM(floorf) -#define simde_math_floorf(v) __builtin_floorf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_floorf(v) std::floor(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_floorf(v) floorf(v) -#endif -#endif - -#if !defined(simde_math_fma) -#if SIMDE_MATH_BUILTIN_LIBM(fma) -#define simde_math_fma(x, y, z) __builtin_fma(x, y, z) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_fma(x, y, z) std::fma(x, y, z) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_fma(x, y, z) fma(x, y, z) -#endif -#endif - -#if !defined(simde_math_fmaf) -#if SIMDE_MATH_BUILTIN_LIBM(fmaf) -#define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_fmaf(x, y, z) std::fma(x, y, z) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_fmaf(x, y, z) fmaf(x, y, z) -#endif -#endif - -#if !defined(simde_math_fmax) -#if SIMDE_MATH_BUILTIN_LIBM(fmax) -#define simde_math_fmax(x, y, z) __builtin_fmax(x, y, z) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_fmax(x, y, z) std::fmax(x, y, z) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_fmax(x, y, z) fmax(x, y, z) -#endif -#endif - -#if !defined(simde_math_fmaxf) -#if SIMDE_MATH_BUILTIN_LIBM(fmaxf) -#define simde_math_fmaxf(x, y, z) __builtin_fmaxf(x, y, z) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_fmaxf(x, y, z) std::fmax(x, y, z) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_fmaxf(x, y, z) fmaxf(x, y, z) -#endif -#endif - -#if !defined(simde_math_hypot) -#if SIMDE_MATH_BUILTIN_LIBM(hypot) -#define simde_math_hypot(y, x) __builtin_hypot(y, x) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_hypot(y, x) std::hypot(y, x) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_hypot(y, x) hypot(y, x) -#endif -#endif - -#if !defined(simde_math_hypotf) -#if SIMDE_MATH_BUILTIN_LIBM(hypotf) -#define simde_math_hypotf(y, x) __builtin_hypotf(y, x) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_hypotf(y, x) std::hypot(y, x) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_hypotf(y, x) hypotf(y, x) -#endif -#endif - -#if !defined(simde_math_log) -#if SIMDE_MATH_BUILTIN_LIBM(log) -#define simde_math_log(v) __builtin_log(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_log(v) std::log(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_log(v) log(v) -#endif -#endif - -#if !defined(simde_math_logf) -#if SIMDE_MATH_BUILTIN_LIBM(logf) -#define simde_math_logf(v) __builtin_logf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_logf(v) std::log(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_logf(v) logf(v) -#endif -#endif - -#if !defined(simde_math_logb) -#if SIMDE_MATH_BUILTIN_LIBM(logb) -#define simde_math_logb(v) __builtin_logb(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_logb(v) std::logb(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_logb(v) logb(v) -#endif -#endif - -#if !defined(simde_math_logbf) -#if SIMDE_MATH_BUILTIN_LIBM(logbf) -#define simde_math_logbf(v) __builtin_logbf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_logbf(v) std::logb(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_logbf(v) logbf(v) -#endif -#endif - -#if !defined(simde_math_log1p) -#if SIMDE_MATH_BUILTIN_LIBM(log1p) -#define simde_math_log1p(v) __builtin_log1p(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_log1p(v) std::log1p(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_log1p(v) log1p(v) -#endif -#endif - -#if !defined(simde_math_log1pf) -#if SIMDE_MATH_BUILTIN_LIBM(log1pf) -#define simde_math_log1pf(v) __builtin_log1pf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_log1pf(v) std::log1p(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_log1pf(v) log1pf(v) -#endif -#endif - -#if !defined(simde_math_log2) -#if SIMDE_MATH_BUILTIN_LIBM(log2) -#define simde_math_log2(v) __builtin_log2(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_log2(v) std::log2(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_log2(v) log2(v) -#endif -#endif - -#if !defined(simde_math_log2f) -#if SIMDE_MATH_BUILTIN_LIBM(log2f) -#define simde_math_log2f(v) __builtin_log2f(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_log2f(v) std::log2(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_log2f(v) log2f(v) -#endif -#endif - -#if !defined(simde_math_log10) -#if SIMDE_MATH_BUILTIN_LIBM(log10) -#define simde_math_log10(v) __builtin_log10(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_log10(v) std::log10(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_log10(v) log10(v) -#endif -#endif - -#if !defined(simde_math_log10f) -#if SIMDE_MATH_BUILTIN_LIBM(log10f) -#define simde_math_log10f(v) __builtin_log10f(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_log10f(v) std::log10(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_log10f(v) log10f(v) -#endif -#endif - -#if !defined(simde_math_modf) -#if SIMDE_MATH_BUILTIN_LIBM(modf) -#define simde_math_modf(x, iptr) __builtin_modf(x, iptr) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_modf(x, iptr) std::modf(x, iptr) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_modf(x, iptr) modf(x, iptr) -#endif -#endif - -#if !defined(simde_math_modff) -#if SIMDE_MATH_BUILTIN_LIBM(modff) -#define simde_math_modff(x, iptr) __builtin_modff(x, iptr) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_modff(x, iptr) std::modf(x, iptr) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_modff(x, iptr) modff(x, iptr) -#endif -#endif - -#if !defined(simde_math_nearbyint) -#if SIMDE_MATH_BUILTIN_LIBM(nearbyint) -#define simde_math_nearbyint(v) __builtin_nearbyint(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_nearbyint(v) std::nearbyint(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_nearbyint(v) nearbyint(v) -#endif -#endif - -#if !defined(simde_math_nearbyintf) -#if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) -#define simde_math_nearbyintf(v) __builtin_nearbyintf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_nearbyintf(v) std::nearbyint(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_nearbyintf(v) nearbyintf(v) -#endif -#endif - -#if !defined(simde_math_pow) -#if SIMDE_MATH_BUILTIN_LIBM(pow) -#define simde_math_pow(y, x) __builtin_pow(y, x) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_pow(y, x) std::pow(y, x) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_pow(y, x) pow(y, x) -#endif -#endif - -#if !defined(simde_math_powf) -#if SIMDE_MATH_BUILTIN_LIBM(powf) -#define simde_math_powf(y, x) __builtin_powf(y, x) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_powf(y, x) std::pow(y, x) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_powf(y, x) powf(y, x) -#endif -#endif - -#if !defined(simde_math_rint) -#if SIMDE_MATH_BUILTIN_LIBM(rint) -#define simde_math_rint(v) __builtin_rint(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_rint(v) std::rint(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_rint(v) rint(v) -#endif -#endif - -#if !defined(simde_math_rintf) -#if SIMDE_MATH_BUILTIN_LIBM(rintf) -#define simde_math_rintf(v) __builtin_rintf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_rintf(v) std::rint(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_rintf(v) rintf(v) -#endif -#endif - -#if !defined(simde_math_round) -#if SIMDE_MATH_BUILTIN_LIBM(round) -#define simde_math_round(v) __builtin_round(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_round(v) std::round(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_round(v) round(v) -#endif -#endif - -#if !defined(simde_math_roundf) -#if SIMDE_MATH_BUILTIN_LIBM(roundf) -#define simde_math_roundf(v) __builtin_roundf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_roundf(v) std::round(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_roundf(v) roundf(v) -#endif -#endif - -#if !defined(simde_math_roundeven) -#if HEDLEY_HAS_BUILTIN(__builtin_roundeven) || \ - HEDLEY_GCC_VERSION_CHECK(10, 0, 0) -#define simde_math_roundeven(v) __builtin_roundeven(v) -#elif defined(simde_math_round) && defined(simde_math_fabs) -static HEDLEY_INLINE double simde_math_roundeven(double v) -{ - double rounded = simde_math_round(v); - double diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && - (HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; -} -#define simde_math_roundeven simde_math_roundeven -#endif -#endif - -#if !defined(simde_math_roundevenf) -#if HEDLEY_HAS_BUILTIN(__builtin_roundevenf) || \ - HEDLEY_GCC_VERSION_CHECK(10, 0, 0) -#define simde_math_roundevenf(v) __builtin_roundevenf(v) -#elif defined(simde_math_roundf) && defined(simde_math_fabsf) -static HEDLEY_INLINE float simde_math_roundevenf(float v) -{ - float rounded = simde_math_roundf(v); - float diff = rounded - v; - if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && - (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { - rounded = v - diff; - } - return rounded; -} -#define simde_math_roundevenf simde_math_roundevenf -#endif -#endif - -#if !defined(simde_math_sin) -#if SIMDE_MATH_BUILTIN_LIBM(sin) -#define simde_math_sin(v) __builtin_sin(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_sin(v) std::sin(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_sin(v) sin(v) -#endif -#endif - -#if !defined(simde_math_sinf) -#if SIMDE_MATH_BUILTIN_LIBM(sinf) -#define simde_math_sinf(v) __builtin_sinf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_sinf(v) std::sin(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_sinf(v) sinf(v) -#endif -#endif - -#if !defined(simde_math_sinh) -#if SIMDE_MATH_BUILTIN_LIBM(sinh) -#define simde_math_sinh(v) __builtin_sinh(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_sinh(v) std::sinh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_sinh(v) sinh(v) -#endif -#endif - -#if !defined(simde_math_sinhf) -#if SIMDE_MATH_BUILTIN_LIBM(sinhf) -#define simde_math_sinhf(v) __builtin_sinhf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_sinhf(v) std::sinh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_sinhf(v) sinhf(v) -#endif -#endif - -#if !defined(simde_math_sqrt) -#if SIMDE_MATH_BUILTIN_LIBM(sqrt) -#define simde_math_sqrt(v) __builtin_sqrt(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_sqrt(v) std::sqrt(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_sqrt(v) sqrt(v) -#endif -#endif - -#if !defined(simde_math_sqrtf) -#if SIMDE_MATH_BUILTIN_LIBM(sqrtf) -#define simde_math_sqrtf(v) __builtin_sqrtf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_sqrtf(v) std::sqrt(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_sqrtf(v) sqrtf(v) -#endif -#endif - -#if !defined(simde_math_tan) -#if SIMDE_MATH_BUILTIN_LIBM(tan) -#define simde_math_tan(v) __builtin_tan(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_tan(v) std::tan(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_tan(v) tan(v) -#endif -#endif - -#if !defined(simde_math_tanf) -#if SIMDE_MATH_BUILTIN_LIBM(tanf) -#define simde_math_tanf(v) __builtin_tanf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_tanf(v) std::tan(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_tanf(v) tanf(v) -#endif -#endif - -#if !defined(simde_math_tanh) -#if SIMDE_MATH_BUILTIN_LIBM(tanh) -#define simde_math_tanh(v) __builtin_tanh(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_tanh(v) std::tanh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_tanh(v) tanh(v) -#endif -#endif - -#if !defined(simde_math_tanhf) -#if SIMDE_MATH_BUILTIN_LIBM(tanhf) -#define simde_math_tanhf(v) __builtin_tanhf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_tanhf(v) std::tanh(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_tanhf(v) tanhf(v) -#endif -#endif - -#if !defined(simde_math_trunc) -#if SIMDE_MATH_BUILTIN_LIBM(trunc) -#define simde_math_trunc(v) __builtin_trunc(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_trunc(v) std::trunc(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_trunc(v) trunc(v) -#endif -#endif - -#if !defined(simde_math_truncf) -#if SIMDE_MATH_BUILTIN_LIBM(truncf) -#define simde_math_truncf(v) __builtin_truncf(v) -#elif defined(SIMDE_MATH_HAVE_CMATH) -#define simde_math_truncf(v) std::trunc(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_truncf(v) truncf(v) -#endif -#endif - -/*** Complex functions ***/ - -#if !defined(simde_math_cexp) -#if SIMDE_MATH_BUILTIN_LIBM(cexp) -#define simde_math_cexp(v) __builtin_cexp(v) -#elif defined(__cplusplus) -#define simde_math_cexp(v) std::cexp(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_cexp(v) cexp(v) -#endif -#endif - -#if !defined(simde_math_cexpf) -#if SIMDE_MATH_BUILTIN_LIBM(cexpf) -#define simde_math_cexpf(v) __builtin_cexpf(v) -#elif defined(__cplusplus) -#define simde_math_cexpf(v) std::exp(v) -#elif defined(SIMDE_MATH_HAVE_MATH_H) -#define simde_math_cexpf(v) cexpf(v) -#endif -#endif - -/*** Additional functions not in libm ***/ - -#if defined(simde_math_fabs) && defined(simde_math_sqrt) && \ - defined(simde_math_exp) -static HEDLEY_INLINE double simde_math_cdfnorm(double x) -{ - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const double a1 = 0.254829592; - static const double a2 = -0.284496736; - static const double a3 = 1.421413741; - static const double a4 = -1.453152027; - static const double a5 = 1.061405429; - static const double p = 0.3275911; - - const int sign = x < 0; - x = simde_math_fabs(x) / simde_math_sqrt(2.0); - - /* A&S formula 7.1.26 */ - double t = 1.0 / (1.0 + p * x); - double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * - simde_math_exp(-x * x); - - return 0.5 * (1.0 + (sign ? -y : y)); -} -#define simde_math_cdfnorm simde_math_cdfnorm -#endif - -#if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && \ - defined(simde_math_expf) -static HEDLEY_INLINE float simde_math_cdfnormf(float x) -{ - /* https://www.johndcook.com/blog/cpp_phi/ - * Public Domain */ - static const float a1 = 0.254829592f; - static const float a2 = -0.284496736f; - static const float a3 = 1.421413741f; - static const float a4 = -1.453152027f; - static const float a5 = 1.061405429f; - static const float p = 0.3275911f; - - const int sign = x < 0; - x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); - - /* A&S formula 7.1.26 */ - float t = 1.0f / (1.0f + p * x); - float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * - simde_math_expf(-x * x); - - return 0.5f * (1.0f + (sign ? -y : y)); -} -#define simde_math_cdfnormf simde_math_cdfnormf -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ - -#if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && \ - defined(simde_math_sqrt) -/*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ -static HEDLEY_INLINE double simde_math_cdfnorminv(double p) -{ - static const double a[] = { - -3.969683028665376e+01, 2.209460984245205e+02, - -2.759285104469687e+02, 1.383577518672690e+02, - -3.066479806614716e+01, 2.506628277459239e+00}; - - static const double b[] = {-5.447609879822406e+01, - 1.615858368580409e+02, - -1.556989798598866e+02, - 6.680131188771972e+01, - -1.328068155288572e+01}; - - static const double c[] = { - -7.784894002430293e-03, -3.223964580411365e-01, - -2.400758277161838e+00, -2.549732539343734e+00, - 4.374664141464968e+00, 2.938163982698783e+00}; - - static const double d[] = {7.784695709041462e-03, 3.224671290700398e-01, - 2.445134137142996e+00, - 3.754408661907416e+00}; - - static const double low = 0.02425; - static const double high = 0.97575; - double q, r; - - if (p < 0 || p > 1) { - return 0.0; - } else if (p == 0) { - return -SIMDE_MATH_INFINITY; - } else if (p == 1) { - return SIMDE_MATH_INFINITY; - } else if (p < low) { - q = simde_math_sqrt(-2.0 * simde_math_log(p)); - return (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + - c[4]) * q + - c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); - return -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + - c[4]) * q + - c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + - a[4]) * r + - a[5]) * - q / - (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + - b[4]) * r + - 1); - } -} -#define simde_math_cdfnorminv simde_math_cdfnorminv -#endif - -#if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && \ - defined(simde_math_sqrtf) -static HEDLEY_INLINE float simde_math_cdfnorminvf(float p) -{ - static const float a[] = { - -3.969683028665376e+01f, 2.209460984245205e+02f, - -2.759285104469687e+02f, 1.383577518672690e+02f, - -3.066479806614716e+01f, 2.506628277459239e+00f}; - static const float b[] = {-5.447609879822406e+01f, - 1.615858368580409e+02f, - -1.556989798598866e+02f, - 6.680131188771972e+01f, - -1.328068155288572e+01f}; - static const float c[] = { - -7.784894002430293e-03f, -3.223964580411365e-01f, - -2.400758277161838e+00f, -2.549732539343734e+00f, - 4.374664141464968e+00f, 2.938163982698783e+00f}; - static const float d[] = {7.784695709041462e-03f, - 3.224671290700398e-01f, - 2.445134137142996e+00f, - 3.754408661907416e+00f}; - static const float low = 0.02425f; - static const float high = 0.97575f; - float q, r; - - if (p < 0 || p > 1) { - return 0.0f; - } else if (p == 0) { - return -SIMDE_MATH_INFINITYF; - } else if (p == 1) { - return SIMDE_MATH_INFINITYF; - } else if (p < low) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); - return (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + - c[4]) * q + - c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else if (p > high) { - q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); - return -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + - c[4]) * q + - c[5]) / - (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); - } else { - q = p - 0.5f; - r = q * q; - return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + - a[4]) * r + - a[5]) * - q / - (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + - b[4]) * r + - 1); - } -} -#define simde_math_cdfnorminvf simde_math_cdfnorminvf -#endif - -#if !defined(simde_math_erfinv) && defined(simde_math_log) && \ - defined(simde_math_copysign) && defined(simde_math_sqrt) -static HEDLEY_INLINE double simde_math_erfinv(double x) -{ - /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c - * - * The original answer on SO uses a constant of 0.147, but in my - * testing 0.14829094707965850830078125 gives a lower average absolute error - * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). - * That said, if your goal is to minimize the *maximum* absolute - * error, 0.15449436008930206298828125 provides significantly better - * results; 0.0009250640869140625000000000 vs ~ 0.005. */ - double tt1, tt2, lnx; - double sgn = simde_math_copysign(1.0, x); - - x = (1.0 - x) * (1.0 + x); - lnx = simde_math_log(x); - - tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; - tt2 = (1.0 / 0.14829094707965850830078125) * lnx; - - return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); -} -#define simde_math_erfinv simde_math_erfinv -#endif - -#if !defined(simde_math_erfinvf) && defined(simde_math_logf) && \ - defined(simde_math_copysignf) && defined(simde_math_sqrtf) -static HEDLEY_INLINE float simde_math_erfinvf(float x) -{ - float tt1, tt2, lnx; - float sgn = simde_math_copysignf(1.0f, x); - - x = (1.0f - x) * (1.0f + x); - lnx = simde_math_logf(x); - - tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + - 0.5f * lnx; - tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; - - return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); -} -#define simde_math_erfinvf simde_math_erfinvf -#endif - -#if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && \ - defined(simde_math_log) && defined(simde_math_sqrt) -static HEDLEY_INLINE double simde_math_erfcinv(double x) -{ - if (x >= 0.0625 && x < 2.0) { - return simde_math_erfinv(1.0 - x); - } else if (x < 0.0625 && x >= 1.0e-100) { - double p[6] = {0.1550470003116, 1.382719649631, 0.690969348887, - -1.128081391617, 0.680544246825, -0.16444156791}; - double q[3] = {0.155024849822, 1.385228141995, 1.000000000000}; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + - t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { - double p[4] = {0.00980456202915, 0.363667889171, 0.97302949837, - -0.5374947401}; - double q[3] = {0.00980451277802, 0.363699971544, - 1.000000000000}; - - const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (!simde_math_isnormal(x)) { - return SIMDE_MATH_INFINITY; - } else { - return -SIMDE_MATH_INFINITY; - } -} - -#define simde_math_erfcinv simde_math_erfcinv -#endif - -#if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && \ - defined(simde_math_logf) && defined(simde_math_sqrtf) -static HEDLEY_INLINE float simde_math_erfcinvf(float x) -{ - if (x >= 0.0625f && x < 2.0f) { - return simde_math_erfinvf(1.0f - x); - } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { - static const float p[6] = {0.1550470003116f, 1.382719649631f, - 0.690969348887f, -1.128081391617f, - 0.680544246825f - 0.164441567910f}; - static const float q[3] = {0.155024849822f, 1.385228141995f, - 1.000000000000f}; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + - t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / - (q[0] + t * (q[1] + t * (q[2]))); - } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { - static const float p[4] = {0.00980456202915f, 0.36366788917100f, - 0.97302949837000f, - -0.5374947401000f}; - static const float q[3] = {0.00980451277802f, 0.36369997154400f, - 1.00000000000000f}; - - const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); - return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / - (q[0] + t * (q[1] + t * (q[2]))); - } else { - return simde_math_isnormalf(x) ? -SIMDE_MATH_INFINITYF - : SIMDE_MATH_INFINITYF; - } -} - -#define simde_math_erfcinvf simde_math_erfcinvf -#endif - -HEDLEY_DIAGNOSTIC_POP - -static HEDLEY_INLINE double simde_math_rad2deg(double radians) -{ - return radians * SIMDE_MATH_180_OVER_PI; -} - -static HEDLEY_INLINE float simde_math_rad2degf(float radians) -{ - return radians * SIMDE_MATH_180_OVER_PIF; -} - -static HEDLEY_INLINE double simde_math_deg2rad(double degrees) -{ - return degrees * SIMDE_MATH_PI_OVER_180; -} - -static HEDLEY_INLINE float simde_math_deg2radf(float degrees) -{ - return degrees * (SIMDE_MATH_PI_OVER_180F); -} - -/*** Saturated arithmetic ***/ - -static HEDLEY_INLINE int8_t simde_math_adds_i8(int8_t a, int8_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_s8(a, b); -#else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; - if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); -#endif -} - -static HEDLEY_INLINE int16_t simde_math_adds_i16(int16_t a, int16_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_s16(a, b); -#else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; - if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); -#endif -} - -static HEDLEY_INLINE int32_t simde_math_adds_i32(int32_t a, int32_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_s32(a, b); -#else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; - if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); -#endif -} - -static HEDLEY_INLINE int64_t simde_math_adds_i64(int64_t a, int64_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_s64(a, b); -#else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ + b_; - - a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX; - if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); -#endif -} - -static HEDLEY_INLINE uint8_t simde_math_adds_u8(uint8_t a, uint8_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddb_u8(a, b); -#else - uint8_t r = a + b; - r |= -(r < a); - return r; -#endif -} - -static HEDLEY_INLINE uint16_t simde_math_adds_u16(uint16_t a, uint16_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddh_u16(a, b); -#else - uint16_t r = a + b; - r |= -(r < a); - return r; -#endif -} - -static HEDLEY_INLINE uint32_t simde_math_adds_u32(uint32_t a, uint32_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqadds_u32(a, b); -#else - uint32_t r = a + b; - r |= -(r < a); - return r; -#endif -} - -static HEDLEY_INLINE uint64_t simde_math_adds_u64(uint64_t a, uint64_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqaddd_u64(a, b); -#else - uint64_t r = a + b; - r |= -(r < a); - return r; -#endif -} - -static HEDLEY_INLINE int8_t simde_math_subs_i8(int8_t a, int8_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_s8(a, b); -#else - uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); - uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); - uint8_t r_ = a_ - b_; - - a_ = (a_ >> 7) + INT8_MAX; - - if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int8_t, r_); -#endif -} - -static HEDLEY_INLINE int16_t simde_math_subs_i16(int16_t a, int16_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_s16(a, b); -#else - uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); - uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); - uint16_t r_ = a_ - b_; - - a_ = (a_ >> 15) + INT16_MAX; - - if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int16_t, r_); -#endif -} - -static HEDLEY_INLINE int32_t simde_math_subs_i32(int32_t a, int32_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_s32(a, b); -#else - uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); - uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); - uint32_t r_ = a_ - b_; - - a_ = (a_ >> 31) + INT32_MAX; - - if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int32_t, r_); -#endif -} - -static HEDLEY_INLINE int64_t simde_math_subs_i64(int64_t a, int64_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_s64(a, b); -#else - uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a); - uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b); - uint64_t r_ = a_ - b_; - - a_ = (a_ >> 63) + INT64_MAX; - - if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) { - r_ = a_; - } - - return HEDLEY_STATIC_CAST(int64_t, r_); -#endif -} - -static HEDLEY_INLINE uint8_t simde_math_subs_u8(uint8_t a, uint8_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubb_u8(a, b); -#else - uint8_t res = a - b; - res &= -(res <= a); - return res; -#endif -} - -static HEDLEY_INLINE uint16_t simde_math_subs_u16(uint16_t a, uint16_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubh_u16(a, b); -#else - uint16_t res = a - b; - res &= -(res <= a); - return res; -#endif -} - -static HEDLEY_INLINE uint32_t simde_math_subs_u32(uint32_t a, uint32_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubs_u32(a, b); -#else - uint32_t res = a - b; - res &= -(res <= a); - return res; -#endif -} - -static HEDLEY_INLINE uint64_t simde_math_subs_u64(uint64_t a, uint64_t b) -{ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vqsubd_u64(a, b); -#else - uint64_t res = a - b; - res &= -(res <= a); - return res; -#endif -} - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_MATH_H) */ diff --git a/libobs/util/simde/x86/mmx.h b/libobs/util/simde/x86/mmx.h deleted file mode 100644 index 68bc3f36c76bf4..00000000000000 --- a/libobs/util/simde/x86/mmx.h +++ /dev/null @@ -1,2456 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - */ - -#if !defined(SIMDE_X86_MMX_H) -#define SIMDE_X86_MMX_H - -#include "../simde-common.h" - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS - -#if defined(SIMDE_X86_MMX_NATIVE) -#define SIMDE_X86_MMX_USE_NATIVE_TYPE -#elif defined(SIMDE_X86_SSE_NATIVE) -#define SIMDE_X86_MMX_USE_NATIVE_TYPE -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) -#include -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#include -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) -#include -#endif - -#include -#include - -SIMDE_BEGIN_DECLS_ - -typedef union { -#if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else - SIMDE_ALIGN_TO_8 int8_t i8[8]; - SIMDE_ALIGN_TO_8 int16_t i16[4]; - SIMDE_ALIGN_TO_8 int32_t i32[2]; - SIMDE_ALIGN_TO_8 int64_t i64[1]; - SIMDE_ALIGN_TO_8 uint8_t u8[8]; - SIMDE_ALIGN_TO_8 uint16_t u16[4]; - SIMDE_ALIGN_TO_8 uint32_t u32[2]; - SIMDE_ALIGN_TO_8 uint64_t u64[1]; - SIMDE_ALIGN_TO_8 simde_float32 f32[2]; - SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; -#endif - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) - __m64 n; -#endif -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t neon_i8; - int16x4_t neon_i16; - int32x2_t neon_i32; - int64x1_t neon_i64; - uint8x8_t neon_u8; - uint16x4_t neon_u16; - uint32x2_t neon_u32; - uint64x1_t neon_u64; - float32x2_t neon_f32; -#endif -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - int8x8_t mmi_i8; - int16x4_t mmi_i16; - int32x2_t mmi_i32; - int64_t mmi_i64; - uint8x8_t mmi_u8; - uint16x4_t mmi_u16; - uint32x2_t mmi_u32; - uint64_t mmi_u64; -#endif -} simde__m64_private; - -#if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) -typedef __m64 simde__m64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -typedef int32x2_t simde__m64; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) -typedef int32x2_t simde__m64; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) -typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; -#else -typedef simde__m64_private simde__m64; -#endif - -#if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && \ - defined(SIMDE_ENABLE_NATIVE_ALIASES) -#define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES -typedef simde__m64 __m64; -#endif - -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); -HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, - "simde__m64 is not 8-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, - "simde__m64_private is not 8-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde__m64_from_private(simde__m64_private v) -{ - simde__m64 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64_private simde__m64_to_private(simde__m64 v) -{ - simde__m64_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, \ - fragment) \ - SIMDE_FUNCTION_ATTRIBUTES \ - simde__##simde_type simde__##simde_type##_from_##isax##_##fragment( \ - source_type value) \ - { \ - simde__##simde_type##_private r_; \ - r_.isax##_##fragment = value; \ - return simde__##simde_type##_from_private(r_); \ - } \ - \ - SIMDE_FUNCTION_ATTRIBUTES \ - source_type simde__##simde_type##_to_##isax##_##fragment( \ - simde__##simde_type value) \ - { \ - simde__##simde_type##_private r_ = \ - simde__##simde_type##_to_private(value); \ - return r_.isax##_##fragment; \ - } - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) -#endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_add_pi8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi8(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) -#define _m_paddb(a, b) simde_m_paddb(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_add_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_paddw(a, b) simde_mm_add_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) -#define _m_paddw(a, b) simde_mm_add_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_add_pi32(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_pi32(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) -#define _m_paddd(a, b) simde_mm_add_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_adds_pi8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi8(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - if ((((b_.i8[i]) > 0) && - ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { - r_.i8[i] = INT8_MAX; - } else if ((((b_.i8[i]) < 0) && - ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { - r_.i8[i] = INT8_MIN; - } else { - r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); - } - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) -#define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_adds_pu8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu8(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - const uint_fast16_t x = - HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + - HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); - if (x > UINT8_MAX) - r_.u8[i] = UINT8_MAX; - else - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) -#define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_adds_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - if ((((b_.i16[i]) > 0) && - ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { - r_.i16[i] = INT16_MAX; - } else if ((((b_.i16[i]) < 0) && - ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { - r_.i16[i] = SHRT_MIN; - } else { - r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); - } - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) -#define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_adds_pu16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_adds_pu16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - const uint32_t x = a_.u16[i] + b_.u16[i]; - if (x > UINT16_MAX) - r_.u16[i] = UINT16_MAX; - else - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) -#define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_and_si64(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_and_si64(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 & b_.i64; -#else - r_.i64[0] = a_.i64[0] & b_.i64[0]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pand(a, b) simde_mm_and_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_and_si64(a, b) simde_mm_and_si64(a, b) -#define _m_pand(a, b) simde_mm_and_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_andnot_si64(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_andnot_si64(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; -#else - r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) -#define _m_pandn(a, b) simde_mm_andnot_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cmpeq_pi8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi8(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) -#define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cmpeq_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) -#define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cmpeq_pi32(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpeq_pi32(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) -#define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cmpgt_pi8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi8(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) -#define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cmpgt_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) -#define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cmpgt_pi32(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cmpgt_pi32(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) -#define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t simde_mm_cvtm64_si64(simde__m64 a) -{ -#if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - !defined(__PGI) - return _mm_cvtm64_si64(a); -#else - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \ - SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0) -#pragma clang diagnostic ignored "-Wvector-conversion" -#endif - return vget_lane_s64(a_.neon_i64, 0); - HEDLEY_DIAGNOSTIC_POP -#else - return a_.i64[0]; -#endif -#endif -} -#define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) -#define _m_to_int64(a) simde_mm_cvtm64_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cvtsi32_si64(int32_t a) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi32_si64(a); -#else - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t av[sizeof(r_.neon_i32) / sizeof(r_.neon_i32[0])] = {a, 0}; - r_.neon_i32 = vld1_s32(av); -#else - r_.i32[0] = a; - r_.i32[1] = 0; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) -#define _m_from_int(a) simde_mm_cvtsi32_si64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cvtsi64_m64(int64_t a) -{ -#if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - !defined(__PGI) - return _mm_cvtsi64_m64(a); -#else - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vld1_s64(&a); -#else - r_.i64[0] = a; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) -#define _m_from_int64(a) simde_mm_cvtsi64_m64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_cvtsi64_si32(simde__m64 a) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtsi64_si32(a); -#else - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \ - SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0) -#pragma clang diagnostic ignored "-Wvector-conversion" -#endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP -#else - return a_.i32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_empty(void) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - _mm_empty(); -#else - /* noop */ -#endif -} -#define simde_m_empty() simde_mm_empty() -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_empty() simde_mm_empty() -#define _m_empty() simde_mm_empty() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_madd_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_madd_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + - (a_.i16[i + 1] * b_.i16[i + 1]); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) -#define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_mulhi_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, - ((a_.i16[i] * b_.i16[i]) >> 16)); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) -#define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_mullo_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_mullo_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); - const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); - r_.neon_u16 = t2; -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = HEDLEY_STATIC_CAST( - int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) -#define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_or_si64(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_or_si64(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 | b_.i64; -#else - r_.i64[0] = a_.i64[0] | b_.i64[0]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_por(a, b) simde_mm_or_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_or_si64(a, b) simde_mm_or_si64(a, b) -#define _m_por(a, b) simde_mm_or_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_packs_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - if (a_.i16[i] < INT8_MIN) { - r_.i8[i] = INT8_MIN; - } else if (a_.i16[i] > INT8_MAX) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - if (b_.i16[i] < INT8_MIN) { - r_.i8[i + 4] = INT8_MIN; - } else if (b_.i16[i] > INT8_MAX) { - r_.i8[i + 4] = INT8_MAX; - } else { - r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); - } - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) -#define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_packs_pi32(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pi32(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (8 / sizeof(a_.i32[0])); i++) { - if (a_.i32[i] < SHRT_MIN) { - r_.i16[i] = SHRT_MIN; - } else if (a_.i32[i] > INT16_MAX) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0; i < (8 / sizeof(b_.i32[0])); i++) { - if (b_.i32[i] < SHRT_MIN) { - r_.i16[i + 2] = SHRT_MIN; - } else if (b_.i32[i] > INT16_MAX) { - r_.i16[i + 2] = INT16_MAX; - } else { - r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); - } - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) -#define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_packs_pu16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_packs_pu16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); - - /* Set elements which are < 0 to 0 */ - const int16x8_t t2 = - vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); - - /* Vector with all s16 elements set to UINT8_MAX */ - const int16x8_t vmax = - vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); - - /* Elements which are within the acceptable range */ - const int16x8_t le_max = - vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); - const int16x8_t gt_max = - vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); - - /* Final values as 16-bit integers */ - const int16x8_t values = vorrq_s16(le_max, gt_max); - - r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - if (a_.i16[i] > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else if (a_.i16[i] < 0) { - r_.u8[i] = 0; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); - } - } - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - if (b_.i16[i] > UINT8_MAX) { - r_.u8[i + 4] = UINT8_MAX; - } else if (b_.i16[i] < 0) { - r_.u8[i + 4] = 0; - } else { - r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, b_.i16[i]); - } - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) -#define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_set_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); -#else - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = {e0, e1, e2, e3, - e4, e5, e6, e7}; - r_.neon_i8 = vld1_s8(v); -#else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_x_mm_set_pu8(uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, - uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) -{ - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi8( - HEDLEY_STATIC_CAST(int8_t, e7), HEDLEY_STATIC_CAST(int8_t, e6), - HEDLEY_STATIC_CAST(int8_t, e5), HEDLEY_STATIC_CAST(int8_t, e4), - HEDLEY_STATIC_CAST(int8_t, e3), HEDLEY_STATIC_CAST(int8_t, e2), - HEDLEY_STATIC_CAST(int8_t, e1), HEDLEY_STATIC_CAST(int8_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = {e0, e1, e2, e3, - e4, e5, e6, e7}; - r_.neon_u8 = vld1_u8(v); -#else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_set_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_pi16(e3, e2, e1, e0); -#else - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = {e0, e1, e2, e3}; - r_.neon_i16 = vld1_s16(v); -#else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_x_mm_set_pu16(uint16_t e3, uint16_t e2, uint16_t e1, - uint16_t e0) -{ - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi16(HEDLEY_STATIC_CAST(int16_t, e3), - HEDLEY_STATIC_CAST(int16_t, e2), - HEDLEY_STATIC_CAST(int16_t, e1), - HEDLEY_STATIC_CAST(int16_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = {e0, e1, e2, e3}; - r_.neon_u16 = vld1_u16(v); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_x_mm_set_pu32(uint32_t e1, uint32_t e0) -{ - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(HEDLEY_STATIC_CAST(int32_t, e1), - HEDLEY_STATIC_CAST(int32_t, e0)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = {e0, e1}; - r_.neon_u32 = vld1_u32(v); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_set_pi32(int32_t e1, int32_t e0) -{ - simde__m64_private r_; - -#if defined(SIMDE_X86_MMX_NATIVE) - r_.n = _mm_set_pi32(e1, e0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = {e0, e1}; - r_.neon_i32 = vld1_s32(v); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_x_mm_set_pi64(int64_t e0) -{ - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = {e0}; - r_.neon_i64 = vld1_s64(v); -#else - r_.i64[0] = e0; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_x_mm_set_f32x2(simde_float32 e1, simde_float32 e0) -{ - simde__m64_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = {e0, e1}; - r_.neon_f32 = vld1_f32(v); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; -#endif - - return simde__m64_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_set1_pi8(int8_t a) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi8(a); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i8 = vmov_n_s8(a); - return simde__m64_from_private(r_); -#else - return simde_mm_set_pi8(a, a, a, a, a, a, a, a); -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_set1_pi8(a) simde_mm_set1_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_set1_pi16(int16_t a) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi16(a); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i16 = vmov_n_s16(a); - return simde__m64_from_private(r_); -#else - return simde_mm_set_pi16(a, a, a, a); -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_set1_pi16(a) simde_mm_set1_pi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_set1_pi32(int32_t a) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_pi32(a); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_i32 = vmov_n_s32(a); - return simde__m64_from_private(r_); -#else - return simde_mm_set_pi32(a, a); -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_set1_pi32(a) simde_mm_set1_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_setr_pi8(int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); -#else - return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_setr_pi16(int16_t e3, int16_t e2, int16_t e1, int16_t e0) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi16(e3, e2, e1, e0); -#else - return simde_mm_set_pi16(e0, e1, e2, e3); -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_setr_pi32(int32_t e1, int32_t e0) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_pi32(e1, e0); -#else - return simde_mm_set_pi32(e0, e1); -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_setzero_si64(void) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_setzero_si64(); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - simde__m64_private r_; - r_.neon_u32 = vmov_n_u32(0); - return simde__m64_from_private(r_); -#else - return simde_mm_set_pi32(0, 0); -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_setzero_si64() simde_mm_setzero_si64() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_x_mm_load_si64(const void *mem_addr) -{ - simde__m64 r; - simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), - sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_x_mm_loadu_si64(const void *mem_addr) -{ - simde__m64 r; - simde_memcpy(&r, mem_addr, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_mm_store_si64(void *mem_addr, simde__m64 value) -{ - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, - sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -void simde_x_mm_storeu_si64(void *mem_addr, simde__m64 value) -{ - simde_memcpy(mem_addr, &value, sizeof(value)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_x_mm_setone_si64(void) -{ - return simde_mm_set1_pi32(~INT32_C(0)); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sll_pi16(simde__m64 a, simde__m64 count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi16(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \ - SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0) -#pragma clang diagnostic ignored "-Wvector-conversion" -#endif - r_.neon_i16 = - vshl_s16(a_.neon_i16, - vmov_n_s16(HEDLEY_STATIC_CAST( - int16_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP -#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \ - defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count_.u64[0]; -#else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, - a_.u16[i] << count_.u64[0]); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) -#define _m_psllw(a, count) simde_mm_sll_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sll_pi32(simde__m64 a, simde__m64 count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_pi32(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \ - SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0) -#pragma clang diagnostic ignored "-Wvector-conversion" -#endif - r_.neon_i32 = - vshl_s32(a_.neon_i32, - vmov_n_s32(HEDLEY_STATIC_CAST( - int32_t, vget_lane_u64(count_.neon_u64, 0)))); - HEDLEY_DIAGNOSTIC_POP -#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count_.u64[0]; -#else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) { - r_.u32[i] = a_.u32[i] << count_.u64[0]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) -#define _m_pslld(a, count) simde_mm_sll_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_slli_pi16(simde__m64 a, int count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi16(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \ - defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << count; -#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t)count)); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psllh_s(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) -#define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_slli_pi32(simde__m64 a, int count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_slli_pi32(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << count; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t)count)); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) { - r_.u32[i] = a_.u32[i] << count; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) -#define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_slli_si64(simde__m64 a, int count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_slli_si64(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << count; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t)count)); -#else - r_.u64[0] = a_.u64[0] << count; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) -#define _m_psllqi(a, count) simde_mm_slli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sll_si64(simde__m64 a, simde__m64 count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sll_si64(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 << count_.i64; -#else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] << count_.u64[0]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) -#define _m_psllq(a, count) simde_mm_sll_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_srl_pi16(simde__m64 a, simde__m64 count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi16(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \ - defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) - return simde_mm_setzero_si64(); - - r_.i16 = a_.i16 >> HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count_.u64[0]; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16( - a_.neon_u16, - vmov_n_s16(-((int16_t)vget_lane_u64(count_.neon_u64, 0)))); -#else - if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0; i < sizeof(r_.u16) / sizeof(r_.u16[0]); i++) { - r_.u16[i] = a_.u16[i] >> count_.u64[0]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) -#define _m_psrlw(a, count) simde_mm_srl_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_srl_pi32(simde__m64 a, simde__m64 count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_pi32(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count_.u64[0]; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32( - a_.neon_u32, - vmov_n_s32(-((int32_t)vget_lane_u64(count_.neon_u64, 0)))); -#else - if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - SIMDE_VECTORIZE - for (size_t i = 0; i < sizeof(r_.u32) / sizeof(r_.u32[0]); i++) { - r_.u32[i] = a_.u32[i] >> count_.u64[0]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) -#define _m_psrld(a, count) simde_mm_srl_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_srli_pi16(simde__m64 a, int count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi16(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> count; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t)count))); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = a_.u16[i] >> count; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) -#define _m_psrlwi(a, count) simde_mm_srli_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_srli_pi32(simde__m64 a, int count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_pi32(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> count; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t)count))); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) { - r_.u32[i] = a_.u32[i] >> count; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) -#define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_srli_si64(simde__m64 a, int count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srli_si64(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u64 = a_.u64 >> count; -#else - r_.u64[0] = a_.u64[0] >> count; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) -#define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_srl_si64(simde__m64 a, simde__m64 count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_srl_si64(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u64 = a_.u64 >> count_.u64; -#else - if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { - simde_memset(&r_, 0, sizeof(r_)); - return simde__m64_from_private(r_); - } - - r_.u64[0] = a_.u64[0] >> count_.u64[0]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) -#define _m_psrlq(a, count) simde_mm_srl_si64(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_srai_pi16(simde__m64 a, int count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi16(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> (count & 0xff); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshl_s16(a_.neon_i16, - vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psrah_s(a_.mmi_i16, count); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[i] >> (count & 0xff); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) -#define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_srai_pi32(simde__m64 a, int count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) - return _mm_srai_pi32(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> (count & 0xff); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshl_s32(a_.neon_i32, - vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psraw_s(a_.mmi_i32, count); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] >> (count & 0xff); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) -#define _m_psradi(a, count) simde_mm_srai_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sra_pi16(simde__m64 a, simde__m64 count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi16(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int cnt = HEDLEY_STATIC_CAST( - int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 >> cnt; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = - vshl_s16(a_.neon_i16, - vmov_n_s16(-HEDLEY_STATIC_CAST( - int16_t, vget_lane_u64(count_.neon_u64, 0)))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) -#define _m_psraw(a, count) simde_mm_sra_pi16(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sra_pi32(simde__m64 a, simde__m64 count) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sra_pi32(a, count); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private count_ = simde__m64_to_private(count); - const int32_t cnt = - (count_.u64[0] > 31) - ? 31 - : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 >> cnt; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = - vshl_s32(a_.neon_i32, - vmov_n_s32(-HEDLEY_STATIC_CAST( - int32_t, vget_lane_u64(count_.neon_u64, 0)))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) -#define _m_psrad(a, count) simde_mm_sra_pi32(a, count) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sub_pi8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi8(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) -#define _m_psubb(a, b) simde_mm_sub_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sub_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubh_s(a_.mmi_i16, b_.mmi_i16); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) -#define _m_psubw(a, b) simde_mm_sub_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sub_pi32(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_pi32(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) -#define _m_psubd(a, b) simde_mm_sub_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_subs_pi8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi8(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && - (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) -#define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_subs_pu8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu8(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) -#define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_subs_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - if (((b_.i16[i]) > 0 && (a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { - r_.i16[i] = SHRT_MIN; - } else if ((b_.i16[i]) < 0 && - (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) -#define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_subs_pu16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_subs_pu16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - const int x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) -#define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_unpackhi_pi8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi8(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, - 7, 15); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); -#else - r_.i8[0] = a_.i8[4]; - r_.i8[1] = b_.i8[4]; - r_.i8[2] = a_.i8[5]; - r_.i8[3] = b_.i8[5]; - r_.i8[4] = a_.i8[6]; - r_.i8[5] = b_.i8[6]; - r_.i8[6] = a_.i8[7]; - r_.i8[7] = b_.i8[7]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) -#define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_unpackhi_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); -#else - r_.i16[0] = a_.i16[2]; - r_.i16[1] = b_.i16[2]; - r_.i16[2] = a_.i16[3]; - r_.i16[3] = b_.i16[3]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) -#define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_unpackhi_pi32(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpackhi_pi32(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); -#else - r_.i32[0] = a_.i32[1]; - r_.i32[1] = b_.i32[1]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) -#define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_unpacklo_pi8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi8(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, - 11); -#else - r_.i8[0] = a_.i8[0]; - r_.i8[1] = b_.i8[0]; - r_.i8[2] = a_.i8[1]; - r_.i8[3] = b_.i8[1]; - r_.i8[4] = a_.i8[2]; - r_.i8[5] = b_.i8[2]; - r_.i8[6] = a_.i8[3]; - r_.i8[7] = b_.i8[3]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) -#define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_unpacklo_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi16(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); -#else - r_.i16[0] = a_.i16[0]; - r_.i16[1] = b_.i16[0]; - r_.i16[2] = a_.i16[1]; - r_.i16[3] = b_.i16[1]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) -#define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_unpacklo_pi32(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_unpacklo_pi32(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) - r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); -#else - r_.i32[0] = a_.i32[0]; - r_.i32[1] = b_.i32[0]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) -#define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_xor_si64(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _mm_xor_si64(a, b); -#else - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; -#else - r_.u64[0] = a_.u64[0] ^ b_.u64[0]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _mm_xor_si64(a, b) simde_mm_xor_si64(a, b) -#define _m_pxor(a, b) simde_mm_xor_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_m_to_int(simde__m64 a) -{ -#if defined(SIMDE_X86_MMX_NATIVE) - return _m_to_int(a); -#else - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wvector-conversion") && \ - SIMDE_DETECT_CLANG_VERSION_NOT(10, 0, 0) -#pragma clang diagnostic ignored "-Wvector-conversion" -#endif - return vget_lane_s32(a_.neon_i32, 0); - HEDLEY_DIAGNOSTIC_POP -#else - return a_.i32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) -#define _m_to_int(a) simde_m_to_int(a) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_MMX_H) */ diff --git a/libobs/util/simde/x86/sse.h b/libobs/util/simde/x86/sse.h deleted file mode 100644 index 52f36b331970c5..00000000000000 --- a/libobs/util/simde/x86/sse.h +++ /dev/null @@ -1,4479 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - */ - -#if !defined(SIMDE_X86_SSE_H) -#define SIMDE_X86_SSE_H - -#include "mmx.h" - -#if defined(_WIN32) -#include -#endif - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { -#if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; -#if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; -#endif - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; -#endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - -#if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_ALIGN_TO_16 __m128 n; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; -#endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; -#endif -#endif -} simde__m128_private; - -#if defined(SIMDE_X86_SSE_NATIVE) -typedef __m128 simde__m128; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -typedef float32x4_t simde__m128; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -typedef v128_t simde__m128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) -typedef simde_float32 - simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else -typedef simde__m128_private simde__m128; -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -typedef simde__m128 __m128; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), - "simde__m128_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, - "simde__m128 is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, - "simde__m128_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde__m128_from_private(simde__m128_private v) -{ - simde__m128 r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128_private simde__m128_to_private(simde__m128 v) -{ - simde__m128_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) -#endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, - SIMDE_POWER_ALTIVEC_VECTOR(signed char), - altivec, i8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, - SIMDE_POWER_ALTIVEC_VECTOR(signed short), - altivec, i16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, - SIMDE_POWER_ALTIVEC_VECTOR(signed int), - altivec, i32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - altivec, u32) - -#if defined(SIMDE_BUG_GCC_95782) -SIMDE_FUNCTION_ATTRIBUTES -SIMDE_POWER_ALTIVEC_VECTOR(float) -simde__m128_to_altivec_f32(simde__m128 value) -{ - simde__m128_private r_ = simde__m128_to_private(value); - return r_.altivec_f32; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) - value) -{ - simde__m128_private r_; - r_.altivec_f32 = value; - return simde__m128_from_private(r_); -} -#else -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), - altivec, f32) -#endif - -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) -#endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); -#endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ - -enum { -#if defined(SIMDE_X86_SSE_NATIVE) - SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, - SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, - SIMDE_MM_ROUND_UP = _MM_ROUND_UP, - SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO -#else - SIMDE_MM_ROUND_NEAREST = 0x0000, - SIMDE_MM_ROUND_DOWN = 0x2000, - SIMDE_MM_ROUND_UP = 0x4000, - SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 -#endif -}; - -#if defined(_MM_FROUND_TO_NEAREST_INT) -#define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT -#define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF -#define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF -#define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO -#define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION - -#define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC -#define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC -#else -#define SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 -#define SIMDE_MM_FROUND_TO_NEG_INF 0x01 -#define SIMDE_MM_FROUND_TO_POS_INF 0x02 -#define SIMDE_MM_FROUND_TO_ZERO 0x03 -#define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 - -#define SIMDE_MM_FROUND_RAISE_EXC 0x00 -#define SIMDE_MM_FROUND_NO_EXC 0x08 -#endif - -#define SIMDE_MM_FROUND_NINT \ - (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_FLOOR \ - (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_CEIL \ - (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_TRUNC \ - (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_RINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) -#define SIMDE_MM_FROUND_NEARBYINT \ - (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) - -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && \ - !defined(_MM_FROUND_TO_NEAREST_INT) -#define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT -#define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF -#define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF -#define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO -#define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION -#define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC -#define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT -#define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR -#define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL -#define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC -#define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT -#define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT -#endif - -SIMDE_FUNCTION_ATTRIBUTES -unsigned int SIMDE_MM_GET_ROUNDING_MODE(void) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _MM_GET_ROUNDING_MODE(); -#elif defined(SIMDE_HAVE_FENV_H) - unsigned int vfe_mode; - - switch (fegetround()) { -#if defined(FE_TONEAREST) - case FE_TONEAREST: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; -#endif - -#if defined(FE_TOWARDZERO) - case FE_TOWARDZERO: - vfe_mode = SIMDE_MM_ROUND_DOWN; - break; -#endif - -#if defined(FE_UPWARD) - case FE_UPWARD: - vfe_mode = SIMDE_MM_ROUND_UP; - break; -#endif - -#if defined(FE_DOWNWARD) - case FE_DOWNWARD: - vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; - break; -#endif - - default: - vfe_mode = SIMDE_MM_ROUND_NEAREST; - break; - } - - return vfe_mode; -#else - return SIMDE_MM_ROUND_NEAREST; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _MM_GET_ROUNDING_MODE() SIMDE_MM_GET_ROUNDING_MODE() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - _MM_SET_ROUNDING_MODE(a); -#elif defined(SIMDE_HAVE_FENV_H) - int fe_mode = FE_TONEAREST; - - switch (a) { -#if defined(FE_TONEAREST) - case SIMDE_MM_ROUND_NEAREST: - fe_mode = FE_TONEAREST; - break; -#endif - -#if defined(FE_TOWARDZERO) - case SIMDE_MM_ROUND_TOWARD_ZERO: - fe_mode = FE_TOWARDZERO; - break; -#endif - -#if defined(FE_DOWNWARD) - case SIMDE_MM_ROUND_DOWN: - fe_mode = FE_DOWNWARD; - break; -#endif - -#if defined(FE_UPWARD) - case SIMDE_MM_ROUND_UP: - fe_mode = FE_UPWARD; - break; -#endif - - default: - return; - } - - fesetround(fe_mode); -#else - (void)a; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -uint32_t simde_mm_getcsr(void) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_getcsr(); -#else - return SIMDE_MM_GET_ROUNDING_MODE(); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_getcsr() simde_mm_getcsr() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_setcsr(uint32_t a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_setcsr(a); -#else - SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_setcsr(a) simde_mm_setcsr(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_x_mm_round_ps(simde__m128 a, int rounding, int lax_rounding) - SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) - SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) -{ - simde__m128_private r_, a_ = simde__m128_to_private(a); - - (void)lax_rounding; - -/* For architectures which lack a current direction SIMD instruction. - * - * Note that NEON actually has a current rounding mode instruction, - * but in ARMv8+ the rounding mode is ignored and nearest is always - * used, so we treat ARMv7 as having a rounding mode but ARMv8 as - * not. */ -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ARM_NEON_A32V8) - if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) - rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) - << 13; -#endif - - switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { - case SIMDE_MM_FROUND_CUR_DIRECTION: -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_round(a_.altivec_f32)); -#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_f32 = vrndiq_f32(a_.neon_f32); -#elif defined(simde_math_nearbyintf) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); - i++) { - r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); - } -#else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); -#endif - break; - - case SIMDE_MM_FROUND_TO_NEAREST_INT: -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_rint(a_.altivec_f32)); -#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndnq_f32(a_.neon_f32); -#elif defined(simde_math_roundevenf) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); - i++) { - r_.f32[i] = simde_math_roundevenf(a_.f32[i]); - } -#else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); -#endif - break; - - case SIMDE_MM_FROUND_TO_NEG_INF: -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_floor(a_.altivec_f32)); -#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndmq_f32(a_.neon_f32); -#elif defined(simde_math_floorf) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); - i++) { - r_.f32[i] = simde_math_floorf(a_.f32[i]); - } -#else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); -#endif - break; - - case SIMDE_MM_FROUND_TO_POS_INF: -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_ceil(a_.altivec_f32)); -#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndpq_f32(a_.neon_f32); -#elif defined(simde_math_ceilf) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); - i++) { - r_.f32[i] = simde_math_ceilf(a_.f32[i]); - } -#else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); -#endif - break; - - case SIMDE_MM_FROUND_TO_ZERO: -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_trunc(a_.altivec_f32)); -#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vrndq_f32(a_.neon_f32); -#elif defined(simde_math_truncf) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); - i++) { - r_.f32[i] = simde_math_truncf(a_.f32[i]); - } -#else - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); -#endif - break; - - default: - HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); - } - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE4_1_NATIVE) -#define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) -#else -#define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) -#endif -#if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) -#define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_set_ps(simde_float32 e3, simde_float32 e2, - simde_float32 e1, simde_float32 e0) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps(e3, e2, e1, e0); -#else - simde__m128_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 simde_float32 data[4] = {e0, e1, e2, e3}; - r_.neon_f32 = vld1q_f32(data); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); -#else - r_.f32[0] = e0; - r_.f32[1] = e1; - r_.f32[2] = e2; - r_.f32[3] = e3; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_set_ps1(simde_float32 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ps1(a); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(a); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void)a; - return vec_splats(a); -#else - return simde_mm_set_ps(a, a, a, a); -#endif -} -#define simde_mm_set1_ps(a) simde_mm_set_ps1(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_set_ps1(a) simde_mm_set_ps1(a) -#define _mm_set1_ps(a) simde_mm_set1_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_move_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_move_ss(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = - vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) - m = {16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - r_.altivec_f32 = vec_perm(a_.altivec_f32, b_.altivec_f32, m); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, - 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, - 29, 30, 31); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); -#else - r_.f32[0] = b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_add_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 + b_.f32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = a_.f32[i] + b_.f32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_add_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_add_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_add_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); - float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); - // the upper values in the result must be the remnants of . - r_.neon_f32 = vaddq_f32(a_.neon_f32, value); -#else - r_.f32[0] = a_.f32[0] + b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_and_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_and_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 & b_.i32; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] & b_.i32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_andnot_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_andnot_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32 & b_.i32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_xor_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_xor_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) { - r_.u32[i] = a_.u32[i] ^ b_.u32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_or_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_or_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) { - r_.u32[i] = a_.u32[i] | b_.u32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_x_mm_not_ps(simde__m128 a) -{ -#if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); -#elif defined(SIMDE_X86_SSE2_NATIVE) - /* Note: we use ints instead of floats because we don't want cmpeq - * to return false for (NaN, NaN) */ - __m128i ai = _mm_castps_si128(a); - return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = ~a_.i32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = ~(a_.i32[i]); - } -#endif - - return simde__m128_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_x_mm_select_ps(simde__m128 a, simde__m128 b, simde__m128 mask) -{ -/* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_ps, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ -#if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_ps(a, b, mask); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b), - mask_ = simde__m128_to_private(mask); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, - mask_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = - vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] ^ - ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); - } -#endif - - return simde__m128_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_avg_pu16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu16(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && \ - defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \ - defined(SIMDE_CONVERT_VECTOR_) - uint32_t wa SIMDE_VECTOR(16); - uint32_t wb SIMDE_VECTOR(16); - uint32_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) -#define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_avg_pu8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_avg_pu8(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && \ - defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \ - defined(SIMDE_CONVERT_VECTOR_) - uint16_t wa SIMDE_VECTOR(16); - uint16_t wb SIMDE_VECTOR(16); - uint16_t wr SIMDE_VECTOR(16); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) -#define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_x_mm_abs_ps(simde__m128 a) -{ -#if defined(SIMDE_X86_AVX512F_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7, 1, 0)) - return _mm512_castps512_ps128(_mm512_abs_ps(_mm512_castps128_ps512(a))); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = simde_math_fabsf(a_.f32[i]); - } -#endif - - return simde__m128_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpeq_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? ~UINT32_C(0) - : UINT32_C(0); - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpeq_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpeq_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpge_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpge_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_cmpge(a_.altivec_f32, b_.altivec_f32)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? ~UINT32_C(0) - : UINT32_C(0); - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpge_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpge_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpgt_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpgt_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = (a_.f32[i] > b_.f32[i]) ? ~UINT32_C(0) - : UINT32_C(0); - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpgt_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpgt_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmple_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_cmple(a_.altivec_f32, b_.altivec_f32)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = (a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) - : UINT32_C(0); - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmple_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmple_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmplt_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_cmplt(a_.altivec_f32, b_.altivec_f32)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = (a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) - : UINT32_C(0); - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmplt_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmplt_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpneq_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && SIMDE_ARCH_POWER_CHECK(900) && \ - !defined(HEDLEY_IBM_VERSION) - /* vec_cmpne(SIMDE_POWER_ALTIVEC_VECTOR(float), SIMDE_POWER_ALTIVEC_VECTOR(float)) - is missing from XL C/C++ v16.1.1, - though the documentation (table 89 on page 432 of the IBM XL C/C++ for - Linux Compiler Reference, Version 16.1.1) shows that it should be - present. Both GCC and clang support it. */ - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_cmpne(a_.altivec_f32, b_.altivec_f32)); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nor(r_.altivec_f32, r_.altivec_f32)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) - : UINT32_C(0); - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpneq_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpneq_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = a_.u32[i]; - } - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpnge_ps(simde__m128 a, simde__m128 b) -{ - return simde_mm_cmplt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpnge_ss(simde__m128 a, simde__m128 b) -{ - return simde_mm_cmplt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpngt_ps(simde__m128 a, simde__m128 b) -{ - return simde_mm_cmple_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpngt_ss(simde__m128 a, simde__m128 b) -{ - return simde_mm_cmple_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpnle_ps(simde__m128 a, simde__m128 b) -{ - return simde_mm_cmpgt_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpnle_ss(simde__m128 a, simde__m128 b) -{ - return simde_mm_cmpgt_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpnlt_ps(simde__m128 a, simde__m128 b) -{ - return simde_mm_cmpge_ps(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpnlt_ss(simde__m128 a, simde__m128 b) -{ - return simde_mm_cmpge_ss(a, b); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpord_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ps(a, b); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vandq_u32(ceqaa, ceqbb); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), - wasm_f32x4_eq(b_.wasm_v128, b_.wasm_v128)); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), - vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); -#elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || - simde_math_isnanf(b_.f32[i])) - ? UINT32_C(0) - : ~UINT32_C(0); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpunord_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpunord_ps(a, b); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); - r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), - wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), - vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), - vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); - r_.altivec_f32 = vec_nor(r_.altivec_f32, r_.altivec_f32); -#elif defined(simde_math_isnanf) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || - simde_math_isnanf(b_.f32[i])) - ? ~UINT32_C(0) - : UINT32_C(0); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpunord_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) - return _mm_cmpunord_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(simde_math_isnanf) - r_.u32[0] = - (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) - ? ~UINT32_C(0) - : UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) { - r_.u32[i] = a_.u32[i]; - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comieq_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comieq_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); -#else - return a_.f32[0] == b_.f32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comige_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comige_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); -#else - return a_.f32[0] >= b_.f32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comigt_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comigt_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); -#else - return a_.f32[0] > b_.f32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comile_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comile_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); -#else - return a_.f32[0] <= b_.f32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comilt_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comilt_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); -#else - return a_.f32[0] < b_.f32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comineq_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_comineq_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); -#else - return a_.f32[0] != b_.f32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) -{ - simde__m128_private r_, dest_ = simde__m128_to_private(dest), - src_ = simde__m128_to_private(src); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t sign_pos = - vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); - r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - const v128_t sign_pos = wasm_f32x4_splat(-0.0f); - r_.wasm_v128 = - wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); -#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) -#if !defined(HEDLEY_IBM_VERSION) - r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); -#else - r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); -#endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) - sign_pos = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - vec_splats(-0.0f)); - r_.altivec_f32 = vec_sel(dest_.altivec_f32, src_.altivec_f32, sign_pos); -#elif defined(SIMDE_IEEE754_STORAGE) - (void)src_; - (void)dest_; - simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); - r_ = simde__m128_to_private(simde_mm_xor_ps( - dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); - } -#endif - - return simde__m128_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) -{ - return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), - dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvt_pi2ps(simde__m128 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_pi2ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), - vget_high_f32(a_.neon_f32)); -#elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; -#else - r_.f32[0] = (simde_float32)b_.i32[0]; - r_.f32[1] = (simde_float32)b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cvt_ps2pi(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvt_ps2pi(a); -#else - simde__m64_private r_; - simde__m128_private a_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - a_ = simde__m128_to_private( - simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); -#elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) - a_ = simde__m128_to_private( - simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); -#else - a_ = simde__m128_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = HEDLEY_STATIC_CAST( - int32_t, simde_math_nearbyintf(a_.f32[i])); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvt_si2ss(simde__m128 a, int32_t b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_si2ss(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = - vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); -#else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); - r_.i32[1] = a_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_cvt_ss2si(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvt_ss2si(a); -#elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); -#else - simde__m128_private a_ = simde__m128_to_private( - simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); - return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtpi16_ps(simde__m64 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi16_ps(a); -#else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); -#elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - simde_float32 v = a_.i16[i]; - r_.f32[i] = v; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtpi32_ps(simde__m128 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - simde__m64_private b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), - vget_high_f32(a_.neon_f32)); -#elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); - r_.m64_private[1] = a_.m64_private[1]; -#else - r_.f32[0] = (simde_float32)b_.i32[0]; - r_.f32[1] = (simde_float32)b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtpi32x2_ps(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32x2_ps(a, b); -#else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); -#elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); - SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); -#else - r_.f32[0] = (simde_float32)a_.i32[0]; - r_.f32[1] = (simde_float32)a_.i32[1]; - r_.f32[2] = (simde_float32)b_.i32[0]; - r_.f32[3] = (simde_float32)b_.i32[1]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtpi8_ps(simde__m64 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi8_ps(a); -#else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = - vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); -#else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); - r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); - r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); - r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cvtps_pi16(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi16(a); -#else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, - simde_math_roundf(a_.f32[i])); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cvtps_pi32(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi32(a); -#else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \ - defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); -#if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = - ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) - ? SIMDE_CONVERT_FTOI(int32_t, v) - : INT32_MIN; -#else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); -#endif - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cvtps_pi8(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtps_pi8(a); -#else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) - /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to - * i16, combine with an all-zero vector of i16 (which will become the upper - * half), narrow to i8. */ - float32x4_t max = - vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); - float32x4_t min = - vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); - float32x4_t values = - vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); - r_.neon_i8 = vmovn_s16( - vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(a_.f32) / sizeof(a_.f32[0])); i++) { - if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) - r_.i8[i] = INT8_MAX; - else if (a_.f32[i] < - HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) - r_.i8[i] = INT8_MIN; - else - r_.i8[i] = SIMDE_CONVERT_FTOI( - int8_t, simde_math_roundf(a_.f32[i])); - } - /* Note: the upper half is undefined */ -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtpu16_ps(simde__m64 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu16_ps(a); -#else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); -#elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = (simde_float32)a_.u16[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtpu8_ps(simde__m64 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpu8_ps(a); -#else - simde__m128_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = - vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtsi32_ss(simde__m128 a, int32_t b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtsi32_ss(a, b); -#else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), - a_.neon_f32, 0); -#else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtsi64_ss(simde__m128 a, int64_t b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) -#if !defined(__PGI) - return _mm_cvtsi64_ss(a, b); -#else - return _mm_cvtsi64x_ss(a, b); -#endif -#else - simde__m128_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), - a_.neon_f32, 0); -#else - r_ = a_; - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float32 simde_mm_cvtss_f32(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtss_f32(a); -#else - simde__m128_private a_ = simde__m128_to_private(a); -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_f32(a_.neon_f32, 0); -#else - return a_.f32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_cvtss_si32(simde__m128 a) -{ - return simde_mm_cvt_ss2si(a); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t simde_mm_cvtss_si64(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) -#if !defined(__PGI) - return _mm_cvtss_si64(a); -#else - return _mm_cvtss_si64x(a); -#endif -#else - simde__m128_private a_ = simde__m128_to_private(a); -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI( - int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); -#else - return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cvtt_ps2pi(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtt_ps2pi(a); -#else - simde__m64_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - simde_float32 v = a_.f32[i]; -#if !defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = - ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) - ? SIMDE_CONVERT_FTOI(int32_t, v) - : INT32_MIN; -#else - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); -#endif - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) -#define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_cvtt_ss2si(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cvtt_ss2si(a); -#else - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); -#else - simde_float32 v = a_.f32[0]; -#if !defined(SIMDE_FAST_CONVERSION_RANGE) - return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) - ? SIMDE_CONVERT_FTOI(int32_t, v) - : INT32_MIN; -#else - return SIMDE_CONVERT_FTOI(int32_t, v); -#endif -#endif -#endif -} -#define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) -#define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t simde_mm_cvttss_si64(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - !defined(_MSC_VER) -#if defined(__PGI) - return _mm_cvttss_si64x(a); -#else - return _mm_cvttss_si64(a); -#endif -#else - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); -#else - return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cmpord_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_cmpord_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(simde_math_isnanf) - r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || - simde_math_isnanf(simde_mm_cvtss_f32(b))) - ? UINT32_C(0) - : ~UINT32_C(0); - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.u32[i] = a_.u32[i]; - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_div_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); - float32x4_t recip1 = - vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); - r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 / b_.f32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = a_.f32[i] / b_.f32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_div_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_div_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32( - simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); -#else - r_.f32[0] = a_.f32[0] / b_.f32[0]; - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = a_.f32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t simde_mm_extract_pi16(simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) -{ - simde__m64_private a_ = simde__m64_to_private(a); - return a_.i16[imm8]; -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \ - !defined(HEDLEY_PGI_VERSION) -#if defined(SIMDE_BUG_CLANG_44589) -#define simde_mm_extract_pi16(a, imm8) \ - (HEDLEY_DIAGNOSTIC_PUSH _Pragma( \ - "clang diagnostic ignored \"-Wvector-conversion\"") \ - HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16((a), (imm8))) \ - HEDLEY_DIAGNOSTIC_POP) -#else -#define simde_mm_extract_pi16(a, imm8) \ - HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) -#endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_extract_pi16(a, imm8) \ - vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) -#endif -#define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_insert_pi16(simde__m64 a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) -{ - simde__m64_private r_, a_ = simde__m64_to_private(a); - - r_.i64[0] = a_.i64[0]; - r_.i16[imm8] = i; - - return simde__m64_from_private(r_); -} -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \ - !defined(__PGI) -#if defined(SIMDE_BUG_CLANG_44589) -#define ssimde_mm_insert_pi16(a, i, imm8) \ - (HEDLEY_DIAGNOSTIC_PUSH _Pragma( \ - "clang diagnostic ignored \"-Wvector-conversion\"")( \ - _mm_insert_pi16((a), (i), (imm8))) HEDLEY_DIAGNOSTIC_POP) -#else -#define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) -#endif -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_insert_pi16(a, i, imm8) \ - simde__m64_from_neon_i16( \ - vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) -#endif -#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_load_ps(simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps(mem_addr); -#else - simde__m128_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_f32(mem_addr); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_ld(0, mem_addr); -#else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), - sizeof(r_)); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_load1_ps(simde_float32 const *mem_addr) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ps1(mem_addr); -#else - simde__m128_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vld1q_dup_f32(mem_addr); -#else - r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); -#endif - - return simde__m128_from_private(r_); -#endif -} -#define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) -#define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_load_ss(simde_float32 const *mem_addr) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_load_ss(mem_addr); -#else - simde__m128_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); -#else - r_.f32[0] = *mem_addr; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_loadh_pi(simde__m128 a, simde__m64 const *mem_addr) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_loadh_pi(a, - HEDLEY_REINTERPRET_CAST(__m64 const *, mem_addr)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32( - vget_low_f32(a_.neon_f32), - vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t *, mem_addr))); -#else - simde__m64_private b_ = - *HEDLEY_REINTERPRET_CAST(simde__m64_private const *, mem_addr); - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#if HEDLEY_HAS_WARNING("-Wold-style-cast") -#define _mm_loadh_pi(a, mem_addr) \ - simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const *, \ - (mem_addr))) -#else -#define _mm_loadh_pi(a, mem_addr) \ - simde_mm_loadh_pi((a), (simde__m64 const *)(mem_addr)) -#endif -#endif - -/* The SSE documentation says that there are no alignment requirements - for mem_addr. Unfortunately they used the __m64 type for the argument - which is supposed to be 8-byte aligned, so some compilers (like clang - with -Wcast-align) will generate a warning if you try to cast, say, - a simde_float32* to a simde__m64* for this function. - - I think the choice of argument type is unfortunate, but I do think we - need to stick to it here. If there is demand I can always add something - like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_loadl_pi(simde__m128 a, simde__m64 const *mem_addr) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadl_pi(a, - HEDLEY_REINTERPRET_CAST(__m64 const *, mem_addr)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcombine_f32( - vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t *, mem_addr)), - vget_high_f32(a_.neon_f32)); -#else - simde__m64_private b_; - simde_memcpy(&b_, mem_addr, sizeof(b_)); - r_.i32[0] = b_.i32[0]; - r_.i32[1] = b_.i32[1]; - r_.i32[2] = a_.i32[2]; - r_.i32[3] = a_.i32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#if HEDLEY_HAS_WARNING("-Wold-style-cast") -#define _mm_loadl_pi(a, mem_addr) \ - simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const *, \ - (mem_addr))) -#else -#define _mm_loadl_pi(a, mem_addr) \ - simde_mm_loadl_pi((a), (simde__m64 const *)(mem_addr)) -#endif -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadr_ps(simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadr_ps(mem_addr); -#else - simde__m128_private r_, - v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrev64q_f32(v_.neon_f32); - r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_reve(v_.altivec_f32); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); -#else - r_.f32[0] = v_.f32[3]; - r_.f32[1] = v_.f32[2]; - r_.f32[2] = v_.f32[1]; - r_.f32[3] = v_.f32[0]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 -simde_mm_loadu_ps(simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_loadu_ps(mem_addr); -#else - simde__m128_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = - vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t *, mem_addr)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_load(mem_addr); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) - r_.altivec_f32 = vec_vsx_ld(0, mem_addr); -#else - simde_memcpy(&r_, mem_addr, sizeof(r_)); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_maskmove_si64(simde__m64 a, simde__m64 mask, int8_t *mem_addr) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char *, mem_addr)); -#else - simde__m64_private a_ = simde__m64_to_private(a), - mask_ = simde__m64_to_private(mask); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(a_.i8) / sizeof(a_.i8[0])); i++) - if (mask_.i8[i] < 0) - mem_addr[i] = a_.i8[i]; -#endif -} -#define simde_m_maskmovq(a, mask, mem_addr) \ - simde_mm_maskmove_si64(a, mask, mem_addr) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_maskmove_si64(a, mask, mem_addr) \ - simde_mm_maskmove_si64( \ - (a), (mask), \ - SIMDE_CHECKED_REINTERPRET_CAST(int8_t *, char *, (mem_addr))) -#define _m_maskmovq(a, mask, mem_addr) \ - simde_mm_maskmove_si64( \ - (a), (mask), \ - SIMDE_CHECKED_REINTERPRET_CAST(int8_t *, char *, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_max_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pi16(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) -#define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_max_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) - r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), - a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = - wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, - wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, - vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_max_pu8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_max_pu8(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) -#define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_max_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_max_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); -#else - r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_min_pi16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pi16(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) -#define _m_pminsw(a, b) simde_mm_min_pi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_min_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ps(a, b); -#elif defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128_from_neon_f32(vminq_f32(simde__m128_to_neon_f32(a), - simde__m128_to_neon_f32(b))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); -#if defined(SIMDE_FAST_NANS) - r_.wasm_v128 = wasm_f32x4_min(a_.wasm_v128, b_.wasm_v128); -#else - r_.wasm_v128 = - wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, - wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128)); -#endif - return simde__m128_from_private(r_); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_FAST_NANS) - r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); -#else - r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, - vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); -#endif - - return simde__m128_from_private(r_); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - simde__m128 mask = simde_mm_cmplt_ps(a, b); - return simde_mm_or_ps(simde_mm_and_ps(mask, a), - simde_mm_andnot_ps(mask, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; - } - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_min_pu8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_min_pu8(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) -#define _m_pminub(a, b) simde_mm_min_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_min_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_min_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = - vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); -#else - r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_movehl_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movehl_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a32 = vget_high_f32(a_.neon_f32); - float32x2_t b32 = vget_high_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(b32, a32); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergel(b_.altivec_i64, a_.altivec_i64)); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); -#else - r_.f32[0] = b_.f32[2]; - r_.f32[1] = b_.f32[3]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_movelh_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_movelh_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a10 = vget_low_f32(a_.neon_f32); - float32x2_t b10 = vget_low_f32(b_.neon_f32); - r_.neon_f32 = vcombine_f32(a10, b10); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(float), - vec_mergeh(a_.altivec_i64, b_.altivec_i64)); -#else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = b_.f32[0]; - r_.f32[3] = b_.f32[1]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_movemask_pi8(simde__m64 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_pi8(a); -#else - simde__m64_private a_ = simde__m64_to_private(a); - int r = 0; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x8_t input = a_.neon_u8; - const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x8_t mask_and = vdup_n_u8(0x80); - const int8x8_t mask_shift = vld1_s8(xr); - const uint8x8_t mask_result = - vshl_u8(vand_u8(input, mask_and), mask_shift); - uint8x8_t lo = mask_result; - r = vaddv_u8(lo); -#else - const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); - SIMDE_VECTORIZE_REDUCTION(| : r) - for (size_t i = 0; i < nmemb; i++) { - r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); - } -#endif - - return r; -#endif -} -#define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) -#define _m_pmovmskb(a) simde_mm_movemask_pi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_movemask_ps(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movemask_ps(a); -#else - int r = 0; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int32_t shift_amount[] = {0, 1, 2, 3}; - const int32x4_t shift = vld1q_s32(shift_amount); - uint32x4_t tmp = vshrq_n_u32(a_.neon_u32, 31); - return HEDLEY_STATIC_CAST(int, vaddvq_u32(vshlq_u32(tmp, shift))); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Shift out everything but the sign bits with a 32-bit unsigned shift right. - uint64x2_t high_bits = - vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); - // Merge the two pairs together with a 64-bit unsigned shift right + add. - uint8x16_t paired = - vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); - // Extract the result. - return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); -#else - SIMDE_VECTORIZE_REDUCTION(| : r) - for (size_t i = 0; i < sizeof(a_.u32) / sizeof(a_.u32[0]); i++) { - r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; - } -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_movemask_ps(a) simde_mm_movemask_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_mul_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 * b_.f32; -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = a_.f32[i] * b_.f32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_mul_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_mul_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] * b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_mulhi_pu16(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_mulhi_pu16(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); - const uint32x4_t t2 = vshrq_n_u32(t1, 16); - const uint16x4_t t3 = vmovn_u32(t2); - r_.neon_u16 = t3; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = HEDLEY_STATIC_CAST( - uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * - HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> - UINT32_C(16))); - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) -#define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) -#define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) -#define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) -#define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) -#define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) -#define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) -#define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) -#define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) -#define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) -#else -#define SIMDE_MM_HINT_NTA 0 -#define SIMDE_MM_HINT_T0 1 -#define SIMDE_MM_HINT_T1 2 -#define SIMDE_MM_HINT_T2 3 -#define SIMDE_MM_HINT_ENTA 4 -#define SIMDE_MM_HINT_ET0 5 -#define SIMDE_MM_HINT_ET1 6 -#define SIMDE_MM_HINT_ET2 7 -#endif - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wreserved-id-macro") -_Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") -#endif -#undef _MM_HINT_NTA -#define _MM_HINT_NTA SIMDE_MM_HINT_NTA -#undef _MM_HINT_T0 -#define _MM_HINT_T0 SIMDE_MM_HINT_T0 -#undef _MM_HINT_T1 -#define _MM_HINT_T1 SIMDE_MM_HINT_T1 -#undef _MM_HINT_T2 -#define _MM_HINT_T2 SIMDE_MM_HINT_T2 -#undef _MM_HINT_ETNA -#define _MM_HINT_ETNA SIMDE_MM_HINT_ETNA -#undef _MM_HINT_ET0 -#define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 -#undef _MM_HINT_ET1 -#define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 -#undef _MM_HINT_ET1 -#define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 - HEDLEY_DIAGNOSTIC_POP -#endif - - SIMDE_FUNCTION_ATTRIBUTES void simde_mm_prefetch(char const *p, int i) -{ -#if defined(HEDLEY_GCC_VERSION) - __builtin_prefetch(p); -#else - (void)p; -#endif - - (void)i; -} -#if defined(SIMDE_X86_SSE_NATIVE) -#if defined(__clang__) && \ - !SIMDE_DETECT_CLANG_VERSION_CHECK( \ - 10, 0, 0) /* https://reviews.llvm.org/D71718 */ -#define simde_mm_prefetch(p, i) \ - (__extension__({ \ - HEDLEY_DIAGNOSTIC_PUSH \ - HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - _mm_prefetch((p), (i)); \ - HEDLEY_DIAGNOSTIC_POP \ - })) -#else -#define simde_mm_prefetch(p, i) _mm_prefetch(p, i) -#endif -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_prefetch(p, i) simde_mm_prefetch(p, i) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_x_mm_negate_ps(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8, 1, 0)) - r_.altivec_f32 = vec_neg(a_.altivec_f32); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vnegq_f32(a_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f32 = vec_neg(a_.altivec_f32); -#elif defined(SIMDE_VECTOR_NEGATE) - r_.f32 = -a_.f32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = -a_.f32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_rcp_ps(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ps(a); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t recip = vrecpeq_f32(a_.neon_f32); - -#if SIMDE_ACCURACY_PREFERENCE > 0 - for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE; ++i) { - recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); - } -#endif - - r_.neon_f32 = recip; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_re(a_.altivec_f32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.f32 = 1.0f / a_.f32; -#elif defined(SIMDE_IEEE754_STORAGE) - /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - int32_t ix; - simde_float32 fx = a_.f32[i]; - simde_memcpy(&ix, &fx, sizeof(ix)); - int32_t x = INT32_C(0x7EF311C3) - ix; - simde_float32 temp; - simde_memcpy(&temp, &x, sizeof(temp)); - r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); - } -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = 1.0f / a_.f32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_rcp_ss(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rcp_ss(a); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - - r_.f32[0] = 1.0f / a_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_rsqrt_ps(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ps(a); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); -#elif defined(SIMDE_IEEE754_STORAGE) - /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf - Pages 100 - 103 */ - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { -#if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); -#else - simde_float32 x = a_.f32[i]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - -#if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); -#else - ix = INT32_C(0x5F37599E) - (ix >> 1); -#endif - - simde_memcpy(&x, &ix, sizeof(x)); - -#if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); -#endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[i] = x; -#endif - } -#elif defined(simde_math_sqrtf) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_rsqrt_ss(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_rsqrt_ss(a); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = - vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), - a_.neon_f32, 0); -#elif defined(SIMDE_IEEE754_STORAGE) - { -#if SIMDE_ACCURACY_PREFERENCE <= 0 - r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); -#else - simde_float32 x = a_.f32[0]; - simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; - int32_t ix; - - simde_memcpy(&ix, &x, sizeof(ix)); - -#if SIMDE_ACCURACY_PREFERENCE == 1 - ix = INT32_C(0x5F375A82) - (ix >> 1); -#else - ix = INT32_C(0x5F37599E) - (ix >> 1); -#endif - - simde_memcpy(&x, &ix, sizeof(x)); - -#if SIMDE_ACCURACY_PREFERENCE >= 2 - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); -#endif - x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); - - r_.f32[0] = x; -#endif - } - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; -#elif defined(simde_math_sqrtf) - r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sad_pu8(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sad_pu8(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t t = vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)); - uint16_t r0 = t[0] + t[1] + t[2] + t[3]; - r_.neon_u16 = vset_lane_u16(r0, vdup_n_u16(0), 0); -#else - uint16_t sum = 0; - -#if defined(SIMDE_HAVE_STDLIB_H) - SIMDE_VECTORIZE_REDUCTION(+ : sum) - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - sum += HEDLEY_STATIC_CAST(uint8_t, abs(a_.u8[i] - b_.u8[i])); - } - - r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; -#else - HEDLEY_UNREACHABLE(); -#endif -#endif - - return simde__m64_from_private(r_); -#endif -} -#define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) -#define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_set_ss(simde_float32 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_set_ss(a); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); -#else - return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), - SIMDE_FLOAT32_C(0.0), a); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_set_ss(a) simde_mm_set_ss(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_setr_ps(simde_float32 e3, simde_float32 e2, - simde_float32 e1, simde_float32 e0) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setr_ps(e3, e2, e1, e0); -#else - return simde_mm_set_ps(e0, e1, e2, e3); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_setzero_ps(void) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_setzero_ps(); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return vec_splats(SIMDE_FLOAT32_C(0.0)); -#else - simde__m128 r; - simde_memset(&r, 0, sizeof(r)); - return r; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_setzero_ps() simde_mm_setzero_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_undefined_ps(void) -{ - simde__m128_private r_; - -#if defined(SIMDE_HAVE_UNDEFINED128) - r_.n = _mm_undefined_ps(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128_to_private(simde_mm_setzero_ps()); -#endif - - return simde__m128_from_private(r_); -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_undefined_ps() simde_mm_undefined_ps() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_x_mm_setone_ps(void) -{ - simde__m128 t = simde_mm_setzero_ps(); - return simde_mm_cmpeq_ps(t, t); -} - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_sfence(void) -{ - /* TODO: Use Hedley. */ -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_sfence(); -#elif defined(__GNUC__) && \ - ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) - __atomic_thread_fence(__ATOMIC_SEQ_CST); -#elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && \ - (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) -#if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) - __atomic_thread_fence(__ATOMIC_SEQ_CST); -#else - atomic_thread_fence(memory_order_seq_cst); -#endif -#elif defined(_MSC_VER) - MemoryBarrier(); -#elif HEDLEY_HAS_EXTENSION(c_atomic) - __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); -#elif defined(__GNUC__) && \ - ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) - __sync_synchronize(); -#elif defined(_OPENMP) -#pragma omp critical(simde_mm_sfence_) - { - } -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_sfence() simde_mm_sfence() -#endif - -#define SIMDE_MM_SHUFFLE(z, y, x, w) \ - (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \ - !defined(__PGI) -#define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -#define simde_mm_shuffle_pi16(a, imm8) \ - (__extension__({ \ - const simde__m64_private simde__tmp_a_ = \ - simde__m64_to_private(a); \ - simde__m64_from_private((simde__m64_private){ \ - .i16 = SIMDE_SHUFFLE_VECTOR_( \ - 16, 8, (simde__tmp_a_).i16, \ - (simde__tmp_a_).i16, (((imm8)) & 3), \ - (((imm8) >> 2) & 3), (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3))}); \ - })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_shuffle_pi16(simde__m64 a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - simde__m64_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - - for (size_t i = 0; i < sizeof(r_.i16) / sizeof(r_.i16[0]); i++) { - r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; - } - - HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") -#pragma clang diagnostic ignored "-Wconditional-uninitialized" -#endif - return simde__m64_from_private(r_); - HEDLEY_DIAGNOSTIC_POP -} -#endif -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \ - !defined(__PGI) -#define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) -#else -#define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) -#define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_shuffle_ps(a, b, imm8) \ - __extension__({ \ - float32x4_t ret; \ - ret = vmovq_n_f32(vgetq_lane_f32(a, (imm8) & (0x3))); \ - ret = vsetq_lane_f32(vgetq_lane_f32(a, ((imm8) >> 2) & 0x3), \ - ret, 1); \ - ret = vsetq_lane_f32(vgetq_lane_f32(b, ((imm8) >> 4) & 0x3), \ - ret, 2); \ - ret = vsetq_lane_f32(vgetq_lane_f32(b, ((imm8) >> 6) & 0x3), \ - ret, 3); \ - }) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -#define simde_mm_shuffle_ps(a, b, imm8) \ - (__extension__({ \ - simde__m128_from_private((simde__m128_private){ \ - .f32 = SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, simde__m128_to_private(a).f32, \ - simde__m128_to_private(b).f32, (((imm8)) & 3), \ - (((imm8) >> 2) & 3), (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4)}); \ - })) -#else -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_shuffle_ps(simde__m128 a, simde__m128 b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; - r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; - r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; - r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; - - return simde__m128_from_private(r_); -} -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_sqrt_ps(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ps(a); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsqrtq_f32(a_.neon_f32); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t est = vrsqrteq_f32(a_.neon_f32); - for (int i = 0; i <= SIMDE_ACCURACY_PREFERENCE; i++) { - est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), - est); - } - r_.neon_f32 = vmulq_f32(a_.neon_f32, est); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sqrt(a_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_sqrt(a_.altivec_f32); -#elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0; i < sizeof(r_.f32) / sizeof(r_.f32[0]); i++) { - r_.f32[i] = simde_math_sqrtf(a_.f32[i]); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_sqrt_ss(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sqrt_ss(a); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32_t value = vgetq_lane_f32( - simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); - r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); -#elif defined(simde_math_sqrtf) - r_.f32[0] = simde_math_sqrtf(a_.f32[0]); - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_store_ps(simde_float32 mem_addr[4], simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps(mem_addr, a); -#else - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(a_.altivec_f32, 0, mem_addr); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr, a_.wasm_v128); -#else - simde_memcpy(mem_addr, &a_, sizeof(a)); -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_store_ps(mem_addr, a) \ - simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST( \ - float *, simde_float32 *, mem_addr), \ - (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_store1_ps(simde_float32 mem_addr[4], simde__m128 a) -{ - simde_float32 *mem_addr_ = - SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); - -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ps1(mem_addr_, a); -#else - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - wasm_v128_store(mem_addr_, - wasm_v32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, - 0)); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - simde__m128_private tmp_; - tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); - simde_mm_store_ps(mem_addr_, tmp_.f32); -#else - SIMDE_VECTORIZE_ALIGNED(mem_addr_ : 16) - for (size_t i = 0; i < sizeof(a_.f32) / sizeof(a_.f32[0]); i++) { - mem_addr_[i] = a_.f32[0]; - } -#endif -#endif -} -#define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_store_ps1(mem_addr, a) \ - simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST( \ - float *, simde_float32 *, mem_addr), \ - (a)) -#define _mm_store1_ps(mem_addr, a) \ - simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST( \ - float *, simde_float32 *, mem_addr), \ - (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_store_ss(simde_float32 *mem_addr, simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_store_ss(mem_addr, a); -#else - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_lane_f32(mem_addr, a_.neon_f32, 0); -#else - *mem_addr = a_.f32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_store_ss(mem_addr, a) \ - simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST( \ - float *, simde_float32 *, mem_addr), \ - (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storeh_pi(simde__m64 *mem_addr, simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64 *, mem_addr), a); -#else - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t *, mem_addr), - vget_high_f32(a_.neon_f32)); -#else - simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storel_pi(simde__m64 *mem_addr, simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64 *, mem_addr), a); -#else - simde__m64_private *dest_ = - HEDLEY_REINTERPRET_CAST(simde__m64_private *, mem_addr); - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest_->neon_f32 = vget_low_f32(a_.neon_f32); -#else - dest_->f32[0] = a_.f32[0]; - dest_->f32[1] = a_.f32[1]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storer_ps(simde_float32 mem_addr[4], simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_storer_ps(mem_addr, a); -#else - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x4_t tmp = vrev64q_f32(a_.neon_f32); - vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); - simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); -#else - SIMDE_VECTORIZE_ALIGNED(mem_addr : 16) - for (size_t i = 0; i < sizeof(a_.f32) / sizeof(a_.f32[0]); i++) { - mem_addr[i] = - a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; - } -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_storer_ps(mem_addr, a) \ - simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST( \ - float *, simde_float32 *, mem_addr), \ - (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storeu_ps(simde_float32 mem_addr[4], simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_storeu_ps(mem_addr, a); -#else - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_f32(mem_addr, a_.neon_f32); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - vec_vsx_st(a_.altivec_f32, 0, mem_addr); -#else - simde_memcpy(mem_addr, &a_, sizeof(a_)); -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_storeu_ps(mem_addr, a) \ - simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST( \ - float *, simde_float32 *, mem_addr), \ - (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_sub_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f32 = a_.f32 - b_.f32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = a_.f32[i] - b_.f32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_sub_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_sub_ss(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - - r_.f32[0] = a_.f32[0] - b_.f32[0]; - r_.f32[1] = a_.f32[1]; - r_.f32[2] = a_.f32[2]; - r_.f32[3] = a_.f32[3]; - - return simde__m128_from_private(r_); -#endif -} - -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomieq_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomieq_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] == b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f32[0] == b_.f32[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomige_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomige_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] >= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f32[0] >= b_.f32[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomigt_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomigt_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] > b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f32[0] > b_.f32[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomile_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomile_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] <= b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f32[0] <= b_.f32[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomilt_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomilt_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); - uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); - r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] < b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f32[0] < b_.f32[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomineq_ss(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_ucomineq_ss(a, b); -#else - simde__m128_private a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); - uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); - uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); - uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); - r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f32[0] != b_.f32[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f32[0] != b_.f32[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) -#endif - -#if defined(SIMDE_X86_SSE_NATIVE) -#if defined(__has_builtin) -#if __has_builtin(__builtin_ia32_undef128) -#define SIMDE_HAVE_UNDEFINED128 -#endif -#elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && \ - !defined(_MSC_VER) -#define SIMDE_HAVE_UNDEFINED128 -#endif -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_unpackhi_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpackhi_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_high_f32(a_.neon_f32); - float32x2_t b1 = vget_high_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); -#else - r_.f32[0] = a_.f32[2]; - r_.f32[1] = b_.f32[2]; - r_.f32[2] = a_.f32[3]; - r_.f32[3] = b_.f32[3]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_unpacklo_ps(simde__m128 a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return _mm_unpacklo_ps(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a), - b_ = simde__m128_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - float32x2_t a1 = vget_low_f32(a_.neon_f32); - float32x2_t b1 = vget_low_f32(b_.neon_f32); - float32x2x2_t result = vzip_f32(a1, b1); - r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); -#else - r_.f32[0] = a_.f32[0]; - r_.f32[1] = b_.f32[0]; - r_.f32[2] = a_.f32[1]; - r_.f32[3] = b_.f32[1]; -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_stream_pi(simde__m64 *mem_addr, simde__m64 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64 *, mem_addr), a); -#else - simde__m64_private *dest = HEDLEY_REINTERPRET_CAST(simde__m64_private *, - mem_addr), - a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); -#else - dest->i64[0] = a_.i64[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_stream_ps(simde_float32 mem_addr[4], simde__m128 a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - _mm_stream_ps(mem_addr, a); -#elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && \ - defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - simde__m128_private a_ = simde__m128_to_private(a); - __builtin_nontemporal_store( - a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32) *, mem_addr)); -#else - simde_mm_store_ps(mem_addr, a); -#endif -} -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _mm_stream_ps(mem_addr, a) \ - simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST( \ - float *, simde_float32 *, mem_addr), \ - (a)) -#endif - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - float32x4x2_t ROW01 = vtrnq_f32(row0, row1); \ - float32x4x2_t ROW23 = vtrnq_f32(row2, row3); \ - row0 = vcombine_f32(vget_low_f32(ROW01.val[0]), \ - vget_low_f32(ROW23.val[0])); \ - row1 = vcombine_f32(vget_low_f32(ROW01.val[1]), \ - vget_low_f32(ROW23.val[1])); \ - row2 = vcombine_f32(vget_high_f32(ROW01.val[0]), \ - vget_high_f32(ROW23.val[0])); \ - row3 = vcombine_f32(vget_high_f32(ROW01.val[1]), \ - vget_high_f32(ROW23.val[1])); \ - } while (0) -#else -#define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - do { \ - simde__m128 tmp3, tmp2, tmp1, tmp0; \ - tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \ - tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \ - tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \ - tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \ - row0 = simde_mm_movelh_ps(tmp0, tmp2); \ - row1 = simde_mm_movehl_ps(tmp2, tmp0); \ - row2 = simde_mm_movelh_ps(tmp1, tmp3); \ - row3 = simde_mm_movehl_ps(tmp3, tmp1); \ - } while (0) -#endif -#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) -#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ - SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) -#endif - -#if defined(_MM_EXCEPT_INVALID) -#define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID -#else -#define SIMDE_MM_EXCEPT_INVALID (0x0001) -#endif -#if defined(_MM_EXCEPT_DENORM) -#define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM -#else -#define SIMDE_MM_EXCEPT_DENORM (0x0002) -#endif -#if defined(_MM_EXCEPT_DIV_ZERO) -#define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO -#else -#define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) -#endif -#if defined(_MM_EXCEPT_OVERFLOW) -#define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW -#else -#define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) -#endif -#if defined(_MM_EXCEPT_UNDERFLOW) -#define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW -#else -#define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) -#endif -#if defined(_MM_EXCEPT_INEXACT) -#define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT -#else -#define SIMDE_MM_EXCEPT_INEXACT (0x0020) -#endif -#if defined(_MM_EXCEPT_MASK) -#define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK -#else -#define SIMDE_MM_EXCEPT_MASK \ - (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ - SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ - SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) -#endif - -#if defined(_MM_MASK_INVALID) -#define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID -#else -#define SIMDE_MM_MASK_INVALID (0x0080) -#endif -#if defined(_MM_MASK_DENORM) -#define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM -#else -#define SIMDE_MM_MASK_DENORM (0x0100) -#endif -#if defined(_MM_MASK_DIV_ZERO) -#define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO -#else -#define SIMDE_MM_MASK_DIV_ZERO (0x0200) -#endif -#if defined(_MM_MASK_OVERFLOW) -#define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW -#else -#define SIMDE_MM_MASK_OVERFLOW (0x0400) -#endif -#if defined(_MM_MASK_UNDERFLOW) -#define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW -#else -#define SIMDE_MM_MASK_UNDERFLOW (0x0800) -#endif -#if defined(_MM_MASK_INEXACT) -#define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT -#else -#define SIMDE_MM_MASK_INEXACT (0x1000) -#endif -#if defined(_MM_MASK_MASK) -#define SIMDE_MM_MASK_MASK _MM_MASK_MASK -#else -#define SIMDE_MM_MASK_MASK \ - (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ - SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ - SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) -#endif - -#if defined(_MM_FLUSH_ZERO_MASK) -#define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK -#else -#define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_ON) -#define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON -#else -#define SIMDE_MM_FLUSH_ZERO_ON (0x8000) -#endif -#if defined(_MM_FLUSH_ZERO_OFF) -#define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF -#else -#define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE_H) */ diff --git a/libobs/util/simde/x86/sse2.h b/libobs/util/simde/x86/sse2.h deleted file mode 100644 index 09c65b360baca3..00000000000000 --- a/libobs/util/simde/x86/sse2.h +++ /dev/null @@ -1,7549 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person - * obtaining a copy of this software and associated documentation - * files (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, - * modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be - * included in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Copyright: - * 2017-2020 Evan Nemerson - * 2015-2017 John W. Ratcliff - * 2015 Brandon Rowlett - * 2015 Ken Fast - * 2017 Hasindu Gamaarachchi - * 2018 Jeff Daily - */ - -#if !defined(SIMDE_X86_SSE2_H) -#define SIMDE_X86_SSE2_H - -#include "sse.h" - -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DISABLE_UNWANTED_DIAGNOSTICS -SIMDE_BEGIN_DECLS_ - -typedef union { -#if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#endif - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; -#if defined(SIMDE_HAVE_INT128_) - SIMDE_ALIGN_TO_16 simde_int128 i128[1]; - SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; -#endif - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; -#endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - -#if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128i n; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; -#if defined(SIMDE_ARCH_AARCH64) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; -#endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; -#if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; -#else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; -#endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; -#if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; -#else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; -#endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; -#endif -#endif -} simde__m128i_private; - -typedef union { -#if defined(SIMDE_VECTOR_SUBSCRIPT) - SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else - SIMDE_ALIGN_TO_16 int8_t i8[16]; - SIMDE_ALIGN_TO_16 int16_t i16[8]; - SIMDE_ALIGN_TO_16 int32_t i32[4]; - SIMDE_ALIGN_TO_16 int64_t i64[2]; - SIMDE_ALIGN_TO_16 uint8_t u8[16]; - SIMDE_ALIGN_TO_16 uint16_t u16[8]; - SIMDE_ALIGN_TO_16 uint32_t u32[4]; - SIMDE_ALIGN_TO_16 uint64_t u64[2]; - SIMDE_ALIGN_TO_16 simde_float32 f32[4]; - SIMDE_ALIGN_TO_16 simde_float64 f64[2]; - SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; - SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; -#endif - - SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; - SIMDE_ALIGN_TO_16 simde__m64 m64[2]; - -#if defined(SIMDE_X86_SSE2_NATIVE) - SIMDE_ALIGN_TO_16 __m128d n; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_TO_16 int8x16_t neon_i8; - SIMDE_ALIGN_TO_16 int16x8_t neon_i16; - SIMDE_ALIGN_TO_16 int32x4_t neon_i32; - SIMDE_ALIGN_TO_16 int64x2_t neon_i64; - SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; - SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; - SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; - SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; - SIMDE_ALIGN_TO_16 float32x4_t neon_f32; -#if defined(SIMDE_ARCH_AARCH64) - SIMDE_ALIGN_TO_16 float64x2_t neon_f64; -#endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - SIMDE_ALIGN_TO_16 v128_t wasm_v128; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; -#if defined(__INT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f; -#else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f; -#endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; -#if defined(__UINT_FAST32_TYPE__) && defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f; -#else - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f; -#endif - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; - SIMDE_ALIGN_TO_16 - SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; - SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; -#endif -#endif -} simde__m128d_private; - -#if defined(SIMDE_X86_SSE2_NATIVE) -typedef __m128i simde__m128i; -typedef __m128d simde__m128d; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -typedef int64x2_t simde__m128i; -#if defined(SIMDE_ARCH_AARCH64) -typedef float64x2_t simde__m128d; -#elif defined(SIMDE_VECTOR_SUBSCRIPT) -typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else -typedef simde__m128d_private simde__m128d; -#endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -typedef v128_t simde__m128i; -typedef v128_t simde__m128d; -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i; -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d; -#else -typedef simde__m128d_private simde__m128d; -#endif -#elif defined(SIMDE_VECTOR_SUBSCRIPT) -typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -typedef simde_float64 - simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; -#else -typedef simde__m128i_private simde__m128i; -typedef simde__m128d_private simde__m128d; -#endif - -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -typedef simde__m128i __m128i; -typedef simde__m128d __m128d; -#endif - -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), - "simde__m128i_private size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect"); -HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), - "simde__m128d_private size incorrect"); -#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, - "simde__m128i is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, - "simde__m128i_private is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, - "simde__m128d is not 16-byte aligned"); -HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, - "simde__m128d_private is not 16-byte aligned"); -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde__m128i_from_private(simde__m128i_private v) -{ - simde__m128i r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i_private simde__m128i_to_private(simde__m128i v) -{ - simde__m128i_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde__m128d_from_private(simde__m128d_private v) -{ - simde__m128d r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d_private simde__m128d_to_private(simde__m128d v) -{ - simde__m128d_private r; - simde_memcpy(&r, &v, sizeof(r)); - return r; -} - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32) -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64) -#endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, - SIMDE_POWER_ALTIVEC_VECTOR(signed char), - altivec, i8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, - SIMDE_POWER_ALTIVEC_VECTOR(signed short), - altivec, i16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, - SIMDE_POWER_ALTIVEC_VECTOR(signed int), - altivec, i32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - altivec, u32) -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) -#endif -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32) -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64) -#endif -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, - SIMDE_POWER_ALTIVEC_VECTOR(signed char), - altivec, i8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, - SIMDE_POWER_ALTIVEC_VECTOR(signed short), - altivec, i16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, - SIMDE_POWER_ALTIVEC_VECTOR(signed int), - altivec, i32) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, - SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), - altivec, u32) -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION( - m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) -#if defined(SIMDE_BUG_GCC_95782) -SIMDE_FUNCTION_ATTRIBUTES -SIMDE_POWER_ALTIVEC_VECTOR(double) -simde__m128d_to_altivec_f64(simde__m128d value) -{ - simde__m128d_private r_ = simde__m128d_to_private(value); - return r_.altivec_f64; -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) - value) -{ - simde__m128d_private r_; - r_.altivec_f64 = value; - return simde__m128d_from_private(r_); -} -#else -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, - SIMDE_POWER_ALTIVEC_VECTOR(double), - altivec, f64) -#endif -#endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128); -SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128); -#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_set_pd(simde_float64 e1, simde_float64 e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_pd(e1, e0); -#else - simde__m128d_private r_; - -#if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_make(e0, e1); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - SIMDE_ALIGN_TO_16 simde_float64 data[2] = {e0, e1}; - r_.neon_f64 = vld1q_f64(data); -#else - r_.f64[0] = e0; - r_.f64[1] = e1; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_set1_pd(simde_float64 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_pd(a); -#else - simde__m128d_private r_; - -#if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_splat(a); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdupq_n_f64(a); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.f64[i] = a; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#define simde_mm_set_pd1(a) simde_mm_set1_pd(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set1_pd(a) simde_mm_set1_pd(a) -#define _mm_set_pd1(a) simde_mm_set1_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_x_mm_abs_pd(simde__m128d a) -{ -#if defined(SIMDE_X86_AVX512F_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(7, 4, 0)) - return _mm512_castpd512_pd128(_mm512_abs_pd(_mm512_castpd128_pd512(a))); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) - r_.neon_f32 = vabsq_f32(a_.neon_f32); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f32 = vec_abs(a_.altivec_f32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = simde_math_fabs(a_.f64[i]); - } -#endif - - return simde__m128d_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_x_mm_not_pd(simde__m128d a) -{ -#if defined(SIMDE_X86_AVX512VL_NATIVE) - __m128i ai = _mm_castpd_si128(a); - return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } -#endif - - return simde__m128d_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_x_mm_select_pd(simde__m128d a, simde__m128d b, - simde__m128d mask) -{ -/* This function is for when you want to blend two elements together - * according to a mask. It is similar to _mm_blendv_pd, except that - * it is undefined whether the blend is based on the highest bit in - * each lane (like blendv) or just bitwise operations. This allows - * us to implement the function efficiently everywhere. - * - * Basically, you promise that all the lanes in mask are either 0 or - * ~0. */ -#if defined(SIMDE_X86_SSE4_1_NATIVE) - return _mm_blendv_pd(a, b, mask); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b), - mask_ = simde__m128d_to_private(mask); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = a_.i64[i] ^ - ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]); - } -#endif - - return simde__m128d_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_add_epi8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 + b_.i8; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = a_.i8[i] + b_.i8[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_add_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 + b_.i16; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[i] + b_.i16[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_add_epi32(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi32(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 + b_.i32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] + b_.i32[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_add_epi64(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_epi64(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 + b_.i64; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = a_.i64[i] + b_.i64[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_add_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 + b_.f64; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = a_.f64[i] + b_.f64[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_add_pd(a, b) simde_mm_add_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_move_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_sd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = - vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -#if defined(HEDLEY_IBM_VERSION) - r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); -#else - r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); -#endif -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); -#else - r_.f64[0] = b_.f64[0]; - r_.f64[1] = a_.f64[1]; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_move_sd(a, b) simde_mm_move_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_add_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_add_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] + b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_add_sd(a, b) simde_mm_add_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_add_si64(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_add_si64(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); -#else - r_.i64[0] = a_.i64[0] + b_.i64[0]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_add_si64(a, b) simde_mm_add_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_adds_epi8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_add_saturate(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - const int_fast16_t tmp = - HEDLEY_STATIC_CAST(int_fast16_t, a_.i8[i]) + - HEDLEY_STATIC_CAST(int_fast16_t, b_.i8[i]); - r_.i8[i] = HEDLEY_STATIC_CAST( - int8_t, - ((tmp < INT8_MAX) ? ((tmp > INT8_MIN) ? tmp : INT8_MIN) - : INT8_MAX)); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_adds_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_add_saturate(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - const int_fast32_t tmp = - HEDLEY_STATIC_CAST(int_fast32_t, a_.i16[i]) + - HEDLEY_STATIC_CAST(int_fast32_t, b_.i16[i]); - r_.i16[i] = HEDLEY_STATIC_CAST( - int16_t, - ((tmp < INT16_MAX) - ? ((tmp > INT16_MIN) ? tmp : INT16_MIN) - : INT16_MAX)); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_adds_epu8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_add_saturate(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - r_.u8[i] = ((UINT8_MAX - a_.u8[i]) > b_.u8[i]) - ? (a_.u8[i] + b_.u8[i]) - : UINT8_MAX; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_adds_epu16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_adds_epu16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_add_saturate(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = ((UINT16_MAX - a_.u16[i]) > b_.u16[i]) - ? (a_.u16[i] + b_.u16[i]) - : UINT16_MAX; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_and_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_and_pd(a, b) simde_mm_and_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_and_si128(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_and_si128(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f & b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = a_.i32f[i] & b_.i32f[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_and_si128(a, b) simde_mm_and_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_andnot_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) { - r_.u64[i] = ~a_.u64[i] & b_.u64[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_andnot_si128(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_andnot_si128(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f & b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_xor_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_avg_epu8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && \ - defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \ - defined(SIMDE_CONVERT_VECTOR_) - uint16_t wa SIMDE_VECTOR(32); - uint16_t wb SIMDE_VECTOR(32); - uint16_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u8); - SIMDE_CONVERT_VECTOR_(wb, b_.u8); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u8, wr); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_avg_epu16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_avg_epu16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && \ - defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && \ - defined(SIMDE_CONVERT_VECTOR_) - uint32_t wa SIMDE_VECTOR(32); - uint32_t wb SIMDE_VECTOR(32); - uint32_t wr SIMDE_VECTOR(32); - SIMDE_CONVERT_VECTOR_(wa, a_.u16); - SIMDE_CONVERT_VECTOR_(wb, b_.u16); - wr = (wa + wb + 1) >> 1; - SIMDE_CONVERT_VECTOR_(r_.u16, wr); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_setzero_si128(void) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_si128(); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(0); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT) - r_.i32 = __extension__(__typeof__(r_.i32)){0, 0, 0, 0}; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = 0; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_setzero_si128() (simde_mm_setzero_si128()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_bslli_si128(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = -#if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_slo -#else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_sro -#endif - (a_.altivec_i8, - vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); -#elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - r_.u128[0] = a_.u128[0] << (imm8 * 8); -#else - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); - for (int i = imm8; - i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])); - i++) { - r_.i8[i] = a_.i8[i - imm8]; - } -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) -#define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) -#define simde_mm_bslli_si128(a, imm8) \ - simde__m128i_from_neon_i8( \ - ((imm8) <= 0) \ - ? simde__m128i_to_neon_i8(a) \ - : (((imm8) > 15) \ - ? (vdupq_n_s8(0)) \ - : (vextq_s8(vdupq_n_s8(0), \ - simde__m128i_to_neon_i8(a), \ - 16 - (imm8))))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -#define simde_mm_bslli_si128(a, imm8) \ - (__extension__({ \ - const simde__m128i_private simde__tmp_a_ = \ - simde__m128i_to_private(a); \ - const simde__m128i_private simde__tmp_z_ = \ - simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde__tmp_r_; \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde__tmp_r_ = simde__m128i_to_private( \ - simde_mm_setzero_si128()); \ - } else { \ - simde__tmp_r_.i8 = SIMDE_SHUFFLE_VECTOR_( \ - 8, 16, simde__tmp_z_.i8, (simde__tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ - } \ - simde__m128i_from_private(simde__tmp_r_); \ - })) -#endif -#define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_bsrli_si128(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & ~15))) { - return simde_mm_setzero_si128(); - } - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) - r_.altivec_i8 = -#if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - vec_sro -#else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ - vec_slo -#endif - (a_.altivec_i8, - vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - const int e = HEDLEY_STATIC_CAST(int, i) + imm8; - r_.i8[i] = (e < 16) ? a_.i8[e] : 0; - } -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) -#define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) -#define simde_mm_bsrli_si128(a, imm8) \ - simde__m128i_from_neon_i8( \ - ((imm8 < 0) || (imm8 > 15)) \ - ? vdupq_n_s8(0) \ - : (vextq_s8(simde__m128i_to_private(a).neon_i8, \ - vdupq_n_s8(0), \ - ((imm8 & 15) != 0) ? imm8 : (imm8 & 15)))) -#elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -#define simde_mm_bsrli_si128(a, imm8) \ - (__extension__({ \ - const simde__m128i_private simde__tmp_a_ = \ - simde__m128i_to_private(a); \ - const simde__m128i_private simde__tmp_z_ = \ - simde__m128i_to_private(simde_mm_setzero_si128()); \ - simde__m128i_private simde__tmp_r_ = \ - simde__m128i_to_private(a); \ - if (HEDLEY_UNLIKELY(imm8 > 15)) { \ - simde__tmp_r_ = simde__m128i_to_private( \ - simde_mm_setzero_si128()); \ - } else { \ - simde__tmp_r_.i8 = SIMDE_SHUFFLE_VECTOR_( \ - 8, 16, simde__tmp_z_.i8, (simde__tmp_a_).i8, \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ - HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ - } \ - simde__m128i_from_private(simde__tmp_r_); \ - })) -#endif -#define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_clflush(void const *p) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_clflush(p); -#else - (void)p; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_clflush(a, b) simde_mm_clflush() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comieq_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comieq_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#else - return a_.f64[0] == b_.f64[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comige_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comige_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#else - return a_.f64[0] >= b_.f64[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comigt_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comigt_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#else - return a_.f64[0] > b_.f64[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comile_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comile_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#else - return a_.f64[0] <= b_.f64[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comilt_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comilt_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#else - return a_.f64[0] < b_.f64[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_comineq_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_comineq_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#else - return a_.f64[0] != b_.f64[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) -{ - simde__m128d_private r_, dest_ = simde__m128d_to_private(dest), - src_ = simde__m128d_to_private(src); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t sign_pos = - vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0))); -#else - simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0); - uint64_t u64_nz; - simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz)); - uint64x2_t sign_pos = vdupq_n_u64(u64_nz); -#endif - r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64); -#elif defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) -#if !defined(HEDLEY_IBM_VERSION) - r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64); -#else - r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64); -#endif -#elif defined(simde_math_copysign) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]); - } -#else - simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0)); - return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), - simde_mm_andnot_pd(sgnbit, dest)); -#endif - - return simde__m128d_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) -{ - return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), - dest); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_castpd_ps(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_ps(a); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f32_f64(a); -#else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_castpd_ps(a) simde_mm_castpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_castpd_si128(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castpd_si128(a); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_s64_f64(a); -#else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_castpd_si128(a) simde_mm_castpd_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_castps_pd(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_pd(a); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_f32(a); -#else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_castps_pd(a) simde_mm_castps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_castps_si128(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castps_si128(a); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32); -#else - simde__m128i r; - simde_memcpy(&r, &a, sizeof(a)); - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_castps_si128(a) simde_mm_castps_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_castsi128_pd(simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_pd(a); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vreinterpretq_f64_s64(a); -#else - simde__m128d r; - simde_memcpy(&r, &a, sizeof(a)); - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_castsi128_ps(simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_castsi128_ps(a); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32); -#else - simde__m128 r; - simde_memcpy(&r, &a, sizeof(a)); - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cmpeq_epi8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed char), - vec_cmpeq(a_.altivec_i8, b_.altivec_i8)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cmpeq_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed short), - vec_cmpeq(a_.altivec_i16, b_.altivec_i16)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = (a_.i16 == b_.i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_epi16(a, b) simde_mm_cmpeq_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cmpeq_epi32(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_epi32(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vceqq_s32(b_.neon_i32, a_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_eq(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed int), - vec_cmpeq(a_.altivec_i32, b_.altivec_i32)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), a_.i32 == b_.i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpeq_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vceqq_s64(b_.neon_i64, a_.neon_i64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(double), - vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) - : UINT64_C(0); - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpeq_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpeq_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? ~UINT64_C(0) : 0; - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpneq_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u32 = vmvnq_u32( - vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) - : UINT64_C(0); - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpneq_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpneq_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cmplt_epi8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed char), - vec_cmplt(a_.altivec_i8, b_.altivec_i8)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cmplt_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed short), - vec_cmplt(a_.altivec_i16, b_.altivec_i16)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cmplt_epi32(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_epi32(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed int), - vec_cmplt(a_.altivec_i32, b_.altivec_i32)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmplt_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64)); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) - : UINT64_C(0); - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmplt_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmplt_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmple_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64)); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f64 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(double), - vec_cmple(a_.altivec_f64, b_.altivec_f64)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) - : UINT64_C(0); - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmple_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmple_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cmpgt_epi8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i8 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed char), - vec_cmpgt(a_.altivec_i8, b_.altivec_i8)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = HEDLEY_STATIC_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cmpgt_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed short), - vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = HEDLEY_STATIC_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cmpgt_epi32(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_epi32(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed int), - vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = HEDLEY_STATIC_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpgt_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpgt_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f64 = - HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), - vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) - : UINT64_C(0); - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpgt_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpgt_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpge_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpge_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = HEDLEY_STATIC_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_f64 = - HEDLEY_STATIC_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), - vec_cmpge(a_.altivec_f64, b_.altivec_f64)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) - : UINT64_C(0); - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpge_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpge_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0); - r_.u64[1] = a_.u64[1]; - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpngt_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpngt_pd(a, b); -#else - return simde_mm_cmple_pd(a, b); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpngt_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpngt_sd(a, b); -#else - return simde_mm_cmple_sd(a, b); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpnge_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnge_pd(a, b); -#else - return simde_mm_cmplt_pd(a, b); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpnge_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cmpnge_sd(a, b); -#else - return simde_mm_cmplt_sd(a, b); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpnlt_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_pd(a, b); -#else - return simde_mm_cmpge_pd(a, b); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpnlt_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnlt_sd(a, b); -#else - return simde_mm_cmpge_sd(a, b); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpnle_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_pd(a, b); -#else - return simde_mm_cmpgt_pd(a, b); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpnle_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpnle_sd(a, b); -#else - return simde_mm_cmpgt_sd(a, b); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpord_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - /* Note: NEON does not have ordered compare builtin - Need to compare a eq a and b eq b to check for NaN - Do AND of results to get final */ - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vandq_u64(ceqaa, ceqbb); -#elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && - !simde_math_isnan(b_.f64[i])) - ? ~UINT64_C(0) - : UINT64_C(0); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float64 simde_mm_cvtsd_f64(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_cvtsd_f64(a); -#else - simde__m128d_private a_ = simde__m128d_to_private(a); -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return HEDLEY_STATIC_CAST(simde_float64, - vgetq_lane_f64(a_.neon_f64, 0)); -#else - return a_.f64[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpord_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpord_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(simde_math_isnan) - r_.u64[0] = - (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) - ? ~UINT64_C(0) - : UINT64_C(0); - r_.u64[1] = a_.u64[1]; -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpunord_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); - r_.neon_u64 = vreinterpretq_u64_u32( - vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); -#elif defined(simde_math_isnan) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.u64[i] = (simde_math_isnan(a_.f64[i]) || - simde_math_isnan(b_.f64[i])) - ? ~UINT64_C(0) - : UINT64_C(0); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cmpunord_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cmpunord_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(simde_math_isnan) - r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) - ? ~UINT64_C(0) - : UINT64_C(0); - r_.u64[1] = a_.u64[1]; -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cvtepi32_pd(simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_pd(a); -#else - simde__m128d_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = (simde_float64)a_.i32[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtepi32_ps(simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtepi32_ps(a); -#else - simde__m128_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - HEDLEY_DIAGNOSTIC_PUSH -#if HEDLEY_HAS_WARNING("-Wc11-extensions") -#pragma clang diagnostic ignored "-Wc11-extensions" -#endif - r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); - HEDLEY_DIAGNOSTIC_POP -#elif defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f32) / sizeof(r_.f32[0])); i++) { - r_.f32[i] = (simde_float32)a_.i32[i]; - } -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cvtpd_pi32(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpd_pi32(a); -#else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - simde_float64 v = simde_math_round(a_.f64[i]); -#if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); -#else - r_.i32[i] = - ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && - (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) - ? SIMDE_CONVERT_FTOI(int32_t, v) - : INT32_MIN; -#endif - } - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cvtpd_epi32(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_epi32(a); -#else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvtpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtpd_ps(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtpd_ps(a); -#else - simde__m128_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.f64); - r_.m64_private[1] = simde__m64_to_private(simde_mm_setzero_si64()); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vreinterpretq_f32_f64( - vcombine_f64(vreinterpret_f64_f32(vcvtx_f32_f64(a_.neon_f64)), - vdup_n_f64(0))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(a_.f64) / sizeof(a_.f64[0])); i++) { - r_.f32[i] = (simde_float32)a_.f64[i]; - } - simde_memset(&(r_.m64_private[1]), 0, sizeof(r_.m64_private[1])); -#endif - - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cvtpi32_pd(simde__m64 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvtpi32_pd(a); -#else - simde__m128d_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = (simde_float64)a_.i32[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cvtps_epi32(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_epi32(a); -#else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ - defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && \ - defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ - SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ - r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); - HEDLEY_DIAGNOSTIC_POP -#else - a_ = simde__m128_to_private( - simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - simde_float32 v = simde_math_roundf(a_.f32[i]); -#if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); -#else - r_.i32[i] = - ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) - ? SIMDE_CONVERT_FTOI(int32_t, v) - : INT32_MIN; -#endif - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cvtps_pd(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtps_pd(a); -#else - simde__m128d_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_CONVERT_VECTOR_) - SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = a_.f32[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_cvtsd_si32(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_si32(a); -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 v = simde_math_round(a_.f64[0]); -#if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); -#else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && - (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) - ? SIMDE_CONVERT_FTOI(int32_t, v) - : INT32_MIN; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t simde_mm_cvtsd_si64(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) -#if defined(__PGI) - return _mm_cvtsd_si64x(a); -#else - return _mm_cvtsd_si64(a); -#endif -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); -#endif -} -#define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) -#define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128 simde_mm_cvtsd_ss(simde__m128 a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsd_ss(a, b); -#else - simde__m128_private r_, a_ = simde__m128_to_private(a); - simde__m128d_private b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f32 = vsetq_lane_f32( - vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); -#else - r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); - - SIMDE_VECTORIZE - for (size_t i = 1; i < (sizeof(r_) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i]; - } -#endif - return simde__m128_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int16_t simde_x_mm_cvtsi128_si16(simde__m128i a) -{ - simde__m128i_private a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s16(a_.neon_i16, 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int16_t, - wasm_i16x8_extract_lane(a_.wasm_v128, 0)); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -#if defined(SIMDE_BUG_GCC_95227) - (void)a_; -#endif - return vec_extract(a_.altivec_i16, 0); -#else - return a_.i16[0]; -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_cvtsi128_si32(simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi128_si32(a); -#else - simde__m128i_private a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s32(a_.neon_i32, 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int32_t, - wasm_i32x4_extract_lane(a_.wasm_v128, 0)); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -#if defined(SIMDE_BUG_GCC_95227) - (void)a_; -#endif - return vec_extract(a_.altivec_i32, 0); -#else - return a_.i32[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t simde_mm_cvtsi128_si64(simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) -#if defined(__PGI) - return _mm_cvtsi128_si64x(a); -#else - return _mm_cvtsi128_si64(a); -#endif -#else - simde__m128i_private a_ = simde__m128i_to_private(a); -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) - return vec_extract(HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(signed long long), - a_.i64), - 0); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - return vgetq_lane_s64(a_.neon_i64, 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return HEDLEY_STATIC_CAST(int64_t, - wasm_i64x2_extract_lane(a_.wasm_v128, 0)); -#endif - return a_.i64[0]; -#endif -} -#define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) -#define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cvtsi32_sd(simde__m128d a, int32_t b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_sd(a, b); -#else - simde__m128d_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_AMD64) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), - a_.neon_f64, 0); -#else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.i64[1] = a_.i64[1]; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_cvtsi16_si128(int16_t a) -{ - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); -#else - r_.i16[0] = a; - r_.i16[1] = 0; - r_.i16[2] = 0; - r_.i16[3] = 0; - r_.i16[4] = 0; - r_.i16[5] = 0; - r_.i16[6] = 0; - r_.i16[7] = 0; -#endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cvtsi32_si128(int32_t a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtsi32_si128(a); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); -#else - r_.i32[0] = a; - r_.i32[1] = 0; - r_.i32[2] = 0; - r_.i32[3] = 0; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cvtsi64_sd(simde__m128d a, int64_t b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) -#if !defined(__PGI) - return _mm_cvtsi64_sd(a, b); -#else - return _mm_cvtsi64x_sd(a, b); -#endif -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), - a_.neon_f64, 0); -#else - r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); - r_.f64[1] = a_.f64[1]; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#define simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) -#define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cvtsi64_si128(int64_t a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) -#if !defined(__PGI) - return _mm_cvtsi64_si128(a); -#else - return _mm_cvtsi64x_si128(a); -#endif -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make(a, 0); -#else - r_.i64[0] = a; - r_.i64[1] = 0; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) -#define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_cvtss_sd(simde__m128d a, simde__m128 b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvtss_sd(a, b); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vcvt_f64_f32(vset_lane_f32( - vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), - vdup_n_f32(0), 0)); - return vsetq_lane_f64( - vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, - 1); -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde__m128_private b_ = simde__m128_to_private(b); - - a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); - - return simde__m128d_from_private(a_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_cvttpd_pi32(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_cvttpd_pi32(a); -#else - simde__m64_private r_; - simde__m128d_private a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); -#else - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - simde_float64 v = a_.f64[i]; -#if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); -#else - r_.i32[i] = - ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && - (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) - ? SIMDE_CONVERT_FTOI(int32_t, v) - : INT32_MIN; -#endif - } -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cvttpd_epi32(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttpd_epi32(a); -#else - simde__m128i_private r_; - - r_.m64[0] = simde_mm_cvttpd_pi32(a); - r_.m64[1] = simde_mm_setzero_si64(); - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_cvttps_epi32(simde__m128 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttps_epi32(a); -#else - simde__m128i_private r_; - simde__m128_private a_ = simde__m128_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) - r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); -#elif defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) - SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); -#else - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - simde_float32 v = a_.f32[i]; -#if defined(SIMDE_FAST_CONVERSION_RANGE) - r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); -#else - r_.i32[i] = - ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && - (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) - ? SIMDE_CONVERT_FTOI(int32_t, v) - : INT32_MIN; -#endif - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_cvttsd_si32(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_cvttsd_si32(a); -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - simde_float64 v = a_.f64[0]; -#if defined(SIMDE_FAST_CONVERSION_RANGE) - return SIMDE_CONVERT_FTOI(int32_t, v); -#else - return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && - (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) - ? SIMDE_CONVERT_FTOI(int32_t, v) - : INT32_MIN; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int64_t simde_mm_cvttsd_si64(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) -#if !defined(__PGI) - return _mm_cvttsd_si64(a); -#else - return _mm_cvttsd_si64x(a); -#endif -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); -#endif -} -#define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) -#define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_div_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 / b_.f64; -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = a_.f64[i] / b_.f64[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_div_pd(a, b) simde_mm_div_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_div_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_div_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); -#else - r_.f64[0] = a_.f64[0] / b_.f64[0]; - r_.f64[1] = a_.f64[1]; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_div_sd(a, b) simde_mm_div_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_extract_epi16(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) -{ - uint16_t r; - simde__m128i_private a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) -#if defined(SIMDE_BUG_GCC_95227) - (void)a_; - (void)imm8; -#endif - r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); -#else - r = a_.u16[imm8 & 7]; -#endif - - return HEDLEY_STATIC_CAST(int32_t, r); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4, 6, 0)) -#define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_extract_epi16(a, imm8) \ - (HEDLEY_STATIC_CAST( \ - int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, \ - (imm8))) & \ - (INT32_C(0x0000ffff))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_insert_epi16(simde__m128i a, int16_t i, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) -{ - simde__m128i_private a_ = simde__m128i_to_private(a); - a_.i16[imm8 & 7] = i; - return simde__m128i_from_private(a_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) -#define simde_mm_insert_epi16(a, i, imm8) _mm_insert_epi16((a), (i), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_insert_epi16(a, i, imm8) \ - simde__m128i_from_neon_i16( \ - vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_load_pd(simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_pd(mem_addr); -#else - simde__m128d_private r_; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = - vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const *, mem_addr)); -#else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), - sizeof(r_)); -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_load1_pd(simde_float64 const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load1_pd(mem_addr); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return simde__m128d_from_wasm_v128(wasm_v64x2_load_splat(mem_addr)); -#else - return simde_mm_set1_pd(*mem_addr); -#endif -} -#define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) -#define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_load_sd(simde_float64 const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_sd(mem_addr); -#else - simde__m128d_private r_; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); -#else - r_.f64[0] = *mem_addr; - r_.u64[1] = UINT64_C(0); -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_load_si128(simde__m128i const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_load_si128( - HEDLEY_REINTERPRET_CAST(__m128i const *, mem_addr)); -#else - simde__m128i_private r_; - -#if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_ld( - 0, HEDLEY_REINTERPRET_CAST( - SIMDE_POWER_ALTIVEC_VECTOR(int) const *, mem_addr)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = - vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const *, mem_addr)); -#else - simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), - sizeof(simde__m128i)); -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_loadh_pd(simde__m128d a, simde_float64 const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadh_pd(a, mem_addr); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64( - vget_low_f64(a_.neon_f64), - vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t *, mem_addr))); -#else - simde_float64 t; - - simde_memcpy(&t, mem_addr, sizeof(t)); - r_.f64[0] = a_.f64[0]; - r_.f64[1] = t; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_loadl_epi64(simde__m128i const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_epi64(mem_addr); -#else - simde__m128i_private r_; - - int64_t value; - simde_memcpy(&value, mem_addr, sizeof(value)); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64( - vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), - vdup_n_s64(0)); -#else - r_.i64[0] = value; - r_.i64[1] = 0; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_loadl_pd(simde__m128d a, simde_float64 const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadl_pd(a, mem_addr); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vcombine_f64( - vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t *, mem_addr)), - vget_high_f64(a_.neon_f64)); -#else - r_.f64[0] = *mem_addr; - r_.u64[1] = a_.u64[1]; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadr_pd(simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadr_pd(mem_addr); -#else - simde__m128d_private r_; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vld1q_f64(mem_addr); - r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = - vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); - r_.neon_i64 = vextq_s64(r_.neon_i64, r_.neon_i64, 1); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - v128_t tmp = wasm_v128_load(mem_addr); - r_.wasm_v128 = wasm_v64x2_shuffle(tmp, tmp, 1, 0); -#else - r_.f64[0] = mem_addr[1]; - r_.f64[1] = mem_addr[0]; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d -simde_mm_loadu_pd(simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_pd(mem_addr); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vld1q_f64(mem_addr); -#else - simde__m128d_private r_; - - simde_memcpy(&r_, mem_addr, sizeof(r_)); - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_loadu_epi8(int8_t const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128( - SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr)); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = - vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const *, mem_addr)); -#else - simde_memcpy(&r_, mem_addr, sizeof(r_)); -#endif - - return simde__m128i_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_loadu_epi16(int16_t const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128( - SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr)); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = - vld1q_s16(HEDLEY_REINTERPRET_CAST(int16_t const *, mem_addr)); -#else - simde_memcpy(&r_, mem_addr, sizeof(r_)); -#endif - - return simde__m128i_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_loadu_epi32(int32_t const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128( - SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr)); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = - vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const *, mem_addr)); -#else - simde_memcpy(&r_, mem_addr, sizeof(r_)); -#endif - - return simde__m128i_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_loadu_epi64(int64_t const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128( - SIMDE_ALIGN_CAST(simde__m128i const *, mem_addr)); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = - vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); -#else - simde_memcpy(&r_, mem_addr, sizeof(r_)); -#endif - - return simde__m128i_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_loadu_si128(void const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const *, mem_addr)); -#else - simde__m128i_private r_; - -#if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias, 3, 3, 0) - HEDLEY_DIAGNOSTIC_PUSH - SIMDE_DIAGNOSTIC_DISABLE_PACKED_ - struct simde_mm_loadu_si128_s { - __typeof__(r_) v; - } __attribute__((__packed__, __may_alias__)); - r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, - mem_addr) - ->v; - HEDLEY_DIAGNOSTIC_POP -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - /* Note that this is a lower priority than the struct above since - * clang assumes mem_addr is aligned (since it is a __m128i*). */ - r_.neon_i32 = - vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const *, mem_addr)); -#else - simde_memcpy(&r_, mem_addr, sizeof(r_)); -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_madd_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_madd_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t pl = - vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i32 = vpaddq_s32(pl, ph); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x4_t pl = - vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); - int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), - vget_high_s16(b_.neon_i16)); - int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); - int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); - r_.neon_i32 = vcombine_s32(rl, rh); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - static const SIMDE_POWER_ALTIVEC_VECTOR(int) tz = {0, 0, 0, 0}; - r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, tz); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i += 2) { - r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + - (a_.i16[i + 1] * b_.i16[i + 1]); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_madd_epi16(a, b) simde_mm_madd_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_maskmoveu_si128(simde__m128i a, simde__m128i mask, - int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char *, mem_addr)); -#else - simde__m128i_private a_ = simde__m128i_to_private(a), - mask_ = simde__m128i_to_private(mask); - - for (size_t i = 0; i < (sizeof(a_.i8) / sizeof(a_.i8[0])); i++) { - if (mask_.u8[i] & 0x80) { - mem_addr[i] = a_.i8[i]; - } - } -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_maskmoveu_si128(a, mask, mem_addr) \ - simde_mm_maskmoveu_si128( \ - (a), (mask), \ - SIMDE_CHECKED_REINTERPRET_CAST(int8_t *, char *, (mem_addr))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_movemask_epi8(simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) - /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ - return _mm_movemask_epi8(a); -#else - int32_t r = 0; - simde__m128i_private a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint8x16_t input = a_.neon_u8; - const int8_t xr[16] = {-7, -6, -5, -4, -3, -2, -1, 0, - -7, -6, -5, -4, -3, -2, -1, 0}; - const uint8x16_t mask_and = vdupq_n_u8(0x80); - const int8x16_t mask_shift = vld1q_s8(xr); - const uint8x16_t mask_result = - vshlq_u8(vandq_u8(input, mask_and), mask_shift); - uint8x8_t lo = vget_low_u8(mask_result); - uint8x8_t hi = vget_high_u8(mask_result); - r = vaddv_u8(lo) + (vaddv_u8(hi) << 8); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - // Use increasingly wide shifts+adds to collect the sign bits - // together. - // Since the widening shifts would be rather confusing to follow in little endian, everything - // will be illustrated in big endian order instead. This has a different result - the bits - // would actually be reversed on a big endian machine. - - // Starting input (only half the elements are shown): - // 89 ff 1d c0 00 10 99 33 - uint8x16_t input = a_.neon_u8; - - // Shift out everything but the sign bits with an unsigned shift right. - // - // Bytes of the vector:: - // 89 ff 1d c0 00 10 99 33 - // \ \ \ \ \ \ \ \ high_bits = (uint16x4_t)(input >> 7) - // | | | | | | | | - // 01 01 00 01 00 00 01 00 - // - // Bits of first important lane(s): - // 10001001 (89) - // \______ - // | - // 00000001 (01) - uint16x8_t high_bits = vreinterpretq_u16_u8(vshrq_n_u8(input, 7)); - - // Merge the even lanes together with a 16-bit unsigned shift right + add. - // 'xx' represents garbage data which will be ignored in the final result. - // In the important bytes, the add functions like a binary OR. - // - // 01 01 00 01 00 00 01 00 - // \_ | \_ | \_ | \_ | paired16 = (uint32x4_t)(input + (input >> 7)) - // \| \| \| \| - // xx 03 xx 01 xx 00 xx 02 - // - // 00000001 00000001 (01 01) - // \_______ | - // \| - // xxxxxxxx xxxxxx11 (xx 03) - uint32x4_t paired16 = - vreinterpretq_u32_u16(vsraq_n_u16(high_bits, high_bits, 7)); - - // Repeat with a wider 32-bit shift + add. - // xx 03 xx 01 xx 00 xx 02 - // \____ | \____ | paired32 = (uint64x1_t)(paired16 + (paired16 >> 14)) - // \| \| - // xx xx xx 0d xx xx xx 02 - // - // 00000011 00000001 (03 01) - // \\_____ || - // '----.\|| - // xxxxxxxx xxxx1101 (xx 0d) - uint64x2_t paired32 = - vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14)); - - // Last, an even wider 64-bit shift + add to get our result in the low 8 bit lanes. - // xx xx xx 0d xx xx xx 02 - // \_________ | paired64 = (uint8x8_t)(paired32 + (paired32 >> 28)) - // \| - // xx xx xx xx xx xx xx d2 - // - // 00001101 00000010 (0d 02) - // \ \___ | | - // '---. \| | - // xxxxxxxx 11010010 (xx d2) - uint8x16_t paired64 = - vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28)); - - // Extract the low 8 bits from each 64-bit lane with 2 8-bit extracts. - // xx xx xx xx xx xx xx d2 - // || return paired64[0] - // d2 - // Note: Little endian would return the correct value 4b (01001011) instead. - r = vgetq_lane_u8(paired64, 0) | - (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u8(paired64, 8)) << 8); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - !defined(HEDLEY_IBM_VERSION) && \ - (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) - perm = {120, 112, 104, 96, 88, 80, 72, 64, - 56, 48, 40, 32, 24, 16, 8, 0}; - r = HEDLEY_STATIC_CAST( - int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - !defined(HEDLEY_IBM_VERSION) && \ - (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) - static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) - perm = {120, 112, 104, 96, 88, 80, 72, 64, - 56, 48, 40, 32, 24, 16, 8, 0}; - r = HEDLEY_STATIC_CAST( - int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); -#else - SIMDE_VECTORIZE_REDUCTION(| : r) - for (size_t i = 0; i < (sizeof(a_.u8) / sizeof(a_.u8[0])); i++) { - r |= (a_.u8[15 - i] >> 7) << (15 - i); - } -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int32_t simde_mm_movemask_pd(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_movemask_pd(a); -#else - int32_t r = 0; - simde__m128d_private a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - static const int64_t shift_amount[] = {0, 1}; - const int64x2_t shift = vld1q_s64(shift_amount); - uint64x2_t tmp = vshrq_n_u64(a_.neon_u64, 63); - return HEDLEY_STATIC_CAST(int32_t, vaddvq_u64(vshlq_u64(tmp, shift))); -#else - SIMDE_VECTORIZE_REDUCTION(| : r) - for (size_t i = 0; i < (sizeof(a_.u64) / sizeof(a_.u64[0])); i++) { - r |= (a_.u64[i] >> 63) << i; - } -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_movemask_pd(a) simde_mm_movemask_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_movepi64_pi64(simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movepi64_pi64(a); -#else - simde__m64_private r_; - simde__m128i_private a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i64 = vget_low_s64(a_.neon_i64); -#else - r_.i64[0] = a_.i64[0]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_movpi64_epi64(simde__m64 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_movpi64_epi64(a); -#else - simde__m128i_private r_; - simde__m64_private a_ = simde__m64_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); -#else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_min_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_min_epu8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_epu8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_min_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_min_pd(a, b) simde_mm_min_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_min_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_min_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); -#else - r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_min_sd(a, b) simde_mm_min_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_max_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_max_epu8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_epu8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u8) / sizeof(r_.u8[0])); i++) { - r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_max_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) - r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? a_.f64[i] : b_.f64[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_max_pd(a, b) simde_mm_max_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_max_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_max_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); -#else - r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? a_.f64[0] : b_.f64[0]; - r_.f64[1] = a_.f64[1]; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_max_sd(a, b) simde_mm_max_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_move_epi64(simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_move_epi64(a); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); -#else - r_.i64[0] = a_.i64[0]; - r_.i64[1] = 0; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_move_epi64(a) simde_mm_move_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_mul_epu32(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_epu32(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint32x2_t a_lo = vmovn_u64(a_.neon_u64); - uint32x2_t b_lo = vmovn_u64(b_.neon_u64); - r_.neon_u64 = vmull_u32(a_lo, b_lo); -#elif defined(SIMDE_SHUFFLE_VECTOR_) && \ - (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) - __typeof__(a_.u32) z = { - 0, - }; - a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); - b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); - r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * - HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) { - r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * - HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_mul_epi64(simde__m128i a, simde__m128i b) -{ - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 * b_.i64; -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_s64(a_.neon_f64, b_.neon_f64); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = a_.i64[i] * b_.i64[i]; - } -#endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_mod_epi64(simde__m128i a, simde__m128i b) -{ - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 % b_.i64; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = a_.i64[i] % b_.i64[i]; - } -#endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_mul_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 * b_.f64; -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = a_.f64[i] * b_.f64[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_mul_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mul_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); - r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); -#else - r_.f64[0] = a_.f64[0] * b_.f64[0]; - r_.f64[1] = a_.f64[1]; -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_mul_su32(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && \ - !defined(__PGI) - return _mm_mul_su32(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.u64[0] = vget_lane_u64( - vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), - vreinterpret_u32_s64(b_.neon_i64))), - 0); -#else - r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * - HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_mulhi_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mulhi_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a3210 = vget_low_s16(a_.neon_i16); - int16x4_t b3210 = vget_low_s16(b_.neon_i16); - int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); - r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), - vreinterpretq_s16_s32(ab7654)); -#else - int16x4_t a7654 = vget_high_s16(a_.neon_i16); - int16x4_t b7654 = vget_high_s16(b_.neon_i16); - int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), - vreinterpretq_u16_s32(ab7654)); - r_.neon_u16 = rv.val[1]; -#endif -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.u16[i] = HEDLEY_STATIC_CAST( - uint16_t, - (HEDLEY_STATIC_CAST( - uint32_t, - HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * - HEDLEY_STATIC_CAST(int32_t, - b_.i16[i])) >> - 16)); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_mulhi_epu16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) - return _mm_mulhi_epu16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - uint16x4_t a3210 = vget_low_u16(a_.neon_u16); - uint16x4_t b3210 = vget_low_u16(b_.neon_u16); - uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); - r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), - vreinterpretq_u16_u32(ab7654)); -#else - uint16x4_t a7654 = vget_high_u16(a_.neon_u16); - uint16x4_t b7654 = vget_high_u16(b_.neon_u16); - uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ - uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), - vreinterpretq_u16_u32(ab7654)); - r_.neon_u16 = neon_r.val[1]; -#endif -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = HEDLEY_STATIC_CAST( - uint16_t, - HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * - HEDLEY_STATIC_CAST(uint32_t, - b_.u16[i]) >> - 16); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_mullo_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_mullo_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - (void)a_; - (void)b_; - r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.u16[i] = HEDLEY_STATIC_CAST( - uint16_t, - HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * - HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_or_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_or_pd(a, b) simde_mm_or_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_or_si128(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_or_si128(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f | b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_or_si128(a, b) simde_mm_or_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_packs_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = - vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i8[i] = (a_.i16[i] > INT8_MAX) - ? INT8_MAX - : ((a_.i16[i] < INT8_MIN) - ? INT8_MIN - : HEDLEY_STATIC_CAST(int8_t, - a_.i16[i])); - r_.i8[i + 8] = (b_.i16[i] > INT8_MAX) - ? INT8_MAX - : ((b_.i16[i] < INT8_MIN) - ? INT8_MIN - : HEDLEY_STATIC_CAST( - int8_t, b_.i16[i])); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_packs_epi32(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packs_epi32(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = - vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i16[i] = (a_.i32[i] > INT16_MAX) - ? INT16_MAX - : ((a_.i32[i] < INT16_MIN) - ? INT16_MIN - : HEDLEY_STATIC_CAST(int16_t, - a_.i32[i])); - r_.i16[i + 4] = - (b_.i32[i] > INT16_MAX) - ? INT16_MAX - : ((b_.i32[i] < INT16_MIN) - ? INT16_MIN - : HEDLEY_STATIC_CAST(int16_t, - b_.i32[i])); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_packus_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_packus_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = - vcombine_u8(vqmovun_s16(a_.neon_i16), vqmovun_s16(b_.neon_i16)); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.u8[i] = (a_.i16[i] > UINT8_MAX) - ? UINT8_MAX - : ((a_.i16[i] < 0) - ? UINT8_C(0) - : HEDLEY_STATIC_CAST(uint8_t, - a_.i16[i])); - r_.u8[i + 8] = - (b_.i16[i] > UINT8_MAX) - ? UINT8_MAX - : ((b_.i16[i] < 0) - ? UINT8_C(0) - : HEDLEY_STATIC_CAST(uint8_t, - b_.i16[i])); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_pause(void) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_pause(); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_pause() (simde_mm_pause()) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sad_epu8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sad_epu8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); - r_.neon_u64 = vcombine_u64(vpaddl_u32(vpaddl_u16(vget_low_u16(t))), - vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); -#else - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - uint16_t tmp = 0; - SIMDE_VECTORIZE_REDUCTION(+ : tmp) - for (size_t j = 0; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2); - j++) { - const size_t e = j + (i * 8); - tmp += (a_.u8[e] > b_.u8[e]) ? (a_.u8[e] - b_.u8[e]) - : (b_.u8[e] - a_.u8[e]); - } - r_.i64[i] = tmp; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set_epi8(int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) -{ - -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, - e4, e3, e2, e1, e0); -#else - simde__m128i_private r_; - -#if defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, - e10, e11, e12, e13, e14, e15); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int8x16_t) - int8_t data[16] = {e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15}; - r_.neon_i8 = vld1q_s8(data); -#else - r_.i8[0] = e0; - r_.i8[1] = e1; - r_.i8[2] = e2; - r_.i8[3] = e3; - r_.i8[4] = e4; - r_.i8[5] = e5; - r_.i8[6] = e6; - r_.i8[7] = e7; - r_.i8[8] = e8; - r_.i8[9] = e9; - r_.i8[10] = e10; - r_.i8[11] = e11; - r_.i8[12] = e12; - r_.i8[13] = e13; - r_.i8[14] = e14; - r_.i8[15] = e15; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, \ - e2, e1, e0) \ - simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, \ - e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set_epi16(int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int16x8_t) - int16_t data[8] = {e0, e1, e2, e3, e4, e5, e6, e7}; - r_.neon_i16 = vld1q_s16(data); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); -#else - r_.i16[0] = e0; - r_.i16[1] = e1; - r_.i16[2] = e2; - r_.i16[3] = e3; - r_.i16[4] = e4; - r_.i16[5] = e5; - r_.i16[6] = e6; - r_.i16[7] = e7; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_loadu_si16(void const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(20, 21, 1)) - return _mm_loadu_si16(mem_addr); -#else - int16_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_x_mm_cvtsi16_si128(val); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set_epi32(int32_t e3, int32_t e2, int32_t e1, int32_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32(e3, e2, e1, e0); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = {e0, e1, e2, e3}; - r_.neon_i32 = vld1q_s32(data); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); -#else - r_.i32[0] = e0; - r_.i32[1] = e1; - r_.i32[2] = e2; - r_.i32[3] = e3; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_loadu_si32(void const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(20, 21, 1)) - return _mm_loadu_si32(mem_addr); -#else - int32_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi32_si128(val); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set_epi64(simde__m64 e1, simde__m64 e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set_epi64(e1, e0); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), - simde__m64_to_neon_i64(e1)); -#else - r_.m64[0] = e0; - r_.m64[1] = e1; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set_epi64x(int64_t e1, int64_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19, 0, 0)) - return _mm_set_epi64x(e1, e0); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; - r_.neon_i64 = vld1q_s64(data); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_make(e0, e1); -#else - r_.i64[0] = e0; - r_.i64[1] = e1; -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_loadu_si64(void const *mem_addr) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(20, 21, 1)) - return _mm_loadu_si64(mem_addr); -#else - int64_t val; - simde_memcpy(&val, mem_addr, sizeof(val)); - return simde_mm_cvtsi64_si128(val); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_loadu_si64(mem_addr) simde_mm_loadu_si64(mem_addr) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_set_epu8(uint8_t e15, uint8_t e14, uint8_t e13, - uint8_t e12, uint8_t e11, uint8_t e10, - uint8_t e9, uint8_t e8, uint8_t e7, uint8_t e6, - uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, - uint8_t e1, uint8_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi8( - HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), - HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), - HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), - HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), - HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), - HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), - HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2), - HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint8x16_t) - uint8_t data[16] = {e0, e1, e2, e3, e4, e5, e6, e7, - e8, e9, e10, e11, e12, e13, e14, e15}; - r_.neon_u8 = vld1q_u8(data); -#else - r_.u8[0] = e0; - r_.u8[1] = e1; - r_.u8[2] = e2; - r_.u8[3] = e3; - r_.u8[4] = e4; - r_.u8[5] = e5; - r_.u8[6] = e6; - r_.u8[7] = e7; - r_.u8[8] = e8; - r_.u8[9] = e9; - r_.u8[10] = e10; - r_.u8[11] = e11; - r_.u8[12] = e12; - r_.u8[13] = e13; - r_.u8[14] = e14; - r_.u8[15] = e15; -#endif - - return simde__m128i_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_set_epu16(uint16_t e7, uint16_t e6, uint16_t e5, - uint16_t e4, uint16_t e3, uint16_t e2, - uint16_t e1, uint16_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi16( - HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), - HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), - HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), - HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, e0)); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint16x8_t) - uint16_t data[8] = {e0, e1, e2, e3, e4, e5, e6, e7}; - r_.neon_u16 = vld1q_u16(data); -#else - r_.u16[0] = e0; - r_.u16[1] = e1; - r_.u16[2] = e2; - r_.u16[3] = e3; - r_.u16[4] = e4; - r_.u16[5] = e5; - r_.u16[6] = e6; - r_.u16[7] = e7; -#endif - - return simde__m128i_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_set_epu32(uint32_t e3, uint32_t e2, uint32_t e1, - uint32_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_epi32(HEDLEY_STATIC_CAST(int, e3), - HEDLEY_STATIC_CAST(int, e2), - HEDLEY_STATIC_CAST(int, e1), - HEDLEY_STATIC_CAST(int, e0)); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = {e0, e1, e2, e3}; - r_.neon_u32 = vld1q_u32(data); -#else - r_.u32[0] = e0; - r_.u32[1] = e1; - r_.u32[2] = e2; - r_.u32[3] = e3; -#endif - - return simde__m128i_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_set_epu64x(uint64_t e1, uint64_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19, 0, 0)) - return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), - HEDLEY_STATIC_CAST(int64_t, e0)); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; - r_.neon_u64 = vld1q_u64(data); -#else - r_.u64[0] = e0; - r_.u64[1] = e1; -#endif - - return simde__m128i_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_set_sd(simde_float64 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set_sd(a); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - return vsetq_lane_f64(a, vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); -#else - return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set_sd(a) simde_mm_set_sd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set1_epi8(int8_t a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi8(a); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vdupq_n_s8(a); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_splat(a); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = a; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set1_epi8(a) simde_mm_set1_epi8(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set1_epi16(int16_t a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi16(a); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vdupq_n_s16(a); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_splat(a); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set1_epi16(a) simde_mm_set1_epi16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set1_epi32(int32_t a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_set1_epi32(a); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vdupq_n_s32(a); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_splat(a); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set1_epi32(a) simde_mm_set1_epi32(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set1_epi64x(int64_t a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19, 0, 0)) - return _mm_set1_epi64x(a); -#else - simde__m128i_private r_; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vdupq_n_s64(a); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i64x2_splat(a); -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = a; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_set1_epi64(simde__m64 a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_set1_epi64(a); -#else - simde__m64_private a_ = simde__m64_to_private(a); - return simde_mm_set1_epi64x(a_.i64[0]); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_set1_epi64(a) simde_mm_set1_epi64(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_set1_epu8(uint8_t value) -{ -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return simde__m128i_from_altivec_u8( - vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); -#else - return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_set1_epu16(uint16_t value) -{ -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return simde__m128i_from_altivec_u16( - vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); -#else - return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_set1_epu32(uint32_t value) -{ -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return simde__m128i_from_altivec_u32( - vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); -#else - return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_set1_epu64(uint64_t value) -{ -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) - return simde__m128i_from_altivec_u64( - vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); -#else - return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_setr_epi8(int8_t e15, int8_t e14, int8_t e13, int8_t e12, - int8_t e11, int8_t e10, int8_t e9, int8_t e8, - int8_t e7, int8_t e6, int8_t e5, int8_t e4, - int8_t e3, int8_t e2, int8_t e1, int8_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, - e4, e3, e2, e1, e0); -#else - return simde_mm_set_epi8(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, - e11, e12, e13, e14, e15); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, \ - e3, e2, e1, e0) \ - simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, \ - e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_setr_epi16(int16_t e7, int16_t e6, int16_t e5, int16_t e4, - int16_t e3, int16_t e2, int16_t e1, int16_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); -#else - return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) \ - simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_setr_epi32(int32_t e3, int32_t e2, int32_t e1, int32_t e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_epi32(e3, e2, e1, e0); -#else - return simde_mm_set_epi32(e0, e1, e2, e3); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_setr_epi64(simde__m64 e1, simde__m64 e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_setr_epi64(e1, e0); -#else - return simde_mm_set_epi64(e0, e1); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_setr_pd(simde_float64 e1, simde_float64 e0) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setr_pd(e1, e0); -#else - return simde_mm_set_pd(e0, e1); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_setzero_pd(void) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_setzero_pd(); -#else - return simde_mm_castsi128_pd(simde_mm_setzero_si128()); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_setzero_pd() simde_mm_setzero_pd() -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_undefined_pd(void) -{ - simde__m128d_private r_; - -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_pd(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128d_to_private(simde_mm_setzero_pd()); -#endif - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_undefined_pd() simde_mm_undefined_pd() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_undefined_si128(void) -{ - simde__m128i_private r_; - -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) - r_.n = _mm_undefined_si128(); -#elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) - r_ = simde__m128i_to_private(simde_mm_setzero_si128()); -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_undefined_si128() (simde_mm_undefined_si128()) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_x_mm_setone_pd(void) -{ - return simde_mm_castps_pd(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_setone_si128(void) -{ - return simde_mm_castps_si128(simde_x_mm_setone_ps()); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_shuffle_epi32(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_shuffle_epi32(a, imm8) \ - __extension__({ \ - int32x4_t ret; \ - ret = vmovq_n_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \ - (imm8) & (0x3))); \ - ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \ - ((imm8) >> 2) & 0x3), \ - ret, 1); \ - ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \ - ((imm8) >> 4) & 0x3), \ - ret, 2); \ - ret = vsetq_lane_s32(vgetq_lane_s32(vreinterpretq_s32_s64(a), \ - ((imm8) >> 6) & 0x3), \ - ret, 3); \ - vreinterpretq_s64_s32(ret); \ - }) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -#define simde_mm_shuffle_epi32(a, imm8) \ - (__extension__({ \ - const simde__m128i_private simde__tmp_a_ = \ - simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private){ \ - .i32 = SIMDE_SHUFFLE_VECTOR_( \ - 32, 16, (simde__tmp_a_).i32, \ - (simde__tmp_a_).i32, ((imm8)) & 3, \ - ((imm8) >> 2) & 3, ((imm8) >> 4) & 3, \ - ((imm8) >> 6) & 3)}); \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_shuffle_pd(simde__m128d a, simde__m128d b, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) -{ - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; - r_.f64[1] = ((imm8 & 2) == 0) ? b_.f64[0] : b_.f64[1]; - - return simde__m128d_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) -#define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -#define simde_mm_shuffle_pd(a, b, imm8) \ - (__extension__({ \ - simde__m128d_from_private((simde__m128d_private){ \ - .f64 = SIMDE_SHUFFLE_VECTOR_( \ - 64, 16, simde__m128d_to_private(a).f64, \ - simde__m128d_to_private(b).f64, \ - (((imm8)) & 1), (((imm8) >> 1) & 1) + 2)}); \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_shufflehi_epi16(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2); - i++) { - r_.i16[i] = a_.i16[i]; - } - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2); - i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_shufflehi_epi16(a, imm8) \ - __extension__({ \ - int16x8_t ret = vreinterpretq_s16_s64(a); \ - int16x4_t highBits = vget_high_s16(ret); \ - ret = vsetq_lane_s16(vget_lane_s16(highBits, (imm8) & (0x3)), \ - ret, 4); \ - ret = vsetq_lane_s16( \ - vget_lane_s16(highBits, ((imm8) >> 2) & 0x3), ret, 5); \ - ret = vsetq_lane_s16( \ - vget_lane_s16(highBits, ((imm8) >> 4) & 0x3), ret, 6); \ - ret = vsetq_lane_s16( \ - vget_lane_s16(highBits, ((imm8) >> 6) & 0x3), ret, 7); \ - vreinterpretq_s64_s16(ret); \ - }) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -#define simde_mm_shufflehi_epi16(a, imm8) \ - (__extension__({ \ - const simde__m128i_private simde__tmp_a_ = \ - simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private){ \ - .i16 = SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, (simde__tmp_a_).i16, \ - (simde__tmp_a_).i16, 0, 1, 2, 3, \ - (((imm8)) & 3) + 4, (((imm8) >> 2) & 3) + 4, \ - (((imm8) >> 4) & 3) + 4, \ - (((imm8) >> 6) & 3) + 4)}); \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_shufflelo_epi16(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - - for (size_t i = 0; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2); - i++) { - r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; - } - SIMDE_VECTORIZE - for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2); - i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[i]; - } - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_shufflelo_epi16(a, imm8) \ - __extension__({ \ - int16x8_t ret = vreinterpretq_s16_s64(a); \ - int16x4_t lowBits = vget_low_s16(ret); \ - ret = vsetq_lane_s16(vget_lane_s16(lowBits, (imm8) & (0x3)), \ - ret, 0); \ - ret = vsetq_lane_s16( \ - vget_lane_s16(lowBits, ((imm8) >> 2) & 0x3), ret, 1); \ - ret = vsetq_lane_s16( \ - vget_lane_s16(lowBits, ((imm8) >> 4) & 0x3), ret, 2); \ - ret = vsetq_lane_s16( \ - vget_lane_s16(lowBits, ((imm8) >> 6) & 0x3), ret, 3); \ - vreinterpretq_s64_s16(ret); \ - }) -#elif defined(SIMDE_SHUFFLE_VECTOR_) -#define simde_mm_shufflelo_epi16(a, imm8) \ - (__extension__({ \ - const simde__m128i_private simde__tmp_a_ = \ - simde__m128i_to_private(a); \ - simde__m128i_from_private((simde__m128i_private){ \ - .i16 = SIMDE_SHUFFLE_VECTOR_( \ - 16, 16, (simde__tmp_a_).i16, \ - (simde__tmp_a_).i16, (((imm8)) & 3), \ - (((imm8) >> 2) & 3), (((imm8) >> 4) & 3), \ - (((imm8) >> 6) & 3), 4, 5, 6, 7)}); \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sll_epi16(simde__m128i a, simde__m128i count) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi16(a, count); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 15) - return simde_mm_setzero_si128(); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = (a_.u16 << count_.u64[0]); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST( - int16_t, count_.u64[0]))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = - ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) - ? wasm_i16x8_shl(a_.wasm_v128, - HEDLEY_STATIC_CAST( - int32_t, - wasm_i64x2_extract_lane( - count_.wasm_v128, 0))) - : wasm_i16x8_const(0, 0, 0, 0, 0, 0, 0, 0)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, - (a_.u16[i] << count_.u64[0])); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sll_epi32(simde__m128i a, simde__m128i count) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi32(a, count); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 31) - return simde_mm_setzero_si128(); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = (a_.u32 << count_.u64[0]); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST( - int32_t, count_.u64[0]))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = - ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) - ? wasm_i32x4_shl(a_.wasm_v128, - HEDLEY_STATIC_CAST( - int32_t, - wasm_i64x2_extract_lane( - count_.wasm_v128, 0))) - : wasm_i32x4_const(0, 0, 0, 0)); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) { - r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, - (a_.u32[i] << count_.u64[0])); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sll_epi64(simde__m128i a, simde__m128i count) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sll_epi64(a, count); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - if (count_.u64[0] > 63) - return simde_mm_setzero_si128(); - - const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, - vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = (s < 64) ? wasm_i64x2_shl(a_.wasm_v128, s) - : wasm_i64x2_const(0, 0); -#else -#if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE -#endif - for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) { - r_.u64[i] = a_.u64[i] << s; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_sqrt_pd(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_pd(a); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsqrtq_f64(a_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); -#elif defined(simde_math_sqrt) - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = simde_math_sqrt(a_.f64[i]); - } -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_sqrt_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sqrt_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(simde_math_sqrt) - r_.f64[0] = simde_math_sqrt(b_.f64[0]); - r_.f64[1] = a_.f64[1]; -#else - HEDLEY_UNREACHABLE(); -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sqrt_sd(a, b) simde_mm_sqrt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_srl_epi16(simde__m128i a, simde__m128i count) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi16(a, count); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST( - int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vshlq_u16(a_.neon_u16, - vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u16) / sizeof(r_.u16[0])); i++) { - r_.u16[i] = a_.u16[i] >> cnt; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_srl_epi32(simde__m128i a, simde__m128i count) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi32(a, count); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST( - int, (count_.i64[0] > 32 ? 32 : count_.i64[0])); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vshlq_u32(a_.neon_u32, - vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, cnt); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) { - r_.u32[i] = a_.u32[i] >> cnt; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_srl_epi64(simde__m128i a, simde__m128i count) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_srl_epi64(a, count); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST( - int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, - vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, cnt); -#else -#if !defined(SIMDE_BUG_GCC_94488) - SIMDE_VECTORIZE -#endif - for (size_t i = 0; i < (sizeof(r_.u64) / sizeof(r_.u64[0])); i++) { - r_.u64[i] = a_.u64[i] >> cnt; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_srai_epi16(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - /* MSVC requires a range of (0, 255). */ - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~15) ? 15 : imm8; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, - vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, cnt); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_srai_epi32(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - /* MSVC requires a range of (0, 255). */ - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - - const int cnt = (imm8 & ~31) ? 31 : imm8; - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, cnt); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sra_epi16(simde__m128i a, simde__m128i count) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sra_epi16(a, count); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = HEDLEY_STATIC_CAST( - int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vshlq_s16(a_.neon_i16, - vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, cnt); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[i] >> cnt; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sra_epi32(simde__m128i a, simde__m128i count) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) - return _mm_sra_epi32(a, count); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - count_ = simde__m128i_to_private(count); - - const int cnt = count_.u64[0] > 31 - ? 31 - : HEDLEY_STATIC_CAST(int, count_.u64[0]); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vshlq_s32(a_.neon_i32, - vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, cnt); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] >> cnt; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_slli_epi16(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i16 = a_.i16 << (imm8 & 0xff); -#else - const int s = - (imm8 > - HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) - ? 0 - : imm8; - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); - } -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_slli_epi16(a, imm8) \ - (__extension__({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 15) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_neon_i16(vshlq_n_s16( \ - simde__m128i_to_neon_i16(a), ((imm8)&15))); \ - } \ - ret; \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -#define simde_mm_slli_epi16(a, imm8) \ - ((imm8 < 16) \ - ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) \ - : wasm_i16x8_const(0, 0, 0, 0, 0, 0, 0, 0)) -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) -#define simde_mm_slli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() \ - : simde__m128i_from_altivec_i16( \ - vec_sl(simde__m128i_to_altivec_i16(a), \ - vec_splat_u16(HEDLEY_STATIC_CAST( \ - unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_slli_epi32(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i32 = a_.i32 << imm8; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] << (imm8 & 0xff); - } -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_slli_epi32(a, imm8) \ - (__extension__({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_neon_i32(vshlq_n_s32( \ - simde__m128i_to_neon_i32(a), ((imm8)&31))); \ - } \ - ret; \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -#define simde_mm_slli_epi32(a, imm8) \ - ((imm8 < 32) \ - ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) \ - : wasm_i32x4_const(0, 0, 0, 0)) -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) -#define simde_mm_slli_epi32(a, imm8) \ - (__extension__({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sl(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST( \ - unsigned int, (imm8)&31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_slli_epi64(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - if (HEDLEY_UNLIKELY((imm8 > 63))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.i64 = a_.i64 << imm8; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = a_.i64[i] << (imm8 & 0xff); - } -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_slli_epi64(a, imm8) \ - (__extension__({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 63) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_neon_i64(vshlq_n_s64( \ - simde__m128i_to_neon_i64(a), ((imm8)&63))); \ - } \ - ret; \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -#define simde_mm_slli_epi64(a, imm8) \ - ((imm8 < 64) \ - ? wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) \ - : wasm_i64x2_const(0, 0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_srli_epi16(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - if (HEDLEY_UNLIKELY((imm8 > 15))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u16 = a_.u16 >> imm8; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); - } -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_srli_epi16(a, imm8) \ - (__extension__({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 15) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_neon_u16(vshrq_n_u16( \ - simde__m128i_to_neon_u16(a), \ - (((imm8)&15) | (((imm8)&15) == 0)))); \ - } \ - ret; \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -#define simde_mm_srli_epi16(a, imm8) \ - ((imm8 < 16) \ - ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) \ - : wasm_i16x8_const(0, 0, 0, 0, 0, 0, 0, 0)) -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) -#define simde_mm_srli_epi16(a, imm8) \ - ((imm8 & ~15) ? simde_mm_setzero_si128() \ - : simde__m128i_from_altivec_i16( \ - vec_sr(simde__m128i_to_altivec_i16(a), \ - vec_splat_u16(HEDLEY_STATIC_CAST( \ - unsigned short, imm8))))) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_srli_epi32(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - if (HEDLEY_UNLIKELY((imm8 > 31))) { - return simde_mm_setzero_si128(); - } - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) - r_.u32 = a_.u32 >> (imm8 & 0xff); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); - } -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_srli_epi32(a, imm8) \ - (__extension__({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_neon_u32(vshrq_n_u32( \ - simde__m128i_to_neon_u32(a), \ - (((imm8)&31) | (((imm8)&31) == 0)))); \ - } \ - ret; \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -#define simde_mm_srli_epi32(a, imm8) \ - ((imm8 < 32) \ - ? wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) \ - : wasm_i32x4_const(0, 0, 0, 0)) -#elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) -#define simde_mm_srli_epi32(a, imm8) \ - (__extension__({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 31) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_altivec_i32( \ - vec_sr(simde__m128i_to_altivec_i32(a), \ - vec_splats(HEDLEY_STATIC_CAST( \ - unsigned int, (imm8)&31)))); \ - } \ - ret; \ - })) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_srli_epi64(simde__m128i a, const int imm8) - SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) -{ - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - - if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) - return simde_mm_setzero_si128(); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); -#else -#if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) - r_.u64 = a_.u64 >> imm8; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.u64[i] = a_.u64[i] >> imm8; - } -#endif -#endif - - return simde__m128i_from_private(r_); -} -#if defined(SIMDE_X86_SSE2_NATIVE) -#define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) -#define simde_mm_srli_epi64(a, imm8) \ - (__extension__({ \ - simde__m128i ret; \ - if ((imm8) <= 0) { \ - ret = a; \ - } else if ((imm8) > 63) { \ - ret = simde_mm_setzero_si128(); \ - } else { \ - ret = simde__m128i_from_neon_u64(vshrq_n_u64( \ - simde__m128i_to_neon_u64(a), \ - (((imm8)&63) | (((imm8)&63) == 0)))); \ - } \ - ret; \ - })) -#elif defined(SIMDE_WASM_SIMD128_NATIVE) -#define simde_mm_srli_epi64(a, imm8) \ - ((imm8 < 64) \ - ? wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) \ - : wasm_i64x2_const(0, 0)) -#endif -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_store_pd(simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], - simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_pd(mem_addr, a); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), - simde__m128d_to_private(a).neon_i64); -#else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, - sizeof(a)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_store_pd(mem_addr, a) \ - simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_store1_pd(simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], - simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store1_pd(mem_addr, a); -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); -#else - mem_addr[0] = a_.f64[0]; - mem_addr[1] = a_.f64[0]; -#endif -#endif -} -#define simde_mm_store_pd1(mem_addr, a) \ - simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_store1_pd(mem_addr, a) \ - simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#define _mm_store_pd1(mem_addr, a) \ - simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_store_sd(simde_float64 *mem_addr, simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_sd(mem_addr, a); -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); - simde_memcpy(mem_addr, &v, sizeof(v)); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); - simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), &v, - sizeof(v)); -#else - simde_float64 v = a_.f64[0]; - simde_memcpy(mem_addr, &v, sizeof(simde_float64)); -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_store_sd(mem_addr, a) \ - simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_store_si128(simde__m128i *mem_addr, simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_store_si128(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a); -#else - simde__m128i_private a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t *, mem_addr), a_.neon_i32); -#else - simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, - sizeof(a_)); -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storeh_pd(simde_float64 *mem_addr, simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeh_pd(mem_addr, a); -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); -#else - *mem_addr = a_.f64[1]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_storeh_pd(mem_addr, a) \ - simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storel_epi64(simde__m128i *mem_addr, simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a); -#else - simde__m128i_private a_ = simde__m128i_to_private(a); - int64_t tmp; - - /* memcpy to prevent aliasing, tmp because we can't take the - * address of a vector element. */ - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - tmp = vgetq_lane_s64(a_.neon_i64, 0); -#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) -#if defined(SIMDE_BUG_GCC_95227) - (void)a_; -#endif - tmp = vec_extract(a_.altivec_i64, 0); -#else - tmp = a_.i64[0]; -#endif - - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storel_pd(simde_float64 *mem_addr, simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storel_pd(mem_addr, a); -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - - simde_float64 tmp; -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - tmp = vgetq_lane_f64(a_.neon_f64, 0); -#else - tmp = a_.f64[0]; -#endif - simde_memcpy(mem_addr, &tmp, sizeof(tmp)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_storel_pd(mem_addr, a) \ - simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storer_pd(simde_float64 mem_addr[2], simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storer_pd(mem_addr, a); -#else - simde__m128d_private a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t *, mem_addr), - vextq_s64(a_.neon_i64, a_.neon_i64, 1)); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); - simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); -#else - mem_addr[0] = a_.f64[1]; - mem_addr[1] = a_.f64[0]; -#endif -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_storer_pd(mem_addr, a) \ - simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storeu_pd(simde_float64 *mem_addr, simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_pd(mem_addr, a); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); -#else - simde_memcpy(mem_addr, &a, sizeof(a)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_storeu_pd(mem_addr, a) \ - simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storeu_si128(simde__m128i *mem_addr, simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a); -#else - simde_memcpy(mem_addr, &a, sizeof(a)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storeu_si16(void *mem_addr, simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(20, 21, 1)) - _mm_storeu_si16(mem_addr, a); -#else - int16_t val = simde_x_mm_cvtsi128_si16(a); - simde_memcpy(mem_addr, &val, sizeof(val)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storeu_si32(void *mem_addr, simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(20, 21, 1)) - _mm_storeu_si32(mem_addr, a); -#else - int32_t val = simde_mm_cvtsi128_si32(a); - simde_memcpy(mem_addr, &val, sizeof(val)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_storeu_si64(void *mem_addr, simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && \ - (SIMDE_DETECT_CLANG_VERSION_CHECK(8, 0, 0) || \ - HEDLEY_GCC_VERSION_CHECK(11, 0, 0) || \ - HEDLEY_INTEL_VERSION_CHECK(20, 21, 1)) - _mm_storeu_si64(mem_addr, a); -#else - int64_t val = simde_mm_cvtsi128_si64(a); - simde_memcpy(mem_addr, &val, sizeof(val)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_stream_pd(simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], - simde__m128d a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_pd(mem_addr, a); -#else - simde_memcpy(mem_addr, &a, sizeof(a)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_stream_pd(mem_addr, a) \ - simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double *, mem_addr), a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_stream_si128(simde__m128i *mem_addr, simde__m128i a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) - _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i *, mem_addr), a); -#else - simde_memcpy(mem_addr, &a, sizeof(a)); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_stream_si32(int32_t *mem_addr, int32_t a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_stream_si32(mem_addr, a); -#else - *mem_addr = a; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_stream_si64(int64_t *mem_addr, int64_t a) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && \ - !defined(HEDLEY_MSVC_VERSION) - _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int *, - int64_t *, mem_addr), - a); -#else - *mem_addr = a; -#endif -} -#define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_stream_si64(mem_addr, a) \ - simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST( \ - int64_t *, __int64 *, mem_addr), \ - a) -#define _mm_stream_si64x(mem_addr, a) \ - simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST( \ - int64_t *, __int64 *, mem_addr), \ - a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sub_epi8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i8 = a_.i8 - b_.i8; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i8) / sizeof(r_.i8[0])); i++) { - r_.i8[i] = a_.i8[i] - b_.i8[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sub_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i16 = a_.i16 - b_.i16; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i16) / sizeof(r_.i16[0])); i++) { - r_.i16[i] = a_.i16[i] - b_.i16[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sub_epi32(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi32(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32 = a_.i32 - b_.i32; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32) / sizeof(r_.i32[0])); i++) { - r_.i32[i] = a_.i32[i] - b_.i32[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_sub_epi64(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_epi64(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i64) / sizeof(r_.i64[0])); i++) { - r_.i64[i] = a_.i64[i] - b_.i64[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_sub_epu32(simde__m128i a, simde__m128i b) -{ - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.u32 = a_.u32 - b_.u32; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.u32) / sizeof(r_.u32[0])); i++) { - r_.u32[i] = a_.u32[i] - b_.u32[i]; - } -#endif - - return simde__m128i_from_private(r_); -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_sub_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.f64 = a_.f64 - b_.f64; -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = a_.f64[i] - b_.f64[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_sub_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_sub_sd(a, b); -#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) - return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - - r_.f64[0] = a_.f64[0] - b_.f64[0]; - r_.f64[1] = a_.f64[1]; - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m64 simde_mm_sub_si64(simde__m64 a, simde__m64 b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) - return _mm_sub_si64(a, b); -#else - simde__m64_private r_, a_ = simde__m64_to_private(a), - b_ = simde__m64_to_private(b); - -#if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i64 = a_.i64 - b_.i64; -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); -#else - r_.i64[0] = a_.i64[0] - b_.i64[0]; -#endif - - return simde__m64_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_subs_epi8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i8x16_sub_saturate(a_.wasm_v128, b_.wasm_v128); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i8[0])); i++) { - if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + (b_.i8[i]))) { - r_.i8[i] = INT8_MIN; - } else if ((b_.i8[i]) < 0 && - (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { - r_.i8[i] = INT8_MAX; - } else { - r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); - } - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_subs_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_i16x8_sub_saturate(a_.wasm_v128, b_.wasm_v128); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i++) { - if (((b_.i16[i]) > 0 && - (a_.i16[i]) < INT16_MIN + (b_.i16[i]))) { - r_.i16[i] = INT16_MIN; - } else if ((b_.i16[i]) < 0 && - (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { - r_.i16[i] = INT16_MAX; - } else { - r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); - } - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_subs_epu8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u8x16_sub_saturate(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i8[0])); i++) { - const int32_t x = a_.u8[i] - b_.u8[i]; - if (x < 0) { - r_.u8[i] = 0; - } else if (x > UINT8_MAX) { - r_.u8[i] = UINT8_MAX; - } else { - r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); - } - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_subs_epu16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_subs_epu16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_u16x8_sub_saturate(a_.wasm_v128, b_.wasm_v128); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_) / sizeof(r_.i16[0])); i++) { - const int32_t x = a_.u16[i] - b_.u16[i]; - if (x < 0) { - r_.u16[i] = 0; - } else if (x > UINT16_MAX) { - r_.u16[i] = UINT16_MAX; - } else { - r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); - } - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomieq_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomieq_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32( - vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] == b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f64[0] == b_.f64[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomige_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomige_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] >= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f64[0] >= b_.f64[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomigt_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomigt_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] > b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f64[0] > b_.f64[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomile_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomile_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32( - vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] <= b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f64[0] <= b_.f64[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomilt_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomilt_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32( - vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); - uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); - r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] < b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f64[0] < b_.f64[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -int simde_mm_ucomineq_sd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_ucomineq_sd(a, b); -#else - simde__m128d_private a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - int r; - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); - uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); - uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); - uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32( - vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); - r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != - wasm_f64x2_extract_lane(b_.wasm_v128, 0); -#elif defined(SIMDE_HAVE_FENV_H) - fenv_t envp; - int x = feholdexcept(&envp); - r = a_.f64[0] != b_.f64[0]; - if (HEDLEY_LIKELY(x == 0)) - fesetenv(&envp); -#else - r = a_.f64[0] != b_.f64[0]; -#endif - - return r; -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_PUSH -SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ -#endif - -#if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) -HEDLEY_DIAGNOSTIC_POP -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_lfence(void) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_lfence(); -#else - simde_mm_sfence(); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_lfence() simde_mm_lfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -void simde_mm_mfence(void) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - _mm_mfence(); -#else - simde_mm_sfence(); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_mfence() simde_mm_mfence() -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_unpackhi_epi8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, - 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2); i++) { - r_.i8[(i * 2)] = - a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - r_.i8[(i * 2) + 1] = - b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_unpackhi_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_high_s16(a_.neon_i16); - int16x4_t b1 = vget_high_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, - 14, 7, 15); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2); i++) { - r_.i16[(i * 2)] = - a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - r_.i16[(i * 2) + 1] = - b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_unpackhi_epi32(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi32(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_high_s32(a_.neon_i32); - int32x2_t b1 = vget_high_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2); i++) { - r_.i32[(i * 2)] = - a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - r_.i32[(i * 2) + 1] = - b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_unpackhi_epi64(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_epi64(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_h = vget_high_s64(a_.neon_i64); - int64x1_t b_h = vget_high_s64(b_.neon_i64); - r_.neon_i64 = vcombine_s64(a_h, b_h); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2); i++) { - r_.i64[(i * 2)] = - a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - r_.i64[(i * 2) + 1] = - b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_unpackhi_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpackhi_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x1_t a_l = vget_high_f64(a_.f64); - float64x1_t b_l = vget_high_f64(b_.f64); - r_.neon_f64 = vcombine_f64(a_l, b_l); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2); i++) { - r_.f64[(i * 2)] = - a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - r_.f64[(i * 2) + 1] = - b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_unpacklo_epi8(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi8(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); - int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); - int8x8x2_t result = vzip_s8(a1, b1); - r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, - 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2); i++) { - r_.i8[(i * 2)] = a_.i8[i]; - r_.i8[(i * 2) + 1] = b_.i8[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_unpacklo_epi16(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi16(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int16x4_t a1 = vget_low_s16(a_.neon_i16); - int16x4_t b1 = vget_low_s16(b_.neon_i16); - int16x4x2_t result = vzip_s16(a1, b1); - r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, - 10, 3, 11); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2); i++) { - r_.i16[(i * 2)] = a_.i16[i]; - r_.i16[(i * 2) + 1] = b_.i16[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_unpacklo_epi32(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi32(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int32x2_t a1 = vget_low_s32(a_.neon_i32); - int32x2_t b1 = vget_low_s32(b_.neon_i32); - int32x2x2_t result = vzip_s32(a1, b1); - r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2); i++) { - r_.i32[(i * 2)] = a_.i32[i]; - r_.i32[(i * 2) + 1] = b_.i32[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_unpacklo_epi64(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_epi64(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - int64x1_t a_l = vget_low_s64(a_.i64); - int64x1_t b_l = vget_low_s64(b_.i64); - r_.neon_i64 = vcombine_s64(a_l, b_l); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2); i++) { - r_.i64[(i * 2)] = a_.i64[i]; - r_.i64[(i * 2) + 1] = b_.i64[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_mm_unpacklo_pd(simde__m128d a, simde__m128d b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_unpacklo_pd(a, b); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a), - b_ = simde__m128d_to_private(b); - -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - float64x1_t a_l = vget_low_f64(a_.f64); - float64x1_t b_l = vget_low_f64(b_.f64); - r_.neon_f64 = vcombine_f64(a_l, b_l); -#elif defined(SIMDE_SHUFFLE_VECTOR_) - r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2); i++) { - r_.f64[(i * 2)] = a_.f64[i]; - r_.f64[(i * 2) + 1] = b_.f64[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128d simde_x_mm_negate_pd(simde__m128d a) -{ -#if defined(SIMDE_X86_SSE_NATIVE) - return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); -#else - simde__m128d_private r_, a_ = simde__m128d_to_private(a); - -#if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ - (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8, 1, 0)) - r_.altivec_f64 = vec_neg(a_.altivec_f64); -#elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) - r_.neon_f64 = vnegq_f64(a_.neon_f64); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); -#elif defined(SIMDE_VECTOR_NEGATE) - r_.f64 = -a_.f64; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.f64) / sizeof(r_.f64[0])); i++) { - r_.f64[i] = -a_.f64[i]; - } -#endif - - return simde__m128d_from_private(r_); -#endif -} - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_mm_xor_si128(simde__m128i a, simde__m128i b) -{ -#if defined(SIMDE_X86_SSE2_NATIVE) - return _mm_xor_si128(a, b); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = a_.i32f ^ b_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; - } -#endif - - return simde__m128i_from_private(r_); -#endif -} -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde__m128i simde_x_mm_not_si128(simde__m128i a) -{ -#if defined(SIMDE_X86_AVX512VL_NATIVE) - return _mm_ternarylogic_epi32(a, a, a, 0x55); -#else - simde__m128i_private r_, a_ = simde__m128i_to_private(a); - -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - r_.neon_i32 = vmvnq_s32(a_.neon_i32); -#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) - r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); -#elif defined(SIMDE_WASM_SIMD128_NATIVE) - r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); -#elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.i32f = ~a_.i32f; -#else - SIMDE_VECTORIZE - for (size_t i = 0; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])); i++) { - r_.i32f[i] = ~(a_.i32f[i]); - } -#endif - - return simde__m128i_from_private(r_); -#endif -} - -#define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) -#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) -#define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) -#endif - -SIMDE_END_DECLS_ - -HEDLEY_DIAGNOSTIC_POP - -#endif /* !defined(SIMDE_X86_SSE2_H) */ diff --git a/libobs/util/sse-intrin.h b/libobs/util/sse-intrin.h index df2b075fe9fde6..3c6dbb30d1cd11 100644 --- a/libobs/util/sse-intrin.h +++ b/libobs/util/sse-intrin.h @@ -26,8 +26,13 @@ #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif + +#if defined(__APPLE__) && !defined(__arm64__) +#include +#endif + #define SIMDE_ENABLE_NATIVE_ALIASES PRAGMA_WARN_PUSH -#include "simde/x86/sse2.h" +#include PRAGMA_WARN_POP #endif