diff --git a/src/libImaging/ImPlatform.h b/src/libImaging/ImPlatform.h
index f6e7fb6b921..e5fb4595e39 100644
--- a/src/libImaging/ImPlatform.h
+++ b/src/libImaging/ImPlatform.h
@@ -96,3 +96,5 @@ typedef signed __int64 int64_t;
 #ifdef __GNUC__
 #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
 #endif
+
+#include "ImagingSIMD.h"
diff --git a/src/libImaging/ImagingSIMD.h b/src/libImaging/ImagingSIMD.h
new file mode 100644
index 00000000000..3d80c79d8ec
--- /dev/null
+++ b/src/libImaging/ImagingSIMD.h
@@ -0,0 +1,51 @@
+/* SIMD support for libImaging: normalizes the instruction-set feature macros
+   and pulls in the matching intrinsic headers. */
+#ifndef IMAGING_SIMD_H
+#define IMAGING_SIMD_H
+
+/* Microsoft compiler doesn't limit intrinsics for an architecture.
+   This macro is set only on x86 and means SSE2 and above including AVX2.
+   Skip the definition when the compiler already provides it. */
+#if (defined(_M_X64) || _M_IX86_FP == 2) && !defined(__SSE2__)
+    #define __SSE2__
+#endif
+
+#ifdef __SSE4_2__
+    #define __SSE4__
+#endif
+
+#ifdef __SSE2__
+    #include <mmintrin.h> // MMX
+    #include <xmmintrin.h> // SSE
+    #include <emmintrin.h> // SSE2
+#endif
+#ifdef __SSE4__
+    #include <pmmintrin.h> // SSE3
+    #include <tmmintrin.h> // SSSE3
+    #include <smmintrin.h> // SSE4.1
+    #include <nmmintrin.h> // SSE4.2
+#endif
+#ifdef __AVX2__
+    #include <immintrin.h> // AVX, AVX2
+#endif
+#ifdef __aarch64__
+    #include <arm_neon.h> // ARM NEON
+#endif
+
+#ifdef __SSE4__
+/* Reads 4 bytes at ptr and zero-extends each one into a 32-bit lane. */
+static inline __m128i
+mm_cvtepu8_epi32(void *ptr) {
+    return _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*(INT32 *) ptr));
+}
+#endif
+
+#ifdef __AVX2__
+/* Reads 8 bytes at ptr and zero-extends each one into a 32-bit lane. */
+static inline __m256i
+mm256_cvtepu8_epi32(void *ptr) {
+    return _mm256_cvtepu8_epi32(_mm_loadl_epi64((__m128i *) ptr));
+}
+#endif
+
+#endif /* IMAGING_SIMD_H */