mirror of
https://github.com/obsproject/obs-studio.git
synced 2024-07-14 23:34:08 +00:00
Merge pull request #2069 from pgwipeout/aarch64
Enable Aarch64 support via SIMDe
This commit is contained in:
commit
2a6baf9886
3
deps/media-playback/CMakeLists.txt
vendored
3
deps/media-playback/CMakeLists.txt
vendored
|
@ -28,6 +28,9 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64le")
|
|||
PUBLIC
|
||||
-mvsx)
|
||||
add_compile_definitions(NO_WARN_X86_INTRINSICS)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
|
||||
target_compile_options(media-playback
|
||||
PUBLIC)
|
||||
elseif(NOT MSVC)
|
||||
target_compile_options(media-playback
|
||||
PUBLIC
|
||||
|
|
|
@ -174,9 +174,20 @@ elseif(UNIX)
|
|||
util/pipe-posix.c
|
||||
util/platform-nix.c)
|
||||
|
||||
set(libobs_PLATFORM_HEADERS
|
||||
util/threading-posix.h)
|
||||
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
|
||||
set(libobs_PLATFORM_HEADERS
|
||||
util/aarch/check.h
|
||||
util/aarch/hedley.h
|
||||
util/aarch/mmx.h
|
||||
util/aarch/simde-arch.h
|
||||
util/aarch/simde-common.h
|
||||
util/aarch/sse.h
|
||||
util/aarch/sse2.h
|
||||
util/threading-posix.h)
|
||||
else()
|
||||
set(libobs_PLATFORM_HEADERS
|
||||
util/threading-posix.h)
|
||||
endif()
|
||||
if(HAVE_PULSEAUDIO)
|
||||
set(libobs_audio_monitoring_HEADERS
|
||||
audio-monitoring/pulse/pulseaudio-wrapper.h)
|
||||
|
@ -337,6 +348,7 @@ set(libobs_util_SOURCES
|
|||
util/cf-parser.c
|
||||
util/profiler.c)
|
||||
set(libobs_util_HEADERS
|
||||
util/sse-intrin.h
|
||||
util/array-serializer.h
|
||||
util/file-serializer.h
|
||||
util/utf8.h
|
||||
|
@ -470,6 +482,9 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64le")
|
|||
PUBLIC
|
||||
-mvsx)
|
||||
add_compile_definitions(NO_WARN_X86_INTRINSICS)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
|
||||
target_compile_options(libobs
|
||||
PUBLIC)
|
||||
elseif(NOT MSVC)
|
||||
target_compile_options(libobs
|
||||
PUBLIC
|
||||
|
|
|
@ -20,7 +20,8 @@
|
|||
#include "../util/c99defs.h"
|
||||
#include "math-defs.h"
|
||||
#include "vec3.h"
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include <util/sse-intrin.h>
|
||||
|
||||
/*
|
||||
* Quaternion math
|
||||
|
|
|
@ -19,7 +19,8 @@
|
|||
|
||||
#include "math-defs.h"
|
||||
#include "vec4.h"
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include <util/sse-intrin.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
|
@ -18,7 +18,8 @@
|
|||
#pragma once
|
||||
|
||||
#include "math-defs.h"
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include <util/sse-intrin.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
******************************************************************************/
|
||||
|
||||
#include "format-conversion.h"
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include <util/sse-intrin.h>
|
||||
|
||||
/* ...surprisingly, if I don't use a macro to force inlining, it causes the
|
||||
* CPU usage to boost by a tremendous amount in debug builds. */
|
||||
|
|
|
@ -16,7 +16,8 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
*/
|
||||
|
||||
#include <math.h>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
#include <util/sse-intrin.h>
|
||||
|
||||
#include "util/threading.h"
|
||||
#include "util/bmem.h"
|
||||
|
|
258
libobs/util/aarch/check.h
Normal file
258
libobs/util/aarch/check.h
Normal file
|
@ -0,0 +1,258 @@
|
|||
/* Check (assertions)
|
||||
* Portable Snippets - https://github.com/nemequ/portable-snippets
|
||||
* Created by Evan Nemerson <evan@nemerson.com>
|
||||
*
|
||||
* To the extent possible under law, the authors have waived all
|
||||
* copyright and related or neighboring rights to this code. For
|
||||
* details, see the Creative Commons Zero 1.0 Universal license at
|
||||
* https://creativecommons.org/publicdomain/zero/1.0/
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_CHECK_H)
|
||||
#define SIMDE_CHECK_H
|
||||
|
||||
#if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG)
|
||||
#define SIMDE_NDEBUG 1
|
||||
#endif
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#if !defined(_WIN32)
|
||||
#define SIMDE_SIZE_MODIFIER "z"
|
||||
#define SIMDE_CHAR_MODIFIER "hh"
|
||||
#define SIMDE_SHORT_MODIFIER "h"
|
||||
#else
|
||||
#if defined(_M_X64) || defined(__amd64__)
|
||||
#define SIMDE_SIZE_MODIFIER "I64"
|
||||
#else
|
||||
#define SIMDE_SIZE_MODIFIER ""
|
||||
#endif
|
||||
#define SIMDE_CHAR_MODIFIER ""
|
||||
#define SIMDE_SHORT_MODIFIER ""
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER >= 1500)
|
||||
#define SIMDE__PUSH_DISABLE_MSVC_C4127 \
|
||||
__pragma(warning(push)) __pragma(warning(disable : 4127))
|
||||
#define SIMDE__POP_DISABLE_MSVC_C4127 __pragma(warning(pop))
|
||||
#else
|
||||
#define SIMDE__PUSH_DISABLE_MSVC_C4127
|
||||
#define SIMDE__POP_DISABLE_MSVC_C4127
|
||||
#endif
|
||||
|
||||
#if !defined(simde_errorf)
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#define simde_errorf(format, ...) \
|
||||
(fprintf(stderr, format, __VA_ARGS__), abort())
|
||||
#endif
|
||||
|
||||
#define simde_error(msg) simde_errorf("%s", msg)
|
||||
|
||||
#if defined(SIMDE_NDEBUG)
|
||||
#if defined(SIMDE_CHECK_FAIL_DEFINED)
|
||||
#define simde_assert(expr)
|
||||
#else
|
||||
#if defined(HEDLEY_ASSUME)
|
||||
#define simde_assert(expr) HEDLEY_ASSUME(expr)
|
||||
#elif HEDLEY_GCC_VERSION_CHECK(4, 5, 0)
|
||||
#define simde_assert(expr) ((void)(!!(expr) ? 1 : (__builtin_unreachable(), 1)))
|
||||
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
|
||||
#define simde_assert(expr) __assume(expr)
|
||||
#else
|
||||
#define simde_assert(expr)
|
||||
#endif
|
||||
#endif
|
||||
#define simde_assert_true(expr) simde_assert(expr)
|
||||
#define simde_assert_false(expr) simde_assert(!(expr))
|
||||
#define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \
|
||||
simde_assert(((a)op(b)))
|
||||
#define simde_assert_double_equal(a, b, precision)
|
||||
#define simde_assert_string_equal(a, b)
|
||||
#define simde_assert_string_not_equal(a, b)
|
||||
#define simde_assert_memory_equal(size, a, b)
|
||||
#define simde_assert_memory_not_equal(size, a, b)
|
||||
#else
|
||||
#define simde_assert(expr) \
|
||||
do { \
|
||||
if (!HEDLEY_LIKELY(expr)) { \
|
||||
simde_error("assertion failed: " #expr "\n"); \
|
||||
} \
|
||||
SIMDE__PUSH_DISABLE_MSVC_C4127 \
|
||||
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
|
||||
|
||||
#define simde_assert_true(expr) \
|
||||
do { \
|
||||
if (!HEDLEY_LIKELY(expr)) { \
|
||||
simde_error("assertion failed: " #expr \
|
||||
" is not true\n"); \
|
||||
} \
|
||||
SIMDE__PUSH_DISABLE_MSVC_C4127 \
|
||||
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
|
||||
|
||||
#define simde_assert_false(expr) \
|
||||
do { \
|
||||
if (!HEDLEY_LIKELY(!(expr))) { \
|
||||
simde_error("assertion failed: " #expr \
|
||||
" is not false\n"); \
|
||||
} \
|
||||
SIMDE__PUSH_DISABLE_MSVC_C4127 \
|
||||
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
|
||||
|
||||
#define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \
|
||||
do { \
|
||||
T simde_tmp_a_ = (a); \
|
||||
T simde_tmp_b_ = (b); \
|
||||
if (!(simde_tmp_a_ op simde_tmp_b_)) { \
|
||||
simde_errorf("assertion failed: %s %s %s (" prefix \
|
||||
"%" fmt suffix " %s " prefix \
|
||||
"%" fmt suffix ")\n", \
|
||||
#a, #op, #b, simde_tmp_a_, #op, \
|
||||
simde_tmp_b_); \
|
||||
} \
|
||||
SIMDE__PUSH_DISABLE_MSVC_C4127 \
|
||||
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
|
||||
|
||||
#define simde_assert_double_equal(a, b, precision) \
|
||||
do { \
|
||||
const double simde_tmp_a_ = (a); \
|
||||
const double simde_tmp_b_ = (b); \
|
||||
const double simde_tmp_diff_ = \
|
||||
((simde_tmp_a_ - simde_tmp_b_) < 0) \
|
||||
? -(simde_tmp_a_ - simde_tmp_b_) \
|
||||
: (simde_tmp_a_ - simde_tmp_b_); \
|
||||
if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \
|
||||
simde_errorf( \
|
||||
"assertion failed: %s == %s (%0." #precision \
|
||||
"g == %0." #precision "g)\n", \
|
||||
#a, #b, simde_tmp_a_, simde_tmp_b_); \
|
||||
} \
|
||||
SIMDE__PUSH_DISABLE_MSVC_C4127 \
|
||||
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
|
||||
|
||||
#include <string.h>
|
||||
#define simde_assert_string_equal(a, b) \
|
||||
do { \
|
||||
const char *simde_tmp_a_ = a; \
|
||||
const char *simde_tmp_b_ = b; \
|
||||
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != \
|
||||
0)) { \
|
||||
simde_errorf( \
|
||||
"assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \
|
||||
#a, #b, simde_tmp_a_, simde_tmp_b_); \
|
||||
} \
|
||||
SIMDE__PUSH_DISABLE_MSVC_C4127 \
|
||||
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
|
||||
|
||||
#define simde_assert_string_not_equal(a, b) \
|
||||
do { \
|
||||
const char *simde_tmp_a_ = a; \
|
||||
const char *simde_tmp_b_ = b; \
|
||||
if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == \
|
||||
0)) { \
|
||||
simde_errorf( \
|
||||
"assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \
|
||||
#a, #b, simde_tmp_a_, simde_tmp_b_); \
|
||||
} \
|
||||
SIMDE__PUSH_DISABLE_MSVC_C4127 \
|
||||
} while (0) SIMDE__POP_DISABLE_MSVC_C4127
|
||||
|
||||
/* Assert that the first `size` bytes at `a` and `b` are identical.
 * On a mismatch, the offset of the first differing byte is reported through
 * simde_errorf() together with the stringified argument expressions.
 * `a`, `b` and `size` are each evaluated exactly once. */
#define simde_assert_memory_equal(size, a, b) \
	do { \
		const unsigned char *simde_tmp_a_ = \
			(const unsigned char *)(a); \
		const unsigned char *simde_tmp_b_ = \
			(const unsigned char *)(b); \
		const size_t simde_tmp_size_ = (size); \
		/* The whole comparison belongs inside the branch hint. */ \
		if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, \
					   simde_tmp_size_) != 0)) { \
			size_t simde_tmp_pos_; \
			/* Locate the first differing byte for the report. */ \
			for (simde_tmp_pos_ = 0; \
			     simde_tmp_pos_ < simde_tmp_size_; \
			     simde_tmp_pos_++) { \
				if (simde_tmp_a_[simde_tmp_pos_] != \
				    simde_tmp_b_[simde_tmp_pos_]) { \
					simde_errorf( \
						"assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER \
						"u\n", \
						#a, #b, simde_tmp_pos_); \
					break; \
				} \
			} \
		} \
		SIMDE__PUSH_DISABLE_MSVC_C4127 \
	} while (0) SIMDE__POP_DISABLE_MSVC_C4127
|
||||
|
||||
/* Assert that the first `size` bytes at `a` and `b` are NOT identical.
 * If the two regions compare equal, the failure is reported through
 * simde_errorf() with the stringified argument expressions and the byte
 * count. `a`, `b` and `size` are each evaluated exactly once. */
#define simde_assert_memory_not_equal(size, a, b) \
	do { \
		const unsigned char *simde_tmp_a_ = \
			(const unsigned char *)(a); \
		const unsigned char *simde_tmp_b_ = \
			(const unsigned char *)(b); \
		const size_t simde_tmp_size_ = (size); \
		/* Hint the complete "regions are equal" test as unlikely; \
		 * hinting only the memcmp() result would mark the failure \
		 * path as the expected one. */ \
		if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, \
					   simde_tmp_size_) == 0)) { \
			simde_errorf( \
				"assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER \
				"u bytes)\n", \
				#a, #b, simde_tmp_size_); \
		} \
		SIMDE__PUSH_DISABLE_MSVC_C4127 \
	} while (0) SIMDE__POP_DISABLE_MSVC_C4127
|
||||
#endif
|
||||
|
||||
#define simde_assert_type(T, fmt, a, op, b) \
|
||||
simde_assert_type_full("", "", T, fmt, a, op, b)
|
||||
|
||||
#define simde_assert_char(a, op, b) \
|
||||
simde_assert_type_full("'\\x", "'", char, \
|
||||
"02" SIMDE_CHAR_MODIFIER "x", a, op, b)
|
||||
#define simde_assert_uchar(a, op, b) \
|
||||
simde_assert_type_full("'\\x", "'", unsigned char, \
|
||||
"02" SIMDE_CHAR_MODIFIER "x", a, op, b)
|
||||
#define simde_assert_short(a, op, b) \
|
||||
simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b)
|
||||
#define simde_assert_ushort(a, op, b) \
|
||||
simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b)
|
||||
#define simde_assert_int(a, op, b) simde_assert_type(int, "d", a, op, b)
|
||||
#define simde_assert_uint(a, op, b) \
|
||||
simde_assert_type(unsigned int, "u", a, op, b)
|
||||
#define simde_assert_long(a, op, b) simde_assert_type(long int, "ld", a, op, b)
|
||||
#define simde_assert_ulong(a, op, b) \
|
||||
simde_assert_type(unsigned long int, "lu", a, op, b)
|
||||
#define simde_assert_llong(a, op, b) \
|
||||
simde_assert_type(long long int, "lld", a, op, b)
|
||||
#define simde_assert_ullong(a, op, b) \
|
||||
simde_assert_type(unsigned long long int, "llu", a, op, b)
|
||||
|
||||
#define simde_assert_size(a, op, b) \
|
||||
simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b)
|
||||
|
||||
#define simde_assert_float(a, op, b) simde_assert_type(float, "f", a, op, b)
|
||||
#define simde_assert_double(a, op, b) simde_assert_type(double, "g", a, op, b)
|
||||
#define simde_assert_ptr(a, op, b) \
|
||||
simde_assert_type(const void *, "p", a, op, b)
|
||||
|
||||
#define simde_assert_int8(a, op, b) simde_assert_type(int8_t, PRIi8, a, op, b)
|
||||
#define simde_assert_uint8(a, op, b) simde_assert_type(uint8_t, PRIu8, a, op, b)
|
||||
#define simde_assert_int16(a, op, b) \
|
||||
simde_assert_type(int16_t, PRIi16, a, op, b)
|
||||
#define simde_assert_uint16(a, op, b) \
|
||||
simde_assert_type(uint16_t, PRIu16, a, op, b)
|
||||
#define simde_assert_int32(a, op, b) \
|
||||
simde_assert_type(int32_t, PRIi32, a, op, b)
|
||||
#define simde_assert_uint32(a, op, b) \
|
||||
simde_assert_type(uint32_t, PRIu32, a, op, b)
|
||||
#define simde_assert_int64(a, op, b) \
|
||||
simde_assert_type(int64_t, PRIi64, a, op, b)
|
||||
#define simde_assert_uint64(a, op, b) \
|
||||
simde_assert_type(uint64_t, PRIu64, a, op, b)
|
||||
|
||||
#define simde_assert_ptr_equal(a, b) simde_assert_ptr(a, ==, b)
|
||||
#define simde_assert_ptr_not_equal(a, b) simde_assert_ptr(a, !=, b)
|
||||
#define simde_assert_null(ptr) simde_assert_ptr(ptr, ==, NULL)
|
||||
#define simde_assert_not_null(ptr) simde_assert_ptr(ptr, !=, NULL)
|
||||
#define simde_assert_ptr_null(ptr) simde_assert_ptr(ptr, ==, NULL)
|
||||
#define simde_assert_ptr_not_null(ptr) simde_assert_ptr(ptr, !=, NULL)
|
||||
|
||||
#endif /* !defined(SIMDE_CHECK_H) */
|
1616
libobs/util/aarch/hedley.h
Normal file
1616
libobs/util/aarch/hedley.h
Normal file
File diff suppressed because it is too large
Load diff
1356
libobs/util/aarch/mmx.h
Normal file
1356
libobs/util/aarch/mmx.h
Normal file
File diff suppressed because it is too large
Load diff
355
libobs/util/aarch/simde-arch.h
Normal file
355
libobs/util/aarch/simde-arch.h
Normal file
|
@ -0,0 +1,355 @@
|
|||
/* Architecture detection
|
||||
* Created by Evan Nemerson <evan@nemerson.com>
|
||||
*
|
||||
* To the extent possible under law, the authors have waived all
|
||||
* copyright and related or neighboring rights to this code. For
|
||||
* details, see the Creative Commons Zero 1.0 Universal license at
|
||||
* <https://creativecommons.org/publicdomain/zero/1.0/>
|
||||
*
|
||||
* Different compilers define different preprocessor macros for the
|
||||
* same architecture. This is an attempt to provide a single
|
||||
* interface which is usable on any compiler.
|
||||
*
|
||||
* In general, a macro named SIMDE_ARCH_* is defined for each
|
||||
* architecture the CPU supports. When there are multiple possible
|
||||
* versions, we try to define the macro to the target version. For
|
||||
* example, if you want to check for i586+, you could do something
|
||||
* like:
|
||||
*
|
||||
* #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5)
|
||||
* ...
|
||||
* #endif
|
||||
*
|
||||
* You could also just check that SIMDE_ARCH_X86 >= 5 without checking
|
||||
* if it's defined first, but some compilers may emit a warning about
|
||||
* an undefined macro being used (e.g., GCC with -Wundef).
|
||||
*
|
||||
* This was originally created for SIMDe
|
||||
* <https://github.com/nemequ/simde> (hence the prefix), but this
|
||||
* header has no dependencies and may be used anywhere. It is
|
||||
* originally based on information from
|
||||
* <https://sourceforge.net/p/predef/wiki/Architectures/>, though it
|
||||
* has been enhanced with additional information.
|
||||
*
|
||||
* If you improve this file, or find a bug, please file the issue at
|
||||
* <https://github.com/nemequ/simde/issues>. If you copy this into
|
||||
* your project, even if you change the prefix, please keep the links
|
||||
* to SIMDe intact so others know where to report issues, submit
|
||||
* enhancements, and find the latest version. */
|
||||
|
||||
#if !defined(SIMDE_ARCH_H)
|
||||
#define SIMDE_ARCH_H
|
||||
|
||||
/* Alpha
|
||||
<https://en.wikipedia.org/wiki/DEC_Alpha> */
|
||||
#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
|
||||
#if defined(__alpha_ev6__)
|
||||
#define SIMDE_ARCH_ALPHA 6
|
||||
#elif defined(__alpha_ev5__)
|
||||
#define SIMDE_ARCH_ALPHA 5
|
||||
#elif defined(__alpha_ev4__)
|
||||
#define SIMDE_ARCH_ALPHA 4
|
||||
#else
|
||||
#define SIMDE_ARCH_ALPHA 1
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Atmel AVR
|
||||
<https://en.wikipedia.org/wiki/Atmel_AVR> */
|
||||
#if defined(__AVR_ARCH__)
|
||||
#define SIMDE_ARCH_AVR __AVR_ARCH__
|
||||
#endif
|
||||
|
||||
/* AMD64 / x86_64
   <https://en.wikipedia.org/wiki/X86-64>
   Defined to 1 when any of the GCC-style (__amd64__, __x86_64__, ...) or
   MSVC-style (_M_X64, _M_AMD64) target macros is present. */
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \
	defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
#define SIMDE_ARCH_AMD64 1
#endif
|
||||
|
||||
/* ARM
   <https://en.wikipedia.org/wiki/ARM_architecture>
   The value is the architecture major version times ten, with the last
   digit distinguishing the named variants tested below (e.g. 8A -> 82,
   7A -> 71). When only a generic ARM macro is available the value is 1. */
#if defined(__ARM_ARCH_8A__)
#define SIMDE_ARCH_ARM 82
#elif defined(__ARM_ARCH_8R__)
#define SIMDE_ARCH_ARM 81
#elif defined(__ARM_ARCH_8__)
#define SIMDE_ARCH_ARM 80
#elif defined(__ARM_ARCH_7S__)
#define SIMDE_ARCH_ARM 74
#elif defined(__ARM_ARCH_7M__)
#define SIMDE_ARCH_ARM 73
#elif defined(__ARM_ARCH_7R__)
#define SIMDE_ARCH_ARM 72
#elif defined(__ARM_ARCH_7A__)
#define SIMDE_ARCH_ARM 71
#elif defined(__ARM_ARCH_7__)
#define SIMDE_ARCH_ARM 70
#elif defined(__ARM_ARCH)
#define SIMDE_ARCH_ARM (__ARM_ARCH * 10)
#elif defined(_M_ARM)
#define SIMDE_ARCH_ARM (_M_ARM * 10)
/* _M_ARM is fully handled by the branch above, so it is not re-tested in
   the generic fallback. */
#elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || \
	defined(_ARM)
#define SIMDE_ARCH_ARM 1
#endif
|
||||
|
||||
/* AArch64
|
||||
<https://en.wikipedia.org/wiki/ARM_architecture> */
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#define SIMDE_ARCH_AARCH64 10
|
||||
#endif
|
||||
|
||||
/* Blackfin
|
||||
<https://en.wikipedia.org/wiki/Blackfin> */
|
||||
#if defined(__bfin) || defined(__BFIN__) || defined(__bfin__)
|
||||
#define SIMDE_ARCH_BLACKFIN 1
|
||||
#endif
|
||||
|
||||
/* CRIS
|
||||
<https://en.wikipedia.org/wiki/ETRAX_CRIS> */
|
||||
#if defined(__CRIS_arch_version)
|
||||
#define SIMDE_ARCH_CRIS __CRIS_arch_version
|
||||
#elif defined(__cris__) || defined(__cris) || defined(__CRIS) || \
|
||||
defined(__CRIS__)
|
||||
#define SIMDE_ARCH_CRIS 1
|
||||
#endif
|
||||
|
||||
/* Convex
|
||||
<https://en.wikipedia.org/wiki/Convex_Computer> */
|
||||
#if defined(__convex_c38__)
|
||||
#define SIMDE_ARCH_CONVEX 38
|
||||
#elif defined(__convex_c34__)
|
||||
#define SIMDE_ARCH_CONVEX 34
|
||||
#elif defined(__convex_c32__)
|
||||
#define SIMDE_ARCH_CONVEX 32
|
||||
#elif defined(__convex_c2__)
|
||||
#define SIMDE_ARCH_CONVEX 2
|
||||
#elif defined(__convex__)
|
||||
#define SIMDE_ARCH_CONVEX 1
|
||||
#endif
|
||||
|
||||
/* Adapteva Epiphany
|
||||
<https://en.wikipedia.org/wiki/Adapteva_Epiphany> */
|
||||
#if defined(__epiphany__)
|
||||
#define SIMDE_ARCH_EPIPHANY 1
|
||||
#endif
|
||||
|
||||
/* Fujitsu FR-V
|
||||
<https://en.wikipedia.org/wiki/FR-V_(microprocessor)> */
|
||||
#if defined(__frv__)
|
||||
#define SIMDE_ARCH_FRV 1
|
||||
#endif
|
||||
|
||||
/* H8/300
   <https://en.wikipedia.org/wiki/H8_Family>
   Defined to 1 (not empty) so it can be used in #if arithmetic like the
   other SIMDE_ARCH_* macros. */
#if defined(__H8300__)
#define SIMDE_ARCH_H8300 1
#endif
|
||||
|
||||
/* HP/PA / PA-RISC
|
||||
<https://en.wikipedia.org/wiki/PA-RISC> */
|
||||
#if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || \
|
||||
defined(_PA_RISC2_0)
|
||||
#define SIMDE_ARCH_HPPA 20
|
||||
#elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1)
|
||||
#define SIMDE_ARCH_HPPA 11
|
||||
#elif defined(_PA_RISC1_0)
|
||||
#define SIMDE_ARCH_HPPA 10
|
||||
#elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa)
|
||||
#define SIMDE_ARCH_HPPA 1
|
||||
#endif
|
||||
|
||||
/* x86
|
||||
<https://en.wikipedia.org/wiki/X86> */
|
||||
#if defined(_M_IX86)
|
||||
#define SIMDE_ARCH_X86 (_M_IX86 / 100)
|
||||
#elif defined(__I86__)
|
||||
#define SIMDE_ARCH_X86 __I86__
|
||||
#elif defined(i686) || defined(__i686) || defined(__i686__)
|
||||
#define SIMDE_ARCH_X86 6
|
||||
#elif defined(i586) || defined(__i586) || defined(__i586__)
|
||||
#define SIMDE_ARCH_X86 5
|
||||
#elif defined(i486) || defined(__i486) || defined(__i486__)
|
||||
#define SIMDE_ARCH_X86 4
|
||||
#elif defined(i386) || defined(__i386) || defined(__i386__)
|
||||
#define SIMDE_ARCH_X86 3
|
||||
#elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__)
|
||||
#define SIMDE_ARCH_X86 3
|
||||
#endif
|
||||
|
||||
/* Itanium
|
||||
<https://en.wikipedia.org/wiki/Itanium> */
|
||||
#if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || \
|
||||
defined(__ia64) || defined(_M_IA64) || defined(__itanium__)
|
||||
#define SIMDE_ARCH_IA64 1
|
||||
#endif
|
||||
|
||||
/* Renesas M32R
   <https://en.wikipedia.org/wiki/M32R>
   Defined to 1 (not empty) so it can be used in #if arithmetic like the
   other SIMDE_ARCH_* macros. */
#if defined(__m32r__) || defined(__M32R__)
#define SIMDE_ARCH_M32R 1
#endif
|
||||
|
||||
/* Motorola 68000
|
||||
<https://en.wikipedia.org/wiki/Motorola_68000> */
|
||||
#if defined(__mc68060__) || defined(__MC68060__)
|
||||
#define SIMDE_ARCH_M68K 68060
|
||||
#elif defined(__mc68040__) || defined(__MC68040__)
|
||||
#define SIMDE_ARCH_M68K 68040
|
||||
#elif defined(__mc68030__) || defined(__MC68030__)
|
||||
#define SIMDE_ARCH_M68K 68030
|
||||
#elif defined(__mc68020__) || defined(__MC68020__)
|
||||
#define SIMDE_ARCH_M68K 68020
|
||||
#elif defined(__mc68010__) || defined(__MC68010__)
|
||||
#define SIMDE_ARCH_M68K 68010
|
||||
#elif defined(__mc68000__) || defined(__MC68000__)
|
||||
#define SIMDE_ARCH_M68K 68000
|
||||
#endif
|
||||
|
||||
/* Xilinx MicroBlaze
   <https://en.wikipedia.org/wiki/MicroBlaze>
   Defined to 1 (not empty) so it can be used in #if arithmetic like the
   other SIMDE_ARCH_* macros. */
#if defined(__MICROBLAZE__) || defined(__microblaze__)
#define SIMDE_ARCH_MICROBLAZE 1
#endif
|
||||
|
||||
/* MIPS
|
||||
<https://en.wikipedia.org/wiki/MIPS_architecture> */
|
||||
#if defined(_MIPS_ISA_MIPS64R2)
|
||||
#define SIMDE_ARCH_MIPS 642
|
||||
#elif defined(_MIPS_ISA_MIPS64)
|
||||
#define SIMDE_ARCH_MIPS 640
|
||||
#elif defined(_MIPS_ISA_MIPS32R2)
|
||||
#define SIMDE_ARCH_MIPS 322
|
||||
#elif defined(_MIPS_ISA_MIPS32)
|
||||
#define SIMDE_ARCH_MIPS 320
|
||||
#elif defined(_MIPS_ISA_MIPS4)
|
||||
#define SIMDE_ARCH_MIPS 4
|
||||
#elif defined(_MIPS_ISA_MIPS3)
|
||||
#define SIMDE_ARCH_MIPS 3
|
||||
#elif defined(_MIPS_ISA_MIPS2)
|
||||
#define SIMDE_ARCH_MIPS 2
|
||||
#elif defined(_MIPS_ISA_MIPS1)
|
||||
#define SIMDE_ARCH_MIPS 1
|
||||
#elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__)
|
||||
#define SIMDE_ARCH_MIPS 1
|
||||
#endif
|
||||
|
||||
/* Matsushita MN10300
|
||||
<https://en.wikipedia.org/wiki/MN103> */
|
||||
#if defined(__MN10300__) || defined(__mn10300__)
|
||||
#define SIMDE_ARCH_MN10300 1
|
||||
#endif
|
||||
|
||||
/* POWER
|
||||
<https://en.wikipedia.org/wiki/IBM_POWER_Instruction_Set_Architecture> */
|
||||
#if defined(_M_PPC)
|
||||
#define SIMDE_ARCH_POWER _M_PPC
|
||||
#elif defined(_ARCH_PWR8)
|
||||
#define SIMDE_ARCH_POWER 800
|
||||
#elif defined(_ARCH_PWR7)
|
||||
#define SIMDE_ARCH_POWER 700
|
||||
#elif defined(_ARCH_PWR6)
|
||||
#define SIMDE_ARCH_POWER 600
|
||||
#elif defined(_ARCH_PWR5)
|
||||
#define SIMDE_ARCH_POWER 500
|
||||
#elif defined(_ARCH_PWR4)
|
||||
#define SIMDE_ARCH_POWER 400
|
||||
#elif defined(_ARCH_440) || defined(__ppc440__)
|
||||
#define SIMDE_ARCH_POWER 440
|
||||
#elif defined(_ARCH_450) || defined(__ppc450__)
|
||||
#define SIMDE_ARCH_POWER 450
|
||||
#elif defined(_ARCH_601) || defined(__ppc601__)
|
||||
#define SIMDE_ARCH_POWER 601
|
||||
#elif defined(_ARCH_603) || defined(__ppc603__)
|
||||
#define SIMDE_ARCH_POWER 603
|
||||
#elif defined(_ARCH_604) || defined(__ppc604__)
|
||||
#define SIMDE_ARCH_POWER 604
|
||||
#elif defined(_ARCH_605) || defined(__ppc605__)
|
||||
#define SIMDE_ARCH_POWER 605
|
||||
#elif defined(_ARCH_620) || defined(__ppc620__)
|
||||
#define SIMDE_ARCH_POWER 620
|
||||
#elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || \
|
||||
defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || \
|
||||
defined(__ppc)
|
||||
#define SIMDE_ARCH_POWER 1
|
||||
#endif
|
||||
|
||||
/* SPARC
|
||||
<https://en.wikipedia.org/wiki/SPARC> */
|
||||
#if defined(__sparc_v9__) || defined(__sparcv9)
|
||||
#define SIMDE_ARCH_SPARC 9
|
||||
#elif defined(__sparc_v8__) || defined(__sparcv8)
|
||||
#define SIMDE_ARCH_SPARC 8
|
||||
#elif defined(__sparc_v7__) || defined(__sparcv7)
|
||||
#define SIMDE_ARCH_SPARC 7
|
||||
#elif defined(__sparc_v6__) || defined(__sparcv6)
|
||||
#define SIMDE_ARCH_SPARC 6
|
||||
#elif defined(__sparc_v5__) || defined(__sparcv5)
|
||||
#define SIMDE_ARCH_SPARC 5
|
||||
#elif defined(__sparc_v4__) || defined(__sparcv4)
|
||||
#define SIMDE_ARCH_SPARC 4
|
||||
#elif defined(__sparc_v3__) || defined(__sparcv3)
|
||||
#define SIMDE_ARCH_SPARC 3
|
||||
#elif defined(__sparc_v2__) || defined(__sparcv2)
|
||||
#define SIMDE_ARCH_SPARC 2
|
||||
#elif defined(__sparc_v1__) || defined(__sparcv1)
|
||||
#define SIMDE_ARCH_SPARC 1
|
||||
#elif defined(__sparc__) || defined(__sparc)
|
||||
#define SIMDE_ARCH_SPARC 1
|
||||
#endif
|
||||
|
||||
/* SuperH
|
||||
<https://en.wikipedia.org/wiki/SuperH> */
|
||||
#if defined(__sh5__) || defined(__SH5__)
|
||||
#define SIMDE_ARCH_SUPERH 5
|
||||
#elif defined(__sh4__) || defined(__SH4__)
|
||||
#define SIMDE_ARCH_SUPERH 4
|
||||
#elif defined(__sh3__) || defined(__SH3__)
|
||||
#define SIMDE_ARCH_SUPERH 3
|
||||
#elif defined(__sh2__) || defined(__SH2__)
|
||||
#define SIMDE_ARCH_SUPERH 2
|
||||
#elif defined(__sh1__) || defined(__SH1__)
|
||||
#define SIMDE_ARCH_SUPERH 1
|
||||
#elif defined(__sh__) || defined(__SH__)
|
||||
#define SIMDE_ARCH_SUPERH 1
|
||||
#endif
|
||||
|
||||
/* IBM System z
   <https://en.wikipedia.org/wiki/IBM_System_z>
   Defined to 1 (not empty) so it can be used in #if arithmetic like the
   other SIMDE_ARCH_* macros. */
#if defined(__370__) || defined(__THW_370__) || defined(__s390__) || \
	defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__)
#define SIMDE_ARCH_SYSTEMZ 1
#endif
|
||||
|
||||
/* TMS320 DSP
|
||||
<https://en.wikipedia.org/wiki/Texas_Instruments_TMS320> */
|
||||
#if defined(_TMS320C6740) || defined(__TMS320C6740__)
|
||||
#define SIMDE_ARCH_TMS320 6740
|
||||
#elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__)
|
||||
#define SIMDE_ARCH_TMS320 6701
|
||||
#elif defined(_TMS320C6700) || defined(__TMS320C6700__)
|
||||
#define SIMDE_ARCH_TMS320 6700
|
||||
#elif defined(_TMS320C6600) || defined(__TMS320C6600__)
|
||||
#define SIMDE_ARCH_TMS320 6600
|
||||
#elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__)
|
||||
#define SIMDE_ARCH_TMS320 6401
|
||||
#elif defined(_TMS320C6400) || defined(__TMS320C6400__)
|
||||
#define SIMDE_ARCH_TMS320 6400
|
||||
#elif defined(_TMS320C6200) || defined(__TMS320C6200__)
|
||||
#define SIMDE_ARCH_TMS320 6200
|
||||
#elif defined(_TMS320C55X) || defined(__TMS320C55X__)
|
||||
#define SIMDE_ARCH_TMS320 550
|
||||
#elif defined(_TMS320C54X) || defined(__TMS320C54X__)
|
||||
#define SIMDE_ARCH_TMS320 540
|
||||
#elif defined(_TMS320C28X) || defined(__TMS320C28X__)
|
||||
#define SIMDE_ARCH_TMS320 280
|
||||
#endif
|
||||
|
||||
/* Xtensa
|
||||
<https://en.wikipedia.org/wiki/Tensilica> */
|
||||
#if defined(__xtensa__) || defined(__XTENSA__)
|
||||
#define SIMDE_ARCH_XTENSA 1
|
||||
#endif
|
||||
|
||||
#endif /* !defined(SIMDE_ARCH_H) */
|
278
libobs/util/aarch/simde-common.h
Normal file
278
libobs/util/aarch/simde-common.h
Normal file
|
@ -0,0 +1,278 @@
|
|||
/* Copyright (c) 2017-2019 Evan Nemerson <evan@nemerson.com>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use, copy,
|
||||
* modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
* of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||||
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#if !defined(SIMDE_COMMON_H)
|
||||
#define SIMDE_COMMON_H
|
||||
|
||||
#include "hedley.h"
|
||||
#include "check.h"
|
||||
#include "simde-arch.h"
|
||||
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
|
||||
#define SIMDE_ALIGN(alignment) _Alignas(alignment)
|
||||
#elif (defined(__cplusplus) && (__cplusplus >= 201103L))
|
||||
#define SIMDE_ALIGN(alignment) alignas(alignment)
|
||||
#elif HEDLEY_GCC_VERSION_CHECK(2, 95, 0) || \
|
||||
HEDLEY_CRAY_VERSION_CHECK(8, 4, 0) || \
|
||||
HEDLEY_IBM_VERSION_CHECK(11, 1, 0) || \
|
||||
HEDLEY_INTEL_VERSION_CHECK(13, 0, 0) || \
|
||||
HEDLEY_PGI_VERSION_CHECK(19, 4, 0) || \
|
||||
HEDLEY_ARM_VERSION_CHECK(4, 1, 0) || \
|
||||
HEDLEY_TINYC_VERSION_CHECK(0, 9, 24) || \
|
||||
HEDLEY_TI_VERSION_CHECK(8, 1, 0)
|
||||
#define SIMDE_ALIGN(alignment) __attribute__((aligned(alignment)))
|
||||
#elif defined(_MSC_VER) && (!defined(_M_IX86) || defined(_M_AMD64))
|
||||
#define SIMDE_ALIGN(alignment) __declspec(align(alignment))
|
||||
#else
|
||||
#define SIMDE_ALIGN(alignment)
|
||||
#endif
|
||||
|
||||
#define simde_assert_aligned(alignment, val) \
|
||||
simde_assert_int(((uintptr_t)(val)) % (alignment), ==, 0)
|
||||
|
||||
#if HEDLEY_GCC_HAS_ATTRIBUTE(vector_size, 4, 6, 0)
|
||||
#define SIMDE__ENABLE_GCC_VEC_EXT
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_ENABLE_OPENMP) && \
|
||||
((defined(_OPENMP) && (_OPENMP >= 201307L)) || \
|
||||
(defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L)))
|
||||
#define SIMDE_ENABLE_OPENMP
|
||||
#endif
|
||||
|
||||
#if !defined(SIMDE_ENABLE_CILKPLUS) && defined(__cilk)
|
||||
#define SIMDE_ENABLE_CILKPLUS
|
||||
#endif
|
||||
|
||||
/* Loop-vectorization hint macros.  Each branch maps the four hints
   (plain, safelen, reduction, aligned) onto the pragma spelling of one
   toolchain.  The first matching branch wins, in this order: OpenMP SIMD,
   Cilk Plus, Intel, clang, GCC, Cray.  A hint with no equivalent either
   falls back to the plain SIMDE__VECTORIZE hint or expands to nothing. */
#ifdef SIMDE_ENABLE_OPENMP
#  define SIMDE__VECTORIZE _Pragma("omp simd")
#  define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l))
#  define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r))
#  define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a))
#elif defined(SIMDE_ENABLE_CILKPLUS)
#  define SIMDE__VECTORIZE _Pragma("simd")
#  define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
#  define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
#  define SIMDE__VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a))
#elif defined(__INTEL_COMPILER)
/* Same spelling as the Cilk Plus branch, but without an aligned hint. */
#  define SIMDE__VECTORIZE _Pragma("simd")
#  define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l))
#  define SIMDE__VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r))
#  define SIMDE__VECTORIZE_ALIGNED(a)
#elif defined(__clang__)
#  define SIMDE__VECTORIZE _Pragma("clang loop vectorize(enable)")
#  define SIMDE__VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l))
#  define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
#  define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
#  define SIMDE__VECTORIZE _Pragma("GCC ivdep")
#  define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
#  define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
#  define SIMDE__VECTORIZE_ALIGNED(a)
#elif HEDLEY_CRAY_VERSION_CHECK(5, 0, 0)
#  define SIMDE__VECTORIZE _Pragma("_CRI ivdep")
#  define SIMDE__VECTORIZE_SAFELEN(l) SIMDE__VECTORIZE
#  define SIMDE__VECTORIZE_REDUCTION(r) SIMDE__VECTORIZE
#  define SIMDE__VECTORIZE_ALIGNED(a)
#else
/* Unknown toolchain: every hint expands to nothing. */
#  define SIMDE__VECTORIZE
#  define SIMDE__VECTORIZE_SAFELEN(l)
#  define SIMDE__VECTORIZE_REDUCTION(r)
#  define SIMDE__VECTORIZE_ALIGNED(a)
#endif
|
||||
|
||||
/* SIMDE__UNUSED expands to the GNU __unused__ attribute when
   HEDLEY_GCC_HAS_ATTRIBUTE reports it, and to nothing otherwise. */
#if !HEDLEY_GCC_HAS_ATTRIBUTE(unused, 3, 1, 0)
#  define SIMDE__UNUSED
#else
#  define SIMDE__UNUSED __attribute__((__unused__))
#endif
|
||||
|
||||
/* SIMDE__ARTIFICIAL expands to the GNU __artificial__ attribute when
   HEDLEY_GCC_HAS_ATTRIBUTE reports it, and to nothing otherwise. */
#if !HEDLEY_GCC_HAS_ATTRIBUTE(artificial, 4, 3, 0)
#  define SIMDE__ARTIFICIAL
#else
#  define SIMDE__ARTIFICIAL __attribute__((__artificial__))
#endif
|
||||
|
||||
/* Specifier prefix for SIMDe function definitions.  By default functions
   are static, inline and marked artificial.  Defining SIMDE_NO_INLINE
   switches them to never-inline (and unused-tolerant) instead; that mode
   is intended for checking coverage and should never be used in
   production. */
#ifndef SIMDE_NO_INLINE
#  define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_INLINE SIMDE__ARTIFICIAL static
#else
#  define SIMDE__FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE SIMDE__UNUSED static
#endif
|
||||
|
||||
/* Brackets placed around SIMDe declarations.  Everywhere they wrap the
   Hedley C-linkage markers; under MSVC they additionally push the
   diagnostic state and silence warnings 4996 and 4204 for the enclosed
   region, popping the state again at the end. */
#if !defined(_MSC_VER)
#  define SIMDE__BEGIN_DECLS HEDLEY_BEGIN_C_DECLS
#  define SIMDE__END_DECLS HEDLEY_END_C_DECLS
#else
#  define SIMDE__BEGIN_DECLS                                            \
	HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable : 4996 4204)) \
		HEDLEY_BEGIN_C_DECLS
#  define SIMDE__END_DECLS HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS
#endif
|
||||
|
||||
/* 128-bit integer aliases, provided only when the compiler defines
   __SIZEOF_INT128__.  SIMDE__HAVE_INT128 lets other code test for
   them. */
#ifdef __SIZEOF_INT128__
#  define SIMDE__HAVE_INT128
typedef __int128 simde_int128;
typedef unsigned __int128 simde_uint128;
#endif
|
||||
|
||||
/* TODO: we should at least make an attempt to detect the correct
|
||||
types for simde_float32/float64 instead of just assuming float and
|
||||
double. */
|
||||
|
||||
/* 32-bit floating-point type selection and literal helper.

   Unless the user supplies SIMDE_FLOAT32_TYPE, plain `float` is used and
   SIMDE_FLOAT32_C(value) pastes an `f` suffix onto the literal.  With a
   user-supplied type, the value is cast to that type instead. */
#if !defined(SIMDE_FLOAT32_TYPE)
#define SIMDE_FLOAT32_TYPE float
#define SIMDE_FLOAT32_C(value) value##f
#else
/* The argument is parenthesized so that the cast applies to the whole
   expression rather than only to its first operand. */
#define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE)(value))
#endif
|
||||
typedef SIMDE_FLOAT32_TYPE simde_float32;
|
||||
HEDLEY_STATIC_ASSERT(sizeof(simde_float32) == 4,
|
||||
"Unable to find 32-bit floating-point type.");
|
||||
|
||||
/* 64-bit floating-point type selection and literal helper.

   Unless the user supplies SIMDE_FLOAT64_TYPE, plain `double` is used and
   SIMDE_FLOAT64_C(value) yields the literal unchanged.  With a
   user-supplied type, the value is cast to that type instead. */
#if !defined(SIMDE_FLOAT64_TYPE)
#define SIMDE_FLOAT64_TYPE double
#define SIMDE_FLOAT64_C(value) value
#else
/* This branch must define the 64-bit helper.  Defining SIMDE_FLOAT32_C
   here would redefine the 32-bit helper with a 64-bit cast and leave
   SIMDE_FLOAT64_C undefined for anyone overriding the type. */
#define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE)(value))
#endif
|
||||
typedef SIMDE_FLOAT64_TYPE simde_float64;
|
||||
HEDLEY_STATIC_ASSERT(sizeof(simde_float64) == 8,
|
||||
"Unable to find 64-bit floating-point type.");
|
||||
|
||||
/* Whether to assume that the compiler can auto-vectorize reasonably
   well.  When set, SIMDe tries to compose vector operations out of
   simpler vector operations instead of minimizing serial work.

   As an example, consider the _mm_add_ss(a, b) function from SSE, which
   returns { a0 + b0, a1, a2, a3 }.  The same pattern is repeated for
   other operations (sub, mul, etc.).

   The naïve implementation loads a0 and b0, adds them into a temporary
   variable, then splices that value into a new vector together with the
   remaining elements of a.

   On platforms which support vectorization it is generally faster to
   perform the operation on the entire vector, which avoids moving data
   between SIMD and non-SIMD registers.  In other words, the temporary is
   the vector (a + b) rather than the scalar (a0 + b0), and it is then
   combined with a to form the result.

   By default SIMDe prefers the pure-vector versions if a vector ISA
   extension is detected.  Define SIMDE_NO_ASSUME_VECTORIZATION to
   override that, or SIMDE_ASSUME_VECTORIZATION to force the vectorized
   versions. */
#if !defined(SIMDE_NO_ASSUME_VECTORIZATION) &&                      \
	!defined(SIMDE_ASSUME_VECTORIZATION) &&                     \
	(defined(__SSE__) || defined(__ARM_NEON) ||                 \
	 defined(__mips_msa) || defined(__ALTIVEC__))
#  define SIMDE_ASSUME_VECTORIZATION
#endif
|
||||
|
||||
/* Thin abstraction over the vector-shuffle built-ins of clang and GCC,
   whose interfaces differ slightly.  elem_size is given in bits while
   vec_size is given in bytes.  If neither built-in is reported, the macro
   is left undefined. */
#if HEDLEY_CLANG_HAS_BUILTIN(__builtin_shufflevector)
/* clang takes the lane indices directly as trailing arguments. */
#  define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...) \
	__builtin_shufflevector(a, b, __VA_ARGS__)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle, 4, 7, 0) && \
	!defined(__INTEL_COMPILER)
/* GCC takes the lane indices as an integer vector, built here as a
   compound literal of intN_t elements spanning vec_size bytes. */
#  define SIMDE__SHUFFLE_VECTOR(elem_size, vec_size, a, b, ...)          \
	__builtin_shuffle(                                               \
		a, b,                                                    \
		(int##elem_size##_t __attribute__((__vector_size__(      \
			vec_size)))){__VA_ARGS__})
#endif
|
||||
|
||||
/* Iteration count for SIMDe's iterative algorithms.  Fewer iterations
   make some functions faster but less accurate.  The value can be
   overridden by defining it before this point. */
#ifndef SIMDE_ACCURACY_ITERS
#  define SIMDE_ACCURACY_ITERS 2
#endif
|
||||
|
||||
/* SIMDE__REALLY_GCC is 1 only for genuine GCC, i.e. when __GNUC__ is
   defined and the compiler is neither clang nor the Intel compiler, both
   of which also define __GNUC__.  It is 0 otherwise.  This will probably
   move into Hedley once other compilers that define __GNUC__ have been
   checked more thoroughly. */
#undef SIMDE__REALLY_GCC /* harmless when it was not defined */
#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER)
#  define SIMDE__REALLY_GCC 1
#else
#  define SIMDE__REALLY_GCC 0
#endif
|
||||
|
||||
/* SIMDE__ASSUME_ALIGNED(ptr, align) tells the optimizer that `ptr` is
   aligned to `align` bytes, using whichever facility the compiler offers.
   If none is available it expands to nothing.

   In the __builtin_assume_aligned branch `ptr` is assigned to, so it must
   be a modifiable lvalue there.

   Macro arguments are parenthesized wherever they are cast or used as an
   operand, and the conditional-expression fallback is wrapped in
   parentheses, so the expansion stays correct for arguments such as
   `p + 1` and inside larger expressions. */
#if defined(SIMDE__ASSUME_ALIGNED)
#undef SIMDE__ASSUME_ALIGNED
#endif
#if HEDLEY_INTEL_VERSION_CHECK(9, 0, 0)
#define SIMDE__ASSUME_ALIGNED(ptr, align) __assume_aligned(ptr, align)
#elif HEDLEY_MSVC_VERSION_CHECK(13, 10, 0)
#define SIMDE__ASSUME_ALIGNED(ptr, align) \
	__assume((((char *)(ptr)) - ((char *)0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_assume_aligned, 4, 7, 0)
#define SIMDE__ASSUME_ALIGNED(ptr, align) \
	(ptr = (__typeof__(ptr))__builtin_assume_aligned((ptr), (align)))
#elif HEDLEY_CLANG_HAS_BUILTIN(__builtin_assume)
#define SIMDE__ASSUME_ALIGNED(ptr, align) \
	__builtin_assume((((char *)(ptr)) - ((char *)0)) % (align) == 0)
#elif HEDLEY_GCC_HAS_BUILTIN(__builtin_unreachable, 4, 5, 0)
/* A misaligned pointer is declared unreachable, which lets the optimizer
   assume the aligned case. */
#define SIMDE__ASSUME_ALIGNED(ptr, align)                       \
	(((((char *)(ptr)) - ((char *)0)) % (align) == 0)       \
		 ? (1)                                          \
		 : (__builtin_unreachable(), 0))
#else
#define SIMDE__ASSUME_ALIGNED(ptr, align)
#endif
|
||||
|
||||
/* Compiler-bug flags.  Particular compiler versions sometimes make the
   native implementations unusable, usually because functions are missing
   or their implementations are buggy.  Each SIMDE_BUG_* macro below marks
   one such problem.  As bugs get fixed, the macros are meant to be
   defined only for the affected compiler versions.  Defining
   SIMDE_IGNORE_COMPILER_BUGS disables all of these checks. */
#ifndef SIMDE_IGNORE_COMPILER_BUGS

/* Genuine GCC only, each flag keyed on a minimum fixed version. */
#  if SIMDE__REALLY_GCC
#    if !HEDLEY_GCC_VERSION_CHECK(4, 9, 0)
#      define SIMDE_BUG_GCC_REV_208793
#    endif
#    if !HEDLEY_GCC_VERSION_CHECK(5, 0, 0)
#      define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */
#    endif
#    if !HEDLEY_GCC_VERSION_CHECK(4, 6, 0)
#      define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */
#    endif
#  endif

#  ifdef __EMSCRIPTEN__
#    define SIMDE_BUG_EMSCRIPTEN_MISSING_IMPL /* Placeholder for (as yet) unfiled issues. */
#    define SIMDE_BUG_EMSCRIPTEN_5242
#  endif

#endif
|
||||
|
||||
#endif /* !defined(SIMDE_COMMON_H) */
|
2591
libobs/util/aarch/sse.h
Normal file
2591
libobs/util/aarch/sse.h
Normal file
File diff suppressed because it is too large
Load diff
4197
libobs/util/aarch/sse2.h
Normal file
4197
libobs/util/aarch/sse2.h
Normal file
File diff suppressed because it is too large
Load diff
66
libobs/util/sse-intrin.h
Normal file
66
libobs/util/sse-intrin.h
Normal file
|
@ -0,0 +1,66 @@
|
|||
/******************************************************************************
|
||||
Copyright (C) 2019 by Peter Geis <pgwipeout@gmail.com>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
/* Selects the SSE/SSE2 implementation for the target.  Every target
   except AArch64 includes the compiler's native intrinsic headers.
   AArch64 includes the SIMDe port under aarch/ and maps each Intel name
   listed below onto its simde_* counterpart. */
#if !defined(__aarch64__)

#include <xmmintrin.h>
#include <emmintrin.h>

#else

#include "aarch/sse2.h"

/* Vector types */
#define __m128 simde__m128
#define __m128i simde__m128i

/* Single-precision: construction, loads and stores */
#define _mm_setzero_ps simde_mm_setzero_ps
#define _mm_set_ps simde_mm_set_ps
#define _mm_set1_ps simde_mm_set1_ps
#define _mm_load_ps simde_mm_load_ps
#define _mm_loadu_ps simde_mm_loadu_ps
#define _mm_storeu_ps simde_mm_storeu_ps

/* Single-precision: arithmetic and logic */
#define _mm_add_ps simde_mm_add_ps
#define _mm_sub_ps simde_mm_sub_ps
#define _mm_mul_ps simde_mm_mul_ps
#define _mm_div_ps simde_mm_div_ps
#define _mm_min_ps simde_mm_min_ps
#define _mm_max_ps simde_mm_max_ps
#define _mm_andnot_ps simde_mm_andnot_ps

/* Single-precision: lane rearrangement */
#define _mm_movehl_ps simde_mm_movehl_ps
#define _mm_movelh_ps simde_mm_movelh_ps
#define _mm_shuffle_ps simde_mm_shuffle_ps
#define _mm_unpacklo_ps simde_mm_unpacklo_ps
#define _mm_unpackhi_ps simde_mm_unpackhi_ps

/* Integer: construction, loads and stores */
#define _mm_set1_epi32 simde_mm_set1_epi32
#define _mm_set1_epi16 simde_mm_set1_epi16
#define _mm_load_si128 simde_mm_load_si128
#define _mm_storeu_si128 simde_mm_storeu_si128

/* Integer: arithmetic, logic, shifts and packing */
#define _mm_add_epi64 simde_mm_add_epi64
#define _mm_and_si128 simde_mm_and_si128
#define _mm_srli_si128 simde_mm_srli_si128
#define _mm_srai_epi16 simde_mm_srai_epi16
#define _mm_packs_epi32 simde_mm_packs_epi32
#define _mm_packus_epi16 simde_mm_packus_epi16

/* Integer: lane rearrangement */
#define _mm_shuffle_epi32 simde_mm_shuffle_epi32
#define _mm_shufflelo_epi16 simde_mm_shufflelo_epi16

/* Helper macros */
#define _MM_SHUFFLE SIMDE_MM_SHUFFLE
#define _MM_TRANSPOSE4_PS SIMDE_MM_TRANSPOSE4_PS

#endif
|
|
@ -1,6 +1,6 @@
|
|||
#include "audio-repack.h"
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <util/sse-intrin.h>
|
||||
|
||||
int check_buffer(struct audio_repack *repack, uint32_t frame_count)
|
||||
{
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#include <util/threading.h>
|
||||
#include <windows.h>
|
||||
#include <dxgi.h>
|
||||
#include <emmintrin.h>
|
||||
#include <util/sse-intrin.h>
|
||||
#include <ipc-util/pipe.h>
|
||||
#include "obfuscate.h"
|
||||
#include "inject-library.h"
|
||||
|
|
Loading…
Reference in a new issue