You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-24 15:26:15 +00:00
libwebp: Sync with upstream 1.2.4
Changes: https://chromium.googlesource.com/webm/libwebp/+/1.2.4/NEWS
This commit is contained in:
@@ -83,7 +83,7 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first,
|
||||
static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
|
||||
int alpha_stride, int width, int height,
|
||||
uint8_t* WEBP_RESTRICT dst, int dst_stride) {
|
||||
uint32_t alpha_mask = 0xffffffffu;
|
||||
uint32_t alpha_mask = 0xffu;
|
||||
uint8x8_t mask8 = vdup_n_u8(0xff);
|
||||
uint32_t tmp[2];
|
||||
int i, j;
|
||||
@@ -107,6 +107,7 @@ static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha,
|
||||
dst += dst_stride;
|
||||
}
|
||||
vst1_u8((uint8_t*)tmp, mask8);
|
||||
alpha_mask *= 0x01010101;
|
||||
alpha_mask &= tmp[0];
|
||||
alpha_mask &= tmp[1];
|
||||
return (alpha_mask != 0xffffffffu);
|
||||
@@ -135,7 +136,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha,
|
||||
static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
|
||||
int width, int height,
|
||||
uint8_t* WEBP_RESTRICT alpha, int alpha_stride) {
|
||||
uint32_t alpha_mask = 0xffffffffu;
|
||||
uint32_t alpha_mask = 0xffu;
|
||||
uint8x8_t mask8 = vdup_n_u8(0xff);
|
||||
uint32_t tmp[2];
|
||||
int i, j;
|
||||
@@ -157,6 +158,7 @@ static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride,
|
||||
alpha += alpha_stride;
|
||||
}
|
||||
vst1_u8((uint8_t*)tmp, mask8);
|
||||
alpha_mask *= 0x01010101;
|
||||
alpha_mask &= tmp[0];
|
||||
alpha_mask &= tmp[1];
|
||||
return (alpha_mask == 0xffffffffu);
|
||||
|
||||
2
thirdparty/libwebp/src/dsp/cpu.c
vendored
2
thirdparty/libwebp/src/dsp/cpu.c
vendored
@@ -11,7 +11,7 @@
|
||||
//
|
||||
// Author: Christian Duvivier (cduvivier@google.com)
|
||||
|
||||
#include "src/dsp/dsp.h"
|
||||
#include "src/dsp/cpu.h"
|
||||
|
||||
#if defined(WEBP_HAVE_NEON_RTCD)
|
||||
#include <stdio.h>
|
||||
|
||||
254
thirdparty/libwebp/src/dsp/cpu.h
vendored
Normal file
254
thirdparty/libwebp/src/dsp/cpu.h
vendored
Normal file
@@ -0,0 +1,254 @@
|
||||
// Copyright 2022 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Use of this source code is governed by a BSD-style license
|
||||
// that can be found in the COPYING file in the root of the source
|
||||
// tree. An additional intellectual property rights grant can be found
|
||||
// in the file PATENTS. All contributing project authors may
|
||||
// be found in the AUTHORS file in the root of the source tree.
|
||||
// -----------------------------------------------------------------------------
|
||||
//
|
||||
// CPU detection functions and macros.
|
||||
//
|
||||
// Author: Skal (pascal.massimino@gmail.com)
|
||||
|
||||
#ifndef WEBP_DSP_CPU_H_
|
||||
#define WEBP_DSP_CPU_H_
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
|
||||
#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
#define LOCAL_GCC_VERSION 0
|
||||
#define LOCAL_GCC_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
|
||||
#define LOCAL_CLANG_PREREQ(maj, min) \
|
||||
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
#define LOCAL_CLANG_VERSION 0
|
||||
#define LOCAL_CLANG_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_CONFIG_H)
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
|
||||
// files without intrinsics, allowing the corresponding Init() to be called.
|
||||
// Files containing intrinsics will need to be built targeting the instruction
|
||||
// set so should succeed on one of the earlier tests.
|
||||
#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
|
||||
#define WEBP_USE_SSE2
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
|
||||
#define WEBP_HAVE_SSE2
|
||||
#endif
|
||||
|
||||
#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
|
||||
#define WEBP_USE_SSE41
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
|
||||
#define WEBP_HAVE_SSE41
|
||||
#endif
|
||||
|
||||
#undef WEBP_MSC_SSE41
|
||||
#undef WEBP_MSC_SSE2
|
||||
|
||||
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
|
||||
// inline assembly would need to be modified for use with Native Client.
|
||||
#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
|
||||
!defined(__native_client__)
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
|
||||
defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
|
||||
#define WEBP_ANDROID_NEON // Android targets that may have NEON
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
|
||||
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
|
||||
// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with
|
||||
// vtbl4_u8(); a fix was made in 16.6.
|
||||
#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \
|
||||
(_MSC_VER >= 1926 && defined(_M_ARM64)))
|
||||
#define WEBP_USE_NEON
|
||||
#define WEBP_USE_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
|
||||
#define WEBP_HAVE_NEON
|
||||
#endif
|
||||
|
||||
#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \
|
||||
(__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
|
||||
#define WEBP_USE_MIPS32
|
||||
#if (__mips_isa_rev >= 2)
|
||||
#define WEBP_USE_MIPS32_R2
|
||||
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
|
||||
#define WEBP_USE_MIPS_DSP_R2
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
|
||||
#define WEBP_USE_MSA
|
||||
#endif
|
||||
|
||||
#ifndef WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_DSP_OMIT_C_CODE 1
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_NEON_OMIT_C_CODE 1
|
||||
#else
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \
|
||||
defined(__aarch64__))
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
#endif
|
||||
|
||||
// This macro prevents thread_sanitizer from reporting known concurrent writes.
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(thread_sanitizer)
|
||||
#undef WEBP_TSAN_IGNORE_FUNCTION
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#define WEBP_MSAN
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
#define WEBP_DSP_INIT(func) \
|
||||
do { \
|
||||
static volatile VP8CPUInfo func##_last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func##_last_cpuinfo_used; \
|
||||
static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \
|
||||
if (pthread_mutex_lock(&func##_lock)) break; \
|
||||
if (func##_last_cpuinfo_used != VP8GetCPUInfo) func(); \
|
||||
func##_last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
(void)pthread_mutex_unlock(&func##_lock); \
|
||||
} while (0)
|
||||
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
|
||||
#define WEBP_DSP_INIT(func) \
|
||||
do { \
|
||||
static volatile VP8CPUInfo func##_last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func##_last_cpuinfo_used; \
|
||||
if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \
|
||||
func(); \
|
||||
func##_last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
} while (0)
|
||||
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
|
||||
// Defines an Init + helper function that control multiple initialization of
|
||||
// function pointers / tables.
|
||||
/* Usage:
|
||||
WEBP_DSP_INIT_FUNC(InitFunc) {
|
||||
...function body
|
||||
}
|
||||
*/
|
||||
#define WEBP_DSP_INIT_FUNC(name) \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void); \
|
||||
WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void)
|
||||
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#if defined(__clang__) && defined(__has_attribute)
|
||||
#if __has_attribute(no_sanitize)
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures. This is only meant to silence unaligned loads on platforms that
|
||||
// are known to support them.
|
||||
#undef WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined")))
|
||||
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures related to unsigned integer overflows. This is only meant to
|
||||
// silence cases where this well defined behavior is expected.
|
||||
#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
|
||||
__attribute__((no_sanitize("unsigned-integer-overflow")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
|
||||
// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
|
||||
#if !defined(WEBP_OFFSET_PTR)
|
||||
#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
|
||||
#endif
|
||||
|
||||
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#define WEBP_SWAP_16BIT_CSP 0
|
||||
#endif
|
||||
|
||||
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
|
||||
#if !defined(WORDS_BIGENDIAN) && \
|
||||
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
|
||||
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
kSSE2,
|
||||
kSSE3,
|
||||
kSlowSSSE3, // special feature for slow SSSE3 architectures
|
||||
kSSE4_1,
|
||||
kAVX,
|
||||
kAVX2,
|
||||
kNEON,
|
||||
kMIPS32,
|
||||
kMIPSdspR2,
|
||||
kMSA
|
||||
} CPUFeature;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// returns true if the CPU supports the feature.
|
||||
typedef int (*VP8CPUInfo)(CPUFeature feature);
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif // WEBP_DSP_CPU_H_
|
||||
229
thirdparty/libwebp/src/dsp/dsp.h
vendored
229
thirdparty/libwebp/src/dsp/dsp.h
vendored
@@ -18,6 +18,7 @@
|
||||
#include "src/webp/config.h"
|
||||
#endif
|
||||
|
||||
#include "src/dsp/cpu.h"
|
||||
#include "src/webp/types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -43,225 +44,6 @@ extern "C" {
|
||||
#define WEBP_RESTRICT
|
||||
#endif
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// CPU detection
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__)
|
||||
# define LOCAL_GCC_PREREQ(maj, min) \
|
||||
(LOCAL_GCC_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
# define LOCAL_GCC_VERSION 0
|
||||
# define LOCAL_GCC_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#if defined(__clang__)
|
||||
# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__)
|
||||
# define LOCAL_CLANG_PREREQ(maj, min) \
|
||||
(LOCAL_CLANG_VERSION >= (((maj) << 8) | (min)))
|
||||
#else
|
||||
# define LOCAL_CLANG_VERSION 0
|
||||
# define LOCAL_CLANG_PREREQ(maj, min) 0
|
||||
#endif
|
||||
|
||||
#ifndef __has_builtin
|
||||
# define __has_builtin(x) 0
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_CONFIG_H)
|
||||
#if defined(_MSC_VER) && _MSC_VER > 1310 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER >= 1500 && \
|
||||
(defined(_M_X64) || defined(_M_IX86))
|
||||
#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp
|
||||
// files without intrinsics, allowing the corresponding Init() to be called.
|
||||
// Files containing intrinsics will need to be built targeting the instruction
|
||||
// set so should succeed on one of the earlier tests.
|
||||
#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2))
|
||||
#define WEBP_USE_SSE2
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2)
|
||||
#define WEBP_HAVE_SSE2
|
||||
#endif
|
||||
|
||||
#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41))
|
||||
#define WEBP_USE_SSE41
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41)
|
||||
#define WEBP_HAVE_SSE41
|
||||
#endif
|
||||
|
||||
#undef WEBP_MSC_SSE41
|
||||
#undef WEBP_MSC_SSE2
|
||||
|
||||
// The intrinsics currently cause compiler errors with arm-nacl-gcc and the
|
||||
// inline assembly would need to be modified for use with Native Client.
|
||||
#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \
|
||||
(!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \
|
||||
!defined(__native_client__)
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \
|
||||
defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H)
|
||||
#define WEBP_ANDROID_NEON // Android targets that may have NEON
|
||||
#define WEBP_USE_NEON
|
||||
#endif
|
||||
|
||||
// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
|
||||
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
|
||||
// arm_neon.h.
|
||||
#if defined(_MSC_VER) && \
|
||||
((_MSC_VER >= 1700 && defined(_M_ARM)) || \
|
||||
(_MSC_VER >= 1920 && defined(_M_ARM64)))
|
||||
#define WEBP_USE_NEON
|
||||
#define WEBP_USE_INTRINSICS
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON)
|
||||
#define WEBP_HAVE_NEON
|
||||
#endif
|
||||
|
||||
#if defined(__mips__) && !defined(__mips64) && \
|
||||
defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6)
|
||||
#define WEBP_USE_MIPS32
|
||||
#if (__mips_isa_rev >= 2)
|
||||
#define WEBP_USE_MIPS32_R2
|
||||
#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2)
|
||||
#define WEBP_USE_MIPS_DSP_R2
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5)
|
||||
#define WEBP_USE_MSA
|
||||
#endif
|
||||
|
||||
#ifndef WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_DSP_OMIT_C_CODE 1
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE
|
||||
#define WEBP_NEON_OMIT_C_CODE 1
|
||||
#else
|
||||
#define WEBP_NEON_OMIT_C_CODE 0
|
||||
#endif
|
||||
|
||||
#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__))
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 1
|
||||
#else
|
||||
#define WEBP_NEON_WORK_AROUND_GCC 0
|
||||
#endif
|
||||
|
||||
// This macro prevents thread_sanitizer from reporting known concurrent writes.
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(thread_sanitizer)
|
||||
#undef WEBP_TSAN_IGNORE_FUNCTION
|
||||
#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
#include <pthread.h> // NOLINT
|
||||
|
||||
#define WEBP_DSP_INIT(func) do { \
|
||||
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
|
||||
static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \
|
||||
if (pthread_mutex_lock(&func ## _lock)) break; \
|
||||
if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \
|
||||
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
(void)pthread_mutex_unlock(&func ## _lock); \
|
||||
} while (0)
|
||||
#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32))
|
||||
#define WEBP_DSP_INIT(func) do { \
|
||||
static volatile VP8CPUInfo func ## _last_cpuinfo_used = \
|
||||
(VP8CPUInfo)&func ## _last_cpuinfo_used; \
|
||||
if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \
|
||||
func(); \
|
||||
func ## _last_cpuinfo_used = VP8GetCPUInfo; \
|
||||
} while (0)
|
||||
#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32)
|
||||
|
||||
// Defines an Init + helper function that control multiple initialization of
|
||||
// function pointers / tables.
|
||||
/* Usage:
|
||||
WEBP_DSP_INIT_FUNC(InitFunc) {
|
||||
...function body
|
||||
}
|
||||
*/
|
||||
#define WEBP_DSP_INIT_FUNC(name) \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \
|
||||
WEBP_TSAN_IGNORE_FUNCTION void name(void) { \
|
||||
WEBP_DSP_INIT(name ## _body); \
|
||||
} \
|
||||
static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void)
|
||||
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#if defined(__clang__) && defined(__has_attribute)
|
||||
#if __has_attribute(no_sanitize)
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures. This is only meant to silence unaligned loads on platforms that
|
||||
// are known to support them.
|
||||
#undef WEBP_UBSAN_IGNORE_UNDEF
|
||||
#define WEBP_UBSAN_IGNORE_UNDEF \
|
||||
__attribute__((no_sanitize("undefined")))
|
||||
|
||||
// This macro prevents the undefined behavior sanitizer from reporting
|
||||
// failures related to unsigned integer overflows. This is only meant to
|
||||
// silence cases where this well defined behavior is expected.
|
||||
#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW
|
||||
#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \
|
||||
__attribute__((no_sanitize("unsigned-integer-overflow")))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'.
|
||||
// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning.
|
||||
#if !defined(WEBP_OFFSET_PTR)
|
||||
#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off)))
|
||||
#endif
|
||||
|
||||
// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility)
|
||||
#if !defined(WEBP_SWAP_16BIT_CSP)
|
||||
#define WEBP_SWAP_16BIT_CSP 0
|
||||
#endif
|
||||
|
||||
// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__)
|
||||
#if !defined(WORDS_BIGENDIAN) && \
|
||||
(defined(__BIG_ENDIAN__) || defined(_M_PPC) || \
|
||||
(defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))
|
||||
#define WORDS_BIGENDIAN
|
||||
#endif
|
||||
|
||||
typedef enum {
|
||||
kSSE2,
|
||||
kSSE3,
|
||||
kSlowSSSE3, // special feature for slow SSSE3 architectures
|
||||
kSSE4_1,
|
||||
kAVX,
|
||||
kAVX2,
|
||||
kNEON,
|
||||
kMIPS32,
|
||||
kMIPSdspR2,
|
||||
kMSA
|
||||
} CPUFeature;
|
||||
// returns true if the CPU supports the feature.
|
||||
typedef int (*VP8CPUInfo)(CPUFeature feature);
|
||||
WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo;
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Init stub generator
|
||||
@@ -550,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
uint8_t* u, uint8_t* v, int width);
|
||||
|
||||
// utilities for accurate RGB->YUV conversion
|
||||
extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref,
|
||||
uint16_t* dst, int len);
|
||||
extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref,
|
||||
int16_t* dst, int len);
|
||||
extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B,
|
||||
int len,
|
||||
const uint16_t* best_y, uint16_t* out);
|
||||
|
||||
// Must be called before using the above.
|
||||
void WebPInitConvertARGBToYUV(void);
|
||||
|
||||
|
||||
8
thirdparty/libwebp/src/dsp/lossless.h
vendored
8
thirdparty/libwebp/src/dsp/lossless.h
vendored
@@ -182,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16];
|
||||
// -----------------------------------------------------------------------------
|
||||
// Huffman-cost related functions.
|
||||
|
||||
typedef double (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||
typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
||||
int length);
|
||||
typedef float (*VP8LCostFunc)(const uint32_t* population, int length);
|
||||
typedef float (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y,
|
||||
int length);
|
||||
typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256],
|
||||
const int Y[256]);
|
||||
|
||||
@@ -198,7 +198,7 @@ typedef struct { // small struct to hold counters
|
||||
} VP8LStreaks;
|
||||
|
||||
typedef struct { // small struct to hold bit entropy results
|
||||
double entropy; // entropy
|
||||
float entropy; // entropy
|
||||
uint32_t sum; // sum of the population
|
||||
int nonzeros; // number of non-zero elements in the population
|
||||
uint32_t max_val; // maximum value in the population
|
||||
|
||||
12
thirdparty/libwebp/src/dsp/lossless_enc.c
vendored
12
thirdparty/libwebp/src/dsp/lossless_enc.c
vendored
@@ -402,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) {
|
||||
// Compute the combined Shanon's entropy for distribution {X} and {X+Y}
|
||||
static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
|
||||
int i;
|
||||
double retval = 0.;
|
||||
float retval = 0.f;
|
||||
int sumX = 0, sumXY = 0;
|
||||
for (i = 0; i < 256; ++i) {
|
||||
const int x = X[i];
|
||||
@@ -418,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) {
|
||||
}
|
||||
}
|
||||
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||
return (float)retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) {
|
||||
@@ -636,17 +636,17 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits,
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
static double ExtraCost_C(const uint32_t* population, int length) {
|
||||
static float ExtraCost_C(const uint32_t* population, int length) {
|
||||
int i;
|
||||
double cost = 0.;
|
||||
float cost = 0.f;
|
||||
for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
|
||||
return cost;
|
||||
}
|
||||
|
||||
static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
static float ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y,
|
||||
int length) {
|
||||
int i;
|
||||
double cost = 0.;
|
||||
float cost = 0.f;
|
||||
for (i = 2; i < length - 2; ++i) {
|
||||
const int xy = X[i + 2] + Y[i + 2];
|
||||
cost += (i >> 1) * xy;
|
||||
|
||||
22
thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
vendored
22
thirdparty/libwebp/src/dsp/lossless_enc_mips32.c
vendored
@@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) {
|
||||
// cost += i * *(pop + 1);
|
||||
// pop += 2;
|
||||
// }
|
||||
// return (double)cost;
|
||||
static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
// return (float)cost;
|
||||
static float ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
int i, temp0, temp1;
|
||||
const uint32_t* pop = &population[4];
|
||||
const uint32_t* const LoopEnd = &population[length];
|
||||
@@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
return (double)((int64_t)temp0 << 32 | temp1);
|
||||
return (float)((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
// C version of this function:
|
||||
@@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) {
|
||||
// pX += 2;
|
||||
// pY += 2;
|
||||
// }
|
||||
// return (double)cost;
|
||||
static double ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
// return (float)cost;
|
||||
static float ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
const uint32_t* const Y, int length) {
|
||||
int i, temp0, temp1, temp2, temp3;
|
||||
const uint32_t* pX = &X[4];
|
||||
const uint32_t* pY = &Y[4];
|
||||
@@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X,
|
||||
: "memory", "hi", "lo"
|
||||
);
|
||||
|
||||
return (double)((int64_t)temp0 << 32 | temp1);
|
||||
return (float)((int64_t)temp0 << 32 | temp1);
|
||||
}
|
||||
|
||||
#define HUFFMAN_COST_PASS \
|
||||
@@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[],
|
||||
static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb,
|
||||
uint32_t* pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const uint32_t end = ((size) / 4) * 4;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
int i;
|
||||
ASM_START
|
||||
ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout)
|
||||
ASM_END_0
|
||||
for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i];
|
||||
for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i];
|
||||
}
|
||||
|
||||
static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) {
|
||||
uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
|
||||
const uint32_t end = ((size) / 4) * 4;
|
||||
const int end = ((size) / 4) * 4;
|
||||
const uint32_t* const LoopEnd = pa + end;
|
||||
int i;
|
||||
ASM_START
|
||||
ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout)
|
||||
ASM_END_1
|
||||
for (i = end; i < size; ++i) pout[i] += pa[i];
|
||||
for (i = 0; i < size - end; ++i) pout[i] += pa[i];
|
||||
}
|
||||
|
||||
#undef ASM_END_1
|
||||
|
||||
@@ -239,7 +239,7 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) {
|
||||
|
||||
static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
|
||||
int i;
|
||||
double retval = 0.;
|
||||
float retval = 0.f;
|
||||
int sumX = 0, sumXY = 0;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
|
||||
@@ -273,7 +273,7 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) {
|
||||
}
|
||||
}
|
||||
retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
|
||||
return (float)retval;
|
||||
return retval;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
64
thirdparty/libwebp/src/dsp/yuv.c
vendored
64
thirdparty/libwebp/src/dsp/yuv.c
vendored
@@ -194,50 +194,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb,
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
|
||||
static uint16_t clip_y(int v) {
|
||||
return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len) {
|
||||
uint64_t diff = 0;
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)dst[i] + diff_y;
|
||||
dst[i] = clip_y(new_y);
|
||||
diff += (uint64_t)abs(diff_y);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out) {
|
||||
int i;
|
||||
for (i = 0; i < len; ++i, ++A, ++B) {
|
||||
const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4;
|
||||
const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4;
|
||||
out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
|
||||
out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
|
||||
}
|
||||
}
|
||||
#endif // !WEBP_NEON_OMIT_C_CODE
|
||||
|
||||
#undef MAX_Y
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width);
|
||||
void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width);
|
||||
void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb,
|
||||
@@ -247,18 +203,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width);
|
||||
void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v,
|
||||
int src_width, int do_store);
|
||||
|
||||
uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len);
|
||||
void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len);
|
||||
void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out);
|
||||
|
||||
extern void WebPInitConvertARGBToYUVSSE2(void);
|
||||
extern void WebPInitConvertARGBToYUVSSE41(void);
|
||||
extern void WebPInitConvertARGBToYUVNEON(void);
|
||||
extern void WebPInitSharpYUVSSE2(void);
|
||||
extern void WebPInitSharpYUVNEON(void);
|
||||
|
||||
WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
||||
WebPConvertARGBToY = ConvertARGBToY_C;
|
||||
@@ -269,17 +216,10 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
||||
|
||||
WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C;
|
||||
|
||||
#if !WEBP_NEON_OMIT_C_CODE
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_C;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_C;
|
||||
#endif
|
||||
|
||||
if (VP8GetCPUInfo != NULL) {
|
||||
#if defined(WEBP_HAVE_SSE2)
|
||||
if (VP8GetCPUInfo(kSSE2)) {
|
||||
WebPInitConvertARGBToYUVSSE2();
|
||||
WebPInitSharpYUVSSE2();
|
||||
}
|
||||
#endif // WEBP_HAVE_SSE2
|
||||
#if defined(WEBP_HAVE_SSE41)
|
||||
@@ -293,7 +233,6 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
||||
if (WEBP_NEON_OMIT_C_CODE ||
|
||||
(VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) {
|
||||
WebPInitConvertARGBToYUVNEON();
|
||||
WebPInitSharpYUVNEON();
|
||||
}
|
||||
#endif // WEBP_HAVE_NEON
|
||||
|
||||
@@ -302,7 +241,4 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) {
|
||||
assert(WebPConvertRGB24ToY != NULL);
|
||||
assert(WebPConvertBGR24ToY != NULL);
|
||||
assert(WebPConvertRGBA32ToUV != NULL);
|
||||
assert(WebPSharpYUVUpdateY != NULL);
|
||||
assert(WebPSharpYUVUpdateRGB != NULL);
|
||||
assert(WebPSharpYUVFilterRow != NULL);
|
||||
}
|
||||
|
||||
108
thirdparty/libwebp/src/dsp/yuv_neon.c
vendored
108
thirdparty/libwebp/src/dsp/yuv_neon.c
vendored
@@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) {
|
||||
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
|
||||
static uint16_t clip_y_NEON(int v) {
|
||||
return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len) {
|
||||
int i;
|
||||
const int16x8_t zero = vdupq_n_s16(0);
|
||||
const int16x8_t max = vdupq_n_s16(MAX_Y);
|
||||
uint64x2_t sum = vdupq_n_u64(0);
|
||||
uint64_t diff;
|
||||
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i));
|
||||
const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i));
|
||||
const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i));
|
||||
const int16x8_t D = vsubq_s16(A, B); // diff_y
|
||||
const int16x8_t F = vaddq_s16(C, D); // new_y
|
||||
const uint16x8_t H =
|
||||
vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero));
|
||||
const int16x8_t I = vabsq_s16(D); // abs(diff_y)
|
||||
vst1q_u16(dst + i, H);
|
||||
sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I)));
|
||||
}
|
||||
diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1);
|
||||
for (; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)(dst[i]) + diff_y;
|
||||
dst[i] = clip_y_NEON(new_y);
|
||||
diff += (uint64_t)(abs(diff_y));
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i;
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t A = vld1q_s16(ref + i);
|
||||
const int16x8_t B = vld1q_s16(src + i);
|
||||
const int16x8_t C = vld1q_s16(dst + i);
|
||||
const int16x8_t D = vsubq_s16(A, B); // diff_uv
|
||||
const int16x8_t E = vaddq_s16(C, D); // new_uv
|
||||
vst1q_s16(dst + i, E);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out) {
|
||||
int i;
|
||||
const int16x8_t max = vdupq_n_s16(MAX_Y);
|
||||
const int16x8_t zero = vdupq_n_s16(0);
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const int16x8_t a0 = vld1q_s16(A + i + 0);
|
||||
const int16x8_t a1 = vld1q_s16(A + i + 1);
|
||||
const int16x8_t b0 = vld1q_s16(B + i + 0);
|
||||
const int16x8_t b1 = vld1q_s16(B + i + 1);
|
||||
const int16x8_t a0b1 = vaddq_s16(a0, b1);
|
||||
const int16x8_t a1b0 = vaddq_s16(a1, b0);
|
||||
const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1)
|
||||
const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0)
|
||||
const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3);
|
||||
const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3);
|
||||
const int16x8_t d0 = vaddq_s16(c1, a0);
|
||||
const int16x8_t d1 = vaddq_s16(c0, a1);
|
||||
const int16x8_t e0 = vrshrq_n_s16(d0, 1);
|
||||
const int16x8_t e1 = vrshrq_n_s16(d1, 1);
|
||||
const int16x8x2_t f = vzipq_s16(e0, e1);
|
||||
const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0));
|
||||
const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8));
|
||||
const int16x8_t h0 = vaddq_s16(g0, f.val[0]);
|
||||
const int16x8_t h1 = vaddq_s16(g1, f.val[1]);
|
||||
const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero);
|
||||
const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero);
|
||||
vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0));
|
||||
vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1));
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0);
|
||||
out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1);
|
||||
}
|
||||
}
|
||||
#undef MAX_Y
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void WebPInitSharpYUVNEON(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) {
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_NEON
|
||||
|
||||
WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON)
|
||||
WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON)
|
||||
|
||||
#endif // WEBP_USE_NEON
|
||||
|
||||
119
thirdparty/libwebp/src/dsp/yuv_sse2.c
vendored
119
thirdparty/libwebp/src/dsp/yuv_sse2.c
vendored
@@ -747,128 +747,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) {
|
||||
WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic
|
||||
static uint16_t clip_y(int v) {
|
||||
return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v;
|
||||
}
|
||||
|
||||
static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src,
|
||||
uint16_t* dst, int len) {
|
||||
uint64_t diff = 0;
|
||||
uint32_t tmp[4];
|
||||
int i;
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
const __m128i max = _mm_set1_epi16(MAX_Y);
|
||||
const __m128i one = _mm_set1_epi16(1);
|
||||
__m128i sum = zero;
|
||||
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
|
||||
const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
|
||||
const __m128i D = _mm_sub_epi16(A, B); // diff_y
|
||||
const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0)
|
||||
const __m128i F = _mm_add_epi16(C, D); // new_y
|
||||
const __m128i G = _mm_or_si128(E, one); // -1 or 1
|
||||
const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero);
|
||||
const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...))
|
||||
_mm_storeu_si128((__m128i*)(dst + i), H);
|
||||
sum = _mm_add_epi32(sum, I);
|
||||
}
|
||||
_mm_storeu_si128((__m128i*)tmp, sum);
|
||||
diff = tmp[3] + tmp[2] + tmp[1] + tmp[0];
|
||||
for (; i < len; ++i) {
|
||||
const int diff_y = ref[i] - src[i];
|
||||
const int new_y = (int)dst[i] + diff_y;
|
||||
dst[i] = clip_y(new_y);
|
||||
diff += (uint64_t)abs(diff_y);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src,
|
||||
int16_t* dst, int len) {
|
||||
int i = 0;
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i));
|
||||
const __m128i B = _mm_loadu_si128((const __m128i*)(src + i));
|
||||
const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i));
|
||||
const __m128i D = _mm_sub_epi16(A, B); // diff_uv
|
||||
const __m128i E = _mm_add_epi16(C, D); // new_uv
|
||||
_mm_storeu_si128((__m128i*)(dst + i), E);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
const int diff_uv = ref[i] - src[i];
|
||||
dst[i] += diff_uv;
|
||||
}
|
||||
}
|
||||
|
||||
static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len,
|
||||
const uint16_t* best_y, uint16_t* out) {
|
||||
int i;
|
||||
const __m128i kCst8 = _mm_set1_epi16(8);
|
||||
const __m128i max = _mm_set1_epi16(MAX_Y);
|
||||
const __m128i zero = _mm_setzero_si128();
|
||||
for (i = 0; i + 8 <= len; i += 8) {
|
||||
const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0));
|
||||
const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1));
|
||||
const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0));
|
||||
const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1));
|
||||
const __m128i a0b1 = _mm_add_epi16(a0, b1);
|
||||
const __m128i a1b0 = _mm_add_epi16(a1, b0);
|
||||
const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1
|
||||
const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8);
|
||||
const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1)
|
||||
const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0)
|
||||
const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3);
|
||||
const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3);
|
||||
const __m128i d0 = _mm_add_epi16(c1, a0);
|
||||
const __m128i d1 = _mm_add_epi16(c0, a1);
|
||||
const __m128i e0 = _mm_srai_epi16(d0, 1);
|
||||
const __m128i e1 = _mm_srai_epi16(d1, 1);
|
||||
const __m128i f0 = _mm_unpacklo_epi16(e0, e1);
|
||||
const __m128i f1 = _mm_unpackhi_epi16(e0, e1);
|
||||
const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0));
|
||||
const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8));
|
||||
const __m128i h0 = _mm_add_epi16(g0, f0);
|
||||
const __m128i h1 = _mm_add_epi16(g1, f1);
|
||||
const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero);
|
||||
const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0);
|
||||
_mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1);
|
||||
}
|
||||
for (; i < len; ++i) {
|
||||
// (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 =
|
||||
// = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4
|
||||
// We reuse the common sub-expressions.
|
||||
const int a0b1 = A[i + 0] + B[i + 1];
|
||||
const int a1b0 = A[i + 1] + B[i + 0];
|
||||
const int a0a1b0b1 = a0b1 + a1b0 + 8;
|
||||
const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4;
|
||||
const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4;
|
||||
out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0);
|
||||
out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1);
|
||||
}
|
||||
}
|
||||
|
||||
#undef MAX_Y
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
extern void WebPInitSharpYUVSSE2(void);
|
||||
|
||||
WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) {
|
||||
WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2;
|
||||
WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2;
|
||||
WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2;
|
||||
}
|
||||
|
||||
#else // !WEBP_USE_SSE2
|
||||
|
||||
WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2)
|
||||
WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2)
|
||||
WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2)
|
||||
|
||||
#endif // WEBP_USE_SSE2
|
||||
|
||||
Reference in New Issue
Block a user