1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-09 12:50:35 +00:00

Update libwebp to 1.1.0

This commit is contained in:
volzhs
2020-01-17 03:49:31 +09:00
parent 6b64c60b0e
commit 65f2ab1b61
28 changed files with 121 additions and 81 deletions

View File

@@ -1361,7 +1361,8 @@ static void RD4_NEON(uint8_t* dst) { // Down-right
const uint32_t J = dst[-1 + 1 * BPS];
const uint32_t K = dst[-1 + 2 * BPS];
const uint32_t L = dst[-1 + 3 * BPS];
const uint64x1_t LKJI____ = vcreate_u64(L | (K << 8) | (J << 16) | (I << 24));
const uint64x1_t LKJI____ =
vcreate_u64((uint64_t)L | (K << 8) | (J << 16) | (I << 24));
const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
@@ -1427,10 +1428,16 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {
if (do_top) {
const uint8x8_t A = vld1_u8(dst - BPS); // top row
#if defined(__aarch64__)
const uint16x8_t B = vmovl_u8(A);
const uint16_t p2 = vaddvq_u16(B);
sum_top = vdupq_n_u16(p2);
#else
const uint16x4_t p0 = vpaddl_u8(A); // cascading summation of the top
const uint16x4_t p1 = vpadd_u16(p0, p0);
const uint16x4_t p2 = vpadd_u16(p1, p1);
sum_top = vcombine_u16(p2, p2);
#endif
}
if (do_left) {

View File

@@ -246,9 +246,9 @@ extern VP8Fdct VP8FTransform2; // performs two transforms at a time
extern VP8WHT VP8FTransformWHT;
// Predictions
// *dst is the destination block. *top and *left can be NULL.
typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
const uint8_t* top);
typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
extern VP8Intra4Preds VP8EncPredLuma4;
extern VP8IntraPreds VP8EncPredLuma16;
extern VP8IntraPreds VP8EncPredChroma8;

View File

@@ -81,7 +81,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
// gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
// inlined.
#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
#if defined(__arm__) && defined(__GNUC__) && LOCAL_GCC_VERSION <= 0x409
# define LOCAL_INLINE __attribute__ ((noinline))
#else
# define LOCAL_INLINE WEBP_INLINE
@@ -167,15 +167,20 @@ static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
return pred;
}
GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int x;
(void)upper;
for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
}
static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
int num_pixels, uint32_t* out) {
int i;
uint32_t left = out[-1];
(void)upper;
for (i = 0; i < num_pixels; ++i) {
out[i] = left = VP8LAddPixels(in[i], left);
}
(void)upper;
}
GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)

View File

@@ -177,6 +177,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int x; \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \
out[x] = VP8LAddPixels(in[x], pred); \
@@ -189,6 +190,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \
int num_pixels, uint32_t* out) { \
int x; \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x); \
out[x] = VP8LSubPixels(in[x], pred); \

View File

@@ -455,8 +455,9 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
_mm_storeu_si128((__m128i*)&out[i], res);
}
if (i != num_pixels) {
VP8LPredictorsSub_C[0](in + i, upper + i, num_pixels - i, out + i);
VP8LPredictorsSub_C[0](in + i, NULL, num_pixels - i, out + i);
}
(void)upper;
}
#define GENERATE_PREDICTOR_1(X, IN) \

View File

@@ -191,8 +191,9 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
_mm_storeu_si128((__m128i*)&out[i], res);
}
if (i != num_pixels) {
VP8LPredictorsAdd_C[0](in + i, upper + i, num_pixels - i, out + i);
VP8LPredictorsAdd_C[0](in + i, NULL, num_pixels - i, out + i);
}
(void)upper;
}
// Predictor1: left.

View File

@@ -576,9 +576,9 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y, \
const uint32_t l_uv = ((cur_u[0]) | ((cur_v[0]) << 16)); \
const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2; \
const uint8_t* ptop_y = &top_y[1]; \
uint8_t *ptop_dst = top_dst + XSTEP; \
uint8_t* ptop_dst = top_dst + XSTEP; \
const uint8_t* pbot_y = &bot_y[1]; \
uint8_t *pbot_dst = bot_dst + XSTEP; \
uint8_t* pbot_dst = bot_dst + XSTEP; \
\
FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst); \
if (bot_y != NULL) { \

View File

@@ -58,8 +58,8 @@
} while (0)
// Turn the macro into a function for reducing code-size when non-critical
static void Upsample16Pixels_NEON(const uint8_t *r1, const uint8_t *r2,
uint8_t *out) {
static void Upsample16Pixels_NEON(const uint8_t* r1, const uint8_t* r2,
uint8_t* out) {
UPSAMPLE_16PIXELS(r1, r2, out);
}
@@ -190,14 +190,14 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
}
#define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP) \
static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y, \
const uint8_t *top_u, const uint8_t *top_v, \
const uint8_t *cur_u, const uint8_t *cur_v, \
uint8_t *top_dst, uint8_t *bottom_dst, int len) { \
static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y, \
const uint8_t* top_u, const uint8_t* top_v, \
const uint8_t* cur_u, const uint8_t* cur_v, \
uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
int block; \
/* 16 byte aligned array to cache reconstructed u and v */ \
uint8_t uv_buf[2 * 32 + 15]; \
uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15); \
const int uv_len = (len + 1) >> 1; \
/* 9 pixels must be read-able for each block */ \
const int num_blocks = (uv_len - 1) >> 3; \