Update libwebp to 1.1.0

2025-11-09 12:50:35 +00:00 · 2020-01-17 03:49:31 +09:00
parent 6b64c60b0e
commit 65f2ab1b61
28 changed files with 121 additions and 81 deletions
--- a/thirdparty/libwebp/src/dsp/dec_neon.c
+++ b/thirdparty/libwebp/src/dsp/dec_neon.c
@@ -1361,7 +1361,8 @@ static void RD4_NEON(uint8_t* dst) {   // Down-right
  const uint32_t J = dst[-1 + 1 * BPS];
  const uint32_t K = dst[-1 + 2 * BPS];
  const uint32_t L = dst[-1 + 3 * BPS];
-  const uint64x1_t LKJI____ = vcreate_u64(L | (K << 8) | (J << 16) | (I << 24));
+  const uint64x1_t LKJI____ =
+      vcreate_u64((uint64_t)L | (K << 8) | (J << 16) | (I << 24));
  const uint64x1_t LKJIXABC = vorr_u64(LKJI____, ____XABC);
  const uint8x8_t KJIXABC_ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 8));
  const uint8x8_t JIXABC__ = vreinterpret_u8_u64(vshr_n_u64(LKJIXABC, 16));
@@ -1427,10 +1428,16 @@ static WEBP_INLINE void DC8_NEON(uint8_t* dst, int do_top, int do_left) {

  if (do_top) {
    const uint8x8_t A = vld1_u8(dst - BPS);  // top row
+#if defined(__aarch64__)
+    const uint16x8_t B = vmovl_u8(A);
+    const uint16_t p2 = vaddvq_u16(B);
+    sum_top = vdupq_n_u16(p2);
+#else
    const uint16x4_t p0 = vpaddl_u8(A);  // cascading summation of the top
    const uint16x4_t p1 = vpadd_u16(p0, p0);
    const uint16x4_t p2 = vpadd_u16(p1, p1);
    sum_top = vcombine_u16(p2, p2);
+#endif
  }

  if (do_left) {
--- a/thirdparty/libwebp/src/dsp/dsp.h
+++ b/thirdparty/libwebp/src/dsp/dsp.h
@@ -246,9 +246,9 @@ extern VP8Fdct VP8FTransform2;   // performs two transforms at a time
 extern VP8WHT VP8FTransformWHT;
 // Predictions
 // *dst is the destination block. *top and *left can be NULL.
-typedef void (*VP8IntraPreds)(uint8_t *dst, const uint8_t* left,
+typedef void (*VP8IntraPreds)(uint8_t* dst, const uint8_t* left,
                              const uint8_t* top);
-typedef void (*VP8Intra4Preds)(uint8_t *dst, const uint8_t* top);
+typedef void (*VP8Intra4Preds)(uint8_t* dst, const uint8_t* top);
 extern VP8Intra4Preds VP8EncPredLuma4;
 extern VP8IntraPreds VP8EncPredLuma16;
 extern VP8IntraPreds VP8EncPredChroma8;
--- a/thirdparty/libwebp/src/dsp/lossless.c
+++ b/thirdparty/libwebp/src/dsp/lossless.c
@@ -81,7 +81,7 @@ static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,

 // gcc <= 4.9 on ARM generates incorrect code in Select() when Sub3() is
 // inlined.
-#if defined(__arm__) && LOCAL_GCC_VERSION <= 0x409
+#if defined(__arm__) && defined(__GNUC__) && LOCAL_GCC_VERSION <= 0x409
 # define LOCAL_INLINE __attribute__ ((noinline))
 #else
 # define LOCAL_INLINE WEBP_INLINE
@@ -167,15 +167,20 @@ static uint32_t Predictor13_C(uint32_t left, const uint32_t* const top) {
  return pred;
 }

-GENERATE_PREDICTOR_ADD(Predictor0_C, PredictorAdd0_C)
+static void PredictorAdd0_C(const uint32_t* in, const uint32_t* upper,
+                            int num_pixels, uint32_t* out) {
+  int x;
+  (void)upper;
+  for (x = 0; x < num_pixels; ++x) out[x] = VP8LAddPixels(in[x], ARGB_BLACK);
+}
 static void PredictorAdd1_C(const uint32_t* in, const uint32_t* upper,
                            int num_pixels, uint32_t* out) {
  int i;
  uint32_t left = out[-1];
+  (void)upper;
  for (i = 0; i < num_pixels; ++i) {
    out[i] = left = VP8LAddPixels(in[i], left);
  }
-  (void)upper;
 }
 GENERATE_PREDICTOR_ADD(Predictor2_C, PredictorAdd2_C)
 GENERATE_PREDICTOR_ADD(Predictor3_C, PredictorAdd3_C)
--- a/thirdparty/libwebp/src/dsp/lossless_common.h
+++ b/thirdparty/libwebp/src/dsp/lossless_common.h
@@ -177,6 +177,7 @@ uint32_t VP8LSubPixels(uint32_t a, uint32_t b) {
 static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
                          int num_pixels, uint32_t* out) {           \
  int x;                                                             \
+  assert(upper != NULL);                                             \
  for (x = 0; x < num_pixels; ++x) {                                 \
    const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x);        \
    out[x] = VP8LAddPixels(in[x], pred);                             \
@@ -189,6 +190,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
 static void PREDICTOR_SUB(const uint32_t* in, const uint32_t* upper, \
                          int num_pixels, uint32_t* out) {           \
  int x;                                                             \
+  assert(upper != NULL);                                             \
  for (x = 0; x < num_pixels; ++x) {                                 \
    const uint32_t pred = (PREDICTOR)(in[x - 1], upper + x);         \
    out[x] = VP8LSubPixels(in[x], pred);                             \
--- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c
@@ -455,8 +455,9 @@ static void PredictorSub0_SSE2(const uint32_t* in, const uint32_t* upper,
    _mm_storeu_si128((__m128i*)&out[i], res);
  }
  if (i != num_pixels) {
-    VP8LPredictorsSub_C[0](in + i, upper + i, num_pixels - i, out + i);
+    VP8LPredictorsSub_C[0](in + i, NULL, num_pixels - i, out + i);
  }
+  (void)upper;
 }

 #define GENERATE_PREDICTOR_1(X, IN)                                           \
--- a/thirdparty/libwebp/src/dsp/lossless_sse2.c
+++ b/thirdparty/libwebp/src/dsp/lossless_sse2.c
@@ -191,8 +191,9 @@ static void PredictorAdd0_SSE2(const uint32_t* in, const uint32_t* upper,
    _mm_storeu_si128((__m128i*)&out[i], res);
  }
  if (i != num_pixels) {
-    VP8LPredictorsAdd_C[0](in + i, upper + i, num_pixels - i, out + i);
+    VP8LPredictorsAdd_C[0](in + i, NULL, num_pixels - i, out + i);
  }
+  (void)upper;
 }

 // Predictor1: left.
--- a/thirdparty/libwebp/src/dsp/upsampling_msa.c
+++ b/thirdparty/libwebp/src/dsp/upsampling_msa.c
@@ -576,9 +576,9 @@ static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bot_y,        \
  const uint32_t l_uv = ((cur_u[0]) | ((cur_v[0]) << 16));               \
  const uint32_t uv0 = (3 * tl_uv + l_uv + 0x00020002u) >> 2;            \
  const uint8_t* ptop_y = &top_y[1];                                     \
-  uint8_t *ptop_dst = top_dst + XSTEP;                                   \
+  uint8_t* ptop_dst = top_dst + XSTEP;                                   \
  const uint8_t* pbot_y = &bot_y[1];                                     \
-  uint8_t *pbot_dst = bot_dst + XSTEP;                                   \
+  uint8_t* pbot_dst = bot_dst + XSTEP;                                   \
                                                                         \
  FUNC(top_y[0], uv0 & 0xff, (uv0 >> 16), top_dst);                      \
  if (bot_y != NULL) {                                                   \
--- a/thirdparty/libwebp/src/dsp/upsampling_neon.c
+++ b/thirdparty/libwebp/src/dsp/upsampling_neon.c
@@ -58,8 +58,8 @@
 } while (0)

 // Turn the macro into a function for reducing code-size when non-critical
-static void Upsample16Pixels_NEON(const uint8_t *r1, const uint8_t *r2,
-                                  uint8_t *out) {
+static void Upsample16Pixels_NEON(const uint8_t* r1, const uint8_t* r2,
+                                  uint8_t* out) {
  UPSAMPLE_16PIXELS(r1, r2, out);
 }

@@ -190,14 +190,14 @@ static const int16_t kCoeffs1[4] = { 19077, 26149, 6419, 13320 };
 }

 #define NEON_UPSAMPLE_FUNC(FUNC_NAME, FMT, XSTEP)                       \
-static void FUNC_NAME(const uint8_t *top_y, const uint8_t *bottom_y,    \
-                      const uint8_t *top_u, const uint8_t *top_v,       \
-                      const uint8_t *cur_u, const uint8_t *cur_v,       \
-                      uint8_t *top_dst, uint8_t *bottom_dst, int len) { \
+static void FUNC_NAME(const uint8_t* top_y, const uint8_t* bottom_y,    \
+                      const uint8_t* top_u, const uint8_t* top_v,       \
+                      const uint8_t* cur_u, const uint8_t* cur_v,       \
+                      uint8_t* top_dst, uint8_t* bottom_dst, int len) { \
  int block;                                                            \
  /* 16 byte aligned array to cache reconstructed u and v */            \
  uint8_t uv_buf[2 * 32 + 15];                                          \
-  uint8_t *const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);     \
+  uint8_t* const r_uv = (uint8_t*)((uintptr_t)(uv_buf + 15) & ~15);     \
  const int uv_len = (len + 1) >> 1;                                    \
  /* 9 pixels must be read-able for each block */                       \
  const int num_blocks = (uv_len - 1) >> 3;                             \