From 8402a1d0bd72c301f7278f6f7e4821e37dfc1f29 Mon Sep 17 00:00:00 2001 From: BlueCube3310 <53150244+BlueCube3310@users.noreply.github.com> Date: Tue, 25 Feb 2025 12:18:41 +0100 Subject: [PATCH] bcdec: Fix decompressing mipmaps of non-power-of-2 textures --- modules/bcdec/image_decompress_bcdec.cpp | 205 ++++++++++++++--------- 1 file changed, 127 insertions(+), 78 deletions(-) diff --git a/modules/bcdec/image_decompress_bcdec.cpp b/modules/bcdec/image_decompress_bcdec.cpp index 784ac0f82bc..272db0e2cdf 100644 --- a/modules/bcdec/image_decompress_bcdec.cpp +++ b/modules/bcdec/image_decompress_bcdec.cpp @@ -44,110 +44,174 @@ inline void bcdec_bc6h_half_u(const void *compressedBlock, void *decompressedBlo bcdec_bc6h_half(compressedBlock, decompressedBlock, destinationPitch, false); } +template +static inline void _safe_decompress_mipmap(int width, int height, const uint8_t *src, uint8_t *dst) { + // A stack-allocated output buffer large enough to contain an entire uncompressed block. + uint8_t temp_buf[4 * 4 * pixel_size]; + + // The amount of misaligned pixels on each axis. + const int width_diff = width - (width & ~0x03); + const int height_diff = height - (height & ~0x03); + + // The amount of uncompressed blocks on each axis. + const int width_blocks = (width & ~0x03) / 4; + const int height_blocks = (height & ~0x03) / 4; + + // The pitch of the image in bytes. + const int image_pitch = width * pixel_size; + // The pitch of a block in bytes. + const int block_pitch = 4 * pixel_size; + // The pitch of the last block in bytes. + const int odd_pitch = width_diff * pixel_size; + + size_t src_pos = 0; + size_t dst_pos = 0; + + // Decompress the blocks, starting from the top. + for (int y = 0; y < height_blocks; y += 1) { + // Decompress the blocks, starting from the left. + for (int x = 0; x < width_blocks; x += 1) { + decompress_func(&src[src_pos], &dst[dst_pos], image_pitch / component_size); + src_pos += block_size; + dst_pos += block_pitch; + } + + // Decompress the block on the right. + if (width_diff > 0) { + decompress_func(&src[src_pos], temp_buf, block_pitch / component_size); + + // Copy the data from the temporary buffer to the output. + for (int i = 0; i < 4; i++) { + memcpy(&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], odd_pitch); + } + + src_pos += block_size; + dst_pos += odd_pitch; + } + + // Skip to the next row of blocks, the current one has already been filled. + dst_pos += 3 * image_pitch; + } + + // Decompress the blocks at the bottom of the image. + if (height_diff > 0) { + // Decompress the blocks at the bottom. + for (int x = 0; x < width_blocks; x += 1) { + decompress_func(&src[src_pos], temp_buf, block_pitch / component_size); + + // Copy the data from the temporary buffer to the output. + for (int i = 0; i < height_diff; i++) { + memcpy(&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], block_pitch); + } + + src_pos += block_size; + dst_pos += block_pitch; + } + + // Decompress the block in the lower-right corner. + if (width_diff > 0) { + decompress_func(&src[src_pos], temp_buf, block_pitch / component_size); + + // Copy the data from the temporary buffer to the output. + for (int i = 0; i < height_diff; i++) { + memcpy(&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], odd_pitch); + } + + src_pos += block_size; + dst_pos += odd_pitch; + } + } +} + +template +static inline void _decompress_mipmap(int width, int height, const uint8_t *src, uint8_t *dst) { + size_t src_pos = 0; + size_t dst_pos = 0; + + // The size of a single block in bytes. + const int block_pitch = 4 * pixel_size; + // The pitch of the image in bytes. + const int image_pitch = width * pixel_size; + + for (int y = 0; y < height; y += 4) { + for (int x = 0; x < width; x += 4) { + decompress_func(&src[src_pos], &dst[dst_pos], image_pitch / component_size); + src_pos += block_size; + dst_pos += block_pitch; + } + + // Skip to the next row of blocks, the current one has already been filled. + dst_pos += 3 * image_pitch; + } +} + static void decompress_image(BCdecFormat format, const void *src, void *dst, const uint64_t width, const uint64_t height) { const uint8_t *src_blocks = reinterpret_cast(src); uint8_t *dec_blocks = reinterpret_cast(dst); -#define DECOMPRESS_LOOP(func, block_size, color_bytesize, color_components) \ - for (uint64_t y = 0; y < height; y += 4) { \ - for (uint64_t x = 0; x < width; x += 4) { \ - func(&src_blocks[src_pos], &dec_blocks[dst_pos], width * color_components); \ - src_pos += block_size; \ - dst_pos += 4 * color_bytesize; \ - } \ - dst_pos += 3 * width * color_bytesize; \ - } - -#define DECOMPRESS_LOOP_SAFE(func, block_size, color_bytesize, color_components, output) \ - for (uint64_t y = 0; y < height; y += 4) { \ - for (uint64_t x = 0; x < width; x += 4) { \ - const uint32_t yblock = MIN(height - y, 4ul); \ - const uint32_t xblock = MIN(width - x, 4ul); \ - \ - const bool incomplete = yblock < 4 || xblock < 4; \ - uint8_t *dec_out = incomplete ? output : &dec_blocks[y * 4 * width + x * color_bytesize]; \ - \ - func(&src_blocks[src_pos], dec_out, 4 * color_components); \ - src_pos += block_size; \ - \ - if (incomplete) { \ - for (uint32_t cy = 0; cy < yblock; cy++) { \ - for (uint32_t cx = 0; cx < xblock; cx++) { \ - memcpy(&dec_blocks[(y + cy) * 4 * width + (x + cx) * color_bytesize], &output[cy * 4 + cx * color_bytesize], color_bytesize); \ - } \ - } \ - } \ - } \ - } - - if (width % 4 != 0 || height % 4 != 0) { - uint64_t src_pos = 0; - - uint8_t r8_output[4 * 4]; - uint8_t rg8_output[4 * 4 * 2]; - uint8_t rgba8_output[4 * 4 * 4]; - uint8_t rgbh_output[4 * 4 * 6]; + const uint64_t aligned_width = (width + 3) & ~0x03; + const uint64_t aligned_height = (height + 3) & ~0x03; + if (width != aligned_width || height != aligned_height) { + // Decompress the mipmap in a 'safe' way, which involves starting from the top left. + // For each block row, decompress all of the 'full' blocks, then the misaligned one (on the x axis). + // Then, decompress the final misaligned block row at the bottom. + // Finally, decompress the misaligned block at the bottom right. switch (format) { case BCdec_BC1: { - DECOMPRESS_LOOP_SAFE(bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 4, rgba8_output) + _safe_decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC2: { - DECOMPRESS_LOOP_SAFE(bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 4, rgba8_output) + _safe_decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC3: { - DECOMPRESS_LOOP_SAFE(bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 4, rgba8_output) + _safe_decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC4: { - DECOMPRESS_LOOP_SAFE(bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1, r8_output) + _safe_decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC5: { - DECOMPRESS_LOOP_SAFE(bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 2, rg8_output) + _safe_decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC6U: { - DECOMPRESS_LOOP_SAFE(bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 3, rgbh_output) + _safe_decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC6S: { - DECOMPRESS_LOOP_SAFE(bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 3, rgbh_output) + _safe_decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC7: { - DECOMPRESS_LOOP_SAFE(bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 4, rgba8_output) + _safe_decompress_mipmap(width, height, src_blocks, dec_blocks); } break; } - } else { - uint64_t src_pos = 0, dst_pos = 0; - + // Just decompress as usual, as fast as possible. switch (format) { case BCdec_BC1: { - DECOMPRESS_LOOP(bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 4) + _decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC2: { - DECOMPRESS_LOOP(bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 4) + _decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC3: { - DECOMPRESS_LOOP(bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 4) + _decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC4: { - DECOMPRESS_LOOP(bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1) + _decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC5: { - DECOMPRESS_LOOP(bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 2) + _decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC6U: { - DECOMPRESS_LOOP(bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 3) + _decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC6S: { - DECOMPRESS_LOOP(bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 3) + _decompress_mipmap(width, height, src_blocks, dec_blocks); } break; case BCdec_BC7: { - DECOMPRESS_LOOP(bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 4) + _decompress_mipmap(width, height, src_blocks, dec_blocks); } break; } } - -#undef DECOMPRESS_LOOP -#undef DECOMPRESS_LOOP_SAFE } void image_decompress_bcdec(Image *p_image) { @@ -156,21 +220,6 @@ void image_decompress_bcdec(Image *p_image) { int width = p_image->get_width(); int height = p_image->get_height(); - // Compressed images' dimensions should be padded to the upper multiple of 4. - // If they aren't, they need to be realigned (the actual data is correctly padded though). - const bool need_width_realign = width % 4 != 0; - const bool need_height_realign = height % 4 != 0; - - if (need_width_realign || need_height_realign) { - int new_width = need_width_realign ? width + (4 - (width % 4)) : width; - int new_height = need_height_realign ? height + (4 - (height % 4)) : height; - - print_verbose(vformat("Compressed image's dimensions are not multiples of 4 (%dx%d), aligning to (%dx%d)", width, height, new_width, new_height)); - - width = new_width; - height = new_height; - } - Image::Format source_format = p_image->get_format(); Image::Format target_format = Image::FORMAT_MAX; @@ -237,8 +286,8 @@ void image_decompress_bcdec(Image *p_image) { // Decompress mipmaps. for (int i = 0; i <= mm_count; i++) { int mipmap_w = 0, mipmap_h = 0; - int64_t src_ofs = Image::get_image_mipmap_offset_and_dimensions(width, height, source_format, i, mipmap_w, mipmap_h); - int64_t dst_ofs = Image::get_image_mipmap_offset(width, height, target_format, i); + int64_t src_ofs = Image::get_image_mipmap_offset(width, height, source_format, i); + int64_t dst_ofs = Image::get_image_mipmap_offset_and_dimensions(width, height, target_format, i, mipmap_w, mipmap_h); decompress_image(bcdec_format, rb + src_ofs, wb + dst_ofs, mipmap_w, mipmap_h); }