You've already forked godot
							
							
				mirror of
				https://github.com/godotengine/godot.git
				synced 2025-11-03 11:50:27 +00:00 
			
		
		
		
	bcdec: Fix decompressing mipmaps of non-power-of-2 textures
This commit is contained in:
		@@ -44,110 +44,174 @@ inline void bcdec_bc6h_half_u(const void *compressedBlock, void *decompressedBlo
 | 
			
		||||
	bcdec_bc6h_half(compressedBlock, decompressedBlock, destinationPitch, false);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <void (*decompress_func)(const void *, void *, int), int block_size, int pixel_size, int component_size>
 | 
			
		||||
static inline void _safe_decompress_mipmap(int width, int height, const uint8_t *src, uint8_t *dst) {
 | 
			
		||||
	// A stack-allocated output buffer large enough to contain an entire uncompressed block.
 | 
			
		||||
	uint8_t temp_buf[4 * 4 * pixel_size];
 | 
			
		||||
 | 
			
		||||
	// The amount of misaligned pixels on each axis.
 | 
			
		||||
	const int width_diff = width - (width & ~0x03);
 | 
			
		||||
	const int height_diff = height - (height & ~0x03);
 | 
			
		||||
 | 
			
		||||
	// The amount of uncompressed blocks on each axis.
 | 
			
		||||
	const int width_blocks = (width & ~0x03) / 4;
 | 
			
		||||
	const int height_blocks = (height & ~0x03) / 4;
 | 
			
		||||
 | 
			
		||||
	// The pitch of the image in bytes.
 | 
			
		||||
	const int image_pitch = width * pixel_size;
 | 
			
		||||
	// The pitch of a block in bytes.
 | 
			
		||||
	const int block_pitch = 4 * pixel_size;
 | 
			
		||||
	// The pitch of the last block in bytes.
 | 
			
		||||
	const int odd_pitch = width_diff * pixel_size;
 | 
			
		||||
 | 
			
		||||
	size_t src_pos = 0;
 | 
			
		||||
	size_t dst_pos = 0;
 | 
			
		||||
 | 
			
		||||
	// Decompress the blocks, starting from the top.
 | 
			
		||||
	for (int y = 0; y < height_blocks; y += 1) {
 | 
			
		||||
		// Decompress the blocks, starting from the left.
 | 
			
		||||
		for (int x = 0; x < width_blocks; x += 1) {
 | 
			
		||||
			decompress_func(&src[src_pos], &dst[dst_pos], image_pitch / component_size);
 | 
			
		||||
			src_pos += block_size;
 | 
			
		||||
			dst_pos += block_pitch;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Decompress the block on the right.
 | 
			
		||||
		if (width_diff > 0) {
 | 
			
		||||
			decompress_func(&src[src_pos], temp_buf, block_pitch / component_size);
 | 
			
		||||
 | 
			
		||||
			// Copy the data from the temporary buffer to the output.
 | 
			
		||||
			for (int i = 0; i < 4; i++) {
 | 
			
		||||
				memcpy(&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], odd_pitch);
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			src_pos += block_size;
 | 
			
		||||
			dst_pos += odd_pitch;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Skip to the next row of blocks, the current one has already been filled.
 | 
			
		||||
		dst_pos += 3 * image_pitch;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Decompress the blocks at the bottom of the image.
 | 
			
		||||
	if (height_diff > 0) {
 | 
			
		||||
		// Decompress the blocks at the bottom.
 | 
			
		||||
		for (int x = 0; x < width_blocks; x += 1) {
 | 
			
		||||
			decompress_func(&src[src_pos], temp_buf, block_pitch / component_size);
 | 
			
		||||
 | 
			
		||||
			// Copy the data from the temporary buffer to the output.
 | 
			
		||||
			for (int i = 0; i < height_diff; i++) {
 | 
			
		||||
				memcpy(&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], block_pitch);
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			src_pos += block_size;
 | 
			
		||||
			dst_pos += block_pitch;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Decompress the block in the lower-right corner.
 | 
			
		||||
		if (width_diff > 0) {
 | 
			
		||||
			decompress_func(&src[src_pos], temp_buf, block_pitch / component_size);
 | 
			
		||||
 | 
			
		||||
			// Copy the data from the temporary buffer to the output.
 | 
			
		||||
			for (int i = 0; i < height_diff; i++) {
 | 
			
		||||
				memcpy(&dst[dst_pos + i * image_pitch], &temp_buf[i * block_pitch], odd_pitch);
 | 
			
		||||
			}
 | 
			
		||||
 | 
			
		||||
			src_pos += block_size;
 | 
			
		||||
			dst_pos += odd_pitch;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <void (*decompress_func)(const void *, void *, int), int block_size, int pixel_size, int component_size>
 | 
			
		||||
static inline void _decompress_mipmap(int width, int height, const uint8_t *src, uint8_t *dst) {
 | 
			
		||||
	size_t src_pos = 0;
 | 
			
		||||
	size_t dst_pos = 0;
 | 
			
		||||
 | 
			
		||||
	// The size of a single block in bytes.
 | 
			
		||||
	const int block_pitch = 4 * pixel_size;
 | 
			
		||||
	// The pitch of the image in bytes.
 | 
			
		||||
	const int image_pitch = width * pixel_size;
 | 
			
		||||
 | 
			
		||||
	for (int y = 0; y < height; y += 4) {
 | 
			
		||||
		for (int x = 0; x < width; x += 4) {
 | 
			
		||||
			decompress_func(&src[src_pos], &dst[dst_pos], image_pitch / component_size);
 | 
			
		||||
			src_pos += block_size;
 | 
			
		||||
			dst_pos += block_pitch;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		// Skip to the next row of blocks, the current one has already been filled.
 | 
			
		||||
		dst_pos += 3 * image_pitch;
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void decompress_image(BCdecFormat format, const void *src, void *dst, const uint64_t width, const uint64_t height) {
 | 
			
		||||
	const uint8_t *src_blocks = reinterpret_cast<const uint8_t *>(src);
 | 
			
		||||
	uint8_t *dec_blocks = reinterpret_cast<uint8_t *>(dst);
 | 
			
		||||
 | 
			
		||||
#define DECOMPRESS_LOOP(func, block_size, color_bytesize, color_components)             \
 | 
			
		||||
	for (uint64_t y = 0; y < height; y += 4) {                                          \
 | 
			
		||||
		for (uint64_t x = 0; x < width; x += 4) {                                       \
 | 
			
		||||
			func(&src_blocks[src_pos], &dec_blocks[dst_pos], width * color_components); \
 | 
			
		||||
			src_pos += block_size;                                                      \
 | 
			
		||||
			dst_pos += 4 * color_bytesize;                                              \
 | 
			
		||||
		}                                                                               \
 | 
			
		||||
		dst_pos += 3 * width * color_bytesize;                                          \
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
#define DECOMPRESS_LOOP_SAFE(func, block_size, color_bytesize, color_components, output)                                                              \
 | 
			
		||||
	for (uint64_t y = 0; y < height; y += 4) {                                                                                                        \
 | 
			
		||||
		for (uint64_t x = 0; x < width; x += 4) {                                                                                                     \
 | 
			
		||||
			const uint32_t yblock = MIN(height - y, 4ul);                                                                                             \
 | 
			
		||||
			const uint32_t xblock = MIN(width - x, 4ul);                                                                                              \
 | 
			
		||||
                                                                                                                                                      \
 | 
			
		||||
			const bool incomplete = yblock < 4 || xblock < 4;                                                                                         \
 | 
			
		||||
			uint8_t *dec_out = incomplete ? output : &dec_blocks[y * 4 * width + x * color_bytesize];                                                 \
 | 
			
		||||
                                                                                                                                                      \
 | 
			
		||||
			func(&src_blocks[src_pos], dec_out, 4 * color_components);                                                                                \
 | 
			
		||||
			src_pos += block_size;                                                                                                                    \
 | 
			
		||||
                                                                                                                                                      \
 | 
			
		||||
			if (incomplete) {                                                                                                                         \
 | 
			
		||||
				for (uint32_t cy = 0; cy < yblock; cy++) {                                                                                            \
 | 
			
		||||
					for (uint32_t cx = 0; cx < xblock; cx++) {                                                                                        \
 | 
			
		||||
						memcpy(&dec_blocks[(y + cy) * 4 * width + (x + cx) * color_bytesize], &output[cy * 4 + cx * color_bytesize], color_bytesize); \
 | 
			
		||||
					}                                                                                                                                 \
 | 
			
		||||
				}                                                                                                                                     \
 | 
			
		||||
			}                                                                                                                                         \
 | 
			
		||||
		}                                                                                                                                             \
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if (width % 4 != 0 || height % 4 != 0) {
 | 
			
		||||
		uint64_t src_pos = 0;
 | 
			
		||||
 | 
			
		||||
		uint8_t r8_output[4 * 4];
 | 
			
		||||
		uint8_t rg8_output[4 * 4 * 2];
 | 
			
		||||
		uint8_t rgba8_output[4 * 4 * 4];
 | 
			
		||||
		uint8_t rgbh_output[4 * 4 * 6];
 | 
			
		||||
	const uint64_t aligned_width = (width + 3) & ~0x03;
 | 
			
		||||
	const uint64_t aligned_height = (height + 3) & ~0x03;
 | 
			
		||||
 | 
			
		||||
	if (width != aligned_width || height != aligned_height) {
 | 
			
		||||
		// Decompress the mipmap in a 'safe' way, which involves starting from the top left.
 | 
			
		||||
		// For each block row, decompress all of the 'full' blocks, then the misaligned one (on the x axis).
 | 
			
		||||
		// Then, decompress the final misaligned block row at the bottom.
 | 
			
		||||
		// Finally, decompress the misaligned block at the bottom right.
 | 
			
		||||
		switch (format) {
 | 
			
		||||
			case BCdec_BC1: {
 | 
			
		||||
				DECOMPRESS_LOOP_SAFE(bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 4, rgba8_output)
 | 
			
		||||
				_safe_decompress_mipmap<bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC2: {
 | 
			
		||||
				DECOMPRESS_LOOP_SAFE(bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 4, rgba8_output)
 | 
			
		||||
				_safe_decompress_mipmap<bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC3: {
 | 
			
		||||
				DECOMPRESS_LOOP_SAFE(bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 4, rgba8_output)
 | 
			
		||||
				_safe_decompress_mipmap<bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC4: {
 | 
			
		||||
				DECOMPRESS_LOOP_SAFE(bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1, r8_output)
 | 
			
		||||
				_safe_decompress_mipmap<bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC5: {
 | 
			
		||||
				DECOMPRESS_LOOP_SAFE(bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 2, rg8_output)
 | 
			
		||||
				_safe_decompress_mipmap<bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC6U: {
 | 
			
		||||
				DECOMPRESS_LOOP_SAFE(bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 3, rgbh_output)
 | 
			
		||||
				_safe_decompress_mipmap<bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 2>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC6S: {
 | 
			
		||||
				DECOMPRESS_LOOP_SAFE(bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 3, rgbh_output)
 | 
			
		||||
				_safe_decompress_mipmap<bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 2>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC7: {
 | 
			
		||||
				DECOMPRESS_LOOP_SAFE(bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 4, rgba8_output)
 | 
			
		||||
				_safe_decompress_mipmap<bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
	} else {
 | 
			
		||||
		uint64_t src_pos = 0, dst_pos = 0;
 | 
			
		||||
 | 
			
		||||
		// Just decompress as usual, as fast as possible.
 | 
			
		||||
		switch (format) {
 | 
			
		||||
			case BCdec_BC1: {
 | 
			
		||||
				DECOMPRESS_LOOP(bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 4)
 | 
			
		||||
				_decompress_mipmap<bcdec_bc1, BCDEC_BC1_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC2: {
 | 
			
		||||
				DECOMPRESS_LOOP(bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 4)
 | 
			
		||||
				_decompress_mipmap<bcdec_bc2, BCDEC_BC2_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC3: {
 | 
			
		||||
				DECOMPRESS_LOOP(bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 4)
 | 
			
		||||
				_decompress_mipmap<bcdec_bc3, BCDEC_BC3_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC4: {
 | 
			
		||||
				DECOMPRESS_LOOP(bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1)
 | 
			
		||||
				_decompress_mipmap<bcdec_bc4, BCDEC_BC4_BLOCK_SIZE, 1, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC5: {
 | 
			
		||||
				DECOMPRESS_LOOP(bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 2)
 | 
			
		||||
				_decompress_mipmap<bcdec_bc5, BCDEC_BC5_BLOCK_SIZE, 2, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC6U: {
 | 
			
		||||
				DECOMPRESS_LOOP(bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 3)
 | 
			
		||||
				_decompress_mipmap<bcdec_bc6h_half_u, BCDEC_BC6H_BLOCK_SIZE, 6, 2>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC6S: {
 | 
			
		||||
				DECOMPRESS_LOOP(bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 3)
 | 
			
		||||
				_decompress_mipmap<bcdec_bc6h_half_s, BCDEC_BC6H_BLOCK_SIZE, 6, 2>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
			case BCdec_BC7: {
 | 
			
		||||
				DECOMPRESS_LOOP(bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 4)
 | 
			
		||||
				_decompress_mipmap<bcdec_bc7, BCDEC_BC7_BLOCK_SIZE, 4, 1>(width, height, src_blocks, dec_blocks);
 | 
			
		||||
			} break;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
#undef DECOMPRESS_LOOP
 | 
			
		||||
#undef DECOMPRESS_LOOP_SAFE
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void image_decompress_bcdec(Image *p_image) {
 | 
			
		||||
@@ -156,21 +220,6 @@ void image_decompress_bcdec(Image *p_image) {
 | 
			
		||||
	int width = p_image->get_width();
 | 
			
		||||
	int height = p_image->get_height();
 | 
			
		||||
 | 
			
		||||
	// Compressed images' dimensions should be padded to the upper multiple of 4.
 | 
			
		||||
	// If they aren't, they need to be realigned (the actual data is correctly padded though).
 | 
			
		||||
	const bool need_width_realign = width % 4 != 0;
 | 
			
		||||
	const bool need_height_realign = height % 4 != 0;
 | 
			
		||||
 | 
			
		||||
	if (need_width_realign || need_height_realign) {
 | 
			
		||||
		int new_width = need_width_realign ? width + (4 - (width % 4)) : width;
 | 
			
		||||
		int new_height = need_height_realign ? height + (4 - (height % 4)) : height;
 | 
			
		||||
 | 
			
		||||
		print_verbose(vformat("Compressed image's dimensions are not multiples of 4 (%dx%d), aligning to (%dx%d)", width, height, new_width, new_height));
 | 
			
		||||
 | 
			
		||||
		width = new_width;
 | 
			
		||||
		height = new_height;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	Image::Format source_format = p_image->get_format();
 | 
			
		||||
	Image::Format target_format = Image::FORMAT_MAX;
 | 
			
		||||
 | 
			
		||||
@@ -237,8 +286,8 @@ void image_decompress_bcdec(Image *p_image) {
 | 
			
		||||
	// Decompress mipmaps.
 | 
			
		||||
	for (int i = 0; i <= mm_count; i++) {
 | 
			
		||||
		int mipmap_w = 0, mipmap_h = 0;
 | 
			
		||||
		int64_t src_ofs = Image::get_image_mipmap_offset_and_dimensions(width, height, source_format, i, mipmap_w, mipmap_h);
 | 
			
		||||
		int64_t dst_ofs = Image::get_image_mipmap_offset(width, height, target_format, i);
 | 
			
		||||
		int64_t src_ofs = Image::get_image_mipmap_offset(width, height, source_format, i);
 | 
			
		||||
		int64_t dst_ofs = Image::get_image_mipmap_offset_and_dimensions(width, height, target_format, i, mipmap_w, mipmap_h);
 | 
			
		||||
		decompress_image(bcdec_format, rb + src_ofs, wb + dst_ofs, mipmap_w, mipmap_h);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user