From 6f50511a4d41f78fe74e04d43015865e1fab3df4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pa=CC=84vels=20Nadtoc=CC=8Cajevs?= <7645683+bruvzg@users.noreply.github.com>
Date: Mon, 24 Mar 2025 23:30:53 +0200
Subject: [PATCH] Force multiple of 4 sizes for CVTT compressor.

---
Reviewer notes (this region is ignored when the patch is applied):

* image_compress_cvtt() rounds a width/height greater than 2 up to the next
  multiple of 4; sizes of 2 or less are left unchanged.
* Per mip level, the target dimensions are taken from
  Image::get_image_mipmap_offset_and_dimensions() on the rounded size. When
  they differ from the source mip dimensions, the source pixels are copied
  into a new buffer and the extra columns, then the extra rows, are filled by
  repeating the previous pixel / previous row. Otherwise the source mip data
  is copied unchanged.
* CVTTCompressionRowTask now stores the mip data as a Vector (in_mm) instead
  of a raw pointer (in_mm_bytes), and set_data() receives the rounded w/h.
* NOTE(review): the template arguments on Vector (<uint8_t> and
  <CVTTCompressionRowTask>) and the blank lines were reconstructed from usage
  and from the hunk line counts -- confirm against the upstream commit.

 modules/cvtt/image_compress_cvtt.cpp | 70 ++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 15 deletions(-)

diff --git a/modules/cvtt/image_compress_cvtt.cpp b/modules/cvtt/image_compress_cvtt.cpp
index 2087dde2a1b..c0d7c7f8c09 100644
--- a/modules/cvtt/image_compress_cvtt.cpp
+++ b/modules/cvtt/image_compress_cvtt.cpp
@@ -46,7 +46,7 @@ struct CVTTCompressionJobParams {
 };
 
 struct CVTTCompressionRowTask {
-	const uint8_t *in_mm_bytes = nullptr;
+	Vector<uint8_t> in_mm;
 	uint8_t *out_mm_bytes = nullptr;
 	int y_start = 0;
 	int width = 0;
@@ -61,7 +61,7 @@ struct CVTTCompressionJobQueue {
 };
 
 static void _digest_row_task(const CVTTCompressionJobParams &p_job_params, const CVTTCompressionRowTask &p_row_task) {
-	const uint8_t *in_bytes = p_row_task.in_mm_bytes;
+	const uint8_t *in_bytes = p_row_task.in_mm.ptr();
 	uint8_t *out_bytes = p_row_task.out_mm_bytes;
 	int w = p_row_task.width;
 	int h = p_row_task.height;
@@ -151,6 +151,11 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
 	int w = p_image->get_width();
 	int h = p_image->get_height();
 
+	if (w % 4 != 0 || h % 4 != 0) {
+		w = w <= 2 ? w : (w + 3) & ~3;
+		h = h <= 2 ? h : (h + 3) & ~3;
+	}
+
 	bool is_ldr = (p_image->get_format() <= Image::FORMAT_RGBA8);
 	bool is_hdr = (p_image->get_format() >= Image::FORMAT_RF) && (p_image->get_format() <= Image::FORMAT_RGBE9995);
 
@@ -180,8 +185,6 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
 		p_image->convert(Image::FORMAT_RGBA8); //still uses RGBA to convert
 	}
 
-	const uint8_t *rb = p_image->get_data().ptr();
-
 	Vector<uint8_t> data;
 	int64_t target_size = Image::get_image_data_size(w, h, target_format, p_image->has_mipmaps());
 	int mm_count = p_image->has_mipmaps() ? Image::get_image_required_mipmaps(w, h, target_format) : 0;
@@ -209,20 +212,59 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
 	Vector<CVTTCompressionRowTask> tasks;
 
 	for (int i = 0; i <= mm_count; i++) {
-		int bw = w % 4 != 0 ? w + (4 - w % 4) : w;
-		int bh = h % 4 != 0 ? h + (4 - h % 4) : h;
+		Vector<uint8_t> in_data;
+		int width, height;
+		Image::get_image_mipmap_offset_and_dimensions(w, h, target_format, i, width, height);
 
-		int64_t src_ofs = p_image->get_mipmap_offset(i);
+		int bw = width % 4 != 0 ? width + (4 - width % 4) : width;
+		int bh = height % 4 != 0 ? height + (4 - height % 4) : height;
 
-		const uint8_t *in_bytes = &rb[src_ofs];
+		int64_t src_mip_ofs, src_mip_size;
+		int src_mip_w, src_mip_h;
+		p_image->get_mipmap_offset_size_and_dimensions(i, src_mip_ofs, src_mip_size, src_mip_w, src_mip_h);
+
+		// Pad textures to nearest block by smearing.
+		if (width != src_mip_w || height != src_mip_h) {
+			const uint8_t *src_mip_read = p_image->ptr() + src_mip_ofs;
+
+			// Reserve the buffer for padded image data.
+			int px_size = Image::get_format_pixel_size(p_image->get_format());
+			in_data.resize(width * height * px_size);
+			uint8_t *ptrw = in_data.ptrw();
+
+			int x = 0, y = 0;
+			for (y = 0; y < src_mip_h; y++) {
+				for (x = 0; x < src_mip_w; x++) {
+					memcpy(ptrw + (width * y + x) * px_size, src_mip_read + (src_mip_w * y + x) * px_size, px_size);
+				}
+
+				// First, smear in x.
+				for (; x < width; x++) {
+					memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - 1) * px_size, px_size);
+				}
+			}
+
+			// Then, smear in y.
+			for (; y < height; y++) {
+				for (x = 0; x < width; x++) {
+					memcpy(ptrw + (width * y + x) * px_size, ptrw + (width * y + x - width) * px_size, px_size);
+				}
+			}
+		} else {
+			// Create a buffer filled with the source mip layer data.
+			in_data.resize(src_mip_size);
+			memcpy(in_data.ptrw(), p_image->ptr() + src_mip_ofs, src_mip_size);
+		}
+
+		//const uint8_t *in_bytes = &rb[src_ofs];
 		uint8_t *out_bytes = &wb[dst_ofs];
 
-		for (int y_start = 0; y_start < h; y_start += 4) {
+		for (int y_start = 0; y_start < height; y_start += 4) {
 			CVTTCompressionRowTask row_task;
-			row_task.width = w;
-			row_task.height = h;
+			row_task.width = width;
+			row_task.height = height;
 			row_task.y_start = y_start;
-			row_task.in_mm_bytes = in_bytes;
+			row_task.in_mm = in_data;
 			row_task.out_mm_bytes = out_bytes;
 
 			tasks.push_back(row_task);
@@ -231,8 +273,6 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
 		}
 
 		dst_ofs += (MAX(4, bw) * MAX(4, bh)) >> shift;
-		w = MAX(w / 2, 1);
-		h = MAX(h / 2, 1);
 	}
 
 	const CVTTCompressionRowTask *tasks_rb = tasks.ptr();
@@ -242,7 +282,7 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) {
 	WorkerThreadPool::GroupID group_task = WorkerThreadPool::get_singleton()->add_native_group_task(&_digest_job_queue, &job_queue, WorkerThreadPool::get_singleton()->get_thread_count(), -1, true, SNAME("CVTT Compress"));
 	WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task);
 
-	p_image->set_data(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
+	p_image->set_data(w, h, p_image->has_mipmaps(), target_format, data);
 
 	print_verbose(vformat("CVTT: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time));
 }