From 885904ef2f37c057fbea8944704cd20d718c7e8b Mon Sep 17 00:00:00 2001
From: BlueCube3310 <53150244+BlueCube3310@users.noreply.github.com>
Date: Thu, 28 Aug 2025 16:16:06 +0200
Subject: [PATCH] Betsy: Convert RGB to RGBA textures on the GPU

RGB8, RGBH, RGBF and RGB16 images are no longer expanded to RGBA with
Image::convert() before upload. Instead, the raw RGB data is uploaded as
a storage buffer and a compute shader (rgb_to_rgba.glsl) writes it into
an RGBA texture with alpha set to 1.0. One shader version is compiled
per source component type (float32, float16, unorm8, unorm16).

This also removes the R16I/RG16I/RGB16I/RGBA16I cases from
get_src_texture_format().
---
 modules/betsy/SCsub                    |   1 +
 modules/betsy/image_compress_betsy.cpp | 143 ++++++++++++++++++++-----
 modules/betsy/image_compress_betsy.h   |  11 ++
 modules/betsy/rgb_to_rgba.glsl         | 124 +++++++++++++++++++++
 4 files changed, 255 insertions(+), 24 deletions(-)
 create mode 100644 modules/betsy/rgb_to_rgba.glsl

diff --git a/modules/betsy/SCsub b/modules/betsy/SCsub
index 7bd0b1bb3d8..8f7d86740b0 100644
--- a/modules/betsy/SCsub
+++ b/modules/betsy/SCsub
@@ -11,6 +11,7 @@ env_betsy.GLSL_HEADER("bc6h.glsl")
 env_betsy.GLSL_HEADER("bc1.glsl")
 env_betsy.GLSL_HEADER("bc4.glsl")
 env_betsy.GLSL_HEADER("alpha_stitch.glsl")
+env_betsy.GLSL_HEADER("rgb_to_rgba.glsl")
 
 env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])
 
diff --git a/modules/betsy/image_compress_betsy.cpp b/modules/betsy/image_compress_betsy.cpp
index 062ff1e7645..73e57d0dfd3 100644
--- a/modules/betsy/image_compress_betsy.cpp
+++ b/modules/betsy/image_compress_betsy.cpp
@@ -38,6 +38,7 @@
 #include "bc1.glsl.gen.h"
 #include "bc4.glsl.gen.h"
 #include "bc6h.glsl.gen.h"
+#include "rgb_to_rgba.glsl.gen.h"
 #include "servers/display/display_server.h"
 
 static Mutex betsy_mutex;
@@ -220,6 +221,44 @@ void BetsyCompressor::_init() {
 		cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled);
 		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline.is_null());
 	}
+
+	{
+		Ref<RDShaderFile> rgb_to_rgba_shader;
+		rgb_to_rgba_shader.instantiate();
+		Error err = rgb_to_rgba_shader->parse_versions_from_text(rgb_to_rgba_shader_glsl);
+
+		if (err != OK) {
+			rgb_to_rgba_shader->print_errors("Betsy RGB to RGBA shader");
+		}
+
+		// Float32.
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_float"));
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled.is_null());
+
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled);
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline.is_null());
+
+		// Float16.
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_half"));
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled.is_null());
+
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled);
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline.is_null());
+
+		// Unorm8.
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm8"));
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled.is_null());
+
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled);
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline.is_null());
+
+		// Unorm16.
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm16"));
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled.is_null());
+
+		cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled);
+		ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline.is_null());
+	}
 }
 
 void BetsyCompressor::init() {
@@ -284,7 +323,9 @@ static int get_next_multiple(int n, int m) {
 	return n + (m - (n % m));
 }
 
-static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
+static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format, bool &r_is_rgb) {
+	r_is_rgb = false;
+
 	switch (r_img->get_format()) {
 		case Image::FORMAT_L8:
 			r_img->convert(Image::FORMAT_RGBA8);
@@ -305,7 +346,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			break;
 
 		case Image::FORMAT_RGB8:
-			r_img->convert(Image::FORMAT_RGBA8);
+			r_is_rgb = true;
 			r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
 			break;
 
@@ -322,7 +363,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			break;
 
 		case Image::FORMAT_RGBH:
-			r_img->convert(Image::FORMAT_RGBAH);
+			r_is_rgb = true;
 			r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
 			break;
 
@@ -339,7 +380,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			break;
 
 		case Image::FORMAT_RGBF:
-			r_img->convert(Image::FORMAT_RGBAF);
+			r_is_rgb = true;
 			r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
 			break;
 
@@ -360,7 +401,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			break;
 
 		case Image::FORMAT_RGB16:
-			r_img->convert(Image::FORMAT_RGBA16);
+			r_is_rgb = true;
 			r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
 			break;
 
@@ -368,23 +409,6 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
 			r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
 			break;
 
-		case Image::FORMAT_R16I:
-			r_format = RD::DATA_FORMAT_R16_UINT;
-			break;
-
-		case Image::FORMAT_RG16I:
-			r_format = RD::DATA_FORMAT_R16G16_UINT;
-			break;
-
-		case Image::FORMAT_RGB16I:
-			r_img->convert(Image::FORMAT_RGBA16I);
-			r_format = RD::DATA_FORMAT_R16G16B16A16_UINT;
-			break;
-
-		case Image::FORMAT_RGBA16I:
-			r_format = RD::DATA_FORMAT_R16G16B16A16_UINT;
-			break;
-
 		default: {
 			return ERR_UNAVAILABLE;
 		}
@@ -445,7 +469,8 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
 		src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
 	}
 
-	err = get_src_texture_format(r_img, src_texture_format.format);
+	bool needs_rgb_to_rgba = false;
+	err = get_src_texture_format(r_img, src_texture_format.format, needs_rgb_to_rgba);
 
 	if (err != OK) {
 		return err;
@@ -536,9 +561,79 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
 	}
 
 	// Create the textures on the GPU.
-	RID src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
+	RID src_texture;
 	RID dst_texture_primary = compress_rd->texture_create(dst_texture_format, RD::TextureView());
 
+	if (needs_rgb_to_rgba) {
+		// RGB textures cannot be sampled directly on most hardware, so we do a little trick involving a compute shader
+		// which takes the input data as an SSBO and converts it directly into an RGBA image.
+		BetsyShaderType rgb_shader_type = BETSY_SHADER_MAX;
+
+		switch (r_img->get_format()) {
+			case Image::FORMAT_RGB8:
+				rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM8;
+				break;
+			case Image::FORMAT_RGBH:
+				rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_HALF;
+				break;
+			case Image::FORMAT_RGBF:
+				rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_FLOAT;
+				break;
+			case Image::FORMAT_RGB16:
+				rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM16;
+				break;
+			default:
+				break;
+		}
+
+		// The source 'RGB' buffer.
+		RID source_buffer = compress_rd->storage_buffer_create(src_image_ptr[0].size(), src_image_ptr[0].span());
+
+		RD::TextureFormat rgba_texture_format = src_texture_format;
+		rgba_texture_format.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
+		src_texture = compress_rd->texture_create(rgba_texture_format, RD::TextureView());
+
+		Vector<RD::Uniform> uniforms;
+		{
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
+				u.binding = 0;
+				u.append_id(source_buffer);
+				uniforms.push_back(u);
+			}
+			{
+				RD::Uniform u;
+				u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+				u.binding = 1;
+				u.append_id(src_texture);
+				uniforms.push_back(u);
+			}
+		}
+
+		BetsyShader &rgb_shader = cached_shaders[rgb_shader_type];
+
+		RID uniform_set = compress_rd->uniform_set_create(uniforms, rgb_shader.compiled, 0);
+		RD::ComputeListID compute_list = compress_rd->compute_list_begin();
+
+		compress_rd->compute_list_bind_compute_pipeline(compute_list, rgb_shader.pipeline);
+		compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
+
+		// Prepare the push constant with the mipmap's resolution.
+		RGBToRGBAPushConstant push_constant;
+		push_constant.width = width;
+		push_constant.height = height;
+
+		compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RGBToRGBAPushConstant));
+		compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 8) / 8, get_next_multiple(height, 8) / 8, 1);
+
+		compress_rd->compute_list_end();
+
+		compress_rd->free_rid(source_buffer);
+	} else {
+		src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
+	}
+
 	{
 		Vector<RD::Uniform> uniforms;
 		{
diff --git a/modules/betsy/image_compress_betsy.h b/modules/betsy/image_compress_betsy.h
index afe9c26657f..15b58894970 100644
--- a/modules/betsy/image_compress_betsy.h
+++ b/modules/betsy/image_compress_betsy.h
@@ -66,6 +66,10 @@ enum BetsyShaderType {
 	BETSY_SHADER_BC6_SIGNED,
 	BETSY_SHADER_BC6_UNSIGNED,
 	BETSY_SHADER_ALPHA_STITCH,
+	BETSY_SHADER_RGB_TO_RGBA_FLOAT,
+	BETSY_SHADER_RGB_TO_RGBA_HALF,
+	BETSY_SHADER_RGB_TO_RGBA_UNORM8,
+	BETSY_SHADER_RGB_TO_RGBA_UNORM16,
 	BETSY_SHADER_MAX,
 };
 
@@ -85,6 +89,13 @@ struct BC4PushConstant {
 	uint32_t padding[3] = { 0 };
 };
 
+// Push constant for the RGB to RGBA shader; fields mirror the 'Params' block in rgb_to_rgba.glsl.
+struct RGBToRGBAPushConstant {
+	uint32_t width = 0;
+	uint32_t height = 0;
+	uint32_t padding[2] = { 0 };
+};
+
 void free_device();
 
 Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels);
diff --git a/modules/betsy/rgb_to_rgba.glsl b/modules/betsy/rgb_to_rgba.glsl
new file mode 100644
index 00000000000..e2d2ff135fd
--- /dev/null
+++ b/modules/betsy/rgb_to_rgba.glsl
@@ -0,0 +1,124 @@
+#[versions]
+
+version_float = "#define VER_FLOAT";
+version_half = "#define VER_HALF";
+version_unorm8 = "#define VER_UINT8";
+version_unorm16 = "#define VER_UINT16";
+
+#[compute]
+#version 450
+
+#VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(std430, binding = 0) buffer Source {
+#if defined(VER_FLOAT)
+	float data[];
+#else
+	uint data[];
+#endif
+}
+source;
+
+#if defined(VER_FLOAT)
+layout(binding = 1, rgba32f) uniform writeonly image2D dest;
+#elif defined(VER_HALF)
+layout(binding = 1, rgba16f) uniform writeonly image2D dest;
+#elif defined(VER_UINT8)
+layout(binding = 1, rgba8) uniform writeonly image2D dest;
+#elif defined(VER_UINT16)
+layout(binding = 1, rgba16) uniform writeonly image2D dest;
+#endif
+
+layout(push_constant, std430) uniform Params {
+	uint p_width;
+	uint p_height;
+	uint p_padding[2];
+}
+params;
+
+void main() {
+	// gl_GlobalInvocationID is equivalent to the current texel coordinates.
+	if (gl_GlobalInvocationID.x >= params.p_width || gl_GlobalInvocationID.y >= params.p_height) {
+		return;
+	}
+
+	// The index of a texel in the source buffer, NOT an index of source.data[]
+	const int texel_index = int(gl_GlobalInvocationID.y * params.p_width + gl_GlobalInvocationID.x);
+
+#if defined(VER_FLOAT)
+	// Since 32-bit floats are aligned with RGBF texel data, just retrieve the values from the array.
+	// Multiply by 3 to align with the components.
+
+	int data_index = texel_index * 3;
+	vec3 color_rgb = vec3(source.data[data_index], source.data[data_index + 1], source.data[data_index + 2]);
+
+#elif defined(VER_UINT8)
+	// RGB8 texel data and 32-bit uints are not aligned, so we have to use a bit of magic.
+	// The source texel can be in either of 4 alignment 'states':
+	// 0 - [ XYZ_-____ ]
+	// 1 - [ _YZW-____ ]
+	// 2 - [ __ZW-X___ ]
+	// 3 - [ ___W-XY__ ]
+	// The texel index additionally needs to be decremented after every 'cycle' in order to properly fit into the source array.
+
+	vec3 color_rgb = vec3(0.0);
+	int data_index = texel_index - (texel_index / 4);
+
+	switch ((texel_index * 3) % 4) {
+		case 0:
+			color_rgb = unpackUnorm4x8(source.data[data_index]).xyz;
+			break;
+		case 1:
+			color_rgb = unpackUnorm4x8(source.data[data_index - 1]).yzw;
+			break;
+		case 2:
+			color_rgb.rg = unpackUnorm4x8(source.data[data_index - 1]).zw;
+			color_rgb.b = unpackUnorm4x8(source.data[data_index]).x;
+			break;
+		case 3:
+			color_rgb.r = unpackUnorm4x8(source.data[data_index - 1]).w;
+			color_rgb.gb = unpackUnorm4x8(source.data[data_index]).xy;
+			break;
+		default:
+			break;
+	}
+
+#else
+	// In a similar vein to RGB8, the RGBH/RGB16 source texel can be in either of 2 alignment 'states':
+	// 0 - [ XY-X_ ]
+	// 1 - [ _Y-XY ]
+	// The texel index has to be incremented this time, as the size of a texel (6 bytes) is greater than that of a 32-bit uint (4 bytes).
+
+	vec3 color_rgb = vec3(0.0);
+	int data_index = texel_index + (texel_index / 2);
+
+	switch ((texel_index * 3) % 2) {
+#if defined(VER_HALF)
+		case 0:
+			color_rgb.xy = unpackHalf2x16(source.data[data_index]);
+			color_rgb.z = unpackHalf2x16(source.data[data_index + 1]).x;
+			break;
+		case 1:
+			color_rgb.x = unpackHalf2x16(source.data[data_index]).y;
+			color_rgb.yz = unpackHalf2x16(source.data[data_index + 1]);
+			break;
+#elif defined(VER_UINT16)
+		case 0:
+			color_rgb.xy = unpackUnorm2x16(source.data[data_index]);
+			color_rgb.z = unpackUnorm2x16(source.data[data_index + 1]).x;
+			break;
+		case 1:
+			color_rgb.x = unpackUnorm2x16(source.data[data_index]).y;
+			color_rgb.yz = unpackUnorm2x16(source.data[data_index + 1]);
+			break;
+#endif
+		default:
+			break;
+	}
+#endif
+
+	// Store the resulting RGBA color.
+	imageStore(dest, ivec2(gl_GlobalInvocationID.xy), vec4(color_rgb, 1.0));
+}