1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-04 12:00:25 +00:00

Merge pull request #110060 from BlueCube3310/betsy-rgb

Betsy: Convert RGB to RGBA on the GPU for faster compression
This commit is contained in:
Thaddeus Crews
2025-10-21 15:11:09 -05:00
4 changed files with 254 additions and 24 deletions

View File

@@ -11,6 +11,7 @@ env_betsy.GLSL_HEADER("bc6h.glsl")
env_betsy.GLSL_HEADER("bc1.glsl")
env_betsy.GLSL_HEADER("bc4.glsl")
env_betsy.GLSL_HEADER("alpha_stitch.glsl")
env_betsy.GLSL_HEADER("rgb_to_rgba.glsl")
env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"])

View File

@@ -38,6 +38,7 @@
#include "bc1.glsl.gen.h"
#include "bc4.glsl.gen.h"
#include "bc6h.glsl.gen.h"
#include "rgb_to_rgba.glsl.gen.h"
#include "servers/display/display_server.h"
static Mutex betsy_mutex;
@@ -220,6 +221,44 @@ void BetsyCompressor::_init() {
cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_ALPHA_STITCH].compiled);
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_ALPHA_STITCH].pipeline.is_null());
}
{
// Set up the compute pipelines that expand tightly packed RGB source data into RGBA textures.
// The shader file is parsed once and one version is compiled per source component type
// (32-bit float, 16-bit float, 8-bit unorm, 16-bit unorm); each result is stored in cached_shaders.
Ref<RDShaderFile> rgb_to_rgba_shader;
rgb_to_rgba_shader.instantiate();
Error err = rgb_to_rgba_shader->parse_versions_from_text(rgb_to_rgba_shader_glsl);
if (err != OK) {
// A parse failure is only reported here; there is no early return in this branch.
// NOTE(review): presumably the ERR_FAIL_COND checks below then abort on the resulting null RIDs - confirm.
rgb_to_rgba_shader->print_errors("Betsy RGB to RGBA shader");
}
// Float32.
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_float"));
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled.is_null());
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].compiled);
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_FLOAT].pipeline.is_null());
// Float16.
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_half"));
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled.is_null());
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].compiled);
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_HALF].pipeline.is_null());
// Unorm8.
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm8"));
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled.is_null());
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].compiled);
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM8].pipeline.is_null());
// Unorm16.
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled = compress_rd->shader_create_from_spirv(rgb_to_rgba_shader->get_spirv_stages("version_unorm16"));
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled.is_null());
cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline = compress_rd->compute_pipeline_create(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].compiled);
ERR_FAIL_COND(cached_shaders[BETSY_SHADER_RGB_TO_RGBA_UNORM16].pipeline.is_null());
}
}
void BetsyCompressor::init() {
@@ -284,7 +323,9 @@ static int get_next_multiple(int n, int m) {
return n + (m - (n % m));
}
static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format, bool &r_is_rgb) {
r_is_rgb = false;
switch (r_img->get_format()) {
case Image::FORMAT_L8:
r_img->convert(Image::FORMAT_RGBA8);
@@ -305,7 +346,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
break;
case Image::FORMAT_RGB8:
r_img->convert(Image::FORMAT_RGBA8);
r_is_rgb = true;
r_format = RD::DATA_FORMAT_R8G8B8A8_UNORM;
break;
@@ -322,7 +363,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
break;
case Image::FORMAT_RGBH:
r_img->convert(Image::FORMAT_RGBAH);
r_is_rgb = true;
r_format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
break;
@@ -339,7 +380,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
break;
case Image::FORMAT_RGBF:
r_img->convert(Image::FORMAT_RGBAF);
r_is_rgb = true;
r_format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
break;
@@ -360,7 +401,7 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
break;
case Image::FORMAT_RGB16:
r_img->convert(Image::FORMAT_RGBA16);
r_is_rgb = true;
r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
break;
@@ -368,23 +409,6 @@ static Error get_src_texture_format(Image *r_img, RD::DataFormat &r_format) {
r_format = RD::DATA_FORMAT_R16G16B16A16_UNORM;
break;
case Image::FORMAT_R16I:
r_format = RD::DATA_FORMAT_R16_UINT;
break;
case Image::FORMAT_RG16I:
r_format = RD::DATA_FORMAT_R16G16_UINT;
break;
case Image::FORMAT_RGB16I:
r_img->convert(Image::FORMAT_RGBA16I);
r_format = RD::DATA_FORMAT_R16G16B16A16_UINT;
break;
case Image::FORMAT_RGBA16I:
r_format = RD::DATA_FORMAT_R16G16B16A16_UINT;
break;
default: {
return ERR_UNAVAILABLE;
}
@@ -445,7 +469,8 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
}
err = get_src_texture_format(r_img, src_texture_format.format);
bool needs_rgb_to_rgba = false;
err = get_src_texture_format(r_img, src_texture_format.format, needs_rgb_to_rgba);
if (err != OK) {
return err;
@@ -546,9 +571,79 @@ Error BetsyCompressor::_compress(BetsyFormat p_format, Image *r_img) {
}
// Create the textures on the GPU.
RID src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
RID src_texture;
RID dst_texture_primary = compress_rd->texture_create(dst_texture_format, RD::TextureView());
if (needs_rgb_to_rgba) {
// RGB textures cannot be sampled directly on most hardware, so we do a little trick involving a compute shader
// which takes the input data as an SSBO and converts it directly into an RGBA image.
// Select the conversion pipeline that matches the image's current three-channel format.
BetsyShaderType rgb_shader_type = BETSY_SHADER_MAX;
switch (r_img->get_format()) {
case Image::FORMAT_RGB8:
rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM8;
break;
case Image::FORMAT_RGBH:
rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_HALF;
break;
case Image::FORMAT_RGBF:
rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_FLOAT;
break;
case Image::FORMAT_RGB16:
rgb_shader_type = BETSY_SHADER_RGB_TO_RGBA_UNORM16;
break;
default:
// NOTE(review): this leaves rgb_shader_type at BETSY_SHADER_MAX, which is later used to index cached_shaders.
// Presumably unreachable because needs_rgb_to_rgba is only set for the four formats above - confirm.
break;
}
// The source 'RGB' buffer.
RID source_buffer = compress_rd->storage_buffer_create(src_image_ptr[0].size(), src_image_ptr[0].span());
// The RGBA texture is created without initial data; the compute shader fills it through the storage image binding.
RD::TextureFormat rgba_texture_format = src_texture_format;
rgba_texture_format.usage_bits |= RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
src_texture = compress_rd->texture_create(rgba_texture_format, RD::TextureView());
// Binding 0 is the packed RGB storage buffer, binding 1 is the RGBA destination image.
Vector<RD::Uniform> uniforms;
{
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
u.binding = 0;
u.append_id(source_buffer);
uniforms.push_back(u);
}
{
RD::Uniform u;
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
u.binding = 1;
u.append_id(src_texture);
uniforms.push_back(u);
}
}
BetsyShader &rgb_shader = cached_shaders[rgb_shader_type];
RID uniform_set = compress_rd->uniform_set_create(uniforms, rgb_shader.compiled, 0);
RD::ComputeListID compute_list = compress_rd->compute_list_begin();
compress_rd->compute_list_bind_compute_pipeline(compute_list, rgb_shader.pipeline);
compress_rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
// Prepare the push constant with the mipmap's resolution.
RGBToRGBAPushConstant push_constant;
push_constant.width = width;
push_constant.height = height;
compress_rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(RGBToRGBAPushConstant));
// Dispatch enough 8x8 groups to cover the whole image; the group counts are derived from get_next_multiple(..., 8).
compress_rd->compute_list_dispatch(compute_list, get_next_multiple(width, 8) / 8, get_next_multiple(height, 8) / 8, 1);
compress_rd->compute_list_end();
// NOTE(review): uniform_set is not freed explicitly in this block - confirm it is released together with source_buffer.
compress_rd->free_rid(source_buffer);
} else {
// Formats that need no RGB expansion are uploaded directly with their initial data.
src_texture = compress_rd->texture_create(src_texture_format, RD::TextureView(), src_images);
}
{
Vector<RD::Uniform> uniforms;
{

View File

@@ -66,6 +66,10 @@ enum BetsyShaderType {
BETSY_SHADER_BC6_SIGNED,
BETSY_SHADER_BC6_UNSIGNED,
BETSY_SHADER_ALPHA_STITCH,
BETSY_SHADER_RGB_TO_RGBA_FLOAT,
BETSY_SHADER_RGB_TO_RGBA_HALF,
BETSY_SHADER_RGB_TO_RGBA_UNORM8,
BETSY_SHADER_RGB_TO_RGBA_UNORM16,
BETSY_SHADER_MAX,
};
@@ -85,6 +89,12 @@ struct BC4PushConstant {
uint32_t padding[3] = { 0 };
};
// Push constant data for the RGB to RGBA conversion compute shader: the size, in texels, of the image being converted.
// The trailing padding keeps the struct at 16 bytes. Every member is zero-initialized (matching the padding
// initializer of BC4PushConstant) so that no indeterminate bytes are handed to the GPU.
struct RGBToRGBAPushConstant {
	uint32_t width = 0;
	uint32_t height = 0;
	uint32_t padding[2] = { 0 };
};
void free_device();
Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels);

View File

@@ -0,0 +1,124 @@
#[versions]
version_float = "#define VER_FLOAT";
version_half = "#define VER_HALF";
version_unorm8 = "#define VER_UINT8";
version_unorm16 = "#define VER_UINT16";
#[compute]
#version 450
#VERSION_DEFINES
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// Source texel data bound as a storage buffer and read as a flat array:
// floats in the float version, 32-bit words (unpacked in main) in every other version.
layout(std430, binding = 0) buffer Source {
#if defined(VER_FLOAT)
float data[];
#else
uint data[];
#endif
}
source;
// Destination RGBA image; its format qualifier follows the selected shader version.
#if defined(VER_FLOAT)
layout(binding = 1, rgba32f) uniform writeonly image2D dest;
#elif defined(VER_HALF)
layout(binding = 1, rgba16f) uniform writeonly image2D dest;
#elif defined(VER_UINT8)
layout(binding = 1, rgba8) uniform writeonly image2D dest;
#elif defined(VER_UINT16)
layout(binding = 1, rgba16) uniform writeonly image2D dest;
#endif
// Image dimensions in texels, used by main for the bounds check and for row addressing.
layout(push_constant, std430) uniform Params {
uint p_width;
uint p_height;
uint p_padding[2];
}
params;
void main() {
	// Each invocation handles the texel at its global invocation ID; invocations outside the image do nothing.
	const uvec2 coord = gl_GlobalInvocationID.xy;
	if (coord.x >= params.p_width || coord.y >= params.p_height) {
		return;
	}

	// Linear number of the texel inside the packed source image. This is NOT an index into source.data[].
	const int texel = int(coord.y * params.p_width + coord.x);
	vec3 rgb = vec3(0.0);

#if defined(VER_FLOAT)
	// Every component is exactly one array element, so a texel starts at three times its number.
	const int base = texel * 3;
	rgb = vec3(source.data[base], source.data[base + 1], source.data[base + 2]);
#elif defined(VER_UINT8)
	// A 3-byte texel does not line up with the 4-byte words of the buffer. Depending on the byte offset
	// of the texel modulo 4 ('phase'), its components are spread over one or two words:
	// 0 - [ XYZ_-____ ]
	// 1 - [ _YZW-____ ]
	// 2 - [ __ZW-X___ ]
	// 3 - [ ___W-XY__ ]
	// Four texels occupy three words, so the word index falls behind the texel number by one every four texels.
	const int phase = (texel * 3) % 4;
	const int word = texel - (texel / 4);

	if (phase == 0) {
		rgb = unpackUnorm4x8(source.data[word]).xyz;
	} else if (phase == 1) {
		rgb = unpackUnorm4x8(source.data[word - 1]).yzw;
	} else if (phase == 2 || phase == 3) {
		// The texel straddles two consecutive words.
		const vec4 lo = unpackUnorm4x8(source.data[word - 1]);
		const vec4 hi = unpackUnorm4x8(source.data[word]);
		rgb = (phase == 2) ? vec3(lo.zw, hi.x) : vec3(lo.w, hi.xy);
	}
#else
	// A 6-byte RGBH/RGB16 texel always touches two consecutive 4-byte words, in one of two layouts:
	// 0 - [ XY-X_ ]
	// 1 - [ _Y-XY ]
	// Two texels occupy three words, so the word index runs ahead of the texel number by one every two texels.
	const int phase = (texel * 3) % 2;
	const int word = texel + (texel / 2);

#if defined(VER_HALF)
	const vec2 lo = unpackHalf2x16(source.data[word]);
	const vec2 hi = unpackHalf2x16(source.data[word + 1]);
#elif defined(VER_UINT16)
	const vec2 lo = unpackUnorm2x16(source.data[word]);
	const vec2 hi = unpackUnorm2x16(source.data[word + 1]);
#endif

	if (phase == 0) {
		rgb = vec3(lo, hi.x);
	} else if (phase == 1) {
		rgb = vec3(lo.y, hi);
	}
#endif

	// Write the texel out with an opaque alpha.
	imageStore(dest, ivec2(coord), vec4(rgb, 1.0));
}