You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-10 13:00:37 +00:00
Optimize Glow with local memory
This commit is contained in:
@@ -58,12 +58,20 @@ layout(rgba8, set = 3, binding = 0) uniform restrict writeonly image2D dest_buff
|
||||
layout(rgba32f, set = 3, binding = 0) uniform restrict writeonly image2D dest_buffer;
|
||||
#endif
|
||||
|
||||
#ifdef MODE_GAUSSIAN_GLOW
|
||||
// Shared staging buffer for the glow blur. 256 entries = the 16x16 texel tile
// that each 8x8 thread block copies from source_color in the first pass
// (each thread writes four entries: dest_index, +1, +16, +16+1).
shared vec4 local_cache[256];
|
||||
// Result of the horizontal pass. 128 entries = the 8x16 chunk the vertical pass
// reads back; each thread writes two entries (color_top and color_bottom).
shared vec4 temp_cache[128];
|
||||
#endif
|
||||
|
||||
void main() {
|
||||
// Pixel being shaded
|
||||
ivec2 pos = ivec2(gl_GlobalInvocationID.xy);
|
||||
|
||||
#ifndef MODE_GAUSSIAN_GLOW // Glow needs the extra threads
|
||||
if (any(greaterThanEqual(pos, params.section.zw))) { //too large, do nothing
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef MODE_MIPMAP
|
||||
|
||||
@@ -104,70 +112,69 @@ void main() {
|
||||
|
||||
#ifdef MODE_GAUSSIAN_GLOW
|
||||
|
||||
//Glow uses larger sigma 1 for a more rounded blur effect
|
||||
// First pass copy texture into 16x16 local memory for every 8x8 thread block
|
||||
vec2 quad_center_uv = clamp(vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.5) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw));
|
||||
uint dest_index = gl_LocalInvocationID.x * 2 + gl_LocalInvocationID.y * 2 * 16;
|
||||
|
||||
#define GLOW_ADD(m_ofs, m_mult) \
|
||||
{ \
|
||||
ivec2 ofs = base_pos + m_ofs; \
|
||||
if (all(greaterThanEqual(ofs, section_begin)) && all(lessThan(ofs, section_end))) { \
|
||||
color += texelFetch(source_color, ofs, 0) * m_mult; \
|
||||
} \
|
||||
if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) {
|
||||
vec2 quad_offset_uv = clamp((vec2(gl_GlobalInvocationID.xy + gl_LocalInvocationID.xy - 3.0)) / params.section.zw, vec2(0.5 / params.section.zw), vec2(1.0 - 1.5 / params.section.zw));
|
||||
|
||||
local_cache[dest_index] = (textureLod(source_color, quad_center_uv, 0) + textureLod(source_color, quad_offset_uv, 0)) * 0.5;
|
||||
local_cache[dest_index + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.z, 0.0), 0)) * 0.5;
|
||||
local_cache[dest_index + 16] = (textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0) + textureLod(source_color, quad_offset_uv + vec2(0.0, 1.0 / params.section.w), 0)) * 0.5;
|
||||
local_cache[dest_index + 16 + 1] = (textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0) + textureLod(source_color, quad_offset_uv + vec2(1.0 / params.section.zw), 0)) * 0.5;
|
||||
} else {
|
||||
local_cache[dest_index] = textureLod(source_color, quad_center_uv, 0);
|
||||
local_cache[dest_index + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.z, 0.0), 0);
|
||||
local_cache[dest_index + 16] = textureLod(source_color, quad_center_uv + vec2(0.0, 1.0 / params.section.w), 0);
|
||||
local_cache[dest_index + 16 + 1] = textureLod(source_color, quad_center_uv + vec2(1.0 / params.section.zw), 0);
|
||||
}
|
||||
|
||||
memoryBarrierShared();
|
||||
barrier();
|
||||
|
||||
// Horizontal pass. Needs to copy into 8x16 chunk of local memory so vertical pass has full resolution
|
||||
uint read_index = gl_LocalInvocationID.x + gl_LocalInvocationID.y * 32 + 4;
|
||||
vec4 color_top = vec4(0.0);
|
||||
color_top += local_cache[read_index] * 0.174938;
|
||||
color_top += local_cache[read_index + 1] * 0.165569;
|
||||
color_top += local_cache[read_index + 2] * 0.140367;
|
||||
color_top += local_cache[read_index + 3] * 0.106595;
|
||||
color_top += local_cache[read_index - 1] * 0.165569;
|
||||
color_top += local_cache[read_index - 2] * 0.140367;
|
||||
color_top += local_cache[read_index - 3] * 0.106595;
|
||||
|
||||
vec4 color_bottom = vec4(0.0);
|
||||
color_bottom += local_cache[read_index + 16] * 0.174938;
|
||||
color_bottom += local_cache[read_index + 1 + 16] * 0.165569;
|
||||
color_bottom += local_cache[read_index + 2 + 16] * 0.140367;
|
||||
color_bottom += local_cache[read_index + 3 + 16] * 0.106595;
|
||||
color_bottom += local_cache[read_index - 1 + 16] * 0.165569;
|
||||
color_bottom += local_cache[read_index - 2 + 16] * 0.140367;
|
||||
color_bottom += local_cache[read_index - 3 + 16] * 0.106595;
|
||||
|
||||
// rotate samples to take advantage of cache coherency
|
||||
uint write_index = gl_LocalInvocationID.y * 2 + gl_LocalInvocationID.x * 16;
|
||||
|
||||
temp_cache[write_index] = color_top;
|
||||
temp_cache[write_index + 1] = color_bottom;
|
||||
|
||||
memoryBarrierShared();
|
||||
barrier();
|
||||
|
||||
// Vertical pass
|
||||
uint index = gl_LocalInvocationID.y + gl_LocalInvocationID.x * 16 + 4;
|
||||
vec4 color = vec4(0.0);
|
||||
|
||||
if (bool(params.flags & FLAG_HORIZONTAL)) {
|
||||
ivec2 base_pos = ((pos + params.section.xy) << 1) + ivec2(1);
|
||||
ivec2 section_begin = params.section.xy << 1;
|
||||
ivec2 section_end = section_begin + (params.section.zw << 1);
|
||||
color += temp_cache[index] * 0.174938;
|
||||
color += temp_cache[index + 1] * 0.165569;
|
||||
color += temp_cache[index + 2] * 0.140367;
|
||||
color += temp_cache[index + 3] * 0.106595;
|
||||
color += temp_cache[index - 1] * 0.165569;
|
||||
color += temp_cache[index - 2] * 0.140367;
|
||||
color += temp_cache[index - 3] * 0.106595;
|
||||
|
||||
if (bool(params.flags & FLAG_HIGH_QUALITY_GLOW)) {
|
||||
//Sample from two lines to capture single pixel features
|
||||
GLOW_ADD(ivec2(0, 0), 0.152781);
|
||||
GLOW_ADD(ivec2(1, 0), 0.144599);
|
||||
GLOW_ADD(ivec2(2, 0), 0.122589);
|
||||
GLOW_ADD(ivec2(3, 0), 0.093095);
|
||||
GLOW_ADD(ivec2(4, 0), 0.063327);
|
||||
GLOW_ADD(ivec2(-1, 0), 0.144599);
|
||||
GLOW_ADD(ivec2(-2, 0), 0.122589);
|
||||
GLOW_ADD(ivec2(-3, 0), 0.093095);
|
||||
GLOW_ADD(ivec2(-4, 0), 0.063327);
|
||||
|
||||
GLOW_ADD(ivec2(0, 1), 0.152781);
|
||||
GLOW_ADD(ivec2(1, 1), 0.144599);
|
||||
GLOW_ADD(ivec2(2, 1), 0.122589);
|
||||
GLOW_ADD(ivec2(3, 1), 0.093095);
|
||||
GLOW_ADD(ivec2(4, 1), 0.063327);
|
||||
GLOW_ADD(ivec2(-1, 1), 0.144599);
|
||||
GLOW_ADD(ivec2(-2, 1), 0.122589);
|
||||
GLOW_ADD(ivec2(-3, 1), 0.093095);
|
||||
GLOW_ADD(ivec2(-4, 1), 0.063327);
|
||||
color *= 0.5;
|
||||
} else {
|
||||
GLOW_ADD(ivec2(0, 0), 0.174938);
|
||||
GLOW_ADD(ivec2(1, 0), 0.165569);
|
||||
GLOW_ADD(ivec2(2, 0), 0.140367);
|
||||
GLOW_ADD(ivec2(3, 0), 0.106595);
|
||||
GLOW_ADD(ivec2(-1, 0), 0.165569);
|
||||
GLOW_ADD(ivec2(-2, 0), 0.140367);
|
||||
GLOW_ADD(ivec2(-3, 0), 0.106595);
|
||||
}
|
||||
|
||||
color *= params.glow_strength;
|
||||
} else {
|
||||
ivec2 base_pos = pos + params.section.xy;
|
||||
ivec2 section_begin = params.section.xy;
|
||||
ivec2 section_end = section_begin + params.section.zw;
|
||||
|
||||
GLOW_ADD(ivec2(0, 0), 0.288713);
|
||||
GLOW_ADD(ivec2(0, 1), 0.233062);
|
||||
GLOW_ADD(ivec2(0, 2), 0.122581);
|
||||
GLOW_ADD(ivec2(0, -1), 0.233062);
|
||||
GLOW_ADD(ivec2(0, -2), 0.122581);
|
||||
color *= params.glow_strength;
|
||||
}
|
||||
|
||||
#undef GLOW_ADD
|
||||
color *= params.glow_strength;
|
||||
|
||||
if (bool(params.flags & FLAG_GLOW_FIRST_PASS)) {
|
||||
#ifdef GLOW_USE_AUTO_EXPOSURE
|
||||
|
||||
@@ -37,12 +37,14 @@ layout(push_constant, binding = 1, std430) uniform Params {
|
||||
uvec2 glow_texture_size;
|
||||
|
||||
float glow_intensity;
|
||||
uint glow_level_flags;
|
||||
uint pad3;
|
||||
uint glow_mode;
|
||||
float glow_levels[7];
|
||||
|
||||
float exposure;
|
||||
float white;
|
||||
float auto_exposure_grey;
|
||||
uint pad2;
|
||||
|
||||
vec2 pixel_size;
|
||||
bool use_fxaa;
|
||||
@@ -186,32 +188,32 @@ vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR, always o
|
||||
vec3 gather_glow(sampler2D tex, vec2 uv) { // sample all selected glow levels
|
||||
vec3 glow = vec3(0.0f);
|
||||
|
||||
if (bool(params.glow_level_flags & (1 << 0))) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 0).rgb;
|
||||
if (params.glow_levels[0] > 0.0001) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 0).rgb * params.glow_levels[0];
|
||||
}
|
||||
|
||||
if (bool(params.glow_level_flags & (1 << 1))) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 1).rgb;
|
||||
if (params.glow_levels[1] > 0.0001) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 1).rgb * params.glow_levels[1];
|
||||
}
|
||||
|
||||
if (bool(params.glow_level_flags & (1 << 2))) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 2).rgb;
|
||||
if (params.glow_levels[2] > 0.0001) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 2).rgb * params.glow_levels[2];
|
||||
}
|
||||
|
||||
if (bool(params.glow_level_flags & (1 << 3))) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 3).rgb;
|
||||
if (params.glow_levels[3] > 0.0001) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 3).rgb * params.glow_levels[3];
|
||||
}
|
||||
|
||||
if (bool(params.glow_level_flags & (1 << 4))) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 4).rgb;
|
||||
if (params.glow_levels[4] > 0.0001) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 4).rgb * params.glow_levels[4];
|
||||
}
|
||||
|
||||
if (bool(params.glow_level_flags & (1 << 5))) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 5).rgb;
|
||||
if (params.glow_levels[5] > 0.0001) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 5).rgb * params.glow_levels[5];
|
||||
}
|
||||
|
||||
if (bool(params.glow_level_flags & (1 << 6))) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 6).rgb;
|
||||
if (params.glow_levels[6] > 0.0001) {
|
||||
glow += GLOW_TEXTURE_SAMPLE(tex, uv, 6).rgb * params.glow_levels[6];
|
||||
}
|
||||
|
||||
return glow;
|
||||
|
||||
Reference in New Issue
Block a user