Simplify Volumetric Fog

-Always use temporal reproject, it just loos way better than any other filter. -By always using termporal reproject, the shadowmap reduction can be done away with, massively improving performance. -Disadvantage of temporal reproject is update latency so.. -Made sure a gaussian filter runs in XY after fog, this allows to keep stability and lower latency.
2025-12-06 17:25:19 +00:00 · 2021-02-06 11:51:56 -03:00
parent fa2f7693bb
commit 8faf23b52b
19 changed files with 24 additions and 577 deletions
--- a/servers/rendering/renderer_rd/shaders/SCsub
+++ b/servers/rendering/renderer_rd/shaders/SCsub
@@ -39,7 +39,6 @@ if "RD_GLSL" in env["BUILDERS"]:
    env.RD_GLSL("sdfgi_debug.glsl")
    env.RD_GLSL("sdfgi_debug_probes.glsl")
    env.RD_GLSL("volumetric_fog.glsl")
-    env.RD_GLSL("shadow_reduce.glsl")
    env.RD_GLSL("particles.glsl")
    env.RD_GLSL("particles_copy.glsl")
    env.RD_GLSL("sort.glsl")
--- a/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl
+++ b/servers/rendering/renderer_rd/shaders/shadow_reduce.glsl
@@ -1,163 +0,0 @@
-#[compute]
-
-#version 450
-
-VERSION_DEFINES
-
-#define BLOCK_SIZE 8
-
-#ifdef MODE_REDUCE_SUBGROUP
-
-#extension GL_KHR_shader_subgroup_ballot : enable
-#extension GL_KHR_shader_subgroup_arithmetic : enable
-
-//nvidia friendly, max 32
-layout(local_size_x = 8, local_size_y = 4, local_size_z = 1) in;
-
-#else
-
-layout(local_size_x = BLOCK_SIZE, local_size_y = BLOCK_SIZE, local_size_z = 1) in;
-
-#endif
-
-#ifdef MODE_REDUCE
-
-shared float tmp_data[BLOCK_SIZE * BLOCK_SIZE];
-const uint swizzle_table[BLOCK_SIZE] = uint[](0, 4, 2, 6, 1, 5, 3, 7);
-const uint unswizzle_table[BLOCK_SIZE] = uint[](0, 0, 0, 1, 0, 2, 1, 3);
-
-#endif
-
-#if defined(MODE_REDUCE) || defined(MODE_REDUCE_SUBGROUP)
-layout(set = 0, binding = 0) uniform sampler2D source_depth;
-#else
-layout(r16, set = 0, binding = 0) uniform restrict readonly image2D source_depth;
-#endif
-layout(r16, set = 1, binding = 0) uniform restrict writeonly image2D dst_depth;
-
-layout(push_constant, binding = 1, std430) uniform Params {
-	ivec2 source_size;
-	ivec2 source_offset;
-	uint min_size;
-	uint gaussian_kernel_version;
-	ivec2 filter_dir;
-}
-params;
-
-void main() {
-#ifdef MODE_REDUCE_SUBGROUP
-
-	uvec2 local_pos = gl_LocalInvocationID.xy;
-	ivec2 image_offset = params.source_offset;
-	ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy * ivec2(1, 2));
-
-	float depth = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r;
-	depth += texelFetch(source_depth, min(image_pos + ivec2(0, 1), params.source_size - ivec2(1)), 0).r;
-	depth *= 0.5;
-
-#ifdef MODE_REDUCE_8
-	//fast version, reduce all
-	float depth_average = subgroupAdd(depth) / 32.0;
-	if (local_pos == uvec2(0)) {
-		imageStore(dst_depth, image_pos / 8, vec4(depth_average));
-	}
-#else
-	//bit slower version, reduce by regions
-	uint group_size = (8 / params.min_size);
-	uvec2 group_id = local_pos / (8 / params.min_size);
-
-	uvec4 mask;
-	float depth_average = 0;
-
-	while (true) {
-		uvec2 first = subgroupBroadcastFirst(group_id);
-		mask = subgroupBallot(first == group_id);
-		if (first == group_id) {
-			depth_average = subgroupAdd(depth);
-			break;
-		}
-	}
-
-	depth_average /= float(group_size * group_size);
-
-	if (local_pos == group_id) {
-		imageStore(dst_depth, image_pos / int(group_size), vec4(depth_average));
-	}
-#endif
-
-#endif
-
-#ifdef MODE_REDUCE
-
-	uvec2 pos = gl_LocalInvocationID.xy;
-
-	ivec2 image_offset = params.source_offset;
-	ivec2 image_pos = image_offset + ivec2(gl_GlobalInvocationID.xy);
-	uint dst_t = swizzle_table[pos.y] * BLOCK_SIZE + swizzle_table[pos.x];
-	tmp_data[dst_t] = texelFetch(source_depth, min(image_pos, params.source_size - ivec2(1)), 0).r;
-	ivec2 image_size = params.source_size;
-
-	uint t = pos.y * BLOCK_SIZE + pos.x;
-
-	//neighbours
-	uint size = BLOCK_SIZE;
-
-	do {
-		groupMemoryBarrier();
-		barrier();
-
-		size >>= 1;
-		image_size >>= 1;
-		image_offset >>= 1;
-
-		if (all(lessThan(pos, uvec2(size)))) {
-			uint nx = t + size;
-			uint ny = t + (BLOCK_SIZE * size);
-			uint nxy = ny + size;
-
-			tmp_data[t] += tmp_data[nx];
-			tmp_data[t] += tmp_data[ny];
-			tmp_data[t] += tmp_data[nxy];
-			tmp_data[t] /= 4.0;
-		}
-
-	} while (size > params.min_size);
-
-	if (all(lessThan(pos, uvec2(size)))) {
-		image_pos = ivec2(unswizzle_table[size + pos.x], unswizzle_table[size + pos.y]);
-		image_pos += image_offset + ivec2(gl_WorkGroupID.xy) * int(size);
-
-		image_size = max(ivec2(1), image_size); //in case image size became 0
-
-		if (all(lessThan(image_pos, uvec2(image_size)))) {
-			imageStore(dst_depth, image_pos, vec4(tmp_data[t]));
-		}
-	}
-#endif
-
-#ifdef MODE_FILTER
-
-	ivec2 image_pos = params.source_offset + ivec2(gl_GlobalInvocationID.xy);
-	if (any(greaterThanEqual(image_pos, params.source_size))) {
-		return;
-	}
-
-	ivec2 clamp_min = ivec2(params.source_offset);
-	ivec2 clamp_max = ivec2(params.source_size) - 1;
-
-	//gaussian kernel, size 9, sigma 4
-	const int kernel_size = 9;
-	const float gaussian_kernel[kernel_size * 3] = float[](
-			0.000229, 0.005977, 0.060598, 0.241732, 0.382928, 0.241732, 0.060598, 0.005977, 0.000229,
-			0.028532, 0.067234, 0.124009, 0.179044, 0.20236, 0.179044, 0.124009, 0.067234, 0.028532,
-			0.081812, 0.101701, 0.118804, 0.130417, 0.134535, 0.130417, 0.118804, 0.101701, 0.081812);
-	float accum = 0.0;
-	for (int i = 0; i < kernel_size; i++) {
-		ivec2 ofs = clamp(image_pos + params.filter_dir * (i - kernel_size / 2), clamp_min, clamp_max);
-		accum += imageLoad(source_depth, ofs).r * gaussian_kernel[params.gaussian_kernel_version + i];
-	}
-
-	imageStore(dst_depth, image_pos, vec4(accum));
-
-#endif
-}