Use best fit normals for storing screen space normals

2025-11-13 13:31:48 +00:00 · 2023-12-18 15:35:26 -07:00
parent bf8dd73e9d
commit 43cf21cb71
19 changed files with 207 additions and 41 deletions
--- a/servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl
@@ -20,6 +20,7 @@
 #define FLAG_SRGB (1 << 4)
 #define FLAG_ALPHA_TO_ONE (1 << 5)
 #define FLAG_LINEAR (1 << 6)
+#define FLAG_NORMAL (1 << 7)

 #ifdef MULTIVIEW
 layout(location = 0) out vec3 uv_interp;
@@ -77,6 +78,7 @@ void main() {
 #define FLAG_SRGB (1 << 4)
 #define FLAG_ALPHA_TO_ONE (1 << 5)
 #define FLAG_LINEAR (1 << 6)
+#define FLAG_NORMAL (1 << 7)

 layout(push_constant, std430) uniform Params {
 	vec4 section;
@@ -192,6 +194,9 @@ void main() {
 	if (bool(params.flags & FLAG_LINEAR)) {
 		color.rgb = srgb_to_linear(color.rgb);
 	}
+	if (bool(params.flags & FLAG_NORMAL)) {
+		color.rgb = normalize(color.rgb * 2.0 - 1.0) * 0.5 + 0.5;
+	}

 	frag_color = color / params.luminance_multiplier;
 #endif // MODE_SET_COLOR
--- a/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection.glsl
@@ -65,8 +65,12 @@ void main() {
 	vec3 vertex = reconstructCSPosition(uv * vec2(params.screen_size), base_depth);

 	vec4 normal_roughness = imageLoad(source_normal_roughness, ssC);
-	vec3 normal = normal_roughness.xyz * 2.0 - 1.0;
+	vec3 normal = normalize(normal_roughness.xyz * 2.0 - 1.0);
 	float roughness = normal_roughness.w;
+	if (roughness > 0.5) {
+		roughness = 1.0 - roughness;
+	}
+	roughness /= (127.0 / 255.0);

 	// The roughness cutoff of 0.6 is chosen to match the roughness fadeout from GH-69828.
 	if (roughness > 0.6) {
--- a/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/screen_space_reflection_scale.glsl
@@ -59,8 +59,13 @@ void main() {
 			color += texelFetch(source_ssr, ofs, 0);
 			float d = texelFetch(source_depth, ofs, 0).r;
 			vec4 nr = texelFetch(source_normal, ofs, 0);
-			normal.xyz += nr.xyz * 2.0 - 1.0;
-			normal.w += nr.w;
+			normal.xyz += normalize(nr.xyz * 2.0 - 1.0);
+			float roughness = nr.w; // Decode this sample's stored roughness, not the running sum in normal.w.
+			if (roughness > 0.5) {
+				roughness = 1.0 - roughness;
+			}
+			roughness /= (127.0 / 255.0);
+			normal.w += roughness;

 			if (sc_multiview) {
 				// we're doing a full unproject so we need the value as is.
@@ -81,6 +86,7 @@ void main() {
 		depth /= 4.0;
 		normal.xyz = normalize(normal.xyz / 4.0) * 0.5 + 0.5;
 		normal.w /= 4.0;
+		normal.w = normal.w * (127.0 / 255.0);
 	} else {
 		ivec2 ofs = ssC << 1;

--- a/servers/rendering/renderer_rd/shaders/effects/ssao.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/ssao.glsl
@@ -158,21 +158,16 @@ vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p
 	return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0);
 }

-vec3 decode_normal(vec3 p_encoded_normal) {
-	vec3 normal = p_encoded_normal * 2.0 - 1.0;
-	return normal;
-}
-
 vec3 load_normal(ivec2 p_pos) {
-	vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz;
-	encoded_normal.z = 1.0 - encoded_normal.z;
-	return decode_normal(encoded_normal);
+	// Decode from 0..1 to -1..1 and renormalize the stored normal; Z is returned negated.
+	vec3 normal = normalize(imageLoad(source_normal, p_pos).xyz * 2.0 - 1.0);
+	return vec3(normal.xy, -normal.z);
 }

 vec3 load_normal(ivec2 p_pos, ivec2 p_offset) {
-	vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz;
-	encoded_normal.z = 1.0 - encoded_normal.z;
-	return decode_normal(encoded_normal);
+	// Same decode as the single-argument overload, sampled at p_pos + p_offset; Z is returned negated.
+	vec3 normal = normalize(imageLoad(source_normal, p_pos + p_offset).xyz * 2.0 - 1.0);
+	return vec3(normal.xy, -normal.z);
 }

 // all vectors in viewspace
--- a/servers/rendering/renderer_rd/shaders/effects/ssil.glsl
+++ b/servers/rendering/renderer_rd/shaders/effects/ssil.glsl
@@ -159,21 +159,16 @@ vec4 calculate_edges(const float p_center_z, const float p_left_z, const float p
 	return clamp((1.3 - edgesLRTB / (p_center_z * 0.040)), 0.0, 1.0);
 }

-vec3 decode_normal(vec3 p_encoded_normal) {
-	vec3 normal = p_encoded_normal * 2.0 - 1.0;
-	return normal;
-}
-
 vec3 load_normal(ivec2 p_pos) {
-	vec3 encoded_normal = imageLoad(source_normal, p_pos).xyz;
-	encoded_normal.z = 1.0 - encoded_normal.z;
-	return decode_normal(encoded_normal);
+	// Decode from 0..1 to -1..1 and renormalize the stored normal; Z is returned negated.
+	vec3 normal = normalize(imageLoad(source_normal, p_pos).xyz * 2.0 - 1.0);
+	return vec3(normal.xy, -normal.z);
 }

 vec3 load_normal(ivec2 p_pos, ivec2 p_offset) {
-	vec3 encoded_normal = imageLoad(source_normal, p_pos + p_offset).xyz;
-	encoded_normal.z = 1.0 - encoded_normal.z;
-	return decode_normal(encoded_normal);
+	// Same decode as the single-argument overload, sampled at p_pos + p_offset; Z is returned negated.
+	vec3 normal = normalize(imageLoad(source_normal, p_pos + p_offset).xyz * 2.0 - 1.0);
+	return vec3(normal.xy, -normal.z);
 }

 // all vectors in viewspace
--- a/servers/rendering/renderer_rd/shaders/environment/gi.glsl
+++ b/servers/rendering/renderer_rd/shaders/environment/gi.glsl
@@ -618,6 +618,11 @@ void process_gi(ivec2 pos, vec3 vertex, inout vec4 ambient_light, inout vec4 ref
 	if (normal.length() > 0.5) {
 		//valid normal, can do GI
 		float roughness = normal_roughness.w;
+		bool dynamic_object = roughness > 0.5;
+		if (dynamic_object) {
+			roughness = 1.0 - roughness;
+		}
+		roughness /= (127.0 / 255.0);
 		vec3 view = -normalize(mat3(scene_data.cam_transform) * (vertex - scene_data.eye_offset[gl_GlobalInvocationID.z].xyz));
 		vertex = mat3(scene_data.cam_transform) * vertex;
 		normal = normalize(mat3(scene_data.cam_transform) * normal);
--- a/servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl
+++ b/servers/rendering/renderer_rd/shaders/environment/voxel_gi.glsl
@@ -492,7 +492,7 @@ void main() {

 		ivec3 pos = params.x_dir * (params.rect_pos.x + pos_xy.x) + params.y_dir * (params.rect_pos.y + pos_xy.y) + abs(params.z_dir) * int(z);

-		vec3 normal = imageLoad(source_normal, uv_xy).xyz * 2.0 - 1.0;
+		vec3 normal = normalize(imageLoad(source_normal, uv_xy).xyz * 2.0 - 1.0);
 		normal = vec3(params.x_dir) * normal.x * mix(1.0, -1.0, params.flip_x) + vec3(params.y_dir) * normal.y * mix(1.0, -1.0, params.flip_y) - vec3(params.z_dir) * normal.z;

 		vec4 albedo = imageLoad(source_albedo, uv_xy);
--- a/servers/rendering/renderer_rd/shaders/forward_clustered/best_fit_normal.glsl
+++ b/servers/rendering/renderer_rd/shaders/forward_clustered/best_fit_normal.glsl
@@ -0,0 +1,43 @@
+#[compute]
+
+#version 450
+
+#VERSION_DEFINES
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(r8, set = 0, binding = 0) uniform restrict writeonly image2D current_image;
+
+// This shader is used to generate a "best fit normal texture" as described by:
+// https://advances.realtimerendering.com/s2010/Kaplanyan-CryEngine3(SIGGRAPH%202010%20Advanced%20RealTime%20Rendering%20Course).pdf
+// Each texel holds the vector length that stores a unit normal with the least quantization error.
+
+// Rounds c (remapped from -1..1 to 0..1 and clamped) to the nearest 1/255 step, then maps it back to -1..1.
+vec3 quantize(vec3 c) {
+	return round(clamp(c * 0.5 + 0.5, 0.0, 1.0) * 255.0) * (1.0 / 255.0) * 2.0 - 1.0;
+}
+
+float find_minimum_error(vec3 normal) {
+	float best_scale = 0.0;
+	float lowest_error = 100000.0;
+	// Despite the name, this returns the scale with the lowest error, not the error itself.
+	for (float nstep = 1.5; nstep < 127.5; ++nstep) {
+		float scale = nstep / 127.5;
+		vec3 scaled = normal * scale;
+		vec3 deviation = abs((quantize(scaled) - scaled) / scale);
+		float worst_axis = max(deviation.x, max(deviation.y, deviation.z));
+		if (worst_axis < lowest_error) {
+			lowest_error = worst_axis;
+			best_scale = scale;
+		}
+	}
+	return best_scale;
+}
+
+void main() {
+	// UV at the texel center on a 1024-step grid (presumably the texture size; confirm at dispatch).
+	ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+	vec2 uv = vec2(gl_GlobalInvocationID.xy) * vec2(1.0 / 1024.0) + vec2(0.5 / 1024.0);
+	// y is multiplied by x before building the direction; the red channel receives the best scale.
+	float best_scale = find_minimum_error(vec3(uv.x, uv.y * uv.x, 1.0));
+	imageStore(current_image, texel, vec4(best_scale, 1.0, 1.0, 1.0));
+}
--- a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl
+++ b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl
@@ -868,6 +868,28 @@ uint cluster_get_range_clip_mask(uint i, uint z_min, uint z_max) {

 #endif //!MODE_RENDER DEPTH

+#if defined(MODE_RENDER_NORMAL_ROUGHNESS) || defined(MODE_RENDER_MATERIAL)
+// https://advances.realtimerendering.com/s2010/Kaplanyan-CryEngine3(SIGGRAPH%202010%20Advanced%20RealTime%20Rendering%20Course).pdf
+vec3 encode24(vec3 v) {
+	// Work on the absolute value so all sign combinations share one lookup (handles most symmetry).
+	vec3 abs_normal = abs(v);
+	// The largest component is the major axis for the collapsed cubemap lookup.
+	float major_axis = max(abs_normal.z, max(abs_normal.x, abs_normal.y));
+	// The two remaining components, relative to the major axis, are the lookup coordinates.
+	vec2 fit_uv = abs_normal.z < major_axis ? (abs_normal.y < major_axis ? abs_normal.yz : abs_normal.xz) : abs_normal.xy;
+	fit_uv /= major_axis;
+	fit_uv = fit_uv.x < fit_uv.y ? fit_uv.yx : fit_uv.xy;
+	// Stretch: x now holds the larger coordinate, so y / x stays within 0..1.
+	fit_uv.y /= fit_uv.x;
+	float fitting_scale = texture(sampler2D(best_fit_normal_texture, SAMPLER_NEAREST_CLAMP), fit_uv).r;
+	// Make the vector touch the unit cube.
+	vec3 result = v / major_axis;
+	// Then shrink it to the length read from the best fit texture.
+	result *= fitting_scale;
+	return result;
+}
+#endif // MODE_RENDER_NORMAL_ROUGHNESS || MODE_RENDER_MATERIAL
+
 void fragment_shader(in SceneData scene_data) {
 	uint instance_index = instance_index_interp;

@@ -1519,18 +1541,18 @@ void fragment_shader(in SceneData scene_data) {
 			vec2 base_coord = screen_uv;
 			vec2 closest_coord = base_coord;
 #ifdef USE_MULTIVIEW
-			float closest_ang = dot(normal, textureLod(sampler2DArray(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), vec3(base_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0);
+			float closest_ang = dot(normal, normalize(textureLod(sampler2DArray(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), vec3(base_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0));
 #else // USE_MULTIVIEW
-			float closest_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), base_coord, 0.0).xyz * 2.0 - 1.0);
+			float closest_ang = dot(normal, normalize(textureLod(sampler2D(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), base_coord, 0.0).xyz * 2.0 - 1.0));
 #endif // USE_MULTIVIEW

 			for (int i = 0; i < 4; i++) {
 				const vec2 neighbors[4] = vec2[](vec2(-1, 0), vec2(1, 0), vec2(0, -1), vec2(0, 1));
 				vec2 neighbour_coord = base_coord + neighbors[i] * scene_data.screen_pixel_size;
 #ifdef USE_MULTIVIEW
-				float neighbour_ang = dot(normal, textureLod(sampler2DArray(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), vec3(neighbour_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0);
+				float neighbour_ang = dot(normal, normalize(textureLod(sampler2DArray(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), vec3(neighbour_coord, ViewIndex), 0.0).xyz * 2.0 - 1.0));
 #else // USE_MULTIVIEW
-				float neighbour_ang = dot(normal, textureLod(sampler2D(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), neighbour_coord, 0.0).xyz * 2.0 - 1.0);
+				float neighbour_ang = dot(normal, normalize(textureLod(sampler2D(normal_roughness_buffer, SAMPLER_LINEAR_CLAMP), neighbour_coord, 0.0).xyz * 2.0 - 1.0));
 #endif // USE_MULTIVIEW
 				if (neighbour_ang > closest_ang) {
 					closest_ang = neighbour_ang;
@@ -2302,7 +2324,7 @@ void fragment_shader(in SceneData scene_data) {
 	albedo_output_buffer.rgb = albedo;
 	albedo_output_buffer.a = alpha;

-	normal_output_buffer.rgb = normal * 0.5 + 0.5;
+	normal_output_buffer.rgb = encode24(normal) * 0.5 + 0.5;
 	normal_output_buffer.a = 0.0;
 	depth_output_buffer.r = -vertex.z;

@@ -2316,7 +2338,15 @@ void fragment_shader(in SceneData scene_data) {
 #endif

 #ifdef MODE_RENDER_NORMAL_ROUGHNESS
-	normal_roughness_output_buffer = vec4(normal * 0.5 + 0.5, roughness);
+	normal_roughness_output_buffer = vec4(encode24(normal) * 0.5 + 0.5, roughness);
+
+	// Pack the dynamic/static flag into the roughness channel: roughness is squeezed into
+	// 0..127/255 and mirrored (1.0 - value) for dynamic instances, so stored values over 0.5
+	// are dynamic and values under 0.5 are static.
+	normal_roughness_output_buffer.w *= 127.0 / 255.0;
+	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_DYNAMIC)) {
+		normal_roughness_output_buffer.w = 1.0 - normal_roughness_output_buffer.w;
+	}

 #ifdef MODE_RENDER_VOXEL_GI
 	if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_VOXEL_GI)) { // process voxel_gi_instances
--- a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl
+++ b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl
@@ -48,6 +48,7 @@ draw_call;

 layout(set = 0, binding = 2) uniform sampler shadow_sampler;

+#define INSTANCE_FLAGS_DYNAMIC (1 << 3)
 #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 4)
 #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 5)
 #define INSTANCE_FLAGS_USE_SDFGI (1 << 6)
@@ -163,6 +164,8 @@ sdfgi;

 layout(set = 0, binding = 14) uniform sampler DEFAULT_SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP;

+layout(set = 0, binding = 15) uniform texture2D best_fit_normal_texture;
+
 /* Set 1: Render Pass (changes per render pass) */

 layout(set = 1, binding = 0, std140) uniform SceneDataBlock {
@@ -328,6 +331,15 @@ layout(set = 1, binding = 34) uniform texture2D ssil_buffer;

 #endif

+vec4 normal_roughness_compatibility(vec4 p_normal_roughness) {
+	// Stored roughness over 0.5 is mirrored back; the result is then rescaled from 0..127/255 to 0..1.
+	float stored = p_normal_roughness.w;
+	float folded = stored > 0.5 ? 1.0 - stored : stored;
+	// Renormalize the decoded normal and remap it to 0..1 again for the caller.
+	vec3 unit_normal = normalize(p_normal_roughness.xyz * 2.0 - 1.0);
+	return vec4(unit_normal * 0.5 + 0.5, folded / (127.0 / 255.0));
+}
+
 /* Set 2 Skeleton & Instancing (can change per item) */

 layout(set = 2, binding = 0, std430) restrict readonly buffer Transforms {
--- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl
+++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl
@@ -29,6 +29,7 @@ draw_call;

 layout(set = 0, binding = 2) uniform sampler shadow_sampler;

+#define INSTANCE_FLAGS_DYNAMIC (1 << 3)
 #define INSTANCE_FLAGS_NON_UNIFORM_SCALE (1 << 4)
 #define INSTANCE_FLAGS_USE_GI_BUFFERS (1 << 5)
 #define INSTANCE_FLAGS_USE_SDFGI (1 << 6)