1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-07 12:30:27 +00:00

Optimize vertex shader using mat3x4 to reduce bandwidth, load/store operations and ALUs

This commit is contained in:
clayjohn
2025-06-23 23:06:11 -07:00
parent 9283328fe7
commit 14b60f2264
11 changed files with 211 additions and 138 deletions

View File

@@ -1935,19 +1935,22 @@ void RenderForwardMobile::_fill_instance_data(RenderListType p_render_list, uint
}
if (inst->store_transform_cache) {
RendererRD::MaterialStorage::store_transform(inst->transform, instance_data.transform);
RendererRD::MaterialStorage::store_transform(inst->prev_transform, instance_data.prev_transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->transform, instance_data.transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->prev_transform, instance_data.prev_transform);
#ifdef REAL_T_IS_DOUBLE
// Split the origin into two components, the float approximation and the missing precision.
// In the shader we will combine these back together to restore the lost precision.
RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &instance_data.transform[12], &instance_data.transform[3]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &instance_data.transform[13], &instance_data.transform[7]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &instance_data.transform[14], &instance_data.transform[11]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &instance_data.transform[12], &instance_data.model_precision[0]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &instance_data.transform[13], &instance_data.model_precision[1]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &instance_data.transform[14], &instance_data.model_precision[2]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.x, &instance_data.prev_transform[12], &instance_data.prev_model_precision[0]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.y, &instance_data.prev_transform[13], &instance_data.prev_model_precision[1]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.z, &instance_data.prev_transform[14], &instance_data.prev_model_precision[2]);
#endif
} else {
RendererRD::MaterialStorage::store_transform(Transform3D(), instance_data.transform);
RendererRD::MaterialStorage::store_transform(Transform3D(), instance_data.prev_transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(Transform3D(), instance_data.transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(Transform3D(), instance_data.prev_transform);
}
instance_data.flags = inst->flags_cache;

View File

@@ -209,20 +209,24 @@ private:
};
struct InstanceData {
float transform[16];
float prev_transform[16];
float transform[12];
float compressed_aabb_position[4];
float compressed_aabb_size[4];
float uv_scale[4];
uint32_t flags;
uint32_t instance_uniforms_ofs; // Base offset in global buffer for instance variables.
uint32_t gi_offset; // GI information when using lightmapping (VCT or lightmap index).
uint32_t layer_mask;
float prev_transform[12];
float lightmap_uv_scale[4]; // Doubles as uv_offset when needed.
uint32_t reflection_probes[2]; // Packed reflection probes.
uint32_t omni_lights[2]; // Packed omni lights.
uint32_t spot_lights[2]; // Packed spot lights.
uint32_t decals[2]; // Packed decals.
float compressed_aabb_position[4];
float compressed_aabb_size[4];
float uv_scale[4];
#ifdef REAL_T_IS_DOUBLE
float model_precision[4];
float prev_model_precision[4];
#endif
// These setters allow us to copy the data over with a single operation when using floats.
inline void set_lightmap_uv_scale(const Rect2 &p_rect) {