1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-04 12:00:25 +00:00

Optimize vertex shader using mat3x4 to reduce bandwidth, load/store operations and ALUs

This commit is contained in:
clayjohn
2025-06-23 23:06:11 -07:00
parent 9283328fe7
commit 14b60f2264
11 changed files with 211 additions and 138 deletions

View File

@@ -789,19 +789,22 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset];
if (likely(inst->store_transform_cache)) {
RendererRD::MaterialStorage::store_transform(inst->transform, instance_data.transform);
RendererRD::MaterialStorage::store_transform(inst->prev_transform, instance_data.prev_transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->transform, instance_data.transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->prev_transform, instance_data.prev_transform);
#ifdef REAL_T_IS_DOUBLE
// Split the origin into two components, the float approximation and the missing precision.
// In the shader we will combine these back together to restore the lost precision.
RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &instance_data.transform[12], &instance_data.transform[3]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &instance_data.transform[13], &instance_data.transform[7]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &instance_data.transform[14], &instance_data.transform[11]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &instance_data.transform[12], &instance_data.model_precision[0]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &instance_data.transform[13], &instance_data.model_precision[1]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &instance_data.transform[14], &instance_data.model_precision[2]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.x, &instance_data.prev_transform[12], &instance_data.prev_model_precision[0]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.y, &instance_data.prev_transform[13], &instance_data.prev_model_precision[1]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.z, &instance_data.prev_transform[14], &instance_data.prev_model_precision[2]);
#endif
} else {
RendererRD::MaterialStorage::store_transform(Transform3D(), instance_data.transform);
RendererRD::MaterialStorage::store_transform(Transform3D(), instance_data.prev_transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(Transform3D(), instance_data.transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(Transform3D(), instance_data.prev_transform);
}
instance_data.flags = inst->flags_cache;

View File

@@ -322,16 +322,20 @@ private:
};
struct InstanceData {
float transform[16];
float prev_transform[16];
float transform[12];
float compressed_aabb_position[4];
float compressed_aabb_size[4];
float uv_scale[4];
uint32_t flags;
uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables
uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index)
uint32_t layer_mask;
float prev_transform[12];
float lightmap_uv_scale[4];
float compressed_aabb_position[4];
float compressed_aabb_size[4];
float uv_scale[4];
#ifdef REAL_T_IS_DOUBLE
float model_precision[4];
float prev_model_precision[4];
#endif
// These setters allow us to copy the data over with a single operation when using floats.
inline void set_lightmap_uv_scale(const Rect2 &p_rect) {