1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-05 12:10:55 +00:00

Add Persistent Buffers

This work is a heavily refactored and rewritten from TheForge's initial
code.

TheForge's original code had too many race conditions and was
fundamentally flawed as it was too easy to incur into those data races
by accident.

However they identified the proper places that needed changes, and the
idea was sound. I used their work as a blueprint to design this work.

This PR implements:

 - Introduction of UMA buffers used by a few buffers
(most notably the ones filled by _fill_instance_data).

Ironically this change seems to positively affect PC more than it does
on Mobile.

Updates D3D12 Memory Allocator to get GPU_UPLOAD heap support.

Metal implementation by Stuart Carnie.

Co-authored-by: Stuart Carnie <stuart.carnie@gmail.com>
Co-authored-by: TheForge team
This commit is contained in:
Stuart Carnie
2025-10-18 07:00:58 +11:00
parent 5950fca36c
commit 230adb7511
38 changed files with 2848 additions and 1466 deletions

View File

@@ -31,6 +31,7 @@
#pragma once
#include "core/templates/lru.h"
#include "servers/rendering/multi_uma_buffer.h"
#include "servers/rendering/renderer_canvas_render.h"
#include "servers/rendering/renderer_rd/pipeline_hash_map_rd.h"
#include "servers/rendering/renderer_rd/shaders/canvas.glsl.gen.h"
@@ -495,10 +496,12 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
HashMap<RID, TightLocalVector<RID>> canvas_texture_to_uniform_set;
struct Batch {
// Position in the UBO measured in bytes
/// First instance index into the instance buffer for this batch.
uint32_t start = 0;
/// Number of instances in this batch.
uint32_t instance_count = 0;
uint32_t instance_buffer_index = 0;
/// Resource ID of the instance buffer for this batch.
RID instance_buffer; // UMA
TextureInfo *tex_info;
@@ -528,11 +531,6 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
HashMap<TextureState, TextureInfo, HashMapHasherDefault, HashMapComparatorDefault<TextureState>, PagedAllocator<HashMapElement<TextureState, TextureInfo>>> texture_info_map;
// per-frame buffers
struct DataBuffer {
LocalVector<RID> instance_buffers;
};
struct State {
//state buffer
struct Buffer {
@@ -555,13 +553,17 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
uint32_t flags;
};
DataBuffer canvas_instance_data_buffers[BATCH_DATA_BUFFER_COUNT];
LocalVector<Batch> canvas_instance_batches;
uint32_t current_data_buffer_index = 0;
uint32_t current_instance_buffer_index = 0;
uint32_t current_batch_index = 0;
uint32_t last_instance_index = 0;
InstanceData *instance_data_array = nullptr;
static_assert(std::is_trivially_destructible_v<InstanceData>);
static_assert(std::is_trivially_constructible_v<InstanceData>);
MultiUmaBuffer<1u> instance_buffers = MultiUmaBuffer<1u>("CANVAS_INSTANCE_DATA");
/// A pointer to the current instance buffer retrieved from <c>instance_buffers</c>.
InstanceData *instance_data = nullptr;
/// The index of the next instance to be added to <c>instance_data</c>.
uint32_t instance_data_index = 0;
uint32_t max_instances_per_buffer = 16384;
uint32_t max_instance_buffer_size = 16384 * sizeof(InstanceData);
@@ -619,12 +621,14 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
inline RID _get_pipeline_specialization_or_ubershader(CanvasShaderData *p_shader_data, PipelineKey &r_pipeline_key, PushConstant &r_push_constant, RID p_mesh_instance = RID(), void *p_surface = nullptr, uint32_t p_surface_index = 0, RID *r_vertex_array = nullptr);
void _render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch);
void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch);
void _render_batch(RD::DrawListID p_draw_list, CanvasShaderData *p_shader_data, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _prepare_batch_texture_info(RID p_texture, TextureState &p_state, TextureInfo *p_info);
InstanceData *new_instance_data(float *p_world, uint32_t *p_lights, uint32_t p_base_flags, uint32_t p_index, uint32_t p_uniforms_ofs, TextureInfo *p_info);
// non-UMA
InstanceData *new_instance_data(const InstanceData &template_instance);
[[nodiscard]] Batch *_new_batch(bool &r_batch_broken);
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken, Batch *&r_current_batch);
void _add_to_batch(bool &r_batch_broken, Batch *&r_current_batch);
void _allocate_instance_buffer();
_FORCE_INLINE_ void _update_transform_2d_to_mat2x4(const Transform2D &p_transform, float *p_mat2x4);