1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-04 12:00:25 +00:00

Merge pull request #111183 from stuartcarnie/matias-uma-pc-pr

Add Persistent Buffers utilizing UMA
This commit is contained in:
Thaddeus Crews
2025-10-24 11:23:11 -05:00
38 changed files with 2848 additions and 1466 deletions

View File

@@ -767,26 +767,38 @@ void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_dat
RD::get_singleton()->buffer_update(scene_state.implementation_uniform_buffers[p_index], 0, sizeof(SceneState::UBO), &scene_state.ubo);
}
void RenderForwardClustered::_update_instance_data_buffer(RenderListType p_render_list) {
if (scene_state.instance_data[p_render_list].size() > 0) {
if (scene_state.instance_buffer[p_render_list] == RID() || scene_state.instance_buffer_size[p_render_list] < scene_state.instance_data[p_render_list].size()) {
if (scene_state.instance_buffer[p_render_list] != RID()) {
RD::get_singleton()->free_rid(scene_state.instance_buffer[p_render_list]);
}
uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), scene_state.instance_data[p_render_list].size()));
scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData));
scene_state.instance_buffer_size[p_render_list] = new_size;
void RenderForwardClustered::SceneState::grow_instance_buffer(RenderListType p_render_list, uint32_t p_req_element_count, bool p_append) {
if (p_req_element_count > 0) {
if (instance_buffer[p_render_list].get_size(0u) < p_req_element_count * sizeof(SceneState::InstanceData)) {
instance_buffer[p_render_list].uninit();
uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), p_req_element_count));
instance_buffer[p_render_list].set_size(0u, new_size * sizeof(SceneState::InstanceData), true);
curr_gpu_ptr[p_render_list] = nullptr;
}
const bool must_remap = instance_buffer[p_render_list].prepare_for_map(p_append);
if (must_remap) {
curr_gpu_ptr[p_render_list] = nullptr;
}
RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr());
}
}
void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, int *p_render_info, uint32_t p_offset, int32_t p_max_elements, bool p_update_buffer) {
RenderList *rl = &render_list[p_render_list];
uint32_t element_total = p_max_elements >= 0 ? uint32_t(p_max_elements) : rl->elements.size();
scene_state.instance_data[p_render_list].resize(p_offset + element_total);
rl->element_info.resize(p_offset + element_total);
// If p_offset == 0, grow_instance_buffer resets and increments the buffer.
// If this behavior ever changes, _render_shadow_begin may need to change.
scene_state.grow_instance_buffer(p_render_list, p_offset + element_total, p_offset != 0u);
if (!scene_state.curr_gpu_ptr[p_render_list] && element_total > 0u) {
// The old buffer was replaced with a larger one. We must start copying from scratch.
element_total += p_offset;
p_offset = 0u;
scene_state.curr_gpu_ptr[p_render_list] = reinterpret_cast<SceneState::InstanceData *>(scene_state.instance_buffer[p_render_list].map_raw_for_upload(0u));
}
if (p_render_info) {
p_render_info[RS::VIEWPORT_RENDER_INFO_OBJECTS_IN_FRAME] += element_total;
}
@@ -797,7 +809,7 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
GeometryInstanceSurfaceDataCache *surface = rl->elements[i + p_offset];
GeometryInstanceForwardClustered *inst = surface->owner;
SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset];
SceneState::InstanceData instance_data;
if (likely(inst->store_transform_cache)) {
RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->transform, instance_data.transform);
@@ -836,7 +848,9 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
instance_data.set_compressed_aabb(surface_aabb);
instance_data.set_uv_scale(uv_scale);
bool cant_repeat = instance_data.flags & INSTANCE_DATA_FLAG_MULTIMESH || inst->mesh_instance.is_valid();
scene_state.curr_gpu_ptr[p_render_list][i + p_offset] = instance_data;
const bool cant_repeat = instance_data.flags & INSTANCE_DATA_FLAG_MULTIMESH || inst->mesh_instance.is_valid();
if (prev_surface != nullptr && !cant_repeat && prev_surface->sort.sort_key1 == surface->sort.sort_key1 && prev_surface->sort.sort_key2 == surface->sort.sort_key2 && inst->mirror == prev_surface->owner->mirror && repeats < RenderElementInfo::MAX_REPEATS) {
//this element is the same as the previous one, count repeats to draw it using instancing
@@ -870,8 +884,8 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
}
}
if (p_update_buffer) {
_update_instance_data_buffer(p_render_list);
if (p_update_buffer && element_total > 0u) {
RenderingDevice::get_singleton()->buffer_flush(scene_state.instance_buffer[p_render_list]._get(0u));
}
}
@@ -2722,7 +2736,8 @@ void RenderForwardClustered::_render_shadow_begin() {
_update_render_base_uniform_set();
render_list[RENDER_LIST_SECONDARY].clear();
scene_state.instance_data[RENDER_LIST_SECONDARY].clear();
// No need to reset scene_state.curr_gpu_ptr or scene_state.instance_buffer[RENDER_LIST_SECONDARY]
// because _fill_instance_data will do that if it detects p_offset == 0u.
}
void RenderForwardClustered::_render_shadow_append(RID p_framebuffer, const PagedArray<RenderGeometryInstance *> &p_instances, const Projection &p_projection, const Transform3D &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_reverse_cull_face, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, float p_lod_distance_multiplier, float p_screen_mesh_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end, RenderingMethod::RenderInfo *p_render_info, const Size2i &p_viewport_size, const Transform3D &p_main_cam_transform) {
@@ -2797,7 +2812,11 @@ void RenderForwardClustered::_render_shadow_append(RID p_framebuffer, const Page
}
void RenderForwardClustered::_render_shadow_process() {
_update_instance_data_buffer(RENDER_LIST_SECONDARY);
RenderingDevice *rd = RenderingDevice::get_singleton();
if (scene_state.instance_buffer[RENDER_LIST_SECONDARY].get_size(0u) > 0u) {
rd->buffer_flush(scene_state.instance_buffer[RENDER_LIST_SECONDARY]._get(0u));
}
//render shadows one after the other, so this can be done un-barriered and the driver can optimize (as well as allow us to run compute at the same time)
for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) {
@@ -3258,11 +3277,14 @@ RID RenderForwardClustered::_setup_render_pass_uniform_set(RenderListType p_rend
{
RD::Uniform u;
u.binding = 2;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
RID instance_buffer = scene_state.instance_buffer[p_render_list];
if (instance_buffer == RID()) {
instance_buffer = scene_shader.default_vec4_xform_buffer; // any buffer will do since its not used
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC;
if (scene_state.instance_buffer[p_render_list].get_size(0u) == 0u) {
// Any buffer will do since it's not used, so just create one.
// We can't use scene_shader.default_vec4_xform_buffer because it's not dynamic.
scene_state.instance_buffer[p_render_list].set_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData), true);
scene_state.instance_buffer[p_render_list].prepare_for_upload();
}
RID instance_buffer = scene_state.instance_buffer[p_render_list]._get(0u);
u.append_id(instance_buffer);
uniforms.push_back(u);
}
@@ -3624,11 +3646,14 @@ RID RenderForwardClustered::_setup_sdfgi_render_pass_uniform_set(RID p_albedo_te
{
RD::Uniform u;
u.binding = 2;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
RID instance_buffer = scene_state.instance_buffer[RENDER_LIST_SECONDARY];
if (instance_buffer == RID()) {
instance_buffer = scene_shader.default_vec4_xform_buffer; // any buffer will do since its not used
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC;
if (scene_state.instance_buffer[RENDER_LIST_SECONDARY].get_size(0u) == 0u) {
// Any buffer will do since it's not used, so just create one.
// We can't use scene_shader.default_vec4_xform_buffer because it's not dynamic.
scene_state.instance_buffer[RENDER_LIST_SECONDARY].set_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData), true);
scene_state.instance_buffer[RENDER_LIST_SECONDARY].prepare_for_upload();
}
RID instance_buffer = scene_state.instance_buffer[RENDER_LIST_SECONDARY]._get(0u);
u.append_id(instance_buffer);
uniforms.push_back(u);
}
@@ -5125,9 +5150,7 @@ RenderForwardClustered::~RenderForwardClustered() {
RD::get_singleton()->free_rid(scene_state.lightmap_buffer);
RD::get_singleton()->free_rid(scene_state.lightmap_capture_buffer);
for (uint32_t i = 0; i < RENDER_LIST_MAX; i++) {
if (scene_state.instance_buffer[i] != RID()) {
RD::get_singleton()->free_rid(scene_state.instance_buffer[i]);
}
scene_state.instance_buffer[i].uninit();
}
memdelete_arr(scene_state.lightmap_captures);
}

View File

@@ -31,6 +31,7 @@
#pragma once
#include "core/templates/paged_allocator.h"
#include "servers/rendering/multi_uma_buffer.h"
#include "servers/rendering/renderer_rd/cluster_builder_rd.h"
#include "servers/rendering/renderer_rd/effects/fsr2.h"
#ifdef METAL_ENABLED
@@ -398,9 +399,8 @@ private:
uint32_t max_lightmaps;
RID lightmap_buffer;
RID instance_buffer[RENDER_LIST_MAX];
uint32_t instance_buffer_size[RENDER_LIST_MAX] = { 0, 0, 0 };
LocalVector<InstanceData> instance_data[RENDER_LIST_MAX];
MultiUmaBuffer<1u> instance_buffer[RENDER_LIST_MAX] = { MultiUmaBuffer<1u>("RENDER_LIST_OPAQUE"), MultiUmaBuffer<1u>("RENDER_LIST_MOTION"), MultiUmaBuffer<1u>("RENDER_LIST_ALPHA"), MultiUmaBuffer<1u>("RENDER_LIST_SECONDARY") };
InstanceData *curr_gpu_ptr[RENDER_LIST_MAX] = {};
LightmapCaptureData *lightmap_captures = nullptr;
uint32_t max_lightmap_captures;
@@ -433,6 +433,7 @@ private:
LocalVector<ShadowPass> shadow_passes;
void grow_instance_buffer(RenderListType p_render_list, uint32_t p_req_element_count, bool p_append);
} scene_state;
static RenderForwardClustered *singleton;
@@ -464,7 +465,6 @@ private:
void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element);
void _render_list_with_draw_list(RenderListParameters *p_params, RID p_framebuffer, BitField<RD::DrawFlags> p_draw_flags = RD::DRAW_DEFAULT_ALL, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth_value = 0.0, uint32_t p_clear_stencil_value = 0, const Rect2 &p_region = Rect2());
void _update_instance_data_buffer(RenderListType p_render_list);
void _fill_instance_data(RenderListType p_render_list, int *p_render_info = nullptr, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true);
void _fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, bool p_using_motion_pass = false, bool p_append = false);

View File

@@ -667,7 +667,9 @@ void SceneShaderForwardClustered::init(const String p_defines) {
shader_versions.push_back(ShaderRD::VariantDefine(group, version, false));
}
shader.initialize(shader_versions, p_defines);
Vector<uint64_t> dynamic_buffers;
dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(RenderForwardClustered::RENDER_PASS_UNIFORM_SET, 2));
shader.initialize(shader_versions, p_defines, Vector<RD::PipelineImmutableSampler>(), dynamic_buffers);
if (RendererCompositorRD::get_singleton()->is_xr_enabled()) {
shader.enable_group(SHADER_GROUP_MULTIVIEW);

View File

@@ -424,13 +424,10 @@ bool RenderForwardMobile::_render_buffers_can_be_storage() {
return false;
}
RID RenderForwardMobile::_setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas, int p_index) {
RID RenderForwardMobile::_setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas, uint32_t p_pass_offset) {
RendererRD::LightStorage *light_storage = RendererRD::LightStorage::get_singleton();
RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton();
//there should always be enough uniform buffers for render passes, otherwise bugs
ERR_FAIL_INDEX_V(p_index, (int)scene_state.uniform_buffers.size(), RID());
bool is_multiview = false;
Ref<RenderBufferDataForwardMobile> rb_data;
@@ -454,19 +451,26 @@ RID RenderForwardMobile::_setup_render_pass_uniform_set(RenderListType p_render_
{
RD::Uniform u;
u.binding = 0;
u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
u.append_id(scene_state.uniform_buffers[p_index]);
u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC;
// Negative on purpose. We've created multiple uniform_buffers by calling prepare_for_upload()
// many times in a row, now we must reference those.
// We use 0u - p_pass_offset instead of -p_pass_offset to make MSVC warnings shut up.
// See the "Tricks" section of MultiUmaBuffer documentation.
u.append_id(scene_state.uniform_buffers._get(uint32_t(0u - p_pass_offset)));
uniforms.push_back(u);
}
{
RD::Uniform u;
u.binding = 1;
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
RID instance_buffer = scene_state.instance_buffer[p_render_list];
if (instance_buffer == RID()) {
instance_buffer = scene_shader.default_vec4_xform_buffer; // Any buffer will do since its not used.
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC;
if (scene_state.instance_buffer[p_render_list].get_size(0u) == 0u) {
// Any buffer will do since it's not used, so just create one.
// We can't use scene_shader.default_vec4_xform_buffer because it's not dynamic.
scene_state.instance_buffer[p_render_list].set_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData), true);
scene_state.instance_buffer[p_render_list].prepare_for_upload();
}
RID instance_buffer = scene_state.instance_buffer[p_render_list]._get(0u);
u.append_id(instance_buffer);
uniforms.push_back(u);
}
@@ -886,6 +890,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
render_list[RENDER_LIST_OPAQUE].sort_by_key();
}
render_list[RENDER_LIST_ALPHA].sort_by_reverse_depth_and_priority();
_fill_instance_data(RENDER_LIST_OPAQUE);
_fill_instance_data(RENDER_LIST_ALPHA);
@@ -1507,12 +1512,9 @@ void RenderForwardMobile::_render_shadow_begin() {
_update_render_base_uniform_set();
render_list[RENDER_LIST_SECONDARY].clear();
scene_state.instance_data[RENDER_LIST_SECONDARY].clear();
}
void RenderForwardMobile::_render_shadow_append(RID p_framebuffer, const PagedArray<RenderGeometryInstance *> &p_instances, const Projection &p_projection, const Transform3D &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, float p_lod_distance_multiplier, float p_screen_mesh_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end, RenderingMethod::RenderInfo *p_render_info, const Transform3D &p_main_cam_transform) {
uint32_t shadow_pass_index = scene_state.shadow_passes.size();
SceneState::ShadowPass shadow_pass;
if (p_render_info) {
@@ -1539,7 +1541,7 @@ void RenderForwardMobile::_render_shadow_append(RID p_framebuffer, const PagedAr
render_data.instances = &p_instances;
render_data.render_info = p_render_info;
_setup_environment(&render_data, true, Vector2(1, 1), Color(), false, p_use_pancake, shadow_pass_index);
_setup_environment(&render_data, true, Vector2(1, 1), Color(), false, p_use_pancake);
if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) {
scene_data.screen_mesh_lod_threshold = 0.0;
@@ -1580,13 +1582,17 @@ void RenderForwardMobile::_render_shadow_append(RID p_framebuffer, const PagedAr
}
void RenderForwardMobile::_render_shadow_process() {
_update_instance_data_buffer(RENDER_LIST_SECONDARY);
RenderingDevice *rd = RenderingDevice::get_singleton();
if (scene_state.instance_buffer[RENDER_LIST_SECONDARY].get_size(0u) > 0u) {
rd->buffer_flush(scene_state.instance_buffer[RENDER_LIST_SECONDARY]._get(0u));
}
//render shadows one after the other, so this can be done un-barriered and the driver can optimize (as well as allow us to run compute at the same time)
for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) {
//render passes need to be configured after instance buffer is done, since they need the latest version
SceneState::ShadowPass &shadow_pass = scene_state.shadow_passes[i];
shadow_pass.rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, nullptr, RID(), RendererRD::MaterialStorage::get_singleton()->samplers_rd_get_default(), false, i);
shadow_pass.rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, nullptr, RID(), RendererRD::MaterialStorage::get_singleton()->samplers_rd_get_default(), false, scene_state.shadow_passes.size() - 1u - i);
}
RD::get_singleton()->draw_command_end_label();
@@ -1899,17 +1905,19 @@ RID RenderForwardMobile::_render_buffers_get_velocity_texture(Ref<RenderSceneBuf
return RID();
}
void RenderForwardMobile::_update_instance_data_buffer(RenderListType p_render_list) {
if (scene_state.instance_data[p_render_list].size() > 0) {
if (scene_state.instance_buffer[p_render_list] == RID() || scene_state.instance_buffer_size[p_render_list] < scene_state.instance_data[p_render_list].size()) {
if (scene_state.instance_buffer[p_render_list] != RID()) {
RD::get_singleton()->free_rid(scene_state.instance_buffer[p_render_list]);
}
uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), scene_state.instance_data[p_render_list].size()));
scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData));
scene_state.instance_buffer_size[p_render_list] = new_size;
void RenderForwardMobile::SceneState::grow_instance_buffer(RenderListType p_render_list, uint32_t p_req_element_count, bool p_append) {
if (p_req_element_count > 0) {
if (instance_buffer[p_render_list].get_size(0u) < p_req_element_count * sizeof(SceneState::InstanceData)) {
instance_buffer[p_render_list].uninit();
uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), p_req_element_count));
instance_buffer[p_render_list].set_size(0u, new_size * sizeof(SceneState::InstanceData), true);
curr_gpu_ptr[p_render_list] = nullptr;
}
const bool must_remap = instance_buffer[p_render_list].prepare_for_map(p_append);
if (must_remap) {
curr_gpu_ptr[p_render_list] = nullptr;
}
RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr());
}
}
@@ -1917,16 +1925,22 @@ void RenderForwardMobile::_fill_instance_data(RenderListType p_render_list, uint
RenderList *rl = &render_list[p_render_list];
uint32_t element_total = p_max_elements >= 0 ? uint32_t(p_max_elements) : rl->elements.size();
scene_state.instance_data[p_render_list].resize(p_offset + element_total);
rl->element_info.resize(p_offset + element_total);
uint64_t frame = RSG::rasterizer->get_frame_number();
scene_state.grow_instance_buffer(p_render_list, p_offset + element_total, p_offset != 0u);
if (!scene_state.curr_gpu_ptr[p_render_list] && element_total > 0u) {
// The old buffer was replaced with a larger one. We must start copying from scratch.
element_total += p_offset;
p_offset = 0u;
scene_state.curr_gpu_ptr[p_render_list] = reinterpret_cast<SceneState::InstanceData *>(scene_state.instance_buffer[p_render_list].map_raw_for_upload(0u));
}
for (uint32_t i = 0; i < element_total; i++) {
GeometryInstanceSurfaceDataCache *surface = rl->elements[i + p_offset];
GeometryInstanceForwardMobile *inst = surface->owner;
SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset];
SceneState::InstanceData instance_data;
if (inst->prev_transform_dirty && frame > inst->prev_transform_change_frame + 1 && inst->prev_transform_change_frame) {
inst->prev_transform = inst->transform;
@@ -1972,14 +1986,16 @@ void RenderForwardMobile::_fill_instance_data(RenderListType p_render_list, uint
instance_data.set_compressed_aabb(surface_aabb);
instance_data.set_uv_scale(uv_scale);
scene_state.curr_gpu_ptr[p_render_list][i + p_offset] = instance_data;
RenderElementInfo &element_info = rl->element_info[p_offset + i];
// Sets lod_index and uses_lightmap at once.
element_info.value = uint32_t(surface->sort.sort_key1 & 0x1FF);
}
if (p_update_buffer) {
_update_instance_data_buffer(p_render_list);
if (p_update_buffer && element_total > 0u) {
RenderingDevice::get_singleton()->buffer_flush(scene_state.instance_buffer[p_render_list]._get(0u));
}
}
@@ -2182,22 +2198,20 @@ void RenderForwardMobile::_fill_render_list(RenderListType p_render_list, const
}
}
void RenderForwardMobile::_setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, const Color &p_default_bg_color, bool p_opaque_render_buffers, bool p_pancake_shadows, int p_index) {
void RenderForwardMobile::_setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, const Color &p_default_bg_color, bool p_opaque_render_buffers, bool p_pancake_shadows) {
RID env = is_environment(p_render_data->environment) ? p_render_data->environment : RID();
RID reflection_probe_instance = p_render_data->reflection_probe.is_valid() ? RendererRD::LightStorage::get_singleton()->reflection_probe_instance_get_probe(p_render_data->reflection_probe) : RID();
// May do this earlier in RenderSceneRenderRD::render_scene
if (p_index >= (int)scene_state.uniform_buffers.size()) {
uint32_t from = scene_state.uniform_buffers.size();
scene_state.uniform_buffers.resize(p_index + 1);
for (uint32_t i = from; i < scene_state.uniform_buffers.size(); i++) {
scene_state.uniform_buffers[i] = p_render_data->scene_data->create_uniform_buffer();
}
if (scene_state.uniform_buffers.get_size(0u) == 0u) {
scene_state.uniform_buffers.set_size(0u, p_render_data->scene_data->get_uniform_buffer_size_bytes(), false);
}
float luminance_multiplier = p_render_data->render_buffers.is_valid() ? p_render_data->render_buffers->get_luminance_multiplier() : 1.0;
p_render_data->scene_data->update_ubo(scene_state.uniform_buffers[p_index], get_debug_draw_mode(), env, reflection_probe_instance, p_render_data->camera_attributes, p_pancake_shadows, p_screen_size, p_default_bg_color, luminance_multiplier, p_opaque_render_buffers, false);
// Start a new setup.
scene_state.uniform_buffers.prepare_for_upload();
p_render_data->scene_data->update_ubo(scene_state.uniform_buffers.get_for_upload(0u), get_debug_draw_mode(), env, reflection_probe_instance, p_render_data->camera_attributes, p_pancake_shadows, p_screen_size, p_default_bg_color, luminance_multiplier, p_opaque_render_buffers, false);
}
/// RENDERING ///
@@ -3395,13 +3409,9 @@ RenderForwardMobile::~RenderForwardMobile() {
RSG::light_storage->directional_shadow_atlas_set_size(0);
{
for (const RID &rid : scene_state.uniform_buffers) {
RD::get_singleton()->free_rid(rid);
}
scene_state.uniform_buffers.uninit();
for (uint32_t i = 0; i < RENDER_LIST_MAX; i++) {
if (scene_state.instance_buffer[i].is_valid()) {
RD::get_singleton()->free_rid(scene_state.instance_buffer[i]);
}
scene_state.instance_buffer[i].uninit();
}
RD::get_singleton()->free_rid(scene_state.lightmap_buffer);
RD::get_singleton()->free_rid(scene_state.lightmap_capture_buffer);

View File

@@ -31,6 +31,7 @@
#pragma once
#include "core/templates/paged_allocator.h"
#include "servers/rendering/multi_uma_buffer.h"
#include "servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h"
#include "servers/rendering/renderer_rd/renderer_scene_render_rd.h"
@@ -161,18 +162,17 @@ private:
/* Render Scene */
RID _setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas = false, int p_index = 0);
RID _setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas = false, uint32_t p_pass_offset = 0u);
void _pre_opaque_render(RenderDataRD *p_render_data);
uint64_t lightmap_texture_array_version = 0xFFFFFFFF;
void _update_render_base_uniform_set();
void _update_instance_data_buffer(RenderListType p_render_list);
void _fill_instance_data(RenderListType p_render_list, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true);
void _fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, bool p_append = false);
void _setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, const Color &p_default_bg_color, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false, int p_index = 0);
void _setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, const Color &p_default_bg_color, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false);
void _setup_lightmaps(const RenderDataRD *p_render_data, const PagedArray<RID> &p_lightmaps, const Transform3D &p_cam_transform);
RID render_base_uniform_set;
@@ -193,7 +193,7 @@ private:
/* Scene state */
struct SceneState {
LocalVector<RID> uniform_buffers;
MultiUmaBuffer<1u> uniform_buffers = MultiUmaBuffer<1u>("SceneState::uniform_buffers");
struct PushConstantUbershader {
SceneShaderForwardMobile::ShaderSpecialization specialization;
@@ -274,9 +274,8 @@ private:
static_assert(std::is_trivially_destructible_v<InstanceData>);
static_assert(std::is_trivially_constructible_v<InstanceData>);
RID instance_buffer[RENDER_LIST_MAX];
uint32_t instance_buffer_size[RENDER_LIST_MAX] = { 0, 0, 0 };
LocalVector<InstanceData> instance_data[RENDER_LIST_MAX];
MultiUmaBuffer<1u> instance_buffer[RENDER_LIST_MAX] = { MultiUmaBuffer<1u>("RENDER_LIST_OPAQUE"), MultiUmaBuffer<1u>("RENDER_LIST_ALPHA"), MultiUmaBuffer<1u>("RENDER_LIST_SECONDARY") };
InstanceData *curr_gpu_ptr[RENDER_LIST_MAX] = {};
// !BAS! We need to change lightmaps, we're not going to do this with a buffer but pushing the used lightmap in
LightmapData lightmaps[MAX_LIGHTMAPS];
@@ -311,6 +310,8 @@ private:
};
LocalVector<ShadowPass> shadow_passes;
void grow_instance_buffer(RenderListType p_render_list, uint32_t p_req_element_count, bool p_append);
} scene_state;
/* Render List */

View File

@@ -601,7 +601,10 @@ void SceneShaderForwardMobile::init(const String p_defines) {
immutable_shadow_sampler.append_id(shadow_sampler);
immutable_shadow_sampler.uniform_type = RenderingDeviceCommons::UNIFORM_TYPE_SAMPLER;
immutable_samplers.push_back(immutable_shadow_sampler);
shader.initialize(shader_versions, p_defines, immutable_samplers);
Vector<uint64_t> dynamic_buffers;
dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(RenderForwardMobile::RENDER_PASS_UNIFORM_SET, 0));
dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(RenderForwardMobile::RENDER_PASS_UNIFORM_SET, 1));
shader.initialize(shader_versions, p_defines, immutable_samplers, dynamic_buffers);
if (RendererCompositorRD::get_singleton()->is_xr_enabled()) {
enable_multiview_shader_group();

View File

@@ -748,8 +748,6 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p
Item *canvas_group_owner = nullptr;
bool skip_item = false;
state.last_instance_index = 0;
bool update_skeletons = false;
bool time_used = false;
@@ -916,8 +914,13 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p
}
texture_info_map.clear();
state.current_data_buffer_index = (state.current_data_buffer_index + 1) % BATCH_DATA_BUFFER_COUNT;
state.current_instance_buffer_index = 0;
state.instance_data = nullptr;
if (state.instance_data_index > 0) {
// If there was any remaining instance data, it must be flushed.
RID buf = state.instance_buffers._get(0);
RD::get_singleton()->buffer_flush(buf);
state.instance_data_index = 0;
}
}
RID RendererCanvasRenderRD::light_create() {
@@ -1747,7 +1750,10 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
variants.push_back(base_define + "#define USE_ATTRIBUTES\n#define USE_POINT_SIZE\n"); // SHADER_VARIANT_ATTRIBUTES_POINTS
}
shader.canvas_shader.initialize(variants, global_defines);
Vector<uint64_t> dynamic_buffers;
dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(BATCH_UNIFORM_SET, 4));
shader.canvas_shader.initialize(variants, global_defines, {}, dynamic_buffers);
shader.default_version_data = memnew(CanvasShaderData);
shader.default_version_data->version = shader.canvas_shader.version_create();
@@ -2058,12 +2064,7 @@ void fragment() {
state.max_instances_per_buffer = uint32_t(GLOBAL_GET("rendering/2d/batching/item_buffer_size"));
state.max_instance_buffer_size = state.max_instances_per_buffer * sizeof(InstanceData);
state.canvas_instance_batches.reserve(200);
for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
DataBuffer &db = state.canvas_instance_data_buffers[i];
db.instance_buffers.push_back(RD::get_singleton()->storage_buffer_create(state.max_instance_buffer_size));
}
state.instance_data_array = memnew_arr(InstanceData, state.max_instances_per_buffer);
state.instance_buffers.set_size(0, state.max_instance_buffer_size, true);
}
}
@@ -2122,7 +2123,6 @@ uint32_t RendererCanvasRenderRD::get_pipeline_compilations(RS::PipelineSource p_
void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer, RenderingMethod::RenderInfo *r_render_info) {
// Record batches
uint32_t instance_index = 0;
{
RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton();
Item *current_clip = nullptr;
@@ -2132,7 +2132,7 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
bool batch_broken = false;
Batch *current_batch = _new_batch(batch_broken);
// Override the start position and index as we want to start from where we finished off last time.
current_batch->start = state.last_instance_index;
current_batch->start = state.instance_data_index;
for (int i = 0; i < p_item_count; i++) {
Item *ci = items[i];
@@ -2173,7 +2173,7 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
if (ci->repeat_source_item == nullptr || ci->repeat_size == Vector2()) {
Transform2D base_transform = p_canvas_transform_inverse * ci->final_transform;
_record_item_commands(ci, p_to_render_target, base_transform, current_clip, p_lights, instance_index, batch_broken, r_sdf_used, current_batch);
_record_item_commands(ci, p_to_render_target, base_transform, current_clip, p_lights, batch_broken, r_sdf_used, current_batch);
} else {
Point2 start_pos = ci->repeat_size * -(ci->repeat_times / 2);
Point2 offset;
@@ -2186,20 +2186,11 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
Transform2D base_transform = ci->final_transform;
base_transform.columns[2] += ci->repeat_source_item->final_transform.basis_xform(offset);
base_transform = p_canvas_transform_inverse * base_transform;
_record_item_commands(ci, p_to_render_target, base_transform, current_clip, p_lights, instance_index, batch_broken, r_sdf_used, current_batch);
_record_item_commands(ci, p_to_render_target, base_transform, current_clip, p_lights, batch_broken, r_sdf_used, current_batch);
}
}
}
}
// Copy over remaining data needed for rendering.
if (instance_index > 0) {
RD::get_singleton()->buffer_update(
state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[state.current_instance_buffer_index],
state.last_instance_index * sizeof(InstanceData),
instance_index * sizeof(InstanceData),
state.instance_data_array);
}
}
if (state.canvas_instance_batches.is_empty()) {
@@ -2284,63 +2275,28 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
state.current_batch_index = 0;
state.canvas_instance_batches.clear();
state.last_instance_index += instance_index;
}
// Initializes the InstanceData slot at p_index in the CPU-side staging array
// (state.instance_data_array) and returns a pointer to it. Most fields are
// zeroed first, then the per-item state gathered by the caller (world
// transform, packed light indices, base flags, instance-uniform offset,
// texture pixel size) is copied in.
RendererCanvasRenderRD::InstanceData *RendererCanvasRenderRD::new_instance_data(float *p_world, uint32_t *p_lights, uint32_t p_base_flags, uint32_t p_index, uint32_t p_uniforms_ofs, TextureInfo *p_info) {
InstanceData *instance_data = &state.instance_data_array[p_index];
// Zero out most fields.
for (int i = 0; i < 4; i++) {
instance_data->modulation[i] = 0.0;
instance_data->ninepatch_margins[i] = 0.0;
instance_data->src_rect[i] = 0.0;
instance_data->dst_rect[i] = 0.0;
}
instance_data->pad[0] = 0.0;
instance_data->pad[1] = 0.0;
// Light indices are packed 8 bits apiece, four per uint32_t (see the
// `light_index << ((light_count & 3) * 8)` packing at the call site).
instance_data->lights[0] = p_lights[0];
instance_data->lights[1] = p_lights[1];
instance_data->lights[2] = p_lights[2];
instance_data->lights[3] = p_lights[3];
// 2x3 world transform produced by _update_transform_2d_to_mat2x3.
for (int i = 0; i < 6; i++) {
instance_data->world[i] = p_world[i];
}
instance_data->flags = p_base_flags; // Reset on each command for safety.
instance_data->color_texture_pixel_size[0] = p_info->texpixel_size.width;
instance_data->color_texture_pixel_size[1] = p_info->texpixel_size.height;
instance_data->instance_uniforms_ofs = p_uniforms_ofs;
return instance_data;
}
void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch) {
void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch) {
const RenderingServer::CanvasItemTextureFilter texture_filter = p_item->texture_filter == RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT ? default_filter : p_item->texture_filter;
const RenderingServer::CanvasItemTextureRepeat texture_repeat = p_item->texture_repeat == RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT ? default_repeat : p_item->texture_repeat;
Transform2D base_transform = p_base_transform;
float world[6];
InstanceData template_instance;
memset(&template_instance, 0, sizeof(InstanceData));
Transform2D draw_transform; // Used by transform command
_update_transform_2d_to_mat2x3(base_transform, world);
_update_transform_2d_to_mat2x3(base_transform, template_instance.world);
Color base_color = p_item->final_modulate;
bool use_linear_colors = p_render_target.use_linear_colors;
uint32_t base_flags = 0;
uint32_t uniforms_ofs = static_cast<uint32_t>(p_item->instance_allocated_shader_uniforms_offset);
template_instance.instance_uniforms_ofs = static_cast<uint32_t>(p_item->instance_allocated_shader_uniforms_offset);
bool reclip = false;
bool skipping = false;
// TODO: consider making lights a per-batch property and then baking light operations in the shader for better performance.
uint32_t lights[4] = { 0, 0, 0, 0 };
uint16_t light_count = 0;
uint16_t shadow_mask = 0;
@@ -2350,7 +2306,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
while (light) {
if (light->render_index_cache >= 0 && p_item->light_mask & light->item_mask && p_item->z_final >= light->z_min && p_item->z_final <= light->z_max && p_item->global_rect_cache.intersects(light->rect_cache)) {
uint32_t light_index = light->render_index_cache;
lights[light_count >> 2] |= light_index << ((light_count & 3) * 8);
// TODO: consider making lights a per-batch property and then baking light operations in the shader for better performance.
template_instance.lights[light_count >> 2] |= light_index << ((light_count & 3) * 8);
if (p_item->light_mask & light->item_shadow_mask) {
shadow_mask |= 1 << light_count;
@@ -2365,8 +2322,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
light = light->next_ptr;
}
base_flags |= light_count << INSTANCE_FLAGS_LIGHT_COUNT_SHIFT;
base_flags |= shadow_mask << INSTANCE_FLAGS_SHADOW_MASKED_SHIFT;
template_instance.flags |= light_count << INSTANCE_FLAGS_LIGHT_COUNT_SHIFT;
template_instance.flags |= shadow_mask << INSTANCE_FLAGS_SHADOW_MASKED_SHIFT;
}
bool use_lighting = (light_count > 0 || using_directional_lights);
@@ -2430,9 +2387,11 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}
InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
InstanceData *instance_data = new_instance_data(template_instance);
Rect2 src_rect;
Rect2 dst_rect;
@@ -2505,7 +2464,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->dst_rect[2] = dst_rect.size.width;
instance_data->dst_rect[3] = dst_rect.size.height;
_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
} break;
case Item::Command::TYPE_NINEPATCH: {
@@ -2531,9 +2490,11 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}
InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
InstanceData *instance_data = new_instance_data(template_instance);
Rect2 src_rect;
Rect2 dst_rect(np->rect.position.x, np->rect.position.y, np->rect.size.x, np->rect.size.y);
@@ -2582,7 +2543,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->ninepatch_margins[2] = np->margin[SIDE_RIGHT];
instance_data->ninepatch_margins[3] = np->margin[SIDE_BOTTOM];
_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
} break;
case Item::Command::TYPE_POLYGON: {
@@ -2606,6 +2567,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}
// pipeline variant
@@ -2615,7 +2578,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
r_current_batch->render_primitive = _primitive_type_to_render_primitive(polygon->primitive);
}
InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
InstanceData *instance_data = new_instance_data(template_instance);
Color color = base_color;
if (use_linear_colors) {
@@ -2627,7 +2590,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->modulation[2] = color.b;
instance_data->modulation[3] = color.a;
_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
} break;
case Item::Command::TYPE_PRIMITIVE: {
@@ -2673,9 +2636,11 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}
InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
InstanceData *instance_data = new_instance_data(template_instance);
for (uint32_t j = 0; j < MIN(3u, primitive->point_count); j++) {
instance_data->points[j * 2 + 0] = primitive->points[j].x;
@@ -2690,10 +2655,10 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->colors[j * 2 + 1] = (uint32_t(Math::make_half_float(col.a)) << 16) | Math::make_half_float(col.b);
}
_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
if (primitive->point_count == 4) {
instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
instance_data = new_instance_data(template_instance);
for (uint32_t j = 0; j < 3; j++) {
int offset = j == 0 ? 0 : 1;
@@ -2710,7 +2675,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->colors[j * 2 + 1] = (uint32_t(Math::make_half_float(col.a)) << 16) | Math::make_half_float(col.b);
}
_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
}
} break;
@@ -2736,7 +2701,9 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
_prepare_batch_texture_info(m->texture, tex_state, tex_info);
}
r_current_batch->tex_info = tex_info;
instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
instance_data = new_instance_data(template_instance);
r_current_batch->mesh_instance_count = 1;
_update_transform_2d_to_mat2x3(base_transform * draw_transform * m->transform, instance_data->world);
@@ -2763,7 +2730,9 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
_prepare_batch_texture_info(mm->texture, tex_state, tex_info);
}
r_current_batch->tex_info = tex_info;
instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
instance_data = new_instance_data(template_instance);
r_current_batch->flags |= 1; // multimesh, trails disabled
@@ -2785,7 +2754,9 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
_prepare_batch_texture_info(pt->texture, tex_state, tex_info);
}
r_current_batch->tex_info = tex_info;
instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
instance_data = new_instance_data(template_instance);
uint32_t divisor = 1;
r_current_batch->mesh_instance_count = particles_storage->particles_get_amount(pt->particles, divisor);
@@ -2828,13 +2799,13 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->modulation[2] = modulated.b;
instance_data->modulation[3] = modulated.a;
_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
} break;
case Item::Command::TYPE_TRANSFORM: {
const Item::CommandTransform *transform = static_cast<const Item::CommandTransform *>(c);
draw_transform = transform->xform;
_update_transform_2d_to_mat2x3(base_transform * transform->xform, world);
_update_transform_2d_to_mat2x3(base_transform * transform->xform, template_instance.world);
} break;
case Item::Command::TYPE_CLIP_IGNORE: {
@@ -2906,10 +2877,12 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}
_update_transform_2d_to_mat2x3(base_transform, world);
InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
_update_transform_2d_to_mat2x3(base_transform, template_instance.world);
InstanceData *instance_data = new_instance_data(template_instance);
Rect2 src_rect;
Rect2 dst_rect;
@@ -2941,7 +2914,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->dst_rect[2] = dst_rect.size.width;
instance_data->dst_rect[3] = dst_rect.size.height;
_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
p_item->debug_redraw_time -= RSG::rasterizer->get_frame_delta_time();
@@ -2984,9 +2957,7 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
{
RendererRD::TextureStorage *ts = RendererRD::TextureStorage::get_singleton();
RIDSetKey key(
p_batch->tex_info->state,
state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[p_batch->instance_buffer_index]);
RIDSetKey key(p_batch->tex_info->state, p_batch->instance_buffer);
const RID *uniform_set = rid_set_to_uniform_set.getptr(key);
if (uniform_set == nullptr) {
@@ -2995,7 +2966,7 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
uniform_ptrw[1] = RD::Uniform(RD::UNIFORM_TYPE_TEXTURE, 1, p_batch->tex_info->normal);
uniform_ptrw[2] = RD::Uniform(RD::UNIFORM_TYPE_TEXTURE, 2, p_batch->tex_info->specular);
uniform_ptrw[3] = RD::Uniform(RD::UNIFORM_TYPE_SAMPLER, 3, p_batch->tex_info->sampler);
uniform_ptrw[4] = RD::Uniform(RD::UNIFORM_TYPE_STORAGE_BUFFER, 4, state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[p_batch->instance_buffer_index]);
uniform_ptrw[4] = RD::Uniform(RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC, 4, p_batch->instance_buffer);
RID rid = RD::get_singleton()->uniform_set_create(state.batch_texture_uniforms, shader.default_version_rd_shader, BATCH_UNIFORM_SET);
ERR_FAIL_COND_MSG(rid.is_null(), "Failed to create uniform set for batch.");
@@ -3194,10 +3165,24 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
}
}
// UMA path: returns the InstanceData slot at state.instance_data_index inside
// the currently mapped instance buffer, initialized as a byte copy of the
// caller-built template. state.instance_data must already point at mapped
// memory (set in _allocate_instance_buffer via map_raw_for_upload); the slot
// index is advanced by the caller through _add_to_batch, not here.
// NOTE(review): assumes InstanceData is trivially copyable (memcpy) — the
// header enforces this with static_asserts on trivial construct/destruct.
RendererCanvasRenderRD::InstanceData *RendererCanvasRenderRD::new_instance_data(const InstanceData &template_instance) {
DEV_ASSERT(state.instance_data != nullptr);
InstanceData *instance_data = &state.instance_data[state.instance_data_index];
memcpy(instance_data, &template_instance, sizeof(InstanceData));
return instance_data;
}
RendererCanvasRenderRD::Batch *RendererCanvasRenderRD::_new_batch(bool &r_batch_broken) {
if (state.canvas_instance_batches.is_empty()) {
Batch new_batch;
new_batch.instance_buffer_index = state.current_instance_buffer_index;
// This will still be a valid point when multiple calls to _render_batch_items
// are made in the same draw call.
if (state.instance_data == nullptr) {
// If there is no existing instance buffer, we must allocate a new one.
_allocate_instance_buffer();
}
new_batch.instance_buffer = state.instance_buffers._get(0);
state.canvas_instance_batches.push_back(new_batch);
return state.canvas_instance_batches.ptr();
}
@@ -3212,43 +3197,30 @@ RendererCanvasRenderRD::Batch *RendererCanvasRenderRD::_new_batch(bool &r_batch_
Batch new_batch = state.canvas_instance_batches[state.current_batch_index];
new_batch.instance_count = 0;
new_batch.start = state.canvas_instance_batches[state.current_batch_index].start + state.canvas_instance_batches[state.current_batch_index].instance_count;
new_batch.instance_buffer_index = state.current_instance_buffer_index;
state.current_batch_index++;
state.canvas_instance_batches.push_back(new_batch);
return &state.canvas_instance_batches[state.current_batch_index];
}
void RendererCanvasRenderRD::_add_to_batch(uint32_t &r_index, bool &r_batch_broken, Batch *&r_current_batch) {
void RendererCanvasRenderRD::_add_to_batch(bool &r_batch_broken, Batch *&r_current_batch) {
r_current_batch->instance_count++;
r_index++;
if (r_index + state.last_instance_index >= state.max_instances_per_buffer) {
// Copy over all data needed for rendering right away
// then go back to recording item commands.
RD::get_singleton()->buffer_update(
state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[state.current_instance_buffer_index],
state.last_instance_index * sizeof(InstanceData),
r_index * sizeof(InstanceData),
state.instance_data_array);
state.instance_data_index++;
if (state.instance_data_index >= state.max_instances_per_buffer) {
RD::get_singleton()->buffer_flush(r_current_batch->instance_buffer);
state.instance_data = nullptr;
_allocate_instance_buffer();
r_index = 0;
state.last_instance_index = 0;
state.instance_data_index = 0;
state.instance_data_index = 0;
r_batch_broken = false; // Force a new batch to be created
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->start = 0;
r_current_batch->instance_buffer = state.instance_buffers._get(0);
}
}
void RendererCanvasRenderRD::_allocate_instance_buffer() {
state.current_instance_buffer_index++;
if (state.current_instance_buffer_index < state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers.size()) {
// We already allocated another buffer in a previous frame, so we can just use it.
return;
}
// Allocate a new buffer.
RID buf = RD::get_singleton()->storage_buffer_create(state.max_instance_buffer_size);
state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers.push_back(buf);
state.instance_buffers.prepare_for_upload();
state.instance_data = reinterpret_cast<InstanceData *>(state.instance_buffers.map_raw_for_upload(0));
}
void RendererCanvasRenderRD::_prepare_batch_texture_info(RID p_texture, TextureState &p_state, TextureInfo *p_info) {
@@ -3337,12 +3309,7 @@ RendererCanvasRenderRD::~RendererCanvasRenderRD() {
RD::get_singleton()->free_rid(state.shadow_occluder_buffer);
}
memdelete_arr(state.instance_data_array);
for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
for (uint32_t j = 0; j < state.canvas_instance_data_buffers[i].instance_buffers.size(); j++) {
RD::get_singleton()->free_rid(state.canvas_instance_data_buffers[i].instance_buffers[j]);
}
}
state.instance_buffers.uninit();
// Disable the callback, as we're tearing everything down
texture_storage->canvas_texture_set_invalidation_callback(default_canvas_texture, nullptr, nullptr);

View File

@@ -31,6 +31,7 @@
#pragma once
#include "core/templates/lru.h"
#include "servers/rendering/multi_uma_buffer.h"
#include "servers/rendering/renderer_canvas_render.h"
#include "servers/rendering/renderer_rd/pipeline_hash_map_rd.h"
#include "servers/rendering/renderer_rd/shaders/canvas.glsl.gen.h"
@@ -495,10 +496,12 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
HashMap<RID, TightLocalVector<RID>> canvas_texture_to_uniform_set;
struct Batch {
// Position in the UBO measured in bytes
/// First instance index into the instance buffer for this batch.
uint32_t start = 0;
/// Number of instances in this batch.
uint32_t instance_count = 0;
uint32_t instance_buffer_index = 0;
/// Resource ID of the instance buffer for this batch.
RID instance_buffer; // UMA
TextureInfo *tex_info;
@@ -528,11 +531,6 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
HashMap<TextureState, TextureInfo, HashMapHasherDefault, HashMapComparatorDefault<TextureState>, PagedAllocator<HashMapElement<TextureState, TextureInfo>>> texture_info_map;
// per-frame buffers
struct DataBuffer {
LocalVector<RID> instance_buffers;
};
struct State {
//state buffer
struct Buffer {
@@ -555,13 +553,17 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
uint32_t flags;
};
DataBuffer canvas_instance_data_buffers[BATCH_DATA_BUFFER_COUNT];
LocalVector<Batch> canvas_instance_batches;
uint32_t current_data_buffer_index = 0;
uint32_t current_instance_buffer_index = 0;
uint32_t current_batch_index = 0;
uint32_t last_instance_index = 0;
InstanceData *instance_data_array = nullptr;
static_assert(std::is_trivially_destructible_v<InstanceData>);
static_assert(std::is_trivially_constructible_v<InstanceData>);
MultiUmaBuffer<1u> instance_buffers = MultiUmaBuffer<1u>("CANVAS_INSTANCE_DATA");
/// A pointer to the current instance buffer retrieved from <c>instance_buffers</c>.
InstanceData *instance_data = nullptr;
/// The index of the next instance to be added to <c>instance_data</c>.
uint32_t instance_data_index = 0;
uint32_t max_instances_per_buffer = 16384;
uint32_t max_instance_buffer_size = 16384 * sizeof(InstanceData);
@@ -619,12 +621,14 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
inline RID _get_pipeline_specialization_or_ubershader(CanvasShaderData *p_shader_data, PipelineKey &r_pipeline_key, PushConstant &r_push_constant, RID p_mesh_instance = RID(), void *p_surface = nullptr, uint32_t p_surface_index = 0, RID *r_vertex_array = nullptr);
void _render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch);
void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch);
void _render_batch(RD::DrawListID p_draw_list, CanvasShaderData *p_shader_data, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _prepare_batch_texture_info(RID p_texture, TextureState &p_state, TextureInfo *p_info);
InstanceData *new_instance_data(float *p_world, uint32_t *p_lights, uint32_t p_base_flags, uint32_t p_index, uint32_t p_uniforms_ofs, TextureInfo *p_info);
// non-UMA
InstanceData *new_instance_data(const InstanceData &template_instance);
[[nodiscard]] Batch *_new_batch(bool &r_batch_broken);
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken, Batch *&r_current_batch);
void _add_to_batch(bool &r_batch_broken, Batch *&r_current_batch);
void _allocate_instance_buffer();
_FORCE_INLINE_ void _update_transform_2d_to_mat2x4(const Transform2D &p_transform, float *p_mat2x4);

View File

@@ -302,7 +302,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) {
}
Vector<String> variant_stage_sources = _build_variant_stage_sources(variant, p_data);
Vector<RD::ShaderStageSPIRVData> variant_stages = compile_stages(variant_stage_sources);
Vector<RD::ShaderStageSPIRVData> variant_stages = compile_stages(variant_stage_sources, dynamic_buffers);
ERR_FAIL_COND(variant_stages.is_empty());
Vector<uint8_t> shader_data = RD::get_singleton()->shader_compile_binary_from_spirv(variant_stages, name + ":" + itos(variant));
@@ -783,6 +783,10 @@ const String &ShaderRD::get_name() const {
return name;
}
const Vector<uint64_t> &ShaderRD::get_dynamic_buffers() const {
return dynamic_buffers;
}
bool ShaderRD::shader_cache_cleanup_on_start = false;
ShaderRD::ShaderRD() {
@@ -801,12 +805,13 @@ ShaderRD::ShaderRD() {
base_compute_defines = base_compute_define_text.ascii();
}
void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers) {
void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers, const Vector<uint64_t> &p_dynamic_buffers) {
ERR_FAIL_COND(variant_defines.size());
ERR_FAIL_COND(p_variant_defines.is_empty());
general_defines = p_general_defines.utf8();
immutable_samplers = p_immutable_samplers;
dynamic_buffers = p_dynamic_buffers;
// When initialized this way, there is just one group and its always enabled.
group_to_variant_map.insert(0, LocalVector<int>{});
@@ -846,6 +851,11 @@ void ShaderRD::_initialize_cache() {
hash_build.append(variant_defines[E.value[i]].text.get_data());
}
for (const uint64_t dyn_buffer : dynamic_buffers) {
hash_build.append("[dynamic_buffer]");
hash_build.append(uitos(dyn_buffer));
}
group_sha256[E.key] = hash_build.as_string().sha256_text();
if (!shader_cache_user_dir.is_empty()) {
@@ -880,12 +890,13 @@ void ShaderRD::_initialize_cache() {
}
// Same as above, but allows specifying shader compilation groups.
void ShaderRD::initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers) {
void ShaderRD::initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers, const Vector<uint64_t> &p_dynamic_buffers) {
ERR_FAIL_COND(variant_defines.size());
ERR_FAIL_COND(p_variant_defines.is_empty());
general_defines = p_general_defines.utf8();
immutable_samplers = p_immutable_samplers;
dynamic_buffers = p_dynamic_buffers;
int max_group_id = 0;
@@ -962,7 +973,7 @@ void ShaderRD::set_shader_cache_save_debug(bool p_enable) {
shader_cache_save_debug = p_enable;
}
Vector<RD::ShaderStageSPIRVData> ShaderRD::compile_stages(const Vector<String> &p_stage_sources) {
Vector<RD::ShaderStageSPIRVData> ShaderRD::compile_stages(const Vector<String> &p_stage_sources, const Vector<uint64_t> &p_dynamic_buffers) {
RD::ShaderStageSPIRVData stage;
Vector<RD::ShaderStageSPIRVData> stages;
String error;
@@ -974,6 +985,7 @@ Vector<RD::ShaderStageSPIRVData> ShaderRD::compile_stages(const Vector<String> &
}
stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::ShaderStage(i), p_stage_sources[i], RD::SHADER_LANGUAGE_GLSL, &error);
stage.dynamic_buffers = p_dynamic_buffers;
stage.shader_stage = RD::ShaderStage(i);
if (!stage.spirv.is_empty()) {
stages.push_back(stage);

View File

@@ -65,6 +65,7 @@ private:
Vector<bool> group_enabled;
Vector<RD::PipelineImmutableSampler> immutable_samplers;
Vector<uint64_t> dynamic_buffers;
struct Version {
Mutex *mutex = nullptr;
@@ -225,6 +226,8 @@ public:
const String &get_name() const;
const Vector<uint64_t> &get_dynamic_buffers() const;
static void shaders_embedded_set_lock();
static const ShaderVersionPairSet &shaders_embedded_set_get();
static void shaders_embedded_set_unlock();
@@ -237,15 +240,26 @@ public:
static void set_shader_cache_save_compressed_zstd(bool p_enable);
static void set_shader_cache_save_debug(bool p_enable);
static Vector<RD::ShaderStageSPIRVData> compile_stages(const Vector<String> &p_stage_sources);
static Vector<RD::ShaderStageSPIRVData> compile_stages(const Vector<String> &p_stage_sources, const Vector<uint64_t> &p_dynamic_buffers);
static PackedByteArray save_shader_cache_bytes(const LocalVector<int> &p_variants, const Vector<Vector<uint8_t>> &p_variant_data);
Vector<String> version_build_variant_stage_sources(RID p_version, int p_variant);
RS::ShaderNativeSourceCode version_get_native_source_code(RID p_version);
String version_get_cache_file_relative_path(RID p_version, int p_group, const String &p_api_name);
void initialize(const Vector<String> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>());
void initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>());
struct DynamicBuffer {
static uint64_t encode(uint32_t p_set_id, uint32_t p_binding) {
return uint64_t(p_set_id) << 32ul | uint64_t(p_binding);
}
};
// Dynamic Buffers specifies Which buffers will be persistent/dynamic when used.
// See DynamicBuffer::encode. We need this argument because SPIR-V does not distinguish between a
// uniform buffer and a dynamic uniform buffer. At shader level they're the same thing, but the PSO
// is created slightly differently and they're bound differently.
// On D3D12 the Root Layout is also different.
void initialize(const Vector<String> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>(), const Vector<uint64_t> &p_dynamic_buffers = Vector<uint64_t>());
void initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>(), const Vector<uint64_t> &p_dynamic_buffers = Vector<uint64_t>());
virtual ~ShaderRD();
};

View File

@@ -94,6 +94,8 @@ public:
void update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p_debug_mode, RID p_env, RID p_reflection_probe_instance, RID p_camera_attributes, bool p_pancake_shadows, const Size2i &p_screen_size, const Color &p_default_bg_color, float p_luminance_multiplier, bool p_opaque_render_buffers, bool p_apply_alpha_multiplier);
virtual RID get_uniform_buffer() const override;
static uint32_t get_uniform_buffer_size_bytes() { return sizeof(UBODATA); }
private:
RID uniform_buffer; // loaded into this uniform buffer (supplied externally)