diff --git a/core/templates/hash_map.h b/core/templates/hash_map.h index 63a9c0488ad..f3e30b23d3f 100644 --- a/core/templates/hash_map.h +++ b/core/templates/hash_map.h @@ -71,6 +71,7 @@ public: static constexpr uint32_t MIN_CAPACITY_INDEX = 2; // Use a prime. static constexpr float MAX_OCCUPANCY = 0.75; static constexpr uint32_t EMPTY_HASH = 0; + using KV = KeyValue; // Type alias for easier access to KeyValue. private: HashMapElement **_elements = nullptr; @@ -590,6 +591,22 @@ public: } } + HashMap(HashMap &&p_other) { + _elements = p_other._elements; + _hashes = p_other._hashes; + _head_element = p_other._head_element; + _tail_element = p_other._tail_element; + _capacity_idx = p_other._capacity_idx; + _size = p_other._size; + + p_other._elements = nullptr; + p_other._hashes = nullptr; + p_other._head_element = nullptr; + p_other._tail_element = nullptr; + p_other._capacity_idx = MIN_CAPACITY_INDEX; + p_other._size = 0; + } + void operator=(const HashMap &p_other) { if (this == &p_other) { return; // Ignore self assignment. @@ -609,6 +626,36 @@ public: } } + HashMap &operator=(HashMap &&p_other) { + if (this == &p_other) { + return *this; + } + + if (_size != 0) { + clear(); + } + if (_elements != nullptr) { + Memory::free_static(_elements); + Memory::free_static(_hashes); + } + + _elements = p_other._elements; + _hashes = p_other._hashes; + _head_element = p_other._head_element; + _tail_element = p_other._tail_element; + _capacity_idx = p_other._capacity_idx; + _size = p_other._size; + + p_other._elements = nullptr; + p_other._hashes = nullptr; + p_other._head_element = nullptr; + p_other._tail_element = nullptr; + p_other._capacity_idx = MIN_CAPACITY_INDEX; + p_other._size = 0; + + return *this; + } + HashMap(uint32_t p_initial_capacity) { // Capacity can't be 0. 
_capacity_idx = 0; diff --git a/doc/classes/RDVertexAttribute.xml b/doc/classes/RDVertexAttribute.xml index 04e4bb53a04..ef5e8f8c97d 100644 --- a/doc/classes/RDVertexAttribute.xml +++ b/doc/classes/RDVertexAttribute.xml @@ -9,6 +9,10 @@ + + The index of the buffer in the vertex buffer array that this vertex attribute is bound to. When set to [code]-1[/code], it defaults to the index of the attribute. + [b]Note:[/b] You cannot mix attributes that have an explicitly assigned binding with attributes that use the implicit one (i.e. [code]-1[/code]). Either all attributes must have their binding set to [code]-1[/code], or all must have explicit bindings. + The way that this attribute's data is interpreted when sent to a shader. diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml index 12958fe24b6..00526951b19 100644 --- a/doc/classes/RenderingDevice.xml +++ b/doc/classes/RenderingDevice.xml @@ -335,6 +335,17 @@ Binds [param vertex_array] to the specified [param draw_list]. + + + + + + + + + Binds a set of [param vertex_buffers] directly to the specified [param draw_list] using [param vertex_format] without creating a vertex array RID. Provide the number of vertices in [param vertex_count]; optional per-buffer byte [param offsets] may also be supplied. 
+ + diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 865d862e217..2ba7cb509cb 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -1165,6 +1165,24 @@ uint8_t *RenderingDeviceDriverD3D12::buffer_persistent_map_advance(BufferID p_bu return buf_info->persistent_ptr + buf_info->frame_idx * buf_info->size; } +uint64_t RenderingDeviceDriverD3D12::buffer_get_dynamic_offsets(Span p_buffers) { + uint64_t mask = 0u; + uint64_t shift = 0u; + + for (const BufferID &buf : p_buffers) { + const BufferInfo *buf_info = (const BufferInfo *)buf.id; + if (!buf_info->is_dynamic()) { + continue; + } + const BufferDynamicInfo *dyn_buf = (const BufferDynamicInfo *)buf.id; + mask |= dyn_buf->frame_idx << shift; + // We can encode the frame index in 2 bits since frame_count won't be > 4. + shift += 2UL; + } + + return mask; +} + uint64_t RenderingDeviceDriverD3D12::buffer_get_device_address(BufferID p_buffer) { const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id; return buf_info->resource->GetGPUVirtualAddress(); @@ -2183,27 +2201,37 @@ bool RenderingDeviceDriverD3D12::sampler_is_format_supported_for_filter(DataForm /**** VERTEX ARRAY ****/ /**********************/ -RDD::VertexFormatID RenderingDeviceDriverD3D12::vertex_format_create(VectorView p_vertex_attribs) { +RDD::VertexFormatID RenderingDeviceDriverD3D12::vertex_format_create(Span p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) { VertexFormatInfo *vf_info = VersatileResource::allocate(resources_allocator); vf_info->input_elem_descs.resize(p_vertex_attribs.size()); - vf_info->vertex_buffer_strides.resize(p_vertex_attribs.size()); + + uint32_t max_binding = 0; for (uint32_t i = 0; i < p_vertex_attribs.size(); i++) { - vf_info->input_elem_descs[i] = {}; - vf_info->input_elem_descs[i].SemanticName = "TEXCOORD"; - vf_info->input_elem_descs[i].SemanticIndex =
p_vertex_attribs[i].location; - vf_info->input_elem_descs[i].Format = RD_TO_D3D12_FORMAT[p_vertex_attribs[i].format].general_format; - vf_info->input_elem_descs[i].InputSlot = i; // TODO: Can the same slot be used if data comes from the same buffer (regardless format)? - vf_info->input_elem_descs[i].AlignedByteOffset = p_vertex_attribs[i].offset; - if (p_vertex_attribs[i].frequency == VERTEX_FREQUENCY_INSTANCE) { - vf_info->input_elem_descs[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; - vf_info->input_elem_descs[i].InstanceDataStepRate = 1; + D3D12_INPUT_ELEMENT_DESC &input_element_desc = vf_info->input_elem_descs[i]; + const VertexAttribute &vertex_attrib = p_vertex_attribs[i]; + const VertexAttributeBinding &vertex_binding = p_vertex_bindings[vertex_attrib.binding]; + + input_element_desc = {}; + input_element_desc.SemanticName = "TEXCOORD"; + input_element_desc.SemanticIndex = vertex_attrib.location; + input_element_desc.Format = RD_TO_D3D12_FORMAT[vertex_attrib.format].general_format; + input_element_desc.InputSlot = vertex_attrib.binding; + input_element_desc.AlignedByteOffset = vertex_attrib.offset; + if (vertex_binding.frequency == VERTEX_FREQUENCY_INSTANCE) { + input_element_desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA; + input_element_desc.InstanceDataStepRate = 1; } else { - vf_info->input_elem_descs[i].InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; - vf_info->input_elem_descs[i].InstanceDataStepRate = 0; + input_element_desc.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + input_element_desc.InstanceDataStepRate = 0; } - vf_info->vertex_buffer_strides[i] = p_vertex_attribs[i].stride; + max_binding = MAX(max_binding, vertex_attrib.binding + 1); + } + + vf_info->vertex_buffer_strides.resize(max_binding); + for (const VertexAttributeBindingsMap::KV &vertex_binding_pair : p_vertex_bindings) { + vf_info->vertex_buffer_strides[vertex_binding_pair.key] = vertex_binding_pair.value.stride; } 
return VertexFormatID(vf_info); @@ -5378,7 +5406,7 @@ void RenderingDeviceDriverD3D12::command_render_draw_indirect_count(CommandBuffe cmd_buf_info->cmd_list->ExecuteIndirect(indirect_cmd_signatures.draw.Get(), p_max_draw_count, indirect_buf_info->resource, p_offset, count_buf_info->resource, p_count_buffer_offset); } -void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) { +void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) { CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; DEV_ASSERT(cmd_buf_info->render_pass_state.current_subpass != UINT32_MAX); @@ -5390,8 +5418,15 @@ void RenderingDeviceDriverD3D12::command_render_bind_vertex_buffers(CommandBuffe for (uint32_t i = 0; i < p_binding_count; i++) { BufferInfo *buffer_info = (BufferInfo *)p_buffers[i].id; + uint64_t dynamic_offset = 0; // 64-bit so frame_idx * size cannot truncate before it is added to the GPU address. + if (buffer_info->is_dynamic()) { + uint64_t buffer_frame_idx = p_dynamic_offsets & 0x3; // Assuming max 4 frames. 
+ p_dynamic_offsets >>= 2; + dynamic_offset = buffer_frame_idx * buffer_info->size; + } + cmd_buf_info->render_pass_state.vertex_buffer_views[i] = {}; - cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + p_offsets[i]; + cmd_buf_info->render_pass_state.vertex_buffer_views[i].BufferLocation = buffer_info->resource->GetGPUVirtualAddress() + dynamic_offset + p_offsets[i]; cmd_buf_info->render_pass_state.vertex_buffer_views[i].SizeInBytes = buffer_info->size - p_offsets[i]; if (!barrier_capabilities.enhanced_barriers_supported) { _resource_transition_batch(cmd_buf_info, buffer_info, 0, 1, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER); diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 827eba2315b..ad12b686250 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -347,6 +347,7 @@ public: virtual uint8_t *buffer_map(BufferID p_buffer) override final; virtual void buffer_unmap(BufferID p_buffer) override final; virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final; + virtual uint64_t buffer_get_dynamic_offsets(Span p_buffers) override final; virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final; /*****************/ @@ -429,7 +430,7 @@ private: }; public: - virtual VertexFormatID vertex_format_create(VectorView p_vertex_attribs) override final; + virtual VertexFormatID vertex_format_create(Span p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) override final; virtual void vertex_format_free(VertexFormatID p_vertex_format) override final; /******************/ @@ -862,7 +863,7 @@ public: virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t 
p_max_draw_count, uint32_t p_stride) override final; // Buffer binding. - virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final; + virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) override final; virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final; private: diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h index df1e6da3109..3860218a0e7 100644 --- a/drivers/metal/metal_objects.h +++ b/drivers/metal/metal_objects.h @@ -663,7 +663,7 @@ public: uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance); - void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets); + void render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets); void render_bind_index_buffer(RDD::BufferID p_buffer, RDD::IndexBufferFormat p_format, uint64_t p_offset); void render_draw_indexed(uint32_t p_index_count, diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm index 4a85081b2bc..7ec81f44588 100644 --- a/drivers/metal/metal_objects.mm +++ b/drivers/metal/metal_objects.mm @@ -783,10 +783,12 @@ void MDCommandBuffer::_render_set_dirty_state() { if (render.dirty.has_flag(RenderState::DIRTY_VERTEX)) { uint32_t p_binding_count = render.vertex_buffers.size(); - uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1); - [render.encoder setVertexBuffers:render.vertex_buffers.ptr() - offsets:render.vertex_offsets.ptr() - withRange:NSMakeRange(first, p_binding_count)]; + if (p_binding_count > 0) { + 
uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1); + [render.encoder setVertexBuffers:render.vertex_buffers.ptr() + offsets:render.vertex_offsets.ptr() + withRange:NSMakeRange(first, p_binding_count)]; + } } render.resource_tracker.encode(render.encoder); @@ -1252,24 +1254,49 @@ void MDCommandBuffer::render_draw(uint32_t p_vertex_count, baseInstance:p_first_instance]; } -void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets) { +void MDCommandBuffer::render_bind_vertex_buffers(uint32_t p_binding_count, const RDD::BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) { DEV_ASSERT(type == MDCommandBufferStateType::Render); render.vertex_buffers.resize(p_binding_count); render.vertex_offsets.resize(p_binding_count); + // Are the existing buffer bindings the same? + bool same = true; + // Reverse the buffers, as their bindings are assigned in descending order.
for (uint32_t i = 0; i < p_binding_count; i += 1) { - const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffers[p_binding_count - i - 1].id; - render.vertex_buffers[i] = buf_info->metal_buffer; - render.vertex_offsets[i] = p_offsets[p_binding_count - i - 1]; + // Visit buffers in caller order so p_dynamic_offsets is consumed in the order buffer_get_dynamic_offsets() packed it. + const RenderingDeviceDriverMetal::BufferInfo *buf_info = (const RenderingDeviceDriverMetal::BufferInfo *)p_buffers[i].id; + const uint32_t slot = p_binding_count - i - 1; // Bindings are assigned in descending order. + + NSUInteger dynamic_offset = 0; + if (buf_info->is_dynamic()) { + const MetalBufferDynamicInfo *dyn_buf = (const MetalBufferDynamicInfo *)buf_info; + uint64_t frame_idx = p_dynamic_offsets & 0x3; + p_dynamic_offsets >>= 2; + dynamic_offset = frame_idx * dyn_buf->size_bytes; + } + if (render.vertex_buffers[slot] != buf_info->metal_buffer) { + render.vertex_buffers[slot] = buf_info->metal_buffer; + same = false; + } + + render.vertex_offsets[slot] = dynamic_offset + p_offsets[i]; } if (render.encoder) { uint32_t first = device_driver->get_metal_buffer_index_for_vertex_attribute_binding(p_binding_count - 1); - [render.encoder setVertexBuffers:render.vertex_buffers.ptr() - offsets:render.vertex_offsets.ptr() - withRange:NSMakeRange(first, p_binding_count)]; + if (same) { + NSUInteger *offset_ptr = render.vertex_offsets.ptr(); + for (uint32_t i = first; i < first + p_binding_count; i++) { + [render.encoder setVertexBufferOffset:*offset_ptr atIndex:i]; + offset_ptr++; + } + } else { + [render.encoder setVertexBuffers:render.vertex_buffers.ptr() + offsets:render.vertex_offsets.ptr() + withRange:NSMakeRange(first, p_binding_count)]; + } render.dirty.clear_flag(RenderState::DIRTY_VERTEX); } else { render.dirty.set_flag(RenderState::DIRTY_VERTEX); @@ -1394,7 +1421,9 @@ void MDCommandBuffer::RenderState::reset() { viewports.clear(); 
scissors.clear(); blend_constants.reset(); + bzero(vertex_buffers.ptr(), sizeof(id __unsafe_unretained) * vertex_buffers.size()); vertex_buffers.clear(); + bzero(vertex_offsets.ptr(), sizeof(NSUInteger) * vertex_offsets.size()); vertex_offsets.clear(); resource_tracker.reset(); } diff --git 
a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index b1b6b64030d..bd10c7a4f21 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -130,6 +130,7 @@ public: virtual uint8_t *buffer_map(BufferID p_buffer) override final; virtual void buffer_unmap(BufferID p_buffer) override final; virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final; + virtual uint64_t buffer_get_dynamic_offsets(Span p_buffers) override final; virtual void buffer_flush(BufferID p_buffer) override final; virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final; @@ -164,7 +165,7 @@ public: private: public: - virtual VertexFormatID vertex_format_create(VectorView p_vertex_attribs) override final; + virtual VertexFormatID vertex_format_create(Span p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) override final; virtual void vertex_format_free(VertexFormatID p_vertex_format) override final; #pragma mark - Barriers @@ -403,7 +404,7 @@ public: virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final; // Buffer binding. - virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final; + virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) override final; virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final; // Dynamic state. 
diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 657f7815586..4052c3dae98 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -186,6 +186,23 @@ uint8_t *RenderingDeviceDriverMetal::buffer_persistent_map_advance(BufferID p_bu return (uint8_t *)buf_info->metal_buffer.contents + buf_info->next_frame_index(_frame_count) * buf_info->size_bytes; } +uint64_t RenderingDeviceDriverMetal::buffer_get_dynamic_offsets(Span p_buffers) { + uint64_t mask = 0u; + uint64_t shift = 0u; + + for (const BufferID &buf : p_buffers) { + const BufferInfo *buf_info = (const BufferInfo *)buf.id; + if (!buf_info->is_dynamic()) { + continue; + } + mask |= buf_info->frame_index() << shift; + // We can encode the frame index in 2 bits since frame_count won't be > 4. + shift += 2UL; + } + + return mask; +} + void RenderingDeviceDriverMetal::buffer_flush(BufferID p_buffer) { // Nothing to do. 
} @@ -809,27 +826,33 @@ bool RenderingDeviceDriverMetal::sampler_is_format_supported_for_filter(DataForm #pragma mark - Vertex Array -RDD::VertexFormatID RenderingDeviceDriverMetal::vertex_format_create(VectorView p_vertex_attribs) { +RDD::VertexFormatID RenderingDeviceDriverMetal::vertex_format_create(Span p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) { MTLVertexDescriptor *desc = MTLVertexDescriptor.vertexDescriptor; - for (uint32_t i = 0; i < p_vertex_attribs.size(); i++) { - VertexAttribute const &vf = p_vertex_attribs[i]; - - ERR_FAIL_COND_V_MSG(get_format_vertex_size(vf.format) == 0, VertexFormatID(), - "Data format for attachment (" + itos(i) + "), '" + FORMAT_NAMES[vf.format] + "', is not valid for a vertex array."); + for (const VertexAttributeBindingsMap::KV &kv : p_vertex_bindings) { + uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(kv.key); + MTLVertexBufferLayoutDescriptor *ld = desc.layouts[idx]; + if (kv.value.stride != 0) { + ld.stepFunction = kv.value.frequency == VERTEX_FREQUENCY_VERTEX ? 
MTLVertexStepFunctionPerVertex : MTLVertexStepFunctionPerInstance; + ld.stepRate = 1; + ld.stride = kv.value.stride; + } else { + ld.stepFunction = MTLVertexStepFunctionConstant; + ld.stepRate = 0; + ld.stride = 0; + } + DEV_ASSERT(ld.stride == desc.layouts[idx].stride); + } + for (const VertexAttribute &vf : p_vertex_attribs) { desc.attributes[vf.location].format = pixel_formats->getMTLVertexFormat(vf.format); desc.attributes[vf.location].offset = vf.offset; - uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(i); + uint32_t idx = get_metal_buffer_index_for_vertex_attribute_binding(vf.binding); desc.attributes[vf.location].bufferIndex = idx; if (vf.stride == 0) { - desc.layouts[idx].stepFunction = MTLVertexStepFunctionConstant; - desc.layouts[idx].stepRate = 0; - desc.layouts[idx].stride = pixel_formats->getBytesPerBlock(vf.format); - } else { - desc.layouts[idx].stepFunction = vf.frequency == VERTEX_FREQUENCY_VERTEX ? MTLVertexStepFunctionPerVertex : MTLVertexStepFunctionPerInstance; - desc.layouts[idx].stepRate = 1; - desc.layouts[idx].stride = vf.stride; + // Constant attribute, so we must determine the stride to satisfy Metal API. 
+ uint32_t stride = desc.layouts[idx].stride; + desc.layouts[idx].stride = std::max(stride, vf.offset + pixel_formats->getBytesPerBlock(vf.format)); } } @@ -1768,9 +1791,9 @@ void RenderingDeviceDriverMetal::command_render_draw_indirect_count(CommandBuffe cb->render_draw_indirect_count(p_indirect_buffer, p_offset, p_count_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); } -void RenderingDeviceDriverMetal::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) { +void RenderingDeviceDriverMetal::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) { MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id); - cb->render_bind_vertex_buffers(p_binding_count, p_buffers, p_offsets); + cb->render_bind_vertex_buffers(p_binding_count, p_buffers, p_offsets, p_dynamic_offsets); } void RenderingDeviceDriverMetal::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) { diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 939201660cd..1342be92304 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -1815,6 +1815,23 @@ uint8_t *RenderingDeviceDriverVulkan::buffer_persistent_map_advance(BufferID p_b return buf_info->persistent_ptr + buf_info->frame_idx * buf_info->size; } +uint64_t RenderingDeviceDriverVulkan::buffer_get_dynamic_offsets(Span p_buffers) { + uint64_t mask = 0u; + uint64_t shift = 0u; + + for (const BufferID &buf : p_buffers) { + const BufferInfo *buf_info = (const BufferInfo *)buf.id; + if (!buf_info->is_dynamic()) { + continue; + } + mask |= buf_info->frame_idx << shift; + // We can encode the frame index in 2 bits since frame_count won't 
be > 4. + shift += 2UL; + } + + return mask; +} + void RenderingDeviceDriverVulkan::buffer_flush(BufferID p_buffer) { BufferDynamicInfo *buf_info = (BufferDynamicInfo *)p_buffer.id; @@ -2525,19 +2542,23 @@ bool RenderingDeviceDriverVulkan::sampler_is_format_supported_for_filter(DataFor /**** VERTEX ARRAY ****/ /**********************/ -RDD::VertexFormatID RenderingDeviceDriverVulkan::vertex_format_create(VectorView p_vertex_attribs) { +RDD::VertexFormatID RenderingDeviceDriverVulkan::vertex_format_create(Span p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) { // Pre-bookkeep. VertexFormatInfo *vf_info = VersatileResource::allocate(resources_allocator); - vf_info->vk_bindings.resize(p_vertex_attribs.size()); + vf_info->vk_bindings.reserve(p_vertex_bindings.size()); + for (const VertexAttributeBindingsMap::KV &E : p_vertex_bindings) { + const VertexAttributeBinding &binding = E.value; + VkVertexInputBindingDescription vk_binding = {}; + vk_binding.binding = E.key; + vk_binding.stride = binding.stride; + vk_binding.inputRate = binding.frequency == VERTEX_FREQUENCY_INSTANCE ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; + vf_info->vk_bindings.push_back(vk_binding); + } vf_info->vk_attributes.resize(p_vertex_attribs.size()); for (uint32_t i = 0; i < p_vertex_attribs.size(); i++) { - vf_info->vk_bindings[i] = {}; - vf_info->vk_bindings[i].binding = i; - vf_info->vk_bindings[i].stride = p_vertex_attribs[i].stride; - vf_info->vk_bindings[i].inputRate = p_vertex_attribs[i].frequency == VERTEX_FREQUENCY_INSTANCE ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vf_info->vk_attributes[i] = {}; - vf_info->vk_attributes[i].binding = i; + vf_info->vk_attributes[i].binding = p_vertex_attribs[i].binding; vf_info->vk_attributes[i].location = p_vertex_attribs[i].location; vf_info->vk_attributes[i].format = RD_TO_VK_FORMAT[p_vertex_attribs[i].format]; vf_info->vk_attributes[i].offset = p_vertex_attribs[i].offset; @@ -5097,14 +5118,22 @@ void RenderingDeviceDriverVulkan::command_render_draw_indirect_count(CommandBuff vkCmdDrawIndirectCount(command_buffer->vk_command_buffer, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride); } -void RenderingDeviceDriverVulkan::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) { +void RenderingDeviceDriverVulkan::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) { const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; - VkBuffer *vk_buffers = ALLOCA_ARRAY(VkBuffer, p_binding_count); + uint64_t *vk_offsets = ALLOCA_ARRAY(uint64_t, p_binding_count); for (uint32_t i = 0; i < p_binding_count; i++) { + const BufferInfo *buf_info = (const BufferInfo *)p_buffers[i].id; + uint64_t offset = p_offsets[i]; + if (buf_info->is_dynamic()) { + uint64_t frame_idx = p_dynamic_offsets & 0x3; // Assuming max 4 frames. 
+ p_dynamic_offsets >>= 2; + offset += frame_idx * buf_info->size; + } vk_buffers[i] = ((const BufferInfo *)p_buffers[i].id)->vk_buffer; + vk_offsets[i] = offset; } - vkCmdBindVertexBuffers(command_buffer->vk_command_buffer, 0, p_binding_count, vk_buffers, p_offsets); + vkCmdBindVertexBuffers(command_buffer->vk_command_buffer, 0, p_binding_count, vk_buffers, vk_offsets); } void RenderingDeviceDriverVulkan::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) { diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 8789fde04c5..6f60c69008f 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -224,6 +224,7 @@ public: virtual uint8_t *buffer_map(BufferID p_buffer) override final; virtual void buffer_unmap(BufferID p_buffer) override final; virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) override final; + virtual uint64_t buffer_get_dynamic_offsets(Span p_buffers) override final; virtual void buffer_flush(BufferID p_buffer) override final; virtual uint64_t buffer_get_device_address(BufferID p_buffer) override final; @@ -282,7 +283,7 @@ private: }; public: - virtual VertexFormatID vertex_format_create(VectorView p_vertex_attribs) override final; + virtual VertexFormatID vertex_format_create(Span p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) override final; virtual void vertex_format_free(VertexFormatID p_vertex_format) override final; /******************/ @@ -603,7 +604,7 @@ public: virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) override final; // Buffer binding. 
- virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) override final; + virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) override final; virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) override final; // Dynamic state. diff --git a/servers/rendering/multi_uma_buffer.h b/servers/rendering/multi_uma_buffer.h index 605a6aac018..febbced2ee4 100644 --- a/servers/rendering/multi_uma_buffer.h +++ b/servers/rendering/multi_uma_buffer.h @@ -111,6 +111,12 @@ public: } }; +enum class MultiUmaBufferType : uint8_t { + UNIFORM, + STORAGE, + VERTEX, +}; + /// Interface for making it easier to work with UMA. /// /// # What is UMA? @@ -157,7 +163,7 @@ public: /// /// Example code 01: /// MultiUmaBuffer<1> uma_buffer = MultiUmaBuffer<1>("Debug name displayed if run with --verbose"); -/// uma_buffer.set_size(0, max_size_bytes, false); +/// uma_buffer.set_uniform_size(0, max_size_bytes); /// /// for(uint32_t i = 0u; i < num_passes; ++i) { /// uma_buffer.prepare_for_upload(); // Creates a new buffer (if none exists already) @@ -225,9 +231,9 @@ public: /// MultiUmaBuffer<1> spot_lights = /*...*/; /// MultiUmaBuffer<1> directional_lights = /*...*/; /// -/// omni_lights.set_size(0u, omni_size); -/// spot_lights.set_size(0u, spot_size); -/// directional_lights.set_size(0u, dir_size); +/// omni_lights.set_uniform_size(0u, omni_size); +/// spot_lights.set_uniform_size(0u, spot_size); +/// directional_lights.set_uniform_size(0u, dir_size); /// /// omni_lights.prepare_for_upload(); /// spot_lights.prepare_for_upload(); @@ -237,9 +243,9 @@ public: /// /// MultiUmaBuffer<3> lights = /*...*/; /// -/// lights.set_size(0u, omni_size); -/// lights.set_size(1u, 
spot_size); -/// lights.set_size(2u, dir_size); +/// lights.set_uniform_size(0u, omni_size); +/// lights.set_uniform_size(1u, spot_size); +/// lights.set_uniform_size(2u, dir_size); /// /// lights.prepare_for_upload(); /// @@ -276,7 +282,11 @@ public: /// Launching godot with --verbose will print diagnostic information. template class MultiUmaBuffer : public MultiUmaBufferBase { - uint32_t buffer_sizes[NUM_BUFFERS] = {}; + struct BufferInfo { + uint32_t size_bytes = 0; + MultiUmaBufferType type = MultiUmaBufferType::UNIFORM; + }; + BufferInfo buffer_info[NUM_BUFFERS]; #ifdef DEV_ENABLED bool can_upload[NUM_BUFFERS] = {}; #endif @@ -284,13 +294,19 @@ class MultiUmaBuffer : public MultiUmaBufferBase { void push() { RenderingDevice *rd = RD::RenderingDevice::get_singleton(); for (uint32_t i = 0u; i < NUM_BUFFERS; ++i) { - const bool is_storage = buffer_sizes[i] & 0x80000000u; - const uint32_t size_bytes = buffer_sizes[i] & ~0x80000000u; + const BufferInfo &info = buffer_info[i]; RID buffer; - if (is_storage) { - buffer = rd->storage_buffer_create(size_bytes, Vector(), 0, RD::BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT); - } else { - buffer = rd->uniform_buffer_create(size_bytes, Vector(), RD::BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT); + switch (info.type) { + case MultiUmaBufferType::STORAGE: + buffer = rd->storage_buffer_create(info.size_bytes, Vector(), BitField(), RD::BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT); + break; + case MultiUmaBufferType::VERTEX: + buffer = rd->vertex_buffer_create(info.size_bytes, Vector(), RD::BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT); + break; + case MultiUmaBufferType::UNIFORM: + default: + buffer = rd->uniform_buffer_create(info.size_bytes, Vector(), RD::BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT); + break; } buffers.push_back(buffer); } @@ -302,14 +318,31 @@ public: uint32_t get_curr_idx() const { return curr_idx; } - void set_size(uint32_t p_idx, uint32_t p_size_bytes, bool p_is_storage) { + void set_size(uint32_t p_idx, uint32_t p_size_bytes, 
MultiUmaBufferType p_type) { DEV_ASSERT(buffers.is_empty()); - buffer_sizes[p_idx] = p_size_bytes | (p_is_storage ? 0x80000000u : 0u); + buffer_info[p_idx].size_bytes = p_size_bytes; + buffer_info[p_idx].type = p_type; curr_idx = UINT32_MAX; last_frame_mapped = UINT64_MAX; } - uint32_t get_size(uint32_t p_idx) const { return buffer_sizes[p_idx] & ~0x80000000u; } + void set_size(uint32_t p_idx, uint32_t p_size_bytes, bool p_is_storage) { + set_size(p_idx, p_size_bytes, p_is_storage ? MultiUmaBufferType::STORAGE : MultiUmaBufferType::UNIFORM); + } + + void set_uniform_size(uint32_t p_idx, uint32_t p_size_bytes) { + set_size(p_idx, p_size_bytes, MultiUmaBufferType::UNIFORM); + } + + void set_storage_size(uint32_t p_idx, uint32_t p_size_bytes) { + set_size(p_idx, p_size_bytes, MultiUmaBufferType::STORAGE); + } + + void set_vertex_size(uint32_t p_idx, uint32_t p_size_bytes) { + set_size(p_idx, p_size_bytes, MultiUmaBufferType::VERTEX); + } + + uint32_t get_size(uint32_t p_idx) const { return buffer_info[p_idx].size_bytes; } // Gets the raw buffer. Use with care. // If you call this function, make sure to have called prepare_for_upload() first. @@ -320,7 +353,7 @@ public: /** * @param p_append True if you wish to append more data to existing buffer. - * @return True if it's possible to append. False if the internal buffer changed. + * @return False if it's possible to append. True if the internal buffer changed. 
*/ bool prepare_for_map(bool p_append) { RenderingDevice *rd = RD::RenderingDevice::get_singleton(); diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index cb893150cf1..10f96eeb060 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -772,7 +772,7 @@ void RenderForwardClustered::SceneState::grow_instance_buffer(RenderListType p_r if (instance_buffer[p_render_list].get_size(0u) < p_req_element_count * sizeof(SceneState::InstanceData)) { instance_buffer[p_render_list].uninit(); uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), p_req_element_count)); - instance_buffer[p_render_list].set_size(0u, new_size * sizeof(SceneState::InstanceData), true); + instance_buffer[p_render_list].set_storage_size(0u, new_size * sizeof(SceneState::InstanceData)); curr_gpu_ptr[p_render_list] = nullptr; } @@ -3281,7 +3281,7 @@ RID RenderForwardClustered::_setup_render_pass_uniform_set(RenderListType p_rend if (scene_state.instance_buffer[p_render_list].get_size(0u) == 0u) { // Any buffer will do since it's not used, so just create one. // We can't use scene_shader.default_vec4_xform_buffer because it's not dynamic. 
- scene_state.instance_buffer[p_render_list].set_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData), true); + scene_state.instance_buffer[p_render_list].set_storage_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData)); scene_state.instance_buffer[p_render_list].prepare_for_upload(); } RID instance_buffer = scene_state.instance_buffer[p_render_list]._get(0u); @@ -3650,7 +3650,7 @@ RID RenderForwardClustered::_setup_sdfgi_render_pass_uniform_set(RID p_albedo_te if (scene_state.instance_buffer[RENDER_LIST_SECONDARY].get_size(0u) == 0u) { // Any buffer will do since it's not used, so just create one. // We can't use scene_shader.default_vec4_xform_buffer because it's not dynamic. - scene_state.instance_buffer[RENDER_LIST_SECONDARY].set_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData), true); + scene_state.instance_buffer[RENDER_LIST_SECONDARY].set_storage_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData)); scene_state.instance_buffer[RENDER_LIST_SECONDARY].prepare_for_upload(); } RID instance_buffer = scene_state.instance_buffer[RENDER_LIST_SECONDARY]._get(0u); diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index ee12788ac7c..c90a63868ba 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -467,7 +467,7 @@ RID RenderForwardMobile::_setup_render_pass_uniform_set(RenderListType p_render_ if (scene_state.instance_buffer[p_render_list].get_size(0u) == 0u) { // Any buffer will do since it's not used, so just create one. // We can't use scene_shader.default_vec4_xform_buffer because it's not dynamic. 
- scene_state.instance_buffer[p_render_list].set_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData), true); + scene_state.instance_buffer[p_render_list].set_storage_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData)); scene_state.instance_buffer[p_render_list].prepare_for_upload(); } RID instance_buffer = scene_state.instance_buffer[p_render_list]._get(0u); @@ -1910,7 +1910,7 @@ void RenderForwardMobile::SceneState::grow_instance_buffer(RenderListType p_rend if (instance_buffer[p_render_list].get_size(0u) < p_req_element_count * sizeof(SceneState::InstanceData)) { instance_buffer[p_render_list].uninit(); uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), p_req_element_count)); - instance_buffer[p_render_list].set_size(0u, new_size * sizeof(SceneState::InstanceData), true); + instance_buffer[p_render_list].set_storage_size(0u, new_size * sizeof(SceneState::InstanceData)); curr_gpu_ptr[p_render_list] = nullptr; } @@ -2204,7 +2204,7 @@ void RenderForwardMobile::_setup_environment(const RenderDataRD *p_render_data, // May do this earlier in RenderSceneRenderRD::render_scene if (scene_state.uniform_buffers.get_size(0u) == 0u) { - scene_state.uniform_buffers.set_size(0u, p_render_data->scene_data->get_uniform_buffer_size_bytes(), false); + scene_state.uniform_buffers.set_uniform_size(0u, p_render_data->scene_data->get_uniform_buffer_size_bytes()); } float luminance_multiplier = p_render_data->render_buffers.is_valid() ? 
p_render_data->render_buffers->get_luminance_multiplier() : 1.0; diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp index cecc4db07d3..d0d6aea651f 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp @@ -35,6 +35,7 @@ #include "core/math/math_defs.h" #include "core/math/math_funcs.h" #include "core/math/transform_interpolator.h" +#include "core/templates/fixed_vector.h" #include "servers/rendering/renderer_rd/storage_rd/material_storage.h" #include "servers/rendering/renderer_rd/storage_rd/mesh_storage.h" #include "servers/rendering/renderer_rd/storage_rd/particles_storage.h" @@ -914,6 +915,11 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p } texture_info_map.clear(); + + // Save the previous instance data pointer in case more items are rendered in the same frame. + state.prev_instance_data = state.instance_data; + state.prev_instance_data_index = state.instance_data_index; + state.instance_data = nullptr; if (state.instance_data_index > 0) { // If there was any remaining instance data, it must be flushed. 
@@ -1728,8 +1734,8 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() { default_samplers.default_repeat = RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED; } - // preallocate 5 slots for uniform set 3 - state.batch_texture_uniforms.resize(5); + // preallocate slots for uniform set 3 + state.batch_texture_uniforms.resize(4); { //shader variants @@ -1750,10 +1756,7 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() { variants.push_back(base_define + "#define USE_ATTRIBUTES\n#define USE_POINT_SIZE\n"); // SHADER_VARIANT_ATTRIBUTES_POINTS } - Vector dynamic_buffers; - dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(BATCH_UNIFORM_SET, 4)); - - shader.canvas_shader.initialize(variants, global_defines, {}, dynamic_buffers); + shader.canvas_shader.initialize(variants, global_defines, {}, {}); shader.default_version_data = memnew(CanvasShaderData); shader.default_version_data->version = shader.canvas_shader.version_create(); @@ -1786,7 +1789,7 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() { actions.renames["NORMAL_MAP"] = "normal_map"; actions.renames["NORMAL_MAP_DEPTH"] = "normal_map_depth"; actions.renames["TEXTURE"] = "color_texture"; - actions.renames["TEXTURE_PIXEL_SIZE"] = "draw_data.color_texture_pixel_size"; + actions.renames["TEXTURE_PIXEL_SIZE"] = "read_draw_data_color_texture_pixel_size"; actions.renames["NORMAL_TEXTURE"] = "normal_texture"; actions.renames["SPECULAR_SHININESS_TEXTURE"] = "specular_texture"; actions.renames["SPECULAR_SHININESS"] = "specular_shininess"; @@ -1840,7 +1843,7 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() { actions.base_varying_index = 5; actions.global_buffer_array_variable = "global_shader_uniforms.data"; - actions.instance_uniform_index_variable = "instances.data[instance_index].instance_uniforms_ofs"; + actions.instance_uniform_index_variable = "read_draw_data_instance_offset"; shader.compiler.initialize(actions); } @@ -1954,6 +1957,64 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() { shader.quad_index_array = 
RD::get_singleton()->index_array_create(shader.quad_index_buffer, 0, 6); } + { + Vector vf; + uint32_t offset = 0; + RD::VertexAttribute vd; + vd.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT; + vd.stride = sizeof(InstanceData); + vd.frequency = RD::VERTEX_FREQUENCY_INSTANCE; + vd.location = 8; + vd.binding = 0; // Explicitly assign binding 0 for instance data. + vd.offset = offset; + offset += sizeof(float) * 4; + vf.push_back(vd); // attrib_A + + vd.location = 9; + vd.offset = offset; + offset += sizeof(float) * 4; + vf.push_back(vd); // attrib_B + + vd.location = 10; + vd.offset = offset; + offset += sizeof(float) * 4; + vf.push_back(vd); // attrib_C + + vd.location = 11; + vd.offset = offset; + offset += sizeof(float) * 4; + vf.push_back(vd); // attrib_D + + vd.location = 12; + vd.offset = offset; + offset += sizeof(float) * 4; + vf.push_back(vd); // attrib_E + + uint32_t attrib_F_index = vf.size(); + vd.location = 13; + vd.offset = offset; + offset += sizeof(float) * 4; + vf.push_back(vd); // attrib_F (RECT, NINEPATCH) + + vd.format = RD::DATA_FORMAT_R32G32B32A32_UINT; + vd.location = 14; + vd.offset = offset; + offset += sizeof(uint32_t) * 4; + vf.push_back(vd); // attrib_G + + vd.location = 15; + vd.offset = offset; + offset += sizeof(uint32_t) * 4; + vf.push_back(vd); // attrib_H + + // RECT, NINEPATCH + shader.quad_vertex_format_id = RD::get_singleton()->vertex_format_create(vf); + + // PRIMITIVE + vf.write[attrib_F_index].format = RD::DATA_FORMAT_R32G32B32A32_UINT; + shader.primitive_vertex_format_id = RD::get_singleton()->vertex_format_create(vf); + } + { //primitive primitive_arrays.index_array[0] = RD::get_singleton()->index_array_create(shader.quad_index_buffer, 0, 1); primitive_arrays.index_array[1] = RD::get_singleton()->index_array_create(shader.quad_index_buffer, 0, 2); @@ -2064,7 +2125,7 @@ void fragment() { state.max_instances_per_buffer = uint32_t(GLOBAL_GET("rendering/2d/batching/item_buffer_size")); state.max_instance_buffer_size = 
state.max_instances_per_buffer * sizeof(InstanceData); state.canvas_instance_batches.reserve(200); - state.instance_buffers.set_size(0, state.max_instance_buffer_size, true); + state.instance_buffers.set_vertex_size(0, state.max_instance_buffer_size); } } @@ -2131,8 +2192,6 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target // First item always forms its own batch. bool batch_broken = false; Batch *current_batch = _new_batch(batch_broken); - // Override the start position and index as we want to start from where we finished off last time. - current_batch->start = state.instance_data_index; for (int i = 0; i < p_item_count; i++) { Item *ci = items[i]; @@ -2384,6 +2443,19 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar _prepare_batch_texture_info(rect->texture, tex_state, tex_info); } + if (has_msdf != r_current_batch->use_msdf || rect->px_range != r_current_batch->msdf_pix_range || rect->outline != r_current_batch->msdf_outline) { + r_current_batch = _new_batch(r_batch_broken); + r_current_batch->use_msdf = has_msdf; + r_current_batch->msdf_pix_range = rect->px_range; + r_current_batch->msdf_outline = rect->outline; + } + + bool has_lcd = bool(rect->flags & CANVAS_RECT_LCD); + if (has_lcd != r_current_batch->use_lcd) { + r_current_batch = _new_batch(r_batch_broken); + r_current_batch->use_lcd = has_lcd; + } + if (r_current_batch->tex_info != tex_info) { r_current_batch = _new_batch(r_batch_broken); r_current_batch->tex_info = tex_info; @@ -2437,16 +2509,6 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar src_rect = Rect2(0, 0, 1, 1); } - if (has_msdf) { - instance_data->flags |= INSTANCE_FLAGS_USE_MSDF; - instance_data->msdf[0] = rect->px_range; // Pixel range. - instance_data->msdf[1] = rect->outline; // Outline size. - instance_data->msdf[2] = 0.f; // Reserved. - instance_data->msdf[3] = 0.f; // Reserved. 
- } else if (rect->flags & CANVAS_RECT_LCD) { - instance_data->flags |= INSTANCE_FLAGS_USE_LCD; - } - instance_data->modulation[0] = modulated.r; instance_data->modulation[1] = modulated.g; instance_data->modulation[2] = modulated.b; @@ -2500,8 +2562,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar } else { if (np->source != Rect2()) { src_rect = Rect2(np->source.position.x * tex_info->texpixel_size.width, np->source.position.y * tex_info->texpixel_size.height, np->source.size.x * tex_info->texpixel_size.width, np->source.size.y * tex_info->texpixel_size.height); - instance_data->color_texture_pixel_size[0] = 1.0 / np->source.size.width; - instance_data->color_texture_pixel_size[1] = 1.0 / np->source.size.height; + instance_data->ninepatch_pixel_size[0] = 1.0 / np->source.size.width; + instance_data->ninepatch_pixel_size[1] = 1.0 / np->source.size.height; } else { src_rect = Rect2(0, 0, 1, 1); } @@ -2572,7 +2634,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar r_current_batch->render_primitive = _primitive_type_to_render_primitive(polygon->primitive); } - InstanceData *instance_data = new_instance_data(*r_current_batch, template_instance); + InstanceData *instance_data = new_instance_data(*r_current_batch, template_instance, true); Color color = base_color; if (use_linear_colors) { @@ -2583,8 +2645,6 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar instance_data->modulation[1] = color.g; instance_data->modulation[2] = color.b; instance_data->modulation[3] = color.a; - - _add_to_batch(r_batch_broken, r_current_batch); } break; case Item::Command::TYPE_PRIMITIVE: { @@ -2693,7 +2753,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar _prepare_batch_texture_info(m->texture, tex_state, tex_info); } r_current_batch->tex_info = tex_info; - instance_data = new_instance_data(*r_current_batch, template_instance); + instance_data = 
new_instance_data(*r_current_batch, template_instance, true); r_current_batch->mesh_instance_count = 1; _update_transform_2d_to_mat2x3(base_transform * draw_transform * m->transform, instance_data->world); @@ -2720,7 +2780,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar _prepare_batch_texture_info(mm->texture, tex_state, tex_info); } r_current_batch->tex_info = tex_info; - instance_data = new_instance_data(*r_current_batch, template_instance); + instance_data = new_instance_data(*r_current_batch, template_instance, true); r_current_batch->flags |= 1; // multimesh, trails disabled @@ -2742,7 +2802,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar _prepare_batch_texture_info(pt->texture, tex_state, tex_info); } r_current_batch->tex_info = tex_info; - instance_data = new_instance_data(*r_current_batch, template_instance); + instance_data = new_instance_data(*r_current_batch, template_instance, true); uint32_t divisor = 1; r_current_batch->mesh_instance_count = particles_storage->particles_get_amount(pt->particles, divisor); @@ -2784,8 +2844,6 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar instance_data->modulation[1] = modulated.g; instance_data->modulation[2] = modulated.b; instance_data->modulation[3] = modulated.a; - - _add_to_batch(r_batch_broken, r_current_batch); } break; case Item::Command::TYPE_TRANSFORM: { @@ -2941,7 +2999,7 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha { RendererRD::TextureStorage *ts = RendererRD::TextureStorage::get_singleton(); - RIDSetKey key(p_batch->tex_info->state, p_batch->instance_buffer); + RIDSetKey key(p_batch->tex_info->state); const RID *uniform_set = rid_set_to_uniform_set.getptr(key); if (uniform_set == nullptr) { @@ -2950,7 +3008,6 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha uniform_ptrw[1] = RD::Uniform(RD::UNIFORM_TYPE_TEXTURE, 1, 
p_batch->tex_info->normal); uniform_ptrw[2] = RD::Uniform(RD::UNIFORM_TYPE_TEXTURE, 2, p_batch->tex_info->specular); uniform_ptrw[3] = RD::Uniform(RD::UNIFORM_TYPE_SAMPLER, 3, p_batch->tex_info->sampler); - uniform_ptrw[4] = RD::Uniform(RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC, 4, p_batch->instance_buffer); RID rid = RD::get_singleton()->uniform_set_create(state.batch_texture_uniforms, shader.default_version_rd_shader, BATCH_UNIFORM_SET); ERR_FAIL_COND_MSG(rid.is_null(), "Failed to create uniform set for batch."); @@ -2978,10 +3035,6 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha RD::get_singleton()->draw_list_bind_uniform_set(p_draw_list, *uniform_set, BATCH_UNIFORM_SET); } } - PushConstant push_constant; - push_constant.base_instance_index = p_batch->start; - push_constant.specular_shininess = p_batch->tex_info->specular_shininess; - push_constant.batch_flags = p_batch->tex_info->flags | p_batch->flags; RID pipeline; PipelineKey pipeline_key; @@ -2989,18 +3042,26 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha pipeline_key.variant = p_batch->shader_variant; pipeline_key.render_primitive = p_batch->render_primitive; pipeline_key.shader_specialization.use_lighting = p_batch->use_lighting; + pipeline_key.shader_specialization.use_msdf = p_batch->use_msdf; + pipeline_key.shader_specialization.use_lcd = p_batch->use_lcd; pipeline_key.lcd_blend = p_batch->has_blend; switch (p_batch->command_type) { case Item::Command::TYPE_RECT: case Item::Command::TYPE_NINEPATCH: { + PushConstant push_constant = p_batch->push_constant(); + + pipeline_key.vertex_format_id = shader.quad_vertex_format_id; pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant); RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); if (p_batch->has_blend) { RD::get_singleton()->draw_list_set_blend_constants(p_draw_list, p_batch->modulate); } - 
RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(push_constant)); + FixedVector vb = { p_batch->instance_buffer }; + FixedVector vo = { uint64_t(p_batch->start) * sizeof(InstanceData) }; + RD::get_singleton()->draw_list_bind_vertex_buffers_format(p_draw_list, shader.quad_vertex_format_id, 1, vb, vo); RD::get_singleton()->draw_list_bind_index_array(p_draw_list, shader.quad_index_array); RD::get_singleton()->draw_list_draw(p_draw_list, true, p_batch->instance_count); @@ -3013,6 +3074,7 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha case Item::Command::TYPE_POLYGON: { ERR_FAIL_NULL(p_batch->command); + PushConstantAttributes push_constant = p_batch->push_constant_attributes(); const Item::CommandPolygon *polygon = static_cast(p_batch->command); @@ -3023,7 +3085,7 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant); RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); - RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(push_constant)); RD::get_singleton()->draw_list_bind_vertex_array(p_draw_list, pb->vertex_array); if (pb->indices.is_valid()) { RD::get_singleton()->draw_list_bind_index_array(p_draw_list, pb->indices); @@ -3042,10 +3104,15 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha const Item::CommandPrimitive *primitive = static_cast(p_batch->command); + PushConstant push_constant = p_batch->push_constant(); + pipeline_key.vertex_format_id = shader.primitive_vertex_format_id; pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, 
push_constant); RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); - RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(push_constant)); + FixedVector vb = { p_batch->instance_buffer }; + FixedVector vo = { uint64_t(p_batch->start) * sizeof(InstanceData) }; + RD::get_singleton()->draw_list_bind_vertex_buffers_format(p_draw_list, shader.primitive_vertex_format_id, 1, vb, vo); RD::get_singleton()->draw_list_bind_index_array(p_draw_list, primitive_arrays.index_array[MIN(3u, primitive->point_count) - 1]); uint32_t instance_count = p_batch->instance_count; RD::get_singleton()->draw_list_draw(p_draw_list, true, instance_count); @@ -3063,6 +3130,8 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha case Item::Command::TYPE_PARTICLES: { ERR_FAIL_NULL(p_batch->command); + PushConstantAttributes push_constant = p_batch->push_constant_attributes(); + RendererRD::MeshStorage *mesh_storage = RendererRD::MeshStorage::get_singleton(); RendererRD::ParticlesStorage *particles_storage = RendererRD::ParticlesStorage::get_singleton(); @@ -3123,7 +3192,7 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha pipeline = _get_pipeline_specialization_or_ubershader(p_shader_data, pipeline_key, push_constant, mesh_instance, surface, j, &vertex_array); RD::get_singleton()->draw_list_bind_render_pipeline(p_draw_list, pipeline); - RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(PushConstant)); + RD::get_singleton()->draw_list_set_push_constant(p_draw_list, &push_constant, sizeof(push_constant)); RID index_array = mesh_storage->mesh_surface_get_index_array(surface, 0); @@ -3149,19 +3218,36 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha } } -RendererCanvasRenderRD::InstanceData 
*RendererCanvasRenderRD::new_instance_data(Batch &p_current_batch, const InstanceData &template_instance) { - DEV_ASSERT(state.instance_data != nullptr); +RendererCanvasRenderRD::InstanceData *RendererCanvasRenderRD::new_instance_data(Batch &p_current_batch, const InstanceData &template_instance, bool p_use_push_data) { + InstanceData *instance_data = nullptr; + + if (unlikely(p_use_push_data)) { + instance_data = &p_current_batch.push_data; + // instance_count must be > 0 to indicate the batch has been used when calling _new_batch, so we set a flag. + p_current_batch.instance_count = PUSH_DATA_INSTANCE_COUNT; + } else { + instance_data = &state.instance_data[state.instance_data_index]; + } - InstanceData *instance_data = &state.instance_data[state.instance_data_index]; memcpy(instance_data, &template_instance, sizeof(InstanceData)); - instance_data->color_texture_pixel_size[0] = p_current_batch.tex_info->texpixel_size.width; - instance_data->color_texture_pixel_size[1] = p_current_batch.tex_info->texpixel_size.height; return instance_data; } RendererCanvasRenderRD::Batch *RendererCanvasRenderRD::_new_batch(bool &r_batch_broken) { if (state.canvas_instance_batches.is_empty()) { Batch new_batch; + // First try to reuse previous instance buffer if possible. + if (state.prev_instance_data && state.prev_instance_data_index < state.max_instances_per_buffer) { + bool must_remap = state.instance_buffers.prepare_for_map(true); + // must_remap will be false if we're preparing to map the buffer for the same frame and can reuse the existing UMA buffer. + if (!must_remap) { + state.instance_data = state.prev_instance_data; + state.instance_data_index = state.prev_instance_data_index; + new_batch.start = state.instance_data_index; + } + state.prev_instance_data = nullptr; + state.prev_instance_data_index = 0; + } // This will still be a valid point when multiple calls to _render_batch_items // are made in the same draw call. 
if (state.instance_data == nullptr) { @@ -3182,13 +3268,17 @@ RendererCanvasRenderRD::Batch *RendererCanvasRenderRD::_new_batch(bool &r_batch_ // Copy the properties of the current batch, we will manually update the things that changed. Batch new_batch = state.canvas_instance_batches[state.current_batch_index]; new_batch.instance_count = 0; - new_batch.start = state.canvas_instance_batches[state.current_batch_index].start + state.canvas_instance_batches[state.current_batch_index].instance_count; + new_batch.start = state.instance_data_index; + memset(&new_batch.push_data, 0, sizeof(new_batch.push_data)); state.current_batch_index++; state.canvas_instance_batches.push_back(new_batch); return &state.canvas_instance_batches[state.current_batch_index]; } void RendererCanvasRenderRD::_add_to_batch(bool &r_batch_broken, Batch *&r_current_batch) { + DEV_ASSERT(r_current_batch->command_type == Item::Command::TYPE_RECT || + r_current_batch->command_type == Item::Command::TYPE_NINEPATCH || + r_current_batch->command_type == Item::Command::TYPE_PRIMITIVE); r_current_batch->instance_count++; state.instance_data_index++; if (state.instance_data_index >= state.max_instances_per_buffer) { @@ -3196,10 +3286,8 @@ void RendererCanvasRenderRD::_add_to_batch(bool &r_batch_broken, Batch *&r_curre state.instance_data = nullptr; _allocate_instance_buffer(); state.instance_data_index = 0; - state.instance_data_index = 0; r_batch_broken = false; // Force a new batch to be created r_current_batch = _new_batch(r_batch_broken); - r_current_batch->start = 0; r_current_batch->instance_buffer = state.instance_buffers._get(0); } } diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h index e902b26c571..9a47856c4da 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h @@ -67,8 +67,6 @@ class RendererCanvasRenderRD : public 
RendererCanvasRender { INSTANCE_FLAGS_CLIP_RECT_UV = (1 << 4), INSTANCE_FLAGS_TRANSPOSE_RECT = (1 << 5), - INSTANCE_FLAGS_USE_MSDF = (1 << 6), - INSTANCE_FLAGS_USE_LCD = (1 << 7), INSTANCE_FLAGS_NINEPACH_DRAW_CENTER = (1 << 8), INSTANCE_FLAGS_NINEPATCH_H_MODE_SHIFT = 9, @@ -120,6 +118,8 @@ class RendererCanvasRenderRD : public RendererCanvasRender { struct { uint32_t use_lighting : 1; + uint32_t use_msdf : 1; + uint32_t use_lcd : 1; }; }; }; @@ -186,6 +186,8 @@ class RendererCanvasRenderRD : public RendererCanvasRender { RID default_version_rd_shader; RID quad_index_buffer; RID quad_index_array; + RD::VertexFormatID quad_vertex_format_id; + RD::VertexFormatID primitive_vertex_format_id; ShaderCompiler compiler; uint32_t pipeline_compilations[RS::PIPELINE_SOURCE_MAX] = {}; Mutex mutex; @@ -352,16 +354,12 @@ class RendererCanvasRenderRD : public RendererCanvasRender { struct InstanceData { float world[6]; - uint32_t flags; - uint32_t instance_uniforms_ofs; + float ninepatch_pixel_size[2]; union { //rect struct { float modulation[4]; - union { - float msdf[4]; - float ninepatch_margins[4]; - }; + float ninepatch_margins[4]; float dst_rect[4]; float src_rect[4]; float pad[2]; @@ -373,15 +371,35 @@ class RendererCanvasRenderRD : public RendererCanvasRender { uint32_t colors[6]; // colors encoded as half }; }; - float color_texture_pixel_size[2]; + uint32_t flags; + uint32_t instance_uniforms_ofs; uint32_t lights[4]; }; + static_assert(sizeof(InstanceData) == 128, "2D instance data struct size must be 128 bytes"); + struct PushConstant { - uint32_t base_instance_index; ShaderSpecialization shader_specialization; uint32_t specular_shininess; uint32_t batch_flags; + uint32_t pad0; + + float msdf[2]; + float color_texture_pixel_size[2]; + }; + + struct PushConstantAttributes { + PushConstant base; + + float world[6]; + uint32_t flags; + uint32_t instance_uniforms_ofs; + float modulation[4]; + uint32_t lights[4]; + + operator PushConstant &() { + return base; + } }; // 
TextureState is used to determine when a new batch is required due to a change of texture state. @@ -459,18 +477,16 @@ class RendererCanvasRenderRD : public RendererCanvasRender { /// A key used to uniquely identify a distinct BATCH_UNIFORM_SET struct RIDSetKey { TextureState state; - RID instance_data; RIDSetKey() { } - RIDSetKey(TextureState p_state, RID p_instance_data) : - state(p_state), - instance_data(p_instance_data) { + RIDSetKey(TextureState p_state) : + state(p_state) { } _ALWAYS_INLINE_ bool operator==(const RIDSetKey &p_val) const { - return state == p_val.state && instance_data == p_val.instance_data; + return state == p_val.state; } _ALWAYS_INLINE_ bool operator!=(const RIDSetKey &p_val) const { @@ -478,9 +494,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender { } _ALWAYS_INLINE_ uint32_t hash() const { - uint32_t h = state.hash(); - h = hash_murmur3_one_64(instance_data.get_id(), h); - return hash_fmix32(h); + return state.hash(); } }; @@ -495,6 +509,9 @@ class RendererCanvasRenderRD : public RendererCanvasRender { /// diffuse texture. HashMap> canvas_texture_to_uniform_set; + static constexpr uint32_t PUSH_DATA_INSTANCE_COUNT = 0x8000'0000; // Use high bit to indicate instance data comes from push_data. + static constexpr uint32_t INSTANCE_COUNT_MASK = 0x7fff'ffff; + struct Batch { /// First instance index into the instance buffer for this batch. uint32_t start = 0; @@ -502,10 +519,14 @@ class RendererCanvasRenderRD : public RendererCanvasRender { uint32_t instance_count = 0; /// Resource ID of the instance buffer for this batch. RID instance_buffer; // UMA + /// Push-constant payload for non-VAO draws. 
+ InstanceData push_data = {}; TextureInfo *tex_info; Color modulate = Color(1.0, 1.0, 1.0, 1.0); + float msdf_pix_range = 0.0; + float msdf_outline = 0.0; Item *clip = nullptr; @@ -517,6 +538,9 @@ class RendererCanvasRenderRD : public RendererCanvasRender { ShaderVariant shader_variant = SHADER_VARIANT_QUAD; RD::RenderPrimitive render_primitive = RD::RENDER_PRIMITIVE_TRIANGLES; bool use_lighting = false; + bool use_msdf = false; + bool use_lcd = false; + bool has_blend = false; // batch-specific data union { @@ -525,8 +549,31 @@ class RendererCanvasRenderRD : public RendererCanvasRender { // TYPE_PARTICLES uint32_t mesh_instance_count; }; - bool has_blend = false; uint32_t flags = 0; + + _FORCE_INLINE_ PushConstant push_constant() const { + PushConstant pc; + pc.specular_shininess = tex_info->specular_shininess; + pc.batch_flags = tex_info->flags | flags; + pc.pad0 = 0; + + pc.msdf[0] = msdf_pix_range; + pc.msdf[1] = msdf_outline; + pc.color_texture_pixel_size[0] = tex_info->texpixel_size.x; + pc.color_texture_pixel_size[1] = tex_info->texpixel_size.y; + return pc; + } + + _FORCE_INLINE_ PushConstantAttributes push_constant_attributes() const { + PushConstantAttributes pc; + pc.base = push_constant(); + memcpy(pc.world, push_data.world, sizeof(pc.world)); + memcpy(pc.modulation, push_data.modulation, sizeof(pc.modulation)); + memcpy(pc.lights, push_data.lights, sizeof(pc.lights)); + pc.flags = push_data.flags; + pc.instance_uniforms_ofs = push_data.instance_uniforms_ofs; + return pc; + } }; HashMap, PagedAllocator>> texture_info_map; @@ -564,6 +611,9 @@ class RendererCanvasRenderRD : public RendererCanvasRender { InstanceData *instance_data = nullptr; /// The index of the next instance to be added to instance_data. uint32_t instance_data_index = 0; + /// Save the previous instance data to allow us to append . 
+ InstanceData *prev_instance_data = nullptr; + uint32_t prev_instance_data_index = 0; uint32_t max_instances_per_buffer = 16384; uint32_t max_instance_buffer_size = 16384 * sizeof(InstanceData); @@ -626,7 +676,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender { void _prepare_batch_texture_info(RID p_texture, TextureState &p_state, TextureInfo *p_info); // non-UMA - InstanceData *new_instance_data(Batch &p_current_batch, const InstanceData &template_instance); + InstanceData *new_instance_data(Batch &p_current_batch, const InstanceData &template_instance, bool p_use_push_data = false); [[nodiscard]] Batch *_new_batch(bool &r_batch_broken); void _add_to_batch(bool &r_batch_broken, Batch *&r_current_batch); void _allocate_instance_buffer(); diff --git a/servers/rendering/renderer_rd/shaders/canvas.glsl b/servers/rendering/renderer_rd/shaders/canvas.glsl index b4de817dd5f..cf619a437e6 100644 --- a/servers/rendering/renderer_rd/shaders/canvas.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas.glsl @@ -24,12 +24,6 @@ layout(location = 11) in vec4 weight_attrib; #include "canvas_uniforms_inc.glsl" -#ifndef USE_ATTRIBUTES -layout(location = 4) out flat uint instance_index; -#else -#define instance_index params.base_instance_index -#endif // USE_ATTRIBUTES - layout(location = 0) out vec2 uv_interp; layout(location = 1) out vec4 color_interp; layout(location = 2) out vec2 vertex_interp; @@ -40,6 +34,79 @@ layout(location = 3) out vec2 pixel_size_interp; #endif +#define read_draw_data_color_texture_pixel_size params.color_texture_pixel_size + +#ifdef USE_ATTRIBUTES + +#define read_draw_data_world_x params.world_x +#define read_draw_data_world_y params.world_y +#define read_draw_data_world_ofs params.world_ofs +#define read_draw_data_modulation params.modulation +#define read_draw_data_flags params.flags +#define read_draw_data_instance_offset params.instance_uniforms_ofs +#define read_draw_data_lights params.lights + +#else // !USE_ATTRIBUTES + 
+layout(location = 8) in vec4 attrib_A; +layout(location = 9) in vec4 attrib_B; +layout(location = 10) in vec4 attrib_C; +layout(location = 11) in vec4 attrib_D; +layout(location = 12) in vec4 attrib_E; +#ifdef USE_PRIMITIVE +layout(location = 13) in uvec4 attrib_F; +#else // !USE_PRIMITIVE +layout(location = 13) in vec4 attrib_F; +#endif // USE_PRIMITIVE +layout(location = 14) in uvec4 attrib_G; +layout(location = 15) in uvec4 attrib_H; + +// Varyings so the per-instance info can be used in the fragment shader +layout(location = 5) out flat vec4 varying_A; +layout(location = 6) out flat uvec4 varying_B; +layout(location = 7) out flat uvec4 varying_C; + +#ifdef USE_NINEPATCH +layout(location = 8) out flat vec4 varying_D; +layout(location = 9) out flat vec4 varying_E; +#endif // USE_NINEPATCH + +#define read_draw_data_world_x attrib_A.xy +#define read_draw_data_world_y attrib_A.zw +#define read_draw_data_world_ofs attrib_B.xy + +#ifdef USE_PRIMITIVE + +#define read_draw_data_point_a attrib_C.xy +#define read_draw_data_point_b attrib_C.zw +#define read_draw_data_point_c attrib_D.xy +#define read_draw_data_uv_a attrib_D.zw +#define read_draw_data_uv_b attrib_E.xy +#define read_draw_data_uv_c attrib_E.zw + +#define read_draw_data_color_a_rg attrib_F.x +#define read_draw_data_color_a_ba attrib_F.y +#define read_draw_data_color_b_rg attrib_F.z +#define read_draw_data_color_b_ba attrib_F.w +#define read_draw_data_color_c_rg attrib_G.x +#define read_draw_data_color_c_ba attrib_G.y + +#else // !USE_PRIMITIVE + +#define read_draw_data_ninepatch_pixel_size (attrib_B.zw) +#define read_draw_data_modulation attrib_C +#define read_draw_data_ninepatch_margins attrib_D +#define read_draw_data_dst_rect attrib_E +#define read_draw_data_src_rect attrib_F + +#endif // USE_PRIMITIVE + +#define read_draw_data_flags attrib_G.z +#define read_draw_data_instance_offset attrib_G.w +#define read_draw_data_lights attrib_H + +#endif // USE_ATTRIBUTES + #ifdef MATERIAL_UNIFORMS_USED /* 
clang-format off */ layout(set = 1, binding = 0, std140) uniform MaterialUniforms { @@ -57,6 +124,20 @@ vec3 srgb_to_linear(vec3 color) { #endif void main() { +#ifndef USE_ATTRIBUTES + varying_A = vec4(read_draw_data_world_x, read_draw_data_world_y); +#ifdef USE_PRIMITIVE + varying_B = uvec4(read_draw_data_flags, read_draw_data_instance_offset, 0.0, 0.0); +#else + varying_B = uvec4(read_draw_data_flags, read_draw_data_instance_offset, packHalf2x16(read_draw_data_src_rect.xy), packHalf2x16(read_draw_data_src_rect.zw)); +#endif + varying_C = read_draw_data_lights; +#ifdef USE_NINEPATCH + varying_D = read_draw_data_ninepatch_margins; + varying_E = vec4(read_draw_data_dst_rect.z, read_draw_data_dst_rect.w, read_draw_data_ninepatch_pixel_size.x, read_draw_data_ninepatch_pixel_size.y); +#endif // USE_NINEPATCH +#endif // !USE_ATTRIBUTES + vec4 instance_custom = vec4(0.0); #if defined(CUSTOM0_USED) vec4 custom0 = vec4(0.0); @@ -65,11 +146,6 @@ void main() { vec4 custom1 = vec4(0.0); #endif -#ifndef USE_ATTRIBUTES - instance_index = gl_InstanceIndex + params.base_instance_index; -#endif // USE_ATTRIBUTES - const InstanceData draw_data = instances.data[instance_index]; - #ifdef USE_PRIMITIVE //weird bug, @@ -79,18 +155,19 @@ void main() { vec4 color; if (gl_VertexIndex == 0) { - vertex = draw_data.points[0]; - uv = draw_data.uvs[0]; - color = vec4(unpackHalf2x16(draw_data.colors[0]), unpackHalf2x16(draw_data.colors[1])); + vertex = read_draw_data_point_a; + uv = read_draw_data_uv_a; + color = vec4(unpackHalf2x16(read_draw_data_color_a_rg), unpackHalf2x16(read_draw_data_color_a_ba)); } else if (gl_VertexIndex == 1) { - vertex = draw_data.points[1]; - uv = draw_data.uvs[1]; - color = vec4(unpackHalf2x16(draw_data.colors[2]), unpackHalf2x16(draw_data.colors[3])); + vertex = read_draw_data_point_b; + uv = read_draw_data_uv_b; + color = vec4(unpackHalf2x16(read_draw_data_color_b_rg), unpackHalf2x16(read_draw_data_color_b_ba)); } else { - vertex = draw_data.points[2]; - uv = 
draw_data.uvs[2]; - color = vec4(unpackHalf2x16(draw_data.colors[4]), unpackHalf2x16(draw_data.colors[5])); + vertex = read_draw_data_point_c; + uv = read_draw_data_uv_c; + color = vec4(unpackHalf2x16(read_draw_data_color_c_rg), unpackHalf2x16(read_draw_data_color_c_ba)); } + uvec4 bones = uvec4(0, 0, 0, 0); vec4 bone_weights = vec4(0.0); @@ -101,7 +178,7 @@ void main() { if (bool(canvas_data.flags & CANVAS_FLAGS_CONVERT_ATTRIBUTES_TO_LINEAR)) { color.rgb = srgb_to_linear(color.rgb); } - color *= draw_data.modulation; + color *= read_draw_data_modulation; vec2 uv = uv_attrib; #if defined(CUSTOM0_USED) @@ -119,14 +196,14 @@ void main() { vec2 vertex_base_arr[4] = vec2[](vec2(0.0, 0.0), vec2(0.0, 1.0), vec2(1.0, 1.0), vec2(1.0, 0.0)); vec2 vertex_base = vertex_base_arr[gl_VertexIndex]; - vec2 uv = draw_data.src_rect.xy + abs(draw_data.src_rect.zw) * ((draw_data.flags & INSTANCE_FLAGS_TRANSPOSE_RECT) != 0 ? vertex_base.yx : vertex_base.xy); - vec4 color = draw_data.modulation; - vec2 vertex = draw_data.dst_rect.xy + abs(draw_data.dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(draw_data.src_rect.zw, vec2(0.0, 0.0))); + vec2 uv = read_draw_data_src_rect.xy + abs(read_draw_data_src_rect.zw) * ((read_draw_data_flags & INSTANCE_FLAGS_TRANSPOSE_RECT) != 0 ? 
vertex_base.yx : vertex_base.xy); + vec4 color = read_draw_data_modulation; + vec2 vertex = read_draw_data_dst_rect.xy + abs(read_draw_data_dst_rect.zw) * mix(vertex_base, vec2(1.0, 1.0) - vertex_base, lessThan(read_draw_data_src_rect.zw, vec2(0.0, 0.0))); uvec4 bones = uvec4(0, 0, 0, 0); #endif // USE_ATTRIBUTES - mat4 model_matrix = mat4(vec4(draw_data.world_x, 0.0, 0.0), vec4(draw_data.world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(draw_data.world_ofs, 0.0, 1.0)); + mat4 model_matrix = mat4(vec4(read_draw_data_world_x, 0.0, 0.0), vec4(read_draw_data_world_y, 0.0, 0.0), vec4(0.0, 0.0, 1.0, 0.0), vec4(read_draw_data_world_ofs, 0.0, 1.0)); #ifdef USE_ATTRIBUTES @@ -200,7 +277,7 @@ void main() { } #ifdef USE_NINEPATCH - pixel_size_interp = abs(draw_data.dst_rect.zw) * vertex_base; + pixel_size_interp = abs(read_draw_data_dst_rect.zw) * vertex_base; #endif #if !defined(SKIP_TRANSFORM_USED) && !defined(USE_WORLD_VERTEX_COORDS) @@ -236,12 +313,6 @@ void main() { #include "canvas_uniforms_inc.glsl" -#ifndef USE_ATTRIBUTES -layout(location = 4) in flat uint instance_index; -#else -#define instance_index params.base_instance_index -#endif // USE_ATTRIBUTES - layout(location = 0) in vec2 uv_interp; layout(location = 1) in vec4 color_interp; layout(location = 2) in vec2 vertex_interp; @@ -252,6 +323,42 @@ layout(location = 3) in vec2 pixel_size_interp; #endif +#define read_draw_data_color_texture_pixel_size params.color_texture_pixel_size + +#ifdef USE_ATTRIBUTES + +#define read_draw_data_world_x params.world_x +#define read_draw_data_world_y params.world_y +#define read_draw_data_flags params.flags +#define read_draw_data_instance_offset params.instance_uniforms_ofs +#define read_draw_data_lights params.lights + +#else // !USE_ATTRIBUTES + +// Can all be flat as they are the same for the whole batched instance +layout(location = 5) in flat vec4 varying_A; + +#define read_draw_data_world_x varying_A.xy +#define read_draw_data_world_y varying_A.zw + +layout(location = 6) 
in flat uvec4 varying_B; +layout(location = 7) in flat uvec4 varying_C; +#define read_draw_data_flags varying_B.x +#define read_draw_data_instance_offset varying_B.y +#define read_draw_data_src_rect (varying_B.zw) +#define read_draw_data_lights varying_C + +#ifdef USE_NINEPATCH +layout(location = 8) in flat vec4 varying_D; +layout(location = 9) in flat vec4 varying_E; +#define read_draw_data_ninepatch_margins varying_D +#define read_draw_data_dst_rect_z varying_E.x +#define read_draw_data_dst_rect_w varying_E.y +#define read_draw_data_ninepatch_pixel_size (varying_E.zw) +#endif // USE_NINEPATCH + +#endif // USE_ATTRIBUTES + layout(location = 0) out vec4 frag_color; #ifdef MATERIAL_UNIFORMS_USED @@ -312,7 +419,6 @@ vec4 light_compute( vec2 screen_uv, vec2 uv, vec4 color, bool is_directional) { - const InstanceData draw_data = instances.data[instance_index]; vec4 light = vec4(0.0); vec3 light_direction = vec3(0.0); @@ -333,8 +439,6 @@ vec4 light_compute( #ifdef USE_NINEPATCH float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, float margin_begin, float margin_end, int np_repeat, inout int draw_center) { - const InstanceData draw_data = instances.data[instance_index]; - float tex_size = 1.0 / tex_pixel_size; if (pixel < margin_begin) { @@ -342,7 +446,7 @@ float map_ninepatch_axis(float pixel, float draw_size, float tex_pixel_size, flo } else if (pixel >= draw_size - margin_end) { return (tex_size - (draw_size - pixel)) * tex_pixel_size; } else { - draw_center -= 1 - int(bitfieldExtract(draw_data.flags, INSTANCE_FLAGS_NINEPATCH_DRAW_CENTER_SHIFT, 1)); + draw_center -= 1 - int(bitfieldExtract(read_draw_data_flags, INSTANCE_FLAGS_NINEPATCH_DRAW_CENTER_SHIFT, 1)); // np_repeat is passed as uniform using NinePatchRect::AxisStretchMode enum. if (np_repeat == 0) { // Stretch. 
@@ -473,12 +577,11 @@ void main() { vec2 uv = uv_interp; vec2 vertex = vertex_interp; - const InstanceData draw_data = instances.data[instance_index]; - #if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE) - vec4 region_rect = draw_data.src_rect; + vec4 src_rect = vec4(unpackHalf2x16(read_draw_data_src_rect.x), unpackHalf2x16(read_draw_data_src_rect.y)); + vec4 region_rect = src_rect; #else - vec4 region_rect = vec4(0.0, 0.0, 1.0 / draw_data.color_texture_pixel_size); + vec4 region_rect = vec4(0.0, 0.0, 1.0 / read_draw_data_color_texture_pixel_size); #endif #if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE) @@ -487,29 +590,28 @@ void main() { int draw_center = 2; uv = vec2( - map_ninepatch_axis(pixel_size_interp.x, abs(draw_data.dst_rect.z), draw_data.color_texture_pixel_size.x, draw_data.ninepatch_margins.x, draw_data.ninepatch_margins.z, int(bitfieldExtract(draw_data.flags, INSTANCE_FLAGS_NINEPATCH_H_MODE_SHIFT, 2)), draw_center), - map_ninepatch_axis(pixel_size_interp.y, abs(draw_data.dst_rect.w), draw_data.color_texture_pixel_size.y, draw_data.ninepatch_margins.y, draw_data.ninepatch_margins.w, int(bitfieldExtract(draw_data.flags, INSTANCE_FLAGS_NINEPATCH_V_MODE_SHIFT, 2)), draw_center)); + map_ninepatch_axis(pixel_size_interp.x, abs(read_draw_data_dst_rect_z), read_draw_data_ninepatch_pixel_size.x, read_draw_data_ninepatch_margins.x, read_draw_data_ninepatch_margins.z, int(bitfieldExtract(read_draw_data_flags, INSTANCE_FLAGS_NINEPATCH_H_MODE_SHIFT, 2)), draw_center), + map_ninepatch_axis(pixel_size_interp.y, abs(read_draw_data_dst_rect_w), read_draw_data_ninepatch_pixel_size.y, read_draw_data_ninepatch_margins.y, read_draw_data_ninepatch_margins.w, int(bitfieldExtract(read_draw_data_flags, INSTANCE_FLAGS_NINEPATCH_V_MODE_SHIFT, 2)), draw_center)); if (draw_center == 0) { color.a = 0.0; } - uv = uv * draw_data.src_rect.zw + draw_data.src_rect.xy; //apply region if needed + uv = uv * src_rect.zw + src_rect.xy; //apply region if needed #endif - if 
(bool(draw_data.flags & INSTANCE_FLAGS_CLIP_RECT_UV)) { - vec2 half_texpixel = draw_data.color_texture_pixel_size * 0.5; - uv = clamp(uv, draw_data.src_rect.xy + half_texpixel, draw_data.src_rect.xy + abs(draw_data.src_rect.zw) - half_texpixel); + if (bool(read_draw_data_flags & INSTANCE_FLAGS_CLIP_RECT_UV)) { + vec2 half_texpixel = read_draw_data_color_texture_pixel_size * 0.5; + uv = clamp(uv, src_rect.xy + half_texpixel, src_rect.xy + abs(src_rect.zw) - half_texpixel); } #endif -#ifndef USE_PRIMITIVE - if (bool(draw_data.flags & INSTANCE_FLAGS_USE_MSDF)) { - float px_range = draw_data.ninepatch_margins.x; - float outline_thickness = draw_data.ninepatch_margins.y; - //float reserved1 = draw_data.ninepatch_margins.z; - //float reserved2 = draw_data.ninepatch_margins.w; +#if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE) + // only used by TYPE_RECT + if (sc_use_msdf()) { + float px_range = params.msdf.x; + float outline_thickness = params.msdf.y; vec4 msdf_sample = texture(sampler2D(color_texture, texture_sampler), uv); vec2 msdf_size = vec2(textureSize(sampler2D(color_texture, texture_sampler), 0)); @@ -526,7 +628,7 @@ void main() { float a = clamp((d - 0.5) * px_size + 0.5, 0.0, 1.0); color.a = a * color.a; } - } else if (bool(draw_data.flags & INSTANCE_FLAGS_USE_LCD)) { + } else if (sc_use_lcd()) { vec4 lcd_sample = texture(sampler2D(color_texture, texture_sampler), uv); if (lcd_sample.a == 1.0) { color.rgb = lcd_sample.rgb * color.a; @@ -540,8 +642,8 @@ void main() { color *= texture(sampler2D(color_texture, texture_sampler), uv); } - uint light_count = draw_data.flags & 15u; //max 15 lights - bool using_light = (light_count + canvas_data.directional_light_count) > 0; + uint light_count = read_draw_data_flags & 15u; //max 15 lights + bool using_light = ((light_count + canvas_data.directional_light_count) > 0) && sc_use_lighting(); vec3 normal; @@ -555,10 +657,10 @@ void main() { normal.xy = texture(sampler2D(normal_texture, texture_sampler), uv).xy * 
vec2(2.0, -2.0) - vec2(1.0, -1.0); #if !defined(USE_ATTRIBUTES) && !defined(USE_PRIMITIVE) - if (bool(draw_data.flags & INSTANCE_FLAGS_TRANSPOSE_RECT)) { + if (bool(read_draw_data_flags & INSTANCE_FLAGS_TRANSPOSE_RECT)) { normal.xy = normal.yx; } - normal.xy *= sign(draw_data.src_rect.zw); + normal.xy *= sign(src_rect.zw); #endif normal.z = sqrt(max(0.0, 1.0 - dot(normal.xy, normal.xy))); normal_used = true; @@ -609,7 +711,7 @@ void main() { if (normal_used) { //convert by item transform - normal.xy = mat2(normalize(draw_data.world_x), normalize(draw_data.world_y)) * normal.xy; + normal.xy = mat2(normalize(read_draw_data_world_x), normalize(read_draw_data_world_y)) * normal.xy; //convert by canvas transform normal = normalize((canvas_data.canvas_normal_transform * vec4(normal, 0.0)).xyz); } @@ -671,7 +773,7 @@ void main() { if (i >= light_count) { break; } - uint light_base = bitfieldExtract(draw_data.lights[i >> 2], (int(i) & 0x3) * 8, 8); + uint light_base = bitfieldExtract(read_draw_data_lights[i >> 2], (int(i) & 0x3) * 8, 8); vec2 tex_uv = (vec4(vertex, 0.0, 1.0) * mat4(light_array.data[light_base].texture_matrix[0], light_array.data[light_base].texture_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. 
vec2 tex_uv_atlas = tex_uv * light_array.data[light_base].atlas_rect.zw + light_array.data[light_base].atlas_rect.xy; @@ -706,7 +808,7 @@ void main() { } #endif - if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW) && bool(draw_data.flags & (INSTANCE_FLAGS_SHADOW_MASKED << i))) { + if (bool(light_array.data[light_base].flags & LIGHT_FLAGS_HAS_SHADOW) && bool(read_draw_data_flags & (INSTANCE_FLAGS_SHADOW_MASKED << i))) { vec2 shadow_pos = (vec4(shadow_vertex, 0.0, 1.0) * mat4(light_array.data[light_base].shadow_matrix[0], light_array.data[light_base].shadow_matrix[1], vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))).xy; //multiply inverse given its transposed. Optimizer removes useless operations. vec2 pos_norm = normalize(shadow_pos); diff --git a/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl index a2aea974c18..8d8e07c228a 100644 --- a/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl @@ -8,8 +8,6 @@ #define INSTANCE_FLAGS_CLIP_RECT_UV (1 << 4) #define INSTANCE_FLAGS_TRANSPOSE_RECT (1 << 5) -#define INSTANCE_FLAGS_USE_MSDF (1 << 6) -#define INSTANCE_FLAGS_USE_LCD (1 << 7) #define INSTANCE_FLAGS_NINEPATCH_DRAW_CENTER_SHIFT 8 #define INSTANCE_FLAGS_NINEPATCH_H_MODE_SHIFT 9 @@ -22,8 +20,7 @@ struct InstanceData { vec2 world_x; vec2 world_y; vec2 world_ofs; - uint flags; - uint instance_uniforms_ofs; + vec2 ninepatch_pixel_size; #ifdef USE_PRIMITIVE vec2 points[3]; vec2 uvs[3]; @@ -36,7 +33,8 @@ struct InstanceData { vec2 pad; #endif - vec2 color_texture_pixel_size; + uint flags; + uint instance_uniforms_ofs; uvec4 lights; }; @@ -51,10 +49,25 @@ struct InstanceData { #define BATCH_FLAGS_DEFAULT_SPECULAR_MAP_USED (1 << 10) layout(push_constant, std430) uniform Params { - uint base_instance_index; // base index to instance data uint sc_packed_0; uint specular_shininess; uint batch_flags; 
+ uint pad0; + + vec2 msdf; + vec2 color_texture_pixel_size; +#ifdef USE_ATTRIBUTES + // Particles and meshes + vec2 world_x; + vec2 world_y; + + vec2 world_ofs; + uint flags; + uint instance_uniforms_ofs; + + vec4 modulation; + uvec4 lights; +#endif } params; @@ -82,6 +95,14 @@ bool sc_use_lighting() { return ((sc_packed_0() >> 0) & 1U) != 0; } +bool sc_use_msdf() { + return ((sc_packed_0() >> 1) & 1U) != 0; +} + +bool sc_use_lcd() { + return ((sc_packed_0() >> 2) & 1U) != 0; +} + // In vulkan, sets should always be ordered using the following logic: // Lower Sets: Sets that change format and layout less often // Higher sets: Sets that change format and layout very often @@ -180,8 +201,3 @@ layout(set = 3, binding = 0) uniform texture2D color_texture; layout(set = 3, binding = 1) uniform texture2D normal_texture; layout(set = 3, binding = 2) uniform texture2D specular_texture; layout(set = 3, binding = 3) uniform sampler texture_sampler; - -layout(set = 3, binding = 4, std430) restrict readonly buffer DrawData { - InstanceData data[]; -} -instances; diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index faf2eca2ee1..3b605f7f327 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -38,6 +38,7 @@ #include "core/io/dir_access.h" #include "core/io/file_access.h" #include "core/profiling/profiling.h" +#include "core/templates/fixed_vector.h" #include "modules/modules_enabled.gen.h" #include "servers/rendering/rendering_shader_container.h" @@ -3088,6 +3089,12 @@ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, Span p if (p_creation_bits.has_flag(BUFFER_CREATION_AS_STORAGE_BIT)) { buffer.usage.set_flag(RDD::BUFFER_USAGE_STORAGE_BIT); } + if (p_creation_bits.has_flag(BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT)) { + buffer.usage.set_flag(RDD::BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT); + + // Persistent buffers expect frequent CPU -> GPU writes, so GPU writes should avoid the 
same path. + buffer.usage.clear_flag(RDD::BUFFER_USAGE_TRANSFER_TO_BIT); + } if (p_creation_bits.has_flag(BUFFER_CREATION_DEVICE_ADDRESS_BIT)) { buffer.usage.set_flag(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT); } @@ -3095,7 +3102,7 @@ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, Span p ERR_FAIL_COND_V(!buffer.driver_id, RID()); // Vertex buffers are assumed to be immutable unless they don't have initial data or they've been marked for storage explicitly. - if (p_data.is_empty() || p_creation_bits.has_flag(BUFFER_CREATION_AS_STORAGE_BIT)) { + if (p_data.is_empty() || p_creation_bits.has_flag(BUFFER_CREATION_AS_STORAGE_BIT) || p_creation_bits.has_flag(BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT)) { buffer.draw_tracker = RDG::resource_tracker_create(); buffer.draw_tracker->buffer_driver_id = buffer.driver_id; } @@ -3127,24 +3134,49 @@ RenderingDevice::VertexFormatID RenderingDevice::vertex_format_create(const Vect return *idptr; } + VertexAttributeBindingsMap bindings; + bool has_implicit = false; + bool has_explicit = false; + Vector vertex_descriptions = p_vertex_descriptions; HashSet used_locations; - for (int i = 0; i < p_vertex_descriptions.size(); i++) { - ERR_CONTINUE(p_vertex_descriptions[i].format >= DATA_FORMAT_MAX); - ERR_FAIL_COND_V(used_locations.has(p_vertex_descriptions[i].location), INVALID_ID); + for (int i = 0; i < vertex_descriptions.size(); i++) { + VertexAttribute &attr = vertex_descriptions.write[i]; + ERR_CONTINUE(attr.format >= DATA_FORMAT_MAX); + ERR_FAIL_COND_V(used_locations.has(attr.location), INVALID_ID); - ERR_FAIL_COND_V_MSG(get_format_vertex_size(p_vertex_descriptions[i].format) == 0, INVALID_ID, - "Data format for attachment (" + itos(i) + "), '" + FORMAT_NAMES[p_vertex_descriptions[i].format] + "', is not valid for a vertex array."); + ERR_FAIL_COND_V_MSG(get_format_vertex_size(attr.format) == 0, INVALID_ID, + vformat("Data format for attribute (%d), '%s', is not valid for a vertex array.", attr.location, 
String(FORMAT_NAMES[attr.format]))); - used_locations.insert(p_vertex_descriptions[i].location); + if (attr.binding == UINT32_MAX) { + attr.binding = i; // Implicitly assigned binding + has_implicit = true; + } else { + has_explicit = true; + } + ERR_FAIL_COND_V_MSG(!(has_implicit ^ has_explicit), INVALID_ID, "Vertex attributes must use either all explicit or all implicit bindings."); + + const VertexAttributeBinding *existing = bindings.getptr(attr.binding); + if (!existing) { + bindings.insert(attr.binding, VertexAttributeBinding(attr.stride, attr.frequency)); + } else { + ERR_FAIL_COND_V_MSG(existing->stride != attr.stride, INVALID_ID, + vformat("Vertex attributes with binding (%d) have an inconsistent stride.", attr.binding)); + ERR_FAIL_COND_V_MSG(existing->frequency != attr.frequency, INVALID_ID, + vformat("Vertex attributes with binding (%d) have an inconsistent frequency.", attr.binding)); + } + + used_locations.insert(attr.location); } - RDD::VertexFormatID driver_id = driver->vertex_format_create(p_vertex_descriptions); + RDD::VertexFormatID driver_id = driver->vertex_format_create(vertex_descriptions, bindings); ERR_FAIL_COND_V(!driver_id, 0); VertexFormatID id = (vertex_format_cache.size() | ((int64_t)ID_TYPE_VERTEX_FORMAT << ID_BASE_SHIFT)); vertex_format_cache[key] = id; - vertex_formats[id].vertex_formats = p_vertex_descriptions; - vertex_formats[id].driver_id = driver_id; + VertexDescriptionCache &ce = vertex_formats.insert(id, VertexDescriptionCache())->value; + ce.vertex_formats = vertex_descriptions; + ce.bindings = std::move(bindings); + ce.driver_id = driver_id; return id; } @@ -3154,12 +3186,6 @@ RID RenderingDevice::vertex_array_create(uint32_t p_vertex_count, VertexFormatID ERR_FAIL_COND_V(!vertex_formats.has(p_vertex_format), RID()); const VertexDescriptionCache &vd = vertex_formats[p_vertex_format]; - ERR_FAIL_COND_V(vd.vertex_formats.size() != p_src_buffers.size(), RID()); - - for (int i = 0; i < p_src_buffers.size(); i++) { - 
ERR_FAIL_COND_V(!vertex_buffer_owner.owns(p_src_buffers[i]), RID()); - } - VertexArray vertex_array; if (p_offsets.is_empty()) { @@ -3172,39 +3198,53 @@ RID RenderingDevice::vertex_array_create(uint32_t p_vertex_count, VertexFormatID vertex_array.vertex_count = p_vertex_count; vertex_array.description = p_vertex_format; vertex_array.max_instances_allowed = 0xFFFFFFFF; // By default as many as you want. - for (int i = 0; i < p_src_buffers.size(); i++) { - Buffer *buffer = vertex_buffer_owner.get_or_null(p_src_buffers[i]); + vertex_array.buffers.resize(p_src_buffers.size()); + + HashSet unique_buffers; + unique_buffers.reserve(p_src_buffers.size()); + + for (const VertexAttribute &atf : vd.vertex_formats) { + ERR_FAIL_COND_V_MSG(atf.binding >= p_src_buffers.size(), RID(), vformat("Vertex attribute location (%d) is missing a buffer for binding (%d).", atf.location, atf.binding)); + RID buf = p_src_buffers[atf.binding]; + ERR_FAIL_COND_V(!vertex_buffer_owner.owns(buf), RID()); + + Buffer *buffer = vertex_buffer_owner.get_or_null(buf); // Validate with buffer. { - const VertexAttribute &atf = vd.vertex_formats[i]; - uint32_t element_size = get_format_vertex_size(atf.format); - ERR_FAIL_COND_V(element_size == 0, RID()); // Should never happens since this was prevalidated. + ERR_FAIL_COND_V(element_size == 0, RID()); // Should never happen since this was prevalidated. if (atf.frequency == VERTEX_FREQUENCY_VERTEX) { // Validate size for regular drawing. uint64_t total_size = uint64_t(atf.stride) * (p_vertex_count - 1) + atf.offset + element_size; ERR_FAIL_COND_V_MSG(total_size > buffer->size, RID(), - "Attachment (" + itos(i) + ") will read past the end of the buffer."); + vformat("Vertex attribute (%d) will read past the end of the buffer.", atf.location)); } else { // Validate size for instances drawing. 
uint64_t available = buffer->size - atf.offset; ERR_FAIL_COND_V_MSG(available < element_size, RID(), - "Attachment (" + itos(i) + ") uses instancing, but it's just too small."); + vformat("Vertex attribute (%d) uses instancing, but it's just too small.", atf.location)); uint32_t instances_allowed = available / atf.stride; vertex_array.max_instances_allowed = MIN(instances_allowed, vertex_array.max_instances_allowed); } } - vertex_array.buffers.push_back(buffer->driver_id); + vertex_array.buffers.write[atf.binding] = buffer->driver_id; + + if (unique_buffers.has(buf)) { + // No need to add dependencies multiple times. + continue; + } + + unique_buffers.insert(buf); if (buffer->draw_tracker != nullptr) { vertex_array.draw_trackers.push_back(buffer->draw_tracker); } else { - vertex_array.untracked_buffers.insert(p_src_buffers[i]); + vertex_array.untracked_buffers.insert(buf); } if (buffer->transfer_worker_index >= 0) { @@ -3214,8 +3254,8 @@ RID RenderingDevice::vertex_array_create(uint32_t p_vertex_count, VertexFormatID } RID id = vertex_array_owner.make_rid(vertex_array); - for (int i = 0; i < p_src_buffers.size(); i++) { - _add_dependency(id, p_src_buffers[i]); + for (const RID &buf : unique_buffers) { + _add_dependency(id, buf); } return id; @@ -4667,6 +4707,102 @@ void RenderingDevice::draw_list_bind_vertex_array(DrawListID p_list, RID p_verte } } +void RenderingDevice::draw_list_bind_vertex_buffers_format(DrawListID p_list, VertexFormatID p_vertex_format, uint32_t p_vertex_count, const Span &p_vertex_buffers, const Span &p_offsets) { + ERR_RENDER_THREAD_GUARD(); + + ERR_FAIL_COND(!draw_list.active); + + const VertexDescriptionCache *vertex_description = vertex_formats.getptr(p_vertex_format); + ERR_FAIL_NULL_MSG(vertex_description, "Supplied vertex format does not exist."); + + Span offsets_span = p_offsets; + FixedVector offsets; + if (offsets_span.is_empty()) { + offsets.resize_initialized(p_vertex_buffers.size()); + offsets_span = offsets; + } else { + 
ERR_FAIL_COND_MSG(offsets_span.size() != p_vertex_buffers.size(), + "Number of vertex buffer offsets (" + itos(offsets_span.size()) + ") does not match number of vertex buffers (" + itos(p_vertex_buffers.size()) + ")."); + } + + FixedVector driver_buffers; + driver_buffers.resize_initialized(p_vertex_buffers.size()); + + FixedVector draw_trackers; + +#if DEBUG_ENABLED + uint32_t max_instances_allowed = 0xFFFFFFFF; +#endif + + for (uint32_t i = 0; i < p_vertex_buffers.size(); i++) { + RID buffer_rid = p_vertex_buffers[i]; + if (buffer_rid.is_null()) { + // The buffer array can be sparse. + continue; + } + ERR_FAIL_COND_MSG(!vertex_buffer_owner.owns(buffer_rid), "Vertex buffer at index " + itos(i) + " is invalid."); + + Buffer *buffer = vertex_buffer_owner.get_or_null(buffer_rid); + ERR_FAIL_NULL(buffer); + + _check_transfer_worker_buffer(buffer); + +#if DEBUG_ENABLED + uint64_t binding_offset = offsets_span[i]; + ERR_FAIL_COND_MSG(binding_offset > buffer->size, "Vertex buffer offset for attachment (" + itos(i) + ") exceeds buffer size."); + + const VertexAttribute &attribute = vertex_description->vertex_formats[i]; + uint32_t element_size = get_format_vertex_size(attribute.format); + ERR_FAIL_COND_MSG(element_size == 0, "Vertex attribute format for attachment (" + itos(i) + ") is invalid."); + + uint64_t attribute_offset = binding_offset + attribute.offset; + ERR_FAIL_COND_MSG(attribute_offset > buffer->size, "Vertex attribute offset for attachment (" + itos(i) + ") exceeds buffer size."); + ERR_FAIL_COND_MSG(attribute_offset + element_size > buffer->size, + "Vertex buffer (" + itos(i) + ") will read past the end of the buffer."); + + if (attribute.frequency == VERTEX_FREQUENCY_VERTEX) { + ERR_FAIL_COND_MSG(p_vertex_count == 0, "Vertex count must be greater than 0 when binding vertex buffers."); + + uint64_t required_size = attribute_offset + element_size; + if (p_vertex_count > 1) { + required_size += uint64_t(attribute.stride) * (uint64_t(p_vertex_count) - 1); + } 
+ + ERR_FAIL_COND_MSG(required_size > buffer->size, + "Vertex buffer (" + itos(i) + ") will read past the end of the buffer."); + } else { + uint64_t available = buffer->size - attribute_offset; + ERR_FAIL_COND_MSG(available < element_size, + "Vertex buffer (" + itos(i) + ") uses instancing, but it's just too small."); + + uint32_t instances_allowed = attribute.stride == 0 ? 0 : uint32_t(buffer->size / attribute.stride); + max_instances_allowed = MIN(instances_allowed, max_instances_allowed); + } +#endif + + driver_buffers[i] = buffer->driver_id; + + if (buffer->draw_tracker != nullptr) { + draw_trackers.push_back(buffer->draw_tracker); + } + } + + draw_list.state.vertex_array = RID(); + + draw_graph.add_draw_list_bind_vertex_buffers(driver_buffers, offsets_span); + + for (RDG::ResourceTracker *tracker : draw_trackers) { + draw_graph.add_draw_list_usage(tracker, RDG::RESOURCE_USAGE_VERTEX_BUFFER_READ); + } + + draw_list.validation.vertex_array_size = p_vertex_count; + +#ifdef DEBUG_ENABLED + draw_list.validation.vertex_format = p_vertex_format; + draw_list.validation.vertex_max_instances_allowed = max_instances_allowed; +#endif +} + void RenderingDevice::draw_list_bind_index_array(DrawListID p_list, RID p_index_array) { ERR_RENDER_THREAD_GUARD(); @@ -7431,6 +7567,7 @@ void RenderingDevice::_bind_methods() { ClassDB::bind_method(D_METHOD("draw_list_bind_render_pipeline", "draw_list", "render_pipeline"), &RenderingDevice::draw_list_bind_render_pipeline); ClassDB::bind_method(D_METHOD("draw_list_bind_uniform_set", "draw_list", "uniform_set", "set_index"), &RenderingDevice::draw_list_bind_uniform_set); ClassDB::bind_method(D_METHOD("draw_list_bind_vertex_array", "draw_list", "vertex_array"), &RenderingDevice::draw_list_bind_vertex_array); + ClassDB::bind_method(D_METHOD("draw_list_bind_vertex_buffers_format", "draw_list", "vertex_format", "vertex_count", "vertex_buffers", "offsets"), &RenderingDevice::_draw_list_bind_vertex_buffers_format, DEFVAL(Vector())); 
ClassDB::bind_method(D_METHOD("draw_list_bind_index_array", "draw_list", "index_array"), &RenderingDevice::draw_list_bind_index_array); ClassDB::bind_method(D_METHOD("draw_list_set_push_constant", "draw_list", "buffer", "size_bytes"), &RenderingDevice::_draw_list_set_push_constant); @@ -8234,6 +8371,18 @@ RID RenderingDevice::_vertex_array_create(uint32_t p_vertex_count, VertexFormatI return vertex_array_create(p_vertex_count, p_vertex_format, buffers, offsets); } +void RenderingDevice::_draw_list_bind_vertex_buffers_format(DrawListID p_list, VertexFormatID p_vertex_format, uint32_t p_vertex_count, const TypedArray &p_vertex_buffers, const Vector &p_offsets) { + Vector buffers = Variant(p_vertex_buffers); + + Vector offsets; + offsets.resize(p_offsets.size()); + for (int i = 0; i < p_offsets.size(); i++) { + offsets.write[i] = p_offsets[i]; + } + + draw_list_bind_vertex_buffers_format(p_list, p_vertex_format, p_vertex_count, buffers, offsets); +} + Ref RenderingDevice::_shader_compile_spirv_from_source(const Ref &p_source, bool p_allow_cache) { ERR_FAIL_COND_V(p_source.is_null(), Ref()); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index a3c0ce4ad15..4558fde4c68 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -785,6 +785,7 @@ private: struct VertexDescriptionCache { Vector vertex_formats; + VertexAttributeBindingsMap bindings; RDD::VertexFormatID driver_id; }; @@ -1366,6 +1367,7 @@ public: void draw_list_bind_render_pipeline(DrawListID p_list, RID p_render_pipeline); void draw_list_bind_uniform_set(DrawListID p_list, RID p_uniform_set, uint32_t p_index); void draw_list_bind_vertex_array(DrawListID p_list, RID p_vertex_array); + void draw_list_bind_vertex_buffers_format(DrawListID p_list, VertexFormatID p_vertex_format, uint32_t p_vertex_count, const Span &p_vertex_buffers, const Span &p_offsets = Vector()); void draw_list_bind_index_array(DrawListID p_list, RID 
p_index_array); void draw_list_set_line_width(DrawListID p_list, float p_width); void draw_list_set_push_constant(DrawListID p_list, const void *p_data, uint32_t p_data_size); @@ -1749,6 +1751,7 @@ private: VertexFormatID _vertex_format_create(const TypedArray &p_vertex_formats); RID _vertex_array_create(uint32_t p_vertex_count, VertexFormatID p_vertex_format, const TypedArray &p_src_buffers, const Vector &p_offsets = Vector()); + void _draw_list_bind_vertex_buffers_format(DrawListID p_list, VertexFormatID p_vertex_format, uint32_t p_vertex_count, const TypedArray &p_vertex_buffers, const Vector &p_offsets = Vector()); Ref _shader_compile_spirv_from_source(const Ref &p_source, bool p_allow_cache = true); Vector _shader_compile_binary_from_spirv(const Ref &p_bytecode, const String &p_shader_name = ""); diff --git a/servers/rendering/rendering_device_binds.h b/servers/rendering/rendering_device_binds.h index 15b1cf5469a..9d8c81e3a88 100644 --- a/servers/rendering/rendering_device_binds.h +++ b/servers/rendering/rendering_device_binds.h @@ -221,6 +221,7 @@ class RDVertexAttribute : public RefCounted { RD::VertexAttribute base; public: + RD_SETGET(uint32_t, binding) RD_SETGET(uint32_t, location) RD_SETGET(uint32_t, offset) RD_SETGET(RD::DataFormat, format) @@ -229,6 +230,7 @@ public: protected: static void _bind_methods() { + RD_BIND(Variant::INT, RDVertexAttribute, binding); RD_BIND(Variant::INT, RDVertexAttribute, location); RD_BIND(Variant::INT, RDVertexAttribute, offset); RD_BIND(Variant::INT, RDVertexAttribute, format); diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index e51566453a4..802dfea7aa1 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -549,6 +549,7 @@ public: }; struct VertexAttribute { + uint32_t binding = UINT32_MAX; // Attribute buffer binding index. When set to UINT32_MAX, it uses the index of the attribute in the layout. 
uint32_t location = 0; // Shader location. uint32_t offset = 0; DataFormat format = DATA_FORMAT_MAX; @@ -556,6 +557,18 @@ public: VertexFrequency frequency = VERTEX_FREQUENCY_VERTEX; }; + /* Plain aggregate holding two values for a vertex buffer binding: a stride (default 0) and a VertexFrequency (default VERTEX_FREQUENCY_VERTEX). It offers a defaulted constructor and one that sets both fields. NOTE(review): the unit of stride is not stated here - presumably bytes, confirm. */ struct VertexAttributeBinding { + uint32_t stride = 0; + VertexFrequency frequency = VERTEX_FREQUENCY_VERTEX; + + VertexAttributeBinding() = default; + VertexAttributeBinding(uint32_t p_stride, VertexFrequency p_frequency) : + stride(p_stride), + frequency(p_frequency) {} + }; + + /* NOTE(review): HashMap is shown without template arguments in this text; presumably it maps a binding index to a VertexAttributeBinding - confirm against the real header. */ typedef HashMap VertexAttributeBindingsMap; + /*********************/ /**** FRAMEBUFFER ****/ /*********************/ diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index abf11ef0fc7..b6e7927f487 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -194,6 +194,7 @@ public: virtual uint8_t *buffer_map(BufferID p_buffer) = 0; virtual void buffer_unmap(BufferID p_buffer) = 0; virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) = 0; + virtual uint64_t buffer_get_dynamic_offsets(Span p_buffers) = 0; virtual void buffer_flush(BufferID p_buffer) {} // Only for a buffer with BUFFER_USAGE_DEVICE_ADDRESS_BIT. virtual uint64_t buffer_get_device_address(BufferID p_buffer) = 0; @@ -301,7 +302,7 @@ public: /**** VERTEX ARRAY ****/ /**********************/ - virtual VertexFormatID vertex_format_create(VectorView p_vertex_attribs) = 0; + virtual VertexFormatID vertex_format_create(Span p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) = 0; virtual void vertex_format_free(VertexFormatID p_vertex_format) = 0; /******************/ @@ -673,7 +674,7 @@ public: virtual void command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) = 0; // Buffer binding. 
- virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets) = 0; + virtual void command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) = 0; virtual void command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) = 0; // Dynamic state. diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp index e0f6ff25f8a..2b7f2ebd500 100644 --- a/servers/rendering/rendering_device_graph.cpp +++ b/servers/rendering/rendering_device_graph.cpp @@ -870,7 +870,7 @@ void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command } break; case DrawListInstruction::TYPE_BIND_VERTEX_BUFFERS: { const DrawListBindVertexBuffersInstruction *bind_vertex_buffers_instruction = reinterpret_cast(instruction); - driver->command_render_bind_vertex_buffers(p_command_buffer, bind_vertex_buffers_instruction->vertex_buffers_count, bind_vertex_buffers_instruction->vertex_buffers(), bind_vertex_buffers_instruction->vertex_buffer_offsets()); + driver->command_render_bind_vertex_buffers(p_command_buffer, bind_vertex_buffers_instruction->vertex_buffers_count, bind_vertex_buffers_instruction->vertex_buffers(), bind_vertex_buffers_instruction->vertex_buffer_offsets(), bind_vertex_buffers_instruction->dynamic_offsets_mask); instruction_data_cursor += sizeof(DrawListBindVertexBuffersInstruction); instruction_data_cursor += sizeof(RDD::BufferID) * bind_vertex_buffers_instruction->vertex_buffers_count; instruction_data_cursor += sizeof(uint64_t) * bind_vertex_buffers_instruction->vertex_buffers_count; @@ -1872,13 +1872,14 @@ void RenderingDeviceGraph::add_draw_list_bind_uniform_sets(RDD::ShaderID p_shade } } -void 
RenderingDeviceGraph::add_draw_list_bind_vertex_buffers(VectorView p_vertex_buffers, VectorView p_vertex_buffer_offsets) { +void RenderingDeviceGraph::add_draw_list_bind_vertex_buffers(Span p_vertex_buffers, Span p_vertex_buffer_offsets) { DEV_ASSERT(p_vertex_buffers.size() == p_vertex_buffer_offsets.size()); uint32_t instruction_size = sizeof(DrawListBindVertexBuffersInstruction) + sizeof(RDD::BufferID) * p_vertex_buffers.size() + sizeof(uint64_t) * p_vertex_buffer_offsets.size(); DrawListBindVertexBuffersInstruction *instruction = reinterpret_cast(_allocate_draw_list_instruction(instruction_size)); instruction->type = DrawListInstruction::TYPE_BIND_VERTEX_BUFFERS; instruction->vertex_buffers_count = p_vertex_buffers.size(); + instruction->dynamic_offsets_mask = driver->buffer_get_dynamic_offsets(p_vertex_buffers); RDD::BufferID *vertex_buffers = instruction->vertex_buffers(); uint64_t *vertex_buffer_offsets = instruction->vertex_buffer_offsets(); diff --git a/servers/rendering/rendering_device_graph.h b/servers/rendering/rendering_device_graph.h index 7b986617f0e..69e0ad0d631 100644 --- a/servers/rendering/rendering_device_graph.h +++ b/servers/rendering/rendering_device_graph.h @@ -502,6 +502,7 @@ private: struct DrawListBindVertexBuffersInstruction : DrawListInstruction { uint32_t vertex_buffers_count = 0; + uint64_t dynamic_offsets_mask = 0; _FORCE_INLINE_ RDD::BufferID *vertex_buffers() { return reinterpret_cast(&this[1]); @@ -795,7 +796,7 @@ public: void add_draw_list_bind_pipeline(RDD::PipelineID p_pipeline, BitField p_pipeline_stage_bits); void add_draw_list_bind_uniform_set(RDD::ShaderID p_shader, RDD::UniformSetID p_uniform_set, uint32_t set_index); void add_draw_list_bind_uniform_sets(RDD::ShaderID p_shader, VectorView p_uniform_set, uint32_t p_first_index, uint32_t p_set_count); - void add_draw_list_bind_vertex_buffers(VectorView p_vertex_buffers, VectorView p_vertex_buffer_offsets); + void add_draw_list_bind_vertex_buffers(Span p_vertex_buffers, Span 
p_vertex_buffer_offsets); void add_draw_list_clear_attachments(VectorView p_attachments_clear, VectorView p_attachments_clear_rect); void add_draw_list_draw(uint32_t p_vertex_count, uint32_t p_instance_count); void add_draw_list_draw_indexed(uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index);