1
0
mirror of https://github.com/godotengine/godot.git synced 2026-01-05 19:31:35 +00:00

Refactor descriptor heaps in D3D12 driver.

This commit is contained in:
Skyth
2025-11-24 18:42:11 +03:00
parent 9f5309a2a4
commit a8d3ecec13
6 changed files with 640 additions and 1138 deletions

View File

@@ -194,7 +194,11 @@ uint32_t RenderingShaderContainerD3D12::_format_version() const {
// Deserializes the D3D12-specific reflection data stored at `p_bytes`.
//
// Layout read from the buffer:
//   [ReflectionDataD3D12][ReflectionBindingSetDataD3D12 * reflection_data.set_count]
//
// Returns the total number of bytes consumed (header plus every per-set record).
// NOTE(review): no bounds information is available here; this assumes the caller
// guarantees that `p_bytes` holds at least that many bytes — confirm against caller.
uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) {
	reflection_data_d3d12 = *(const ReflectionDataD3D12 *)(p_bytes);

	// The per-set records follow the fixed-size header back to back. They must be
	// read here (and counted in the returned size); returning right after the
	// header would leave them unparsed.
	reflection_binding_set_data_d3d12.resize(reflection_data.set_count);
	const uint8_t *set_bytes = p_bytes + sizeof(ReflectionDataD3D12);
	for (uint32_t i = 0; i < reflection_binding_set_data_d3d12.size(); i++) {
		reflection_binding_set_data_d3d12.ptrw()[i] = *(const ReflectionBindingSetDataD3D12 *)(set_bytes + (i * sizeof(ReflectionBindingSetDataD3D12)));
	}

	return sizeof(ReflectionDataD3D12) + (reflection_binding_set_data_d3d12.size() * sizeof(ReflectionBindingSetDataD3D12));
}
uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) {
@@ -228,9 +232,12 @@ uint32_t RenderingShaderContainerD3D12::_from_bytes_footer_extra_data(const uint
// Serializes the D3D12-specific reflection data into `p_bytes`.
//
// Layout written to the buffer:
//   [ReflectionDataD3D12][ReflectionBindingSetDataD3D12 * reflection_binding_set_data_d3d12.size()]
//
// When `p_bytes` is null nothing is written, but the required size is still returned.
// Returns the total number of bytes this data occupies (header plus every per-set record).
uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const {
	if (p_bytes != nullptr) {
		*(ReflectionDataD3D12 *)(p_bytes) = reflection_data_d3d12;

		// Per-set records are stored back to back right after the fixed-size header.
		uint8_t *set_bytes = p_bytes + sizeof(ReflectionDataD3D12);
		for (uint32_t i = 0; i < reflection_binding_set_data_d3d12.size(); i++) {
			*(ReflectionBindingSetDataD3D12 *)(set_bytes + (i * sizeof(ReflectionBindingSetDataD3D12))) = reflection_binding_set_data_d3d12[i];
		}
	}

	// The reported size must cover the per-set records as well; reporting only the
	// header size would under-count the bytes written above.
	return sizeof(ReflectionDataD3D12) + (reflection_binding_set_data_d3d12.size() * sizeof(ReflectionBindingSetDataD3D12));
}
uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
@@ -269,14 +276,10 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER;
dxil_runtime_conf.zero_based_vertex_instance_id = true;
dxil_runtime_conf.zero_based_compute_workgroup_id = true;
dxil_runtime_conf.declared_read_only_images_as_srvs = true;
// Making this explicit to let maintainers know that in practice this didn't improve performance,
// probably because data generated by one shader and consumed by another one forces the resource
// to transition from UAV to SRV, and back, instead of being an UAV all the time.
// In case someone wants to try, care must be taken so in case of incompatible bindings across stages
// happen as a result, all the stages are re-translated. That can happen if, for instance, a stage only
// uses an allegedly writable resource only for reading but the next stage doesn't.
// Explicitly keeping these false because converting UAV descriptors to SRVs does not seem to have real performance benefits on desktop GPUs.
// It also makes it easier to implement descriptor heaps and enhanced barriers.
dxil_runtime_conf.declared_read_only_images_as_srvs = false;
dxil_runtime_conf.inferred_read_only_images_as_srvs = false;
// Translate SPIR-V to NIR.
@@ -482,7 +485,7 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
struct TraceableDescriptorTable {
uint32_t stages_mask = {};
Vector<D3D12_DESCRIPTOR_RANGE1> ranges;
Vector<RootSignatureLocation *> root_signature_locations;
uint32_t set = UINT_MAX;
};
uint32_t binding_start = 0;
@@ -495,31 +498,35 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
for (uint32_t j = 0; j < uniform_count; j++) {
const ReflectionBindingData &uniform = reflection_binding_set_uniforms_data[binding_start + j];
ReflectionBindingDataD3D12 &uniform_d3d12 = reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + j];
bool really_used = uniform_d3d12.dxil_stages != 0;
#ifdef DEV_ENABLED
bool really_used = uniform_d3d12.dxil_stages != 0;
bool anybody_home = (ResourceClass)(uniform_d3d12.resource_class) != RES_CLASS_INVALID || uniform_d3d12.has_sampler;
DEV_ASSERT(anybody_home == really_used);
#endif
if (!really_used) {
continue; // Existed in SPIR-V; went away in DXIL.
}
auto insert_range = [](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type,
auto insert_range = [i](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type,
uint32_t p_num_descriptors,
uint32_t p_dxil_register,
uint32_t p_dxil_stages_mask,
RootSignatureLocation *p_root_sig_locations,
Vector<TraceableDescriptorTable> &r_tables,
bool &r_first_in_set) {
uint32_t &r_descriptor_offset,
uint32_t &r_descriptor_count,
bool &r_first_in_set,
Vector<TraceableDescriptorTable> &r_tables) {
r_descriptor_offset = r_descriptor_count;
if (r_first_in_set) {
r_tables.resize(r_tables.size() + 1);
r_first_in_set = false;
}
TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1];
DEV_ASSERT(table.set == UINT_MAX || table.set == i);
table.stages_mask |= p_dxil_stages_mask;
table.set = i;
CD3DX12_DESCRIPTOR_RANGE1 range;
// Due to the aliasing hack for SRV-UAV of different families,
// we can be causing an unintended change of data (sometimes the validation layers catch it).
D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE;
@@ -528,79 +535,130 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
} else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) {
flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE;
}
range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags);
range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags, r_descriptor_offset);
r_descriptor_count += p_num_descriptors;
table.ranges.push_back(range);
table.root_signature_locations.push_back(p_root_sig_locations);
};
D3D12_DESCRIPTOR_RANGE_TYPE range_type = (D3D12_DESCRIPTOR_RANGE_TYPE)UINT_MAX;
bool has_sampler = false;
uint32_t num_descriptors = 1;
D3D12_DESCRIPTOR_RANGE_TYPE resource_range_type = {};
switch ((ResourceClass)(uniform_d3d12.resource_class)) {
case RES_CLASS_INVALID: {
switch (uniform.type) {
case RDC::UNIFORM_TYPE_SAMPLER: {
has_sampler = true;
num_descriptors = uniform.length;
DEV_ASSERT(uniform_d3d12.has_sampler);
} break;
case RES_CLASS_CBV: {
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
DEV_ASSERT(!uniform_d3d12.has_sampler);
case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
has_sampler = true;
num_descriptors = MAX(1u, uniform.length);
} break;
case RES_CLASS_SRV: {
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
num_descriptors = MAX(1u, uniform.length); // An unbound R/O buffer is reflected as zero-size.
case RDC::UNIFORM_TYPE_TEXTURE: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
num_descriptors = MAX(1u, uniform.length);
} break;
case RES_CLASS_UAV: {
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
num_descriptors = MAX(1u, uniform.length); // An unbound R/W buffer is reflected as zero-size.
DEV_ASSERT(!uniform_d3d12.has_sampler);
case RDC::UNIFORM_TYPE_IMAGE: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
num_descriptors = MAX(1u, uniform.length);
} break;
case RDC::UNIFORM_TYPE_TEXTURE_BUFFER: {
CRASH_NOW_MSG("Unimplemented!");
} break;
case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
CRASH_NOW_MSG("Unimplemented!");
} break;
case RDC::UNIFORM_TYPE_IMAGE_BUFFER: {
CRASH_NOW_MSG("Unimplemented!");
} break;
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
} break;
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
} break;
case RDC::UNIFORM_TYPE_STORAGE_BUFFER: {
range_type = uniform.writable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
} break;
case RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
range_type = uniform.writable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
} break;
case RDC::UNIFORM_TYPE_INPUT_ATTACHMENT: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
} break;
default: {
DEV_ASSERT(false);
}
}
uint32_t dxil_register = i * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + uniform.binding * GODOT_NIR_BINDING_MULTIPLIER;
if (uniform_d3d12.resource_class != RES_CLASS_INVALID) {
insert_range(
resource_range_type,
num_descriptors,
dxil_register,
uniform_d3d12.dxil_stages,
&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_RESOURCE],
resource_tables_maps,
first_resource_in_set);
if (range_type != (D3D12_DESCRIPTOR_RANGE_TYPE)UINT_MAX) {
// Dynamic buffers are converted to root descriptors to prevent copying descriptors during command recording.
// Out of bounds accesses are not a concern because that's already undefined behavior on Vulkan.
if (uniform.type == RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC || uniform.type == RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC) {
CD3DX12_ROOT_PARAMETER1 root_param = {};
D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(uniform.stages);
switch (range_type) {
case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: {
root_param.InitAsConstantBufferView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE, visibility);
} break;
case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: {
root_param.InitAsShaderResourceView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, visibility);
} break;
case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: {
root_param.InitAsUnorderedAccessView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, visibility);
} break;
default: {
DEV_ASSERT(false && "Unrecognized range type.");
} break;
}
uniform_d3d12.root_param_idx = root_params.size();
root_params.push_back(root_param);
} else {
insert_range(
range_type,
num_descriptors,
dxil_register,
uniform.stages,
uniform_d3d12.resource_descriptor_offset,
reflection_binding_set_data_d3d12.ptrw()[i].resource_descriptor_count,
first_resource_in_set,
resource_tables_maps);
}
}
if (uniform_d3d12.has_sampler) {
if (has_sampler) {
insert_range(
D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
num_descriptors,
dxil_register,
uniform_d3d12.dxil_stages,
&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_SAMPLER],
sampler_tables_maps,
first_sampler_in_set);
uniform.stages,
uniform_d3d12.sampler_descriptor_offset,
reflection_binding_set_data_d3d12.ptrw()[i].sampler_descriptor_count,
first_sampler_in_set,
sampler_tables_maps);
}
}
binding_start += uniform_count;
}
auto make_descriptor_tables = [&root_params](const Vector<TraceableDescriptorTable> &p_tables) {
for (const TraceableDescriptorTable &table : p_tables) {
D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(table.stages_mask);
DEV_ASSERT(table.ranges.size() == table.root_signature_locations.size());
for (int i = 0; i < table.ranges.size(); i++) {
// By now we know very well which root signature location corresponds to the pointed uniform.
table.root_signature_locations[i]->root_param_index = root_params.size();
table.root_signature_locations[i]->range_index = i;
}
for (const TraceableDescriptorTable &table : resource_tables_maps) {
CD3DX12_ROOT_PARAMETER1 root_table = {};
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), stages_to_d3d12_visibility(table.stages_mask));
reflection_binding_set_data_d3d12.ptrw()[table.set].resource_root_param_idx = root_params.size();
root_params.push_back(root_table);
}
CD3DX12_ROOT_PARAMETER1 root_table;
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), visibility);
root_params.push_back(root_table);
}
};
make_descriptor_tables(resource_tables_maps);
make_descriptor_tables(sampler_tables_maps);
for (const TraceableDescriptorTable &table : sampler_tables_maps) {
CD3DX12_ROOT_PARAMETER1 root_table = {};
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), stages_to_d3d12_visibility(table.stages_mask));
reflection_binding_set_data_d3d12.ptrw()[table.set].sampler_root_param_idx = root_params.size();
root_params.push_back(root_table);
}
CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {};
D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags =
@@ -755,6 +813,7 @@ void RenderingShaderContainerD3D12::_nir_report_bitcode_bit_offset(uint64_t p_bi
#endif
void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const ReflectShader &p_shader) {
reflection_binding_set_data_d3d12.resize(reflection_binding_set_uniforms_count.size());
reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size());
reflection_specialization_data_d3d12.resize(reflection_specialization_data.size());
@@ -841,6 +900,7 @@ RenderingShaderContainerD3D12::ShaderReflectionD3D12 RenderingShaderContainerD3D
reflection.spirv_specialization_constants_ids_mask = reflection_data_d3d12.spirv_specialization_constants_ids_mask;
reflection.dxil_push_constant_stages = reflection_data_d3d12.dxil_push_constant_stages;
reflection.nir_runtime_data_root_param_idx = reflection_data_d3d12.nir_runtime_data_root_param_idx;
reflection.reflection_binding_sets_d3d12 = reflection_binding_set_data_d3d12;
reflection.reflection_specialization_data_d3d12 = reflection_specialization_data_d3d12;
reflection.root_signature_bytes = root_signature_bytes;
reflection.root_signature_crc = root_signature_crc;