You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-04 12:00:25 +00:00
Merge pull request #111988 from blueskythlikesclouds/d3d12-resolve-barrier-fix
Insert barriers between subpasses when using enhanced barriers on D3D12.
This commit is contained in:
@@ -2485,8 +2485,8 @@ void RenderingDeviceDriverD3D12::command_pipeline_barrier(CommandBufferID p_cmd_
|
||||
|
||||
// The command list must support the required interface.
|
||||
const CommandBufferInfo *cmd_buf_info = (const CommandBufferInfo *)(p_cmd_buffer.id);
|
||||
ID3D12GraphicsCommandList7 *cmd_list_7 = nullptr;
|
||||
HRESULT res = cmd_buf_info->cmd_list->QueryInterface(IID_PPV_ARGS(&cmd_list_7));
|
||||
ComPtr<ID3D12GraphicsCommandList7> cmd_list_7;
|
||||
HRESULT res = cmd_buf_info->cmd_list->QueryInterface(cmd_list_7.GetAddressOf());
|
||||
ERR_FAIL_COND(FAILED(res));
|
||||
|
||||
// Convert the RDD barriers to D3D12 enhanced barriers.
|
||||
@@ -4795,8 +4795,17 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd
|
||||
cmd_buf_info->render_pass_state.region_rect.right == fb_info->size.x &&
|
||||
cmd_buf_info->render_pass_state.region_rect.bottom == fb_info->size.y);
|
||||
|
||||
cmd_buf_info->render_pass_state.attachment_layouts.resize(pass_info->attachments.size());
|
||||
|
||||
for (uint32_t i = 0; i < pass_info->attachments.size(); i++) {
|
||||
if (pass_info->attachments[i].load_op == ATTACHMENT_LOAD_OP_DONT_CARE) {
|
||||
const Attachment &attachment = pass_info->attachments[i];
|
||||
|
||||
for (RenderPassState::AttachmentLayout::AspectLayout &aspect_layout : cmd_buf_info->render_pass_state.attachment_layouts[i].aspect_layouts) {
|
||||
aspect_layout.cur_layout = attachment.initial_layout;
|
||||
aspect_layout.expected_layout = attachment.initial_layout;
|
||||
}
|
||||
|
||||
if (attachment.load_op == ATTACHMENT_LOAD_OP_DONT_CARE) {
|
||||
const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id;
|
||||
_discard_texture_subresources(tex_info, cmd_buf_info);
|
||||
}
|
||||
@@ -4857,6 +4866,91 @@ void RenderingDeviceDriverD3D12::command_begin_render_pass(CommandBufferID p_cmd
|
||||
}
|
||||
}
|
||||
|
||||
// Subpass dependencies cannot be specified by the end user, and by default they are very aggressive.
|
||||
// We can be more lenient by just looking at the texture layout and specifying appropriate access and stage bits.
|
||||
|
||||
// We specify full barrier for layouts we don't expect to see as fallback.
|
||||
static const BitField<RDD::BarrierAccessBits> RD_RENDER_PASS_LAYOUT_TO_ACCESS_BITS[RDD::TEXTURE_LAYOUT_MAX] = {
|
||||
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_UNDEFINED
|
||||
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_GENERAL
|
||||
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_SHADER_READ_BIT, // TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_COPY_SRC_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_RESOLVE_READ_BIT, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, // TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL
|
||||
RDD::BARRIER_ACCESS_MEMORY_READ_BIT | RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT // TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL
|
||||
};
|
||||
|
||||
// We specify all commands for layouts we don't expect to see as fallback.
|
||||
static const BitField<RDD::PipelineStageBits> RD_RENDER_PASS_LAYOUT_TO_STAGE_BITS[RDD::TEXTURE_LAYOUT_MAX] = {
|
||||
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_UNDEFINED
|
||||
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_GENERAL
|
||||
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT, // TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_COPY_SRC_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_RESOLVE_BIT, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_RESOLVE_BIT, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, // TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL
|
||||
RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT // TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL
|
||||
};
|
||||
|
||||
// Issues the texture layout transitions pending in the render pass state of p_cmd_buffer.
//
// Every attachment aspect whose tracked current layout differs from its expected layout gets one
// texture barrier covering the mip/layer range recorded in the attachment's TextureInfo. Access and
// stage masks are looked up from the layout tables, and the tracked current layout is then advanced
// to the expected one. All barriers are submitted through a single command_pipeline_barrier() call.
//
// Does nothing when enhanced barriers are not supported or when no aspect needs a transition.
void RenderingDeviceDriverD3D12::_render_pass_enhanced_barriers_flush(CommandBufferID p_cmd_buffer) {
	if (!barrier_capabilities.enhanced_barriers_supported) {
		return;
	}

	CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
	LocalVector<RenderPassState::AttachmentLayout> &layouts = cmd_buf_info->render_pass_state.attachment_layouts;

	// Scratch list kept per thread and emptied on every call.
	// NOTE(review): presumably thread_local so the allocation is reused between flushes — confirm.
	thread_local LocalVector<TextureBarrier> pending_barriers;
	pending_barriers.clear();

	// Union of the stages of every transition collected below.
	BitField<PipelineStageBits> src_stages = {};
	BitField<PipelineStageBits> dst_stages = {};

	for (uint32_t attachment_idx = 0; attachment_idx < layouts.size(); attachment_idx++) {
		TextureID texture = cmd_buf_info->render_pass_state.fb_info->attachments[attachment_idx];
		TextureInfo *texture_info = (TextureInfo *)texture.id;
		RenderPassState::AttachmentLayout &layout = layouts[attachment_idx];

		for (uint32_t aspect_idx = 0; aspect_idx < TEXTURE_ASPECT_MAX; aspect_idx++) {
			RenderPassState::AttachmentLayout::AspectLayout &aspect = layout.aspect_layouts[aspect_idx];
			if (aspect.cur_layout == aspect.expected_layout) {
				// Already in the layout the next use requires.
				continue;
			}

			src_stages = src_stages | RD_RENDER_PASS_LAYOUT_TO_STAGE_BITS[aspect.cur_layout];
			dst_stages = dst_stages | RD_RENDER_PASS_LAYOUT_TO_STAGE_BITS[aspect.expected_layout];

			TextureBarrier barrier;
			barrier.texture = texture;
			barrier.src_access = RD_RENDER_PASS_LAYOUT_TO_ACCESS_BITS[aspect.cur_layout];
			barrier.dst_access = RD_RENDER_PASS_LAYOUT_TO_ACCESS_BITS[aspect.expected_layout];
			barrier.prev_layout = aspect.cur_layout;
			barrier.next_layout = aspect.expected_layout;
			// The aspect index is turned into its single-bit aspect mask.
			barrier.subresources.aspect = (TextureAspectBits)(1 << aspect_idx);
			barrier.subresources.base_mipmap = texture_info->base_mip;
			barrier.subresources.mipmap_count = texture_info->mipmaps;
			barrier.subresources.base_layer = texture_info->base_layer;
			barrier.subresources.layer_count = texture_info->layers;
			pending_barriers.push_back(barrier);

			// The transition is now recorded, so the tracked state catches up.
			aspect.cur_layout = aspect.expected_layout;
		}
	}

	if (pending_barriers.is_empty()) {
		return;
	}

	command_pipeline_barrier(p_cmd_buffer, src_stages, dst_stages, VectorView<MemoryAccessBarrier>(), VectorView<BufferBarrier>(), pending_barriers);
}
|
||||
|
||||
void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer) {
|
||||
CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id;
|
||||
|
||||
@@ -4895,11 +4989,22 @@ void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer)
|
||||
|
||||
TextureInfo *src_tex_info = (TextureInfo *)fb_info->attachments[color_index].id;
|
||||
uint32_t src_subresource = D3D12CalcSubresource(src_tex_info->base_mip, src_tex_info->base_layer, 0, src_tex_info->desc.MipLevels, src_tex_info->desc.ArraySize());
|
||||
_resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
|
||||
|
||||
if (barrier_capabilities.enhanced_barriers_supported) {
|
||||
cmd_buf_info->render_pass_state.attachment_layouts[color_index].aspect_layouts[TEXTURE_ASPECT_COLOR].expected_layout = TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL;
|
||||
} else {
|
||||
_resource_transition_batch(cmd_buf_info, src_tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
|
||||
}
|
||||
|
||||
TextureInfo *dst_tex_info = (TextureInfo *)fb_info->attachments[resolve_index].id;
|
||||
uint32_t dst_subresource = D3D12CalcSubresource(dst_tex_info->base_mip, dst_tex_info->base_layer, 0, dst_tex_info->desc.MipLevels, dst_tex_info->desc.ArraySize());
|
||||
_resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);
|
||||
|
||||
if (barrier_capabilities.enhanced_barriers_supported) {
|
||||
// This should have already been done when beginning the subpass.
|
||||
DEV_ASSERT(cmd_buf_info->render_pass_state.attachment_layouts[resolve_index].aspect_layouts[TEXTURE_ASPECT_COLOR].expected_layout == TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL);
|
||||
} else {
|
||||
_resource_transition_batch(cmd_buf_info, dst_tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_RESOLVE_DEST);
|
||||
}
|
||||
|
||||
resolves[num_resolves].src_res = src_tex_info->resource;
|
||||
resolves[num_resolves].src_subres = src_subresource;
|
||||
@@ -4911,6 +5016,11 @@ void RenderingDeviceDriverD3D12::_end_render_pass(CommandBufferID p_cmd_buffer)
|
||||
|
||||
_resource_transitions_flush(cmd_buf_info);
|
||||
|
||||
// There can be enhanced barriers to flush only when we need to resolve textures.
|
||||
if (num_resolves != 0) {
|
||||
_render_pass_enhanced_barriers_flush(p_cmd_buffer);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < num_resolves; i++) {
|
||||
cmd_buf_info->cmd_list->ResolveSubresource(resolves[i].dst_res, resolves[i].dst_subres, resolves[i].src_res, resolves[i].src_subres, resolves[i].format);
|
||||
}
|
||||
@@ -4933,6 +5043,16 @@ void RenderingDeviceDriverD3D12::command_end_render_pass(CommandBufferID p_cmd_b
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < pass_info->attachments.size(); i++) {
|
||||
const Attachment &attachment = pass_info->attachments[i];
|
||||
|
||||
for (RenderPassState::AttachmentLayout::AspectLayout &aspect_layout : cmd_buf_info->render_pass_state.attachment_layouts[i].aspect_layouts) {
|
||||
aspect_layout.expected_layout = attachment.final_layout;
|
||||
}
|
||||
}
|
||||
|
||||
_render_pass_enhanced_barriers_flush(p_cmd_buffer);
|
||||
|
||||
for (uint32_t i = 0; i < pass_info->attachments.size(); i++) {
|
||||
if (pass_info->attachments[i].store_op == ATTACHMENT_STORE_OP_DONT_CARE) {
|
||||
const TextureInfo *tex_info = (const TextureInfo *)fb_info->attachments[i].id;
|
||||
@@ -4957,10 +5077,27 @@ void RenderingDeviceDriverD3D12::command_next_render_subpass(CommandBufferID p_c
|
||||
const RenderPassInfo *pass_info = cmd_buf_info->render_pass_state.pass_info;
|
||||
const Subpass &subpass = pass_info->subpasses[cmd_buf_info->render_pass_state.current_subpass];
|
||||
|
||||
for (uint32_t i = 0; i < subpass.input_references.size(); i++) {
|
||||
const AttachmentReference &input_reference = subpass.input_references[i];
|
||||
uint32_t attachment = input_reference.attachment;
|
||||
|
||||
if (attachment != AttachmentReference::UNUSED) {
|
||||
RenderPassState::AttachmentLayout &attachment_layout = cmd_buf_info->render_pass_state.attachment_layouts[attachment];
|
||||
|
||||
// Vulkan cares about aspect bits only for input attachments.
|
||||
for (uint32_t j = 0; j < TEXTURE_ASPECT_MAX; j++) {
|
||||
if (input_reference.aspect & (1 << j)) {
|
||||
attachment_layout.aspect_layouts[j].expected_layout = input_reference.layout;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE *rtv_handles = ALLOCA_ARRAY(D3D12_CPU_DESCRIPTOR_HANDLE, subpass.color_references.size());
|
||||
CPUDescriptorsHeapWalker rtv_heap_walker = fb_info->rtv_heap.make_walker();
|
||||
for (uint32_t i = 0; i < subpass.color_references.size(); i++) {
|
||||
uint32_t attachment = subpass.color_references[i].attachment;
|
||||
const AttachmentReference &color_reference = subpass.color_references[i];
|
||||
uint32_t attachment = color_reference.attachment;
|
||||
if (attachment == AttachmentReference::UNUSED) {
|
||||
if (!frames[frame_idx].null_rtv_handle.ptr) {
|
||||
// No null descriptor-handle created for this frame yet.
|
||||
@@ -4988,6 +5125,8 @@ void RenderingDeviceDriverD3D12::command_next_render_subpass(CommandBufferID p_c
|
||||
rtv_heap_walker.rewind();
|
||||
rtv_heap_walker.advance(rt_index);
|
||||
rtv_handles[i] = rtv_heap_walker.get_curr_cpu_handle();
|
||||
|
||||
cmd_buf_info->render_pass_state.attachment_layouts[attachment].aspect_layouts[TEXTURE_ASPECT_COLOR].expected_layout = color_reference.layout;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4999,9 +5138,26 @@ void RenderingDeviceDriverD3D12::command_next_render_subpass(CommandBufferID p_c
|
||||
dsv_heap_walker.rewind();
|
||||
dsv_heap_walker.advance(ds_index);
|
||||
dsv_handle = dsv_heap_walker.get_curr_cpu_handle();
|
||||
|
||||
RenderPassState::AttachmentLayout &attachment_layout = cmd_buf_info->render_pass_state.attachment_layouts[subpass.depth_stencil_reference.attachment];
|
||||
attachment_layout.aspect_layouts[TEXTURE_ASPECT_DEPTH].expected_layout = subpass.depth_stencil_reference.layout;
|
||||
attachment_layout.aspect_layouts[TEXTURE_ASPECT_STENCIL].expected_layout = subpass.depth_stencil_reference.layout;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < subpass.resolve_references.size(); i++) {
|
||||
const AttachmentReference &resolve_reference = subpass.resolve_references[i];
|
||||
uint32_t attachment = resolve_reference.attachment;
|
||||
|
||||
if (attachment != AttachmentReference::UNUSED) {
|
||||
// Vulkan expects the layout to be in color attachment layout, but D3D12 wants resolve destination.
|
||||
DEV_ASSERT(resolve_reference.layout == TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||
cmd_buf_info->render_pass_state.attachment_layouts[attachment].aspect_layouts[TEXTURE_ASPECT_COLOR].expected_layout = TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL;
|
||||
}
|
||||
}
|
||||
|
||||
_render_pass_enhanced_barriers_flush(p_cmd_buffer);
|
||||
|
||||
cmd_buf_info->cmd_list->OMSetRenderTargets(subpass.color_references.size(), rtv_handles, false, dsv_handle.ptr ? &dsv_handle : nullptr);
|
||||
}
|
||||
|
||||
|
||||
@@ -515,11 +515,21 @@ private:
|
||||
struct FramebufferInfo;
|
||||
struct RenderPassInfo;
|
||||
struct RenderPassState {
|
||||
struct AttachmentLayout {
|
||||
struct AspectLayout {
|
||||
TextureLayout cur_layout = TEXTURE_LAYOUT_UNDEFINED;
|
||||
TextureLayout expected_layout = TEXTURE_LAYOUT_UNDEFINED;
|
||||
};
|
||||
|
||||
AspectLayout aspect_layouts[TEXTURE_ASPECT_MAX];
|
||||
};
|
||||
|
||||
uint32_t current_subpass = UINT32_MAX;
|
||||
const FramebufferInfo *fb_info = nullptr;
|
||||
const RenderPassInfo *pass_info = nullptr;
|
||||
CD3DX12_RECT region_rect = {};
|
||||
bool region_is_all = false;
|
||||
LocalVector<AttachmentLayout> attachment_layouts;
|
||||
|
||||
const VertexFormatInfo *vf_info = nullptr;
|
||||
D3D12_VERTEX_BUFFER_VIEW vertex_buffer_views[8] = {};
|
||||
@@ -828,6 +838,7 @@ public:
|
||||
virtual void command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) override final;
|
||||
|
||||
private:
|
||||
void _render_pass_enhanced_barriers_flush(CommandBufferID p_cmd_buffer);
|
||||
void _end_render_pass(CommandBufferID p_cmd_buffer);
|
||||
|
||||
public:
|
||||
|
||||
Reference in New Issue
Block a user