diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index 154a8460434..4098ecea673 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -1491,6 +1491,9 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p if ((p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT)) { resource_desc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; } + if ((p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT)) { + ERR_FAIL_V_MSG(TextureID(), "CPU readable textures are unsupported on D3D12."); + } if ((p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && (p_format.usage_bits & TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT)) { // For VRS images we can't use the typeless format. resource_desc.Format = DXGI_FORMAT_R8_UINT; @@ -1511,7 +1514,7 @@ RDD::TextureID RenderingDeviceDriverD3D12::texture_create(const TextureFormat &p // Create. D3D12MA::ALLOCATION_DESC allocation_desc = {}; - allocation_desc.HeapType = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? 
D3D12_HEAP_TYPE_READBACK : D3D12_HEAP_TYPE_DEFAULT; + allocation_desc.HeapType = D3D12_HEAP_TYPE_DEFAULT; if ((resource_desc.Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))) { allocation_desc.ExtraHeapFlags = D3D12_HEAP_FLAG_ALLOW_ONLY_RT_DS_TEXTURES; } else { @@ -1912,7 +1915,8 @@ uint64_t RenderingDeviceDriverD3D12::texture_get_allocation_size(TextureID p_tex void RenderingDeviceDriverD3D12::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) { TextureInfo *tex_info = (TextureInfo *)p_texture.id; - UINT subresource = tex_info->desc.CalcSubresource(p_subresource.mipmap, p_subresource.layer, 0); + UINT plane = _compute_plane_slice(tex_info->format, p_subresource.aspect); + UINT subresource = tex_info->desc.CalcSubresource(p_subresource.mipmap, p_subresource.layer, plane); D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint = {}; UINT64 subresource_total_size = 0; @@ -1927,111 +1931,20 @@ void RenderingDeviceDriverD3D12::texture_get_copyable_layout(TextureID p_texture &subresource_total_size); *r_layout = {}; - r_layout->offset = footprint.Offset; r_layout->size = subresource_total_size; r_layout->row_pitch = footprint.Footprint.RowPitch; - r_layout->depth_pitch = subresource_total_size / tex_info->desc.Depth(); - r_layout->layer_pitch = subresource_total_size / tex_info->desc.ArraySize(); } Vector RenderingDeviceDriverD3D12::texture_get_data(TextureID p_texture, uint32_t p_layer) { - const TextureInfo *tex = (const TextureInfo *)p_texture.id; - - DataFormat tex_format = tex->format; - uint32_t tex_width = tex->desc.Width; - uint32_t tex_height = tex->desc.Height; - uint32_t tex_depth = tex->desc.DepthOrArraySize; - uint32_t tex_mipmaps = tex->mipmaps; - - uint32_t width, height, depth; - uint32_t tight_mip_size = get_image_format_required_size(tex_format, tex_width, tex_height, tex_depth, tex_mipmaps, &width, &height, &depth); - - Vector image_data; - 
image_data.resize(tight_mip_size); - - uint32_t blockw, blockh; - get_compressed_image_format_block_dimensions(tex_format, blockw, blockh); - uint32_t block_size = get_compressed_image_format_block_byte_size(tex_format); - uint32_t pixel_size = get_image_format_pixel_size(tex_format); - - { - uint8_t *w = image_data.ptrw(); - - uint32_t mipmap_offset = 0; - for (uint32_t mm_i = 0; mm_i < tex_mipmaps; mm_i++) { - uint32_t image_total = get_image_format_required_size(tex_format, tex_width, tex_height, tex_depth, mm_i + 1, &width, &height, &depth); - - uint8_t *write_ptr_mipmap = w + mipmap_offset; - tight_mip_size = image_total - mipmap_offset; - - RDD::TextureSubresource subres; - subres.aspect = RDD::TEXTURE_ASPECT_COLOR; - subres.layer = p_layer; - subres.mipmap = mm_i; - RDD::TextureCopyableLayout layout; - texture_get_copyable_layout(p_texture, subres, &layout); - - uint8_t *img_mem = texture_map(p_texture, subres); - ERR_FAIL_NULL_V(img_mem, Vector()); - - for (uint32_t z = 0; z < depth; z++) { - uint8_t *write_ptr = write_ptr_mipmap + z * tight_mip_size / depth; - const uint8_t *slice_read_ptr = img_mem + z * layout.depth_pitch; - - if (block_size > 1) { - // Compressed. - uint32_t line_width = (block_size * (width / blockw)); - for (uint32_t y = 0; y < height / blockh; y++) { - const uint8_t *rptr = slice_read_ptr + y * layout.row_pitch; - uint8_t *wptr = write_ptr + y * line_width; - - memcpy(wptr, rptr, line_width); - } - } else { - // Uncompressed. 
- for (uint32_t y = 0; y < height; y++) { - const uint8_t *rptr = slice_read_ptr + y * layout.row_pitch; - uint8_t *wptr = write_ptr + y * pixel_size * width; - memcpy(wptr, rptr, (uint64_t)pixel_size * width); - } - } - } - - texture_unmap(p_texture); - - mipmap_offset = image_total; - } - } - - return image_data; -} - -uint8_t *RenderingDeviceDriverD3D12::texture_map(TextureID p_texture, const TextureSubresource &p_subresource) { - TextureInfo *tex_info = (TextureInfo *)p_texture.id; -#ifdef DEBUG_ENABLED - ERR_FAIL_COND_V(tex_info->mapped_subresource != UINT_MAX, nullptr); -#endif - - UINT plane = _compute_plane_slice(tex_info->format, p_subresource.aspect); - UINT subresource = tex_info->desc.CalcSubresource(p_subresource.mipmap, p_subresource.layer, plane); - - void *data_ptr = nullptr; - HRESULT res = tex_info->resource->Map(subresource, &VOID_RANGE, &data_ptr); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), nullptr, "Map failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - tex_info->mapped_subresource = subresource; - return (uint8_t *)data_ptr; -} - -void RenderingDeviceDriverD3D12::texture_unmap(TextureID p_texture) { - TextureInfo *tex_info = (TextureInfo *)p_texture.id; -#ifdef DEBUG_ENABLED - ERR_FAIL_COND(tex_info->mapped_subresource == UINT_MAX); -#endif - tex_info->resource->Unmap(tex_info->mapped_subresource, &VOID_RANGE); - tex_info->mapped_subresource = UINT_MAX; + ERR_FAIL_V_MSG(Vector(), "Cannot get texture data. CPU readable textures are unsupported on D3D12."); } BitField RenderingDeviceDriverD3D12::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) { + if (p_cpu_readable) { + // CPU readable textures are unsupported on D3D12. + return 0; + } + D3D12_FEATURE_DATA_FORMAT_SUPPORT srv_rtv_support = {}; srv_rtv_support.Format = RD_TO_D3D12_FORMAT[p_format].general_format; if (srv_rtv_support.Format != DXGI_FORMAT_UNKNOWN) { // Some implementations (i.e., vkd3d-proton) error out instead of returning empty. 
@@ -4549,17 +4462,17 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID CommandBufferInfo *cmd_buf_info = (CommandBufferInfo *)p_cmd_buffer.id; BufferInfo *buf_info = (BufferInfo *)p_src_buffer.id; TextureInfo *tex_info = (TextureInfo *)p_dst_texture.id; + if (!barrier_capabilities.enhanced_barriers_supported) { _resource_transition_batch(cmd_buf_info, buf_info, 0, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); } - uint32_t pixel_size = get_image_format_pixel_size(tex_info->format); uint32_t block_w = 0, block_h = 0; get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h); for (uint32_t i = 0; i < p_regions.size(); i++) { - uint32_t region_pitch = (p_regions[i].texture_region_size.x * pixel_size * block_w) >> get_compressed_image_format_pixel_rshift(tex_info->format); - region_pitch = STEPIFY(region_pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + DEV_ASSERT((p_regions[i].buffer_offset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1)) == 0 && "Buffer offset must be aligned to 512 bytes. See API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT."); + DEV_ASSERT((p_regions[i].row_pitch & (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)) == 0 && "Row pitch must be aligned to 256 bytes. 
See API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP."); D3D12_PLACED_SUBRESOURCE_FOOTPRINT src_footprint = {}; src_footprint.Offset = p_regions[i].buffer_offset; @@ -4568,48 +4481,31 @@ void RenderingDeviceDriverD3D12::command_copy_buffer_to_texture(CommandBufferID STEPIFY(p_regions[i].texture_region_size.x, block_w), STEPIFY(p_regions[i].texture_region_size.y, block_h), p_regions[i].texture_region_size.z, - region_pitch); + p_regions[i].row_pitch); + CD3DX12_TEXTURE_COPY_LOCATION copy_src(buf_info->resource, src_footprint); - CD3DX12_BOX src_box( - 0, 0, 0, - STEPIFY(p_regions[i].texture_region_size.x, block_w), - STEPIFY(p_regions[i].texture_region_size.y, block_h), - p_regions[i].texture_region_size.z); + UINT dst_subresource = D3D12CalcSubresource( + p_regions[i].texture_subresource.mipmap, + p_regions[i].texture_subresource.layer, + _compute_plane_slice(tex_info->format, p_regions[i].texture_subresource.aspect), + tex_info->desc.MipLevels, + tex_info->desc.ArraySize()); if (!barrier_capabilities.enhanced_barriers_supported) { - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { - UINT dst_subresource = D3D12CalcSubresource( - p_regions[i].texture_subresources.mipmap, - p_regions[i].texture_subresources.base_layer + j, - _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); - CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); - - _resource_transition_batch(cmd_buf_info, tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); - } - + _resource_transition_batch(cmd_buf_info, tex_info, dst_subresource, 1, D3D12_RESOURCE_STATE_COPY_DEST); _resource_transitions_flush(cmd_buf_info); } - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { - UINT dst_subresource = D3D12CalcSubresource( - p_regions[i].texture_subresources.mipmap, - p_regions[i].texture_subresources.base_layer + j, - 
_compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); - CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); + CD3DX12_TEXTURE_COPY_LOCATION copy_dst(tex_info->resource, dst_subresource); - cmd_buf_info->cmd_list->CopyTextureRegion( - ©_dst, - p_regions[i].texture_offset.x, - p_regions[i].texture_offset.y, - p_regions[i].texture_offset.z, - ©_src, - &src_box); - } + cmd_buf_info->cmd_list->CopyTextureRegion( + ©_dst, + p_regions[i].texture_offset.x, + p_regions[i].texture_offset.y, + p_regions[i].texture_offset.z, + ©_src, + nullptr); } } @@ -4626,53 +4522,56 @@ void RenderingDeviceDriverD3D12::command_copy_texture_to_buffer(CommandBufferID get_compressed_image_format_block_dimensions(tex_info->format, block_w, block_h); for (uint32_t i = 0; i < p_regions.size(); i++) { + DEV_ASSERT((p_regions[i].buffer_offset & (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT - 1)) == 0 && "Buffer offset must be aligned to 512 bytes. See API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT."); + DEV_ASSERT((p_regions[i].row_pitch & (D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)) == 0 && "Row pitch must be aligned to 256 bytes. 
See API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP."); + + UINT src_subresource = D3D12CalcSubresource( + p_regions[i].texture_subresource.mipmap, + p_regions[i].texture_subresource.layer, + _compute_plane_slice(tex_info->format, p_regions[i].texture_subresource.aspect), + tex_info->desc.MipLevels, + tex_info->desc.ArraySize()); + if (!barrier_capabilities.enhanced_barriers_supported) { - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { - UINT src_subresource = D3D12CalcSubresource( - p_regions[i].texture_subresources.mipmap, - p_regions[i].texture_subresources.base_layer + j, - _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); - - _resource_transition_batch(cmd_buf_info, tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); - } - + _resource_transition_batch(cmd_buf_info, tex_info, src_subresource, 1, D3D12_RESOURCE_STATE_COPY_SOURCE); _resource_transitions_flush(cmd_buf_info); } - for (uint32_t j = 0; j < p_regions[i].texture_subresources.layer_count; j++) { - UINT src_subresource = D3D12CalcSubresource( - p_regions[i].texture_subresources.mipmap, - p_regions[i].texture_subresources.base_layer + j, - _compute_plane_slice(tex_info->format, p_regions[i].texture_subresources.aspect), - tex_info->desc.MipLevels, - tex_info->desc.ArraySize()); + CD3DX12_TEXTURE_COPY_LOCATION copy_src(tex_info->resource, src_subresource); - CD3DX12_TEXTURE_COPY_LOCATION copy_src(tex_info->resource, src_subresource); + CD3DX12_BOX src_box( + p_regions[i].texture_offset.x, + p_regions[i].texture_offset.y, + p_regions[i].texture_offset.z, + p_regions[i].texture_offset.x + STEPIFY(p_regions[i].texture_region_size.x, block_w), + p_regions[i].texture_offset.y + STEPIFY(p_regions[i].texture_region_size.y, block_h), + p_regions[i].texture_offset.z + p_regions[i].texture_region_size.z); - uint32_t computed_d = MAX(1, tex_info->desc.DepthOrArraySize >> 
p_regions[i].texture_subresources.mipmap); - uint32_t image_size = get_image_format_required_size( - tex_info->format, - MAX(1u, tex_info->desc.Width >> p_regions[i].texture_subresources.mipmap), - MAX(1u, tex_info->desc.Height >> p_regions[i].texture_subresources.mipmap), - computed_d, - 1); - uint32_t row_pitch = image_size / (p_regions[i].texture_region_size.y * computed_d) * block_h; - row_pitch = STEPIFY(row_pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT); + bool full_box = + src_box.left == 0 && + src_box.top == 0 && + src_box.front == 0 && + src_box.right == tex_info->desc.Width && + src_box.bottom == tex_info->desc.Height && + src_box.back == tex_info->desc.Depth(); - D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = {}; - dst_footprint.Offset = p_regions[i].buffer_offset; - dst_footprint.Footprint.Width = STEPIFY(p_regions[i].texture_region_size.x, block_w); - dst_footprint.Footprint.Height = STEPIFY(p_regions[i].texture_region_size.y, block_h); - dst_footprint.Footprint.Depth = p_regions[i].texture_region_size.z; - dst_footprint.Footprint.RowPitch = row_pitch; - dst_footprint.Footprint.Format = RD_TO_D3D12_FORMAT[tex_info->format].family; + D3D12_PLACED_SUBRESOURCE_FOOTPRINT dst_footprint = {}; + dst_footprint.Offset = p_regions[i].buffer_offset; + dst_footprint.Footprint.Format = RD_TO_D3D12_FORMAT[tex_info->format].family; + dst_footprint.Footprint.Width = STEPIFY(p_regions[i].texture_region_size.x, block_w); + dst_footprint.Footprint.Height = STEPIFY(p_regions[i].texture_region_size.y, block_h); + dst_footprint.Footprint.Depth = p_regions[i].texture_region_size.z; + dst_footprint.Footprint.RowPitch = p_regions[i].row_pitch; - CD3DX12_TEXTURE_COPY_LOCATION copy_dst(buf_info->resource, dst_footprint); + CD3DX12_TEXTURE_COPY_LOCATION copy_dst(buf_info->resource, dst_footprint); - cmd_buf_info->cmd_list->CopyTextureRegion(©_dst, 0, 0, 0, ©_src, nullptr); - } + cmd_buf_info->cmd_list->CopyTextureRegion( + ©_dst, + 0, + 0, + 0, + ©_src, + full_box ? 
nullptr : &src_box); } } diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index b9e87538ce5..23ccfaf5db5 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -364,7 +364,6 @@ private: TextureInfo *main_texture = nullptr; - UINT mapped_subresource = UINT_MAX; #ifdef DEBUG_ENABLED bool created_from_extension = false; #endif @@ -393,8 +392,6 @@ public: virtual uint64_t texture_get_allocation_size(TextureID p_texture) override final; virtual void texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) override final; virtual Vector texture_get_data(TextureID p_texture, uint32_t p_layer) override final; - virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final; - virtual void texture_unmap(TextureID p_texture) override final; virtual BitField texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final; diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm index 7ec81f44588..4dc17b15fa3 100644 --- a/drivers/metal/metal_objects.mm +++ b/drivers/metal/metal_objects.mm @@ -519,7 +519,7 @@ void MDCommandBuffer::_copy_texture_buffer(CopySource p_source, for (uint32_t i = 0; i < p_regions.size(); i++) { RDD::BufferTextureCopyRegion region = p_regions[i]; - uint32_t mip_level = region.texture_subresources.mipmap; + uint32_t mip_level = region.texture_subresource.mipmap; MTLOrigin txt_origin = MTLOriginMake(region.texture_offset.x, region.texture_offset.y, region.texture_offset.z); MTLSize src_extent = mipmapLevelSizeFromTexture(texture, mip_level); MTLSize txt_size = clampMTLSize(MTLSizeMake(region.texture_region_size.x, region.texture_region_size.y, 
region.texture_region_size.z), @@ -535,18 +535,15 @@ void MDCommandBuffer::_copy_texture_buffer(CopySource p_source, MTLBlitOption blit_options = options; if (pf.isDepthFormat(mtlPixFmt) && pf.isStencilFormat(mtlPixFmt)) { - bool want_depth = flags::all(region.texture_subresources.aspect, RDD::TEXTURE_ASPECT_DEPTH_BIT); - bool want_stencil = flags::all(region.texture_subresources.aspect, RDD::TEXTURE_ASPECT_STENCIL_BIT); - - // The stencil component is always 1 byte per pixel. // Don't reduce depths of 32-bit depth/stencil formats. - if (want_depth && !want_stencil) { + if (region.texture_subresource.aspect == RDD::TEXTURE_ASPECT_DEPTH) { if (pf.getBytesPerTexel(mtlPixFmt) != 4) { bytesPerRow -= buffImgWd; bytesPerImg -= buffImgWd * buffImgHt; } blit_options |= MTLBlitOptionDepthFromDepthStencil; - } else if (want_stencil && !want_depth) { + } else if (region.texture_subresource.aspect == RDD::TEXTURE_ASPECT_STENCIL) { + // The stencil component is always 1 byte per pixel. bytesPerRow = buffImgWd; bytesPerImg = buffImgWd * buffImgHt; blit_options |= MTLBlitOptionStencilFromDepthStencil; @@ -558,31 +555,27 @@ void MDCommandBuffer::_copy_texture_buffer(CopySource p_source, } if (p_source == CopySource::Buffer) { - for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { - [enc copyFromBuffer:buffer->metal_buffer - sourceOffset:region.buffer_offset + (bytesPerImg * lyrIdx) - sourceBytesPerRow:bytesPerRow - sourceBytesPerImage:bytesPerImg - sourceSize:txt_size - toTexture:texture - destinationSlice:region.texture_subresources.base_layer + lyrIdx - destinationLevel:mip_level - destinationOrigin:txt_origin - options:blit_options]; - } + [enc copyFromBuffer:buffer->metal_buffer + sourceOffset:region.buffer_offset + sourceBytesPerRow:bytesPerRow + sourceBytesPerImage:bytesPerImg + sourceSize:txt_size + toTexture:texture + destinationSlice:region.texture_subresource.layer + destinationLevel:mip_level + destinationOrigin:txt_origin + 
options:blit_options]; } else { - for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) { - [enc copyFromTexture:texture - sourceSlice:region.texture_subresources.base_layer + lyrIdx - sourceLevel:mip_level - sourceOrigin:txt_origin - sourceSize:txt_size - toBuffer:buffer->metal_buffer - destinationOffset:region.buffer_offset + (bytesPerImg * lyrIdx) - destinationBytesPerRow:bytesPerRow - destinationBytesPerImage:bytesPerImg - options:blit_options]; - } + [enc copyFromTexture:texture + sourceSlice:region.texture_subresource.layer + sourceLevel:mip_level + sourceOrigin:txt_origin + sourceSize:txt_size + toBuffer:buffer->metal_buffer + destinationOffset:region.buffer_offset + destinationBytesPerRow:bytesPerRow + destinationBytesPerImage:bytesPerImg + options:blit_options]; } } } diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index bd10c7a4f21..77968babe23 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -149,8 +149,6 @@ public: virtual uint64_t texture_get_allocation_size(TextureID p_texture) override final; virtual void texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) override final; virtual Vector texture_get_data(TextureID p_texture, uint32_t p_layer) override final; - virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final; - virtual void texture_unmap(TextureID p_texture) override final; virtual BitField texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final; diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index 4052c3dae98..4b729420dfa 100644 --- 
a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -563,33 +563,21 @@ uint64_t RenderingDeviceDriverMetal::texture_get_allocation_size(TextureID p_tex void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) { id __unsafe_unretained obj = rid::get(p_texture); - *r_layout = {}; PixelFormats &pf = *pixel_formats; DataFormat format = pf.getDataFormat(obj.pixelFormat); - MTLSize sz = MTLSizeMake(obj.width, obj.height, obj.depth); - - if (p_subresource.mipmap > 0) { - r_layout->offset = get_image_format_required_size(format, sz.width, sz.height, sz.depth, p_subresource.mipmap); - } - - sz = mipmapLevelSizeFromSize(sz, p_subresource.mipmap); + uint32_t w = MAX(1u, obj.width >> p_subresource.mipmap); + uint32_t h = MAX(1u, obj.height >> p_subresource.mipmap); + uint32_t d = MAX(1u, obj.depth >> p_subresource.mipmap); uint32_t bw = 0, bh = 0; get_compressed_image_format_block_dimensions(format, bw, bh); - uint32_t sbw = 0, sbh = 0; - r_layout->size = get_image_format_required_size(format, sz.width, sz.height, sz.depth, 1, &sbw, &sbh); - r_layout->row_pitch = r_layout->size / ((sbh / bh) * sz.depth); - r_layout->depth_pitch = r_layout->size / sz.depth; - uint32_t array_length = obj.arrayLength; - if (obj.textureType == MTLTextureTypeCube) { - array_length = 6; - } else if (obj.textureType == MTLTextureTypeCubeArray) { - array_length *= 6; - } - r_layout->layer_pitch = r_layout->size / array_length; + uint32_t sbw = 0, sbh = 0; + *r_layout = {}; + r_layout->size = get_image_format_required_size(format, w, h, d, 1, &sbw, &sbh); + r_layout->row_pitch = r_layout->size / ((sbh / bh) * d); } Vector RenderingDeviceDriverMetal::texture_get_data(TextureID p_texture, uint32_t p_layer) { @@ -654,20 +642,6 @@ Vector RenderingDeviceDriverMetal::texture_get_data(TextureID p_texture return image_data; } -uint8_t 
*RenderingDeviceDriverMetal::texture_map(TextureID p_texture, const TextureSubresource &p_subresource) { - id obj = rid::get(p_texture); - ERR_FAIL_COND_V_MSG(obj.storageMode != MTLStorageModeShared, nullptr, "Texture must be created with TEXTURE_USAGE_CPU_READ_BIT set."); - ERR_FAIL_COND_V_MSG(obj.buffer, nullptr, "Texture mapping is not supported for non-linear textures in Metal."); - ERR_FAIL_COND_V_MSG(p_subresource.layer > 0, nullptr, "A linear texture should have a single layer."); - ERR_FAIL_COND_V_MSG(p_subresource.mipmap > 0, nullptr, "A linear texture should have a single mipmap."); - - return (uint8_t *)obj.buffer.contents; -} - -void RenderingDeviceDriverMetal::texture_unmap(TextureID p_texture) { - // Nothing to do. -} - BitField RenderingDeviceDriverMetal::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) { PixelFormats &pf = *pixel_formats; if (pf.getMTLPixelFormat(p_format) == MTLPixelFormatInvalid) { diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 4f9d5e7d966..7cdc48e73be 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -2065,6 +2065,8 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat & } else { alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; } + } else if (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) { + alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT; } else { alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; } @@ -2326,43 +2328,17 @@ uint64_t RenderingDeviceDriverVulkan::texture_get_allocation_size(TextureID p_te void RenderingDeviceDriverVulkan::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) { const TextureInfo *tex_info = (const TextureInfo *)p_texture.id; + uint32_t w = MAX(1u, 
tex_info->vk_create_info.extent.width >> p_subresource.mipmap); + uint32_t h = MAX(1u, tex_info->vk_create_info.extent.height >> p_subresource.mipmap); + uint32_t d = MAX(1u, tex_info->vk_create_info.extent.depth >> p_subresource.mipmap); + + uint32_t bw = 0, bh = 0; + get_compressed_image_format_block_dimensions(tex_info->rd_format, bw, bh); + + uint32_t sbw = 0, sbh = 0; *r_layout = {}; - - if (tex_info->vk_create_info.tiling == VK_IMAGE_TILING_LINEAR) { - VkImageSubresource vk_subres = {}; - vk_subres.aspectMask = (VkImageAspectFlags)(1 << p_subresource.aspect); - vk_subres.arrayLayer = p_subresource.layer; - vk_subres.mipLevel = p_subresource.mipmap; - - VkSubresourceLayout vk_layout = {}; - vkGetImageSubresourceLayout(vk_device, tex_info->vk_view_create_info.image, &vk_subres, &vk_layout); - - r_layout->offset = vk_layout.offset; - r_layout->size = vk_layout.size; - r_layout->row_pitch = vk_layout.rowPitch; - r_layout->depth_pitch = vk_layout.depthPitch; - r_layout->layer_pitch = vk_layout.arrayPitch; - } else { - // Tight. 
- uint32_t w = tex_info->vk_create_info.extent.width; - uint32_t h = tex_info->vk_create_info.extent.height; - uint32_t d = tex_info->vk_create_info.extent.depth; - if (p_subresource.mipmap > 0) { - r_layout->offset = get_image_format_required_size(tex_info->rd_format, w, h, d, p_subresource.mipmap); - } - for (uint32_t i = 0; i < p_subresource.mipmap; i++) { - w = MAX(1u, w >> 1); - h = MAX(1u, h >> 1); - d = MAX(1u, d >> 1); - } - uint32_t bw = 0, bh = 0; - get_compressed_image_format_block_dimensions(tex_info->rd_format, bw, bh); - uint32_t sbw = 0, sbh = 0; - r_layout->size = get_image_format_required_size(tex_info->rd_format, w, h, d, 1, &sbw, &sbh); - r_layout->row_pitch = r_layout->size / ((sbh / bh) * d); - r_layout->depth_pitch = r_layout->size / d; - r_layout->layer_pitch = r_layout->size / tex_info->vk_create_info.arrayLayers; - } + r_layout->size = get_image_format_required_size(tex_info->rd_format, w, h, d, 1, &sbw, &sbh); + r_layout->row_pitch = r_layout->size / ((sbh / bh) * d); } Vector RenderingDeviceDriverVulkan::texture_get_data(TextureID p_texture, uint32_t p_layer) { @@ -2385,6 +2361,10 @@ Vector RenderingDeviceDriverVulkan::texture_get_data(TextureID p_textur uint32_t block_size = get_compressed_image_format_block_byte_size(tex_format); uint32_t pixel_size = get_image_format_pixel_size(tex_format); + void *data_ptr = nullptr; + VkResult err = vmaMapMemory(allocator, tex->allocation.handle, &data_ptr); + ERR_FAIL_COND_V_MSG(err, Vector(), "vmaMapMemory failed with error " + itos(err) + "."); + { uint8_t *w = image_data.ptrw(); @@ -2395,25 +2375,23 @@ Vector RenderingDeviceDriverVulkan::texture_get_data(TextureID p_textur uint8_t *write_ptr_mipmap = w + mipmap_offset; tight_mip_size = image_total - mipmap_offset; - RDD::TextureSubresource subres; - subres.aspect = RDD::TEXTURE_ASPECT_COLOR; - subres.layer = p_layer; - subres.mipmap = mm_i; - RDD::TextureCopyableLayout layout; - texture_get_copyable_layout(p_texture, subres, &layout); + 
VkImageSubresource vk_subres = {}; + vk_subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + vk_subres.arrayLayer = p_layer; + vk_subres.mipLevel = mm_i; - uint8_t *img_mem = texture_map(p_texture, subres); - ERR_FAIL_NULL_V(img_mem, Vector()); + VkSubresourceLayout vk_layout = {}; + vkGetImageSubresourceLayout(vk_device, tex->vk_view_create_info.image, &vk_subres, &vk_layout); for (uint32_t z = 0; z < depth; z++) { uint8_t *write_ptr = write_ptr_mipmap + z * tight_mip_size / depth; - const uint8_t *slice_read_ptr = img_mem + z * layout.depth_pitch; + const uint8_t *slice_read_ptr = (uint8_t *)data_ptr + vk_layout.offset + z * vk_layout.depthPitch; if (block_size > 1) { // Compressed. uint32_t line_width = (block_size * (width / blockw)); for (uint32_t y = 0; y < height / blockh; y++) { - const uint8_t *rptr = slice_read_ptr + y * layout.row_pitch; + const uint8_t *rptr = slice_read_ptr + y * vk_layout.rowPitch; uint8_t *wptr = write_ptr + y * line_width; memcpy(wptr, rptr, line_width); @@ -2421,52 +2399,22 @@ Vector RenderingDeviceDriverVulkan::texture_get_data(TextureID p_textur } else { // Uncompressed. 
for (uint32_t y = 0; y < height; y++) { - const uint8_t *rptr = slice_read_ptr + y * layout.row_pitch; + const uint8_t *rptr = slice_read_ptr + y * vk_layout.rowPitch; uint8_t *wptr = write_ptr + y * pixel_size * width; memcpy(wptr, rptr, (uint64_t)pixel_size * width); } } } - texture_unmap(p_texture); - mipmap_offset = image_total; } } + vmaUnmapMemory(allocator, tex->allocation.handle); + return image_data; } -uint8_t *RenderingDeviceDriverVulkan::texture_map(TextureID p_texture, const TextureSubresource &p_subresource) { - const TextureInfo *tex_info = (const TextureInfo *)p_texture.id; - - VkImageSubresource vk_subres = {}; - vk_subres.aspectMask = (VkImageAspectFlags)(1 << p_subresource.aspect); - vk_subres.arrayLayer = p_subresource.layer; - vk_subres.mipLevel = p_subresource.mipmap; - - VkSubresourceLayout vk_layout = {}; - vkGetImageSubresourceLayout(vk_device, tex_info->vk_view_create_info.image, &vk_subres, &vk_layout); - - void *data_ptr = nullptr; - VkResult err = vkMapMemory( - vk_device, - tex_info->allocation.info.deviceMemory, - tex_info->allocation.info.offset + vk_layout.offset, - vk_layout.size, - 0, - &data_ptr); - - vmaMapMemory(allocator, tex_info->allocation.handle, &data_ptr); - ERR_FAIL_COND_V_MSG(err, nullptr, "vkMapMemory failed with error " + itos(err) + "."); - return (uint8_t *)data_ptr; -} - -void RenderingDeviceDriverVulkan::texture_unmap(TextureID p_texture) { - const TextureInfo *tex_info = (const TextureInfo *)p_texture.id; - vmaUnmapMemory(allocator, tex_info->allocation.handle); -} - BitField RenderingDeviceDriverVulkan::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) { if (p_format >= DATA_FORMAT_ASTC_4x4_SFLOAT_BLOCK && p_format <= DATA_FORMAT_ASTC_12x12_SFLOAT_BLOCK && !enabled_device_extension_names.has(VK_EXT_TEXTURE_COMPRESSION_ASTC_HDR_EXTENSION_NAME)) { // Formats that were introduced later with extensions must not reach vkGetPhysicalDeviceFormatProperties if the extension isn't 
available. This means it's not supported. @@ -4528,10 +4476,14 @@ static void _texture_subresource_layers_to_vk(const RDD::TextureSubresourceLayer r_vk_subreources->layerCount = p_subresources.layer_count; } -static void _buffer_texture_copy_region_to_vk(const RDD::BufferTextureCopyRegion &p_copy_region, VkBufferImageCopy *r_vk_copy_region) { +static void _buffer_texture_copy_region_to_vk(const RDD::BufferTextureCopyRegion &p_copy_region, uint32_t p_buffer_row_length, VkBufferImageCopy *r_vk_copy_region) { *r_vk_copy_region = {}; r_vk_copy_region->bufferOffset = p_copy_region.buffer_offset; - _texture_subresource_layers_to_vk(p_copy_region.texture_subresources, &r_vk_copy_region->imageSubresource); + r_vk_copy_region->bufferRowLength = p_buffer_row_length; + r_vk_copy_region->imageSubresource.aspectMask = (VkImageAspectFlags)(1 << p_copy_region.texture_subresource.aspect); + r_vk_copy_region->imageSubresource.mipLevel = p_copy_region.texture_subresource.mipmap; + r_vk_copy_region->imageSubresource.baseArrayLayer = p_copy_region.texture_subresource.layer; + r_vk_copy_region->imageSubresource.layerCount = 1; r_vk_copy_region->imageOffset.x = p_copy_region.texture_offset.x; r_vk_copy_region->imageOffset.y = p_copy_region.texture_offset.y; r_vk_copy_region->imageOffset.z = p_copy_region.texture_offset.z; @@ -4638,14 +4590,20 @@ void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_ } void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView p_regions) { + const TextureInfo *tex_info = (const TextureInfo *)p_dst_texture.id; + + uint32_t pixel_size = get_image_format_pixel_size(tex_info->rd_format); + uint32_t block_size = get_compressed_image_format_block_byte_size(tex_info->rd_format); + uint32_t block_w, block_h; + get_compressed_image_format_block_dimensions(tex_info->rd_format, block_w, block_h); + 
VkBufferImageCopy *vk_copy_regions = ALLOCA_ARRAY(VkBufferImageCopy, p_regions.size()); for (uint32_t i = 0; i < p_regions.size(); i++) { - _buffer_texture_copy_region_to_vk(p_regions[i], &vk_copy_regions[i]); + _buffer_texture_copy_region_to_vk(p_regions[i], p_regions[i].row_pitch * block_w / (pixel_size * block_size), &vk_copy_regions[i]); } const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; const BufferInfo *buf_info = (const BufferInfo *)p_src_buffer.id; - const TextureInfo *tex_info = (const TextureInfo *)p_dst_texture.id; #ifdef DEBUG_ENABLED if (tex_info->transient) { ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_dst_texture must not be used in command_copy_buffer_to_texture."); @@ -4655,13 +4613,19 @@ void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID } void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView p_regions) { + const TextureInfo *tex_info = (const TextureInfo *)p_src_texture.id; + + uint32_t pixel_size = get_image_format_pixel_size(tex_info->rd_format); + uint32_t block_size = get_compressed_image_format_block_byte_size(tex_info->rd_format); + uint32_t block_w, block_h; + get_compressed_image_format_block_dimensions(tex_info->rd_format, block_w, block_h); + VkBufferImageCopy *vk_copy_regions = ALLOCA_ARRAY(VkBufferImageCopy, p_regions.size()); for (uint32_t i = 0; i < p_regions.size(); i++) { - _buffer_texture_copy_region_to_vk(p_regions[i], &vk_copy_regions[i]); + _buffer_texture_copy_region_to_vk(p_regions[i], p_regions[i].row_pitch * block_w / (pixel_size * block_size), &vk_copy_regions[i]); } const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id; - const TextureInfo *tex_info = (const TextureInfo *)p_src_texture.id; const BufferInfo *buf_info = (const BufferInfo *)p_dst_buffer.id; #ifdef DEBUG_ENABLED if 
(tex_info->transient) { diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 31b0e6a6d72..9bad455c48c 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -260,8 +260,6 @@ public: virtual uint64_t texture_get_allocation_size(TextureID p_texture) override final; virtual void texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) override final; virtual Vector texture_get_data(TextureID p_texture, uint32_t p_layer) override final; - virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) override final; - virtual void texture_unmap(TextureID p_texture) override final; virtual BitField texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) override final; virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) override final; diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 33f14b71b51..56d869bc1fa 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -1564,10 +1564,10 @@ Error RenderingDevice::_texture_initialize(RID p_texture, uint32_t p_layer, cons RDD::BufferTextureCopyRegion copy_region; copy_region.buffer_offset = staging_buffer_offset; - copy_region.texture_subresources.aspect = texture->read_aspect_flags; - copy_region.texture_subresources.mipmap = mm_i; - copy_region.texture_subresources.base_layer = p_layer; - copy_region.texture_subresources.layer_count = 1; + copy_region.row_pitch = pitch; + copy_region.texture_subresource.aspect = texture->read_aspect_flags.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT) ? 
RDD::TEXTURE_ASPECT_DEPTH : RDD::TEXTURE_ASPECT_COLOR; + copy_region.texture_subresource.mipmap = mm_i; + copy_region.texture_subresource.layer = p_layer; copy_region.texture_offset = Vector3i(0, 0, z); copy_region.texture_region_size = Vector3i(logic_width, logic_height, 1); driver->command_copy_buffer_to_texture(transfer_worker->command_buffer, transfer_worker->staging_buffer, texture->driver_id, p_dst_layout, copy_region); @@ -1713,10 +1713,10 @@ Error RenderingDevice::texture_update(RID p_texture, uint32_t p_layer, const Vec RDD::BufferTextureCopyRegion copy_region; copy_region.buffer_offset = alloc_offset; - copy_region.texture_subresources.aspect = texture->read_aspect_flags; - copy_region.texture_subresources.mipmap = mm_i; - copy_region.texture_subresources.base_layer = p_layer; - copy_region.texture_subresources.layer_count = 1; + copy_region.row_pitch = region_pitch; + copy_region.texture_subresource.aspect = texture->read_aspect_flags.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT) ? RDD::TEXTURE_ASPECT_DEPTH : RDD::TEXTURE_ASPECT_COLOR; + copy_region.texture_subresource.mipmap = mm_i; + copy_region.texture_subresource.layer = p_layer; copy_region.texture_offset = Vector3i(x, y, z); copy_region.texture_region_size = Vector3i(region_logic_w, region_logic_h, 1); @@ -1838,50 +1838,57 @@ void RenderingDevice::_texture_copy_shared(RID p_src_texture_rid, Texture *p_src DEV_ASSERT(false && "This path should not be reachable."); } - // FIXME: When using reinterpretation buffers, the only texture aspect supported is color. Depth or stencil contents won't get copied. 
- RDD::BufferTextureCopyRegion get_data_region; - RDG::RecordedBufferToTextureCopy update_copy; - RDD::TextureCopyableLayout first_copyable_layout; - RDD::TextureCopyableLayout copyable_layout; - RDD::TextureSubresource texture_subresource; - texture_subresource.aspect = RDD::TEXTURE_ASPECT_COLOR; - texture_subresource.layer = 0; - texture_subresource.mipmap = 0; - driver->texture_get_copyable_layout(p_dst_texture->shared_fallback->texture, texture_subresource, &first_copyable_layout); - // Copying each mipmap from main texture to a buffer and then to the slice texture. thread_local LocalVector get_data_vector; thread_local LocalVector update_vector; get_data_vector.clear(); update_vector.clear(); - for (uint32_t i = 0; i < p_dst_texture->mipmaps; i++) { - driver->texture_get_copyable_layout(p_dst_texture->shared_fallback->texture, texture_subresource, &copyable_layout); - uint32_t mipmap = p_dst_texture->base_mipmap + i; - get_data_region.buffer_offset = copyable_layout.offset - first_copyable_layout.offset; - get_data_region.texture_subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT; - get_data_region.texture_subresources.base_layer = p_dst_texture->base_layer; - get_data_region.texture_subresources.mipmap = mipmap; - get_data_region.texture_subresources.layer_count = p_dst_texture->layers; - get_data_region.texture_region_size.x = MAX(1U, p_src_texture->width >> mipmap); - get_data_region.texture_region_size.y = MAX(1U, p_src_texture->height >> mipmap); - get_data_region.texture_region_size.z = MAX(1U, p_src_texture->depth >> mipmap); - get_data_vector.push_back(get_data_region); + uint32_t buffer_size = 0; + uint32_t transfer_alignment = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT); - update_copy.from_buffer = shared_buffer; - update_copy.region.buffer_offset = get_data_region.buffer_offset; - update_copy.region.texture_subresources.aspect = RDD::TEXTURE_ASPECT_COLOR_BIT; - update_copy.region.texture_subresources.base_layer =
texture_subresource.layer; - update_copy.region.texture_subresources.mipmap = texture_subresource.mipmap; - update_copy.region.texture_subresources.layer_count = get_data_region.texture_subresources.layer_count; - update_copy.region.texture_region_size.x = get_data_region.texture_region_size.x; - update_copy.region.texture_region_size.y = get_data_region.texture_region_size.y; - update_copy.region.texture_region_size.z = get_data_region.texture_region_size.z; - update_vector.push_back(update_copy); + for (uint32_t i = 0; i < p_dst_texture->layers; i++) { + for (uint32_t j = 0; j < p_dst_texture->mipmaps; j++) { + // FIXME: When using reinterpretation buffers, the only texture aspect supported is color. Depth or stencil contents won't get copied. + RDD::TextureSubresource texture_subresource; + texture_subresource.aspect = RDD::TEXTURE_ASPECT_COLOR; + texture_subresource.layer = i; + texture_subresource.mipmap = j; - texture_subresource.mipmap++; + RDD::TextureCopyableLayout copyable_layout; + driver->texture_get_copyable_layout(p_dst_texture->shared_fallback->texture, texture_subresource, &copyable_layout); + + uint32_t mipmap = p_dst_texture->base_mipmap + j; + + RDD::BufferTextureCopyRegion get_data_region; + get_data_region.buffer_offset = STEPIFY(buffer_size, transfer_alignment); + get_data_region.row_pitch = copyable_layout.row_pitch; + get_data_region.texture_subresource.aspect = RDD::TEXTURE_ASPECT_COLOR; + get_data_region.texture_subresource.layer = p_dst_texture->base_layer + i; + get_data_region.texture_subresource.mipmap = mipmap; + get_data_region.texture_region_size.x = MAX(1U, p_src_texture->width >> mipmap); + get_data_region.texture_region_size.y = MAX(1U, p_src_texture->height >> mipmap); + get_data_region.texture_region_size.z = MAX(1U, p_src_texture->depth >> mipmap); + get_data_vector.push_back(get_data_region); + + RDG::RecordedBufferToTextureCopy update_copy; + update_copy.from_buffer = shared_buffer; + update_copy.region.buffer_offset =
get_data_region.buffer_offset; + update_copy.region.row_pitch = get_data_region.row_pitch; + update_copy.region.texture_subresource.aspect = RDD::TEXTURE_ASPECT_COLOR; + update_copy.region.texture_subresource.layer = texture_subresource.layer; + update_copy.region.texture_subresource.mipmap = texture_subresource.mipmap; + update_copy.region.texture_region_size.x = get_data_region.texture_region_size.x; + update_copy.region.texture_region_size.y = get_data_region.texture_region_size.y; + update_copy.region.texture_region_size.z = get_data_region.texture_region_size.z; + update_vector.push_back(update_copy); + + buffer_size = get_data_region.buffer_offset + copyable_layout.size; + } } + DEV_ASSERT(buffer_size <= driver->buffer_get_allocation_size(shared_buffer)); + draw_graph.add_texture_get_data(p_src_texture->driver_id, p_src_texture->draw_tracker, shared_buffer, get_data_vector, shared_buffer_tracker); draw_graph.add_texture_update(p_dst_texture->shared_fallback->texture, p_dst_texture->shared_fallback->texture_tracker, update_vector, shared_buffer_tracker); } else { @@ -1998,55 +2005,47 @@ Vector RenderingDevice::texture_get_data(RID p_texture, uint32_t p_laye if (tex->usage_flags & TEXTURE_USAGE_CPU_READ_BIT) { return driver->texture_get_data(tex->driver_id, p_layer); } else { - LocalVector mip_layouts; - uint32_t work_mip_alignment = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT); - uint32_t work_buffer_size = 0; + RDD::TextureAspect aspect = tex->read_aspect_flags.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT) ? 
RDD::TEXTURE_ASPECT_DEPTH : RDD::TEXTURE_ASPECT_COLOR; + uint32_t mip_alignment = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT); + uint32_t buffer_size = 0; + + thread_local LocalVector mip_layouts; + thread_local LocalVector copy_regions; mip_layouts.resize(tex->mipmaps); + copy_regions.resize(tex->mipmaps); + for (uint32_t i = 0; i < tex->mipmaps; i++) { RDD::TextureSubresource subres; - subres.aspect = RDD::TEXTURE_ASPECT_COLOR; + subres.aspect = aspect; subres.layer = p_layer; subres.mipmap = i; - driver->texture_get_copyable_layout(tex->driver_id, subres, &mip_layouts[i]); - // Assuming layers are tightly packed. If this is not true on some driver, we must modify the copy algorithm. - DEV_ASSERT(mip_layouts[i].layer_pitch == mip_layouts[i].size / tex->layers); + RDD::TextureCopyableLayout &mip_layout = mip_layouts[i]; + driver->texture_get_copyable_layout(tex->driver_id, subres, &mip_layout); - work_buffer_size = STEPIFY(work_buffer_size, work_mip_alignment) + mip_layouts[i].size; + uint32_t mip_offset = STEPIFY(buffer_size, mip_alignment); + buffer_size = mip_offset + mip_layout.size; + + RDD::BufferTextureCopyRegion &copy_region = copy_regions[i]; + copy_region.buffer_offset = mip_offset; + copy_region.row_pitch = mip_layout.row_pitch; + copy_region.texture_subresource.aspect = aspect; + copy_region.texture_subresource.mipmap = i; + copy_region.texture_subresource.layer = p_layer; + copy_region.texture_region_size.x = MAX(1u, tex->width >> i); + copy_region.texture_region_size.y = MAX(1u, tex->height >> i); + copy_region.texture_region_size.z = MAX(1u, tex->depth >> i); } - RDD::BufferID tmp_buffer = driver->buffer_create(work_buffer_size, RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU, frames_drawn); + RDD::BufferID tmp_buffer = driver->buffer_create(buffer_size, RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU, frames_drawn); ERR_FAIL_COND_V(!tmp_buffer, Vector()); - thread_local LocalVector
command_buffer_texture_copy_regions_vector; - command_buffer_texture_copy_regions_vector.clear(); - - uint32_t w = tex->width; - uint32_t h = tex->height; - uint32_t d = tex->depth; - for (uint32_t i = 0; i < tex->mipmaps; i++) { - RDD::BufferTextureCopyRegion copy_region; - copy_region.buffer_offset = mip_layouts[i].offset; - copy_region.texture_subresources.aspect = tex->read_aspect_flags; - copy_region.texture_subresources.mipmap = i; - copy_region.texture_subresources.base_layer = p_layer; - copy_region.texture_subresources.layer_count = 1; - copy_region.texture_region_size.x = w; - copy_region.texture_region_size.y = h; - copy_region.texture_region_size.z = d; - command_buffer_texture_copy_regions_vector.push_back(copy_region); - - w = MAX(1u, w >> 1); - h = MAX(1u, h >> 1); - d = MAX(1u, d >> 1); - } - if (_texture_make_mutable(tex, p_texture)) { // The texture must be mutable to be used as a copy source due to layout transitions. draw_graph.add_synchronization(); } - draw_graph.add_texture_get_data(tex->driver_id, tex->draw_tracker, tmp_buffer, command_buffer_texture_copy_regions_vector); + draw_graph.add_texture_get_data(tex->driver_id, tex->draw_tracker, tmp_buffer, copy_regions); // Flush everything so memory can be safely mapped. _flush_and_stall_for_all_frames(); @@ -2064,28 +2063,37 @@ Vector RenderingDevice::texture_get_data(RID p_texture, uint32_t p_laye uint8_t *write_ptr = buffer_data.ptrw(); - w = tex->width; - h = tex->height; - d = tex->depth; for (uint32_t i = 0; i < tex->mipmaps; i++) { uint32_t width = 0, height = 0, depth = 0; - uint32_t tight_mip_size = get_image_format_required_size(tex->format, w, h, d, 1, &width, &height, &depth); - uint32_t tight_row_pitch = tight_mip_size / ((height / block_h) * depth); - // Copy row-by-row to erase padding due to alignments. 
- const uint8_t *rp = read_ptr; - uint8_t *wp = write_ptr; - for (uint32_t row = h * d / block_h; row != 0; row--) { - memcpy(wp, rp, tight_row_pitch); - rp += mip_layouts[i].row_pitch; - wp += tight_row_pitch; + uint32_t tight_mip_size = get_image_format_required_size( + tex->format, + MAX(1u, tex->width >> i), + MAX(1u, tex->height >> i), + MAX(1u, tex->depth >> i), + 1, + &width, + &height, + &depth); + + uint32_t row_count = (height / block_h) * depth; + uint32_t tight_row_pitch = tight_mip_size / row_count; + + const uint8_t *rp = read_ptr + copy_regions[i].buffer_offset; + uint32_t row_pitch = mip_layouts[i].row_pitch; + + if (tight_row_pitch == row_pitch) { + // Same row pitch, we can copy directly. + memcpy(write_ptr, rp, tight_mip_size); + write_ptr += tight_mip_size; + } else { + // Copy row-by-row to erase padding. + for (uint32_t j = 0; j < row_count; j++) { + memcpy(write_ptr, rp, tight_row_pitch); + rp += row_pitch; + write_ptr += tight_row_pitch; + } } - - w = MAX(block_w, w >> 1); - h = MAX(block_h, h >> 1); - d = MAX(1u, d >> 1); - read_ptr += mip_layouts[i].size; - write_ptr += tight_mip_size; } driver->buffer_unmap(tmp_buffer); @@ -2107,21 +2115,6 @@ Error RenderingDevice::texture_get_data_async(RID p_texture, uint32_t p_layer, c _check_transfer_worker_texture(tex); - thread_local LocalVector mip_layouts; - mip_layouts.resize(tex->mipmaps); - for (uint32_t i = 0; i < tex->mipmaps; i++) { - RDD::TextureSubresource subres; - subres.aspect = RDD::TEXTURE_ASPECT_COLOR; - subres.layer = p_layer; - subres.mipmap = i; - driver->texture_get_copyable_layout(tex->driver_id, subres, &mip_layouts[i]); - - // Assuming layers are tightly packed. If this is not true on some driver, we must modify the copy algorithm. 
- DEV_ASSERT(mip_layouts[i].layer_pitch == mip_layouts[i].size / tex->layers); - } - - ERR_FAIL_COND_V(mip_layouts.is_empty(), ERR_INVALID_PARAMETER); - if (_texture_make_mutable(tex, p_texture)) { // The texture must be mutable to be used as a copy source due to layout transitions. draw_graph.add_synchronization(); @@ -2189,10 +2182,10 @@ Error RenderingDevice::texture_get_data_async(RID p_texture, uint32_t p_layer, c RDD::BufferTextureCopyRegion copy_region; copy_region.buffer_offset = block_write_offset; - copy_region.texture_subresources.aspect = tex->read_aspect_flags; - copy_region.texture_subresources.mipmap = i; - copy_region.texture_subresources.base_layer = p_layer; - copy_region.texture_subresources.layer_count = 1; + copy_region.row_pitch = region_pitch; + copy_region.texture_subresource.aspect = tex->read_aspect_flags.has_flag(RDD::TEXTURE_ASPECT_DEPTH_BIT) ? RDD::TEXTURE_ASPECT_DEPTH : RDD::TEXTURE_ASPECT_COLOR; + copy_region.texture_subresource.mipmap = i; + copy_region.texture_subresource.layer = p_layer; copy_region.texture_offset = Vector3i(x, y, z); copy_region.texture_region_size = Vector3i(region_logic_w, region_logic_h, 1); frames[frame].download_texture_staging_buffers.push_back(download_staging_buffers.blocks[download_staging_buffers.current].driver_id); @@ -6813,7 +6806,6 @@ void RenderingDevice::_stall_for_frame(uint32_t p_frame) { // Flush any pending requests for asynchronous texture downloads. 
if (!frames[p_frame].download_texture_get_data_requests.is_empty()) { GodotProfileZoneGrouped(_profile_zone, "flush asynchronous texture downloads"); - uint32_t pitch_step = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP); for (uint32_t i = 0; i < frames[p_frame].download_texture_get_data_requests.size(); i++) { const TextureGetDataRequest &request = frames[p_frame].download_texture_get_data_requests[i]; uint32_t texture_size = get_image_format_required_size(request.format, request.width, request.height, request.depth, request.mipmaps); @@ -6826,18 +6818,15 @@ void RenderingDevice::_stall_for_frame(uint32_t p_frame) { uint32_t block_size = get_compressed_image_format_block_byte_size(request.format); uint32_t pixel_size = get_image_format_pixel_size(request.format); - uint32_t pixel_rshift = get_compressed_image_format_pixel_rshift(request.format); uint32_t region_size = texture_download_region_size_px; for (uint32_t j = 0; j < request.frame_local_count; j++) { uint32_t local_index = request.frame_local_index + j; const RDD::BufferTextureCopyRegion &region = frames[p_frame].download_buffer_texture_copy_regions[local_index]; - uint32_t w = STEPIFY(request.width >> region.texture_subresources.mipmap, block_w); - uint32_t h = STEPIFY(request.height >> region.texture_subresources.mipmap, block_h); + uint32_t w = STEPIFY(request.width >> region.texture_subresource.mipmap, block_w); + uint32_t h = STEPIFY(request.height >> region.texture_subresource.mipmap, block_h); uint32_t region_w = MIN(region_size, w - region.texture_offset.x); uint32_t region_h = MIN(region_size, h - region.texture_offset.y); - uint32_t region_pitch = (region_w * pixel_size * block_w) >> pixel_rshift; - region_pitch = STEPIFY(region_pitch, pitch_step); uint8_t *buffer_data = driver->buffer_map(frames[p_frame].download_texture_staging_buffers[local_index]); const uint8_t *read_ptr = buffer_data + region.buffer_offset; @@ -6851,7 +6840,7 @@ void
RenderingDevice::_stall_for_frame(uint32_t p_frame) { for (uint32_t y = region_h / block_h; y > 0; y--) { memcpy(write_ptr, read_ptr, (region_w / block_w) * unit_size); write_ptr += (w / block_w) * unit_size; - read_ptr += region_pitch; + read_ptr += region.row_pitch; } driver->buffer_unmap(frames[p_frame].download_texture_staging_buffers[local_index]); diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 26ce888f158..11d5f5be97b 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -268,11 +268,8 @@ public: }; struct TextureCopyableLayout { - uint64_t offset = 0; uint64_t size = 0; uint64_t row_pitch = 0; - uint64_t depth_pitch = 0; - uint64_t layer_pitch = 0; }; virtual TextureID texture_create(const TextureFormat &p_format, const TextureView &p_view) = 0; @@ -282,11 +279,11 @@ public: virtual TextureID texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) = 0; virtual void texture_free(TextureID p_texture) = 0; virtual uint64_t texture_get_allocation_size(TextureID p_texture) = 0; + // Returns a texture layout for buffer <-> texture copies. If you are copying multiple texture subresources to/from the same buffer, + // you are responsible for correctly aligning the start offset for every buffer region. See API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT. virtual void texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) = 0; // Returns the data of a texture layer for a CPU texture that was created with TEXTURE_USAGE_CPU_READ_BIT. 
virtual Vector texture_get_data(TextureID p_texture, uint32_t p_layer) = 0; - virtual uint8_t *texture_map(TextureID p_texture, const TextureSubresource &p_subresource) = 0; - virtual void texture_unmap(TextureID p_texture) = 0; virtual BitField texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) = 0; virtual bool texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) = 0; @@ -547,7 +544,8 @@ public: struct BufferTextureCopyRegion { uint64_t buffer_offset = 0; - TextureSubresourceLayers texture_subresources; + uint64_t row_pitch = 0; + TextureSubresource texture_subresource; Vector3i texture_offset; Vector3i texture_region_size; };