
Metal: Stable argument buffers; GPU rendering crashes; visionOS exports

Supersedes #110683
Stuart Carnie
2025-10-24 11:03:44 +11:00
parent ab6c6eece8
commit 97c17aedc7
24 changed files with 2635 additions and 2082 deletions

View File

@@ -266,7 +266,7 @@ uint32_t RenderingShaderContainerD3D12::_to_bytes_footer_extra_data(uint8_t *p_b
}
#if NIR_ENABLED
bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectedShaderStage> p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap<int, nir_shader *> &r_stages_nir_shaders, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed) {
bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStage> p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap<int, nir_shader *> &r_stages_nir_shaders, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed) {
r_stages_processed.clear();
dxil_spirv_runtime_conf dxil_runtime_conf = {};
@@ -428,7 +428,7 @@ bool RenderingShaderContainerD3D12::_convert_nir_to_dxil(const HashMap<int, nir_
return true;
}
bool RenderingShaderContainerD3D12::_convert_spirv_to_dxil(Span<ReflectedShaderStage> p_spirv, HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_dxil_blobs, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed) {
bool RenderingShaderContainerD3D12::_convert_spirv_to_dxil(Span<ReflectShaderStage> p_spirv, HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_dxil_blobs, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed) {
r_dxil_blobs.clear();
HashMap<int, nir_shader *> stages_nir_shaders;
@@ -763,7 +763,7 @@ void RenderingShaderContainerD3D12::_nir_report_bitcode_bit_offset(uint64_t p_bi
}
#endif
void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const RenderingDeviceCommons::ShaderReflection &p_reflection) {
void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const ReflectShader &p_shader) {
reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size());
reflection_specialization_data_d3d12.resize(reflection_specialization_data.size());
@@ -779,8 +779,9 @@ void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const Rende
}
}
bool RenderingShaderContainerD3D12::_set_code_from_spirv(Span<ReflectedShaderStage> p_spirv) {
bool RenderingShaderContainerD3D12::_set_code_from_spirv(const ReflectShader &p_shader) {
#if NIR_ENABLED
const LocalVector<ReflectShaderStage> &p_spirv = p_shader.shader_stages;
reflection_data_d3d12.nir_runtime_data_root_param_idx = UINT32_MAX;
for (int64_t i = 0; i < reflection_specialization_data.size(); i++) {
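For context, a minimal standalone sketch of the shape of this refactor (simplified types, not the engine's real definitions): callers now receive the whole ReflectShader and read the stage list from it, rather than being handed a raw span of stages.

	#include <cstdint>
	#include <vector>

	struct ReflectShaderStage {
		int stage = 0;               // Hypothetical stage tag (vertex, fragment, ...).
		std::vector<uint32_t> spirv; // Per-stage SPIR-V words.
	};

	struct ReflectShader {
		std::vector<ReflectShaderStage> shader_stages; // All stages plus shared reflection data.
	};

	// New-style entry point: the stage list is derived from the shader object.
	bool set_code_from_spirv(const ReflectShader &p_shader) {
		const std::vector<ReflectShaderStage> &spirv = p_shader.shader_stages;
		return !spirv.empty();
	}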

View File

@@ -122,9 +122,9 @@ protected:
uint32_t root_signature_crc = 0;
#if NIR_ENABLED
bool _convert_spirv_to_nir(Span<ReflectedShaderStage> p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap<int, nir_shader *> &r_stages_nir_shaders, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed);
bool _convert_spirv_to_nir(Span<ReflectShaderStage> p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap<int, nir_shader *> &r_stages_nir_shaders, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed);
bool _convert_nir_to_dxil(const HashMap<int, nir_shader *> &p_stages_nir_shaders, BitField<RenderingDeviceCommons::ShaderStage> p_stages_processed, HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_dxil_blobs);
bool _convert_spirv_to_dxil(Span<ReflectedShaderStage> p_spirv, HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_dxil_blobs, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed);
bool _convert_spirv_to_dxil(Span<ReflectShaderStage> p_spirv, HashMap<RenderingDeviceCommons::ShaderStage, Vector<uint8_t>> &r_dxil_blobs, Vector<RenderingDeviceCommons::ShaderStage> &r_stages, BitField<RenderingDeviceCommons::ShaderStage> &r_stages_processed);
bool _generate_root_signature(BitField<RenderingDeviceCommons::ShaderStage> p_stages_processed);
// GodotNirCallbacks.
@@ -146,8 +146,8 @@ protected:
virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
virtual uint32_t _to_bytes_footer_extra_data(uint8_t *p_bytes) const override;
virtual void _set_from_shader_reflection_post(const RenderingDeviceCommons::ShaderReflection &p_reflection) override;
virtual bool _set_code_from_spirv(Span<ReflectedShaderStage> p_spirv) override;
virtual void _set_from_shader_reflection_post(const ReflectShader &p_shader) override;
virtual bool _set_code_from_spirv(const ReflectShader &p_shader) override;
public:
struct ShaderReflectionD3D12 {

View File

@@ -46,6 +46,7 @@ env_metal.Append(CCFLAGS=["-fmodules", "-fcxx-modules"])
driver_obj = []
env_metal.add_source_files(driver_obj, "*.mm")
env_metal.add_source_files(driver_obj, "*.cpp")
env.drivers_sources += driver_obj
# Needed to force rebuilding the driver files when the thirdparty library is updated.

View File

@@ -0,0 +1,129 @@
/**************************************************************************/
/* metal_device_profile.cpp */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#include "metal_device_profile.h"
#include "metal_utils.h"
Mutex MetalDeviceProfile::profiles_lock;
HashMap<MetalDeviceProfile::ProfileKey, MetalDeviceProfile> MetalDeviceProfile::profiles;
const MetalDeviceProfile *MetalDeviceProfile::get_profile(Platform p_platform, GPU p_gpu, MinOsVersion p_min_os_version) {
DEV_ASSERT(p_platform == Platform::macOS || p_platform == Platform::iOS || p_platform == Platform::visionOS);
MutexLock lock(profiles_lock);
ProfileKey key(p_min_os_version, p_platform, p_gpu);
if (MetalDeviceProfile *profile = profiles.getptr(key)) {
return profile;
}
MetalDeviceProfile res;
res.platform = p_platform;
res.gpu = p_gpu;
res.min_os_version = p_min_os_version;
switch (p_platform) {
case Platform::macOS: {
if (p_min_os_version >= os_version::MACOS_26_0) {
res.features.msl_version = MSL_VERSION_40;
} else if (p_min_os_version >= os_version::MACOS_15_0) {
res.features.msl_version = MSL_VERSION_32;
} else if (p_min_os_version >= os_version::MACOS_14_0) {
res.features.msl_version = MSL_VERSION_31;
} else if (p_min_os_version >= os_version::MACOS_13_0) {
res.features.msl_version = MSL_VERSION_30;
} else if (p_min_os_version >= os_version::MACOS_12_0) {
res.features.msl_version = MSL_VERSION_24;
} else {
res.features.msl_version = MSL_VERSION_23;
}
res.features.use_argument_buffers = p_min_os_version >= os_version::MACOS_13_0;
res.features.simdPermute = true;
} break;
case Platform::iOS: {
if (p_min_os_version >= os_version::IOS_26_0) {
res.features.msl_version = MSL_VERSION_40;
} else if (p_min_os_version >= os_version::IOS_18_0) {
res.features.msl_version = MSL_VERSION_32;
} else if (p_min_os_version >= os_version::IOS_17_0) {
res.features.msl_version = MSL_VERSION_31;
} else if (p_min_os_version >= os_version::IOS_16_0) {
res.features.msl_version = MSL_VERSION_30;
} else if (p_min_os_version >= os_version::IOS_15_0) {
res.features.msl_version = MSL_VERSION_24;
} else {
res.features.msl_version = MSL_VERSION_23;
}
switch (p_gpu) {
case GPU::Apple1:
case GPU::Apple2:
case GPU::Apple3:
case GPU::Apple4:
case GPU::Apple5: {
res.features.simdPermute = false;
res.features.use_argument_buffers = false;
} break;
case GPU::Apple6:
case GPU::Apple7:
case GPU::Apple8:
case GPU::Apple9: {
res.features.use_argument_buffers = p_min_os_version >= os_version::IOS_16_0;
res.features.simdPermute = true;
} break;
}
} break;
case Platform::visionOS: {
if (p_min_os_version >= os_version::VISIONOS_26_0) {
res.features.msl_version = MSL_VERSION_40;
} else if (p_min_os_version >= os_version::VISIONOS_02_4) {
res.features.msl_version = MSL_VERSION_32;
} else {
ERR_FAIL_V_MSG(nullptr, "visionOS 2.4 is the minimum supported version.");
}
switch (p_gpu) {
case GPU::Apple8:
case GPU::Apple9: {
res.features.use_argument_buffers = true;
res.features.simdPermute = true;
} break;
default: {
CRASH_NOW_MSG("visionOS requires an Apple8 or newer GPU.");
}
}
} break;
}
return &profiles.insert(key, res)->value;
}
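A hedged usage sketch of the cache above; the platform, GPU, and minimum OS values are illustrative:

	static void example_profile_lookup() {
		// Resolve (and cache) a profile for a hypothetical iOS 17.0 export
		// targeting an Apple7 GPU. Repeat calls with the same key hit the cache.
		const MetalDeviceProfile *profile = MetalDeviceProfile::get_profile(
				MetalDeviceProfile::Platform::iOS,
				MetalDeviceProfile::GPU::Apple7,
				MinOsVersion(17, 0));
		// Expected for this combination: MSL 3.1 and argument buffers enabled.
		uint32_t msl = profile->features.msl_version;
		bool arg_buffers = profile->features.use_argument_buffers;
		(void)msl;
		(void)arg_buffers;
	}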

View File

@@ -0,0 +1,157 @@
/**************************************************************************/
/* metal_device_profile.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#include "core/os/mutex.h"
#include "core/string/ustring.h"
#include "core/templates/hash_map.h"
#include "core/typedefs.h"
class MinOsVersion {
uint32_t version;
public:
String to_compiler_os_version() const;
bool is_null() const { return version == UINT32_MAX; }
bool is_valid() const { return version != UINT32_MAX; }
MinOsVersion(const String &p_version);
constexpr explicit MinOsVersion(uint32_t p_version) :
version(p_version) {}
constexpr MinOsVersion(uint32_t p_major, uint32_t p_minor, uint32_t p_patch = 0) :
version(p_major * 10000 + p_minor * 100 + p_patch) {}
constexpr MinOsVersion() :
version(UINT32_MAX) {}
bool operator>(uint32_t p_other) const {
return version > p_other;
}
constexpr operator uint32_t() const { return version; }
};
namespace os_version {
constexpr MinOsVersion MACOS_26_0(26'00'00);
constexpr MinOsVersion MACOS_15_0(15'00'00);
constexpr MinOsVersion MACOS_14_0(14'00'00);
constexpr MinOsVersion MACOS_13_0(13'00'00);
constexpr MinOsVersion MACOS_12_0(12'00'00);
constexpr MinOsVersion MACOS_11_0(11'00'00);
constexpr MinOsVersion IOS_26_0(26'00'00);
constexpr MinOsVersion IOS_18_0(18'00'00);
constexpr MinOsVersion IOS_17_0(17'00'00);
constexpr MinOsVersion IOS_16_0(16'00'00);
constexpr MinOsVersion IOS_15_0(15'00'00);
constexpr MinOsVersion VISIONOS_26_0(26'00'00);
constexpr MinOsVersion VISIONOS_02_4(2'04'00);
} //namespace os_version
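The encoding packs versions as major * 10000 + minor * 100 + patch; for illustration, a couple of compile-time checks make the mapping concrete:

	static_assert(uint32_t(MinOsVersion(26, 0)) == 260000u, "26.0 encodes as 260000");
	static_assert(uint32_t(os_version::VISIONOS_02_4) == 20400u, "visionOS 2.4 encodes as 20400");
	static_assert(uint32_t(os_version::MACOS_14_0) > uint32_t(os_version::MACOS_13_0), "versions compare numerically");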
/// @brief A minimal structure that defines a device profile for Metal.
///
/// This structure is used by the `RenderingShaderContainerMetal` class to
/// determine options for compiling SPIR-V to Metal source. It currently only
/// contains the minimum properties required to transform shaders from SPIR-V to Metal
/// and potentially compile to a `.metallib`.
struct MetalDeviceProfile {
enum class Platform : uint32_t {
macOS = 0,
iOS = 1,
visionOS = 2,
};
/*! @brief The GPU family.
*
* NOTE: These values match Apple's MTLGPUFamily
*/
enum class GPU : uint32_t {
Apple1 = 1001,
Apple2 = 1002,
Apple3 = 1003,
Apple4 = 1004,
Apple5 = 1005,
Apple6 = 1006,
Apple7 = 1007,
Apple8 = 1008,
Apple9 = 1009,
};
enum class ArgumentBuffersTier : uint32_t {
Tier1 = 0,
Tier2 = 1,
};
struct Features {
uint32_t msl_version = 0;
bool use_argument_buffers = false;
bool simdPermute = false;
};
Platform platform = Platform::macOS;
GPU gpu = GPU::Apple4;
MinOsVersion min_os_version;
Features features;
static const MetalDeviceProfile *get_profile(Platform p_platform, GPU p_gpu, MinOsVersion p_min_os_version);
MetalDeviceProfile() = default;
private:
static Mutex profiles_lock; ///< Mutex to protect access to the profiles map.
struct ProfileKey {
friend struct HashMapHasherDefaultImpl<ProfileKey>;
union {
struct {
uint32_t min_os_version;
uint16_t platform;
uint16_t gpu;
};
uint64_t value = 0;
};
ProfileKey() = default;
ProfileKey(MinOsVersion p_min_os_version, Platform p_platform, GPU p_gpu) :
min_os_version(p_min_os_version), platform((uint16_t)p_platform), gpu((uint16_t)p_gpu) {}
_FORCE_INLINE_ uint32_t hash() const {
return hash_one_uint64(value);
}
bool operator==(const ProfileKey &p_other) const {
return value == p_other.value;
}
};
static HashMap<ProfileKey, MetalDeviceProfile> profiles;
};
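Why the union: the three key fields alias the bits of a single uint64_t (on the little-endian targets Apple ships), so equality and hashing reduce to one 64-bit compare and one hash_one_uint64 call. A standalone replica of the layout, using the same anonymous-struct-in-union compiler extension as the original:

	#include <cstdint>

	union PackedProfileKey {
		struct {
			uint32_t min_os_version; // Bits 0-31.
			uint16_t platform;       // Bits 32-47.
			uint16_t gpu;            // Bits 48-63.
		};
		uint64_t value = 0; // Hash and compare this single word.
	};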

View File

@@ -70,15 +70,20 @@ typedef NS_OPTIONS(NSUInteger, SampleCount) {
};
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
uint32_t mslVersionMajor = 0;
uint32_t mslVersionMinor = 0;
/// Maximum version of the Metal Shading Language available.
uint32_t msl_max_version = 0;
/*! @brief Target version of the Metal Shading Language used to translate shaders.
*
* This can be used to override the features used to generate shaders. Primarily
* intended for engine developers, for testing.
*/
uint32_t msl_target_version = 0;
MTLGPUFamily highestFamily = MTLGPUFamilyApple4;
bool supportsBCTextureCompression = false;
bool supportsDepth24Stencil8 = false;
bool supports32BitFloatFiltering = false;
bool supports32BitMSAA = false;
bool supportsMac = TARGET_OS_OSX;
MTLLanguageVersion mslVersionEnum = MTLLanguageVersion1_2;
SampleCount supportedSampleCounts = SampleCount1;
long hostMemoryPageSize = 0;
bool layeredRendering = false;
@@ -89,14 +94,29 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
bool tessellationShader = false; /**< If true, tessellation shaders are supported. */
bool imageCubeArray = false; /**< If true, image cube arrays are supported. */
MTLArgumentBuffersTier argument_buffers_tier = MTLArgumentBuffersTier1;
/// If true, argument encoders are required to encode arguments into an argument buffer.
bool needs_arg_encoders = true;
bool needs_arg_encoders = true; /**< If true, argument encoders are required to encode arguments into an argument buffer. */
bool use_argument_buffers = true; /**< If true, argument buffers can be used instead of slot binding, if available. */
bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */
bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */
bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */
bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported by the GPU. */
bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported by the GPU. */
bool supports_native_image_atomics = false; /**< If true, native image atomic operations are supported by the OS. */
bool supports_residency_sets = false; /**< If true, residency sets (MTLResidencySet) are supported by the OS. */
/*!
* Check if argument buffers are fully supported, which requires tier 2 support and no need for argument encoders.
*/
_FORCE_INLINE_ bool argument_buffers_supported() const {
return argument_buffers_tier == MTLArgumentBuffersTier2 && needs_arg_encoders == false;
}
/*!
* Check if argument buffers can be used, which requires that they are supported and that the user has enabled their use.
*/
_FORCE_INLINE_ bool argument_buffers_enabled() const {
return use_argument_buffers && argument_buffers_supported();
}
};
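A small hedged sketch of the two-level gate these helpers express, assuming `features` is a populated MetalFeatures:

	static void choose_binding_path(const MetalFeatures &features) {
		if (features.argument_buffers_enabled()) {
			// Tier 2, no argument encoders needed, and the user has not opted out:
			// encode uniform sets into argument buffers.
		} else {
			// Fall back to binding each resource into its Metal slot.
		}
	}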
struct MetalLimits {

View File

@@ -50,7 +50,9 @@
#import "metal_device_properties.h"
#include "servers/rendering/renderer_rd/effects/metal_fx.h"
#import "metal_utils.h"
#import "servers/rendering/renderer_rd/effects/metal_fx.h"
#import <Metal/Metal.h>
#import <MetalFX/MetalFX.h>
@@ -78,6 +80,28 @@ MTLGPUFamily &operator--(MTLGPUFamily &p_family) {
void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
features = {};
MTLCompileOptions *opts = [MTLCompileOptions new];
features.msl_max_version = make_msl_version((opts.languageVersion >> 0x10) & 0xff, (opts.languageVersion >> 0x00) & 0xff);
features.msl_target_version = features.msl_max_version;
if (String version = OS::get_singleton()->get_environment("GODOT_MTL_TARGET_VERSION"); !version.is_empty()) {
if (version != "max") {
Vector<String> parts = version.split(".", true, 2);
if (parts.size() == 2) {
uint32_t major = parts[0].to_int();
uint32_t minor = parts[1].to_int();
uint32_t msl_version = make_msl_version(major, minor);
if (msl_version < MSL_VERSION_23 || msl_version > MSL_VERSION_40) {
WARN_PRINT(vformat("GODOT_MTL_TARGET_VERSION: invalid MSL version '%d.%d'", major, minor));
} else {
print_line(vformat("Override: Targeting Metal version %d.%d", major, minor));
features.msl_target_version = msl_version;
}
} else {
WARN_PRINT("GODOT_MTL_TARGET_VERSION: invalid version string format. Expected major.minor or 'max'.");
}
}
}
features.highestFamily = MTLGPUFamilyApple1;
for (MTLGPUFamily family = MTLGPUFamilyApple9; family >= MTLGPUFamilyApple1; --family) {
if ([p_device supportsFamily:family]) {
@@ -123,17 +147,32 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
features.argument_buffers_tier = p_device.argumentBuffersSupport;
features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6];
features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple9] || ([p_device supportsFamily:MTLGPUFamilyApple8] && [p_device supportsFamily:MTLGPUFamilyMac2]);
if (features.msl_target_version >= MSL_VERSION_31) {
// Native atomics are only supported on 3.1 and above.
if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, visionOS 1.0, *)) {
features.supports_native_image_atomics = true;
}
}
if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") {
features.supports_native_image_atomics = false;
}
if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) {
features.supports_residency_sets = true;
} else {
features.supports_residency_sets = false;
}
if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2);
}
if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") {
features.use_argument_buffers = false;
}
if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
features.metal_fx_spatial = [MTLFXSpatialScalerDescriptor supportsDevice:p_device];
#ifdef METAL_MFXTEMPORAL_ENABLED
@@ -142,11 +181,6 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
features.metal_fx_temporal = false;
#endif
}
MTLCompileOptions *opts = [MTLCompileOptions new];
features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version.
features.mslVersionMajor = (opts.languageVersion >> 0x10) & 0xff;
features.mslVersionMinor = (opts.languageVersion >> 0x00) & 0xff;
}
void MetalDeviceProperties::init_limits(id<MTLDevice> p_device) {

View File

@@ -51,6 +51,7 @@
/**************************************************************************/
#import "metal_device_properties.h"
#import "metal_objects_shared.h"
#import "metal_utils.h"
#import "pixel_formats.h"
#import "sha256_digest.h"
@@ -66,38 +67,8 @@
#import <initializer_list>
#import <optional>
// These types can be used in Vector and other containers that use
// pointer operations not supported by ARC.
namespace MTL {
#define MTL_CLASS(name) \
class name { \
public: \
name(id<MTL##name> obj = nil) : m_obj(obj) {} \
operator id<MTL##name>() const { \
return m_obj; \
} \
id<MTL##name> m_obj; \
};
MTL_CLASS(Texture)
} //namespace MTL
enum ShaderStageUsage : uint32_t {
None = 0,
Vertex = RDD::SHADER_STAGE_VERTEX_BIT,
Fragment = RDD::SHADER_STAGE_FRAGMENT_BIT,
TesselationControl = RDD::SHADER_STAGE_TESSELATION_CONTROL_BIT,
TesselationEvaluation = RDD::SHADER_STAGE_TESSELATION_EVALUATION_BIT,
Compute = RDD::SHADER_STAGE_COMPUTE_BIT,
};
_FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) {
p_a = ShaderStageUsage(uint32_t(p_a) | uint32_t(p_b));
return p_a;
}
enum StageResourceUsage : uint32_t {
ResourceUnused = 0,
VertexRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_VERTEX * 2),
VertexWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_VERTEX * 2),
FragmentRead = (MTLResourceUsageRead << RDD::SHADER_STAGE_FRAGMENT * 2),
@@ -110,9 +81,61 @@ enum StageResourceUsage : uint32_t {
ComputeWrite = (MTLResourceUsageWrite << RDD::SHADER_STAGE_COMPUTE * 2),
};
typedef LocalVector<__unsafe_unretained id<MTLResource>> ResourceVector;
typedef id<MTLResource> __unsafe_unretained MTLResourceUnsafe;
template <>
struct HashMapHasherDefaultImpl<MTLResourceUnsafe> {
static _FORCE_INLINE_ uint32_t hash(const MTLResourceUnsafe p_pointer) { return hash_one_uint64((uint64_t)p_pointer); }
};
typedef LocalVector<MTLResourceUnsafe> ResourceVector;
typedef HashMap<StageResourceUsage, ResourceVector> ResourceUsageMap;
struct ResourceUsageEntry {
StageResourceUsage usage = ResourceUnused;
uint32_t unused = 0;
ResourceUsageEntry() {}
ResourceUsageEntry(StageResourceUsage p_usage) :
usage(p_usage) {}
};
template <>
struct is_zero_constructible<ResourceUsageEntry> : std::true_type {};
/*! Track the cumulative usage for a resource during a render or compute pass */
typedef HashMap<MTLResourceUnsafe, ResourceUsageEntry> ResourceToStageUsage;
/*! Track resources and ensure they are resident prior to dispatch or draw commands.
 *
 * The primary purpose of this data structure is to track all the resources that must be made resident prior
 * to issuing the next dispatch or draw command. It aggregates all resources used from argument buffers.
 *
 * As an optimization, this data structure also tracks the previous usage of each resource, so that
 * it can avoid binding a resource again in later commands if it is already resident with the same usage flags.
 */
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) ResourceTracker {
// A constant specifying how many iterations a resource can remain in
// the _previous HashMap before it is removed permanently.
//
// Keeping them in the _previous HashMap reduces churn if resources are regularly
// bound. 256 is arbitrary, but if an object remains unused for 256 encoders,
// it will be released.
static constexpr uint32_t RESOURCE_UNUSED_CLEANUP_COUNT = 256;
// Used as a scratch buffer to periodically clean up resources from _previous.
ResourceVector _scratch;
// Tracks all resources and their prior usage for the duration of the encoder.
ResourceToStageUsage _previous;
// Tracks resources for the current command that must be made resident.
ResourceUsageMap _current;
void merge_from(const ResourceUsageMap &p_from);
void encode(id<MTLRenderCommandEncoder> __unsafe_unretained p_enc);
void encode(id<MTLComputeCommandEncoder> __unsafe_unretained p_enc);
void reset();
};
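The assumed call pattern per encoder, sketched below; the set-usage and encoder arguments are illustrative:

	static void track_and_draw(ResourceTracker &p_tracker,
			const ResourceUsageMap &p_set_usage,
			id<MTLRenderCommandEncoder> __unsafe_unretained p_enc) {
		p_tracker.merge_from(p_set_usage); // Gather resources referenced by argument buffers.
		p_tracker.encode(p_enc); // Make only new/changed resources resident (e.g. via -useResource:usage:stages:).
		// ... issue the draw on p_enc ...
		// When the encoder ends:
		p_tracker.reset();
	}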
enum class MDCommandBufferStateType {
None,
Render,
@@ -130,54 +153,16 @@ class MDRenderPass;
class MDPipeline;
class MDRenderPipeline;
class MDComputePipeline;
class MDFrameBuffer;
class RenderingDeviceDriverMetal;
class MDUniformSet;
class MDShader;
struct MetalBufferDynamicInfo;
using RDM = RenderingDeviceDriverMetal;
#pragma mark - Resource Factory
struct ClearAttKey {
const static uint32_t COLOR_COUNT = MAX_COLOR_ATTACHMENT_COUNT;
const static uint32_t DEPTH_INDEX = COLOR_COUNT;
const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1;
const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1;
enum Flags : uint16_t {
CLEAR_FLAGS_NONE = 0,
CLEAR_FLAGS_LAYERED = 1 << 0,
};
Flags flags = CLEAR_FLAGS_NONE;
uint16_t sample_count = 0;
uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 };
_FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTLPixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; }
_FORCE_INLINE_ void set_depth_format(MTLPixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; }
_FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; }
_FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; }
_FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; }
_FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); }
_FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; }
_FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; }
_FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; }
_FORCE_INLINE_ bool is_layered_rendering_enabled() const { return flags::any(flags, CLEAR_FLAGS_LAYERED); }
_FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const {
return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0;
}
uint32_t hash() const {
uint32_t h = hash_murmur3_one_32(flags);
h = hash_murmur3_one_32(sample_count, h);
h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h);
return hash_fmix32(h);
}
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDResourceFactory {
private:
RenderingDeviceDriverMetal *device_driver;
@@ -309,13 +294,127 @@ public:
MDRenderPass(Vector<MDAttachment> &p_attachments, Vector<MDSubpass> &p_subpasses);
};
struct BindingCache {
struct BufferBinding {
id<MTLBuffer> __unsafe_unretained buffer = nil;
NSUInteger offset = 0;
bool operator!=(const BufferBinding &p_other) const {
return buffer != p_other.buffer || offset != p_other.offset;
}
};
LocalVector<id<MTLTexture> __unsafe_unretained> textures;
LocalVector<id<MTLSamplerState> __unsafe_unretained> samplers;
LocalVector<BufferBinding> buffers;
_FORCE_INLINE_ void clear() {
textures.clear();
samplers.clear();
buffers.clear();
}
private:
template <typename T>
_FORCE_INLINE_ void ensure_size(LocalVector<T> &p_vec, uint32_t p_required) {
if (p_vec.size() < p_required) {
p_vec.resize_initialized(p_required);
}
}
public:
_FORCE_INLINE_ bool update(NSRange p_range, id<MTLTexture> __unsafe_unretained const *p_values) {
if (p_range.length == 0) {
return false;
}
uint32_t required = (uint32_t)(p_range.location + p_range.length);
ensure_size(textures, required);
bool changed = false;
for (NSUInteger i = 0; i < p_range.length; ++i) {
uint32_t slot = (uint32_t)(p_range.location + i);
id<MTLTexture> __unsafe_unretained value = p_values[i];
if (textures[slot] != value) {
textures[slot] = value;
changed = true;
}
}
return changed;
}
_FORCE_INLINE_ bool update(NSRange p_range, id<MTLSamplerState> __unsafe_unretained const *p_values) {
if (p_range.length == 0) {
return false;
}
uint32_t required = (uint32_t)(p_range.location + p_range.length);
ensure_size(samplers, required);
bool changed = false;
for (NSUInteger i = 0; i < p_range.length; ++i) {
uint32_t slot = (uint32_t)(p_range.location + i);
id<MTLSamplerState> __unsafe_unretained value = p_values[i];
if (samplers[slot] != value) {
samplers[slot] = value;
changed = true;
}
}
return changed;
}
_FORCE_INLINE_ bool update(NSRange p_range, id<MTLBuffer> __unsafe_unretained const *p_values, const NSUInteger *p_offsets) {
if (p_range.length == 0) {
return false;
}
uint32_t required = (uint32_t)(p_range.location + p_range.length);
ensure_size(buffers, required);
BufferBinding *buffers_ptr = buffers.ptr() + p_range.location;
bool changed = false;
for (NSUInteger i = 0; i < p_range.length; ++i) {
BufferBinding &binding = *buffers_ptr;
BufferBinding new_binding = {
.buffer = p_values[i],
.offset = p_offsets[i],
};
if (binding != new_binding) {
binding = new_binding;
changed = true;
}
++buffers_ptr;
}
return changed;
}
_FORCE_INLINE_ bool update(id<MTLBuffer> __unsafe_unretained p_buffer, NSUInteger p_offset, uint32_t p_index) {
uint32_t required = p_index + 1;
ensure_size(buffers, required);
BufferBinding &binding = buffers.ptr()[p_index];
BufferBinding new_binding = {
.buffer = p_buffer,
.offset = p_offset,
};
if (binding != new_binding) {
binding = new_binding;
return true;
}
return false;
}
};
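A hedged example of how the cache is meant to be consulted; the encoder, buffer, and slot values are hypothetical:

	static void bind_vertex_buffer(BindingCache &p_cache,
			id<MTLRenderCommandEncoder> __unsafe_unretained p_enc,
			id<MTLBuffer> __unsafe_unretained p_buffer) {
		// Only touch the encoder when slot 3 actually changes.
		if (p_cache.update(p_buffer, /* p_offset */ 0, /* p_index */ 3)) {
			[p_enc setVertexBuffer:p_buffer offset:0 atIndex:3];
		}
	}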
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
friend class MDUniformSet;
private:
#pragma mark - Common State
// From RenderingDevice
static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_binding = UINT32_MAX;
BindingCache binding_cache;
void reset();
RenderingDeviceDriverMetal *device_driver = nullptr;
id<MTLCommandQueue> queue = nil;
id<MTLCommandBuffer> commandBuffer = nil;
@@ -331,6 +430,16 @@ private:
void _end_compute_dispatch();
void _end_blit();
id<MTLBlitCommandEncoder> _ensure_blit_encoder();
enum class CopySource {
Buffer,
Texture,
};
void _copy_texture_buffer(CopySource p_source,
RDD::TextureID p_texture,
RDD::BufferID p_buffer,
VectorView<RDD::BufferTextureCopyRegion> p_regions);
#pragma mark - Render
@@ -368,7 +477,7 @@ public:
uint32_t index_offset = 0;
LocalVector<id<MTLBuffer> __unsafe_unretained> vertex_buffers;
LocalVector<NSUInteger> vertex_offsets;
ResourceUsageMap resource_usage;
ResourceTracker resource_tracker;
// clang-format off
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
@@ -390,9 +499,6 @@ public:
uint32_t dynamic_offsets = 0;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[2] = { ~0U, ~0U };
_FORCE_INLINE_ void reset();
void end_encoding();
@@ -447,13 +553,6 @@ public:
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}
_FORCE_INLINE_ void mark_blend_dirty() {
if (!blend_constants.has_value()) {
return;
@@ -495,7 +594,7 @@ public:
struct ComputeState {
MDComputePipeline *pipeline = nullptr;
id<MTLComputeCommandEncoder> encoder = nil;
ResourceUsageMap resource_usage;
ResourceTracker resource_tracker;
// clang-format off
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
@@ -511,9 +610,6 @@ public:
uint32_t dynamic_offsets = 0;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[1] = { ~0U };
_FORCE_INLINE_ void reset();
void end_encoding();
@@ -529,14 +625,6 @@ public:
}
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}
} compute;
// State specific to a blit pass.
@@ -555,9 +643,6 @@ public:
void commit();
void end();
id<MTLBlitCommandEncoder> blit_command_encoder();
void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
void bind_pipeline(RDD::PipelineID p_pipeline);
void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);
@@ -600,6 +685,25 @@ public:
void compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups);
void compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset);
#pragma mark - Transfer
private:
void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
public:
void resolve_texture(RDD::TextureID p_src_texture, RDD::TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, RDD::TextureID p_dst_texture, RDD::TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap);
void clear_color_texture(RDD::TextureID p_texture, RDD::TextureLayout p_texture_layout, const Color &p_color, const RDD::TextureSubresourceRange &p_subresources);
void clear_buffer(RDD::BufferID p_buffer, uint64_t p_offset, uint64_t p_size);
void copy_buffer(RDD::BufferID p_src_buffer, RDD::BufferID p_dst_buffer, VectorView<RDD::BufferCopyRegion> p_regions);
void copy_texture(RDD::TextureID p_src_texture, RDD::TextureID p_dst_texture, VectorView<RDD::TextureCopyRegion> p_regions);
void copy_buffer_to_texture(RDD::BufferID p_src_buffer, RDD::TextureID p_dst_texture, VectorView<RDD::BufferTextureCopyRegion> p_regions);
void copy_texture_to_buffer(RDD::TextureID p_src_texture, RDD::BufferID p_dst_buffer, VectorView<RDD::BufferTextureCopyRegion> p_regions);
#pragma mark - Debugging
void begin_label(const char *p_label_name, const Color &p_color);
void end_label();
MDCommandBuffer(id<MTLCommandQueue> p_queue, RenderingDeviceDriverMetal *p_device_driver) :
device_driver(p_device_driver), queue(p_queue) {
type = MDCommandBufferStateType::None;
@@ -615,44 +719,44 @@ public:
#define MTLBindingAccessWriteOnly MTLArgumentAccessWriteOnly
#endif
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) BindingInfo {
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformInfo {
uint32_t binding;
BitField<RDD::ShaderStage> active_stages;
MTLDataType dataType = MTLDataTypeNone;
uint32_t index = 0;
MTLBindingAccess access = MTLBindingAccessReadOnly;
MTLResourceUsage usage = 0;
MTLTextureType textureType = MTLTextureType2D;
int imageFormat = 0;
uint32_t imageFormat = 0;
uint32_t arrayLength = 0;
bool isMultisampled = false;
inline MTLArgumentDescriptor *new_argument_descriptor() const {
MTLArgumentDescriptor *desc = MTLArgumentDescriptor.argumentDescriptor;
desc.dataType = dataType;
desc.index = index;
desc.access = access;
desc.textureType = textureType;
desc.arrayLength = arrayLength;
return desc;
}
struct Indexes {
uint32_t buffer = UINT32_MAX;
uint32_t texture = UINT32_MAX;
uint32_t sampler = UINT32_MAX;
};
Indexes slot;
Indexes arg_buffer;
enum class IndexType {
SLOT,
ARG,
};
using RDC = RenderingDeviceCommons;
typedef API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) HashMap<RDC::ShaderStage, BindingInfo> BindingInfoMap;
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformInfo {
uint32_t binding;
ShaderStageUsage active_stages = None;
BindingInfoMap bindings;
BindingInfoMap bindings_secondary;
_FORCE_INLINE_ Indexes &get_indexes(IndexType p_type) {
switch (p_type) {
case IndexType::SLOT:
return slot;
case IndexType::ARG:
return arg_buffer;
}
}
};
struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformSet {
LocalVector<UniformInfo> uniforms;
LocalVector<uint32_t> dynamic_uniforms;
uint32_t buffer_size = 0;
HashMap<RDC::ShaderStage, uint32_t> offsets;
HashMap<RDC::ShaderStage, id<MTLArgumentEncoder>> encoders;
};
struct ShaderCacheEntry;
@@ -691,13 +795,6 @@ enum class ShaderLoadStrategy {
data:(dispatch_data_t)data;
@end
template <>
struct HashMapComparatorDefault<SHA256Digest> {
static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) {
return memcmp(p_lhs.data, p_rhs.data, CC_SHA256_DIGEST_LENGTH) == 0;
}
};
/// A cache entry for a Metal shader library.
struct ShaderCacheEntry {
RenderingDeviceDriverMetal &owner;
@@ -718,16 +815,6 @@ struct ShaderCacheEntry {
~ShaderCacheEntry() = default;
};
/// Godot limits the number of dynamic buffers to 8.
///
/// This is a minimum guarantee for Vulkan.
constexpr uint32_t MAX_DYNAMIC_BUFFERS = 8;
/// Maximum number of queued frames.
///
/// See setting: rendering/rendering_device/vsync/frame_queue_size
constexpr uint32_t MAX_FRAME_COUNT = 4;
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) DynamicOffsetLayout {
struct Data {
uint8_t offset : 4;
@@ -760,19 +847,15 @@ public:
}
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) DynamicOffsets {
uint32_t data;
public:
_FORCE_INLINE_ uint32_t get_frame_index(const DynamicOffsetLayout &p_layout) const {
return data;
}
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDShader {
public:
CharString name;
Vector<UniformSet> sets;
struct {
BitField<RDD::ShaderStage> stages = {};
uint32_t binding = UINT32_MAX;
uint32_t size = 0;
} push_constants;
DynamicOffsetLayout dynamic_offset_layout;
bool uses_argument_buffers = true;
@@ -783,10 +866,6 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader {
public:
struct {
int32_t binding = -1;
uint32_t size = 0;
} push_constants;
MTLSize local = {};
MDLibrary *kernel;
@@ -796,16 +875,6 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDRenderShader final : public MDShader {
public:
struct {
struct {
int32_t binding = -1;
uint32_t size = 0;
} vert;
struct {
int32_t binding = -1;
uint32_t size = 0;
} frag;
} push_constants;
bool needs_view_mask_buffer = false;
MDLibrary *vert;
@@ -838,52 +907,69 @@ struct HashMapComparatorDefault<RDD::ShaderID> {
}
};
struct BoundUniformSet {
id<MTLBuffer> buffer;
ResourceUsageMap usage_to_resources;
/// Size of the per-frame buffer, which is 0 when there are no dynamic uniforms.
uint32_t frame_size = 0;
/// Perform a 2-way merge of each key's `ResourceVector` of resources from this set into the
/// destination set.
///
/// Assumes the vectors of resources are sorted.
void merge_into(ResourceUsageMap &p_dst) const;
/// Returns true if this bound uniform set contains dynamic uniforms.
_FORCE_INLINE_ bool is_dynamic() const { return frame_size > 0; }
/// Calculate the offset in the Metal buffer for the current frame.
_FORCE_INLINE_ uint32_t frame_offset(uint32_t p_frame_index) const { return p_frame_index * frame_size; }
/// Calculate the offset in the buffer for the given frame index and base offset.
_FORCE_INLINE_ uint32_t make_offset(uint32_t p_frame_index, uint32_t p_base_offset) const {
return frame_offset(p_frame_index) + p_base_offset;
template <>
struct HashMapComparatorDefault<RDD::BufferID> {
static bool compare(const RDD::BufferID &p_lhs, const RDD::BufferID &p_rhs) {
return p_lhs.id == p_rhs.id;
}
};
BoundUniformSet() = default;
BoundUniformSet(id<MTLBuffer> p_buffer, ResourceUsageMap &&p_usage_to_resources, uint32_t p_frame_size) :
buffer(p_buffer), usage_to_resources(std::move(p_usage_to_resources)), frame_size(p_frame_size) {}
template <>
struct HashMapComparatorDefault<RDD::TextureID> {
static bool compare(const RDD::TextureID &p_lhs, const RDD::TextureID &p_rhs) {
return p_lhs.id == p_rhs.id;
}
};
template <>
struct HashMapHasherDefaultImpl<RDD::BufferID> {
static _FORCE_INLINE_ uint32_t hash(const RDD::BufferID &p_value) {
return HashMapHasherDefaultImpl<uint64_t>::hash(p_value.id);
}
};
template <>
struct HashMapHasherDefaultImpl<RDD::TextureID> {
static _FORCE_INLINE_ uint32_t hash(const RDD::TextureID &p_value) {
return HashMapHasherDefaultImpl<uint64_t>::hash(p_value.id);
}
};
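With these specializations in place, driver handles can key hash maps directly; for example (illustrative, using the forward-declared bookkeeping type from above):

	// Map a buffer handle to its dynamic-offset bookkeeping.
	HashMap<RDD::BufferID, MetalBufferDynamicInfo *> dynamic_buffers;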
// A type used to encode resources directly to an MTLCommandEncoder
struct DirectEncoder {
id<MTLCommandEncoder> __unsafe_unretained encoder;
BindingCache &cache;
enum Mode {
RENDER,
COMPUTE
};
Mode mode;
void set(id<MTLBuffer> __unsafe_unretained *p_buffers, const NSUInteger *p_offsets, NSRange p_range);
void set(id<MTLBuffer> __unsafe_unretained p_buffer, const NSUInteger p_offset, uint32_t p_index);
void set(id<MTLTexture> __unsafe_unretained *p_textures, NSRange p_range);
void set(id<MTLSamplerState> __unsafe_unretained *p_samplers, NSRange p_range);
DirectEncoder(id<MTLCommandEncoder> __unsafe_unretained p_encoder, BindingCache &p_cache) :
encoder(p_encoder), cache(p_cache) {
if ([p_encoder conformsToProtocol:@protocol(MTLRenderCommandEncoder)]) {
mode = RENDER;
} else {
mode = COMPUTE;
}
}
};
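Illustrative use: wrap the live encoder once, and route binds through the same cache used elsewhere (variable names hypothetical):

	static void bind_compute_uniform(BindingCache &p_cache,
			id<MTLComputeCommandEncoder> __unsafe_unretained p_enc,
			id<MTLBuffer> __unsafe_unretained p_buffer) {
		DirectEncoder enc(p_enc, p_cache); // Detects COMPUTE via protocol conformance.
		enc.set(p_buffer, /* p_offset */ 0, /* p_index */ 0);
	}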
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDUniformSet {
private:
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets);
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
void bind_uniforms_direct(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets);
void update_dynamic_uniforms(MDShader *p_shader, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, BoundUniformSet &p_bound_set, uint32_t p_dynamic_offsets, uint32_t p_frame_idx);
public:
uint32_t index = 0;
id<MTLBuffer> arg_buffer = nil;
ResourceUsageMap usage_to_resources;
LocalVector<RDD::BoundUniform> uniforms;
HashMap<MDShader *, BoundUniformSet> bound_uniforms;
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
void bind_uniforms(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
BoundUniformSet &bound_uniform_set(MDShader *p_shader, id<MTLDevice> p_device, ResourceUsageMap &p_resource_usage, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
void bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::ComputeState &p_state, uint32_t p_set_index, uint32_t p_dynamic_offsets, uint32_t p_frame_idx, uint32_t p_frame_count);
void bind_uniforms_direct(MDShader *p_shader, DirectEncoder p_enc, uint32_t p_set_index, uint32_t p_dynamic_offsets);
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDPipeline {
@@ -986,72 +1072,13 @@ public:
~MDComputePipeline() final = default;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDFrameBuffer {
Vector<MTL::Texture> textures;
public:
Size2i size;
MDFrameBuffer(Vector<MTL::Texture> p_textures, Size2i p_size) :
textures(p_textures), size(p_size) {}
MDFrameBuffer() {}
/// Returns the texture at the given index.
_ALWAYS_INLINE_ MTL::Texture get_texture(uint32_t p_idx) const {
return textures[p_idx];
}
/// Returns true if the texture at the given index is not nil.
_ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const {
return textures[p_idx] != nil;
}
/// Set the texture at the given index.
_ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture p_texture) {
textures.write[p_idx] = p_texture;
}
/// Unset or nil the texture at the given index.
_ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) {
textures.write[p_idx] = nil;
}
/// Resizes buffers to the specified size.
_ALWAYS_INLINE_ void set_texture_count(uint32_t p_size) {
textures.resize(p_size);
}
virtual ~MDFrameBuffer() = default;
};
// These functions are used to convert between Objective-C objects and
// the RIDs used by Godot, respecting automatic reference counting.
namespace rid {
// Converts an Objective-C object to a pointer, incrementing the
// reference count.
_FORCE_INLINE_ void *owned(id p_id) {
return (__bridge_retained void *)p_id;
}
#define MAKE_ID(FROM, TO) \
_FORCE_INLINE_ TO make(FROM p_obj) { \
return TO(owned(p_obj)); \
}
MAKE_ID(id<MTLTexture>, RDD::TextureID)
MAKE_ID(id<MTLBuffer>, RDD::BufferID)
MAKE_ID(id<MTLSamplerState>, RDD::SamplerID)
MAKE_ID(MTLVertexDescriptor *, RDD::VertexFormatID)
MAKE_ID(id<MTLCommandQueue>, RDD::CommandPoolID)
// Converts a pointer to an Objective-C object without changing the reference count.
_FORCE_INLINE_ auto get(RDD::ID p_id) {
return (p_id.id) ? (__bridge ::id)(void *)p_id.id : nil;
}
// Converts a pointer to an Objective-C object, and decrements the reference count.
_FORCE_INLINE_ auto release(RDD::ID p_id) {
return (__bridge_transfer ::id)(void *)p_id.id;
}
#undef MAKE_ID
} //namespace rid

File diff suppressed because it is too large.

View File

@@ -0,0 +1,196 @@
/**************************************************************************/
/* metal_objects_shared.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#import "metal_device_properties.h"
#import "metal_utils.h"
using RDC = RenderingDeviceCommons;
// These types can be used in Vector and other containers that use
// pointer operations not supported by ARC.
namespace MTL {
#define MTL_CLASS(name) \
class name { \
public: \
name(id<MTL##name> obj = nil) : m_obj(obj) {} \
operator id<MTL##name>() const { \
return m_obj; \
} \
id<MTL##name> m_obj; \
};
MTL_CLASS(Texture)
} //namespace MTL
enum ShaderStageUsage : uint32_t {
None = 0,
Vertex = RDD::SHADER_STAGE_VERTEX_BIT,
Fragment = RDD::SHADER_STAGE_FRAGMENT_BIT,
TesselationControl = RDD::SHADER_STAGE_TESSELATION_CONTROL_BIT,
TesselationEvaluation = RDD::SHADER_STAGE_TESSELATION_EVALUATION_BIT,
Compute = RDD::SHADER_STAGE_COMPUTE_BIT,
};
_FORCE_INLINE_ ShaderStageUsage &operator|=(ShaderStageUsage &p_a, int p_b) {
p_a = ShaderStageUsage(uint32_t(p_a) | uint32_t(p_b));
return p_a;
}
struct ClearAttKey {
const static uint32_t COLOR_COUNT = MAX_COLOR_ATTACHMENT_COUNT;
const static uint32_t DEPTH_INDEX = COLOR_COUNT;
const static uint32_t STENCIL_INDEX = DEPTH_INDEX + 1;
const static uint32_t ATTACHMENT_COUNT = STENCIL_INDEX + 1;
enum Flags : uint16_t {
CLEAR_FLAGS_NONE = 0,
CLEAR_FLAGS_LAYERED = 1 << 0,
};
Flags flags = CLEAR_FLAGS_NONE;
uint16_t sample_count = 0;
uint16_t pixel_formats[ATTACHMENT_COUNT] = { 0 };
_FORCE_INLINE_ void set_color_format(uint32_t p_idx, MTLPixelFormat p_fmt) { pixel_formats[p_idx] = p_fmt; }
_FORCE_INLINE_ void set_depth_format(MTLPixelFormat p_fmt) { pixel_formats[DEPTH_INDEX] = p_fmt; }
_FORCE_INLINE_ void set_stencil_format(MTLPixelFormat p_fmt) { pixel_formats[STENCIL_INDEX] = p_fmt; }
_FORCE_INLINE_ MTLPixelFormat depth_format() const { return (MTLPixelFormat)pixel_formats[DEPTH_INDEX]; }
_FORCE_INLINE_ MTLPixelFormat stencil_format() const { return (MTLPixelFormat)pixel_formats[STENCIL_INDEX]; }
_FORCE_INLINE_ void enable_layered_rendering() { flags::set(flags, CLEAR_FLAGS_LAYERED); }
_FORCE_INLINE_ bool is_enabled(uint32_t p_idx) const { return pixel_formats[p_idx] != 0; }
_FORCE_INLINE_ bool is_depth_enabled() const { return pixel_formats[DEPTH_INDEX] != 0; }
_FORCE_INLINE_ bool is_stencil_enabled() const { return pixel_formats[STENCIL_INDEX] != 0; }
_FORCE_INLINE_ bool is_layered_rendering_enabled() const { return flags::any(flags, CLEAR_FLAGS_LAYERED); }
_FORCE_INLINE_ bool operator==(const ClearAttKey &p_rhs) const {
return memcmp(this, &p_rhs, sizeof(ClearAttKey)) == 0;
}
uint32_t hash() const {
uint32_t h = hash_murmur3_one_32(flags);
h = hash_murmur3_one_32(sample_count, h);
h = hash_murmur3_buffer(pixel_formats, ATTACHMENT_COUNT * sizeof(pixel_formats[0]), h);
return hash_fmix32(h);
}
};
/**
 * Maps a shader stage to an index into a fixed-size array that is used for
 * a single pipeline type.
 */
_FORCE_INLINE_ static uint32_t to_index(RDD::ShaderStage p_s) {
switch (p_s) {
case RenderingDeviceCommons::SHADER_STAGE_VERTEX:
case RenderingDeviceCommons::SHADER_STAGE_TESSELATION_CONTROL:
case RenderingDeviceCommons::SHADER_STAGE_TESSELATION_EVALUATION:
case RenderingDeviceCommons::SHADER_STAGE_COMPUTE:
default:
return 0;
case RenderingDeviceCommons::SHADER_STAGE_FRAGMENT:
return 1;
}
}
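For example, a render pipeline can keep a two-slot array of push-constant bindings, with the fragment stage in slot 1 and every vertex-like stage sharing slot 0 (the binding values here are hypothetical):

	static void example_stage_slots() {
		uint32_t push_constant_bindings[2] = { ~0U, ~0U };
		push_constant_bindings[to_index(RenderingDeviceCommons::SHADER_STAGE_VERTEX)] = 4; // Slot 0.
		push_constant_bindings[to_index(RenderingDeviceCommons::SHADER_STAGE_FRAGMENT)] = 5; // Slot 1.
		(void)push_constant_bindings;
	}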
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDFrameBuffer {
Vector<MTL::Texture> textures;
public:
Size2i size;
MDFrameBuffer(Vector<MTL::Texture> p_textures, Size2i p_size) :
textures(p_textures), size(p_size) {}
MDFrameBuffer() {}
/// Returns the texture at the given index.
_ALWAYS_INLINE_ MTL::Texture get_texture(uint32_t p_idx) const {
return textures[p_idx];
}
/// Returns true if the texture at the given index is not nil.
_ALWAYS_INLINE_ bool has_texture(uint32_t p_idx) const {
return textures[p_idx] != nil;
}
/// Set the texture at the given index.
_ALWAYS_INLINE_ void set_texture(uint32_t p_idx, MTL::Texture p_texture) {
textures.write[p_idx] = p_texture;
}
/// Unset or nil the texture at the given index.
_ALWAYS_INLINE_ void unset_texture(uint32_t p_idx) {
textures.write[p_idx] = nil;
}
/// Resizes the texture array to the specified size.
_ALWAYS_INLINE_ void set_texture_count(uint32_t p_size) {
textures.resize(p_size);
}
virtual ~MDFrameBuffer() = default;
};
// These functions are used to convert between Objective-C objects and
// the RIDs used by Godot, respecting automatic reference counting.
namespace rid {
// Converts an Objective-C object to a pointer, incrementing the
// reference count.
_FORCE_INLINE_ void *owned(id p_id) {
return (__bridge_retained void *)p_id;
}
#define MAKE_ID(FROM, TO) \
_FORCE_INLINE_ TO make(FROM p_obj) { \
return TO(owned(p_obj)); \
}
// These are shared between the Metal and Metal 4 drivers.
MAKE_ID(id<MTLTexture>, RDD::TextureID)
MAKE_ID(id<MTLBuffer>, RDD::BufferID)
MAKE_ID(id<MTLSamplerState>, RDD::SamplerID)
MAKE_ID(MTLVertexDescriptor *, RDD::VertexFormatID)
#undef MAKE_ID
// Converts a pointer to an Objective-C object without changing the reference count.
_FORCE_INLINE_ auto get(RDD::ID p_id) {
return (p_id.id) ? (__bridge ::id)(void *)p_id.id : nil;
}
// Converts a pointer to an Objective-C object, and decrements the reference count.
_FORCE_INLINE_ auto release(RDD::ID p_id) {
return (__bridge_transfer ::id)(void *)p_id.id;
}
} // namespace rid
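A hedged sketch of the ownership round-trip these helpers implement (the texture parameter is hypothetical):

	static RDD::TextureID wrap_texture(id<MTLTexture> p_tex) {
		return rid::make(p_tex); // +1: ownership moves into the bare-pointer ID.
	}

	static void use_then_free(RDD::TextureID p_id) {
		id<MTLTexture> borrowed = rid::get(p_id); // Borrow; refcount unchanged.
		(void)borrowed;
		id<MTLTexture> owned = rid::release(p_id); // Transfer back to ARC; released at scope exit.
		(void)owned;
	}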

View File

@@ -34,6 +34,14 @@
#import <functional>
/// Godot limits the number of dynamic buffers to 8.
///
/// This is a minimum guarantee for Vulkan.
constexpr uint32_t MAX_DYNAMIC_BUFFERS = 8;
// Maximum number of queued frames. See setting: rendering/rendering_device/vsync/frame_queue_size.
static constexpr uint32_t MAX_FRAME_COUNT = 3;
#pragma mark - Boolean flags
namespace flags {
@@ -103,11 +111,37 @@ extern os_log_t LOG_DRIVER;
// Used for dynamic tracing.
extern os_log_t LOG_INTERVALS;
_FORCE_INLINE_ static uint32_t make_msl_version(uint32_t p_major, uint32_t p_minor = 0, uint32_t p_patch = 0) {
_FORCE_INLINE_ static constexpr uint32_t make_msl_version(uint32_t p_major, uint32_t p_minor = 0, uint32_t p_patch = 0) {
return (p_major * 10000) + (p_minor * 100) + p_patch;
}
_FORCE_INLINE_ static void parse_msl_version(uint32_t p_version, uint32_t &r_major, uint32_t &r_minor) {
_FORCE_INLINE_ static constexpr void parse_msl_version(uint32_t p_version, uint32_t &r_major, uint32_t &r_minor) {
r_major = p_version / 10000;
r_minor = (p_version % 10000) / 100;
}
constexpr uint32_t MSL_VERSION_23 = make_msl_version(2, 3);
constexpr uint32_t MSL_VERSION_24 = make_msl_version(2, 4);
constexpr uint32_t MSL_VERSION_30 = make_msl_version(3, 0);
constexpr uint32_t MSL_VERSION_31 = make_msl_version(3, 1);
constexpr uint32_t MSL_VERSION_32 = make_msl_version(3, 2);
constexpr uint32_t MSL_VERSION_40 = make_msl_version(4, 0);
/* MSL Language version table
*
* | Version | macOS | iOS |
* |---------|---------|---------|
* | 1.0 | | 9.0 |
* | 1.1 | 10.11 | 9.0 |
* | 1.2 | 10.12 | 10.0 |
* | 2.0 | 10.13 | 11.0 |
* | 2.1 | 10.14 | 12.0 |
* | 2.2 | 10.15 | 13.0 |
* | 2.3 | 11.0 | 14.0 |
* | 2.4 | 12.0 | 15.0 |
* | 3.0 | 13.0 | 16.0 |
* | 3.1 | 14.0 | 17.0 |
* | 3.2 | 15.0 | 18.0 |
* | 4.0 | 26.0 | 26.0 |
* |---------|---------|---------|
*/
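For illustration, a couple of compile-time checks tying the helpers to the table above:

	static_assert(make_msl_version(3, 2) == MSL_VERSION_32, "3.2 encodes as 30200");
	static_assert(MSL_VERSION_40 > MSL_VERSION_32, "versions compare numerically");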

View File

@@ -117,6 +117,8 @@ bool PixelFormats::isPVRTCFormat(MTLPixelFormat p_format) {
#if defined(VISIONOS_ENABLED)
return false;
#else
// Deprecated in SDK 26.0
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations")
switch (p_format) {
case MTLPixelFormatPVRTC_RGBA_2BPP:
case MTLPixelFormatPVRTC_RGBA_2BPP_sRGB:
@@ -130,6 +132,7 @@ bool PixelFormats::isPVRTCFormat(MTLPixelFormat p_format) {
default:
return false;
}
GODOT_CLANG_WARNING_POP
#endif
}
@@ -673,11 +676,13 @@ void PixelFormats::initMTLPixelFormatCapabilities() {
addMTLPixelFormatDesc(RGBA32Float, Color128, All);
#if !defined(VISIONOS_ENABLED)
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations")
// Compressed pixel formats
addMTLPixelFormatDesc(PVRTC_RGBA_2BPP, PVRTC_RGBA_2BPP, RF);
addMTLPixelFormatDescSRGB(PVRTC_RGBA_2BPP_sRGB, PVRTC_RGBA_2BPP, RF, PVRTC_RGBA_2BPP);
addMTLPixelFormatDesc(PVRTC_RGBA_4BPP, PVRTC_RGBA_4BPP, RF);
addMTLPixelFormatDescSRGB(PVRTC_RGBA_4BPP_sRGB, PVRTC_RGBA_4BPP, RF, PVRTC_RGBA_4BPP);
GODOT_CLANG_WARNING_POP
#endif
addMTLPixelFormatDesc(ETC2_RGB8, ETC2_RGB8, RF);

View File

@@ -53,17 +53,24 @@ class MDCommandBuffer;
#endif
class PixelFormats;
class MDResourceCache;
#ifdef __OBJC__
#define METAL_DEVICE id<MTLDevice>
#define METAL_DRAWABLE id<MTLDrawable>
#define METAL_LAYER CAMetalLayer *__unsafe_unretained
#define METAL_RESIDENCY_SET id<MTLResidencySet>
#else
#define METAL_DEVICE void *
#define METAL_DRAWABLE void *
#define METAL_LAYER void *
#define METAL_RESIDENCY_SET void *
#endif
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingContextDriverMetal : public RenderingContextDriver {
bool capture_available = false;
protected:
#ifdef __OBJC__
id<MTLDevice> metal_device = nullptr;
#else
void *metal_device = nullptr;
#endif
METAL_DEVICE metal_device = nullptr;
Device device; // There is only one device on Apple Silicon.
public:
@@ -88,20 +95,12 @@ public:
// Platform-specific data for the Windows embedded in this driver.
struct WindowPlatformData {
#ifdef __OBJC__
CAMetalLayer *__unsafe_unretained layer;
#else
void *layer;
#endif
METAL_LAYER layer;
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) Surface {
protected:
#ifdef __OBJC__
id<MTLDevice> device;
#else
void *device;
#endif
METAL_DEVICE device;
public:
uint32_t width = 0;
@@ -110,15 +109,8 @@ public:
bool needs_resize = false;
double present_minimum_duration = 0.0;
Surface(
#ifdef __OBJC__
id<MTLDevice> p_device
#else
void *p_device
#endif
) :
device(p_device) {
}
Surface(METAL_DEVICE p_device) :
device(p_device) {}
virtual ~Surface() = default;
MTLPixelFormat get_pixel_format() const { return MTLPixelFormatBGRA8Unorm; }
@@ -128,12 +120,7 @@ public:
void set_max_fps(int p_max_fps) { present_minimum_duration = p_max_fps ? 1.0 / p_max_fps : 0.0; }
};
#ifdef __OBJC__
id<MTLDevice>
#else
void *
#endif
get_metal_device() const {
METAL_DEVICE get_metal_device() const {
return metal_device;
}
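The METAL_DEVICE/METAL_DRAWABLE/METAL_LAYER/METAL_RESIDENCY_SET macros collapse the previous per-member #ifdef __OBJC__ blocks: Objective-C++ translation units see the real typed handles, while plain C++ units see void * placeholders with the same size and alignment, so the class layout is identical everywhere. The shape of the pattern, reduced to a sketch (names hypothetical):

    // Shared header, included from both .mm and .cpp files.
    #ifdef __OBJC__
    #define GPU_HANDLE id<MTLDevice>
    #else
    #define GPU_HANDLE void * // ObjC object pointers and void * share layout.
    #endif

    struct Context {
        GPU_HANDLE device = nullptr; // Same offset and size in every TU.
    };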

View File

@@ -32,6 +32,22 @@
#import "rendering_device_driver_metal.h"
#include "core/templates/sort_array.h"
#import <os/log.h>
#import <os/signpost.h>
#pragma mark - Logging
os_log_t LOG_DRIVER;
// Used for dynamic tracing.
os_log_t LOG_INTERVALS;
__attribute__((constructor)) static void InitializeLogging(void) {
LOG_DRIVER = os_log_create("org.godotengine.godot.metal", OS_LOG_CATEGORY_POINTS_OF_INTEREST);
LOG_INTERVALS = os_log_create("org.godotengine.godot.metal", "events");
}
@protocol MTLDeviceEx <MTLDevice>
#if TARGET_OS_OSX && __MAC_OS_X_VERSION_MAX_ALLOWED < 130300
- (void)setShouldMaximizeConcurrentCompilation:(BOOL)v;
@@ -184,9 +200,115 @@ public:
}
};
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) SurfaceOffscreen : public RenderingContextDriverMetal::Surface {
int frame_buffer_size = 3;
MDFrameBuffer *frame_buffers;
LocalVector<id<MTLTexture>> textures;
LocalVector<id<MTLDrawable>> drawables;
int32_t rear = -1;
std::atomic_int count;
uint64_t target_time = 0;
CAMetalLayer *layer;
public:
SurfaceOffscreen(CAMetalLayer *p_layer, id<MTLDevice> p_device) :
Surface(p_device), layer(p_layer) {
layer.allowsNextDrawableTimeout = YES;
layer.framebufferOnly = YES;
layer.opaque = OS::get_singleton()->is_layered_allowed() ? NO : YES;
layer.pixelFormat = get_pixel_format();
layer.device = p_device;
#if TARGET_OS_OSX
layer.displaySyncEnabled = NO;
#endif
target_time = OS::get_singleton()->get_ticks_usec();
textures.resize(frame_buffer_size);
drawables.resize(frame_buffer_size);
frame_buffers = memnew_arr(MDFrameBuffer, frame_buffer_size);
for (int i = 0; i < frame_buffer_size; i++) {
frame_buffers[i].set_texture_count(1);
}
}
~SurfaceOffscreen() override {
memdelete_arr(frame_buffers);
}
Error resize(uint32_t p_desired_framebuffer_count) override final {
if (width == 0 || height == 0) {
// Very likely the window is minimized, don't create a swap chain.
return ERR_SKIP;
}
CGSize drawableSize = CGSizeMake(width, height);
CGSize current = layer.drawableSize;
if (!CGSizeEqualToSize(current, drawableSize)) {
layer.drawableSize = drawableSize;
}
return OK;
}
RDD::FramebufferID acquire_next_frame_buffer() override final {
if (count.load(std::memory_order_relaxed) == frame_buffer_size) {
// Wait for a frame to be presented.
return RDD::FramebufferID();
}
rear = (rear + 1) % frame_buffer_size;
count.fetch_add(1, std::memory_order_relaxed);
MDFrameBuffer &frame_buffer = frame_buffers[rear];
if (textures[rear] == nil || textures[rear].width != width || textures[rear].height != height) {
MTLTextureDescriptor *texture_descriptor = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:get_pixel_format() width:width height:height mipmapped:NO];
texture_descriptor.usage = MTLTextureUsageRenderTarget;
texture_descriptor.hazardTrackingMode = MTLHazardTrackingModeUntracked;
texture_descriptor.storageMode = MTLStorageModePrivate;
textures[rear] = [device newTextureWithDescriptor:texture_descriptor];
}
frame_buffer.size = Size2i(width, height);
uint64_t now = OS::get_singleton()->get_ticks_usec();
if (now >= target_time) {
target_time = now + 1'000'000; // 1 second into the future.
id<CAMetalDrawable> drawable = layer.nextDrawable;
ERR_FAIL_NULL_V_MSG(drawable, RDD::FramebufferID(), "no drawable available");
drawables[rear] = drawable;
frame_buffer.set_texture(0, drawable.texture);
} else {
frame_buffer.set_texture(0, textures[rear]);
}
return RDD::FramebufferID(&frame_buffers[rear]);
}
void present(MDCommandBuffer *p_cmd_buffer) override final {
MDFrameBuffer *frame_buffer = &frame_buffers[rear];
if (drawables[rear] != nil) {
[p_cmd_buffer->get_command_buffer() presentDrawable:drawables[rear]];
drawables[rear] = nil;
}
[p_cmd_buffer->get_command_buffer() addScheduledHandler:^(id<MTLCommandBuffer> p_command_buffer) {
frame_buffer->unset_texture(0);
count.fetch_add(-1, std::memory_order_relaxed);
}];
}
};
RenderingContextDriver::SurfaceID RenderingContextDriverMetal::surface_create(const void *p_platform_data) {
const WindowPlatformData *wpd = (const WindowPlatformData *)(p_platform_data);
Surface *surface = memnew(SurfaceLayer(wpd->layer, metal_device));
Surface *surface;
if (String v = OS::get_singleton()->get_environment("GODOT_MTL_OFF_SCREEN"); v == U"1") {
surface = memnew(SurfaceOffscreen(wpd->layer, metal_device));
} else {
surface = memnew(SurfaceLayer(wpd->layer, metal_device));
}
return SurfaceID(surface);
}
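SurfaceOffscreen renders into a small ring of private textures and only asks the layer for a real CAMetalDrawable about once per second, so headless or capture runs never stall on the compositor; it is selected purely by the GODOT_MTL_OFF_SCREEN=1 environment variable checked above. The acquire/present bookkeeping reduces to this sketch (kRingSize mirrors frame_buffer_size):

    #include <atomic>

    constexpr int kRingSize = 3;
    std::atomic_int in_flight{ 0 };
    int rear = -1;

    bool try_acquire() {
        if (in_flight.load(std::memory_order_relaxed) == kRingSize) {
            return false; // Every slot is pending presentation; try again later.
        }
        rear = (rear + 1) % kRingSize;
        in_flight.fetch_add(1, std::memory_order_relaxed);
        return true;
    }
    // present() decrements in_flight from the command buffer's scheduled
    // handler, releasing the slot for a future acquire.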

View File

@@ -30,14 +30,16 @@
#pragma once
#import "metal_device_profile.h"
#import "metal_objects.h"
#import "rendering_shader_container_metal.h"
#include "servers/rendering/rendering_device_driver.h"
#import <Metal/Metal.h>
#import <variant>
class RenderingShaderContainerFormatMetal;
#ifdef DEBUG_ENABLED
#ifndef _DEBUG
#define _DEBUG
@@ -59,11 +61,11 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet
RenderingContextDriver::Device context_device;
id<MTLDevice> device = nil;
uint32_t frame_count = 1;
uint32_t _frame_count = 1;
/// _frame_index is a cyclic counter derived from the current frame number modulo
/// _frame_count, cycling through values from 0 to _frame_count - 1.
uint32_t frame_index = 0;
uint32_t frames_drawn = 0;
uint32_t _frame_index = 0;
uint32_t _frames_drawn = 0;
MetalDeviceProperties *device_properties = nullptr;
MetalDeviceProfile device_profile;
@@ -179,9 +181,64 @@ public:
private:
struct Fence {
virtual void signal(id<MTLCommandBuffer> p_cmd_buffer) = 0;
virtual Error wait(uint32_t p_timeout_ms) = 0;
virtual ~Fence() = default;
};
struct FenceEvent : public Fence {
id<MTLSharedEvent> event;
uint64_t value;
FenceEvent(id<MTLSharedEvent> p_event) :
event(p_event),
value(0) {}
virtual void signal(id<MTLCommandBuffer> p_cb) override {
if (p_cb) {
value++;
[p_cb encodeSignalEvent:event value:value];
}
}
virtual Error wait(uint32_t p_timeout_ms) override {
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability")
BOOL signaled = [event waitUntilSignaledValue:value timeoutMS:p_timeout_ms];
GODOT_CLANG_WARNING_POP
if (!signaled) {
#ifdef DEBUG_ENABLED
ERR_PRINT("timeout waiting for fence");
#endif
return ERR_TIMEOUT;
}
return OK;
}
};
struct FenceSemaphore : public Fence {
dispatch_semaphore_t semaphore;
Fence() :
FenceSemaphore() :
semaphore(dispatch_semaphore_create(0)) {}
virtual void signal(id<MTLCommandBuffer> p_cb) override {
if (p_cb) {
[p_cb addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
dispatch_semaphore_signal(semaphore);
}];
} else {
dispatch_semaphore_signal(semaphore);
}
}
virtual Error wait(uint32_t p_timeout_ms) override {
dispatch_time_t timeout = dispatch_time(DISPATCH_TIME_NOW, static_cast<int64_t>(p_timeout_ms) * 1000000);
long result = dispatch_semaphore_wait(semaphore, timeout);
if (result != 0) {
return ERR_TIMEOUT;
}
return OK;
}
};
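fence_create() in the .mm file picks the concrete type at runtime: FenceEvent (backed by MTLSharedEvent, which supports a precise timed wait) on new enough OS versions, FenceSemaphore otherwise. Call sites only ever touch the interface; roughly (make_fence is a stand-in for fence_create):

    Fence *fence = make_fence();             // FenceEvent or FenceSemaphore.
    fence->signal(command_buffer);           // GPU-side signal, or immediate if nil.
    if (fence->wait(1000) == ERR_TIMEOUT) {  // Timeout is in milliseconds.
        // The GPU did not finish within a second.
    }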
public:
@@ -283,17 +340,6 @@ public:
#pragma mark Transfer
private:
enum class CopySource {
Buffer,
Texture,
};
void _copy_texture_buffer(CommandBufferID p_cmd_buffer,
CopySource p_source,
TextureID p_texture,
BufferID p_buffer,
VectorView<BufferTextureCopyRegion> p_regions);
public:
virtual void command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) override final;
virtual void command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) override final;
@@ -455,6 +501,10 @@ public:
size_t get_texel_buffer_alignment_for_format(RDD::DataFormat p_format) const;
size_t get_texel_buffer_alignment_for_format(MTLPixelFormat p_format) const;
_FORCE_INLINE_ uint32_t frame_count() const { return _frame_count; }
_FORCE_INLINE_ uint32_t frame_index() const { return _frame_index; }
_FORCE_INLINE_ uint32_t frames_drawn() const { return _frames_drawn; }
/******************/
RenderingDeviceDriverMetal(RenderingContextDriverMetal *p_context_driver);
~RenderingDeviceDriverMetal();

View File

@@ -54,7 +54,6 @@
#import "rendering_context_driver_metal.h"
#import "rendering_shader_container_metal.h"
#include "core/io/compression.h"
#include "core/io/marshalls.h"
#include "core/string/ustring.h"
#include "core/templates/hash_map.h"
@@ -68,14 +67,9 @@
#pragma mark - Logging
os_log_t LOG_DRIVER;
extern os_log_t LOG_DRIVER;
// Used for dynamic tracing.
os_log_t LOG_INTERVALS;
__attribute__((constructor)) static void InitializeLogging(void) {
LOG_DRIVER = os_log_create("org.godotengine.godot.metal", OS_LOG_CATEGORY_POINTS_OF_INTEREST);
LOG_INTERVALS = os_log_create("org.godotengine.godot.metal", "events");
}
extern os_log_t LOG_INTERVALS;
/*****************/
/**** GENERIC ****/
@@ -91,14 +85,6 @@ static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NOT_EQUAL, MTLCompareFunctionNo
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER_OR_EQUAL, MTLCompareFunctionGreaterEqual));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, MTLCompareFunctionAlways));
_FORCE_INLINE_ MTLSize mipmapLevelSizeFromTexture(id<MTLTexture> p_tex, NSUInteger p_level) {
MTLSize lvlSize;
lvlSize.width = MAX(p_tex.width >> p_level, 1UL);
lvlSize.height = MAX(p_tex.height >> p_level, 1UL);
lvlSize.depth = MAX(p_tex.depth >> p_level, 1UL);
return lvlSize;
}
_FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_level) {
if (p_level == 0) {
return p_size;
@@ -111,10 +97,6 @@ _FORCE_INLINE_ MTLSize mipmapLevelSizeFromSize(MTLSize p_size, NSUInteger p_leve
return lvlSize;
}
_FORCE_INLINE_ static bool operator==(MTLSize p_a, MTLSize p_b) {
return p_a.width == p_b.width && p_a.height == p_b.height && p_a.depth == p_b.depth;
}
/*****************/
/**** BUFFERS ****/
/*****************/
@@ -122,7 +104,7 @@ _FORCE_INLINE_ static bool operator==(MTLSize p_a, MTLSize p_b) {
RDD::BufferID RenderingDeviceDriverMetal::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) {
const uint64_t original_size = p_size;
if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
p_size = round_up_to_alignment(p_size, 16u) * frame_count;
p_size = round_up_to_alignment(p_size, 16u) * _frame_count;
}
MTLResourceOptions options = 0;
@@ -197,7 +179,7 @@ uint8_t *RenderingDeviceDriverMetal::buffer_persistent_map_advance(BufferID p_bu
ERR_FAIL_COND_V_MSG(buf_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. Amalgamate all data uploading into one map(), use an extra buffer or remove the bit.");
buf_info->last_frame_mapped = p_frames_drawn;
#endif
return (uint8_t *)buf_info->metal_buffer.contents + buf_info->next_frame_index(frame_count) * buf_info->size_bytes;
return (uint8_t *)buf_info->metal_buffer.contents + buf_info->next_frame_index(_frame_count) * buf_info->size_bytes;
}
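Dynamic persistent buffers are ring-allocated per frame: buffer_create rounds the request up to a 16-byte alignment and multiplies by _frame_count, and each map advances to the next frame's slice so the CPU never overwrites memory the GPU may still read. Assuming size_bytes stores the aligned per-frame size, a worked example with _frame_count = 3:

    // Request: 100 bytes.
    //   aligned per-frame size = round_up_to_alignment(100, 16) = 112
    //   total allocation       = 112 * 3                        = 336 bytes
    // Successive frames map offsets 0, 112, 224, then wrap back to 0
    // once the GPU is guaranteed to have finished with that slice.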
void RenderingDeviceDriverMetal::buffer_flush(BufferID p_buffer) {
@@ -328,7 +310,9 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
#if defined(VISIONOS_ENABLED)
const bool supports_memoryless = true;
#else
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wdeprecated-declarations")
const bool supports_memoryless = (*device_properties).features.highestFamily >= MTLGPUFamilyApple2 && (*device_properties).features.highestFamily < MTLGPUFamilyMac1;
GODOT_CLANG_WARNING_POP
#endif
if (supports_memoryless && p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) {
options = MTLResourceStorageModeMemoryless | MTLResourceHazardTrackingModeTracked;
@@ -552,12 +536,12 @@ void RenderingDeviceDriverMetal::texture_free(TextureID p_texture) {
}
uint64_t RenderingDeviceDriverMetal::texture_get_allocation_size(TextureID p_texture) {
id<MTLTexture> obj = rid::get(p_texture);
id<MTLTexture> __unsafe_unretained obj = rid::get(p_texture);
return obj.allocatedSize;
}
void RenderingDeviceDriverMetal::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) {
id<MTLTexture> obj = rid::get(p_texture);
id<MTLTexture> __unsafe_unretained obj = rid::get(p_texture);
*r_layout = {};
PixelFormats &pf = *pixel_formats;
@@ -791,9 +775,13 @@ RDD::SamplerID RenderingDeviceDriverMetal::sampler_create(const SamplerState &p_
desc.normalizedCoordinates = !p_state.unnormalized_uvw;
#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 260000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 260000 || __TV_OS_VERSION_MAX_ALLOWED >= 260000 || __VISION_OS_VERSION_MAX_ALLOWED >= 260000
if (p_state.lod_bias != 0.0) {
WARN_PRINT_ONCE("Metal does not support LOD bias for samplers.");
if (@available(macOS 26.0, iOS 26.0, tvOS 26.0, visionOS 26.0, *)) {
desc.lodBias = p_state.lod_bias;
}
}
#endif
id<MTLSamplerState> obj = [device newSamplerStateWithDescriptor:desc];
ERR_FAIL_NULL_V_MSG(obj, SamplerID(), "newSamplerStateWithDescriptor failed");
@@ -863,17 +851,18 @@ void RenderingDeviceDriverMetal::command_pipeline_barrier(
#pragma mark - Fences
RDD::FenceID RenderingDeviceDriverMetal::fence_create() {
Fence *fence = memnew(Fence);
Fence *fence = nullptr;
if (@available(macOS 10.14, iOS 12.0, tvOS 12.0, visionOS 1.0, *)) {
fence = memnew(FenceEvent([device newSharedEvent]));
} else {
fence = memnew(FenceSemaphore());
}
return FenceID(fence);
}
Error RenderingDeviceDriverMetal::fence_wait(FenceID p_fence) {
Fence *fence = (Fence *)(p_fence.id);
// Wait forever, so this function is infallible.
dispatch_semaphore_wait(fence->semaphore, DISPATCH_TIME_FOREVER);
return OK;
return fence->wait(1000);
}
void RenderingDeviceDriverMetal::fence_free(FenceID p_fence) {
@@ -924,15 +913,9 @@ Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueue
MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id);
Fence *fence = (Fence *)(p_cmd_fence.id);
if (fence != nullptr) {
cmd_buffer->end();
id<MTLCommandBuffer> cb = cmd_buffer->get_command_buffer();
if (cb == nil) {
// If there is nothing to do, signal the fence immediately.
dispatch_semaphore_signal(fence->semaphore);
} else {
[cb addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
dispatch_semaphore_signal(fence->semaphore);
}];
}
fence->signal(cb);
}
for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
@@ -1133,18 +1116,27 @@ void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key
}
}
template <typename T, typename U>
struct is_layout_compatible
: std::bool_constant<
sizeof(T) == sizeof(U) &&
alignof(T) == alignof(U) &&
std::is_trivially_copyable_v<T> &&
std::is_trivially_copyable_v<U>> {};
static_assert(is_layout_compatible<UniformInfo::Indexes, RenderingShaderContainerMetal::UniformData::Indexes>::value, "UniformInfo::Indexes layout does not match RenderingShaderContainerMetal::UniformData::Indexes layout");
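The trait makes the memcpy in update_uniform_info below defensible: two trivially copyable types with identical size and alignment can be copied byte-for-byte, and the static_assert turns any future divergence between the runtime and serialized Indexes structs into a compile error rather than silent corruption. For example:

    struct A { uint32_t buffer, texture, sampler; };
    struct B { uint32_t b, t, s; };
    static_assert(is_layout_compatible<A, B>::value); // Same size, alignment, trivially copyable.

    struct C { uint64_t buffer; };
    static_assert(!is_layout_compatible<A, C>::value); // Sizes differ.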
API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0))
static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::BindingInfoData &p_data) {
BindingInfo bi;
bi.dataType = static_cast<MTLDataType>(p_data.data_type);
bi.index = p_data.index;
bi.access = static_cast<MTLBindingAccess>(p_data.access);
bi.usage = static_cast<MTLResourceUsage>(p_data.usage);
bi.textureType = static_cast<MTLTextureType>(p_data.texture_type);
bi.imageFormat = p_data.image_format;
bi.arrayLength = p_data.array_length;
bi.isMultisampled = p_data.is_multisampled;
return bi;
static void update_uniform_info(const RenderingShaderContainerMetal::UniformData &p_data, UniformInfo &r_ui) {
r_ui.active_stages = p_data.active_stages;
r_ui.dataType = static_cast<MTLDataType>(p_data.data_type);
memcpy(&r_ui.slot, &p_data.slot, sizeof(UniformInfo::Indexes));
memcpy(&r_ui.arg_buffer, &p_data.arg_buffer, sizeof(UniformInfo::Indexes));
r_ui.access = static_cast<MTLBindingAccess>(p_data.access);
r_ui.usage = static_cast<MTLResourceUsage>(p_data.usage);
r_ui.textureType = static_cast<MTLTextureType>(p_data.texture_type);
r_ui.imageFormat = p_data.image_format;
r_ui.arrayLength = p_data.array_length;
r_ui.isMultisampled = p_data.is_multisampled;
}
RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) {
@@ -1156,13 +1148,12 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
Vector<RenderingShaderContainer::Shader> &shaders = shader_container->shaders;
Vector<RSCM::StageData> &mtl_shaders = shader_container->mtl_shaders;
// We need to regenerate the shader if the cache is moved to an incompatible device.
ERR_FAIL_COND_V_MSG(device_properties->features.argument_buffers_tier < MTLArgumentBuffersTier2 && mtl_reflection_data.uses_argument_buffers(),
// We need to regenerate the shader if the cache is moved to an incompatible device or argument buffer support differs.
ERR_FAIL_COND_V_MSG(!device_properties->features.argument_buffers_supported() && mtl_reflection_data.uses_argument_buffers(),
RDD::ShaderID(),
"Shader was compiled with argument buffers enabled, but this device does not support them");
uint32_t msl_version = make_msl_version(device_properties->features.mslVersionMajor, device_properties->features.mslVersionMinor);
ERR_FAIL_COND_V_MSG(msl_version < mtl_reflection_data.msl_version,
ERR_FAIL_COND_V_MSG(device_properties->features.msl_max_version < mtl_reflection_data.msl_version,
RDD::ShaderID(),
"Shader was compiled for a newer version of Metal");
@@ -1175,6 +1166,10 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
uint32_t major = mtl_reflection_data.msl_version / 10000;
uint32_t minor = (mtl_reflection_data.msl_version / 100) % 100;
options.languageVersion = MTLLanguageVersion((major << 0x10) + minor);
if (@available(macOS 15.0, iOS 18.0, tvOS 18.0, visionOS 2.0, *)) {
options.enableLogging = mtl_reflection_data.needs_debug_logging();
}
HashMap<RD::ShaderStage, MDLibrary *> libraries;
bool is_compute = false;
@@ -1222,7 +1217,7 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
data:binary];
} else {
options.preserveInvariance = shader_data.is_position_invariant;
#if defined(VISIONOS_ENABLED)
#if __MAC_OS_X_VERSION_MIN_REQUIRED >= 150000 || __IPHONE_OS_VERSION_MIN_REQUIRED >= 180000 || __TV_OS_VERSION_MIN_REQUIRED >= 180000 || defined(VISIONOS_ENABLED)
options.mathMode = MTLMathModeFast;
#else
options.fastMathEnabled = YES;
@@ -1247,7 +1242,6 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
DynamicOffsetLayout dynamic_offset_layout;
uint8_t dynamic_offset = 0;
uint8_t dynamic_count = 0;
// Create sets.
for (uint32_t i = 0; i < uniform_sets_count; i++) {
@@ -1257,6 +1251,8 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
uint32_t set_size = mtl_set.size();
set.uniforms.resize(set_size);
uint8_t dynamic_count = 0;
LocalVector<UniformInfo>::Iterator iter = set.uniforms.begin();
for (uint32_t j = 0; j < set_size; j++) {
const ShaderUniform &uniform = refl_set.ptr()[j];
@@ -1274,89 +1270,34 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
UniformInfo &ui = *iter;
++iter;
update_uniform_info(bind, ui);
ui.binding = uniform.binding;
ui.active_stages = static_cast<ShaderStageUsage>(bind.active_stages);
for (const RSCM::BindingInfoData &info : bind.bindings) {
if (info.shader_stage == UINT32_MAX) {
continue;
}
BindingInfo bi = from_binding_info_data(info);
ui.bindings.insert((RDC::ShaderStage)info.shader_stage, bi);
}
for (const RSCM::BindingInfoData &info : bind.bindings_secondary) {
if (info.shader_stage == UINT32_MAX) {
continue;
}
BindingInfo bi = from_binding_info_data(info);
ui.bindings_secondary.insert((RDC::ShaderStage)info.shader_stage, bi);
}
}
if (dynamic_count > 0) {
dynamic_offset_layout.set_offset_count(i, dynamic_offset, dynamic_count);
dynamic_offset += dynamic_count;
dynamic_count = 0;
}
}
for (uint32_t i = 0; i < uniform_sets_count; i++) {
UniformSet &set = uniform_sets.write[i];
// Make encoders.
for (RenderingShaderContainer::Shader const &shader : shaders) {
RD::ShaderStage stage = shader.shader_stage;
NSMutableArray<MTLArgumentDescriptor *> *descriptors = [NSMutableArray new];
for (UniformInfo const &uniform : set.uniforms) {
BindingInfo const *binding_info = uniform.bindings.getptr(stage);
if (binding_info == nullptr) {
continue;
}
[descriptors addObject:binding_info->new_argument_descriptor()];
BindingInfo const *secondary_binding_info = uniform.bindings_secondary.getptr(stage);
if (secondary_binding_info != nullptr) {
[descriptors addObject:secondary_binding_info->new_argument_descriptor()];
}
}
if (descriptors.count == 0) {
if (ui.arg_buffer.texture == UINT32_MAX && ui.arg_buffer.buffer == UINT32_MAX && ui.arg_buffer.sampler == UINT32_MAX) {
// No bindings.
continue;
}
// Sort by index.
[descriptors sortUsingComparator:^NSComparisonResult(MTLArgumentDescriptor *a, MTLArgumentDescriptor *b) {
if (a.index < b.index) {
return NSOrderedAscending;
} else if (a.index > b.index) {
return NSOrderedDescending;
} else {
return NSOrderedSame;
#define VAL(x) (x == UINT32_MAX ? 0 : x)
uint32_t max = std::max({ VAL(ui.arg_buffer.texture), VAL(ui.arg_buffer.buffer), VAL(ui.arg_buffer.sampler) });
max += ui.arrayLength > 0 ? ui.arrayLength - 1 : 0;
set.buffer_size = std::max(set.buffer_size, (max + 1) * (uint32_t)sizeof(uint64_t));
#undef VAL
}
}];
id<MTLArgumentEncoder> enc = [device newArgumentEncoderWithArguments:descriptors];
set.encoders[stage] = enc;
set.offsets[stage] = set.buffer_size;
set.buffer_size += enc.encodedLength;
if (dynamic_count > 0) {
dynamic_offset_layout.set_offset_count(i, dynamic_offset, dynamic_count);
dynamic_offset += dynamic_count;
}
}
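Each set's argument buffer is sized from the highest slot any uniform occupies: unused indexes (UINT32_MAX) collapse to 0 through VAL, an array extends its base slot by arrayLength - 1, and every slot is one 64-bit entry. A worked example:

    // ui.arg_buffer = { buffer: UINT32_MAX, texture: 4, sampler: UINT32_MAX },
    // ui.arrayLength = 3:
    //   max      = std::max({ 0, 4, 0 })      = 4
    //   max     += 3 - 1                      -> 6   (array spans slots 4..6)
    //   required = (6 + 1) * sizeof(uint64_t) = 56 bytes
    // set.buffer_size grows to at least 56 for this set.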
MDShader *shader = nullptr;
if (is_compute) {
const RSCM::StageData &stage_data = mtl_shaders[0];
MDComputeShader *cs = new MDComputeShader(
shader_name,
uniform_sets,
mtl_reflection_data.uses_argument_buffers(),
libraries[RD::ShaderStage::SHADER_STAGE_COMPUTE]);
if (stage_data.push_constant_binding != UINT32_MAX) {
cs->push_constants.size = refl.push_constant_size;
cs->push_constants.binding = stage_data.push_constant_binding;
}
cs->local = MTLSizeMake(refl.compute_local_size[0], refl.compute_local_size[1], refl.compute_local_size[2]);
shader = cs;
} else {
@@ -1367,30 +1308,12 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref
mtl_reflection_data.uses_argument_buffers(),
libraries[RD::ShaderStage::SHADER_STAGE_VERTEX],
libraries[RD::ShaderStage::SHADER_STAGE_FRAGMENT]);
for (uint32_t j = 0; j < shaders.size(); j++) {
const RSCM::StageData &stage_data = mtl_shaders[j];
switch (shaders[j].shader_stage) {
case RD::ShaderStage::SHADER_STAGE_VERTEX: {
if (stage_data.push_constant_binding != UINT32_MAX) {
rs->push_constants.vert.size = refl.push_constant_size;
rs->push_constants.vert.binding = stage_data.push_constant_binding;
}
} break;
case RD::ShaderStage::SHADER_STAGE_FRAGMENT: {
if (stage_data.push_constant_binding != UINT32_MAX) {
rs->push_constants.frag.size = refl.push_constant_size;
rs->push_constants.frag.binding = stage_data.push_constant_binding;
}
} break;
default: {
ERR_FAIL_V_MSG(RDD::ShaderID(), "Invalid shader stage");
} break;
}
}
shader = rs;
}
shader->push_constants.stages = refl.push_constant_stages;
shader->push_constants.size = refl.push_constant_size;
shader->push_constants.binding = mtl_reflection_data.push_constant_binding;
shader->dynamic_offset_layout = dynamic_offset_layout;
return RDD::ShaderID(shader);
@@ -1412,7 +1335,135 @@ void RenderingDeviceDriverMetal::shader_destroy_modules(ShaderID p_shader) {
RDD::UniformSetID RenderingDeviceDriverMetal::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) {
//p_linear_pool_index = -1; // TODO:? Linear pools not implemented or not supported by API backend.
MDShader *shader = (MDShader *)(p_shader.id);
ERR_FAIL_INDEX_V_MSG(p_set_index, shader->sets.size(), UniformSetID(), "Set index out of range");
const UniformSet &shader_set = shader->sets.get(p_set_index);
MDUniformSet *set = memnew(MDUniformSet);
if (device_properties->features.argument_buffers_supported()) {
// If argument buffers are enabled, we have already verified availability, so we can skip the runtime check.
GODOT_CLANG_WARNING_PUSH_AND_IGNORE("-Wunguarded-availability-new")
set->arg_buffer = [device newBufferWithLength:shader_set.buffer_size options:MTLResourceStorageModeShared];
uint64_t *ptr = (uint64_t *)set->arg_buffer.contents;
HashMap<MTLResourceUnsafe, StageResourceUsage, HashMapHasherDefault> bound_resources;
auto add_usage = [&bound_resources](MTLResourceUnsafe res, BitField<RDD::ShaderStage> stage, MTLResourceUsage usage) {
StageResourceUsage *sru = bound_resources.getptr(res);
if (sru == nullptr) {
sru = &bound_resources.insert(res, ResourceUnused)->value;
}
if (stage.has_flag(RDD::SHADER_STAGE_VERTEX_BIT)) {
*sru |= stage_resource_usage(RDD::SHADER_STAGE_VERTEX, usage);
}
if (stage.has_flag(RDD::SHADER_STAGE_FRAGMENT_BIT)) {
*sru |= stage_resource_usage(RDD::SHADER_STAGE_FRAGMENT, usage);
}
if (stage.has_flag(RDD::SHADER_STAGE_COMPUTE_BIT)) {
*sru |= stage_resource_usage(RDD::SHADER_STAGE_COMPUTE, usage);
}
};
// Ensure the argument buffer exists for this set as some shader pipelines may
// have been generated with argument buffers enabled.
for (uint32_t i = 0; i < p_uniforms.size(); i += 1) {
const BoundUniform &uniform = p_uniforms[i];
const UniformInfo &ui = shader_set.uniforms[i];
const UniformInfo::Indexes &idx = ui.arg_buffer;
switch (uniform.type) {
case UNIFORM_TYPE_SAMPLER: {
size_t count = uniform.ids.size();
for (size_t j = 0; j < count; j += 1) {
id<MTLSamplerState> sampler = rid::get(uniform.ids[j]);
*(MTLResourceID *)(ptr + idx.sampler + j) = sampler.gpuResourceID;
}
} break;
case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
uint32_t count = uniform.ids.size() / 2;
for (uint32_t j = 0; j < count; j += 1) {
id<MTLSamplerState> sampler = rid::get(uniform.ids[j * 2 + 0]);
id<MTLTexture> texture = rid::get(uniform.ids[j * 2 + 1]);
*(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID;
*(MTLResourceID *)(ptr + idx.sampler + j) = sampler.gpuResourceID;
add_usage(texture, ui.active_stages, ui.usage);
}
} break;
case UNIFORM_TYPE_TEXTURE: {
size_t count = uniform.ids.size();
for (size_t j = 0; j < count; j += 1) {
id<MTLTexture> texture = rid::get(uniform.ids[j]);
*(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID;
add_usage(texture, ui.active_stages, ui.usage);
}
} break;
case UNIFORM_TYPE_IMAGE: {
size_t count = uniform.ids.size();
for (size_t j = 0; j < count; j += 1) {
id<MTLTexture> texture = rid::get(uniform.ids[j]);
*(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID;
add_usage(texture, ui.active_stages, ui.usage);
if (idx.buffer != UINT32_MAX) {
// Emulated atomic image access.
id<MTLBuffer> buffer = (texture.parentTexture ? texture.parentTexture : texture).buffer;
*(MTLGPUAddress *)(ptr + idx.buffer + j) = buffer.gpuAddress;
add_usage(buffer, ui.active_stages, ui.usage);
}
}
} break;
case UNIFORM_TYPE_TEXTURE_BUFFER: {
ERR_PRINT("not implemented: UNIFORM_TYPE_TEXTURE_BUFFER");
} break;
case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
ERR_PRINT("not implemented: UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER");
} break;
case UNIFORM_TYPE_IMAGE_BUFFER: {
CRASH_NOW_MSG("not implemented: UNIFORM_TYPE_IMAGE_BUFFER");
} break;
case UNIFORM_TYPE_STORAGE_BUFFER:
case UNIFORM_TYPE_UNIFORM_BUFFER: {
const BufferInfo *buffer = (const BufferInfo *)uniform.ids[0].id;
*(MTLGPUAddress *)(ptr + idx.buffer) = buffer->metal_buffer.gpuAddress;
add_usage(buffer->metal_buffer, ui.active_stages, ui.usage);
} break;
case UNIFORM_TYPE_INPUT_ATTACHMENT: {
size_t count = uniform.ids.size();
for (size_t j = 0; j < count; j += 1) {
id<MTLTexture> texture = rid::get(uniform.ids[j]);
*(MTLResourceID *)(ptr + idx.texture + j) = texture.gpuResourceID;
add_usage(texture, ui.active_stages, ui.usage);
}
} break;
case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC:
case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
// Dynamic buffers are currently not supported with argument buffers,
// so we do not encode them; no runtime shaders should be using them.
} break;
default: {
DEV_ASSERT(false);
}
}
}
for (KeyValue<MTLResourceUnsafe, StageResourceUsage> const &keyval : bound_resources) {
ResourceVector *resources = set->usage_to_resources.getptr(keyval.value);
if (resources == nullptr) {
resources = &set->usage_to_resources.insert(keyval.value, ResourceVector())->value;
}
int64_t pos = resources->span().bisect(keyval.key, true);
if (pos == resources->size() || (*resources)[pos] != keyval.key) {
resources->insert(pos, keyval.key);
}
}
GODOT_CLANG_WARNING_POP
}
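With the bindless-style encoding above there is no MTLArgumentEncoder at all: the set's buffer is a flat array of 8-byte slots, and each uniform writes either a 64-bit MTLResourceID (textures, samplers) or a raw GPU address (buffers) at its precomputed arg_buffer index. Condensed to a sketch of the common cases:

    uint64_t *slots = (uint64_t *)set->arg_buffer.contents;
    // Textures and samplers store their 64-bit resource handles.
    *(MTLResourceID *)(slots + idx.texture + j) = texture.gpuResourceID;
    *(MTLResourceID *)(slots + idx.sampler + j) = sampler.gpuResourceID;
    // Buffers store their GPU virtual address instead.
    *(MTLGPUAddress *)(slots + idx.buffer) = buffer->metal_buffer.gpuAddress;
    // bound_resources records per-stage usage on the side so the encoder can
    // later mark every referenced resource resident before draws/dispatches.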
Vector<BoundUniform> bound_uniforms;
bound_uniforms.resize(p_uniforms.size());
for (uint32_t i = 0; i < p_uniforms.size(); i += 1) {
@@ -1468,350 +1519,37 @@ void RenderingDeviceDriverMetal::command_uniform_set_prepare_for_use(CommandBuff
void RenderingDeviceDriverMetal::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) {
MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
id<MTLBlitCommandEncoder> blit = cmd->blit_command_encoder();
[blit fillBuffer:((const BufferInfo *)p_buffer.id)->metal_buffer
range:NSMakeRange(p_offset, p_size)
value:0];
cmd->clear_buffer(p_buffer, p_offset, p_size);
}
void RenderingDeviceDriverMetal::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) {
MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
const BufferInfo *src = (const BufferInfo *)p_src_buffer.id;
const BufferInfo *dst = (const BufferInfo *)p_dst_buffer.id;
id<MTLBlitCommandEncoder> blit = cmd->blit_command_encoder();
for (uint32_t i = 0; i < p_regions.size(); i++) {
BufferCopyRegion region = p_regions[i];
[blit copyFromBuffer:src->metal_buffer
sourceOffset:region.src_offset
toBuffer:dst->metal_buffer
destinationOffset:region.dst_offset
size:region.size];
}
}
MTLSize MTLSizeFromVector3i(Vector3i p_size) {
return MTLSizeMake(p_size.x, p_size.y, p_size.z);
}
MTLOrigin MTLOriginFromVector3i(Vector3i p_origin) {
return MTLOriginMake(p_origin.x, p_origin.y, p_origin.z);
}
// Clamps the size so that the sum of the origin and size do not exceed the maximum size.
static inline MTLSize clampMTLSize(MTLSize p_size, MTLOrigin p_origin, MTLSize p_max_size) {
MTLSize clamped;
clamped.width = MIN(p_size.width, p_max_size.width - p_origin.x);
clamped.height = MIN(p_size.height, p_max_size.height - p_origin.y);
clamped.depth = MIN(p_size.depth, p_max_size.depth - p_origin.z);
return clamped;
cmd->copy_buffer(p_src_buffer, p_dst_buffer, p_regions);
}
void RenderingDeviceDriverMetal::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) {
MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
id<MTLTexture> src = rid::get(p_src_texture);
id<MTLTexture> dst = rid::get(p_dst_texture);
id<MTLBlitCommandEncoder> blit = cmd->blit_command_encoder();
PixelFormats &pf = *pixel_formats;
MTLPixelFormat src_fmt = src.pixelFormat;
bool src_is_compressed = pf.getFormatType(src_fmt) == MTLFormatType::Compressed;
MTLPixelFormat dst_fmt = dst.pixelFormat;
bool dst_is_compressed = pf.getFormatType(dst_fmt) == MTLFormatType::Compressed;
// Validate copy.
if (src.sampleCount != dst.sampleCount || pf.getBytesPerBlock(src_fmt) != pf.getBytesPerBlock(dst_fmt)) {
ERR_FAIL_MSG("Cannot copy between incompatible pixel formats, such as formats of different pixel sizes, or between images with different sample counts.");
}
// If source and destination have different formats and at least one is compressed, a temporary buffer is required.
bool need_tmp_buffer = (src_fmt != dst_fmt) && (src_is_compressed || dst_is_compressed);
if (need_tmp_buffer) {
ERR_FAIL_MSG("not implemented: copy with intermediate buffer");
}
if (src_fmt != dst_fmt) {
// Map the source pixel format to the dst through a texture view on the source texture.
src = [src newTextureViewWithPixelFormat:dst_fmt];
}
for (uint32_t i = 0; i < p_regions.size(); i++) {
TextureCopyRegion region = p_regions[i];
MTLSize extent = MTLSizeFromVector3i(region.size);
// If copies can be performed using direct texture-texture copying, do so.
uint32_t src_level = region.src_subresources.mipmap;
uint32_t src_base_layer = region.src_subresources.base_layer;
MTLSize src_extent = mipmapLevelSizeFromTexture(src, src_level);
uint32_t dst_level = region.dst_subresources.mipmap;
uint32_t dst_base_layer = region.dst_subresources.base_layer;
MTLSize dst_extent = mipmapLevelSizeFromTexture(dst, dst_level);
// All layers may be copied at once, if the extent completely covers both images.
if (src_extent == extent && dst_extent == extent) {
[blit copyFromTexture:src
sourceSlice:src_base_layer
sourceLevel:src_level
toTexture:dst
destinationSlice:dst_base_layer
destinationLevel:dst_level
sliceCount:region.src_subresources.layer_count
levelCount:1];
} else {
MTLOrigin src_origin = MTLOriginFromVector3i(region.src_offset);
MTLSize src_size = clampMTLSize(extent, src_origin, src_extent);
uint32_t layer_count = 0;
if ((src.textureType == MTLTextureType3D) != (dst.textureType == MTLTextureType3D)) {
// In the case, the number of layers to copy is in extent.depth. Use that value,
// then clamp the depth, so we don't try to copy more than Metal will allow.
layer_count = extent.depth;
src_size.depth = 1;
} else {
layer_count = region.src_subresources.layer_count;
}
MTLOrigin dst_origin = MTLOriginFromVector3i(region.dst_offset);
for (uint32_t layer = 0; layer < layer_count; layer++) {
// We can copy between a 3D and a 2D image easily. Just copy between
// one slice of the 2D image and one plane of the 3D image at a time.
if ((src.textureType == MTLTextureType3D) == (dst.textureType == MTLTextureType3D)) {
[blit copyFromTexture:src
sourceSlice:src_base_layer + layer
sourceLevel:src_level
sourceOrigin:src_origin
sourceSize:src_size
toTexture:dst
destinationSlice:dst_base_layer + layer
destinationLevel:dst_level
destinationOrigin:dst_origin];
} else if (src.textureType == MTLTextureType3D) {
[blit copyFromTexture:src
sourceSlice:src_base_layer
sourceLevel:src_level
sourceOrigin:MTLOriginMake(src_origin.x, src_origin.y, src_origin.z + layer)
sourceSize:src_size
toTexture:dst
destinationSlice:dst_base_layer + layer
destinationLevel:dst_level
destinationOrigin:dst_origin];
} else {
DEV_ASSERT(dst.textureType == MTLTextureType3D);
[blit copyFromTexture:src
sourceSlice:src_base_layer + layer
sourceLevel:src_level
sourceOrigin:src_origin
sourceSize:src_size
toTexture:dst
destinationSlice:dst_base_layer
destinationLevel:dst_level
destinationOrigin:MTLOriginMake(dst_origin.x, dst_origin.y, dst_origin.z + layer)];
}
}
}
}
cmd->copy_texture(p_src_texture, p_dst_texture, p_regions);
}
void RenderingDeviceDriverMetal::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) {
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
id<MTLTexture> src_tex = rid::get(p_src_texture);
id<MTLTexture> dst_tex = rid::get(p_dst_texture);
MTLRenderPassDescriptor *mtlRPD = [MTLRenderPassDescriptor renderPassDescriptor];
MTLRenderPassColorAttachmentDescriptor *mtlColorAttDesc = mtlRPD.colorAttachments[0];
mtlColorAttDesc.loadAction = MTLLoadActionLoad;
mtlColorAttDesc.storeAction = MTLStoreActionMultisampleResolve;
mtlColorAttDesc.texture = src_tex;
mtlColorAttDesc.resolveTexture = dst_tex;
mtlColorAttDesc.level = p_src_mipmap;
mtlColorAttDesc.slice = p_src_layer;
mtlColorAttDesc.resolveLevel = p_dst_mipmap;
mtlColorAttDesc.resolveSlice = p_dst_layer;
cb->encodeRenderCommandEncoderWithDescriptor(mtlRPD, @"Resolve Image");
cb->resolve_texture(p_src_texture, p_src_texture_layout, p_src_layer, p_src_mipmap, p_dst_texture, p_dst_texture_layout, p_dst_layer, p_dst_mipmap);
}
void RenderingDeviceDriverMetal::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) {
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
id<MTLTexture> src_tex = rid::get(p_texture);
if (src_tex.parentTexture) {
// Clear via the parent texture rather than the view.
src_tex = src_tex.parentTexture;
}
PixelFormats &pf = *pixel_formats;
if (pf.isDepthFormat(src_tex.pixelFormat) || pf.isStencilFormat(src_tex.pixelFormat)) {
ERR_FAIL_MSG("invalid: depth or stencil texture format");
}
MTLRenderPassDescriptor *desc = MTLRenderPassDescriptor.renderPassDescriptor;
if (p_subresources.aspect.has_flag(TEXTURE_ASPECT_COLOR_BIT)) {
MTLRenderPassColorAttachmentDescriptor *caDesc = desc.colorAttachments[0];
caDesc.texture = src_tex;
caDesc.loadAction = MTLLoadActionClear;
caDesc.storeAction = MTLStoreActionStore;
caDesc.clearColor = MTLClearColorMake(p_color.r, p_color.g, p_color.b, p_color.a);
// Extract the mipmap levels that are to be updated.
uint32_t mipLvlStart = p_subresources.base_mipmap;
uint32_t mipLvlCnt = p_subresources.mipmap_count;
uint32_t mipLvlEnd = mipLvlStart + mipLvlCnt;
uint32_t levelCount = src_tex.mipmapLevelCount;
// Extract the cube or array layers (slices) that are to be updated.
bool is3D = src_tex.textureType == MTLTextureType3D;
uint32_t layerStart = is3D ? 0 : p_subresources.base_layer;
uint32_t layerCnt = p_subresources.layer_count;
uint32_t layerEnd = layerStart + layerCnt;
MetalFeatures const &features = (*device_properties).features;
// Iterate across mipmap levels and layers, and perform and empty render to clear each.
for (uint32_t mipLvl = mipLvlStart; mipLvl < mipLvlEnd; mipLvl++) {
ERR_FAIL_INDEX_MSG(mipLvl, levelCount, "mip level out of range");
caDesc.level = mipLvl;
// If a 3D image, we need to get the depth for each level.
if (is3D) {
layerCnt = mipmapLevelSizeFromTexture(src_tex, mipLvl).depth;
layerEnd = layerStart + layerCnt;
}
if ((features.layeredRendering && src_tex.sampleCount == 1) || features.multisampleLayeredRendering) {
// We can clear all layers at once.
if (is3D) {
caDesc.depthPlane = layerStart;
} else {
caDesc.slice = layerStart;
}
desc.renderTargetArrayLength = layerCnt;
cb->encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image");
} else {
for (uint32_t layer = layerStart; layer < layerEnd; layer++) {
if (is3D) {
caDesc.depthPlane = layer;
} else {
caDesc.slice = layer;
}
cb->encodeRenderCommandEncoderWithDescriptor(desc, @"Clear Image");
}
}
}
}
}
API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0))
bool isArrayTexture(MTLTextureType p_type) {
return (p_type == MTLTextureType3D ||
p_type == MTLTextureType2DArray ||
p_type == MTLTextureType2DMultisampleArray ||
p_type == MTLTextureType1DArray);
}
void RenderingDeviceDriverMetal::_copy_texture_buffer(CommandBufferID p_cmd_buffer,
CopySource p_source,
TextureID p_texture,
BufferID p_buffer,
VectorView<BufferTextureCopyRegion> p_regions) {
MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
const BufferInfo *buffer = (const BufferInfo *)p_buffer.id;
id<MTLTexture> texture = rid::get(p_texture);
id<MTLBlitCommandEncoder> enc = cmd->blit_command_encoder();
PixelFormats &pf = *pixel_formats;
MTLPixelFormat mtlPixFmt = texture.pixelFormat;
MTLBlitOption options = MTLBlitOptionNone;
if (pf.isPVRTCFormat(mtlPixFmt)) {
options |= MTLBlitOptionRowLinearPVRTC;
}
for (uint32_t i = 0; i < p_regions.size(); i++) {
BufferTextureCopyRegion region = p_regions[i];
uint32_t mip_level = region.texture_subresources.mipmap;
MTLOrigin txt_origin = MTLOriginMake(region.texture_offset.x, region.texture_offset.y, region.texture_offset.z);
MTLSize src_extent = mipmapLevelSizeFromTexture(texture, mip_level);
MTLSize txt_size = clampMTLSize(MTLSizeMake(region.texture_region_size.x, region.texture_region_size.y, region.texture_region_size.z),
txt_origin,
src_extent);
uint32_t buffImgWd = region.texture_region_size.x;
uint32_t buffImgHt = region.texture_region_size.y;
NSUInteger bytesPerRow = pf.getBytesPerRow(mtlPixFmt, buffImgWd);
NSUInteger bytesPerImg = pf.getBytesPerLayer(mtlPixFmt, bytesPerRow, buffImgHt);
MTLBlitOption blit_options = options;
if (pf.isDepthFormat(mtlPixFmt) && pf.isStencilFormat(mtlPixFmt)) {
bool want_depth = flags::all(region.texture_subresources.aspect, TEXTURE_ASPECT_DEPTH_BIT);
bool want_stencil = flags::all(region.texture_subresources.aspect, TEXTURE_ASPECT_STENCIL_BIT);
// The stencil component is always 1 byte per pixel.
// Don't reduce depths of 32-bit depth/stencil formats.
if (want_depth && !want_stencil) {
if (pf.getBytesPerTexel(mtlPixFmt) != 4) {
bytesPerRow -= buffImgWd;
bytesPerImg -= buffImgWd * buffImgHt;
}
blit_options |= MTLBlitOptionDepthFromDepthStencil;
} else if (want_stencil && !want_depth) {
bytesPerRow = buffImgWd;
bytesPerImg = buffImgWd * buffImgHt;
blit_options |= MTLBlitOptionStencilFromDepthStencil;
}
}
if (!isArrayTexture(texture.textureType)) {
bytesPerImg = 0;
}
if (p_source == CopySource::Buffer) {
for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) {
[enc copyFromBuffer:buffer->metal_buffer
sourceOffset:region.buffer_offset + (bytesPerImg * lyrIdx)
sourceBytesPerRow:bytesPerRow
sourceBytesPerImage:bytesPerImg
sourceSize:txt_size
toTexture:texture
destinationSlice:region.texture_subresources.base_layer + lyrIdx
destinationLevel:mip_level
destinationOrigin:txt_origin
options:blit_options];
}
} else {
for (uint32_t lyrIdx = 0; lyrIdx < region.texture_subresources.layer_count; lyrIdx++) {
[enc copyFromTexture:texture
sourceSlice:region.texture_subresources.base_layer + lyrIdx
sourceLevel:mip_level
sourceOrigin:txt_origin
sourceSize:txt_size
toBuffer:buffer->metal_buffer
destinationOffset:region.buffer_offset + (bytesPerImg * lyrIdx)
destinationBytesPerRow:bytesPerRow
destinationBytesPerImage:bytesPerImg
options:blit_options];
}
}
}
cb->clear_color_texture(p_texture, p_texture_layout, p_color, p_subresources);
}
void RenderingDeviceDriverMetal::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) {
_copy_texture_buffer(p_cmd_buffer, CopySource::Buffer, p_dst_texture, p_src_buffer, p_regions);
MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
cmd->copy_buffer_to_texture(p_src_buffer, p_dst_texture, p_regions);
}
void RenderingDeviceDriverMetal::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) {
_copy_texture_buffer(p_cmd_buffer, CopySource::Texture, p_src_texture, p_dst_buffer, p_regions);
MDCommandBuffer *cmd = (MDCommandBuffer *)(p_cmd_buffer.id);
cmd->copy_texture_to_buffer(p_src_texture, p_dst_buffer, p_regions);
}
#pragma mark - Pipeline
@@ -2558,13 +2296,12 @@ void RenderingDeviceDriverMetal::command_timestamp_write(CommandBufferID p_cmd_b
void RenderingDeviceDriverMetal::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) {
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
NSString *s = [[NSString alloc] initWithBytesNoCopy:(void *)p_label_name length:strlen(p_label_name) encoding:NSUTF8StringEncoding freeWhenDone:NO];
[cb->get_command_buffer() pushDebugGroup:s];
cb->begin_label(p_label_name, p_color);
}
void RenderingDeviceDriverMetal::command_end_label(CommandBufferID p_cmd_buffer) {
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
[cb->get_command_buffer() popDebugGroup];
cb->end_label();
}
#pragma mark - Debug
@@ -2576,8 +2313,8 @@ void RenderingDeviceDriverMetal::command_insert_breadcrumb(CommandBufferID p_cmd
#pragma mark - Submission
void RenderingDeviceDriverMetal::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) {
frame_index = p_frame_index;
frames_drawn = p_frames_drawn;
_frame_index = p_frame_index;
_frames_drawn = p_frames_drawn;
}
void RenderingDeviceDriverMetal::end_segment() {
@@ -2612,9 +2349,7 @@ void RenderingDeviceDriverMetal::set_object_name(ObjectType p_type, ID p_driver_
} break;
case OBJECT_TYPE_UNIFORM_SET: {
MDUniformSet *set = (MDUniformSet *)(p_driver_id.id);
for (KeyValue<MDShader *, BoundUniformSet> &keyval : set->bound_uniforms) {
keyval.value.buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()];
}
set->arg_buffer.label = [NSString stringWithUTF8String:p_name.utf8().get_data()];
} break;
case OBJECT_TYPE_PIPELINE: {
// Can't set label after creation.
@@ -2804,7 +2539,9 @@ uint64_t RenderingDeviceDriverMetal::limit_get(Limit p_limit) {
uint64_t RenderingDeviceDriverMetal::api_trait_get(ApiTrait p_trait) {
switch (p_trait) {
case API_TRAIT_HONORS_PIPELINE_BARRIERS:
return 0;
return false;
case API_TRAIT_CLEARS_WITH_COPY_ENGINE:
return false;
default:
return RenderingDeviceDriver::api_trait_get(p_trait);
}
@@ -2925,31 +2662,26 @@ Error RenderingDeviceDriverMetal::_create_device() {
void RenderingDeviceDriverMetal::_check_capabilities() {
capabilities.device_family = DEVICE_METAL;
capabilities.version_major = device_properties->features.mslVersionMajor;
capabilities.version_minor = device_properties->features.mslVersionMinor;
parse_msl_version(device_properties->features.msl_target_version, capabilities.version_major, capabilities.version_minor);
}
API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0))
static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties *p_device_properties) {
using DP = MetalDeviceProfile;
NSOperatingSystemVersion os_version = NSProcessInfo.processInfo.operatingSystemVersion;
MetalDeviceProfile res;
res.min_os_version = MinOsVersion(os_version.majorVersion, os_version.minorVersion, os_version.patchVersion);
#if TARGET_OS_OSX
res.platform = DP::Platform::macOS;
res.features = {
.mslVersionMajor = p_device_properties->features.mslVersionMajor,
.mslVersionMinor = p_device_properties->features.mslVersionMinor,
.argument_buffers_tier = DP::ArgumentBuffersTier::Tier2,
.simdPermute = true
};
#else
res.platform = DP::Platform::iOS;
#endif
res.features = {
.mslVersionMajor = p_device_properties->features.mslVersionMajor,
.mslVersionMinor = p_device_properties->features.mslVersionMinor,
.argument_buffers_tier = p_device_properties->features.argument_buffers_tier == MTLArgumentBuffersTier1 ? DP::ArgumentBuffersTier::Tier1 : DP::ArgumentBuffersTier::Tier2,
.msl_version = p_device_properties->features.msl_target_version,
.use_argument_buffers = p_device_properties->features.argument_buffers_enabled(),
.simdPermute = p_device_properties->features.simdPermute,
};
#endif
// highestFamily will only be set to an Apple GPU family.
switch (p_device_properties->features.highestFamily) {
case MTLGPUFamilyApple1:
@@ -2985,8 +2717,6 @@ static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties *
} break;
}
res.update_options();
return res;
}
@@ -3001,7 +2731,7 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p
_check_capabilities();
frame_count = p_frame_count;
_frame_count = p_frame_count;
// Set the pipeline cache ID based on the Metal version.
pipeline_cache_id = "metal-driver-" + get_api_version();

View File

@@ -30,6 +30,7 @@
#pragma once
#import "metal_device_profile.h"
#import "sha256_digest.h"
#import "servers/rendering/rendering_device_driver.h"
@@ -41,92 +42,6 @@ const uint32_t VIEW_MASK_BUFFER_INDEX = 24;
class RenderingShaderContainerFormatMetal;
class MinOsVersion {
uint32_t version;
public:
String to_compiler_os_version() const;
bool is_null() const { return version == UINT32_MAX; }
bool is_valid() const { return version != UINT32_MAX; }
MinOsVersion(const String &p_version);
explicit MinOsVersion(uint32_t p_version) :
version(p_version) {}
MinOsVersion() :
version(UINT32_MAX) {}
bool operator>(uint32_t p_other) {
return version > p_other;
}
};
/// @brief A minimal structure that defines a device profile for Metal.
///
/// This structure is used by the `RenderingShaderContainerMetal` class to
/// determine options for compiling SPIR-V to Metal source. It currently only
/// contains the minimum properties required to transform shaders from SPIR-V to Metal
/// and potentially compile to a `.metallib`.
struct MetalDeviceProfile {
enum class Platform : uint32_t {
macOS = 0,
iOS = 1,
};
/*! @brief The GPU family.
*
* NOTE: These values match Apple's MTLGPUFamily
*/
enum class GPU : uint32_t {
Apple1 = 1001,
Apple2 = 1002,
Apple3 = 1003,
Apple4 = 1004,
Apple5 = 1005,
Apple6 = 1006,
Apple7 = 1007,
Apple8 = 1008,
Apple9 = 1009,
};
enum class ArgumentBuffersTier : uint32_t {
Tier1 = 0,
Tier2 = 1,
};
struct Features {
uint32_t mslVersionMajor = 0;
uint32_t mslVersionMinor = 0;
ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1;
bool simdPermute = false;
};
/**
* @brief Options to configure the Metal device profile.
*
* This structure allows customization of the Metal device profile,
* such as the argument buffers tier, which can affect how shaders are compiled.
*/
struct Options {
ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1;
};
Platform platform = Platform::macOS;
GPU gpu = GPU::Apple4;
Features features;
Options options;
static const MetalDeviceProfile *get_profile(Platform p_platform, GPU p_gpu);
// Configure any options for the device profile, which may include overrides from the environment.
void update_options();
MetalDeviceProfile() = default;
private:
static Mutex profiles_lock; ///< Mutex to protect access to the profiles map.
static HashMap<uint32_t, MetalDeviceProfile> profiles;
};
class RenderingShaderContainerMetal : public RenderingShaderContainer {
GDSOFTCLASS(RenderingShaderContainerMetal, RenderingShaderContainer);
@@ -136,6 +51,7 @@ public:
NONE = 0,
NEEDS_VIEW_MASK_BUFFER = 1 << 0,
USES_ARGUMENT_BUFFERS = 1 << 1,
NEEDS_DEBUG_LOGGING = 1 << 2,
};
/// The base profile that was used to generate this shader.
@@ -152,6 +68,7 @@ public:
*/
MinOsVersion os_min_version;
uint32_t flags = NONE;
uint32_t push_constant_binding = UINT32_MAX; ///< Metal binding slot for the push constant data
/// @brief Returns `true` if the shader is compiled with multi-view support.
bool needs_view_mask_buffer() const {
@@ -178,6 +95,19 @@ public:
flags &= ~USES_ARGUMENT_BUFFERS;
}
}
/// Returns `true` if the shader was compiled with the GL_EXT_debug_printf extension enabled.
bool needs_debug_logging() const {
return flags & NEEDS_DEBUG_LOGGING;
}
void set_needs_debug_logging(bool p_value) {
if (p_value) {
flags |= NEEDS_DEBUG_LOGGING;
} else {
flags &= ~NEEDS_DEBUG_LOGGING;
}
}
};
struct StageData {
@@ -187,67 +117,40 @@ public:
SHA256Digest hash; ///< SHA-256 hash of the shader code
uint32_t source_size = 0; ///< size of the source code in the returned bytes
uint32_t library_size = 0; ///< size of the compiled library in the returned bytes, 0 if it is not compiled
uint32_t push_constant_binding = UINT32_MAX; ///< Metal binding slot for the push constant data
};
struct BindingInfoData {
uint32_t shader_stage = UINT32_MAX; ///< The shader stage this binding is used in, or UINT32_MAX if not used.
struct UniformData {
uint32_t active_stages = 0;
uint32_t uniform_type = 0; // UniformType
uint32_t data_type = 0; // MTLDataTypeNone
uint32_t index = 0;
uint32_t access = 0; // MTLBindingAccessReadOnly
uint32_t usage = 0; // MTLResourceUsage (none)
uint32_t texture_type = 2; // MTLTextureType2D
uint32_t image_format = 0;
uint32_t array_length = 0;
uint32_t is_multisampled = 0;
struct Indexes {
uint32_t buffer = UINT32_MAX;
uint32_t texture = UINT32_MAX;
uint32_t sampler = UINT32_MAX;
};
Indexes slot;
Indexes arg_buffer;
enum class IndexType {
SLOT,
ARG,
};
struct UniformData {
/// Specifies the index into the `bindings` array for the shader stage.
///
/// For example, a vertex and fragment shader use slots 0 and 1 of the bindings and bindings_secondary arrays.
static constexpr uint32_t STAGE_INDEX[RenderingDeviceCommons::SHADER_STAGE_MAX] = {
0, // SHADER_STAGE_VERTEX
1, // SHADER_STAGE_FRAGMENT
0, // SHADER_STAGE_TESSELATION_CONTROL
1, // SHADER_STAGE_TESSELATION_EVALUATION
0, // SHADER_STAGE_COMPUTE
};
/// Specifies the stages the uniform data is
/// used by the Metal shader.
uint32_t active_stages = 0;
/// The primary binding information for the uniform data.
///
/// A maximum of two stages is expected for any given pipeline, such as a vertex and fragment, so
/// the array size is fixed to 2.
BindingInfoData bindings[2];
/// The secondary binding information for the uniform data.
///
/// This is typically a sampler for an image-sampler uniform
BindingInfoData bindings_secondary[2];
_FORCE_INLINE_ constexpr uint32_t get_index_for_stage(RenderingDeviceCommons::ShaderStage p_stage) const {
return STAGE_INDEX[p_stage];
_FORCE_INLINE_ Indexes &get_indexes(IndexType p_type) {
switch (p_type) {
case IndexType::SLOT:
return slot;
case IndexType::ARG:
return arg_buffer;
}
_FORCE_INLINE_ BindingInfoData &get_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) {
BindingInfoData &info = bindings[get_index_for_stage(p_stage)];
DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage
info.shader_stage = p_stage;
return info;
}
_FORCE_INLINE_ BindingInfoData &get_secondary_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) {
BindingInfoData &info = bindings_secondary[get_index_for_stage(p_stage)];
DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage
info.shader_stage = p_stage;
return info;
}
};
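UniformData now carries two parallel index triples: slot holds the classic per-stage bind points used when argument buffers are disabled, while arg_buffer holds element offsets into the set's shared 64-bit slot array. get_indexes selects between them; illustrative use:

    UniformData &ud = uniforms[i]; // Hypothetical lookup.
    // Direct path: [[buffer(n)]] / [[texture(n)]] / [[sampler(n)]] slots.
    UniformData::Indexes &direct = ud.get_indexes(UniformData::IndexType::SLOT);
    // Argument-buffer path: offsets into the set's uint64_t array.
    UniformData::Indexes &encoded = ud.get_indexes(UniformData::IndexType::ARG);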
struct SpecializationData {
uint32_t used_stages = 0;
};
HeaderData mtl_reflection_data; // complement to reflection_data
@@ -269,23 +172,19 @@ private:
private:
const MetalDeviceProfile *device_profile = nullptr;
bool export_mode = false;
MinOsVersion min_os_version;
Vector<UniformData> mtl_reflection_binding_set_uniforms_data; // complement to reflection_binding_set_uniforms_data
Vector<SpecializationData> mtl_reflection_specialization_data; // complement to reflection_specialization_data
Error compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector<uint8_t> &r_binary_data);
public:
static constexpr uint32_t FORMAT_VERSION = 1;
static constexpr uint32_t FORMAT_VERSION = 2;
void set_export_mode(bool p_export_mode) { export_mode = p_export_mode; }
void set_device_profile(const MetalDeviceProfile *p_device_profile) { device_profile = p_device_profile; }
void set_min_os_version(const MinOsVersion p_min_os_version) { min_os_version = p_min_os_version; }
struct MetalShaderReflection {
Vector<Vector<UniformData>> uniform_sets;
Vector<SpecializationData> specialization_constants;
};
MetalShaderReflection get_metal_shader_reflection() const;
@@ -294,24 +193,20 @@ protected:
virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes) override;
virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) override;
virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) override;
virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
virtual uint32_t _from_bytes_shader_extra_data_start(const uint8_t *p_bytes) override;
virtual uint32_t _from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) override;
virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const override;
virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
virtual uint32_t _to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const override;
virtual uint32_t _format() const override;
virtual uint32_t _format_version() const override;
virtual bool _set_code_from_spirv(Span<ReflectedShaderStage> p_spirv) override;
virtual bool _set_code_from_spirv(const ReflectShader &p_shader) override;
};
class RenderingShaderContainerFormatMetal : public RenderingShaderContainerFormat {
bool export_mode = false;
MinOsVersion min_os_version;
const MetalDeviceProfile *device_profile = nullptr;
@@ -319,6 +214,6 @@ public:
virtual Ref<RenderingShaderContainer> create_container() const override;
virtual ShaderLanguageVersion get_shader_language_version() const override;
virtual ShaderSpirvVersion get_shader_spirv_version() const override;
RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export = false, const MinOsVersion p_min_os_version = MinOsVersion());
RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export = false);
virtual ~RenderingShaderContainerFormatMetal() = default;
};

View File

@@ -34,85 +34,16 @@
#import "core/io/file_access.h"
#import "core/io/marshalls.h"
#import "core/templates/fixed_vector.h"
#import "servers/rendering/rendering_device.h"
#include "thirdparty/spirv-reflect/spirv_reflect.h"
#import <Metal/Metal.h>
#import <spirv.hpp>
#import <spirv_msl.hpp>
#import <spirv_parser.hpp>
Mutex MetalDeviceProfile::profiles_lock;
HashMap<uint32_t, MetalDeviceProfile> MetalDeviceProfile::profiles;
const MetalDeviceProfile *MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform p_platform, MetalDeviceProfile::GPU p_gpu) {
DEV_ASSERT(p_platform == Platform::macOS || p_platform == Platform::iOS);
MutexLock lock(profiles_lock);
uint32_t key = (uint32_t)p_platform << 16 | (uint32_t)p_gpu;
if (MetalDeviceProfile *profile = profiles.getptr(key)) {
return profile;
}
MetalDeviceProfile res;
res.platform = p_platform;
res.gpu = p_gpu;
if (p_platform == Platform::macOS) {
res.features.mslVersionMajor = 3;
res.features.mslVersionMinor = 2;
res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2;
res.features.simdPermute = true;
} else if (p_platform == Platform::iOS) {
switch (p_gpu) {
case GPU::Apple1:
case GPU::Apple2:
case GPU::Apple3:
case GPU::Apple4:
case GPU::Apple5: {
res.features.simdPermute = false;
res.features.argument_buffers_tier = ArgumentBuffersTier::Tier1;
} break;
case GPU::Apple6:
case GPU::Apple7:
case GPU::Apple8:
case GPU::Apple9: {
res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2;
res.features.simdPermute = true;
} break;
}
res.features.mslVersionMajor = 3;
res.features.mslVersionMinor = 1;
}
res.update_options();
return &profiles.insert(key, res)->value;
}
void MetalDeviceProfile::update_options() {
options.argument_buffers_tier = features.argument_buffers_tier;
if (OS::get_singleton()->has_environment(U"GODOT_MTL_ARGUMENT_BUFFERS_TIER")) {
uint64_t tier = OS::get_singleton()->get_environment(U"GODOT_MTL_ARGUMENT_BUFFERS_TIER").to_int();
switch (tier) {
case 1:
// All devices support tier 1 argument buffers.
options.argument_buffers_tier = ArgumentBuffersTier::Tier1;
break;
case 2:
if (features.argument_buffers_tier >= ArgumentBuffersTier::Tier2) {
options.argument_buffers_tier = ArgumentBuffersTier::Tier2;
} else {
WARN_PRINT("Current device does not support tier 2 argument buffers, leaving as default.");
}
break;
default:
WARN_PRINT(vformat("Invalid value for GODOT_MTL_ARGUMENT_BUFFERS_TIER: %d. Falling back to device default.", tier));
break;
}
}
}
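As a rough standalone illustration of the override logic above (the environment variable name matches the code; everything else is simplified):

#include <cstdio>
#include <cstdlib>

int main() {
	int device_tier = 2; // what the hardware reports
	int tier = device_tier;
	if (const char *env = std::getenv("GODOT_MTL_ARGUMENT_BUFFERS_TIER")) {
		int requested = std::atoi(env);
		if (requested == 1) {
			tier = 1; // always valid: every device supports tier 1
		} else if (requested == 2 && device_tier >= 2) {
			tier = 2;
		} else {
			std::fprintf(stderr, "Invalid or unsupported tier %d, keeping device default.\n", requested);
		}
	}
	std::printf("Using argument buffers tier %d\n", tier);
}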
void RenderingShaderContainerMetal::_initialize_toolchain_properties() {
if (compiler_props.is_valid()) {
return;
@@ -126,21 +57,25 @@ void RenderingShaderContainerMetal::_initialize_toolchain_properties() {
case MetalDeviceProfile::Platform::iOS:
sdk = "iphoneos";
break;
case MetalDeviceProfile::Platform::visionOS:
sdk = "xros";
break;
}
Vector<String> parts{ "echo", R"("")", "|", "/usr/bin/xcrun", "-sdk", sdk, "metal", "-E", "-dM", "-x", "metal" };
// Compile Metal shaders for the minimum supported target instead of the host machine.
if (min_os_version.is_valid()) {
switch (device_profile->platform) {
case MetalDeviceProfile::Platform::macOS: {
parts.push_back("-mmacosx-version-min=" + min_os_version.to_compiler_os_version());
parts.push_back("-mtargetos=macos" + device_profile->min_os_version.to_compiler_os_version());
break;
}
case MetalDeviceProfile::Platform::iOS: {
parts.push_back("-mios-version-min=" + min_os_version.to_compiler_os_version());
parts.push_back("-mtargetos=ios" + device_profile->min_os_version.to_compiler_os_version());
break;
}
case MetalDeviceProfile::Platform::visionOS: {
parts.push_back("-mtargetos=xros" + device_profile->min_os_version.to_compiler_os_version());
break;
}
}
@@ -205,6 +140,9 @@ Error RenderingShaderContainerMetal::compile_metal_source(const char *p_source,
case MetalDeviceProfile::Platform::iOS:
sdk = "iphoneos";
break;
case MetalDeviceProfile::Platform::visionOS:
sdk = "xros";
break;
}
// Build the .metallib binary.
@@ -212,19 +150,19 @@ Error RenderingShaderContainerMetal::compile_metal_source(const char *p_source,
List<String> args{ "-sdk", sdk, "metal", "-O3" };
// Compile metal shaders for the minimum supported target instead of the host machine.
if (min_os_version.is_valid()) {
switch (device_profile->platform) {
case MetalDeviceProfile::Platform::macOS: {
args.push_back("-mmacosx-version-min=" + min_os_version.to_compiler_os_version());
args.push_back("-mtargetos=macos" + device_profile->min_os_version.to_compiler_os_version());
break;
}
case MetalDeviceProfile::Platform::iOS: {
args.push_back("-mios-version-min=" + min_os_version.to_compiler_os_version());
args.push_back("-mtargetos=ios" + device_profile->min_os_version.to_compiler_os_version());
break;
}
case MetalDeviceProfile::Platform::visionOS: {
args.push_back("-mtargetos=xros" + device_profile->min_os_version.to_compiler_os_version());
break;
}
} else {
WARN_PRINT_ONCE(vformat("Minimum target OS version is not set, so baking shaders for Metal will target the default version of your toolchain: %s", compiler_props.os_version_min_required.to_compiler_os_version()));
}
if (p_stage_data.is_position_invariant) {
@@ -279,11 +217,32 @@ Error RenderingShaderContainerMetal::compile_metal_source(const char *p_source,
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunguarded-availability"
bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderStage> p_spirv) {
static spv::ExecutionModel SHADER_STAGE_REMAP[RDD::SHADER_STAGE_MAX] = {
[RDD::SHADER_STAGE_VERTEX] = spv::ExecutionModelVertex,
[RDD::SHADER_STAGE_FRAGMENT] = spv::ExecutionModelFragment,
[RDD::SHADER_STAGE_TESSELATION_CONTROL] = spv::ExecutionModelTessellationControl,
[RDD::SHADER_STAGE_TESSELATION_EVALUATION] = spv::ExecutionModelTessellationEvaluation,
[RDD::SHADER_STAGE_COMPUTE] = spv::ExecutionModelGLCompute,
};
spv::ExecutionModel get_stage(uint32_t p_stages_mask, RDD::ShaderStage p_stage) {
if (p_stages_mask & (1 << p_stage)) {
return SHADER_STAGE_REMAP[p_stage];
}
return spv::ExecutionModel::ExecutionModelMax;
}
spv::ExecutionModel map_stage(RDD::ShaderStage p_stage) {
return SHADER_STAGE_REMAP[p_stage];
}
bool RenderingShaderContainerMetal::_set_code_from_spirv(const ReflectShader &p_shader) {
using namespace spirv_cross;
using spirv_cross::CompilerMSL;
using spirv_cross::Resource;
const LocalVector<ReflectShaderStage> &p_spirv = p_shader.shader_stages;
if (export_mode) {
_initialize_toolchain_properties();
}
@@ -292,26 +251,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderSta
shaders.resize(p_spirv.size());
mtl_shaders.resize(p_spirv.size());
mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size());
mtl_reflection_specialization_data.resize(reflection_specialization_data.size());
mtl_reflection_data.set_needs_view_mask_buffer(reflection_data.has_multiview);
mtl_reflection_data.profile = *device_profile;
// set_indexes will contain the starting offsets of each descriptor set in the binding set uniforms data,
// plus a final entry equal to the total number of uniforms (the size of reflection_binding_set_uniforms_data).
LocalVector<uint32_t> set_indexes;
uint32_t set_indexes_size = reflection_binding_set_uniforms_count.size() + 1;
{
// calculate the starting offsets of each descriptor set in the binding set uniforms data
uint32_t size = reflection_binding_set_uniforms_count.size();
set_indexes.resize(set_indexes_size);
uint32_t offset = 0;
for (uint32_t i = 0; i < size; i++) {
set_indexes[i] = offset;
offset += reflection_binding_set_uniforms_count.get(i);
}
set_indexes[set_indexes_size - 1] = offset;
}
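The offsets computed in that block are an exclusive prefix sum over the per-set uniform counts, with one sentinel entry appended so that the half-open range [set_indexes[i], set_indexes[i + 1]) always covers set i. A self-contained sketch of the same computation:

#include <cstdint>
#include <vector>

// Exclusive prefix sum with a trailing sentinel: counts {2, 3, 1} -> {0, 2, 5, 6}.
std::vector<uint32_t> set_offsets(const std::vector<uint32_t> &counts) {
	std::vector<uint32_t> offsets(counts.size() + 1);
	uint32_t offset = 0;
	for (size_t i = 0; i < counts.size(); i++) {
		offsets[i] = offset;
		offset += counts[i];
	}
	offsets.back() = offset; // total number of uniforms across all sets
	return offsets;
}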
CompilerMSL::Options msl_options{};
// Determine Metal language version.
@@ -322,7 +265,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderSta
msl_version = compiler_props.metal_version;
mtl_reflection_data.os_min_version = compiler_props.os_version_min_required;
} else {
msl_version = make_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
msl_version = device_profile->features.msl_version;
mtl_reflection_data.os_min_version = MinOsVersion();
}
uint32_t msl_ver_maj = 0;
@@ -339,9 +282,12 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderSta
msl_options.ios_support_base_vertex_instance = true;
}
// We don't currently allow argument buffers when using dynamic buffers as
// the current implementation does not update the argument buffer each time
// the dynamic buffer changes. This is a future TODO.
bool argument_buffers_allowed = get_shader_reflection().has_dynamic_buffers == false;
if (device_profile->options.argument_buffers_tier >= MetalDeviceProfile::ArgumentBuffersTier::Tier2 && argument_buffers_allowed) {
if (device_profile->features.use_argument_buffers && argument_buffers_allowed) {
msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2;
msl_options.argument_buffers = true;
mtl_reflection_data.set_uses_argument_buffers(true);
@@ -352,8 +298,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderSta
mtl_reflection_data.set_uses_argument_buffers(false);
}
msl_options.force_active_argument_buffer_resources = true;
// We can't use this, as we have to add the descriptor sets via compiler.add_msl_resource_binding.
// msl_options.pad_argument_buffer_resources = true;
msl_options.pad_argument_buffer_resources = true;
msl_options.texture_buffer_native = true; // Enable texture buffer support.
msl_options.use_framebuffer_fetch_subpasses = false;
msl_options.pad_fragment_output_components = true;
@@ -364,7 +309,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderSta
msl_options.multiview_layered_rendering = true;
msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX;
}
if (msl_version >= make_msl_version(3, 2)) {
if (msl_version >= MSL_VERSION_32) {
// All 3.2+ versions support device coherence, so we can disable texture fences.
msl_options.readwrite_texture_fences = false;
}
@@ -375,11 +320,234 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderSta
options.emit_line_directives = true;
#endif
// Assign MSL bindings for all the descriptor sets.
typedef std::pair<MSLResourceBinding, uint32_t> MSLBindingInfo;
LocalVector<MSLBindingInfo> spirv_bindings;
MSLResourceBinding push_constant_resource_binding;
{
enum IndexType {
Texture,
Buffer,
Sampler,
Max,
};
uint32_t dset_count = p_shader.uniform_sets.size();
uint32_t size = reflection_binding_set_uniforms_data.size();
spirv_bindings.resize(size);
uint32_t indices[IndexType::Max] = { 0 };
auto next_index = [&indices](IndexType p_t, uint32_t p_stride) -> uint32_t {
uint32_t v = indices[p_t];
indices[p_t] += p_stride;
return v;
};
uint32_t idx_dset = 0;
MSLBindingInfo *iter = spirv_bindings.ptr();
UniformData *found = mtl_reflection_binding_set_uniforms_data.ptrw();
UniformData::IndexType shader_index_type = msl_options.argument_buffers ? UniformData::IndexType::ARG : UniformData::IndexType::SLOT;
for (const ReflectDescriptorSet &dset : p_shader.uniform_sets) {
// Reset the index count for each descriptor set, as this is an index into the argument table.
uint32_t next_arg_buffer_index = 0;
auto next_arg_index = [&next_arg_buffer_index](uint32_t p_stride) -> uint32_t {
uint32_t v = next_arg_buffer_index;
next_arg_buffer_index += p_stride;
return v;
};
for (const ReflectUniform &uniform : dset) {
const SpvReflectDescriptorBinding &binding = uniform.get_spv_reflect();
found->active_stages = uniform.stages;
RD::UniformType type = RD::UniformType(uniform.type);
uint32_t binding_stride = 1; // If this is an array, stride will be the length of the array.
if (uniform.length > 1) {
switch (type) {
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC:
case RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC:
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER:
case RDC::UNIFORM_TYPE_STORAGE_BUFFER:
// A buffer's length is its size in bytes, so there is no stride.
break;
default: {
binding_stride = uniform.length;
found->array_length = uniform.length;
} break;
}
}
// Determine access type.
switch (binding.descriptor_type) {
case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE)) {
if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_READABLE)) {
found->access = MTLBindingAccessReadWrite;
} else {
found->access = MTLBindingAccessWriteOnly;
}
}
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE) && !(binding.block.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE)) {
if (!(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_READABLE) && !(binding.block.decoration_flags & SPV_REFLECT_DECORATION_NON_READABLE)) {
found->access = MTLBindingAccessReadWrite;
} else {
found->access = MTLBindingAccessWriteOnly;
}
}
} break;
default:
break;
}
switch (found->access) {
case MTLBindingAccessReadOnly:
found->usage = MTLResourceUsageRead;
break;
case MTLBindingAccessWriteOnly:
found->usage = MTLResourceUsageWrite;
break;
case MTLBindingAccessReadWrite:
found->usage = MTLResourceUsageRead | MTLResourceUsageWrite;
break;
}
iter->second = uniform.stages;
MSLResourceBinding &rb = iter->first;
rb.desc_set = idx_dset;
rb.binding = uniform.binding;
rb.count = binding_stride;
switch (type) {
case RDC::UNIFORM_TYPE_SAMPLER: {
found->data_type = MTLDataTypeSampler;
found->get_indexes(UniformData::IndexType::SLOT).sampler = next_index(Sampler, binding_stride);
found->get_indexes(UniformData::IndexType::ARG).sampler = next_arg_index(binding_stride);
rb.basetype = SPIRType::BaseType::Sampler;
} break;
case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE:
case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
found->data_type = MTLDataTypeTexture;
found->get_indexes(UniformData::IndexType::SLOT).texture = next_index(Texture, binding_stride);
found->get_indexes(UniformData::IndexType::SLOT).sampler = next_index(Sampler, binding_stride);
found->get_indexes(UniformData::IndexType::ARG).texture = next_arg_index(binding_stride);
found->get_indexes(UniformData::IndexType::ARG).sampler = next_arg_index(binding_stride);
rb.basetype = SPIRType::BaseType::SampledImage;
} break;
case RDC::UNIFORM_TYPE_TEXTURE:
case RDC::UNIFORM_TYPE_IMAGE:
case RDC::UNIFORM_TYPE_TEXTURE_BUFFER: {
found->data_type = MTLDataTypeTexture;
found->get_indexes(UniformData::IndexType::SLOT).texture = next_index(Texture, binding_stride);
found->get_indexes(UniformData::IndexType::ARG).texture = next_arg_index(binding_stride);
rb.basetype = SPIRType::BaseType::Image;
} break;
case RDC::UNIFORM_TYPE_IMAGE_BUFFER:
CRASH_NOW_MSG("Unimplemented!"); // TODO.
break;
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC:
case RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC:
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER:
case RDC::UNIFORM_TYPE_STORAGE_BUFFER: {
found->data_type = MTLDataTypePointer;
found->get_indexes(UniformData::IndexType::SLOT).buffer = next_index(Buffer, binding_stride);
found->get_indexes(UniformData::IndexType::ARG).buffer = next_arg_index(binding_stride);
rb.basetype = SPIRType::BaseType::Void;
} break;
case RDC::UNIFORM_TYPE_INPUT_ATTACHMENT: {
found->data_type = MTLDataTypeTexture;
found->get_indexes(UniformData::IndexType::SLOT).texture = next_index(Texture, binding_stride);
found->get_indexes(UniformData::IndexType::ARG).texture = next_arg_index(binding_stride);
rb.basetype = SPIRType::BaseType::Image;
} break;
case RDC::UNIFORM_TYPE_MAX:
default:
CRASH_NOW_MSG("Unreachable");
}
// Specify the MSL resource bindings based on the binding mode used by the shader.
rb.msl_buffer = found->get_indexes(shader_index_type).buffer;
rb.msl_texture = found->get_indexes(shader_index_type).texture;
rb.msl_sampler = found->get_indexes(shader_index_type).sampler;
if (found->data_type == MTLDataTypeTexture) {
const SpvReflectImageTraits &image = uniform.get_spv_reflect().image;
switch (image.dim) {
case SpvDim1D: {
if (image.arrayed) {
found->texture_type = MTLTextureType1DArray;
} else {
found->texture_type = MTLTextureType1D;
}
} break;
case SpvDimSubpassData:
case SpvDim2D: {
if (image.arrayed && image.ms) {
found->texture_type = MTLTextureType2DMultisampleArray;
} else if (image.arrayed) {
found->texture_type = MTLTextureType2DArray;
} else if (image.ms) {
found->texture_type = MTLTextureType2DMultisample;
} else {
found->texture_type = MTLTextureType2D;
}
} break;
case SpvDim3D: {
found->texture_type = MTLTextureType3D;
} break;
case SpvDimCube: {
if (image.arrayed) {
found->texture_type = MTLTextureTypeCubeArray;
} else {
found->texture_type = MTLTextureTypeCube;
}
} break;
case SpvDimRect: {
// Ignored.
} break;
case SpvDimBuffer: {
found->texture_type = MTLTextureTypeTextureBuffer;
} break;
case SpvDimTileImageDataEXT: {
// Godot does not use this extension.
// See: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_shader_tile_image.html
} break;
case SpvDimMax: {
// Add all enumerations to silence the compiler warning
// and generate future warnings, should a new one be added.
} break;
}
}
iter++;
found++;
}
idx_dset++;
}
if (reflection_data.push_constant_size > 0) {
push_constant_resource_binding.desc_set = ResourceBindingPushConstantDescriptorSet;
push_constant_resource_binding.basetype = SPIRType::BaseType::Void;
if (msl_options.argument_buffers) {
push_constant_resource_binding.msl_buffer = dset_count;
} else {
push_constant_resource_binding.msl_buffer = next_index(Buffer, 1);
}
mtl_reflection_data.push_constant_binding = push_constant_resource_binding.msl_buffer;
}
}
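Two numbering schemes are filled in parallel above: SLOT indices are global, per-resource-class counters that keep growing across descriptor sets (Metal's classic argument table), while ARG indices restart at zero for each set because they are positions inside that set's argument buffer. A reduced sketch of the difference, with illustrative types:

#include <cstdint>
#include <cstdio>
#include <vector>

enum Class { TEXTURE, BUFFER, SAMPLER, CLASS_MAX };

int main() {
	// Two sets, each containing a texture followed by a buffer.
	std::vector<std::vector<Class>> sets = { { TEXTURE, BUFFER }, { TEXTURE, BUFFER } };
	uint32_t slot[CLASS_MAX] = {}; // global per-class counters (argument table slots)
	for (size_t s = 0; s < sets.size(); s++) {
		uint32_t arg = 0; // restarts for every set's argument buffer
		for (Class c : sets[s]) {
			std::printf("set %zu: slot=%u arg=%u\n", s, slot[c]++, arg++);
		}
	}
	// Prints: set 0 -> slot 0, arg 0 / slot 0, arg 1; set 1 -> slot 1, arg 0 / slot 1, arg 1.
}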
for (uint32_t i = 0; i < p_spirv.size(); i++) {
StageData &stage_data = mtl_shaders.write[i];
const ReflectedShaderStage &v = p_spirv[i];
const ReflectShaderStage &v = p_spirv[i];
RD::ShaderStage stage = v.shader_stage;
char const *stage_name = RD::SHADER_STAGE_NAMES[stage];
Span<uint32_t> spirv = v.spirv();
Parser parser(spirv.ptr(), spirv.size());
try {
@@ -392,6 +560,18 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderSta
compiler.set_msl_options(msl_options);
compiler.set_common_options(options);
spv::ExecutionModel execution_model = map_stage(stage);
for (uint32_t jj = 0; jj < spirv_bindings.size(); jj++) {
MSLResourceBinding &rb = spirv_bindings.ptr()[jj].first;
rb.stage = execution_model;
compiler.add_msl_resource_binding(rb);
}
if (push_constant_resource_binding.desc_set == ResourceBindingPushConstantDescriptorSet) {
push_constant_resource_binding.stage = execution_model;
compiler.add_msl_resource_binding(push_constant_resource_binding);
}
std::unordered_set<VariableID> active = compiler.get_active_interface_variables();
ShaderResources resources = compiler.get_shader_resources();
@@ -408,275 +588,12 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(Span<ReflectedShaderSta
EntryPoint &entry_point_stage = entry_pts_stages.front();
SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model);
// Process specialization constants.
if (!compiler.get_specialization_constants().empty()) {
uint32_t size = reflection_specialization_data.size();
for (SpecializationConstant const &constant : compiler.get_specialization_constants()) {
uint32_t j = 0;
while (j < size) {
const ReflectionSpecializationData &res = reflection_specialization_data.ptr()[j];
if (res.constant_id == constant.constant_id) {
mtl_reflection_specialization_data.ptrw()[j].used_stages |= 1 << stage;
// Emulate a labeled for loop and continue.
goto outer_continue;
}
++j;
}
if (j == size) {
WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id));
}
outer_continue:;
}
}
// Process bindings.
uint32_t uniform_sets_size = reflection_binding_set_uniforms_count.size();
using BT = SPIRType::BaseType;
// Always clearer than a boolean.
enum class Writable {
No,
Maybe,
};
// Returns the value of the decoration if it exists,
// or UINT32_MAX if it does not.
auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) {
uint32_t res = -1;
if (compiler.has_decoration(id, decoration)) {
res = compiler.get_decoration(id, decoration);
}
return res;
};
auto descriptor_bindings = [&compiler, &active, this, &set_indexes, uniform_sets_size, stage, &get_decoration](SmallVector<Resource> &p_resources, Writable p_writable) {
for (Resource const &res : p_resources) {
uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet);
uint32_t dbin = get_decoration(res.id, spv::DecorationBinding);
UniformData *found = nullptr;
if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets_size) {
uint32_t begin = set_indexes[dset];
uint32_t end = set_indexes[dset + 1];
for (uint32_t j = begin; j < end; j++) {
const ReflectionBindingData &ref_bind = reflection_binding_set_uniforms_data[j];
if (dbin == ref_bind.binding) {
found = &mtl_reflection_binding_set_uniforms_data.write[j];
for (auto ext : compiler.get_declared_extensions()) {
if (ext == "SPV_KHR_non_semantic_info" || ext == "SPV_KHR_printf") {
mtl_reflection_data.set_needs_debug_logging(true);
break;
}
}
}
}
}
ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found");
bool is_active = active.find(res.id) != active.end();
if (is_active) {
found->active_stages |= 1 << stage;
}
BindingInfoData &primary = found->get_binding_for_stage(stage);
SPIRType const &a_type = compiler.get_type(res.type_id);
BT basetype = a_type.basetype;
switch (basetype) {
case BT::Struct: {
primary.data_type = MTLDataTypePointer;
} break;
case BT::Image:
case BT::SampledImage: {
primary.data_type = MTLDataTypeTexture;
} break;
case BT::Sampler: {
primary.data_type = MTLDataTypeSampler;
primary.array_length = 1;
for (uint32_t const &a : a_type.array) {
primary.array_length *= a;
}
} break;
default: {
ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType");
} break;
}
// Find array length of image.
if (basetype == BT::Image || basetype == BT::SampledImage) {
primary.array_length = 1;
for (uint32_t const &a : a_type.array) {
primary.array_length *= a;
}
primary.is_multisampled = a_type.image.ms;
SPIRType::ImageType const &image = a_type.image;
primary.image_format = image.format;
switch (image.dim) {
case spv::Dim1D: {
if (image.arrayed) {
primary.texture_type = MTLTextureType1DArray;
} else {
primary.texture_type = MTLTextureType1D;
}
} break;
case spv::DimSubpassData: {
[[fallthrough]];
}
case spv::Dim2D: {
if (image.arrayed && image.ms) {
primary.texture_type = MTLTextureType2DMultisampleArray;
} else if (image.arrayed) {
primary.texture_type = MTLTextureType2DArray;
} else if (image.ms) {
primary.texture_type = MTLTextureType2DMultisample;
} else {
primary.texture_type = MTLTextureType2D;
}
} break;
case spv::Dim3D: {
primary.texture_type = MTLTextureType3D;
} break;
case spv::DimCube: {
if (image.arrayed) {
primary.texture_type = MTLTextureTypeCubeArray;
} else {
primary.texture_type = MTLTextureTypeCube;
}
} break;
case spv::DimRect: {
} break;
case spv::DimBuffer: {
// VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
primary.texture_type = MTLTextureTypeTextureBuffer;
} break;
case spv::DimTileImageDataEXT: {
// Godot does not use this extension.
// See: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_shader_tile_image.html
} break;
case spv::DimMax: {
// Add all enumerations to silence the compiler warning
// and generate future warnings, should a new one be added.
} break;
}
}
// Update writable.
if (p_writable == Writable::Maybe) {
if (basetype == BT::Struct) {
Bitset flags = compiler.get_buffer_block_flags(res.id);
if (!flags.get(spv::DecorationNonWritable)) {
if (flags.get(spv::DecorationNonReadable)) {
primary.access = MTLBindingAccessWriteOnly;
} else {
primary.access = MTLBindingAccessReadWrite;
}
}
} else if (basetype == BT::Image) {
switch (a_type.image.access) {
case spv::AccessQualifierWriteOnly:
primary.access = MTLBindingAccessWriteOnly;
break;
case spv::AccessQualifierReadWrite:
primary.access = MTLBindingAccessReadWrite;
break;
case spv::AccessQualifierReadOnly:
break;
case spv::AccessQualifierMax:
[[fallthrough]];
default:
if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) {
if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) {
primary.access = MTLBindingAccessWriteOnly;
} else {
primary.access = MTLBindingAccessReadWrite;
}
}
break;
}
}
}
switch (primary.access) {
case MTLBindingAccessReadOnly:
primary.usage = MTLResourceUsageRead;
break;
case MTLBindingAccessWriteOnly:
primary.usage = MTLResourceUsageWrite;
break;
case MTLBindingAccessReadWrite:
primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite;
break;
}
primary.index = compiler.get_automatic_msl_resource_binding(res.id);
// A sampled image contains two bindings, the primary
// is to the image, and the secondary is to the associated sampler.
if (basetype == BT::SampledImage) {
uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id);
if (binding != (uint32_t)-1) {
BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage);
secondary.data_type = MTLDataTypeSampler;
secondary.index = binding;
secondary.access = MTLBindingAccessReadOnly;
}
}
// An image may have a secondary binding if it is used
// for atomic operations.
if (basetype == BT::Image) {
uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id);
if (binding != (uint32_t)-1) {
BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage);
secondary.data_type = MTLDataTypePointer;
secondary.index = binding;
secondary.access = MTLBindingAccessReadWrite;
}
}
}
return Error::OK;
};
if (!resources.uniform_buffers.empty()) {
Error err = descriptor_bindings(resources.uniform_buffers, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.storage_buffers.empty()) {
Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.storage_images.empty()) {
Error err = descriptor_bindings(resources.storage_images, Writable::Maybe);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.sampled_images.empty()) {
Error err = descriptor_bindings(resources.sampled_images, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.separate_images.empty()) {
Error err = descriptor_bindings(resources.separate_images, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.separate_samplers.empty()) {
Error err = descriptor_bindings(resources.separate_samplers, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.subpass_inputs.empty()) {
Error err = descriptor_bindings(resources.subpass_inputs, Writable::No);
ERR_FAIL_COND_V(err != OK, false);
}
if (!resources.push_constant_buffers.empty()) {
for (Resource const &res : resources.push_constant_buffers) {
uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id);
if (binding != (uint32_t)-1) {
stage_data.push_constant_binding = binding;
}
}
}
ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), false, "Atomic counters not supported");
ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), false, "Acceleration structures not supported");
ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), false, "Shader record buffers not supported");
if (!resources.stage_inputs.empty()) {
for (Resource const &res : resources.stage_inputs) {
@@ -744,13 +661,6 @@ uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_binding_uniform_ext
return sizeof(UniformData);
}
uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
if (p_bytes != nullptr) {
*(SpecializationData *)p_bytes = mtl_reflection_specialization_data[p_index];
}
return sizeof(SpecializationData);
}
uint32_t RenderingShaderContainerMetal::_to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
if (p_bytes != nullptr) {
*(StageData *)p_bytes = mtl_shaders[p_index];
@@ -773,16 +683,6 @@ uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_binding_uniform_e
return sizeof(UniformData);
}
uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) {
mtl_reflection_specialization_data.resize(reflection_specialization_data.size());
return 0;
}
uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) {
mtl_reflection_specialization_data.ptrw()[p_index] = *(SpecializationData *)p_bytes;
return sizeof(SpecializationData);
}
uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data_start(const uint8_t *p_bytes) {
mtl_shaders.resize(shaders.size());
return 0;
@@ -796,7 +696,6 @@ uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data(const uint
RenderingShaderContainerMetal::MetalShaderReflection RenderingShaderContainerMetal::get_metal_shader_reflection() const {
MetalShaderReflection res;
res.specialization_constants = mtl_reflection_specialization_data;
uint32_t uniform_set_count = reflection_binding_set_uniforms_count.size();
uint32_t start = 0;
res.uniform_sets.resize(uniform_set_count);
@@ -824,7 +723,6 @@ Ref<RenderingShaderContainer> RenderingShaderContainerFormatMetal::create_contai
result.instantiate();
result->set_export_mode(export_mode);
result->set_device_profile(device_profile);
result->set_min_os_version(min_os_version);
return result;
}
@@ -836,8 +734,8 @@ RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatMetal::
return SHADER_SPIRV_VERSION_1_6;
}
RenderingShaderContainerFormatMetal::RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export, const MinOsVersion p_min_os_version) :
export_mode(p_export), min_os_version(p_min_os_version), device_profile(p_device_profile) {
RenderingShaderContainerFormatMetal::RenderingShaderContainerFormatMetal(const MetalDeviceProfile *p_device_profile, bool p_export) :
export_mode(p_export), device_profile(p_device_profile) {
}
String MinOsVersion::to_compiler_os_version() const {

View File

@@ -34,6 +34,7 @@
#import <simd/simd.h>
#import <zlib.h>
#include "core/templates/hashfuncs.h"
#include "core/templates/local_vector.h"
struct SHA256Digest {
@@ -73,3 +74,10 @@ struct SHA256Digest {
return SHA256Digest((const char *)p_ser.ptr());
}
};
template <>
struct HashMapComparatorDefault<SHA256Digest> {
static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) {
return memcmp(p_lhs.data, p_rhs.data, CC_SHA256_DIGEST_LENGTH) == 0;
}
};
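This specialization exists because HashMapComparatorDefault otherwise falls back to operator==, which a plain digest struct may not define; for a raw digest the right notion of equality is a byte-wise memcmp. A self-contained analogue of the comparator's contract:

#include <cassert>
#include <cstdint>
#include <cstring>

struct Digest {
	uint8_t data[32] = {};
};

// Byte-wise equality: the same contract the specialization above provides
// for SHA256Digest keys.
static bool digest_equal(const Digest &a, const Digest &b) {
	return std::memcmp(a.data, b.data, sizeof(a.data)) == 0;
}

int main() {
	Digest a, b;
	a.data[0] = 0x42;
	b.data[0] = 0x42;
	assert(digest_equal(a, b));
}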

View File

@@ -44,7 +44,9 @@ uint32_t RenderingShaderContainerVulkan::_format_version() const {
return FORMAT_VERSION;
}
bool RenderingShaderContainerVulkan::_set_code_from_spirv(Span<ReflectedShaderStage> p_spirv) {
bool RenderingShaderContainerVulkan::_set_code_from_spirv(const ReflectShader &p_shader) {
const LocalVector<ReflectShaderStage> &p_spirv = p_shader.shader_stages;
PackedByteArray code_bytes;
shaders.resize(p_spirv.size());
for (uint64_t i = 0; i < p_spirv.size(); i++) {

View File

@@ -47,7 +47,7 @@ public:
protected:
virtual uint32_t _format() const override;
virtual uint32_t _format_version() const override;
virtual bool _set_code_from_spirv(Span<ReflectedShaderStage> p_spirv) override;
virtual bool _set_code_from_spirv(const ReflectShader &p_shader) override;
public:
RenderingShaderContainerVulkan(bool p_debug_info_enabled);

View File

@@ -35,19 +35,23 @@
RenderingShaderContainerFormat *ShaderBakerExportPluginPlatformMetal::create_shader_container_format(const Ref<EditorExportPlatform> &p_platform, const Ref<EditorExportPreset> &p_preset) {
const String &os_name = p_platform->get_os_name();
const MetalDeviceProfile *profile;
String min_os_version;
MinOsVersion min_os_version;
if (os_name == U"macOS") {
profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::macOS, MetalDeviceProfile::GPU::Apple7);
min_os_version = (String)p_preset->get("application/min_macos_version_arm64");
// Godot's Metal driver doesn't support x86_64 macOS, so there's no need to worry about that version.
min_os_version = p_preset->get("application/min_macos_version_arm64");
profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::macOS, MetalDeviceProfile::GPU::Apple7, min_os_version);
} else if (os_name == U"iOS") {
profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::iOS, MetalDeviceProfile::GPU::Apple7);
min_os_version = p_preset->get("application/min_ios_version");
min_os_version = (String)p_preset->get("application/min_ios_version");
profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::iOS, MetalDeviceProfile::GPU::Apple7, min_os_version);
} else if (os_name == U"visionOS") {
min_os_version = (String)p_preset->get("application/min_visionos_version");
profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::visionOS, MetalDeviceProfile::GPU::Apple8, min_os_version);
} else {
ERR_FAIL_V_MSG(nullptr, vformat("Unsupported platform: %s", os_name));
}
return memnew(RenderingShaderContainerFormatMetal(profile, true, min_os_version));
ERR_FAIL_NULL_V(profile, nullptr);
return memnew(RenderingShaderContainerFormatMetal(profile, true));
}
bool ShaderBakerExportPluginPlatformMetal::matches_driver(const String &p_driver) {

View File

@@ -43,20 +43,35 @@ static inline uint32_t aligned_to(uint32_t p_size, uint32_t p_alignment) {
}
}
RenderingShaderContainer::ReflectedShaderStage::ReflectedShaderStage() :
_module(memnew(SpvReflectShaderModule)) {
template <class T>
const T &RenderingShaderContainer::ReflectSymbol<T>::get_spv_reflect(RDC::ShaderStage p_stage) const {
const T *info = _spv_reflect[get_index_for_stage(p_stage)];
DEV_ASSERT(info != nullptr); // Caller is expected to specify valid shader stages
return *info;
}
RenderingShaderContainer::ReflectedShaderStage::~ReflectedShaderStage() {
template <class T>
void RenderingShaderContainer::ReflectSymbol<T>::set_spv_reflect(RDC::ShaderStage p_stage, const T *p_spv) {
stages.set_flag(1 << p_stage);
_spv_reflect[get_index_for_stage(p_stage)] = p_spv;
}
RenderingShaderContainer::ReflectShaderStage::ReflectShaderStage() {
_module = memnew(SpvReflectShaderModule);
memset(_module, 0, sizeof(SpvReflectShaderModule));
}
RenderingShaderContainer::ReflectShaderStage::~ReflectShaderStage() {
spvReflectDestroyShaderModule(_module);
memdelete(_module);
_module = nullptr;
}
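The constructor/destructor pair above is the standard RAII treatment of SPIRV-Reflect's C API: zero-initialize the module storage, and destroy it exactly once. A hypothetical standalone wrapper following the same pattern (error handling reduced to a validity flag):

#include <cstdint>
#include "thirdparty/spirv-reflect/spirv_reflect.h"

class ReflectModule {
	SpvReflectShaderModule module = {};
	bool valid = false;

public:
	ReflectModule(const uint32_t *spirv, size_t size_bytes) {
		// spvReflectCreateShaderModule takes the size in bytes, not words.
		valid = spvReflectCreateShaderModule(size_bytes, spirv, &module) == SPV_REFLECT_RESULT_SUCCESS;
	}
	~ReflectModule() {
		if (valid) {
			spvReflectDestroyShaderModule(&module);
		}
	}
	ReflectModule(const ReflectModule &) = delete; // non-copyable: owns the module
	ReflectModule &operator=(const ReflectModule &) = delete;
	const SpvReflectShaderModule *operator->() const { return &module; }
};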
const SpvReflectShaderModule &RenderingShaderContainer::ReflectedShaderStage::module() const {
const SpvReflectShaderModule &RenderingShaderContainer::ReflectShaderStage::module() const {
return *_module;
}
const Span<uint32_t> RenderingShaderContainer::ReflectedShaderStage::spirv() const {
const Span<uint32_t> RenderingShaderContainer::ReflectShaderStage::spirv() const {
return _spirv_data.span().reinterpret<uint32_t>();
}
@@ -120,34 +135,128 @@ uint32_t RenderingShaderContainer::_to_bytes_footer_extra_data(uint8_t *) const
return 0;
}
void RenderingShaderContainer::_set_from_shader_reflection_post(const RenderingDeviceCommons::ShaderReflection &p_reflection) {
void RenderingShaderContainer::_set_from_shader_reflection_post(const ReflectShader &p_shader) {
// Do nothing.
}
Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<RenderingDeviceCommons::ShaderStageSPIRVData> p_spirv, LocalVector<ReflectedShaderStage> &r_refl) {
static RenderingDeviceCommons::DataFormat spv_image_format_to_data_format(const SpvImageFormat p_format) {
using RDC = RenderingDeviceCommons;
RDC::ShaderReflection reflection;
switch (p_format) {
case SpvImageFormatUnknown:
return RDC::DATA_FORMAT_MAX;
case SpvImageFormatRgba32f:
return RDC::DATA_FORMAT_R32G32B32A32_SFLOAT;
case SpvImageFormatRgba16f:
return RDC::DATA_FORMAT_R16G16B16A16_SFLOAT;
case SpvImageFormatR32f:
return RDC::DATA_FORMAT_R32_SFLOAT;
case SpvImageFormatRgba8:
return RDC::DATA_FORMAT_R8G8B8A8_UNORM;
case SpvImageFormatRgba8Snorm:
return RDC::DATA_FORMAT_R8G8B8A8_SNORM;
case SpvImageFormatRg32f:
return RDC::DATA_FORMAT_R32G32_SFLOAT;
case SpvImageFormatRg16f:
return RDC::DATA_FORMAT_R16G16_SFLOAT;
case SpvImageFormatR11fG11fB10f:
return RDC::DATA_FORMAT_B10G11R11_UFLOAT_PACK32;
case SpvImageFormatR16f:
return RDC::DATA_FORMAT_R16_SFLOAT;
case SpvImageFormatRgba16:
return RDC::DATA_FORMAT_R16G16B16A16_UNORM;
case SpvImageFormatRgb10A2:
return RDC::DATA_FORMAT_A2B10G10R10_UNORM_PACK32;
case SpvImageFormatRg16:
return RDC::DATA_FORMAT_R16G16_UNORM;
case SpvImageFormatRg8:
return RDC::DATA_FORMAT_R8G8_UNORM;
case SpvImageFormatR16:
return RDC::DATA_FORMAT_R16_UNORM;
case SpvImageFormatR8:
return RDC::DATA_FORMAT_R8_UNORM;
case SpvImageFormatRgba16Snorm:
return RDC::DATA_FORMAT_R16G16B16A16_SNORM;
case SpvImageFormatRg16Snorm:
return RDC::DATA_FORMAT_R16G16_SNORM;
case SpvImageFormatRg8Snorm:
return RDC::DATA_FORMAT_R8G8_SNORM;
case SpvImageFormatR16Snorm:
return RDC::DATA_FORMAT_R16_SNORM;
case SpvImageFormatR8Snorm:
return RDC::DATA_FORMAT_R8_SNORM;
case SpvImageFormatRgba32i:
return RDC::DATA_FORMAT_R32G32B32A32_SINT;
case SpvImageFormatRgba16i:
return RDC::DATA_FORMAT_R16G16B16A16_SINT;
case SpvImageFormatRgba8i:
return RDC::DATA_FORMAT_R8G8B8A8_SINT;
case SpvImageFormatR32i:
return RDC::DATA_FORMAT_R32_SINT;
case SpvImageFormatRg32i:
return RDC::DATA_FORMAT_R32G32_SINT;
case SpvImageFormatRg16i:
return RDC::DATA_FORMAT_R16G16_SINT;
case SpvImageFormatRg8i:
return RDC::DATA_FORMAT_R8G8_SINT;
case SpvImageFormatR16i:
return RDC::DATA_FORMAT_R16_SINT;
case SpvImageFormatR8i:
return RDC::DATA_FORMAT_R8_SINT;
case SpvImageFormatRgba32ui:
return RDC::DATA_FORMAT_R32G32B32A32_UINT;
case SpvImageFormatRgba16ui:
return RDC::DATA_FORMAT_R16G16B16A16_UINT;
case SpvImageFormatRgba8ui:
return RDC::DATA_FORMAT_R8G8B8A8_UINT;
case SpvImageFormatR32ui:
return RDC::DATA_FORMAT_R32_UINT;
case SpvImageFormatRgb10a2ui:
return RDC::DATA_FORMAT_A2B10G10R10_UINT_PACK32;
case SpvImageFormatRg32ui:
return RDC::DATA_FORMAT_R32G32_UINT;
case SpvImageFormatRg16ui:
return RDC::DATA_FORMAT_R16G16_UINT;
case SpvImageFormatRg8ui:
return RDC::DATA_FORMAT_R8G8_UINT;
case SpvImageFormatR16ui:
return RDC::DATA_FORMAT_R16_UINT;
case SpvImageFormatR8ui:
return RDC::DATA_FORMAT_R8_UINT;
case SpvImageFormatR64ui:
return RDC::DATA_FORMAT_R64_UINT;
case SpvImageFormatR64i:
return RDC::DATA_FORMAT_R64_SINT;
case SpvImageFormatMax:
return RDC::DATA_FORMAT_MAX;
}
return RDC::DATA_FORMAT_MAX;
}
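A typical consumer of this mapping treats DATA_FORMAT_MAX as 'no declared format', since both SpvImageFormatUnknown and any unmapped format collapse to it. A hedged sketch of such a check (validate_storage_image_format is hypothetical; spv_image_format_to_data_format is the function above):

// Hypothetical helper: true only when SPIR-V declared a format this
// engine knows how to represent.
static bool validate_storage_image_format(SpvImageFormat p_format) {
	// Both SpvImageFormatUnknown and any unmapped format yield DATA_FORMAT_MAX.
	return spv_image_format_to_data_format(p_format) != RenderingDeviceCommons::DATA_FORMAT_MAX;
}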
Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<RDC::ShaderStageSPIRVData> p_spirv, ReflectShader &r_shader) {
ReflectShader &reflection = r_shader;
shader_name = p_shader_name.utf8();
const uint32_t spirv_size = p_spirv.size();
LocalVector<ReflectShaderStage> &r_refl = r_shader.shader_stages;
r_refl.resize(spirv_size);
for (uint32_t i = 0; i < spirv_size; i++) {
RDC::ShaderStage stage = p_spirv[i].shader_stage;
RDC::ShaderStage stage_flag = (RDC::ShaderStage)(1 << p_spirv[i].shader_stage);
r_refl[i].shader_stage = p_spirv[i].shader_stage;
RDC::ShaderStage stage_flag = (RDC::ShaderStage)(1 << stage);
r_refl[i].shader_stage = stage;
r_refl[i]._spirv_data = p_spirv[i].spirv;
const Vector<uint64_t> &dynamic_buffers = p_spirv[i].dynamic_buffers;
if (p_spirv[i].shader_stage == RDC::SHADER_STAGE_COMPUTE) {
reflection.is_compute = true;
if (stage == RDC::SHADER_STAGE_COMPUTE) {
ERR_FAIL_COND_V_MSG(spirv_size != 1, FAILED,
"Compute shaders can only receive one stage, dedicated to compute.");
}
ERR_FAIL_COND_V_MSG(reflection.stages_bits.has_flag(stage_flag), FAILED,
"Stage " + String(RDC::SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once.");
reflection.stages_bits.set_flag(stage_flag);
{
SpvReflectShaderModule &module = *r_refl.ptr()[i]._module;
@@ -163,7 +272,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
}
}
if (reflection.is_compute) {
if (reflection.is_compute()) {
reflection.compute_local_size[0] = module.entry_points->local_size.x;
reflection.compute_local_size[1] = module.entry_points->local_size.y;
reflection.compute_local_size[2] = module.entry_points->local_size.z;
@@ -186,11 +295,13 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
for (uint32_t j = 0; j < binding_count; j++) {
const SpvReflectDescriptorBinding &binding = *bindings[j];
RDC::ShaderUniform uniform;
ReflectUniform uniform;
uniform.set_spv_reflect(stage, &binding);
bool need_array_dimensions = false;
bool need_block_size = false;
bool may_be_writable = false;
bool is_image = false;
switch (binding.descriptor_type) {
case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLER: {
@@ -200,24 +311,29 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
case SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
uniform.type = RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
need_array_dimensions = true;
is_image = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLED_IMAGE: {
uniform.type = RDC::UNIFORM_TYPE_TEXTURE;
need_array_dimensions = true;
is_image = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: {
uniform.type = RDC::UNIFORM_TYPE_IMAGE;
need_array_dimensions = true;
may_be_writable = true;
is_image = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: {
uniform.type = RDC::UNIFORM_TYPE_TEXTURE_BUFFER;
need_array_dimensions = true;
is_image = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
uniform.type = RDC::UNIFORM_TYPE_IMAGE_BUFFER;
need_array_dimensions = true;
may_be_writable = true;
is_image = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
const uint64_t key = ShaderRD::DynamicBuffer::encode(binding.set, binding.binding);
@@ -251,6 +367,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
case SPV_REFLECT_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
uniform.type = RDC::UNIFORM_TYPE_INPUT_ATTACHMENT;
need_array_dimensions = true;
is_image = true;
} break;
case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: {
ERR_PRINT("Acceleration structure not supported.");
@@ -259,18 +376,10 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
}
if (need_array_dimensions) {
if (binding.array.dims_count == 0) {
uniform.length = 1;
} else {
for (uint32_t k = 0; k < binding.array.dims_count; k++) {
if (k == 0) {
uniform.length = binding.array.dims[0];
} else {
uniform.length *= binding.array.dims[k];
}
}
}
} else if (need_block_size) {
uniform.length = binding.block.size;
} else {
@@ -287,6 +396,10 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
uniform.writable = false;
}
if (is_image) {
uniform.image.format = spv_image_format_to_data_format(binding.image.image_format);
}
uniform.binding = binding.binding;
uint32_t set = binding.set;
@@ -296,7 +409,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
if (set < (uint32_t)reflection.uniform_sets.size()) {
// Check if this already exists.
bool exists = false;
for (int k = 0; k < reflection.uniform_sets[set].size(); k++) {
for (uint32_t k = 0; k < reflection.uniform_sets[set].size(); k++) {
if (reflection.uniform_sets[set][k].binding == uniform.binding) {
// Already exists, verify that it's the same type.
ERR_FAIL_COND_V_MSG(reflection.uniform_sets[set][k].type != uniform.type, FAILED,
@@ -311,7 +424,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
"On shader stage '" + String(RDC::SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different writability.");
// Just append stage mask and return.
reflection.uniform_sets.write[set].write[k].stages.set_flag(stage_flag);
reflection.uniform_sets[set][k].stages.set_flag(stage_flag);
exists = true;
break;
}
@@ -328,7 +441,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
reflection.uniform_sets.resize(set + 1);
}
reflection.uniform_sets.write[set].push_back(uniform);
reflection.uniform_sets[set].push_back(uniform);
}
}
@@ -350,8 +463,9 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
for (uint32_t j = 0; j < sc_count; j++) {
int32_t existing = -1;
RDC::ShaderSpecializationConstant sconst;
ReflectSpecializationConstant sconst;
SpvReflectSpecializationConstant *spc = spec_constants[j];
sconst.set_spv_reflect(stage, spc);
sconst.constant_id = spc->constant_id;
sconst.int_value = 0; // Clear previous value JIC.
@@ -371,7 +485,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
}
sconst.stages.set_flag(stage_flag);
for (int k = 0; k < reflection.specialization_constants.size(); k++) {
for (uint32_t k = 0; k < reflection.specialization_constants.size(); k++) {
if (reflection.specialization_constants[k].constant_id == sconst.constant_id) {
ERR_FAIL_COND_V_MSG(reflection.specialization_constants[k].type != sconst.type, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ.");
ERR_FAIL_COND_V_MSG(reflection.specialization_constants[k].int_value != sconst.int_value, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ.");
@@ -381,7 +495,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
}
if (existing >= 0) {
reflection.specialization_constants.write[existing].stages.set_flag(stage_flag);
reflection.specialization_constants[existing].stages.set_flag(stage_flag);
} else {
reflection.specialization_constants.push_back(sconst);
}
@@ -476,13 +590,11 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
//print_line("Stage: " + String(RDC::SHADER_STAGE_NAMES[stage]) + " push constant of size=" + itos(push_constant.push_constant_size));
}
}
reflection.stages_bits.set_flag(stage_flag);
}
// Sort all uniform_sets by binding.
for (uint32_t i = 0; i < reflection.uniform_sets.size(); i++) {
reflection.uniform_sets.write[i].sort();
reflection.uniform_sets[i].sort();
}
set_from_shader_reflection(reflection);
@@ -490,7 +602,7 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
return OK;
}
void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceCommons::ShaderReflection &p_reflection) {
void RenderingShaderContainer::set_from_shader_reflection(const ReflectShader &p_reflection) {
reflection_binding_set_uniforms_count.clear();
reflection_binding_set_uniforms_data.clear();
reflection_specialization_data.clear();
@@ -499,7 +611,7 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC
reflection_data.vertex_input_mask = p_reflection.vertex_input_mask;
reflection_data.fragment_output_mask = p_reflection.fragment_output_mask;
reflection_data.specialization_constants_count = p_reflection.specialization_constants.size();
reflection_data.is_compute = p_reflection.is_compute;
reflection_data.is_compute = p_reflection.is_compute();
reflection_data.has_multiview = p_reflection.has_multiview;
reflection_data.has_dynamic_buffers = p_reflection.has_dynamic_buffers;
reflection_data.compute_local_size[0] = p_reflection.compute_local_size[0];
@@ -511,8 +623,8 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC
reflection_data.shader_name_len = shader_name.length();
ReflectionBindingData binding_data;
for (const Vector<RenderingDeviceCommons::ShaderUniform> &uniform_set : p_reflection.uniform_sets) {
for (const RenderingDeviceCommons::ShaderUniform &uniform : uniform_set) {
for (const ReflectDescriptorSet &uniform_set : p_reflection.uniform_sets) {
for (const ReflectUniform &uniform : uniform_set) {
binding_data.type = uint32_t(uniform.type);
binding_data.binding = uniform.binding;
binding_data.stages = uint32_t(uniform.stages);
@@ -525,7 +637,7 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC
}
ReflectionSpecializationData specialization_data;
for (const RenderingDeviceCommons::ShaderSpecializationConstant &spec : p_reflection.specialization_constants) {
for (const ReflectSpecializationConstant &spec : p_reflection.specialization_constants) {
specialization_data.type = uint32_t(spec.type);
specialization_data.constant_id = spec.constant_id;
specialization_data.int_value = spec.int_value;
@@ -533,9 +645,9 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC
reflection_specialization_data.push_back(specialization_data);
}
for (uint32_t i = 0; i < RenderingDeviceCommons::SHADER_STAGE_MAX; i++) {
if (p_reflection.stages_bits.has_flag(RenderingDeviceCommons::ShaderStage(1U << i))) {
reflection_shader_stages.push_back(RenderingDeviceCommons::ShaderStage(i));
for (uint32_t i = 0; i < RDC::SHADER_STAGE_MAX; i++) {
if (p_reflection.stages_bits.has_flag(RDC::ShaderStage(1U << i))) {
reflection_shader_stages.push_back(RDC::ShaderStage(i));
}
}
@@ -544,14 +656,14 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC
_set_from_shader_reflection_post(p_reflection);
}
bool RenderingShaderContainer::set_code_from_spirv(const String &p_shader_name, Span<RenderingDeviceCommons::ShaderStageSPIRVData> p_spirv) {
LocalVector<ReflectedShaderStage> spirv;
ERR_FAIL_COND_V(reflect_spirv(p_shader_name, p_spirv, spirv) != OK, false);
return _set_code_from_spirv(spirv.span());
bool RenderingShaderContainer::set_code_from_spirv(const String &p_shader_name, Span<RDC::ShaderStageSPIRVData> p_spirv) {
ReflectShader shader;
ERR_FAIL_COND_V(reflect_spirv(p_shader_name, p_spirv, shader) != OK, false);
return _set_code_from_spirv(shader);
}
RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_reflection() const {
RenderingDeviceCommons::ShaderReflection shader_refl;
RDC::ShaderReflection shader_refl;
shader_refl.push_constant_size = reflection_data.push_constant_size;
shader_refl.push_constant_stages = reflection_data.push_constant_stages_mask;
shader_refl.vertex_input_mask = reflection_data.vertex_input_mask;
@@ -569,13 +681,13 @@ RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_re
DEV_ASSERT(reflection_binding_set_uniforms_count.size() == reflection_data.set_count && "The amount of elements in the reflection and the shader container can't be different.");
uint32_t uniform_index = 0;
for (uint32_t i = 0; i < reflection_data.set_count; i++) {
Vector<RenderingDeviceCommons::ShaderUniform> &uniform_set = shader_refl.uniform_sets.ptrw()[i];
Vector<RDC::ShaderUniform> &uniform_set = shader_refl.uniform_sets.ptrw()[i];
uint32_t uniforms_count = reflection_binding_set_uniforms_count[i];
uniform_set.resize(uniforms_count);
for (uint32_t j = 0; j < uniforms_count; j++) {
const ReflectionBindingData &binding = reflection_binding_set_uniforms_data[uniform_index++];
RenderingDeviceCommons::ShaderUniform &uniform = uniform_set.ptrw()[j];
uniform.type = RenderingDeviceCommons::UniformType(binding.type);
RDC::ShaderUniform &uniform = uniform_set.ptrw()[j];
uniform.type = RDC::UniformType(binding.type);
uniform.writable = binding.writable;
uniform.length = binding.length;
uniform.binding = binding.binding;
@@ -586,8 +698,8 @@ RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_re
shader_refl.specialization_constants.resize(reflection_data.specialization_constants_count);
for (uint32_t i = 0; i < reflection_data.specialization_constants_count; i++) {
const ReflectionSpecializationData &spec = reflection_specialization_data[i];
RenderingDeviceCommons::ShaderSpecializationConstant &sc = shader_refl.specialization_constants.ptrw()[i];
sc.type = RenderingDeviceCommons::PipelineSpecializationConstantType(spec.type);
RDC::ShaderSpecializationConstant &sc = shader_refl.specialization_constants.ptrw()[i];
sc.type = RDC::PipelineSpecializationConstantType(spec.type);
sc.constant_id = spec.constant_id;
sc.int_value = spec.int_value;
sc.stages = spec.stage_flags;
@@ -596,7 +708,7 @@ RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_re
shader_refl.stages_vector.resize(reflection_data.stage_count);
for (uint32_t i = 0; i < reflection_data.stage_count; i++) {
shader_refl.stages_vector.set(i, reflection_shader_stages[i]);
shader_refl.stages_bits.set_flag(RenderingDeviceCommons::ShaderStage(1U << reflection_shader_stages[i]));
shader_refl.stages_bits.set_flag(RDC::ShaderStage(1U << reflection_shader_stages[i]));
}
return shader_refl;
@@ -672,11 +784,11 @@ bool RenderingShaderContainer::from_bytes(const PackedByteArray &p_bytes) {
const uint32_t stage_count = reflection_data.stage_count;
if (stage_count > 0) {
ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + stage_count * sizeof(RenderingDeviceCommons::ShaderStage)) > p_bytes.size(), false, "Not enough bytes for stages in shader container.");
ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + stage_count * sizeof(RDC::ShaderStage)) > p_bytes.size(), false, "Not enough bytes for stages in shader container.");
reflection_shader_stages.resize(stage_count);
bytes_offset += _from_bytes_shader_extra_data_start(&bytes_ptr[bytes_offset]);
memcpy(reflection_shader_stages.ptrw(), &bytes_ptr[bytes_offset], stage_count * sizeof(RenderingDeviceCommons::ShaderStage));
bytes_offset += stage_count * sizeof(RenderingDeviceCommons::ShaderStage);
memcpy(reflection_shader_stages.ptrw(), &bytes_ptr[bytes_offset], stage_count * sizeof(RDC::ShaderStage));
bytes_offset += stage_count * sizeof(RDC::ShaderStage);
}
// Read shaders.
@@ -687,7 +799,7 @@ bool RenderingShaderContainer::from_bytes(const PackedByteArray &p_bytes) {
ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + header.code_compressed_size) > p_bytes.size(), false, "Not enough bytes for a shader in shader container.");
Shader &shader = shaders.ptrw()[i];
shader.shader_stage = RenderingDeviceCommons::ShaderStage(header.shader_stage);
shader.shader_stage = RDC::ShaderStage(header.shader_stage);
shader.code_compression_flags = header.code_compression_flags;
shader.code_decompressed_size = header.code_decompressed_size;
shader.code_compressed_bytes.resize(header.code_compressed_size);
@@ -712,7 +824,7 @@ PackedByteArray RenderingShaderContainer::to_bytes() const {
total_size += reflection_binding_set_uniforms_count.size() * sizeof(uint32_t);
total_size += reflection_binding_set_uniforms_data.size() * sizeof(ReflectionBindingData);
total_size += reflection_specialization_data.size() * sizeof(ReflectionSpecializationData);
total_size += reflection_shader_stages.size() * sizeof(RenderingDeviceCommons::ShaderStage);
total_size += reflection_shader_stages.size() * sizeof(RDC::ShaderStage);
for (uint32_t i = 0; i < reflection_binding_set_uniforms_data.size(); i++) {
total_size += _to_bytes_reflection_binding_uniform_extra_data(nullptr, i);
@@ -777,8 +889,8 @@ PackedByteArray RenderingShaderContainer::to_bytes() const {
if (!reflection_shader_stages.is_empty()) {
uint32_t stage_count = reflection_shader_stages.size();
memcpy(&bytes_ptr[bytes_offset], reflection_shader_stages.ptr(), stage_count * sizeof(RenderingDeviceCommons::ShaderStage));
bytes_offset += stage_count * sizeof(RenderingDeviceCommons::ShaderStage);
memcpy(&bytes_ptr[bytes_offset], reflection_shader_stages.ptr(), stage_count * sizeof(RDC::ShaderStage));
bytes_offset += stage_count * sizeof(RDC::ShaderStage);
}
for (uint32_t i = 0; i < shaders.size(); i++) {

View File

@@ -34,6 +34,8 @@
#include "servers/rendering/rendering_device_commons.h"
struct SpvReflectShaderModule;
struct SpvReflectDescriptorBinding;
struct SpvReflectSpecializationConstant;
class RenderingShaderContainer : public RefCounted {
GDSOFTCLASS(RenderingShaderContainer, RefCounted);
@@ -43,6 +45,8 @@ public:
static const uint32_t CONTAINER_VERSION = 2;
protected:
using RDC = RenderingDeviceCommons;
struct ContainerHeader {
uint32_t magic_number = 0;
uint32_t version = 0;
@@ -96,7 +100,7 @@ protected:
Vector<uint32_t> reflection_binding_set_uniforms_count;
Vector<ReflectionBindingData> reflection_binding_set_uniforms_data;
Vector<ReflectionSpecializationData> reflection_specialization_data;
Vector<RenderingDeviceCommons::ShaderStage> reflection_shader_stages;
Vector<RDC::ShaderStage> reflection_shader_stages;
virtual uint32_t _format() const = 0;
virtual uint32_t _format_version() const = 0;
@@ -120,30 +124,159 @@ protected:
virtual uint32_t _to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const;
virtual uint32_t _to_bytes_footer_extra_data(uint8_t *p_bytes) const;
// This method will be called when set_from_shader_reflection() is finished. Used to update internal structures to match the reflection if necessary.
virtual void _set_from_shader_reflection_post(const RenderingDeviceCommons::ShaderReflection &p_reflection);
template <class T>
struct ReflectSymbol {
static constexpr uint32_t STAGE_INDEX[RDC::SHADER_STAGE_MAX] = {
0, // SHADER_STAGE_VERTEX
1, // SHADER_STAGE_FRAGMENT
0, // SHADER_STAGE_TESSELATION_CONTROL
1, // SHADER_STAGE_TESSELATION_EVALUATION
0, // SHADER_STAGE_COMPUTE
};
class ReflectedShaderStage {
BitField<RDC::ShaderStage> stages = {};
private:
const T *_spv_reflect[2] = { nullptr };
public:
_FORCE_INLINE_ constexpr uint32_t get_index_for_stage(RDC::ShaderStage p_stage) const {
DEV_ASSERT(stages.has_flag(RDC::ShaderStage(1U << p_stage)));
return STAGE_INDEX[p_stage];
}
const T &get_spv_reflect(RDC::ShaderStage p_stage) const;
/*! Returns the reflection data for the first valid stage if multiple stages are set.
*
* Crashes if no stages are set.
*/
const T &get_spv_reflect() const {
for (const T *d : _spv_reflect) {
if (d != nullptr) {
return *d;
}
}
CRASH_NOW_MSG("No stages set in ReflectSymbol");
}
void set_spv_reflect(RDC::ShaderStage p_stage, const T *p_spv);
};
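ReflectSymbol keeps one SpvReflect pointer per pipeline slot, and STAGE_INDEX folds the five stages into two: vertex, tessellation control, and compute share slot 0, while fragment and tessellation evaluation share slot 1, so stages that can coexist in one pipeline never collide. A hypothetical usage sketch for a binding visible to both vertex and fragment stages (vs_binding and fs_binding are illustrative names):

	ReflectSymbol<SpvReflectDescriptorBinding> sym;
	sym.set_spv_reflect(RDC::SHADER_STAGE_VERTEX, vs_binding); // stored in slot 0
	sym.set_spv_reflect(RDC::SHADER_STAGE_FRAGMENT, fs_binding); // stored in slot 1
	const SpvReflectDescriptorBinding &fs = sym.get_spv_reflect(RDC::SHADER_STAGE_FRAGMENT);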
struct ReflectImageTraits {
RDC::DataFormat format = RDC::DATA_FORMAT_MAX;
};
struct ReflectUniform : ReflectSymbol<SpvReflectDescriptorBinding> {
RDC::UniformType type = RDC::UniformType::UNIFORM_TYPE_MAX;
uint32_t binding = 0;
ReflectImageTraits image;
uint32_t length = 0; // Size of arrays (in total elements), or UBOs (in bytes * total elements).
bool writable = false;
bool operator<(const ReflectUniform &p_other) const {
if (binding != p_other.binding) {
return binding < p_other.binding;
}
if (type != p_other.type) {
return type < p_other.type;
}
if (writable != p_other.writable) {
return writable < p_other.writable;
}
if (stages != p_other.stages) {
return stages < p_other.stages;
}
if (length != p_other.length) {
return length < p_other.length;
}
return false;
}
};
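operator< compares binding first, then type, writability, stages, and length, giving a strict weak ordering within a descriptor set. A sketch of the intended effect, assuming Godot's LocalVector::sort():

	LocalVector<ReflectUniform> set_uniforms; // collected from reflection
	set_uniforms.sort(); // uses ReflectUniform::operator<, yielding ascending binding order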
struct ReflectSpecializationConstant : ReflectSymbol<SpvReflectSpecializationConstant> {
RDC::PipelineSpecializationConstantType type = {};
uint32_t constant_id = 0xffffffff;
union {
uint32_t int_value = 0;
float float_value;
bool bool_value;
};
bool operator<(const ReflectSpecializationConstant &p_other) const { return constant_id < p_other.constant_id; }
};
class ReflectShaderStage {
friend class RenderingShaderContainer;
Vector<uint8_t> _spirv_data;
SpvReflectShaderModule *_module = nullptr;
public:
RenderingDeviceCommons::ShaderStage shader_stage = RenderingDeviceCommons::SHADER_STAGE_MAX;
RDC::ShaderStage shader_stage = RDC::SHADER_STAGE_MAX;
const SpvReflectShaderModule &module() const;
const Span<uint32_t> spirv() const;
const Vector<uint8_t> spirv_data() const { return _spirv_data; }
ReflectedShaderStage();
~ReflectedShaderStage();
ReflectShaderStage();
~ReflectShaderStage();
};
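Each ReflectShaderStage owns the raw SPIR-V bytes and the SpvReflect module parsed from them; spirv() is a word-sized view over that same buffer. A small consistency sketch, assuming four bytes per SPIR-V word:

	const Span<uint32_t> words = stage.spirv();
	DEV_ASSERT(words.size() * sizeof(uint32_t) == size_t(stage.spirv_data().size()));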
// This method will be called when set_code_from_spirv() is called.
virtual bool _set_code_from_spirv(Span<ReflectedShaderStage> p_spirv) = 0;
typedef LocalVector<ReflectUniform> ReflectDescriptorSet;
void set_from_shader_reflection(const RenderingDeviceCommons::ShaderReflection &p_reflection);
Error reflect_spirv(const String &p_shader_name, Span<RenderingDeviceCommons::ShaderStageSPIRVData> p_spirv, LocalVector<ReflectedShaderStage> &r_refl);
struct ReflectShader {
uint64_t vertex_input_mask = 0;
uint32_t fragment_output_mask = 0;
uint32_t compute_local_size[3] = {};
uint32_t push_constant_size = 0;
bool has_multiview = false;
bool has_dynamic_buffers = false;
LocalVector<ReflectShaderStage> shader_stages;
LocalVector<ReflectDescriptorSet> uniform_sets;
LocalVector<ReflectSymbol<SpvReflectDescriptorBinding>> reflect_uniforms;
LocalVector<ReflectSpecializationConstant> specialization_constants;
LocalVector<ReflectSymbol<SpvReflectSpecializationConstant>> reflect_specialization_constants;
LocalVector<RDC::ShaderStage> stages_vector;
BitField<RDC::ShaderStage> stages_bits = {};
BitField<RDC::ShaderStage> push_constant_stages = {};
_FORCE_INLINE_ bool is_compute() const {
return stages_bits.has_flag(RDC::SHADER_STAGE_COMPUTE_BIT);
}
/*! Returns the uniform at the specified global index.
*
* The index addresses a flattened view of all uniform sets, set 0 first.
*/
ReflectUniform &uniform_at(uint32_t p_index) {
for (LocalVector<ReflectUniform> &set : uniform_sets) {
if (p_index < set.size()) {
return set[p_index];
}
p_index -= set.size();
}
CRASH_NOW_MSG(vformat("Uniform index %d out of range (total %d)", p_index, uniform_count()));
}
uint32_t uniform_count() const {
uint32_t count = 0;
for (const LocalVector<ReflectUniform> &set : uniform_sets) {
count += set.size();
}
return count;
}
};
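A flattened-iteration sketch that matches uniform_at()'s contract (indices walk set 0 first, then set 1, and so on; crashes past uniform_count()):

	for (uint32_t i = 0; i < shader.uniform_count(); i++) {
		ReflectUniform &uniform = shader.uniform_at(i);
		// ... inspect uniform.binding, uniform.type, uniform.writable, etc.
	}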
// This method will be called when set_from_shader_reflection() is finished. Used to update internal structures to match the reflection if necessary.
virtual void _set_from_shader_reflection_post(const ReflectShader &p_shader);
// This method will be called when set_code_from_spirv() is called.
virtual bool _set_code_from_spirv(const ReflectShader &p_shader) = 0;
void set_from_shader_reflection(const ReflectShader &p_reflection);
Error reflect_spirv(const String &p_shader_name, Span<RDC::ShaderStageSPIRVData> p_spirv, ReflectShader &r_shader);
public:
enum CompressionFlags {
@@ -151,7 +284,7 @@ public:
};
struct Shader {
RenderingDeviceCommons::ShaderStage shader_stage = RenderingDeviceCommons::SHADER_STAGE_MAX;
RDC::ShaderStage shader_stage = RDC::SHADER_STAGE_MAX;
PackedByteArray code_compressed_bytes;
uint32_t code_compression_flags = 0;
uint32_t code_decompressed_size = 0;
@@ -160,8 +293,8 @@ public:
CharString shader_name;
Vector<Shader> shaders;
bool set_code_from_spirv(const String &p_shader_name, Span<RenderingDeviceCommons::ShaderStageSPIRVData> p_spirv);
RenderingDeviceCommons::ShaderReflection get_shader_reflection() const;
bool set_code_from_spirv(const String &p_shader_name, Span<RDC::ShaderStageSPIRVData> p_spirv);
RDC::ShaderReflection get_shader_reflection() const;
bool from_bytes(const PackedByteArray &p_bytes);
PackedByteArray to_bytes() const;
bool compress_code(const uint8_t *p_decompressed_bytes, uint32_t p_decompressed_size, uint8_t *p_compressed_bytes, uint32_t *r_compressed_size, uint32_t *r_compressed_flags) const;
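A minimal end-to-end sketch of the public surface above, assuming a concrete driver subclass since RenderingShaderContainer itself is abstract (RDC is a protected alias, so the full name is used here):

	Ref<RenderingShaderContainer> container = /* driver-specific subclass, e.g. D3D12 or Metal */;
	if (container->set_code_from_spirv("my_shader", spirv_stages)) { // Span<...::ShaderStageSPIRVData>
		PackedByteArray bytes = container->to_bytes(); // reflection + compressed code
		container->from_bytes(bytes); // round-trips on a compatible container
		RenderingDeviceCommons::ShaderReflection reflection = container->get_shader_reflection();
	}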