1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-12 13:20:55 +00:00

[macOS] Selectively bake specific shader variants for MoltenVK.

This commit is contained in:
Pāvels Nadtočajevs
2025-06-20 13:03:47 +03:00
parent d7cc121e64
commit a8873727ac
6 changed files with 87 additions and 9 deletions

View File

@@ -397,9 +397,15 @@ void ShaderBakerExportPlugin::_customize_shader_version(ShaderRD *p_shader, RID
for (int64_t i = 0; i < variant_count; i++) { for (int64_t i = 0; i < variant_count; i++) {
int group = p_shader->get_variant_to_group(i); int group = p_shader->get_variant_to_group(i);
if (p_shader->has_variant_bake_for(i)) {
if (!p_shader->get_variant_bake_for(i, shader_cache_platform_name + "_" + shader_cache_renderer_name + "_" + shader_container_driver) || !groups_to_compile.has(group)) {
continue;
}
} else {
if (!p_shader->is_variant_enabled(i) || !groups_to_compile.has(group)) { if (!p_shader->is_variant_enabled(i) || !groups_to_compile.has(group)) {
continue; continue;
} }
}
WorkItem work_item; WorkItem work_item;
work_item.cache_path = group_items[group].cache_path; work_item.cache_path = group_items[group].cache_path;

View File

@@ -57,22 +57,62 @@ ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() {
Vector<String> variants; Vector<String> variants;
variants.push_back(""); variants.push_back("");
variants.push_back("\n#define USE_ATTACHMENT\n"); variants.push_back("\n#define USE_ATTACHMENT\n");
variants.push_back("\n#define MOLTENVK_USED\n");
variants.push_back("\n#define USE_ATTACHMENT\n#define MOLTENVK_USED\n");
ClusterRender::ShaderVariant shader_variant; ClusterRender::ShaderVariant shader_variant;
if (RD::get_singleton()->has_feature(RD::SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS)) { if (RD::get_singleton()->has_feature(RD::SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS)) {
fb_format = RD::get_singleton()->framebuffer_format_create_empty(); fb_format = RD::get_singleton()->framebuffer_format_create_empty();
blend_state = RD::PipelineColorBlendState::create_disabled(); blend_state = RD::PipelineColorBlendState::create_disabled();
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
if (RD::get_singleton()->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
shader_variant = ClusterRender::SHADER_NORMAL_MOLTENVK;
} else {
shader_variant = ClusterRender::SHADER_NORMAL; shader_variant = ClusterRender::SHADER_NORMAL;
}
#else
shader_variant = ClusterRender::SHADER_NORMAL;
#endif
} else { } else {
Vector<RD::AttachmentFormat> afs; Vector<RD::AttachmentFormat> afs;
afs.push_back(RD::AttachmentFormat()); afs.push_back(RD::AttachmentFormat());
afs.write[0].usage_flags = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; afs.write[0].usage_flags = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT;
fb_format = RD::get_singleton()->framebuffer_format_create(afs); fb_format = RD::get_singleton()->framebuffer_format_create(afs);
blend_state = RD::PipelineColorBlendState::create_blend(); blend_state = RD::PipelineColorBlendState::create_blend();
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
if (RD::get_singleton()->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
shader_variant = ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK;
} else {
shader_variant = ClusterRender::SHADER_USE_ATTACHMENT; shader_variant = ClusterRender::SHADER_USE_ATTACHMENT;
} }
#else
shader_variant = ClusterRender::SHADER_USE_ATTACHMENT;
#endif
}
cluster_render.cluster_render_shader.initialize(variants); cluster_render.cluster_render_shader.initialize(variants);
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
if (RD::get_singleton()->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL, false);
cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT, false);
} else {
cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL_MOLTENVK, false);
cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK, false);
}
#else
cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_NORMAL_MOLTENVK, false);
cluster_render.cluster_render_shader.set_variant_enabled(ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK, false);
#endif
cluster_render.cluster_render_shader.set_variants_bake_for(ClusterRender::SHADER_NORMAL, "macos_forward_clustered_vulkan", false, true);
cluster_render.cluster_render_shader.set_variants_bake_for(ClusterRender::SHADER_USE_ATTACHMENT, "macos_forward_clustered_vulkan", false, true);
cluster_render.cluster_render_shader.set_variants_bake_for(ClusterRender::SHADER_NORMAL_MOLTENVK, "macos_forward_clustered_vulkan", true, false);
cluster_render.cluster_render_shader.set_variants_bake_for(ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK, "macos_forward_clustered_vulkan", true, false);
cluster_render.cluster_render_shader.set_variants_bake_for(ClusterRender::SHADER_NORMAL, "ios_forward_clustered_vulkan", false, true);
cluster_render.cluster_render_shader.set_variants_bake_for(ClusterRender::SHADER_USE_ATTACHMENT, "ios_forward_clustered_vulkan", false, true);
cluster_render.cluster_render_shader.set_variants_bake_for(ClusterRender::SHADER_NORMAL_MOLTENVK, "ios_forward_clustered_vulkan", true, false);
cluster_render.cluster_render_shader.set_variants_bake_for(ClusterRender::SHADER_USE_ATTACHMENT_MOLTENVK, "ios_forward_clustered_vulkan", true, false);
cluster_render.shader_version = cluster_render.cluster_render_shader.version_create(); cluster_render.shader_version = cluster_render.cluster_render_shader.version_create();
cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, shader_variant); cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, shader_variant);
cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rasterization_state, RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rasterization_state, RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0);

View File

@@ -76,6 +76,8 @@ class ClusterBuilderSharedDataRD {
enum ShaderVariant { enum ShaderVariant {
SHADER_NORMAL, SHADER_NORMAL,
SHADER_USE_ATTACHMENT, SHADER_USE_ATTACHMENT,
SHADER_NORMAL_MOLTENVK,
SHADER_USE_ATTACHMENT_MOLTENVK,
}; };
enum PipelineVersion { enum PipelineVersion {

View File

@@ -235,9 +235,6 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c
} }
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)) #if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
RenderingDevice *rd = RD::get_singleton(); RenderingDevice *rd = RD::get_singleton();
if (rd->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
builder.append("#define MOLTENVK_USED\n");
}
if (!rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) { if (!rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) {
builder.append("#define NO_IMAGE_ATOMICS\n"); builder.append("#define NO_IMAGE_ATOMICS\n");
} }
@@ -272,7 +269,7 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c
} }
Vector<String> ShaderRD::_build_variant_stage_sources(uint32_t p_variant, CompileData p_data) { Vector<String> ShaderRD::_build_variant_stage_sources(uint32_t p_variant, CompileData p_data) {
if (!variants_enabled[p_variant]) { if (!variants_enabled[p_variant] && !variants_bake_for.has(p_variant)) {
return Vector<String>(); // Variant is disabled, return. return Vector<String>(); // Variant is disabled, return.
} }
@@ -474,7 +471,10 @@ bool ShaderRD::_load_from_cache(Version *p_version, int p_group) {
int variant_id = group_to_variant_map[p_group][i]; int variant_id = group_to_variant_map[p_group][i];
uint32_t variant_size = f->get_32(); uint32_t variant_size = f->get_32();
ERR_FAIL_COND_V(variant_size == 0 && variants_enabled[variant_id], false); ERR_FAIL_COND_V(variant_size == 0 && variants_enabled[variant_id], false);
if (!variants_enabled[variant_id]) { if (!variants_enabled[variant_id] && !variants_bake_for.has(variant_id)) {
continue;
}
if (variant_size == 0) {
continue; continue;
} }
Vector<uint8_t> variant_bytes; Vector<uint8_t> variant_bytes;
@@ -489,10 +489,11 @@ bool ShaderRD::_load_from_cache(Version *p_version, int p_group) {
for (uint32_t i = 0; i < variant_count; i++) { for (uint32_t i = 0; i < variant_count; i++) {
int variant_id = group_to_variant_map[p_group][i]; int variant_id = group_to_variant_map[p_group][i];
if (!variants_enabled[variant_id]) { if ((!variants_enabled[variant_id] && !variants_bake_for.has(variant_id)) || p_version->variant_data[variant_id].is_empty()) {
p_version->variants.write[variant_id] = RID(); p_version->variants.write[variant_id] = RID();
continue; continue;
} }
print_verbose(vformat("Loading cache for shader %s, variant %d", name, i));
{ {
RID shader = RD::get_singleton()->shader_create_from_bytecode_with_samplers(p_version->variant_data[variant_id], p_version->variants[variant_id], immutable_samplers); RID shader = RD::get_singleton()->shader_create_from_bytecode_with_samplers(p_version->variant_data[variant_id], p_version->variants[variant_id], immutable_samplers);
if (shader.is_null()) { if (shader.is_null()) {
@@ -581,7 +582,7 @@ void ShaderRD::_compile_version_end(Version *p_version, int p_group) {
if (!all_valid) { if (!all_valid) {
// Clear versions if they exist. // Clear versions if they exist.
for (int i = 0; i < variant_defines.size(); i++) { for (int i = 0; i < variant_defines.size(); i++) {
if (!variants_enabled[i] || !group_enabled[variant_defines[i].group]) { if ((!variants_enabled[i] && !variants_bake_for.has(i)) || !group_enabled[variant_defines[i].group]) {
continue; // Disabled. continue; // Disabled.
} }
if (!p_version->variants[i].is_null()) { if (!p_version->variants[i].is_null()) {

View File

@@ -60,6 +60,8 @@ private:
CharString general_defines; CharString general_defines;
Vector<VariantDefine> variant_defines; Vector<VariantDefine> variant_defines;
Vector<bool> variants_enabled; Vector<bool> variants_enabled;
HashMap<int, HashMap<String, bool>> variants_bake_for;
HashMap<int, bool> variants_bake_for_def;
Vector<uint32_t> variant_to_group; Vector<uint32_t> variant_to_group;
HashMap<int, LocalVector<int>> group_to_variant_map; HashMap<int, LocalVector<int>> group_to_variant_map;
Vector<bool> group_enabled; Vector<bool> group_enabled;
@@ -216,6 +218,25 @@ public:
int64_t get_variant_count() const; int64_t get_variant_count() const;
int get_variant_to_group(int p_variant) const; int get_variant_to_group(int p_variant) const;
// Reports whether at least one per-target bake override has been registered
// for `p_variant`, i.e. whether `variants_bake_for` holds an entry keyed by it.
bool has_variant_bake_for(int p_variant) const {
	const bool has_override = variants_bake_for.has(p_variant);
	return has_override;
}
bool get_variant_bake_for(int p_variant, const String &p_name) const {
if (!variants_bake_for.has(p_variant)) {
return is_variant_enabled(p_variant);
}
if (!variants_bake_for[p_variant].has(p_name.to_lower())) {
return variants_bake_for_def[p_variant];
}
return variants_bake_for[p_variant][p_name.to_lower()];
}
void set_variants_bake_for(int p_variant, const String &p_name, bool p_enable, bool p_default) {
variants_bake_for[p_variant][p_name.to_lower()] = p_enable;
variants_bake_for_def[p_variant] = p_default;
}
// Enable/disable groups for things that might be enabled at run time. // Enable/disable groups for things that might be enabled at run time.
void enable_group(int p_group); void enable_group(int p_group);
bool is_group_enabled(int p_group) const; bool is_group_enabled(int p_group) const;

View File

@@ -114,7 +114,11 @@ void main() {
uint aux = 0; uint aux = 0;
uint cluster_thread_group_index; uint cluster_thread_group_index;
#ifndef MOLTENVK_USED
if (!gl_HelperInvocation) { if (!gl_HelperInvocation) {
#else
{
#endif
//https://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf //https://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf
uvec4 mask; uvec4 mask;
@@ -147,7 +151,11 @@ void main() {
uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index; uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index;
uint z_write_bit = 1 << z_bit; uint z_write_bit = 1 << z_bit;
#ifndef MOLTENVK_USED
if (!gl_HelperInvocation) { if (!gl_HelperInvocation) {
#else
{
#endif
z_write_bit = subgroupOr(z_write_bit); //merge all Zs z_write_bit = subgroupOr(z_write_bit); //merge all Zs
if (cluster_thread_group_index == 0) { if (cluster_thread_group_index == 0) {
aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit); aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit);