diff --git a/drivers/d3d12/rendering_device_driver_d3d12.cpp b/drivers/d3d12/rendering_device_driver_d3d12.cpp index a7208c5af9e..45478d2f8ec 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.cpp +++ b/drivers/d3d12/rendering_device_driver_d3d12.cpp @@ -38,7 +38,6 @@ #include "thirdparty/zlib/zlib.h" #include "d3d12_godot_nir_bridge.h" -#include "dxil_hash.h" #include "rendering_context_driver_d3d12.h" #include @@ -72,9 +71,6 @@ extern "C" { static const D3D12_RANGE VOID_RANGE = {}; -static const uint32_t ROOT_CONSTANT_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 1); -static const uint32_t RUNTIME_DATA_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RDD::MAX_UNIFORM_SETS + 2); - /*****************/ /**** GENERIC ****/ /*****************/ @@ -2982,109 +2978,6 @@ void RenderingDeviceDriverD3D12::framebuffer_free(FramebufferID p_framebuffer) { /**** SHADER ****/ /****************/ -static uint32_t SHADER_STAGES_BIT_OFFSET_INDICES[RenderingDevice::SHADER_STAGE_MAX] = { - /* SHADER_STAGE_VERTEX */ 0, - /* SHADER_STAGE_FRAGMENT */ 1, - /* SHADER_STAGE_TESSELATION_CONTROL */ UINT32_MAX, - /* SHADER_STAGE_TESSELATION_EVALUATION */ UINT32_MAX, - /* SHADER_STAGE_COMPUTE */ 2, -}; - -uint32_t RenderingDeviceDriverD3D12::_shader_patch_dxil_specialization_constant( - PipelineSpecializationConstantType p_type, - const void *p_value, - const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES], - HashMap> &r_stages_bytecodes, - bool p_is_first_patch) { - uint32_t patch_val = 0; - switch (p_type) { - case PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT: { - uint32_t int_value = *((const int *)p_value); - ERR_FAIL_COND_V(int_value & (1 << 31), 0); - patch_val = int_value; - } break; - case PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL: { - bool bool_value = *((const bool *)p_value); - patch_val = (uint32_t)bool_value; - } break; - case PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT: { - uint32_t int_value = *((const int *)p_value); - 
ERR_FAIL_COND_V(int_value & (1 << 31), 0); - patch_val = (int_value >> 1); - } break; - } - // For VBR encoding to encode the number of bits we expect (32), we need to set the MSB unconditionally. - // However, signed VBR moves the MSB to the LSB, so setting the MSB to 1 wouldn't help. Therefore, - // the bit we set to 1 is the one at index 30. - patch_val |= (1 << 30); - patch_val <<= 1; // What signed VBR does. - - auto tamper_bits = [](uint8_t *p_start, uint64_t p_bit_offset, uint64_t p_tb_value) -> uint64_t { - uint64_t original = 0; - uint32_t curr_input_byte = p_bit_offset / 8; - uint8_t curr_input_bit = p_bit_offset % 8; - auto get_curr_input_bit = [&]() -> bool { - return ((p_start[curr_input_byte] >> curr_input_bit) & 1); - }; - auto move_to_next_input_bit = [&]() { - if (curr_input_bit == 7) { - curr_input_bit = 0; - curr_input_byte++; - } else { - curr_input_bit++; - } - }; - auto tamper_input_bit = [&](bool p_new_bit) { - p_start[curr_input_byte] &= ~((uint8_t)1 << curr_input_bit); - if (p_new_bit) { - p_start[curr_input_byte] |= (uint8_t)1 << curr_input_bit; - } - }; - uint8_t value_bit_idx = 0; - for (uint32_t i = 0; i < 5; i++) { // 32 bits take 5 full bytes in VBR. - for (uint32_t j = 0; j < 7; j++) { - bool input_bit = get_curr_input_bit(); - original |= (uint64_t)(input_bit ? 1 : 0) << value_bit_idx; - tamper_input_bit((p_tb_value >> value_bit_idx) & 1); - move_to_next_input_bit(); - value_bit_idx++; - } -#ifdef DEV_ENABLED - bool input_bit = get_curr_input_bit(); - DEV_ASSERT((i < 4 && input_bit) || (i == 4 && !input_bit)); -#endif - move_to_next_input_bit(); - } - return original; - }; - uint32_t stages_patched_mask = 0; - for (int stage = 0; stage < SHADER_STAGE_MAX; stage++) { - if (!r_stages_bytecodes.has((ShaderStage)stage)) { - continue; - } - - uint64_t offset = p_stages_bit_offsets[SHADER_STAGES_BIT_OFFSET_INDICES[stage]]; - if (offset == 0) { - // This constant does not appear at this stage. 
- continue; - } - - Vector &bytecode = r_stages_bytecodes[(ShaderStage)stage]; -#ifdef DEV_ENABLED - uint64_t orig_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val); - // Checking against the value the NIR patch should have set. - DEV_ASSERT(!p_is_first_patch || ((orig_patch_val >> 1) & GODOT_NIR_SC_SENTINEL_MAGIC_MASK) == GODOT_NIR_SC_SENTINEL_MAGIC); - uint64_t readback_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val); - DEV_ASSERT(readback_patch_val == patch_val); -#else - tamper_bits(bytecode.ptrw(), offset, patch_val); -#endif - - stages_patched_mask |= (1 << stage); - } - return stages_patched_mask; -} - bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants( const ShaderInfo *p_shader_info, VectorView p_specialization_constants, @@ -3101,7 +2994,7 @@ bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants( for (const ShaderInfo::SpecializationConstant &sc : p_shader_info->specialization_constants) { if (psc.constant_id == sc.constant_id) { if (psc.int_value != sc.int_value) { - stages_re_sign_mask |= _shader_patch_dxil_specialization_constant(psc.type, &psc.int_value, sc.stages_bit_offsets, r_final_stages_bytecode, false); + stages_re_sign_mask |= RenderingDXIL::patch_specialization_constant(psc.type, &psc.int_value, sc.stages_bit_offsets, r_final_stages_bytecode, false); } break; } @@ -3112,732 +3005,45 @@ bool RenderingDeviceDriverD3D12::_shader_apply_specialization_constants( ShaderStage stage = E.key; if ((stages_re_sign_mask & (1 << stage))) { Vector &bytecode = E.value; - _shader_sign_dxil_bytecode(stage, bytecode); + RenderingDXIL::sign_bytecode(stage, bytecode); } } return true; } -void RenderingDeviceDriverD3D12::_shader_sign_dxil_bytecode(ShaderStage p_stage, Vector &r_dxil_blob) { - uint8_t *w = r_dxil_blob.ptrw(); - compute_dxil_hash(w + 20, r_dxil_blob.size() - 20, w + 4); -} +RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_container(const Ref &p_shader_container, const Vector 
&p_immutable_samplers) { + ShaderReflection shader_refl = p_shader_container->get_shader_reflection(); + ShaderInfo shader_info_in; + const RenderingShaderContainerD3D12 *shader_container_d3d12 = Object::cast_to(p_shader_container.ptr()); + ERR_FAIL_NULL_V_MSG(shader_container_d3d12, ShaderID(), "Shader container is not a recognized format."); -String RenderingDeviceDriverD3D12::shader_get_binary_cache_key() { - return "D3D12-SV" + uitos(ShaderBinary::VERSION) + "-" + itos(shader_capabilities.shader_model); -} - -Vector RenderingDeviceDriverD3D12::shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) { - ShaderReflection shader_refl; - if (_reflect_spirv(p_spirv, shader_refl) != OK) { - return Vector(); + RenderingShaderContainerD3D12::ShaderReflectionD3D12 shader_refl_d3d12 = shader_container_d3d12->get_shader_reflection_d3d12(); + if (shader_refl_d3d12.dxil_push_constant_stages != 0) { + shader_info_in.dxil_push_constant_size = shader_refl.push_constant_size; } - // Collect reflection data into binary data. 
- ShaderBinary::Data binary_data; - Vector> sets_bindings; - Vector specialization_constants; - { - binary_data.vertex_input_mask = shader_refl.vertex_input_mask; - binary_data.fragment_output_mask = shader_refl.fragment_output_mask; - binary_data.specialization_constants_count = shader_refl.specialization_constants.size(); - binary_data.is_compute = shader_refl.is_compute; - binary_data.compute_local_size[0] = shader_refl.compute_local_size[0]; - binary_data.compute_local_size[1] = shader_refl.compute_local_size[1]; - binary_data.compute_local_size[2] = shader_refl.compute_local_size[2]; - binary_data.set_count = shader_refl.uniform_sets.size(); - binary_data.push_constant_size = shader_refl.push_constant_size; - binary_data.nir_runtime_data_root_param_idx = UINT32_MAX; - binary_data.stage_count = p_spirv.size(); - - for (const Vector &spirv_set : shader_refl.uniform_sets) { - Vector bindings; - for (const ShaderUniform &spirv_uniform : spirv_set) { - ShaderBinary::DataBinding binding; - binding.type = (uint32_t)spirv_uniform.type; - binding.binding = spirv_uniform.binding; - binding.stages = (uint32_t)spirv_uniform.stages; - binding.length = spirv_uniform.length; - binding.writable = (uint32_t)spirv_uniform.writable; - bindings.push_back(binding); - } - sets_bindings.push_back(bindings); - } - - for (const ShaderSpecializationConstant &spirv_sc : shader_refl.specialization_constants) { - ShaderBinary::SpecializationConstant spec_constant; - spec_constant.type = (uint32_t)spirv_sc.type; - spec_constant.constant_id = spirv_sc.constant_id; - spec_constant.int_value = spirv_sc.int_value; - spec_constant.stage_flags = spirv_sc.stages; - specialization_constants.push_back(spec_constant); - - binary_data.spirv_specialization_constants_ids_mask |= (1 << spirv_sc.constant_id); - } - } - - // Translate SPIR-V shaders to DXIL, and collect shader info from the new representation. 
- HashMap> dxil_blobs; - BitField stages_processed = {}; - { - HashMap stages_nir_shaders; - - auto free_nir_shaders = [&]() { - for (KeyValue &E : stages_nir_shaders) { - ralloc_free(E.value); - } - stages_nir_shaders.clear(); - }; - - // This is based on spirv2dxil.c. May need updates when it changes. - // Also, this has to stay around until after linking. - nir_shader_compiler_options nir_options = *dxil_get_nir_compiler_options(); - nir_options.lower_base_vertex = false; - - dxil_spirv_runtime_conf dxil_runtime_conf = {}; - dxil_runtime_conf.runtime_data_cbv.base_shader_register = RUNTIME_DATA_REGISTER; - dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER; - dxil_runtime_conf.zero_based_vertex_instance_id = true; - dxil_runtime_conf.zero_based_compute_workgroup_id = true; - dxil_runtime_conf.declared_read_only_images_as_srvs = true; - // Making this explicit to let maintainers know that in practice this didn't improve performance, - // probably because data generated by one shader and consumed by another one forces the resource - // to transition from UAV to SRV, and back, instead of being an UAV all the time. - // In case someone wants to try, care must be taken so in case of incompatible bindings across stages - // happen as a result, all the stages are re-translated. That can happen if, for instance, a stage only - // uses an allegedly writable resource only for reading but the next stage doesn't. - dxil_runtime_conf.inferred_read_only_images_as_srvs = false; - - // - Translate SPIR-V to NIR. 
- for (uint32_t i = 0; i < p_spirv.size(); i++) { - ShaderStage stage = (ShaderStage)p_spirv[i].shader_stage; - ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); - - stages_processed.set_flag(stage_flag); - - { - const char *entry_point = "main"; - - static const gl_shader_stage SPIRV_TO_MESA_STAGES[SHADER_STAGE_MAX] = { - /* SHADER_STAGE_VERTEX */ MESA_SHADER_VERTEX, - /* SHADER_STAGE_FRAGMENT */ MESA_SHADER_FRAGMENT, - /* SHADER_STAGE_TESSELATION_CONTROL */ MESA_SHADER_TESS_CTRL, - /* SHADER_STAGE_TESSELATION_EVALUATION */ MESA_SHADER_TESS_EVAL, - /* SHADER_STAGE_COMPUTE */ MESA_SHADER_COMPUTE, - }; - - nir_shader *shader = spirv_to_nir( - (const uint32_t *)p_spirv[i].spirv.ptr(), - p_spirv[i].spirv.size() / sizeof(uint32_t), - nullptr, - 0, - SPIRV_TO_MESA_STAGES[stage], - entry_point, - dxil_spirv_nir_get_spirv_options(), &nir_options); - if (!shader) { - free_nir_shaders(); - ERR_FAIL_V_MSG(Vector(), "Shader translation (step 1) at stage " + String(SHADER_STAGE_NAMES[stage]) + " failed."); - } + shader_info_in.spirv_specialization_constants_ids_mask = shader_refl_d3d12.spirv_specialization_constants_ids_mask; + shader_info_in.nir_runtime_data_root_param_idx = shader_refl_d3d12.nir_runtime_data_root_param_idx; + shader_info_in.is_compute = shader_refl.is_compute; + shader_info_in.sets.resize(shader_refl.uniform_sets.size()); + for (uint32_t i = 0; i < shader_info_in.sets.size(); i++) { + shader_info_in.sets[i].bindings.resize(shader_refl.uniform_sets[i].size()); + for (uint32_t j = 0; j < shader_info_in.sets[i].bindings.size(); j++) { + const ShaderUniform &uniform = shader_refl.uniform_sets[i][j]; + const RenderingShaderContainerD3D12::ReflectionBindingDataD3D12 &uniform_d3d12 = shader_refl_d3d12.reflection_binding_set_uniforms_d3d12[i][j]; + ShaderInfo::UniformBindingInfo &binding = shader_info_in.sets[i].bindings[j]; + binding.stages = uniform_d3d12.dxil_stages; + binding.res_class = (ResourceClass)(uniform_d3d12.resource_class); + 
binding.type = UniformType(uniform.type); + binding.length = uniform.length; #ifdef DEV_ENABLED - nir_validate_shader(shader, "Validate before feeding NIR to the DXIL compiler"); + binding.writable = uniform.writable; #endif - if (stage == SHADER_STAGE_VERTEX) { - dxil_runtime_conf.yz_flip.y_mask = 0xffff; - dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL; - } else { - dxil_runtime_conf.yz_flip.y_mask = 0; - dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_YZ_FLIP_NONE; - } - - // This is based on spirv2dxil.c. May need updates when it changes. - dxil_spirv_nir_prep(shader); - bool requires_runtime_data = {}; - dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &requires_runtime_data); - - stages_nir_shaders[stage] = shader; - } - } - - // - Link NIR shaders. - bool can_use_multiview = D3D12Hooks::get_singleton() != nullptr; - for (int i = SHADER_STAGE_MAX - 1; i >= 0; i--) { - if (!stages_nir_shaders.has(i)) { - continue; - } - nir_shader *shader = stages_nir_shaders[i]; - nir_shader *prev_shader = nullptr; - for (int j = i - 1; j >= 0; j--) { - if (stages_nir_shaders.has(j)) { - prev_shader = stages_nir_shaders[j]; - break; - } - } - // There is a bug in the Direct3D runtime during creation of a PSO with view instancing. If a fragment - // shader uses front/back face detection (SV_IsFrontFace), its signature must include the pixel position - // builtin variable (SV_Position), otherwise an Internal Runtime error will occur. 
- if (i == SHADER_STAGE_FRAGMENT && can_use_multiview) { - const bool use_front_face = - nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_FACE) || - (shader->info.inputs_read & VARYING_BIT_FACE) || - nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRONT_FACE) || - BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); - const bool use_position = - nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_POS) || - (shader->info.inputs_read & VARYING_BIT_POS) || - nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRAG_COORD) || - BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); - if (use_front_face && !use_position) { - nir_variable *const pos = nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord"); - pos->data.location = VARYING_SLOT_POS; - shader->info.inputs_read |= VARYING_BIT_POS; - } - } - if (prev_shader) { - bool requires_runtime_data = {}; - dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &requires_runtime_data); - } - } - - // - Translate NIR to DXIL. - for (uint32_t i = 0; i < p_spirv.size(); i++) { - ShaderStage stage = (ShaderStage)p_spirv[i].shader_stage; - - struct ShaderData { - ShaderStage stage; - ShaderBinary::Data &binary_data; - Vector> &sets_bindings; - Vector &specialization_constants; - } shader_data{ stage, binary_data, sets_bindings, specialization_constants }; - - GodotNirCallbacks godot_nir_callbacks = {}; - godot_nir_callbacks.data = &shader_data; - - godot_nir_callbacks.report_resource = [](uint32_t p_register, uint32_t p_space, uint32_t p_dxil_type, void *p_data) { - ShaderData &shader_data_in = *(ShaderData *)p_data; - - // Types based on Mesa's dxil_container.h. 
- static const uint32_t DXIL_RES_SAMPLER = 1; - static const ResourceClass DXIL_TYPE_TO_CLASS[] = { - /* DXIL_RES_INVALID */ RES_CLASS_INVALID, - /* DXIL_RES_SAMPLER */ RES_CLASS_INVALID, // Handling sampler as a flag. - /* DXIL_RES_CBV */ RES_CLASS_CBV, - /* DXIL_RES_SRV_TYPED */ RES_CLASS_SRV, - /* DXIL_RES_SRV_RAW */ RES_CLASS_SRV, - /* DXIL_RES_SRV_STRUCTURED */ RES_CLASS_SRV, - /* DXIL_RES_UAV_TYPED */ RES_CLASS_UAV, - /* DXIL_RES_UAV_RAW */ RES_CLASS_UAV, - /* DXIL_RES_UAV_STRUCTURED */ RES_CLASS_UAV, - /* DXIL_RES_UAV_STRUCTURED_WITH_COUNTER */ RES_CLASS_INVALID, - }; - DEV_ASSERT(p_dxil_type < ARRAY_SIZE(DXIL_TYPE_TO_CLASS)); - ResourceClass res_class = DXIL_TYPE_TO_CLASS[p_dxil_type]; - - if (p_register == ROOT_CONSTANT_REGISTER && p_space == 0) { - DEV_ASSERT(res_class == RES_CLASS_CBV); - shader_data_in.binary_data.dxil_push_constant_stages |= (1 << shader_data_in.stage); - } else if (p_register == RUNTIME_DATA_REGISTER && p_space == 0) { - DEV_ASSERT(res_class == RES_CLASS_CBV); - shader_data_in.binary_data.nir_runtime_data_root_param_idx = 1; // Temporary, to be determined later. 
- } else { - DEV_ASSERT(p_space == 0); - - uint32_t set = p_register / GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER; - uint32_t binding = (p_register % GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER) / GODOT_NIR_BINDING_MULTIPLIER; - - DEV_ASSERT(set < (uint32_t)shader_data_in.sets_bindings.size()); - [[maybe_unused]] bool found = false; - for (int j = 0; j < shader_data_in.sets_bindings[set].size(); j++) { - if (shader_data_in.sets_bindings[set][j].binding != binding) { - continue; - } - - ShaderBinary::DataBinding &binding_info = shader_data_in.sets_bindings.write[set].write[j]; - - binding_info.dxil_stages |= (1 << shader_data_in.stage); - - if (res_class != RES_CLASS_INVALID) { - DEV_ASSERT(binding_info.res_class == (uint32_t)RES_CLASS_INVALID || binding_info.res_class == (uint32_t)res_class); - binding_info.res_class = res_class; - } else if (p_dxil_type == DXIL_RES_SAMPLER) { - binding_info.has_sampler = (uint32_t)true; - } else { - CRASH_NOW(); - } - found = true; - break; - } - DEV_ASSERT(found); - } - }; - - godot_nir_callbacks.report_sc_bit_offset_fn = [](uint32_t p_sc_id, uint64_t p_bit_offset, void *p_data) { - ShaderData &shader_data_in = *(ShaderData *)p_data; - [[maybe_unused]] bool found = false; - for (int j = 0; j < shader_data_in.specialization_constants.size(); j++) { - if (shader_data_in.specialization_constants[j].constant_id != p_sc_id) { - continue; - } - - uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[shader_data_in.stage]; - DEV_ASSERT(shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] == 0); - shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] = p_bit_offset; - found = true; - break; - } - DEV_ASSERT(found); - }; - - godot_nir_callbacks.report_bitcode_bit_offset_fn = [](uint64_t p_bit_offset, void *p_data) { - DEV_ASSERT(p_bit_offset % 8 == 0); - ShaderData &shader_data_in = *(ShaderData *)p_data; - uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[shader_data_in.stage]; - for (int j = 
0; j < shader_data_in.specialization_constants.size(); j++) { - if (shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] == 0) { - // This SC has been optimized out from this stage. - continue; - } - shader_data_in.specialization_constants.write[j].stages_bit_offsets[offset_idx] += p_bit_offset; - } - }; - - auto shader_model_d3d_to_dxil = [](D3D_SHADER_MODEL p_d3d_shader_model) -> dxil_shader_model { - static_assert(SHADER_MODEL_6_0 == 0x60000); - static_assert(SHADER_MODEL_6_3 == 0x60003); - static_assert(D3D_SHADER_MODEL_6_0 == 0x60); - static_assert(D3D_SHADER_MODEL_6_3 == 0x63); - return (dxil_shader_model)((p_d3d_shader_model >> 4) * 0x10000 + (p_d3d_shader_model & 0xf)); - }; - - nir_to_dxil_options nir_to_dxil_options = {}; - nir_to_dxil_options.environment = DXIL_ENVIRONMENT_VULKAN; - nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(shader_capabilities.shader_model); - nir_to_dxil_options.validator_version_max = NO_DXIL_VALIDATION; - nir_to_dxil_options.godot_nir_callbacks = &godot_nir_callbacks; - - dxil_logger logger = {}; - logger.log = [](void *p_priv, const char *p_msg) { -#ifdef DEBUG_ENABLED - print_verbose(p_msg); -#endif - }; - - blob dxil_blob = {}; - bool ok = nir_to_dxil(stages_nir_shaders[stage], &nir_to_dxil_options, &logger, &dxil_blob); - ralloc_free(stages_nir_shaders[stage]); - stages_nir_shaders.erase(stage); - if (!ok) { - free_nir_shaders(); - ERR_FAIL_V_MSG(Vector(), "Shader translation at stage " + String(SHADER_STAGE_NAMES[stage]) + " failed."); - } - - Vector blob_copy; - blob_copy.resize(dxil_blob.size); - memcpy(blob_copy.ptrw(), dxil_blob.data, dxil_blob.size); - blob_finish(&dxil_blob); - dxil_blobs.insert(stage, blob_copy); - } - } - -#if 0 - if (dxil_blobs.has(SHADER_STAGE_FRAGMENT)) { - Ref f = FileAccess::open("res://1.dxil", FileAccess::WRITE); - f->store_buffer(dxil_blobs[SHADER_STAGE_FRAGMENT].ptr(), dxil_blobs[SHADER_STAGE_FRAGMENT].size()); - } -#endif - - // Patch with default 
values of specialization constants. - if (specialization_constants.size()) { - for (const ShaderBinary::SpecializationConstant &sc : specialization_constants) { - _shader_patch_dxil_specialization_constant((PipelineSpecializationConstantType)sc.type, &sc.int_value, sc.stages_bit_offsets, dxil_blobs, true); - } -#if 0 - if (dxil_blobs.has(SHADER_STAGE_FRAGMENT)) { - Ref f = FileAccess::open("res://2.dxil", FileAccess::WRITE); - f->store_buffer(dxil_blobs[SHADER_STAGE_FRAGMENT].ptr(), dxil_blobs[SHADER_STAGE_FRAGMENT].size()); - } -#endif - } - - // Sign. - for (KeyValue> &E : dxil_blobs) { - ShaderStage stage = E.key; - Vector &dxil_blob = E.value; - _shader_sign_dxil_bytecode(stage, dxil_blob); - } - - // Build the root signature. - ComPtr root_sig_blob; - { - auto stages_to_d3d12_visibility = [](uint32_t p_stages_mask) -> D3D12_SHADER_VISIBILITY { - switch (p_stages_mask) { - case SHADER_STAGE_VERTEX_BIT: { - return D3D12_SHADER_VISIBILITY_VERTEX; - } - case SHADER_STAGE_FRAGMENT_BIT: { - return D3D12_SHADER_VISIBILITY_PIXEL; - } - default: { - return D3D12_SHADER_VISIBILITY_ALL; - } - } - }; - - LocalVector root_params; - - // Root (push) constants. - if (binary_data.dxil_push_constant_stages) { - CD3DX12_ROOT_PARAMETER1 push_constant; - push_constant.InitAsConstants( - binary_data.push_constant_size / sizeof(uint32_t), - ROOT_CONSTANT_REGISTER, - 0, - stages_to_d3d12_visibility(binary_data.dxil_push_constant_stages)); - root_params.push_back(push_constant); - } - - // NIR-DXIL runtime data. - if (binary_data.nir_runtime_data_root_param_idx == 1) { // Set above to 1 when discovering runtime data is needed. - DEV_ASSERT(!binary_data.is_compute); // Could be supported if needed, but it's pointless as of now. 
- binary_data.nir_runtime_data_root_param_idx = root_params.size(); - CD3DX12_ROOT_PARAMETER1 nir_runtime_data; - nir_runtime_data.InitAsConstants( - sizeof(dxil_spirv_vertex_runtime_data) / sizeof(uint32_t), - RUNTIME_DATA_REGISTER, - 0, - D3D12_SHADER_VISIBILITY_VERTEX); - root_params.push_back(nir_runtime_data); - } - - // Descriptor tables (up to two per uniform set, for resources and/or samplers). - - // These have to stay around until serialization! - struct TraceableDescriptorTable { - uint32_t stages_mask = {}; - Vector ranges; - Vector root_sig_locations; - }; - Vector resource_tables_maps; - Vector sampler_tables_maps; - - for (int set = 0; set < sets_bindings.size(); set++) { - bool first_resource_in_set = true; - bool first_sampler_in_set = true; - sets_bindings.write[set].sort(); - for (int i = 0; i < sets_bindings[set].size(); i++) { - const ShaderBinary::DataBinding &binding = sets_bindings[set][i]; - - bool really_used = binding.dxil_stages != 0; -#ifdef DEV_ENABLED - bool anybody_home = (ResourceClass)binding.res_class != RES_CLASS_INVALID || binding.has_sampler; - DEV_ASSERT(anybody_home == really_used); -#endif - if (!really_used) { - continue; // Existed in SPIR-V; went away in DXIL. - } - - auto insert_range = [](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type, - uint32_t p_num_descriptors, - uint32_t p_dxil_register, - uint32_t p_dxil_stages_mask, - ShaderBinary::DataBinding::RootSignatureLocation(&p_root_sig_locations), - Vector &r_tables, - bool &r_first_in_set) { - if (r_first_in_set) { - r_tables.resize(r_tables.size() + 1); - r_first_in_set = false; - } - TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1]; - table.stages_mask |= p_dxil_stages_mask; - - CD3DX12_DESCRIPTOR_RANGE1 range; - // Due to the aliasing hack for SRV-UAV of different families, - // we can be causing an unintended change of data (sometimes the validation layers catch it). 
- D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; - if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_SRV || p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) { - flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE; - } else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) { - flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE; - } - range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags); - - table.ranges.push_back(range); - table.root_sig_locations.push_back(&p_root_sig_locations); - }; - - uint32_t num_descriptors = 1; - - D3D12_DESCRIPTOR_RANGE_TYPE resource_range_type = {}; - switch ((ResourceClass)binding.res_class) { - case RES_CLASS_INVALID: { - num_descriptors = binding.length; - DEV_ASSERT(binding.has_sampler); - } break; - case RES_CLASS_CBV: { - resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; - DEV_ASSERT(!binding.has_sampler); - } break; - case RES_CLASS_SRV: { - resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; - num_descriptors = MAX(1u, binding.length); // An unbound R/O buffer is reflected as zero-size. - } break; - case RES_CLASS_UAV: { - resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; - num_descriptors = MAX(1u, binding.length); // An unbound R/W buffer is reflected as zero-size. 
- DEV_ASSERT(!binding.has_sampler); - } break; - } - - uint32_t dxil_register = set * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + binding.binding * GODOT_NIR_BINDING_MULTIPLIER; - - if (binding.res_class != RES_CLASS_INVALID) { - insert_range( - resource_range_type, - num_descriptors, - dxil_register, - sets_bindings[set][i].dxil_stages, - sets_bindings.write[set].write[i].root_sig_locations[RS_LOC_TYPE_RESOURCE], - resource_tables_maps, - first_resource_in_set); - } - if (binding.has_sampler) { - insert_range( - D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, - num_descriptors, - dxil_register, - sets_bindings[set][i].dxil_stages, - sets_bindings.write[set].write[i].root_sig_locations[RS_LOC_TYPE_SAMPLER], - sampler_tables_maps, - first_sampler_in_set); - } - } - } - - auto make_descriptor_tables = [&root_params, &stages_to_d3d12_visibility](const Vector &p_tables) { - for (const TraceableDescriptorTable &table : p_tables) { - D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(table.stages_mask); - DEV_ASSERT(table.ranges.size() == table.root_sig_locations.size()); - for (int i = 0; i < table.ranges.size(); i++) { - // By now we know very well which root signature location corresponds to the pointed uniform. 
- table.root_sig_locations[i]->root_param_idx = root_params.size(); - table.root_sig_locations[i]->range_idx = i; - } - - CD3DX12_ROOT_PARAMETER1 root_table; - root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), visibility); - root_params.push_back(root_table); - } - }; - - make_descriptor_tables(resource_tables_maps); - make_descriptor_tables(sampler_tables_maps); - - CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {}; - D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags = - D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS | - D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS; - if (!stages_processed.has_flag(SHADER_STAGE_VERTEX_BIT)) { - root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS; - } - if (!stages_processed.has_flag(SHADER_STAGE_FRAGMENT_BIT)) { - root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; - } - if (binary_data.vertex_input_mask) { - root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; - } - root_sig_desc.Init_1_1(root_params.size(), root_params.ptr(), 0, nullptr, root_sig_flags); - - ComPtr error_blob; - HRESULT res = D3DX12SerializeVersionedRootSignature(context_driver->lib_d3d12, &root_sig_desc, D3D_ROOT_SIGNATURE_VERSION_1_1, root_sig_blob.GetAddressOf(), error_blob.GetAddressOf()); - ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), Vector(), - "Serialization of root signature failed with error " + vformat("0x%08ux", (uint64_t)res) + " and the following message:\n" + String::ascii(Span((char *)error_blob->GetBufferPointer(), error_blob->GetBufferSize()))); - - binary_data.root_signature_crc = crc32(0, nullptr, 0); - binary_data.root_signature_crc = crc32(binary_data.root_signature_crc, (const Bytef *)root_sig_blob->GetBufferPointer(), 
root_sig_blob->GetBufferSize()); - } - - Vector> compressed_stages; - Vector zstd_size; - - uint32_t stages_binary_size = 0; - - for (uint32_t i = 0; i < p_spirv.size(); i++) { - Vector zstd; - Vector &dxil_blob = dxil_blobs[p_spirv[i].shader_stage]; - zstd.resize(Compression::get_max_compressed_buffer_size(dxil_blob.size(), Compression::MODE_ZSTD)); - int dst_size = Compression::compress(zstd.ptrw(), dxil_blob.ptr(), dxil_blob.size(), Compression::MODE_ZSTD); - - zstd_size.push_back(dst_size); - zstd.resize(dst_size); - compressed_stages.push_back(zstd); - - uint32_t s = compressed_stages[i].size(); - stages_binary_size += STEPIFY(s, 4); - } - - CharString shader_name_utf = p_shader_name.utf8(); - - binary_data.shader_name_len = shader_name_utf.length(); - - uint32_t total_size = sizeof(uint32_t) * 3; // Header + version + main datasize;. - total_size += sizeof(ShaderBinary::Data); - - total_size += STEPIFY(binary_data.shader_name_len, 4); - - for (int i = 0; i < sets_bindings.size(); i++) { - total_size += sizeof(uint32_t); - total_size += sets_bindings[i].size() * sizeof(ShaderBinary::DataBinding); - } - - total_size += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size(); - - total_size += compressed_stages.size() * sizeof(uint32_t) * 3; // Sizes. - total_size += stages_binary_size; - - binary_data.root_signature_len = root_sig_blob->GetBufferSize(); - total_size += binary_data.root_signature_len; - - Vector ret; - ret.resize(total_size); - { - uint32_t offset = 0; - uint8_t *binptr = ret.ptrw(); - binptr[0] = 'G'; - binptr[1] = 'S'; - binptr[2] = 'B'; - binptr[3] = 'D'; // Godot shader binary data. 
- offset += 4; - encode_uint32(ShaderBinary::VERSION, binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(sizeof(ShaderBinary::Data), binptr + offset); - offset += sizeof(uint32_t); - memcpy(binptr + offset, &binary_data, sizeof(ShaderBinary::Data)); - offset += sizeof(ShaderBinary::Data); - -#define ADVANCE_OFFSET_WITH_ALIGNMENT(m_bytes) \ - { \ - offset += m_bytes; \ - uint32_t padding = STEPIFY(m_bytes, 4) - m_bytes; \ - memset(binptr + offset, 0, padding); /* Avoid garbage data. */ \ - offset += padding; \ - } - - if (binary_data.shader_name_len > 0) { - memcpy(binptr + offset, shader_name_utf.ptr(), binary_data.shader_name_len); - ADVANCE_OFFSET_WITH_ALIGNMENT(binary_data.shader_name_len); - } - - for (int i = 0; i < sets_bindings.size(); i++) { - int count = sets_bindings[i].size(); - encode_uint32(count, binptr + offset); - offset += sizeof(uint32_t); - if (count > 0) { - memcpy(binptr + offset, sets_bindings[i].ptr(), sizeof(ShaderBinary::DataBinding) * count); - offset += sizeof(ShaderBinary::DataBinding) * count; - } - } - - if (specialization_constants.size()) { - memcpy(binptr + offset, specialization_constants.ptr(), sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size()); - offset += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size(); - } - - for (int i = 0; i < compressed_stages.size(); i++) { - encode_uint32(p_spirv[i].shader_stage, binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(dxil_blobs[p_spirv[i].shader_stage].size(), binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(zstd_size[i], binptr + offset); - offset += sizeof(uint32_t); - memcpy(binptr + offset, compressed_stages[i].ptr(), compressed_stages[i].size()); - ADVANCE_OFFSET_WITH_ALIGNMENT(compressed_stages[i].size()); - } - - memcpy(binptr + offset, root_sig_blob->GetBufferPointer(), root_sig_blob->GetBufferSize()); - offset += root_sig_blob->GetBufferSize(); - - ERR_FAIL_COND_V(offset != 
(uint32_t)ret.size(), Vector()); - } - - return ret; -} - -RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) { - r_shader_desc = {}; // Driver-agnostic. - ShaderInfo shader_info_in; // Driver-specific. - - const uint8_t *binptr = p_shader_binary.ptr(); - uint32_t binsize = p_shader_binary.size(); - - uint32_t read_offset = 0; - - // Consistency check. - ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 3 + sizeof(ShaderBinary::Data), ShaderID()); - ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'S' || binptr[2] != 'B' || binptr[3] != 'D', ShaderID()); - - uint32_t bin_version = decode_uint32(binptr + 4); - ERR_FAIL_COND_V(bin_version != ShaderBinary::VERSION, ShaderID()); - - uint32_t bin_data_size = decode_uint32(binptr + 8); - - const ShaderBinary::Data &binary_data = *(reinterpret_cast(binptr + 12)); - - r_shader_desc.push_constant_size = binary_data.push_constant_size; - shader_info_in.dxil_push_constant_size = binary_data.dxil_push_constant_stages ? 
binary_data.push_constant_size : 0; - shader_info_in.nir_runtime_data_root_param_idx = binary_data.nir_runtime_data_root_param_idx; - - r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; - r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - - r_shader_desc.is_compute = binary_data.is_compute; - shader_info_in.is_compute = binary_data.is_compute; - r_shader_desc.compute_local_size[0] = binary_data.compute_local_size[0]; - r_shader_desc.compute_local_size[1] = binary_data.compute_local_size[1]; - r_shader_desc.compute_local_size[2] = binary_data.compute_local_size[2]; - - read_offset += sizeof(uint32_t) * 3 + bin_data_size; - - if (binary_data.shader_name_len) { - r_name.clear(); - r_name.append_utf8((const char *)(binptr + read_offset), binary_data.shader_name_len); - read_offset += STEPIFY(binary_data.shader_name_len, 4); - } - - r_shader_desc.uniform_sets.resize(binary_data.set_count); - shader_info_in.sets.resize(binary_data.set_count); - - for (uint32_t i = 0; i < binary_data.set_count; i++) { - ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) >= binsize, ShaderID()); - uint32_t set_count = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - const ShaderBinary::DataBinding *set_ptr = reinterpret_cast(binptr + read_offset); - uint32_t set_size = set_count * sizeof(ShaderBinary::DataBinding); - ERR_FAIL_COND_V(read_offset + set_size >= binsize, ShaderID()); - - shader_info_in.sets[i].bindings.reserve(set_count); - - for (uint32_t j = 0; j < set_count; j++) { - ShaderUniform info; - info.type = UniformType(set_ptr[j].type); - info.writable = set_ptr[j].writable; - info.length = set_ptr[j].length; - info.binding = set_ptr[j].binding; - - ShaderInfo::UniformBindingInfo binding; - binding.stages = set_ptr[j].dxil_stages; - binding.res_class = (ResourceClass)set_ptr[j].res_class; - binding.type = info.type; - binding.length = info.length; -#ifdef DEV_ENABLED - binding.writable = set_ptr[j].writable; -#endif - 
static_assert(sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations) == sizeof(ShaderBinary::DataBinding::root_sig_locations)); - memcpy((void *)&binding.root_sig_locations, (void *)&set_ptr[j].root_sig_locations, sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations)); + static_assert(sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations) == sizeof(RenderingShaderContainerD3D12::ReflectionBindingDataD3D12::root_signature_locations)); + memcpy((void *)&binding.root_sig_locations, (void *)&uniform_d3d12.root_signature_locations, sizeof(ShaderInfo::UniformBindingInfo::root_sig_locations)); if (binding.root_sig_locations.resource.root_param_idx != UINT32_MAX) { shader_info_in.sets[i].num_root_params.resources++; @@ -3845,80 +3051,50 @@ RDD::ShaderID RenderingDeviceDriverD3D12::shader_create_from_bytecode(const Vect if (binding.root_sig_locations.sampler.root_param_idx != UINT32_MAX) { shader_info_in.sets[i].num_root_params.samplers++; } - - r_shader_desc.uniform_sets.write[i].push_back(info); - shader_info_in.sets[i].bindings.push_back(binding); } - - read_offset += set_size; } - ERR_FAIL_COND_V(read_offset + binary_data.specialization_constants_count * sizeof(ShaderBinary::SpecializationConstant) >= binsize, ShaderID()); - - r_shader_desc.specialization_constants.resize(binary_data.specialization_constants_count); - shader_info_in.specialization_constants.resize(binary_data.specialization_constants_count); - for (uint32_t i = 0; i < binary_data.specialization_constants_count; i++) { - const ShaderBinary::SpecializationConstant &src_sc = *(reinterpret_cast(binptr + read_offset)); - ShaderSpecializationConstant sc; - sc.type = PipelineSpecializationConstantType(src_sc.type); + shader_info_in.specialization_constants.resize(shader_refl.specialization_constants.size()); + for (uint32_t i = 0; i < shader_info_in.specialization_constants.size(); i++) { + ShaderInfo::SpecializationConstant &sc = shader_info_in.specialization_constants[i]; + const 
ShaderSpecializationConstant &src_sc = shader_refl.specialization_constants[i]; + const RenderingShaderContainerD3D12::ReflectionSpecializationDataD3D12 &src_sc_d3d12 = shader_refl_d3d12.reflection_specialization_data_d3d12[i]; sc.constant_id = src_sc.constant_id; sc.int_value = src_sc.int_value; - sc.stages = src_sc.stage_flags; - r_shader_desc.specialization_constants.write[i] = sc; - - ShaderInfo::SpecializationConstant ssc; - ssc.constant_id = src_sc.constant_id; - ssc.int_value = src_sc.int_value; - memcpy(ssc.stages_bit_offsets, src_sc.stages_bit_offsets, sizeof(ssc.stages_bit_offsets)); - shader_info_in.specialization_constants[i] = ssc; - - read_offset += sizeof(ShaderBinary::SpecializationConstant); - } - shader_info_in.spirv_specialization_constants_ids_mask = binary_data.spirv_specialization_constants_ids_mask; - - for (uint32_t i = 0; i < binary_data.stage_count; i++) { - ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, ShaderID()); - - uint32_t stage = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - uint32_t dxil_size = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - uint32_t zstd_size = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - - // Decompress. 
- Vector dxil; - dxil.resize(dxil_size); - int dec_dxil_size = Compression::decompress(dxil.ptrw(), dxil.size(), binptr + read_offset, zstd_size, Compression::MODE_ZSTD); - ERR_FAIL_COND_V(dec_dxil_size != (int32_t)dxil_size, ShaderID()); - shader_info_in.stages_bytecode[ShaderStage(stage)] = dxil; - - zstd_size = STEPIFY(zstd_size, 4); - read_offset += zstd_size; - ERR_FAIL_COND_V(read_offset > binsize, ShaderID()); - - r_shader_desc.stages.push_back(ShaderStage(stage)); + memcpy(sc.stages_bit_offsets, src_sc_d3d12.stages_bit_offsets, sizeof(sc.stages_bit_offsets)); } - const uint8_t *root_sig_data_ptr = binptr + read_offset; + Vector decompressed_code; + for (uint32_t i = 0; i < shader_refl.stages_vector.size(); i++) { + const RenderingShaderContainer::Shader &shader = p_shader_container->shaders[i]; + bool requires_decompression = (shader.code_decompressed_size > 0); + if (requires_decompression) { + decompressed_code.resize(shader.code_decompressed_size); + bool decompressed = p_shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size()); + ERR_FAIL_COND_V_MSG(!decompressed, ShaderID(), vformat("Failed to decompress code on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]))); + } + + if (requires_decompression) { + shader_info_in.stages_bytecode[shader.shader_stage] = decompressed_code; + } else { + shader_info_in.stages_bytecode[shader.shader_stage] = shader.code_compressed_bytes; + } + } PFN_D3D12_CREATE_ROOT_SIGNATURE_DESERIALIZER d3d_D3D12CreateRootSignatureDeserializer = (PFN_D3D12_CREATE_ROOT_SIGNATURE_DESERIALIZER)(void *)GetProcAddress(context_driver->lib_d3d12, "D3D12CreateRootSignatureDeserializer"); ERR_FAIL_NULL_V(d3d_D3D12CreateRootSignatureDeserializer, ShaderID()); - HRESULT res = d3d_D3D12CreateRootSignatureDeserializer(root_sig_data_ptr, binary_data.root_signature_len, 
IID_PPV_ARGS(shader_info_in.root_signature_deserializer.GetAddressOf())); + HRESULT res = d3d_D3D12CreateRootSignatureDeserializer(shader_refl_d3d12.root_signature_bytes.ptr(), shader_refl_d3d12.root_signature_bytes.size(), IID_PPV_ARGS(shader_info_in.root_signature_deserializer.GetAddressOf())); ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ShaderID(), "D3D12CreateRootSignatureDeserializer failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); - read_offset += binary_data.root_signature_len; - - ERR_FAIL_COND_V(read_offset != binsize, ShaderID()); ComPtr root_signature; - res = device->CreateRootSignature(0, root_sig_data_ptr, binary_data.root_signature_len, IID_PPV_ARGS(shader_info_in.root_signature.GetAddressOf())); + res = device->CreateRootSignature(0, shader_refl_d3d12.root_signature_bytes.ptr(), shader_refl_d3d12.root_signature_bytes.size(), IID_PPV_ARGS(shader_info_in.root_signature.GetAddressOf())); ERR_FAIL_COND_V_MSG(!SUCCEEDED(res), ShaderID(), "CreateRootSignature failed with error " + vformat("0x%08ux", (uint64_t)res) + "."); + shader_info_in.root_signature_desc = shader_info_in.root_signature_deserializer->GetRootSignatureDesc(); - shader_info_in.root_signature_crc = binary_data.root_signature_crc; + shader_info_in.root_signature_crc = shader_refl_d3d12.root_signature_crc; // Bookkeep. 
- ShaderInfo *shader_info_ptr = VersatileResource::allocate(resources_allocator); *shader_info_ptr = shader_info_in; return ShaderID(shader_info_ptr); @@ -6443,6 +5619,10 @@ const RDD::Capabilities &RenderingDeviceDriverD3D12::get_capabilities() const { return device_capabilities; } +const RenderingShaderContainerFormat &RenderingDeviceDriverD3D12::get_shader_container_format() const { + return shader_container_format; +} + bool RenderingDeviceDriverD3D12::is_composite_alpha_supported(CommandQueueID p_queue) const { if (has_comp_alpha.has((uint64_t)p_queue.id)) { return has_comp_alpha[(uint64_t)p_queue.id]; @@ -6664,6 +5844,8 @@ Error RenderingDeviceDriverD3D12::_check_capabilities() { print_verbose(" model: " + D3D_SHADER_MODEL_TO_STRING(shader_capabilities.shader_model)); } + shader_container_format.set_lib_d3d12(context_driver->lib_d3d12); + D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; res = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options)); if (SUCCEEDED(res)) { diff --git a/drivers/d3d12/rendering_device_driver_d3d12.h b/drivers/d3d12/rendering_device_driver_d3d12.h index 54b57cbc2f5..506976a2f51 100644 --- a/drivers/d3d12/rendering_device_driver_d3d12.h +++ b/drivers/d3d12/rendering_device_driver_d3d12.h @@ -33,6 +33,7 @@ #include "core/templates/hash_map.h" #include "core/templates/paged_allocator.h" #include "core/templates/self_list.h" +#include "rendering_shader_container_d3d12.h" #include "servers/rendering/rendering_device_driver.h" #ifndef _MSC_VER @@ -54,8 +55,6 @@ using Microsoft::WRL::ComPtr; -#define D3D12_BITCODE_OFFSETS_NUM_STAGES 3 - #ifdef DEV_ENABLED #define CUSTOM_INFO_QUEUE_ENABLED 0 #endif @@ -131,6 +130,7 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver { FormatCapabilities format_capabilities; BarrierCapabilities barrier_capabilities; MiscFeaturesSupport misc_features_support; + RenderingShaderContainerFormatD3D12 shader_container_format; String pipeline_cache_id; class DescriptorsHeap 
{ @@ -518,6 +518,7 @@ public: /****************/ /**** SHADER ****/ /****************/ + private: static const uint32_t ROOT_SIGNATURE_SIZE = 256; static const uint32_t PUSH_CONSTANT_SIZE = 128; // Mimicking Vulkan. @@ -535,82 +536,6 @@ private: MAX_UNIFORM_SETS = (ROOT_SIGNATURE_SIZE - PUSH_CONSTANT_SIZE) / sizeof(uint32_t), }; - enum RootSignatureLocationType { - RS_LOC_TYPE_RESOURCE, - RS_LOC_TYPE_SAMPLER, - }; - - enum ResourceClass { - RES_CLASS_INVALID, - RES_CLASS_CBV, - RES_CLASS_SRV, - RES_CLASS_UAV, - }; - - struct ShaderBinary { - // Version 1: Initial. - // Version 2: 64-bit vertex input mask. - // Version 3: Added SC stage mask. - static const uint32_t VERSION = 3; - - // Phase 1: SPIR-V reflection, where the Vulkan/RD interface of the shader is discovered. - // Phase 2: SPIR-V to DXIL translation, where the DXIL interface is discovered, which may have gaps due to optimizations. - - struct DataBinding { - // - Phase 1. - uint32_t type = 0; - uint32_t binding = 0; - uint32_t stages = 0; - uint32_t length = 0; // Size of arrays (in total elements), or ubos (in bytes * total elements). - uint32_t writable = 0; - // - Phase 2. - uint32_t res_class = 0; - uint32_t has_sampler = 0; - uint32_t dxil_stages = 0; - struct RootSignatureLocation { - uint32_t root_param_idx = UINT32_MAX; // UINT32_MAX if unused. - uint32_t range_idx = UINT32_MAX; // UINT32_MAX if unused. - }; - RootSignatureLocation root_sig_locations[2]; // Index is RootSignatureLocationType. - - // We need to sort these to fill the root signature locations properly. - bool operator<(const DataBinding &p_other) const { - return binding < p_other.binding; - } - }; - - struct SpecializationConstant { - // - Phase 1. - uint32_t type = 0; - uint32_t constant_id = 0; - union { - uint32_t int_value = 0; - float float_value; - bool bool_value; - }; - uint32_t stage_flags = 0; - // - Phase 2. 
- uint64_t stages_bit_offsets[D3D12_BITCODE_OFFSETS_NUM_STAGES] = {}; - }; - - struct Data { - uint64_t vertex_input_mask = 0; - uint32_t fragment_output_mask = 0; - uint32_t specialization_constants_count = 0; - uint32_t spirv_specialization_constants_ids_mask = 0; - uint32_t is_compute = 0; - uint32_t compute_local_size[3] = {}; - uint32_t set_count = 0; - uint32_t push_constant_size = 0; - uint32_t dxil_push_constant_stages = 0; // Phase 2. - uint32_t nir_runtime_data_root_param_idx = 0; // Phase 2. - uint32_t stage_count = 0; - uint32_t shader_name_len = 0; - uint32_t root_signature_len = 0; - uint32_t root_signature_crc = 0; - }; - }; - struct ShaderInfo { uint32_t dxil_push_constant_size = 0; uint32_t nir_runtime_data_root_param_idx = UINT32_MAX; @@ -661,22 +586,13 @@ private: uint32_t root_signature_crc = 0; }; - uint32_t _shader_patch_dxil_specialization_constant( - PipelineSpecializationConstantType p_type, - const void *p_value, - const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES], - HashMap> &r_stages_bytecodes, - bool p_is_first_patch); bool _shader_apply_specialization_constants( const ShaderInfo *p_shader_info, VectorView p_specialization_constants, HashMap> &r_final_stages_bytecode); - void _shader_sign_dxil_bytecode(ShaderStage p_stage, Vector &r_dxil_blob); public: - virtual String shader_get_binary_cache_key() override final; - virtual Vector shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) override final; - virtual ShaderID shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) override final; + virtual ShaderID shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) override final; virtual uint32_t shader_get_layout_hash(ShaderID p_shader) override final; virtual void shader_free(ShaderID p_shader) override final; virtual void shader_destroy_modules(ShaderID 
p_shader) override final; @@ -979,6 +895,7 @@ public: virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; virtual const Capabilities &get_capabilities() const override final; + virtual const RenderingShaderContainerFormat &get_shader_container_format() const override final; virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final; diff --git a/drivers/d3d12/rendering_shader_container_d3d12.cpp b/drivers/d3d12/rendering_shader_container_d3d12.cpp new file mode 100644 index 00000000000..34cb0ff2724 --- /dev/null +++ b/drivers/d3d12/rendering_shader_container_d3d12.cpp @@ -0,0 +1,912 @@ +/**************************************************************************/ +/* rendering_shader_container_d3d12.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "rendering_shader_container_d3d12.h" + +#include "core/templates/sort_array.h" + +#include "dxil_hash.h" + +#include + +#ifndef _MSC_VER +// Match current version used by MinGW, MSVC and Direct3D 12 headers use 500. +#define __REQUIRED_RPCNDR_H_VERSION__ 475 +#endif + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wnon-virtual-dtor" +#pragma GCC diagnostic ignored "-Wshadow" +#pragma GCC diagnostic ignored "-Wswitch" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wimplicit-fallthrough" +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wnon-virtual-dtor" +#pragma clang diagnostic ignored "-Wstring-plus-int" +#pragma clang diagnostic ignored "-Wswitch" +#pragma clang diagnostic ignored "-Wmissing-field-initializers" +#pragma clang diagnostic ignored "-Wimplicit-fallthrough" +#endif + +#include "d3dx12.h" +#include +#define D3D12MA_D3D12_HEADERS_ALREADY_INCLUDED +#include "D3D12MemAlloc.h" + +#include + +#if defined(_MSC_VER) && defined(MemoryBarrier) +// Annoying define from winnt.h. Reintroduced by some of the headers above. +#undef MemoryBarrier +#endif + +// No point in fighting warnings in Mesa. 
+#if defined(_MSC_VER) +#pragma warning(push) +#pragma warning(disable : 4200) // "nonstandard extension used: zero-sized array in struct/union". +#pragma warning(disable : 4806) // "'&': unsafe operation: no value of type 'bool' promoted to type 'uint32_t' can equal the given constant". +#endif + +#include "nir_spirv.h" +#include "nir_to_dxil.h" +#include "spirv_to_dxil.h" +extern "C" { +#include "dxil_spirv_nir.h" +} + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic pop +#elif defined(__clang__) +#pragma clang diagnostic pop +#endif + +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + +static D3D12_SHADER_VISIBILITY stages_to_d3d12_visibility(uint32_t p_stages_mask) { + switch (p_stages_mask) { + case RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT: + return D3D12_SHADER_VISIBILITY_VERTEX; + case RenderingDeviceCommons::SHADER_STAGE_FRAGMENT_BIT: + return D3D12_SHADER_VISIBILITY_PIXEL; + default: + return D3D12_SHADER_VISIBILITY_ALL; + } +} + +uint32_t RenderingDXIL::patch_specialization_constant( + RenderingDeviceCommons::PipelineSpecializationConstantType p_type, + const void *p_value, + const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES], + HashMap> &r_stages_bytecodes, + bool p_is_first_patch) { + uint32_t patch_val = 0; + switch (p_type) { + case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT: { + uint32_t int_value = *((const int *)p_value); + ERR_FAIL_COND_V(int_value & (1 << 31), 0); + patch_val = int_value; + } break; + case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL: { + bool bool_value = *((const bool *)p_value); + patch_val = (uint32_t)bool_value; + } break; + case RenderingDeviceCommons::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT: { + uint32_t int_value = *((const int *)p_value); + ERR_FAIL_COND_V(int_value & (1 << 31), 0); + patch_val = (int_value >> 1); + } break; + } + // For VBR encoding to encode the number of bits we expect (32), we need to set the MSB 
unconditionally. + // However, signed VBR moves the MSB to the LSB, so setting the MSB to 1 wouldn't help. Therefore, + // the bit we set to 1 is the one at index 30. + patch_val |= (1 << 30); + patch_val <<= 1; // What signed VBR does. + + auto tamper_bits = [](uint8_t *p_start, uint64_t p_bit_offset, uint64_t p_tb_value) -> uint64_t { + uint64_t original = 0; + uint32_t curr_input_byte = p_bit_offset / 8; + uint8_t curr_input_bit = p_bit_offset % 8; + auto get_curr_input_bit = [&]() -> bool { + return ((p_start[curr_input_byte] >> curr_input_bit) & 1); + }; + auto move_to_next_input_bit = [&]() { + if (curr_input_bit == 7) { + curr_input_bit = 0; + curr_input_byte++; + } else { + curr_input_bit++; + } + }; + auto tamper_input_bit = [&](bool p_new_bit) { + p_start[curr_input_byte] &= ~((uint8_t)1 << curr_input_bit); + if (p_new_bit) { + p_start[curr_input_byte] |= (uint8_t)1 << curr_input_bit; + } + }; + uint8_t value_bit_idx = 0; + for (uint32_t i = 0; i < 5; i++) { // 32 bits take 5 full bytes in VBR. + for (uint32_t j = 0; j < 7; j++) { + bool input_bit = get_curr_input_bit(); + original |= (uint64_t)(input_bit ? 1 : 0) << value_bit_idx; + tamper_input_bit((p_tb_value >> value_bit_idx) & 1); + move_to_next_input_bit(); + value_bit_idx++; + } +#ifdef DEV_ENABLED + bool input_bit = get_curr_input_bit(); + DEV_ASSERT((i < 4 && input_bit) || (i == 4 && !input_bit)); +#endif + move_to_next_input_bit(); + } + return original; + }; + uint32_t stages_patched_mask = 0; + for (int stage = 0; stage < RenderingDeviceCommons::SHADER_STAGE_MAX; stage++) { + if (!r_stages_bytecodes.has((RenderingDeviceCommons::ShaderStage)stage)) { + continue; + } + + uint64_t offset = p_stages_bit_offsets[RenderingShaderContainerD3D12::SHADER_STAGES_BIT_OFFSET_INDICES[stage]]; + if (offset == 0) { + // This constant does not appear at this stage. 
+ continue; + } + + Vector &bytecode = r_stages_bytecodes[(RenderingDeviceCommons::ShaderStage)stage]; +#ifdef DEV_ENABLED + uint64_t orig_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val); + // Checking against the value the NIR patch should have set. + DEV_ASSERT(!p_is_first_patch || ((orig_patch_val >> 1) & GODOT_NIR_SC_SENTINEL_MAGIC_MASK) == GODOT_NIR_SC_SENTINEL_MAGIC); + uint64_t readback_patch_val = tamper_bits(bytecode.ptrw(), offset, patch_val); + DEV_ASSERT(readback_patch_val == patch_val); +#else + tamper_bits(bytecode.ptrw(), offset, patch_val); +#endif + + stages_patched_mask |= (1 << stage); + } + + return stages_patched_mask; +} + +void RenderingDXIL::sign_bytecode(RenderingDeviceCommons::ShaderStage p_stage, Vector &r_dxil_blob) { + uint8_t *w = r_dxil_blob.ptrw(); + compute_dxil_hash(w + 20, r_dxil_blob.size() - 20, w + 4); +} + +// RenderingShaderContainerD3D12 + +uint32_t RenderingShaderContainerD3D12::_format() const { + return 0x43443344; +} + +uint32_t RenderingShaderContainerD3D12::_format_version() const { + return FORMAT_VERSION; +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) { + reflection_data_d3d12 = *(const ReflectionDataD3D12 *)(p_bytes); + return sizeof(ReflectionDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) { + reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size()); + return 0; +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + reflection_binding_set_uniforms_data_d3d12.ptrw()[p_index] = *(const ReflectionBindingDataD3D12 *)(p_bytes); + return sizeof(ReflectionBindingDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) { + 
reflection_specialization_data_d3d12.resize(reflection_specialization_data.size()); + return 0; +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + reflection_specialization_data_d3d12.ptrw()[p_index] = *(const ReflectionSpecializationDataD3D12 *)(p_bytes); + return sizeof(ReflectionSpecializationDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_from_bytes_footer_extra_data(const uint8_t *p_bytes) { + ContainerFooterD3D12 footer = *(const ContainerFooterD3D12 *)(p_bytes); + root_signature_crc = footer.root_signature_crc; + root_signature_bytes.resize(footer.root_signature_length); + memcpy(root_signature_bytes.ptrw(), p_bytes + sizeof(ContainerFooterD3D12), root_signature_bytes.size()); + return sizeof(ContainerFooterD3D12) + footer.root_signature_length; +} + +uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const { + if (p_bytes != nullptr) { + *(ReflectionDataD3D12 *)(p_bytes) = reflection_data_d3d12; + } + + return sizeof(ReflectionDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(ReflectionBindingDataD3D12 *)(p_bytes) = reflection_binding_set_uniforms_data_d3d12[p_index]; + } + + return sizeof(ReflectionBindingDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(ReflectionSpecializationDataD3D12 *)(p_bytes) = reflection_specialization_data_d3d12[p_index]; + } + + return sizeof(ReflectionSpecializationDataD3D12); +} + +uint32_t RenderingShaderContainerD3D12::_to_bytes_footer_extra_data(uint8_t *p_bytes) const { + if (p_bytes != nullptr) { + ContainerFooterD3D12 &footer = *(ContainerFooterD3D12 *)(p_bytes); + footer.root_signature_length = root_signature_bytes.size(); + 
footer.root_signature_crc = root_signature_crc; + memcpy(p_bytes + sizeof(ContainerFooterD3D12), root_signature_bytes.ptr(), root_signature_bytes.size()); + } + + return sizeof(ContainerFooterD3D12) + root_signature_bytes.size(); +} + +#if NIR_ENABLED +bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(const Vector &p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap &r_stages_nir_shaders, Vector &r_stages, BitField &r_stages_processed) { + r_stages_processed.clear(); + + dxil_spirv_runtime_conf dxil_runtime_conf = {}; + dxil_runtime_conf.runtime_data_cbv.base_shader_register = RUNTIME_DATA_REGISTER; + dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER; + dxil_runtime_conf.zero_based_vertex_instance_id = true; + dxil_runtime_conf.zero_based_compute_workgroup_id = true; + dxil_runtime_conf.declared_read_only_images_as_srvs = true; + + // Making this explicit to let maintainers know that in practice this didn't improve performance, + // probably because data generated by one shader and consumed by another one forces the resource + // to transition from UAV to SRV, and back, instead of being an UAV all the time. + // In case someone wants to try, care must be taken so in case of incompatible bindings across stages + // happen as a result, all the stages are re-translated. That can happen if, for instance, a stage only + // uses an allegedly writable resource only for reading but the next stage doesn't. + dxil_runtime_conf.inferred_read_only_images_as_srvs = false; + + // Translate SPIR-V to NIR. 
+ for (int64_t i = 0; i < p_spirv.size(); i++) { + RenderingDeviceCommons::ShaderStage stage = p_spirv[i].shader_stage; + RenderingDeviceCommons::ShaderStage stage_flag = (RenderingDeviceCommons::ShaderStage)(1 << stage); + r_stages.push_back(stage); + r_stages_processed.set_flag(stage_flag); + + const char *entry_point = "main"; + static const gl_shader_stage SPIRV_TO_MESA_STAGES[RenderingDeviceCommons::SHADER_STAGE_MAX] = { + MESA_SHADER_VERTEX, // SHADER_STAGE_VERTEX + MESA_SHADER_FRAGMENT, // SHADER_STAGE_FRAGMENT + MESA_SHADER_TESS_CTRL, // SHADER_STAGE_TESSELATION_CONTROL + MESA_SHADER_TESS_EVAL, // SHADER_STAGE_TESSELATION_EVALUATION + MESA_SHADER_COMPUTE, // SHADER_STAGE_COMPUTE + }; + + nir_shader *shader = spirv_to_nir( + (const uint32_t *)(p_spirv[i].spirv.ptr()), + p_spirv[i].spirv.size() / sizeof(uint32_t), + nullptr, + 0, + SPIRV_TO_MESA_STAGES[stage], + entry_point, + dxil_spirv_nir_get_spirv_options(), + p_compiler_options); + + ERR_FAIL_NULL_V_MSG(shader, false, "Shader translation (step 1) at stage " + String(RenderingDeviceCommons::SHADER_STAGE_NAMES[stage]) + " failed."); + +#ifdef DEV_ENABLED + nir_validate_shader(shader, "Validate before feeding NIR to the DXIL compiler"); +#endif + + if (stage == RenderingDeviceCommons::SHADER_STAGE_VERTEX) { + dxil_runtime_conf.yz_flip.y_mask = 0xffff; + dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL; + } else { + dxil_runtime_conf.yz_flip.y_mask = 0; + dxil_runtime_conf.yz_flip.mode = DXIL_SPIRV_YZ_FLIP_NONE; + } + + dxil_spirv_nir_prep(shader); + bool requires_runtime_data = false; + dxil_spirv_nir_passes(shader, &dxil_runtime_conf, &requires_runtime_data); + + r_stages_nir_shaders[stage] = shader; + } + + // Link NIR shaders. 
+ for (int i = RenderingDeviceCommons::SHADER_STAGE_MAX - 1; i >= 0; i--) { + if (!r_stages_nir_shaders.has(i)) { + continue; + } + nir_shader *shader = r_stages_nir_shaders[i]; + nir_shader *prev_shader = nullptr; + for (int j = i - 1; j >= 0; j--) { + if (r_stages_nir_shaders.has(j)) { + prev_shader = r_stages_nir_shaders[j]; + break; + } + } + // There is a bug in the Direct3D runtime during creation of a PSO with view instancing. If a fragment + // shader uses front/back face detection (SV_IsFrontFace), its signature must include the pixel position + // builtin variable (SV_Position), otherwise an Internal Runtime error will occur. + if (i == RenderingDeviceCommons::SHADER_STAGE_FRAGMENT) { + const bool use_front_face = + nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_FACE) || + (shader->info.inputs_read & VARYING_BIT_FACE) || + nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRONT_FACE) || + BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); + const bool use_position = + nir_find_variable_with_location(shader, nir_var_shader_in, VARYING_SLOT_POS) || + (shader->info.inputs_read & VARYING_BIT_POS) || + nir_find_variable_with_location(shader, nir_var_system_value, SYSTEM_VALUE_FRAG_COORD) || + BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); + if (use_front_face && !use_position) { + nir_variable *const pos = nir_variable_create(shader, nir_var_shader_in, glsl_vec4_type(), "gl_FragCoord"); + pos->data.location = VARYING_SLOT_POS; + shader->info.inputs_read |= VARYING_BIT_POS; + } + } + if (prev_shader) { + bool requires_runtime_data = {}; + dxil_spirv_nir_link(shader, prev_shader, &dxil_runtime_conf, &requires_runtime_data); + } + } + + return true; +} + +struct GodotNirCallbackUserData { + RenderingShaderContainerD3D12 *container; + RenderingDeviceCommons::ShaderStage stage; +}; + +static dxil_shader_model shader_model_d3d_to_dxil(D3D_SHADER_MODEL p_d3d_shader_model) { 
+ static_assert(SHADER_MODEL_6_0 == 0x60000); + static_assert(SHADER_MODEL_6_3 == 0x60003); + static_assert(D3D_SHADER_MODEL_6_0 == 0x60); + static_assert(D3D_SHADER_MODEL_6_3 == 0x63); + return (dxil_shader_model)((p_d3d_shader_model >> 4) * 0x10000 + (p_d3d_shader_model & 0xf)); +} + +bool RenderingShaderContainerD3D12::_convert_nir_to_dxil(const HashMap &p_stages_nir_shaders, BitField p_stages_processed, HashMap> &r_dxil_blobs) { + // Translate NIR to DXIL. + for (KeyValue it : p_stages_nir_shaders) { + RenderingDeviceCommons::ShaderStage stage = (RenderingDeviceCommons::ShaderStage)(it.key); + GodotNirCallbackUserData godot_nir_callback_user_data; + godot_nir_callback_user_data.container = this; + godot_nir_callback_user_data.stage = stage; + + GodotNirCallbacks godot_nir_callbacks = {}; + godot_nir_callbacks.data = &godot_nir_callback_user_data; + godot_nir_callbacks.report_resource = _nir_report_resource; + godot_nir_callbacks.report_sc_bit_offset_fn = _nir_report_sc_bit_offset; + godot_nir_callbacks.report_bitcode_bit_offset_fn = _nir_report_bitcode_bit_offset; + + nir_to_dxil_options nir_to_dxil_options = {}; + nir_to_dxil_options.environment = DXIL_ENVIRONMENT_VULKAN; + nir_to_dxil_options.shader_model_max = shader_model_d3d_to_dxil(D3D_SHADER_MODEL(REQUIRED_SHADER_MODEL)); + nir_to_dxil_options.validator_version_max = NO_DXIL_VALIDATION; + nir_to_dxil_options.godot_nir_callbacks = &godot_nir_callbacks; + + dxil_logger logger = {}; + logger.log = [](void *p_priv, const char *p_msg) { +#ifdef DEBUG_ENABLED + print_verbose(p_msg); +#endif + }; + + blob dxil_blob = {}; + bool ok = nir_to_dxil(it.value, &nir_to_dxil_options, &logger, &dxil_blob); + ERR_FAIL_COND_V_MSG(!ok, false, "Shader translation at stage " + String(RenderingDeviceCommons::SHADER_STAGE_NAMES[stage]) + " failed."); + + Vector blob_copy; + blob_copy.resize(dxil_blob.size); + memcpy(blob_copy.ptrw(), dxil_blob.data, dxil_blob.size); + blob_finish(&dxil_blob); + r_dxil_blobs.insert(stage, 
blob_copy); + } + + return true; +} + +bool RenderingShaderContainerD3D12::_convert_spirv_to_dxil(const Vector &p_spirv, HashMap> &r_dxil_blobs, Vector &r_stages, BitField &r_stages_processed) { + r_dxil_blobs.clear(); + + HashMap stages_nir_shaders; + auto free_nir_shaders = [&]() { + for (KeyValue &E : stages_nir_shaders) { + ralloc_free(E.value); + } + stages_nir_shaders.clear(); + }; + + // This structure must live as long as the shaders are alive. + nir_shader_compiler_options compiler_options = *dxil_get_nir_compiler_options(); + compiler_options.lower_base_vertex = false; + + // This is based on spirv2dxil.c. May need updates when it changes. + // Also, this has to stay around until after linking. + if (!_convert_spirv_to_nir(p_spirv, &compiler_options, stages_nir_shaders, r_stages, r_stages_processed)) { + free_nir_shaders(); + return false; + } + + if (!_convert_nir_to_dxil(stages_nir_shaders, r_stages_processed, r_dxil_blobs)) { + free_nir_shaders(); + return false; + } + + free_nir_shaders(); + return true; +} + +bool RenderingShaderContainerD3D12::_generate_root_signature(BitField p_stages_processed) { + // Root (push) constants. + LocalVector root_params; + if (reflection_data_d3d12.dxil_push_constant_stages) { + CD3DX12_ROOT_PARAMETER1 push_constant; + push_constant.InitAsConstants( + reflection_data.push_constant_size / sizeof(uint32_t), + ROOT_CONSTANT_REGISTER, + 0, + stages_to_d3d12_visibility(reflection_data_d3d12.dxil_push_constant_stages)); + + root_params.push_back(push_constant); + } + + // NIR-DXIL runtime data. + if (reflection_data_d3d12.nir_runtime_data_root_param_idx == 1) { // Set above to 1 when discovering runtime data is needed. + DEV_ASSERT(!reflection_data.is_compute); // Could be supported if needed, but it's pointless as of now. 
+ reflection_data_d3d12.nir_runtime_data_root_param_idx = root_params.size(); + CD3DX12_ROOT_PARAMETER1 nir_runtime_data; + nir_runtime_data.InitAsConstants( + sizeof(dxil_spirv_vertex_runtime_data) / sizeof(uint32_t), + RUNTIME_DATA_REGISTER, + 0, + D3D12_SHADER_VISIBILITY_VERTEX); + root_params.push_back(nir_runtime_data); + } + + // Descriptor tables (up to two per uniform set, for resources and/or samplers). + // These have to stay around until serialization! + struct TraceableDescriptorTable { + uint32_t stages_mask = {}; + Vector ranges; + Vector root_signature_locations; + }; + + uint32_t binding_start = 0; + Vector resource_tables_maps; + Vector sampler_tables_maps; + for (uint32_t i = 0; i < reflection_binding_set_uniforms_count.size(); i++) { + bool first_resource_in_set = true; + bool first_sampler_in_set = true; + uint32_t uniform_count = reflection_binding_set_uniforms_count[i]; + for (uint32_t j = 0; j < uniform_count; j++) { + const ReflectionBindingData &uniform = reflection_binding_set_uniforms_data[binding_start + j]; + ReflectionBindingDataD3D12 &uniform_d3d12 = reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + j]; + bool really_used = uniform_d3d12.dxil_stages != 0; +#ifdef DEV_ENABLED + bool anybody_home = (ResourceClass)(uniform_d3d12.resource_class) != RES_CLASS_INVALID || uniform_d3d12.has_sampler; + DEV_ASSERT(anybody_home == really_used); +#endif + if (!really_used) { + continue; // Existed in SPIR-V; went away in DXIL. 
+ } + + auto insert_range = [](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type, + uint32_t p_num_descriptors, + uint32_t p_dxil_register, + uint32_t p_dxil_stages_mask, + RootSignatureLocation *p_root_sig_locations, + Vector &r_tables, + bool &r_first_in_set) { + if (r_first_in_set) { + r_tables.resize(r_tables.size() + 1); + r_first_in_set = false; + } + + TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1]; + table.stages_mask |= p_dxil_stages_mask; + + CD3DX12_DESCRIPTOR_RANGE1 range; + // Due to the aliasing hack for SRV-UAV of different families, + // we can be causing an unintended change of data (sometimes the validation layers catch it). + D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; + if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_SRV || p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) { + flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE; + } else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) { + flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE; + } + range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags); + + table.ranges.push_back(range); + table.root_signature_locations.push_back(p_root_sig_locations); + }; + + uint32_t num_descriptors = 1; + D3D12_DESCRIPTOR_RANGE_TYPE resource_range_type = {}; + switch ((ResourceClass)(uniform_d3d12.resource_class)) { + case RES_CLASS_INVALID: { + num_descriptors = uniform.length; + DEV_ASSERT(uniform_d3d12.has_sampler); + } break; + case RES_CLASS_CBV: { + resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + DEV_ASSERT(!uniform_d3d12.has_sampler); + } break; + case RES_CLASS_SRV: { + resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + num_descriptors = MAX(1u, uniform.length); // An unbound R/O buffer is reflected as zero-size. + } break; + case RES_CLASS_UAV: { + resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + num_descriptors = MAX(1u, uniform.length); // An unbound R/W buffer is reflected as zero-size. 
+ DEV_ASSERT(!uniform_d3d12.has_sampler); + } break; + } + + uint32_t dxil_register = i * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + uniform.binding * GODOT_NIR_BINDING_MULTIPLIER; + if (uniform_d3d12.resource_class != RES_CLASS_INVALID) { + insert_range( + resource_range_type, + num_descriptors, + dxil_register, + uniform_d3d12.dxil_stages, + &uniform_d3d12.root_signature_locations[RS_LOC_TYPE_RESOURCE], + resource_tables_maps, + first_resource_in_set); + } + + if (uniform_d3d12.has_sampler) { + insert_range( + D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, + num_descriptors, + dxil_register, + uniform_d3d12.dxil_stages, + &uniform_d3d12.root_signature_locations[RS_LOC_TYPE_SAMPLER], + sampler_tables_maps, + first_sampler_in_set); + } + } + + binding_start += uniform_count; + } + + auto make_descriptor_tables = [&root_params](const Vector &p_tables) { + for (const TraceableDescriptorTable &table : p_tables) { + D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(table.stages_mask); + DEV_ASSERT(table.ranges.size() == table.root_signature_locations.size()); + for (int i = 0; i < table.ranges.size(); i++) { + // By now we know very well which root signature location corresponds to the pointed uniform. 
+ table.root_signature_locations[i]->root_param_index = root_params.size(); + table.root_signature_locations[i]->range_index = i; + } + + CD3DX12_ROOT_PARAMETER1 root_table; + root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), visibility); + root_params.push_back(root_table); + } + }; + + make_descriptor_tables(resource_tables_maps); + make_descriptor_tables(sampler_tables_maps); + + CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {}; + D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags = + D3D12_ROOT_SIGNATURE_FLAG_DENY_HULL_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_DOMAIN_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_GEOMETRY_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS | + D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS; + + if (!p_stages_processed.has_flag(RenderingDeviceCommons::SHADER_STAGE_VERTEX_BIT)) { + root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_VERTEX_SHADER_ROOT_ACCESS; + } + + if (!p_stages_processed.has_flag(RenderingDeviceCommons::SHADER_STAGE_FRAGMENT_BIT)) { + root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_DENY_PIXEL_SHADER_ROOT_ACCESS; + } + + if (reflection_data.vertex_input_mask) { + root_sig_flags |= D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + } + + root_sig_desc.Init_1_1(root_params.size(), root_params.ptr(), 0, nullptr, root_sig_flags); + + // Create and store the root signature and its CRC32. 
+ ID3DBlob *error_blob = nullptr; + ID3DBlob *root_sig_blob = nullptr; + HRESULT res = D3DX12SerializeVersionedRootSignature(HMODULE(lib_d3d12), &root_sig_desc, D3D_ROOT_SIGNATURE_VERSION_1_1, &root_sig_blob, &error_blob); + if (SUCCEEDED(res)) { + root_signature_bytes.resize(root_sig_blob->GetBufferSize()); + memcpy(root_signature_bytes.ptrw(), root_sig_blob->GetBufferPointer(), root_sig_blob->GetBufferSize()); + + root_signature_crc = crc32(0, nullptr, 0); + root_signature_crc = crc32(root_signature_crc, (const Bytef *)root_sig_blob->GetBufferPointer(), root_sig_blob->GetBufferSize()); + + return true; + } else { + if (root_sig_blob != nullptr) { + root_sig_blob->Release(); + } + + String error_string; + if (error_blob != nullptr) { + error_string = vformat("Serialization of root signature failed with error 0x%08ux and the following message:\n%s", uint32_t(res), String::ascii(Span((char *)error_blob->GetBufferPointer(), error_blob->GetBufferSize()))); + error_blob->Release(); + } else { + error_string = vformat("Serialization of root signature failed with error 0x%08ux", uint32_t(res)); + } + + ERR_FAIL_V_MSG(false, error_string); + } +} + +void RenderingShaderContainerD3D12::_nir_report_resource(uint32_t p_register, uint32_t p_space, uint32_t p_dxil_type, void *p_data) { + const GodotNirCallbackUserData &user_data = *(GodotNirCallbackUserData *)p_data; + + // Types based on Mesa's dxil_container.h. 
+ static const uint32_t DXIL_RES_SAMPLER = 1; + static const ResourceClass DXIL_TYPE_TO_CLASS[] = { + RES_CLASS_INVALID, // DXIL_RES_INVALID + RES_CLASS_INVALID, // DXIL_RES_SAMPLER + RES_CLASS_CBV, // DXIL_RES_CBV + RES_CLASS_SRV, // DXIL_RES_SRV_TYPED + RES_CLASS_SRV, // DXIL_RES_SRV_RAW + RES_CLASS_SRV, // DXIL_RES_SRV_STRUCTURED + RES_CLASS_UAV, // DXIL_RES_UAV_TYPED + RES_CLASS_UAV, // DXIL_RES_UAV_RAW + RES_CLASS_UAV, // DXIL_RES_UAV_STRUCTURED + RES_CLASS_INVALID, // DXIL_RES_UAV_STRUCTURED_WITH_COUNTER + }; + + DEV_ASSERT(p_dxil_type < ARRAY_SIZE(DXIL_TYPE_TO_CLASS)); + ResourceClass resource_class = DXIL_TYPE_TO_CLASS[p_dxil_type]; + + if (p_register == ROOT_CONSTANT_REGISTER && p_space == 0) { + DEV_ASSERT(resource_class == RES_CLASS_CBV); + user_data.container->reflection_data_d3d12.dxil_push_constant_stages |= (1 << user_data.stage); + } else if (p_register == RUNTIME_DATA_REGISTER && p_space == 0) { + DEV_ASSERT(resource_class == RES_CLASS_CBV); + user_data.container->reflection_data_d3d12.nir_runtime_data_root_param_idx = 1; // Temporary, to be determined later. 
+ } else { + DEV_ASSERT(p_space == 0); + + uint32_t set = p_register / GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER; + uint32_t binding = (p_register % GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER) / GODOT_NIR_BINDING_MULTIPLIER; + + DEV_ASSERT(set < (uint32_t)user_data.container->reflection_binding_set_uniforms_count.size()); + + uint32_t binding_start = 0; + for (uint32_t i = 0; i < set; i++) { + binding_start += user_data.container->reflection_binding_set_uniforms_count[i]; + } + + [[maybe_unused]] bool found = false; + for (uint32_t i = 0; i < user_data.container->reflection_binding_set_uniforms_count[set]; i++) { + const ReflectionBindingData &uniform = user_data.container->reflection_binding_set_uniforms_data[binding_start + i]; + ReflectionBindingDataD3D12 &uniform_d3d12 = user_data.container->reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + i]; + if (uniform.binding != binding) { + continue; + } + + uniform_d3d12.dxil_stages |= (1 << user_data.stage); + if (resource_class != RES_CLASS_INVALID) { + DEV_ASSERT(uniform_d3d12.resource_class == (uint32_t)RES_CLASS_INVALID || uniform_d3d12.resource_class == (uint32_t)resource_class); + uniform_d3d12.resource_class = resource_class; + } else if (p_dxil_type == DXIL_RES_SAMPLER) { + uniform_d3d12.has_sampler = (uint32_t)true; + } else { + DEV_ASSERT(false && "Unknown resource class."); + } + found = true; + } + + DEV_ASSERT(found); + } +} + +void RenderingShaderContainerD3D12::_nir_report_sc_bit_offset(uint32_t p_sc_id, uint64_t p_bit_offset, void *p_data) { + const GodotNirCallbackUserData &user_data = *(GodotNirCallbackUserData *)p_data; + [[maybe_unused]] bool found = false; + for (int64_t i = 0; i < user_data.container->reflection_specialization_data.size(); i++) { + const ReflectionSpecializationData &sc = user_data.container->reflection_specialization_data[i]; + ReflectionSpecializationDataD3D12 &sc_d3d12 = user_data.container->reflection_specialization_data_d3d12.ptrw()[i]; + if (sc.constant_id != p_sc_id) { 
+ continue; + } + + uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[user_data.stage]; + DEV_ASSERT(sc_d3d12.stages_bit_offsets[offset_idx] == 0); + sc_d3d12.stages_bit_offsets[offset_idx] = p_bit_offset; + found = true; + break; + } + + DEV_ASSERT(found); +} + +void RenderingShaderContainerD3D12::_nir_report_bitcode_bit_offset(uint64_t p_bit_offset, void *p_data) { + DEV_ASSERT(p_bit_offset % 8 == 0); + + const GodotNirCallbackUserData &user_data = *(GodotNirCallbackUserData *)p_data; + uint32_t offset_idx = SHADER_STAGES_BIT_OFFSET_INDICES[user_data.stage]; + for (int64_t i = 0; i < user_data.container->reflection_specialization_data.size(); i++) { + ReflectionSpecializationDataD3D12 &sc_d3d12 = user_data.container->reflection_specialization_data_d3d12.ptrw()[i]; + if (sc_d3d12.stages_bit_offsets[offset_idx] == 0) { + // This SC has been optimized out from this stage. + continue; + } + + sc_d3d12.stages_bit_offsets[offset_idx] += p_bit_offset; + } +} +#endif + +void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection) { + reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size()); + reflection_specialization_data_d3d12.resize(reflection_specialization_data.size()); + + // Sort bindings inside each uniform set. This guarantees the root signature will be generated in the correct order. 
+ SortArray sorter; + uint32_t binding_start = 0; + for (uint32_t i = 0; i < reflection_binding_set_uniforms_count.size(); i++) { + uint32_t uniform_count = reflection_binding_set_uniforms_count[i]; + if (uniform_count > 0) { + sorter.sort(&reflection_binding_set_uniforms_data.ptrw()[binding_start], uniform_count); + binding_start += uniform_count; + } + } +} + +bool RenderingShaderContainerD3D12::_set_code_from_spirv(const Vector &p_spirv) { +#if NIR_ENABLED + reflection_data_d3d12.nir_runtime_data_root_param_idx = UINT32_MAX; + + for (int64_t i = 0; i < reflection_specialization_data.size(); i++) { + DEV_ASSERT(reflection_specialization_data[i].constant_id < (sizeof(reflection_data_d3d12.spirv_specialization_constants_ids_mask) * 8) && "Constant IDs with values above 31 are not supported."); + reflection_data_d3d12.spirv_specialization_constants_ids_mask |= (1 << reflection_specialization_data[i].constant_id); + } + + // Translate SPIR-V shaders to DXIL, and collect shader info from the new representation. + HashMap> dxil_blobs; + Vector stages; + BitField stages_processed = {}; + if (!_convert_spirv_to_dxil(p_spirv, dxil_blobs, stages, stages_processed)) { + return false; + } + + // Patch with default values of specialization constants. + DEV_ASSERT(reflection_specialization_data.size() == reflection_specialization_data_d3d12.size()); + for (int32_t i = 0; i < reflection_specialization_data.size(); i++) { + const ReflectionSpecializationData &sc = reflection_specialization_data[i]; + const ReflectionSpecializationDataD3D12 &sc_d3d12 = reflection_specialization_data_d3d12[i]; + RenderingDXIL::patch_specialization_constant((RenderingDeviceCommons::PipelineSpecializationConstantType)(sc.type), &sc.int_value, sc_d3d12.stages_bit_offsets, dxil_blobs, true); + } + + // Sign. + uint32_t shader_index = 0; + for (KeyValue> &E : dxil_blobs) { + RenderingDXIL::sign_bytecode(E.key, E.value); + } + + // Store compressed DXIL blobs as the shaders. 
+ shaders.resize(p_spirv.size()); + for (int64_t i = 0; i < shaders.size(); i++) { + const PackedByteArray &dxil_bytes = dxil_blobs[stages[i]]; + RenderingShaderContainer::Shader &shader = shaders.ptrw()[i]; + uint32_t compressed_size = 0; + shader.shader_stage = stages[i]; + shader.code_decompressed_size = dxil_bytes.size(); + shader.code_compressed_bytes.resize(dxil_bytes.size()); + + bool compressed = compress_code(dxil_bytes.ptr(), dxil_bytes.size(), shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags); + ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress native code to native for SPIR-V #%d.", shader_index)); + + shader.code_compressed_bytes.resize(compressed_size); + } + + if (!_generate_root_signature(stages_processed)) { + return false; + } + + return true; +#else + ERR_FAIL_V_MSG(false, "Shader compilation is not supported at runtime without NIR."); +#endif +} + +RenderingShaderContainerD3D12::RenderingShaderContainerD3D12() { + // Default empty constructor. +} + +RenderingShaderContainerD3D12::RenderingShaderContainerD3D12(void *p_lib_d3d12) { + lib_d3d12 = p_lib_d3d12; +} + +RenderingShaderContainerD3D12::ShaderReflectionD3D12 RenderingShaderContainerD3D12::get_shader_reflection_d3d12() const { + ShaderReflectionD3D12 reflection; + reflection.spirv_specialization_constants_ids_mask = reflection_data_d3d12.spirv_specialization_constants_ids_mask; + reflection.dxil_push_constant_stages = reflection_data_d3d12.dxil_push_constant_stages; + reflection.nir_runtime_data_root_param_idx = reflection_data_d3d12.nir_runtime_data_root_param_idx; + reflection.reflection_specialization_data_d3d12 = reflection_specialization_data_d3d12; + reflection.root_signature_bytes = root_signature_bytes; + reflection.root_signature_crc = root_signature_crc; + + // Transform data vector into a vector of vectors that's easier to user. 
+ uint32_t uniform_index = 0; + reflection.reflection_binding_set_uniforms_d3d12.resize(reflection_binding_set_uniforms_count.size()); + for (int64_t i = 0; i < reflection.reflection_binding_set_uniforms_d3d12.size(); i++) { + Vector &uniforms = reflection.reflection_binding_set_uniforms_d3d12.ptrw()[i]; + uniforms.resize(reflection_binding_set_uniforms_count[i]); + for (int64_t j = 0; j < uniforms.size(); j++) { + uniforms.ptrw()[j] = reflection_binding_set_uniforms_data_d3d12[uniform_index]; + uniform_index++; + } + } + + return reflection; +} + +// RenderingShaderContainerFormatD3D12 + +void RenderingShaderContainerFormatD3D12::set_lib_d3d12(void *p_lib_d3d12) { + lib_d3d12 = p_lib_d3d12; +} + +Ref RenderingShaderContainerFormatD3D12::create_container() const { + return memnew(RenderingShaderContainerD3D12(lib_d3d12)); +} + +RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatD3D12::get_shader_language_version() const { + // NIR-DXIL is Vulkan 1.1-conformant. + return SHADER_LANGUAGE_VULKAN_VERSION_1_1; +} + +RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatD3D12::get_shader_spirv_version() const { + // The SPIR-V part of Mesa supports 1.6, but: + // - SPIRV-Reflect won't be able to parse the compute workgroup size. + // - We want to play it safe with NIR-DXIL. 
+ return SHADER_SPIRV_VERSION_1_5; +} + +RenderingShaderContainerFormatD3D12::RenderingShaderContainerFormatD3D12() {} + +RenderingShaderContainerFormatD3D12::~RenderingShaderContainerFormatD3D12() {} diff --git a/drivers/d3d12/rendering_shader_container_d3d12.h b/drivers/d3d12/rendering_shader_container_d3d12.h new file mode 100644 index 00000000000..dccf67248f7 --- /dev/null +++ b/drivers/d3d12/rendering_shader_container_d3d12.h @@ -0,0 +1,179 @@ +/**************************************************************************/ +/* rendering_shader_container_d3d12.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "servers/rendering/rendering_shader_container.h" + +#define NIR_ENABLED 1 + +#ifdef SHADER_BAKER_RUNTIME_ENABLED +#undef NIR_ENABLED +#endif + +#include "d3d12_godot_nir_bridge.h" + +#define D3D12_BITCODE_OFFSETS_NUM_STAGES 3 + +#if NIR_ENABLED +struct nir_shader; +struct nir_shader_compiler_options; +#endif + +enum RootSignatureLocationType { + RS_LOC_TYPE_RESOURCE, + RS_LOC_TYPE_SAMPLER, +}; + +enum ResourceClass { + RES_CLASS_INVALID, + RES_CLASS_CBV, + RES_CLASS_SRV, + RES_CLASS_UAV, +}; + +struct RenderingDXIL { + static uint32_t patch_specialization_constant( + RenderingDeviceCommons::PipelineSpecializationConstantType p_type, + const void *p_value, + const uint64_t (&p_stages_bit_offsets)[D3D12_BITCODE_OFFSETS_NUM_STAGES], + HashMap> &r_stages_bytecodes, + bool p_is_first_patch); + + static void sign_bytecode(RenderingDeviceCommons::ShaderStage p_stage, Vector &r_dxil_blob); +}; + +class RenderingShaderContainerD3D12 : public RenderingShaderContainer { + GDSOFTCLASS(RenderingShaderContainerD3D12, RenderingShaderContainer); + +public: + static constexpr uint32_t REQUIRED_SHADER_MODEL = 0x62; // D3D_SHADER_MODEL_6_2 + static constexpr uint32_t ROOT_CONSTANT_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RenderingDeviceCommons::MAX_UNIFORM_SETS + 1); + static constexpr uint32_t RUNTIME_DATA_REGISTER = GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER * (RenderingDeviceCommons::MAX_UNIFORM_SETS + 2); + static constexpr uint32_t FORMAT_VERSION = 1; + static constexpr uint32_t SHADER_STAGES_BIT_OFFSET_INDICES[RenderingDeviceCommons::SHADER_STAGE_MAX] = { + 0, // SHADER_STAGE_VERTEX + 1, 
// SHADER_STAGE_FRAGMENT + UINT32_MAX, // SHADER_STAGE_TESSELATION_CONTROL + UINT32_MAX, // SHADER_STAGE_TESSELATION_EVALUATION + 2, // SHADER_STAGE_COMPUTE + }; + + struct RootSignatureLocation { + uint32_t root_param_index = UINT32_MAX; + uint32_t range_index = UINT32_MAX; + }; + + struct ReflectionBindingDataD3D12 { + uint32_t resource_class = 0; + uint32_t has_sampler = 0; + uint32_t dxil_stages = 0; + RootSignatureLocation root_signature_locations[2]; + }; + + struct ReflectionSpecializationDataD3D12 { + uint64_t stages_bit_offsets[D3D12_BITCODE_OFFSETS_NUM_STAGES] = {}; + }; + +protected: + struct ReflectionDataD3D12 { + uint32_t spirv_specialization_constants_ids_mask = 0; + uint32_t dxil_push_constant_stages = 0; + uint32_t nir_runtime_data_root_param_idx = 0; + }; + + struct ContainerFooterD3D12 { + uint32_t root_signature_length = 0; + uint32_t root_signature_crc = 0; + }; + + void *lib_d3d12 = nullptr; + ReflectionDataD3D12 reflection_data_d3d12; + Vector reflection_binding_set_uniforms_data_d3d12; + Vector reflection_specialization_data_d3d12; + Vector root_signature_bytes; + uint32_t root_signature_crc = 0; + +#if NIR_ENABLED + bool _convert_spirv_to_nir(const Vector &p_spirv, const nir_shader_compiler_options *p_compiler_options, HashMap &r_stages_nir_shaders, Vector &r_stages, BitField &r_stages_processed); + bool _convert_nir_to_dxil(const HashMap &p_stages_nir_shaders, BitField p_stages_processed, HashMap> &r_dxil_blobs); + bool _convert_spirv_to_dxil(const Vector &p_spirv, HashMap> &r_dxil_blobs, Vector &r_stages, BitField &r_stages_processed); + bool _generate_root_signature(BitField p_stages_processed); + + // GodotNirCallbacks. 
+ static void _nir_report_resource(uint32_t p_register, uint32_t p_space, uint32_t p_dxil_type, void *p_data); + static void _nir_report_sc_bit_offset(uint32_t p_sc_id, uint64_t p_bit_offset, void *p_data); + static void _nir_report_bitcode_bit_offset(uint64_t p_bit_offset, void *p_data); +#endif + + // RenderingShaderContainer overrides. + virtual uint32_t _format() const override; + virtual uint32_t _format_version() const override; + virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; + virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; + virtual uint32_t _from_bytes_footer_extra_data(const uint8_t *p_bytes) override; + virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const override; + virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; + virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; + virtual uint32_t _to_bytes_footer_extra_data(uint8_t *p_bytes) const override; + virtual void _set_from_shader_reflection_post(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection) override; + virtual bool _set_code_from_spirv(const Vector &p_spirv) override; + +public: + struct ShaderReflectionD3D12 { + uint32_t spirv_specialization_constants_ids_mask = 0; + uint32_t dxil_push_constant_stages = 0; + uint32_t nir_runtime_data_root_param_idx = 0; + Vector> reflection_binding_set_uniforms_d3d12; + Vector reflection_specialization_data_d3d12; + Vector 
root_signature_bytes; + uint32_t root_signature_crc = 0; + }; + + RenderingShaderContainerD3D12(); + RenderingShaderContainerD3D12(void *p_lib_d3d12); + ShaderReflectionD3D12 get_shader_reflection_d3d12() const; +}; + +class RenderingShaderContainerFormatD3D12 : public RenderingShaderContainerFormat { +protected: + void *lib_d3d12 = nullptr; + +public: + void set_lib_d3d12(void *p_lib_d3d12); + virtual Ref create_container() const override; + virtual ShaderLanguageVersion get_shader_language_version() const override; + virtual ShaderSpirvVersion get_shader_spirv_version() const override; + RenderingShaderContainerFormatD3D12(); + virtual ~RenderingShaderContainerFormatD3D12(); +}; diff --git a/drivers/gles3/shader_gles3.cpp b/drivers/gles3/shader_gles3.cpp index 9b4628d8a42..0fd25a61f86 100644 --- a/drivers/gles3/shader_gles3.cpp +++ b/drivers/gles3/shader_gles3.cpp @@ -129,12 +129,6 @@ void ShaderGLES3::_setup(const char *p_vertex_code, const char *p_fragment_code, feedback_count = p_feedback_count; StringBuilder tohash; - /* - tohash.append("[SpirvCacheKey]"); - tohash.append(RenderingDevice::get_singleton()->shader_get_spirv_cache_key()); - tohash.append("[BinaryCacheKey]"); - tohash.append(RenderingDevice::get_singleton()->shader_get_binary_cache_key()); - */ tohash.append("[Vertex]"); tohash.append(p_vertex_code ? 
p_vertex_code : ""); tohash.append("[Fragment]"); diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index 7ea54fe87aa..86a6ad83f4c 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -2164,7 +2164,7 @@ RID MaterialStorage::shader_allocate() { return shader_owner.allocate_rid(); } -void MaterialStorage::shader_initialize(RID p_rid) { +void MaterialStorage::shader_initialize(RID p_rid, bool p_embedded) { Shader shader; shader.data = nullptr; shader.mode = RS::SHADER_MAX; diff --git a/drivers/gles3/storage/material_storage.h b/drivers/gles3/storage/material_storage.h index 28097411d79..9f2b214f3e7 100644 --- a/drivers/gles3/storage/material_storage.h +++ b/drivers/gles3/storage/material_storage.h @@ -482,6 +482,7 @@ private: mutable RID_Owner material_owner; SelfList::List material_update_list; + HashSet dummy_embedded_set; public: static MaterialStorage *get_singleton(); @@ -574,7 +575,7 @@ public: void _shader_make_dirty(Shader *p_shader); virtual RID shader_allocate() override; - virtual void shader_initialize(RID p_rid) override; + virtual void shader_initialize(RID p_rid, bool p_embedded = true) override; virtual void shader_free(RID p_rid) override; virtual void shader_set_code(RID p_shader, const String &p_code) override; @@ -587,6 +588,9 @@ public: virtual Variant shader_get_parameter_default(RID p_shader, const StringName &p_name) const override; virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const override; + virtual void shader_embedded_set_lock() override {} + virtual const HashSet &shader_embedded_set_get() const override { return dummy_embedded_set; } + virtual void shader_embedded_set_unlock() override {} /* MATERIAL API */ diff --git a/drivers/metal/metal_device_properties.h b/drivers/metal/metal_device_properties.h index 49441b64750..720efd64e1f 100644 --- a/drivers/metal/metal_device_properties.h +++ 
b/drivers/metal/metal_device_properties.h @@ -70,7 +70,8 @@ typedef NS_OPTIONS(NSUInteger, SampleCount) { }; struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures { - uint32_t mslVersion = 0; + uint32_t mslVersionMajor = 0; + uint32_t mslVersionMinor = 0; MTLGPUFamily highestFamily = MTLGPUFamilyApple4; bool supportsBCTextureCompression = false; bool supportsDepth24Stencil8 = false; diff --git a/drivers/metal/metal_device_properties.mm b/drivers/metal/metal_device_properties.mm index 1d3b78a1b69..43946ede6e8 100644 --- a/drivers/metal/metal_device_properties.mm +++ b/drivers/metal/metal_device_properties.mm @@ -137,51 +137,8 @@ void MetalDeviceProperties::init_features(id p_device) { MTLCompileOptions *opts = [MTLCompileOptions new]; features.mslVersionEnum = opts.languageVersion; // By default, Metal uses the most recent language version. - -#define setMSLVersion(m_maj, m_min) \ - features.mslVersion = SPIRV_CROSS_NAMESPACE::CompilerMSL::Options::make_msl_version(m_maj, m_min) - - switch (features.mslVersionEnum) { -#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 150000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 180000 || __TV_OS_VERSION_MAX_ALLOWED >= 180000 || __VISION_OS_VERSION_MAX_ALLOWED >= 20000 - case MTLLanguageVersion3_2: - setMSLVersion(3, 2); - break; -#endif -#if __MAC_OS_X_VERSION_MAX_ALLOWED >= 140000 || __IPHONE_OS_VERSION_MAX_ALLOWED >= 170000 || __TV_OS_VERSION_MAX_ALLOWED >= 170000 - case MTLLanguageVersion3_1: - setMSLVersion(3, 1); - break; -#endif - case MTLLanguageVersion3_0: - setMSLVersion(3, 0); - break; - case MTLLanguageVersion2_4: - setMSLVersion(2, 4); - break; - case MTLLanguageVersion2_3: - setMSLVersion(2, 3); - break; - case MTLLanguageVersion2_2: - setMSLVersion(2, 2); - break; - case MTLLanguageVersion2_1: - setMSLVersion(2, 1); - break; - case MTLLanguageVersion2_0: - setMSLVersion(2, 0); - break; - case MTLLanguageVersion1_2: - setMSLVersion(1, 2); - break; - case MTLLanguageVersion1_1: - setMSLVersion(1, 1); - break; -#if 
TARGET_OS_IPHONE && !TARGET_OS_MACCATALYST && !TARGET_OS_VISION - case MTLLanguageVersion1_0: - setMSLVersion(1, 0); - break; -#endif - } + features.mslVersionMajor = (opts.languageVersion >> 0x10) & 0xff; + features.mslVersionMinor = (opts.languageVersion >> 0x00) & 0xff; } void MetalDeviceProperties::init_limits(id p_device) { diff --git a/drivers/metal/metal_objects.h b/drivers/metal/metal_objects.h index 00b407bf506..b89d4ba2e0f 100644 --- a/drivers/metal/metal_objects.h +++ b/drivers/metal/metal_objects.h @@ -53,6 +53,7 @@ #import "metal_device_properties.h" #import "metal_utils.h" #import "pixel_formats.h" +#import "sha256_digest.h" #include "servers/rendering/rendering_device_driver.h" @@ -82,9 +83,6 @@ MTL_CLASS(Texture) } //namespace MTL -/// Metal buffer index for the view mask when rendering multi-view. -const uint32_t VIEW_MASK_BUFFER_INDEX = 24; - enum ShaderStageUsage : uint32_t { None = 0, Vertex = RDD::SHADER_STAGE_VERTEX_BIT, @@ -574,34 +572,6 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) BindingInfo { desc.arrayLength = arrayLength; return desc; } - - size_t serialize_size() const { - return sizeof(uint32_t) * 8 /* 8 uint32_t fields */; - } - - template - void serialize(W &p_writer) const { - p_writer.write((uint32_t)dataType); - p_writer.write(index); - p_writer.write((uint32_t)access); - p_writer.write((uint32_t)usage); - p_writer.write((uint32_t)textureType); - p_writer.write(imageFormat); - p_writer.write(arrayLength); - p_writer.write(isMultisampled); - } - - template - void deserialize(R &p_reader) { - p_reader.read((uint32_t &)dataType); - p_reader.read(index); - p_reader.read((uint32_t &)access); - p_reader.read((uint32_t &)usage); - p_reader.read((uint32_t &)textureType); - p_reader.read((uint32_t &)imageFormat); - p_reader.read(arrayLength); - p_reader.read(isMultisampled); - } }; using RDC = RenderingDeviceCommons; @@ -635,39 +605,29 @@ enum class ShaderLoadStrategy { /// A Metal shader library. 
@interface MDLibrary : NSObject { ShaderCacheEntry *_entry; + NSString *_original_source; }; - (id)library; - (NSError *)error; - (void)setLabel:(NSString *)label; +#ifdef DEV_ENABLED +- (NSString *)originalSource; +#endif + (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry device:(id)device source:(NSString *)source options:(MTLCompileOptions *)options strategy:(ShaderLoadStrategy)strategy; + ++ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry + device:(id)device +#ifdef DEV_ENABLED + source:(NSString *)source +#endif + data:(dispatch_data_t)data; @end -struct SHA256Digest { - unsigned char data[CC_SHA256_DIGEST_LENGTH]; - - uint32_t hash() const { - uint32_t c = crc32(0, data, CC_SHA256_DIGEST_LENGTH); - return c; - } - - SHA256Digest() { - bzero(data, CC_SHA256_DIGEST_LENGTH); - } - - SHA256Digest(const char *p_data, size_t p_length) { - CC_SHA256(p_data, (CC_LONG)p_length, data); - } - - _FORCE_INLINE_ uint32_t short_sha() const { - return __builtin_bswap32(*(uint32_t *)&data[0]); - } -}; - template <> struct HashMapComparatorDefault { static bool compare(const SHA256Digest &p_lhs, const SHA256Digest &p_rhs) { @@ -717,9 +677,6 @@ public: MTLSize local = {}; MDLibrary *kernel; -#if DEV_ENABLED - CharString kernel_source; -#endif void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final; @@ -742,10 +699,6 @@ public: MDLibrary *vert; MDLibrary *frag; -#if DEV_ENABLED - CharString vert_source; - CharString frag_source; -#endif void encode_push_constant_data(VectorView p_data, MDCommandBuffer *p_cb) final; diff --git a/drivers/metal/metal_objects.mm b/drivers/metal/metal_objects.mm index 06f6d4bc0d8..f8056e217b4 100644 --- a/drivers/metal/metal_objects.mm +++ b/drivers/metal/metal_objects.mm @@ -53,6 +53,7 @@ #import "metal_utils.h" #import "pixel_formats.h" #import "rendering_device_driver_metal.h" +#import "rendering_shader_container_metal.h" #import @@ -1941,7 +1942,11 @@ void ShaderCacheEntry::notify_free() 
const { } @interface MDLibrary () -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry; +- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry +#ifdef DEV_ENABLED + source:(NSString *)source; +#endif +; @end /// Loads the MTLLibrary when the library is first accessed. @@ -1975,6 +1980,18 @@ void ShaderCacheEntry::notify_free() const { options:(MTLCompileOptions *)options; @end +@interface MDBinaryLibrary : MDLibrary { + id _library; + NSError *_error; +} +- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry + device:(id)device +#ifdef DEV_ENABLED + source:(NSString *)source +#endif + data:(dispatch_data_t)data; +@end + @implementation MDLibrary + (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry @@ -1992,6 +2009,26 @@ void ShaderCacheEntry::notify_free() const { } } ++ (instancetype)newLibraryWithCacheEntry:(ShaderCacheEntry *)entry + device:(id)device +#ifdef DEV_ENABLED + source:(NSString *)source +#endif + data:(dispatch_data_t)data { + return [[MDBinaryLibrary alloc] initWithCacheEntry:entry + device:device +#ifdef DEV_ENABLED + source:source +#endif + data:data]; +} + +#ifdef DEV_ENABLED +- (NSString *)originalSource { + return _original_source; +} +#endif + - (id)library { CRASH_NOW_MSG("Not implemented"); return nil; @@ -2005,10 +2042,17 @@ void ShaderCacheEntry::notify_free() const { - (void)setLabel:(NSString *)label { } -- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry { +- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry +#ifdef DEV_ENABLED + source:(NSString *)source +#endif +{ self = [super init]; _entry = entry; _entry->library = self; +#ifdef DEV_ENABLED + _original_source = source; +#endif return self; } @@ -2024,7 +2068,11 @@ void ShaderCacheEntry::notify_free() const { device:(id)device source:(NSString *)source options:(MTLCompileOptions *)options { - self = [super initWithCacheEntry:entry]; + self = [super initWithCacheEntry:entry +#ifdef DEV_ENABLED + source:source +#endif + ]; 
_complete = false; _ready = false; @@ -2076,7 +2124,11 @@ void ShaderCacheEntry::notify_free() const { device:(id)device source:(NSString *)source options:(MTLCompileOptions *)options { - self = [super initWithCacheEntry:entry]; + self = [super initWithCacheEntry:entry +#ifdef DEV_ENABLED + source:source +#endif + ]; _device = device; _source = source; _options = options; @@ -2121,3 +2173,36 @@ void ShaderCacheEntry::notify_free() const { } @end + +@implementation MDBinaryLibrary + +- (instancetype)initWithCacheEntry:(ShaderCacheEntry *)entry + device:(id)device +#ifdef DEV_ENABLED + source:(NSString *)source +#endif + data:(dispatch_data_t)data { + self = [super initWithCacheEntry:entry +#ifdef DEV_ENABLED + source:source +#endif + ]; + NSError *error = nil; + _library = [device newLibraryWithData:data error:&error]; + if (error != nil) { + _error = error; + NSString *desc = [error description]; + ERR_PRINT(vformat("Unable to load shader library: %s", desc.UTF8String)); + } + return self; +} + +- (id)library { + return _library; +} + +- (NSError *)error { + return _error; +} + +@end diff --git a/drivers/metal/rendering_device_driver_metal.h b/drivers/metal/rendering_device_driver_metal.h index 0fed3aacb64..61ee296a9d8 100644 --- a/drivers/metal/rendering_device_driver_metal.h +++ b/drivers/metal/rendering_device_driver_metal.h @@ -31,6 +31,7 @@ #pragma once #import "metal_objects.h" +#import "rendering_shader_container_metal.h" #include "servers/rendering/rendering_device_driver.h" @@ -57,9 +58,9 @@ class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet RenderingContextDriver::Device context_device; id device = nil; - uint32_t version_major = 2; - uint32_t version_minor = 0; MetalDeviceProperties *device_properties = nullptr; + MetalDeviceProfile device_profile; + RenderingShaderContainerFormatMetal *shader_container_format = nullptr; PixelFormats *pixel_formats = nullptr; std::unique_ptr resource_cache; @@ -77,7 +78,7 @@ class 
API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) RenderingDeviceDriverMet String pipeline_cache_id; Error _create_device(); - Error _check_capabilities(); + void _check_capabilities(); #pragma mark - Shader Cache @@ -241,21 +242,11 @@ private: friend struct ShaderBinaryData; friend struct PushConstantData; -private: - /// Contains additional metadata about the shader. - struct ShaderMeta { - /// Indicates whether the shader uses multiview. - bool has_multiview = false; - }; - - Error _reflect_spirv16(VectorView p_spirv, ShaderReflection &r_reflection, ShaderMeta &r_shader_meta); - public: - virtual String shader_get_binary_cache_key() override final; - virtual Vector shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) override final; - virtual ShaderID shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) override final; + virtual ShaderID shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) override final; virtual void shader_free(ShaderID p_shader) override final; virtual void shader_destroy_modules(ShaderID p_shader) override final; + virtual const RenderingShaderContainerFormat &get_shader_container_format() const override final; #pragma mark - Uniform Set diff --git a/drivers/metal/rendering_device_driver_metal.mm b/drivers/metal/rendering_device_driver_metal.mm index c21b7a98475..1a2957cb12c 100644 --- a/drivers/metal/rendering_device_driver_metal.mm +++ b/drivers/metal/rendering_device_driver_metal.mm @@ -52,6 +52,7 @@ #import "pixel_formats.h" #import "rendering_context_driver_metal.h" +#import "rendering_shader_container_metal.h" #include "core/io/compression.h" #include "core/io/marshalls.h" @@ -1093,1349 +1094,6 @@ void RenderingDeviceDriverMetal::framebuffer_free(FramebufferID p_framebuffer) { #pragma mark - Shader -const uint32_t SHADER_BINARY_VERSION = 4; - -// region Serialization - 
-class BufWriter; - -template -concept Serializable = requires(T t, BufWriter &p_writer) { - { - t.serialize_size() - } -> std::same_as; - { - t.serialize(p_writer) - } -> std::same_as; -}; - -class BufWriter { - uint8_t *data = nullptr; - uint64_t length = 0; // Length of data. - uint64_t pos = 0; - -public: - BufWriter(uint8_t *p_data, uint64_t p_length) : - data(p_data), length(p_length) {} - - template - void write(T const &p_value) { - p_value.serialize(*this); - } - - _FORCE_INLINE_ void write(uint32_t p_value) { - DEV_ASSERT(pos + sizeof(uint32_t) <= length); - pos += encode_uint32(p_value, data + pos); - } - - _FORCE_INLINE_ void write(RD::ShaderStage p_value) { - write((uint32_t)p_value); - } - - _FORCE_INLINE_ void write(bool p_value) { - DEV_ASSERT(pos + sizeof(uint8_t) <= length); - *(data + pos) = p_value ? 1 : 0; - pos += 1; - } - - _FORCE_INLINE_ void write(int p_value) { - write((uint32_t)p_value); - } - - _FORCE_INLINE_ void write(uint64_t p_value) { - DEV_ASSERT(pos + sizeof(uint64_t) <= length); - pos += encode_uint64(p_value, data + pos); - } - - _FORCE_INLINE_ void write(float p_value) { - DEV_ASSERT(pos + sizeof(float) <= length); - pos += encode_float(p_value, data + pos); - } - - _FORCE_INLINE_ void write(double p_value) { - DEV_ASSERT(pos + sizeof(double) <= length); - pos += encode_double(p_value, data + pos); - } - - void write_compressed(CharString const &p_string) { - write(p_string.length()); // Uncompressed size. - - DEV_ASSERT(pos + sizeof(uint32_t) + Compression::get_max_compressed_buffer_size(p_string.length(), Compression::MODE_ZSTD) <= length); - - // Save pointer for compressed size. - uint8_t *dst_size_ptr = data + pos; // Compressed size. 
- pos += sizeof(uint32_t); - - int dst_size = Compression::compress(data + pos, reinterpret_cast(p_string.ptr()), p_string.length(), Compression::MODE_ZSTD); - encode_uint32(dst_size, dst_size_ptr); - pos += dst_size; - } - - void write(CharString const &p_string) { - write_buffer(reinterpret_cast(p_string.ptr()), p_string.length()); - } - - template - void write(VectorView p_vector) { - write(p_vector.size()); - for (uint32_t i = 0; i < p_vector.size(); i++) { - T const &e = p_vector[i]; - write(e); - } - } - - void write(VectorView p_vector) { - write_buffer(p_vector.ptr(), p_vector.size()); - } - - template - void write(HashMap const &p_map) { - write(p_map.size()); - for (KeyValue const &e : p_map) { - write(e.key); - write(e.value); - } - } - - uint64_t get_pos() const { - return pos; - } - - uint64_t get_length() const { - return length; - } - -private: - void write_buffer(uint8_t const *p_buffer, uint32_t p_length) { - write(p_length); - - DEV_ASSERT(pos + p_length <= length); - memcpy(data + pos, p_buffer, p_length); - pos += p_length; - } -}; - -class BufReader; - -template -concept Deserializable = requires(T t, BufReader &p_reader) { - { - t.serialize_size() - } -> std::same_as; - { - t.deserialize(p_reader) - } -> std::same_as; -}; - -class BufReader { - uint8_t const *data = nullptr; - uint64_t length = 0; - uint64_t pos = 0; - - bool check_length(size_t p_size) { - if (status != Status::OK) { - return false; - } - - if (pos + p_size > length) { - status = Status::SHORT_BUFFER; - return false; - } - return true; - } - -#define CHECK(p_size) \ - if (!check_length(p_size)) \ - return - -public: - enum class Status { - OK, - SHORT_BUFFER, - BAD_COMPRESSION, - }; - - Status status = Status::OK; - - BufReader(uint8_t const *p_data, uint64_t p_length) : - data(p_data), length(p_length) {} - - template - void read(T &p_value) { - p_value.deserialize(*this); - } - - _FORCE_INLINE_ void read(uint32_t &p_val) { - CHECK(sizeof(uint32_t)); - - p_val = 
decode_uint32(data + pos); - pos += sizeof(uint32_t); - } - - _FORCE_INLINE_ void read(RD::ShaderStage &p_val) { - uint32_t val; - read(val); - p_val = (RD::ShaderStage)val; - } - - _FORCE_INLINE_ void read(bool &p_val) { - CHECK(sizeof(uint8_t)); - - p_val = *(data + pos) > 0; - pos += 1; - } - - _FORCE_INLINE_ void read(uint64_t &p_val) { - CHECK(sizeof(uint64_t)); - - p_val = decode_uint64(data + pos); - pos += sizeof(uint64_t); - } - - _FORCE_INLINE_ void read(float &p_val) { - CHECK(sizeof(float)); - - p_val = decode_float(data + pos); - pos += sizeof(float); - } - - _FORCE_INLINE_ void read(double &p_val) { - CHECK(sizeof(double)); - - p_val = decode_double(data + pos); - pos += sizeof(double); - } - - void read(CharString &p_val) { - uint32_t len; - read(len); - CHECK(len); - p_val.resize(len + 1 /* NUL */); - memcpy(p_val.ptrw(), data + pos, len); - p_val.set(len, 0); - pos += len; - } - - void read_compressed(CharString &p_val) { - uint32_t len; - read(len); - uint32_t comp_size; - read(comp_size); - - CHECK(comp_size); - - p_val.resize(len + 1 /* NUL */); - uint32_t bytes = (uint32_t)Compression::decompress(reinterpret_cast(p_val.ptrw()), len, data + pos, comp_size, Compression::MODE_ZSTD); - if (bytes != len) { - status = Status::BAD_COMPRESSION; - return; - } - p_val.set(len, 0); - pos += comp_size; - } - - void read(LocalVector &p_val) { - uint32_t len; - read(len); - CHECK(len); - p_val.resize(len); - memcpy(p_val.ptr(), data + pos, len); - pos += len; - } - - template - void read(LocalVector &p_val) { - uint32_t len; - read(len); - CHECK(len); - p_val.resize(len); - for (uint32_t i = 0; i < len; i++) { - read(p_val[i]); - } - } - - template - void read(HashMap &p_map) { - uint32_t len; - read(len); - CHECK(len); - p_map.reserve(len); - for (uint32_t i = 0; i < len; i++) { - K key; - read(key); - V value; - read(value); - p_map[key] = value; - } - } - -#undef CHECK -}; - -const uint32_t R32UI_ALIGNMENT_CONSTANT_ID = 65535; - -struct ComputeSize { - 
uint32_t x = 0; - uint32_t y = 0; - uint32_t z = 0; - - size_t serialize_size() const { - return sizeof(uint32_t) * 3; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(x); - p_writer.write(y); - p_writer.write(z); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(x); - p_reader.read(y); - p_reader.read(z); - } -}; - -struct ShaderStageData { - RD::ShaderStage stage = RD::ShaderStage::SHADER_STAGE_MAX; - uint32_t is_position_invariant = UINT32_MAX; - uint32_t supports_fast_math = UINT32_MAX; - CharString entry_point_name; - CharString source; - - size_t serialize_size() const { - int comp_size = Compression::get_max_compressed_buffer_size(source.length(), Compression::MODE_ZSTD); - return sizeof(uint32_t) // Stage. - + sizeof(uint32_t) // is_position_invariant - + sizeof(uint32_t) // supports_fast_math - + sizeof(uint32_t) /* entry_point_name.utf8().length */ - + entry_point_name.length() + sizeof(uint32_t) /* uncompressed size */ + sizeof(uint32_t) /* compressed size */ + comp_size; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write((uint32_t)stage); - p_writer.write(is_position_invariant); - p_writer.write(supports_fast_math); - p_writer.write(entry_point_name); - p_writer.write_compressed(source); - } - - void deserialize(BufReader &p_reader) { - p_reader.read((uint32_t &)stage); - p_reader.read(is_position_invariant); - p_reader.read(supports_fast_math); - p_reader.read(entry_point_name); - p_reader.read_compressed(source); - } -}; - -struct SpecializationConstantData { - uint32_t constant_id = UINT32_MAX; - RD::PipelineSpecializationConstantType type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT; - ShaderStageUsage stages = ShaderStageUsage::None; - // Specifies the stages the constant is used by Metal. 
- ShaderStageUsage used_stages = ShaderStageUsage::None; - uint32_t int_value = UINT32_MAX; - - size_t serialize_size() const { - return sizeof(constant_id) + sizeof(uint32_t) // type - + sizeof(stages) + sizeof(used_stages) // used_stages - + sizeof(int_value); // int_value - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(constant_id); - p_writer.write((uint32_t)type); - p_writer.write(stages); - p_writer.write(used_stages); - p_writer.write(int_value); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(constant_id); - p_reader.read((uint32_t &)type); - p_reader.read((uint32_t &)stages); - p_reader.read((uint32_t &)used_stages); - p_reader.read(int_value); - } -}; - -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformData { - RD::UniformType type = RD::UniformType::UNIFORM_TYPE_MAX; - uint32_t binding = UINT32_MAX; - bool writable = false; - uint32_t length = UINT32_MAX; - ShaderStageUsage stages = ShaderStageUsage::None; - // Specifies the stages the uniform data is - // used by the Metal shader. - ShaderStageUsage active_stages = ShaderStageUsage::None; - BindingInfoMap bindings; - BindingInfoMap bindings_secondary; - - size_t serialize_size() const { - size_t size = 0; - size += sizeof(uint32_t); // type - size += sizeof(uint32_t); // binding - size += sizeof(uint32_t); // writable - size += sizeof(uint32_t); // length - size += sizeof(uint32_t); // stages - size += sizeof(uint32_t); // active_stages - size += sizeof(uint32_t); // bindings.size() - size += sizeof(uint32_t) * bindings.size(); // Total size of keys. - for (KeyValue const &e : bindings) { - size += e.value.serialize_size(); - } - size += sizeof(uint32_t); // bindings_secondary.size() - size += sizeof(uint32_t) * bindings_secondary.size(); // Total size of keys. 
- for (KeyValue const &e : bindings_secondary) { - size += e.value.serialize_size(); - } - return size; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write((uint32_t)type); - p_writer.write(binding); - p_writer.write(writable); - p_writer.write(length); - p_writer.write(stages); - p_writer.write(active_stages); - p_writer.write(bindings); - p_writer.write(bindings_secondary); - } - - void deserialize(BufReader &p_reader) { - p_reader.read((uint32_t &)type); - p_reader.read(binding); - p_reader.read(writable); - p_reader.read(length); - p_reader.read((uint32_t &)stages); - p_reader.read((uint32_t &)active_stages); - p_reader.read(bindings); - p_reader.read(bindings_secondary); - } -}; - -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) UniformSetData { - uint32_t index = UINT32_MAX; - LocalVector uniforms; - - size_t serialize_size() const { - size_t size = 0; - size += sizeof(uint32_t); // index - size += sizeof(uint32_t); // uniforms.size() - for (UniformData const &e : uniforms) { - size += e.serialize_size(); - } - return size; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(index); - p_writer.write(VectorView(uniforms)); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(index); - p_reader.read(uniforms); - } - UniformSetData() = default; - UniformSetData(uint32_t p_index) : - index(p_index) {} -}; - -struct PushConstantData { - uint32_t size = UINT32_MAX; - ShaderStageUsage stages = ShaderStageUsage::None; - ShaderStageUsage used_stages = ShaderStageUsage::None; - HashMap msl_binding; - - size_t serialize_size() const { - return sizeof(uint32_t) // size - + sizeof(uint32_t) // stages - + sizeof(uint32_t) // used_stages - + sizeof(uint32_t) // msl_binding.size() - + sizeof(uint32_t) * msl_binding.size() // keys - + sizeof(uint32_t) * msl_binding.size(); // values - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(size); - p_writer.write((uint32_t)stages); - 
p_writer.write((uint32_t)used_stages); - p_writer.write(msl_binding); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(size); - p_reader.read((uint32_t &)stages); - p_reader.read((uint32_t &)used_stages); - p_reader.read(msl_binding); - } -}; - -struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) ShaderBinaryData { - enum Flags : uint32_t { - NONE = 0, - NEEDS_VIEW_MASK_BUFFER = 1 << 0, - USES_ARGUMENT_BUFFERS = 1 << 1, - }; - CharString shader_name; - // The Metal language version specified when compiling SPIR-V to MSL. - // Format is major * 10000 + minor * 100 + patch. - uint32_t msl_version = UINT32_MAX; - uint32_t vertex_input_mask = UINT32_MAX; - uint32_t fragment_output_mask = UINT32_MAX; - uint32_t spirv_specialization_constants_ids_mask = UINT32_MAX; - uint32_t flags = NONE; - ComputeSize compute_local_size; - PushConstantData push_constant; - LocalVector stages; - LocalVector constants; - LocalVector uniforms; - - MTLLanguageVersion get_msl_version() const { - uint32_t major = msl_version / 10000; - uint32_t minor = (msl_version / 100) % 100; - return MTLLanguageVersion((major << 0x10) + minor); - } - - bool is_compute() const { - return std::any_of(stages.begin(), stages.end(), [](ShaderStageData const &e) { - return e.stage == RD::ShaderStage::SHADER_STAGE_COMPUTE; - }); - } - - bool needs_view_mask_buffer() const { - return flags & NEEDS_VIEW_MASK_BUFFER; - } - - void set_needs_view_mask_buffer(bool p_value) { - if (p_value) { - flags |= NEEDS_VIEW_MASK_BUFFER; - } else { - flags &= ~NEEDS_VIEW_MASK_BUFFER; - } - } - - bool uses_argument_buffers() const { - return flags & USES_ARGUMENT_BUFFERS; - } - - void set_uses_argument_buffers(bool p_value) { - if (p_value) { - flags |= USES_ARGUMENT_BUFFERS; - } else { - flags &= ~USES_ARGUMENT_BUFFERS; - } - } - - size_t serialize_size() const { - size_t size = 0; - size += sizeof(uint32_t) + shader_name.length(); // shader_name - size += sizeof(msl_version); - size += 
sizeof(vertex_input_mask); - size += sizeof(fragment_output_mask); - size += sizeof(spirv_specialization_constants_ids_mask); - size += sizeof(flags); - size += compute_local_size.serialize_size(); - size += push_constant.serialize_size(); - size += sizeof(uint32_t); // stages.size() - for (ShaderStageData const &e : stages) { - size += e.serialize_size(); - } - size += sizeof(uint32_t); // constants.size() - for (SpecializationConstantData const &e : constants) { - size += e.serialize_size(); - } - size += sizeof(uint32_t); // uniforms.size() - for (UniformSetData const &e : uniforms) { - size += e.serialize_size(); - } - return size; - } - - void serialize(BufWriter &p_writer) const { - p_writer.write(shader_name); - p_writer.write(msl_version); - p_writer.write(vertex_input_mask); - p_writer.write(fragment_output_mask); - p_writer.write(spirv_specialization_constants_ids_mask); - p_writer.write(flags); - p_writer.write(compute_local_size); - p_writer.write(push_constant); - p_writer.write(VectorView(stages)); - p_writer.write(VectorView(constants)); - p_writer.write(VectorView(uniforms)); - } - - void deserialize(BufReader &p_reader) { - p_reader.read(shader_name); - p_reader.read(msl_version); - p_reader.read(vertex_input_mask); - p_reader.read(fragment_output_mask); - p_reader.read(spirv_specialization_constants_ids_mask); - p_reader.read(flags); - p_reader.read(compute_local_size); - p_reader.read(push_constant); - p_reader.read(stages); - p_reader.read(constants); - p_reader.read(uniforms); - } -}; - -// endregion - -String RenderingDeviceDriverMetal::shader_get_binary_cache_key() { - static const String cache_key = "Metal-SV" + uitos(SHADER_BINARY_VERSION); - return cache_key; -} - -Error RenderingDeviceDriverMetal::_reflect_spirv16(VectorView p_spirv, ShaderReflection &r_reflection, ShaderMeta &r_shader_meta) { - using namespace spirv_cross; - using spirv_cross::Resource; - - r_reflection = {}; - r_shader_meta = {}; - - for (uint32_t i = 0; i < 
p_spirv.size(); i++) { - ShaderStageSPIRVData const &v = p_spirv[i]; - ShaderStage stage = v.shader_stage; - uint32_t const *const ir = reinterpret_cast(v.spirv.ptr()); - size_t word_count = v.spirv.size() / sizeof(uint32_t); - Parser parser(ir, word_count); - try { - parser.parse(); - } catch (CompilerError &e) { - ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Failed to parse IR at stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what()); - } - - ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); - - if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) { - r_reflection.is_compute = true; - ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, FAILED, - "Compute shaders can only receive one stage, dedicated to compute."); - } - ERR_FAIL_COND_V_MSG(r_reflection.stages.has_flag(stage_flag), FAILED, - "Stage " + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once."); - - ParsedIR &pir = parser.get_parsed_ir(); - using BT = SPIRType::BaseType; - - Compiler compiler(std::move(pir)); - - if (r_reflection.is_compute) { - r_reflection.compute_local_size[0] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 0); - r_reflection.compute_local_size[1] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 1); - r_reflection.compute_local_size[2] = compiler.get_execution_mode_argument(spv::ExecutionModeLocalSize, 2); - } - - // Parse bindings. - - auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) { - uint32_t res = -1; - if (compiler.has_decoration(id, decoration)) { - res = compiler.get_decoration(id, decoration); - } - return res; - }; - - // Always clearer than a boolean. 
- enum class Writable { - No, - Maybe, - }; - - // clang-format off - enum { - SPIRV_WORD_SIZE = sizeof(uint32_t), - SPIRV_DATA_ALIGNMENT = 4 * SPIRV_WORD_SIZE, - }; - // clang-format on - - auto process_uniforms = [&r_reflection, &compiler, &get_decoration, stage, stage_flag](SmallVector &resources, Writable writable, std::function uniform_type) { - for (Resource const &res : resources) { - ShaderUniform uniform; - - std::string const &name = compiler.get_name(res.id); - uint32_t set = get_decoration(res.id, spv::DecorationDescriptorSet); - ERR_FAIL_COND_V_MSG(set == (uint32_t)-1, FAILED, "No descriptor set found"); - ERR_FAIL_COND_V_MSG(set >= MAX_UNIFORM_SETS, FAILED, "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ")."); - - uniform.binding = get_decoration(res.id, spv::DecorationBinding); - ERR_FAIL_COND_V_MSG(uniform.binding == (uint32_t)-1, FAILED, "No binding found"); - - SPIRType const &a_type = compiler.get_type(res.type_id); - uniform.type = uniform_type(a_type); - - // Update length. - switch (a_type.basetype) { - case BT::Struct: { - if (uniform.type == UNIFORM_TYPE_STORAGE_BUFFER) { - // Consistent with spirv_reflect. - uniform.length = 0; - } else { - uniform.length = round_up_to_alignment(compiler.get_declared_struct_size(a_type), SPIRV_DATA_ALIGNMENT); - } - } break; - case BT::Image: - case BT::Sampler: - case BT::SampledImage: { - uniform.length = 1; - for (uint32_t const &a : a_type.array) { - uniform.length *= a; - } - } break; - default: - break; - } - - // Update writable. 
- if (writable == Writable::Maybe) { - if (a_type.basetype == BT::Struct) { - Bitset flags = compiler.get_buffer_block_flags(res.id); - uniform.writable = !compiler.has_decoration(res.id, spv::DecorationNonWritable) && !flags.get(spv::DecorationNonWritable); - } else if (a_type.basetype == BT::Image) { - if (a_type.image.access == spv::AccessQualifierMax) { - uniform.writable = !compiler.has_decoration(res.id, spv::DecorationNonWritable); - } else { - uniform.writable = a_type.image.access != spv::AccessQualifierReadOnly; - } - } - } - - if (set < (uint32_t)r_reflection.uniform_sets.size()) { - // Check if this already exists. - bool exists = false; - for (uint32_t k = 0; k < r_reflection.uniform_sets[set].size(); k++) { - if (r_reflection.uniform_sets[set][k].binding == uniform.binding) { - // Already exists, verify that it's the same type. - ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].type != uniform.type, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform type."); - - // Also, verify that it's the same size. - ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].length != uniform.length, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform size."); - - // Also, verify that it has the same writability. - ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].writable != uniform.writable, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + name.c_str() + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different writability."); - - // Just append stage mask and continue. 
- r_reflection.uniform_sets.write[set].write[k].stages.set_flag(stage_flag); - exists = true; - break; - } - } - - if (exists) { - continue; // Merged. - } - } - - uniform.stages.set_flag(stage_flag); - - if (set >= (uint32_t)r_reflection.uniform_sets.size()) { - r_reflection.uniform_sets.resize(set + 1); - } - - r_reflection.uniform_sets.write[set].push_back(uniform); - } - - return OK; - }; - - ShaderResources resources = compiler.get_shader_resources(); - - process_uniforms(resources.uniform_buffers, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Struct); - return UNIFORM_TYPE_UNIFORM_BUFFER; - }); - - process_uniforms(resources.storage_buffers, Writable::Maybe, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Struct); - return UNIFORM_TYPE_STORAGE_BUFFER; - }); - - process_uniforms(resources.storage_images, Writable::Maybe, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Image); - if (a_type.image.dim == spv::DimBuffer) { - return UNIFORM_TYPE_IMAGE_BUFFER; - } else { - return UNIFORM_TYPE_IMAGE; - } - }); - - process_uniforms(resources.sampled_images, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::SampledImage); - return UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - }); - - process_uniforms(resources.separate_images, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Image); - if (a_type.image.dim == spv::DimBuffer) { - return UNIFORM_TYPE_TEXTURE_BUFFER; - } else { - return UNIFORM_TYPE_TEXTURE; - } - }); - - process_uniforms(resources.separate_samplers, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Sampler); - return UNIFORM_TYPE_SAMPLER; - }); - - process_uniforms(resources.subpass_inputs, Writable::No, [](SPIRType const &a_type) { - DEV_ASSERT(a_type.basetype == BT::Image && a_type.image.dim == spv::DimSubpassData); - return UNIFORM_TYPE_INPUT_ATTACHMENT; - }); - - if (!resources.push_constant_buffers.empty()) 
{ - // There can be only one push constant block. - Resource const &res = resources.push_constant_buffers.front(); - - size_t push_constant_size = round_up_to_alignment(compiler.get_declared_struct_size(compiler.get_type(res.base_type_id)), SPIRV_DATA_ALIGNMENT); - ERR_FAIL_COND_V_MSG(r_reflection.push_constant_size && r_reflection.push_constant_size != push_constant_size, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "': Push constant block must be the same across shader stages."); - - r_reflection.push_constant_size = push_constant_size; - r_reflection.push_constant_stages.set_flag(stage_flag); - } - - ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), FAILED, "Atomic counters not supported"); - ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), FAILED, "Acceleration structures not supported"); - ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), FAILED, "Shader record buffers not supported"); - - if (stage == SHADER_STAGE_VERTEX && !resources.stage_inputs.empty()) { - for (Resource const &res : resources.stage_inputs) { - SPIRType a_type = compiler.get_type(res.base_type_id); - uint32_t loc = get_decoration(res.id, spv::DecorationLocation); - if (loc != (uint32_t)-1) { - r_reflection.vertex_input_mask |= 1 << loc; - } - } - } - - if (stage == SHADER_STAGE_FRAGMENT && !resources.stage_outputs.empty()) { - for (Resource const &res : resources.stage_outputs) { - SPIRType a_type = compiler.get_type(res.base_type_id); - uint32_t loc = get_decoration(res.id, spv::DecorationLocation); - uint32_t built_in = spv::BuiltIn(get_decoration(res.id, spv::DecorationBuiltIn)); - if (loc != (uint32_t)-1 && built_in != spv::BuiltInFragDepth) { - r_reflection.fragment_output_mask |= 1 << loc; - } - } - } - - for (const BuiltInResource &res : resources.builtin_inputs) { - if (res.builtin == spv::BuiltInViewIndex || res.builtin == spv::BuiltInViewportIndex) { - r_shader_meta.has_multiview = true; - 
} - } - - if (!r_shader_meta.has_multiview) { - for (const BuiltInResource &res : resources.builtin_outputs) { - if (res.builtin == spv::BuiltInViewIndex || res.builtin == spv::BuiltInViewportIndex) { - r_shader_meta.has_multiview = true; - } - } - } - - // Specialization constants. - for (SpecializationConstant const &constant : compiler.get_specialization_constants()) { - int32_t existing = -1; - ShaderSpecializationConstant sconst; - SPIRConstant &spc = compiler.get_constant(constant.id); - SPIRType const &spct = compiler.get_type(spc.constant_type); - - sconst.constant_id = constant.constant_id; - sconst.int_value = 0; - - switch (spct.basetype) { - case BT::Boolean: { - sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL; - sconst.bool_value = spc.scalar() != 0; - } break; - case BT::Int: - case BT::UInt: { - sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; - sconst.int_value = spc.scalar(); - } break; - case BT::Float: { - sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT; - sconst.float_value = spc.scalar_f32(); - } break; - default: - ERR_FAIL_V_MSG(FAILED, "Unsupported specialization constant type"); - } - sconst.stages.set_flag(stage_flag); - - for (uint32_t k = 0; k < r_reflection.specialization_constants.size(); k++) { - if (r_reflection.specialization_constants[k].constant_id == sconst.constant_id) { - ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].type != sconst.type, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ."); - ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].int_value != sconst.int_value, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ."); - existing = k; - break; - } - } - - if (existing > 0) { - r_reflection.specialization_constants.write[existing].stages.set_flag(stage_flag); - } else { - r_reflection.specialization_constants.push_back(sconst); 
- } - } - - r_reflection.stages.set_flag(stage_flag); - } - - // Sort all uniform_sets. - for (uint32_t i = 0; i < r_reflection.uniform_sets.size(); i++) { - r_reflection.uniform_sets.write[i].sort(); - } - - return OK; -} - -Vector RenderingDeviceDriverMetal::shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) { - using Result = ::Vector; - using namespace spirv_cross; - using spirv_cross::CompilerMSL; - using spirv_cross::Resource; - - ShaderReflection spirv_data; - ShaderMeta shader_meta; - ERR_FAIL_COND_V(_reflect_spirv16(p_spirv, spirv_data, shader_meta), Result()); - - ShaderBinaryData bin_data{}; - if (!p_shader_name.is_empty()) { - bin_data.shader_name = p_shader_name.utf8(); - } else { - bin_data.shader_name = "unnamed"; - } - - bin_data.vertex_input_mask = spirv_data.vertex_input_mask; - bin_data.fragment_output_mask = spirv_data.fragment_output_mask; - bin_data.compute_local_size = ComputeSize{ - .x = spirv_data.compute_local_size[0], - .y = spirv_data.compute_local_size[1], - .z = spirv_data.compute_local_size[2], - }; - bin_data.push_constant.size = spirv_data.push_constant_size; - bin_data.push_constant.stages = (ShaderStageUsage)(uint8_t)spirv_data.push_constant_stages; - bin_data.set_needs_view_mask_buffer(shader_meta.has_multiview); - - for (uint32_t i = 0; i < spirv_data.uniform_sets.size(); i++) { - const ::Vector &spirv_set = spirv_data.uniform_sets[i]; - UniformSetData set(i); - for (const ShaderUniform &spirv_uniform : spirv_set) { - UniformData binding{}; - binding.type = spirv_uniform.type; - binding.binding = spirv_uniform.binding; - binding.writable = spirv_uniform.writable; - binding.stages = (ShaderStageUsage)(uint8_t)spirv_uniform.stages; - binding.length = spirv_uniform.length; - set.uniforms.push_back(binding); - } - bin_data.uniforms.push_back(set); - } - - for (const ShaderSpecializationConstant &spirv_sc : spirv_data.specialization_constants) { - SpecializationConstantData spec_constant{}; - 
spec_constant.type = spirv_sc.type; - spec_constant.constant_id = spirv_sc.constant_id; - spec_constant.int_value = spirv_sc.int_value; - spec_constant.stages = (ShaderStageUsage)(uint8_t)spirv_sc.stages; - bin_data.constants.push_back(spec_constant); - bin_data.spirv_specialization_constants_ids_mask |= (1 << spirv_sc.constant_id); - } - - // Reflection using SPIRV-Cross: - // https://github.com/KhronosGroup/SPIRV-Cross/wiki/Reflection-API-user-guide - - CompilerMSL::Options msl_options{}; - msl_options.set_msl_version(version_major, version_minor); - bin_data.msl_version = msl_options.msl_version; -#if TARGET_OS_OSX - msl_options.platform = CompilerMSL::Options::macOS; -#else - msl_options.platform = CompilerMSL::Options::iOS; -#endif - -#if TARGET_OS_IPHONE - msl_options.ios_use_simdgroup_functions = (*device_properties).features.simdPermute; - msl_options.ios_support_base_vertex_instance = true; -#endif - - bool disable_argument_buffers = false; - if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") { - disable_argument_buffers = true; - } - - if (device_properties->features.argument_buffers_tier >= MTLArgumentBuffersTier2 && !disable_argument_buffers) { - msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2; - msl_options.argument_buffers = true; - bin_data.set_uses_argument_buffers(true); - } else { - msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier1; - // Tier 1 argument buffers don't support writable textures, so we disable them completely. - msl_options.argument_buffers = false; - bin_data.set_uses_argument_buffers(false); - } - msl_options.force_active_argument_buffer_resources = true; - // We can't use this, as we have to add the descriptor sets via compiler.add_msl_resource_binding. - // msl_options.pad_argument_buffer_resources = true; - msl_options.texture_buffer_native = true; // Enable texture buffer support. 
- msl_options.use_framebuffer_fetch_subpasses = false; - msl_options.pad_fragment_output_components = true; - msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID; - msl_options.agx_manual_cube_grad_fixup = true; - if (shader_meta.has_multiview) { - msl_options.multiview = true; - msl_options.multiview_layered_rendering = true; - msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX; - } - - CompilerGLSL::Options options{}; - options.vertex.flip_vert_y = true; -#if DEV_ENABLED - options.emit_line_directives = true; -#endif - - for (uint32_t i = 0; i < p_spirv.size(); i++) { - ShaderStageSPIRVData const &v = p_spirv[i]; - ShaderStage stage = v.shader_stage; - char const *stage_name = SHADER_STAGE_NAMES[stage]; - uint32_t const *const ir = reinterpret_cast(v.spirv.ptr()); - size_t word_count = v.spirv.size() / sizeof(uint32_t); - Parser parser(ir, word_count); - try { - parser.parse(); - } catch (CompilerError &e) { - ERR_FAIL_V_MSG(Result(), "Failed to parse IR at stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what()); - } - - CompilerMSL compiler(std::move(parser.get_parsed_ir())); - compiler.set_msl_options(msl_options); - compiler.set_common_options(options); - - std::unordered_set active = compiler.get_active_interface_variables(); - ShaderResources resources = compiler.get_shader_resources(); - - std::string source; - try { - source = compiler.compile(); - } catch (CompilerError &e) { - ERR_FAIL_V_MSG(Result(), "Failed to compile stage " + String(SHADER_STAGE_NAMES[stage]) + ": " + e.what()); - } - - ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, Result(), "Expected a single entry point and stage."); - - SmallVector entry_pts_stages = compiler.get_entry_points_and_stages(); - EntryPoint &entry_point_stage = entry_pts_stages.front(); - SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model); - - // Process specialization constants. 
- if (!compiler.get_specialization_constants().empty()) { - for (SpecializationConstant const &constant : compiler.get_specialization_constants()) { - LocalVector::Iterator res = bin_data.constants.begin(); - while (res != bin_data.constants.end()) { - if (res->constant_id == constant.constant_id) { - res->used_stages |= 1 << stage; - break; - } - ++res; - } - if (res == bin_data.constants.end()) { - WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id)); - } - } - } - - // Process bindings. - - LocalVector &uniform_sets = bin_data.uniforms; - using BT = SPIRType::BaseType; - - // Always clearer than a boolean. - enum class Writable { - No, - Maybe, - }; - - // Returns a std::optional containing the value of the - // decoration, if it exists. - auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) { - uint32_t res = -1; - if (compiler.has_decoration(id, decoration)) { - res = compiler.get_decoration(id, decoration); - } - return res; - }; - - auto descriptor_bindings = [&compiler, &active, &uniform_sets, stage, &get_decoration](SmallVector &p_resources, Writable p_writable) { - for (Resource const &res : p_resources) { - uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet); - uint32_t dbin = get_decoration(res.id, spv::DecorationBinding); - UniformData *found = nullptr; - if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets.size()) { - UniformSetData &set = uniform_sets[dset]; - LocalVector::Iterator pos = set.uniforms.begin(); - while (pos != set.uniforms.end()) { - if (dbin == pos->binding) { - found = &(*pos); - break; - } - ++pos; - } - } - - ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found"); - - bool is_active = active.find(res.id) != active.end(); - if (is_active) { - found->active_stages |= 1 << stage; - } - - BindingInfo primary{}; - - SPIRType const &a_type = compiler.get_type(res.type_id); - BT basetype = a_type.basetype; - - switch 
(basetype) { - case BT::Struct: { - primary.dataType = MTLDataTypePointer; - } break; - - case BT::Image: - case BT::SampledImage: { - primary.dataType = MTLDataTypeTexture; - } break; - - case BT::Sampler: { - primary.dataType = MTLDataTypeSampler; - primary.arrayLength = 1; - for (uint32_t const &a : a_type.array) { - primary.arrayLength *= a; - } - } break; - - default: { - ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType"); - } break; - } - - // Find array length of image. - if (basetype == BT::Image || basetype == BT::SampledImage) { - primary.arrayLength = 1; - for (uint32_t const &a : a_type.array) { - primary.arrayLength *= a; - } - primary.isMultisampled = a_type.image.ms; - - SPIRType::ImageType const &image = a_type.image; - primary.imageFormat = image.format; - - switch (image.dim) { - case spv::Dim1D: { - if (image.arrayed) { - primary.textureType = MTLTextureType1DArray; - } else { - primary.textureType = MTLTextureType1D; - } - } break; - case spv::DimSubpassData: { - DISPATCH_FALLTHROUGH; - } - case spv::Dim2D: { - if (image.arrayed && image.ms) { - primary.textureType = MTLTextureType2DMultisampleArray; - } else if (image.arrayed) { - primary.textureType = MTLTextureType2DArray; - } else if (image.ms) { - primary.textureType = MTLTextureType2DMultisample; - } else { - primary.textureType = MTLTextureType2D; - } - } break; - case spv::Dim3D: { - primary.textureType = MTLTextureType3D; - } break; - case spv::DimCube: { - if (image.arrayed) { - primary.textureType = MTLTextureTypeCube; - } - } break; - case spv::DimRect: { - } break; - case spv::DimBuffer: { - // VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER - primary.textureType = MTLTextureTypeTextureBuffer; - } break; - case spv::DimMax: { - // Add all enumerations to silence the compiler warning - // and generate future warnings, should a new one be added. - } break; - } - } - - // Update writable. 
- if (p_writable == Writable::Maybe) { - if (basetype == BT::Struct) { - Bitset flags = compiler.get_buffer_block_flags(res.id); - if (!flags.get(spv::DecorationNonWritable)) { - if (flags.get(spv::DecorationNonReadable)) { - primary.access = MTLBindingAccessWriteOnly; - } else { - primary.access = MTLBindingAccessReadWrite; - } - } - } else if (basetype == BT::Image) { - switch (a_type.image.access) { - case spv::AccessQualifierWriteOnly: - primary.access = MTLBindingAccessWriteOnly; - break; - case spv::AccessQualifierReadWrite: - primary.access = MTLBindingAccessReadWrite; - break; - case spv::AccessQualifierReadOnly: - break; - case spv::AccessQualifierMax: - DISPATCH_FALLTHROUGH; - default: - if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) { - if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) { - primary.access = MTLBindingAccessWriteOnly; - } else { - primary.access = MTLBindingAccessReadWrite; - } - } - break; - } - } - } - - switch (primary.access) { - case MTLBindingAccessReadOnly: - primary.usage = MTLResourceUsageRead; - break; - case MTLBindingAccessWriteOnly: - primary.usage = MTLResourceUsageWrite; - break; - case MTLBindingAccessReadWrite: - primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite; - break; - } - - primary.index = compiler.get_automatic_msl_resource_binding(res.id); - - found->bindings[stage] = primary; - - // A sampled image contains two bindings, the primary - // is to the image, and the secondary is to the associated sampler. - if (basetype == BT::SampledImage) { - uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); - if (binding != (uint32_t)-1) { - found->bindings_secondary[stage] = BindingInfo{ - .dataType = MTLDataTypeSampler, - .index = binding, - .access = MTLBindingAccessReadOnly, - }; - } - } - - // An image may have a secondary binding if it is used - // for atomic operations. 
- if (basetype == BT::Image) { - uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); - if (binding != (uint32_t)-1) { - found->bindings_secondary[stage] = BindingInfo{ - .dataType = MTLDataTypePointer, - .index = binding, - .access = MTLBindingAccessReadWrite, - }; - } - } - } - return Error::OK; - }; - - if (!resources.uniform_buffers.empty()) { - Error err = descriptor_bindings(resources.uniform_buffers, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.storage_buffers.empty()) { - Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.storage_images.empty()) { - Error err = descriptor_bindings(resources.storage_images, Writable::Maybe); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.sampled_images.empty()) { - Error err = descriptor_bindings(resources.sampled_images, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.separate_images.empty()) { - Error err = descriptor_bindings(resources.separate_images, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.separate_samplers.empty()) { - Error err = descriptor_bindings(resources.separate_samplers, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - if (!resources.subpass_inputs.empty()) { - Error err = descriptor_bindings(resources.subpass_inputs, Writable::No); - ERR_FAIL_COND_V(err != OK, Result()); - } - - if (!resources.push_constant_buffers.empty()) { - for (Resource const &res : resources.push_constant_buffers) { - uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); - if (binding != (uint32_t)-1) { - bin_data.push_constant.used_stages |= 1 << stage; - bin_data.push_constant.msl_binding[stage] = binding; - } - } - } - - ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), Result(), "Atomic counters not supported"); - ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), 
Result(), "Acceleration structures not supported"); - ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), Result(), "Shader record buffers not supported"); - - if (!resources.stage_inputs.empty()) { - for (Resource const &res : resources.stage_inputs) { - uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); - if (binding != (uint32_t)-1) { - bin_data.vertex_input_mask |= 1 << binding; - } - } - } - - ShaderStageData stage_data; - stage_data.stage = v.shader_stage; - stage_data.is_position_invariant = compiler.is_position_invariant(); - stage_data.supports_fast_math = !entry_point.flags.get(spv::ExecutionModeSignedZeroInfNanPreserve); - stage_data.entry_point_name = entry_point.name.c_str(); - stage_data.source = source.c_str(); - bin_data.stages.push_back(stage_data); - } - - size_t vec_size = bin_data.serialize_size() + 8; - - ::Vector ret; - ret.resize(vec_size); - BufWriter writer(ret.ptrw(), vec_size); - const uint8_t HEADER[4] = { 'G', 'M', 'S', 'L' }; - writer.write(*(uint32_t *)HEADER); - writer.write(SHADER_BINARY_VERSION); - bin_data.serialize(writer); - ret.resize(writer.get_pos()); - - return ret; -} - void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key) { if (ShaderCacheEntry **pentry = _shader_cache.getptr(key); pentry != nullptr) { ShaderCacheEntry *entry = *pentry; @@ -2445,115 +1103,146 @@ void RenderingDeviceDriverMetal::shader_cache_free_entry(const SHA256Digest &key } } -RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) { - r_shader_desc = {}; // Driver-agnostic. 
+API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) +static BindingInfo from_binding_info_data(const RenderingShaderContainerMetal::BindingInfoData &p_data) { + BindingInfo bi; + bi.dataType = static_cast(p_data.data_type); + bi.index = p_data.index; + bi.access = static_cast(p_data.access); + bi.usage = static_cast(p_data.usage); + bi.textureType = static_cast(p_data.texture_type); + bi.imageFormat = p_data.image_format; + bi.arrayLength = p_data.array_length; + bi.isMultisampled = p_data.is_multisampled; + return bi; +} - const uint8_t *binptr = p_shader_binary.ptr(); - uint32_t binsize = p_shader_binary.size(); +RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) { + Ref shader_container = p_shader_container; + using RSCM = RenderingShaderContainerMetal; - BufReader reader(binptr, binsize); - uint8_t header[4]; - reader.read((uint32_t &)header); - ERR_FAIL_COND_V_MSG(memcmp(header, "GMSL", 4) != 0, ShaderID(), "Invalid header"); - uint32_t version = 0; - reader.read(version); - ERR_FAIL_COND_V_MSG(version != SHADER_BINARY_VERSION, ShaderID(), "Invalid shader binary version"); - - ShaderBinaryData binary_data; - binary_data.deserialize(reader); - switch (reader.status) { - case BufReader::Status::OK: - break; - case BufReader::Status::BAD_COMPRESSION: - ERR_FAIL_V_MSG(ShaderID(), "Invalid compressed data"); - case BufReader::Status::SHORT_BUFFER: - ERR_FAIL_V_MSG(ShaderID(), "Unexpected end of buffer"); - } + CharString shader_name = shader_container->shader_name; + RSCM::HeaderData &mtl_reflection_data = shader_container->mtl_reflection_data; + Vector &shaders = shader_container->shaders; + Vector &mtl_shaders = shader_container->mtl_shaders; // We need to regenerate the shader if the cache is moved to an incompatible device. 
- ERR_FAIL_COND_V_MSG(device_properties->features.argument_buffers_tier < MTLArgumentBuffersTier2 && binary_data.uses_argument_buffers(), - ShaderID(), + ERR_FAIL_COND_V_MSG(device_properties->features.argument_buffers_tier < MTLArgumentBuffersTier2 && mtl_reflection_data.uses_argument_buffers(), + RDD::ShaderID(), "Shader was generated with argument buffers, but device has limited support"); MTLCompileOptions *options = [MTLCompileOptions new]; - options.languageVersion = binary_data.get_msl_version(); - HashMap libraries; + uint32_t major = mtl_reflection_data.msl_version / 10000; + uint32_t minor = (mtl_reflection_data.msl_version / 100) % 100; + options.languageVersion = MTLLanguageVersion((major << 0x10) + minor); + HashMap libraries; - r_name = String(binary_data.shader_name.ptr()); + bool is_compute = false; + Vector decompressed_code; + for (uint32_t shader_index = 0; shader_index < shaders.size(); shader_index++) { + const RenderingShaderContainer::Shader &shader = shaders[shader_index]; + const RSCM::StageData &shader_data = mtl_shaders[shader_index]; - for (ShaderStageData &shader_data : binary_data.stages) { - r_shader_desc.stages.push_back(shader_data.stage); + if (shader.shader_stage == RD::ShaderStage::SHADER_STAGE_COMPUTE) { + is_compute = true; + } - SHA256Digest key = SHA256Digest(shader_data.source.ptr(), shader_data.source.length()); - - if (ShaderCacheEntry **p = _shader_cache.getptr(key); p != nullptr) { - libraries[shader_data.stage] = (*p)->library; + if (ShaderCacheEntry **p = _shader_cache.getptr(shader_data.hash); p != nullptr) { + libraries[shader.shader_stage] = (*p)->library; continue; } - NSString *source = [[NSString alloc] initWithBytes:(void *)shader_data.source.ptr() - length:shader_data.source.length() + if (shader.code_decompressed_size > 0) { + decompressed_code.resize(shader.code_decompressed_size); + bool decompressed = shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), 
shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size()); + ERR_FAIL_COND_V_MSG(!decompressed, RDD::ShaderID(), vformat("Failed to decompress code on shader stage %s.", String(RDD::SHADER_STAGE_NAMES[shader.shader_stage]))); + } else { + decompressed_code = shader.code_compressed_bytes; + } + + ShaderCacheEntry *cd = memnew(ShaderCacheEntry(*this, shader_data.hash)); + cd->name = shader_name; + cd->stage = shader.shader_stage; + + NSString *source = [[NSString alloc] initWithBytes:(void *)decompressed_code.ptr() + length:shader_data.source_size encoding:NSUTF8StringEncoding]; - ShaderCacheEntry *cd = memnew(ShaderCacheEntry(*this, key)); - cd->name = binary_data.shader_name; - cd->stage = shader_data.stage; - options.preserveInvariance = shader_data.is_position_invariant; -#if defined(VISIONOS_ENABLED) - options.mathMode = MTLMathModeFast; -#else - options.fastMathEnabled = YES; + MDLibrary *library = nil; + if (shader_data.library_size > 0) { + dispatch_data_t binary = dispatch_data_create(decompressed_code.ptr() + shader_data.source_size, shader_data.library_size, dispatch_get_main_queue(), DISPATCH_DATA_DESTRUCTOR_DEFAULT); + library = [MDLibrary newLibraryWithCacheEntry:cd + device:device +#if DEV_ENABLED + source:source #endif - MDLibrary *library = [MDLibrary newLibraryWithCacheEntry:cd - device:device - source:source - options:options - strategy:_shader_load_strategy]; - _shader_cache[key] = cd; - libraries[shader_data.stage] = library; + data:binary]; + } else { + options.preserveInvariance = shader_data.is_position_invariant; +#if defined(VISIONOS_ENABLED) + options.mathMode = MTLMathModeFast; +#else + options.fastMathEnabled = YES; +#endif + library = [MDLibrary newLibraryWithCacheEntry:cd + device:device + source:source + options:options + strategy:_shader_load_strategy]; + } + + _shader_cache[shader_data.hash] = cd; + libraries[shader.shader_stage] = library; } - Vector uniform_sets; - 
uniform_sets.resize(binary_data.uniforms.size()); + ShaderReflection refl = shader_container->get_shader_reflection(); + RSCM::MetalShaderReflection mtl_refl = shader_container->get_metal_shader_reflection(); - r_shader_desc.uniform_sets.resize(binary_data.uniforms.size()); + Vector uniform_sets; + uint32_t uniform_sets_count = mtl_refl.uniform_sets.size(); + uniform_sets.resize(uniform_sets_count); // Create sets. - for (UniformSetData &uniform_set : binary_data.uniforms) { - UniformSet &set = uniform_sets.write[uniform_set.index]; - set.uniforms.resize(uniform_set.uniforms.size()); + for (uint32_t i = 0; i < uniform_sets_count; i++) { + UniformSet &set = uniform_sets.write[i]; + const Vector &refl_set = refl.uniform_sets.ptr()[i]; + const Vector &mtl_set = mtl_refl.uniform_sets.ptr()[i]; + uint32_t set_size = mtl_set.size(); + set.uniforms.resize(set_size); - Vector &uset = r_shader_desc.uniform_sets.write[uniform_set.index]; - uset.resize(uniform_set.uniforms.size()); + LocalVector::Iterator iter = set.uniforms.begin(); + for (uint32_t j = 0; j < set_size; j++) { + const ShaderUniform &uniform = refl_set.ptr()[j]; + const RSCM::UniformData &bind = mtl_set.ptr()[j]; - for (uint32_t i = 0; i < uniform_set.uniforms.size(); i++) { - UniformData &uniform = uniform_set.uniforms[i]; - - ShaderUniform su; - su.type = uniform.type; - su.writable = uniform.writable; - su.length = uniform.length; - su.binding = uniform.binding; - su.stages = (ShaderStage)(uint8_t)uniform.stages; - uset.write[i] = su; - - UniformInfo &ui = set.uniforms[i]; + UniformInfo &ui = *iter; + ++iter; ui.binding = uniform.binding; - ui.active_stages = uniform.active_stages; - for (KeyValue &kv : uniform.bindings) { - ui.bindings.insert(kv.key, kv.value); + ui.active_stages = static_cast(bind.active_stages); + + for (const RSCM::BindingInfoData &info : bind.bindings) { + if (info.shader_stage == UINT32_MAX) { + continue; + } + BindingInfo bi = from_binding_info_data(info); + 
ui.bindings.insert((RDC::ShaderStage)info.shader_stage, bi); } - for (KeyValue &kv : uniform.bindings_secondary) { - ui.bindings_secondary.insert(kv.key, kv.value); + for (const RSCM::BindingInfoData &info : bind.bindings_secondary) { + if (info.shader_stage == UINT32_MAX) { + continue; + } + BindingInfo bi = from_binding_info_data(info); + ui.bindings_secondary.insert((RDC::ShaderStage)info.shader_stage, bi); } } } - for (UniformSetData &uniform_set : binary_data.uniforms) { - UniformSet &set = uniform_sets.write[uniform_set.index]; + + for (uint32_t i = 0; i < uniform_sets_count; i++) { + UniformSet &set = uniform_sets.write[i]; // Make encoders. - for (ShaderStageData const &stage_data : binary_data.stages) { - ShaderStage stage = stage_data.stage; + for (RenderingShaderContainer::Shader const &shader : shaders) { + RD::ShaderStage stage = shader.shader_stage; NSMutableArray *descriptors = [NSMutableArray new]; for (UniformInfo const &uniform : set.uniforms) { @@ -2591,78 +1280,56 @@ RDD::ShaderID RenderingDeviceDriverMetal::shader_create_from_bytecode(const Vect } } - r_shader_desc.specialization_constants.resize(binary_data.constants.size()); - for (uint32_t i = 0; i < binary_data.constants.size(); i++) { - SpecializationConstantData &c = binary_data.constants[i]; - - ShaderSpecializationConstant sc; - sc.type = c.type; - sc.constant_id = c.constant_id; - sc.int_value = c.int_value; - sc.stages = (ShaderStage)(uint8_t)c.stages; - r_shader_desc.specialization_constants.write[i] = sc; - } - MDShader *shader = nullptr; - if (binary_data.is_compute()) { - MDComputeShader *cs = new MDComputeShader( - binary_data.shader_name, - uniform_sets, - binary_data.uses_argument_buffers(), - libraries[ShaderStage::SHADER_STAGE_COMPUTE]); + if (is_compute) { + const RSCM::StageData &stage_data = mtl_shaders[0]; - uint32_t *binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_COMPUTE); - if (binding) { - cs->push_constants.size = binary_data.push_constant.size; - 
cs->push_constants.binding = *binding; + MDComputeShader *cs = new MDComputeShader( + shader_name, + uniform_sets, + mtl_reflection_data.uses_argument_buffers(), + libraries[RD::ShaderStage::SHADER_STAGE_COMPUTE]); + + if (stage_data.push_constant_binding != UINT32_MAX) { + cs->push_constants.size = refl.push_constant_size; + cs->push_constants.binding = stage_data.push_constant_binding; } - cs->local = MTLSizeMake(binary_data.compute_local_size.x, binary_data.compute_local_size.y, binary_data.compute_local_size.z); -#if DEV_ENABLED - cs->kernel_source = binary_data.stages[0].source; -#endif + cs->local = MTLSizeMake(refl.compute_local_size[0], refl.compute_local_size[1], refl.compute_local_size[2]); shader = cs; } else { MDRenderShader *rs = new MDRenderShader( - binary_data.shader_name, + shader_name, uniform_sets, - binary_data.needs_view_mask_buffer(), - binary_data.uses_argument_buffers(), - libraries[ShaderStage::SHADER_STAGE_VERTEX], - libraries[ShaderStage::SHADER_STAGE_FRAGMENT]); + mtl_reflection_data.needs_view_mask_buffer(), + mtl_reflection_data.uses_argument_buffers(), + libraries[RD::ShaderStage::SHADER_STAGE_VERTEX], + libraries[RD::ShaderStage::SHADER_STAGE_FRAGMENT]); - uint32_t *vert_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_VERTEX); - if (vert_binding) { - rs->push_constants.vert.size = binary_data.push_constant.size; - rs->push_constants.vert.binding = *vert_binding; - } - uint32_t *frag_binding = binary_data.push_constant.msl_binding.getptr(SHADER_STAGE_FRAGMENT); - if (frag_binding) { - rs->push_constants.frag.size = binary_data.push_constant.size; - rs->push_constants.frag.binding = *frag_binding; - } - -#if DEV_ENABLED - for (ShaderStageData &stage_data : binary_data.stages) { - if (stage_data.stage == ShaderStage::SHADER_STAGE_VERTEX) { - rs->vert_source = stage_data.source; - } else if (stage_data.stage == ShaderStage::SHADER_STAGE_FRAGMENT) { - rs->frag_source = stage_data.source; + for (uint32_t j = 0; j < 
shaders.size(); j++) { + const RSCM::StageData &stage_data = mtl_shaders[j]; + switch (shaders[j].shader_stage) { + case RD::ShaderStage::SHADER_STAGE_VERTEX: { + if (stage_data.push_constant_binding != UINT32_MAX) { + rs->push_constants.vert.size = refl.push_constant_size; + rs->push_constants.vert.binding = stage_data.push_constant_binding; + } + } break; + case RD::ShaderStage::SHADER_STAGE_FRAGMENT: { + if (stage_data.push_constant_binding != UINT32_MAX) { + rs->push_constants.frag.size = refl.push_constant_size; + rs->push_constants.frag.binding = stage_data.push_constant_binding; + } + } break; + default: { + ERR_FAIL_V_MSG(RDD::ShaderID(), "Invalid shader stage"); + } break; } } -#endif shader = rs; } - r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; - r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - r_shader_desc.is_compute = binary_data.is_compute(); - r_shader_desc.compute_local_size[0] = binary_data.compute_local_size.x; - r_shader_desc.compute_local_size[1] = binary_data.compute_local_size.y; - r_shader_desc.compute_local_size[2] = binary_data.compute_local_size.z; - r_shader_desc.push_constant_size = binary_data.push_constant.size; - - return ShaderID(shader); + return RDD::ShaderID(shader); } void RenderingDeviceDriverMetal::shader_free(ShaderID p_shader) { @@ -4086,7 +2753,7 @@ const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverMetal::get_fragm } String RenderingDeviceDriverMetal::get_api_version() const { - return vformat("%d.%d", version_major, version_minor); + return vformat("%d.%d", capabilities.version_major, capabilities.version_minor); } String RenderingDeviceDriverMetal::get_pipeline_cache_uuid() const { @@ -4134,6 +2801,18 @@ RenderingDeviceDriverMetal::~RenderingDeviceDriverMetal() { for (KeyValue &kv : _shader_cache) { memdelete(kv.value); } + + if (shader_container_format != nullptr) { + memdelete(shader_container_format); + } + + if (pixel_formats != nullptr) { + 
memdelete(pixel_formats); + } + + if (device_properties != nullptr) { + memdelete(device_properties); + } } #pragma mark - Initialization @@ -4153,16 +2832,69 @@ Error RenderingDeviceDriverMetal::_create_device() { return OK; } -Error RenderingDeviceDriverMetal::_check_capabilities() { - MTLCompileOptions *options = [MTLCompileOptions new]; - version_major = (options.languageVersion >> 0x10) & 0xff; - version_minor = (options.languageVersion >> 0x00) & 0xff; - +void RenderingDeviceDriverMetal::_check_capabilities() { capabilities.device_family = DEVICE_METAL; - capabilities.version_major = version_major; - capabilities.version_minor = version_minor; + capabilities.version_major = device_properties->features.mslVersionMajor; + capabilities.version_minor = device_properties->features.mslVersionMinor; +} - return OK; +API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) +static MetalDeviceProfile device_profile_from_properties(MetalDeviceProperties *p_device_properties) { + using DP = MetalDeviceProfile; + MetalDeviceProfile res; +#if TARGET_OS_OSX + res.platform = DP::Platform::macOS; + res.features = { + .mslVersionMajor = p_device_properties->features.mslVersionMajor, + .mslVersionMinor = p_device_properties->features.mslVersionMinor, + .argument_buffers_tier = DP::ArgumentBuffersTier::Tier2, + .simdPermute = true + }; +#else + res.platform = DP::Platform::iOS; + res.features = { + .mslVersionMajor = p_device_properties->features.mslVersionMajor, + .mslVersionMinor = p_device_properties->features.mslVersionMinor, + .argument_buffers_tier = p_device_properties->features.argument_buffers_tier == MTLArgumentBuffersTier1 ? 
DP::ArgumentBuffersTier::Tier1 : DP::ArgumentBuffersTier::Tier2, + .simdPermute = p_device_properties->features.simdPermute, + }; +#endif + // highestFamily will only be set to an Apple GPU family + switch (p_device_properties->features.highestFamily) { + case MTLGPUFamilyApple1: + res.gpu = DP::GPU::Apple1; + break; + case MTLGPUFamilyApple2: + res.gpu = DP::GPU::Apple2; + break; + case MTLGPUFamilyApple3: + res.gpu = DP::GPU::Apple3; + break; + case MTLGPUFamilyApple4: + res.gpu = DP::GPU::Apple4; + break; + case MTLGPUFamilyApple5: + res.gpu = DP::GPU::Apple5; + break; + case MTLGPUFamilyApple6: + res.gpu = DP::GPU::Apple6; + break; + case MTLGPUFamilyApple7: + res.gpu = DP::GPU::Apple7; + break; + case MTLGPUFamilyApple8: + res.gpu = DP::GPU::Apple8; + break; + case MTLGPUFamilyApple9: + res.gpu = DP::GPU::Apple9; + break; + default: { + // Programming error if the default case is hit. + CRASH_NOW_MSG("Unsupported GPU family"); + } break; + } + + return res; } Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p_frame_count) { @@ -4170,13 +2902,15 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p Error err = _create_device(); ERR_FAIL_COND_V(err, ERR_CANT_CREATE); - err = _check_capabilities(); - ERR_FAIL_COND_V(err, ERR_CANT_CREATE); + device_properties = memnew(MetalDeviceProperties(device)); + device_profile = device_profile_from_properties(device_properties); + shader_container_format = memnew(RenderingShaderContainerFormatMetal(&device_profile)); + + _check_capabilities(); // Set the pipeline cache ID based on the Metal version. 
pipeline_cache_id = "metal-driver-" + get_api_version(); - device_properties = memnew(MetalDeviceProperties(device)); pixel_formats = memnew(PixelFormats(device, device_properties->features)); if (device_properties->features.layeredRendering) { multiview_capabilities.is_supported = true; @@ -4210,3 +2944,7 @@ Error RenderingDeviceDriverMetal::initialize(uint32_t p_device_index, uint32_t p return OK; } + +const RenderingShaderContainerFormat &RenderingDeviceDriverMetal::get_shader_container_format() const { + return *shader_container_format; +} diff --git a/drivers/metal/rendering_shader_container_metal.h b/drivers/metal/rendering_shader_container_metal.h new file mode 100644 index 00000000000..cafe0fb8e4c --- /dev/null +++ b/drivers/metal/rendering_shader_container_metal.h @@ -0,0 +1,265 @@ +/**************************************************************************/ +/* rendering_shader_container_metal.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#import "sha256_digest.h" + +#import "servers/rendering/rendering_device_driver.h" +#import "servers/rendering/rendering_shader_container.h" + +constexpr uint32_t R32UI_ALIGNMENT_CONSTANT_ID = 65535; +/// Metal buffer index for the view mask when rendering multi-view. +const uint32_t VIEW_MASK_BUFFER_INDEX = 24; + +class RenderingShaderContainerFormatMetal; + +/// @brief A minimal structure that defines a device profile for Metal. +/// +/// This structure is used by the `RenderingShaderContainerMetal` class to +/// determine options for compiling SPIR-V to Metal source. It currently only +/// contains the minimum properties required to transform shaders from SPIR-V to Metal +/// and potentially compile to a `.metallib`. +struct MetalDeviceProfile { + enum class Platform : uint32_t { + macOS = 0, + iOS = 1, + }; + + /// @brief The GPU family. 
+ enum class GPU : uint32_t { + Apple1, + Apple2, + Apple3, + Apple4, + Apple5, + Apple6, + Apple7, + Apple8, + Apple9, + }; + + enum class ArgumentBuffersTier : uint32_t { + Tier1 = 0, + Tier2 = 1, + }; + + struct Features { + uint32_t mslVersionMajor = 0; + uint32_t mslVersionMinor = 0; + ArgumentBuffersTier argument_buffers_tier = ArgumentBuffersTier::Tier1; + bool simdPermute = false; + }; + + Platform platform = Platform::macOS; + GPU gpu = GPU::Apple4; + Features features; + + static const MetalDeviceProfile *get_profile(Platform p_platform, GPU p_gpu); + + MetalDeviceProfile() = default; + +private: + static Mutex profiles_lock; ///< Mutex to protect access to the profiles map. + static HashMap profiles; +}; + +class RenderingShaderContainerMetal : public RenderingShaderContainer { + GDSOFTCLASS(RenderingShaderContainerMetal, RenderingShaderContainer); + +public: + struct HeaderData { + enum Flags : uint32_t { + NONE = 0, + NEEDS_VIEW_MASK_BUFFER = 1 << 0, + USES_ARGUMENT_BUFFERS = 1 << 1, + }; + + /// The base profile that was used to generate this shader. + MetalDeviceProfile profile; + + /// The Metal language version specified when compiling SPIR-V to MSL. + /// Format is major * 10000 + minor * 100 + patch. + uint32_t msl_version = UINT32_MAX; + uint32_t flags = NONE; + + /// @brief Returns `true` if the shader is compiled with multi-view support. + bool needs_view_mask_buffer() const { + return flags & NEEDS_VIEW_MASK_BUFFER; + } + + void set_needs_view_mask_buffer(bool p_value) { + if (p_value) { + flags |= NEEDS_VIEW_MASK_BUFFER; + } else { + flags &= ~NEEDS_VIEW_MASK_BUFFER; + } + } + + /// @brief Returns `true` if the shader was compiled with argument buffer support. 
+ bool uses_argument_buffers() const { + return flags & USES_ARGUMENT_BUFFERS; + } + + void set_uses_argument_buffers(bool p_value) { + if (p_value) { + flags |= USES_ARGUMENT_BUFFERS; + } else { + flags &= ~USES_ARGUMENT_BUFFERS; + } + } + }; + + struct StageData { + uint32_t vertex_input_binding_mask = 0; + uint32_t is_position_invariant = 0; ///< true if the position output is invariant + uint32_t supports_fast_math = 0; + SHA256Digest hash; ///< SHA 256 hash of the shader code + uint32_t source_size = 0; ///< size of the source code in the returned bytes + uint32_t library_size = 0; ///< size of the compiled library in the returned bytes, 0 if it is not compiled + uint32_t push_constant_binding = UINT32_MAX; ///< Metal binding slot for the push constant data + }; + + struct BindingInfoData { + uint32_t shader_stage = UINT32_MAX; ///< The shader stage this binding is used in, or UINT32_MAX if not used. + uint32_t data_type = 0; // MTLDataTypeNone + uint32_t index = 0; + uint32_t access = 0; // MTLBindingAccessReadOnly + uint32_t usage = 0; // MTLResourceUsage (none) + uint32_t texture_type = 2; // MTLTextureType2D + uint32_t image_format = 0; + uint32_t array_length = 0; + uint32_t is_multisampled = 0; + }; + + struct UniformData { + /// Specifies the index into the `bindings` array for the shader stage. + /// + /// For example, a vertex and fragment shader use slots 0 and 1 of the bindings and bindings_secondary arrays. + static constexpr uint32_t STAGE_INDEX[RenderingDeviceCommons::SHADER_STAGE_MAX] = { + 0, // SHADER_STAGE_VERTEX + 1, // SHADER_STAGE_FRAGMENT + 0, // SHADER_STAGE_TESSELATION_CONTROL + 1, // SHADER_STAGE_TESSELATION_EVALUATION + 0, // SHADER_STAGE_COMPUTE + }; + + /// Specifies the stages the uniform data is + /// used by the Metal shader. + uint32_t active_stages = 0; + /// The primary binding information for the uniform data. 
+ /// + /// A maximum of two stages is expected for any given pipeline, such as a vertex and fragment, so + /// the array size is fixed to 2. + BindingInfoData bindings[2]; + /// The secondary binding information for the uniform data. + /// + /// This is typically a sampler for an image-sampler uniform + BindingInfoData bindings_secondary[2]; + + _FORCE_INLINE_ constexpr uint32_t get_index_for_stage(RenderingDeviceCommons::ShaderStage p_stage) const { + return STAGE_INDEX[p_stage]; + } + + _FORCE_INLINE_ BindingInfoData &get_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) { + BindingInfoData &info = bindings[get_index_for_stage(p_stage)]; + DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage + info.shader_stage = p_stage; + return info; + } + + _FORCE_INLINE_ BindingInfoData &get_secondary_binding_for_stage(RenderingDeviceCommons::ShaderStage p_stage) { + BindingInfoData &info = bindings_secondary[get_index_for_stage(p_stage)]; + DEV_ASSERT(info.shader_stage == UINT32_MAX || info.shader_stage == p_stage); // make sure this uniform isn't used in the other stage + info.shader_stage = p_stage; + return info; + } + }; + + struct SpecializationData { + uint32_t used_stages = 0; + }; + + HeaderData mtl_reflection_data; // compliment to reflection_data + Vector mtl_shaders; // compliment to shaders + +private: + const MetalDeviceProfile *device_profile = nullptr; + bool export_mode = false; + + Vector mtl_reflection_binding_set_uniforms_data; // compliment to reflection_binding_set_uniforms_data + Vector mtl_reflection_specialization_data; // compliment to reflection_specialization_data + + Error compile_metal_source(const char *p_source, const StageData &p_stage_data, Vector &r_binary_data); + +public: + static constexpr uint32_t FORMAT_VERSION = 1; + + void set_export_mode(bool p_export_mode) { export_mode = p_export_mode; } + void set_device_profile(const MetalDeviceProfile 
*p_device_profile) { device_profile = p_device_profile; } + + struct MetalShaderReflection { + Vector> uniform_sets; + Vector specialization_constants; + }; + + MetalShaderReflection get_metal_shader_reflection() const; + +protected: + virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; + virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; + virtual uint32_t _from_bytes_shader_extra_data_start(const uint8_t *p_bytes) override; + virtual uint32_t _from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) override; + + virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const override; + virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; + virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; + virtual uint32_t _to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const override; + + virtual uint32_t _format() const override; + virtual uint32_t _format_version() const override; + virtual bool _set_code_from_spirv(const Vector &p_spirv) override; +}; + +class RenderingShaderContainerFormatMetal : public RenderingShaderContainerFormat { + bool export_mode = false; + + const MetalDeviceProfile *device_profile = nullptr; + +public: + virtual Ref create_container() const override; + virtual ShaderLanguageVersion get_shader_language_version() const override; + virtual ShaderSpirvVersion get_shader_spirv_version() const override; + RenderingShaderContainerFormatMetal(const 
MetalDeviceProfile *p_device_profile, bool p_export = false); + virtual ~RenderingShaderContainerFormatMetal() = default; +}; diff --git a/drivers/metal/rendering_shader_container_metal.mm b/drivers/metal/rendering_shader_container_metal.mm new file mode 100644 index 00000000000..c2e4518a061 --- /dev/null +++ b/drivers/metal/rendering_shader_container_metal.mm @@ -0,0 +1,699 @@ +/**************************************************************************/ +/* rendering_shader_container_metal.mm */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "rendering_shader_container_metal.h" + +#include "servers/rendering/rendering_device.h" + +#import "core/io/marshalls.h" + +#import +#import +#import +#import + +Mutex MetalDeviceProfile::profiles_lock; +HashMap MetalDeviceProfile::profiles; + +const MetalDeviceProfile *MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform p_platform, MetalDeviceProfile::GPU p_gpu) { + DEV_ASSERT(p_platform == Platform::macOS || p_platform == Platform::iOS); + + MutexLock lock(profiles_lock); + + uint32_t key = (uint32_t)p_platform << 16 | (uint32_t)p_gpu; + if (MetalDeviceProfile *profile = profiles.getptr(key)) { + return profile; + } + + MetalDeviceProfile res; + res.platform = p_platform; + res.gpu = p_gpu; + if (p_platform == Platform::macOS) { + res.features.mslVersionMajor = 3; + res.features.mslVersionMinor = 2; + res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2; + res.features.simdPermute = true; + } else if (p_platform == Platform::iOS) { + switch (p_gpu) { + case GPU::Apple1: + case GPU::Apple2: + case GPU::Apple3: + case GPU::Apple4: + case GPU::Apple5: { + res.features.simdPermute = false; + res.features.argument_buffers_tier = ArgumentBuffersTier::Tier1; + } break; + case GPU::Apple6: + case GPU::Apple7: + case GPU::Apple8: + case GPU::Apple9: { + res.features.argument_buffers_tier = ArgumentBuffersTier::Tier2; + res.features.simdPermute = true; + } break; + } + res.features.mslVersionMajor = 3; + res.features.mslVersionMinor = 2; + } + + return &profiles.insert(key, res)->value; +} + +Error RenderingShaderContainerMetal::compile_metal_source(const char *p_source, const 
StageData &p_stage_data, Vector &r_binary_data) { + String name(shader_name.ptr()); + if (name.contains_char(':')) { + name = name.replace_char(':', '_'); + } + Error r_error; + Ref source_file = FileAccess::create_temp(FileAccess::ModeFlags::READ_WRITE, + name + "_" + itos(p_stage_data.hash.short_sha()), + "metal", false, &r_error); + ERR_FAIL_COND_V_MSG(r_error != OK, r_error, "Unable to create temporary source file."); + if (!source_file->store_buffer((const uint8_t *)p_source, strlen(p_source))) { + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unable to write temporary source file"); + } + source_file->flush(); + Ref result_file = FileAccess::create_temp(FileAccess::ModeFlags::READ_WRITE, + name + "_" + itos(p_stage_data.hash.short_sha()), + "metallib", false, &r_error); + + ERR_FAIL_COND_V_MSG(r_error != OK, r_error, "Unable to create temporary target file"); + + String sdk; + switch (device_profile->platform) { + case MetalDeviceProfile::Platform::macOS: + sdk = "macosx"; + break; + case MetalDeviceProfile::Platform::iOS: + sdk = "iphoneos"; + break; + } + + // Build the metallib binary. + { + List args{ "-sdk", sdk, "metal", "-O3" }; + if (p_stage_data.is_position_invariant) { + args.push_back("-fpreserve-invariance"); + } + args.push_back("-fmetal-math-mode=fast"); + args.push_back(source_file->get_path_absolute()); + args.push_back("-o"); + args.push_back(result_file->get_path_absolute()); + String r_pipe; + int exit_code; + Error err = OS::get_singleton()->execute("/usr/bin/xcrun", args, &r_pipe, &exit_code, true); + if (!r_pipe.is_empty()) { + print_line(r_pipe); + } + if (err != OK) { + ERR_PRINT(vformat("Metal compiler returned error code: %d", err)); + } + + if (exit_code != 0) { + ERR_PRINT(vformat("Metal compiler exited with error code: %d", exit_code)); + } + int len = result_file->get_length(); + ERR_FAIL_COND_V_MSG(len == 0, ERR_CANT_CREATE, "Metal compiler created empty library"); + } + + // Strip the source from the binary. 
+ { + List args{ "-sdk", sdk, "metal-dsymutil", "--remove-source", result_file->get_path_absolute() }; + String r_pipe; + int exit_code; + Error err = OS::get_singleton()->execute("/usr/bin/xcrun", args, &r_pipe, &exit_code, true); + if (!r_pipe.is_empty()) { + print_line(r_pipe); + } + if (err != OK) { + ERR_PRINT(vformat("metal-dsymutil tool returned error code: %d", err)); + } + + if (exit_code != 0) { + ERR_PRINT(vformat("metal-dsymutil Compiler exited with error code: %d", exit_code)); + } + int len = result_file->get_length(); + ERR_FAIL_COND_V_MSG(len == 0, ERR_CANT_CREATE, "metal-dsymutil tool created empty library"); + } + + r_binary_data = result_file->get_buffer(result_file->get_length()); + + return OK; +} + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunguarded-availability" + +bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector &p_spirv) { + using namespace spirv_cross; + using spirv_cross::CompilerMSL; + using spirv_cross::Resource; + + // initialize Metal-specific reflection data + shaders.resize(p_spirv.size()); + mtl_shaders.resize(p_spirv.size()); + mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size()); + mtl_reflection_specialization_data.resize(reflection_specialization_data.size()); + + mtl_reflection_data.set_needs_view_mask_buffer(reflection_data.has_multiview); + + // set_indexes will contain the starting offsets of each descriptor set in the binding set uniforms data + // including the last one, which is the size of reflection_binding_set_uniforms_count. 
+ LocalVector set_indexes; + uint32_t set_indexes_size = reflection_binding_set_uniforms_count.size() + 1; + { + // calculate the starting offsets of each descriptor set in the binding set uniforms data + uint32_t size = reflection_binding_set_uniforms_count.size(); + set_indexes.resize(set_indexes_size); + uint32_t offset = 0; + for (uint32_t i = 0; i < size; i++) { + set_indexes[i] = offset; + offset += reflection_binding_set_uniforms_count.get(i); + } + set_indexes[set_indexes_size - 1] = offset; + } + CompilerMSL::Options msl_options{}; + msl_options.set_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor); + mtl_reflection_data.msl_version = msl_options.msl_version; + msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? CompilerMSL::Options::macOS : CompilerMSL::Options::iOS; + + if (device_profile->platform == MetalDeviceProfile::Platform::iOS) { + msl_options.ios_use_simdgroup_functions = device_profile->features.simdPermute; + msl_options.ios_support_base_vertex_instance = true; + } + + bool disable_argument_buffers = false; + if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") { + disable_argument_buffers = true; + } + + if (device_profile->features.argument_buffers_tier >= MetalDeviceProfile::ArgumentBuffersTier::Tier2 && !disable_argument_buffers) { + msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier2; + msl_options.argument_buffers = true; + mtl_reflection_data.set_uses_argument_buffers(true); + } else { + msl_options.argument_buffers_tier = CompilerMSL::Options::ArgumentBuffersTier::Tier1; + // Tier 1 argument buffers don't support writable textures, so we disable them completely. 
+ msl_options.argument_buffers = false; + mtl_reflection_data.set_uses_argument_buffers(false); + } + msl_options.force_active_argument_buffer_resources = true; + // We can't use this, as we have to add the descriptor sets via compiler.add_msl_resource_binding. + // msl_options.pad_argument_buffer_resources = true; + msl_options.texture_buffer_native = true; // Enable texture buffer support. + msl_options.use_framebuffer_fetch_subpasses = false; + msl_options.pad_fragment_output_components = true; + msl_options.r32ui_alignment_constant_id = R32UI_ALIGNMENT_CONSTANT_ID; + msl_options.agx_manual_cube_grad_fixup = true; + if (reflection_data.has_multiview) { + msl_options.multiview = true; + msl_options.multiview_layered_rendering = true; + msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX; + } + + CompilerGLSL::Options options{}; + options.vertex.flip_vert_y = true; +#if DEV_ENABLED + options.emit_line_directives = true; +#endif + + for (uint32_t i = 0; i < p_spirv.size(); i++) { + StageData &stage_data = mtl_shaders.write[i]; + RD::ShaderStageSPIRVData const &v = p_spirv[i]; + RD::ShaderStage stage = v.shader_stage; + char const *stage_name = RD::SHADER_STAGE_NAMES[stage]; + uint32_t const *const ir = reinterpret_cast(v.spirv.ptr()); + size_t word_count = v.spirv.size() / sizeof(uint32_t); + Parser parser(ir, word_count); + try { + parser.parse(); + } catch (CompilerError &e) { + ERR_FAIL_V_MSG(false, "Failed to parse IR at stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": " + e.what()); + } + + CompilerMSL compiler(std::move(parser.get_parsed_ir())); + compiler.set_msl_options(msl_options); + compiler.set_common_options(options); + + std::unordered_set active = compiler.get_active_interface_variables(); + ShaderResources resources = compiler.get_shader_resources(); + + std::string source; + try { + source = compiler.compile(); + } catch (CompilerError &e) { + ERR_FAIL_V_MSG(false, "Failed to compile stage " + String(RD::SHADER_STAGE_NAMES[stage]) + ": 
" + e.what()); + } + + ERR_FAIL_COND_V_MSG(compiler.get_entry_points_and_stages().size() != 1, false, "Expected a single entry point and stage."); + + SmallVector entry_pts_stages = compiler.get_entry_points_and_stages(); + EntryPoint &entry_point_stage = entry_pts_stages.front(); + SPIREntryPoint &entry_point = compiler.get_entry_point(entry_point_stage.name, entry_point_stage.execution_model); + + // Process specialization constants. + if (!compiler.get_specialization_constants().empty()) { + uint32_t size = reflection_specialization_data.size(); + for (SpecializationConstant const &constant : compiler.get_specialization_constants()) { + uint32_t j = 0; + while (j < size) { + const ReflectionSpecializationData &res = reflection_specialization_data.ptr()[j]; + if (res.constant_id == constant.constant_id) { + mtl_reflection_specialization_data.ptrw()[j].used_stages |= 1 << stage; + // emulate labeled for loop and continue + goto outer_continue; + } + ++j; + } + if (j == size) { + WARN_PRINT(String(stage_name) + ": unable to find constant_id: " + itos(constant.constant_id)); + } + outer_continue:; + } + } + + // Process bindings. + uint32_t uniform_sets_size = reflection_binding_set_uniforms_count.size(); + using BT = SPIRType::BaseType; + + // Always clearer than a boolean. + enum class Writable { + No, + Maybe, + }; + + // Returns a std::optional containing the value of the + // decoration, if it exists. 
+ auto get_decoration = [&compiler](spirv_cross::ID id, spv::Decoration decoration) { + uint32_t res = -1; + if (compiler.has_decoration(id, decoration)) { + res = compiler.get_decoration(id, decoration); + } + return res; + }; + + auto descriptor_bindings = [&compiler, &active, this, &set_indexes, uniform_sets_size, stage, &get_decoration](SmallVector &p_resources, Writable p_writable) { + for (Resource const &res : p_resources) { + uint32_t dset = get_decoration(res.id, spv::DecorationDescriptorSet); + uint32_t dbin = get_decoration(res.id, spv::DecorationBinding); + UniformData *found = nullptr; + if (dset != (uint32_t)-1 && dbin != (uint32_t)-1 && dset < uniform_sets_size) { + uint32_t begin = set_indexes[dset]; + uint32_t end = set_indexes[dset + 1]; + for (uint32_t j = begin; j < end; j++) { + const ReflectionBindingData &ref_bind = reflection_binding_set_uniforms_data[j]; + if (dbin == ref_bind.binding) { + found = &mtl_reflection_binding_set_uniforms_data.write[j]; + break; + } + } + } + + ERR_FAIL_NULL_V_MSG(found, ERR_CANT_CREATE, "UniformData not found"); + + bool is_active = active.find(res.id) != active.end(); + if (is_active) { + found->active_stages |= 1 << stage; + } + + BindingInfoData &primary = found->get_binding_for_stage(stage); + + SPIRType const &a_type = compiler.get_type(res.type_id); + BT basetype = a_type.basetype; + + switch (basetype) { + case BT::Struct: { + primary.data_type = MTLDataTypePointer; + } break; + + case BT::Image: + case BT::SampledImage: { + primary.data_type = MTLDataTypeTexture; + } break; + + case BT::Sampler: { + primary.data_type = MTLDataTypeSampler; + primary.array_length = 1; + for (uint32_t const &a : a_type.array) { + primary.array_length *= a; + } + } break; + + default: { + ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Unexpected BaseType"); + } break; + } + + // Find array length of image. 
+ if (basetype == BT::Image || basetype == BT::SampledImage) { + primary.array_length = 1; + for (uint32_t const &a : a_type.array) { + primary.array_length *= a; + } + primary.is_multisampled = a_type.image.ms; + + SPIRType::ImageType const &image = a_type.image; + primary.image_format = image.format; + + switch (image.dim) { + case spv::Dim1D: { + if (image.arrayed) { + primary.texture_type = MTLTextureType1DArray; + } else { + primary.texture_type = MTLTextureType1D; + } + } break; + case spv::DimSubpassData: { + [[fallthrough]]; + } + case spv::Dim2D: { + if (image.arrayed && image.ms) { + primary.texture_type = MTLTextureType2DMultisampleArray; + } else if (image.arrayed) { + primary.texture_type = MTLTextureType2DArray; + } else if (image.ms) { + primary.texture_type = MTLTextureType2DMultisample; + } else { + primary.texture_type = MTLTextureType2D; + } + } break; + case spv::Dim3D: { + primary.texture_type = MTLTextureType3D; + } break; + case spv::DimCube: { + if (image.arrayed) { + primary.texture_type = MTLTextureTypeCube; + } + } break; + case spv::DimRect: { + } break; + case spv::DimBuffer: { + // VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER + primary.texture_type = MTLTextureTypeTextureBuffer; + } break; + case spv::DimMax: { + // Add all enumerations to silence the compiler warning + // and generate future warnings, should a new one be added. + } break; + } + } + + // Update writable. 
+ if (p_writable == Writable::Maybe) { + if (basetype == BT::Struct) { + Bitset flags = compiler.get_buffer_block_flags(res.id); + if (!flags.get(spv::DecorationNonWritable)) { + if (flags.get(spv::DecorationNonReadable)) { + primary.access = MTLBindingAccessWriteOnly; + } else { + primary.access = MTLBindingAccessReadWrite; + } + } + } else if (basetype == BT::Image) { + switch (a_type.image.access) { + case spv::AccessQualifierWriteOnly: + primary.access = MTLBindingAccessWriteOnly; + break; + case spv::AccessQualifierReadWrite: + primary.access = MTLBindingAccessReadWrite; + break; + case spv::AccessQualifierReadOnly: + break; + case spv::AccessQualifierMax: + [[fallthrough]]; + default: + if (!compiler.has_decoration(res.id, spv::DecorationNonWritable)) { + if (compiler.has_decoration(res.id, spv::DecorationNonReadable)) { + primary.access = MTLBindingAccessWriteOnly; + } else { + primary.access = MTLBindingAccessReadWrite; + } + } + break; + } + } + } + + switch (primary.access) { + case MTLBindingAccessReadOnly: + primary.usage = MTLResourceUsageRead; + break; + case MTLBindingAccessWriteOnly: + primary.usage = MTLResourceUsageWrite; + break; + case MTLBindingAccessReadWrite: + primary.usage = MTLResourceUsageRead | MTLResourceUsageWrite; + break; + } + + primary.index = compiler.get_automatic_msl_resource_binding(res.id); + + // A sampled image contains two bindings, the primary + // is to the image, and the secondary is to the associated sampler. + if (basetype == BT::SampledImage) { + uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); + if (binding != (uint32_t)-1) { + BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage); + secondary.data_type = MTLDataTypeSampler; + secondary.index = binding; + secondary.access = MTLBindingAccessReadOnly; + } + } + + // An image may have a secondary binding if it is used + // for atomic operations. 
+ if (basetype == BT::Image) { + uint32_t binding = compiler.get_automatic_msl_resource_binding_secondary(res.id); + if (binding != (uint32_t)-1) { + BindingInfoData &secondary = found->get_secondary_binding_for_stage(stage); + secondary.data_type = MTLDataTypePointer; + secondary.index = binding; + secondary.access = MTLBindingAccessReadWrite; + } + } + } + return Error::OK; + }; + + if (!resources.uniform_buffers.empty()) { + Error err = descriptor_bindings(resources.uniform_buffers, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.storage_buffers.empty()) { + Error err = descriptor_bindings(resources.storage_buffers, Writable::Maybe); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.storage_images.empty()) { + Error err = descriptor_bindings(resources.storage_images, Writable::Maybe); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.sampled_images.empty()) { + Error err = descriptor_bindings(resources.sampled_images, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.separate_images.empty()) { + Error err = descriptor_bindings(resources.separate_images, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.separate_samplers.empty()) { + Error err = descriptor_bindings(resources.separate_samplers, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + if (!resources.subpass_inputs.empty()) { + Error err = descriptor_bindings(resources.subpass_inputs, Writable::No); + ERR_FAIL_COND_V(err != OK, false); + } + + if (!resources.push_constant_buffers.empty()) { + for (Resource const &res : resources.push_constant_buffers) { + uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); + if (binding != (uint32_t)-1) { + stage_data.push_constant_binding = binding; + } + } + } + + ERR_FAIL_COND_V_MSG(!resources.atomic_counters.empty(), false, "Atomic counters not supported"); + ERR_FAIL_COND_V_MSG(!resources.acceleration_structures.empty(), false, "Acceleration structures 
not supported"); + ERR_FAIL_COND_V_MSG(!resources.shader_record_buffers.empty(), false, "Shader record buffers not supported"); + + if (!resources.stage_inputs.empty()) { + for (Resource const &res : resources.stage_inputs) { + uint32_t binding = compiler.get_automatic_msl_resource_binding(res.id); + if (binding != (uint32_t)-1) { + stage_data.vertex_input_binding_mask |= 1 << binding; + } + } + } + + stage_data.is_position_invariant = compiler.is_position_invariant(); + stage_data.supports_fast_math = !entry_point.flags.get(spv::ExecutionModeSignedZeroInfNanPreserve); + stage_data.hash = SHA256Digest(source.c_str(), source.length()); + stage_data.source_size = source.length(); + ::Vector binary_data; + binary_data.resize(stage_data.source_size); + memcpy(binary_data.ptrw(), source.c_str(), stage_data.source_size); + + if (export_mode) { + // Try to compile the Metal source code + ::Vector library_data; + Error compile_err = compile_metal_source(source.c_str(), stage_data, library_data); + if (compile_err == OK) { + stage_data.library_size = library_data.size(); + binary_data.resize(stage_data.source_size + stage_data.library_size); + memcpy(binary_data.ptrw() + stage_data.source_size, library_data.ptr(), stage_data.library_size); + } + } + + uint32_t binary_data_size = binary_data.size(); + Shader &shader = shaders.write[i]; + shader.shader_stage = stage; + shader.code_decompressed_size = binary_data_size; + shader.code_compressed_bytes.resize(binary_data_size); + + uint32_t compressed_size = 0; + bool compressed = compress_code(binary_data.ptr(), binary_data_size, shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags); + ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress native code to native for SPIR-V #%d.", i)); + + shader.code_compressed_bytes.resize(compressed_size); + } + + return true; +} + +#pragma clang diagnostic pop + +uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_extra_data(uint8_t 
*p_bytes) const { + if (p_bytes != nullptr) { + *(HeaderData *)p_bytes = mtl_reflection_data; + } + return sizeof(HeaderData); +} + +uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(UniformData *)p_bytes = mtl_reflection_binding_set_uniforms_data[p_index]; + } + return sizeof(UniformData); +} + +uint32_t RenderingShaderContainerMetal::_to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(SpecializationData *)p_bytes = mtl_reflection_specialization_data[p_index]; + } + return sizeof(SpecializationData); +} + +uint32_t RenderingShaderContainerMetal::_to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const { + if (p_bytes != nullptr) { + *(StageData *)p_bytes = mtl_shaders[p_index]; + } + return sizeof(StageData); +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) { + mtl_reflection_data = *(HeaderData *)p_bytes; + return sizeof(HeaderData); +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) { + mtl_reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size()); + return 0; +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + mtl_reflection_binding_set_uniforms_data.ptrw()[p_index] = *(UniformData *)p_bytes; + return sizeof(UniformData); +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) { + mtl_reflection_specialization_data.resize(reflection_specialization_data.size()); + return 0; +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + 
mtl_reflection_specialization_data.ptrw()[p_index] = *(SpecializationData *)p_bytes; + return sizeof(SpecializationData); +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data_start(const uint8_t *p_bytes) { + mtl_shaders.resize(shaders.size()); + return 0; +} + +uint32_t RenderingShaderContainerMetal::_from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + mtl_shaders.ptrw()[p_index] = *(StageData *)p_bytes; + return sizeof(StageData); +} + +RenderingShaderContainerMetal::MetalShaderReflection RenderingShaderContainerMetal::get_metal_shader_reflection() const { + MetalShaderReflection res; + + res.specialization_constants = mtl_reflection_specialization_data; + uint32_t uniform_set_count = reflection_binding_set_uniforms_count.size(); + uint32_t start = 0; + res.uniform_sets.resize(uniform_set_count); + for (uint32_t i = 0; i < uniform_set_count; i++) { + Vector &set = res.uniform_sets.ptrw()[i]; + uint32_t count = reflection_binding_set_uniforms_count.get(i); + set.resize(count); + memcpy(set.ptrw(), &mtl_reflection_binding_set_uniforms_data.ptr()[start], count * sizeof(UniformData)); + start += count; + } + + return res; +} + +uint32_t RenderingShaderContainerMetal::_format() const { + return 0x42424242; +} + +uint32_t RenderingShaderContainerMetal::_format_version() const { + return FORMAT_VERSION; +} + +Ref RenderingShaderContainerFormatMetal::create_container() const { + Ref result; + result.instantiate(); + result->set_export_mode(export_mode); + result->set_device_profile(device_profile); + return result; +} + +RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatMetal::get_shader_language_version() const { + return SHADER_LANGUAGE_VULKAN_VERSION_1_1; +} + +RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatMetal::get_shader_spirv_version() const { + return SHADER_SPIRV_VERSION_1_6; +} + +RenderingShaderContainerFormatMetal::RenderingShaderContainerFormatMetal(const 
MetalDeviceProfile *p_device_profile, bool p_export) : + export_mode(p_export), device_profile(p_device_profile) { +} diff --git a/drivers/metal/sha256_digest.h b/drivers/metal/sha256_digest.h new file mode 100644 index 00000000000..28a7a5733fe --- /dev/null +++ b/drivers/metal/sha256_digest.h @@ -0,0 +1,75 @@ +/**************************************************************************/ +/* sha256_digest.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#pragma once + +#import +#import +#import + +#include "core/templates/local_vector.h" + +struct SHA256Digest { + unsigned char data[CC_SHA256_DIGEST_LENGTH]; + + static constexpr size_t serialized_size() { return CC_SHA256_DIGEST_LENGTH; } + + uint32_t hash() const { + uint32_t c = crc32(0, data, CC_SHA256_DIGEST_LENGTH); + return c; + } + + SHA256Digest() { + bzero(data, CC_SHA256_DIGEST_LENGTH); + } + + SHA256Digest(const char *p_hash) { + memcpy(data, p_hash, CC_SHA256_DIGEST_LENGTH); + } + + SHA256Digest(const char *p_data, size_t p_length) { + CC_SHA256(p_data, (CC_LONG)p_length, data); + } + + _FORCE_INLINE_ uint32_t short_sha() const { + return __builtin_bswap32(*(uint32_t *)&data[0]); + } + + LocalVector serialize() const { + LocalVector result; + result.resize(CC_SHA256_DIGEST_LENGTH); + memcpy(result.ptr(), data, CC_SHA256_DIGEST_LENGTH); + return result; + } + + static SHA256Digest deserialize(LocalVector p_ser) { + return SHA256Digest((const char *)p_ser.ptr()); + } +}; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.cpp b/drivers/vulkan/rendering_device_driver_vulkan.cpp index 72b1881aea8..95462acabea 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.cpp +++ b/drivers/vulkan/rendering_device_driver_vulkan.cpp @@ -32,9 +32,12 @@ #include "core/config/project_settings.h" #include "core/io/marshalls.h" -#include "thirdparty/misc/smolv.h" #include "vulkan_hooks.h" +#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV +#include "thirdparty/misc/smolv.h" +#endif + #if defined(ANDROID_ENABLED) #include "platform/android/java_godot_wrapper.h" #include "platform/android/os_android.h" @@ -3550,260 +3553,34 @@ static VkShaderStageFlagBits RD_STAGE_TO_VK_SHADER_STAGE_BITS[RDD::SHADER_STAGE_ VK_SHADER_STAGE_COMPUTE_BIT, }; -String RenderingDeviceDriverVulkan::shader_get_binary_cache_key() { - return "Vulkan-SV" + uitos(ShaderBinary::VERSION); -} - -Vector 
RenderingDeviceDriverVulkan::shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) { - ShaderReflection shader_refl; - if (_reflect_spirv(p_spirv, shader_refl) != OK) { - return Vector(); - } - - ERR_FAIL_COND_V_MSG((uint32_t)shader_refl.uniform_sets.size() > physical_device_properties.limits.maxBoundDescriptorSets, Vector(), - "Number of uniform sets is larger than what is supported by the hardware (" + itos(physical_device_properties.limits.maxBoundDescriptorSets) + ")."); - - // Collect reflection data into binary data. - ShaderBinary::Data binary_data; - Vector> uniforms; // Set bindings. - Vector specialization_constants; - { - binary_data.vertex_input_mask = shader_refl.vertex_input_mask; - binary_data.fragment_output_mask = shader_refl.fragment_output_mask; - binary_data.specialization_constants_count = shader_refl.specialization_constants.size(); - binary_data.is_compute = shader_refl.is_compute; - binary_data.compute_local_size[0] = shader_refl.compute_local_size[0]; - binary_data.compute_local_size[1] = shader_refl.compute_local_size[1]; - binary_data.compute_local_size[2] = shader_refl.compute_local_size[2]; - binary_data.set_count = shader_refl.uniform_sets.size(); - binary_data.push_constant_size = shader_refl.push_constant_size; - for (uint32_t i = 0; i < SHADER_STAGE_MAX; i++) { - if (shader_refl.push_constant_stages.has_flag((ShaderStage)(1 << i))) { - binary_data.vk_push_constant_stages_mask |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[i]; - } - } - - for (const Vector &set_refl : shader_refl.uniform_sets) { - Vector set_bindings; - for (const ShaderUniform &uniform_refl : set_refl) { - ShaderBinary::DataBinding binding; - binding.type = (uint32_t)uniform_refl.type; - binding.binding = uniform_refl.binding; - binding.stages = (uint32_t)uniform_refl.stages; - binding.length = uniform_refl.length; - binding.writable = (uint32_t)uniform_refl.writable; - set_bindings.push_back(binding); - } - uniforms.push_back(set_bindings); - } - - 
for (const ShaderSpecializationConstant &refl_sc : shader_refl.specialization_constants) { - ShaderBinary::SpecializationConstant spec_constant; - spec_constant.type = (uint32_t)refl_sc.type; - spec_constant.constant_id = refl_sc.constant_id; - spec_constant.int_value = refl_sc.int_value; - spec_constant.stage_flags = (uint32_t)refl_sc.stages; - specialization_constants.push_back(spec_constant); +RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) { + ShaderReflection shader_refl = p_shader_container->get_shader_reflection(); + ShaderInfo shader_info; + for (uint32_t i = 0; i < SHADER_STAGE_MAX; i++) { + if (shader_refl.push_constant_stages.has_flag((ShaderStage)(1 << i))) { + shader_info.vk_push_constant_stages |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[i]; } } - Vector> compressed_stages; - Vector smolv_size; - Vector zstd_size; // If 0, zstd not used. - - uint32_t stages_binary_size = 0; - - bool strip_debug = false; - - for (uint32_t i = 0; i < p_spirv.size(); i++) { - smolv::ByteArray smolv; - if (!smolv::Encode(p_spirv[i].spirv.ptr(), p_spirv[i].spirv.size(), smolv, strip_debug ? smolv::kEncodeFlagStripDebugInfo : 0)) { - ERR_FAIL_V_MSG(Vector(), "Error compressing shader stage :" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage])); - } else { - smolv_size.push_back(smolv.size()); - { // zstd. - Vector zstd; - zstd.resize(Compression::get_max_compressed_buffer_size(smolv.size(), Compression::MODE_ZSTD)); - int dst_size = Compression::compress(zstd.ptrw(), &smolv[0], smolv.size(), Compression::MODE_ZSTD); - - if (dst_size > 0 && (uint32_t)dst_size < smolv.size()) { - zstd_size.push_back(dst_size); - zstd.resize(dst_size); - compressed_stages.push_back(zstd); - } else { - Vector smv; - smv.resize(smolv.size()); - memcpy(smv.ptrw(), &smolv[0], smolv.size()); - zstd_size.push_back(0); // Not using zstd. 
- compressed_stages.push_back(smv); - } - } - } - uint32_t s = compressed_stages[i].size(); - stages_binary_size += STEPIFY(s, 4); - } - - binary_data.specialization_constants_count = specialization_constants.size(); - binary_data.set_count = uniforms.size(); - binary_data.stage_count = p_spirv.size(); - - CharString shader_name_utf = p_shader_name.utf8(); - - binary_data.shader_name_len = shader_name_utf.length(); - - uint32_t total_size = sizeof(uint32_t) * 4; // Header + version + pad + main datasize;. - total_size += sizeof(ShaderBinary::Data); - - total_size += STEPIFY(binary_data.shader_name_len, 4); - - for (int i = 0; i < uniforms.size(); i++) { - total_size += sizeof(uint32_t); - total_size += uniforms[i].size() * sizeof(ShaderBinary::DataBinding); - } - - total_size += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size(); - - total_size += compressed_stages.size() * sizeof(uint32_t) * 3; // Sizes. - total_size += stages_binary_size; - - Vector ret; - ret.resize(total_size); - { - uint32_t offset = 0; - uint8_t *binptr = ret.ptrw(); - binptr[0] = 'G'; - binptr[1] = 'S'; - binptr[2] = 'B'; - binptr[3] = 'D'; // Godot Shader Binary Data. - offset += 4; - encode_uint32(ShaderBinary::VERSION, binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(sizeof(ShaderBinary::Data), binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(0, binptr + offset); // Pad to align ShaderBinary::Data to 8 bytes. - offset += sizeof(uint32_t); - memcpy(binptr + offset, &binary_data, sizeof(ShaderBinary::Data)); - offset += sizeof(ShaderBinary::Data); - -#define ADVANCE_OFFSET_WITH_ALIGNMENT(m_bytes) \ - { \ - offset += m_bytes; \ - uint32_t padding = STEPIFY(m_bytes, 4) - m_bytes; \ - memset(binptr + offset, 0, padding); /* Avoid garbage data. 
*/ \ - offset += padding; \ - } - - if (binary_data.shader_name_len > 0) { - memcpy(binptr + offset, shader_name_utf.ptr(), binary_data.shader_name_len); - ADVANCE_OFFSET_WITH_ALIGNMENT(binary_data.shader_name_len); - } - - for (int i = 0; i < uniforms.size(); i++) { - int count = uniforms[i].size(); - encode_uint32(count, binptr + offset); - offset += sizeof(uint32_t); - if (count > 0) { - memcpy(binptr + offset, uniforms[i].ptr(), sizeof(ShaderBinary::DataBinding) * count); - offset += sizeof(ShaderBinary::DataBinding) * count; - } - } - - if (specialization_constants.size()) { - memcpy(binptr + offset, specialization_constants.ptr(), sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size()); - offset += sizeof(ShaderBinary::SpecializationConstant) * specialization_constants.size(); - } - - for (int i = 0; i < compressed_stages.size(); i++) { - encode_uint32(p_spirv[i].shader_stage, binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(smolv_size[i], binptr + offset); - offset += sizeof(uint32_t); - encode_uint32(zstd_size[i], binptr + offset); - offset += sizeof(uint32_t); - memcpy(binptr + offset, compressed_stages[i].ptr(), compressed_stages[i].size()); - ADVANCE_OFFSET_WITH_ALIGNMENT(compressed_stages[i].size()); - } - - DEV_ASSERT(offset == (uint32_t)ret.size()); - } - - return ret; -} - -RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) { - r_shader_desc = {}; // Driver-agnostic. - ShaderInfo shader_info; // Driver-specific. - - const uint8_t *binptr = p_shader_binary.ptr(); - uint32_t binsize = p_shader_binary.size(); - - uint32_t read_offset = 0; - - // Consistency check. 
- ERR_FAIL_COND_V(binsize < sizeof(uint32_t) * 4 + sizeof(ShaderBinary::Data), ShaderID()); - ERR_FAIL_COND_V(binptr[0] != 'G' || binptr[1] != 'S' || binptr[2] != 'B' || binptr[3] != 'D', ShaderID()); - - uint32_t bin_version = decode_uint32(binptr + 4); - ERR_FAIL_COND_V(bin_version != ShaderBinary::VERSION, ShaderID()); - - uint32_t bin_data_size = decode_uint32(binptr + 8); - - // 16, not 12, to skip alignment padding. - const ShaderBinary::Data &binary_data = *(reinterpret_cast(binptr + 16)); - - r_shader_desc.push_constant_size = binary_data.push_constant_size; - shader_info.vk_push_constant_stages = binary_data.vk_push_constant_stages_mask; - - r_shader_desc.vertex_input_mask = binary_data.vertex_input_mask; - r_shader_desc.fragment_output_mask = binary_data.fragment_output_mask; - - r_shader_desc.is_compute = binary_data.is_compute; - r_shader_desc.compute_local_size[0] = binary_data.compute_local_size[0]; - r_shader_desc.compute_local_size[1] = binary_data.compute_local_size[1]; - r_shader_desc.compute_local_size[2] = binary_data.compute_local_size[2]; - - read_offset += sizeof(uint32_t) * 4 + bin_data_size; - - if (binary_data.shader_name_len) { - r_name.clear(); - r_name.append_utf8((const char *)(binptr + read_offset), binary_data.shader_name_len); - read_offset += STEPIFY(binary_data.shader_name_len, 4); - } - + // Set bindings. 
Vector> vk_set_bindings; - - r_shader_desc.uniform_sets.resize(binary_data.set_count); - vk_set_bindings.resize(binary_data.set_count); - - for (uint32_t i = 0; i < binary_data.set_count; i++) { - ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) >= binsize, ShaderID()); - uint32_t set_count = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - const ShaderBinary::DataBinding *set_ptr = reinterpret_cast(binptr + read_offset); - uint32_t set_size = set_count * sizeof(ShaderBinary::DataBinding); - ERR_FAIL_COND_V(read_offset + set_size >= binsize, ShaderID()); - - for (uint32_t j = 0; j < set_count; j++) { - ShaderUniform info; - info.type = UniformType(set_ptr[j].type); - info.writable = set_ptr[j].writable; - info.length = set_ptr[j].length; - info.binding = set_ptr[j].binding; - info.stages = set_ptr[j].stages; - + vk_set_bindings.resize(shader_refl.uniform_sets.size()); + for (uint32_t i = 0; i < shader_refl.uniform_sets.size(); i++) { + for (uint32_t j = 0; j < shader_refl.uniform_sets[i].size(); j++) { + const ShaderUniform &uniform = shader_refl.uniform_sets[i][j]; VkDescriptorSetLayoutBinding layout_binding = {}; - layout_binding.binding = set_ptr[j].binding; + layout_binding.binding = uniform.binding; layout_binding.descriptorCount = 1; for (uint32_t k = 0; k < SHADER_STAGE_MAX; k++) { - if ((set_ptr[j].stages & (1 << k))) { + if ((uniform.stages.has_flag(ShaderStage(1U << k)))) { layout_binding.stageFlags |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[k]; } } - switch (info.type) { + switch (uniform.type) { case UNIFORM_TYPE_SAMPLER: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; // Immutable samplers: here they get set in the layoutbinding, given that they will not be changed later. 
int immutable_bind_index = -1; if (immutable_samplers_enabled && p_immutable_samplers.size() > 0) { @@ -3820,19 +3597,19 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec } break; case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; } break; case UNIFORM_TYPE_TEXTURE: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; } break; case UNIFORM_TYPE_IMAGE: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; } break; case UNIFORM_TYPE_TEXTURE_BUFFER: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER; - layout_binding.descriptorCount = set_ptr[j].length; + layout_binding.descriptorCount = uniform.length; } break; case UNIFORM_TYPE_IMAGE_BUFFER: { layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; @@ -3851,104 +3628,72 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec } } - r_shader_desc.uniform_sets.write[i].push_back(info); vk_set_bindings.write[i].push_back(layout_binding); } - - read_offset += set_size; } - ERR_FAIL_COND_V(read_offset + binary_data.specialization_constants_count * sizeof(ShaderBinary::SpecializationConstant) >= binsize, ShaderID()); - - r_shader_desc.specialization_constants.resize(binary_data.specialization_constants_count); - for (uint32_t i = 0; i < binary_data.specialization_constants_count; i++) { - const ShaderBinary::SpecializationConstant &src_sc = *(reinterpret_cast(binptr + read_offset)); - ShaderSpecializationConstant sc; - sc.type = PipelineSpecializationConstantType(src_sc.type); - sc.constant_id = src_sc.constant_id; - 
sc.int_value = src_sc.int_value; - sc.stages = src_sc.stage_flags; - r_shader_desc.specialization_constants.write[i] = sc; - - read_offset += sizeof(ShaderBinary::SpecializationConstant); - } - - Vector> stages_spirv; - stages_spirv.resize(binary_data.stage_count); - r_shader_desc.stages.resize(binary_data.stage_count); - - for (uint32_t i = 0; i < binary_data.stage_count; i++) { - ERR_FAIL_COND_V(read_offset + sizeof(uint32_t) * 3 >= binsize, ShaderID()); - - uint32_t stage = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - uint32_t smolv_size = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - uint32_t zstd_size = decode_uint32(binptr + read_offset); - read_offset += sizeof(uint32_t); - - uint32_t buf_size = (zstd_size > 0) ? zstd_size : smolv_size; - - Vector smolv; - const uint8_t *src_smolv = nullptr; - - if (zstd_size > 0) { - // Decompress to smolv. - smolv.resize(smolv_size); - int dec_smolv_size = Compression::decompress(smolv.ptrw(), smolv.size(), binptr + read_offset, zstd_size, Compression::MODE_ZSTD); - ERR_FAIL_COND_V(dec_smolv_size != (int32_t)smolv_size, ShaderID()); - src_smolv = smolv.ptr(); - } else { - src_smolv = binptr + read_offset; - } - - Vector &spirv = stages_spirv.ptrw()[i]; - uint32_t spirv_size = smolv::GetDecodedBufferSize(src_smolv, smolv_size); - spirv.resize(spirv_size); - if (!smolv::Decode(src_smolv, smolv_size, spirv.ptrw(), spirv_size)) { - ERR_FAIL_V_MSG(ShaderID(), "Malformed smolv input uncompressing shader stage:" + String(SHADER_STAGE_NAMES[stage])); - } - - r_shader_desc.stages.set(i, ShaderStage(stage)); - - buf_size = STEPIFY(buf_size, 4); - read_offset += buf_size; - ERR_FAIL_COND_V(read_offset > binsize, ShaderID()); - } - - ERR_FAIL_COND_V(read_offset != binsize, ShaderID()); - // Modules. 
- + VkResult res; String error_text; + Vector decompressed_code; + Vector decoded_spirv; + VkShaderModule vk_module; + for (int i = 0; i < shader_refl.stages_vector.size(); i++) { + const RenderingShaderContainer::Shader &shader = p_shader_container->shaders[i]; +#if RENDERING_SHADER_CONTAINER_VULKAN_COMPRESSION + bool requires_decompression = (shader.code_decompressed_size > 0); + if (requires_decompression) { + decompressed_code.resize(shader.code_decompressed_size); + bool decompressed = p_shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size()); + if (!decompressed) { + error_text = vformat("Failed to decompress code on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]])); + break; + } + } +#else + bool requires_decompression = false; +#endif + + const uint8_t *smolv_input = requires_decompression ? decompressed_code.ptr() : shader.code_compressed_bytes.ptr(); + uint32_t smolv_input_size = requires_decompression ? 
decompressed_code.size() : shader.code_compressed_bytes.size(); +#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV + decoded_spirv.resize(smolv::GetDecodedBufferSize(smolv_input, smolv_input_size)); + if (decoded_spirv.is_empty()) { + error_text = vformat("Malformed smolv input on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]])); + break; + } + + if (!smolv::Decode(smolv_input, smolv_input_size, decoded_spirv.ptrw(), decoded_spirv.size())) { + error_text = vformat("Malformed smolv input on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]])); + break; + } +#else + decoded_spirv.resize(smolv_input_size); + memcpy(decoded_spirv.ptrw(), smolv_input, decoded_spirv.size()); +#endif - for (int i = 0; i < r_shader_desc.stages.size(); i++) { VkShaderModuleCreateInfo shader_module_create_info = {}; shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; - shader_module_create_info.codeSize = stages_spirv[i].size(); - shader_module_create_info.pCode = (const uint32_t *)stages_spirv[i].ptr(); + shader_module_create_info.codeSize = decoded_spirv.size(); + shader_module_create_info.pCode = (const uint32_t *)(decoded_spirv.ptr()); - VkShaderModule vk_module = VK_NULL_HANDLE; - VkResult res = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &vk_module); - if (res) { - error_text = "Error (" + itos(res) + ") creating shader module for stage: " + String(SHADER_STAGE_NAMES[r_shader_desc.stages[i]]); + res = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &vk_module); + if (res != VK_SUCCESS) { + error_text = vformat("Error (%d) creating module for shader stage %s.", res, String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]])); break; } VkPipelineShaderStageCreateInfo create_info = {}; create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; - 
create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[r_shader_desc.stages[i]]; + create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[shader_refl.stages_vector[i]]; create_info.module = vk_module; create_info.pName = "main"; - shader_info.vk_stages_create_info.push_back(create_info); } // Descriptor sets. - if (error_text.is_empty()) { - DEV_ASSERT((uint32_t)vk_set_bindings.size() == binary_data.set_count); - for (uint32_t i = 0; i < binary_data.set_count; i++) { + for (uint32_t i = 0; i < shader_refl.uniform_sets.size(); i++) { // Empty ones are fine if they were not used according to spec (binding count will be 0). VkDescriptorSetLayoutCreateInfo layout_create_info = {}; layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; @@ -3956,9 +3701,9 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec layout_create_info.pBindings = vk_set_bindings[i].ptr(); VkDescriptorSetLayout layout = VK_NULL_HANDLE; - VkResult res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT), &layout); + res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT), &layout); if (res) { - error_text = "Error (" + itos(res) + ") creating descriptor set layout for set " + itos(i); + error_text = vformat("Error (%d) creating descriptor set layout for set %d.", res, i); break; } @@ -3968,24 +3713,23 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec if (error_text.is_empty()) { // Pipeline layout. 
- VkPipelineLayoutCreateInfo pipeline_layout_create_info = {}; pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; - pipeline_layout_create_info.setLayoutCount = binary_data.set_count; + pipeline_layout_create_info.setLayoutCount = shader_info.vk_descriptor_set_layouts.size(); pipeline_layout_create_info.pSetLayouts = shader_info.vk_descriptor_set_layouts.ptr(); - if (binary_data.push_constant_size) { + if (shader_refl.push_constant_size > 0) { VkPushConstantRange *push_constant_range = ALLOCA_SINGLE(VkPushConstantRange); *push_constant_range = {}; - push_constant_range->stageFlags = binary_data.vk_push_constant_stages_mask; - push_constant_range->size = binary_data.push_constant_size; + push_constant_range->stageFlags = shader_info.vk_push_constant_stages; + push_constant_range->size = shader_refl.push_constant_size; pipeline_layout_create_info.pushConstantRangeCount = 1; pipeline_layout_create_info.pPushConstantRanges = push_constant_range; } - VkResult err = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT), &shader_info.vk_pipeline_layout); - if (err) { - error_text = "Error (" + itos(err) + ") creating pipeline layout."; + res = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT), &shader_info.vk_pipeline_layout); + if (res != VK_SUCCESS) { + error_text = vformat("Error (%d) creating pipeline layout.", res); } } @@ -3994,7 +3738,7 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec for (uint32_t i = 0; i < shader_info.vk_stages_create_info.size(); i++) { vkDestroyShaderModule(vk_device, shader_info.vk_stages_create_info[i].module, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE)); } - for (uint32_t i = 0; i < binary_data.set_count; i++) { + for (uint32_t i = 0; i < shader_info.vk_descriptor_set_layouts.size(); i++) { 
vkDestroyDescriptorSetLayout(vk_device, shader_info.vk_descriptor_set_layouts[i], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)); } @@ -4002,7 +3746,6 @@ RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_bytecode(const Vec } // Bookkeep. - ShaderInfo *shader_info_ptr = VersatileResource::allocate(resources_allocator); *shader_info_ptr = shader_info; return ShaderID(shader_info_ptr); @@ -6185,6 +5928,10 @@ const RDD::Capabilities &RenderingDeviceDriverVulkan::get_capabilities() const { return device_capabilities; } +const RenderingShaderContainerFormat &RenderingDeviceDriverVulkan::get_shader_container_format() const { + return shader_container_format; +} + bool RenderingDeviceDriverVulkan::is_composite_alpha_supported(CommandQueueID p_queue) const { if (has_comp_alpha.has((uint64_t)p_queue.id)) { return has_comp_alpha[(uint64_t)p_queue.id]; diff --git a/drivers/vulkan/rendering_device_driver_vulkan.h b/drivers/vulkan/rendering_device_driver_vulkan.h index 07c1b2a0a9b..5800bc70d9c 100644 --- a/drivers/vulkan/rendering_device_driver_vulkan.h +++ b/drivers/vulkan/rendering_device_driver_vulkan.h @@ -33,6 +33,7 @@ #include "core/templates/hash_map.h" #include "core/templates/paged_allocator.h" #include "drivers/vulkan/rendering_context_driver_vulkan.h" +#include "drivers/vulkan/rendering_shader_container_vulkan.h" #include "servers/rendering/rendering_device_driver.h" #ifdef DEBUG_ENABLED @@ -130,6 +131,7 @@ class RenderingDeviceDriverVulkan : public RenderingDeviceDriver { FragmentDensityMapCapabilities fdm_capabilities; ShaderCapabilities shader_capabilities; StorageBufferCapabilities storage_buffer_capabilities; + RenderingShaderContainerFormatVulkan shader_container_format; bool buffer_device_address_support = false; bool pipeline_cache_control_support = false; bool device_fault_support = false; @@ -408,43 +410,6 @@ public: /**** SHADER ****/ /****************/ private: - struct ShaderBinary { - // Version 1: initial. 
- // Version 2: Added shader name. - // Version 3: Added writable. - // Version 4: 64-bit vertex input mask. - // Version 5: Add 4 bytes padding to align the Data struct after the change in version 4. - static const uint32_t VERSION = 5; - - struct DataBinding { - uint32_t type = 0; - uint32_t binding = 0; - uint32_t stages = 0; - uint32_t length = 0; // Size of arrays (in total elements), or UBOs (in bytes * total elements). - uint32_t writable = 0; - }; - - struct SpecializationConstant { - uint32_t type = 0; - uint32_t constant_id = 0; - uint32_t int_value = 0; - uint32_t stage_flags = 0; - }; - - struct Data { - uint64_t vertex_input_mask = 0; - uint32_t fragment_output_mask = 0; - uint32_t specialization_constants_count = 0; - uint32_t is_compute = 0; - uint32_t compute_local_size[3] = {}; - uint32_t set_count = 0; - uint32_t push_constant_size = 0; - uint32_t vk_push_constant_stages_mask = 0; - uint32_t stage_count = 0; - uint32_t shader_name_len = 0; - }; - }; - struct ShaderInfo { VkShaderStageFlags vk_push_constant_stages = 0; TightLocalVector vk_stages_create_info; @@ -453,9 +418,7 @@ private: }; public: - virtual String shader_get_binary_cache_key() override final; - virtual Vector shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) override final; - virtual ShaderID shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) override final; + virtual ShaderID shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) override final; virtual void shader_free(ShaderID p_shader) override final; virtual void shader_destroy_modules(ShaderID p_shader) override final; @@ -711,6 +674,7 @@ public: virtual String get_api_version() const override final; virtual String get_pipeline_cache_uuid() const override final; virtual const Capabilities &get_capabilities() const override final; + virtual const 
RenderingShaderContainerFormat &get_shader_container_format() const override final; virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const override final; diff --git a/drivers/vulkan/rendering_shader_container_vulkan.cpp b/drivers/vulkan/rendering_shader_container_vulkan.cpp new file mode 100644 index 00000000000..7b22213c3b3 --- /dev/null +++ b/drivers/vulkan/rendering_shader_container_vulkan.cpp @@ -0,0 +1,103 @@ +/**************************************************************************/ +/* rendering_shader_container_vulkan.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "rendering_shader_container_vulkan.h" + +#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV +#include "thirdparty/misc/smolv.h" +#endif + +// RenderingShaderContainerVulkan + +const uint32_t RenderingShaderContainerVulkan::FORMAT_VERSION = 1; + +uint32_t RenderingShaderContainerVulkan::_format() const { + return 0x43565053; +} + +uint32_t RenderingShaderContainerVulkan::_format_version() const { + return FORMAT_VERSION; +} + +bool RenderingShaderContainerVulkan::_set_code_from_spirv(const Vector &p_spirv) { + PackedByteArray code_bytes; + shaders.resize(p_spirv.size()); + for (int64_t i = 0; i < p_spirv.size(); i++) { +#if RENDERING_SHADER_CONTAINER_VULKAN_SMOLV + // Encode into smolv. 
+ smolv::ByteArray smolv_bytes; + bool smolv_encoded = smolv::Encode(p_spirv[i].spirv.ptr(), p_spirv[i].spirv.size(), smolv_bytes, smolv::kEncodeFlagStripDebugInfo); + ERR_FAIL_COND_V_MSG(!smolv_encoded, false, "Failed to compress SPIR-V into smolv."); + + code_bytes.resize(smolv_bytes.size()); + memcpy(code_bytes.ptrw(), smolv_bytes.data(), code_bytes.size()); +#else + code_bytes.resize(p_spirv[i].spirv.size()); + memcpy(code_bytes.ptrw(), p_spirv[i].spirv.ptr(), code_bytes.size()); +#endif + + RenderingShaderContainer::Shader &shader = shaders.ptrw()[i]; +#if RENDERING_SHADER_CONTAINER_VULKAN_COMPRESSION + uint32_t compressed_size = 0; + shader.code_decompressed_size = code_bytes.size(); + shader.code_compressed_bytes.resize(code_bytes.size()); + + bool compressed = compress_code(code_bytes.ptr(), code_bytes.size(), shader.code_compressed_bytes.ptrw(), &compressed_size, &shader.code_compression_flags); + ERR_FAIL_COND_V_MSG(!compressed, false, vformat("Failed to compress native code to native for SPIR-V #%d.", i)); + + shader.code_compressed_bytes.resize(compressed_size); +#else + shader.code_decompressed_size = 0; + shader.code_compression_flags = 0; + shader.code_compressed_bytes = code_bytes; +#endif + shader.shader_stage = p_spirv[i].shader_stage; + } + + return true; +} + +// RenderingShaderContainerFormatVulkan + +Ref RenderingShaderContainerFormatVulkan::create_container() const { + return memnew(RenderingShaderContainerVulkan); +} + +RenderingDeviceCommons::ShaderLanguageVersion RenderingShaderContainerFormatVulkan::get_shader_language_version() const { + return SHADER_LANGUAGE_VULKAN_VERSION_1_1; +} + +RenderingDeviceCommons::ShaderSpirvVersion RenderingShaderContainerFormatVulkan::get_shader_spirv_version() const { + return SHADER_SPIRV_VERSION_1_3; +} + +RenderingShaderContainerFormatVulkan::RenderingShaderContainerFormatVulkan() {} + +RenderingShaderContainerFormatVulkan::~RenderingShaderContainerFormatVulkan() {} diff --git 
a/drivers/vulkan/rendering_shader_container_vulkan.h b/drivers/vulkan/rendering_shader_container_vulkan.h new file mode 100644 index 00000000000..6d3a06ddc8e --- /dev/null +++ b/drivers/vulkan/rendering_shader_container_vulkan.h @@ -0,0 +1,57 @@ +/**************************************************************************/ +/* rendering_shader_container_vulkan.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#pragma once + +#include "servers/rendering/rendering_shader_container.h" + +#define RENDERING_SHADER_CONTAINER_VULKAN_COMPRESSION 1 +#define RENDERING_SHADER_CONTAINER_VULKAN_SMOLV 1 + +class RenderingShaderContainerVulkan : public RenderingShaderContainer { + GDSOFTCLASS(RenderingShaderContainerVulkan, RenderingShaderContainer); + +public: + static const uint32_t FORMAT_VERSION; + +protected: + virtual uint32_t _format() const override; + virtual uint32_t _format_version() const override; + virtual bool _set_code_from_spirv(const Vector &p_spirv) override; +}; + +class RenderingShaderContainerFormatVulkan : public RenderingShaderContainerFormat { +public: + virtual Ref create_container() const override; + virtual ShaderLanguageVersion get_shader_language_version() const override; + virtual ShaderSpirvVersion get_shader_spirv_version() const override; + RenderingShaderContainerFormatVulkan(); + virtual ~RenderingShaderContainerFormatVulkan(); +}; diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp index 330bc9d50f5..075d12ef915 100644 --- a/editor/editor_node.cpp +++ b/editor/editor_node.cpp @@ -155,6 +155,7 @@ #include "editor/plugins/plugin_config_dialog.h" #include "editor/plugins/root_motion_editor_plugin.h" #include "editor/plugins/script_text_editor.h" +#include "editor/plugins/shader_baker_export_plugin.h" #include "editor/plugins/text_editor.h" #include "editor/plugins/version_control_editor_plugin.h" #include "editor/plugins/visual_shader_editor_plugin.h" @@ -167,6 +168,18 @@ #include "editor/themes/editor_theme_manager.h" #include "editor/window_wrapper.h" +#ifdef VULKAN_ENABLED +#include "editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.h" +#endif + +#ifdef D3D12_ENABLED +#include "editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.h" +#endif + +#ifdef METAL_ENABLED +#include 
"editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.h" +#endif + #include "modules/modules_enabled.gen.h" // For gdscript, mono. #ifndef PHYSICS_2D_DISABLED @@ -8531,6 +8544,29 @@ EditorNode::EditorNode() { EditorExport::get_singleton()->add_export_plugin(dedicated_server_export_plugin); + Ref shader_baker_export_plugin; + shader_baker_export_plugin.instantiate(); + +#ifdef VULKAN_ENABLED + Ref shader_baker_export_plugin_platform_vulkan; + shader_baker_export_plugin_platform_vulkan.instantiate(); + shader_baker_export_plugin->add_platform(shader_baker_export_plugin_platform_vulkan); +#endif + +#ifdef D3D12_ENABLED + Ref shader_baker_export_plugin_platform_d3d12; + shader_baker_export_plugin_platform_d3d12.instantiate(); + shader_baker_export_plugin->add_platform(shader_baker_export_plugin_platform_d3d12); +#endif + +#ifdef METAL_ENABLED + Ref shader_baker_export_plugin_platform_metal; + shader_baker_export_plugin_platform_metal.instantiate(); + shader_baker_export_plugin->add_platform(shader_baker_export_plugin_platform_metal); +#endif + + EditorExport::get_singleton()->add_export_plugin(shader_baker_export_plugin); + Ref packed_scene_translation_parser_plugin; packed_scene_translation_parser_plugin.instantiate(); EditorTranslationParser::get_singleton()->add_parser(packed_scene_translation_parser_plugin, EditorTranslationParser::STANDARD); diff --git a/editor/export/editor_export_platform.cpp b/editor/export/editor_export_platform.cpp index ab4132444fc..8f877056b68 100644 --- a/editor/export/editor_export_platform.cpp +++ b/editor/export/editor_export_platform.cpp @@ -1200,27 +1200,31 @@ Error EditorExportPlatform::export_project_files(const Ref & } }; - // Always sort by name, to so if for some reason they are re-arranged, it still works. 
- export_plugins.sort_custom(); - - for (int i = 0; i < export_plugins.size(); i++) { - if (p_so_func) { - for (int j = 0; j < export_plugins[i]->shared_objects.size(); j++) { - err = p_so_func(p_udata, export_plugins[i]->shared_objects[j]); + auto add_shared_objects_and_extra_files_from_export_plugins = [&]() { + for (int i = 0; i < export_plugins.size(); i++) { + if (p_so_func) { + for (int j = 0; j < export_plugins[i]->shared_objects.size(); j++) { + err = p_so_func(p_udata, export_plugins[i]->shared_objects[j]); + if (err != OK) { + return err; + } + } + } + for (int j = 0; j < export_plugins[i]->extra_files.size(); j++) { + err = save_proxy.save_file(p_udata, export_plugins[i]->extra_files[j].path, export_plugins[i]->extra_files[j].data, 0, paths.size(), enc_in_filters, enc_ex_filters, key, seed); if (err != OK) { return err; } } - } - for (int j = 0; j < export_plugins[i]->extra_files.size(); j++) { - err = save_proxy.save_file(p_udata, export_plugins[i]->extra_files[j].path, export_plugins[i]->extra_files[j].data, 0, paths.size(), enc_in_filters, enc_ex_filters, key, seed); - if (err != OK) { - return err; - } + + export_plugins.write[i]->_clear(); } - export_plugins.write[i]->_clear(); - } + return OK; + }; + + // Always sort by name, to so if for some reason they are re-arranged, it still works. + export_plugins.sort_custom(); HashSet features = get_features(p_preset, p_debug); PackedStringArray features_psa; @@ -1252,6 +1256,12 @@ Error EditorExportPlatform::export_project_files(const Ref & } } + // Add any files that might've been defined during the initial steps of the export plugins. 
+ err = add_shared_objects_and_extra_files_from_export_plugins(); + if (err != OK) { + return err; + } + HashMap export_cache; String export_base_path = ProjectSettings::get_singleton()->get_project_data_path().path_join("exported/") + itos(custom_resources_hash); @@ -1283,6 +1293,10 @@ Error EditorExportPlatform::export_project_files(const Ref & } } + for (int i = 0; i < export_plugins.size(); i++) { + export_plugins.write[i]->set_export_base_path(export_base_path); + } + //store everything in the export medium int total = paths.size(); // idx is incremented at the beginning of the paths loop to easily allow @@ -1521,6 +1535,13 @@ Error EditorExportPlatform::export_project_files(const Ref & plugin->_end_customize_scenes(); } } + + // Add any files that might've been defined during the final steps of the export plugins. + err = add_shared_objects_and_extra_files_from_export_plugins(); + if (err != OK) { + return err; + } + //save config! Vector custom_list; diff --git a/editor/export/editor_export_platform_apple_embedded.cpp b/editor/export/editor_export_platform_apple_embedded.cpp index 02e8b86609b..c5637b3da1c 100644 --- a/editor/export/editor_export_platform_apple_embedded.cpp +++ b/editor/export/editor_export_platform_apple_embedded.cpp @@ -51,6 +51,10 @@ void EditorExportPlatformAppleEmbedded::get_preset_features(const Refpush_back("etc2"); r_features->push_back("astc"); + if (p_preset->get("shader_baker/enabled")) { + r_features->push_back("shader_baker"); + } + Vector architectures = _get_preset_architectures(p_preset); for (int i = 0; i < architectures.size(); ++i) { r_features->push_back(architectures[i]); @@ -186,6 +190,13 @@ String EditorExportPlatformAppleEmbedded::get_export_option_warning(const Editor if (access == 0) { return TTR("At least one system boot time access reason should be selected."); } + } else if (p_name == "shader_baker/enabled") { + String export_renderer = GLOBAL_GET("rendering/renderer/rendering_method.mobile"); + if 
(OS::get_singleton()->get_current_rendering_method() == "gl_compatibility") { + return TTR("\"Shader Baker\" doesn't work with the Compatibility renderer."); + } else if (OS::get_singleton()->get_current_rendering_method() != export_renderer) { + return vformat(TTR("The editor is currently using a different renderer than what the target platform will use. \"Shader Baker\" won't be able to include core shaders. Switch to \"%s\" renderer temporarily to fix this."), export_renderer); + } } } return String(); @@ -297,6 +308,8 @@ void EditorExportPlatformAppleEmbedded::get_export_options(List *r r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "capabilities/performance_a12"), false)); r_options->push_back(ExportOption(PropertyInfo(Variant::PACKED_STRING_ARRAY, "capabilities/additional"), PackedStringArray())); + r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "shader_baker/enabled"), false)); + r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "user_data/accessible_from_files_app"), false)); r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "user_data/accessible_from_itunes_sharing"), false)); diff --git a/editor/export/editor_export_platform_pc.cpp b/editor/export/editor_export_platform_pc.cpp index 15d684cac5e..d3a1b97acde 100644 --- a/editor/export/editor_export_platform_pc.cpp +++ b/editor/export/editor_export_platform_pc.cpp @@ -42,6 +42,9 @@ void EditorExportPlatformPC::get_preset_features(const Ref & r_features->push_back("etc2"); r_features->push_back("astc"); } + if (p_preset->get("shader_baker/enabled")) { + r_features->push_back("shader_baker"); + } // PC platforms only have one architecture per export, since // we export a single executable instead of a bundle. 
r_features->push_back(p_preset->get("binary_format/architecture")); @@ -58,6 +61,21 @@ void EditorExportPlatformPC::get_export_options(List *r_options) c r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "texture_format/s3tc_bptc"), true)); r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "texture_format/etc2_astc"), false)); + + r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "shader_baker/enabled"), false)); +} + +String EditorExportPlatformPC::get_export_option_warning(const EditorExportPreset *p_preset, const StringName &p_name) const { + if (p_name == "shader_baker/enabled") { + String export_renderer = GLOBAL_GET("rendering/renderer/rendering_method"); + if (OS::get_singleton()->get_current_rendering_method() == "gl_compatibility") { + return TTR("\"Shader Baker\" is not supported when using the Compatibility renderer."); + } else if (OS::get_singleton()->get_current_rendering_method() != export_renderer) { + return vformat(TTR("The editor is currently using a different renderer than what the target platform will use. \"Shader Baker\" won't be able to include core shaders. 
Switch to the \"%s\" renderer temporarily to fix this."), export_renderer); + } + } + + return String(); } String EditorExportPlatformPC::get_name() const { diff --git a/editor/export/editor_export_platform_pc.h b/editor/export/editor_export_platform_pc.h index d5ed2a78359..5760543241b 100644 --- a/editor/export/editor_export_platform_pc.h +++ b/editor/export/editor_export_platform_pc.h @@ -44,8 +44,8 @@ private: public: virtual void get_preset_features(const Ref &p_preset, List *r_features) const override; - virtual void get_export_options(List *r_options) const override; + virtual String get_export_option_warning(const EditorExportPreset *p_preset, const StringName &p_name) const override; virtual String get_name() const override; virtual String get_os_name() const override; diff --git a/editor/export/editor_export_plugin.cpp b/editor/export/editor_export_plugin.cpp index fbabd7d88d3..2d5e038fc2f 100644 --- a/editor/export/editor_export_plugin.cpp +++ b/editor/export/editor_export_plugin.cpp @@ -33,6 +33,14 @@ #include "core/config/project_settings.h" #include "editor/export/editor_export_platform.h" +void EditorExportPlugin::set_export_base_path(const String &p_export_base_path) { + export_base_path = p_export_base_path; +} + +const String &EditorExportPlugin::get_export_base_path() const { + return export_base_path; +} + void EditorExportPlugin::set_export_preset(const Ref &p_preset) { if (p_preset.is_valid()) { export_preset = p_preset; diff --git a/editor/export/editor_export_plugin.h b/editor/export/editor_export_plugin.h index d721760d729..2b7ec0879a3 100644 --- a/editor/export/editor_export_plugin.h +++ b/editor/export/editor_export_plugin.h @@ -42,6 +42,7 @@ class EditorExportPlugin : public RefCounted { friend class EditorExportPlatform; friend class EditorExportPreset; + String export_base_path; Ref export_preset; Vector shared_objects; @@ -87,6 +88,8 @@ class EditorExportPlugin : public RefCounted { String _has_valid_export_configuration(const Ref 
&p_export_platform, const Ref &p_preset); protected: + void set_export_base_path(const String &p_export_base_path); + const String &get_export_base_path() const; void set_export_preset(const Ref &p_preset); Ref get_export_preset() const; Ref get_export_platform() const; diff --git a/editor/import/resource_importer_shader_file.cpp b/editor/import/resource_importer_shader_file.cpp index 6cfca20a05b..2faa74fc62b 100644 --- a/editor/import/resource_importer_shader_file.cpp +++ b/editor/import/resource_importer_shader_file.cpp @@ -89,11 +89,6 @@ static String _include_function(const String &p_path, void *userpointer) { } Error ResourceImporterShaderFile::import(ResourceUID::ID p_source_id, const String &p_source_file, const String &p_save_path, const HashMap &p_options, List *r_platform_variants, List *r_gen_files, Variant *r_metadata) { - /* STEP 1, Read shader code */ - ERR_FAIL_COND_V_EDMSG((OS::get_singleton()->get_current_rendering_method() == "gl_compatibility"), ERR_UNAVAILABLE, "Cannot import custom .glsl shaders when using the Compatibility renderer. Please switch to the Forward+ or Mobile renderer to use custom shaders."); - ERR_FAIL_COND_V_EDMSG((OS::get_singleton()->get_current_rendering_method() == "dummy"), ERR_UNAVAILABLE, "Cannot import custom .glsl shaders when using the Dummy renderer. 
Please switch to the Forward+ or Mobile renderer to use custom shaders."); - ERR_FAIL_COND_V_EDMSG((DisplayServer::get_singleton()->get_name() == "headless"), ERR_UNAVAILABLE, "Cannot import custom .glsl shaders when running in headless mode."); - Error err; Ref file = FileAccess::open(p_source_file, FileAccess::READ, &err); ERR_FAIL_COND_V(err != OK, ERR_CANT_OPEN); diff --git a/editor/plugins/SCsub b/editor/plugins/SCsub index 2d3066c7c9e..8974f5ba737 100644 --- a/editor/plugins/SCsub +++ b/editor/plugins/SCsub @@ -6,4 +6,5 @@ Import("env") env.add_source_files(env.editor_sources, "*.cpp") SConscript("gizmos/SCsub") +SConscript("shader_baker/SCsub") SConscript("tiles/SCsub") diff --git a/editor/plugins/shader_baker/SCsub b/editor/plugins/shader_baker/SCsub new file mode 100644 index 00000000000..764b73760c4 --- /dev/null +++ b/editor/plugins/shader_baker/SCsub @@ -0,0 +1,13 @@ +#!/usr/bin/env python +from misc.utility.scons_hints import * + +Import("env") + +if env["vulkan"]: + env.add_source_files(env.editor_sources, "shader_baker_export_plugin_platform_vulkan.cpp") + +if env["d3d12"]: + env.add_source_files(env.editor_sources, "shader_baker_export_plugin_platform_d3d12.cpp") + +if env["metal"]: + env.add_source_files(env.editor_sources, "shader_baker_export_plugin_platform_metal.cpp") diff --git a/editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.cpp b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.cpp new file mode 100644 index 00000000000..690d4bc9dd6 --- /dev/null +++ b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.cpp @@ -0,0 +1,57 @@ +/**************************************************************************/ +/* shader_baker_export_plugin_platform_d3d12.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ 
+/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "shader_baker_export_plugin_platform_d3d12.h" + +#include "drivers/d3d12/rendering_shader_container_d3d12.h" + +#include + +RenderingShaderContainerFormat *ShaderBakerExportPluginPlatformD3D12::create_shader_container_format(const Ref &p_platform) { + if (lib_d3d12 == nullptr) { + lib_d3d12 = LoadLibraryW(L"D3D12.dll"); + ERR_FAIL_NULL_V_MSG(lib_d3d12, nullptr, "Unable to load D3D12.dll."); + } + + // Shader Model 6.2 is required to export shaders that have FP16 variants. 
+ RenderingShaderContainerFormatD3D12 *shader_container_format_d3d12 = memnew(RenderingShaderContainerFormatD3D12); + shader_container_format_d3d12->set_lib_d3d12(lib_d3d12); + return shader_container_format_d3d12; +} + +bool ShaderBakerExportPluginPlatformD3D12::matches_driver(const String &p_driver) { + return p_driver == "d3d12"; +} + +ShaderBakerExportPluginPlatformD3D12 ::~ShaderBakerExportPluginPlatformD3D12() { + if (lib_d3d12 != nullptr) { + FreeLibrary((HMODULE)(lib_d3d12)); + } +} diff --git a/editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.h b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.h new file mode 100644 index 00000000000..abc5aa81e08 --- /dev/null +++ b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_d3d12.h @@ -0,0 +1,45 @@ +/**************************************************************************/ +/* shader_baker_export_plugin_platform_d3d12.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "editor/plugins/shader_baker_export_plugin.h" + +class ShaderBakerExportPluginPlatformD3D12 : public ShaderBakerExportPluginPlatform { + GDCLASS(ShaderBakerExportPluginPlatformD3D12, ShaderBakerExportPluginPlatform); + +private: + void *lib_d3d12 = nullptr; + +public: + virtual RenderingShaderContainerFormat *create_shader_container_format(const Ref &p_platform) override; + virtual bool matches_driver(const String &p_driver) override; + virtual ~ShaderBakerExportPluginPlatformD3D12() override; +}; diff --git a/editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.cpp b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.cpp new file mode 100644 index 00000000000..13de8879c72 --- /dev/null +++ b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.cpp @@ -0,0 +1,51 @@ +/**************************************************************************/ +/* shader_baker_export_plugin_platform_metal.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#include "shader_baker_export_plugin_platform_metal.h" + +#include "drivers/metal/rendering_shader_container_metal.h" + +RenderingShaderContainerFormat *ShaderBakerExportPluginPlatformMetal::create_shader_container_format(const Ref &p_platform) { + const String &os_name = p_platform->get_os_name(); + const MetalDeviceProfile *profile; + + if (os_name == U"macOS") { + profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::macOS, MetalDeviceProfile::GPU::Apple7); + } else if (os_name == U"iOS") { + profile = MetalDeviceProfile::get_profile(MetalDeviceProfile::Platform::iOS, MetalDeviceProfile::GPU::Apple7); + } else { + ERR_FAIL_V_MSG(nullptr, vformat("Unsupported platform: %s", os_name)); + } + return memnew(RenderingShaderContainerFormatMetal(profile, true)); +} + +bool ShaderBakerExportPluginPlatformMetal::matches_driver(const String &p_driver) { + return p_driver == "metal"; +} diff --git a/editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.h b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.h new file mode 100644 index 00000000000..8d65cbe4dc1 --- /dev/null +++ b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_metal.h @@ -0,0 +1,39 @@ +/**************************************************************************/ +/* shader_baker_export_plugin_platform_metal.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#pragma once + +#include "editor/plugins/shader_baker_export_plugin.h" + +class ShaderBakerExportPluginPlatformMetal : public ShaderBakerExportPluginPlatform { +public: + virtual RenderingShaderContainerFormat *create_shader_container_format(const Ref &p_platform) override; + virtual bool matches_driver(const String &p_driver) override; +}; diff --git a/editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.cpp b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.cpp new file mode 100644 index 00000000000..0ba7ff351ee --- /dev/null +++ b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.cpp @@ -0,0 +1,41 @@ +/**************************************************************************/ +/* shader_baker_export_plugin_platform_vulkan.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "shader_baker_export_plugin_platform_vulkan.h" + +#include "drivers/vulkan/rendering_shader_container_vulkan.h" + +RenderingShaderContainerFormat *ShaderBakerExportPluginPlatformVulkan::create_shader_container_format(const Ref &p_platform) { + return memnew(RenderingShaderContainerFormatVulkan); +} + +bool ShaderBakerExportPluginPlatformVulkan::matches_driver(const String &p_driver) { + return p_driver == "vulkan"; +} diff --git a/editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.h b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.h new file mode 100644 index 00000000000..ff174a03eb8 --- /dev/null +++ b/editor/plugins/shader_baker/shader_baker_export_plugin_platform_vulkan.h @@ -0,0 +1,41 @@ +/**************************************************************************/ +/* shader_baker_export_plugin_platform_vulkan.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// Shader baker backend that produces Vulkan shader containers.
class ShaderBakerExportPluginPlatformVulkan : public ShaderBakerExportPluginPlatform {
	GDCLASS(ShaderBakerExportPluginPlatformVulkan, ShaderBakerExportPluginPlatform);

public:
	// Creates the container format used to bake shaders for p_platform.
	// The caller receives a raw pointer allocated with memnew.
	virtual RenderingShaderContainerFormat *create_shader_container_format(const Ref<EditorExportPlatform> &p_platform) override;
	// Returns true when p_driver names the driver this backend serves.
	virtual bool matches_driver(const String &p_driver) override;
};
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "shader_baker_export_plugin.h" + +#include "core/config/project_settings.h" +#include "core/version.h" +#include "editor/editor_node.h" +#include "scene/3d/label_3d.h" +#include "scene/3d/sprite_3d.h" +#include "servers/rendering/renderer_rd/renderer_scene_render_rd.h" +#include "servers/rendering/renderer_rd/storage_rd/material_storage.h" + +// Ensure that AlphaCut is the same between the two classes so we can share the code to detect transparency. +static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_DISABLED, Label3D::ALPHA_CUT_DISABLED)); +static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_DISCARD, Label3D::ALPHA_CUT_DISCARD)); +static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_OPAQUE_PREPASS, Label3D::ALPHA_CUT_OPAQUE_PREPASS)); +static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_HASH, Label3D::ALPHA_CUT_HASH)); +static_assert(ENUM_MEMBERS_EQUAL(SpriteBase3D::ALPHA_CUT_MAX, Label3D::ALPHA_CUT_MAX)); + +String ShaderBakerExportPlugin::get_name() const { + return "ShaderBaker"; +} + +bool ShaderBakerExportPlugin::_is_active(const Vector &p_features) const { + // Shader baker should only work when a RendererRD driver is active, as the embedded shaders won't be found otherwise. 
+ return RendererSceneRenderRD::get_singleton() != nullptr && RendererRD::MaterialStorage::get_singleton() != nullptr && p_features.has("shader_baker"); +} + +bool ShaderBakerExportPlugin::_initialize_container_format(const Ref &p_platform, const Vector &p_features) { + Variant driver_variant = GLOBAL_GET("rendering/rendering_device/driver." + p_platform->get_os_name().to_lower()); + if (!driver_variant.is_string()) { + driver_variant = GLOBAL_GET("rendering/rendering_device/driver"); + if (!driver_variant.is_string()) { + return false; + } + } + + shader_container_driver = driver_variant; + + for (Ref platform : platforms) { + if (platform->matches_driver(shader_container_driver)) { + shader_container_format = platform->create_shader_container_format(p_platform); + ERR_FAIL_NULL_V_MSG(shader_container_format, false, "Unable to create shader container format for the export platform."); + return true; + } + } + + return false; +} + +void ShaderBakerExportPlugin::_cleanup_container_format() { + if (shader_container_format != nullptr) { + memdelete(shader_container_format); + shader_container_format = nullptr; + } +} + +bool ShaderBakerExportPlugin::_initialize_cache_directory() { + shader_cache_export_path = get_export_base_path().path_join("shader_baker").path_join(shader_cache_platform_name).path_join(shader_container_driver); + + if (!DirAccess::dir_exists_absolute(shader_cache_export_path)) { + Error err = DirAccess::make_dir_recursive_absolute(shader_cache_export_path); + ERR_FAIL_COND_V_MSG(err != OK, false, "Can't create shader cache folder for exporting."); + } + + return true; +} + +bool ShaderBakerExportPlugin::_begin_customize_resources(const Ref &p_platform, const Vector &p_features) { + if (!_is_active(p_features)) { + return false; + } + + if (!_initialize_container_format(p_platform, p_features)) { + return false; + } + + shader_cache_platform_name = p_platform->get_os_name(); + shader_cache_renderer_name = 
RendererSceneRenderRD::get_singleton()->get_name(); + tasks_processed = 0; + tasks_total = 0; + tasks_cancelled = false; + + StringBuilder to_hash; + to_hash.append("[GodotVersionNumber]"); + to_hash.append(VERSION_NUMBER); + to_hash.append("[GodotVersionHash]"); + to_hash.append(VERSION_HASH); + to_hash.append("[Renderer]"); + to_hash.append(shader_cache_renderer_name); + customization_configuration_hash = to_hash.as_string().hash64(); + + BitField renderer_features = {}; + bool xr_enabled = GLOBAL_GET("xr/shaders/enabled"); + renderer_features.set_flag(RenderingShaderLibrary::FEATURE_ADVANCED_BIT); + if (xr_enabled) { + renderer_features.set_flag(RenderingShaderLibrary::FEATURE_MULTIVIEW_BIT); + } + + int vrs_mode = GLOBAL_GET("rendering/vrs/mode"); + if (vrs_mode != 0) { + renderer_features.set_flag(RenderingShaderLibrary::FEATURE_VRS_BIT); + } + + RendererSceneRenderRD::get_singleton()->enable_features(renderer_features); + + // Included all shaders created by renderers and effects. + ShaderRD::shaders_embedded_set_lock(); + const ShaderRD::ShaderVersionPairSet &pair_set = ShaderRD::shaders_embedded_set_get(); + for (Pair pair : pair_set) { + _customize_shader_version(pair.first, pair.second); + } + + ShaderRD::shaders_embedded_set_unlock(); + + // Include all shaders created by embedded materials. 
+ RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); + material_storage->shader_embedded_set_lock(); + const HashSet &rid_set = material_storage->shader_embedded_set_get(); + for (RID rid : rid_set) { + RendererRD::MaterialStorage::ShaderData *shader_data = material_storage->shader_get_data(rid); + if (shader_data != nullptr) { + Pair shader_version_pair = shader_data->get_native_shader_and_version(); + if (shader_version_pair.first != nullptr) { + _customize_shader_version(shader_version_pair.first, shader_version_pair.second); + } + } + } + + material_storage->shader_embedded_set_unlock(); + + return true; +} + +bool ShaderBakerExportPlugin::_begin_customize_scenes(const Ref &p_platform, const Vector &p_features) { + if (!_is_active(p_features)) { + return false; + } + + if (shader_container_format == nullptr) { + // Resource customization failed to initialize. + return false; + } + + return true; +} + +void ShaderBakerExportPlugin::_end_customize_resources() { + if (!_initialize_cache_directory()) { + return; + } + + // Run a progress bar that waits for all shader baking tasks to finish. + bool progress_active = true; + EditorProgress editor_progress("baking_shaders", TTR("Baking shaders"), tasks_total); + editor_progress.step("Baking...", 0); + while (progress_active) { + uint32_t tasks_for_progress = 0; + { + MutexLock lock(tasks_mutex); + if (tasks_processed >= tasks_total) { + progress_active = false; + } else { + tasks_condition.wait(lock); + tasks_for_progress = tasks_processed; + } + } + + if (progress_active && editor_progress.step("Baking...", tasks_for_progress)) { + // User skipped the shader baker, we just don't pack the shaders in the project. 
+ tasks_cancelled = true; + progress_active = false; + } + } + + String shader_cache_user_dir = ShaderRD::get_shader_cache_user_dir(); + for (const ShaderGroupItem &group_item : shader_group_items) { + // Wait for all shader compilation tasks of the group to be finished. + for (WorkerThreadPool::TaskID task_id : group_item.variant_tasks) { + WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id); + } + + if (!tasks_cancelled) { + WorkResult work_result; + { + MutexLock lock(shader_work_results_mutex); + work_result = shader_work_results[group_item.cache_path]; + } + + PackedByteArray cache_file_bytes = ShaderRD::save_shader_cache_bytes(group_item.variants, work_result.variant_data); + add_file(shader_cache_user_dir.path_join(group_item.cache_path), cache_file_bytes, false); + + String cache_file_path = shader_cache_export_path.path_join(group_item.cache_path); + if (!DirAccess::exists(cache_file_path)) { + DirAccess::make_dir_recursive_absolute(cache_file_path.get_base_dir()); + } + + Ref cache_file_access = FileAccess::open(cache_file_path, FileAccess::WRITE); + if (cache_file_access.is_valid()) { + cache_file_access->store_buffer(cache_file_bytes); + } + } + } + + if (!tasks_cancelled) { + String file_cache_path = shader_cache_export_path.path_join("file_cache"); + Ref cache_list_access = FileAccess::open(file_cache_path, FileAccess::READ_WRITE); + if (cache_list_access.is_null()) { + cache_list_access = FileAccess::open(file_cache_path, FileAccess::WRITE); + } + + if (cache_list_access.is_valid()) { + String cache_list_line; + while (cache_list_line = cache_list_access->get_line(), !cache_list_line.is_empty()) { + PackedByteArray cache_file_bytes = FileAccess::get_file_as_bytes(shader_cache_export_path.path_join(cache_list_line)); + if (!cache_file_bytes.is_empty()) { + add_file(shader_cache_user_dir.path_join(cache_list_line), cache_file_bytes, false); + } + + shader_paths_processed.erase(cache_list_line); + } + + for (const String &shader_path : 
shader_paths_processed) { + cache_list_access->store_line(shader_path); + } + + cache_list_access->close(); + } + } + + shader_paths_processed.clear(); + shader_work_results.clear(); + shader_group_items.clear(); + + _cleanup_container_format(); +} + +Ref ShaderBakerExportPlugin::_customize_resource(const Ref &p_resource, const String &p_path) { + RendererRD::MaterialStorage *singleton = RendererRD::MaterialStorage::get_singleton(); + DEV_ASSERT(singleton != nullptr); + + Ref material = p_resource; + if (material.is_valid()) { + RID material_rid = material->get_rid(); + if (material_rid.is_valid()) { + RendererRD::MaterialStorage::ShaderData *shader_data = singleton->material_get_shader_data(material_rid); + if (shader_data != nullptr) { + Pair shader_version_pair = shader_data->get_native_shader_and_version(); + if (shader_version_pair.first != nullptr) { + _customize_shader_version(shader_version_pair.first, shader_version_pair.second); + } + } + } + } + + return Ref(); +} + +Node *ShaderBakerExportPlugin::_customize_scene(Node *p_root, const String &p_path) { + LocalVector nodes_to_visit; + nodes_to_visit.push_back(p_root); + while (!nodes_to_visit.is_empty()) { + // Visit all nodes recursively in the scene to find the Label3Ds and Sprite3Ds. + Node *node = nodes_to_visit[nodes_to_visit.size() - 1]; + nodes_to_visit.remove_at(nodes_to_visit.size() - 1); + + Label3D *label_3d = Object::cast_to(node); + Sprite3D *sprite_3d = Object::cast_to(node); + if (label_3d != nullptr || sprite_3d != nullptr) { + // Create materials for Label3D and Sprite3D, which are normally generated at runtime on demand. + HashMap properties; + + // These must match the defaults set by Sprite3D/Label3D. + properties["transparent"] = true; // Label3D doesn't have this property, but it is always true anyway. 
+ properties["shaded"] = false; + properties["double_sided"] = true; + properties["no_depth_test"] = false; + properties["fixed_size"] = false; + properties["billboard"] = StandardMaterial3D::BILLBOARD_DISABLED; + properties["texture_filter"] = StandardMaterial3D::TEXTURE_FILTER_LINEAR_WITH_MIPMAPS; + properties["alpha_antialiasing_mode"] = StandardMaterial3D::ALPHA_ANTIALIASING_OFF; + properties["alpha_cut"] = SpriteBase3D::ALPHA_CUT_DISABLED; + + List property_list; + node->get_property_list(&property_list); + for (const PropertyInfo &info : property_list) { + bool valid = false; + Variant property = node->get(info.name, &valid); + if (valid) { + properties[info.name] = property; + } + } + + // This must follow the logic in Sprite3D::draw_texture_rect(). + BaseMaterial3D::Transparency mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_DISABLED; + if (properties["transparent"]) { + SpriteBase3D::AlphaCutMode acm = SpriteBase3D::AlphaCutMode(int(properties["alpha_cut"])); + if (acm == SpriteBase3D::ALPHA_CUT_DISCARD) { + mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_ALPHA_SCISSOR; + } else if (acm == SpriteBase3D::ALPHA_CUT_OPAQUE_PREPASS) { + mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_ALPHA_DEPTH_PRE_PASS; + } else if (acm == SpriteBase3D::ALPHA_CUT_HASH) { + mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_ALPHA_HASH; + } else { + mat_transparency = BaseMaterial3D::Transparency::TRANSPARENCY_ALPHA; + } + } + + StandardMaterial3D::BillboardMode billboard_mode = StandardMaterial3D::BillboardMode(int(properties["billboard"])); + Ref sprite_3d_material = StandardMaterial3D::get_material_for_2d(bool(properties["shaded"]), mat_transparency, bool(properties["double_sided"]), billboard_mode == StandardMaterial3D::BILLBOARD_ENABLED, billboard_mode == StandardMaterial3D::BILLBOARD_FIXED_Y, false, bool(properties["no_depth_test"]), bool(properties["fixed_size"]), 
// Returns the hash stored in customization_configuration_hash for the current session.
uint64_t ShaderBakerExportPlugin::_get_customization_configuration_hash() const {
	return customization_configuration_hash;
}
lock(shader_work_results_mutex); + shader_work_results[cache_path].variant_data.resize(variant_count); + } + } + + for (int64_t i = 0; i < variant_count; i++) { + int group = p_shader->get_variant_to_group(i); + if (!p_shader->is_variant_enabled(i) || !groups_to_compile.has(group)) { + continue; + } + + WorkItem work_item; + work_item.cache_path = group_items[group].cache_path; + work_item.shader_name = p_shader->get_name(); + work_item.stage_sources = p_shader->version_build_variant_stage_sources(p_version, i); + work_item.variant = i; + + WorkerThreadPool::TaskID task_id = WorkerThreadPool::get_singleton()->add_template_task(this, &ShaderBakerExportPlugin::_process_work_item, work_item); + group_items[group].variant_tasks.push_back(task_id); + tasks_total++; + } + + for (uint32_t i : groups_to_compile) { + shader_group_items.push_back(group_items[i]); + } +} + +void ShaderBakerExportPlugin::_process_work_item(WorkItem p_work_item) { + if (!tasks_cancelled) { + // Only process the item if the tasks haven't been cancelled by the user yet. + Vector spirv_data = ShaderRD::compile_stages(p_work_item.stage_sources); + ERR_FAIL_COND_MSG(spirv_data.is_empty(), "Unable to retrieve SPIR-V data for shader"); + + RD::ShaderReflection shader_refl; + Error err = RenderingDeviceCommons::reflect_spirv(spirv_data, shader_refl); + ERR_FAIL_COND_MSG(err != OK, "Unable to reflect SPIR-V data that was compiled"); + + Ref shader_container = shader_container_format->create_container(); + shader_container->set_from_shader_reflection(p_work_item.shader_name, shader_refl); + + // Compile shader binary from SPIR-V. 
+ bool code_compiled = shader_container->set_code_from_spirv(spirv_data); + ERR_FAIL_COND_MSG(!code_compiled, vformat("Failed to compile code to native for SPIR-V.")); + + PackedByteArray shader_bytes = shader_container->to_bytes(); + { + MutexLock lock(shader_work_results_mutex); + shader_work_results[p_work_item.cache_path].variant_data.ptrw()[p_work_item.variant] = shader_bytes; + } + } + + { + MutexLock lock(tasks_mutex); + tasks_processed++; + } + + tasks_condition.notify_one(); +} + +ShaderBakerExportPlugin::ShaderBakerExportPlugin() { + // Do nothing. +} + +ShaderBakerExportPlugin::~ShaderBakerExportPlugin() { + // Do nothing. +} + +void ShaderBakerExportPlugin::add_platform(Ref p_platform) { + platforms.push_back(p_platform); +} + +void ShaderBakerExportPlugin::remove_platform(Ref p_platform) { + platforms.erase(p_platform); +} diff --git a/editor/plugins/shader_baker_export_plugin.h b/editor/plugins/shader_baker_export_plugin.h new file mode 100644 index 00000000000..9c114fb5e4c --- /dev/null +++ b/editor/plugins/shader_baker_export_plugin.h @@ -0,0 +1,102 @@ +/**************************************************************************/ +/* shader_baker_export_plugin.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// Interface for a shader baker backend. Each implementation serves one rendering driver
// and creates the shader container format used to bake shaders for it.
class ShaderBakerExportPluginPlatform : public RefCounted {
	GDCLASS(ShaderBakerExportPluginPlatform, RefCounted);

public:
	// Creates the container format for p_platform. A raw pointer is returned.
	// NOTE(review): ShaderBakerExportPlugin frees it with memdelete; confirm all callers do.
	virtual RenderingShaderContainerFormat *create_shader_container_format(const Ref<EditorExportPlatform> &p_platform) = 0;
	// Returns true when p_driver names the rendering driver this backend serves.
	virtual bool matches_driver(const String &p_driver) = 0;
	virtual ~ShaderBakerExportPluginPlatform() {}
};
Ref &p_platform, const Vector &p_features) override; + virtual bool _begin_customize_scenes(const Ref &p_platform, const Vector &p_features) override; + virtual void _end_customize_resources() override; + virtual Ref _customize_resource(const Ref &p_resource, const String &p_path) override; + virtual Node *_customize_scene(Node *p_root, const String &p_path) override; + virtual uint64_t _get_customization_configuration_hash() const override; + virtual void _customize_shader_version(ShaderRD *p_shader, RID p_version); + void _process_work_item(WorkItem p_work_item); + +public: + ShaderBakerExportPlugin(); + virtual ~ShaderBakerExportPlugin() override; + void add_platform(Ref p_platform); + void remove_platform(Ref p_platform); +}; diff --git a/modules/glslang/register_types.cpp b/modules/glslang/register_types.cpp index 81505f716a2..764d339bac7 100644 --- a/modules/glslang/register_types.cpp +++ b/modules/glslang/register_types.cpp @@ -31,19 +31,15 @@ #include "register_types.h" #include "core/config/engine.h" -#include "servers/rendering/rendering_device.h" +#include "shader_compile.h" #include #include #include -static Vector _compile_shader_glsl(RenderingDevice::ShaderStage p_stage, const String &p_source_code, RenderingDevice::ShaderLanguage p_language, String *r_error, const RenderingDevice *p_render_device) { - const RDD::Capabilities &capabilities = p_render_device->get_device_capabilities(); +Vector compile_glslang_shader(RenderingDeviceCommons::ShaderStage p_stage, const String &p_source_code, RenderingDeviceCommons::ShaderLanguageVersion p_language_version, RenderingDeviceCommons::ShaderSpirvVersion p_spirv_version, String *r_error) { Vector ret; - - ERR_FAIL_COND_V(p_language == RenderingDevice::SHADER_LANGUAGE_HLSL, ret); - - EShLanguage stages[RenderingDevice::SHADER_STAGE_MAX] = { + EShLanguage stages[RenderingDeviceCommons::SHADER_STAGE_MAX] = { EShLangVertex, EShLangFragment, EShLangTessControl, @@ -53,36 +49,9 @@ static Vector 
_compile_shader_glsl(RenderingDevice::ShaderStage p_stage int ClientInputSemanticsVersion = 100; // maps to, say, #define VULKAN 100 - glslang::EShTargetClientVersion ClientVersion = glslang::EShTargetVulkan_1_2; - glslang::EShTargetLanguageVersion TargetVersion = glslang::EShTargetSpv_1_5; - - if (capabilities.device_family == RDD::DEVICE_VULKAN) { - if (capabilities.version_major == 1 && capabilities.version_minor == 0) { - ClientVersion = glslang::EShTargetVulkan_1_0; - TargetVersion = glslang::EShTargetSpv_1_0; - } else if (capabilities.version_major == 1 && capabilities.version_minor == 1) { - ClientVersion = glslang::EShTargetVulkan_1_1; - TargetVersion = glslang::EShTargetSpv_1_3; - } else { - // use defaults - } - } else if (capabilities.device_family == RDD::DEVICE_DIRECTX) { - // NIR-DXIL is Vulkan 1.1-conformant. - ClientVersion = glslang::EShTargetVulkan_1_1; - // The SPIR-V part of Mesa supports 1.6, but: - // - SPIRV-Reflect won't be able to parse the compute workgroup size. - // - We want to play it safe with NIR-DXIL. - TargetVersion = glslang::EShTargetSpv_1_3; - } else if (capabilities.device_family == RDD::DEVICE_METAL) { - ClientVersion = glslang::EShTargetVulkan_1_1; - TargetVersion = glslang::EShTargetSpv_1_6; - } else { - // once we support other backends we'll need to do something here - if (r_error) { - (*r_error) = "GLSLANG - Unsupported device family"; - } - return ret; - } + // The enum values can be converted directly. 
+ glslang::EShTargetClientVersion ClientVersion = (glslang::EShTargetClientVersion)p_language_version; + glslang::EShTargetLanguageVersion TargetVersion = (glslang::EShTargetLanguageVersion)p_spirv_version; glslang::TShader shader(stages[p_stage]); CharString cs = p_source_code.ascii(); @@ -94,42 +63,6 @@ static Vector _compile_shader_glsl(RenderingDevice::ShaderStage p_stage shader.setEnvClient(glslang::EShClientVulkan, ClientVersion); shader.setEnvTarget(glslang::EShTargetSpv, TargetVersion); - { - uint32_t stage_bit = 1 << p_stage; - - uint32_t subgroup_in_shaders = uint32_t(p_render_device->limit_get(RD::LIMIT_SUBGROUP_IN_SHADERS)); - uint32_t subgroup_operations = uint32_t(p_render_device->limit_get(RD::LIMIT_SUBGROUP_OPERATIONS)); - if ((subgroup_in_shaders & stage_bit) == stage_bit) { - // stage supports subgroups - preamble += "#define has_GL_KHR_shader_subgroup_basic 1\n"; - if (subgroup_operations & RenderingDevice::SUBGROUP_VOTE_BIT) { - preamble += "#define has_GL_KHR_shader_subgroup_vote 1\n"; - } - if (subgroup_operations & RenderingDevice::SUBGROUP_ARITHMETIC_BIT) { - preamble += "#define has_GL_KHR_shader_subgroup_arithmetic 1\n"; - } - if (subgroup_operations & RenderingDevice::SUBGROUP_BALLOT_BIT) { - preamble += "#define has_GL_KHR_shader_subgroup_ballot 1\n"; - } - if (subgroup_operations & RenderingDevice::SUBGROUP_SHUFFLE_BIT) { - preamble += "#define has_GL_KHR_shader_subgroup_shuffle 1\n"; - } - if (subgroup_operations & RenderingDevice::SUBGROUP_SHUFFLE_RELATIVE_BIT) { - preamble += "#define has_GL_KHR_shader_subgroup_shuffle_relative 1\n"; - } - if (subgroup_operations & RenderingDevice::SUBGROUP_CLUSTERED_BIT) { - preamble += "#define has_GL_KHR_shader_subgroup_clustered 1\n"; - } - if (subgroup_operations & RenderingDevice::SUBGROUP_QUAD_BIT) { - preamble += "#define has_GL_KHR_shader_subgroup_quad 1\n"; - } - } - } - - if (p_render_device->has_feature(RD::SUPPORTS_MULTIVIEW)) { - preamble += "#define has_VK_KHR_multiview 1\n"; - } - if 
(!preamble.empty()) { shader.setPreamble(preamble.c_str()); } @@ -187,13 +120,6 @@ static Vector _compile_shader_glsl(RenderingDevice::ShaderStage p_stage return ret; } -static String _get_cache_key_function_glsl(const RenderingDevice *p_render_device) { - const RenderingDeviceDriver::Capabilities &capabilities = p_render_device->get_device_capabilities(); - String version; - version = "SpirVGen=" + itos(glslang::GetSpirvGeneratorVersion()) + ", major=" + itos(capabilities.version_major) + ", minor=" + itos(capabilities.version_minor) + " , subgroup_size=" + itos(p_render_device->limit_get(RD::LIMIT_SUBGROUP_SIZE)) + " , subgroup_ops=" + itos(p_render_device->limit_get(RD::LIMIT_SUBGROUP_OPERATIONS)) + " , subgroup_in_shaders=" + itos(p_render_device->limit_get(RD::LIMIT_SUBGROUP_IN_SHADERS)) + " , debug=" + itos(Engine::get_singleton()->is_generate_spirv_debug_info_enabled()); - return version; -} - void initialize_glslang_module(ModuleInitializationLevel p_level) { if (p_level != MODULE_INITIALIZATION_LEVEL_CORE) { return; @@ -202,8 +128,6 @@ void initialize_glslang_module(ModuleInitializationLevel p_level) { // Initialize in case it's not initialized. This is done once per thread // and it's safe to call multiple times. 
glslang::InitializeProcess(); - RenderingDevice::shader_set_compile_to_spirv_function(_compile_shader_glsl); - RenderingDevice::shader_set_get_cache_key_function(_get_cache_key_function_glsl); } void uninitialize_glslang_module(ModuleInitializationLevel p_level) { diff --git a/modules/glslang/shader_compile.h b/modules/glslang/shader_compile.h new file mode 100644 index 00000000000..d35086a05bb --- /dev/null +++ b/modules/glslang/shader_compile.h @@ -0,0 +1,35 @@ +/**************************************************************************/ +/* shader_compile.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "servers/rendering/rendering_device_commons.h" + +Vector compile_glslang_shader(RenderingDeviceCommons::ShaderStage p_stage, const String &p_source_code, RenderingDeviceCommons::ShaderLanguageVersion p_language_version, RenderingDeviceCommons::ShaderSpirvVersion p_spirv_version, String *r_error); diff --git a/platform/android/doc_classes/EditorExportPlatformAndroid.xml b/platform/android/doc_classes/EditorExportPlatformAndroid.xml index 62ee39fbb98..b9840154e08 100644 --- a/platform/android/doc_classes/EditorExportPlatformAndroid.xml +++ b/platform/android/doc_classes/EditorExportPlatformAndroid.xml @@ -613,6 +613,9 @@ Indicates whether the application supports extra large screen form-factors. + + If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ or Mobile renderers. + If [code]true[/code], allows the application to participate in the backup and restore infrastructure. 
diff --git a/platform/android/export/export_plugin.cpp b/platform/android/export/export_plugin.cpp index c6639a99ede..d36ce643ae7 100644 --- a/platform/android/export/export_plugin.cpp +++ b/platform/android/export/export_plugin.cpp @@ -1967,6 +1967,10 @@ void EditorExportPlatformAndroid::get_preset_features(const Refpush_back("etc2"); r_features->push_back("astc"); + if (p_preset->get("shader_baker/enabled")) { + r_features->push_back("shader_baker"); + } + Vector abis = get_enabled_abis(p_preset); for (int i = 0; i < abis.size(); ++i) { r_features->push_back(abis[i].arch); @@ -2063,6 +2067,13 @@ String EditorExportPlatformAndroid::get_export_option_warning(const EditorExport if (!bool(p_preset->get("package/show_in_app_library")) && !gradle_build_enabled) { return TTR("\"Use Gradle Build\" must be enabled to disable \"Show In App Library\"."); } + } else if (p_name == "shader_baker/enabled") { + String export_renderer = GLOBAL_GET("rendering/renderer/rendering_method.mobile"); + if (OS::get_singleton()->get_current_rendering_method() == "gl_compatibility") { + return TTR("\"Shader Baker\" is not supported when using the Compatibility renderer."); + } else if (OS::get_singleton()->get_current_rendering_method() != export_renderer) { + return vformat(TTR("The editor is currently using a different renderer than what the target platform will use. \"Shader Baker\" won't be able to include core shaders. 
Switch to the \"%s\" renderer temporarily to fix this."), export_renderer); + } } } return String(); @@ -2130,6 +2141,8 @@ void EditorExportPlatformAndroid::get_export_options(List *r_optio r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "graphics/opengl_debug"), false)); + r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "shader_baker/enabled"), false)); + r_options->push_back(ExportOption(PropertyInfo(Variant::INT, "xr_features/xr_mode", PROPERTY_HINT_ENUM, "Regular,OpenXR"), XR_MODE_REGULAR, false, true)); r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "gesture/swipe_to_dismiss"), false)); diff --git a/platform/ios/doc_classes/EditorExportPlatformIOS.xml b/platform/ios/doc_classes/EditorExportPlatformIOS.xml index 96ea2a7b46a..08c9b79bf49 100644 --- a/platform/ios/doc_classes/EditorExportPlatformIOS.xml +++ b/platform/ios/doc_classes/EditorExportPlatformIOS.xml @@ -726,6 +726,9 @@ The reasons your app use user defaults API. See [url=https://developer.apple.com/documentation/bundleresources/privacy_manifest_files/describing_use_of_required_reason_api]Describing use of required reason API[/url]. + + If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ or Mobile renderers. + A custom background color of the storyboard launch screen. diff --git a/platform/linuxbsd/doc_classes/EditorExportPlatformLinuxBSD.xml b/platform/linuxbsd/doc_classes/EditorExportPlatformLinuxBSD.xml index cbd86eca3d5..6f89d889734 100644 --- a/platform/linuxbsd/doc_classes/EditorExportPlatformLinuxBSD.xml +++ b/platform/linuxbsd/doc_classes/EditorExportPlatformLinuxBSD.xml @@ -26,6 +26,9 @@ If [code]true[/code], a console wrapper is exported alongside the main executable, which allows running the project with enabled console output. + + If [code]true[/code], shaders will be compiled and embedded in the application. 
This option is only supported when using the Forward+ or Mobile renderers. + Script code to execute on the remote host when app is finished. The following variables can be used in the script: diff --git a/platform/macos/doc_classes/EditorExportPlatformMacOS.xml b/platform/macos/doc_classes/EditorExportPlatformMacOS.xml index 802128dac6e..c6ffed2522b 100644 --- a/platform/macos/doc_classes/EditorExportPlatformMacOS.xml +++ b/platform/macos/doc_classes/EditorExportPlatformMacOS.xml @@ -695,6 +695,9 @@ Indicates whether your app uses data for tracking. See [url=https://developer.apple.com/documentation/bundleresources/privacy_manifest_files]Privacy manifest files[/url]. + + If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ or Mobile renderers. + Script code to execute on the remote host when app is finished. The following variables can be used in the script: diff --git a/platform/macos/export/export_plugin.cpp b/platform/macos/export/export_plugin.cpp index d59b6e0ef83..06b9e7a440d 100644 --- a/platform/macos/export/export_plugin.cpp +++ b/platform/macos/export/export_plugin.cpp @@ -63,6 +63,10 @@ void EditorExportPlatformMacOS::get_preset_features(const Refget("shader_baker/enabled")) { + r_features->push_back("shader_baker"); + } + if (architecture == "universal") { r_features->push_back("x86_64"); r_features->push_back("arm64"); @@ -99,6 +103,15 @@ String EditorExportPlatformMacOS::get_export_option_warning(const EditorExportPr } } + if (p_name == "shader_baker/enabled") { + String export_renderer = GLOBAL_GET("rendering/renderer/rendering_method"); + if (OS::get_singleton()->get_current_rendering_method() == "gl_compatibility") { + return TTR("\"Shader Baker\" is not supported when using the Compatibility renderer."); + } else if (OS::get_singleton()->get_current_rendering_method() != export_renderer) { + return vformat(TTR("The editor is currently using a different renderer than 
what the target platform will use. \"Shader Baker\" won't be able to include core shaders. Switch to the \"%s\" renderer temporarily to fix this."), export_renderer); + } + } + if (p_name == "codesign/certificate_file" || p_name == "codesign/certificate_password" || p_name == "codesign/identity") { if (dist_type == 2) { if (ad_hoc) { @@ -468,6 +481,8 @@ void EditorExportPlatformMacOS::get_export_options(List *r_options r_options->push_back(ExportOption(PropertyInfo(Variant::INT, "application/export_angle", PROPERTY_HINT_ENUM, "Auto,Yes,No"), 0, true)); r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "display/high_res"), true)); + r_options->push_back(ExportOption(PropertyInfo(Variant::BOOL, "shader_baker/enabled"), false)); + r_options->push_back(ExportOption(PropertyInfo(Variant::STRING, "application/additional_plist_content", PROPERTY_HINT_MULTILINE_TEXT), "")); r_options->push_back(ExportOption(PropertyInfo(Variant::STRING, "xcode/platform_build"), "14C18")); diff --git a/platform/visionos/doc_classes/EditorExportPlatformVisionOS.xml b/platform/visionos/doc_classes/EditorExportPlatformVisionOS.xml index 17d6f4596d0..5408bbf9111 100644 --- a/platform/visionos/doc_classes/EditorExportPlatformVisionOS.xml +++ b/platform/visionos/doc_classes/EditorExportPlatformVisionOS.xml @@ -578,6 +578,9 @@ The reasons your app use user defaults API. See [url=https://developer.apple.com/documentation/bundleresources/privacy_manifest_files/describing_use_of_required_reason_api]Describing use of required reason API[/url]. + + If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ and Mobile renderers. + If [code]true[/code], the app "Documents" folder can be accessed via "Files" app. See [url=https://developer.apple.com/documentation/bundleresources/information_property_list/lssupportsopeningdocumentsinplace]LSSupportsOpeningDocumentsInPlace[/url]. 
diff --git a/platform/windows/doc_classes/EditorExportPlatformWindows.xml b/platform/windows/doc_classes/EditorExportPlatformWindows.xml index 9c8ce371586..3faf75bf393 100644 --- a/platform/windows/doc_classes/EditorExportPlatformWindows.xml +++ b/platform/windows/doc_classes/EditorExportPlatformWindows.xml @@ -98,6 +98,9 @@ If [code]true[/code], a console wrapper executable is exported alongside the main executable, which allows running the project with enabled console output. + + If [code]true[/code], shaders will be compiled and embedded in the application. This option is only supported when using the Forward+ and Mobile renderers. + Script code to execute on the remote host when app is finished. The following variables can be used in the script: diff --git a/scene/resources/3d/sky_material.cpp b/scene/resources/3d/sky_material.cpp index 007d532592b..4d5b0ade29f 100644 --- a/scene/resources/3d/sky_material.cpp +++ b/scene/resources/3d/sky_material.cpp @@ -85,6 +85,8 @@ void ProceduralSkyMaterial::set_sky_cover(const Ref &p_sky_cover) { RS::get_singleton()->material_set_param(_get_material(), "sky_cover", Variant()); } + _update_shader(use_debanding, sky_cover.is_valid()); + if (shader_set) { RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache()); } @@ -164,7 +166,7 @@ float ProceduralSkyMaterial::get_sun_curve() const { void ProceduralSkyMaterial::set_use_debanding(bool p_use_debanding) { use_debanding = p_use_debanding; - _update_shader(); + _update_shader(use_debanding, sky_cover.is_valid()); // Only set if shader already compiled if (shader_set) { RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache()); @@ -195,7 +197,7 @@ RID ProceduralSkyMaterial::get_shader_cache() const { } RID ProceduralSkyMaterial::get_rid() const { - _update_shader(); + _update_shader(use_debanding, sky_cover.is_valid()); if (!shader_set) { RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache()); shader_set = true; @@ 
-204,7 +206,7 @@ RID ProceduralSkyMaterial::get_rid() const { } RID ProceduralSkyMaterial::get_shader_rid() const { - _update_shader(); + _update_shader(use_debanding, sky_cover.is_valid()); return get_shader_cache(); } @@ -281,22 +283,21 @@ void ProceduralSkyMaterial::_bind_methods() { } void ProceduralSkyMaterial::cleanup_shader() { - if (shader_cache[0].is_valid()) { - RS::get_singleton()->free(shader_cache[0]); - RS::get_singleton()->free(shader_cache[1]); - RS::get_singleton()->free(shader_cache[2]); - RS::get_singleton()->free(shader_cache[3]); + for (int i = 0; i < 4; i++) { + if (shader_cache[i].is_valid()) { + RS::get_singleton()->free(shader_cache[i]); + } } } -void ProceduralSkyMaterial::_update_shader() { +void ProceduralSkyMaterial::_update_shader(bool p_use_debanding, bool p_use_sky_cover) { MutexLock shader_lock(shader_mutex); - if (shader_cache[0].is_null()) { - for (int i = 0; i < 4; i++) { - shader_cache[i] = RS::get_singleton()->shader_create(); + int index = int(p_use_debanding) + int(p_use_sky_cover) * 2; + if (shader_cache[index].is_null()) { + shader_cache[index] = RS::get_singleton()->shader_create(); - // Add a comment to describe the shader origin (useful when converting to ShaderMaterial). - RS::get_singleton()->shader_set_code(shader_cache[i], vformat(R"( + // Add a comment to describe the shader origin (useful when converting to ShaderMaterial). + RS::get_singleton()->shader_set_code(shader_cache[index], vformat(R"( // NOTE: Shader automatically converted from )" GODOT_VERSION_NAME " " GODOT_VERSION_FULL_CONFIG R"('s ProceduralSkyMaterial. shader_type sky; @@ -370,8 +371,7 @@ void sky() { COLOR = mix(ground, sky, step(0.0, EYEDIR.y)) * exposure; } )", - (i % 2) ? "render_mode use_debanding;" : "", i > 1 ? "vec4 sky_cover_texture = texture(sky_cover, SKY_COORDS);" : "", i > 1 ? "sky += (sky_cover_texture.rgb * sky_cover_modulate.rgb) * sky_cover_texture.a * sky_cover_modulate.a;" : "")); - } + p_use_debanding ? 
"render_mode use_debanding;" : "", p_use_sky_cover ? "vec4 sky_cover_texture = texture(sky_cover, SKY_COORDS);" : "", p_use_sky_cover ? "sky += (sky_cover_texture.rgb * sky_cover_modulate.rgb) * sky_cover_texture.a * sky_cover_modulate.a;" : "")); } } @@ -416,7 +416,7 @@ Ref PanoramaSkyMaterial::get_panorama() const { void PanoramaSkyMaterial::set_filtering_enabled(bool p_enabled) { filter = p_enabled; notify_property_list_changed(); - _update_shader(); + _update_shader(filter); // Only set if shader already compiled if (shader_set) { RS::get_singleton()->material_set_shader(_get_material(), shader_cache[int(filter)]); @@ -441,10 +441,8 @@ Shader::Mode PanoramaSkyMaterial::get_shader_mode() const { } RID PanoramaSkyMaterial::get_rid() const { - _update_shader(); - // Don't compile shaders until first use, then compile both + _update_shader(filter); if (!shader_set) { - RS::get_singleton()->material_set_shader(_get_material(), shader_cache[1 - int(filter)]); RS::get_singleton()->material_set_shader(_get_material(), shader_cache[int(filter)]); shader_set = true; } @@ -452,7 +450,7 @@ RID PanoramaSkyMaterial::get_rid() const { } RID PanoramaSkyMaterial::get_shader_rid() const { - _update_shader(); + _update_shader(filter); return shader_cache[int(filter)]; } @@ -475,20 +473,21 @@ Mutex PanoramaSkyMaterial::shader_mutex; RID PanoramaSkyMaterial::shader_cache[2]; void PanoramaSkyMaterial::cleanup_shader() { - if (shader_cache[0].is_valid()) { - RS::get_singleton()->free(shader_cache[0]); - RS::get_singleton()->free(shader_cache[1]); + for (int i = 0; i < 2; i++) { + if (shader_cache[i].is_valid()) { + RS::get_singleton()->free(shader_cache[i]); + } } } -void PanoramaSkyMaterial::_update_shader() { +void PanoramaSkyMaterial::_update_shader(bool p_filter) { MutexLock shader_lock(shader_mutex); - if (shader_cache[0].is_null()) { - for (int i = 0; i < 2; i++) { - shader_cache[i] = RS::get_singleton()->shader_create(); + int index = int(p_filter); + if 
(shader_cache[index].is_null()) { + shader_cache[index] = RS::get_singleton()->shader_create(); - // Add a comment to describe the shader origin (useful when converting to ShaderMaterial). - RS::get_singleton()->shader_set_code(shader_cache[i], vformat(R"( + // Add a comment to describe the shader origin (useful when converting to ShaderMaterial). + RS::get_singleton()->shader_set_code(shader_cache[index], vformat(R"( // NOTE: Shader automatically converted from )" GODOT_VERSION_NAME " " GODOT_VERSION_FULL_CONFIG R"('s PanoramaSkyMaterial. shader_type sky; @@ -500,8 +499,7 @@ void sky() { COLOR = texture(source_panorama, SKY_COORDS).rgb * exposure; } )", - i ? "filter_linear" : "filter_nearest")); - } + p_filter ? "filter_linear" : "filter_nearest")); } } @@ -599,7 +597,7 @@ float PhysicalSkyMaterial::get_energy_multiplier() const { void PhysicalSkyMaterial::set_use_debanding(bool p_use_debanding) { use_debanding = p_use_debanding; - _update_shader(); + _update_shader(use_debanding, night_sky.is_valid()); // Only set if shader already compiled if (shader_set) { RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache()); @@ -618,6 +616,8 @@ void PhysicalSkyMaterial::set_night_sky(const Ref &p_night_sky) { RS::get_singleton()->material_set_param(_get_material(), "night_sky", Variant()); } + _update_shader(use_debanding, night_sky.is_valid()); + if (shader_set) { RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache()); } @@ -638,7 +638,7 @@ RID PhysicalSkyMaterial::get_shader_cache() const { } RID PhysicalSkyMaterial::get_rid() const { - _update_shader(); + _update_shader(use_debanding, night_sky.is_valid()); if (!shader_set) { RS::get_singleton()->material_set_shader(_get_material(), get_shader_cache()); shader_set = true; @@ -647,7 +647,7 @@ RID PhysicalSkyMaterial::get_rid() const { } RID PhysicalSkyMaterial::get_shader_rid() const { - _update_shader(); + _update_shader(use_debanding, night_sky.is_valid()); return 
get_shader_cache(); } @@ -712,22 +712,21 @@ void PhysicalSkyMaterial::_bind_methods() { } void PhysicalSkyMaterial::cleanup_shader() { - if (shader_cache[0].is_valid()) { - RS::get_singleton()->free(shader_cache[0]); - RS::get_singleton()->free(shader_cache[1]); - RS::get_singleton()->free(shader_cache[2]); - RS::get_singleton()->free(shader_cache[3]); + for (int i = 0; i < 4; i++) { + if (shader_cache[i].is_valid()) { + RS::get_singleton()->free(shader_cache[i]); + } } } -void PhysicalSkyMaterial::_update_shader() { +void PhysicalSkyMaterial::_update_shader(bool p_use_debanding, bool p_use_night_sky) { MutexLock shader_lock(shader_mutex); - if (shader_cache[0].is_null()) { - for (int i = 0; i < 4; i++) { - shader_cache[i] = RS::get_singleton()->shader_create(); + int index = int(p_use_debanding) + int(p_use_night_sky) * 2; + if (shader_cache[index].is_null()) { + shader_cache[index] = RS::get_singleton()->shader_create(); - // Add a comment to describe the shader origin (useful when converting to ShaderMaterial). - RS::get_singleton()->shader_set_code(shader_cache[i], vformat(R"( + // Add a comment to describe the shader origin (useful when converting to ShaderMaterial). + RS::get_singleton()->shader_set_code(shader_cache[index], vformat(R"( // NOTE: Shader automatically converted from )" GODOT_VERSION_NAME " " GODOT_VERSION_FULL_CONFIG R"('s PhysicalSkyMaterial. shader_type sky; @@ -811,8 +810,7 @@ void sky() { } } )", - (i % 2) ? "render_mode use_debanding;" : "", i > 1 ? "L0 += texture(night_sky, SKY_COORDS).xyz * extinction;" : "", i > 1 ? "COLOR = texture(night_sky, SKY_COORDS).xyz;" : "")); - } + p_use_debanding ? "render_mode use_debanding;" : "", p_use_night_sky ? "L0 += texture(night_sky, SKY_COORDS).xyz * extinction;" : "", p_use_night_sky ? 
"COLOR = texture(night_sky, SKY_COORDS).xyz;" : "")); } } diff --git a/scene/resources/3d/sky_material.h b/scene/resources/3d/sky_material.h index 0a6516994ce..341fb4deb87 100644 --- a/scene/resources/3d/sky_material.h +++ b/scene/resources/3d/sky_material.h @@ -56,7 +56,7 @@ private: static Mutex shader_mutex; static RID shader_cache[4]; - static void _update_shader(); + static void _update_shader(bool p_use_debanding, bool p_use_sky_cover); mutable bool shader_set = false; RID get_shader_cache() const; @@ -130,7 +130,7 @@ private: static Mutex shader_mutex; static RID shader_cache[2]; - static void _update_shader(); + static void _update_shader(bool p_filter); mutable bool shader_set = false; bool filter = true; @@ -181,7 +181,7 @@ private: float energy_multiplier = 1.0f; bool use_debanding = true; Ref night_sky; - static void _update_shader(); + static void _update_shader(bool p_use_debanding, bool p_use_night_sky); mutable bool shader_set = false; protected: diff --git a/scene/resources/material.cpp b/scene/resources/material.cpp index adb050f41bb..0bd2805c8cb 100644 --- a/scene/resources/material.cpp +++ b/scene/resources/material.cpp @@ -682,6 +682,10 @@ void BaseMaterial3D::finish_shaders() { } void BaseMaterial3D::_update_shader() { + if (!_is_initialized()) { + _mark_ready(); + } + MaterialKey mk = _compute_key(); if (mk == current_key) { return; //no update required in the end @@ -3644,8 +3648,6 @@ BaseMaterial3D::BaseMaterial3D(bool p_orm) : flags[FLAG_USE_TEXTURE_REPEAT] = true; current_key.invalid_key = 1; - - _mark_initialized(callable_mp(this, &BaseMaterial3D::_queue_shader_change), Callable()); } BaseMaterial3D::~BaseMaterial3D() { diff --git a/scene/resources/particle_process_material.cpp b/scene/resources/particle_process_material.cpp index cb5a0e08005..733b34ae056 100644 --- a/scene/resources/particle_process_material.cpp +++ b/scene/resources/particle_process_material.cpp @@ -32,8 +32,9 @@ #include "core/version.h" -Mutex 
ParticleProcessMaterial::material_mutex; +Mutex ParticleProcessMaterial::dirty_materials_mutex; SelfList::List ParticleProcessMaterial::dirty_materials; +Mutex ParticleProcessMaterial::shader_map_mutex; HashMap ParticleProcessMaterial::shader_map; RBSet ParticleProcessMaterial::min_max_properties; ParticleProcessMaterial::ShaderNames *ParticleProcessMaterial::shader_names = nullptr; @@ -147,26 +148,37 @@ void ParticleProcessMaterial::finish_shaders() { } void ParticleProcessMaterial::_update_shader() { + if (!_is_initialized()) { + _mark_ready(); + } + MaterialKey mk = _compute_key(); if (mk == current_key) { return; // No update required in the end. } - if (shader_map.has(current_key)) { - shader_map[current_key].users--; - if (shader_map[current_key].users == 0) { - // Deallocate shader, as it's no longer in use. - RS::get_singleton()->free(shader_map[current_key].shader); - shader_map.erase(current_key); + { + MutexLock lock(shader_map_mutex); + ShaderData *v = shader_map.getptr(current_key); + if (v) { + v->users--; + if (v->users == 0) { + // Deallocate shader, as it's no longer in use. + RS::get_singleton()->free(v->shader); + shader_map.erase(current_key); + shader_rid = RID(); + } } - } - current_key = mk; + current_key = mk; - if (shader_map.has(mk)) { - RS::get_singleton()->material_set_shader(_get_material(), shader_map[mk].shader); - shader_map[mk].users++; - return; + v = shader_map.getptr(mk); + if (v) { + shader_rid = v->shader; + RS::get_singleton()->material_set_shader(_get_material(), shader_rid); + v->users++; + return; + } } // No pre-existing shader, create one. 
@@ -1176,19 +1188,34 @@ void ParticleProcessMaterial::_update_shader() { code += " }\n"; code += "}\n"; - ShaderData shader_data; - shader_data.shader = RS::get_singleton()->shader_create(); - shader_data.users = 1; + // We must create the shader outside the shader_map_mutex to avoid potential deadlocks with + // other tasks in the WorkerThreadPool simultaneously creating materials, which + // may also hold the shared shader_map_mutex lock. + RID new_shader = RS::get_singleton()->shader_create_from_code(code); - RS::get_singleton()->shader_set_code(shader_data.shader, code); + MutexLock lock(shader_map_mutex); - shader_map[mk] = shader_data; + ShaderData *v = shader_map.getptr(mk); + if (unlikely(v)) { + // We raced and managed to create the same key concurrently, so we'll free the shader we just created, + // given we know it isn't used, and use the winner. + RS::get_singleton()->free(new_shader); + } else { + ShaderData shader_data; + shader_data.shader = new_shader; + // ShaderData will be inserted with a users count of 0, but we + // increment unconditionally outside this if block, whilst still under lock. 
+ v = &shader_map.insert(mk, shader_data)->value; + } - RS::get_singleton()->material_set_shader(_get_material(), shader_data.shader); + shader_rid = v->shader; + v->users++; + + RS::get_singleton()->material_set_shader(_get_material(), shader_rid); } void ParticleProcessMaterial::flush_changes() { - MutexLock lock(material_mutex); + MutexLock lock(dirty_materials_mutex); while (dirty_materials.first()) { dirty_materials.first()->self()->_update_shader(); @@ -1201,7 +1228,7 @@ void ParticleProcessMaterial::_queue_shader_change() { return; } - MutexLock lock(material_mutex); + MutexLock lock(dirty_materials_mutex); if (!element.in_list()) { dirty_materials.add(&element); @@ -1836,9 +1863,14 @@ double ParticleProcessMaterial::get_lifetime_randomness() const { return lifetime_randomness; } +RID ParticleProcessMaterial::get_rid() const { + const_cast(this)->_update_shader(); + return Material::get_rid(); +} + RID ParticleProcessMaterial::get_shader_rid() const { - ERR_FAIL_COND_V(!shader_map.has(current_key), RID()); - return shader_map[current_key].shader; + const_cast(this)->_update_shader(); + return shader_rid; } void ParticleProcessMaterial::_validate_property(PropertyInfo &p_property) const { @@ -2403,13 +2435,11 @@ ParticleProcessMaterial::ParticleProcessMaterial() : set_color(Color(1, 1, 1, 1)); current_key.invalid_key = 1; - - _mark_initialized(callable_mp(this, &ParticleProcessMaterial::_queue_shader_change), callable_mp(this, &ParticleProcessMaterial::_update_shader)); } ParticleProcessMaterial::~ParticleProcessMaterial() { ERR_FAIL_NULL(RenderingServer::get_singleton()); - MutexLock lock(material_mutex); + MutexLock lock(shader_map_mutex); if (shader_map.has(current_key)) { shader_map[current_key].users--; diff --git a/scene/resources/particle_process_material.h b/scene/resources/particle_process_material.h index 51bb57ede8b..0a90306b5b8 100644 --- a/scene/resources/particle_process_material.h +++ b/scene/resources/particle_process_material.h @@ -148,10 
+148,12 @@ private: int users = 0; }; + static Mutex shader_map_mutex; static HashMap shader_map; static RBSet min_max_properties; MaterialKey current_key; + RID shader_rid; _FORCE_INLINE_ MaterialKey _compute_key() const { MaterialKey mk; @@ -185,7 +187,7 @@ private: return mk; } - static Mutex material_mutex; + static Mutex dirty_materials_mutex; static SelfList::List dirty_materials; struct ShaderNames { @@ -501,6 +503,7 @@ public: void set_emission_shape_scale(const Vector3 &p_emission_shape_scale); Vector3 get_emission_shape_scale() const; + virtual RID get_rid() const override; virtual RID get_shader_rid() const override; virtual Shader::Mode get_shader_mode() const override; diff --git a/servers/rendering/dummy/storage/material_storage.cpp b/servers/rendering/dummy/storage/material_storage.cpp index e70ff0fd056..8276eba1366 100644 --- a/servers/rendering/dummy/storage/material_storage.cpp +++ b/servers/rendering/dummy/storage/material_storage.cpp @@ -147,7 +147,7 @@ RID MaterialStorage::shader_allocate() { return shader_owner.allocate_rid(); } -void MaterialStorage::shader_initialize(RID p_rid) { +void MaterialStorage::shader_initialize(RID p_rid, bool p_embedded) { shader_owner.initialize_rid(p_rid, DummyShader()); } diff --git a/servers/rendering/dummy/storage/material_storage.h b/servers/rendering/dummy/storage/material_storage.h index 33b7d770a8f..77bf7e06db9 100644 --- a/servers/rendering/dummy/storage/material_storage.h +++ b/servers/rendering/dummy/storage/material_storage.h @@ -50,6 +50,7 @@ private: mutable RID_Owner shader_owner; ShaderCompiler dummy_compiler; + HashSet dummy_embedded_set; struct DummyMaterial { RID shader; @@ -87,7 +88,7 @@ public: bool owns_shader(RID p_rid) { return shader_owner.owns(p_rid); } virtual RID shader_allocate() override; - virtual void shader_initialize(RID p_rid) override; + virtual void shader_initialize(RID p_rid, bool p_embedded) override; virtual void shader_free(RID p_rid) override; virtual void 
shader_set_code(RID p_shader, const String &p_code) override; @@ -101,6 +102,9 @@ public: virtual Variant shader_get_parameter_default(RID p_material, const StringName &p_param) const override { return Variant(); } virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const override { return RS::ShaderNativeSourceCode(); } + virtual void shader_embedded_set_lock() override {} + virtual const HashSet &shader_embedded_set_get() const override { return dummy_embedded_set; } + virtual void shader_embedded_set_unlock() override {} /* MATERIAL API */ diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.cpp b/servers/rendering/renderer_rd/cluster_builder_rd.cpp index 752e0fe4531..f47ed35dc64 100644 --- a/servers/rendering/renderer_rd/cluster_builder_rd.cpp +++ b/servers/rendering/renderer_rd/cluster_builder_rd.cpp @@ -49,29 +49,33 @@ ClusterBuilderSharedDataRD::ClusterBuilderSharedDataRD() { { RD::FramebufferFormatID fb_format; RD::PipelineColorBlendState blend_state; - String defines; + RD::PipelineRasterizationState rasterization_state; + RD::PipelineMultisampleState ms; + rasterization_state.enable_depth_clamp = true; + ms.sample_count = RD::TEXTURE_SAMPLES_4; + + Vector variants; + variants.push_back(""); + variants.push_back("\n#define USE_ATTACHMENT\n"); + + ClusterRender::ShaderVariant shader_variant; if (RD::get_singleton()->has_feature(RD::SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS)) { fb_format = RD::get_singleton()->framebuffer_format_create_empty(); blend_state = RD::PipelineColorBlendState::create_disabled(); + shader_variant = ClusterRender::SHADER_NORMAL; } else { Vector afs; afs.push_back(RD::AttachmentFormat()); afs.write[0].usage_flags = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT; fb_format = RD::get_singleton()->framebuffer_format_create(afs); blend_state = RD::PipelineColorBlendState::create_blend(); - defines = "\n#define USE_ATTACHMENT\n"; + shader_variant = ClusterRender::SHADER_USE_ATTACHMENT; } - 
RD::PipelineRasterizationState rasterization_state; - rasterization_state.enable_depth_clamp = true; - Vector versions; - versions.push_back(""); - cluster_render.cluster_render_shader.initialize(versions, defines); + cluster_render.cluster_render_shader.initialize(variants); cluster_render.shader_version = cluster_render.cluster_render_shader.version_create(); - cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, 0); + cluster_render.shader = cluster_render.cluster_render_shader.version_get_shader(cluster_render.shader_version, shader_variant); cluster_render.shader_pipelines[ClusterRender::PIPELINE_NORMAL] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rasterization_state, RD::PipelineMultisampleState(), RD::PipelineDepthStencilState(), blend_state, 0); - RD::PipelineMultisampleState ms; - ms.sample_count = RD::TEXTURE_SAMPLES_4; cluster_render.shader_pipelines[ClusterRender::PIPELINE_MSAA] = RD::get_singleton()->render_pipeline_create(cluster_render.shader, fb_format, vertex_format, RD::RENDER_PRIMITIVE_TRIANGLES, rasterization_state, ms, RD::PipelineDepthStencilState(), blend_state, 0); } { diff --git a/servers/rendering/renderer_rd/cluster_builder_rd.h b/servers/rendering/renderer_rd/cluster_builder_rd.h index c2c423cc0f5..3b852b8c67a 100644 --- a/servers/rendering/renderer_rd/cluster_builder_rd.h +++ b/servers/rendering/renderer_rd/cluster_builder_rd.h @@ -73,6 +73,11 @@ class ClusterBuilderSharedDataRD { RID shader_version; RID shader; + enum ShaderVariant { + SHADER_NORMAL, + SHADER_USE_ATTACHMENT, + }; + enum PipelineVersion { PIPELINE_NORMAL, PIPELINE_MSAA, diff --git a/servers/rendering/renderer_rd/effects/fsr.cpp b/servers/rendering/renderer_rd/effects/fsr.cpp index 634d2d9dab2..8368513beeb 100644 --- a/servers/rendering/renderer_rd/effects/fsr.cpp +++ b/servers/rendering/renderer_rd/effects/fsr.cpp @@ -35,17 +35,20 
@@ using namespace RendererRD; FSR::FSR() { - Vector FSR_upscale_modes; + Vector fsr_upscale_modes; + fsr_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_NORMAL\n"); + fsr_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_FALLBACK\n"); + fsr_shader.initialize(fsr_upscale_modes); + + FSRShaderVariant variant; if (RD::get_singleton()->has_feature(RD::SUPPORTS_FSR_HALF_FLOAT)) { - FSR_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_NORMAL\n"); + variant = FSR_SHADER_VARIANT_NORMAL; } else { - FSR_upscale_modes.push_back("\n#define MODE_FSR_UPSCALE_FALLBACK\n"); + variant = FSR_SHADER_VARIANT_FALLBACK; } - fsr_shader.initialize(FSR_upscale_modes); - shader_version = fsr_shader.version_create(); - pipeline = RD::get_singleton()->compute_pipeline_create(fsr_shader.version_get_shader(shader_version, 0)); + pipeline = RD::get_singleton()->compute_pipeline_create(fsr_shader.version_get_shader(shader_version, variant)); } FSR::~FSR() { diff --git a/servers/rendering/renderer_rd/effects/fsr.h b/servers/rendering/renderer_rd/effects/fsr.h index dcb4a134387..ff3de5d6abc 100644 --- a/servers/rendering/renderer_rd/effects/fsr.h +++ b/servers/rendering/renderer_rd/effects/fsr.h @@ -49,6 +49,11 @@ public: virtual void process(Ref p_render_buffers, RID p_source_rd_texture, RID p_destination_texture) final; private: + enum FSRShaderVariant { + FSR_SHADER_VARIANT_NORMAL, + FSR_SHADER_VARIANT_FALLBACK, + }; + enum FSRUpscalePass { FSR_UPSCALE_PASS_EASU = 0, FSR_UPSCALE_PASS_RCAS = 1 diff --git a/servers/rendering/renderer_rd/effects/fsr2.cpp b/servers/rendering/renderer_rd/effects/fsr2.cpp index f9c52454aac..32d208090da 100644 --- a/servers/rendering/renderer_rd/effects/fsr2.cpp +++ b/servers/rendering/renderer_rd/effects/fsr2.cpp @@ -515,17 +515,13 @@ FSR2Context::~FSR2Context() { FSR2Effect::FSR2Effect() { FfxDeviceCapabilities &capabilities = device.capabilities; - uint64_t default_subgroup_size = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_SIZE); 
capabilities.minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1; - capabilities.waveLaneCountMin = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MIN_SIZE); - capabilities.waveLaneCountMax = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MAX_SIZE); + capabilities.waveLaneCountMin = 32; + capabilities.waveLaneCountMax = 32; capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_FSR_HALF_FLOAT); capabilities.raytracingSupported = false; - bool force_wave_64 = default_subgroup_size == 32 && capabilities.waveLaneCountMax == 64; - bool use_lut = force_wave_64 || default_subgroup_size == 64; - - String general_defines_base = + String general_defines = "\n#define FFX_GPU\n" "\n#define FFX_GLSL 1\n" "\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n" @@ -534,17 +530,12 @@ FSR2Effect::FSR2Effect() { "\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n" "\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n"; - if (use_lut) { - general_defines_base += "\n#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1\n"; - } + Vector modes_single; + modes_single.push_back(""); - String general_defines = general_defines_base; - if (capabilities.fp16Supported) { - general_defines += "\n#define FFX_HALF 1\n"; - } - - Vector modes; - modes.push_back(""); + Vector modes_with_fp16; + modes_with_fp16.push_back(""); + modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); // Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and // there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL @@ -557,8 +548,9 @@ FSR2Effect::FSR2Effect() { { Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP]; pass.shader = &shaders.depth_clip; - pass.shader->initialize(modes, general_defines); + pass.shader->initialize(modes_with_fp16, general_defines); pass.shader_version = pass.shader->version_create(); + pass.shader_variant = 
capabilities.fp16Supported ? 1 : 0; pass.sampled_bindings = { FfxResourceBinding{ 0, 0, L"r_reconstructed_previous_nearest_depth" }, @@ -587,8 +579,9 @@ FSR2Effect::FSR2Effect() { { Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH]; pass.shader = &shaders.reconstruct_previous_depth; - pass.shader->initialize(modes, general_defines); + pass.shader->initialize(modes_with_fp16, general_defines); pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 1 : 0; pass.sampled_bindings = { FfxResourceBinding{ 0, 0, L"r_input_motion_vectors" }, @@ -616,8 +609,9 @@ FSR2Effect::FSR2Effect() { { Pass &pass = device.passes[FFX_FSR2_PASS_LOCK]; pass.shader = &shaders.lock; - pass.shader->initialize(modes, general_defines); + pass.shader->initialize(modes_with_fp16, general_defines); pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 1 : 0; pass.sampled_bindings = { FfxResourceBinding{ 0, 0, L"r_lock_input_luma" } @@ -634,22 +628,19 @@ FSR2Effect::FSR2Effect() { } { - Vector accumulate_modes; - accumulate_modes.push_back("\n"); - accumulate_modes.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n"); - - String general_defines_accumulate; - if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") { - // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. 
- general_defines_accumulate = general_defines_base; - } else { - general_defines_accumulate = general_defines; - } + Vector accumulate_modes_with_fp16; + accumulate_modes_with_fp16.push_back("\n"); + accumulate_modes_with_fp16.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n"); + accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n"); + accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n"); + // Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput. + const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA"; Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE]; pass.shader = &shaders.accumulate; - pass.shader->initialize(accumulate_modes, general_defines_accumulate); + pass.shader->initialize(accumulate_modes_with_fp16, general_defines); pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 2 : 0; pass.sampled_bindings = { FfxResourceBinding{ 0, 0, L"r_input_exposure" }, @@ -679,16 +670,16 @@ FSR2Effect::FSR2Effect() { FfxResourceBinding{ 18, 0, L"cbFSR2" } }; - // Sharpen pass is a clone of the accumulate pass. + // Sharpen pass is a clone of the accumulate pass with the sharpening variant. 
Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN]; sharpen_pass = pass; - sharpen_pass.shader_variant = 1; + sharpen_pass.shader_variant = pass.shader_variant + 1; } { Pass &pass = device.passes[FFX_FSR2_PASS_RCAS]; pass.shader = &shaders.rcas; - pass.shader->initialize(modes, general_defines_base); + pass.shader->initialize(modes_single, general_defines); pass.shader_version = pass.shader->version_create(); pass.sampled_bindings = { @@ -709,7 +700,7 @@ FSR2Effect::FSR2Effect() { { Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID]; pass.shader = &shaders.compute_luminance_pyramid; - pass.shader->initialize(modes, general_defines_base); + pass.shader->initialize(modes_single, general_defines); pass.shader_version = pass.shader->version_create(); pass.sampled_bindings = { @@ -732,8 +723,9 @@ FSR2Effect::FSR2Effect() { { Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE]; pass.shader = &shaders.autogen_reactive; - pass.shader->initialize(modes, general_defines); + pass.shader->initialize(modes_with_fp16, general_defines); pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 1 : 0; pass.sampled_bindings = { FfxResourceBinding{ 0, 0, L"r_input_opaque_only" }, @@ -753,8 +745,9 @@ FSR2Effect::FSR2Effect() { { Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE]; pass.shader = &shaders.tcr_autogen; - pass.shader->initialize(modes, general_defines); + pass.shader->initialize(modes_with_fp16, general_defines); pass.shader_version = pass.shader->version_create(); + pass.shader_variant = capabilities.fp16Supported ? 
1 : 0; pass.sampled_bindings = { FfxResourceBinding{ 0, 0, L"r_input_opaque_only" }, diff --git a/servers/rendering/renderer_rd/environment/fog.cpp b/servers/rendering/renderer_rd/environment/fog.cpp index b8de20f2a2d..0664209ce71 100644 --- a/servers/rendering/renderer_rd/environment/fog.cpp +++ b/servers/rendering/renderer_rd/environment/fog.cpp @@ -368,6 +368,11 @@ RS::ShaderNativeSourceCode Fog::FogShaderData::get_native_source_code() const { return fog_singleton->volumetric_fog.shader.version_get_native_source_code(version); } +Pair Fog::FogShaderData::get_native_shader_and_version() const { + Fog *fog_singleton = Fog::get_singleton(); + return { &fog_singleton->volumetric_fog.shader, version }; +} + Fog::FogShaderData::~FogShaderData() { Fog *fog_singleton = Fog::get_singleton(); ERR_FAIL_NULL(fog_singleton); diff --git a/servers/rendering/renderer_rd/environment/fog.h b/servers/rendering/renderer_rd/environment/fog.h index a7ddee4f7ee..6e91b4bcf4c 100644 --- a/servers/rendering/renderer_rd/environment/fog.h +++ b/servers/rendering/renderer_rd/environment/fog.h @@ -202,6 +202,7 @@ private: virtual bool is_animated() const; virtual bool casts_shadows() const; virtual RS::ShaderNativeSourceCode get_native_source_code() const; + virtual Pair get_native_shader_and_version() const; FogShaderData() {} virtual ~FogShaderData(); diff --git a/servers/rendering/renderer_rd/environment/gi.cpp b/servers/rendering/renderer_rd/environment/gi.cpp index b6a637758ae..33e79a387f4 100644 --- a/servers/rendering/renderer_rd/environment/gi.cpp +++ b/servers/rendering/renderer_rd/environment/gi.cpp @@ -3366,6 +3366,31 @@ GI::GI() { } GI::~GI() { + if (voxel_gi_debug_shader_version.is_valid()) { + voxel_gi_debug_shader.version_free(voxel_gi_debug_shader_version); + } + if (voxel_gi_lighting_shader_version.is_valid()) { + voxel_gi_shader.version_free(voxel_gi_lighting_shader_version); + } + if (shader_version.is_valid()) { + shader.version_free(shader_version); + } + if 
(sdfgi_shader.debug_probes_shader.is_valid()) { + sdfgi_shader.debug_probes.version_free(sdfgi_shader.debug_probes_shader); + } + if (sdfgi_shader.debug_shader.is_valid()) { + sdfgi_shader.debug.version_free(sdfgi_shader.debug_shader); + } + if (sdfgi_shader.direct_light_shader.is_valid()) { + sdfgi_shader.direct_light.version_free(sdfgi_shader.direct_light_shader); + } + if (sdfgi_shader.integrate_shader.is_valid()) { + sdfgi_shader.integrate.version_free(sdfgi_shader.integrate_shader); + } + if (sdfgi_shader.preprocess_shader.is_valid()) { + sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader); + } + singleton = nullptr; } @@ -3511,20 +3536,26 @@ void GI::init(SkyRD *p_sky) { { //calculate tables String defines = "\n#define SDFGI_OCT_SIZE " + itos(SDFGI::LIGHTPROBE_OCT_SIZE) + "\n"; - if (RendererSceneRenderRD::get_singleton()->is_vrs_supported()) { - defines += "\n#define USE_VRS\n"; - } - if (!RD::get_singleton()->sampler_is_format_supported_for_filter(RD::DATA_FORMAT_R8G8_UINT, RD::SAMPLER_FILTER_LINEAR)) { - defines += "\n#define SAMPLE_VOXEL_GI_NEAREST\n"; + + Vector variants; + for (uint32_t vrs = 0; vrs < 2; vrs++) { + String vrs_base = vrs ? "\n#define USE_VRS\n" : ""; + Group group = vrs ? 
GROUP_VRS : GROUP_NORMAL; + bool default_enabled = vrs == 0; + variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_VOXEL_GI_INSTANCES\n", default_enabled)); // MODE_VOXEL_GI + variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_VOXEL_GI_INSTANCES\n#define SAMPLE_VOXEL_GI_NEAREST\n", default_enabled)); // MODE_VOXEL_GI_WITHOUT_SAMPLER + variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_SDFGI\n", default_enabled)); // MODE_SDFGI + variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n", default_enabled)); // MODE_COMBINED + variants.push_back(ShaderRD::VariantDefine(group, vrs_base + "\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n#define SAMPLE_VOXEL_GI_NEAREST\n", default_enabled)); // MODE_COMBINED_WITHOUT_SAMPLER } - Vector gi_modes; + shader.initialize(variants, defines); - gi_modes.push_back("\n#define USE_VOXEL_GI_INSTANCES\n"); // MODE_VOXEL_GI - gi_modes.push_back("\n#define USE_SDFGI\n"); // MODE_SDFGI - gi_modes.push_back("\n#define USE_SDFGI\n\n#define USE_VOXEL_GI_INSTANCES\n"); // MODE_COMBINED + bool vrs_supported = RendererSceneRenderRD::get_singleton()->is_vrs_supported(); + if (vrs_supported) { + shader.enable_group(GROUP_VRS); + } - shader.initialize(gi_modes, defines); shader_version = shader.version_create(); Vector specialization_constants; @@ -3551,8 +3582,10 @@ void GI::init(SkyRD *p_sky) { specialization_constants.ptrw()[0].bool_value = (v & SHADER_SPECIALIZATION_HALF_RES) ? true : false; specialization_constants.ptrw()[1].bool_value = (v & SHADER_SPECIALIZATION_USE_FULL_PROJECTION_MATRIX) ? true : false; specialization_constants.ptrw()[2].bool_value = (v & SHADER_SPECIALIZATION_USE_VRS) ? true : false; + + int variant_base = vrs_supported ? 
MODE_MAX : 0; for (int i = 0; i < MODE_MAX; i++) { - pipelines[v][i] = RD::get_singleton()->compute_pipeline_create(shader.version_get_shader(shader_version, i), specialization_constants); + pipelines[v][i] = RD::get_singleton()->compute_pipeline_create(shader.version_get_shader(shader_version, variant_base + i), specialization_constants); } } @@ -3612,31 +3645,6 @@ void GI::free() { RD::get_singleton()->free(sdfgi_ubo); } - if (voxel_gi_debug_shader_version.is_valid()) { - voxel_gi_debug_shader.version_free(voxel_gi_debug_shader_version); - } - if (voxel_gi_lighting_shader_version.is_valid()) { - voxel_gi_shader.version_free(voxel_gi_lighting_shader_version); - } - if (shader_version.is_valid()) { - shader.version_free(shader_version); - } - if (sdfgi_shader.debug_probes_shader.is_valid()) { - sdfgi_shader.debug_probes.version_free(sdfgi_shader.debug_probes_shader); - } - if (sdfgi_shader.debug_shader.is_valid()) { - sdfgi_shader.debug.version_free(sdfgi_shader.debug_shader); - } - if (sdfgi_shader.direct_light_shader.is_valid()) { - sdfgi_shader.direct_light.version_free(sdfgi_shader.direct_light_shader); - } - if (sdfgi_shader.integrate_shader.is_valid()) { - sdfgi_shader.integrate.version_free(sdfgi_shader.integrate_shader); - } - if (sdfgi_shader.preprocess_shader.is_valid()) { - sdfgi_shader.preprocess.version_free(sdfgi_shader.preprocess_shader); - } - if (voxel_gi_lights) { memdelete_arr(voxel_gi_lights); } @@ -3889,7 +3897,15 @@ void GI::process_gi(Ref p_render_buffers, const RID *p_nor pipeline_specialization |= SHADER_SPECIALIZATION_USE_VRS; } - Mode mode = (use_sdfgi && use_voxel_gi_instances) ? MODE_COMBINED : (use_sdfgi ? MODE_SDFGI : MODE_VOXEL_GI); + bool without_sampler = RD::get_singleton()->sampler_is_format_supported_for_filter(RD::DATA_FORMAT_R8G8_UINT, RD::SAMPLER_FILTER_LINEAR); + Mode mode; + if (use_sdfgi && use_voxel_gi_instances) { + mode = without_sampler ? 
MODE_COMBINED_WITHOUT_SAMPLER : MODE_COMBINED; + } else if (use_sdfgi) { + mode = MODE_SDFGI; + } else { + mode = without_sampler ? MODE_VOXEL_GI_WITHOUT_SAMPLER : MODE_VOXEL_GI; + } for (uint32_t v = 0; v < p_view_count; v++) { push_constant.view_index = v; @@ -4062,7 +4078,9 @@ void GI::process_gi(Ref p_render_buffers, const RID *p_nor uniforms.push_back(u); } - rbgi->uniform_set[v] = RD::get_singleton()->uniform_set_create(uniforms, shader.version_get_shader(shader_version, 0), 0); + bool vrs_supported = RendererSceneRenderRD::get_singleton()->is_vrs_supported(); + int variant_base = vrs_supported ? MODE_MAX : 0; + rbgi->uniform_set[v] = RD::get_singleton()->uniform_set_create(uniforms, shader.version_get_shader(shader_version, variant_base), 0); } RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, pipelines[pipeline_specialization][mode]); @@ -4121,3 +4139,7 @@ void GI::debug_voxel_gi(RID p_voxel_gi, RD::DrawListID p_draw_list, RID p_frameb voxel_gi->debug(p_draw_list, p_framebuffer, p_camera_with_transform, p_lighting, p_emission, p_alpha); } + +void GI::enable_vrs_shader_group() { + shader.enable_group(GROUP_VRS); +} diff --git a/servers/rendering/renderer_rd/environment/gi.h b/servers/rendering/renderer_rd/environment/gi.h index 0e21e85d75b..0d3ca2f1a7e 100644 --- a/servers/rendering/renderer_rd/environment/gi.h +++ b/servers/rendering/renderer_rd/environment/gi.h @@ -788,10 +788,17 @@ public: RID sdfgi_ubo; + enum Group { + GROUP_NORMAL, + GROUP_VRS, + }; + enum Mode { MODE_VOXEL_GI, + MODE_VOXEL_GI_WITHOUT_SAMPLER, MODE_SDFGI, MODE_COMBINED, + MODE_COMBINED_WITHOUT_SAMPLER, MODE_MAX }; @@ -825,6 +832,8 @@ public: bool voxel_gi_needs_update(RID p_probe) const; void voxel_gi_update(RID p_probe, bool p_update_light_instances, const Vector &p_light_instances, const PagedArray &p_dynamic_objects); void debug_voxel_gi(RID p_voxel_gi, RD::DrawListID p_draw_list, RID p_framebuffer, const Projection &p_camera_with_transform, bool p_lighting, bool 
p_emission, float p_alpha); + + void enable_vrs_shader_group(); }; } // namespace RendererRD diff --git a/servers/rendering/renderer_rd/environment/sky.cpp b/servers/rendering/renderer_rd/environment/sky.cpp index c6e2f9eae34..7b2c95f57ad 100644 --- a/servers/rendering/renderer_rd/environment/sky.cpp +++ b/servers/rendering/renderer_rd/environment/sky.cpp @@ -168,6 +168,11 @@ RS::ShaderNativeSourceCode SkyRD::SkyShaderData::get_native_source_code() const return scene_singleton->sky.sky_shader.shader.version_get_native_source_code(version); } +Pair SkyRD::SkyShaderData::get_native_shader_and_version() const { + RendererSceneRenderRD *scene_singleton = static_cast(RendererSceneRenderRD::singleton); + return { &scene_singleton->sky.sky_shader.shader, version }; +} + SkyRD::SkyShaderData::~SkyShaderData() { RendererSceneRenderRD *scene_singleton = static_cast(RendererSceneRenderRD::singleton); ERR_FAIL_NULL(scene_singleton); diff --git a/servers/rendering/renderer_rd/environment/sky.h b/servers/rendering/renderer_rd/environment/sky.h index 5f71aae3143..94566431389 100644 --- a/servers/rendering/renderer_rd/environment/sky.h +++ b/servers/rendering/renderer_rd/environment/sky.h @@ -128,6 +128,7 @@ private: virtual bool is_animated() const; virtual bool casts_shadows() const; virtual RS::ShaderNativeSourceCode get_native_source_code() const; + virtual Pair get_native_shader_and_version() const; SkyShaderData() {} virtual ~SkyShaderData(); diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index 23da0160797..7d841f34ddf 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -4821,6 +4821,24 @@ uint32_t RenderForwardClustered::get_pipeline_compilations(RS::PipelineSource p_ return scene_shader.get_pipeline_compilations(p_source); } +void 
RenderForwardClustered::enable_features(BitField p_feature_bits) { + if (p_feature_bits.has_flag(FEATURE_MULTIVIEW_BIT)) { + scene_shader.enable_multiview_shader_group(); + } + + if (p_feature_bits.has_flag(FEATURE_ADVANCED_BIT)) { + scene_shader.enable_advanced_shader_group(p_feature_bits.has_flag(FEATURE_MULTIVIEW_BIT)); + } + + if (p_feature_bits.has_flag(FEATURE_VRS_BIT)) { + gi.enable_vrs_shader_group(); + } +} + +String RenderForwardClustered::get_name() const { + return "forward_clustered"; +} + void RenderForwardClustered::GeometryInstanceForwardClustered::pair_voxel_gi_instances(const RID *p_voxel_gi_instances, uint32_t p_voxel_gi_instance_count) { if (p_voxel_gi_instance_count > 0) { voxel_gi_instances[0] = p_voxel_gi_instances[0]; @@ -4957,8 +4975,6 @@ RenderForwardClustered::RenderForwardClustered() { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set, 0); RD::get_singleton()->compute_list_dispatch_threads(compute_list, tformat.width, tformat.height, 1); RD::get_singleton()->compute_list_end(); - - best_fit_normal.shader.version_free(best_fit_normal.shader_version); } /* DFG LUT */ @@ -4996,8 +5012,6 @@ RenderForwardClustered::RenderForwardClustered() { RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set, 0); RD::get_singleton()->compute_list_dispatch_threads(compute_list, tformat.width, tformat.height, 1); RD::get_singleton()->compute_list_end(); - - dfg_lut.shader.version_free(dfg_lut.shader_version); } _update_shader_quality_settings(); @@ -5048,8 +5062,14 @@ RenderForwardClustered::~RenderForwardClustered() { RD::get_singleton()->free(shadow_sampler); RSG::light_storage->directional_shadow_atlas_set_size(0); + + RD::get_singleton()->free(best_fit_normal.pipeline); RD::get_singleton()->free(best_fit_normal.texture); + best_fit_normal.shader.version_free(best_fit_normal.shader_version); + + RD::get_singleton()->free(dfg_lut.pipeline); RD::get_singleton()->free(dfg_lut.texture); + 
dfg_lut.shader.version_free(dfg_lut.shader_version); { for (const RID &rid : scene_state.uniform_buffers) { diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h index 9f5127d0492..0626cef6d31 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h @@ -812,6 +812,11 @@ public: virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) override; virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override; + /* SHADER LIBRARY */ + + virtual void enable_features(BitField p_feature_bits) override; + virtual String get_name() const override; + virtual bool free(RID p_rid) override; virtual void update() override; diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp index 5e5da94515f..dfb1634f1f3 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp @@ -160,7 +160,7 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { } if (version.is_null()) { - version = SceneShaderForwardClustered::singleton->shader.version_create(); + version = SceneShaderForwardClustered::singleton->shader.version_create(false); } depth_draw = DepthDraw(depth_drawi); @@ -230,6 +230,14 @@ RS::ShaderNativeSourceCode SceneShaderForwardClustered::ShaderData::get_native_s } } +Pair SceneShaderForwardClustered::ShaderData::get_native_shader_and_version() const { + if (version.is_valid()) { + return { &SceneShaderForwardClustered::singleton->shader, version }; + } else { + return {}; + } +} + uint16_t 
SceneShaderForwardClustered::ShaderData::_get_shader_version(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader) const { uint32_t ubershader_base = p_ubershader ? ShaderVersion::SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL : 0; switch (p_pipeline_version) { @@ -789,7 +797,7 @@ void SceneShaderForwardClustered::init(const String p_defines) { actions.global_buffer_array_variable = "global_shader_uniforms.data"; actions.instance_uniform_index_variable = "instances.data[instance_index_interp].instance_uniforms_ofs"; - actions.check_multiview_samplers = RendererCompositorRD::get_singleton()->is_xr_enabled(); // Make sure we check sampling multiview textures. + actions.check_multiview_samplers = true; compiler.initialize(actions); } @@ -901,6 +909,10 @@ void SceneShaderForwardClustered::set_default_specialization(const ShaderSpecial } } +void SceneShaderForwardClustered::enable_multiview_shader_group() { + shader.enable_group(SHADER_GROUP_MULTIVIEW); +} + void SceneShaderForwardClustered::enable_advanced_shader_group(bool p_needs_multiview) { if (p_needs_multiview || RendererCompositorRD::get_singleton()->is_xr_enabled()) { shader.enable_group(SHADER_GROUP_ADVANCED_MULTIVIEW); diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h index 79f10d4cabc..f9d09b1c40f 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h @@ -277,6 +277,7 @@ public: virtual bool is_animated() const; virtual bool casts_shadows() const; virtual RS::ShaderNativeSourceCode get_native_source_code() const; + virtual Pair get_native_shader_and_version() const; uint16_t _get_shader_version(PipelineVersion p_pipeline_version, uint32_t p_color_pass_flags, bool p_ubershader) const; RID _get_shader_variant(uint16_t p_shader_version) 
const; void _clear_vertex_input_mask_cache(); @@ -350,6 +351,7 @@ public: void init(const String p_defines); void set_default_specialization(const ShaderSpecialization &p_specialization); + void enable_multiview_shader_group(); void enable_advanced_shader_group(bool p_needs_multiview = false); bool is_multiview_shader_group_enabled() const; bool is_advanced_shader_group_enabled(bool p_multiview) const; diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index 9bdd8223001..d3e1abc7c9c 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -351,6 +351,20 @@ uint32_t RenderForwardMobile::get_pipeline_compilations(RS::PipelineSource p_sou return scene_shader.get_pipeline_compilations(p_source); } +void RenderForwardMobile::enable_features(BitField p_feature_bits) { + if (p_feature_bits.has_flag(FEATURE_MULTIVIEW_BIT)) { + scene_shader.enable_multiview_shader_group(); + } + + if (p_feature_bits.has_flag(FEATURE_VRS_BIT)) { + gi.enable_vrs_shader_group(); + } +} + +String RenderForwardMobile::get_name() const { + return "forward_mobile"; +} + bool RenderForwardMobile::free(RID p_rid) { if (RendererSceneRenderRD::free(p_rid)) { return true; @@ -3013,7 +3027,7 @@ void RenderForwardMobile::_mesh_compile_pipelines_for_surface(const SurfacePipel pipeline_key.primitive_type = mesh_storage->mesh_surface_get_primitive(p_surface.mesh_surface); pipeline_key.wireframe = false; - const bool multiview_enabled = p_global.use_multiview && scene_shader.is_multiview_enabled(); + const bool multiview_enabled = p_global.use_multiview && scene_shader.is_multiview_shader_group_enabled(); const RD::DataFormat buffers_color_format = _render_buffers_get_color_format(); const bool buffers_can_be_storage = _render_buffers_can_be_storage(); const uint32_t vrs_iterations = 
p_global.use_vrs ? 2 : 1; diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h index 643ca4559f3..a3bfb86ccdd 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h @@ -673,6 +673,11 @@ public: virtual void mesh_generate_pipelines(RID p_mesh, bool p_background_compilation) override; virtual uint32_t get_pipeline_compilations(RS::PipelineSource p_source) override; + /* SHADER LIBRARY */ + + virtual void enable_features(BitField p_feature_bits) override; + virtual String get_name() const override; + virtual bool free(RID p_rid) override; virtual void update() override; diff --git a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp index 785f0a89fee..b32e00bfad4 100644 --- a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp @@ -155,7 +155,7 @@ void SceneShaderForwardMobile::ShaderData::set_code(const String &p_code) { } if (version.is_null()) { - version = SceneShaderForwardMobile::singleton->shader.version_create(); + version = SceneShaderForwardMobile::singleton->shader.version_create(false); } depth_draw = DepthDraw(depth_drawi); @@ -236,6 +236,15 @@ RS::ShaderNativeSourceCode SceneShaderForwardMobile::ShaderData::get_native_sour } } +Pair SceneShaderForwardMobile::ShaderData::get_native_shader_and_version() const { + if (version.is_valid()) { + MutexLock lock(SceneShaderForwardMobile::singleton_mutex); + return { &SceneShaderForwardMobile::singleton->shader, version }; + } else { + return {}; + } +} + void SceneShaderForwardMobile::ShaderData::_create_pipeline(PipelineKey p_pipeline_key) { #if PRINT_PIPELINE_COMPILATION_KEYS print_line( @@ -480,19 
+489,19 @@ void SceneShaderForwardMobile::init(const String p_defines) { /* SCENE SHADER */ { - Vector shader_versions; + Vector shader_versions; for (uint32_t ubershader = 0; ubershader < 2; ubershader++) { const String base_define = ubershader ? "\n#define UBERSHADER\n" : ""; - shader_versions.push_back(base_define + ""); // SHADER_VERSION_COLOR_PASS - shader_versions.push_back(base_define + "\n#define USE_LIGHTMAP\n"); // SHADER_VERSION_LIGHTMAP_COLOR_PASS - shader_versions.push_back(base_define + "\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n"); // SHADER_VERSION_SHADOW_PASS, should probably change this to MODE_RENDER_SHADOW because we don't have a depth pass here... - shader_versions.push_back(base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n#define SHADOW_PASS\n"); // SHADER_VERSION_SHADOW_PASS_DP - shader_versions.push_back(base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n"); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "", true)); // SHADER_VERSION_COLOR_PASS + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define USE_LIGHTMAP\n", true)); // SHADER_VERSION_LIGHTMAP_COLOR_PASS + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n", true)); // SHADER_VERSION_SHADOW_PASS, should probably change this to MODE_RENDER_SHADOW because we don't have a depth pass here... + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_DUAL_PARABOLOID\n#define SHADOW_PASS\n", true)); // SHADER_VERSION_SHADOW_PASS_DP + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_BASE, base_define + "\n#define MODE_RENDER_DEPTH\n#define MODE_RENDER_MATERIAL\n", true)); // SHADER_VERSION_DEPTH_PASS_WITH_MATERIAL // Multiview versions of our shaders. 
- shader_versions.push_back(base_define + "\n#define USE_MULTIVIEW\n"); // SHADER_VERSION_COLOR_PASS_MULTIVIEW - shader_versions.push_back(base_define + "\n#define USE_MULTIVIEW\n#define USE_LIGHTMAP\n"); // SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW - shader_versions.push_back(base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n"); // SHADER_VERSION_SHADOW_PASS_MULTIVIEW + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n", false)); // SHADER_VERSION_COLOR_PASS_MULTIVIEW + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define USE_LIGHTMAP\n", false)); // SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW + shader_versions.push_back(ShaderRD::VariantDefine(SHADER_GROUP_MULTIVIEW, base_define + "\n#define USE_MULTIVIEW\n#define MODE_RENDER_DEPTH\n#define SHADOW_PASS\n", false)); // SHADER_VERSION_SHADOW_PASS_MULTIVIEW } Vector immutable_samplers; @@ -502,13 +511,9 @@ void SceneShaderForwardMobile::init(const String p_defines) { immutable_shadow_sampler.uniform_type = RenderingDeviceCommons::UNIFORM_TYPE_SAMPLER; immutable_samplers.push_back(immutable_shadow_sampler); shader.initialize(shader_versions, p_defines, immutable_samplers); - if (!RendererCompositorRD::get_singleton()->is_xr_enabled()) { - for (uint32_t ubershader = 0; ubershader < 2; ubershader++) { - uint32_t base_variant = ubershader ? 
SHADER_VERSION_MAX : 0; - shader.set_variant_enabled(base_variant + SHADER_VERSION_COLOR_PASS_MULTIVIEW, false); - shader.set_variant_enabled(base_variant + SHADER_VERSION_LIGHTMAP_COLOR_PASS_MULTIVIEW, false); - shader.set_variant_enabled(base_variant + SHADER_VERSION_SHADOW_PASS_MULTIVIEW, false); - } + + if (RendererCompositorRD::get_singleton()->is_xr_enabled()) { + shader.enable_group(SHADER_GROUP_MULTIVIEW); } } @@ -714,7 +719,7 @@ void SceneShaderForwardMobile::init(const String p_defines) { actions.instance_uniform_index_variable = "instances.data[draw_call.instance_index].instance_uniforms_ofs"; actions.apply_luminance_multiplier = true; // apply luminance multiplier to screen texture - actions.check_multiview_samplers = RendererCompositorRD::get_singleton()->is_xr_enabled(); // Make sure we check sampling multiview textures. + actions.check_multiview_samplers = true; compiler.initialize(actions); } @@ -824,8 +829,12 @@ uint32_t SceneShaderForwardMobile::get_pipeline_compilations(RS::PipelineSource return pipeline_compilations[p_source]; } -bool SceneShaderForwardMobile::is_multiview_enabled() const { - return shader.is_variant_enabled(SHADER_VERSION_COLOR_PASS_MULTIVIEW); +void SceneShaderForwardMobile::enable_multiview_shader_group() { + shader.enable_group(SHADER_GROUP_MULTIVIEW); +} + +bool SceneShaderForwardMobile::is_multiview_shader_group_enabled() const { + return shader.is_group_enabled(SHADER_GROUP_MULTIVIEW); } SceneShaderForwardMobile::~SceneShaderForwardMobile() { diff --git a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h index 1ce84ee5ba6..51ca5628eac 100644 --- a/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h +++ b/servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h @@ -72,6 +72,11 @@ public: } } + enum ShaderGroup { + SHADER_GROUP_BASE, // Always compiled at the beginning. 
+ SHADER_GROUP_MULTIVIEW, + }; + struct ShaderSpecialization { union { uint32_t packed_0; @@ -265,6 +270,7 @@ public: virtual bool is_animated() const; virtual bool casts_shadows() const; virtual RS::ShaderNativeSourceCode get_native_source_code() const; + virtual Pair get_native_shader_and_version() const; RD::PolygonCullMode get_cull_mode_from_cull_variant(CullVariant p_cull_variant); void _clear_vertex_input_mask_cache(); RID get_shader_variant(ShaderVersion p_shader_version, bool p_ubershader) const; @@ -337,7 +343,8 @@ public: void init(const String p_defines); void set_default_specialization(const ShaderSpecialization &p_specialization); uint32_t get_pipeline_compilations(RS::PipelineSource p_source); - bool is_multiview_enabled() const; + void enable_multiview_shader_group(); + bool is_multiview_shader_group_enabled() const; }; } // namespace RendererSceneRenderImplementation diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp index cf80e8227a7..e0c8cee7b76 100644 --- a/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp @@ -392,9 +392,9 @@ RID RendererCanvasRenderRD::_create_base_uniform_set(RID p_to_render_target, boo { RD::Uniform u; - u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER; + u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER; u.binding = 2; - u.append_id(state.lights_uniform_buffer); + u.append_id(state.lights_storage_buffer); uniforms.push_back(u); } @@ -525,7 +525,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p uint32_t index = 0; while (l) { - if (index == state.max_lights_per_render) { + if (index == MAX_LIGHTS_PER_RENDER) { l->render_index_cache = -1; l = l->next_ptr; continue; @@ -588,7 +588,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p uint32_t index = light_count; while (l) { - if (index == 
state.max_lights_per_render) { + if (index == MAX_LIGHTS_PER_RENDER) { l->render_index_cache = -1; l = l->next_ptr; continue; @@ -664,7 +664,7 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p } if (light_count > 0) { - RD::get_singleton()->buffer_update(state.lights_uniform_buffer, 0, sizeof(LightUniform) * light_count, &state.light_uniforms[0]); + RD::get_singleton()->buffer_update(state.lights_storage_buffer, 0, sizeof(LightUniform) * light_count, &state.light_uniforms[0]); } bool use_linear_colors = texture_storage->render_target_is_using_hdr(p_to_render_target); @@ -964,7 +964,7 @@ void RendererCanvasRenderRD::_update_shadow_atlas() { RD::TextureFormat tf; tf.texture_type = RD::TEXTURE_TYPE_2D; tf.width = state.shadow_texture_size; - tf.height = state.max_lights_per_render * 2; + tf.height = MAX_LIGHTS_PER_RENDER * 2; tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT; tf.format = RD::DATA_FORMAT_R32_SFLOAT; @@ -975,7 +975,7 @@ void RendererCanvasRenderRD::_update_shadow_atlas() { RD::TextureFormat tf; tf.texture_type = RD::TEXTURE_TYPE_2D; tf.width = state.shadow_texture_size; - tf.height = state.max_lights_per_render * 2; + tf.height = MAX_LIGHTS_PER_RENDER * 2; tf.usage_bits = RD::TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT; tf.format = RD::DATA_FORMAT_D32_SFLOAT; tf.is_discardable = true; @@ -1022,7 +1022,7 @@ void RendererCanvasRenderRD::light_update_shadow(RID p_rid, int p_shadow_index, _update_shadow_atlas(); cl->shadow.z_far = p_far; - cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(state.max_lights_per_render * 2); + cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(MAX_LIGHTS_PER_RENDER * 2); Color cc = Color(p_far, p_far, p_far, 1.0); // First, do a culling pass and record what occluders need to be drawn for this light. 
@@ -1137,7 +1137,7 @@ void RendererCanvasRenderRD::light_update_directional_shadow(RID p_rid, int p_sh float half_size = p_clip_rect.size.length() * 0.5; //shadow length, must keep this no matter the angle cl->shadow.z_far = distance; - cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(state.max_lights_per_render * 2); + cl->shadow.y_offset = float(p_shadow_index * 2 + 1) / float(MAX_LIGHTS_PER_RENDER * 2); Transform2D to_light_xform; @@ -1584,7 +1584,7 @@ void RendererCanvasRenderRD::CanvasShaderData::set_code(const String &p_code) { pipeline_hash_map.clear_pipelines(); if (version.is_null()) { - version = canvas_singleton->shader.canvas_shader.version_create(); + version = canvas_singleton->shader.canvas_shader.version_create(false); } #if 0 @@ -1625,6 +1625,11 @@ RS::ShaderNativeSourceCode RendererCanvasRenderRD::CanvasShaderData::get_native_ return canvas_singleton->shader.canvas_shader.version_get_native_source_code(version); } +Pair RendererCanvasRenderRD::CanvasShaderData::get_native_shader_and_version() const { + RendererCanvasRenderRD *canvas_singleton = static_cast(RendererCanvasRender::singleton); + return { &canvas_singleton->shader.canvas_shader, version }; +} + RID RendererCanvasRenderRD::CanvasShaderData::get_shader(ShaderVariant p_shader_variant, bool p_ubershader) const { if (version.is_valid()) { uint32_t variant_index = p_shader_variant + (p_ubershader ? 
SHADER_VARIANT_MAX : 0); @@ -1726,20 +1731,10 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() { { //shader variants String global_defines; - - uint64_t uniform_max_size = RD::get_singleton()->limit_get(RD::LIMIT_MAX_UNIFORM_BUFFER_SIZE); - if (uniform_max_size < 65536) { - //Yes, you guessed right, ARM again - state.max_lights_per_render = 64; - global_defines += "#define MAX_LIGHTS 64\n"; - } else { - state.max_lights_per_render = DEFAULT_MAX_LIGHTS_PER_RENDER; - global_defines += "#define MAX_LIGHTS " + itos(DEFAULT_MAX_LIGHTS_PER_RENDER) + "\n"; - } - + global_defines += "#define MAX_LIGHTS " + itos(MAX_LIGHTS_PER_RENDER) + "\n"; global_defines += "\n#define SAMPLERS_BINDING_FIRST_INDEX " + itos(SAMPLERS_BINDING_FIRST_INDEX) + "\n"; - state.light_uniforms = memnew_arr(LightUniform, state.max_lights_per_render); + state.light_uniforms = memnew_arr(LightUniform, MAX_LIGHTS_PER_RENDER); Vector variants; const uint32_t ubershader_iterations = 1; for (uint32_t ubershader = 0; ubershader < ubershader_iterations; ubershader++) { @@ -1921,7 +1916,7 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() { { //bindings state.canvas_state_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(State::Buffer)); - state.lights_uniform_buffer = RD::get_singleton()->uniform_buffer_create(sizeof(LightUniform) * state.max_lights_per_render); + state.lights_storage_buffer = RD::get_singleton()->storage_buffer_create(sizeof(LightUniform) * MAX_LIGHTS_PER_RENDER); RD::SamplerState shadow_sampler_state; shadow_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST; @@ -3316,7 +3311,7 @@ RendererCanvasRenderRD::~RendererCanvasRenderRD() { } memdelete_arr(state.light_uniforms); - RD::get_singleton()->free(state.lights_uniform_buffer); + RD::get_singleton()->free(state.lights_storage_buffer); } //shadow rendering diff --git a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h index 68ab7bb40f6..a927834820a 100644 --- 
a/servers/rendering/renderer_rd/renderer_canvas_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_canvas_render_rd.h @@ -106,7 +106,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender { MAX_RENDER_ITEMS = 256 * 1024, MAX_LIGHT_TEXTURES = 1024, MAX_LIGHTS_PER_ITEM = 16, - DEFAULT_MAX_LIGHTS_PER_RENDER = 256 + MAX_LIGHTS_PER_RENDER = 256, }; /****************/ @@ -169,6 +169,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender { virtual bool is_animated() const; virtual bool casts_shadows() const; virtual RS::ShaderNativeSourceCode get_native_source_code() const; + virtual Pair get_native_shader_and_version() const; RID get_shader(ShaderVariant p_shader_variant, bool p_ubershader) const; uint64_t get_vertex_input_mask(ShaderVariant p_shader_variant, bool p_ubershader); bool is_valid() const; @@ -570,7 +571,7 @@ class RendererCanvasRenderRD : public RendererCanvasRender { LightUniform *light_uniforms = nullptr; - RID lights_uniform_buffer; + RID lights_storage_buffer; RID canvas_state_buffer; RID shadow_sampler; RID shadow_texture; @@ -584,8 +585,6 @@ class RendererCanvasRenderRD : public RendererCanvasRender { RID default_transforms_uniform_set; - uint32_t max_lights_per_render; - double time; } state; diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp index a508577335a..21ba32af63e 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.cpp @@ -275,41 +275,45 @@ RendererCompositorRD::RendererCompositorRD() { uniform_set_cache = memnew(UniformSetCacheRD); framebuffer_cache = memnew(FramebufferCacheRD); - { - String shader_cache_dir = Engine::get_singleton()->get_shader_cache_path(); - if (shader_cache_dir.is_empty()) { - shader_cache_dir = "user://"; + bool shader_cache_enabled = GLOBAL_GET("rendering/shader_compiler/shader_cache/enabled"); + bool compress = 
GLOBAL_GET("rendering/shader_compiler/shader_cache/compress"); + bool use_zstd = GLOBAL_GET("rendering/shader_compiler/shader_cache/use_zstd_compression"); + bool strip_debug = GLOBAL_GET("rendering/shader_compiler/shader_cache/strip_debug"); + ShaderRD::set_shader_cache_save_compressed(compress); + ShaderRD::set_shader_cache_save_compressed_zstd(use_zstd); + ShaderRD::set_shader_cache_save_debug(!strip_debug); + + // Shader cache is forcefully enabled when running the editor. + if (shader_cache_enabled || Engine::get_singleton()->is_editor_hint()) { + // Attempt to create a folder for the shader cache that the user can write to. Shaders will only be attempted to be saved if this path exists. + String shader_cache_user_dir = Engine::get_singleton()->get_shader_cache_path(); + if (shader_cache_user_dir.is_empty()) { + shader_cache_user_dir = "user://"; } - Ref da = DirAccess::open(shader_cache_dir); - if (da.is_null()) { - ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_dir); + + Ref user_da = DirAccess::open(shader_cache_user_dir); + if (user_da.is_null()) { + ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_user_dir); } else { - Error err = da->change_dir("shader_cache"); + Error err = user_da->change_dir("shader_cache"); if (err != OK) { - err = da->make_dir("shader_cache"); + err = user_da->make_dir("shader_cache"); } + if (err != OK) { - ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_dir); + ERR_PRINT("Can't create shader cache folder, no shader caching will happen: " + shader_cache_user_dir); } else { - shader_cache_dir = shader_cache_dir.path_join("shader_cache"); - - bool shader_cache_enabled = GLOBAL_GET("rendering/shader_compiler/shader_cache/enabled"); - if (!Engine::get_singleton()->is_editor_hint() && !shader_cache_enabled) { - shader_cache_dir = String(); //disable only if not editor - } - - if 
(!shader_cache_dir.is_empty()) { - bool compress = GLOBAL_GET("rendering/shader_compiler/shader_cache/compress"); - bool use_zstd = GLOBAL_GET("rendering/shader_compiler/shader_cache/use_zstd_compression"); - bool strip_debug = GLOBAL_GET("rendering/shader_compiler/shader_cache/strip_debug"); - - ShaderRD::set_shader_cache_dir(shader_cache_dir); - ShaderRD::set_shader_cache_save_compressed(compress); - ShaderRD::set_shader_cache_save_compressed_zstd(use_zstd); - ShaderRD::set_shader_cache_save_debug(!strip_debug); - } + shader_cache_user_dir = shader_cache_user_dir.path_join("shader_cache"); + ShaderRD::set_shader_cache_user_dir(shader_cache_user_dir); } } + + // Check if a directory exists for the shader cache to pull shaders from as read-only. This is used on exported projects with baked shaders. + String shader_cache_res_dir = "res://.godot/shader_cache"; + Ref res_da = DirAccess::open(shader_cache_res_dir); + if (res_da.is_valid()) { + ShaderRD::set_shader_cache_res_dir(shader_cache_res_dir); + } } ERR_FAIL_COND_MSG(singleton != nullptr, "A RendererCompositorRD singleton already exists."); @@ -347,5 +351,6 @@ RendererCompositorRD::~RendererCompositorRD() { singleton = nullptr; memdelete(uniform_set_cache); memdelete(framebuffer_cache); - ShaderRD::set_shader_cache_dir(String()); + ShaderRD::set_shader_cache_user_dir(String()); + ShaderRD::set_shader_cache_res_dir(String()); } diff --git a/servers/rendering/renderer_rd/renderer_compositor_rd.h b/servers/rendering/renderer_rd/renderer_compositor_rd.h index 782125db280..42e3bb03be3 100644 --- a/servers/rendering/renderer_rd/renderer_compositor_rd.h +++ b/servers/rendering/renderer_rd/renderer_compositor_rd.h @@ -134,6 +134,8 @@ public: _ALWAYS_INLINE_ double get_total_time() const { return time; } _ALWAYS_INLINE_ bool can_create_resources_async() const { return true; } + virtual bool is_xr_enabled() const { return RendererCompositor::is_xr_enabled(); } + static Error is_viable() { return OK; } diff --git 
a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index 0043901946d..9d5a127cba4 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -50,8 +50,9 @@ #include "servers/rendering/renderer_scene_render.h" #include "servers/rendering/rendering_device.h" #include "servers/rendering/rendering_method.h" +#include "servers/rendering/rendering_shader_library.h" -class RendererSceneRenderRD : public RendererSceneRender { +class RendererSceneRenderRD : public RendererSceneRender, public RenderingShaderLibrary { friend RendererRD::SkyRD; friend RendererRD::GI; diff --git a/servers/rendering/renderer_rd/shader_rd.cpp b/servers/rendering/renderer_rd/shader_rd.cpp index c493c7b1a00..49ebbcdaf79 100644 --- a/servers/rendering/renderer_rd/shader_rd.cpp +++ b/servers/rendering/renderer_rd/shader_rd.cpp @@ -152,10 +152,6 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con tohash.append(GODOT_VERSION_NUMBER); tohash.append("[GodotVersionHash]"); tohash.append(GODOT_VERSION_HASH); - tohash.append("[SpirvCacheKey]"); - tohash.append(RenderingDevice::get_singleton()->shader_get_spirv_cache_key()); - tohash.append("[BinaryCacheKey]"); - tohash.append(RenderingDevice::get_singleton()->shader_get_binary_cache_key()); tohash.append("[Vertex]"); tohash.append(p_vertex_code ? 
p_vertex_code : ""); tohash.append("[Fragment]"); @@ -166,7 +162,7 @@ void ShaderRD::setup(const char *p_vertex_code, const char *p_fragment_code, con base_sha256 = tohash.as_string().sha256_text(); } -RID ShaderRD::version_create() { +RID ShaderRD::version_create(bool p_embedded) { //initialize() was never called ERR_FAIL_COND_V(group_to_variant_map.is_empty(), RID()); @@ -174,12 +170,22 @@ RID ShaderRD::version_create() { version.dirty = true; version.valid = false; version.initialize_needed = true; + version.embedded = p_embedded; version.variants.clear(); version.variant_data.clear(); + version.mutex = memnew(Mutex); RID rid = version_owner.make_rid(version); - MutexLock lock(versions_mutex); - version_mutexes.insert(rid, version.mutex); + { + MutexLock lock(versions_mutex); + version_mutexes.insert(rid, version.mutex); + } + + if (p_embedded) { + MutexLock lock(shader_versions_embedded_set_mutex); + shader_versions_embedded_set.insert({ this, rid }); + } + return rid; } @@ -263,86 +269,49 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c } } +Vector ShaderRD::_build_variant_stage_sources(uint32_t p_variant, CompileData p_data) { + if (!variants_enabled[p_variant]) { + return Vector(); // Variant is disabled, return. + } + + Vector stage_sources; + stage_sources.resize(RD::SHADER_STAGE_MAX); + + if (is_compute) { + // Compute stage. + StringBuilder builder; + _build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_COMPUTE]); + stage_sources.write[RD::SHADER_STAGE_COMPUTE] = builder.as_string(); + } else { + { + // Vertex stage. + StringBuilder builder; + _build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_VERTEX]); + stage_sources.write[RD::SHADER_STAGE_VERTEX] = builder.as_string(); + } + + { + // Fragment stage. 
+ StringBuilder builder; + _build_variant_code(builder, p_variant, p_data.version, stage_templates[STAGE_TYPE_FRAGMENT]); + stage_sources.write[RD::SHADER_STAGE_FRAGMENT] = builder.as_string(); + } + } + + return stage_sources; +} + void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { uint32_t variant = group_to_variant_map[p_data.group][p_variant]; - if (!variants_enabled[variant]) { return; // Variant is disabled, return. } - Vector stages; - - String error; - String current_source; - RD::ShaderStage current_stage = RD::SHADER_STAGE_VERTEX; - bool build_ok = true; - - if (!is_compute) { - //vertex stage - - StringBuilder builder; - _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_VERTEX]); - - current_source = builder.as_string(); - RD::ShaderStageSPIRVData stage; - stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_VERTEX, current_source, RD::SHADER_LANGUAGE_GLSL, &error); - if (stage.spirv.is_empty()) { - build_ok = false; - } else { - stage.shader_stage = RD::SHADER_STAGE_VERTEX; - stages.push_back(stage); - } - } - - if (!is_compute && build_ok) { - //fragment stage - current_stage = RD::SHADER_STAGE_FRAGMENT; - - StringBuilder builder; - _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_FRAGMENT]); - - current_source = builder.as_string(); - RD::ShaderStageSPIRVData stage; - stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_FRAGMENT, current_source, RD::SHADER_LANGUAGE_GLSL, &error); - if (stage.spirv.is_empty()) { - build_ok = false; - } else { - stage.shader_stage = RD::SHADER_STAGE_FRAGMENT; - stages.push_back(stage); - } - } - - if (is_compute) { - //compute stage - current_stage = RD::SHADER_STAGE_COMPUTE; - - StringBuilder builder; - _build_variant_code(builder, variant, p_data.version, stage_templates[STAGE_TYPE_COMPUTE]); - - current_source = builder.as_string(); - - RD::ShaderStageSPIRVData stage; - 
stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::SHADER_STAGE_COMPUTE, current_source, RD::SHADER_LANGUAGE_GLSL, &error); - if (stage.spirv.is_empty()) { - build_ok = false; - } else { - stage.shader_stage = RD::SHADER_STAGE_COMPUTE; - stages.push_back(stage); - } - } - - if (!build_ok) { - ERR_PRINT("Error compiling " + String(current_stage == RD::SHADER_STAGE_COMPUTE ? "Compute " : (current_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader, variant #" + itos(variant) + " (" + variant_defines[variant].text.get_data() + ")."); - ERR_PRINT(error); - -#ifdef DEBUG_ENABLED - ERR_PRINT("code:\n" + current_source.get_with_code_lines()); -#endif - return; - } - - Vector shader_data = RD::get_singleton()->shader_compile_binary_from_spirv(stages, name + ":" + itos(variant)); + Vector variant_stage_sources = _build_variant_stage_sources(variant, p_data); + Vector variant_stages = compile_stages(variant_stage_sources); + ERR_FAIL_COND(variant_stages.is_empty()); + Vector shader_data = RD::get_singleton()->shader_compile_binary_from_spirv(variant_stages, name + ":" + itos(variant)); ERR_FAIL_COND(shader_data.is_empty()); { @@ -351,6 +320,20 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) { } } +Vector ShaderRD::version_build_variant_stage_sources(RID p_version, int p_variant) { + Version *version = version_owner.get_or_null(p_version); + ERR_FAIL_NULL_V(version, Vector()); + + if (version->dirty) { + _initialize_version(version); + } + + CompileData compile_data; + compile_data.version = version; + compile_data.group = variant_to_group[p_variant]; + return _build_variant_stage_sources(p_variant, compile_data); +} + RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_version) { Version *version = version_owner.get_or_null(p_version); RS::ShaderNativeSourceCode source_code; @@ -404,6 +387,13 @@ RS::ShaderNativeSourceCode ShaderRD::version_get_native_source_code(RID p_versio return 
source_code; } +String ShaderRD::version_get_cache_file_relative_path(RID p_version, int p_group, const String &p_api_name) { + Version *version = version_owner.get_or_null(p_version); + ERR_FAIL_NULL_V(version, String()); + + return _get_cache_file_relative_path(version, p_group, p_api_name); +} + String ShaderRD::_version_get_sha1(Version *p_version) const { StringBuilder hash_build; @@ -437,17 +427,31 @@ String ShaderRD::_version_get_sha1(Version *p_version) const { static const char *shader_file_header = "GDSC"; static const uint32_t cache_file_version = 4; -String ShaderRD::_get_cache_file_path(Version *p_version, int p_group) { - const String &sha1 = _version_get_sha1(p_version); - const String &api_safe_name = String(RD::get_singleton()->get_device_api_name()).validate_filename().to_lower(); - const String &path = shader_cache_dir.path_join(name).path_join(group_sha256[p_group]).path_join(sha1) + "." + api_safe_name + ".cache"; - return path; +String ShaderRD::_get_cache_file_relative_path(Version *p_version, int p_group, const String &p_api_name) { + String sha1 = _version_get_sha1(p_version); + return name.path_join(group_sha256[p_group]).path_join(sha1) + "." + p_api_name + ".cache"; +} + +String ShaderRD::_get_cache_file_path(Version *p_version, int p_group, const String &p_api_name, bool p_user_dir) { + const String &shader_cache_dir = p_user_dir ? 
shader_cache_user_dir : shader_cache_res_dir; + String relative_path = _get_cache_file_relative_path(p_version, p_group, p_api_name); + return shader_cache_dir.path_join(relative_path); } bool ShaderRD::_load_from_cache(Version *p_version, int p_group) { - const String &path = _get_cache_file_path(p_version, p_group); - Ref f = FileAccess::open(path, FileAccess::READ); + String api_safe_name = String(RD::get_singleton()->get_device_api_name()).validate_filename().to_lower(); + Ref f; + if (shader_cache_user_dir_valid) { + f = FileAccess::open(_get_cache_file_path(p_version, p_group, api_safe_name, true), FileAccess::READ); + } + if (f.is_null()) { + f = FileAccess::open(_get_cache_file_path(p_version, p_group, api_safe_name, false), FileAccess::READ); + } + + if (f.is_null()) { + const String &sha1 = _version_get_sha1(p_version); + print_verbose(vformat("Shader cache miss for %s", name.path_join(group_sha256[p_group]).path_join(sha1))); return false; } @@ -506,19 +510,14 @@ bool ShaderRD::_load_from_cache(Version *p_version, int p_group) { } void ShaderRD::_save_to_cache(Version *p_version, int p_group) { - ERR_FAIL_COND(!shader_cache_dir_valid); - const String &path = _get_cache_file_path(p_version, p_group); + ERR_FAIL_COND(!shader_cache_user_dir_valid); + String api_safe_name = String(RD::get_singleton()->get_device_api_name()).validate_filename().to_lower(); + const String &path = _get_cache_file_path(p_version, p_group, api_safe_name, true); Ref f = FileAccess::open(path, FileAccess::WRITE); ERR_FAIL_COND(f.is_null()); - f->store_buffer((const uint8_t *)shader_file_header, 4); - f->store_32(cache_file_version); // File version. - uint32_t variant_count = group_to_variant_map[p_group].size(); - f->store_32(variant_count); // Variant count. - for (uint32_t i = 0; i < variant_count; i++) { - int variant_id = group_to_variant_map[p_group][i]; - f->store_32(p_version->variant_data[variant_id].size()); // Stage count. 
- f->store_buffer(p_version->variant_data[variant_id].ptr(), p_version->variant_data[variant_id].size()); - } + + PackedByteArray shader_cache_bytes = ShaderRD::save_shader_cache_bytes(group_to_variant_map[p_group], p_version->variant_data); + f->store_buffer(shader_cache_bytes); } void ShaderRD::_allocate_placeholders(Version *p_version, int p_group) { @@ -543,10 +542,8 @@ void ShaderRD::_compile_version_start(Version *p_version, int p_group) { p_version->dirty = false; #if ENABLE_SHADER_CACHE - if (shader_cache_dir_valid) { - if (_load_from_cache(p_version, p_group)) { - return; - } + if (_load_from_cache(p_version, p_group)) { + return; } #endif @@ -595,7 +592,7 @@ void ShaderRD::_compile_version_end(Version *p_version, int p_group) { return; } #if ENABLE_SHADER_CACHE - else if (shader_cache_dir_valid) { + else if (shader_cache_user_dir_valid) { _save_to_cache(p_version, p_group); } #endif @@ -714,6 +711,11 @@ bool ShaderRD::version_free(RID p_version) { } Version *version = version_owner.get_or_null(p_version); + if (version->embedded) { + MutexLock lock(shader_versions_embedded_set_mutex); + shader_versions_embedded_set.erase({ this, p_version }); + } + version->mutex->lock(); _clear_version(version); version_owner.free(p_version); @@ -737,6 +739,14 @@ bool ShaderRD::is_variant_enabled(int p_variant) const { return variants_enabled[p_variant]; } +int64_t ShaderRD::get_variant_count() const { + return variants_enabled.size(); +} + +int ShaderRD::get_variant_to_group(int p_variant) const { + return variant_to_group[p_variant]; +} + void ShaderRD::enable_group(int p_group) { ERR_FAIL_INDEX(p_group, group_enabled.size()); @@ -760,6 +770,18 @@ bool ShaderRD::is_group_enabled(int p_group) const { return group_enabled[p_group]; } +int64_t ShaderRD::get_group_count() const { + return group_enabled.size(); +} + +const LocalVector &ShaderRD::get_group_to_variants(int p_group) const { + return group_to_variant_map[p_group]; +} + +const String &ShaderRD::get_name() const 
{ + return name; +} + bool ShaderRD::shader_cache_cleanup_on_start = false; ShaderRD::ShaderRD() { @@ -778,12 +800,12 @@ ShaderRD::ShaderRD() { base_compute_defines = base_compute_define_text.ascii(); } -void ShaderRD::initialize(const Vector &p_variant_defines, const String &p_general_defines, const Vector &r_immutable_samplers) { - immutable_samplers = r_immutable_samplers; +void ShaderRD::initialize(const Vector &p_variant_defines, const String &p_general_defines, const Vector &p_immutable_samplers) { ERR_FAIL_COND(variant_defines.size()); ERR_FAIL_COND(p_variant_defines.is_empty()); general_defines = p_general_defines.utf8(); + immutable_samplers = p_immutable_samplers; // When initialized this way, there is just one group and its always enabled. group_to_variant_map.insert(0, LocalVector{}); @@ -796,13 +818,18 @@ void ShaderRD::initialize(const Vector &p_variant_defines, const String group_to_variant_map[0].push_back(i); } - if (!shader_cache_dir.is_empty()) { + if (!shader_cache_user_dir.is_empty() || !shader_cache_res_dir.is_empty()) { group_sha256.resize(1); _initialize_cache(); } } void ShaderRD::_initialize_cache() { + shader_cache_user_dir_valid = !shader_cache_user_dir.is_empty(); + if (!shader_cache_user_dir_valid) { + return; + } + for (const KeyValue> &E : group_to_variant_map) { StringBuilder hash_build; @@ -819,34 +846,44 @@ void ShaderRD::_initialize_cache() { group_sha256[E.key] = hash_build.as_string().sha256_text(); - Ref d = DirAccess::open(shader_cache_dir); - ERR_FAIL_COND(d.is_null()); - if (d->change_dir(name) != OK) { - Error err = d->make_dir(name); - ERR_FAIL_COND(err != OK); - d->change_dir(name); - } + if (!shader_cache_user_dir.is_empty()) { + // Validate if it's possible to write to all the directories required by in the user directory. 
+ Ref d = DirAccess::open(shader_cache_user_dir); + if (d.is_null()) { + shader_cache_user_dir_valid = false; + ERR_FAIL_MSG(vformat("Unable to open shader cache directory at %s.", shader_cache_user_dir)); + } - // Erase other versions? - if (shader_cache_cleanup_on_start) { + if (d->change_dir(name) != OK) { + Error err = d->make_dir(name); + if (err != OK) { + shader_cache_user_dir_valid = false; + ERR_FAIL_MSG(vformat("Unable to create shader cache directory %s at %s.", name, shader_cache_user_dir)); + } + + d->change_dir(name); + } + + if (d->change_dir(group_sha256[E.key]) != OK) { + Error err = d->make_dir(group_sha256[E.key]); + if (err != OK) { + shader_cache_user_dir_valid = false; + ERR_FAIL_MSG(vformat("Unable to create shader cache directory %s/%s at %s.", name, group_sha256[E.key], shader_cache_user_dir)); + } + } } - // - if (d->change_dir(group_sha256[E.key]) != OK) { - Error err = d->make_dir(group_sha256[E.key]); - ERR_FAIL_COND(err != OK); - } - shader_cache_dir_valid = true; print_verbose("Shader '" + name + "' (group " + itos(E.key) + ") SHA256: " + group_sha256[E.key]); } } // Same as above, but allows specifying shader compilation groups. 
-void ShaderRD::initialize(const Vector &p_variant_defines, const String &p_general_defines) { +void ShaderRD::initialize(const Vector &p_variant_defines, const String &p_general_defines, const Vector &p_immutable_samplers) { ERR_FAIL_COND(variant_defines.size()); ERR_FAIL_COND(p_variant_defines.is_empty()); general_defines = p_general_defines.utf8(); + immutable_samplers = p_immutable_samplers; int max_group_id = 0; @@ -877,14 +914,38 @@ void ShaderRD::initialize(const Vector &p_variant_defines, const } } - if (!shader_cache_dir.is_empty()) { + if (!shader_cache_user_dir.is_empty()) { group_sha256.resize(max_group_id + 1); _initialize_cache(); } } -void ShaderRD::set_shader_cache_dir(const String &p_dir) { - shader_cache_dir = p_dir; +void ShaderRD::shaders_embedded_set_lock() { + shader_versions_embedded_set_mutex.lock(); +} + +const ShaderRD::ShaderVersionPairSet &ShaderRD::shaders_embedded_set_get() { + return shader_versions_embedded_set; +} + +void ShaderRD::shaders_embedded_set_unlock() { + shader_versions_embedded_set_mutex.unlock(); +} + +void ShaderRD::set_shader_cache_user_dir(const String &p_dir) { + shader_cache_user_dir = p_dir; +} + +const String &ShaderRD::get_shader_cache_user_dir() { + return shader_cache_user_dir; +} + +void ShaderRD::set_shader_cache_res_dir(const String &p_dir) { + shader_cache_res_dir = p_dir; +} + +const String &ShaderRD::get_shader_cache_res_dir() { + return shader_cache_res_dir; } void ShaderRD::set_shader_cache_save_compressed(bool p_enable) { @@ -899,7 +960,78 @@ void ShaderRD::set_shader_cache_save_debug(bool p_enable) { shader_cache_save_debug = p_enable; } -String ShaderRD::shader_cache_dir; +Vector ShaderRD::compile_stages(const Vector &p_stage_sources) { + RD::ShaderStageSPIRVData stage; + Vector stages; + String error; + RD::ShaderStage compilation_failed_stage = RD::SHADER_STAGE_MAX; + bool compilation_failed = false; + for (int64_t i = 0; i < p_stage_sources.size() && !compilation_failed; i++) { + if 
(p_stage_sources[i].is_empty()) { + continue; + } + + stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::ShaderStage(i), p_stage_sources[i], RD::SHADER_LANGUAGE_GLSL, &error); + stage.shader_stage = RD::ShaderStage(i); + if (!stage.spirv.is_empty()) { + stages.push_back(stage); + + } else { + compilation_failed_stage = RD::ShaderStage(i); + compilation_failed = true; + } + } + + if (compilation_failed) { + ERR_PRINT("Error compiling " + String(compilation_failed_stage == RD::SHADER_STAGE_COMPUTE ? "Compute " : (compilation_failed_stage == RD::SHADER_STAGE_VERTEX ? "Vertex" : "Fragment")) + " shader."); + ERR_PRINT(error); + +#ifdef DEBUG_ENABLED + ERR_PRINT("code:\n" + p_stage_sources[compilation_failed_stage].get_with_code_lines()); +#endif + + return Vector(); + } else { + return stages; + } +} + +PackedByteArray ShaderRD::save_shader_cache_bytes(const LocalVector &p_variants, const Vector> &p_variant_data) { + uint32_t variant_count = p_variants.size(); + PackedByteArray bytes; + int64_t total_size = 0; + total_size += 4 + sizeof(uint32_t) * 2; + for (uint32_t i = 0; i < variant_count; i++) { + total_size += sizeof(uint32_t) + p_variant_data[p_variants[i]].size(); + } + + bytes.resize(total_size); + + uint8_t *bytes_ptr = bytes.ptrw(); + memcpy(bytes_ptr, shader_file_header, 4); + bytes_ptr += 4; + + *(uint32_t *)(bytes_ptr) = cache_file_version; + bytes_ptr += sizeof(uint32_t); + + *(uint32_t *)(bytes_ptr) = variant_count; + bytes_ptr += sizeof(uint32_t); + + for (uint32_t i = 0; i < variant_count; i++) { + int variant_id = p_variants[i]; + *(uint32_t *)(bytes_ptr) = uint32_t(p_variant_data[variant_id].size()); + bytes_ptr += sizeof(uint32_t); + + memcpy(bytes_ptr, p_variant_data[variant_id].ptr(), p_variant_data[variant_id].size()); + bytes_ptr += p_variant_data[variant_id].size(); + } + + DEV_ASSERT((bytes.ptrw() + bytes.size()) == bytes_ptr); + return bytes; +} + +String ShaderRD::shader_cache_user_dir; +String 
ShaderRD::shader_cache_res_dir; bool ShaderRD::shader_cache_save_compressed = true; bool ShaderRD::shader_cache_save_compressed_zstd = true; bool ShaderRD::shader_cache_save_debug = true; diff --git a/servers/rendering/renderer_rd/shader_rd.h b/servers/rendering/renderer_rd/shader_rd.h index 578e953e854..2a38e27886a 100644 --- a/servers/rendering/renderer_rd/shader_rd.h +++ b/servers/rendering/renderer_rd/shader_rd.h @@ -35,6 +35,7 @@ #include "core/templates/hash_map.h" #include "core/templates/local_vector.h" #include "core/templates/rid_owner.h" +#include "core/templates/self_list.h" #include "servers/rendering_server.h" class ShaderRD { @@ -51,6 +52,9 @@ public: } }; + typedef Pair ShaderVersionPair; + typedef HashSet ShaderVersionPairSet; + private: //versions CharString general_defines; @@ -78,6 +82,7 @@ private: bool valid; bool dirty; bool initialize_needed; + bool embedded; }; struct CompileData { @@ -85,6 +90,7 @@ private: int group = 0; }; + // Vector will have the size of SHADER_STAGE_MAX and unused stages will have empty strings. 
void _compile_variant(uint32_t p_variant, CompileData p_data); void _initialize_version(Version *p_version); @@ -126,12 +132,16 @@ private: String base_sha256; LocalVector group_sha256; - static String shader_cache_dir; + static inline ShaderVersionPairSet shader_versions_embedded_set; + static inline Mutex shader_versions_embedded_set_mutex; + + static String shader_cache_user_dir; + static String shader_cache_res_dir; static bool shader_cache_cleanup_on_start; static bool shader_cache_save_compressed; static bool shader_cache_save_compressed_zstd; static bool shader_cache_save_debug; - bool shader_cache_dir_valid = false; + bool shader_cache_user_dir_valid = false; enum StageType { STAGE_TYPE_VERTEX, @@ -143,11 +153,13 @@ private: StageTemplate stage_templates[STAGE_TYPE_MAX]; void _build_variant_code(StringBuilder &p_builder, uint32_t p_variant, const Version *p_version, const StageTemplate &p_template); + Vector _build_variant_stage_sources(uint32_t p_variant, CompileData p_data); void _add_stage(const char *p_code, StageType p_stage_type); String _version_get_sha1(Version *p_version) const; - String _get_cache_file_path(Version *p_version, int p_group); + String _get_cache_file_relative_path(Version *p_version, int p_group, const String &p_api_name); + String _get_cache_file_path(Version *p_version, int p_group, const String &p_api_name, bool p_user_dir); bool _load_from_cache(Version *p_version, int p_group); void _save_to_cache(Version *p_version, int p_group); void _initialize_cache(); @@ -157,7 +169,7 @@ protected: void setup(const char *p_vertex_code, const char *p_fragment_code, const char *p_compute_code, const char *p_name); public: - RID version_create(); + RID version_create(bool p_embedded = true); void version_set_code(RID p_version, const HashMap &p_code, const String &p_uniforms, const String &p_vertex_globals, const String &p_fragment_globals, const Vector &p_custom_defines); void version_set_compute_code(RID p_version, const HashMap &p_code, 
const String &p_uniforms, const String &p_compute_globals, const Vector &p_custom_defines); @@ -201,20 +213,38 @@ public: // Enable/disable variants for things that you know won't be used at engine initialization time . void set_variant_enabled(int p_variant, bool p_enabled); bool is_variant_enabled(int p_variant) const; + int64_t get_variant_count() const; + int get_variant_to_group(int p_variant) const; // Enable/disable groups for things that might be enabled at run time. void enable_group(int p_group); bool is_group_enabled(int p_group) const; + int64_t get_group_count() const; + const LocalVector &get_group_to_variants(int p_group) const; - static void set_shader_cache_dir(const String &p_dir); + const String &get_name() const; + + static void shaders_embedded_set_lock(); + static const ShaderVersionPairSet &shaders_embedded_set_get(); + static void shaders_embedded_set_unlock(); + + static void set_shader_cache_user_dir(const String &p_dir); + static const String &get_shader_cache_user_dir(); + static void set_shader_cache_res_dir(const String &p_dir); + static const String &get_shader_cache_res_dir(); static void set_shader_cache_save_compressed(bool p_enable); static void set_shader_cache_save_compressed_zstd(bool p_enable); static void set_shader_cache_save_debug(bool p_enable); - RS::ShaderNativeSourceCode version_get_native_source_code(RID p_version); + static Vector compile_stages(const Vector &p_stage_sources); + static PackedByteArray save_shader_cache_bytes(const LocalVector &p_variants, const Vector> &p_variant_data); - void initialize(const Vector &p_variant_defines, const String &p_general_defines = "", const Vector &r_immutable_samplers = Vector()); - void initialize(const Vector &p_variant_defines, const String &p_general_defines = ""); + Vector version_build_variant_stage_sources(RID p_version, int p_variant); + RS::ShaderNativeSourceCode version_get_native_source_code(RID p_version); + String version_get_cache_file_relative_path(RID p_version, 
int p_group, const String &p_api_name); + + void initialize(const Vector &p_variant_defines, const String &p_general_defines = "", const Vector &p_immutable_samplers = Vector()); + void initialize(const Vector &p_variant_defines, const String &p_general_defines = "", const Vector &p_immutable_samplers = Vector()); virtual ~ShaderRD(); }; diff --git a/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl index 32180e8c2a5..a2aea974c18 100644 --- a/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/canvas_uniforms_inc.glsl @@ -143,8 +143,8 @@ struct Light { vec4 atlas_rect; }; -layout(set = 0, binding = 2, std140) uniform LightData { - Light data[MAX_LIGHTS]; +layout(set = 0, binding = 2, std430) restrict readonly buffer LightData { + Light data[]; } light_array; diff --git a/servers/rendering/renderer_rd/shaders/cluster_render.glsl b/servers/rendering/renderer_rd/shaders/cluster_render.glsl index bfc98445c55..4a4299080fb 100644 --- a/servers/rendering/renderer_rd/shaders/cluster_render.glsl +++ b/servers/rendering/renderer_rd/shaders/cluster_render.glsl @@ -64,17 +64,11 @@ void main() { #version 450 #VERSION_DEFINES -#ifndef MOLTENVK_USED // Metal will corrupt GPU state otherwise -#if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic) && defined(has_GL_KHR_shader_subgroup_vote) #extension GL_KHR_shader_subgroup_ballot : enable #extension GL_KHR_shader_subgroup_arithmetic : enable #extension GL_KHR_shader_subgroup_vote : enable -#define USE_SUBGROUPS -#endif -#endif - layout(location = 0) in float depth_interp; layout(location = 1) in flat uint element_index; @@ -119,10 +113,7 @@ void main() { uint aux = 0; -#ifdef USE_SUBGROUPS - uint cluster_thread_group_index; - if (!gl_HelperInvocation) { //https://advances.realtimerendering.com/s2017/2017_Sig_Improved_Culling_final.pdf @@ -147,15 +138,7 @@ 
void main() { aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); } } -#else -// MoltenVK/Metal fails to compile shaders using gl_HelperInvocation for some GPUs -#ifndef MOLTENVK_USED - if (!gl_HelperInvocation) -#endif - { - aux = atomicOr(cluster_render.data[usage_write_offset], usage_write_bit); - } -#endif + //find the current element in the depth usage list and mark the current depth as used float unit_depth = depth_interp * state.inv_z_far; @@ -164,22 +147,12 @@ void main() { uint z_write_offset = cluster_offset + state.cluster_depth_offset + element_index; uint z_write_bit = 1 << z_bit; -#ifdef USE_SUBGROUPS if (!gl_HelperInvocation) { z_write_bit = subgroupOr(z_write_bit); //merge all Zs if (cluster_thread_group_index == 0) { aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit); } } -#else -// MoltenVK/Metal fails to compile shaders using gl_HelperInvocation for some GPUs -#ifndef MOLTENVK_USED - if (!gl_HelperInvocation) -#endif - { - aux = atomicOr(cluster_render.data[z_write_offset], z_write_bit); - } -#endif #ifdef USE_ATTACHMENT frag_color = vec4(float(aux)); diff --git a/servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl b/servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl index 26ee06aa03e..a348bc9904c 100644 --- a/servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/copy_to_fb.glsl @@ -5,13 +5,9 @@ #VERSION_DEFINES #ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview #extension GL_EXT_multiview : enable #define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -#define ViewIndex 0 -#endif // has_VK_KHR_multiview -#endif //USE_MULTIVIEW +#endif // USE_MULTIVIEW #define FLAG_FLIP_Y (1 << 0) #define FLAG_USE_SECTION (1 << 1) @@ -67,15 +63,6 @@ void main() { #VERSION_DEFINES -#ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview -#extension GL_EXT_multiview : enable -#define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -#define 
ViewIndex 0 -#endif // has_VK_KHR_multiview -#endif //USE_MULTIVIEW - #define FLAG_FLIP_Y (1 << 0) #define FLAG_USE_SECTION (1 << 1) #define FLAG_FORCE_LUMINANCE (1 << 2) diff --git a/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl b/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl index e87f644bb04..9418eda5870 100644 --- a/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/specular_merge.glsl @@ -4,21 +4,10 @@ #VERSION_DEFINES -#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) +#if defined(USE_MULTIVIEW) #extension GL_EXT_multiview : enable -#endif - -#ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview #define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -// !BAS! This needs to become an input once we implement our fallback! -#define ViewIndex 0 -#endif // has_VK_KHR_multiview -#else // USE_MULTIVIEW -// Set to zero, not supported in non stereo -#define ViewIndex 0 -#endif //USE_MULTIVIEW +#endif // USE_MULTIVIEW #ifdef USE_MULTIVIEW layout(location = 0) out vec3 uv_interp; @@ -41,22 +30,6 @@ void main() { #VERSION_DEFINES -#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) -#extension GL_EXT_multiview : enable -#endif - -#ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview -#define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -// !BAS! This needs to become an input once we implement our fallback! 
-#define ViewIndex 0 -#endif // has_VK_KHR_multiview -#else // USE_MULTIVIEW -// Set to zero, not supported in non stereo -#define ViewIndex 0 -#endif //USE_MULTIVIEW - #ifdef USE_MULTIVIEW layout(location = 0) in vec3 uv_interp; #else // USE_MULTIVIEW diff --git a/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl b/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl index d5396fc8c5d..71e8d945ccd 100644 --- a/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/taa_resolve.glsl @@ -32,19 +32,13 @@ // Based on Spartan Engine's TAA implementation (without TAA upscale). // -#ifndef MOLTENVK_USED -#define USE_SUBGROUPS -#endif // MOLTENVK_USED - #define GROUP_SIZE 8 #define FLT_MIN 0.00000001 #define FLT_MAX 32767.0 #define RPC_9 0.11111111111 #define RPC_16 0.0625 -#ifdef USE_SUBGROUPS layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) in; -#endif layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D color_buffer; layout(set = 0, binding = 1) uniform sampler2D depth_buffer; @@ -92,7 +86,6 @@ float get_depth(ivec2 thread_id) { return texelFetch(depth_buffer, thread_id, 0).r; } -#ifdef USE_SUBGROUPS shared vec3 tile_color[kTileDimension][kTileDimension]; shared float tile_depth[kTileDimension][kTileDimension]; @@ -141,15 +134,6 @@ void populate_group_shared_memory(uvec2 group_id, uint group_index) { groupMemoryBarrier(); barrier(); } -#else -vec3 load_color(uvec2 screen_pos) { - return imageLoad(color_buffer, ivec2(screen_pos)).rgb; -} - -float load_depth(uvec2 screen_pos) { - return get_depth(ivec2(screen_pos)); -} -#endif /*------------------------------------------------------------------------------ VELOCITY @@ -380,22 +364,15 @@ vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_ } void main() { -#ifdef USE_SUBGROUPS populate_group_shared_memory(gl_WorkGroupID.xy, gl_LocalInvocationIndex); -#endif // Out 
of bounds check if (any(greaterThanEqual(vec2(gl_GlobalInvocationID.xy), params.resolution))) { return; } -#ifdef USE_SUBGROUPS const uvec2 pos_group = gl_LocalInvocationID.xy; const uvec2 pos_group_top_left = gl_WorkGroupID.xy * kGroupSize - kBorderSize; -#else - const uvec2 pos_group = gl_GlobalInvocationID.xy; - const uvec2 pos_group_top_left = uvec2(0, 0); -#endif const uvec2 pos_screen = gl_GlobalInvocationID.xy; const vec2 uv = (gl_GlobalInvocationID.xy + 0.5f) / params.resolution; diff --git a/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl b/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl index 1b3addf0d49..3043dae8bcd 100644 --- a/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/tonemap.glsl @@ -4,12 +4,6 @@ #VERSION_DEFINES -#ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview -#extension GL_EXT_multiview : enable -#endif -#endif - layout(location = 0) out vec2 uv_interp; void main() { @@ -38,12 +32,8 @@ void main() { #VERSION_DEFINES #ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview #extension GL_EXT_multiview : enable #define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -#define ViewIndex 0 -#endif // has_VK_KHR_multiview #endif //USE_MULTIVIEW layout(location = 0) in vec2 uv_interp; diff --git a/servers/rendering/renderer_rd/shaders/effects/vrs.glsl b/servers/rendering/renderer_rd/shaders/effects/vrs.glsl index 1d3463dd2bf..c6fb549d362 100644 --- a/servers/rendering/renderer_rd/shaders/effects/vrs.glsl +++ b/servers/rendering/renderer_rd/shaders/effects/vrs.glsl @@ -5,12 +5,8 @@ #VERSION_DEFINES #ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview #extension GL_EXT_multiview : enable #define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -#define ViewIndex 0 -#endif // has_VK_KHR_multiview #endif //USE_MULTIVIEW #ifdef USE_MULTIVIEW @@ -42,15 +38,6 @@ void main() { #VERSION_DEFINES -#ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview -#extension GL_EXT_multiview 
: enable -#define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -#define ViewIndex 0 -#endif // has_VK_KHR_multiview -#endif //USE_MULTIVIEW - #ifdef USE_MULTIVIEW layout(location = 0) in vec3 uv_interp; layout(set = 0, binding = 0) uniform sampler2DArray source_color; diff --git a/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl index a0ef169f037..5e594e963dc 100644 --- a/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/sdfgi_debug_probes.glsl @@ -2,21 +2,12 @@ #version 450 -#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) -#extension GL_EXT_multiview : enable -#endif - #ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview +#extension GL_EXT_multiview : enable #define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -// !BAS! This needs to become an input once we implement our fallback! -#define ViewIndex 0 -#endif // has_VK_KHR_multiview #else // USE_MULTIVIEW -// Set to zero, not supported in non stereo #define ViewIndex 0 -#endif //USE_MULTIVIEW +#endif // !USE_MULTIVIEW #VERSION_DEFINES @@ -174,22 +165,6 @@ void main() { #version 450 -#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) -#extension GL_EXT_multiview : enable -#endif - -#ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview -#define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -// !BAS! This needs to become an input once we implement our fallback! 
-#define ViewIndex 0 -#endif // has_VK_KHR_multiview -#else // USE_MULTIVIEW -// Set to zero, not supported in non stereo -#define ViewIndex 0 -#endif //USE_MULTIVIEW - #VERSION_DEFINES #define MAX_VIEWS 2 diff --git a/servers/rendering/renderer_rd/shaders/environment/sky.glsl b/servers/rendering/renderer_rd/shaders/environment/sky.glsl index 5c42b199195..4cf61766493 100644 --- a/servers/rendering/renderer_rd/shaders/environment/sky.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/sky.glsl @@ -4,12 +4,6 @@ #VERSION_DEFINES -#define MAX_VIEWS 2 - -#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) -#extension GL_EXT_multiview : enable -#endif - layout(location = 0) out vec2 uv_interp; layout(push_constant, std430) uniform Params { @@ -36,20 +30,11 @@ void main() { #VERSION_DEFINES #ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview #extension GL_EXT_multiview : enable #define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -// !BAS! This needs to become an input once we implement our fallback! 
-#define ViewIndex 0 -#endif // has_VK_KHR_multiview -#else // USE_MULTIVIEW -// Set to zero, not supported in non stereo -#define ViewIndex 0 -#endif //USE_MULTIVIEW +#endif #define M_PI 3.14159265359 -#define MAX_VIEWS 2 layout(location = 0) in vec2 uv_interp; diff --git a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl index 90bbdfe6859..d7e422a74b0 100644 --- a/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl +++ b/servers/rendering/renderer_rd/shaders/environment/volumetric_fog_process.glsl @@ -4,15 +4,6 @@ #VERSION_DEFINES -/* Do not use subgroups here, seems there is not much advantage and causes glitches -#if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic) -#extension GL_KHR_shader_subgroup_ballot: enable -#extension GL_KHR_shader_subgroup_arithmetic: enable - -#define USE_SUBGROUPS -#endif -*/ - #ifdef MODE_DENSITY layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in; #else @@ -459,28 +450,15 @@ void main() { cluster_get_item_range(cluster_omni_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); -#ifdef USE_SUBGROUPS - item_from = subgroupBroadcastFirst(subgroupMin(item_from)); - item_to = subgroupBroadcastFirst(subgroupMax(item_to)); -#endif - for (uint i = item_from; i < item_to; i++) { uint mask = cluster_buffer.data[cluster_omni_offset + i]; mask &= cluster_get_range_clip_mask(i, item_min, item_max); -#ifdef USE_SUBGROUPS - uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); -#else uint merged_mask = mask; -#endif while (merged_mask != 0) { uint bit = findMSB(merged_mask); merged_mask &= ~(1 << bit); -#ifdef USE_SUBGROUPS - if (((1 << bit) & mask) == 0) { //do not process if not originally here - continue; - } -#endif + uint light_index = 32 * i + bit; //if 
(!bool(omni_omni_lights.data[light_index].mask & draw_call.layer_mask)) { @@ -539,28 +517,14 @@ void main() { cluster_get_item_range(cluster_spot_offset + params.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); -#ifdef USE_SUBGROUPS - item_from = subgroupBroadcastFirst(subgroupMin(item_from)); - item_to = subgroupBroadcastFirst(subgroupMax(item_to)); -#endif - for (uint i = item_from; i < item_to; i++) { uint mask = cluster_buffer.data[cluster_spot_offset + i]; mask &= cluster_get_range_clip_mask(i, item_min, item_max); -#ifdef USE_SUBGROUPS - uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); -#else uint merged_mask = mask; -#endif while (merged_mask != 0) { uint bit = findMSB(merged_mask); merged_mask &= ~(1 << bit); -#ifdef USE_SUBGROUPS - if (((1 << bit) & mask) == 0) { //do not process if not originally here - continue; - } -#endif //if (!bool(omni_lights.data[light_index].mask & draw_call.layer_mask)) { // continue; //not masked diff --git a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl index 7f2a9d2ecdc..c1339aa8833 100644 --- a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered.glsl @@ -134,12 +134,9 @@ layout(location = 9) out float dp_clip; layout(location = 10) out flat uint instance_index_interp; #ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview +#extension GL_EXT_multiview : enable #define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -// !BAS! This needs to become an input once we implement our fallback! 
-#define ViewIndex 0 -#endif // has_VK_KHR_multiview + vec3 multiview_uv(vec2 uv) { return vec3(uv, ViewIndex); } @@ -148,15 +145,12 @@ ivec3 multiview_uv(ivec2 uv) { } layout(location = 11) out vec4 combined_projected; #else // USE_MULTIVIEW -// Set to zero, not supported in non stereo -#define ViewIndex 0 vec2 multiview_uv(vec2 uv) { return uv; } ivec2 multiview_uv(ivec2 uv) { return uv; } - #endif //USE_MULTIVIEW #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING) @@ -915,12 +909,8 @@ vec4 textureArray_bicubic(texture2DArray tex, vec3 uv, vec2 texture_size) { #endif //USE_LIGHTMAP #ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview +#extension GL_EXT_multiview : enable #define ViewIndex gl_ViewIndex -#else // has_VK_KHR_multiview -// !BAS! This needs to become an input once we implement our fallback! -#define ViewIndex 0 -#endif // has_VK_KHR_multiview vec3 multiview_uv(vec2 uv) { return vec3(uv, ViewIndex); } @@ -929,15 +919,13 @@ ivec3 multiview_uv(ivec2 uv) { } layout(location = 11) in vec4 combined_projected; #else // USE_MULTIVIEW -// Set to zero, not supported in non stereo -#define ViewIndex 0 vec2 multiview_uv(vec2 uv) { return uv; } ivec2 multiview_uv(ivec2 uv) { return uv; } -#endif //USE_MULTIVIEW +#endif // !USE_MULTIVIEW #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) && defined(USE_VERTEX_LIGHTING) layout(location = 12) highp in vec4 diffuse_light_interp; layout(location = 13) highp in vec4 specular_light_interp; @@ -1452,28 +1440,22 @@ void fragment_shader(in SceneData scene_data) { cluster_get_item_range(cluster_decal_offset + implementation_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); -#ifdef USE_SUBGROUPS item_from = subgroupBroadcastFirst(subgroupMin(item_from)); item_to = subgroupBroadcastFirst(subgroupMax(item_to)); -#endif for (uint i = item_from; i < item_to; i++) { uint mask = cluster_buffer.data[cluster_decal_offset + i]; mask &= 
cluster_get_range_clip_mask(i, item_min, item_max); -#ifdef USE_SUBGROUPS - uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); -#else - uint merged_mask = mask; -#endif + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); while (merged_mask != 0) { uint bit = findMSB(merged_mask); merged_mask &= ~(1u << bit); -#ifdef USE_SUBGROUPS + if (((1u << bit) & mask) == 0) { //do not process if not originally here continue; } -#endif + uint decal_index = 32 * i + bit; if (!bool(decals.data[decal_index].mask & instances.data[instance_index].layer_mask)) { @@ -1931,10 +1913,8 @@ void fragment_shader(in SceneData scene_data) { cluster_get_item_range(cluster_reflection_offset + implementation_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); -#ifdef USE_SUBGROUPS item_from = subgroupBroadcastFirst(subgroupMin(item_from)); item_to = subgroupBroadcastFirst(subgroupMax(item_to)); -#endif #ifdef LIGHT_ANISOTROPY_USED // https://google.github.io/filament/Filament.html#lighting/imagebasedlights/anisotropy @@ -1952,20 +1932,16 @@ void fragment_shader(in SceneData scene_data) { for (uint i = item_from; i < item_to; i++) { uint mask = cluster_buffer.data[cluster_reflection_offset + i]; mask &= cluster_get_range_clip_mask(i, item_min, item_max); -#ifdef USE_SUBGROUPS - uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); -#else - uint merged_mask = mask; -#endif + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); while (merged_mask != 0) { uint bit = findMSB(merged_mask); merged_mask &= ~(1u << bit); -#ifdef USE_SUBGROUPS + if (((1u << bit) & mask) == 0) { //do not process if not originally here continue; } -#endif + uint reflection_index = 32 * i + bit; if (!bool(reflections.data[reflection_index].mask & instances.data[instance_index].layer_mask)) { @@ -2515,28 +2491,22 @@ void fragment_shader(in SceneData scene_data) { cluster_get_item_range(cluster_omni_offset + 
implementation_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); -#ifdef USE_SUBGROUPS item_from = subgroupBroadcastFirst(subgroupMin(item_from)); item_to = subgroupBroadcastFirst(subgroupMax(item_to)); -#endif for (uint i = item_from; i < item_to; i++) { uint mask = cluster_buffer.data[cluster_omni_offset + i]; mask &= cluster_get_range_clip_mask(i, item_min, item_max); -#ifdef USE_SUBGROUPS - uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); -#else - uint merged_mask = mask; -#endif + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); while (merged_mask != 0) { uint bit = findMSB(merged_mask); merged_mask &= ~(1u << bit); -#ifdef USE_SUBGROUPS + if (((1u << bit) & mask) == 0) { //do not process if not originally here continue; } -#endif + uint light_index = 32 * i + bit; if (!bool(omni_lights.data[light_index].mask & instances.data[instance_index].layer_mask)) { @@ -2582,28 +2552,21 @@ void fragment_shader(in SceneData scene_data) { cluster_get_item_range(cluster_spot_offset + implementation_data.max_cluster_element_count_div_32 + cluster_z, item_min, item_max, item_from, item_to); -#ifdef USE_SUBGROUPS item_from = subgroupBroadcastFirst(subgroupMin(item_from)); item_to = subgroupBroadcastFirst(subgroupMax(item_to)); -#endif for (uint i = item_from; i < item_to; i++) { uint mask = cluster_buffer.data[cluster_spot_offset + i]; mask &= cluster_get_range_clip_mask(i, item_min, item_max); -#ifdef USE_SUBGROUPS - uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); -#else - uint merged_mask = mask; -#endif + uint merged_mask = subgroupBroadcastFirst(subgroupOr(mask)); while (merged_mask != 0) { uint bit = findMSB(merged_mask); merged_mask &= ~(1u << bit); -#ifdef USE_SUBGROUPS + if (((1u << bit) & mask) == 0) { //do not process if not originally here continue; } -#endif uint light_index = 32 * i + bit; diff --git 
a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl index a96a494bda2..328e2f9263b 100644 --- a/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_clustered/scene_forward_clustered_inc.glsl @@ -5,20 +5,9 @@ #define MAX_VOXEL_GI_INSTANCES 8 #define MAX_VIEWS 2 -#ifndef MOLTENVK_USED -#if defined(has_GL_KHR_shader_subgroup_ballot) && defined(has_GL_KHR_shader_subgroup_arithmetic) - #extension GL_KHR_shader_subgroup_ballot : enable #extension GL_KHR_shader_subgroup_arithmetic : enable -#define USE_SUBGROUPS -#endif -#endif // MOLTENVK_USED - -#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) -#extension GL_EXT_multiview : enable -#endif - #include "../cluster_data_inc.glsl" #include "../decal_data_inc.glsl" #include "../scene_data_inc.glsl" diff --git a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl index ce916f72f12..5842d054cd1 100644 --- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile.glsl @@ -128,28 +128,22 @@ layout(location = 9) out highp float dp_clip; #endif #ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview +#extension GL_EXT_multiview : enable #define ViewIndex gl_ViewIndex -#else -// !BAS! This needs to become an input once we implement our fallback! 
-#define ViewIndex 0 -#endif vec3 multiview_uv(vec2 uv) { return vec3(uv, ViewIndex); } ivec3 multiview_uv(ivec2 uv) { return ivec3(uv, int(ViewIndex)); } -#else -// Set to zero, not supported in non stereo -#define ViewIndex 0 +#else // USE_MULTIVIEW vec2 multiview_uv(vec2 uv) { return uv; } ivec2 multiview_uv(ivec2 uv) { return uv; } -#endif //USE_MULTIVIEW +#endif // !USE_MULTIVIEW invariant gl_Position; @@ -712,28 +706,22 @@ vec4 textureArray_bicubic(texture2DArray tex, vec3 uv, vec2 texture_size) { #endif //USE_LIGHTMAP #ifdef USE_MULTIVIEW -#ifdef has_VK_KHR_multiview +#extension GL_EXT_multiview : enable #define ViewIndex gl_ViewIndex -#else -// !BAS! This needs to become an input once we implement our fallback! -#define ViewIndex 0 -#endif vec3 multiview_uv(vec2 uv) { return vec3(uv, ViewIndex); } ivec3 multiview_uv(ivec2 uv) { return ivec3(uv, int(ViewIndex)); } -#else -// Set to zero, not supported in non stereo -#define ViewIndex 0 +#else // USE_MULTIVIEW vec2 multiview_uv(vec2 uv) { return uv; } ivec2 multiview_uv(ivec2 uv) { return uv; } -#endif //USE_MULTIVIEW +#endif // !USE_MULTIVIEW //defines to keep compatibility with vertex diff --git a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl index 7864d68ae2d..86bca89a6c0 100644 --- a/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/forward_mobile/scene_forward_mobile_inc.glsl @@ -1,10 +1,6 @@ #define M_PI 3.14159265359 #define MAX_VIEWS 2 -#if defined(USE_MULTIVIEW) && defined(has_VK_KHR_multiview) -#extension GL_EXT_multiview : enable -#endif - #include "../decal_data_inc.glsl" #include "../scene_data_inc.glsl" diff --git a/servers/rendering/renderer_rd/storage_rd/material_storage.cpp b/servers/rendering/renderer_rd/storage_rd/material_storage.cpp index 9c89283262c..62e5c87faae 100644 --- 
a/servers/rendering/renderer_rd/storage_rd/material_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/material_storage.cpp @@ -1936,12 +1936,19 @@ RID MaterialStorage::shader_allocate() { return shader_owner.allocate_rid(); } -void MaterialStorage::shader_initialize(RID p_rid) { +void MaterialStorage::shader_initialize(RID p_rid, bool p_embedded) { Shader shader; shader.data = nullptr; shader.type = SHADER_TYPE_MAX; + shader.embedded = p_embedded; shader_owner.initialize_rid(p_rid, shader); + + if (p_embedded) { + // Add to the global embedded set. + MutexLock lock(embedded_set_mutex); + embedded_set.insert(p_rid); + } } void MaterialStorage::shader_free(RID p_rid) { @@ -1957,6 +1964,13 @@ void MaterialStorage::shader_free(RID p_rid) { if (shader->data) { memdelete(shader->data); } + + if (shader->embedded) { + // Remove from the global embedded set. + MutexLock lock(embedded_set_mutex); + embedded_set.erase(p_rid); + } + shader_owner.free(p_rid); } @@ -2112,6 +2126,12 @@ void MaterialStorage::shader_set_data_request_function(ShaderType p_shader_type, shader_data_request_func[p_shader_type] = p_function; } +MaterialStorage::ShaderData *MaterialStorage::shader_get_data(RID p_shader) const { + Shader *shader = shader_owner.get_or_null(p_shader); + ERR_FAIL_NULL_V(shader, nullptr); + return shader->data; +} + RS::ShaderNativeSourceCode MaterialStorage::shader_get_native_source_code(RID p_shader) const { Shader *shader = shader_owner.get_or_null(p_shader); ERR_FAIL_NULL_V(shader, RS::ShaderNativeSourceCode()); @@ -2121,6 +2141,18 @@ RS::ShaderNativeSourceCode MaterialStorage::shader_get_native_source_code(RID p_ return RS::ShaderNativeSourceCode(); } +void MaterialStorage::shader_embedded_set_lock() { + embedded_set_mutex.lock(); +} + +const HashSet &MaterialStorage::shader_embedded_set_get() const { + return embedded_set; +} + +void MaterialStorage::shader_embedded_set_unlock() { + embedded_set_mutex.unlock(); +} + /* MATERIAL API */ void 
MaterialStorage::_material_uniform_set_erased(void *p_material) { diff --git a/servers/rendering/renderer_rd/storage_rd/material_storage.h b/servers/rendering/renderer_rd/storage_rd/material_storage.h index f9031b08637..3ca150646cd 100644 --- a/servers/rendering/renderer_rd/storage_rd/material_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/material_storage.h @@ -79,7 +79,8 @@ public: virtual void set_code(const String &p_Code) = 0; virtual bool is_animated() const = 0; virtual bool casts_shadows() const = 0; - virtual RS::ShaderNativeSourceCode get_native_source_code() const { return RS::ShaderNativeSourceCode(); } + virtual RS::ShaderNativeSourceCode get_native_source_code() const = 0; + virtual Pair get_native_shader_and_version() const = 0; virtual ~ShaderData() {} @@ -220,12 +221,15 @@ private: ShaderType type; HashMap> default_texture_parameter; HashSet owners; + bool embedded = false; }; typedef ShaderData *(*ShaderDataRequestFunction)(); ShaderDataRequestFunction shader_data_request_func[SHADER_TYPE_MAX]; mutable RID_Owner shader_owner; + HashSet embedded_set; + Mutex embedded_set_mutex; Shader *get_shader(RID p_rid) { return shader_owner.get_or_null(p_rid); } /* MATERIAL API */ @@ -406,7 +410,7 @@ public: bool owns_shader(RID p_rid) { return shader_owner.owns(p_rid); } virtual RID shader_allocate() override; - virtual void shader_initialize(RID p_shader) override; + virtual void shader_initialize(RID p_shader, bool p_embedded = true) override; virtual void shader_free(RID p_rid) override; virtual void shader_set_code(RID p_shader, const String &p_code) override; @@ -418,8 +422,12 @@ public: virtual RID shader_get_default_texture_parameter(RID p_shader, const StringName &p_name, int p_index) const override; virtual Variant shader_get_parameter_default(RID p_shader, const StringName &p_param) const override; void shader_set_data_request_function(ShaderType p_shader_type, ShaderDataRequestFunction p_function); + ShaderData *shader_get_data(RID 
p_shader) const; virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const override; + virtual void shader_embedded_set_lock() override; + virtual const HashSet &shader_embedded_set_get() const override; + virtual void shader_embedded_set_unlock() override; /* MATERIAL API */ diff --git a/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp b/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp index 3b21afaa8ab..6bd9dc49499 100644 --- a/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/particles_storage.cpp @@ -1748,6 +1748,10 @@ RS::ShaderNativeSourceCode ParticlesStorage::ParticlesShaderData::get_native_sou return ParticlesStorage::get_singleton()->particles_shader.shader.version_get_native_source_code(version); } +Pair ParticlesStorage::ParticlesShaderData::get_native_shader_and_version() const { + return { &ParticlesStorage::get_singleton()->particles_shader.shader, version }; +} + ParticlesStorage::ParticlesShaderData::~ParticlesShaderData() { //pipeline variants will clear themselves if shader is gone if (version.is_valid()) { diff --git a/servers/rendering/renderer_rd/storage_rd/particles_storage.h b/servers/rendering/renderer_rd/storage_rd/particles_storage.h index 60f4a24bc9f..de929857be2 100644 --- a/servers/rendering/renderer_rd/storage_rd/particles_storage.h +++ b/servers/rendering/renderer_rd/storage_rd/particles_storage.h @@ -364,6 +364,7 @@ private: virtual bool is_animated() const; virtual bool casts_shadows() const; virtual RS::ShaderNativeSourceCode get_native_source_code() const; + virtual Pair get_native_shader_and_version() const; ParticlesShaderData() {} virtual ~ParticlesShaderData(); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 0c060b43765..2579ec49151 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -36,6 +36,11 @@ #include 
"core/config/project_settings.h" #include "core/io/dir_access.h" +#include "modules/modules_enabled.gen.h" + +#ifdef MODULE_GLSLANG_ENABLED +#include "modules/glslang/shader_compile.h" +#endif #define FORCE_SEPARATE_PRESENT_QUEUE 0 #define PRINT_FRAMEBUFFER_FORMAT 0 @@ -135,10 +140,6 @@ RenderingDevice *RenderingDevice::get_singleton() { return singleton; } -RenderingDevice::ShaderCompileToSPIRVFunction RenderingDevice::compile_to_spirv_function = nullptr; -RenderingDevice::ShaderCacheFunction RenderingDevice::cache_function = nullptr; -RenderingDevice::ShaderSPIRVGetCacheKeyFunction RenderingDevice::get_spirv_cache_key_function = nullptr; - /***************************/ /**** ID INFRASTRUCTURE ****/ /***************************/ @@ -191,36 +192,18 @@ void RenderingDevice::_free_dependencies(RID p_id) { /**** SHADER INFRASTRUCTURE ****/ /*******************************/ -void RenderingDevice::shader_set_compile_to_spirv_function(ShaderCompileToSPIRVFunction p_function) { - compile_to_spirv_function = p_function; -} - -void RenderingDevice::shader_set_spirv_cache_function(ShaderCacheFunction p_function) { - cache_function = p_function; -} - -void RenderingDevice::shader_set_get_cache_key_function(ShaderSPIRVGetCacheKeyFunction p_function) { - get_spirv_cache_key_function = p_function; -} - Vector RenderingDevice::shader_compile_spirv_from_source(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language, String *r_error, bool p_allow_cache) { - if (p_allow_cache && cache_function) { - Vector cache = cache_function(p_stage, p_source_code, p_language); - if (cache.size()) { - return cache; + switch (p_language) { +#ifdef MODULE_GLSLANG_ENABLED + case ShaderLanguage::SHADER_LANGUAGE_GLSL: { + ShaderLanguageVersion language_version = driver->get_shader_container_format().get_shader_language_version(); + ShaderSpirvVersion spirv_version = driver->get_shader_container_format().get_shader_spirv_version(); + return compile_glslang_shader(p_stage, 
ShaderIncludeDB::parse_include_files(p_source_code), language_version, spirv_version, r_error); } +#endif + default: + ERR_FAIL_V_MSG(Vector(), "Shader language is not supported."); } - - ERR_FAIL_NULL_V(compile_to_spirv_function, Vector()); - - return compile_to_spirv_function(p_stage, ShaderIncludeDB::parse_include_files(p_source_code), p_language, r_error, this); -} - -String RenderingDevice::shader_get_spirv_cache_key() const { - if (get_spirv_cache_key_function) { - return get_spirv_cache_key_function(this); - } - return String(); } RID RenderingDevice::shader_create_from_spirv(const Vector &p_spirv, const String &p_shader_name) { @@ -3360,12 +3343,23 @@ String RenderingDevice::_shader_uniform_debug(RID p_shader, int p_set) { return ret; } -String RenderingDevice::shader_get_binary_cache_key() const { - return driver->shader_get_binary_cache_key(); -} - Vector RenderingDevice::shader_compile_binary_from_spirv(const Vector &p_spirv, const String &p_shader_name) { - return driver->shader_compile_binary_from_spirv(p_spirv, p_shader_name); + ShaderReflection shader_refl; + if (reflect_spirv(p_spirv, shader_refl) != OK) { + return Vector(); + } + + const RenderingShaderContainerFormat &container_format = driver->get_shader_container_format(); + Ref shader_container = container_format.create_container(); + ERR_FAIL_COND_V(shader_container.is_null(), Vector()); + + shader_container->set_from_shader_reflection(p_shader_name, shader_refl); + + // Compile shader binary from SPIR-V. 
+ bool code_compiled = shader_container->set_code_from_spirv(p_spirv); + ERR_FAIL_COND_V_MSG(!code_compiled, Vector(), vformat("Failed to compile code to native for SPIR-V.")); + + return shader_container->to_bytes(); } RID RenderingDevice::shader_create_from_bytecode(const Vector &p_shader_binary, RID p_placeholder) { @@ -3379,8 +3373,11 @@ RID RenderingDevice::shader_create_from_bytecode(const Vector &p_shader RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vector &p_shader_binary, RID p_placeholder, const Vector &p_immutable_samplers) { _THREAD_SAFE_METHOD_ - ShaderDescription shader_desc; - String name; + Ref shader_container = driver->get_shader_container_format().create_container(); + ERR_FAIL_COND_V(shader_container.is_null(), RID()); + + bool parsed_container = shader_container->from_bytes(p_shader_binary); + ERR_FAIL_COND_V_MSG(!parsed_container, RID(), "Failed to parse shader container from binary."); Vector driver_immutable_samplers; for (const PipelineImmutableSampler &source_sampler : p_immutable_samplers) { @@ -3395,7 +3392,8 @@ RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vectorshader_create_from_bytecode(p_shader_binary, shader_desc, name, driver_immutable_samplers); + + RDD::ShaderID shader_id = driver->shader_create_from_container(shader_container, driver_immutable_samplers); ERR_FAIL_COND_V(!shader_id, RID()); // All good, let's create modules. 
@@ -3410,8 +3408,9 @@ RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vectorname = name; + *((ShaderReflection *)shader) = shader_container->get_shader_reflection(); + shader->name.clear(); + shader->name.append_utf8(shader_container->shader_name); shader->driver_id = shader_id; shader->layout_hash = driver->shader_get_layout_hash(shader_id); @@ -3437,7 +3436,7 @@ RID RenderingDevice::shader_create_from_bytecode_with_samplers(const Vectorset_formats.push_back(format); } - for (ShaderStage stage : shader_desc.stages) { + for (ShaderStage stage : shader->stages_vector) { switch (stage) { case SHADER_STAGE_VERTEX: shader->stage_bits.set_flag(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 2c936b0100c..acd5d4dd01b 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -65,25 +65,12 @@ private: Thread::ID render_thread_id; public: - enum ShaderLanguage { - SHADER_LANGUAGE_GLSL, - SHADER_LANGUAGE_HLSL - }; - typedef int64_t DrawListID; typedef int64_t ComputeListID; - typedef String (*ShaderSPIRVGetCacheKeyFunction)(const RenderingDevice *p_render_device); - typedef Vector (*ShaderCompileToSPIRVFunction)(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language, String *r_error, const RenderingDevice *p_render_device); - typedef Vector (*ShaderCacheFunction)(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language); - typedef void (*InvalidationCallback)(void *); private: - static ShaderCompileToSPIRVFunction compile_to_spirv_function; - static ShaderCacheFunction cache_function; - static ShaderSPIRVGetCacheKeyFunction get_spirv_cache_key_function; - static RenderingDevice *singleton; RenderingContextDriver *context = nullptr; @@ -871,7 +858,7 @@ private: // to do quick validation and ensuring the user // does not submit something invalid. 
- struct Shader : public ShaderDescription { + struct Shader : public ShaderReflection { String name; // Used for debug. RDD::ShaderID driver_id; uint32_t layout_hash = 0; @@ -963,13 +950,6 @@ public: bool has_feature(const Features p_feature) const; Vector shader_compile_spirv_from_source(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language = SHADER_LANGUAGE_GLSL, String *r_error = nullptr, bool p_allow_cache = true); - String shader_get_spirv_cache_key() const; - - static void shader_set_compile_to_spirv_function(ShaderCompileToSPIRVFunction p_function); - static void shader_set_spirv_cache_function(ShaderCacheFunction p_function); - static void shader_set_get_cache_key_function(ShaderSPIRVGetCacheKeyFunction p_function); - - String shader_get_binary_cache_key() const; Vector shader_compile_binary_from_spirv(const Vector &p_spirv, const String &p_shader_name = ""); RID shader_create_from_spirv(const Vector &p_spirv, const String &p_shader_name = ""); diff --git a/servers/rendering/rendering_device_binds.cpp b/servers/rendering/rendering_device_binds.cpp index a900bf9520e..14e38682b94 100644 --- a/servers/rendering/rendering_device_binds.cpp +++ b/servers/rendering/rendering_device_binds.cpp @@ -30,14 +30,14 @@ #include "rendering_device_binds.h" +#include "modules/modules_enabled.gen.h" // For glslang. +#ifdef MODULE_GLSLANG_ENABLED +#include "modules/glslang/shader_compile.h" +#endif + #include "shader_include_db.h" Error RDShaderFile::parse_versions_from_text(const String &p_text, const String p_defines, OpenIncludeFunction p_include_func, void *p_include_func_userdata) { - ERR_FAIL_NULL_V_MSG( - RenderingDevice::get_singleton(), - ERR_UNAVAILABLE, - "Cannot import custom .glsl shaders when running without a RenderingDevice. 
This can happen if you are using the headless more or the Compatibility renderer."); - Vector lines = p_text.split("\n"); bool reading_versions = false; @@ -192,8 +192,12 @@ Error RDShaderFile::parse_versions_from_text(const String &p_text, const String } code = code.replace("VERSION_DEFINES", E.value); String error; - Vector spirv = RenderingDevice::get_singleton()->shader_compile_spirv_from_source(RD::ShaderStage(i), code, RD::SHADER_LANGUAGE_GLSL, &error, false); +#ifdef MODULE_GLSLANG_ENABLED + Vector spirv = compile_glslang_shader(RD::ShaderStage(i), ShaderIncludeDB::parse_include_files(code), RD::SHADER_LANGUAGE_VULKAN_VERSION_1_1, RD::SHADER_SPIRV_VERSION_1_3, &error); bytecode->set_stage_bytecode(RD::ShaderStage(i), spirv); +#else + error = "Shader compilation is not supported because glslang was not enabled."; +#endif if (!error.is_empty()) { error += String() + "\n\nStage '" + stage_str[i] + "' source code: \n\n"; Vector sclines = code.split("\n"); diff --git a/servers/rendering/rendering_device_commons.cpp b/servers/rendering/rendering_device_commons.cpp index a11ffe57dd1..4892d14cfff 100644 --- a/servers/rendering/rendering_device_commons.cpp +++ b/servers/rendering/rendering_device_commons.cpp @@ -30,6 +30,8 @@ #include "rendering_device_commons.h" +#include "thirdparty/spirv-reflect/spirv_reflect.h" + /*****************/ /**** GENERIC ****/ /*****************/ @@ -968,3 +970,343 @@ const char *RenderingDeviceCommons::SHADER_STAGE_NAMES[SHADER_STAGE_MAX] = { "TesselationEvaluation", "Compute", }; + +Error RenderingDeviceCommons::reflect_spirv(VectorView p_spirv, ShaderReflection &r_reflection) { + r_reflection = {}; + + const uint32_t spirv_size = p_spirv.size(); + for (uint32_t i = 0; i < spirv_size; i++) { + ShaderStage stage = p_spirv[i].shader_stage; + ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); + + if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) { + r_reflection.is_compute = true; + ERR_FAIL_COND_V_MSG(spirv_size != 
1, FAILED, + "Compute shaders can only receive one stage, dedicated to compute."); + } + ERR_FAIL_COND_V_MSG(r_reflection.stages_bits.has_flag(stage_flag), FAILED, + "Stage " + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " submitted more than once."); + + { + SpvReflectShaderModule module; + const uint8_t *spirv = p_spirv[i].spirv.ptr(); + SpvReflectResult result = spvReflectCreateShaderModule(p_spirv[i].spirv.size(), spirv, &module); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed parsing shader."); + + if (r_reflection.is_compute) { + r_reflection.compute_local_size[0] = module.entry_points->local_size.x; + r_reflection.compute_local_size[1] = module.entry_points->local_size.y; + r_reflection.compute_local_size[2] = module.entry_points->local_size.z; + } + uint32_t binding_count = 0; + result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating descriptor bindings."); + + if (binding_count > 0) { + // Parse bindings. 
+ + Vector bindings; + bindings.resize(binding_count); + result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, bindings.ptrw()); + + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed getting descriptor bindings."); + + for (uint32_t j = 0; j < binding_count; j++) { + const SpvReflectDescriptorBinding &binding = *bindings[j]; + + ShaderUniform uniform; + + bool need_array_dimensions = false; + bool need_block_size = false; + bool may_be_writable = false; + + switch (binding.descriptor_type) { + case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLER: { + uniform.type = UNIFORM_TYPE_SAMPLER; + need_array_dimensions = true; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { + uniform.type = UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; + need_array_dimensions = true; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLED_IMAGE: { + uniform.type = UNIFORM_TYPE_TEXTURE; + need_array_dimensions = true; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + uniform.type = UNIFORM_TYPE_IMAGE; + need_array_dimensions = true; + may_be_writable = true; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { + uniform.type = UNIFORM_TYPE_TEXTURE_BUFFER; + need_array_dimensions = true; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { + uniform.type = UNIFORM_TYPE_IMAGE_BUFFER; + need_array_dimensions = true; + may_be_writable = true; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { + uniform.type = UNIFORM_TYPE_UNIFORM_BUFFER; + need_block_size = true; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER: { + uniform.type = UNIFORM_TYPE_STORAGE_BUFFER; + need_block_size = true; + may_be_writable = true; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: { + ERR_PRINT("Dynamic uniform buffer not supported."); + continue; + } break; + case 
SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { + ERR_PRINT("Dynamic storage buffer not supported."); + continue; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: { + uniform.type = UNIFORM_TYPE_INPUT_ATTACHMENT; + need_array_dimensions = true; + } break; + case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { + ERR_PRINT("Acceleration structure not supported."); + continue; + } break; + } + + if (need_array_dimensions) { + if (binding.array.dims_count == 0) { + uniform.length = 1; + } else { + for (uint32_t k = 0; k < binding.array.dims_count; k++) { + if (k == 0) { + uniform.length = binding.array.dims[0]; + } else { + uniform.length *= binding.array.dims[k]; + } + } + } + + } else if (need_block_size) { + uniform.length = binding.block.size; + } else { + uniform.length = 0; + } + + if (may_be_writable) { + if (binding.descriptor_type == SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE) { + uniform.writable = !(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE); + } else { + uniform.writable = !(binding.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE) && !(binding.block.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE); + } + } else { + uniform.writable = false; + } + + uniform.binding = binding.binding; + uint32_t set = binding.set; + + ERR_FAIL_COND_V_MSG(set >= MAX_UNIFORM_SETS, FAILED, + "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ")."); + + if (set < (uint32_t)r_reflection.uniform_sets.size()) { + // Check if this already exists. + bool exists = false; + for (int k = 0; k < r_reflection.uniform_sets[set].size(); k++) { + if (r_reflection.uniform_sets[set][k].binding == uniform.binding) { + // Already exists, verify that it's the same type. 
+ ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].type != uniform.type, FAILED, + "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform type."); + + // Also, verify that it's the same size. + ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].length != uniform.length, FAILED, + "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform size."); + + // Also, verify that it has the same writability. + ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].writable != uniform.writable, FAILED, + "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different writability."); + + // Just append stage mask and return. + r_reflection.uniform_sets.write[set].write[k].stages.set_flag(stage_flag); + exists = true; + break; + } + } + + if (exists) { + continue; // Merged. + } + } + + uniform.stages.set_flag(stage_flag); + + if (set >= (uint32_t)r_reflection.uniform_sets.size()) { + r_reflection.uniform_sets.resize(set + 1); + } + + r_reflection.uniform_sets.write[set].push_back(uniform); + } + } + + { + // Specialization constants. 
+ + uint32_t sc_count = 0; + result = spvReflectEnumerateSpecializationConstants(&module, &sc_count, nullptr); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating specialization constants."); + + if (sc_count) { + Vector spec_constants; + spec_constants.resize(sc_count); + + result = spvReflectEnumerateSpecializationConstants(&module, &sc_count, spec_constants.ptrw()); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed obtaining specialization constants."); + + for (uint32_t j = 0; j < sc_count; j++) { + int32_t existing = -1; + ShaderSpecializationConstant sconst; + SpvReflectSpecializationConstant *spc = spec_constants[j]; + + sconst.constant_id = spc->constant_id; + sconst.int_value = 0; // Clear previous value JIC. + switch (spc->constant_type) { + case SPV_REFLECT_SPECIALIZATION_CONSTANT_BOOL: { + sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL; + sconst.bool_value = spc->default_value.int_bool_value != 0; + } break; + case SPV_REFLECT_SPECIALIZATION_CONSTANT_INT: { + sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; + sconst.int_value = spc->default_value.int_bool_value; + } break; + case SPV_REFLECT_SPECIALIZATION_CONSTANT_FLOAT: { + sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT; + sconst.float_value = spc->default_value.float_value; + } break; + } + sconst.stages.set_flag(stage_flag); + + for (int k = 0; k < r_reflection.specialization_constants.size(); k++) { + if (r_reflection.specialization_constants[k].constant_id == sconst.constant_id) { + ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].type != sconst.type, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ."); + 
ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].int_value != sconst.int_value, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ."); + existing = k; + break; + } + } + + if (existing >= 0) { + r_reflection.specialization_constants.write[existing].stages.set_flag(stage_flag); + } else { + r_reflection.specialization_constants.push_back(sconst); + } + } + + r_reflection.specialization_constants.sort(); + } + } + + if (stage == SHADER_STAGE_VERTEX) { + uint32_t iv_count = 0; + result = spvReflectEnumerateInputVariables(&module, &iv_count, nullptr); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating input variables."); + + if (iv_count) { + Vector input_vars; + input_vars.resize(iv_count); + + result = spvReflectEnumerateInputVariables(&module, &iv_count, input_vars.ptrw()); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed obtaining input variables."); + + for (const SpvReflectInterfaceVariable *v : input_vars) { + if (!v) { + continue; + } + if (v->decoration_flags == 0) { // Regular input. 
+ r_reflection.vertex_input_mask |= (((uint64_t)1) << v->location); + } + if (v->built_in == SpvBuiltInViewIndex || v->built_in == SpvBuiltInViewportIndex) { + r_reflection.has_multiview = true; + } + } + } + } + + if (stage == SHADER_STAGE_FRAGMENT) { + uint32_t ov_count = 0; + result = spvReflectEnumerateOutputVariables(&module, &ov_count, nullptr); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating output variables."); + + if (ov_count) { + Vector output_vars; + output_vars.resize(ov_count); + + result = spvReflectEnumerateOutputVariables(&module, &ov_count, output_vars.ptrw()); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed obtaining output variables."); + + for (const SpvReflectInterfaceVariable *refvar : output_vars) { + if (!refvar) { + continue; + } + if (refvar->built_in != SpvBuiltInFragDepth) { + r_reflection.fragment_output_mask |= 1 << refvar->location; + } + if (refvar->built_in == SpvBuiltInViewIndex || refvar->built_in == SpvBuiltInViewportIndex) { + r_reflection.has_multiview = true; + } + } + } + } + + uint32_t pc_count = 0; + result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, nullptr); + ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating push constants."); + + if (pc_count) { + ERR_FAIL_COND_V_MSG(pc_count > 1, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "': Only one push constant is supported, which should be the same across shader stages."); + + Vector pconstants; + pconstants.resize(pc_count); + result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, pconstants.ptrw()); 
+ ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed obtaining push constants."); +#if 0 + if (pconstants[0] == nullptr) { + Ref f = FileAccess::open("res://popo.spv", FileAccess::WRITE); + f->store_buffer((const uint8_t *)&SpirV[0], SpirV.size() * sizeof(uint32_t)); + } +#endif + + ERR_FAIL_COND_V_MSG(r_reflection.push_constant_size && r_reflection.push_constant_size != pconstants[0]->size, FAILED, + "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "': Push constant block must be the same across shader stages."); + + r_reflection.push_constant_size = pconstants[0]->size; + r_reflection.push_constant_stages.set_flag(stage_flag); + + //print_line("Stage: " + String(SHADER_STAGE_NAMES[stage]) + " push constant of size=" + itos(push_constant.push_constant_size)); + } + + // Destroy the reflection data when no longer required. + spvReflectDestroyShaderModule(&module); + } + + r_reflection.stages_bits.set_flag(stage_flag); + } + + // Sort all uniform_sets by binding. + for (uint32_t i = 0; i < r_reflection.uniform_sets.size(); i++) { + r_reflection.uniform_sets.write[i].sort(); + } + + return OK; +} diff --git a/servers/rendering/rendering_device_commons.h b/servers/rendering/rendering_device_commons.h index 63f2b8792b4..e34a5127f99 100644 --- a/servers/rendering/rendering_device_commons.h +++ b/servers/rendering/rendering_device_commons.h @@ -33,8 +33,39 @@ #include "core/object/object.h" #include "core/variant/type_info.h" +#include + #define STEPIFY(m_number, m_alignment) ((((m_number) + ((m_alignment) - 1)) / (m_alignment)) * (m_alignment)) +// This may one day be used in Godot for interoperability between C arrays, Vector and LocalVector. +// (See https://github.com/godotengine/godot-proposals/issues/5144.) 
+template <typename T>
+class VectorView {
+	const T *_ptr = nullptr; // Borrowed pointer; the view never owns or frees the elements.
+	const uint32_t _size = 0; // Number of elements reachable through _ptr.
+
+public:
+	const T &operator[](uint32_t p_index) const { // Read-only access, so it must also work on const views.
+		DEV_ASSERT(p_index < _size);
+		return _ptr[p_index];
+	}
+
+	_ALWAYS_INLINE_ const T *ptr() const { return _ptr; }
+	_ALWAYS_INLINE_ uint32_t size() const { return _size; }
+
+	VectorView() = default;
+	VectorView(const T &p_ptr) :
+			// With this one you can pass a single element very conveniently!
+			_ptr(&p_ptr),
+			_size(1) {}
+	VectorView(const T *p_ptr, uint32_t p_size) :
+			_ptr(p_ptr), _size(p_size) {}
+	VectorView(const Vector<T> &p_lv) :
+			_ptr(p_lv.ptr()), _size(p_lv.size()) {}
+	VectorView(const LocalVector<T> &p_lv) :
+			_ptr(p_lv.ptr()), _size(p_lv.size()) {}
+};
+
 class RenderingDeviceCommons : public Object {
 	////////////////////////////////////////////
 	// PUBLIC STUFF
@@ -549,6 +580,30 @@ public:
 		SHADER_STAGE_COMPUTE_BIT = (1 << SHADER_STAGE_COMPUTE),
 	};
 
+	enum ShaderLanguage {
+		SHADER_LANGUAGE_GLSL,
+		SHADER_LANGUAGE_HLSL,
+	};
+
+	enum ShaderLanguageVersion {
+		SHADER_LANGUAGE_VULKAN_VERSION_1_0 = (1 << 22),
+		SHADER_LANGUAGE_VULKAN_VERSION_1_1 = (1 << 22) | (1 << 12),
+		SHADER_LANGUAGE_VULKAN_VERSION_1_2 = (1 << 22) | (2 << 12),
+		SHADER_LANGUAGE_VULKAN_VERSION_1_3 = (1 << 22) | (3 << 12),
+		SHADER_LANGUAGE_VULKAN_VERSION_1_4 = (1 << 22) | (4 << 12),
+		SHADER_LANGUAGE_OPENGL_VERSION_4_5_0 = 450,
+	};
+
+	enum ShaderSpirvVersion {
+		SHADER_SPIRV_VERSION_1_0 = (1 << 16),
+		SHADER_SPIRV_VERSION_1_1 = (1 << 16) | (1 << 8),
+		SHADER_SPIRV_VERSION_1_2 = (1 << 16) | (2 << 8),
+		SHADER_SPIRV_VERSION_1_3 = (1 << 16) | (3 << 8),
+		SHADER_SPIRV_VERSION_1_4 = (1 << 16) | (4 << 8),
+		SHADER_SPIRV_VERSION_1_5 = (1 << 16) | (5 << 8),
+		SHADER_SPIRV_VERSION_1_6 = (1 << 16) | (6 << 8),
+	};
+
 	struct ShaderStageSPIRVData {
 		ShaderStage shader_stage = SHADER_STAGE_MAX;
 		Vector<uint8_t> spirv;
@@ -951,13 +1006,13 @@ protected:
 
 	static uint32_t get_format_vertex_size(DataFormat p_format);
 
+public:
 	/****************/
 	/**** SHADER ****/
/****************/ static const char *SHADER_STAGE_NAMES[SHADER_STAGE_MAX]; -public: struct ShaderUniform { UniformType type = UniformType::UNIFORM_TYPE_MAX; bool writable = false; @@ -995,21 +1050,20 @@ public: bool operator<(const ShaderSpecializationConstant &p_other) const { return constant_id < p_other.constant_id; } }; - struct ShaderDescription { + struct ShaderReflection { uint64_t vertex_input_mask = 0; uint32_t fragment_output_mask = 0; bool is_compute = false; + bool has_multiview = false; uint32_t compute_local_size[3] = {}; uint32_t push_constant_size = 0; Vector> uniform_sets; Vector specialization_constants; - Vector stages; - }; - -protected: - struct ShaderReflection : public ShaderDescription { - BitField stages = {}; + Vector stages_vector; + BitField stages_bits = {}; BitField push_constant_stages = {}; }; + + static Error reflect_spirv(VectorView p_spirv, ShaderReflection &r_reflection); }; diff --git a/servers/rendering/rendering_device_driver.cpp b/servers/rendering/rendering_device_driver.cpp index 9c10031f09e..0c26a5808a1 100644 --- a/servers/rendering/rendering_device_driver.cpp +++ b/servers/rendering/rendering_device_driver.cpp @@ -30,331 +30,6 @@ #include "rendering_device_driver.h" -#include "thirdparty/spirv-reflect/spirv_reflect.h" - -/****************/ -/**** SHADER ****/ -/****************/ - -Error RenderingDeviceDriver::_reflect_spirv(VectorView p_spirv, ShaderReflection &r_reflection) { - r_reflection = {}; - - for (uint32_t i = 0; i < p_spirv.size(); i++) { - ShaderStage stage = p_spirv[i].shader_stage; - ShaderStage stage_flag = (ShaderStage)(1 << p_spirv[i].shader_stage); - - if (p_spirv[i].shader_stage == SHADER_STAGE_COMPUTE) { - r_reflection.is_compute = true; - ERR_FAIL_COND_V_MSG(p_spirv.size() != 1, FAILED, - "Compute shaders can only receive one stage, dedicated to compute."); - } - ERR_FAIL_COND_V_MSG(r_reflection.stages.has_flag(stage_flag), FAILED, - "Stage " + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + " 
submitted more than once."); - - { - SpvReflectShaderModule module; - const uint8_t *spirv = p_spirv[i].spirv.ptr(); - SpvReflectResult result = spvReflectCreateShaderModule(p_spirv[i].spirv.size(), spirv, &module); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed parsing shader."); - - if (r_reflection.is_compute) { - r_reflection.compute_local_size[0] = module.entry_points->local_size.x; - r_reflection.compute_local_size[1] = module.entry_points->local_size.y; - r_reflection.compute_local_size[2] = module.entry_points->local_size.z; - } - uint32_t binding_count = 0; - result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, nullptr); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating descriptor bindings."); - - if (binding_count > 0) { - // Parse bindings. 
- - Vector bindings; - bindings.resize(binding_count); - result = spvReflectEnumerateDescriptorBindings(&module, &binding_count, bindings.ptrw()); - - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed getting descriptor bindings."); - - for (uint32_t j = 0; j < binding_count; j++) { - const SpvReflectDescriptorBinding &binding = *bindings[j]; - - ShaderUniform uniform; - - bool need_array_dimensions = false; - bool need_block_size = false; - bool may_be_writable = false; - - switch (binding.descriptor_type) { - case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLER: { - uniform.type = UNIFORM_TYPE_SAMPLER; - need_array_dimensions = true; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { - uniform.type = UNIFORM_TYPE_SAMPLER_WITH_TEXTURE; - need_array_dimensions = true; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_SAMPLED_IMAGE: { - uniform.type = UNIFORM_TYPE_TEXTURE; - need_array_dimensions = true; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_IMAGE: { - uniform.type = UNIFORM_TYPE_IMAGE; - need_array_dimensions = true; - may_be_writable = true; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { - uniform.type = UNIFORM_TYPE_TEXTURE_BUFFER; - need_array_dimensions = true; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { - uniform.type = UNIFORM_TYPE_IMAGE_BUFFER; - need_array_dimensions = true; - may_be_writable = true; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { - uniform.type = UNIFORM_TYPE_UNIFORM_BUFFER; - need_block_size = true; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER: { - uniform.type = UNIFORM_TYPE_STORAGE_BUFFER; - need_block_size = true; - may_be_writable = true; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: { - ERR_PRINT("Dynamic uniform buffer not supported."); - continue; - } break; - case 
SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: { - ERR_PRINT("Dynamic storage buffer not supported."); - continue; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: { - uniform.type = UNIFORM_TYPE_INPUT_ATTACHMENT; - need_array_dimensions = true; - } break; - case SPV_REFLECT_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: { - ERR_PRINT("Acceleration structure not supported."); - continue; - } break; - } - - if (need_array_dimensions) { - if (binding.array.dims_count == 0) { - uniform.length = 1; - } else { - for (uint32_t k = 0; k < binding.array.dims_count; k++) { - if (k == 0) { - uniform.length = binding.array.dims[0]; - } else { - uniform.length *= binding.array.dims[k]; - } - } - } - - } else if (need_block_size) { - uniform.length = binding.block.size; - } else { - uniform.length = 0; - } - - if (may_be_writable) { - uniform.writable = !(binding.type_description->decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE) && !(binding.block.decoration_flags & SPV_REFLECT_DECORATION_NON_WRITABLE); - } else { - uniform.writable = false; - } - - uniform.binding = binding.binding; - uint32_t set = binding.set; - - ERR_FAIL_COND_V_MSG(set >= MAX_UNIFORM_SETS, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' uses a set (" + itos(set) + ") index larger than what is supported (" + itos(MAX_UNIFORM_SETS) + ")."); - - if (set < (uint32_t)r_reflection.uniform_sets.size()) { - // Check if this already exists. - bool exists = false; - for (int k = 0; k < r_reflection.uniform_sets[set].size(); k++) { - if (r_reflection.uniform_sets[set][k].binding == uniform.binding) { - // Already exists, verify that it's the same type. 
- ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].type != uniform.type, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform type."); - - // Also, verify that it's the same size. - ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].length != uniform.length, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different uniform size."); - - // Also, verify that it has the same writability. - ERR_FAIL_COND_V_MSG(r_reflection.uniform_sets[set][k].writable != uniform.writable, FAILED, - "On shader stage '" + String(SHADER_STAGE_NAMES[stage]) + "', uniform '" + binding.name + "' trying to reuse location for set=" + itos(set) + ", binding=" + itos(uniform.binding) + " with different writability."); - - // Just append stage mask and return. - r_reflection.uniform_sets.write[set].write[k].stages.set_flag(stage_flag); - exists = true; - break; - } - } - - if (exists) { - continue; // Merged. - } - } - - uniform.stages.set_flag(stage_flag); - - if (set >= (uint32_t)r_reflection.uniform_sets.size()) { - r_reflection.uniform_sets.resize(set + 1); - } - - r_reflection.uniform_sets.write[set].push_back(uniform); - } - } - - { - // Specialization constants. 
- - uint32_t sc_count = 0; - result = spvReflectEnumerateSpecializationConstants(&module, &sc_count, nullptr); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating specialization constants."); - - if (sc_count) { - Vector spec_constants; - spec_constants.resize(sc_count); - - result = spvReflectEnumerateSpecializationConstants(&module, &sc_count, spec_constants.ptrw()); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed obtaining specialization constants."); - - for (uint32_t j = 0; j < sc_count; j++) { - int32_t existing = -1; - ShaderSpecializationConstant sconst; - SpvReflectSpecializationConstant *spc = spec_constants[j]; - - sconst.constant_id = spc->constant_id; - sconst.int_value = 0; // Clear previous value JIC. - switch (spc->constant_type) { - case SPV_REFLECT_SPECIALIZATION_CONSTANT_BOOL: { - sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_BOOL; - sconst.bool_value = spc->default_value.int_bool_value != 0; - } break; - case SPV_REFLECT_SPECIALIZATION_CONSTANT_INT: { - sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT; - sconst.int_value = spc->default_value.int_bool_value; - } break; - case SPV_REFLECT_SPECIALIZATION_CONSTANT_FLOAT: { - sconst.type = PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT; - sconst.float_value = spc->default_value.float_value; - } break; - } - sconst.stages.set_flag(stage_flag); - - for (int k = 0; k < r_reflection.specialization_constants.size(); k++) { - if (r_reflection.specialization_constants[k].constant_id == sconst.constant_id) { - ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].type != sconst.type, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their types differ."); - 
ERR_FAIL_COND_V_MSG(r_reflection.specialization_constants[k].int_value != sconst.int_value, FAILED, "More than one specialization constant used for id (" + itos(sconst.constant_id) + "), but their default values differ."); - existing = k; - break; - } - } - - if (existing >= 0) { - r_reflection.specialization_constants.write[existing].stages.set_flag(stage_flag); - } else { - r_reflection.specialization_constants.push_back(sconst); - } - } - - r_reflection.specialization_constants.sort(); - } - } - - if (stage == SHADER_STAGE_VERTEX) { - uint32_t iv_count = 0; - result = spvReflectEnumerateInputVariables(&module, &iv_count, nullptr); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating input variables."); - - if (iv_count) { - Vector input_vars; - input_vars.resize(iv_count); - - result = spvReflectEnumerateInputVariables(&module, &iv_count, input_vars.ptrw()); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed obtaining input variables."); - - for (uint32_t j = 0; j < iv_count; j++) { - if (input_vars[j] && input_vars[j]->decoration_flags == 0) { // Regular input. 
- r_reflection.vertex_input_mask |= (((uint64_t)1) << input_vars[j]->location); - } - } - } - } - - if (stage == SHADER_STAGE_FRAGMENT) { - uint32_t ov_count = 0; - result = spvReflectEnumerateOutputVariables(&module, &ov_count, nullptr); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating output variables."); - - if (ov_count) { - Vector output_vars; - output_vars.resize(ov_count); - - result = spvReflectEnumerateOutputVariables(&module, &ov_count, output_vars.ptrw()); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed obtaining output variables."); - - for (uint32_t j = 0; j < ov_count; j++) { - const SpvReflectInterfaceVariable *refvar = output_vars[j]; - if (refvar != nullptr && refvar->built_in != SpvBuiltInFragDepth) { - r_reflection.fragment_output_mask |= 1 << refvar->location; - } - } - } - } - - uint32_t pc_count = 0; - result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, nullptr); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed enumerating push constants."); - - if (pc_count) { - ERR_FAIL_COND_V_MSG(pc_count > 1, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "': Only one push constant is supported, which should be the same across shader stages."); - - Vector pconstants; - pconstants.resize(pc_count); - result = spvReflectEnumeratePushConstantBlocks(&module, &pc_count, pconstants.ptrw()); - ERR_FAIL_COND_V_MSG(result != SPV_REFLECT_RESULT_SUCCESS, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "' failed obtaining push constants."); -#if 0 - if 
(pconstants[0] == nullptr) { - Ref f = FileAccess::open("res://popo.spv", FileAccess::WRITE); - f->store_buffer((const uint8_t *)&SpirV[0], SpirV.size() * sizeof(uint32_t)); - } -#endif - - ERR_FAIL_COND_V_MSG(r_reflection.push_constant_size && r_reflection.push_constant_size != pconstants[0]->size, FAILED, - "Reflection of SPIR-V shader stage '" + String(SHADER_STAGE_NAMES[p_spirv[i].shader_stage]) + "': Push constant block must be the same across shader stages."); - - r_reflection.push_constant_size = pconstants[0]->size; - r_reflection.push_constant_stages.set_flag(stage_flag); - - //print_line("Stage: " + String(SHADER_STAGE_NAMES[stage]) + " push constant of size=" + itos(push_constant.push_constant_size)); - } - - // Destroy the reflection data when no longer required. - spvReflectDestroyShaderModule(&module); - } - - r_reflection.stages.set_flag(stage_flag); - } - - return OK; -} - /**************/ /**** MISC ****/ /**************/ diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h index 0ac91df3884..7d7d732f079 100644 --- a/servers/rendering/rendering_device_driver.h +++ b/servers/rendering/rendering_device_driver.h @@ -49,37 +49,7 @@ #include "core/variant/type_info.h" #include "servers/rendering/rendering_context_driver.h" #include "servers/rendering/rendering_device_commons.h" - -#include - -// This may one day be used in Godot for interoperability between C arrays, Vector and LocalVector. -// (See https://github.com/godotengine/godot-proposals/issues/5144.) -template -class VectorView { - const T *_ptr = nullptr; - const uint32_t _size = 0; - -public: - const T &operator[](uint32_t p_index) { - DEV_ASSERT(p_index < _size); - return _ptr[p_index]; - } - - _ALWAYS_INLINE_ const T *ptr() const { return _ptr; } - _ALWAYS_INLINE_ uint32_t size() const { return _size; } - - VectorView() = default; - VectorView(const T &p_ptr) : - // With this one you can pass a single element very conveniently! 
- _ptr(&p_ptr), - _size(1) {} - VectorView(const T *p_ptr, uint32_t p_size) : - _ptr(p_ptr), _size(p_size) {} - VectorView(const Vector &p_lv) : - _ptr(p_lv.ptr()), _size(p_lv.size()) {} - VectorView(const LocalVector &p_lv) : - _ptr(p_lv.ptr()), _size(p_lv.size()) {} -}; +#include "servers/rendering/rendering_shader_container.h" // These utilities help drivers avoid allocations. #define ALLOCA(m_size) ((m_size != 0) ? alloca(m_size) : nullptr) @@ -495,32 +465,21 @@ public: /**** SHADER ****/ /****************/ - virtual String shader_get_binary_cache_key() = 0; - virtual Vector shader_compile_binary_from_spirv(VectorView p_spirv, const String &p_shader_name) = 0; - struct ImmutableSampler { UniformType type = UNIFORM_TYPE_MAX; uint32_t binding = 0xffffffff; // Binding index as specified in shader. LocalVector ids; }; - /** Creates a Pipeline State Object (PSO) out of the shader and all the input data it needs. - @param p_shader_binary Shader binary bytecode (e.g. SPIR-V). - @param r_shader_desc TBD. - @param r_name TBD. - @param p_immutable_samplers Immutable samplers can be embedded when creating the pipeline layout on the condition they - remain valid and unchanged, so they don't need to be specified when creating uniform sets. - @return PSO resource for binding. - */ - virtual ShaderID shader_create_from_bytecode(const Vector &p_shader_binary, ShaderDescription &r_shader_desc, String &r_name, const Vector &p_immutable_samplers) = 0; + + // Creates a Pipeline State Object (PSO) out of the shader and all the input data it needs. + // Immutable samplers can be embedded when creating the pipeline layout on the condition they remain valid and unchanged, so they don't need to be + // specified when creating uniform sets. Returns the PSO resource for binding.
+ virtual ShaderID shader_create_from_container(const Ref &p_shader_container, const Vector &p_immutable_samplers) = 0; // Only meaningful if API_TRAIT_SHADER_CHANGE_INVALIDATION is SHADER_CHANGE_INVALIDATION_ALL_OR_NONE_ACCORDING_TO_LAYOUT_HASH. virtual uint32_t shader_get_layout_hash(ShaderID p_shader) { return 0; } virtual void shader_free(ShaderID p_shader) = 0; virtual void shader_destroy_modules(ShaderID p_shader) = 0; -protected: - // An optional service to implementations. - Error _reflect_spirv(VectorView p_spirv, ShaderReflection &r_reflection); - public: /*********************/ /**** UNIFORM SET ****/ @@ -863,6 +822,7 @@ public: virtual String get_api_version() const = 0; virtual String get_pipeline_cache_uuid() const = 0; virtual const Capabilities &get_capabilities() const = 0; + virtual const RenderingShaderContainerFormat &get_shader_container_format() const = 0; virtual bool is_composite_alpha_supported(CommandQueueID p_queue) const { return false; } diff --git a/servers/rendering/rendering_server_default.h b/servers/rendering/rendering_server_default.h index f8321f0448d..e18963c94fa 100644 --- a/servers/rendering/rendering_server_default.h +++ b/servers/rendering/rendering_server_default.h @@ -241,7 +241,15 @@ public: #define ServerName RendererMaterialStorage #define server_name RSG::material_storage - FUNCRIDSPLIT(shader) + virtual RID shader_create() override { + RID ret = RSG::material_storage->shader_allocate(); + if (Thread::get_caller_id() == server_thread) { + RSG::material_storage->shader_initialize(ret, false); + } else { + command_queue.push(RSG::material_storage, &ServerName::shader_initialize, ret, false); + } + return ret; + } virtual RID shader_create_from_code(const String &p_code, const String &p_path_hint = String()) override { RID shader = RSG::material_storage->shader_allocate(); @@ -251,11 +259,11 @@ public: command_queue.flush_if_pending(); } - RSG::material_storage->shader_initialize(shader); + 
RSG::material_storage->shader_initialize(shader, false); RSG::material_storage->shader_set_code(shader, p_code); RSG::material_storage->shader_set_path_hint(shader, p_path_hint); } else { - command_queue.push(RSG::material_storage, &RendererMaterialStorage::shader_initialize, shader); + command_queue.push(RSG::material_storage, &RendererMaterialStorage::shader_initialize, shader, false); command_queue.push(RSG::material_storage, &RendererMaterialStorage::shader_set_code, shader, p_code); command_queue.push(RSG::material_storage, &RendererMaterialStorage::shader_set_path_hint, shader, p_path_hint); } diff --git a/servers/rendering/rendering_shader_container.cpp b/servers/rendering/rendering_shader_container.cpp new file mode 100644 index 00000000000..10f911e9022 --- /dev/null +++ b/servers/rendering/rendering_shader_container.cpp @@ -0,0 +1,466 @@ +/**************************************************************************/ +/* rendering_shader_container.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. 
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "rendering_shader_container.h" + +#include "core/io/compression.h" + +const uint32_t RenderingShaderContainer::MAGIC_NUMBER = 0x43535247; +const uint32_t RenderingShaderContainer::VERSION = 2; + +static inline uint32_t aligned_to(uint32_t p_size, uint32_t p_alignment) { + if (p_size % p_alignment) { + return p_size + (p_alignment - (p_size % p_alignment)); + } else { + return p_size; + } +} + +uint32_t RenderingShaderContainer::_from_bytes_header_extra_data(const uint8_t *p_bytes) { + return 0; +} + +uint32_t RenderingShaderContainer::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) { + return 0; +} + +uint32_t RenderingShaderContainer::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) { + return 0; +} + +uint32_t RenderingShaderContainer::_from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + return 0; +} + +uint32_t RenderingShaderContainer::_from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes) { + return 0; +} + +uint32_t RenderingShaderContainer::_from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index) { + return 0; +} + +uint32_t RenderingShaderContainer::_from_bytes_shader_extra_data_start(const uint8_t *p_bytes) { + return 0; +} + +uint32_t RenderingShaderContainer::_from_bytes_shader_extra_data(const uint8_t *p_bytes, 
uint32_t p_index) { + return 0; +} + +uint32_t RenderingShaderContainer::_from_bytes_footer_extra_data(const uint8_t *p_bytes) { + return 0; +} + +uint32_t RenderingShaderContainer::_to_bytes_header_extra_data(uint8_t *) const { + return 0; +} + +uint32_t RenderingShaderContainer::_to_bytes_reflection_extra_data(uint8_t *) const { + return 0; +} + +uint32_t RenderingShaderContainer::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *, uint32_t) const { + return 0; +} + +uint32_t RenderingShaderContainer::_to_bytes_reflection_specialization_extra_data(uint8_t *, uint32_t) const { + return 0; +} + +uint32_t RenderingShaderContainer::_to_bytes_shader_extra_data(uint8_t *, uint32_t) const { + return 0; +} + +uint32_t RenderingShaderContainer::_to_bytes_footer_extra_data(uint8_t *) const { + return 0; +} + +void RenderingShaderContainer::_set_from_shader_reflection_post(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection) { + // Do nothing. +} + +void RenderingShaderContainer::set_from_shader_reflection(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection) { + reflection_binding_set_uniforms_count.clear(); + reflection_binding_set_uniforms_data.clear(); + reflection_specialization_data.clear(); + reflection_shader_stages.clear(); + + shader_name = p_shader_name.utf8(); + + reflection_data.vertex_input_mask = p_reflection.vertex_input_mask; + reflection_data.fragment_output_mask = p_reflection.fragment_output_mask; + reflection_data.specialization_constants_count = p_reflection.specialization_constants.size(); + reflection_data.is_compute = p_reflection.is_compute; + reflection_data.has_multiview = p_reflection.has_multiview; + reflection_data.compute_local_size[0] = p_reflection.compute_local_size[0]; + reflection_data.compute_local_size[1] = p_reflection.compute_local_size[1]; + reflection_data.compute_local_size[2] = p_reflection.compute_local_size[2]; + reflection_data.set_count = 
p_reflection.uniform_sets.size(); + reflection_data.push_constant_size = p_reflection.push_constant_size; + reflection_data.push_constant_stages_mask = uint32_t(p_reflection.push_constant_stages); + reflection_data.shader_name_len = shader_name.length(); + + ReflectionBindingData binding_data; + for (const Vector &uniform_set : p_reflection.uniform_sets) { + for (const RenderingDeviceCommons::ShaderUniform &uniform : uniform_set) { + binding_data.type = uint32_t(uniform.type); + binding_data.binding = uniform.binding; + binding_data.stages = uint32_t(uniform.stages); + binding_data.length = uniform.length; + binding_data.writable = uint32_t(uniform.writable); + reflection_binding_set_uniforms_data.push_back(binding_data); + } + + reflection_binding_set_uniforms_count.push_back(uniform_set.size()); + } + + ReflectionSpecializationData specialization_data; + for (const RenderingDeviceCommons::ShaderSpecializationConstant &spec : p_reflection.specialization_constants) { + specialization_data.type = uint32_t(spec.type); + specialization_data.constant_id = spec.constant_id; + specialization_data.int_value = spec.int_value; + specialization_data.stage_flags = uint32_t(spec.stages); + reflection_specialization_data.push_back(specialization_data); + } + + for (uint32_t i = 0; i < RenderingDeviceCommons::SHADER_STAGE_MAX; i++) { + if (p_reflection.stages_bits.has_flag(RenderingDeviceCommons::ShaderStage(1U << i))) { + reflection_shader_stages.push_back(RenderingDeviceCommons::ShaderStage(i)); + } + } + + reflection_data.stage_count = reflection_shader_stages.size(); + + _set_from_shader_reflection_post(p_shader_name, p_reflection); +} + +bool RenderingShaderContainer::set_code_from_spirv(const Vector &p_spirv) { + return _set_code_from_spirv(p_spirv); +} + +RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_reflection() const { + RenderingDeviceCommons::ShaderReflection shader_refl; + shader_refl.push_constant_size = 
reflection_data.push_constant_size; + shader_refl.push_constant_stages = reflection_data.push_constant_stages_mask; + shader_refl.vertex_input_mask = reflection_data.vertex_input_mask; + shader_refl.fragment_output_mask = reflection_data.fragment_output_mask; + shader_refl.is_compute = reflection_data.is_compute; + shader_refl.has_multiview = reflection_data.has_multiview; + shader_refl.compute_local_size[0] = reflection_data.compute_local_size[0]; + shader_refl.compute_local_size[1] = reflection_data.compute_local_size[1]; + shader_refl.compute_local_size[2] = reflection_data.compute_local_size[2]; + shader_refl.uniform_sets.resize(reflection_data.set_count); + shader_refl.specialization_constants.resize(reflection_data.specialization_constants_count); + shader_refl.stages_vector.resize(reflection_data.stage_count); + + DEV_ASSERT(reflection_binding_set_uniforms_count.size() == reflection_data.set_count && "The amount of elements in the reflection and the shader container can't be different."); + uint32_t uniform_index = 0; + for (uint32_t i = 0; i < reflection_data.set_count; i++) { + Vector &uniform_set = shader_refl.uniform_sets.ptrw()[i]; + uint32_t uniforms_count = reflection_binding_set_uniforms_count[i]; + uniform_set.resize(uniforms_count); + for (uint32_t j = 0; j < uniforms_count; j++) { + const ReflectionBindingData &binding = reflection_binding_set_uniforms_data[uniform_index++]; + RenderingDeviceCommons::ShaderUniform &uniform = uniform_set.ptrw()[j]; + uniform.type = RenderingDeviceCommons::UniformType(binding.type); + uniform.writable = binding.writable; + uniform.length = binding.length; + uniform.binding = binding.binding; + uniform.stages = binding.stages; + } + } + + shader_refl.specialization_constants.resize(reflection_data.specialization_constants_count); + for (uint32_t i = 0; i < reflection_data.specialization_constants_count; i++) { + const ReflectionSpecializationData &spec = reflection_specialization_data[i]; + 
RenderingDeviceCommons::ShaderSpecializationConstant &sc = shader_refl.specialization_constants.ptrw()[i]; + sc.type = RenderingDeviceCommons::PipelineSpecializationConstantType(spec.type); + sc.constant_id = spec.constant_id; + sc.int_value = spec.int_value; + sc.stages = spec.stage_flags; + } + + shader_refl.stages_vector.resize(reflection_data.stage_count); + for (uint32_t i = 0; i < reflection_data.stage_count; i++) { + shader_refl.stages_vector.set(i, reflection_shader_stages[i]); + shader_refl.stages_bits.set_flag(RenderingDeviceCommons::ShaderStage(1U << reflection_shader_stages[i])); + } + + return shader_refl; +} + +bool RenderingShaderContainer::from_bytes(const PackedByteArray &p_bytes) { + const uint64_t alignment = sizeof(uint32_t); + const uint8_t *bytes_ptr = p_bytes.ptr(); + uint64_t bytes_offset = 0; + + // Read container header. + ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + sizeof(ContainerHeader)) > p_bytes.size(), false, "Not enough bytes for a container header in shader container."); + const ContainerHeader &container_header = *(const ContainerHeader *)(&bytes_ptr[bytes_offset]); + bytes_offset += sizeof(ContainerHeader); + bytes_offset += _from_bytes_header_extra_data(&bytes_ptr[bytes_offset]); + + ERR_FAIL_COND_V_MSG(container_header.magic_number != MAGIC_NUMBER, false, "Incorrect magic number in shader container."); + ERR_FAIL_COND_V_MSG(container_header.version > VERSION, false, "Unsupported version in shader container."); + ERR_FAIL_COND_V_MSG(container_header.format != _format(), false, "Incorrect format in shader container."); + ERR_FAIL_COND_V_MSG(container_header.format_version > _format_version(), false, "Unsupported format version in shader container."); + + // Adjust shaders to the size indicated by the container header. + shaders.resize(container_header.shader_count); + + // Read reflection data. 
+ ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + sizeof(ReflectionData)) > p_bytes.size(), false, "Not enough bytes for reflection data in shader container."); + reflection_data = *(const ReflectionData *)(&bytes_ptr[bytes_offset]); + bytes_offset += sizeof(ReflectionData); + bytes_offset += _from_bytes_reflection_extra_data(&bytes_ptr[bytes_offset]); + + // Read shader name. + ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + reflection_data.shader_name_len) > p_bytes.size(), false, "Not enough bytes for shader name in shader container."); + if (reflection_data.shader_name_len > 0) { + String shader_name_str; + shader_name_str.append_utf8((const char *)(&bytes_ptr[bytes_offset]), reflection_data.shader_name_len); + shader_name = shader_name_str.utf8(); + bytes_offset = aligned_to(bytes_offset + reflection_data.shader_name_len, alignment); + } else { + shader_name = CharString(); + } + + reflection_binding_set_uniforms_count.resize(reflection_data.set_count); + reflection_binding_set_uniforms_data.clear(); + + uint32_t uniform_index = 0; + for (uint32_t i = 0; i < reflection_data.set_count; i++) { + ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + sizeof(uint32_t)) > p_bytes.size(), false, "Not enough bytes for uniform set count in shader container."); + uint32_t uniforms_count = *(uint32_t *)(&bytes_ptr[bytes_offset]); + reflection_binding_set_uniforms_count.ptrw()[i] = uniforms_count; + bytes_offset += sizeof(uint32_t); + + reflection_binding_set_uniforms_data.resize(reflection_binding_set_uniforms_data.size() + uniforms_count); + bytes_offset += _from_bytes_reflection_binding_uniform_extra_data_start(&bytes_ptr[bytes_offset]); + + for (uint32_t j = 0; j < uniforms_count; j++) { + ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + sizeof(ReflectionBindingData)) > p_bytes.size(), false, "Not enough bytes for uniform in shader container."); + memcpy(&reflection_binding_set_uniforms_data.ptrw()[uniform_index], &bytes_ptr[bytes_offset], sizeof(ReflectionBindingData)); + bytes_offset += 
sizeof(ReflectionBindingData); + bytes_offset += _from_bytes_reflection_binding_uniform_extra_data(&bytes_ptr[bytes_offset], uniform_index); + uniform_index++; + } + } + + reflection_specialization_data.resize(reflection_data.specialization_constants_count); + bytes_offset += _from_bytes_reflection_specialization_extra_data_start(&bytes_ptr[bytes_offset]); + + for (uint32_t i = 0; i < reflection_data.specialization_constants_count; i++) { + ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + sizeof(ReflectionSpecializationData)) > p_bytes.size(), false, "Not enough bytes for specialization in shader container."); + memcpy(&reflection_specialization_data.ptrw()[i], &bytes_ptr[bytes_offset], sizeof(ReflectionSpecializationData)); + bytes_offset += sizeof(ReflectionSpecializationData); + bytes_offset += _from_bytes_reflection_specialization_extra_data(&bytes_ptr[bytes_offset], i); + } + + const uint32_t stage_count = reflection_data.stage_count; + if (stage_count > 0) { + ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + stage_count * sizeof(RenderingDeviceCommons::ShaderStage)) > p_bytes.size(), false, "Not enough bytes for stages in shader container."); + reflection_shader_stages.resize(stage_count); + bytes_offset += _from_bytes_shader_extra_data_start(&bytes_ptr[bytes_offset]); + memcpy(reflection_shader_stages.ptrw(), &bytes_ptr[bytes_offset], stage_count * sizeof(RenderingDeviceCommons::ShaderStage)); + bytes_offset += stage_count * sizeof(RenderingDeviceCommons::ShaderStage); + } + + // Read shaders. 
+ for (int64_t i = 0; i < shaders.size(); i++) { + ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + sizeof(ShaderHeader)) > p_bytes.size(), false, "Not enough bytes for shader header in shader container."); + const ShaderHeader &header = *(const ShaderHeader *)(&bytes_ptr[bytes_offset]); + bytes_offset += sizeof(ShaderHeader); + + ERR_FAIL_COND_V_MSG(int64_t(bytes_offset + header.code_compressed_size) > p_bytes.size(), false, "Not enough bytes for a shader in shader container."); + Shader &shader = shaders.ptrw()[i]; + shader.shader_stage = RenderingDeviceCommons::ShaderStage(header.shader_stage); + shader.code_compression_flags = header.code_compression_flags; + shader.code_decompressed_size = header.code_decompressed_size; + shader.code_compressed_bytes.resize(header.code_compressed_size); + memcpy(shader.code_compressed_bytes.ptrw(), &bytes_ptr[bytes_offset], header.code_compressed_size); + bytes_offset = aligned_to(bytes_offset + header.code_compressed_size, alignment); + bytes_offset += _from_bytes_shader_extra_data(&bytes_ptr[bytes_offset], i); + } + + bytes_offset += _from_bytes_footer_extra_data(&bytes_ptr[bytes_offset]); + + ERR_FAIL_COND_V_MSG(bytes_offset != (uint64_t)p_bytes.size(), false, "Amount of bytes in the container does not match the amount of bytes read."); + return true; +} + +PackedByteArray RenderingShaderContainer::to_bytes() const { + // Compute the exact size the container will require for writing everything out. 
+ const uint64_t alignment = sizeof(uint32_t); + uint64_t total_size = 0; + total_size += sizeof(ContainerHeader) + _to_bytes_header_extra_data(nullptr); + total_size += sizeof(ReflectionData) + _to_bytes_reflection_extra_data(nullptr); + total_size += aligned_to(reflection_data.shader_name_len, alignment); + total_size += reflection_binding_set_uniforms_count.size() * sizeof(uint32_t); + total_size += reflection_binding_set_uniforms_data.size() * sizeof(ReflectionBindingData); + total_size += reflection_specialization_data.size() * sizeof(ReflectionSpecializationData); + total_size += reflection_shader_stages.size() * sizeof(RenderingDeviceCommons::ShaderStage); + + for (uint32_t i = 0; i < reflection_binding_set_uniforms_data.size(); i++) { + total_size += _to_bytes_reflection_binding_uniform_extra_data(nullptr, i); + } + + for (uint32_t i = 0; i < reflection_specialization_data.size(); i++) { + total_size += _to_bytes_reflection_specialization_extra_data(nullptr, i); + } + + for (uint32_t i = 0; i < shaders.size(); i++) { + total_size += sizeof(ShaderHeader); + total_size += shaders[i].code_compressed_bytes.size(); + total_size = aligned_to(total_size, alignment); + total_size += _to_bytes_shader_extra_data(nullptr, i); + } + + total_size += _to_bytes_footer_extra_data(nullptr); + + // Create the array that will hold all of the data. + PackedByteArray bytes; + bytes.resize_initialized(total_size); + + // Write out the data to the array. 
+ uint64_t bytes_offset = 0; + uint8_t *bytes_ptr = bytes.ptrw(); + ContainerHeader &container_header = *(ContainerHeader *)(&bytes_ptr[bytes_offset]); + container_header.magic_number = MAGIC_NUMBER; + container_header.version = VERSION; + container_header.format = _format(); + container_header.format_version = _format_version(); + container_header.shader_count = shaders.size(); + bytes_offset += sizeof(ContainerHeader); + bytes_offset += _to_bytes_header_extra_data(&bytes_ptr[bytes_offset]); + + memcpy(&bytes_ptr[bytes_offset], &reflection_data, sizeof(ReflectionData)); + bytes_offset += sizeof(ReflectionData); + bytes_offset += _to_bytes_reflection_extra_data(&bytes_ptr[bytes_offset]); + + if (shader_name.size() > 0) { + memcpy(&bytes_ptr[bytes_offset], shader_name.ptr(), reflection_data.shader_name_len); + bytes_offset = aligned_to(bytes_offset + reflection_data.shader_name_len, alignment); + } + + uint32_t uniform_index = 0; + for (uint32_t uniform_count : reflection_binding_set_uniforms_count) { + memcpy(&bytes_ptr[bytes_offset], &uniform_count, sizeof(uniform_count)); + bytes_offset += sizeof(uint32_t); + + for (uint32_t i = 0; i < uniform_count; i++) { + memcpy(&bytes_ptr[bytes_offset], &reflection_binding_set_uniforms_data[uniform_index], sizeof(ReflectionBindingData)); + bytes_offset += sizeof(ReflectionBindingData); + bytes_offset += _to_bytes_reflection_binding_uniform_extra_data(&bytes_ptr[bytes_offset], uniform_index); + uniform_index++; + } + } + + for (uint32_t i = 0; i < reflection_specialization_data.size(); i++) { + memcpy(&bytes_ptr[bytes_offset], &reflection_specialization_data.ptr()[i], sizeof(ReflectionSpecializationData)); + bytes_offset += sizeof(ReflectionSpecializationData); + bytes_offset += _to_bytes_reflection_specialization_extra_data(&bytes_ptr[bytes_offset], i); + } + + if (!reflection_shader_stages.is_empty()) { + uint32_t stage_count = reflection_shader_stages.size(); + memcpy(&bytes_ptr[bytes_offset], 
reflection_shader_stages.ptr(), stage_count * sizeof(RenderingDeviceCommons::ShaderStage)); + bytes_offset += stage_count * sizeof(RenderingDeviceCommons::ShaderStage); + } + + for (uint32_t i = 0; i < shaders.size(); i++) { + const Shader &shader = shaders[i]; + ShaderHeader &header = *(ShaderHeader *)(&bytes.ptr()[bytes_offset]); + header.shader_stage = shader.shader_stage; + header.code_compressed_size = uint32_t(shader.code_compressed_bytes.size()); + header.code_compression_flags = shader.code_compression_flags; + header.code_decompressed_size = shader.code_decompressed_size; + bytes_offset += sizeof(ShaderHeader); + memcpy(&bytes.ptrw()[bytes_offset], shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size()); + bytes_offset = aligned_to(bytes_offset + shader.code_compressed_bytes.size(), alignment); + bytes_offset += _to_bytes_shader_extra_data(&bytes_ptr[bytes_offset], i); + } + + bytes_offset += _to_bytes_footer_extra_data(&bytes_ptr[bytes_offset]); + + ERR_FAIL_COND_V_MSG(bytes_offset != total_size, PackedByteArray(), "Amount of bytes written does not match the amount of bytes reserved for the container."); + return bytes; +} + +bool RenderingShaderContainer::compress_code(const uint8_t *p_decompressed_bytes, uint32_t p_decompressed_size, uint8_t *p_compressed_bytes, uint32_t *r_compressed_size, uint32_t *r_compressed_flags) const { + DEV_ASSERT(p_decompressed_bytes != nullptr); + DEV_ASSERT(p_decompressed_size > 0); + DEV_ASSERT(p_compressed_bytes != nullptr); + DEV_ASSERT(r_compressed_size != nullptr); + DEV_ASSERT(r_compressed_flags != nullptr); + + *r_compressed_flags = 0; + + PackedByteArray zstd_bytes; + int zstd_max_bytes = Compression::get_max_compressed_buffer_size(p_decompressed_size, Compression::MODE_ZSTD); + zstd_bytes.resize(zstd_max_bytes); + + int zstd_size = Compression::compress(zstd_bytes.ptrw(), p_decompressed_bytes, p_decompressed_size, Compression::MODE_ZSTD); + if (zstd_size > 0 && (uint32_t)(zstd_size) < 
p_decompressed_size) { + // Only choose Zstd if it results in actual compression. + memcpy(p_compressed_bytes, zstd_bytes.ptr(), zstd_size); + *r_compressed_size = zstd_size; + *r_compressed_flags |= COMPRESSION_FLAG_ZSTD; + } else { + // Just copy the input to the output directly. + memcpy(p_compressed_bytes, p_decompressed_bytes, p_decompressed_size); + *r_compressed_size = p_decompressed_size; + } + + return true; +} + +bool RenderingShaderContainer::decompress_code(const uint8_t *p_compressed_bytes, uint32_t p_compressed_size, uint32_t p_compressed_flags, uint8_t *p_decompressed_bytes, uint32_t p_decompressed_size) const { + DEV_ASSERT(p_compressed_bytes != nullptr); + DEV_ASSERT(p_compressed_size > 0); + DEV_ASSERT(p_decompressed_bytes != nullptr); + DEV_ASSERT(p_decompressed_size > 0); + + bool uses_zstd = p_compressed_flags & COMPRESSION_FLAG_ZSTD; + if (uses_zstd) { + if (!Compression::decompress(p_decompressed_bytes, p_decompressed_size, p_compressed_bytes, p_compressed_size, Compression::MODE_ZSTD)) { + ERR_FAIL_V_MSG(false, "Malformed zstd input for decompressing shader code."); + } + } else { + memcpy(p_decompressed_bytes, p_compressed_bytes, MIN(p_compressed_size, p_decompressed_size)); + } + + return true; +} + +RenderingShaderContainer::RenderingShaderContainer() {} + +RenderingShaderContainer::~RenderingShaderContainer() {} diff --git a/servers/rendering/rendering_shader_container.h b/servers/rendering/rendering_shader_container.h new file mode 100644 index 00000000000..d184dd41bf5 --- /dev/null +++ b/servers/rendering/rendering_shader_container.h @@ -0,0 +1,157 @@ +/**************************************************************************/ +/* rendering_shader_container.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot 
Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +/**************************************************************************/ + +#pragma once + +#include "core/object/ref_counted.h" +#include "servers/rendering/rendering_device_commons.h" + +class RenderingShaderContainer : public RefCounted { + GDSOFTCLASS(RenderingShaderContainer, RefCounted); + +public: + static const uint32_t MAGIC_NUMBER; + static const uint32_t VERSION; + +protected: + struct ContainerHeader { + uint32_t magic_number = 0; + uint32_t version = 0; + uint32_t format = 0; + uint32_t format_version = 0; + uint32_t shader_count = 0; + }; + + struct ReflectionData { + uint64_t vertex_input_mask = 0; + uint32_t fragment_output_mask = 0; + uint32_t specialization_constants_count = 0; + uint32_t is_compute = 0; + uint32_t has_multiview = 0; + uint32_t compute_local_size[3] = {}; + uint32_t set_count = 0; + uint32_t push_constant_size = 0; + uint32_t push_constant_stages_mask = 0; + uint32_t stage_count = 0; + uint32_t shader_name_len = 0; + }; + + struct ReflectionBindingData { + uint32_t type = 0; + uint32_t binding = 0; + uint32_t stages = 0; + uint32_t length = 0; // Size of arrays (in total elements), or UBOs (in bytes * total elements). 
+ uint32_t writable = 0; + + bool operator<(const ReflectionBindingData &p_other) const { + return binding < p_other.binding; + } + }; + + struct ReflectionSpecializationData { + uint32_t type = 0; + uint32_t constant_id = 0; + uint32_t int_value = 0; + uint32_t stage_flags = 0; + }; + + struct ShaderHeader { + uint32_t shader_stage = 0; + uint32_t code_compressed_size = 0; + uint32_t code_compression_flags = 0; + uint32_t code_decompressed_size = 0; + }; + + ReflectionData reflection_data; + Vector reflection_binding_set_uniforms_count; + Vector reflection_binding_set_uniforms_data; + Vector reflection_specialization_data; + Vector reflection_shader_stages; + + virtual uint32_t _format() const = 0; + virtual uint32_t _format_version() const = 0; + + // These methods will always be called with a valid pointer. + virtual uint32_t _from_bytes_header_extra_data(const uint8_t *p_bytes); + virtual uint32_t _from_bytes_reflection_extra_data(const uint8_t *p_bytes); + virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes); + virtual uint32_t _from_bytes_reflection_binding_uniform_extra_data(const uint8_t *p_bytes, uint32_t p_index); + virtual uint32_t _from_bytes_reflection_specialization_extra_data_start(const uint8_t *p_bytes); + virtual uint32_t _from_bytes_reflection_specialization_extra_data(const uint8_t *p_bytes, uint32_t p_index); + virtual uint32_t _from_bytes_shader_extra_data_start(const uint8_t *p_bytes); + virtual uint32_t _from_bytes_shader_extra_data(const uint8_t *p_bytes, uint32_t p_index); + virtual uint32_t _from_bytes_footer_extra_data(const uint8_t *p_bytes); + + // These methods will be called with a nullptr to retrieve the size of the data. 
+ virtual uint32_t _to_bytes_header_extra_data(uint8_t *p_bytes) const; + virtual uint32_t _to_bytes_reflection_extra_data(uint8_t *p_bytes) const; + virtual uint32_t _to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const; + virtual uint32_t _to_bytes_reflection_specialization_extra_data(uint8_t *p_bytes, uint32_t p_index) const; + virtual uint32_t _to_bytes_shader_extra_data(uint8_t *p_bytes, uint32_t p_index) const; + virtual uint32_t _to_bytes_footer_extra_data(uint8_t *p_bytes) const; + + // This method will be called when set_from_shader_reflection() is finished. Used to update internal structures to match the reflection if necessary. + virtual void _set_from_shader_reflection_post(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection); + + // This method will be called when set_code_from_spirv() is called. + virtual bool _set_code_from_spirv(const Vector &p_spirv) = 0; + +public: + enum CompressionFlags { + COMPRESSION_FLAG_ZSTD = 0x1, + }; + + struct Shader { + RenderingDeviceCommons::ShaderStage shader_stage = RenderingDeviceCommons::SHADER_STAGE_MAX; + PackedByteArray code_compressed_bytes; + uint32_t code_compression_flags = 0; + uint32_t code_decompressed_size = 0; + }; + + CharString shader_name; + Vector shaders; + + void set_from_shader_reflection(const String &p_shader_name, const RenderingDeviceCommons::ShaderReflection &p_reflection); + bool set_code_from_spirv(const Vector &p_spirv); + RenderingDeviceCommons::ShaderReflection get_shader_reflection() const; + bool from_bytes(const PackedByteArray &p_bytes); + PackedByteArray to_bytes() const; + bool compress_code(const uint8_t *p_decompressed_bytes, uint32_t p_decompressed_size, uint8_t *p_compressed_bytes, uint32_t *r_compressed_size, uint32_t *r_compressed_flags) const; + bool decompress_code(const uint8_t *p_compressed_bytes, uint32_t p_compressed_size, uint32_t p_compressed_flags, uint8_t *p_decompressed_bytes, uint32_t 
p_decompressed_size) const; + RenderingShaderContainer(); + virtual ~RenderingShaderContainer(); +}; + +class RenderingShaderContainerFormat : public RenderingDeviceCommons { +public: + virtual Ref create_container() const = 0; + virtual ShaderLanguageVersion get_shader_language_version() const = 0; + virtual ShaderSpirvVersion get_shader_spirv_version() const = 0; +}; diff --git a/servers/rendering/rendering_shader_library.h b/servers/rendering/rendering_shader_library.h new file mode 100644 index 00000000000..566fb7eb2fa --- /dev/null +++ b/servers/rendering/rendering_shader_library.h @@ -0,0 +1,48 @@ +/**************************************************************************/ +/* rendering_shader_library.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +class RenderingShaderLibrary { +public: + enum FeatureBits { + FEATURE_ADVANCED_BIT = 1U << 0U, + FEATURE_MULTIVIEW_BIT = 1U << 1U, + FEATURE_VRS_BIT = 1U << 2U, + }; + + // Used by the shader baker to globally enable features on all the shaders that will be exported. + virtual void enable_features(BitField p_feature_bits) = 0; + + // Used by the shader baker to reference by name the library. + virtual String get_name() const = 0; + + virtual ~RenderingShaderLibrary() {} +}; diff --git a/servers/rendering/shader_language.cpp b/servers/rendering/shader_language.cpp index 3ab3fd8afcb..68a833545a0 100644 --- a/servers/rendering/shader_language.cpp +++ b/servers/rendering/shader_language.cpp @@ -9159,10 +9159,6 @@ Error ShaderLanguage::_parse_shader(const HashMap &p_f uint64_t max_uniform_buffer_size = 65536; int uniform_buffer_exceeded_line = -1; bool check_device_limit_warnings = check_warnings && HAS_WARNING(ShaderWarning::DEVICE_LIMIT_EXCEEDED_FLAG); - // Can be false for internal shaders created in the process of initializing the engine. 
- if (RSG::utilities) { - max_uniform_buffer_size = RSG::utilities->get_maximum_uniform_buffer_size(); - } #endif // DEBUG_ENABLED ShaderNode::Uniform::Scope uniform_scope = ShaderNode::Uniform::SCOPE_LOCAL; diff --git a/servers/rendering/storage/material_storage.h b/servers/rendering/storage/material_storage.h index c9fe7c11d9d..0b56a05f89e 100644 --- a/servers/rendering/storage/material_storage.h +++ b/servers/rendering/storage/material_storage.h @@ -56,7 +56,7 @@ public: /* SHADER API */ virtual RID shader_allocate() = 0; - virtual void shader_initialize(RID p_rid) = 0; + virtual void shader_initialize(RID p_rid, bool p_embedded = true) = 0; virtual void shader_free(RID p_rid) = 0; virtual void shader_set_code(RID p_shader, const String &p_code) = 0; @@ -69,6 +69,9 @@ public: virtual Variant shader_get_parameter_default(RID p_material, const StringName &p_param) const = 0; virtual RS::ShaderNativeSourceCode shader_get_native_source_code(RID p_shader) const = 0; + virtual void shader_embedded_set_lock() = 0; + virtual const HashSet &shader_embedded_set_get() const = 0; + virtual void shader_embedded_set_unlock() = 0; /* MATERIAL API */