1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-24 15:26:15 +00:00

Add shader baker to project exporter.

Metal Support contributed by Migeran (https://migeran.com) and Stuart Carnie.

Co-authored-by: Stuart Carnie <stuart.carnie@gmail.com>
Co-authored-by: Gergely Kis <gergely.kis@migeran.com>
This commit is contained in:
Dario
2025-01-13 16:13:39 -03:00
parent 99f5a3d665
commit 5a30a7e7cd
112 changed files with 5786 additions and 4203 deletions

View File

@@ -515,17 +515,13 @@ FSR2Context::~FSR2Context() {
FSR2Effect::FSR2Effect() {
FfxDeviceCapabilities &capabilities = device.capabilities;
uint64_t default_subgroup_size = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_SIZE);
capabilities.minimumSupportedShaderModel = FFX_SHADER_MODEL_5_1;
capabilities.waveLaneCountMin = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MIN_SIZE);
capabilities.waveLaneCountMax = RD::get_singleton()->limit_get(RD::LIMIT_SUBGROUP_MAX_SIZE);
capabilities.waveLaneCountMin = 32;
capabilities.waveLaneCountMax = 32;
capabilities.fp16Supported = RD::get_singleton()->has_feature(RD::Features::SUPPORTS_FSR_HALF_FLOAT);
capabilities.raytracingSupported = false;
bool force_wave_64 = default_subgroup_size == 32 && capabilities.waveLaneCountMax == 64;
bool use_lut = force_wave_64 || default_subgroup_size == 64;
String general_defines_base =
String general_defines =
"\n#define FFX_GPU\n"
"\n#define FFX_GLSL 1\n"
"\n#define FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS 1\n"
@@ -534,17 +530,12 @@ FSR2Effect::FSR2Effect() {
"\n#define FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP 1\n"
"\n#define FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS 1\n";
if (use_lut) {
general_defines_base += "\n#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 1\n";
}
Vector<String> modes_single;
modes_single.push_back("");
String general_defines = general_defines_base;
if (capabilities.fp16Supported) {
general_defines += "\n#define FFX_HALF 1\n";
}
Vector<String> modes;
modes.push_back("");
Vector<String> modes_with_fp16;
modes_with_fp16.push_back("");
modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
// Since Godot currently lacks a shader reflection mechanism to persist the name of the bindings in the shader cache and
// there's also no mechanism to compile the shaders offline, the bindings are created manually by looking at the GLSL
@@ -557,8 +548,9 @@ FSR2Effect::FSR2Effect() {
{
Pass &pass = device.passes[FFX_FSR2_PASS_DEPTH_CLIP];
pass.shader = &shaders.depth_clip;
pass.shader->initialize(modes, general_defines);
pass.shader->initialize(modes_with_fp16, general_defines);
pass.shader_version = pass.shader->version_create();
pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
pass.sampled_bindings = {
FfxResourceBinding{ 0, 0, L"r_reconstructed_previous_nearest_depth" },
@@ -587,8 +579,9 @@ FSR2Effect::FSR2Effect() {
{
Pass &pass = device.passes[FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH];
pass.shader = &shaders.reconstruct_previous_depth;
pass.shader->initialize(modes, general_defines);
pass.shader->initialize(modes_with_fp16, general_defines);
pass.shader_version = pass.shader->version_create();
pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
pass.sampled_bindings = {
FfxResourceBinding{ 0, 0, L"r_input_motion_vectors" },
@@ -616,8 +609,9 @@ FSR2Effect::FSR2Effect() {
{
Pass &pass = device.passes[FFX_FSR2_PASS_LOCK];
pass.shader = &shaders.lock;
pass.shader->initialize(modes, general_defines);
pass.shader->initialize(modes_with_fp16, general_defines);
pass.shader_version = pass.shader->version_create();
pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
pass.sampled_bindings = {
FfxResourceBinding{ 0, 0, L"r_lock_input_luma" }
@@ -634,22 +628,19 @@ FSR2Effect::FSR2Effect() {
}
{
Vector<String> accumulate_modes;
accumulate_modes.push_back("\n");
accumulate_modes.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
String general_defines_accumulate;
if (RD::get_singleton()->get_device_vendor_name() == "NVIDIA") {
// Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
general_defines_accumulate = general_defines_base;
} else {
general_defines_accumulate = general_defines;
}
Vector<String> accumulate_modes_with_fp16;
accumulate_modes_with_fp16.push_back("\n");
accumulate_modes_with_fp16.push_back("\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n");
accumulate_modes_with_fp16.push_back("\n#define FFX_HALF 1\n#define FFX_FSR2_OPTION_APPLY_SHARPENING 1\n");
// Workaround: Disable FP16 path for the accumulate pass on NVIDIA due to reduced occupancy and high VRAM throughput.
const bool fp16_path_supported = RD::get_singleton()->get_device_vendor_name() != "NVIDIA";
Pass &pass = device.passes[FFX_FSR2_PASS_ACCUMULATE];
pass.shader = &shaders.accumulate;
pass.shader->initialize(accumulate_modes, general_defines_accumulate);
pass.shader->initialize(accumulate_modes_with_fp16, general_defines);
pass.shader_version = pass.shader->version_create();
pass.shader_variant = capabilities.fp16Supported && fp16_path_supported ? 2 : 0;
pass.sampled_bindings = {
FfxResourceBinding{ 0, 0, L"r_input_exposure" },
@@ -679,16 +670,16 @@ FSR2Effect::FSR2Effect() {
FfxResourceBinding{ 18, 0, L"cbFSR2" }
};
// Sharpen pass is a clone of the accumulate pass.
// Sharpen pass is a clone of the accumulate pass with the sharpening variant.
Pass &sharpen_pass = device.passes[FFX_FSR2_PASS_ACCUMULATE_SHARPEN];
sharpen_pass = pass;
sharpen_pass.shader_variant = 1;
sharpen_pass.shader_variant = pass.shader_variant + 1;
}
{
Pass &pass = device.passes[FFX_FSR2_PASS_RCAS];
pass.shader = &shaders.rcas;
pass.shader->initialize(modes, general_defines_base);
pass.shader->initialize(modes_single, general_defines);
pass.shader_version = pass.shader->version_create();
pass.sampled_bindings = {
@@ -709,7 +700,7 @@ FSR2Effect::FSR2Effect() {
{
Pass &pass = device.passes[FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID];
pass.shader = &shaders.compute_luminance_pyramid;
pass.shader->initialize(modes, general_defines_base);
pass.shader->initialize(modes_single, general_defines);
pass.shader_version = pass.shader->version_create();
pass.sampled_bindings = {
@@ -732,8 +723,9 @@ FSR2Effect::FSR2Effect() {
{
Pass &pass = device.passes[FFX_FSR2_PASS_GENERATE_REACTIVE];
pass.shader = &shaders.autogen_reactive;
pass.shader->initialize(modes, general_defines);
pass.shader->initialize(modes_with_fp16, general_defines);
pass.shader_version = pass.shader->version_create();
pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
pass.sampled_bindings = {
FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },
@@ -753,8 +745,9 @@ FSR2Effect::FSR2Effect() {
{
Pass &pass = device.passes[FFX_FSR2_PASS_TCR_AUTOGENERATE];
pass.shader = &shaders.tcr_autogen;
pass.shader->initialize(modes, general_defines);
pass.shader->initialize(modes_with_fp16, general_defines);
pass.shader_version = pass.shader->version_create();
pass.shader_variant = capabilities.fp16Supported ? 1 : 0;
pass.sampled_bindings = {
FfxResourceBinding{ 0, 0, L"r_input_opaque_only" },