You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-22 15:06:45 +00:00
Re-Implement GPU particles on master.
-No new features yet -Unlike godot 3.x, sorting happens using GPU
This commit is contained in:
@@ -37,3 +37,6 @@ if "RD_GLSL" in env["BUILDERS"]:
|
||||
env.RD_GLSL("sdfgi_debug_probes.glsl")
|
||||
env.RD_GLSL("volumetric_fog.glsl")
|
||||
env.RD_GLSL("shadow_reduce.glsl")
|
||||
env.RD_GLSL("particles.glsl")
|
||||
env.RD_GLSL("particles_copy.glsl")
|
||||
env.RD_GLSL("sort.glsl")
|
||||
|
||||
262
servers/rendering/rasterizer_rd/shaders/particles.glsl
Normal file
262
servers/rendering/rasterizer_rd/shaders/particles.glsl
Normal file
@@ -0,0 +1,262 @@
|
||||
#[compute]
|
||||
|
||||
#version 450
|
||||
|
||||
VERSION_DEFINES
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
#define SAMPLER_NEAREST_CLAMP 0
|
||||
#define SAMPLER_LINEAR_CLAMP 1
|
||||
#define SAMPLER_NEAREST_WITH_MIPMAPS_CLAMP 2
|
||||
#define SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP 3
|
||||
#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_CLAMP 4
|
||||
#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_CLAMP 5
|
||||
#define SAMPLER_NEAREST_REPEAT 6
|
||||
#define SAMPLER_LINEAR_REPEAT 7
|
||||
#define SAMPLER_NEAREST_WITH_MIPMAPS_REPEAT 8
|
||||
#define SAMPLER_LINEAR_WITH_MIPMAPS_REPEAT 9
|
||||
#define SAMPLER_NEAREST_WITH_MIPMAPS_ANISOTROPIC_REPEAT 10
|
||||
#define SAMPLER_LINEAR_WITH_MIPMAPS_ANISOTROPIC_REPEAT 11
|
||||
|
||||
/* SET 0: GLOBAL DATA */
|
||||
|
||||
layout(set = 0, binding = 1) uniform sampler material_samplers[12];
|
||||
|
||||
layout(set = 0, binding = 2, std430) restrict readonly buffer GlobalVariableData {
|
||||
vec4 data[];
|
||||
}
|
||||
global_variables;
|
||||
|
||||
/* Set 1: FRAME AND PARTICLE DATA */
|
||||
|
||||
// Per-frame emission state. main() selects an entry with
// (invocation % params.trail_size), so one entry exists per trail frame;
// keeping this history lets trails be evaluated deterministically.
struct FrameParams {
	bool emitting; // copied into ParticleData.is_active when a particle restarts
	float system_phase; // emitter phase for this frame; compared against each particle's restart phase
	float prev_system_phase; // emitter phase of the previous frame
	uint cycle; // emission cycle counter; feeds the per-particle random seed

	float explosiveness; // scales restart phases by (1.0 - explosiveness)
	float randomness; // amount of random jitter added to restart phases
	float time; // not read by the template code in this file
	float delta; // default per-particle time step

	uint random_seed; // not read by the template code in this file
	uint pad[3]; // explicit padding (presumably keeps the mat4 below 16-byte aligned; confirm against the CPU-side struct)

	mat4 emission_transform; // not read by the template code in this file
};

layout(set = 1, binding = 0, std430) restrict buffer FrameHistory {
	FrameParams data[];
}
frame_history;
|
||||
|
||||
// State of a single particle as stored in the simulation buffer.
struct ParticleData {
	mat4 xform; // particle transform; column 3 holds the position that velocity is integrated into
	vec3 velocity; // integrated into xform[3].xyz unless DISABLE_VELOCITY is defined
	bool is_active; // inactive particles are neither processed nor moved
	vec4 color; // reset to vec4(1.0) on clear/restart
	vec4 custom; // reset to vec4(0.0) on clear/restart
};

// Read/write storage for every particle handled by this dispatch.
layout(set = 1, binding = 1, std430) restrict buffer Particles {
	ParticleData data[];
}
particles;
|
||||
|
||||
/* SET 2: MATERIAL */
|
||||
|
||||
#ifdef USE_MATERIAL_UNIFORMS
|
||||
layout(set = 2, binding = 0, std140) uniform MaterialUniforms{
|
||||
/* clang-format off */
|
||||
MATERIAL_UNIFORMS
|
||||
/* clang-format on */
|
||||
} material;
|
||||
#endif
|
||||
|
||||
// Per-dispatch constants for the particle simulation.
layout(push_constant, binding = 0, std430) uniform Params {
	float lifetime; // converts a phase difference into seconds for fractional deltas
	bool clear; // when set, every particle is reset to its default state
	uint total_particles; // number of particles (excluding trail frames)
	uint trail_size; // number of trail frames per particle; the dispatch covers total_particles * trail_size
	bool use_fractional_delta; // when set, restarted particles only advance by the time since their restart phase
	uint pad[3]; // explicit padding
}
params;
|
||||
|
||||
// Integer hash: two xor-shift/multiply rounds followed by a final xor-shift.
// Returns a scrambled 32-bit value derived from x (hash(0) == 0).
uint hash(uint x) {
	const uint SHIFT = uint(16);
	const uint MULTIPLIER = uint(0x45d9f3b);

	for (int round = 0; round < 2; round++) {
		x = (x ^ (x >> SHIFT)) * MULTIPLIER;
	}

	return x ^ (x >> SHIFT);
}
|
||||
|
||||
/* clang-format off */
|
||||
|
||||
COMPUTE_SHADER_GLOBALS
|
||||
|
||||
/* clang-format on */
|
||||
|
||||
// Compute entry point: advances one (particle, trail frame) pair by one step.
// Decides whether the particle restarts this frame, resets its state when
// needed, runs the injected particle shader code and integrates velocity.
void main() {
	uint particle = gl_GlobalInvocationID.x;

	// Invocations beyond the last (particle, trail frame) pair have nothing to do.
	// This also guards the divisions below when trail_size is 0.
	if (particle >= params.total_particles * params.trail_size) {
		return; //discard
	}

	uint index = particle / params.trail_size;
	uint frame = (particle % params.trail_size);

#define FRAME frame_history.data[frame]
#define PARTICLE particles.data[particle]

	// NOTE(review): apply_forces, apply_velocity, mass and particle_number are
	// not read by the template itself; presumably they are referenced by the
	// code substituted for COMPUTE_SHADER_CODE, so they are kept as-is.
	bool apply_forces = true;
	bool apply_velocity = true;
	float local_delta = FRAME.delta;

	float mass = 1.0;

	// Phase (0..1 within an emission cycle) at which this particle restarts.
	float restart_phase = float(index) / float(params.total_particles);

	if (FRAME.randomness > 0.0) {
		// Jitter the restart phase with a value seeded by cycle and index.
		uint seed = FRAME.cycle;
		if (restart_phase >= FRAME.system_phase) {
			// Not reached in this cycle yet: use the previous cycle's seed.
			seed -= uint(1);
		}
		seed *= uint(params.total_particles);
		seed += uint(index);
		float random = float(hash(seed) % uint(65536)) / 65536.0;
		restart_phase += FRAME.randomness * random * 1.0 / float(params.total_particles);
	}

	restart_phase *= (1.0 - FRAME.explosiveness);

	bool restart = false;

	if (FRAME.system_phase > FRAME.prev_system_phase) {
		// restart_phase >= prev_system_phase is used so particles emit in the first frame they are processed

		if (restart_phase >= FRAME.prev_system_phase && restart_phase < FRAME.system_phase) {
			restart = true;
			if (params.use_fractional_delta) {
				// Only simulate the time elapsed since the restart phase.
				local_delta = (FRAME.system_phase - restart_phase) * params.lifetime;
			}
		}

	} else if (FRAME.delta > 0.0) {
		// The system phase did not increase (it wrapped around): a restart phase
		// after the previous phase or before the current one was crossed.
		if (restart_phase >= FRAME.prev_system_phase) {
			restart = true;
			if (params.use_fractional_delta) {
				local_delta = (1.0 - restart_phase + FRAME.system_phase) * params.lifetime;
			}

		} else if (restart_phase < FRAME.system_phase) {
			restart = true;
			if (params.use_fractional_delta) {
				local_delta = (FRAME.system_phase - restart_phase) * params.lifetime;
			}
		}
	}

	uint current_cycle = FRAME.cycle;

	if (FRAME.system_phase < restart_phase) {
		current_cycle -= uint(1);
	}

	uint particle_number = current_cycle * uint(params.total_particles) + particle;

	if (restart) {
		PARTICLE.is_active = FRAME.emitting;
	}

	// With ENABLE_KEEP_DATA, particle state survives restarts and is only
	// reset on an explicit clear.
#ifdef ENABLE_KEEP_DATA
	if (params.clear) {
#else
	if (params.clear || restart) {
#endif
		PARTICLE.color = vec4(1.0);
		PARTICLE.custom = vec4(0.0);
		PARTICLE.velocity = vec3(0.0);
		if (!restart) {
			// A clear without a restart deactivates the particle.
			PARTICLE.is_active = false;
		}
		PARTICLE.xform = mat4(
				vec4(1.0, 0.0, 0.0, 0.0),
				vec4(0.0, 1.0, 0.0, 0.0),
				vec4(0.0, 0.0, 1.0, 0.0),
				vec4(0.0, 0.0, 0.0, 1.0));
	}

	if (PARTICLE.is_active) {
		/* clang-format off */

COMPUTE_SHADER_CODE

		/* clang-format on */
	}

#if !defined(DISABLE_VELOCITY)

	// is_active is tested again because the injected code above may have changed it.
	// Attractor forces are not applied by this shader; only velocity is integrated.
	if (PARTICLE.is_active) {
		PARTICLE.xform[3].xyz += PARTICLE.velocity * local_delta;
	}
#endif
}
|
||||
82
servers/rendering/rasterizer_rd/shaders/particles_copy.glsl
Normal file
82
servers/rendering/rasterizer_rd/shaders/particles_copy.glsl
Normal file
@@ -0,0 +1,82 @@
|
||||
#[compute]
|
||||
|
||||
#version 450
|
||||
|
||||
VERSION_DEFINES
|
||||
|
||||
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
// Particle record as read by the copy shader. The member list is identical
// to the ParticleData struct declared in particles.glsl.
struct ParticleData {
	mat4 xform; // particle transform; column 3 supplies the sort key position
	vec3 velocity; // not read by this shader
	bool is_active; // inactive particles are written out with an all-zero transform
	vec4 color; // copied to the instance buffer unchanged
	vec4 custom; // copied to the instance buffer unchanged
};

// Source particles (read-only in this shader).
layout(set = 0, binding = 1, std430) restrict readonly buffer Particles {
	ParticleData data[];
}
particles;
|
||||
|
||||
// Destination instance data: five vec4 per particle
// (three transform rows, then color, then custom).
layout(set = 0, binding = 2, std430) restrict writeonly buffer Transforms {
	vec4 data[];
}
instances;

#ifdef USE_SORT_BUFFER

// One entry per particle: x is the sort key, y is the particle index stored as a float.
layout(set = 1, binding = 0, std430) restrict buffer SortBuffer {
	vec2 data[];
}
sort_buffer;

#endif // USE_SORT_BUFFER
|
||||
|
||||
// Per-dispatch constants for the copy shader.
layout(push_constant, binding = 0, std430) uniform Params {
	vec3 sort_direction; // particle positions are projected onto this vector to build sort keys
	uint total_particles; // number of particles; invocations at or above this index exit early
}
params;
|
||||
|
||||
// Compute entry point with two variants:
//  - MODE_FILL_SORT_BUFFER: writes a (key, index) pair per particle.
//  - MODE_FILL_INSTANCES: writes the per-instance data, optionally reading
//    particles in the order stored in the sort buffer (USE_SORT_BUFFER).
void main() {
#ifdef MODE_FILL_SORT_BUFFER

	uint particle = gl_GlobalInvocationID.x;
	if (particle >= params.total_particles) {
		return; //discard
	}

	// Key: position projected onto the sort direction. Payload: the particle index.
	float sort_key = dot(params.sort_direction, particles.data[particle].xform[3].xyz);
	sort_buffer.data[particle] = vec2(sort_key, float(particle));
#endif

#ifdef MODE_FILL_INSTANCES

	const uint VEC4_PER_INSTANCE = 3 + 1 + 1; //xform + color + custom

	uint particle = gl_GlobalInvocationID.x;
	// The output slot always follows the invocation index, even when the
	// source particle is remapped through the sort buffer below.
	uint write_offset = gl_GlobalInvocationID.x * VEC4_PER_INSTANCE;

	if (particle >= params.total_particles) {
		return; //discard
	}

#ifdef USE_SORT_BUFFER
	particle = uint(sort_buffer.data[particle].y); //use index from sort buffer
#endif

	// Inactive particles get an all-zero transform (zero scale, becomes invisible).
	mat4 txform = particles.data[particle].is_active ? transpose(particles.data[particle].xform) : mat4(0.0);

	for (uint row = 0; row < 3; row++) {
		instances.data[write_offset + row] = txform[row];
	}
	instances.data[write_offset + 3] = particles.data[particle].color;
	instances.data[write_offset + 4] = particles.data[particle].custom;

#endif
}
|
||||
203
servers/rendering/rasterizer_rd/shaders/sort.glsl
Normal file
203
servers/rendering/rasterizer_rd/shaders/sort.glsl
Normal file
@@ -0,0 +1,203 @@
|
||||
#[compute]
|
||||
|
||||
#version 450
|
||||
|
||||
VERSION_DEFINES
|
||||
|
||||
// Original version here:
|
||||
// https://github.com/GPUOpen-LibrariesAndSDKs/GPUParticles11/blob/master/gpuparticles11/src/Shaders
|
||||
|
||||
//
|
||||
// Copyright (c) 2016 Advanced Micro Devices, Inc. All rights reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in
|
||||
// all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE.
|
||||
//
|
||||
|
||||
#define SORT_SIZE 512
|
||||
#define NUM_THREADS (SORT_SIZE / 2)
|
||||
#define INVERSION (16 * 2 + 8 * 3)
|
||||
#define ITERATIONS 1
|
||||
|
||||
layout(local_size_x = NUM_THREADS, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
#ifndef MODE_SORT_STEP
|
||||
|
||||
shared vec2 g_LDS[SORT_SIZE];
|
||||
|
||||
#endif
|
||||
|
||||
// Elements being sorted in place. Entries are ordered by ascending x;
// y travels with its key and is never inspected by the sort.
layout(set = 1, binding = 0, std430) restrict buffer SortBuffer {
	vec2 data[];
}
sort_buffer;

// Per-dispatch constants for the sort passes.
layout(push_constant, binding = 0, std430) uniform Params {
	uint total_elements; // number of valid entries in sort_buffer
	uint pad[3]; // explicit padding
	// Only read by MODE_SORT_STEP: (x - 1) masks the low index bits,
	// y is added to the partner index and z multiplies the low index bits.
	// w is not read by this shader.
	ivec4 job_params;
}
params;
|
||||
|
||||
// Compute entry point for the three passes of the bitonic sort:
//  - MODE_SORT_BLOCK: fully sorts each SORT_SIZE-element block in shared memory.
//  - MODE_SORT_STEP: one compare/exchange step performed directly on the buffer.
//  - MODE_SORT_INNER: finishes the merge of each SORT_SIZE-element block in shared memory.
// Element counts are computed in signed arithmetic and clamped to
// [0, SORT_SIZE], so a workgroup that starts past the end of the data
// processes nothing instead of reading and writing out of range.
void main() {
#ifdef MODE_SORT_BLOCK

	uvec3 Gid = gl_WorkGroupID;
	uvec3 GTid = gl_LocalInvocationID;
	uint GI = gl_LocalInvocationIndex;

	int GlobalBaseIndex = int((Gid.x * SORT_SIZE) + GTid.x);
	int LocalBaseIndex = int(GI);
	// Number of valid elements in this block (the last block may be partial).
	int numElementsInThreadGroup = clamp(int(params.total_elements) - int(Gid.x) * SORT_SIZE, 0, SORT_SIZE);

	// Load shared data

	int i;
	for (i = 0; i < 2 * ITERATIONS; ++i) {
		if (GI + i * NUM_THREADS < numElementsInThreadGroup) {
			g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
		}
	}

	groupMemoryBarrier();
	barrier();

	// Bitonic sort
	for (int nMergeSize = 2; nMergeSize <= SORT_SIZE; nMergeSize = nMergeSize * 2) {
		for (int nMergeSubSize = nMergeSize >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) {
			for (i = 0; i < ITERATIONS; ++i) {
				int tmp_index = int(GI + NUM_THREADS * i);
				int index_low = tmp_index & (nMergeSubSize - 1);
				int index_high = 2 * (tmp_index - index_low);
				int index = index_high + index_low;

				// The first step of each merge pairs mirrored elements; later steps pair at a fixed distance.
				int nSwapElem = nMergeSubSize == nMergeSize >> 1 ? index_high + (2 * nMergeSubSize - 1) - index_low : index_high + nMergeSubSize + index_low;
				// index < nSwapElem, so checking the partner alone keeps both accesses in range.
				if (nSwapElem < numElementsInThreadGroup) {
					vec2 a = g_LDS[index];
					vec2 b = g_LDS[nSwapElem];

					if (a.x > b.x) {
						g_LDS[index] = b;
						g_LDS[nSwapElem] = a;
					}
				}
				groupMemoryBarrier();
				barrier();
			}
		}
	}

	// Store shared data
	for (i = 0; i < 2 * ITERATIONS; ++i) {
		if (GI + i * NUM_THREADS < numElementsInThreadGroup) {
			sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS];
		}
	}

#endif

#ifdef MODE_SORT_STEP

	uvec3 Gid = gl_WorkGroupID;
	uvec3 GTid = gl_LocalInvocationID;

	ivec4 tgp;

	tgp.x = int(Gid.x) * 256;
	tgp.y = 0;
	tgp.z = int(params.total_elements);
	tgp.w = min(512, max(0, tgp.z - int(Gid.x) * 512));

	uint localID = int(tgp.x) + GTid.x; // calculate threadID within this sortable-array

	// job_params.x - 1 is used as a bit mask (presumably job_params.x is a power of two; confirm against the dispatching code).
	uint index_low = localID & (params.job_params.x - 1);
	uint index_high = 2 * (localID - index_low);

	uint index = tgp.y + index_high + index_low;
	// job_params.y is the partner offset and job_params.z scales index_low.
	uint nSwapElem = tgp.y + index_high + params.job_params.y + params.job_params.z * index_low;

	if (nSwapElem < tgp.y + tgp.z) {
		vec2 a = sort_buffer.data[index];
		vec2 b = sort_buffer.data[nSwapElem];

		if (a.x > b.x) {
			sort_buffer.data[index] = b;
			sort_buffer.data[nSwapElem] = a;
		}
	}

#endif

#ifdef MODE_SORT_INNER

	uvec3 Gid = gl_WorkGroupID;
	uvec3 GTid = gl_LocalInvocationID;
	uint GI = gl_LocalInvocationIndex;

	ivec4 tgp;

	tgp.x = int(Gid.x * 256);
	tgp.y = 0;
	tgp.z = int(params.total_elements);
	// Signed subtraction: the unsigned form wraps around when this workgroup
	// starts past total_elements, which made max(0, ...) ineffective.
	tgp.w = min(512, max(0, tgp.z - int(Gid.x) * 512));

	int GlobalBaseIndex = int(tgp.y + tgp.x * 2 + GTid.x);
	int LocalBaseIndex = int(GI);
	int i;

	// Load shared data
	for (i = 0; i < 2; ++i) {
		if (GI + i * NUM_THREADS < tgp.w) {
			g_LDS[LocalBaseIndex + i * NUM_THREADS] = sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS];
		}
	}

	groupMemoryBarrier();
	barrier();

	// sort threadgroup shared memory
	for (int nMergeSubSize = SORT_SIZE >> 1; nMergeSubSize > 0; nMergeSubSize = nMergeSubSize >> 1) {
		int tmp_index = int(GI);
		int index_low = tmp_index & (nMergeSubSize - 1);
		int index_high = 2 * (tmp_index - index_low);
		int index = index_high + index_low;

		int nSwapElem = index_high + nMergeSubSize + index_low;

		if (nSwapElem < tgp.w) {
			vec2 a = g_LDS[index];
			vec2 b = g_LDS[nSwapElem];

			if (a.x > b.x) {
				g_LDS[index] = b;
				g_LDS[nSwapElem] = a;
			}
		}
		groupMemoryBarrier();
		barrier();
	}

	// Store shared data
	for (i = 0; i < 2; ++i) {
		if (GI + i * NUM_THREADS < tgp.w) {
			sort_buffer.data[GlobalBaseIndex + i * NUM_THREADS] = g_LDS[LocalBaseIndex + i * NUM_THREADS];
		}
	}

#endif
}
|
||||
Reference in New Issue
Block a user