diff --git a/SConstruct b/SConstruct index 991ab89846e..61691b22992 100644 --- a/SConstruct +++ b/SConstruct @@ -202,6 +202,14 @@ opts.Add(BoolVariable("use_volk", "Use the volk library to load the Vulkan loade opts.Add(BoolVariable("accesskit", "Use AccessKit C SDK", True)) opts.Add(("accesskit_sdk_path", "Path to the AccessKit C SDK", "")) opts.Add(BoolVariable("sdl", "Enable the SDL3 input driver", True)) +opts.Add(("profiler_path", "Path to the Profiler framework. Only tracy and perfetto are supported at the moment.", "")) +opts.Add( + BoolVariable( + "profiler_sample_callstack", + "Profile random samples application-wide using a callstack based sampler.", + False, + ) +) # Advanced options opts.Add( diff --git a/core/SCsub b/core/SCsub index 60ea202ac96..3f1b92055a4 100644 --- a/core/SCsub +++ b/core/SCsub @@ -217,6 +217,7 @@ env.CommandNoCache( ) # Chain load SCsubs +SConscript("profiling/SCsub") SConscript("os/SCsub") SConscript("math/SCsub") SConscript("crypto/SCsub") diff --git a/core/profiling/SCsub b/core/profiling/SCsub new file mode 100644 index 00000000000..1ddc0c05659 --- /dev/null +++ b/core/profiling/SCsub @@ -0,0 +1,69 @@ +#!/usr/bin/env python +from misc.utility.scons_hints import * + +import pathlib +from typing import Tuple + +import profiling_builders + +Import("env") + +env.add_source_files(env.core_sources, "*.cpp") + + +def get_profiler_and_path_from_path(path: pathlib.Path) -> Tuple[str, pathlib.Path]: + if not path.is_dir(): + print("profiler_path must be empty or point to a directory.") + Exit(255) + + if (path / "sdk" / "perfetto.cc").is_file(): + # perfetto root directory. + return "perfetto", path / "sdk" + if (path / "perfetto.cc").is_file(): + # perfetto sdk directory. 
+ return "perfetto", path + + if (path / "public" / "TracyClient.cpp").is_file(): + # tracy root directory + return "tracy", path / "public" + if (path / "TracyClient.cpp").is_file(): + # tracy public directory + return "tracy", path + + print("Unrecognized profiler_path option. Please set a path to either tracy or perfetto.") + Exit(255) + + +env["profiler"] = None +if env["profiler_path"]: + profiler_name, profiler_path = get_profiler_and_path_from_path(pathlib.Path(env["profiler_path"])) + env["profiler"] = profiler_name + + if profiler_name == "tracy": + env.Prepend(CPPPATH=[str(profiler_path.absolute())]) + + env_tracy = env.Clone() + env_tracy.Append(CPPDEFINES=["TRACY_ENABLE"]) + if env["profiler_sample_callstack"]: + if env["platform"] not in ("windows", "linux", "android"): + # Reference the feature matrix in the tracy documentation. + print("Tracy does not support call stack sampling on this platform. Aborting.") + Exit(255) + + # 62 is the maximum supported callstack depth reported by the tracy docs. + env_tracy.Append(CPPDEFINES=[("TRACY_CALLSTACK", 62)]) + env_tracy.disable_warnings() + env_tracy.add_source_files(env.core_sources, str((profiler_path / "TracyClient.cpp").absolute())) + elif profiler_name == "perfetto": + env.Prepend(CPPPATH=[str(profiler_path.absolute())]) + + env_perfetto = env.Clone() + if env["profiler_sample_callstack"]: + print("Perfetto does not support call stack sampling. 
Aborting.") + Exit(255) + env_perfetto.disable_warnings() + env_perfetto.Prepend(CPPPATH=[str(profiler_path.absolute())]) + env_perfetto.add_source_files(env.core_sources, str((profiler_path / "perfetto.cc").absolute())) + + +env.CommandNoCache("profiling.gen.h", [env.Value(env["profiler"])], env.Run(profiling_builders.profiler_gen_builder)) diff --git a/core/profiling/profiling.cpp b/core/profiling/profiling.cpp new file mode 100644 index 00000000000..a809fee6b2e --- /dev/null +++ b/core/profiling/profiling.cpp @@ -0,0 +1,54 @@ +/**************************************************************************/ +/* profiling.cpp */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
*/ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#include "profiling.h" + +#if defined(GODOT_USE_TRACY) +void godot_init_profiler() { + // Send our first event to tracy; otherwise it doesn't start collecting data. + // FrameMark is kind of fitting because it communicates "this is where we started tracing". + FrameMark; +} +#elif defined(GODOT_USE_PERFETTO) +PERFETTO_TRACK_EVENT_STATIC_STORAGE(); + +void godot_init_profiler() { + perfetto::TracingInitArgs args; + + args.backends |= perfetto::kSystemBackend; + + perfetto::Tracing::Initialize(args); + perfetto::TrackEvent::Register(); +} +#else +void godot_init_profiler() { + // Stub +} +#endif diff --git a/core/profiling/profiling.h b/core/profiling/profiling.h new file mode 100644 index 00000000000..93af70ea4b6 --- /dev/null +++ b/core/profiling/profiling.h @@ -0,0 +1,101 @@ +/**************************************************************************/ +/* profiling.h */ +/**************************************************************************/ +/* This file is part of: */ +/* GODOT ENGINE */ +/* https://godotengine.org */ +/**************************************************************************/ +/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ +/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. 
*/ +/* */ +/* Permission is hereby granted, free of charge, to any person obtaining */ +/* a copy of this software and associated documentation files (the */ +/* "Software"), to deal in the Software without restriction, including */ +/* without limitation the rights to use, copy, modify, merge, publish, */ +/* distribute, sublicense, and/or sell copies of the Software, and to */ +/* permit persons to whom the Software is furnished to do so, subject to */ +/* the following conditions: */ +/* */ +/* The above copyright notice and this permission notice shall be */ +/* included in all copies or substantial portions of the Software. */ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#pragma once + +#include "core/typedefs.h" +#include "profiling.gen.h" + +#if defined(GODOT_USE_TRACY) +// Use the tracy profiler. + +#define TRACY_ENABLE +#include <tracy/Tracy.hpp> + +#ifndef TRACY_CALLSTACK +#define TRACY_CALLSTACK 0 +#endif + +// Define tracing macros. 
+#define GodotProfileFrameMark FrameMark +#define GodotProfileZone(m_zone_name) ZoneScopedN(m_zone_name) +#define GodotProfileZoneGroupedFirst(m_group_name, m_zone_name) ZoneNamedN(__godot_tracy_zone_##m_group_name, m_zone_name, true) +#define GodotProfileZoneGroupedEndEarly(m_group_name, m_zone_name) __godot_tracy_zone_##m_group_name.~ScopedZone(); +#define GodotProfileZoneGrouped(m_group_name, m_zone_name) \ + GodotProfileZoneGroupedEndEarly(m_group_name, m_zone_name); \ + static constexpr tracy::SourceLocationData TracyConcat(__tracy_source_location, TracyLine){ m_zone_name, TracyFunction, TracyFile, (uint32_t)TracyLine, 0 }; \ + new (&__godot_tracy_zone_##m_group_name) tracy::ScopedZone(&TracyConcat(__tracy_source_location, TracyLine), TRACY_CALLSTACK, true) + +void godot_init_profiler(); + +#elif defined(GODOT_USE_PERFETTO) +// Use the perfetto profiler. + +#include <perfetto.h> + +PERFETTO_DEFINE_CATEGORIES( + perfetto::Category("godot") + .SetDescription("All Godot Events"), ); + +// See PERFETTO_INTERNAL_SCOPED_EVENT_FINALIZER +struct PerfettoGroupedEventEnder { + _FORCE_INLINE_ void _end_now() { + TRACE_EVENT_END("godot"); + } + + _FORCE_INLINE_ ~PerfettoGroupedEventEnder() { + _end_now(); + } +}; + +#define GodotProfileFrameMark // TODO +#define GodotProfileZone(m_zone_name) TRACE_EVENT("godot", m_zone_name); +#define GodotProfileZoneGroupedFirst(m_group_name, m_zone_name) \ + TRACE_EVENT_BEGIN("godot", m_zone_name); \ + PerfettoGroupedEventEnder __godot_perfetto_zone_##m_group_name +#define GodotProfileZoneGroupedEndEarly(m_group_name, m_zone_name) __godot_perfetto_zone_##m_group_name.~PerfettoGroupedEventEnder() +#define GodotProfileZoneGrouped(m_group_name, m_zone_name) \ + __godot_perfetto_zone_##m_group_name._end_now(); \ + TRACE_EVENT_BEGIN("godot", m_zone_name); + +void godot_init_profiler(); + +#else +// No profiling; all macros are stubs. 
+ +void godot_init_profiler(); + +#define GodotProfileFrameMark +#define GodotProfileZone(m_zone_name) +#define GodotProfileZoneGroupedFirst(m_group_name, m_zone_name) +#define GodotProfileZoneGroupedEndEarly(m_group_name, m_zone_name) +#define GodotProfileZoneGrouped(m_group_name, m_zone_name) + +#endif diff --git a/core/profiling/profiling_builders.py b/core/profiling/profiling_builders.py new file mode 100644 index 00000000000..9a2122d8b2b --- /dev/null +++ b/core/profiling/profiling_builders.py @@ -0,0 +1,13 @@ +"""Functions used to generate source files during build time""" + +import methods + + +def profiler_gen_builder(target, source, env): + with methods.generated_wrapper(str(target[0])) as file: + if env["profiler"] == "tracy": + file.write("#define GODOT_USE_TRACY\n") + if env["profiler_sample_callstack"]: + file.write("#define TRACY_CALLSTACK 62\n") + if env["profiler"] == "perfetto": + file.write("#define GODOT_USE_PERFETTO\n") diff --git a/drivers/apple_embedded/os_apple_embedded.mm b/drivers/apple_embedded/os_apple_embedded.mm index 79b2c4a0baf..eab16ead8bb 100644 --- a/drivers/apple_embedded/os_apple_embedded.mm +++ b/drivers/apple_embedded/os_apple_embedded.mm @@ -41,6 +41,7 @@ #include "core/io/dir_access.h" #include "core/io/file_access.h" #include "core/os/main_loop.h" +#include "core/profiling/profiling.h" #import "drivers/apple/os_log_logger.h" #include "main/main.h" @@ -205,6 +206,9 @@ bool OS_AppleEmbedded::iterate() { return true; } + GodotProfileFrameMark; + GodotProfileZone("OS_AppleEmbedded::iterate"); + if (DisplayServer::get_singleton()) { DisplayServer::get_singleton()->process_events(); } diff --git a/main/main.cpp b/main/main.cpp index 144b398da13..1ec96a577bc 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -50,6 +50,7 @@ #include "core/object/script_language.h" #include "core/os/os.h" #include "core/os/time.h" +#include "core/profiling/profiling.h" #include "core/register_core_types.h" #include "core/string/translation_server.h" 
#include "core/version.h" @@ -984,6 +985,7 @@ int Main::test_entrypoint(int argc, char *argv[], bool &tests_need_run) { */ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_phase) { + GodotProfileZone("setup"); Thread::make_main_thread(); set_current_thread_safe_for_nodes(true); @@ -2908,6 +2910,7 @@ Error _parse_resource_dummy(void *p_data, VariantParser::Stream *p_stream, Refbenchmark_begin_measure("Startup", "Main::Setup2"); Thread::make_main_thread(); // Make whatever thread call this the main thread. @@ -3743,6 +3746,7 @@ Error Main::setup2(bool p_show_boot_logo) { } void Main::setup_boot_logo() { + GodotProfileZone("setup_boot_logo"); MAIN_PRINT("Main: Load Boot Image"); #if !defined(TOOLS_ENABLED) && defined(WEB_ENABLED) @@ -3838,6 +3842,7 @@ static MainTimerSync main_timer_sync; // and should move on to `OS::run`, and EXIT_FAILURE otherwise for // an early exit with that error code. int Main::start() { + GodotProfileZone("start"); OS::get_singleton()->benchmark_begin_measure("Startup", "Main::Start"); ERR_FAIL_COND_V(!_start_success, EXIT_FAILURE); @@ -4709,6 +4714,8 @@ static uint64_t navigation_process_max = 0; // will terminate the program. In case of failure, the OS exit code needs // to be set explicitly here (defaults to EXIT_SUCCESS). 
bool Main::iteration() { + GodotProfileZone("Main::iteration"); + GodotProfileZoneGroupedFirst(_profile_zone, "prepare"); iterating++; const uint64_t ticks = OS::get_singleton()->get_ticks_usec(); @@ -4754,6 +4761,8 @@ bool Main::iteration() { #endif // XR_DISABLED for (int iters = 0; iters < advance.physics_steps; ++iters) { + GodotProfileZone("Physics Step"); + GodotProfileZoneGroupedFirst(_physics_zone, "setup"); if (Input::get_singleton()->is_agile_input_event_flushing()) { Input::get_singleton()->flush_buffered_events(); } @@ -4766,18 +4775,22 @@ bool Main::iteration() { // Prepare the fixed timestep interpolated nodes BEFORE they are updated // by the physics server, otherwise the current and previous transforms // may be the same, and no interpolation takes place. + GodotProfileZoneGrouped(_physics_zone, "main loop iteration prepare"); OS::get_singleton()->get_main_loop()->iteration_prepare(); #ifndef PHYSICS_3D_DISABLED + GodotProfileZoneGrouped(_physics_zone, "PhysicsServer3D::sync"); PhysicsServer3D::get_singleton()->sync(); PhysicsServer3D::get_singleton()->flush_queries(); #endif // PHYSICS_3D_DISABLED #ifndef PHYSICS_2D_DISABLED + GodotProfileZoneGrouped(_physics_zone, "PhysicsServer2D::sync"); PhysicsServer2D::get_singleton()->sync(); PhysicsServer2D::get_singleton()->flush_queries(); #endif // PHYSICS_2D_DISABLED + GodotProfileZoneGrouped(_physics_zone, "physics_process"); if (OS::get_singleton()->get_main_loop()->physics_process(physics_step * time_scale)) { #ifndef PHYSICS_3D_DISABLED PhysicsServer3D::get_singleton()->end_sync(); @@ -4795,9 +4808,11 @@ bool Main::iteration() { uint64_t navigation_begin = OS::get_singleton()->get_ticks_usec(); #ifndef NAVIGATION_2D_DISABLED + GodotProfileZoneGrouped(_profile_zone, "NavigationServer2D::physics_process"); NavigationServer2D::get_singleton()->physics_process(physics_step * time_scale); #endif // NAVIGATION_2D_DISABLED #ifndef NAVIGATION_3D_DISABLED + GodotProfileZoneGrouped(_profile_zone, 
"NavigationServer3D::physics_process"); NavigationServer3D::get_singleton()->physics_process(physics_step * time_scale); #endif // NAVIGATION_3D_DISABLED @@ -4808,17 +4823,20 @@ bool Main::iteration() { #endif // !defined(NAVIGATION_2D_DISABLED) || !defined(NAVIGATION_3D_DISABLED) #ifndef PHYSICS_3D_DISABLED + GodotProfileZoneGrouped(_profile_zone, "3D physics"); PhysicsServer3D::get_singleton()->end_sync(); PhysicsServer3D::get_singleton()->step(physics_step * time_scale); #endif // PHYSICS_3D_DISABLED #ifndef PHYSICS_2D_DISABLED + GodotProfileZoneGrouped(_profile_zone, "2D physics"); PhysicsServer2D::get_singleton()->end_sync(); PhysicsServer2D::get_singleton()->step(physics_step * time_scale); #endif // PHYSICS_2D_DISABLED message_queue->flush(); + GodotProfileZoneGrouped(_profile_zone, "main loop iteration end"); OS::get_singleton()->get_main_loop()->iteration_end(); physics_process_ticks = MAX(physics_process_ticks, OS::get_singleton()->get_ticks_usec() - physics_begin); // keep the largest one for reference @@ -4833,20 +4851,25 @@ bool Main::iteration() { uint64_t process_begin = OS::get_singleton()->get_ticks_usec(); + GodotProfileZoneGrouped(_profile_zone, "process"); if (OS::get_singleton()->get_main_loop()->process(process_step * time_scale)) { exit = true; } message_queue->flush(); #ifndef NAVIGATION_2D_DISABLED + GodotProfileZoneGrouped(_profile_zone, "process 2D navigation"); NavigationServer2D::get_singleton()->process(process_step * time_scale); #endif // NAVIGATION_2D_DISABLED #ifndef NAVIGATION_3D_DISABLED + GodotProfileZoneGrouped(_profile_zone, "process 3D navigation"); NavigationServer3D::get_singleton()->process(process_step * time_scale); #endif // NAVIGATION_3D_DISABLED + GodotProfileZoneGrouped(_profile_zone, "RenderingServer::sync"); RenderingServer::get_singleton()->sync(); //sync if still drawing from previous frames. 
+ GodotProfileZoneGrouped(_profile_zone, "RenderingServer::draw"); const bool has_pending_resources_for_processing = RD::get_singleton() && RD::get_singleton()->has_pending_resources_for_processing(); bool wants_present = (DisplayServer::get_singleton()->can_any_window_draw() || DisplayServer::get_singleton()->has_additional_outputs()) && @@ -4870,12 +4893,15 @@ bool Main::iteration() { process_max = MAX(process_ticks, process_max); uint64_t frame_time = OS::get_singleton()->get_ticks_usec() - ticks; + GodotProfileZoneGrouped(_profile_zone, "GDExtensionManager::frame"); GDExtensionManager::get_singleton()->frame(); + GodotProfileZoneGrouped(_profile_zone, "ScriptServer::frame"); for (int i = 0; i < ScriptServer::get_language_count(); i++) { ScriptServer::get_language(i)->frame(); } + GodotProfileZoneGrouped(_profile_zone, "AudioServer::update"); AudioServer::get_singleton()->update(); if (EngineDebugger::is_active()) { @@ -4914,6 +4940,7 @@ bool Main::iteration() { iterating--; if (movie_writer) { + GodotProfileZoneGrouped(_profile_zone, "movie_writer->add_frame"); movie_writer->add_frame(); } @@ -4940,6 +4967,7 @@ bool Main::iteration() { SceneTree *scene_tree = SceneTree::get_singleton(); bool wake_for_events = scene_tree && scene_tree->is_accessibility_enabled(); + GodotProfileZoneGrouped(_profile_zone, "OS::add_frame_delay"); OS::get_singleton()->add_frame_delay(DisplayServer::get_singleton()->window_can_draw(), wake_for_events); #ifdef TOOLS_ENABLED @@ -4979,6 +5007,7 @@ void Main::force_redraw() { * The order matters as some of those steps are linked with each other. 
*/ void Main::cleanup(bool p_force) { + GodotProfileZone("cleanup"); OS::get_singleton()->benchmark_begin_measure("Shutdown", "Main::Cleanup"); if (!p_force) { ERR_FAIL_COND(!_start_success); diff --git a/platform/android/java_godot_lib_jni.cpp b/platform/android/java_godot_lib_jni.cpp index fd19541f7cd..8b3035301bb 100644 --- a/platform/android/java_godot_lib_jni.cpp +++ b/platform/android/java_godot_lib_jni.cpp @@ -49,6 +49,7 @@ #include "core/config/project_settings.h" #include "core/input/input.h" #include "core/os/main_loop.h" +#include "core/profiling/profiling.h" #include "main/main.h" #include "servers/rendering/rendering_server.h" @@ -151,6 +152,8 @@ JNIEXPORT void JNICALL Java_org_godotengine_godot_GodotLib_setVirtualKeyboardHei } JNIEXPORT jboolean JNICALL Java_org_godotengine_godot_GodotLib_initialize(JNIEnv *env, jclass clazz, jobject p_godot_instance, jobject p_asset_manager, jobject p_godot_io, jobject p_net_utils, jobject p_directory_access_handler, jobject p_file_access_handler, jboolean p_use_apk_expansion) { + godot_init_profiler(); + JavaVM *jvm; env->GetJavaVM(&jvm); diff --git a/platform/android/os_android.cpp b/platform/android/os_android.cpp index 9a351708867..227bb9e5f3a 100644 --- a/platform/android/os_android.cpp +++ b/platform/android/os_android.cpp @@ -42,6 +42,7 @@ #include "core/extension/gdextension_manager.h" #include "core/io/xml_parser.h" #include "core/os/main_loop.h" +#include "core/profiling/profiling.h" #include "drivers/unix/dir_access_unix.h" #include "drivers/unix/file_access_unix.h" #ifdef TOOLS_ENABLED @@ -364,6 +365,8 @@ void OS_Android::main_loop_begin() { } bool OS_Android::main_loop_iterate(bool *r_should_swap_buffers) { + GodotProfileFrameMark; + GodotProfileZone("OS_Android::main_loop_iterate"); if (!main_loop) { return false; } diff --git a/platform/ios/main_ios.mm b/platform/ios/main_ios.mm index fe7e2022ca7..7d9c054950e 100644 --- a/platform/ios/main_ios.mm +++ b/platform/ios/main_ios.mm @@ -30,6 +30,7 @@ #import 
"os_ios.h" +#include "core/profiling/profiling.h" #import "drivers/apple_embedded/godot_app_delegate.h" #import "drivers/apple_embedded/main_utilities.h" #include "main/main.h" @@ -55,6 +56,8 @@ int apple_embedded_main(int argc, char **argv) { char *fargv[64]; argc = process_args(argc, argv, fargv); + godot_init_profiler(); + Error err = Main::setup(fargv[0], argc - 1, &fargv[1], false); if (err != OK) { diff --git a/platform/linuxbsd/godot_linuxbsd.cpp b/platform/linuxbsd/godot_linuxbsd.cpp index 8bda6d932c1..f2c7d8eccf1 100644 --- a/platform/linuxbsd/godot_linuxbsd.cpp +++ b/platform/linuxbsd/godot_linuxbsd.cpp @@ -30,6 +30,7 @@ #include "os_linuxbsd.h" +#include "core/profiling/profiling.h" #include "main/main.h" #include @@ -93,6 +94,8 @@ int main(int argc, char *argv[]) { setrlimit(RLIMIT_STACK, &stack_lim); #endif + godot_init_profiler(); + OS_LinuxBSD os; setlocale(LC_CTYPE, ""); diff --git a/platform/linuxbsd/os_linuxbsd.cpp b/platform/linuxbsd/os_linuxbsd.cpp index dd864bfd15b..92e9132cdaf 100644 --- a/platform/linuxbsd/os_linuxbsd.cpp +++ b/platform/linuxbsd/os_linuxbsd.cpp @@ -37,6 +37,7 @@ #ifdef SDL_ENABLED #include "drivers/sdl/joypad_sdl.h" #endif +#include "core/profiling/profiling.h" #include "main/main.h" #include "servers/display/display_server.h" #include "servers/rendering/rendering_server.h" @@ -970,6 +971,8 @@ String OS_LinuxBSD::get_system_dir(SystemDir p_dir, bool p_shared_storage) const } void OS_LinuxBSD::run() { + GodotProfileFrameMark; + GodotProfileZone("OS_LinuxBSD::run"); if (!main_loop) { return; } diff --git a/platform/macos/godot_content_view.mm b/platform/macos/godot_content_view.mm index 97263c58a15..f792936b811 100644 --- a/platform/macos/godot_content_view.mm +++ b/platform/macos/godot_content_view.mm @@ -34,6 +34,7 @@ #import "godot_window.h" #import "key_mapping_macos.h" +#include "core/profiling/profiling.h" #include "main/main.h" @implementation GodotContentLayerDelegate @@ -56,6 +57,9 @@ - (void)displayLayer:(CALayer 
*)layer { DisplayServerMacOS *ds = (DisplayServerMacOS *)DisplayServer::get_singleton(); if (OS::get_singleton()->get_main_loop() && ds->get_is_resizing() && need_redraw) { + GodotProfileFrameMark; + GodotProfileZone("[GodotContentLayerDelegate displayLayer]"); + Main::force_redraw(); if (!Main::is_iterating()) { // Avoid cyclic loop. Main::iteration(); diff --git a/platform/macos/godot_main_macos.mm b/platform/macos/godot_main_macos.mm index 3807464fcf2..4eefa7df571 100644 --- a/platform/macos/godot_main_macos.mm +++ b/platform/macos/godot_main_macos.mm @@ -32,6 +32,7 @@ #import "godot_application.h" +#include "core/profiling/profiling.h" #include "main/main.h" #if defined(SANITIZERS_ENABLED) @@ -39,6 +40,8 @@ #endif int main(int argc, char **argv) { + godot_init_profiler(); + #if defined(VULKAN_ENABLED) setenv("MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE", "1", 1); // MoltenVK - enable full component swizzling support. setenv("MVK_CONFIG_SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST", "0", 1); // MoltenVK - use linear surface scaling. TODO: remove when full DPI scaling is implemented. 
diff --git a/platform/macos/os_macos.mm b/platform/macos/os_macos.mm index 512489734cc..db10fafaba6 100644 --- a/platform/macos/os_macos.mm +++ b/platform/macos/os_macos.mm @@ -41,6 +41,7 @@ #include "core/crypto/crypto_core.h" #include "core/io/file_access.h" #include "core/os/main_loop.h" +#include "core/profiling/profiling.h" #include "core/version_generated.gen.h" #include "drivers/apple/os_log_logger.h" #include "main/main.h" @@ -1080,6 +1081,8 @@ static void handle_interrupt(int sig) { } void OS_MacOS_NSApp::start_main() { + godot_init_profiler(); + Error err; @autoreleasepool { err = Main::setup(execpath, argc, argv); @@ -1103,6 +1106,9 @@ void OS_MacOS_NSApp::start_main() { pre_wait_observer = CFRunLoopObserverCreateWithHandler(kCFAllocatorDefault, kCFRunLoopBeforeWaiting, true, 0, ^(CFRunLoopObserverRef observer, CFRunLoopActivity activity) { @autoreleasepool { @try { + GodotProfileFrameMark; + GodotProfileZone("macOS main loop"); + if (ds_mac) { ds_mac->_process_events(false); } else if (ds) { @@ -1276,6 +1282,9 @@ void OS_MacOS_Embedded::run() { while (true) { @autoreleasepool { @try { + GodotProfileFrameMark; + GodotProfileZone("macOS embedded main loop"); + ds->process_events(); #ifdef SDL_ENABLED diff --git a/platform/visionos/main_visionos.mm b/platform/visionos/main_visionos.mm index 3e392307c86..c0d1ba3b70a 100644 --- a/platform/visionos/main_visionos.mm +++ b/platform/visionos/main_visionos.mm @@ -30,6 +30,7 @@ #import "os_visionos.h" +#include "core/profiling/profiling.h" #import "drivers/apple_embedded/godot_app_delegate.h" #import "drivers/apple_embedded/main_utilities.h" #include "main/main.h" @@ -50,6 +51,8 @@ int apple_embedded_main(int argc, char **argv) { char *fargv[64]; argc = process_args(argc, argv, fargv); + godot_init_profiler(); + Error err = Main::setup(fargv[0], argc - 1, &fargv[1], false); if (err != OK) { diff --git a/platform/web/os_web.cpp b/platform/web/os_web.cpp index fb40f1d2e9b..9666bd8882e 100644 --- 
a/platform/web/os_web.cpp +++ b/platform/web/os_web.cpp @@ -40,6 +40,7 @@ #include "core/debugger/engine_debugger.h" #include "core/io/file_access.h" #include "core/os/main_loop.h" +#include "core/profiling/profiling.h" #include "drivers/unix/dir_access_unix.h" #include "drivers/unix/file_access_unix.h" #include "main/main.h" @@ -79,6 +80,8 @@ void OS_Web::fs_sync_callback() { } bool OS_Web::main_loop_iterate() { + GodotProfileFrameMark; + GodotProfileZone("OS_Web::main_loop_iterate"); if (is_userfs_persistent() && idb_needs_sync && !idb_is_syncing) { idb_is_syncing = true; idb_needs_sync = false; diff --git a/platform/web/web_main.cpp b/platform/web/web_main.cpp index 2378dd32ce8..3fb3f2bcce8 100644 --- a/platform/web/web_main.cpp +++ b/platform/web/web_main.cpp @@ -35,6 +35,7 @@ #include "core/config/engine.h" #include "core/io/file_access.h" #include "core/io/resource_loader.h" +#include "core/profiling/profiling.h" #include "main/main.h" #include "scene/main/scene_tree.h" #include "scene/main/window.h" // SceneTree only forward declares it. @@ -126,6 +127,8 @@ void print_web_header() { /// When calling main, it is assumed FS is setup and synced. 
extern EMSCRIPTEN_KEEPALIVE int godot_web_main(int argc, char *argv[]) { + godot_init_profiler(); + os = new OS_Web(); #ifdef TOOLS_ENABLED diff --git a/platform/windows/godot_windows.cpp b/platform/windows/godot_windows.cpp index 3d6f973b331..7355a9f925a 100644 --- a/platform/windows/godot_windows.cpp +++ b/platform/windows/godot_windows.cpp @@ -30,6 +30,7 @@ #include "os_windows.h" +#include "core/profiling/profiling.h" #include "main/main.h" #include @@ -66,6 +67,8 @@ char *wc_to_utf8(const wchar_t *wc) { } int widechar_main(int argc, wchar_t **argv) { + godot_init_profiler(); + OS_Windows os(nullptr); setlocale(LC_CTYPE, ""); diff --git a/platform/windows/os_windows.cpp b/platform/windows/os_windows.cpp index 588a19b394a..a24488014c4 100644 --- a/platform/windows/os_windows.cpp +++ b/platform/windows/os_windows.cpp @@ -39,6 +39,7 @@ #include "core/debugger/script_debugger.h" #include "core/io/marshalls.h" #include "core/os/main_loop.h" +#include "core/profiling/profiling.h" #include "core/version_generated.gen.h" #include "drivers/windows/dir_access_windows.h" #include "drivers/windows/file_access_windows.h" @@ -2329,6 +2330,8 @@ void OS_Windows::run() { main_loop->initialize(); while (true) { + GodotProfileFrameMark; + GodotProfileZone("OS_Windows::run"); DisplayServer::get_singleton()->process_events(); // get rid of pending events if (Main::iteration()) { break; diff --git a/scene/main/scene_tree.cpp b/scene/main/scene_tree.cpp index ed6835bc731..3ced395f9f7 100644 --- a/scene/main/scene_tree.cpp +++ b/scene/main/scene_tree.cpp @@ -37,6 +37,7 @@ #include "core/object/message_queue.h" #include "core/object/worker_thread_pool.h" #include "core/os/os.h" +#include "core/profiling/profiling.h" #include "node.h" #include "scene/animation/tween.h" #include "scene/debugger/scene_debugger.h" @@ -572,6 +573,7 @@ void SceneTree::set_group(const StringName &p_group, const String &p_name, const } void SceneTree::initialize() { + GodotProfileZone("SceneTree::initialize"); 
ERR_FAIL_NULL(root); MainLoop::initialize(); root->_set_tree(this); diff --git a/servers/rendering/renderer_viewport.cpp b/servers/rendering/renderer_viewport.cpp index e5104bba640..c622decaa65 100644 --- a/servers/rendering/renderer_viewport.cpp +++ b/servers/rendering/renderer_viewport.cpp @@ -33,6 +33,7 @@ #include "core/config/project_settings.h" #include "core/math/transform_interpolator.h" #include "core/object/worker_thread_pool.h" +#include "core/profiling/profiling.h" #include "renderer_canvas_cull.h" #include "renderer_scene_cull.h" #include "rendering_server_globals.h" @@ -733,6 +734,7 @@ void RendererViewport::_draw_viewport(Viewport *p_viewport) { } void RendererViewport::draw_viewports(bool p_swap_buffers) { + GodotProfileZoneGroupedFirst(_profile_zone, "prepare viewports"); timestamp_vp_map.clear(); #ifndef XR_DISABLED @@ -750,6 +752,7 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) { } if (sorted_active_viewports_dirty) { + GodotProfileZoneGrouped(_profile_zone, "_sort_active_viewports"); sorted_active_viewports = _sort_active_viewports(); sorted_active_viewports_dirty = false; } @@ -758,11 +761,12 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) { //draw viewports RENDER_TIMESTAMP("> Render Viewports"); + GodotProfileZoneGrouped(_profile_zone, "render viewports"); + //determine what is visible draw_viewports_pass++; for (int i = sorted_active_viewports.size() - 1; i >= 0; i--) { //to compute parent dependency, must go in reverse draw order - Viewport *vp = sorted_active_viewports[i]; if (vp->update_mode == RS::VIEWPORT_UPDATE_DISABLED) { @@ -821,6 +825,9 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) { int draw_calls_used = 0; for (int i = 0; i < sorted_active_viewports.size(); i++) { + // TODO Somehow print the index + GodotProfileZone("render viewport"); + Viewport *vp = sorted_active_viewports[i]; if (vp->last_pass != draw_viewports_pass) { @@ -925,6 +932,7 @@ void RendererViewport::draw_viewports(bool 
p_swap_buffers) { vertices_drawn += vp->render_info.info[RS::VIEWPORT_RENDER_INFO_TYPE_CANVAS][RS::VIEWPORT_RENDER_INFO_PRIMITIVES_IN_FRAME]; draw_calls_used += vp->render_info.info[RS::VIEWPORT_RENDER_INFO_TYPE_CANVAS][RS::VIEWPORT_RENDER_INFO_DRAW_CALLS_IN_FRAME]; } + RSG::scene->set_debug_draw_mode(RS::VIEWPORT_DEBUG_DRAW_DISABLED); total_objects_drawn = objects_drawn; @@ -933,6 +941,7 @@ void RendererViewport::draw_viewports(bool p_swap_buffers) { RENDER_TIMESTAMP("< Render Viewports"); + GodotProfileZoneGrouped(_profile_zone, "rasterizer->blit_render_targets_to_screen"); if (p_swap_buffers && !blit_to_screen_list.is_empty()) { for (const KeyValue> &E : blit_to_screen_list) { RSG::rasterizer->blit_render_targets_to_screen(E.key, E.value.ptr(), E.value.size()); diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp index 6a3646e4bed..faf2eca2ee1 100644 --- a/servers/rendering/rendering_device.cpp +++ b/servers/rendering/rendering_device.cpp @@ -37,6 +37,7 @@ #include "core/config/project_settings.h" #include "core/io/dir_access.h" #include "core/io/file_access.h" +#include "core/profiling/profiling.h" #include "modules/modules_enabled.gen.h" #include "servers/rendering/rendering_shader_container.h" @@ -6256,12 +6257,16 @@ String RenderingDevice::get_device_pipeline_cache_uuid() const { void RenderingDevice::swap_buffers(bool p_present) { ERR_RENDER_THREAD_GUARD(); + GodotProfileZoneGroupedFirst(_profile_zone, "_end_frame"); _end_frame(); + + GodotProfileZoneGrouped(_profile_zone, "_execute_frame"); _execute_frame(p_present); // Advance to the next frame and begin recording again. 
frame = (frame + 1) % frames.size(); + GodotProfileZoneGrouped(_profile_zone, "_begin_frame"); _begin_frame(true); } @@ -6389,27 +6394,34 @@ uint64_t RenderingDevice::get_memory_usage(MemoryType p_type) const { } void RenderingDevice::_begin_frame(bool p_presented) { + GodotProfileZoneGroupedFirst(_profile_zone, "_stall_for_frame"); // Before writing to this frame, wait for it to be finished. _stall_for_frame(frame); if (command_pool_reset_enabled) { + GodotProfileZoneGrouped(_profile_zone, "driver->command_pool_reset"); bool reset = driver->command_pool_reset(frames[frame].command_pool); ERR_FAIL_COND(!reset); } if (p_presented) { + GodotProfileZoneGrouped(_profile_zone, "update_perf_report"); update_perf_report(); driver->linear_uniform_set_pools_reset(frame); } // Begin recording on the frame's command buffers. + GodotProfileZoneGrouped(_profile_zone, "driver->begin_segment"); driver->begin_segment(frame, frames_drawn++); + GodotProfileZoneGrouped(_profile_zone, "driver->command_buffer_begin"); driver->command_buffer_begin(frames[frame].command_buffer); // Reset the graph. + GodotProfileZoneGrouped(_profile_zone, "draw_graph.begin"); draw_graph.begin(); // Erase pending resources. + GodotProfileZoneGrouped(_profile_zone, "_free_pending_resources"); _free_pending_resources(frame); // Advance staging buffers if used. @@ -6446,11 +6458,16 @@ void RenderingDevice::_end_frame() { // The command buffer must be copied into a stack variable as the driver workarounds can change the command buffer in use. 
RDD::CommandBufferID command_buffer = frames[frame].command_buffer; + GodotProfileZoneGroupedFirst(_profile_zone, "_submit_transfer_workers"); _submit_transfer_workers(command_buffer); + GodotProfileZoneGrouped(_profile_zone, "_submit_transfer_barriers"); _submit_transfer_barriers(command_buffer); + GodotProfileZoneGrouped(_profile_zone, "draw_graph.end"); draw_graph.end(RENDER_GRAPH_REORDER, RENDER_GRAPH_FULL_BARRIERS, command_buffer, frames[frame].command_buffer_pool); + GodotProfileZoneGrouped(_profile_zone, "driver->command_buffer_end"); driver->command_buffer_end(command_buffer); + GodotProfileZoneGrouped(_profile_zone, "driver->end_segment"); driver->end_segment(); } @@ -6542,11 +6559,13 @@ void RenderingDevice::_stall_for_frame(uint32_t p_frame) { thread_local PackedByteArray packed_byte_array; if (frames[p_frame].fence_signaled) { + GodotProfileZoneGroupedFirst(_profile_zone, "driver->fence_wait"); driver->fence_wait(frames[p_frame].fence); frames[p_frame].fence_signaled = false; // Flush any pending requests for asynchronous buffer downloads. if (!frames[p_frame].download_buffer_get_data_requests.is_empty()) { + GodotProfileZoneGrouped(_profile_zone, "flush asynchronous buffer downloads"); for (uint32_t i = 0; i < frames[p_frame].download_buffer_get_data_requests.size(); i++) { const BufferGetDataRequest &request = frames[p_frame].download_buffer_get_data_requests[i]; packed_byte_array.resize(request.size); @@ -6571,6 +6590,7 @@ void RenderingDevice::_stall_for_frame(uint32_t p_frame) { // Flush any pending requests for asynchronous texture downloads. 
if (!frames[p_frame].download_texture_get_data_requests.is_empty()) { + GodotProfileZoneGrouped(_profile_zone, "flush asynchronous texture downloads"); uint32_t pitch_step = driver->api_trait_get(RDD::API_TRAIT_TEXTURE_DATA_ROW_PITCH_STEP); for (uint32_t i = 0; i < frames[p_frame].download_texture_get_data_requests.size(); i++) { const TextureGetDataRequest &request = frames[p_frame].download_texture_get_data_requests[i]; @@ -6618,6 +6638,7 @@ void RenderingDevice::_stall_for_frame(uint32_t p_frame) { request.callback.call(packed_byte_array); } + GodotProfileZoneGrouped(_profile_zone, "clear buffers"); frames[p_frame].download_texture_staging_buffers.clear(); frames[p_frame].download_buffer_texture_copy_regions.clear(); frames[p_frame].download_texture_mipmap_offsets.clear(); diff --git a/servers/rendering/rendering_server_default.cpp b/servers/rendering/rendering_server_default.cpp index 8293789c200..d2abea79c92 100644 --- a/servers/rendering/rendering_server_default.cpp +++ b/servers/rendering/rendering_server_default.cpp @@ -31,6 +31,7 @@ #include "rendering_server_default.h" #include "core/os/os.h" +#include "core/profiling/profiling.h" #include "renderer_canvas_cull.h" #include "renderer_scene_cull.h" #include "rendering_server_globals.h" @@ -66,6 +67,7 @@ void RenderingServerDefault::request_frame_drawn_callback(const Callable &p_call } void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) { + GodotProfileZoneGroupedFirst(_profile_zone, "rasterizer->begin_frame"); RSG::rasterizer->begin_frame(frame_step); TIMESTAMP_BEGIN() @@ -75,6 +77,7 @@ void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) { RENDER_TIMESTAMP("Prepare Render Frame"); #ifndef XR_DISABLED + GodotProfileZoneGrouped(_profile_zone, "xr_server->pre_render"); XRServer *xr_server = XRServer::get_singleton(); if (xr_server != nullptr) { // Let XR server know we're about to render a frame. 
@@ -82,30 +85,41 @@ void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) { } #endif // XR_DISABLED + GodotProfileZoneGrouped(_profile_zone, "scene->update"); RSG::scene->update(); //update scenes stuff before updating instances + GodotProfileZoneGrouped(_profile_zone, "canvas->update"); RSG::canvas->update(); frame_setup_time = double(OS::get_singleton()->get_ticks_usec() - time_usec) / 1000.0; + GodotProfileZoneGrouped(_profile_zone, "particles_storage->update_particles"); RSG::particles_storage->update_particles(); //need to be done after instances are updated (colliders and particle transforms), and colliders are rendered + GodotProfileZoneGrouped(_profile_zone, "scene->render_probes"); RSG::scene->render_probes(); + GodotProfileZoneGrouped(_profile_zone, "viewport->draw_viewports"); RSG::viewport->draw_viewports(p_swap_buffers); + + GodotProfileZoneGrouped(_profile_zone, "canvas_render->update"); RSG::canvas_render->update(); + GodotProfileZoneGrouped(_profile_zone, "rasterizer->end_frame"); RSG::rasterizer->end_frame(p_swap_buffers); #ifndef XR_DISABLED if (xr_server != nullptr) { + GodotProfileZone("xr_server->end_frame"); // let our XR server know we're done so we can get our frame timing xr_server->end_frame(); } #endif // XR_DISABLED + GodotProfileZoneGrouped(_profile_zone, "update_visibility_notifiers"); RSG::canvas->update_visibility_notifiers(); RSG::scene->update_visibility_notifiers(); + GodotProfileZoneGrouped(_profile_zone, "post_draw_steps"); if (create_thread) { callable_mp(this, &RenderingServerDefault::_run_post_draw_steps).call_deferred(); } else { @@ -113,6 +127,7 @@ void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) { } if (RSG::utilities->get_captured_timestamps_count()) { + GodotProfileZoneGrouped(_profile_zone, "frame_profile"); Vector new_profile; if (RSG::utilities->capturing_timestamps) { new_profile.resize(RSG::utilities->get_captured_timestamps_count()); @@ -143,6 +158,7 @@ void 
RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) { frame_profile_frame = RSG::utilities->get_captured_timestamps_frame(); if (print_gpu_profile) { + GodotProfileZoneGrouped(_profile_zone, "gpu_profile"); if (print_frame_profile_ticks_from == 0) { print_frame_profile_ticks_from = OS::get_singleton()->get_ticks_usec(); } @@ -185,6 +201,7 @@ void RenderingServerDefault::_draw(bool p_swap_buffers, double frame_step) { } } + GodotProfileZoneGrouped(_profile_zone, "memory_info"); RSG::utilities->update_memory_info(); }