
Ensure that threads only process one pump task.

This is necessary because a thread that takes on more than one pump task will always deadlock, since pump tasks never return.

This means when using separate threads for certain systems (like physics or rendering), we need to be sure that there are enough threads to have at least one per system (to ensure forward progress).
clayjohn
2025-07-16 23:31:35 -07:00
parent 7826b6b13f
commit 782b9e328c
6 changed files with 71 additions and 17 deletions
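
As the commit message explains, a pump task is a server's main loop: it only returns at shutdown, so the worker thread it lands on is effectively dedicated to that system. Below is a minimal sketch of how a caller might claim such a thread through the new p_pump_task flag. The PumpExampleServer class and its loop are hypothetical; WorkerThreadPool::add_task (with the signature added in this commit), callable_mp, SafeFlag, and wait_for_task_completion are existing Godot APIs.

#include "core/object/callable_method_pointer.h"
#include "core/object/object.h"
#include "core/object/worker_thread_pool.h"
#include "core/templates/safe_refcount.h"

// Hypothetical server that wants its own dedicated worker thread.
class PumpExampleServer : public Object {
	SafeFlag exit_flag;
	WorkerThreadPool::TaskID task_id = WorkerThreadPool::INVALID_TASK_ID;

	void _server_loop() {
		// Pump loop: does not return until shutdown, which is why the pool
		// must never schedule a second pump task on the same worker thread.
		while (!exit_flag.is_set()) {
			// ... process one iteration of queued commands ...
		}
	}

public:
	void init_threaded() {
		// The trailing `true` is the new p_pump_task argument; it tells the pool
		// this task never returns, so the pool can spawn an extra worker if every
		// existing thread already holds a pump task.
		task_id = WorkerThreadPool::get_singleton()->add_task(
				callable_mp(this, &PumpExampleServer::_server_loop),
				/* p_high_priority */ true, "PumpExampleServer loop", /* p_pump_task */ true);
	}

	void finish_threaded() {
		exit_flag.set();
		WorkerThreadPool::get_singleton()->wait_for_task_completion(task_id);
	}
};

With the changes below, posting a pump task when every worker already holds one no longer risks a deadlock: _add_task spawns an additional worker thread (from the capacity reserved in init), and _wait_collaboratively refuses to pick up a second pump task on a thread that already has one.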

@@ -76,6 +76,7 @@ void WorkerThreadPool::_process_task(Task *p_task) {
 		p_task->pool_thread_index = pool_thread_index;
 		prev_task = curr_thread.current_task;
 		curr_thread.current_task = p_task;
+		curr_thread.has_pump_task = p_task->is_pump_task;
 		if (p_task->pending_notify_yield_over) {
 			curr_thread.yield_is_over = true;
 		}
@@ -218,11 +219,13 @@ void WorkerThreadPool::_thread_function(void *p_user) {
 	}
 }
-void WorkerThreadPool::_post_tasks(Task **p_tasks, uint32_t p_count, bool p_high_priority, MutexLock<BinaryMutex> &p_lock) {
+void WorkerThreadPool::_post_tasks(Task **p_tasks, uint32_t p_count, bool p_high_priority, MutexLock<BinaryMutex> &p_lock, bool p_pump_task) {
 	// Fall back to processing on the calling thread if there are no worker threads.
 	// Separated into its own variable to make it easier to extend this logic
 	// in custom builds.
-	bool process_on_calling_thread = threads.is_empty();
+	// Avoid calling pump tasks or low priority tasks from the calling thread.
+	bool process_on_calling_thread = threads.is_empty() && p_high_priority && !p_pump_task;
 	if (process_on_calling_thread) {
 		p_lock.temp_unlock();
 		for (uint32_t i = 0; i < p_count; i++) {
@@ -339,7 +342,7 @@ WorkerThreadPool::TaskID WorkerThreadPool::add_native_task(void (*p_func)(void *
 	return _add_task(Callable(), p_func, p_userdata, nullptr, p_high_priority, p_description);
 }
-WorkerThreadPool::TaskID WorkerThreadPool::_add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description) {
+WorkerThreadPool::TaskID WorkerThreadPool::_add_task(const Callable &p_callable, void (*p_func)(void *), void *p_userdata, BaseTemplateUserdata *p_template_userdata, bool p_high_priority, const String &p_description, bool p_pump_task) {
 	MutexLock<BinaryMutex> lock(task_mutex);
 	// Get a free task
@@ -351,15 +354,50 @@ WorkerThreadPool::TaskID WorkerThreadPool::_add_task(const Callable &p_callable,
 	task->native_func_userdata = p_userdata;
 	task->description = p_description;
 	task->template_userdata = p_template_userdata;
+	task->is_pump_task = p_pump_task;
 	tasks.insert(id, task);
-	_post_tasks(&task, 1, p_high_priority, lock);
+#ifdef THREADS_ENABLED
+	if (p_pump_task) {
+		pump_task_count++;
+		int thread_count = get_thread_count();
+		if (pump_task_count >= thread_count) {
+			print_verbose(vformat("A greater number of dedicated threads were requested (%d) than threads available (%d). Please increase the number of available worker task threads. Recovering this session by spawning more worker task threads.", pump_task_count + 1, thread_count)); // +1 because we want to keep a Thread without any pump tasks free.
+			Thread::Settings settings;
+#ifdef __APPLE__
+			// The default stack size for new threads on Apple platforms is 512KiB.
+			// This is insufficient when using a library like SPIRV-Cross,
+			// which can generate deep stacks and result in a stack overflow.
+#ifdef DEV_ENABLED
+			// Debug builds need an even larger stack size.
+			settings.stack_size = 2 * 1024 * 1024; // 2 MiB
+#else
+			settings.stack_size = 1 * 1024 * 1024; // 1 MiB
+#endif
+#endif
+			// Re-sizing implies relocation, which is not supported for this array.
+			CRASH_COND_MSG(thread_count + 1 > (int)threads.get_capacity(), "Reserve trick for worker thread pool failed. Crashing.");
+			threads.resize_initialized(thread_count + 1);
+			threads[thread_count].index = thread_count;
+			threads[thread_count].pool = this;
+			threads[thread_count].thread.start(&WorkerThreadPool::_thread_function, &threads[thread_count], settings);
+			thread_ids.insert(threads[thread_count].thread.get_id(), thread_count);
+		}
+	}
+#endif
+	_post_tasks(&task, 1, p_high_priority, lock, p_pump_task);
 	return id;
 }
-WorkerThreadPool::TaskID WorkerThreadPool::add_task(const Callable &p_action, bool p_high_priority, const String &p_description) {
-	return _add_task(p_action, nullptr, nullptr, nullptr, p_high_priority, p_description);
+WorkerThreadPool::TaskID WorkerThreadPool::add_task(const Callable &p_action, bool p_high_priority, const String &p_description, bool p_pump_task) {
+	return _add_task(p_action, nullptr, nullptr, nullptr, p_high_priority, p_description, p_pump_task);
 }
+WorkerThreadPool::TaskID WorkerThreadPool::add_task_bind(const Callable &p_action, bool p_high_priority, const String &p_description) {
+	return _add_task(p_action, nullptr, nullptr, nullptr, p_high_priority, p_description, false);
+}
 bool WorkerThreadPool::is_task_completed(TaskID p_task_id) const {
@@ -510,7 +548,12 @@ void WorkerThreadPool::_wait_collaboratively(ThreadData *p_caller_pool_thread, T
 			if (p_caller_pool_thread->pool->task_queue.first()) {
 				task_to_process = task_queue.first()->self();
-				task_queue.remove(task_queue.first());
+				if ((p_task == ThreadData::YIELDING || p_caller_pool_thread->has_pump_task == true) && task_to_process->is_pump_task) {
+					task_to_process = nullptr;
+					_notify_threads(p_caller_pool_thread, 1, 0);
+				} else {
+					task_queue.remove(task_queue.first());
+				}
 			}
 			if (!task_to_process) {
@@ -661,7 +704,7 @@ WorkerThreadPool::GroupID WorkerThreadPool::_add_group_task(const Callable &p_ca
 	groups[id] = group;
-	_post_tasks(tasks_posted, p_tasks, p_high_priority, lock);
+	_post_tasks(tasks_posted, p_tasks, p_high_priority, lock, false);
 	return id;
 }
@@ -788,6 +831,11 @@ void WorkerThreadPool::init(int p_thread_count, float p_low_priority_task_ratio)
 	print_verbose(vformat("WorkerThreadPool: %d threads, %d max low-priority.", p_thread_count, max_low_priority_threads));
+#ifdef THREADS_ENABLED
+	// Reserve 5 threads in case we need separate threads for 1) 2D physics 2) 3D physics 3) rendering 4) GPU texture compression, 5) all other tasks.
+	// We cannot safely increase the Vector size at runtime, so reserve enough up front, but only launch those needed.
+	threads.reserve(5);
+#endif
 	threads.resize(p_thread_count);
 	Thread::Settings settings;
@@ -862,7 +910,7 @@ void WorkerThreadPool::finish() {
 }
 void WorkerThreadPool::_bind_methods() {
-	ClassDB::bind_method(D_METHOD("add_task", "action", "high_priority", "description"), &WorkerThreadPool::add_task, DEFVAL(false), DEFVAL(String()));
+	ClassDB::bind_method(D_METHOD("add_task", "action", "high_priority", "description"), &WorkerThreadPool::add_task_bind, DEFVAL(false), DEFVAL(String()));
 	ClassDB::bind_method(D_METHOD("is_task_completed", "task_id"), &WorkerThreadPool::is_task_completed);
 	ClassDB::bind_method(D_METHOD("wait_for_task_completion", "task_id"), &WorkerThreadPool::wait_for_task_completion);
 	ClassDB::bind_method(D_METHOD("get_caller_task_id"), &WorkerThreadPool::get_caller_task_id);