Fix Android TTS on-demand init.

2025-11-04 12:00:25 +00:00 · 2025-07-31 16:47:19 +03:00
parent 2a9ff39264
commit 4915d61362
3 changed files with 138 additions and 21 deletions
--- a/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java
+++ b/platform/android/java/lib/src/org/godotengine/godot/tts/GodotTTS.java
@@ -56,15 +56,21 @@ import java.util.Set;
 * </ul>
 */
@Keep
-public class GodotTTS extends UtteranceProgressListener {
+public class GodotTTS extends UtteranceProgressListener implements TextToSpeech.OnInitListener {
 	// Note: These constants must be in sync with DisplayServer::TTSUtteranceEvent enum from "servers/display_server.h".
 	final private static int EVENT_START = 0;
 	final private static int EVENT_END = 1;
 	final private static int EVENT_CANCEL = 2;
 	final private static int EVENT_BOUNDARY = 3;
 	// Note: These constants must be in sync with TTS_Android constants from "platform/android/tts_android.h".
 	final private static int INIT_STATE_UNKNOWN = 0;
 	final private static int INIT_STATE_SUCCESS = 1;
 	final private static int INIT_STATE_FAIL = -1;
 	private final Context context;
 	private TextToSpeech synth;
 	private int state;
 	private LinkedList<GodotUtterance> queue;
 	final private Object lock = new Object();
 	private GodotUtterance lastUtterance;
@@ -82,6 +88,9 @@ public class GodotTTS extends UtteranceProgressListener {
 			GodotUtterance message = queue.pollFirst();
 			Set<Voice> voices = synth.getVoices();
 			if (voices == null) {
 				return;
 			}
 			for (Voice v : voices) {
 				if (v.getName().equals(message.voice)) {
 					synth.setVoice(v);
@@ -189,17 +198,35 @@ public class GodotTTS extends UtteranceProgressListener {
 	 * Initialize synth and query.
 	 */
 	public void init() {
-		synth = new TextToSpeech(context, null);
+		state = INIT_STATE_UNKNOWN;
 		// NOTE(review): `state` is reset above without holding `lock`, whereas onInit() and getState() access it inside synchronized (lock) — confirm init() cannot run concurrently with them.
 		// Passing `this` as the init listener lets onInit() record the engine's result in `state`.
 		synth = new TextToSpeech(context, this);
 		queue = new LinkedList<GodotUtterance>();
 		// This object is also the utterance progress listener for the synthesizer.
 		synth.setOnUtteranceProgressListener(this);
 	}
 	/**
 	 * Callback invoked by the TTS engine once its initialization has finished; stores the outcome in {@code state}.
 	 *
 	 * @param status Result code reported by the engine. {@code TextToSpeech.SUCCESS} is recorded as
 	 *               INIT_STATE_SUCCESS, any other value as INIT_STATE_FAIL.
 	 */
 	@Override
 	public void onInit(int status) {
 		// Map the engine status to our own state constant before taking the lock.
 		final int result = (status == TextToSpeech.SUCCESS) ? INIT_STATE_SUCCESS : INIT_STATE_FAIL;
 		synchronized (lock) {
 			state = result;
 		}
 	}
 	/**
 	 * Adds an utterance to the queue.
 	 */
 	public void speak(String text, String voice, int volume, float pitch, float rate, int utterance_id, boolean interrupt) {
 		synchronized (lock) {
 			if (state != INIT_STATE_SUCCESS) {
 				return;
 			}
 			GodotUtterance message = new GodotUtterance(text, voice, volume, pitch, rate, utterance_id);
 			queue.addLast(message);
@@ -216,6 +243,9 @@ public class GodotTTS extends UtteranceProgressListener {
 	 */
 	public void pauseSpeaking() {
 		synchronized (lock) {
 			if (state != INIT_STATE_SUCCESS) {
 				return;
 			}
 			if (!paused) {
 				paused = true;
 				synth.stop();
@@ -228,10 +258,16 @@ public class GodotTTS extends UtteranceProgressListener {
 	 */
 	public void resumeSpeaking() {
 		synchronized (lock) {
 			if (state != INIT_STATE_SUCCESS) {
 				return;
 			}
 			if (lastUtterance != null && paused) {
 				int mode = TextToSpeech.QUEUE_FLUSH;
 				Set<Voice> voices = synth.getVoices();
 				if (voices == null) {
 					return;
 				}
 				for (Voice v : voices) {
 					if (v.getName().equals(lastUtterance.voice)) {
 						synth.setVoice(v);
@@ -261,6 +297,9 @@ public class GodotTTS extends UtteranceProgressListener {
 	 */
 	public void stopSpeaking() {
 		synchronized (lock) {
 			if (state != INIT_STATE_SUCCESS) {
 				return;
 			}
 			for (GodotUtterance u : queue) {
 				GodotLib.ttsCallback(EVENT_CANCEL, u.id, 0);
 			}
@@ -282,7 +321,14 @@ public class GodotTTS extends UtteranceProgressListener {
 	 * Returns voice information.
 	 */
 	public String[] getVoices() {
 		synchronized (lock) {
 			if (state != INIT_STATE_SUCCESS) {
 				return new String[0];
 			}
 			Set<Voice> voices = synth.getVoices();
 			if (voices == null) {
 				return new String[0];
 			}
 			String[] list = new String[voices.size()];
 			int i = 0;
 			for (Voice v : voices) {
@@ -290,6 +336,7 @@ public class GodotTTS extends UtteranceProgressListener {
 			}
 			return list;
 		}
 	}
 	/**
 	 * Returns true if the synthesizer is generating speech, or has utterances waiting in the queue.
@@ -304,4 +351,13 @@ public class GodotTTS extends UtteranceProgressListener {
 	public boolean isPaused() {
 		// NOTE(review): `paused` is read here without holding `lock`, while pauseSpeaking() writes it inside synchronized (lock) — confirm the field's declaration makes this read safe.
 		return paused;
 	}
 	/**
 	 * Reports the current synthesizer initialization state.
 	 *
 	 * @return INIT_STATE_SUCCESS if the synthesizer initialization finished successfully, INIT_STATE_FAIL if
 	 *         initialization failed, and INIT_STATE_UNKNOWN otherwise.
 	 */
 	public int getState() {
 		final int current;
 		// Take the snapshot under the same lock onInit() uses when writing the state.
 		synchronized (lock) {
 			current = state;
 		}
 		return current;
 	}
 }
--- a/platform/android/tts_android.cpp
+++ b/platform/android/tts_android.cpp
@@ -38,9 +38,14 @@ bool TTS_Android::initialized = false;
 jobject TTS_Android::tts = nullptr;
 jclass TTS_Android::cls = nullptr;
 Thread TTS_Android::init_thread;
 SafeFlag TTS_Android::quit_request;
 SafeFlag TTS_Android::init_done;
 jmethodID TTS_Android::_init = nullptr;
 jmethodID TTS_Android::_is_speaking = nullptr;
 jmethodID TTS_Android::_is_paused = nullptr;
 jmethodID TTS_Android::_get_state = nullptr;
 jmethodID TTS_Android::_get_voices = nullptr;
 jmethodID TTS_Android::_speak = nullptr;
 jmethodID TTS_Android::_pause_speaking = nullptr;
@@ -49,12 +54,49 @@ jmethodID TTS_Android::_stop_speaking = nullptr;
 HashMap<int, Char16String> TTS_Android::ids;
-void TTS_Android::initialize_tts() {
+void TTS_Android::_thread_function(void *self) {
 	// Body of init_thread: asks the Java side to initialize TTS, then polls its state until the
 	// result is known or termination is requested. `self` is unused (the thread is started with nullptr).
 	JNIEnv *env = get_jni_env();
 	ERR_FAIL_NULL(env);
 	// No `_init` check is needed here: initialize_tts() only starts this thread when `_init`,
 	// `_get_state` and `tts` are all set.
 	env->CallVoidMethod(tts, _init);
 	// Poll interval in milliseconds (multiplied by 1000 for delay_usec below).
 	uint64_t sleep = 200;
 	while (env->CallIntMethod(tts, _get_state) == INIT_STATE_UNKNOWN && !quit_request.is_set()) {
 		OS::get_singleton()->delay_usec(1000 * sleep);
 	}
 	// Signal waiters in initialize_tts() that polling has ended (success, failure, or quit request).
 	init_done.set();
 }
 void TTS_Android::initialize_tts(bool p_wait) {
 	// Starts the background TTS initialization if needed and sets `initialized` once the Java
 	// side reports INIT_STATE_SUCCESS.
 	// p_wait: when true, block for up to 1 second waiting for the init thread to finish.
 	// Nothing can be done until setup() has stored the Java object and the method IDs used here.
 	if (!_init || !_get_state || !tts) {
 		return;
 	}
 	JNIEnv *env = get_jni_env();
 	ERR_FAIL_NULL(env);
 	// Launch the init thread only once: skip if it is running or has already completed.
 	if (!init_thread.is_started() && !init_done.is_set()) {
 		init_thread.start(TTS_Android::_thread_function, nullptr);
 	}
 	if (env->CallIntMethod(tts, _get_state) == INIT_STATE_SUCCESS) {
 		initialized = true;
 		return;
 	}
 	// If it's not initialized at launch, wait for up to 1 second for TTS init.
 	if (p_wait) {
 		const uint64_t sleep = 200; // Poll interval, in milliseconds.
 		const uint64_t wait = 1000000; // Maximum total wait, in microseconds.
 		const uint64_t time = OS::get_singleton()->get_ticks_usec();
 		// Check init_done before each sleep, so a call made after initialization has already
 		// finished (e.g. with a failure) returns immediately instead of stalling for 200 ms.
 		while (!init_done.is_set() && OS::get_singleton()->get_ticks_usec() - time < wait) {
 			OS::get_singleton()->delay_usec(1000 * sleep);
 		}
 	}
 	if (env->CallIntMethod(tts, _get_state) == INIT_STATE_SUCCESS) {
 		initialized = true;
 	}
 }
@@ -64,6 +106,8 @@ void TTS_Android::setup(jobject p_tts) {
 	ERR_FAIL_NULL(env);
 	tts = env->NewGlobalRef(p_tts);
 	quit_request.clear();
 	init_done.clear();
 	jclass c = env->GetObjectClass(tts);
 	cls = (jclass)env->NewGlobalRef(c);
@@ -71,6 +115,7 @@ void TTS_Android::setup(jobject p_tts) {
 	_init = env->GetMethodID(cls, "init", "()V");
 	_is_speaking = env->GetMethodID(cls, "isSpeaking", "()Z");
 	_is_paused = env->GetMethodID(cls, "isPaused", "()Z");
 	_get_state = env->GetMethodID(cls, "getState", "()I");
 	_get_voices = env->GetMethodID(cls, "getVoices", "()[Ljava/lang/String;");
 	_speak = env->GetMethodID(cls, "speak", "(Ljava/lang/String;Ljava/lang/String;IFFIZ)V");
 	_pause_speaking = env->GetMethodID(cls, "pauseSpeaking", "()V");
@@ -79,7 +124,7 @@ void TTS_Android::setup(jobject p_tts) {
 	bool tts_enabled = GLOBAL_GET("audio/general/text_to_speech");
 	if (tts_enabled) {
-		initialize_tts();
+		initialize_tts(false);
 	}
 }
@@ -87,6 +132,11 @@ void TTS_Android::terminate() {
 	JNIEnv *env = get_jni_env();
 	ERR_FAIL_NULL(env);
 	if (init_thread.is_started()) {
 		quit_request.set();
 		init_thread.wait_to_finish();
 	}
 	if (cls) {
 		env->DeleteGlobalRef(cls);
 	}
@@ -99,7 +149,7 @@ void TTS_Android::_java_utterance_callback(int p_event, int p_id, int p_pos) {
 	if (unlikely(!initialized)) {
 		initialize_tts();
 	}
-	ERR_FAIL_NULL(tts);
+	ERR_FAIL_COND_MSG(!initialized || tts == nullptr, "Text to Speech not initialized.");
 	if (ids.has(p_id)) {
 		int pos = 0;
 		if ((DisplayServer::TTSUtteranceEvent)p_event == DisplayServer::TTS_UTTERANCE_BOUNDARY) {
@@ -123,7 +173,7 @@ bool TTS_Android::is_speaking() {
 	if (unlikely(!initialized)) {
 		initialize_tts();
 	}
-	ERR_FAIL_NULL_V(tts, false);
+	ERR_FAIL_COND_V_MSG(!initialized || tts == nullptr, false, "Text to Speech not initialized.");
 	if (_is_speaking) {
 		JNIEnv *env = get_jni_env();
@@ -138,7 +188,7 @@ bool TTS_Android::is_paused() {
 	if (unlikely(!initialized)) {
 		initialize_tts();
 	}
-	ERR_FAIL_NULL_V(tts, false);
+	ERR_FAIL_COND_V_MSG(!initialized || tts == nullptr, false, "Text to Speech not initialized.");
 	if (_is_paused) {
 		JNIEnv *env = get_jni_env();
@@ -153,7 +203,7 @@ Array TTS_Android::get_voices() {
 	if (unlikely(!initialized)) {
 		initialize_tts();
 	}
-	ERR_FAIL_NULL_V(tts, Array());
+	ERR_FAIL_COND_V_MSG(!initialized || tts == nullptr, Array(), "Text to Speech not initialized.");
 	Array list;
 	if (_get_voices) {
 		JNIEnv *env = get_jni_env();
@@ -184,7 +234,7 @@ void TTS_Android::speak(const String &p_text, const String &p_voice, int p_volum
 	if (unlikely(!initialized)) {
 		initialize_tts();
 	}
-	ERR_FAIL_NULL(tts);
+	ERR_FAIL_COND_MSG(!initialized || tts == nullptr, "Text to Speech not initialized.");
 	if (p_interrupt) {
 		stop();
 	}
@@ -212,7 +262,7 @@ void TTS_Android::pause() {
 	if (unlikely(!initialized)) {
 		initialize_tts();
 	}
-	ERR_FAIL_NULL(tts);
+	ERR_FAIL_COND_MSG(!initialized || tts == nullptr, "Text to Speech not initialized.");
 	if (_pause_speaking) {
 		JNIEnv *env = get_jni_env();
@@ -225,7 +275,7 @@ void TTS_Android::resume() {
 	if (unlikely(!initialized)) {
 		initialize_tts();
 	}
-	ERR_FAIL_NULL(tts);
+	ERR_FAIL_COND_MSG(!initialized || tts == nullptr, "Text to Speech not initialized.");
 	if (_resume_speaking) {
 		JNIEnv *env = get_jni_env();
@@ -238,7 +288,7 @@ void TTS_Android::stop() {
 	if (unlikely(!initialized)) {
 		initialize_tts();
 	}
-	ERR_FAIL_NULL(tts);
+	ERR_FAIL_COND_MSG(!initialized || tts == nullptr, "Text to Speech not initialized.");
 	for (const KeyValue<int, Char16String> &E : ids) {
 		DisplayServer::get_singleton()->tts_post_utterance_event(DisplayServer::TTS_UTTERANCE_CANCELED, E.key);
 	}
--- a/platform/android/tts_android.h
+++ b/platform/android/tts_android.h
@@ -39,6 +39,10 @@
 #include <jni.h>
 class TTS_Android {
 	static inline int INIT_STATE_UNKNOWN = 0;
 	static inline int INIT_STATE_SUCCESS = 1;
 	static inline int INIT_STATE_FAIL = -1;
 	static bool initialized;
 	static jobject tts;
 	static jclass cls;
@@ -46,15 +50,22 @@ class TTS_Android {
 	static jmethodID _init;
 	static jmethodID _is_speaking;
 	static jmethodID _is_paused;
 	static jmethodID _get_state;
 	static jmethodID _get_voices;
 	static jmethodID _speak;
 	static jmethodID _pause_speaking;
 	static jmethodID _resume_speaking;
 	static jmethodID _stop_speaking;
 	static Thread init_thread;
 	static SafeFlag quit_request;
 	static SafeFlag init_done;
 	static void _thread_function(void *self);
 	static HashMap<int, Char16String> ids;
-	static void initialize_tts();
+	static void initialize_tts(bool p_wait = true);
 public:
 	static void setup(jobject p_tts);