diff --git a/core/string/optimized_translation.cpp b/core/string/optimized_translation.cpp index 0067997c393..d9e154c491c 100644 --- a/core/string/optimized_translation.cpp +++ b/core/string/optimized_translation.cpp @@ -314,10 +314,24 @@ StringName OptimizedTranslation::get_plural_message(const StringName &p_src_text return get_message(p_src_text, p_context); } +Vector OptimizedTranslation::_get_message_list() const { + WARN_PRINT_ONCE("OptimizedTranslation does not store the message texts to be translated."); + return {}; +} + +void OptimizedTranslation::get_message_list(List *r_messages) const { + WARN_PRINT_ONCE("OptimizedTranslation does not store the message texts to be translated."); +} + +int OptimizedTranslation::get_message_count() const { + WARN_PRINT_ONCE("OptimizedTranslation does not store the message texts to be translated."); + return 0; +} + void OptimizedTranslation::_get_property_list(List *p_list) const { - p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "hash_table")); - p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "bucket_table")); - p_list->push_back(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "strings")); + p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "hash_table", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR)); + p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "bucket_table", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR)); + p_list->push_back(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "strings", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR)); p_list->push_back(PropertyInfo(Variant::OBJECT, "load_from", PROPERTY_HINT_RESOURCE_TYPE, "Translation", PROPERTY_USAGE_EDITOR)); } diff --git a/core/string/optimized_translation.h b/core/string/optimized_translation.h index fc4f446831b..0f8957e95f7 100644 --- a/core/string/optimized_translation.h +++ b/core/string/optimized_translation.h @@ -35,23 +35,35 @@ class OptimizedTranslation : public Translation { GDCLASS(OptimizedTranslation, 
Translation); - //this translation uses a sort of modified perfect hash algorithm - //it requires hashing strings twice and then does a binary search, - //so it's slower, but at the same time it has an extremely high chance - //of catching untranslated strings + // This translation uses a sort of modified perfect hash algorithm. + // It requires hashing strings twice and then does a binary search, + // so it's slower, but at the same time it has an extremely high chance + // of catching untranslated strings. - //load/store friendly types + // `hash_table[hash(0, text)]` produces a `bucket_table` index or 0xFFFFFFFF if not found. Vector hash_table; + + // Contiguous `Bucket`s in a flat layout. Vector bucket_table; + + // Data for translated strings, UTF-8 encoded, either compressed or uncompressed. Vector strings; struct Bucket { + // Number of `Elem` objects at `elem`. int size; + + // Use `hash(func, text)` to generate the unique `Elem::key` in this bucket. uint32_t func; struct Elem { + // Unique key for the text. uint32_t key; + + // Used to index into `strings`. uint32_t str_offset; + + // The string is not compressed if `comp_size` equals `uncomp_size`. 
uint32_t comp_size; uint32_t uncomp_size; }; @@ -71,6 +83,8 @@ class OptimizedTranslation : public Translation { return d; } + virtual Vector _get_message_list() const override; + protected: bool _set(const StringName &p_name, const Variant &p_value); bool _get(const StringName &p_name, Variant &r_ret) const; @@ -83,5 +97,8 @@ public: virtual Vector get_translated_message_list() const override; void generate(const Ref &p_from); + virtual void get_message_list(List *r_messages) const override; + virtual int get_message_count() const override; + OptimizedTranslation() {} }; diff --git a/doc/classes/OptimizedTranslation.xml b/doc/classes/OptimizedTranslation.xml index bc158984d72..e7a12401a87 100644 --- a/doc/classes/OptimizedTranslation.xml +++ b/doc/classes/OptimizedTranslation.xml @@ -1,10 +1,11 @@ - An optimized translation, used by default for CSV Translations. + An optimized translation. - An optimized translation, used by default for CSV Translations. Uses real-time compressed translations, which results in very small dictionaries. + An optimized translation. Uses real-time compressed translations, which results in very small dictionaries. + For optimization purposes, this class does not store the untranslated strings. Therefore, [method Translation.get_message_list] always returns an empty array, and [method Translation.get_message_count] always returns [code]0[/code]. @@ -14,6 +15,7 @@ Generates and sets an optimized translation from the given [Translation] resource. + [b]Note:[/b] Messages in [param from] should not use context or plural forms. [b]Note:[/b] This method is intended to be used in the editor. It does nothing when called from an exported project. 
diff --git a/doc/classes/ResourceImporterCSVTranslation.xml b/doc/classes/ResourceImporterCSVTranslation.xml index 578a79fdca9..331379fdeda 100644 --- a/doc/classes/ResourceImporterCSVTranslation.xml +++ b/doc/classes/ResourceImporterCSVTranslation.xml @@ -1,10 +1,11 @@ - Imports comma-separated values + Imports comma-separated values as [Translation]s. Comma-separated values are a plain text table storage format. The format's simplicity makes it easy to edit in any text editor or spreadsheet software. This makes it a common choice for game localization. + In the CSV file used for translation, the first column contains string identifiers, and the first row serves as the header. The first column's header can be any value. The remaining headers indicate the locale for that column. Columns whose headers begin with an underscore ([code]_[/code]) will be ignored. [b]Example CSV file:[/b] [codeblock lang=text] keys,en,es,ja @@ -13,16 +14,38 @@ BYE,Goodbye,Adiós,さようなら QUOTE,"""Hello"" said the man.","""Hola"" dijo el hombre.",「こんにちは」男は言いました [/codeblock] + Although keys in the first column typically use uppercase string identifiers, it is not uncommon to directly use strings appearing in the game as keys. To avoid string ambiguity, you can use a special [code]?context[/code] column to specify the context to use with [method Object.tr]. + [codeblock lang=text] + en,?context,fr,ja,zh + Letter,Alphabet,Lettre,字母,字母 + Letter,Message,Courrier,手紙,信件 + [/codeblock] + To set the plural form of a string to use with [method Object.tr_n], add a special [code]?plural[/code] column. After setting the plural form of the source string in this column, you can add additional rows to provide translations for more plural forms. The first column and all special columns in these plural form rows must be empty. + Godot includes built-in plural rules for some languages. You can also customize them using a special [code]?pluralrule[/code] row. 
See [url=https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html]GNU gettext[/url] for examples and more info. + [codeblock lang=text] + en,?plural,fr,ru,zh,_Comment + ?pluralrule,,nplurals=2; plural=(n >= 2);,,,Customize the plural rule for French + There is %d apple,There are %d apples,Il y a %d pomme,Есть %d яблоко,那里有%d个苹果, + ,,Il y a %d pommes,Есть %d яблока,, + ,,,Есть %d яблок,, + [/codeblock] $DOCS_URL/tutorials/assets_pipeline/importing_translations.html - - If [code]true[/code], creates an [OptimizedTranslation] instead of a [Translation]. This makes the resulting file smaller at the cost of a small CPU overhead. + + - [b]Disabled[/b]: Creates a [Translation]. + - [b]Auto[/b]: Creates an [OptimizedTranslation] when possible. This makes the resulting file smaller at the cost of a small CPU overhead. Falls back to [Translation] for translations with context or plural forms. The delimiter to use in the CSV file. The default value matches the common CSV convention. Tab-separated values are sometimes called TSV files. + + If [code]true[/code], message keys in the CSV file are unescaped using [method String.c_unescape] during the import process. + + + If [code]true[/code], message translations in the CSV file are unescaped using [method String.c_unescape] during the import process. 
+ diff --git a/editor/import/resource_importer_csv_translation.cpp b/editor/import/resource_importer_csv_translation.cpp index 6e7e5862771..cf3cb0aaab9 100644 --- a/editor/import/resource_importer_csv_translation.cpp +++ b/editor/import/resource_importer_csv_translation.cpp @@ -68,72 +68,169 @@ String ResourceImporterCSVTranslation::get_preset_name(int p_idx) const { } void ResourceImporterCSVTranslation::get_import_options(const String &p_path, List *r_options, int p_preset) const { - r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "compress"), true)); + r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "compress", PROPERTY_HINT_ENUM, "Disabled,Auto"), 1)); // Enum for compatibility with previous versions. r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "delimiter", PROPERTY_HINT_ENUM, "Comma,Semicolon,Tab"), 0)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "unescape_keys"), false)); + r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "unescape_translations"), true)); } Error ResourceImporterCSVTranslation::import(ResourceUID::ID p_source_id, const String &p_source_file, const String &p_save_path, const HashMap &p_options, List *r_platform_variants, List *r_gen_files, Variant *r_metadata) { - bool compress = p_options["compress"]; - - String delimiter; - switch ((int)p_options["delimiter"]) { - case 0: - delimiter = ","; - break; - case 1: - delimiter = ";"; - break; - case 2: - delimiter = "\t"; - break; - } - Ref f = FileAccess::open(p_source_file, FileAccess::READ); ERR_FAIL_COND_V_MSG(f.is_null(), ERR_INVALID_PARAMETER, "Cannot open file from path '" + p_source_file + "'."); - Vector line = f->get_csv_line(delimiter); - ERR_FAIL_COND_V(line.size() <= 1, ERR_PARSE_ERROR); - - Vector locales; - Vector> translations; - HashSet skipped_locales; - - for (int i = 1; i < line.size(); i++) { - String locale = TranslationServer::get_singleton()->standardize_locale(line[i]); - - if (line[i].left(1) == 
"_") { - skipped_locales.insert(i); - continue; - } else if (locale.is_empty()) { - skipped_locales.insert(i); - ERR_CONTINUE_MSG(true, vformat("Error importing CSV translation: Invalid locale format '%s', should be 'language_Script_COUNTRY_VARIANT@extra'. This column will be ignored.", line[i])); - } - - locales.push_back(locale); - Ref translation; - translation.instantiate(); - translation->set_locale(locale); - translations.push_back(translation); + String delimiter; + switch ((int)p_options["delimiter"]) { + case 1: { + delimiter = ";"; + } break; + case 2: { + delimiter = "\t"; + } break; + default: { + delimiter = ","; + } break; } - do { - line = f->get_csv_line(delimiter); - String key = line[0]; - if (!key.is_empty()) { - ERR_CONTINUE_MSG(line.size() != locales.size() + (int)skipped_locales.size() + 1, vformat("Error importing CSV translation: expected %d locale(s), but the '%s' key has %d locale(s).", locales.size(), key, line.size() - 1)); + // Parse the header row. + HashMap> column_to_translation; + int context_column = -1; + int plural_column = -1; + { + const Vector line = f->get_csv_line(delimiter); + for (int i = 1; i < line.size(); i++) { + if (line[i].left(1) == "_") { + continue; + } + if (line[i].to_lower() == "?context") { + ERR_CONTINUE_MSG(context_column != -1, "Error importing CSV translation: Multiple '?context' columns found. Only one is allowed. Subsequent ones will be ignored."); + context_column = i; + continue; + } + if (line[i].to_lower() == "?plural") { + ERR_CONTINUE_MSG(plural_column != -1, "Error importing CSV translation: Multiple '?plural' columns found. Only one is allowed. Subsequent ones will be ignored."); + plural_column = i; + continue; + } + + const String locale = TranslationServer::get_singleton()->standardize_locale(line[i]); + ERR_CONTINUE_MSG(locale.is_empty(), vformat("Error importing CSV translation: Invalid locale format '%s', should be 'language_Script_COUNTRY_VARIANT@extra'. 
This column will be ignored.", line[i])); + + Ref translation; + translation.instantiate(); + translation->set_locale(locale); + column_to_translation[i] = translation; + } + + ERR_FAIL_COND_V_MSG(column_to_translation.is_empty(), ERR_PARSE_ERROR, "Error importing CSV translation: The CSV file must have at least one column for key and one column for translation."); + } + + // Parse content rows. + bool context_used = false; + bool plural_used = false; + { + const bool unescape_keys = p_options.has("unescape_keys") ? bool(p_options["unescape_keys"]) : false; + const bool unescape_translations = p_options.has("unescape_translations") ? bool(p_options["unescape_translations"]) : true; + + bool reading_plural_rows = false; + String plural_msgid; + String plural_msgctxt; + HashMap> plural_msgstrs; + + do { + const Vector line = f->get_csv_line(delimiter); + + // Skip empty lines. + if (line.size() == 1 && line[0].is_empty()) { + continue; + } + + if (line[0].to_lower() == "?pluralrule") { + for (int i = 1; i < line.size(); i++) { + if (line[i].is_empty() || !column_to_translation.has(i)) { + continue; + } + Ref translation = column_to_translation[i]; + ERR_CONTINUE_MSG(!translation->get_plural_rules_override().is_empty(), vformat("Error importing CSV translation: Multiple '?pluralrule' definitions found for locale '%s'. Only one is allowed. Subsequent ones will be ignored.", translation->get_locale())); + translation->set_plural_rules_override(line[i]); + } + continue; + } + + const String msgid = unescape_keys ? line[0].c_unescape() : line[0]; + if (!reading_plural_rows && msgid.is_empty()) { + continue; + } + + // It's okay if you define context or plural columns but don't use them. + const String msgctxt = (context_column != -1 && context_column < line.size()) ? line[context_column] : String(); + if (!msgctxt.is_empty()) { + context_used = true; + } + const String msgid_plural = (plural_column != -1 && plural_column < line.size()) ? 
line[plural_column] : String(); + if (!msgid_plural.is_empty()) { + plural_used = true; + } + + // End of plural rows. + if (reading_plural_rows && (!msgid.is_empty() || !msgctxt.is_empty() || !msgid_plural.is_empty())) { + reading_plural_rows = false; + + for (KeyValue> E : column_to_translation) { + Ref translation = E.value; + const Vector &msgstrs = plural_msgstrs[E.key]; + if (!msgstrs.is_empty()) { + translation->add_plural_message(plural_msgid, msgstrs, plural_msgctxt); + } + } + plural_msgstrs.clear(); + } + + // Start of plural rows. + if (!reading_plural_rows && !msgid_plural.is_empty()) { + reading_plural_rows = true; + plural_msgid = msgid; + plural_msgctxt = msgctxt; + } - int write_index = 0; // Keep track of translations written in case some locales are skipped. for (int i = 1; i < line.size(); i++) { - if (skipped_locales.has(i)) { + if (!column_to_translation.has(i)) { continue; } - translations.write[write_index++]->add_message(key, line[i].c_unescape()); + const String msgstr = unescape_translations ? line[i].c_unescape() : line[i]; + if (msgstr.is_empty()) { + continue; + } + if (reading_plural_rows) { + plural_msgstrs[i].push_back(msgstr); + } else { + column_to_translation[i]->add_message(msgid, msgstr, msgctxt); + } + } + } while (!f->eof_reached()); + + if (reading_plural_rows) { + for (KeyValue> E : column_to_translation) { + Ref translation = E.value; + const Vector &msgstrs = plural_msgstrs[E.key]; + if (!msgstrs.is_empty()) { + translation->add_plural_message(plural_msgid, msgstrs, plural_msgctxt); + } } } - } while (!f->eof_reached()); + } - for (int i = 0; i < translations.size(); i++) { - Ref xlt = translations[i]; + bool compress; + switch ((int)p_options["compress"]) { + case 0: { // Disabled. + compress = false; + } break; + default: { // Auto. 
+ compress = !context_used && !plural_used; + } break; + } + + for (KeyValue> E : column_to_translation) { + Ref xlt = E.value; if (compress) { Ref cxl = memnew(OptimizedTranslation); @@ -141,8 +238,8 @@ Error ResourceImporterCSVTranslation::import(ResourceUID::ID p_source_id, const xlt = cxl; } - String save_path = p_source_file.get_basename() + "." + translations[i]->get_locale() + ".translation"; - ResourceUID::ID save_id = hash64_murmur3_64(translations[i]->get_locale().hash64(), p_source_id) & 0x7FFFFFFFFFFFFFFF; + String save_path = p_source_file.get_basename() + "." + xlt->get_locale() + ".translation"; + ResourceUID::ID save_id = hash64_murmur3_64(xlt->get_locale().hash64(), p_source_id) & 0x7FFFFFFFFFFFFFFF; bool uid_already_exists = ResourceUID::get_singleton()->has_id(save_id); if (uid_already_exists) { // Avoid creating a new file with a duplicate UID.