1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-04 12:00:25 +00:00

Improve CSV translations

- Adds context and plural support
- Adds unescape import options
- Better document OptimizedTranslation

Co-Authored-By: Pāvels Nadtočajevs <7645683+bruvzg@users.noreply.github.com>
This commit is contained in:
Haoyu Qiu
2025-10-24 12:00:03 +08:00
parent 0fdb93cde6
commit c1ee8e52e4
5 changed files with 218 additions and 65 deletions

View File

@@ -314,10 +314,24 @@ StringName OptimizedTranslation::get_plural_message(const StringName &p_src_text
return get_message(p_src_text, p_context);
}
// Override of Translation::_get_message_list().
// This class does not keep the untranslated message texts, so there is nothing
// to list: a warning is printed via WARN_PRINT_ONCE and an empty vector is returned.
Vector<String> OptimizedTranslation::_get_message_list() const {
	WARN_PRINT_ONCE("OptimizedTranslation does not store the message texts to be translated.");
	return Vector<String>();
}
// Override of Translation::get_message_list().
// The untranslated message texts are not kept by this class, so `r_messages`
// is left untouched and only a warning is printed via WARN_PRINT_ONCE.
void OptimizedTranslation::get_message_list(List<StringName> *r_messages) const {
WARN_PRINT_ONCE("OptimizedTranslation does not store the message texts to be translated.");
}
// Override of Translation::get_message_count().
// Always reports 0 because the untranslated message texts are not kept by this
// class; a warning is printed via WARN_PRINT_ONCE to make that visible to callers.
int OptimizedTranslation::get_message_count() const {
WARN_PRINT_ONCE("OptimizedTranslation does not store the message texts to be translated.");
return 0;
}
// Appends this resource's dynamic properties to `p_list`.
//
// Each of the three raw data tables is registered exactly once, flagged with
// PROPERTY_USAGE_NO_EDITOR. Registering them a second time without usage flags
// would produce duplicate property entries with conflicting usage.
// `load_from` is flagged PROPERTY_USAGE_EDITOR and is hinted as a `Translation` resource.
void OptimizedTranslation::_get_property_list(List<PropertyInfo> *p_list) const {
	p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "hash_table", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR));
	p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "bucket_table", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR));
	p_list->push_back(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "strings", PROPERTY_HINT_NONE, "", PROPERTY_USAGE_NO_EDITOR));
	p_list->push_back(PropertyInfo(Variant::OBJECT, "load_from", PROPERTY_HINT_RESOURCE_TYPE, "Translation", PROPERTY_USAGE_EDITOR));
}

View File

@@ -35,23 +35,35 @@
class OptimizedTranslation : public Translation {
GDCLASS(OptimizedTranslation, Translation);
//this translation uses a sort of modified perfect hash algorithm
//it requires hashing strings twice and then does a binary search,
//so it's slower, but at the same time it has an extremely high chance
//of catching untranslated strings
// This translation uses a sort of modified perfect hash algorithm.
// It requires hashing strings twice and then does a binary search,
// so it's slower, but at the same time it has an extremely high chance
// of catching untranslated strings.
//load/store friendly types
// `hash_table[hash(0, text)]` produces a `bucket_table` index or 0xFFFFFFFF if not found.
Vector<int> hash_table;
// Contiguous `Bucket`s in a flat layout.
Vector<int> bucket_table;
// Data for translated strings, UTF-8 encoded, either compressed or uncompressed.
Vector<uint8_t> strings;
struct Bucket {
// Number of `Elem` objects at `elem`.
int size;
// Use `hash(func, text)` to generate the unique `Elem::key` in this bucket.
uint32_t func;
struct Elem {
// Unique key for the text.
uint32_t key;
// Used to index into `strings`.
uint32_t str_offset;
// The string is not compressed if `comp_size` equals `uncomp_size`.
uint32_t comp_size;
uint32_t uncomp_size;
};
@@ -71,6 +83,8 @@ class OptimizedTranslation : public Translation {
return d;
}
virtual Vector<String> _get_message_list() const override;
protected:
bool _set(const StringName &p_name, const Variant &p_value);
bool _get(const StringName &p_name, Variant &r_ret) const;
@@ -83,5 +97,8 @@ public:
virtual Vector<String> get_translated_message_list() const override;
void generate(const Ref<Translation> &p_from);
virtual void get_message_list(List<StringName> *r_messages) const override;
virtual int get_message_count() const override;
OptimizedTranslation() {}
};

View File

@@ -1,10 +1,11 @@
<?xml version="1.0" encoding="UTF-8" ?>
<class name="OptimizedTranslation" inherits="Translation" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../class.xsd">
<brief_description>
An optimized translation, used by default for CSV Translations.
An optimized translation.
</brief_description>
<description>
An optimized translation, used by default for CSV Translations. Uses real-time compressed translations, which results in very small dictionaries.
An optimized translation. Uses real-time compressed translations, which results in very small dictionaries.
This class does not store the untranslated strings for optimization purposes. Therefore, [method Translation.get_message_list] always returns an empty array, and [method Translation.get_message_count] always returns [code]0[/code].
</description>
<tutorials>
</tutorials>
@@ -14,6 +15,7 @@
<param index="0" name="from" type="Translation" />
<description>
Generates and sets an optimized translation from the given [Translation] resource.
[b]Note:[/b] Messages in [param from] should not use context or plural forms.
[b]Note:[/b] This method is intended to be used in the editor. It does nothing when called from an exported project.
</description>
</method>

View File

@@ -1,10 +1,11 @@
<?xml version="1.0" encoding="UTF-8" ?>
<class name="ResourceImporterCSVTranslation" inherits="ResourceImporter" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../class.xsd">
<brief_description>
Imports comma-separated values
Imports comma-separated values as [Translation]s.
</brief_description>
<description>
Comma-separated values are a plain text table storage format. The format's simplicity makes it easy to edit in any text editor or spreadsheet software. This makes it a common choice for game localization.
In the CSV file used for translation, the first column contains string identifiers, and the first row serves as the header. The first column's header can be any value. The remaining headers indicate the locale for that column. Columns whose headers begin with an underscore ([code]_[/code]) will be ignored.
[b]Example CSV file:[/b]
[codeblock lang=text]
keys,en,es,ja
@@ -13,16 +14,38 @@
BYE,Goodbye,Adiós,さようなら
QUOTE,"""Hello"" said the man.","""Hola"" dijo el hombre.",「こんにちは」男は言いました
[/codeblock]
Although keys in the first column typically use uppercase string identifiers, it is not uncommon to directly use strings appearing in the game as keys. To avoid string ambiguity, you can use a special [code]?context[/code] column to specify the context to use with [method Object.tr].
[codeblock lang=text]
en,?context,fr,ja,zh
Letter,Alphabet,Lettre,字母,字母
Letter,Message,Courrier,手紙,信件
[/codeblock]
To set the plural form of a string to use with [method Object.tr_n], add a special [code]?plural[/code] column. After setting the plural form of the source string in this column, you can add additional rows to provide translations for more plural forms. The first column and all special columns in these plural form rows must be empty.
Godot includes built-in plural rules for some languages. You can also customize them using a special [code]?pluralrule[/code] row. See [url=https://www.gnu.org/software/gettext/manual/html_node/Plural-forms.html]GNU gettext[/url] for examples and more info.
[codeblock lang=text]
en,?plural,fr,ru,zh,_Comment
?pluralrule,,nplurals=2; plural=(n &gt;= 2);,,,Customize the plural rule for French
There is %d apple,There are %d apples,Il y a %d pomme,Есть %d яблоко,那里有%d个苹果,
,,Il y a %d pommes,Есть %d яблока,,
,,,Есть %d яблок,,
[/codeblock]
</description>
<tutorials>
<link title="Importing translations">$DOCS_URL/tutorials/assets_pipeline/importing_translations.html</link>
</tutorials>
<members>
<member name="compress" type="bool" setter="" getter="" default="true">
If [code]true[/code], creates an [OptimizedTranslation] instead of a [Translation]. This makes the resulting file smaller at the cost of a small CPU overhead.
<member name="compress" type="int" setter="" getter="" default="1">
- [b]Disabled[/b]: Creates a [Translation].
- [b]Auto[/b]: Creates an [OptimizedTranslation] when possible. This makes the resulting file smaller at the cost of a small CPU overhead. Falls back to [Translation] for translations with context or plural forms.
</member>
<member name="delimiter" type="int" setter="" getter="" default="0">
The delimiter to use in the CSV file. The default value matches the common CSV convention. Tab-separated values are sometimes called TSV files.
</member>
<member name="unescape_keys" type="bool" setter="" getter="" default="false">
If [code]true[/code], message keys in the CSV file are unescaped using [method String.c_unescape] during the import process.
</member>
<member name="unescape_translations" type="bool" setter="" getter="" default="true">
If [code]true[/code], message translations in the CSV file are unescaped using [method String.c_unescape] during the import process.
</member>
</members>
</class>

View File

@@ -68,72 +68,169 @@ String ResourceImporterCSVTranslation::get_preset_name(int p_idx) const {
}
// Declares the import options shown for CSV translation files, with their defaults.
//
// Every option is registered exactly once. `compress` is an int enum
// ("Disabled,Auto", default 1); registering it additionally as a BOOL would
// create two options with the same name and conflicting types.
void ResourceImporterCSVTranslation::get_import_options(const String &p_path, List<ImportOption> *r_options, int p_preset) const {
	r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "compress", PROPERTY_HINT_ENUM, "Disabled,Auto"), 1)); // Enum for compatibility with previous versions.
	r_options->push_back(ImportOption(PropertyInfo(Variant::INT, "delimiter", PROPERTY_HINT_ENUM, "Comma,Semicolon,Tab"), 0));
	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "unescape_keys"), false));
	r_options->push_back(ImportOption(PropertyInfo(Variant::BOOL, "unescape_translations"), true));
}
Error ResourceImporterCSVTranslation::import(ResourceUID::ID p_source_id, const String &p_source_file, const String &p_save_path, const HashMap<StringName, Variant> &p_options, List<String> *r_platform_variants, List<String> *r_gen_files, Variant *r_metadata) {
bool compress = p_options["compress"];
String delimiter;
switch ((int)p_options["delimiter"]) {
case 0:
delimiter = ",";
break;
case 1:
delimiter = ";";
break;
case 2:
delimiter = "\t";
break;
}
Ref<FileAccess> f = FileAccess::open(p_source_file, FileAccess::READ);
ERR_FAIL_COND_V_MSG(f.is_null(), ERR_INVALID_PARAMETER, "Cannot open file from path '" + p_source_file + "'.");
Vector<String> line = f->get_csv_line(delimiter);
ERR_FAIL_COND_V(line.size() <= 1, ERR_PARSE_ERROR);
Vector<String> locales;
Vector<Ref<Translation>> translations;
HashSet<int> skipped_locales;
for (int i = 1; i < line.size(); i++) {
String locale = TranslationServer::get_singleton()->standardize_locale(line[i]);
if (line[i].left(1) == "_") {
skipped_locales.insert(i);
continue;
} else if (locale.is_empty()) {
skipped_locales.insert(i);
ERR_CONTINUE_MSG(true, vformat("Error importing CSV translation: Invalid locale format '%s', should be 'language_Script_COUNTRY_VARIANT@extra'. This column will be ignored.", line[i]));
}
locales.push_back(locale);
Ref<Translation> translation;
translation.instantiate();
translation->set_locale(locale);
translations.push_back(translation);
String delimiter;
switch ((int)p_options["delimiter"]) {
case 1: {
delimiter = ";";
} break;
case 2: {
delimiter = "\t";
} break;
default: {
delimiter = ",";
} break;
}
do {
line = f->get_csv_line(delimiter);
String key = line[0];
if (!key.is_empty()) {
ERR_CONTINUE_MSG(line.size() != locales.size() + (int)skipped_locales.size() + 1, vformat("Error importing CSV translation: expected %d locale(s), but the '%s' key has %d locale(s).", locales.size(), key, line.size() - 1));
// Parse the header row.
HashMap<int, Ref<Translation>> column_to_translation;
int context_column = -1;
int plural_column = -1;
{
const Vector<String> line = f->get_csv_line(delimiter);
for (int i = 1; i < line.size(); i++) {
if (line[i].left(1) == "_") {
continue;
}
if (line[i].to_lower() == "?context") {
ERR_CONTINUE_MSG(context_column != -1, "Error importing CSV translation: Multiple '?context' columns found. Only one is allowed. Subsequent ones will be ignored.");
context_column = i;
continue;
}
if (line[i].to_lower() == "?plural") {
ERR_CONTINUE_MSG(plural_column != -1, "Error importing CSV translation: Multiple '?plural' columns found. Only one is allowed. Subsequent ones will be ignored.");
plural_column = i;
continue;
}
const String locale = TranslationServer::get_singleton()->standardize_locale(line[i]);
ERR_CONTINUE_MSG(locale.is_empty(), vformat("Error importing CSV translation: Invalid locale format '%s', should be 'language_Script_COUNTRY_VARIANT@extra'. This column will be ignored.", line[i]));
Ref<Translation> translation;
translation.instantiate();
translation->set_locale(locale);
column_to_translation[i] = translation;
}
ERR_FAIL_COND_V_MSG(column_to_translation.is_empty(), ERR_PARSE_ERROR, "Error importing CSV translation: The CSV file must have at least one column for key and one column for translation.");
}
// Parse content rows.
bool context_used = false;
bool plural_used = false;
{
const bool unescape_keys = p_options.has("unescape_keys") ? bool(p_options["unescape_keys"]) : false;
const bool unescape_translations = p_options.has("unescape_translations") ? bool(p_options["unescape_translations"]) : true;
bool reading_plural_rows = false;
String plural_msgid;
String plural_msgctxt;
HashMap<int, Vector<String>> plural_msgstrs;
do {
const Vector<String> line = f->get_csv_line(delimiter);
// Skip empty lines.
if (line.size() == 1 && line[0].is_empty()) {
continue;
}
if (line[0].to_lower() == "?pluralrule") {
for (int i = 1; i < line.size(); i++) {
if (line[i].is_empty() || !column_to_translation.has(i)) {
continue;
}
Ref<Translation> translation = column_to_translation[i];
ERR_CONTINUE_MSG(!translation->get_plural_rules_override().is_empty(), vformat("Error importing CSV translation: Multiple '?pluralrule' definitions found for locale '%s'. Only one is allowed. Subsequent ones will be ignored.", translation->get_locale()));
translation->set_plural_rules_override(line[i]);
}
continue;
}
const String msgid = unescape_keys ? line[0].c_unescape() : line[0];
if (!reading_plural_rows && msgid.is_empty()) {
continue;
}
// It's okay if you define context or plural columns but don't use them.
const String msgctxt = (context_column != -1 && context_column < line.size()) ? line[context_column] : String();
if (!msgctxt.is_empty()) {
context_used = true;
}
const String msgid_plural = (plural_column != -1 && plural_column < line.size()) ? line[plural_column] : String();
if (!msgid_plural.is_empty()) {
plural_used = true;
}
// End of plural rows.
if (reading_plural_rows && (!msgid.is_empty() || !msgctxt.is_empty() || !msgid_plural.is_empty())) {
reading_plural_rows = false;
for (KeyValue<int, Ref<Translation>> E : column_to_translation) {
Ref<Translation> translation = E.value;
const Vector<String> &msgstrs = plural_msgstrs[E.key];
if (!msgstrs.is_empty()) {
translation->add_plural_message(plural_msgid, msgstrs, plural_msgctxt);
}
}
plural_msgstrs.clear();
}
// Start of plural rows.
if (!reading_plural_rows && !msgid_plural.is_empty()) {
reading_plural_rows = true;
plural_msgid = msgid;
plural_msgctxt = msgctxt;
}
int write_index = 0; // Keep track of translations written in case some locales are skipped.
for (int i = 1; i < line.size(); i++) {
if (skipped_locales.has(i)) {
if (!column_to_translation.has(i)) {
continue;
}
translations.write[write_index++]->add_message(key, line[i].c_unescape());
const String msgstr = unescape_translations ? line[i].c_unescape() : line[i];
if (msgstr.is_empty()) {
continue;
}
if (reading_plural_rows) {
plural_msgstrs[i].push_back(msgstr);
} else {
column_to_translation[i]->add_message(msgid, msgstr, msgctxt);
}
}
} while (!f->eof_reached());
if (reading_plural_rows) {
for (KeyValue<int, Ref<Translation>> E : column_to_translation) {
Ref<Translation> translation = E.value;
const Vector<String> &msgstrs = plural_msgstrs[E.key];
if (!msgstrs.is_empty()) {
translation->add_plural_message(plural_msgid, msgstrs, plural_msgctxt);
}
}
}
} while (!f->eof_reached());
}
for (int i = 0; i < translations.size(); i++) {
Ref<Translation> xlt = translations[i];
bool compress;
switch ((int)p_options["compress"]) {
case 0: { // Disabled.
compress = false;
} break;
default: { // Auto.
compress = !context_used && !plural_used;
} break;
}
for (KeyValue<int, Ref<Translation>> E : column_to_translation) {
Ref<Translation> xlt = E.value;
if (compress) {
Ref<OptimizedTranslation> cxl = memnew(OptimizedTranslation);
@@ -141,8 +238,8 @@ Error ResourceImporterCSVTranslation::import(ResourceUID::ID p_source_id, const
xlt = cxl;
}
String save_path = p_source_file.get_basename() + "." + translations[i]->get_locale() + ".translation";
ResourceUID::ID save_id = hash64_murmur3_64(translations[i]->get_locale().hash64(), p_source_id) & 0x7FFFFFFFFFFFFFFF;
String save_path = p_source_file.get_basename() + "." + xlt->get_locale() + ".translation";
ResourceUID::ID save_id = hash64_murmur3_64(xlt->get_locale().hash64(), p_source_id) & 0x7FFFFFFFFFFFFFFF;
bool uid_already_exists = ResourceUID::get_singleton()->has_id(save_id);
if (uid_already_exists) {
// Avoid creating a new file with a duplicate UID.