diff --git a/core/io/file_access.compat.inc b/core/io/file_access.compat.inc index 97c7849e677..f48005da671 100644 --- a/core/io/file_access.compat.inc +++ b/core/io/file_access.compat.inc @@ -90,6 +90,14 @@ void FileAccess::store_pascal_string_bind_compat_78289(const String &p_string) { store_pascal_string(p_string); } +String FileAccess::get_as_text_bind_compat_110867(bool p_skip_cr) const { + String text = get_as_text(); + if (unlikely(p_skip_cr)) { + text = text.remove_char('\r'); + } + return text; +} + void FileAccess::_bind_compatibility_methods() { ClassDB::bind_compatibility_static_method("FileAccess", D_METHOD("open_encrypted", "path", "mode_flags", "key"), &FileAccess::_open_encrypted_bind_compat_98918); @@ -107,6 +115,7 @@ void FileAccess::_bind_compatibility_methods() { ClassDB::bind_compatibility_method(D_METHOD("store_string", "string"), &FileAccess::store_string_bind_compat_78289); ClassDB::bind_compatibility_method(D_METHOD("store_var", "value", "full_objects"), &FileAccess::store_var_bind_compat_78289, DEFVAL(false)); ClassDB::bind_compatibility_method(D_METHOD("store_pascal_string", "string"), &FileAccess::store_pascal_string_bind_compat_78289); + ClassDB::bind_compatibility_method(D_METHOD("get_as_text", "skip_cr"), &FileAccess::get_as_text_bind_compat_110867, DEFVAL(false)); } #endif diff --git a/core/io/file_access.cpp b/core/io/file_access.cpp index eb7f2a43ac3..cec768a4c91 100644 --- a/core/io/file_access.cpp +++ b/core/io/file_access.cpp @@ -467,10 +467,22 @@ String FileAccess::get_line() const { uint8_t c = get_8(); while (!eof_reached()) { - if (c == '\n' || c == '\0' || get_error() != OK) { + if (c == '\r' || c == '\n' || c == '\0' || get_error() != OK) { + if (c == '\r') { + // Check for Windows-style EOL. + const uint64_t prev_pos = get_position() - 1; + if (unlikely(get_8() != '\n')) { + // HACK: We can't simply check the next value in a vacuum, so we risk triggering + // an EOL false-positive in the unlikely event that this `\r` was the final + // value of the file. Unilaterally work around by re-reading the *previous* + // byte (the starting `\r`) to ensure `get_error()` returns `OK`. + const_cast(this)->seek(prev_pos); + get_8(); + } + } line.push_back(0); return String::utf8(line.get_data()); - } else if (c != '\r') { + } else { line.push_back(char(c)); } @@ -538,11 +550,11 @@ Vector FileAccess::get_csv_line(const String &p_delim) const { return strings; } -String FileAccess::get_as_text(bool p_skip_cr) const { +String FileAccess::get_as_text() const { uint64_t original_pos = get_position(); const_cast(this)->seek(0); - String text = get_as_utf8_string(p_skip_cr); + String text = get_as_utf8_string(); const_cast(this)->seek(original_pos); @@ -570,7 +582,7 @@ Vector FileAccess::get_buffer(int64_t p_length) const { return data; } -String FileAccess::get_as_utf8_string(bool p_skip_cr) const { +String FileAccess::get_as_utf8_string() const { Vector sourcef; uint64_t len = get_length(); sourcef.resize(len + 1); @@ -581,7 +593,7 @@ String FileAccess::get_as_utf8_string(bool p_skip_cr) const { w[len] = 0; String s; - s.append_utf8((const char *)w, len, p_skip_cr); + s.append_utf8((const char *)w, len); return s; } @@ -987,7 +999,7 @@ void FileAccess::_bind_methods() { ClassDB::bind_method(D_METHOD("get_buffer", "length"), (Vector (FileAccess::*)(int64_t) const) & FileAccess::get_buffer); ClassDB::bind_method(D_METHOD("get_line"), &FileAccess::get_line); ClassDB::bind_method(D_METHOD("get_csv_line", "delim"), &FileAccess::get_csv_line, DEFVAL(",")); - ClassDB::bind_method(D_METHOD("get_as_text", "skip_cr"), &FileAccess::get_as_text, DEFVAL(false)); + ClassDB::bind_method(D_METHOD("get_as_text"), &FileAccess::get_as_text); ClassDB::bind_static_method("FileAccess", D_METHOD("get_md5", "path"), &FileAccess::get_md5); ClassDB::bind_static_method("FileAccess", D_METHOD("get_sha256", "path"), &FileAccess::get_sha256); ClassDB::bind_method(D_METHOD("is_big_endian"), &FileAccess::is_big_endian); diff --git a/core/io/file_access.h b/core/io/file_access.h index ed306d6cc81..036dde9440c 100644 --- a/core/io/file_access.h +++ b/core/io/file_access.h @@ -132,6 +132,7 @@ protected: void store_line_bind_compat_78289(const String &p_line); void store_csv_line_bind_compat_78289(const Vector &p_values, const String &p_delim = ","); void store_pascal_string_bind_compat_78289(const String &p_string); + String get_as_text_bind_compat_110867(bool p_skip_cr) const; static void _bind_compatibility_methods(); #endif @@ -188,8 +189,8 @@ public: virtual String get_line() const; virtual String get_token() const; virtual Vector get_csv_line(const String &p_delim = ",") const; - String get_as_text(bool p_skip_cr = false) const; - virtual String get_as_utf8_string(bool p_skip_cr = false) const; + String get_as_text() const; + virtual String get_as_utf8_string() const; /** diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 0cb4b2c414a..4172f21e65a 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -1763,7 +1763,7 @@ Error String::append_ascii(const Span &p_range) { return decode_failed ? ERR_INVALID_DATA : OK; } -Error String::append_utf8(const char *p_utf8, int p_len, bool p_skip_cr) { +Error String::append_utf8(const char *p_utf8, int p_len) { if (!p_utf8) { return ERR_INVALID_DATA; } @@ -1796,11 +1796,6 @@ Error String::append_utf8(const char *p_utf8, int p_len, bool p_skip_cr) { while (ptrtmp < ptr_limit && *ptrtmp) { uint8_t c = *ptrtmp; - - if (p_skip_cr && c == '\r') { - ++ptrtmp; - continue; - } uint32_t unicode = _replacement_char; uint32_t size = 1; diff --git a/core/string/ustring.h b/core/string/ustring.h index 909aaa746b1..3b0869a7d00 100644 --- a/core/string/ustring.h +++ b/core/string/ustring.h @@ -525,9 +525,9 @@ public: } CharString utf8(Vector *r_ch_length_map = nullptr) const; - Error append_utf8(const char *p_utf8, int p_len = -1, bool p_skip_cr = false); - Error append_utf8(const Span &p_range, bool p_skip_cr = false) { - return append_utf8(p_range.ptr(), p_range.size(), p_skip_cr); + Error append_utf8(const char *p_utf8, int p_len = -1); + Error append_utf8(const Span &p_range) { + return append_utf8(p_range.ptr(), p_range.size()); } static String utf8(const char *p_utf8, int p_len = -1) { String ret; diff --git a/doc/classes/FileAccess.xml b/doc/classes/FileAccess.xml index 049d30bf547..74e6426cdff 100644 --- a/doc/classes/FileAccess.xml +++ b/doc/classes/FileAccess.xml @@ -133,10 +133,8 @@ - Returns the whole file as a [String]. Text is interpreted as being UTF-8 encoded. This ignores the file cursor and does not affect it. - If [param skip_cr] is [code]true[/code], carriage return characters ([code]\r[/code], CR) will be ignored when parsing the UTF-8, so that only line feed characters ([code]\n[/code], LF) represent a new line (Unix convention). diff --git a/misc/extension_api_validation/4.5-stable.expected b/misc/extension_api_validation/4.5-stable.expected index 7cafd1fa773..35f38aee43a 100644 --- a/misc/extension_api_validation/4.5-stable.expected +++ b/misc/extension_api_validation/4.5-stable.expected @@ -14,3 +14,10 @@ Validate extension JSON: JSON file: Field was added in a way that breaks compati Validate extension JSON: JSON file: Field was added in a way that breaks compatibility 'classes/Control/methods/has_focus': arguments Optional argument added. Compatibility methods registered. + + +GH-110867 +--------- +ERROR: Validate extension JSON: Missing field in current API 'classes/FileAccess/methods/get_as_text': arguments. This is a bug. + +Optional argument removed. Compatibility method registered. diff --git a/platform/android/file_access_filesystem_jandroid.cpp b/platform/android/file_access_filesystem_jandroid.cpp index d437c011e37..c8666f53240 100644 --- a/platform/android/file_access_filesystem_jandroid.cpp +++ b/platform/android/file_access_filesystem_jandroid.cpp @@ -200,10 +200,11 @@ String FileAccessFilesystemJAndroid::get_line() const { for (; bytes_read > 0; line_buffer_position++, bytes_read--) { uint8_t elem = line_buffer[line_buffer_position]; - if (elem == '\n' || elem == '\0') { + if (elem == '\r' || elem == '\n' || elem == '\0') { // Found the end of the line - const_cast(this)->seek(start_position + line_buffer_position + 1); - if (result.append_utf8((const char *)line_buffer.ptr(), line_buffer_position, true)) { + const bool is_crlf = elem == '\r' && line_buffer_position + 1 < current_buffer_size && line_buffer[line_buffer_position + 1] == '\n'; + const_cast(this)->seek(start_position + line_buffer_position + (is_crlf ? 2 : 1)); + if (result.append_utf8((const char *)line_buffer.ptr(), line_buffer_position)) { return String(); } return result; @@ -211,7 +212,7 @@ String FileAccessFilesystemJAndroid::get_line() const { } } - if (result.append_utf8((const char *)line_buffer.ptr(), line_buffer_position, true)) { + if (result.append_utf8((const char *)line_buffer.ptr(), line_buffer_position)) { return String(); } return result; diff --git a/tests/core/io/test_file_access.h b/tests/core/io/test_file_access.h index 2a69de24197..7c6a0ab3c4a 100644 --- a/tests/core/io/test_file_access.h +++ b/tests/core/io/test_file_access.h @@ -82,29 +82,57 @@ TEST_CASE("[FileAccess] CSV read") { } TEST_CASE("[FileAccess] Get as UTF-8 String") { - Ref f_lf = FileAccess::open(TestUtils::get_data_path("line_endings_lf.test.txt"), FileAccess::READ); - REQUIRE(f_lf.is_valid()); - String s_lf = f_lf->get_as_utf8_string(); - f_lf->seek(0); - String s_lf_nocr = f_lf->get_as_utf8_string(true); - CHECK(s_lf == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n"); - CHECK(s_lf_nocr == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n"); + SUBCASE("Newline == \\n (Unix)") { + Ref f_lf = FileAccess::open(TestUtils::get_data_path("line_endings_lf.test.txt"), FileAccess::READ); + REQUIRE(f_lf.is_valid()); + String s_lf = f_lf->get_as_utf8_string(); + CHECK(s_lf == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n"); + f_lf->seek(0); + CHECK(f_lf->get_line() == "Hello darkness"); + CHECK(f_lf->get_line() == "My old friend"); + CHECK(f_lf->get_line() == "I've come to talk"); + CHECK(f_lf->get_line() == "With you again"); + CHECK(f_lf->get_error() == Error::OK); + } - Ref f_crlf = FileAccess::open(TestUtils::get_data_path("line_endings_crlf.test.txt"), FileAccess::READ); - REQUIRE(f_crlf.is_valid()); - String s_crlf = f_crlf->get_as_utf8_string(); - f_crlf->seek(0); - String s_crlf_nocr = f_crlf->get_as_utf8_string(true); - CHECK(s_crlf == "Hello darkness\r\nMy old friend\r\nI've come to talk\r\nWith you again\r\n"); - CHECK(s_crlf_nocr == "Hello darkness\nMy old friend\nI've come to talk\nWith you again\n"); + SUBCASE("Newline == \\r\\n (Windows)") { + Ref f_crlf = FileAccess::open(TestUtils::get_data_path("line_endings_crlf.test.txt"), FileAccess::READ); + REQUIRE(f_crlf.is_valid()); + String s_crlf = f_crlf->get_as_utf8_string(); + CHECK(s_crlf == "Hello darkness\r\nMy old friend\r\nI've come to talk\r\nWith you again\r\n"); + f_crlf->seek(0); + CHECK(f_crlf->get_line() == "Hello darkness"); + CHECK(f_crlf->get_line() == "My old friend"); + CHECK(f_crlf->get_line() == "I've come to talk"); + CHECK(f_crlf->get_line() == "With you again"); + CHECK(f_crlf->get_error() == Error::OK); + } - Ref f_cr = FileAccess::open(TestUtils::get_data_path("line_endings_cr.test.txt"), FileAccess::READ); - REQUIRE(f_cr.is_valid()); - String s_cr = f_cr->get_as_utf8_string(); - f_cr->seek(0); - String s_cr_nocr = f_cr->get_as_utf8_string(true); - CHECK(s_cr == "Hello darkness\rMy old friend\rI've come to talk\rWith you again\r"); - CHECK(s_cr_nocr == "Hello darknessMy old friendI've come to talkWith you again"); + SUBCASE("Newline == \\r (Legacy macOS)") { + Ref f_cr = FileAccess::open(TestUtils::get_data_path("line_endings_cr.test.txt"), FileAccess::READ); + REQUIRE(f_cr.is_valid()); + String s_cr = f_cr->get_as_utf8_string(); + CHECK(s_cr == "Hello darkness\rMy old friend\rI've come to talk\rWith you again\r"); + f_cr->seek(0); + CHECK(f_cr->get_line() == "Hello darkness"); + CHECK(f_cr->get_line() == "My old friend"); + CHECK(f_cr->get_line() == "I've come to talk"); + CHECK(f_cr->get_line() == "With you again"); + CHECK(f_cr->get_error() == Error::OK); + } + + SUBCASE("Newline == Mixed") { + Ref f_mix = FileAccess::open(TestUtils::get_data_path("line_endings_mixed.test.txt"), FileAccess::READ); + REQUIRE(f_mix.is_valid()); + String s_mix = f_mix->get_as_utf8_string(); + CHECK(s_mix == "Hello darkness\nMy old friend\r\nI've come to talk\rWith you again"); + f_mix->seek(0); + CHECK(f_mix->get_line() == "Hello darkness"); + CHECK(f_mix->get_line() == "My old friend"); + CHECK(f_mix->get_line() == "I've come to talk"); + CHECK(f_mix->get_line() == "With you again"); + CHECK(f_mix->get_error() == Error::ERR_FILE_EOF); // Not a bug; the file lacks a final newline. + } } TEST_CASE("[FileAccess] Get/Store floating point values") { diff --git a/tests/core/string/test_string.h b/tests/core/string/test_string.h index 1cc507213fb..91217e86a6f 100644 --- a/tests/core/string/test_string.h +++ b/tests/core/string/test_string.h @@ -156,20 +156,6 @@ TEST_CASE("[String] UTF16 with BOM") { CHECK(String::utf16(cs) == s); } -TEST_CASE("[String] UTF8 with CR") { - const String base = U"Hello darkness\r\nMy old friend\nI've come to talk\rWith you again"; - - String keep_cr; - Error err = keep_cr.append_utf8(base.utf8().get_data()); - CHECK(err == OK); - CHECK(keep_cr == base); - - String no_cr; - err = no_cr.append_utf8(base.utf8().get_data(), -1, true); // Skip CR. - CHECK(err == OK); - CHECK(no_cr == base.replace("\r", "")); -} - TEST_CASE("[String] Invalid UTF8 (non shortest form sequence)") { ERR_PRINT_OFF // Examples from the unicode standard : 3.9 Unicode Encoding Forms - Table 3.8. diff --git a/tests/data/line_endings_mixed.test.txt b/tests/data/line_endings_mixed.test.txt new file mode 100644 index 00000000000..619b76f40bd --- /dev/null +++ b/tests/data/line_endings_mixed.test.txt @@ -0,0 +1,3 @@ +Hello darkness +My old friend +I've come to talk With you again \ No newline at end of file