From 43a8009a56e98b88a521af04f58d095564d3d3d2 Mon Sep 17 00:00:00 2001 From: Lukas Tenbrink Date: Mon, 22 Sep 2025 14:46:55 +0200 Subject: [PATCH] Clean up String::find to remove duplicate code, and speed up comparison with `memcmp` where possible. --- core/string/ustring.cpp | 201 +++++++------------------------- core/templates/span.h | 17 ++- tests/core/string/test_string.h | 11 ++ 3 files changed, 62 insertions(+), 167 deletions(-) diff --git a/core/string/ustring.cpp b/core/string/ustring.cpp index 18bc9372a4b..95cf149ca68 100644 --- a/core/string/ustring.cpp +++ b/core/string/ustring.cpp @@ -63,6 +63,17 @@ static _FORCE_INLINE_ char32_t lower_case(char32_t c) { return (is_ascii_upper_case(c) ? (c + ('a' - 'A')) : c); } +// Case-insensitive version of are_spans_equal +template +static bool strings_equal_lower(const T1 *p_lhs_begin, const T2 *p_rhs_begin, size_t p_len) { + for (size_t i = 0; i < p_len; ++i) { + if (_find_lower(p_lhs_begin[i]) != _find_lower(p_rhs_begin[i])) { + return false; + } + } + return true; +} + Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path, String &r_fragment) const { // Splits the URL into scheme, host, port, path, fragment. Strip credentials when present. String base = *this; @@ -3043,41 +3054,7 @@ int String::find(const char *p_str, int p_from) const { return find_char(*p_str, p_from); // Optimize with single-char find. } - const char32_t *src = get_data(); - - if (str_len == 1) { - const char32_t needle = p_str[0]; - - for (int i = p_from; i < len; i++) { - if (src[i] == needle) { - return i; - } - } - - } else { - for (int i = p_from; i <= (len - str_len); i++) { - bool found = true; - for (int j = 0; j < str_len; j++) { - int read_pos = i + j; - - if (read_pos >= len) { - ERR_PRINT("read_pos>=length()"); - return -1; - } - - if (src[read_pos] != (char32_t)p_str[j]) { - found = false; - break; - } - } - - if (found) { - return i; - } - } - } - - return -1; + return span().find_sequence(Span((const unsigned char *)p_str, str_len), p_from); } int String::find_char(char32_t p_char, int p_from) const { @@ -3110,35 +3087,20 @@ int String::findmk(const Vector &p_keys, int p_from, int *r_key) const { const char32_t *src = get_data(); for (int i = p_from; i < len; i++) { - bool found = true; for (int k = 0; k < key_count; k++) { - found = true; - if (r_key) { - *r_key = k; + const int str_len = keys[k].length(); + + if (i + str_len > len) { + continue; // Can't find this key here. } - const char32_t *cmp = keys[k].get_data(); - int l = keys[k].length(); - for (int j = 0; j < l; j++) { - int read_pos = i + j; - - if (read_pos >= len) { - found = false; - break; - } - - if (src[read_pos] != cmp[j]) { - found = false; - break; + const char32_t *str = keys[k].get_data(); + if (are_spans_equal(src + i, str, str_len)) { + if (r_key) { + *r_key = k; } + return i; } - if (found) { - break; - } - } - - if (found) { - return i; } } @@ -3156,28 +3118,11 @@ int String::findn(const String &p_str, int p_from) const { return -1; // Still out of bounds } - const char32_t *srcd = get_data(); + const char32_t *src = get_data(); + const char32_t *str = p_str.get_data(); for (int i = p_from; i <= (len - str_len); i++) { - bool found = true; - for (int j = 0; j < str_len; j++) { - int read_pos = i + j; - - if (read_pos >= len) { - ERR_PRINT("read_pos>=length()"); - return -1; - } - - char32_t src = _find_lower(srcd[read_pos]); - char32_t dst = _find_lower(p_str[j]); - - if (src != dst) { - found = false; - break; - } - } - - if (found) { + if (strings_equal_lower(src + i, str, str_len)) { return i; } } @@ -3196,28 +3141,10 @@ int String::findn(const char *p_str, int p_from) const { return -1; // Still out of bounds } - const char32_t *srcd = get_data(); + const char32_t *src = get_data(); for (int i = p_from; i <= (len - str_len); i++) { - bool found = true; - for (int j = 0; j < str_len; j++) { - int read_pos = i + j; - - if (read_pos >= len) { - ERR_PRINT("read_pos>=length()"); - return -1; - } - - char32_t src = _find_lower(srcd[read_pos]); - char32_t dst = _find_lower(p_str[j]); - - if (src != dst) { - found = false; - break; - } - } - - if (found) { + if (strings_equal_lower(src + i, p_str, str_len)) { return i; } } @@ -3255,31 +3182,12 @@ int String::rfind(const char *p_str, int p_from) const { return -1; // Still out of bounds } - const char32_t *source = get_data(); - - for (int i = p_from; i >= 0; i--) { - bool found = true; - for (int j = 0; j < str_len; j++) { - int read_pos = i + j; - - if (read_pos >= length()) { - ERR_PRINT("read_pos>=length()"); - return -1; - } - - const char32_t key_needle = p_str[j]; - if (source[read_pos] != key_needle) { - found = false; - break; - } - } - - if (found) { - return i; - } + if (str_len == 1) { + // Optimize with single-char implementation. + return span().rfind(p_str[0], p_from); } - return -1; + return span().rfind_sequence(Span((const unsigned char *)p_str, str_len), p_from); } int String::rfind_char(char32_t p_char, int p_from) const { @@ -3303,28 +3211,16 @@ int String::rfindn(const String &p_str, int p_from) const { return -1; // Still out of bounds } + if (str_len == 1) { + // Optimize with single-char implementation. + return span().rfind(p_str[0], p_from); + } + const char32_t *src = get_data(); + const char32_t *str = p_str.get_data(); for (int i = p_from; i >= 0; i--) { - bool found = true; - for (int j = 0; j < str_len; j++) { - int read_pos = i + j; - - if (read_pos >= len) { - ERR_PRINT("read_pos>=length()"); - return -1; - } - - char32_t srcc = _find_lower(src[read_pos]); - char32_t dstc = _find_lower(p_str[j]); - - if (srcc != dstc) { - found = false; - break; - } - } - - if (found) { + if (strings_equal_lower(src + i, str, str_len)) { return i; } } @@ -3343,29 +3239,10 @@ int String::rfindn(const char *p_str, int p_from) const { return -1; // Still out of bounds } - const char32_t *source = get_data(); + const char32_t *src = get_data(); for (int i = p_from; i >= 0; i--) { - bool found = true; - for (int j = 0; j < str_len; j++) { - int read_pos = i + j; - - if (read_pos >= len) { - ERR_PRINT("read_pos>=length()"); - return -1; - } - - const char32_t key_needle = p_str[j]; - int srcc = _find_lower(source[read_pos]); - int keyc = _find_lower(key_needle); - - if (srcc != keyc) { - found = false; - break; - } - } - - if (found) { + if (strings_equal_lower(src + i, p_str, str_len)) { return i; } } diff --git a/core/templates/span.h b/core/templates/span.h index db66527a86e..f61eb74a703 100644 --- a/core/templates/span.h +++ b/core/templates/span.h @@ -116,11 +116,14 @@ public: // Algorithms. constexpr int64_t find(const T &p_val, uint64_t p_from = 0) const; - constexpr int64_t find_sequence(const Span &p_span, uint64_t p_from = 0) const; + template + constexpr int64_t find_sequence(const Span &p_span, uint64_t p_from = 0) const; constexpr int64_t rfind(const T &p_val, uint64_t p_from) const; _FORCE_INLINE_ constexpr int64_t rfind(const T &p_val) const { return rfind(p_val, size() - 1); } - constexpr int64_t rfind_sequence(const Span &p_span, uint64_t p_from) const; - _FORCE_INLINE_ constexpr int64_t rfind_sequence(const Span &p_span) const { return rfind_sequence(p_span, size() - p_span.size()); } + template + constexpr int64_t rfind_sequence(const Span &p_span, uint64_t p_from) const; + template + _FORCE_INLINE_ constexpr int64_t rfind_sequence(const Span &p_span) const { return rfind_sequence(p_span, size() - p_span.size()); } constexpr uint64_t count(const T &p_val) const; /// Find the index of the given value using binary search. /// Note: Assumes that elements in the span are sorted. Otherwise, use find() instead. @@ -142,7 +145,8 @@ constexpr int64_t Span::find(const T &p_val, uint64_t p_from) const { } template -constexpr int64_t Span::find_sequence(const Span &p_span, uint64_t p_from) const { +template +constexpr int64_t Span::find_sequence(const Span &p_span, uint64_t p_from) const { for (uint64_t i = p_from; i <= size() - p_span.size(); i++) { if (are_spans_equal(ptr() + i, p_span.ptr(), p_span.size())) { return i; @@ -154,6 +158,7 @@ constexpr int64_t Span::find_sequence(const Span &p_span, uint64_t p_from) template constexpr int64_t Span::rfind(const T &p_val, uint64_t p_from) const { + DEV_ASSERT(p_from < size()); for (int64_t i = p_from; i >= 0; i--) { if (ptr()[i] == p_val) { return i; @@ -163,7 +168,9 @@ constexpr int64_t Span::rfind(const T &p_val, uint64_t p_from) const { } template -constexpr int64_t Span::rfind_sequence(const Span &p_span, uint64_t p_from) const { +template +constexpr int64_t Span::rfind_sequence(const Span &p_span, uint64_t p_from) const { + DEV_ASSERT(p_from + p_span.size() <= size()); for (int64_t i = p_from; i >= 0; i--) { if (are_spans_equal(ptr() + i, p_span.ptr(), p_span.size())) { return i; diff --git a/tests/core/string/test_string.h b/tests/core/string/test_string.h index 1d91239ad4b..68183f52be7 100644 --- a/tests/core/string/test_string.h +++ b/tests/core/string/test_string.h @@ -396,6 +396,8 @@ TEST_CASE("[String] Find") { MULTICHECK_STRING_EQ(s, find, "tty", 3); MULTICHECK_STRING_EQ(s, find, "Revenge of the Monster Truck", -1); MULTICHECK_STRING_INT_EQ(s, find, "Wo", 9, 13); + MULTICHECK_STRING_INT_EQ(s, find, "Wo", 1000, -1); + MULTICHECK_STRING_INT_EQ(s, find, "Wo", -1, -1); MULTICHECK_STRING_EQ(s, find, "", -1); MULTICHECK_STRING_EQ(s, find, "Pretty Woman Woman", 0); MULTICHECK_STRING_EQ(s, find, "WOMAN", -1); @@ -407,6 +409,8 @@ TEST_CASE("[String] Find") { MULTICHECK_STRING_EQ(s, rfind, "man", 15); MULTICHECK_STRING_EQ(s, rfind, "WOMAN", -1); MULTICHECK_STRING_INT_EQ(s, rfind, "", 15, -1); + MULTICHECK_STRING_INT_EQ(s, rfind, "Wo", 1000, -1); + MULTICHECK_STRING_INT_EQ(s, rfind, "Wo", -1, 13); } TEST_CASE("[String] Find character") { @@ -426,6 +430,8 @@ TEST_CASE("[String] Find case insensitive") { String s = "Pretty Whale Whale"; MULTICHECK_STRING_EQ(s, findn, "WHA", 7); MULTICHECK_STRING_INT_EQ(s, findn, "WHA", 9, 13); + MULTICHECK_STRING_INT_EQ(s, findn, "WHA", 1000, -1); + MULTICHECK_STRING_INT_EQ(s, findn, "WHA", -1, -1); MULTICHECK_STRING_EQ(s, findn, "Revenge of the Monster SawFish", -1); MULTICHECK_STRING_EQ(s, findn, "", -1); MULTICHECK_STRING_EQ(s, findn, "wha", 7); @@ -437,6 +443,8 @@ TEST_CASE("[String] Find case insensitive") { MULTICHECK_STRING_EQ(s, rfindn, "wha", 13); MULTICHECK_STRING_EQ(s, rfindn, "Wha", 13); MULTICHECK_STRING_INT_EQ(s, rfindn, "", 13, -1); + MULTICHECK_STRING_INT_EQ(s, rfindn, "WHA", 1000, -1); + MULTICHECK_STRING_INT_EQ(s, rfindn, "WHA", -1, 13); } TEST_CASE("[String] Find MK") { @@ -453,6 +461,9 @@ TEST_CASE("[String] Find MK") { CHECK(s.findmk(keys, 5, &key) == 9); CHECK(key == 2); + + CHECK(s.findmk(keys, -1, &key) == -1); + CHECK(s.findmk(keys, 1000, &key) == -1); } TEST_CASE("[String] Find and replace") {