Core ubsan fixes

This fixes UBSAN errors reported by running our testsuite, importing the TPS demo, and running the TPS demo. I have tried, wherever possible, to fix issues related to reported issues but not directly reported by UBSAN because thse code paths just happened to not have been exercised in these cases. These fixes apply only to errors reported, and caused by, core/ The following things have been changed: * Make sure there are no implicit sign changing casts in core. * Explicitly type enums that are part of a public API such that users of the API cannot pass in wrongly-sized values leading to potential stack corruption. * Ensure that memcpy is never called with invalid or null pointers as this is undefined behavior, and when the engine is built with optimizations turned on leads to memory corruption and hard to debug crashes. * Replace enum values only used as static values with constexpr static const values instead. This has no runtime overhead. This makes it so that the size of the enums is explicit. * Make sure that nan and inf is handled consistently in String. * Implement a _to_int template to ensure that all of the paths use the same algorhithm, and correct the negative integer case. * Changed the way the json serializer precision work, and added tests to verify the new behavior. The behavior doesn't quite match master in particulary for negative doubles as the original code tried to cast -inf to an int. This then led to negative doubles losing all but one of their decimal points when serializing. Behavior in GDScript remains unchanged.
2026-01-06 19:41:11 +00:00 · 2024-12-14 02:17:09 +01:00
parent b9437c3938
commit 240f510fa7
40 changed files with 373 additions and 248 deletions
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@@ -42,10 +42,6 @@
 #include "core/variant/variant.h"
 #include "core/version_generated.gen.h"

-#include <stdio.h>
-#include <stdlib.h>
-#include <cstdint>
-
 #ifdef _MSC_VER
 #define _CRT_SECURE_NO_WARNINGS // to disable build-time warning which suggested to use strcpy_s instead strcpy
 #endif
@@ -1804,6 +1800,10 @@ String String::num_uint64(uint64_t p_num, int base, bool capitalize_hex) {
 }

 String String::num_real(double p_num, bool p_trailing) {
+	if (Math::is_nan(p_num) || Math::is_inf(p_num)) {
+		return num(p_num, 0);
+	}
+
 	if (p_num == (double)(int64_t)p_num) {
 		if (p_trailing) {
 			return num_int64((int64_t)p_num) + ".0";
@@ -1811,6 +1811,7 @@ String String::num_real(double p_num, bool p_trailing) {
 			return num_int64((int64_t)p_num);
 		}
 	}
+
 	int decimals = 14;
 	// We want to align the digits to the above sane default, so we only need
 	// to subtract log10 for numbers with a positive power of ten magnitude.
@@ -1818,10 +1819,15 @@ String String::num_real(double p_num, bool p_trailing) {
 	if (abs_num > 10) {
 		decimals -= (int)floor(log10(abs_num));
 	}
+
 	return num(p_num, decimals);
 }

 String String::num_real(float p_num, bool p_trailing) {
+	if (Math::is_nan(p_num) || Math::is_inf(p_num)) {
+		return num(p_num, 0);
+	}
+
 	if (p_num == (float)(int64_t)p_num) {
 		if (p_trailing) {
 			return num_int64((int64_t)p_num) + ".0";
@@ -1840,16 +1846,8 @@ String String::num_real(float p_num, bool p_trailing) {
 }

 String String::num_scientific(double p_num) {
-	if (Math::is_nan(p_num)) {
-		return "nan";
-	}
-
-	if (Math::is_inf(p_num)) {
-		if (signbit(p_num)) {
-			return "-inf";
-		} else {
-			return "inf";
-		}
+	if (Math::is_nan(p_num) || Math::is_inf(p_num)) {
+		return num(p_num, 0);
 	}

 	char buf[256];
@@ -1947,7 +1945,7 @@ CharString String::ascii(bool p_allow_extended) const {
 	for (int i = 0; i < size(); i++) {
 		char32_t c = this_ptr[i];
 		if ((c <= 0x7f) || (c <= 0xff && p_allow_extended)) {
-			cs_ptrw[i] = c;
+			cs_ptrw[i] = char(c);
 		} else {
 			print_unicode_error(vformat("Invalid unicode codepoint (%x), cannot represent as ASCII/Latin-1", (uint32_t)c));
 			cs_ptrw[i] = 0x20; // ASCII doesn't have a replacement character like unicode, 0x1a is sometimes used but is kinda arcane.
@@ -2487,6 +2485,42 @@ int64_t String::bin_to_int() const {
 	return binary * sign;
 }

+template <typename C, typename T>
+_ALWAYS_INLINE_ int64_t _to_int(const T &p_in, int to) {
+	// Accumulate the total number in an unsigned integer as the range is:
+	// +9223372036854775807 to -9223372036854775808 and the smallest negative
+	// number does not fit inside an int64_t. So we accumulate the positive
+	// number in an unsigned, and then at the very end convert to its signed
+	// form.
+	uint64_t integer = 0;
+	uint8_t digits = 0;
+	bool positive = true;
+
+	for (int i = 0; i < to; i++) {
+		C c = p_in[i];
+		if (is_digit(c)) {
+			// No need to do expensive checks unless we're approaching INT64_MAX / INT64_MIN.
+			if (unlikely(digits > 18)) {
+				bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((positive && c > '7') || (!positive && c > '8')));
+				ERR_FAIL_COND_V_MSG(overflow, positive ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_in) + " as a 64-bit signed integer, since the value is " + (positive ? "too large." : "too small."));
+			}
+
+			integer *= 10;
+			integer += c - '0';
+			++digits;
+
+		} else if (integer == 0 && c == '-') {
+			positive = !positive;
+		}
+	}
+
+	if (positive) {
+		return int64_t(integer);
+	} else {
+		return int64_t(integer * uint64_t(-1));
+	}
+}
+
 int64_t String::to_int() const {
 	if (length() == 0) {
 		return 0;
@@ -2494,23 +2528,7 @@ int64_t String::to_int() const {

 	int to = (find_char('.') >= 0) ? find_char('.') : length();

-	int64_t integer = 0;
-	int64_t sign = 1;
-
-	for (int i = 0; i < to; i++) {
-		char32_t c = operator[](i);
-		if (is_digit(c)) {
-			bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
-			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + *this + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
-			integer *= 10;
-			integer += c - '0';
-
-		} else if (integer == 0 && c == '-') {
-			sign = -sign;
-		}
-	}
-
-	return integer * sign;
+	return _to_int<char32_t>(*this, to);
 }

 int64_t String::to_int(const char *p_str, int p_len) {
@@ -2523,25 +2541,7 @@ int64_t String::to_int(const char *p_str, int p_len) {
 		}
 	}

-	int64_t integer = 0;
-	int64_t sign = 1;
-
-	for (int i = 0; i < to; i++) {
-		char c = p_str[i];
-		if (is_digit(c)) {
-			bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
-			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
-			integer *= 10;
-			integer += c - '0';
-
-		} else if (c == '-' && integer == 0) {
-			sign = -sign;
-		} else if (c != ' ') {
-			break;
-		}
-	}
-
-	return integer * sign;
+	return _to_int<char>(p_str, to);
 }

 int64_t String::to_int(const wchar_t *p_str, int p_len) {
@@ -2554,25 +2554,7 @@ int64_t String::to_int(const wchar_t *p_str, int p_len) {
 		}
 	}

-	int64_t integer = 0;
-	int64_t sign = 1;
-
-	for (int i = 0; i < to; i++) {
-		wchar_t c = p_str[i];
-		if (is_digit(c)) {
-			bool overflow = (integer > INT64_MAX / 10) || (integer == INT64_MAX / 10 && ((sign == 1 && c > '7') || (sign == -1 && c > '8')));
-			ERR_FAIL_COND_V_MSG(overflow, sign == 1 ? INT64_MAX : INT64_MIN, "Cannot represent " + String(p_str).substr(0, to) + " as a 64-bit signed integer, since the value is " + (sign == 1 ? "too large." : "too small."));
-			integer *= 10;
-			integer += c - '0';
-
-		} else if (c == '-' && integer == 0) {
-			sign = -sign;
-		} else if (c != ' ') {
-			break;
-		}
-	}
-
-	return integer * sign;
+	return _to_int<wchar_t>(p_str, to);
 }

 bool String::is_numeric() const {
@@ -3969,7 +3951,7 @@ static String _replace_common(const String &p_this, const String &p_key, const S
 		return p_this;
 	}

-	const int key_length = p_key.length();
+	const size_t key_length = p_key.length();

 	int search_from = 0;
 	int result = 0;
@@ -3978,6 +3960,7 @@ static String _replace_common(const String &p_this, const String &p_key, const S

 	while ((result = (p_case_insensitive ? p_this.findn(p_key, search_from) : p_this.find(p_key, search_from))) >= 0) {
 		found.push_back(result);
+		ERR_FAIL_COND_V_MSG((result + key_length) > INT32_MAX, p_this, "Key length too long");
 		search_from = result + key_length;
 	}

@@ -3990,7 +3973,7 @@ static String _replace_common(const String &p_this, const String &p_key, const S
 	const int with_length = p_with.length();
 	const int old_length = p_this.length();

-	new_string.resize(old_length + found.size() * (with_length - key_length) + 1);
+	new_string.resize(old_length + int(found.size()) * (with_length - key_length) + 1);

 	char32_t *new_ptrw = new_string.ptrw();
 	const char32_t *old_ptr = p_this.ptr();
@@ -4021,7 +4004,7 @@ static String _replace_common(const String &p_this, const String &p_key, const S
 }

 static String _replace_common(const String &p_this, char const *p_key, char const *p_with, bool p_case_insensitive) {
-	int key_length = strlen(p_key);
+	size_t key_length = strlen(p_key);

 	if (key_length == 0 || p_this.is_empty()) {
 		return p_this;
@@ -4034,6 +4017,7 @@ static String _replace_common(const String &p_this, char const *p_key, char cons

 	while ((result = (p_case_insensitive ? p_this.findn(p_key, search_from) : p_this.find(p_key, search_from))) >= 0) {
 		found.push_back(result);
+		ERR_FAIL_COND_V_MSG((result + key_length) > INT32_MAX, p_this, "Key length too long");
 		search_from = result + key_length;
 	}

@@ -4048,7 +4032,7 @@ static String _replace_common(const String &p_this, char const *p_key, char cons
 	const int with_length = with_string.length();
 	const int old_length = p_this.length();

-	new_string.resize(old_length + found.size() * (with_length - key_length) + 1);
+	new_string.resize(old_length + int(found.size()) * (with_length - key_length) + 1);

 	char32_t *new_ptrw = new_string.ptrw();
 	const char32_t *old_ptr = p_this.ptr();
@@ -4639,8 +4623,9 @@ bool String::is_valid_string() const {
 String String::uri_encode() const {
 	const CharString temp = utf8();
 	String res;
+
 	for (int i = 0; i < temp.length(); ++i) {
-		uint8_t ord = temp[i];
+		uint8_t ord = uint8_t(temp[i]);
 		if (ord == '.' || ord == '-' || ord == '~' || is_ascii_identifier_char(ord)) {
 			res += ord;
 		} else {