1
0
mirror of https://github.com/godotengine/godot.git synced 2026-01-06 19:41:11 +00:00

ICU: Update to version 69.1, improve ICU data export process.

This commit is contained in:
bruvzg
2021-04-22 15:08:59 +03:00
parent 77a876c6e1
commit b56241f22f
88 changed files with 1417 additions and 1049 deletions

View File

@@ -30,24 +30,6 @@
#include "bmpset.h"
#include "unisetspan.h"
// Define UChar constants using hex for EBCDIC compatibility
// Used #define to reduce private static exports and memory access time.
#define SET_OPEN ((UChar)0x005B) /*[*/
#define SET_CLOSE ((UChar)0x005D) /*]*/
#define HYPHEN ((UChar)0x002D) /*-*/
#define COMPLEMENT ((UChar)0x005E) /*^*/
#define COLON ((UChar)0x003A) /*:*/
#define BACKSLASH ((UChar)0x005C) /*\*/
#define INTERSECTION ((UChar)0x0026) /*&*/
#define UPPER_U ((UChar)0x0055) /*U*/
#define LOWER_U ((UChar)0x0075) /*u*/
#define OPEN_BRACE ((UChar)123) /*{*/
#define CLOSE_BRACE ((UChar)125) /*}*/
#define UPPER_P ((UChar)0x0050) /*P*/
#define LOWER_P ((UChar)0x0070) /*p*/
#define UPPER_N ((UChar)78) /*N*/
#define EQUALS ((UChar)0x003D) /*=*/
// HIGH_VALUE > all valid values. 110000 for codepoints
#define UNICODESET_HIGH 0x0110000
@@ -444,7 +426,6 @@ UBool UnicodeSet::contains(UChar32 start, UChar32 end) const {
* @return <tt>true</tt> if this set contains the specified string
*/
UBool UnicodeSet::contains(const UnicodeString& s) const {
if (s.length() == 0) return FALSE;
int32_t cp = getSingleCP(s);
if (cp < 0) {
return stringsContains(s);
@@ -559,11 +540,9 @@ UBool UnicodeSet::matchesIndexValue(uint8_t v) const {
if (hasStrings()) {
for (i=0; i<strings->size(); ++i) {
const UnicodeString& s = *(const UnicodeString*)strings->elementAt(i);
//if (s.length() == 0) {
// // Empty strings match everything
// return TRUE;
//}
// assert(s.length() != 0); // We enforce this elsewhere
if (s.isEmpty()) {
continue; // skip the empty string
}
UChar32 c = s.char32At(0);
if ((c & 0xFF) == v) {
return TRUE;
@@ -582,9 +561,6 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
int32_t limit,
UBool incremental) {
if (offset == limit) {
// Strings, if any, have length != 0, so we don't worry
// about them here. If we ever allow zero-length strings
// we must check for them here.
if (contains(U_ETHER)) {
return incremental ? U_PARTIAL_MATCH : U_MATCH;
} else {
@@ -614,11 +590,9 @@ UMatchDegree UnicodeSet::matches(const Replaceable& text,
for (i=0; i<strings->size(); ++i) {
const UnicodeString& trial = *(const UnicodeString*)strings->elementAt(i);
//if (trial.length() == 0) {
// return U_MATCH; // null-string always matches
//}
// assert(trial.length() != 0); // We ensure this elsewhere
if (trial.isEmpty()) {
continue; // skip the empty string
}
UChar c = trial.charAt(forward ? 0 : trial.length() - 1);
@@ -971,12 +945,12 @@ UnicodeSet& UnicodeSet::add(UChar32 c) {
* present. If this set already contains the multicharacter,
* the call leaves this set unchanged.
* Thus "ch" => {"ch"}
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
*
* @param s the source string
* @return the modified set, for chaining
*/
UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (!stringsContains(s)) {
@@ -991,8 +965,7 @@ UnicodeSet& UnicodeSet::add(const UnicodeString& s) {
/**
* Adds the given string, in order, to 'strings'. The given string
* must have been checked by the caller to not be empty and to not
* already be in 'strings'.
* must have been checked by the caller to not already be in 'strings'.
*/
void UnicodeSet::_add(const UnicodeString& s) {
if (isFrozen() || isBogus()) {
@@ -1021,16 +994,13 @@ void UnicodeSet::_add(const UnicodeString& s) {
* @param string to test
*/
int32_t UnicodeSet::getSingleCP(const UnicodeString& s) {
    // Returns the code point when s consists of exactly one code point,
    // and -1 otherwise. Any other length, including the empty string,
    // falls through to the final return.
    const int32_t sLength = s.length();
    if (sLength == 1) {
        // A single code unit is returned as-is.
        return s.charAt(0);
    }
    if (sLength == 2) {
        // Two code units count as one code point only when they decode
        // to a value above 0xFFFF.
        const UChar32 cp = s.char32At(0);
        if (cp > 0xFFFF) { // is surrogate pair
            return cp;
        }
    }
    return -1;
}
@@ -1150,6 +1120,26 @@ UnicodeSet& UnicodeSet::retain(UChar32 c) {
return retain(c, c);
}
/**
 * Retains only the given string in this set.
 * When getSingleCP() resolves the string to a single code point, the work
 * is delegated to retain(cp, cp). Otherwise the set is left alone if the
 * string is already its only element; in every other case clear() is
 * called and the string is re-added only if it was present beforehand.
 * A frozen or bogus set is returned unchanged.
 *
 * @param s the string to retain
 * @return this object, for chaining
 */
UnicodeSet& UnicodeSet::retain(const UnicodeString &s) {
    if (isFrozen() || isBogus()) {
        return *this;
    }

    const UChar32 codePoint = getSingleCP(s);
    if (codePoint >= 0) {
        // s stands for one code point: use the range overload.
        retain(codePoint, codePoint);
        return *this;
    }

    const bool wasPresent = stringsContains(s);
    if (wasPresent) {
        // Nothing to do when s is already the sole element. Test
        // getRangeCount() first so that the somewhat-expensive size()
        // is skipped whenever the set still holds code points.
        if (getRangeCount() == 0 && size() == 1) {
            return *this;
        }
    }

    clear();
    if (wasPresent) {
        _add(s);
    }
    return *this;
}
/**
* Removes the specified range from this set if it is present.
* The set will not contain the specified range once the call
@@ -1186,7 +1176,7 @@ UnicodeSet& UnicodeSet::remove(UChar32 c) {
* @return the modified set, for chaining
*/
UnicodeSet& UnicodeSet::remove(const UnicodeString& s) {
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (strings != nullptr && strings->removeElement((void*) &s)) {
@@ -1252,12 +1242,12 @@ UnicodeSet& UnicodeSet::complement(void) {
* Complement the specified string in this set.
* The set will not contain the specified string once the call
* returns.
* <br><b>Warning: you cannot add an empty string ("") to a UnicodeSet.</b>
*
* @param s the string to complement
* @return this object, for chaining
*/
UnicodeSet& UnicodeSet::complement(const UnicodeString& s) {
if (s.length() == 0 || isFrozen() || isBogus()) return *this;
if (isFrozen() || isBogus()) return *this;
int32_t cp = getSingleCP(s);
if (cp < 0) {
if (stringsContains(s)) {
@@ -2001,22 +1991,22 @@ escapeUnprintable) {
}
// Okay to let ':' pass through
switch (c) {
case SET_OPEN:
case SET_CLOSE:
case HYPHEN:
case COMPLEMENT:
case INTERSECTION:
case BACKSLASH:
case OPEN_BRACE:
case CLOSE_BRACE:
case COLON:
case u'[':
case u']':
case u'-':
case u'^':
case u'&':
case u'\\':
case u'{':
case u'}':
case u':':
case SymbolTable::SYMBOL_REF:
buf.append(BACKSLASH);
buf.append(u'\\');
break;
default:
// Escape whitespace
if (PatternProps::isWhiteSpace(c)) {
buf.append(BACKSLASH);
buf.append(u'\\');
}
break;
}
@@ -2049,7 +2039,7 @@ UnicodeString& UnicodeSet::_toPattern(UnicodeString& result,
backslashCount = 0;
} else {
result.append(c);
if (c == BACKSLASH) {
if (c == u'\\') {
++backslashCount;
} else {
backslashCount = 0;
@@ -2082,13 +2072,13 @@ UnicodeString& UnicodeSet::toPattern(UnicodeString& result,
UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
UBool escapeUnprintable) const
{
result.append(SET_OPEN);
result.append(u'[');
// // Check against the predefined categories. We implicitly build
// // up ALL category sets the first time toPattern() is called.
// for (int8_t cat=0; cat<Unicode::GENERAL_TYPES_COUNT; ++cat) {
// if (*this == getCategorySet(cat)) {
// result.append(COLON);
// result.append(u':');
// result.append(CATEGORY_NAMES, cat*2, 2);
// return result.append(CATEGORY_CLOSE);
// }
@@ -2104,7 +2094,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
getRangeEnd(count-1) == MAX_VALUE) {
// Emit the inverse
result.append(COMPLEMENT);
result.append(u'^');
for (int32_t i = 1; i < count; ++i) {
UChar32 start = getRangeEnd(i-1)+1;
@@ -2112,7 +2102,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
_appendToPat(result, start, escapeUnprintable);
if (start != end) {
if ((start+1) != end) {
result.append(HYPHEN);
result.append(u'-');
}
_appendToPat(result, end, escapeUnprintable);
}
@@ -2127,7 +2117,7 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
_appendToPat(result, start, escapeUnprintable);
if (start != end) {
if ((start+1) != end) {
result.append(HYPHEN);
result.append(u'-');
}
_appendToPat(result, end, escapeUnprintable);
}
@@ -2136,14 +2126,14 @@ UnicodeString& UnicodeSet::_generatePattern(UnicodeString& result,
if (strings != nullptr) {
for (int32_t i = 0; i<strings->size(); ++i) {
result.append(OPEN_BRACE);
result.append(u'{');
_appendToPat(result,
*(const UnicodeString*) strings->elementAt(i),
escapeUnprintable);
result.append(CLOSE_BRACE);
result.append(u'}');
}
}
return result.append(SET_CLOSE);
return result.append(u']');
}
/**