diff --git a/thirdparty/README.md b/thirdparty/README.md index b6a4a037498..e822850dd92 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -420,13 +420,13 @@ Files extracted from upstream source: ## icu4c - Upstream: https://github.com/unicode-org/icu -- Version: 76.1 (8eca245c7484ac6cc179e3e5f7c1ea7680810f39, 2024) +- Version: 77.1 (457157a92aa053e632cc7fcfd0e12f8a943b2d11, 2025) - License: Unicode Files extracted from upstream source: - The `common` folder -- `scriptset.*`, `ucln_in.*`, `uspoof.cpp"` and `uspoof_impl.cpp` from the `i18n` folder +- `scriptset.*`, `ucln_in.*`, `uspoof.cpp` and `uspoof_impl.*` from the `i18n` folder - `uspoof.h` from the `i18n/unicode` folder - `LICENSE` diff --git a/thirdparty/icu4c/LICENSE b/thirdparty/icu4c/LICENSE index 180db98fcc6..0b9efcd9092 100644 --- a/thirdparty/icu4c/LICENSE +++ b/thirdparty/icu4c/LICENSE @@ -2,7 +2,7 @@ UNICODE LICENSE V3 COPYRIGHT AND PERMISSION NOTICE -Copyright © 2016-2024 Unicode, Inc. +Copyright © 2016-2025 Unicode, Inc. NOTICE TO USER: Carefully read the following legal agreement. 
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING DATA FILES, AND/OR diff --git a/thirdparty/icu4c/common/brkiter.cpp b/thirdparty/icu4c/common/brkiter.cpp index 4d945cc17e2..44a13ee6a2a 100644 --- a/thirdparty/icu4c/common/brkiter.cpp +++ b/thirdparty/icu4c/common/brkiter.cpp @@ -59,7 +59,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st { char fnbuff[256]; char ext[4]={'\0'}; - CharString actualLocale; + CharString actual; int32_t size; const char16_t* brkfname = nullptr; UResourceBundle brkRulesStack; @@ -94,7 +94,7 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st // Use the string if we found it if (U_SUCCESS(status) && brkfname) { - actualLocale.append(ures_getLocaleInternal(brkName, &status), -1, status); + actual.append(ures_getLocaleInternal(brkName, &status), -1, status); char16_t* extStart=u_strchr(brkfname, 0x002e); int len = 0; @@ -123,10 +123,9 @@ BreakIterator::buildInstance(const Locale& loc, const char *type, UErrorCode &st if (U_SUCCESS(status) && result != nullptr) { U_LOCALE_BASED(locBased, *(BreakIterator*)result); - locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), - actualLocale.data()); - uprv_strncpy(result->requestLocale, loc.getName(), ULOC_FULLNAME_CAPACITY); - result->requestLocale[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate + locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), + actual.data(), status); + LocaleBased::setLocaleID(loc.getName(), result->requestLocale, status); } ures_close(b); @@ -206,26 +205,32 @@ BreakIterator::getAvailableLocales(int32_t& count) BreakIterator::BreakIterator() { - *validLocale = *actualLocale = *requestLocale = 0; } BreakIterator::BreakIterator(const BreakIterator &other) : UObject(other) { - uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); - uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); - uprv_strncpy(requestLocale, 
other.requestLocale, sizeof(requestLocale)); + UErrorCode status = U_ZERO_ERROR; + U_LOCALE_BASED(locBased, *this); + locBased.setLocaleIDs(other.validLocale, other.actualLocale, status); + LocaleBased::setLocaleID(other.requestLocale, requestLocale, status); + U_ASSERT(U_SUCCESS(status)); } BreakIterator &BreakIterator::operator =(const BreakIterator &other) { if (this != &other) { - uprv_strncpy(actualLocale, other.actualLocale, sizeof(actualLocale)); - uprv_strncpy(validLocale, other.validLocale, sizeof(validLocale)); - uprv_strncpy(requestLocale, other.requestLocale, sizeof(requestLocale)); + UErrorCode status = U_ZERO_ERROR; + U_LOCALE_BASED(locBased, *this); + locBased.setLocaleIDs(other.validLocale, other.actualLocale, status); + LocaleBased::setLocaleID(other.requestLocale, requestLocale, status); + U_ASSERT(U_SUCCESS(status)); } return *this; } BreakIterator::~BreakIterator() { + delete validLocale; + delete actualLocale; + delete requestLocale; } // ------------------------------------------ @@ -394,7 +399,7 @@ BreakIterator::createInstance(const Locale& loc, int32_t kind, UErrorCode& statu // revisit this in ICU 3.0 and clean it up/fix it/remove it. 
if (U_SUCCESS(status) && (result != nullptr) && *actualLoc.getName() != 0) { U_LOCALE_BASED(locBased, *result); - locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName()); + locBased.setLocaleIDs(actualLoc.getName(), actualLoc.getName(), status); } return result; } @@ -488,6 +493,7 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) } if (U_FAILURE(status)) { + delete result; return nullptr; } @@ -496,20 +502,25 @@ BreakIterator::makeInstance(const Locale& loc, int32_t kind, UErrorCode& status) Locale BreakIterator::getLocale(ULocDataLocaleType type, UErrorCode& status) const { - if (type == ULOC_REQUESTED_LOCALE) { - return {requestLocale}; + if (U_FAILURE(status)) { + return Locale::getRoot(); } - U_LOCALE_BASED(locBased, *this); - return locBased.getLocale(type, status); + if (type == ULOC_REQUESTED_LOCALE) { + return requestLocale == nullptr ? + Locale::getRoot() : Locale(requestLocale->data()); + } + return LocaleBased::getLocale(validLocale, actualLocale, type, status); } const char * BreakIterator::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { - if (type == ULOC_REQUESTED_LOCALE) { - return requestLocale; + if (U_FAILURE(status)) { + return nullptr; } - U_LOCALE_BASED(locBased, *this); - return locBased.getLocaleID(type, status); + if (type == ULOC_REQUESTED_LOCALE) { + return requestLocale == nullptr ? 
"" : requestLocale->data(); + } + return LocaleBased::getLocaleID(validLocale, actualLocale, type, status); } @@ -536,8 +547,10 @@ int32_t BreakIterator::getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UE } BreakIterator::BreakIterator (const Locale& valid, const Locale& actual) { + UErrorCode status = U_ZERO_ERROR; U_LOCALE_BASED(locBased, (*this)); - locBased.setLocaleIDs(valid, actual); + locBased.setLocaleIDs(valid.getName(), actual.getName(), status); + U_ASSERT(U_SUCCESS(status)); } U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/charstr.cpp b/thirdparty/icu4c/common/charstr.cpp index f76cc8a4dc9..dadc829b0b5 100644 --- a/thirdparty/icu4c/common/charstr.cpp +++ b/thirdparty/icu4c/common/charstr.cpp @@ -70,6 +70,15 @@ CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) { return *this; } +CharString &CharString::copyFrom(StringPiece s, UErrorCode &errorCode) { + if (U_FAILURE(errorCode)) { + return *this; + } + len = 0; + append(s, errorCode); + return *this; +} + int32_t CharString::lastIndexOf(char c) const { for(int32_t i=len; i>0;) { if(buffer[--i]==c) { @@ -143,7 +152,7 @@ CharString &CharString::append(const char *s, int32_t sLength, UErrorCode &error return *this; } -CharString &CharString::appendNumber(int32_t number, UErrorCode &status) { +CharString &CharString::appendNumber(int64_t number, UErrorCode &status) { if (number < 0) { this->append('-', status); if (U_FAILURE(status)) { diff --git a/thirdparty/icu4c/common/charstr.h b/thirdparty/icu4c/common/charstr.h index 08283ca452c..ea54ede735c 100644 --- a/thirdparty/icu4c/common/charstr.h +++ b/thirdparty/icu4c/common/charstr.h @@ -74,6 +74,7 @@ public: * use a UErrorCode where memory allocations might be needed. 
*/ CharString &copyFrom(const CharString &other, UErrorCode &errorCode); + CharString &copyFrom(StringPiece s, UErrorCode &errorCode); UBool isEmpty() const { return len==0; } int32_t length() const { return len; } @@ -135,7 +136,7 @@ public: } CharString &append(const char *s, int32_t sLength, UErrorCode &status); - CharString &appendNumber(int32_t number, UErrorCode &status); + CharString &appendNumber(int64_t number, UErrorCode &status); /** * Returns a writable buffer for appending and writes the buffer's capacity to diff --git a/thirdparty/icu4c/common/localefallback_data.h b/thirdparty/icu4c/common/localefallback_data.h index 0accf0324d7..3b8ad8a3f39 100644 --- a/thirdparty/icu4c/common/localefallback_data.h +++ b/thirdparty/icu4c/common/localefallback_data.h @@ -11,11 +11,11 @@ //====================================================================== // Default script table const char scriptCodeChars[] = - "Aghb\0Ahom\0Arab\0Armi\0Armn\0Avst\0Bamu\0Bass\0Batk\0Beng\0Bopo\0" - "Brah\0Cakm\0Cans\0Cari\0Cham\0Cher\0Chrs\0Copt\0Cprt\0Cyrl\0Deva\0" - "Egyp\0Elym\0Ethi\0Geor\0Gong\0Gonm\0Goth\0Gran\0Grek\0Gujr\0Guru\0" - "Hang\0Hani\0Hans\0Hant\0Hebr\0Hluw\0Hmnp\0Ital\0Java\0Jpan\0Kali\0" - "Kana\0Kawi\0Khar\0Khmr\0Kits\0Knda\0Kore\0Lana\0Laoo\0Latf\0Latg\0" + "Aghb\0Ahom\0Arab\0Armi\0Armn\0Avst\0Bali\0Bamu\0Bass\0Batk\0Beng\0" + "Bopo\0Brah\0Cakm\0Cans\0Cari\0Cham\0Cher\0Chrs\0Copt\0Cprt\0Cyrl\0" + "Deva\0Egyp\0Elym\0Ethi\0Geor\0Gong\0Gonm\0Goth\0Gran\0Grek\0Gujr\0" + "Guru\0Hang\0Hani\0Hans\0Hant\0Hebr\0Hluw\0Hmnp\0Ital\0Java\0Jpan\0" + "Kali\0Kana\0Khar\0Khmr\0Kits\0Knda\0Kore\0Lana\0Laoo\0Latf\0Latg\0" "Lepc\0Lina\0Linb\0Lisu\0Lyci\0Lydi\0Mand\0Mani\0Marc\0Medf\0Merc\0" "Mlym\0Modi\0Mong\0Mroo\0Mtei\0Mymr\0Narb\0Newa\0Nkoo\0Nshu\0Ogam\0" "Olck\0Orkh\0Orya\0Osge\0Ougr\0Pauc\0Phli\0Phnx\0Plrd\0Prti\0Rjng\0" @@ -48,70 +48,71 @@ const char dsLocaleIDChars[] = "gbz\0gdb\0gdo\0gdx\0gez\0ggg\0gha\0ghe\0gho\0ghr\0ght\0gig\0gin\0" 
"gjk\0gju\0gld\0glh\0glk\0gml\0gmv\0gmy\0goe\0gof\0goj\0gok\0gon\0" "got\0gra\0grc\0grt\0gru\0gu\0gvr\0gwc\0gwf\0gwt\0gyo\0gzi\0ha_CM\0" - "ha_SD\0hac\0hak\0har\0haz\0hbo\0hdy\0he\0hi\0hif\0hii\0hit\0hkh\0" - "hlb\0hlu\0hmd\0hmj\0hmq\0hnd\0hne\0hnj\0hno\0hoc\0hoh\0hoj\0how\0" - "hoy\0hpo\0hrt\0hrz\0hsn\0hss\0htx\0hut\0huy\0huz\0hy\0hyw\0ii\0" - "imy\0inh\0int\0ior\0iru\0isk\0itk\0itl\0iu\0iw\0ja\0jad\0jat\0" - "jbe\0jbn\0jct\0jda\0jdg\0jdt\0jee\0jge\0ji\0jje\0jkm\0jml\0jna\0" - "jnd\0jnl\0jns\0jog\0jpa\0jpr\0jrb\0jul\0jun\0juy\0jya\0jye\0ka\0" - "kaa\0kap\0kaw\0kbd\0kbg\0kbu\0kby\0kca\0kcy\0kdq\0kdt\0ket\0kev\0" - "kex\0key\0kfa\0kfb\0kfc\0kfd\0kfe\0kfg\0kfh\0kfi\0kfk\0kfm\0kfp\0" - "kfq\0kfr\0kfs\0kfu\0kfx\0kfy\0kgj\0kgy\0khb\0khf\0khg\0khn\0kho\0" - "kht\0khv\0khw\0kif\0kim\0kip\0kjg\0kjh\0kjl\0kjo\0kjp\0kjt\0kjz\0" - "kk\0kk_AF\0kk_CN\0kk_IR\0kk_MN\0kkf\0kkh\0kkt\0kle\0klj\0klr\0" - "km\0kmj\0kmz\0kn\0knn\0ko\0koi\0kok\0kpt\0kpy\0kqd\0kqy\0kra\0" - "krc\0krk\0krr\0kru\0krv\0ks\0ksu\0ksw\0ksz\0ktb\0kte\0ktl\0ktp\0" - "ku_LB\0kuf\0kum\0kv\0kva\0kvq\0kvt\0kvx\0kvy\0kxf\0kxk\0kxm\0" - "kxp\0ky\0ky_CN\0kyu\0kyv\0kyw\0lab\0lad\0lae\0lah\0lbe\0lbf\0" - "lbj\0lbm\0lbo\0lbr\0lcp\0lep\0lez\0lhm\0lhs\0lif\0lis\0lkh\0lki\0" - "lmh\0lmn\0lo\0loy\0lpo\0lrc\0lrk\0lrl\0lsa\0lsd\0lss\0ltc\0luk\0" - "luu\0luv\0luz\0lwl\0lwm\0lya\0lzh\0mag\0mai\0man_GN\0mby\0mde\0" - "mdf\0mdx\0mdy\0mfa\0mfi\0mga\0mgp\0mhj\0mid\0mjl\0mjq\0mjr\0mjt\0" - "mju\0mjv\0mjz\0mk\0mkb\0mke\0mki\0mkm\0ml\0mlf\0mn\0mn_CN\0mnc\0" - "mni\0mnj\0mns\0mnw\0mpz\0mr\0mra\0mrd\0mrj\0mro\0mrr\0ms_CC\0" - "mtm\0mtr\0mud\0muk\0mut\0muv\0muz\0mve\0mvf\0mvy\0mvz\0mwr\0mwt\0" - "mww\0my\0mym\0myv\0myz\0mzn\0nan\0nao\0ncd\0ncq\0ndf\0ne\0neg\0" - "neh\0nei\0new\0ngt\0nio\0nit\0niv\0nli\0nlm\0nlx\0nmm\0nnp\0nod\0" - "noe\0nog\0noi\0non\0nos\0npb\0nqo\0nrn\0nsd\0nsf\0nsk\0nst\0nsv\0" - "nty\0ntz\0nwc\0nwx\0nyl\0nyq\0nyw\0oaa\0oac\0oar\0oav\0obm\0obr\0" - "odk\0oht\0oj\0ojs\0okm\0oko\0okz\0ola\0ole\0omk\0omp\0omr\0omx\0" - 
"oon\0or\0ort\0oru\0orv\0os\0osa\0osc\0osi\0ota\0otb\0otk\0oty\0" - "oui\0pa\0pa_PK\0pal\0paq\0pbt\0pcb\0pce\0pcf\0pcg\0pch\0pci\0" - "pcj\0peg\0peo\0pgd\0pgg\0pgl\0pgn\0phd\0phk\0phl\0phn\0pho\0phr\0" - "pht\0phu\0phv\0phw\0pi\0pka\0pkr\0plk\0pll\0pmh\0pnt\0pra\0prc\0" - "prd\0prt\0prx\0ps\0psh\0psi\0pst\0psu\0pum\0pwo\0pwr\0pww\0pyx\0" - "qxq\0raa\0rab\0raf\0rah\0raj\0rav\0rbb\0rdb\0rei\0rhg\0rji\0rjs\0" - "rka\0rki\0rkt\0rmi\0rmt\0rmz\0rsk\0rtw\0ru\0rue\0rut\0rwr\0ryu\0" - "sa\0sah\0sam\0sat\0saz\0sbn\0sbu\0sck\0scl\0scp\0sct\0scu\0scx\0" - "sd\0sd_IN\0sdb\0sdf\0sdg\0sdh\0sdr\0sds\0sel\0sfm\0sga\0sgh\0" - "sgj\0sgr\0sgt\0sgw\0sgy\0shd\0shi\0shm\0shn\0shu\0shv\0si\0sia\0" - "sip\0siy\0siz\0sjd\0sjp\0sjt\0skb\0skj\0skr\0smh\0smp\0smu\0smy\0" - "soa\0sog\0soi\0sou\0spt\0spv\0sqo\0sqq\0sqt\0sr\0srb\0srh\0srx\0" - "srz\0ssh\0sss\0sts\0stv\0sty\0suz\0sva\0swb\0swi\0swv\0sxu\0syc\0" - "syl\0syn\0syr\0syw\0ta\0tab\0taj\0tbk\0tcn\0tco\0tcx\0tcy\0tda\0" - "tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0tg_PK\0tge\0tgf\0th\0the\0thf\0" - "thi\0thl\0thm\0thq\0thr\0ths\0ti\0tig\0tij\0tin\0tjl\0tjo\0tkb\0" - "tks\0tkt\0tmr\0tnv\0tov\0tpu\0tra\0trg\0trm\0trw\0tsd\0tsj\0tt\0" - "tth\0tto\0tts\0ttz\0tvn\0twm\0txg\0txo\0tyr\0tyv\0ude\0udg\0udi\0" - "udm\0ug\0ug_KZ\0ug_MN\0uga\0ugh\0ugo\0uk\0uki\0ulc\0unr\0unr_NP\0" - "unx\0ur\0urk\0ush\0uum\0uz_AF\0uz_CN\0uzs\0vaa\0vaf\0vah\0vai\0" - "vas\0vav\0vay\0vgr\0vjk\0vmd\0vmh\0wal\0wbk\0wbq\0wbr\0wle\0wlo\0" - "wme\0wne\0wni\0wsg\0wsv\0wtm\0wuu\0xag\0xal\0xan\0xas\0xco\0xcr\0" - "xdq\0xhe\0xhm\0xis\0xka\0xkc\0xkf\0xkj\0xkp\0xlc\0xld\0xly\0xmf\0" - "xmn\0xmr\0xna\0xnr\0xpg\0xpi\0xpm\0xpr\0xrm\0xrn\0xsa\0xsr\0xtq\0" - "xub\0xuj\0xve\0xvi\0xwo\0xzh\0yai\0ybh\0ybi\0ydg\0yea\0yej\0yeu\0" - "ygp\0yhd\0yi\0yig\0yih\0yiv\0ykg\0ykh\0yna\0ynk\0yoi\0yoy\0yrk\0" - "ysd\0ysn\0ysp\0ysr\0ysy\0yud\0yue\0yue_CN\0yug\0yux\0ywq\0ywu\0" - "zau\0zba\0zch\0zdj\0zeh\0zen\0zgb\0zgh\0zgm\0zgn\0zh\0zh_AU\0" - "zh_BN\0zh_GB\0zh_GF\0zh_HK\0zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0" - 
"zh_SR\0zh_TH\0zh_TW\0zh_US\0zh_VN\0zhd\0zhx\0zko\0zkt\0zkz\0zlj\0" - "zln\0zlq\0zqe\0zrg\0zrp\0zum\0zwa\0zyg\0zyn\0zzj\0"; + "ha_SD\0hac\0hak\0hak_TW\0har\0haz\0hbo\0hdy\0he\0hi\0hif\0hii\0" + "hit\0hkh\0hlb\0hlu\0hmd\0hmj\0hmq\0hnd\0hne\0hnj\0hno\0hoc\0hoh\0" + "hoj\0how\0hoy\0hpo\0hrt\0hrz\0hsn\0hss\0htx\0hut\0huy\0huz\0hy\0" + "hyw\0ii\0imy\0inh\0int\0ior\0iru\0isk\0itk\0itl\0iu\0iw\0ja\0" + "jad\0jat\0jbe\0jbn\0jct\0jda\0jdg\0jdt\0jee\0jge\0ji\0jje\0jkm\0" + "jml\0jna\0jnd\0jnl\0jns\0jog\0jpa\0jpr\0jrb\0jul\0jun\0juy\0jya\0" + "jye\0ka\0kaa\0kap\0kaw\0kbd\0kbg\0kbu\0kby\0kca\0kcy\0kdq\0kdt\0" + "ket\0kev\0kex\0key\0kfa\0kfb\0kfc\0kfd\0kfe\0kfg\0kfh\0kfi\0kfk\0" + "kfm\0kfp\0kfq\0kfr\0kfs\0kfu\0kfx\0kfy\0kgj\0kgy\0khb\0khf\0khg\0" + "khn\0kho\0kht\0khv\0khw\0kif\0kim\0kip\0kjg\0kjh\0kjl\0kjo\0kjp\0" + "kjt\0kjz\0kk\0kk_AF\0kk_CN\0kk_IR\0kk_MN\0kkf\0kkh\0kkt\0kle\0" + "klj\0klr\0km\0kmj\0kmz\0kn\0knn\0ko\0koi\0kok\0kpt\0kpy\0kqd\0" + "kqy\0kra\0krc\0krk\0krr\0kru\0krv\0ks\0ksu\0ksw\0ksz\0ktb\0kte\0" + "ktl\0ktp\0ku_LB\0kuf\0kum\0kv\0kva\0kvq\0kvt\0kvx\0kvy\0kxf\0" + "kxk\0kxm\0kxp\0ky\0ky_CN\0kyu\0kyv\0kyw\0lab\0lad\0lae\0lah\0" + "lbe\0lbf\0lbj\0lbm\0lbo\0lbr\0lcp\0lep\0lez\0lhm\0lhs\0lif\0lis\0" + "lkh\0lki\0lmh\0lmn\0lo\0loy\0lpo\0lrc\0lrk\0lrl\0lsa\0lsd\0lss\0" + "ltc\0luk\0luu\0luv\0luz\0lwl\0lwm\0lya\0lzh\0lzz_GE\0mag\0mai\0" + "mby\0mde\0mdf\0mdx\0mdy\0mfa\0mfi\0mga\0mgp\0mhj\0mid\0mjl\0mjq\0" + "mjr\0mjt\0mju\0mjv\0mjz\0mk\0mkb\0mke\0mki\0mkm\0ml\0mlf\0mn\0" + "mn_CN\0mnc\0mni\0mnj\0mns\0mnw\0mpz\0mr\0mra\0mrd\0mrj\0mro\0" + "mrr\0ms_CC\0mtm\0mtr\0mud\0muk\0mut\0muv\0muz\0mve\0mvf\0mvy\0" + "mvz\0mwr\0mwt\0mww\0my\0mym\0myv\0myz\0mzn\0nan\0nan_TW\0nao\0" + "ncd\0ncq\0ndf\0ne\0neg\0neh\0nei\0new\0ngt\0nio\0nit\0niv\0nli\0" + "nlm\0nlx\0nmm\0nnp\0nod\0noe\0nog\0noi\0non\0nos\0npb\0nqo\0nrn\0" + "nsd\0nsf\0nsk\0nst\0nsv\0nty\0ntz\0nwc\0nwx\0nyl\0nyq\0nyw\0oaa\0" + "oac\0oar\0oav\0obm\0obr\0odk\0oht\0oj\0ojs\0okm\0oko\0okz\0ola\0" + 
"ole\0omk\0omp\0omr\0omx\0oon\0or\0ort\0oru\0orv\0os\0osa\0osc\0" + "osi\0ota\0otb\0otk\0oty\0oui\0pa\0pa_PK\0pal\0paq\0pbt\0pcb\0" + "pce\0pcf\0pcg\0pch\0pci\0pcj\0peg\0peo\0pgd\0pgg\0pgl\0pgn\0phd\0" + "phk\0phl\0phn\0pho\0phr\0pht\0phu\0phv\0phw\0pi\0pka\0pkr\0plk\0" + "pll\0pmh\0pnt\0pnt_RU\0pra\0prc\0prd\0prt\0prx\0ps\0psh\0psi\0" + "pst\0psu\0pum\0pwo\0pwr\0pww\0pyx\0qxq\0raa\0rab\0raf\0rah\0raj\0" + "rav\0rbb\0rdb\0rei\0rhg\0rji\0rjs\0rka\0rki\0rkt\0rmi\0rmt\0rmz\0" + "rsk\0rtw\0ru\0rue\0rut\0rwr\0ryu\0sa\0sah\0sam\0sat\0saz\0sbn\0" + "sbu\0sck\0scl\0scp\0sct\0scu\0scx\0sd\0sd_IN\0sdb\0sdf\0sdg\0" + "sdh\0sdr\0sds\0sel\0sfm\0sgh\0sgj\0sgr\0sgt\0sgw\0sgy\0shd\0shi\0" + "shm\0shn\0shu\0shv\0si\0sia\0sip\0siy\0siz\0sjd\0sjp\0sjt\0skb\0" + "skj\0skr\0smh\0smp\0smu\0smy\0soa\0sog\0soi\0sou\0spt\0spv\0sqo\0" + "sqq\0sqt\0sr\0srb\0srh\0srx\0srz\0ssh\0sss\0sts\0stv\0sty\0suz\0" + "sva\0swb\0swi\0swv\0sxu\0syc\0syl\0syn\0syr\0syw\0ta\0tab\0taj\0" + "tbk\0tcn\0tco\0tcx\0tcy\0tda\0tdb\0tdd\0tdg\0tdh\0te\0tes\0tg\0" + "tg_PK\0tge\0tgf\0th\0the\0thf\0thi\0thl\0thm\0thq\0thr\0ths\0" + "ti\0tig\0tij\0tin\0tjl\0tjo\0tkb\0tks\0tkt\0tmr\0tnv\0tov\0tpu\0" + "tra\0trg\0trm\0trw\0tsd\0tsj\0tt\0tth\0tto\0tts\0ttz\0tvn\0twm\0" + "txg\0txo\0tyr\0tyv\0ude\0udg\0udi\0udm\0ug\0ug_KZ\0ug_MN\0uga\0" + "ugh\0ugo\0uk\0uki\0ulc\0unr\0unr_NP\0unx\0ur\0urk\0ush\0uum\0" + "uz_AF\0uz_CN\0uzs\0vaa\0vaf\0vah\0vai\0vas\0vav\0vay\0vgr\0vjk\0" + "vmd\0vmh\0wal\0wbk\0wbq\0wbr\0wle\0wlo\0wme\0wne\0wni\0wsg\0wsv\0" + "wtm\0wuu\0xag\0xal\0xan\0xas\0xco\0xcr\0xdq\0xhe\0xhm\0xis\0xka\0" + "xkc\0xkf\0xkj\0xkp\0xlc\0xld\0xly\0xmf\0xmn\0xmr\0xna\0xnr\0xpg\0" + "xpi\0xpm\0xpr\0xrm\0xrn\0xsa\0xsr\0xtq\0xub\0xuj\0xve\0xvi\0xwo\0" + "xzh\0yai\0ybh\0ybi\0ydg\0yea\0yej\0yeu\0ygp\0yhd\0yi\0yig\0yih\0" + "yiv\0ykg\0ykh\0yna\0ynk\0yoi\0yoy\0yrk\0ysd\0ysn\0ysp\0ysr\0ysy\0" + "yud\0yue\0yue_CN\0yug\0yux\0ywq\0ywu\0zau\0zba\0zch\0zdj\0zeh\0" + "zen\0zgb\0zgh\0zgm\0zgn\0zh\0zh_AU\0zh_BN\0zh_GB\0zh_GF\0zh_HK\0" + 
"zh_ID\0zh_MO\0zh_PA\0zh_PF\0zh_PH\0zh_SR\0zh_TH\0zh_TW\0zh_US\0" + "zh_VN\0zhd\0zhx\0zko\0zkt\0zkz\0zlj\0zln\0zlq\0zqe\0zrg\0zrp\0" + "zum\0zwa\0zyg\0zyn\0zzj\0"; const int32_t defaultScriptTable[] = { 0, 330, // aaf -> Mlym 4, 10, // aao -> Arab - 8, 150, // aat -> Grek - 12, 100, // ab -> Cyrl + 8, 155, // aat -> Grek + 12, 105, // ab -> Cyrl 15, 10, // abh -> Arab 19, 435, // abl -> Rjng 23, 10, // abv -> Arab @@ -121,64 +122,64 @@ const int32_t defaultScriptTable[] = { 39, 10, // acx -> Arab 43, 10, // adf -> Arab 47, 555, // adx -> Tibt - 51, 100, // ady -> Cyrl + 51, 105, // ady -> Cyrl 55, 25, // ae -> Avst 58, 10, // aeb -> Arab 62, 10, // aec -> Arab 66, 10, // aee -> Arab 70, 10, // aeq -> Arab 74, 10, // afb -> Arab - 78, 105, // agi -> Deva - 82, 120, // agj -> Ethi - 86, 100, // agx -> Cyrl - 90, 120, // ahg -> Ethi + 78, 110, // agi -> Deva + 82, 125, // agj -> Ethi + 86, 105, // agx -> Cyrl + 90, 125, // ahg -> Ethi 94, 5, // aho -> Ahom - 98, 105, // ahr -> Deva + 98, 110, // ahr -> Deva 102, 10, // aib -> Arab 106, 495, // aii -> Syrc - 110, 185, // aij -> Hebr - 114, 220, // ain -> Kana + 110, 190, // aij -> Hebr + 114, 225, // ain -> Kana 118, 355, // aio -> Mymr 122, 10, // aiq -> Arab 126, 590, // akk -> Xsux - 130, 100, // akv -> Cyrl + 130, 105, // akv -> Cyrl 134, 260, // alk -> Laoo 138, 330, // all -> Mlym - 142, 100, // alr -> Cyrl - 146, 100, // alt -> Cyrl - 150, 120, // alw -> Ethi - 154, 120, // am -> Ethi - 157, 210, // ams -> Jpan + 142, 105, // alr -> Cyrl + 146, 105, // alt -> Cyrl + 150, 125, // alw -> Ethi + 154, 125, // am -> Ethi + 157, 215, // ams -> Jpan 161, 495, // amw -> Syrc - 165, 100, // ani -> Cyrl - 169, 105, // anp -> Deva - 173, 105, // anq -> Deva - 177, 105, // anr -> Deva - 181, 120, // anu -> Ethi - 185, 45, // aot -> Beng + 165, 105, // ani -> Cyrl + 169, 110, // anp -> Deva + 173, 110, // anq -> Deva + 177, 110, // anr -> Deva + 181, 125, // anu -> Ethi + 185, 50, // aot -> Beng 189, 10, // apc -> Arab 193, 
10, // apd -> Arab - 197, 105, // aph -> Deva - 201, 100, // aqc -> Cyrl + 197, 110, // aph -> Deva + 201, 105, // aqc -> Cyrl 205, 10, // ar -> Arab 208, 15, // arc -> Armi 212, 10, // arq -> Arab 216, 10, // ars -> Arab 220, 10, // ary -> Arab 224, 10, // arz -> Arab - 228, 45, // as -> Beng + 228, 50, // as -> Beng 231, 465, // ase -> Sgnw 235, 10, // ask -> Arab - 239, 105, // asr -> Deva + 239, 110, // asr -> Deva 243, 10, // atn -> Arab - 247, 100, // atv -> Cyrl + 247, 105, // atv -> Cyrl 251, 10, // auj -> Arab 255, 10, // auz -> Arab - 259, 100, // av -> Cyrl + 259, 105, // av -> Cyrl 262, 10, // avd -> Arab 266, 10, // avl -> Arab - 270, 105, // awa -> Deva - 274, 120, // awn -> Ethi + 270, 110, // awa -> Deva + 274, 125, // awn -> Ethi 278, 20, // axm -> Armn 282, 10, // ayh -> Arab 286, 10, // ayl -> Arab @@ -186,971 +187,973 @@ const int32_t defaultScriptTable[] = { 294, 10, // ayp -> Arab 298, 10, // az_IQ -> Arab 304, 10, // az_IR -> Arab - 310, 100, // az_RU -> Cyrl + 310, 105, // az_RU -> Cyrl 316, 10, // azb -> Arab - 320, 100, // ba -> Cyrl + 320, 105, // ba -> Cyrl 323, 10, // bal -> Arab - 327, 105, // bap -> Deva - 331, 30, // bax -> Bamu - 335, 125, // bbl -> Geor - 339, 120, // bcq -> Ethi + 327, 110, // bap -> Deva + 331, 35, // bax -> Bamu + 335, 130, // bbl -> Geor + 339, 125, // bcq -> Ethi 343, 395, // bdv -> Orya 347, 10, // bdz -> Arab - 351, 100, // be -> Cyrl - 354, 105, // bee -> Deva + 351, 105, // be -> Cyrl + 354, 110, // bee -> Deva 358, 10, // bej -> Arab - 362, 105, // bfb -> Deva + 362, 110, // bfb -> Deva 366, 520, // bfq -> Taml 370, 10, // bft -> Arab 374, 555, // bfu -> Tibt 378, 395, // bfw -> Orya - 382, 105, // bfy -> Deva - 386, 105, // bfz -> Deva - 390, 100, // bg -> Cyrl - 393, 105, // bgc -> Deva - 397, 105, // bgd -> Deva + 382, 110, // bfy -> Deva + 386, 110, // bfz -> Deva + 390, 105, // bg -> Cyrl + 393, 110, // bgc -> Deva + 397, 110, // bgd -> Deva 401, 10, // bgn -> Arab 405, 10, // bgp -> Arab - 409, 105, 
// bgq -> Deva - 413, 105, // bgw -> Deva - 417, 150, // bgx -> Grek - 421, 105, // bha -> Deva - 425, 105, // bhb -> Deva - 429, 105, // bhd -> Deva + 409, 110, // bgq -> Deva + 413, 110, // bgw -> Deva + 417, 155, // bgx -> Grek + 421, 110, // bha -> Deva + 425, 110, // bhb -> Deva + 429, 110, // bhd -> Deva 433, 10, // bhe -> Arab - 437, 100, // bhh -> Cyrl - 441, 105, // bhi -> Deva - 445, 105, // bhj -> Deva + 437, 105, // bhh -> Cyrl + 441, 110, // bhi -> Deva + 445, 110, // bhj -> Deva 449, 10, // bhm -> Arab 453, 495, // bhn -> Syrc - 457, 105, // bho -> Deva - 461, 105, // bht -> Deva - 465, 105, // bhu -> Deva - 469, 105, // biy -> Deva + 457, 110, // bho -> Deva + 461, 110, // bht -> Deva + 465, 110, // bhu -> Deva + 469, 110, // biy -> Deva 473, 495, // bjf -> Syrc - 477, 105, // bjj -> Deva + 477, 110, // bjj -> Deva 481, 10, // bjm -> Arab 485, 555, // bkk -> Tibt 489, 355, // blk -> Mymr 493, 530, // blt -> Tavt - 497, 105, // bmj -> Deva - 501, 45, // bn -> Beng - 504, 105, // bns -> Deva + 497, 110, // bmj -> Deva + 501, 50, // bn -> Beng + 504, 110, // bns -> Deva 508, 555, // bo -> Tibt - 511, 100, // bph -> Cyrl - 515, 105, // bpx -> Deva - 519, 45, // bpy -> Beng + 511, 105, // bph -> Cyrl + 515, 110, // bpx -> Deva + 519, 50, // bpy -> Beng 523, 10, // bqi -> Arab - 527, 105, // bra -> Deva + 527, 110, // bra -> Deva 531, 235, // brb -> Khmr - 535, 105, // brd -> Deva + 535, 110, // brd -> Deva 539, 10, // brh -> Arab 543, 10, // brk -> Arab 547, 555, // bro -> Tibt 551, 260, // brv -> Laoo 555, 245, // brw -> Knda - 559, 105, // brx -> Deva + 559, 110, // brx -> Deva 563, 10, // bsh -> Arab 567, 10, // bsk -> Arab - 571, 35, // bsq -> Bass - 575, 120, // bst -> Ethi - 579, 40, // btd -> Batk - 583, 40, // btm -> Batk - 587, 105, // btv -> Deva - 591, 100, // bua -> Cyrl + 571, 40, // bsq -> Bass + 575, 125, // bst -> Ethi + 579, 45, // btd -> Batk + 583, 45, // btm -> Batk + 587, 110, // btv -> Deva + 591, 105, // bua -> Cyrl 595, 355, // bwe 
-> Mymr - 599, 100, // bxm -> Cyrl + 599, 105, // bxm -> Cyrl 603, 340, // bxu -> Mong - 607, 105, // byh -> Deva - 611, 120, // byn -> Ethi - 615, 105, // byw -> Deva + 607, 110, // byh -> Deva + 611, 125, // byn -> Ethi + 615, 110, // byw -> Deva 619, 550, // bzi -> Thai 623, 550, // cbn -> Thai - 627, 60, // ccp -> Cakm + 627, 65, // ccp -> Cakm 631, 535, // cde -> Telu - 635, 105, // cdh -> Deva - 639, 155, // cdi -> Gujr - 643, 105, // cdj -> Deva - 647, 105, // cdm -> Deva - 651, 175, // cdo -> Hans - 655, 45, // cdz -> Beng - 659, 100, // ce -> Cyrl + 635, 110, // cdh -> Deva + 639, 160, // cdi -> Gujr + 643, 110, // cdj -> Deva + 647, 110, // cdm -> Deva + 651, 180, // cdo -> Hans + 655, 50, // cdz -> Beng + 659, 105, // ce -> Cyrl 662, 555, // cgk -> Tibt 666, 10, // chg -> Arab - 670, 100, // chm -> Cyrl - 674, 80, // chr -> Cher - 678, 105, // chx -> Deva - 682, 105, // cih -> Deva + 670, 105, // chm -> Cyrl + 674, 85, // chr -> Cher + 678, 110, // chx -> Deva + 682, 110, // cih -> Deva 686, 10, // cja -> Arab - 690, 100, // cji -> Cyrl - 694, 75, // cjm -> Cham - 698, 175, // cjy -> Hans + 690, 105, // cji -> Cyrl + 694, 80, // cjm -> Cham + 698, 180, // cjy -> Hans 702, 10, // ckb -> Arab - 706, 100, // ckt -> Cyrl + 706, 105, // ckt -> Cyrl 710, 10, // clh -> Arab - 714, 100, // clw -> Cyrl + 714, 105, // clw -> Cyrl 718, 485, // cmg -> Soyo 722, 555, // cna -> Tibt - 726, 175, // cnp -> Hans + 726, 180, // cnp -> Hans 730, 550, // cog -> Thai - 734, 90, // cop -> Copt - 738, 150, // cpg -> Grek - 742, 65, // cr -> Cans - 745, 100, // crh -> Cyrl - 749, 65, // crj -> Cans - 753, 65, // crk -> Cans - 757, 65, // crl -> Cans - 761, 65, // crm -> Cans + 734, 95, // cop -> Copt + 738, 155, // cpg -> Grek + 742, 70, // cr -> Cans + 745, 105, // crh -> Cyrl + 749, 70, // crj -> Cans + 753, 70, // crk -> Cans + 757, 70, // crl -> Cans + 761, 70, // crm -> Cans 765, 355, // csh -> Mymr - 769, 175, // csp -> Hans - 773, 65, // csw -> Cans + 769, 180, // csp -> 
Hans + 773, 70, // csw -> Cans 777, 410, // ctd -> Pauc - 781, 45, // ctg -> Beng - 785, 105, // ctn -> Deva + 781, 50, // ctg -> Beng + 785, 110, // ctn -> Deva 789, 520, // ctt -> Taml 793, 520, // cty -> Taml - 797, 100, // cu -> Cyrl + 797, 105, // cu -> Cyrl 800, 255, // cuu -> Lana - 804, 100, // cv -> Cyrl - 807, 175, // czh -> Hans - 811, 185, // czk -> Hebr - 815, 105, // daq -> Deva - 819, 100, // dar -> Cyrl + 804, 105, // cv -> Cyrl + 807, 180, // czh -> Hans + 811, 190, // czk -> Hebr + 815, 110, // daq -> Deva + 819, 105, // dar -> Cyrl 823, 10, // dcc -> Arab - 827, 100, // ddo -> Cyrl + 827, 105, // ddo -> Cyrl 831, 10, // def -> Arab 835, 10, // deh -> Arab - 839, 45, // der -> Beng + 839, 50, // der -> Beng 843, 10, // dgl -> Arab - 847, 105, // dhi -> Deva - 851, 155, // dhn -> Gujr - 855, 105, // dho -> Deva - 859, 105, // dhw -> Deva + 847, 110, // dhi -> Deva + 851, 160, // dhn -> Gujr + 855, 110, // dho -> Deva + 859, 110, // dhw -> Deva 863, 555, // dka -> Tibt - 867, 100, // dlg -> Cyrl + 867, 105, // dlg -> Cyrl 871, 320, // dmf -> Medf 875, 10, // dmk -> Arab 879, 10, // dml -> Arab - 883, 100, // dng -> Cyrl + 883, 105, // dng -> Cyrl 887, 355, // dnu -> Mymr 891, 355, // dnv -> Mymr - 895, 105, // doi -> Deva - 899, 120, // dox -> Ethi + 895, 110, // doi -> Deva + 899, 125, // dox -> Ethi 903, 555, // dre -> Tibt - 907, 105, // drq -> Deva - 911, 120, // drs -> Ethi - 915, 105, // dry -> Deva + 907, 110, // drq -> Deva + 911, 125, // drs -> Ethi + 915, 110, // dry -> Deva 919, 395, // dso -> Orya - 923, 105, // dty -> Deva - 927, 155, // dub -> Gujr - 931, 105, // duh -> Deva - 935, 105, // dus -> Deva + 923, 110, // dty -> Deva + 927, 160, // dub -> Gujr + 931, 110, // duh -> Deva + 935, 110, // dus -> Deva 939, 545, // dv -> Thaa 942, 395, // dwk -> Orya - 946, 105, // dwz -> Deva + 946, 110, // dwz -> Deva 950, 555, // dz -> Tibt 953, 555, // dzl -> Tibt - 957, 150, // ecr -> Grek - 961, 95, // ecy -> Cprt - 965, 110, // egy -> Egyp 
- 969, 215, // eky -> Kali - 973, 150, // el -> Grek - 976, 105, // emg -> Deva - 980, 105, // emu -> Deva - 984, 100, // enf -> Cyrl - 988, 100, // enh -> Cyrl + 957, 155, // ecr -> Grek + 961, 100, // ecy -> Cprt + 965, 115, // egy -> Egyp + 969, 220, // eky -> Kali + 973, 155, // el -> Grek + 976, 110, // emg -> Deva + 980, 110, // emu -> Deva + 984, 105, // enf -> Cyrl + 988, 105, // enh -> Cyrl 992, 520, // era -> Taml - 996, 135, // esg -> Gonm + 996, 140, // esg -> Gonm 1000, 10, // esh -> Arab - 1004, 200, // ett -> Ital - 1008, 100, // eve -> Cyrl - 1012, 100, // evn -> Cyrl + 1004, 205, // ett -> Ital + 1008, 105, // eve -> Cyrl + 1012, 105, // evn -> Cyrl 1016, 10, // fa -> Arab 1019, 10, // fay -> Arab 1023, 10, // faz -> Arab 1027, 10, // fia -> Arab - 1031, 105, // fmu -> Deva + 1031, 110, // fmu -> Deva 1035, 10, // fub -> Arab - 1039, 175, // gan -> Hans + 1039, 180, // gan -> Hans 1043, 395, // gaq -> Orya - 1047, 155, // gas -> Gujr + 1047, 160, // gas -> Gujr 1051, 535, // gau -> Telu 1055, 395, // gbj -> Orya - 1059, 105, // gbk -> Deva - 1063, 155, // gbl -> Gujr - 1067, 105, // gbm -> Deva + 1059, 110, // gbk -> Deva + 1063, 160, // gbl -> Gujr + 1067, 110, // gbm -> Deva 1071, 10, // gbz -> Arab 1075, 395, // gdb -> Orya - 1079, 100, // gdo -> Cyrl - 1083, 105, // gdx -> Deva - 1087, 120, // gez -> Ethi + 1079, 105, // gdo -> Cyrl + 1083, 110, // gdx -> Deva + 1087, 125, // gez -> Ethi 1091, 10, // ggg -> Arab 1095, 10, // gha -> Arab - 1099, 105, // ghe -> Deva + 1099, 110, // ghe -> Deva 1103, 540, // gho -> Tfng 1107, 10, // ghr -> Arab 1111, 555, // ght -> Tibt 1115, 10, // gig -> Arab - 1119, 100, // gin -> Cyrl + 1119, 105, // gin -> Cyrl 1123, 10, // gjk -> Arab 1127, 10, // gju -> Arab - 1131, 100, // gld -> Cyrl + 1131, 105, // gld -> Cyrl 1135, 10, // glh -> Arab 1139, 10, // glk -> Arab 1143, 265, // gml -> Latf - 1147, 120, // gmv -> Ethi + 1147, 125, // gmv -> Ethi 1151, 285, // gmy -> Linb 1155, 555, // goe -> Tibt - 1159, 120, 
// gof -> Ethi - 1163, 105, // goj -> Deva - 1167, 105, // gok -> Deva - 1171, 105, // gon -> Deva - 1175, 140, // got -> Goth - 1179, 105, // gra -> Deva - 1183, 95, // grc -> Cprt - 1187, 45, // grt -> Beng - 1191, 120, // gru -> Ethi - 1195, 155, // gu -> Gujr - 1198, 105, // gvr -> Deva + 1159, 125, // gof -> Ethi + 1163, 110, // goj -> Deva + 1167, 110, // gok -> Deva + 1171, 110, // gon -> Deva + 1175, 145, // got -> Goth + 1179, 110, // gra -> Deva + 1183, 155, // grc -> Grek + 1187, 50, // grt -> Beng + 1191, 125, // gru -> Ethi + 1195, 160, // gu -> Gujr + 1198, 110, // gvr -> Deva 1202, 10, // gwc -> Arab 1206, 10, // gwf -> Arab 1210, 10, // gwt -> Arab - 1214, 105, // gyo -> Deva + 1214, 110, // gyo -> Deva 1218, 10, // gzi -> Arab 1222, 10, // ha_CM -> Arab 1228, 10, // ha_SD -> Arab 1234, 10, // hac -> Arab - 1238, 175, // hak -> Hans - 1242, 120, // har -> Ethi - 1246, 10, // haz -> Arab - 1250, 185, // hbo -> Hebr - 1254, 120, // hdy -> Ethi - 1258, 185, // he -> Hebr - 1261, 105, // hi -> Deva - 1264, 105, // hif -> Deva - 1268, 505, // hii -> Takr - 1272, 590, // hit -> Xsux - 1276, 10, // hkh -> Arab - 1280, 105, // hlb -> Deva - 1284, 190, // hlu -> Hluw - 1288, 425, // hmd -> Plrd - 1292, 50, // hmj -> Bopo - 1296, 50, // hmq -> Bopo - 1300, 10, // hnd -> Arab - 1304, 105, // hne -> Deva - 1308, 195, // hnj -> Hmnp - 1312, 10, // hno -> Arab - 1316, 105, // hoc -> Deva - 1320, 10, // hoh -> Arab - 1324, 105, // hoj -> Deva - 1328, 170, // how -> Hani - 1332, 105, // hoy -> Deva - 1336, 355, // hpo -> Mymr - 1340, 495, // hrt -> Syrc - 1344, 10, // hrz -> Arab - 1348, 175, // hsn -> Hans - 1352, 10, // hss -> Arab - 1356, 590, // htx -> Xsux - 1360, 105, // hut -> Deva - 1364, 185, // huy -> Hebr - 1368, 100, // huz -> Cyrl - 1372, 20, // hy -> Armn - 1375, 20, // hyw -> Armn - 1379, 595, // ii -> Yiii - 1382, 295, // imy -> Lyci - 1386, 100, // inh -> Cyrl - 1390, 355, // int -> Mymr - 1394, 120, // ior -> Ethi - 1398, 520, // iru -> Taml - 
1402, 10, // isk -> Arab - 1406, 185, // itk -> Hebr - 1410, 100, // itl -> Cyrl - 1414, 65, // iu -> Cans - 1417, 185, // iw -> Hebr - 1420, 210, // ja -> Jpan - 1423, 10, // jad -> Arab - 1427, 10, // jat -> Arab - 1431, 185, // jbe -> Hebr - 1435, 10, // jbn -> Arab - 1439, 100, // jct -> Cyrl - 1443, 555, // jda -> Tibt - 1447, 10, // jdg -> Arab - 1451, 100, // jdt -> Cyrl - 1455, 105, // jee -> Deva - 1459, 125, // jge -> Geor - 1463, 185, // ji -> Hebr - 1466, 165, // jje -> Hang - 1470, 355, // jkm -> Mymr - 1474, 105, // jml -> Deva - 1478, 505, // jna -> Takr - 1482, 10, // jnd -> Arab - 1486, 105, // jnl -> Deva - 1490, 105, // jns -> Deva - 1494, 10, // jog -> Arab - 1498, 185, // jpa -> Hebr - 1502, 185, // jpr -> Hebr - 1506, 185, // jrb -> Hebr - 1510, 105, // jul -> Deva - 1514, 395, // jun -> Orya - 1518, 395, // juy -> Orya - 1522, 555, // jya -> Tibt - 1526, 185, // jye -> Hebr - 1530, 125, // ka -> Geor - 1533, 100, // kaa -> Cyrl - 1537, 100, // kap -> Cyrl - 1541, 225, // kaw -> Kawi - 1545, 100, // kbd -> Cyrl - 1549, 555, // kbg -> Tibt - 1553, 10, // kbu -> Arab - 1557, 10, // kby -> Arab - 1561, 100, // kca -> Cyrl - 1565, 10, // kcy -> Arab - 1569, 45, // kdq -> Beng - 1573, 550, // kdt -> Thai - 1577, 100, // ket -> Cyrl - 1581, 330, // kev -> Mlym - 1585, 105, // kex -> Deva - 1589, 535, // key -> Telu - 1593, 245, // kfa -> Knda - 1597, 105, // kfb -> Deva - 1601, 535, // kfc -> Telu - 1605, 245, // kfd -> Knda - 1609, 520, // kfe -> Taml - 1613, 245, // kfg -> Knda - 1617, 330, // kfh -> Mlym - 1621, 520, // kfi -> Taml - 1625, 105, // kfk -> Deva - 1629, 10, // kfm -> Arab - 1633, 105, // kfp -> Deva - 1637, 105, // kfq -> Deva - 1641, 105, // kfr -> Deva - 1645, 105, // kfs -> Deva - 1649, 105, // kfu -> Deva - 1653, 105, // kfx -> Deva - 1657, 105, // kfy -> Deva - 1661, 105, // kgj -> Deva - 1665, 105, // kgy -> Deva - 1669, 515, // khb -> Talu - 1673, 550, // khf -> Thai - 1677, 555, // khg -> Tibt - 1681, 105, // khn -> Deva - 
1685, 55, // kho -> Brah - 1689, 355, // kht -> Mymr - 1693, 100, // khv -> Cyrl - 1697, 10, // khw -> Arab - 1701, 105, // kif -> Deva - 1705, 100, // kim -> Cyrl - 1709, 105, // kip -> Deva - 1713, 260, // kjg -> Laoo - 1717, 100, // kjh -> Cyrl - 1721, 105, // kjl -> Deva - 1725, 105, // kjo -> Deva - 1729, 355, // kjp -> Mymr - 1733, 550, // kjt -> Thai - 1737, 555, // kjz -> Tibt - 1741, 100, // kk -> Cyrl - 1744, 10, // kk_AF -> Arab - 1750, 10, // kk_CN -> Arab - 1756, 10, // kk_IR -> Arab - 1762, 10, // kk_MN -> Arab - 1768, 555, // kkf -> Tibt - 1772, 255, // kkh -> Lana - 1776, 105, // kkt -> Deva - 1780, 105, // kle -> Deva - 1784, 10, // klj -> Arab - 1788, 105, // klr -> Deva - 1792, 235, // km -> Khmr - 1795, 105, // kmj -> Deva - 1799, 10, // kmz -> Arab - 1803, 245, // kn -> Knda - 1806, 105, // knn -> Deva - 1810, 250, // ko -> Kore - 1813, 100, // koi -> Cyrl - 1817, 105, // kok -> Deva - 1821, 100, // kpt -> Cyrl - 1825, 100, // kpy -> Cyrl - 1829, 495, // kqd -> Syrc - 1833, 120, // kqy -> Ethi - 1837, 105, // kra -> Deva - 1841, 100, // krc -> Cyrl - 1845, 100, // krk -> Cyrl - 1849, 235, // krr -> Khmr - 1853, 105, // kru -> Deva - 1857, 235, // krv -> Khmr - 1861, 10, // ks -> Arab - 1864, 355, // ksu -> Mymr - 1868, 355, // ksw -> Mymr - 1872, 105, // ksz -> Deva - 1876, 120, // ktb -> Ethi - 1880, 105, // kte -> Deva - 1884, 10, // ktl -> Arab - 1888, 425, // ktp -> Plrd - 1892, 10, // ku_LB -> Arab - 1898, 260, // kuf -> Laoo - 1902, 100, // kum -> Cyrl - 1906, 100, // kv -> Cyrl - 1909, 100, // kva -> Cyrl - 1913, 355, // kvq -> Mymr - 1917, 355, // kvt -> Mymr - 1921, 10, // kvx -> Arab - 1925, 215, // kvy -> Kali - 1929, 355, // kxf -> Mymr - 1933, 355, // kxk -> Mymr - 1937, 550, // kxm -> Thai - 1941, 10, // kxp -> Arab - 1945, 100, // ky -> Cyrl - 1948, 10, // ky_CN -> Arab - 1954, 215, // kyu -> Kali - 1958, 105, // kyv -> Deva - 1962, 105, // kyw -> Deva - 1966, 280, // lab -> Lina - 1970, 185, // lad -> Hebr - 1974, 105, // lae -> 
Deva - 1978, 10, // lah -> Arab - 1982, 100, // lbe -> Cyrl - 1986, 105, // lbf -> Deva - 1990, 555, // lbj -> Tibt - 1994, 105, // lbm -> Deva - 1998, 260, // lbo -> Laoo - 2002, 105, // lbr -> Deva - 2006, 550, // lcp -> Thai - 2010, 275, // lep -> Lepc - 2014, 100, // lez -> Cyrl - 2018, 105, // lhm -> Deva - 2022, 495, // lhs -> Syrc - 2026, 105, // lif -> Deva - 2030, 290, // lis -> Lisu - 2034, 555, // lkh -> Tibt - 2038, 10, // lki -> Arab - 2042, 105, // lmh -> Deva - 2046, 535, // lmn -> Telu - 2050, 260, // lo -> Laoo - 2053, 105, // loy -> Deva - 2057, 425, // lpo -> Plrd - 2061, 10, // lrc -> Arab - 2065, 10, // lrk -> Arab - 2069, 10, // lrl -> Arab - 2073, 10, // lsa -> Arab - 2077, 185, // lsd -> Hebr - 2081, 10, // lss -> Arab - 2085, 180, // ltc -> Hant - 2089, 555, // luk -> Tibt - 2093, 105, // luu -> Deva - 2097, 10, // luv -> Arab - 2101, 10, // luz -> Arab - 2105, 550, // lwl -> Thai - 2109, 550, // lwm -> Thai - 2113, 555, // lya -> Tibt - 2117, 175, // lzh -> Hans - 2121, 105, // mag -> Deva - 2125, 105, // mai -> Deva - 2129, 370, // man_GN -> Nkoo - 2136, 10, // mby -> Arab - 2140, 10, // mde -> Arab - 2144, 100, // mdf -> Cyrl - 2148, 120, // mdx -> Ethi - 2152, 120, // mdy -> Ethi - 2156, 10, // mfa -> Arab - 2160, 10, // mfi -> Arab - 2164, 270, // mga -> Latg - 2168, 105, // mgp -> Deva - 2172, 10, // mhj -> Arab - 2176, 305, // mid -> Mand - 2180, 105, // mjl -> Deva - 2184, 330, // mjq -> Mlym - 2188, 330, // mjr -> Mlym - 2192, 105, // mjt -> Deva - 2196, 535, // mju -> Telu - 2200, 330, // mjv -> Mlym - 2204, 105, // mjz -> Deva - 2208, 100, // mk -> Cyrl - 2211, 105, // mkb -> Deva - 2215, 105, // mke -> Deva - 2219, 10, // mki -> Arab - 2223, 550, // mkm -> Thai - 2227, 330, // ml -> Mlym - 2230, 550, // mlf -> Thai - 2234, 100, // mn -> Cyrl - 2237, 340, // mn_CN -> Mong - 2243, 340, // mnc -> Mong - 2247, 45, // mni -> Beng - 2251, 10, // mnj -> Arab - 2255, 100, // mns -> Cyrl - 2259, 355, // mnw -> Mymr - 2263, 550, // mpz -> 
Thai - 2267, 105, // mr -> Deva - 2270, 550, // mra -> Thai - 2274, 105, // mrd -> Deva - 2278, 100, // mrj -> Cyrl - 2282, 345, // mro -> Mroo - 2286, 105, // mrr -> Deva - 2290, 10, // ms_CC -> Arab - 2296, 100, // mtm -> Cyrl - 2300, 105, // mtr -> Deva - 2304, 100, // mud -> Cyrl - 2308, 555, // muk -> Tibt - 2312, 105, // mut -> Deva - 2316, 520, // muv -> Taml - 2320, 120, // muz -> Ethi - 2324, 10, // mve -> Arab - 2328, 340, // mvf -> Mong - 2332, 10, // mvy -> Arab - 2336, 120, // mvz -> Ethi - 2340, 105, // mwr -> Deva - 2344, 355, // mwt -> Mymr - 2348, 195, // mww -> Hmnp - 2352, 355, // my -> Mymr - 2355, 120, // mym -> Ethi - 2359, 100, // myv -> Cyrl - 2363, 305, // myz -> Mand - 2367, 10, // mzn -> Arab - 2371, 175, // nan -> Hans - 2375, 105, // nao -> Deva - 2379, 105, // ncd -> Deva - 2383, 260, // ncq -> Laoo - 2387, 100, // ndf -> Cyrl - 2391, 105, // ne -> Deva - 2394, 100, // neg -> Cyrl - 2398, 555, // neh -> Tibt - 2402, 590, // nei -> Xsux - 2406, 105, // new -> Deva - 2410, 260, // ngt -> Laoo - 2414, 100, // nio -> Cyrl - 2418, 535, // nit -> Telu - 2422, 100, // niv -> Cyrl - 2426, 10, // nli -> Arab - 2430, 10, // nlm -> Arab - 2434, 105, // nlx -> Deva - 2438, 105, // nmm -> Deva - 2442, 580, // nnp -> Wcho - 2446, 255, // nod -> Lana - 2450, 105, // noe -> Deva - 2454, 100, // nog -> Cyrl - 2458, 105, // noi -> Deva - 2462, 445, // non -> Runr - 2466, 595, // nos -> Yiii - 2470, 555, // npb -> Tibt - 2474, 370, // nqo -> Nkoo - 2478, 445, // nrn -> Runr - 2482, 595, // nsd -> Yiii - 2486, 595, // nsf -> Yiii - 2490, 65, // nsk -> Cans - 2494, 560, // nst -> Tnsa - 2498, 595, // nsv -> Yiii - 2502, 595, // nty -> Yiii - 2506, 10, // ntz -> Arab - 2510, 365, // nwc -> Newa - 2514, 105, // nwx -> Deva - 2518, 550, // nyl -> Thai - 2522, 10, // nyq -> Arab - 2526, 550, // nyw -> Thai - 2530, 100, // oaa -> Cyrl - 2534, 100, // oac -> Cyrl - 2538, 495, // oar -> Syrc - 2542, 125, // oav -> Geor - 2546, 420, // obm -> Phnx - 2550, 355, // 
obr -> Mymr - 2554, 10, // odk -> Arab - 2558, 590, // oht -> Xsux - 2562, 65, // oj -> Cans - 2565, 65, // ojs -> Cans - 2569, 165, // okm -> Hang - 2573, 170, // oko -> Hani - 2577, 235, // okz -> Khmr - 2581, 105, // ola -> Deva - 2585, 555, // ole -> Tibt - 2589, 100, // omk -> Cyrl - 2593, 350, // omp -> Mtei - 2597, 335, // omr -> Modi - 2601, 355, // omx -> Mymr - 2605, 105, // oon -> Deva - 2609, 395, // or -> Orya - 2612, 535, // ort -> Telu - 2616, 10, // oru -> Arab - 2620, 100, // orv -> Cyrl - 2624, 100, // os -> Cyrl - 2627, 400, // osa -> Osge - 2631, 200, // osc -> Ital - 2635, 205, // osi -> Java - 2639, 10, // ota -> Arab - 2643, 555, // otb -> Tibt - 2647, 390, // otk -> Orkh - 2651, 145, // oty -> Gran - 2655, 405, // oui -> Ougr - 2659, 160, // pa -> Guru - 2662, 10, // pa_PK -> Arab - 2668, 415, // pal -> Phli - 2672, 100, // paq -> Cyrl - 2676, 10, // pbt -> Arab - 2680, 235, // pcb -> Khmr - 2684, 355, // pce -> Mymr - 2688, 330, // pcf -> Mlym - 2692, 330, // pcg -> Mlym - 2696, 105, // pch -> Deva - 2700, 105, // pci -> Deva - 2704, 535, // pcj -> Telu - 2708, 395, // peg -> Orya - 2712, 585, // peo -> Xpeo - 2716, 230, // pgd -> Khar - 2720, 105, // pgg -> Deva - 2724, 380, // pgl -> Ogam - 2728, 200, // pgn -> Ital - 2732, 105, // phd -> Deva - 2736, 355, // phk -> Mymr - 2740, 10, // phl -> Arab - 2744, 420, // phn -> Phnx - 2748, 260, // pho -> Laoo - 2752, 10, // phr -> Arab - 2756, 550, // pht -> Thai - 2760, 550, // phu -> Thai - 2764, 10, // phv -> Arab - 2768, 105, // phw -> Deva - 2772, 470, // pi -> Sinh - 2775, 55, // pka -> Brah - 2779, 330, // pkr -> Mlym - 2783, 10, // plk -> Arab - 2787, 355, // pll -> Mymr - 2791, 55, // pmh -> Brah - 2795, 150, // pnt -> Grek - 2799, 230, // pra -> Khar - 2803, 10, // prc -> Arab - 2807, 10, // prd -> Arab - 2811, 550, // prt -> Thai - 2815, 10, // prx -> Arab - 2819, 10, // ps -> Arab - 2822, 10, // psh -> Arab - 2826, 10, // psi -> Arab - 2830, 10, // pst -> Arab - 2834, 55, // psu -> 
Brah - 2838, 105, // pum -> Deva - 2842, 355, // pwo -> Mymr - 2846, 105, // pwr -> Deva - 2850, 550, // pww -> Thai - 2854, 355, // pyx -> Mymr - 2858, 10, // qxq -> Arab - 2862, 105, // raa -> Deva - 2866, 105, // rab -> Deva - 2870, 105, // raf -> Deva - 2874, 45, // rah -> Beng - 2878, 105, // raj -> Deva - 2882, 105, // rav -> Deva - 2886, 355, // rbb -> Mymr - 2890, 10, // rdb -> Arab - 2894, 395, // rei -> Orya - 2898, 440, // rhg -> Rohg - 2902, 105, // rji -> Deva - 2906, 105, // rjs -> Deva - 2910, 235, // rka -> Khmr - 2914, 355, // rki -> Mymr - 2918, 45, // rkt -> Beng - 2922, 20, // rmi -> Armn - 2926, 10, // rmt -> Arab - 2930, 355, // rmz -> Mymr - 2934, 100, // rsk -> Cyrl - 2938, 105, // rtw -> Deva - 2942, 100, // ru -> Cyrl - 2945, 100, // rue -> Cyrl - 2949, 100, // rut -> Cyrl - 2953, 105, // rwr -> Deva - 2957, 220, // ryu -> Kana - 2961, 105, // sa -> Deva - 2964, 100, // sah -> Cyrl - 2968, 450, // sam -> Samr - 2972, 385, // sat -> Olck - 2976, 460, // saz -> Saur - 2980, 10, // sbn -> Arab - 2984, 555, // sbu -> Tibt - 2988, 105, // sck -> Deva - 2992, 10, // scl -> Arab - 2996, 105, // scp -> Deva - 3000, 260, // sct -> Laoo - 3004, 505, // scu -> Takr - 3008, 150, // scx -> Grek - 3012, 10, // sd -> Arab - 3015, 105, // sd_IN -> Deva - 3021, 10, // sdb -> Arab - 3025, 10, // sdf -> Arab - 3029, 10, // sdg -> Arab - 3033, 10, // sdh -> Arab - 3037, 45, // sdr -> Beng - 3041, 10, // sds -> Arab - 3045, 100, // sel -> Cyrl - 3049, 425, // sfm -> Plrd - 3053, 380, // sga -> Ogam - 3057, 100, // sgh -> Cyrl - 3061, 105, // sgj -> Deva - 3065, 10, // sgr -> Arab - 3069, 555, // sgt -> Tibt - 3073, 120, // sgw -> Ethi - 3077, 10, // sgy -> Arab - 3081, 10, // shd -> Arab - 3085, 540, // shi -> Tfng - 3089, 10, // shm -> Arab - 3093, 355, // shn -> Mymr - 3097, 10, // shu -> Arab - 3101, 10, // shv -> Arab - 3105, 470, // si -> Sinh - 3108, 100, // sia -> Cyrl - 3112, 555, // sip -> Tibt - 3116, 10, // siy -> Arab - 3120, 10, // siz -> Arab - 
3124, 100, // sjd -> Cyrl - 3128, 105, // sjp -> Deva - 3132, 100, // sjt -> Cyrl - 3136, 550, // skb -> Thai - 3140, 105, // skj -> Deva - 3144, 10, // skr -> Arab - 3148, 595, // smh -> Yiii - 3152, 450, // smp -> Samr - 3156, 235, // smu -> Khmr - 3160, 10, // smy -> Arab - 3164, 530, // soa -> Tavt - 3168, 475, // sog -> Sogd - 3172, 105, // soi -> Deva - 3176, 550, // sou -> Thai - 3180, 555, // spt -> Tibt - 3184, 395, // spv -> Orya - 3188, 10, // sqo -> Arab - 3192, 260, // sqq -> Laoo - 3196, 10, // sqt -> Arab - 3200, 100, // sr -> Cyrl - 3203, 480, // srb -> Sora - 3207, 10, // srh -> Arab - 3211, 105, // srx -> Deva - 3215, 10, // srz -> Arab - 3219, 10, // ssh -> Arab - 3223, 260, // sss -> Laoo - 3227, 10, // sts -> Arab - 3231, 120, // stv -> Ethi - 3235, 100, // sty -> Cyrl - 3239, 490, // suz -> Sunu - 3243, 125, // sva -> Geor - 3247, 10, // swb -> Arab - 3251, 170, // swi -> Hani - 3255, 105, // swv -> Deva - 3259, 445, // sxu -> Runr - 3263, 495, // syc -> Syrc - 3267, 45, // syl -> Beng - 3271, 495, // syn -> Syrc - 3275, 495, // syr -> Syrc - 3279, 105, // syw -> Deva - 3283, 520, // ta -> Taml - 3286, 100, // tab -> Cyrl - 3290, 105, // taj -> Deva - 3294, 500, // tbk -> Tagb - 3298, 555, // tcn -> Tibt - 3302, 355, // tco -> Mymr - 3306, 520, // tcx -> Taml - 3310, 245, // tcy -> Knda - 3314, 540, // tda -> Tfng - 3318, 105, // tdb -> Deva - 3322, 510, // tdd -> Tale - 3326, 105, // tdg -> Deva - 3330, 105, // tdh -> Deva - 3334, 535, // te -> Telu - 3337, 205, // tes -> Java - 3341, 100, // tg -> Cyrl - 3344, 10, // tg_PK -> Arab - 3350, 105, // tge -> Deva - 3354, 555, // tgf -> Tibt - 3358, 550, // th -> Thai - 3361, 105, // the -> Deva - 3365, 105, // thf -> Deva - 3369, 510, // thi -> Tale - 3373, 105, // thl -> Deva - 3377, 550, // thm -> Thai - 3381, 105, // thq -> Deva - 3385, 105, // thr -> Deva - 3389, 105, // ths -> Deva - 3393, 120, // ti -> Ethi - 3396, 120, // tig -> Ethi - 3400, 105, // tij -> Deva - 3404, 100, // tin -> Cyrl 
- 3408, 355, // tjl -> Mymr - 3412, 10, // tjo -> Arab - 3416, 105, // tkb -> Deva - 3420, 10, // tks -> Arab - 3424, 105, // tkt -> Deva - 3428, 495, // tmr -> Syrc - 3432, 60, // tnv -> Cakm - 3436, 10, // tov -> Arab - 3440, 235, // tpu -> Khmr - 3444, 10, // tra -> Arab - 3448, 185, // trg -> Hebr - 3452, 10, // trm -> Arab - 3456, 10, // trw -> Arab - 3460, 150, // tsd -> Grek - 3464, 555, // tsj -> Tibt - 3468, 100, // tt -> Cyrl - 3471, 260, // tth -> Laoo - 3475, 260, // tto -> Laoo - 3479, 550, // tts -> Thai - 3483, 105, // ttz -> Deva - 3487, 355, // tvn -> Mymr - 3491, 105, // twm -> Deva - 3495, 525, // txg -> Tang - 3499, 565, // txo -> Toto - 3503, 530, // tyr -> Tavt - 3507, 100, // tyv -> Cyrl - 3511, 100, // ude -> Cyrl - 3515, 330, // udg -> Mlym - 3519, 100, // udi -> Cyrl - 3523, 100, // udm -> Cyrl - 3527, 10, // ug -> Arab - 3530, 100, // ug_KZ -> Cyrl - 3536, 100, // ug_MN -> Cyrl - 3542, 570, // uga -> Ugar - 3546, 100, // ugh -> Cyrl - 3550, 550, // ugo -> Thai - 3554, 100, // uk -> Cyrl - 3557, 395, // uki -> Orya - 3561, 100, // ulc -> Cyrl - 3565, 45, // unr -> Beng - 3569, 105, // unr_NP -> Deva - 3576, 45, // unx -> Beng - 3580, 10, // ur -> Arab - 3583, 550, // urk -> Thai - 3587, 10, // ush -> Arab - 3591, 150, // uum -> Grek - 3595, 10, // uz_AF -> Arab - 3601, 100, // uz_CN -> Cyrl - 3607, 10, // uzs -> Arab - 3611, 520, // vaa -> Taml - 3615, 10, // vaf -> Arab - 3619, 105, // vah -> Deva - 3623, 575, // vai -> Vaii - 3627, 105, // vas -> Deva - 3631, 105, // vav -> Deva - 3635, 105, // vay -> Deva - 3639, 10, // vgr -> Arab - 3643, 105, // vjk -> Deva - 3647, 245, // vmd -> Knda - 3651, 10, // vmh -> Arab - 3655, 120, // wal -> Ethi - 3659, 10, // wbk -> Arab - 3663, 535, // wbq -> Telu - 3667, 105, // wbr -> Deva - 3671, 120, // wle -> Ethi - 3675, 10, // wlo -> Arab - 3679, 105, // wme -> Deva - 3683, 10, // wne -> Arab - 3687, 10, // wni -> Arab - 3691, 130, // wsg -> Gong - 3695, 10, // wsv -> Arab - 3699, 105, // wtm -> 
Deva - 3703, 175, // wuu -> Hans - 3707, 0, // xag -> Aghb - 3711, 100, // xal -> Cyrl - 3715, 120, // xan -> Ethi - 3719, 100, // xas -> Cyrl - 3723, 85, // xco -> Chrs - 3727, 70, // xcr -> Cari - 3731, 100, // xdq -> Cyrl - 3735, 10, // xhe -> Arab - 3739, 235, // xhm -> Khmr - 3743, 395, // xis -> Orya - 3747, 10, // xka -> Arab - 3751, 10, // xkc -> Arab - 3755, 555, // xkf -> Tibt - 3759, 10, // xkj -> Arab - 3763, 10, // xkp -> Arab - 3767, 295, // xlc -> Lyci - 3771, 300, // xld -> Lydi - 3775, 115, // xly -> Elym - 3779, 125, // xmf -> Geor - 3783, 310, // xmn -> Mani - 3787, 325, // xmr -> Merc - 3791, 360, // xna -> Narb - 3795, 105, // xnr -> Deva - 3799, 150, // xpg -> Grek - 3803, 380, // xpi -> Ogam - 3807, 100, // xpm -> Cyrl - 3811, 430, // xpr -> Prti - 3815, 100, // xrm -> Cyrl - 3819, 100, // xrn -> Cyrl - 3823, 455, // xsa -> Sarb - 3827, 105, // xsr -> Deva - 3831, 55, // xtq -> Brah - 3835, 520, // xub -> Taml - 3839, 520, // xuj -> Taml - 3843, 200, // xve -> Ital - 3847, 10, // xvi -> Arab - 3851, 100, // xwo -> Cyrl - 3855, 315, // xzh -> Marc - 3859, 100, // yai -> Cyrl - 3863, 105, // ybh -> Deva - 3867, 105, // ybi -> Deva - 3871, 10, // ydg -> Arab - 3875, 330, // yea -> Mlym - 3879, 150, // yej -> Grek - 3883, 535, // yeu -> Telu - 3887, 425, // ygp -> Plrd - 3891, 185, // yhd -> Hebr - 3895, 185, // yi -> Hebr - 3898, 595, // yig -> Yiii - 3902, 185, // yih -> Hebr - 3906, 595, // yiv -> Yiii - 3910, 100, // ykg -> Cyrl - 3914, 100, // ykh -> Cyrl - 3918, 425, // yna -> Plrd - 3922, 100, // ynk -> Cyrl - 3926, 210, // yoi -> Jpan - 3930, 550, // yoy -> Thai - 3934, 100, // yrk -> Cyrl - 3938, 595, // ysd -> Yiii - 3942, 595, // ysn -> Yiii - 3946, 595, // ysp -> Yiii - 3950, 100, // ysr -> Cyrl - 3954, 425, // ysy -> Plrd - 3958, 185, // yud -> Hebr - 3962, 180, // yue -> Hant - 3966, 175, // yue_CN -> Hans - 3973, 100, // yug -> Cyrl - 3977, 100, // yux -> Cyrl - 3981, 425, // ywq -> Plrd - 3985, 425, // ywu -> Plrd - 3989, 555, // 
zau -> Tibt - 3993, 10, // zba -> Arab - 3997, 170, // zch -> Hani - 4001, 10, // zdj -> Arab - 4005, 170, // zeh -> Hani - 4009, 540, // zen -> Tfng - 4013, 170, // zgb -> Hani - 4017, 540, // zgh -> Tfng - 4021, 170, // zgm -> Hani - 4025, 170, // zgn -> Hani - 4029, 175, // zh -> Hans - 4032, 180, // zh_AU -> Hant - 4038, 180, // zh_BN -> Hant - 4044, 180, // zh_GB -> Hant - 4050, 180, // zh_GF -> Hant - 4056, 180, // zh_HK -> Hant - 4062, 180, // zh_ID -> Hant - 4068, 180, // zh_MO -> Hant - 4074, 180, // zh_PA -> Hant - 4080, 180, // zh_PF -> Hant - 4086, 180, // zh_PH -> Hant - 4092, 180, // zh_SR -> Hant - 4098, 180, // zh_TH -> Hant - 4104, 180, // zh_TW -> Hant - 4110, 180, // zh_US -> Hant - 4116, 180, // zh_VN -> Hant - 4122, 170, // zhd -> Hani - 4126, 375, // zhx -> Nshu - 4130, 100, // zko -> Cyrl - 4134, 240, // zkt -> Kits - 4138, 100, // zkz -> Cyrl - 4142, 170, // zlj -> Hani - 4146, 170, // zln -> Hani - 4150, 170, // zlq -> Hani - 4154, 170, // zqe -> Hani - 4158, 395, // zrg -> Orya - 4162, 185, // zrp -> Hebr - 4166, 10, // zum -> Arab - 4170, 120, // zwa -> Ethi - 4174, 170, // zyg -> Hani - 4178, 170, // zyn -> Hani - 4182, 170, // zzj -> Hani + 1238, 180, // hak -> Hans + 1242, 185, // hak_TW -> Hant + 1249, 125, // har -> Ethi + 1253, 10, // haz -> Arab + 1257, 190, // hbo -> Hebr + 1261, 125, // hdy -> Ethi + 1265, 190, // he -> Hebr + 1268, 110, // hi -> Deva + 1271, 110, // hif -> Deva + 1275, 505, // hii -> Takr + 1279, 590, // hit -> Xsux + 1283, 10, // hkh -> Arab + 1287, 110, // hlb -> Deva + 1291, 195, // hlu -> Hluw + 1295, 425, // hmd -> Plrd + 1299, 55, // hmj -> Bopo + 1303, 55, // hmq -> Bopo + 1307, 10, // hnd -> Arab + 1311, 110, // hne -> Deva + 1315, 200, // hnj -> Hmnp + 1319, 10, // hno -> Arab + 1323, 110, // hoc -> Deva + 1327, 10, // hoh -> Arab + 1331, 110, // hoj -> Deva + 1335, 175, // how -> Hani + 1339, 110, // hoy -> Deva + 1343, 355, // hpo -> Mymr + 1347, 495, // hrt -> Syrc + 1351, 10, // hrz -> Arab + 1355, 
180, // hsn -> Hans + 1359, 10, // hss -> Arab + 1363, 590, // htx -> Xsux + 1367, 110, // hut -> Deva + 1371, 190, // huy -> Hebr + 1375, 105, // huz -> Cyrl + 1379, 20, // hy -> Armn + 1382, 20, // hyw -> Armn + 1386, 595, // ii -> Yiii + 1389, 295, // imy -> Lyci + 1393, 105, // inh -> Cyrl + 1397, 355, // int -> Mymr + 1401, 125, // ior -> Ethi + 1405, 520, // iru -> Taml + 1409, 10, // isk -> Arab + 1413, 190, // itk -> Hebr + 1417, 105, // itl -> Cyrl + 1421, 70, // iu -> Cans + 1424, 190, // iw -> Hebr + 1427, 215, // ja -> Jpan + 1430, 10, // jad -> Arab + 1434, 10, // jat -> Arab + 1438, 190, // jbe -> Hebr + 1442, 10, // jbn -> Arab + 1446, 105, // jct -> Cyrl + 1450, 555, // jda -> Tibt + 1454, 10, // jdg -> Arab + 1458, 105, // jdt -> Cyrl + 1462, 110, // jee -> Deva + 1466, 130, // jge -> Geor + 1470, 190, // ji -> Hebr + 1473, 170, // jje -> Hang + 1477, 355, // jkm -> Mymr + 1481, 110, // jml -> Deva + 1485, 505, // jna -> Takr + 1489, 10, // jnd -> Arab + 1493, 110, // jnl -> Deva + 1497, 110, // jns -> Deva + 1501, 10, // jog -> Arab + 1505, 190, // jpa -> Hebr + 1509, 190, // jpr -> Hebr + 1513, 190, // jrb -> Hebr + 1517, 110, // jul -> Deva + 1521, 395, // jun -> Orya + 1525, 395, // juy -> Orya + 1529, 555, // jya -> Tibt + 1533, 190, // jye -> Hebr + 1537, 130, // ka -> Geor + 1540, 105, // kaa -> Cyrl + 1544, 105, // kap -> Cyrl + 1548, 30, // kaw -> Bali + 1552, 105, // kbd -> Cyrl + 1556, 555, // kbg -> Tibt + 1560, 10, // kbu -> Arab + 1564, 10, // kby -> Arab + 1568, 105, // kca -> Cyrl + 1572, 10, // kcy -> Arab + 1576, 50, // kdq -> Beng + 1580, 550, // kdt -> Thai + 1584, 105, // ket -> Cyrl + 1588, 330, // kev -> Mlym + 1592, 110, // kex -> Deva + 1596, 535, // key -> Telu + 1600, 245, // kfa -> Knda + 1604, 110, // kfb -> Deva + 1608, 535, // kfc -> Telu + 1612, 245, // kfd -> Knda + 1616, 520, // kfe -> Taml + 1620, 245, // kfg -> Knda + 1624, 330, // kfh -> Mlym + 1628, 520, // kfi -> Taml + 1632, 110, // kfk -> Deva + 1636, 10, // 
kfm -> Arab + 1640, 110, // kfp -> Deva + 1644, 110, // kfq -> Deva + 1648, 110, // kfr -> Deva + 1652, 110, // kfs -> Deva + 1656, 110, // kfu -> Deva + 1660, 110, // kfx -> Deva + 1664, 110, // kfy -> Deva + 1668, 110, // kgj -> Deva + 1672, 110, // kgy -> Deva + 1676, 515, // khb -> Talu + 1680, 550, // khf -> Thai + 1684, 555, // khg -> Tibt + 1688, 110, // khn -> Deva + 1692, 60, // kho -> Brah + 1696, 355, // kht -> Mymr + 1700, 105, // khv -> Cyrl + 1704, 10, // khw -> Arab + 1708, 110, // kif -> Deva + 1712, 105, // kim -> Cyrl + 1716, 110, // kip -> Deva + 1720, 260, // kjg -> Laoo + 1724, 105, // kjh -> Cyrl + 1728, 110, // kjl -> Deva + 1732, 110, // kjo -> Deva + 1736, 355, // kjp -> Mymr + 1740, 550, // kjt -> Thai + 1744, 555, // kjz -> Tibt + 1748, 105, // kk -> Cyrl + 1751, 10, // kk_AF -> Arab + 1757, 10, // kk_CN -> Arab + 1763, 10, // kk_IR -> Arab + 1769, 10, // kk_MN -> Arab + 1775, 555, // kkf -> Tibt + 1779, 255, // kkh -> Lana + 1783, 110, // kkt -> Deva + 1787, 110, // kle -> Deva + 1791, 10, // klj -> Arab + 1795, 110, // klr -> Deva + 1799, 235, // km -> Khmr + 1802, 110, // kmj -> Deva + 1806, 10, // kmz -> Arab + 1810, 245, // kn -> Knda + 1813, 110, // knn -> Deva + 1817, 250, // ko -> Kore + 1820, 105, // koi -> Cyrl + 1824, 110, // kok -> Deva + 1828, 105, // kpt -> Cyrl + 1832, 105, // kpy -> Cyrl + 1836, 495, // kqd -> Syrc + 1840, 125, // kqy -> Ethi + 1844, 110, // kra -> Deva + 1848, 105, // krc -> Cyrl + 1852, 105, // krk -> Cyrl + 1856, 235, // krr -> Khmr + 1860, 110, // kru -> Deva + 1864, 235, // krv -> Khmr + 1868, 10, // ks -> Arab + 1871, 355, // ksu -> Mymr + 1875, 355, // ksw -> Mymr + 1879, 110, // ksz -> Deva + 1883, 125, // ktb -> Ethi + 1887, 110, // kte -> Deva + 1891, 10, // ktl -> Arab + 1895, 425, // ktp -> Plrd + 1899, 10, // ku_LB -> Arab + 1905, 260, // kuf -> Laoo + 1909, 105, // kum -> Cyrl + 1913, 105, // kv -> Cyrl + 1916, 105, // kva -> Cyrl + 1920, 355, // kvq -> Mymr + 1924, 355, // kvt -> Mymr + 
1928, 10, // kvx -> Arab + 1932, 220, // kvy -> Kali + 1936, 355, // kxf -> Mymr + 1940, 355, // kxk -> Mymr + 1944, 550, // kxm -> Thai + 1948, 10, // kxp -> Arab + 1952, 105, // ky -> Cyrl + 1955, 10, // ky_CN -> Arab + 1961, 220, // kyu -> Kali + 1965, 110, // kyv -> Deva + 1969, 110, // kyw -> Deva + 1973, 280, // lab -> Lina + 1977, 190, // lad -> Hebr + 1981, 110, // lae -> Deva + 1985, 10, // lah -> Arab + 1989, 105, // lbe -> Cyrl + 1993, 110, // lbf -> Deva + 1997, 555, // lbj -> Tibt + 2001, 110, // lbm -> Deva + 2005, 260, // lbo -> Laoo + 2009, 110, // lbr -> Deva + 2013, 550, // lcp -> Thai + 2017, 275, // lep -> Lepc + 2021, 105, // lez -> Cyrl + 2025, 110, // lhm -> Deva + 2029, 495, // lhs -> Syrc + 2033, 110, // lif -> Deva + 2037, 290, // lis -> Lisu + 2041, 555, // lkh -> Tibt + 2045, 10, // lki -> Arab + 2049, 110, // lmh -> Deva + 2053, 535, // lmn -> Telu + 2057, 260, // lo -> Laoo + 2060, 110, // loy -> Deva + 2064, 425, // lpo -> Plrd + 2068, 10, // lrc -> Arab + 2072, 10, // lrk -> Arab + 2076, 10, // lrl -> Arab + 2080, 10, // lsa -> Arab + 2084, 190, // lsd -> Hebr + 2088, 10, // lss -> Arab + 2092, 185, // ltc -> Hant + 2096, 555, // luk -> Tibt + 2100, 110, // luu -> Deva + 2104, 10, // luv -> Arab + 2108, 10, // luz -> Arab + 2112, 550, // lwl -> Thai + 2116, 550, // lwm -> Thai + 2120, 555, // lya -> Tibt + 2124, 180, // lzh -> Hans + 2128, 130, // lzz_GE -> Geor + 2135, 110, // mag -> Deva + 2139, 110, // mai -> Deva + 2143, 10, // mby -> Arab + 2147, 10, // mde -> Arab + 2151, 105, // mdf -> Cyrl + 2155, 125, // mdx -> Ethi + 2159, 125, // mdy -> Ethi + 2163, 10, // mfa -> Arab + 2167, 10, // mfi -> Arab + 2171, 270, // mga -> Latg + 2175, 110, // mgp -> Deva + 2179, 10, // mhj -> Arab + 2183, 305, // mid -> Mand + 2187, 110, // mjl -> Deva + 2191, 330, // mjq -> Mlym + 2195, 330, // mjr -> Mlym + 2199, 110, // mjt -> Deva + 2203, 535, // mju -> Telu + 2207, 330, // mjv -> Mlym + 2211, 110, // mjz -> Deva + 2215, 105, // mk -> Cyrl 
+ 2218, 110, // mkb -> Deva + 2222, 110, // mke -> Deva + 2226, 10, // mki -> Arab + 2230, 550, // mkm -> Thai + 2234, 330, // ml -> Mlym + 2237, 550, // mlf -> Thai + 2241, 105, // mn -> Cyrl + 2244, 340, // mn_CN -> Mong + 2250, 340, // mnc -> Mong + 2254, 50, // mni -> Beng + 2258, 10, // mnj -> Arab + 2262, 105, // mns -> Cyrl + 2266, 355, // mnw -> Mymr + 2270, 550, // mpz -> Thai + 2274, 110, // mr -> Deva + 2277, 550, // mra -> Thai + 2281, 110, // mrd -> Deva + 2285, 105, // mrj -> Cyrl + 2289, 345, // mro -> Mroo + 2293, 110, // mrr -> Deva + 2297, 10, // ms_CC -> Arab + 2303, 105, // mtm -> Cyrl + 2307, 110, // mtr -> Deva + 2311, 105, // mud -> Cyrl + 2315, 555, // muk -> Tibt + 2319, 110, // mut -> Deva + 2323, 520, // muv -> Taml + 2327, 125, // muz -> Ethi + 2331, 10, // mve -> Arab + 2335, 340, // mvf -> Mong + 2339, 10, // mvy -> Arab + 2343, 125, // mvz -> Ethi + 2347, 110, // mwr -> Deva + 2351, 355, // mwt -> Mymr + 2355, 200, // mww -> Hmnp + 2359, 355, // my -> Mymr + 2362, 125, // mym -> Ethi + 2366, 105, // myv -> Cyrl + 2370, 305, // myz -> Mand + 2374, 10, // mzn -> Arab + 2378, 180, // nan -> Hans + 2382, 185, // nan_TW -> Hant + 2389, 110, // nao -> Deva + 2393, 110, // ncd -> Deva + 2397, 260, // ncq -> Laoo + 2401, 105, // ndf -> Cyrl + 2405, 110, // ne -> Deva + 2408, 105, // neg -> Cyrl + 2412, 555, // neh -> Tibt + 2416, 590, // nei -> Xsux + 2420, 110, // new -> Deva + 2424, 260, // ngt -> Laoo + 2428, 105, // nio -> Cyrl + 2432, 535, // nit -> Telu + 2436, 105, // niv -> Cyrl + 2440, 10, // nli -> Arab + 2444, 10, // nlm -> Arab + 2448, 110, // nlx -> Deva + 2452, 110, // nmm -> Deva + 2456, 580, // nnp -> Wcho + 2460, 255, // nod -> Lana + 2464, 110, // noe -> Deva + 2468, 105, // nog -> Cyrl + 2472, 110, // noi -> Deva + 2476, 445, // non -> Runr + 2480, 595, // nos -> Yiii + 2484, 555, // npb -> Tibt + 2488, 370, // nqo -> Nkoo + 2492, 445, // nrn -> Runr + 2496, 595, // nsd -> Yiii + 2500, 595, // nsf -> Yiii + 2504, 70, // nsk 
-> Cans + 2508, 560, // nst -> Tnsa + 2512, 595, // nsv -> Yiii + 2516, 595, // nty -> Yiii + 2520, 10, // ntz -> Arab + 2524, 365, // nwc -> Newa + 2528, 110, // nwx -> Deva + 2532, 550, // nyl -> Thai + 2536, 10, // nyq -> Arab + 2540, 550, // nyw -> Thai + 2544, 105, // oaa -> Cyrl + 2548, 105, // oac -> Cyrl + 2552, 495, // oar -> Syrc + 2556, 130, // oav -> Geor + 2560, 420, // obm -> Phnx + 2564, 355, // obr -> Mymr + 2568, 10, // odk -> Arab + 2572, 590, // oht -> Xsux + 2576, 70, // oj -> Cans + 2579, 70, // ojs -> Cans + 2583, 170, // okm -> Hang + 2587, 175, // oko -> Hani + 2591, 235, // okz -> Khmr + 2595, 110, // ola -> Deva + 2599, 555, // ole -> Tibt + 2603, 105, // omk -> Cyrl + 2607, 350, // omp -> Mtei + 2611, 335, // omr -> Modi + 2615, 355, // omx -> Mymr + 2619, 110, // oon -> Deva + 2623, 395, // or -> Orya + 2626, 535, // ort -> Telu + 2630, 10, // oru -> Arab + 2634, 105, // orv -> Cyrl + 2638, 105, // os -> Cyrl + 2641, 400, // osa -> Osge + 2645, 205, // osc -> Ital + 2649, 210, // osi -> Java + 2653, 10, // ota -> Arab + 2657, 555, // otb -> Tibt + 2661, 390, // otk -> Orkh + 2665, 150, // oty -> Gran + 2669, 405, // oui -> Ougr + 2673, 165, // pa -> Guru + 2676, 10, // pa_PK -> Arab + 2682, 415, // pal -> Phli + 2686, 105, // paq -> Cyrl + 2690, 10, // pbt -> Arab + 2694, 235, // pcb -> Khmr + 2698, 355, // pce -> Mymr + 2702, 330, // pcf -> Mlym + 2706, 330, // pcg -> Mlym + 2710, 110, // pch -> Deva + 2714, 110, // pci -> Deva + 2718, 535, // pcj -> Telu + 2722, 395, // peg -> Orya + 2726, 585, // peo -> Xpeo + 2730, 230, // pgd -> Khar + 2734, 110, // pgg -> Deva + 2738, 380, // pgl -> Ogam + 2742, 205, // pgn -> Ital + 2746, 110, // phd -> Deva + 2750, 355, // phk -> Mymr + 2754, 10, // phl -> Arab + 2758, 420, // phn -> Phnx + 2762, 260, // pho -> Laoo + 2766, 10, // phr -> Arab + 2770, 550, // pht -> Thai + 2774, 550, // phu -> Thai + 2778, 10, // phv -> Arab + 2782, 110, // phw -> Deva + 2786, 470, // pi -> Sinh + 2789, 60, // pka 
-> Brah + 2793, 330, // pkr -> Mlym + 2797, 10, // plk -> Arab + 2801, 355, // pll -> Mymr + 2805, 60, // pmh -> Brah + 2809, 155, // pnt -> Grek + 2813, 105, // pnt_RU -> Cyrl + 2820, 230, // pra -> Khar + 2824, 10, // prc -> Arab + 2828, 10, // prd -> Arab + 2832, 550, // prt -> Thai + 2836, 10, // prx -> Arab + 2840, 10, // ps -> Arab + 2843, 10, // psh -> Arab + 2847, 10, // psi -> Arab + 2851, 10, // pst -> Arab + 2855, 60, // psu -> Brah + 2859, 110, // pum -> Deva + 2863, 355, // pwo -> Mymr + 2867, 110, // pwr -> Deva + 2871, 550, // pww -> Thai + 2875, 355, // pyx -> Mymr + 2879, 10, // qxq -> Arab + 2883, 110, // raa -> Deva + 2887, 110, // rab -> Deva + 2891, 110, // raf -> Deva + 2895, 50, // rah -> Beng + 2899, 110, // raj -> Deva + 2903, 110, // rav -> Deva + 2907, 355, // rbb -> Mymr + 2911, 10, // rdb -> Arab + 2915, 395, // rei -> Orya + 2919, 440, // rhg -> Rohg + 2923, 110, // rji -> Deva + 2927, 110, // rjs -> Deva + 2931, 235, // rka -> Khmr + 2935, 355, // rki -> Mymr + 2939, 50, // rkt -> Beng + 2943, 20, // rmi -> Armn + 2947, 10, // rmt -> Arab + 2951, 355, // rmz -> Mymr + 2955, 105, // rsk -> Cyrl + 2959, 110, // rtw -> Deva + 2963, 105, // ru -> Cyrl + 2966, 105, // rue -> Cyrl + 2970, 105, // rut -> Cyrl + 2974, 110, // rwr -> Deva + 2978, 225, // ryu -> Kana + 2982, 110, // sa -> Deva + 2985, 105, // sah -> Cyrl + 2989, 450, // sam -> Samr + 2993, 385, // sat -> Olck + 2997, 460, // saz -> Saur + 3001, 10, // sbn -> Arab + 3005, 555, // sbu -> Tibt + 3009, 110, // sck -> Deva + 3013, 10, // scl -> Arab + 3017, 110, // scp -> Deva + 3021, 260, // sct -> Laoo + 3025, 505, // scu -> Takr + 3029, 155, // scx -> Grek + 3033, 10, // sd -> Arab + 3036, 110, // sd_IN -> Deva + 3042, 10, // sdb -> Arab + 3046, 10, // sdf -> Arab + 3050, 10, // sdg -> Arab + 3054, 10, // sdh -> Arab + 3058, 50, // sdr -> Beng + 3062, 10, // sds -> Arab + 3066, 105, // sel -> Cyrl + 3070, 425, // sfm -> Plrd + 3074, 105, // sgh -> Cyrl + 3078, 110, // sgj -> Deva 
+ 3082, 10, // sgr -> Arab + 3086, 555, // sgt -> Tibt + 3090, 125, // sgw -> Ethi + 3094, 10, // sgy -> Arab + 3098, 10, // shd -> Arab + 3102, 540, // shi -> Tfng + 3106, 10, // shm -> Arab + 3110, 355, // shn -> Mymr + 3114, 10, // shu -> Arab + 3118, 10, // shv -> Arab + 3122, 470, // si -> Sinh + 3125, 105, // sia -> Cyrl + 3129, 555, // sip -> Tibt + 3133, 10, // siy -> Arab + 3137, 10, // siz -> Arab + 3141, 105, // sjd -> Cyrl + 3145, 110, // sjp -> Deva + 3149, 105, // sjt -> Cyrl + 3153, 550, // skb -> Thai + 3157, 110, // skj -> Deva + 3161, 10, // skr -> Arab + 3165, 595, // smh -> Yiii + 3169, 450, // smp -> Samr + 3173, 235, // smu -> Khmr + 3177, 10, // smy -> Arab + 3181, 530, // soa -> Tavt + 3185, 475, // sog -> Sogd + 3189, 110, // soi -> Deva + 3193, 550, // sou -> Thai + 3197, 555, // spt -> Tibt + 3201, 395, // spv -> Orya + 3205, 10, // sqo -> Arab + 3209, 260, // sqq -> Laoo + 3213, 10, // sqt -> Arab + 3217, 105, // sr -> Cyrl + 3220, 480, // srb -> Sora + 3224, 10, // srh -> Arab + 3228, 110, // srx -> Deva + 3232, 10, // srz -> Arab + 3236, 10, // ssh -> Arab + 3240, 260, // sss -> Laoo + 3244, 10, // sts -> Arab + 3248, 125, // stv -> Ethi + 3252, 105, // sty -> Cyrl + 3256, 490, // suz -> Sunu + 3260, 130, // sva -> Geor + 3264, 10, // swb -> Arab + 3268, 175, // swi -> Hani + 3272, 110, // swv -> Deva + 3276, 445, // sxu -> Runr + 3280, 495, // syc -> Syrc + 3284, 50, // syl -> Beng + 3288, 495, // syn -> Syrc + 3292, 495, // syr -> Syrc + 3296, 110, // syw -> Deva + 3300, 520, // ta -> Taml + 3303, 105, // tab -> Cyrl + 3307, 110, // taj -> Deva + 3311, 500, // tbk -> Tagb + 3315, 555, // tcn -> Tibt + 3319, 355, // tco -> Mymr + 3323, 520, // tcx -> Taml + 3327, 245, // tcy -> Knda + 3331, 540, // tda -> Tfng + 3335, 110, // tdb -> Deva + 3339, 510, // tdd -> Tale + 3343, 110, // tdg -> Deva + 3347, 110, // tdh -> Deva + 3351, 535, // te -> Telu + 3354, 210, // tes -> Java + 3358, 105, // tg -> Cyrl + 3361, 10, // tg_PK -> Arab + 
3367, 110, // tge -> Deva + 3371, 555, // tgf -> Tibt + 3375, 550, // th -> Thai + 3378, 110, // the -> Deva + 3382, 110, // thf -> Deva + 3386, 510, // thi -> Tale + 3390, 110, // thl -> Deva + 3394, 550, // thm -> Thai + 3398, 110, // thq -> Deva + 3402, 110, // thr -> Deva + 3406, 110, // ths -> Deva + 3410, 125, // ti -> Ethi + 3413, 125, // tig -> Ethi + 3417, 110, // tij -> Deva + 3421, 105, // tin -> Cyrl + 3425, 355, // tjl -> Mymr + 3429, 10, // tjo -> Arab + 3433, 110, // tkb -> Deva + 3437, 10, // tks -> Arab + 3441, 110, // tkt -> Deva + 3445, 495, // tmr -> Syrc + 3449, 65, // tnv -> Cakm + 3453, 10, // tov -> Arab + 3457, 235, // tpu -> Khmr + 3461, 10, // tra -> Arab + 3465, 190, // trg -> Hebr + 3469, 10, // trm -> Arab + 3473, 10, // trw -> Arab + 3477, 155, // tsd -> Grek + 3481, 555, // tsj -> Tibt + 3485, 105, // tt -> Cyrl + 3488, 260, // tth -> Laoo + 3492, 260, // tto -> Laoo + 3496, 550, // tts -> Thai + 3500, 110, // ttz -> Deva + 3504, 355, // tvn -> Mymr + 3508, 110, // twm -> Deva + 3512, 525, // txg -> Tang + 3516, 565, // txo -> Toto + 3520, 530, // tyr -> Tavt + 3524, 105, // tyv -> Cyrl + 3528, 105, // ude -> Cyrl + 3532, 330, // udg -> Mlym + 3536, 105, // udi -> Cyrl + 3540, 105, // udm -> Cyrl + 3544, 10, // ug -> Arab + 3547, 105, // ug_KZ -> Cyrl + 3553, 105, // ug_MN -> Cyrl + 3559, 570, // uga -> Ugar + 3563, 105, // ugh -> Cyrl + 3567, 550, // ugo -> Thai + 3571, 105, // uk -> Cyrl + 3574, 395, // uki -> Orya + 3578, 105, // ulc -> Cyrl + 3582, 50, // unr -> Beng + 3586, 110, // unr_NP -> Deva + 3593, 50, // unx -> Beng + 3597, 10, // ur -> Arab + 3600, 550, // urk -> Thai + 3604, 10, // ush -> Arab + 3608, 155, // uum -> Grek + 3612, 10, // uz_AF -> Arab + 3618, 105, // uz_CN -> Cyrl + 3624, 10, // uzs -> Arab + 3628, 520, // vaa -> Taml + 3632, 10, // vaf -> Arab + 3636, 110, // vah -> Deva + 3640, 575, // vai -> Vaii + 3644, 110, // vas -> Deva + 3648, 110, // vav -> Deva + 3652, 110, // vay -> Deva + 3656, 10, // vgr -> 
Arab + 3660, 110, // vjk -> Deva + 3664, 245, // vmd -> Knda + 3668, 10, // vmh -> Arab + 3672, 125, // wal -> Ethi + 3676, 10, // wbk -> Arab + 3680, 535, // wbq -> Telu + 3684, 110, // wbr -> Deva + 3688, 125, // wle -> Ethi + 3692, 10, // wlo -> Arab + 3696, 110, // wme -> Deva + 3700, 10, // wne -> Arab + 3704, 10, // wni -> Arab + 3708, 135, // wsg -> Gong + 3712, 10, // wsv -> Arab + 3716, 110, // wtm -> Deva + 3720, 180, // wuu -> Hans + 3724, 0, // xag -> Aghb + 3728, 105, // xal -> Cyrl + 3732, 125, // xan -> Ethi + 3736, 105, // xas -> Cyrl + 3740, 90, // xco -> Chrs + 3744, 75, // xcr -> Cari + 3748, 105, // xdq -> Cyrl + 3752, 10, // xhe -> Arab + 3756, 235, // xhm -> Khmr + 3760, 395, // xis -> Orya + 3764, 10, // xka -> Arab + 3768, 10, // xkc -> Arab + 3772, 555, // xkf -> Tibt + 3776, 10, // xkj -> Arab + 3780, 10, // xkp -> Arab + 3784, 295, // xlc -> Lyci + 3788, 300, // xld -> Lydi + 3792, 120, // xly -> Elym + 3796, 130, // xmf -> Geor + 3800, 310, // xmn -> Mani + 3804, 325, // xmr -> Merc + 3808, 360, // xna -> Narb + 3812, 110, // xnr -> Deva + 3816, 155, // xpg -> Grek + 3820, 380, // xpi -> Ogam + 3824, 105, // xpm -> Cyrl + 3828, 430, // xpr -> Prti + 3832, 105, // xrm -> Cyrl + 3836, 105, // xrn -> Cyrl + 3840, 455, // xsa -> Sarb + 3844, 110, // xsr -> Deva + 3848, 60, // xtq -> Brah + 3852, 520, // xub -> Taml + 3856, 520, // xuj -> Taml + 3860, 205, // xve -> Ital + 3864, 10, // xvi -> Arab + 3868, 105, // xwo -> Cyrl + 3872, 315, // xzh -> Marc + 3876, 105, // yai -> Cyrl + 3880, 110, // ybh -> Deva + 3884, 110, // ybi -> Deva + 3888, 10, // ydg -> Arab + 3892, 330, // yea -> Mlym + 3896, 155, // yej -> Grek + 3900, 535, // yeu -> Telu + 3904, 425, // ygp -> Plrd + 3908, 190, // yhd -> Hebr + 3912, 190, // yi -> Hebr + 3915, 595, // yig -> Yiii + 3919, 190, // yih -> Hebr + 3923, 595, // yiv -> Yiii + 3927, 105, // ykg -> Cyrl + 3931, 105, // ykh -> Cyrl + 3935, 425, // yna -> Plrd + 3939, 105, // ynk -> Cyrl + 3943, 215, // yoi -> 
Jpan + 3947, 550, // yoy -> Thai + 3951, 105, // yrk -> Cyrl + 3955, 595, // ysd -> Yiii + 3959, 595, // ysn -> Yiii + 3963, 595, // ysp -> Yiii + 3967, 105, // ysr -> Cyrl + 3971, 425, // ysy -> Plrd + 3975, 190, // yud -> Hebr + 3979, 185, // yue -> Hant + 3983, 180, // yue_CN -> Hans + 3990, 105, // yug -> Cyrl + 3994, 105, // yux -> Cyrl + 3998, 425, // ywq -> Plrd + 4002, 425, // ywu -> Plrd + 4006, 555, // zau -> Tibt + 4010, 10, // zba -> Arab + 4014, 175, // zch -> Hani + 4018, 10, // zdj -> Arab + 4022, 175, // zeh -> Hani + 4026, 540, // zen -> Tfng + 4030, 175, // zgb -> Hani + 4034, 540, // zgh -> Tfng + 4038, 175, // zgm -> Hani + 4042, 175, // zgn -> Hani + 4046, 180, // zh -> Hans + 4049, 185, // zh_AU -> Hant + 4055, 185, // zh_BN -> Hant + 4061, 185, // zh_GB -> Hant + 4067, 185, // zh_GF -> Hant + 4073, 185, // zh_HK -> Hant + 4079, 185, // zh_ID -> Hant + 4085, 185, // zh_MO -> Hant + 4091, 185, // zh_PA -> Hant + 4097, 185, // zh_PF -> Hant + 4103, 185, // zh_PH -> Hant + 4109, 185, // zh_SR -> Hant + 4115, 185, // zh_TH -> Hant + 4121, 185, // zh_TW -> Hant + 4127, 185, // zh_US -> Hant + 4133, 185, // zh_VN -> Hant + 4139, 175, // zhd -> Hani + 4143, 375, // zhx -> Nshu + 4147, 105, // zko -> Cyrl + 4151, 240, // zkt -> Kits + 4155, 105, // zkz -> Cyrl + 4159, 175, // zlj -> Hani + 4163, 175, // zln -> Hani + 4167, 175, // zlq -> Hani + 4171, 175, // zqe -> Hani + 4175, 395, // zrg -> Orya + 4179, 190, // zrp -> Hebr + 4183, 10, // zum -> Arab + 4187, 125, // zwa -> Ethi + 4191, 175, // zyg -> Hani + 4195, 175, // zyn -> Hani + 4199, 175, // zzj -> Hani }; //====================================================================== @@ -1159,38 +1162,39 @@ const char parentLocaleChars[] = "az_Arab\0az_Cyrl\0bal_Latn\0blt_Latn\0bm_Nkoo\0bs_Cyrl\0byn_Latn\0" "cu_Glag\0dje_Arab\0dyo_Arab\0en_001\0en_150\0en_AG\0en_AI\0en_AT\0" "en_AU\0en_BB\0en_BE\0en_BM\0en_BS\0en_BW\0en_BZ\0en_CC\0en_CH\0" - 
"en_CK\0en_CM\0en_CX\0en_CY\0en_DE\0en_DG\0en_DK\0en_DM\0en_Dsrt\0" - "en_ER\0en_FI\0en_FJ\0en_FK\0en_FM\0en_GB\0en_GD\0en_GG\0en_GH\0" - "en_GI\0en_GM\0en_GY\0en_HK\0en_ID\0en_IE\0en_IL\0en_IM\0en_IN\0" - "en_IO\0en_JE\0en_JM\0en_KE\0en_KI\0en_KN\0en_KY\0en_LC\0en_LR\0" - "en_LS\0en_MG\0en_MO\0en_MS\0en_MT\0en_MU\0en_MV\0en_MW\0en_MY\0" - "en_NA\0en_NF\0en_NG\0en_NL\0en_NR\0en_NU\0en_NZ\0en_PG\0en_PK\0" - "en_PN\0en_PW\0en_RW\0en_SB\0en_SC\0en_SD\0en_SE\0en_SG\0en_SH\0" - "en_SI\0en_SL\0en_SS\0en_SX\0en_SZ\0en_Shaw\0en_TC\0en_TK\0en_TO\0" - "en_TT\0en_TV\0en_TZ\0en_UG\0en_VC\0en_VG\0en_VU\0en_WS\0en_ZA\0" - "en_ZM\0en_ZW\0es_419\0es_AR\0es_BO\0es_BR\0es_BZ\0es_CL\0es_CO\0" - "es_CR\0es_CU\0es_DO\0es_EC\0es_GT\0es_HN\0es_JP\0es_MX\0es_NI\0" - "es_PA\0es_PE\0es_PR\0es_PY\0es_SV\0es_US\0es_UY\0es_VE\0ff_Adlm\0" - "ff_Arab\0fr_HT\0ha_Arab\0hi_Latn\0ht\0iu_Latn\0kaa_Latn\0kk_Arab\0" - "kok_Latn\0ks_Deva\0ku_Arab\0kxv_Deva\0kxv_Orya\0kxv_Telu\0ky_Arab\0" - "ky_Latn\0ml_Arab\0mn_Mong\0mni_Mtei\0ms_Arab\0nb\0nn\0no\0no_NO\0" - "pa_Arab\0pt_AO\0pt_CH\0pt_CV\0pt_FR\0pt_GQ\0pt_GW\0pt_LU\0pt_MO\0" - "pt_MZ\0pt_PT\0pt_ST\0pt_TL\0root\0sat_Deva\0sd_Deva\0sd_Khoj\0" - "sd_Sind\0shi_Latn\0so_Arab\0sr_Latn\0sw_Arab\0tg_Arab\0ug_Cyrl\0" - "uz_Arab\0uz_Cyrl\0vai_Latn\0wo_Arab\0yo_Arab\0yue_Hans\0zh_Hant\0" - "zh_Hant_HK\0zh_Hant_MO\0"; + "en_CK\0en_CM\0en_CX\0en_CY\0en_CZ\0en_DE\0en_DG\0en_DK\0en_DM\0" + "en_Dsrt\0en_ER\0en_ES\0en_FI\0en_FJ\0en_FK\0en_FM\0en_FR\0en_GB\0" + "en_GD\0en_GG\0en_GH\0en_GI\0en_GM\0en_GS\0en_GY\0en_HK\0en_HU\0" + "en_ID\0en_IE\0en_IL\0en_IM\0en_IN\0en_IO\0en_IT\0en_JE\0en_JM\0" + "en_KE\0en_KI\0en_KN\0en_KY\0en_LC\0en_LR\0en_LS\0en_MG\0en_MO\0" + "en_MS\0en_MT\0en_MU\0en_MV\0en_MW\0en_MY\0en_NA\0en_NF\0en_NG\0" + "en_NL\0en_NO\0en_NR\0en_NU\0en_NZ\0en_PG\0en_PK\0en_PL\0en_PN\0" + "en_PT\0en_PW\0en_RO\0en_RW\0en_SB\0en_SC\0en_SD\0en_SE\0en_SG\0" + "en_SH\0en_SI\0en_SK\0en_SL\0en_SS\0en_SX\0en_SZ\0en_Shaw\0en_TC\0" + 
"en_TK\0en_TO\0en_TT\0en_TV\0en_TZ\0en_UG\0en_VC\0en_VG\0en_VU\0" + "en_WS\0en_ZA\0en_ZM\0en_ZW\0es_419\0es_AR\0es_BO\0es_BR\0es_BZ\0" + "es_CL\0es_CO\0es_CR\0es_CU\0es_DO\0es_EC\0es_GT\0es_HN\0es_JP\0" + "es_MX\0es_NI\0es_PA\0es_PE\0es_PR\0es_PY\0es_SV\0es_US\0es_UY\0" + "es_VE\0ff_Adlm\0ff_Arab\0fr_HT\0ha_Arab\0hi_Latn\0ht\0iu_Latn\0" + "kaa_Latn\0kk_Arab\0kok_Latn\0ks_Deva\0ku_Arab\0kxv_Deva\0kxv_Orya\0" + "kxv_Telu\0ky_Arab\0ky_Latn\0ml_Arab\0mn_Mong\0mni_Mtei\0ms_Arab\0" + "nb\0nn\0no\0no_NO\0pa_Arab\0pt_AO\0pt_CH\0pt_CV\0pt_FR\0pt_GQ\0" + "pt_GW\0pt_LU\0pt_MO\0pt_MZ\0pt_PT\0pt_ST\0pt_TL\0root\0sat_Deva\0" + "sd_Deva\0sd_Khoj\0sd_Sind\0shi_Latn\0so_Arab\0sr_Latn\0sw_Arab\0" + "tg_Arab\0ug_Cyrl\0uz_Arab\0uz_Cyrl\0vai_Latn\0wo_Arab\0yo_Arab\0" + "yue_Hans\0zh_Hant\0zh_Hant_HK\0zh_Hant_MO\0"; const int32_t parentLocaleTable[] = { - 0, 1080, // az_Arab -> root - 8, 1080, // az_Cyrl -> root - 16, 1080, // bal_Latn -> root - 25, 1080, // blt_Latn -> root - 34, 1080, // bm_Nkoo -> root - 42, 1080, // bs_Cyrl -> root - 50, 1080, // byn_Latn -> root - 59, 1080, // cu_Glag -> root - 67, 1080, // dje_Arab -> root - 76, 1080, // dyo_Arab -> root + 0, 1146, // az_Arab -> root + 8, 1146, // az_Cyrl -> root + 16, 1146, // bal_Latn -> root + 25, 1146, // blt_Latn -> root + 34, 1146, // bm_Nkoo -> root + 42, 1146, // bs_Cyrl -> root + 50, 1146, // byn_Latn -> root + 59, 1146, // cu_Glag -> root + 67, 1146, // dje_Arab -> root + 76, 1146, // dyo_Arab -> root 92, 85, // en_150 -> en_001 99, 85, // en_AG -> en_001 105, 85, // en_AI -> en_001 @@ -1208,161 +1212,172 @@ const int32_t parentLocaleTable[] = { 177, 85, // en_CM -> en_001 183, 85, // en_CX -> en_001 189, 85, // en_CY -> en_001 - 195, 92, // en_DE -> en_150 - 201, 85, // en_DG -> en_001 - 207, 92, // en_DK -> en_150 - 213, 85, // en_DM -> en_001 - 219, 1080, // en_Dsrt -> root - 227, 85, // en_ER -> en_001 - 233, 92, // en_FI -> en_150 - 239, 85, // en_FJ -> en_001 - 245, 85, // en_FK -> en_001 - 251, 85, // en_FM -> 
en_001 - 257, 85, // en_GB -> en_001 - 263, 85, // en_GD -> en_001 - 269, 85, // en_GG -> en_001 - 275, 85, // en_GH -> en_001 - 281, 85, // en_GI -> en_001 - 287, 85, // en_GM -> en_001 - 293, 85, // en_GY -> en_001 - 299, 85, // en_HK -> en_001 - 305, 85, // en_ID -> en_001 - 311, 85, // en_IE -> en_001 - 317, 85, // en_IL -> en_001 - 323, 85, // en_IM -> en_001 - 329, 85, // en_IN -> en_001 - 335, 85, // en_IO -> en_001 - 341, 85, // en_JE -> en_001 - 347, 85, // en_JM -> en_001 - 353, 85, // en_KE -> en_001 - 359, 85, // en_KI -> en_001 - 365, 85, // en_KN -> en_001 - 371, 85, // en_KY -> en_001 - 377, 85, // en_LC -> en_001 - 383, 85, // en_LR -> en_001 - 389, 85, // en_LS -> en_001 - 395, 85, // en_MG -> en_001 - 401, 85, // en_MO -> en_001 - 407, 85, // en_MS -> en_001 - 413, 85, // en_MT -> en_001 - 419, 85, // en_MU -> en_001 - 425, 85, // en_MV -> en_001 - 431, 85, // en_MW -> en_001 - 437, 85, // en_MY -> en_001 - 443, 85, // en_NA -> en_001 - 449, 85, // en_NF -> en_001 - 455, 85, // en_NG -> en_001 - 461, 92, // en_NL -> en_150 - 467, 85, // en_NR -> en_001 - 473, 85, // en_NU -> en_001 - 479, 85, // en_NZ -> en_001 - 485, 85, // en_PG -> en_001 - 491, 85, // en_PK -> en_001 - 497, 85, // en_PN -> en_001 - 503, 85, // en_PW -> en_001 - 509, 85, // en_RW -> en_001 - 515, 85, // en_SB -> en_001 - 521, 85, // en_SC -> en_001 - 527, 85, // en_SD -> en_001 - 533, 92, // en_SE -> en_150 - 539, 85, // en_SG -> en_001 - 545, 85, // en_SH -> en_001 - 551, 92, // en_SI -> en_150 - 557, 85, // en_SL -> en_001 - 563, 85, // en_SS -> en_001 - 569, 85, // en_SX -> en_001 - 575, 85, // en_SZ -> en_001 - 581, 1080, // en_Shaw -> root - 589, 85, // en_TC -> en_001 - 595, 85, // en_TK -> en_001 - 601, 85, // en_TO -> en_001 - 607, 85, // en_TT -> en_001 - 613, 85, // en_TV -> en_001 - 619, 85, // en_TZ -> en_001 - 625, 85, // en_UG -> en_001 - 631, 85, // en_VC -> en_001 - 637, 85, // en_VG -> en_001 - 643, 85, // en_VU -> en_001 - 649, 85, // en_WS -> en_001 - 655, 85, 
// en_ZA -> en_001 - 661, 85, // en_ZM -> en_001 - 667, 85, // en_ZW -> en_001 - 680, 673, // es_AR -> es_419 - 686, 673, // es_BO -> es_419 - 692, 673, // es_BR -> es_419 - 698, 673, // es_BZ -> es_419 - 704, 673, // es_CL -> es_419 - 710, 673, // es_CO -> es_419 - 716, 673, // es_CR -> es_419 - 722, 673, // es_CU -> es_419 - 728, 673, // es_DO -> es_419 - 734, 673, // es_EC -> es_419 - 740, 673, // es_GT -> es_419 - 746, 673, // es_HN -> es_419 - 752, 673, // es_JP -> es_419 - 758, 673, // es_MX -> es_419 - 764, 673, // es_NI -> es_419 - 770, 673, // es_PA -> es_419 - 776, 673, // es_PE -> es_419 - 782, 673, // es_PR -> es_419 - 788, 673, // es_PY -> es_419 - 794, 673, // es_SV -> es_419 - 800, 673, // es_US -> es_419 - 806, 673, // es_UY -> es_419 - 812, 673, // es_VE -> es_419 - 818, 1080, // ff_Adlm -> root - 826, 1080, // ff_Arab -> root - 840, 1080, // ha_Arab -> root - 848, 329, // hi_Latn -> en_IN - 856, 834, // ht -> fr_HT - 859, 1080, // iu_Latn -> root - 867, 1080, // kaa_Latn -> root - 876, 1080, // kk_Arab -> root - 884, 1080, // kok_Latn -> root - 893, 1080, // ks_Deva -> root - 901, 1080, // ku_Arab -> root - 909, 1080, // kxv_Deva -> root - 918, 1080, // kxv_Orya -> root - 927, 1080, // kxv_Telu -> root - 936, 1080, // ky_Arab -> root - 944, 1080, // ky_Latn -> root - 952, 1080, // ml_Arab -> root - 960, 1080, // mn_Mong -> root - 968, 1080, // mni_Mtei -> root - 977, 1080, // ms_Arab -> root - 985, 991, // nb -> no - 988, 991, // nn -> no - 994, 991, // no_NO -> no - 1000, 1080, // pa_Arab -> root - 1008, 1062, // pt_AO -> pt_PT - 1014, 1062, // pt_CH -> pt_PT - 1020, 1062, // pt_CV -> pt_PT - 1026, 1062, // pt_FR -> pt_PT - 1032, 1062, // pt_GQ -> pt_PT - 1038, 1062, // pt_GW -> pt_PT - 1044, 1062, // pt_LU -> pt_PT - 1050, 1062, // pt_MO -> pt_PT - 1056, 1062, // pt_MZ -> pt_PT - 1068, 1062, // pt_ST -> pt_PT - 1074, 1062, // pt_TL -> pt_PT - 1085, 1080, // sat_Deva -> root - 1094, 1080, // sd_Deva -> root - 1102, 1080, // sd_Khoj -> root - 
1110, 1080, // sd_Sind -> root - 1118, 1080, // shi_Latn -> root - 1127, 1080, // so_Arab -> root - 1135, 1080, // sr_Latn -> root - 1143, 1080, // sw_Arab -> root - 1151, 1080, // tg_Arab -> root - 1159, 1080, // ug_Cyrl -> root - 1167, 1080, // uz_Arab -> root - 1175, 1080, // uz_Cyrl -> root - 1183, 1080, // vai_Latn -> root - 1192, 1080, // wo_Arab -> root - 1200, 1080, // yo_Arab -> root - 1208, 1080, // yue_Hans -> root - 1217, 1080, // zh_Hant -> root - 1236, 1225, // zh_Hant_MO -> zh_Hant_HK + 195, 92, // en_CZ -> en_150 + 201, 92, // en_DE -> en_150 + 207, 85, // en_DG -> en_001 + 213, 92, // en_DK -> en_150 + 219, 85, // en_DM -> en_001 + 225, 1146, // en_Dsrt -> root + 233, 85, // en_ER -> en_001 + 239, 92, // en_ES -> en_150 + 245, 92, // en_FI -> en_150 + 251, 85, // en_FJ -> en_001 + 257, 85, // en_FK -> en_001 + 263, 85, // en_FM -> en_001 + 269, 92, // en_FR -> en_150 + 275, 85, // en_GB -> en_001 + 281, 85, // en_GD -> en_001 + 287, 85, // en_GG -> en_001 + 293, 85, // en_GH -> en_001 + 299, 85, // en_GI -> en_001 + 305, 85, // en_GM -> en_001 + 311, 85, // en_GS -> en_001 + 317, 85, // en_GY -> en_001 + 323, 85, // en_HK -> en_001 + 329, 92, // en_HU -> en_150 + 335, 85, // en_ID -> en_001 + 341, 85, // en_IE -> en_001 + 347, 85, // en_IL -> en_001 + 353, 85, // en_IM -> en_001 + 359, 85, // en_IN -> en_001 + 365, 85, // en_IO -> en_001 + 371, 92, // en_IT -> en_150 + 377, 85, // en_JE -> en_001 + 383, 85, // en_JM -> en_001 + 389, 85, // en_KE -> en_001 + 395, 85, // en_KI -> en_001 + 401, 85, // en_KN -> en_001 + 407, 85, // en_KY -> en_001 + 413, 85, // en_LC -> en_001 + 419, 85, // en_LR -> en_001 + 425, 85, // en_LS -> en_001 + 431, 85, // en_MG -> en_001 + 437, 85, // en_MO -> en_001 + 443, 85, // en_MS -> en_001 + 449, 85, // en_MT -> en_001 + 455, 85, // en_MU -> en_001 + 461, 85, // en_MV -> en_001 + 467, 85, // en_MW -> en_001 + 473, 85, // en_MY -> en_001 + 479, 85, // en_NA -> en_001 + 485, 85, // en_NF -> en_001 + 491, 85, // en_NG -> 
en_001 + 497, 92, // en_NL -> en_150 + 503, 92, // en_NO -> en_150 + 509, 85, // en_NR -> en_001 + 515, 85, // en_NU -> en_001 + 521, 85, // en_NZ -> en_001 + 527, 85, // en_PG -> en_001 + 533, 85, // en_PK -> en_001 + 539, 92, // en_PL -> en_150 + 545, 85, // en_PN -> en_001 + 551, 92, // en_PT -> en_150 + 557, 85, // en_PW -> en_001 + 563, 92, // en_RO -> en_150 + 569, 85, // en_RW -> en_001 + 575, 85, // en_SB -> en_001 + 581, 85, // en_SC -> en_001 + 587, 85, // en_SD -> en_001 + 593, 92, // en_SE -> en_150 + 599, 85, // en_SG -> en_001 + 605, 85, // en_SH -> en_001 + 611, 92, // en_SI -> en_150 + 617, 92, // en_SK -> en_150 + 623, 85, // en_SL -> en_001 + 629, 85, // en_SS -> en_001 + 635, 85, // en_SX -> en_001 + 641, 85, // en_SZ -> en_001 + 647, 1146, // en_Shaw -> root + 655, 85, // en_TC -> en_001 + 661, 85, // en_TK -> en_001 + 667, 85, // en_TO -> en_001 + 673, 85, // en_TT -> en_001 + 679, 85, // en_TV -> en_001 + 685, 85, // en_TZ -> en_001 + 691, 85, // en_UG -> en_001 + 697, 85, // en_VC -> en_001 + 703, 85, // en_VG -> en_001 + 709, 85, // en_VU -> en_001 + 715, 85, // en_WS -> en_001 + 721, 85, // en_ZA -> en_001 + 727, 85, // en_ZM -> en_001 + 733, 85, // en_ZW -> en_001 + 746, 739, // es_AR -> es_419 + 752, 739, // es_BO -> es_419 + 758, 739, // es_BR -> es_419 + 764, 739, // es_BZ -> es_419 + 770, 739, // es_CL -> es_419 + 776, 739, // es_CO -> es_419 + 782, 739, // es_CR -> es_419 + 788, 739, // es_CU -> es_419 + 794, 739, // es_DO -> es_419 + 800, 739, // es_EC -> es_419 + 806, 739, // es_GT -> es_419 + 812, 739, // es_HN -> es_419 + 818, 739, // es_JP -> es_419 + 824, 739, // es_MX -> es_419 + 830, 739, // es_NI -> es_419 + 836, 739, // es_PA -> es_419 + 842, 739, // es_PE -> es_419 + 848, 739, // es_PR -> es_419 + 854, 739, // es_PY -> es_419 + 860, 739, // es_SV -> es_419 + 866, 739, // es_US -> es_419 + 872, 739, // es_UY -> es_419 + 878, 739, // es_VE -> es_419 + 884, 1146, // ff_Adlm -> root + 892, 1146, // ff_Arab -> root + 906, 1146, 
// ha_Arab -> root + 914, 359, // hi_Latn -> en_IN + 922, 900, // ht -> fr_HT + 925, 1146, // iu_Latn -> root + 933, 1146, // kaa_Latn -> root + 942, 1146, // kk_Arab -> root + 950, 1146, // kok_Latn -> root + 959, 1146, // ks_Deva -> root + 967, 1146, // ku_Arab -> root + 975, 1146, // kxv_Deva -> root + 984, 1146, // kxv_Orya -> root + 993, 1146, // kxv_Telu -> root + 1002, 1146, // ky_Arab -> root + 1010, 1146, // ky_Latn -> root + 1018, 1146, // ml_Arab -> root + 1026, 1146, // mn_Mong -> root + 1034, 1146, // mni_Mtei -> root + 1043, 1146, // ms_Arab -> root + 1051, 1057, // nb -> no + 1054, 1057, // nn -> no + 1060, 1057, // no_NO -> no + 1066, 1146, // pa_Arab -> root + 1074, 1128, // pt_AO -> pt_PT + 1080, 1128, // pt_CH -> pt_PT + 1086, 1128, // pt_CV -> pt_PT + 1092, 1128, // pt_FR -> pt_PT + 1098, 1128, // pt_GQ -> pt_PT + 1104, 1128, // pt_GW -> pt_PT + 1110, 1128, // pt_LU -> pt_PT + 1116, 1128, // pt_MO -> pt_PT + 1122, 1128, // pt_MZ -> pt_PT + 1134, 1128, // pt_ST -> pt_PT + 1140, 1128, // pt_TL -> pt_PT + 1151, 1146, // sat_Deva -> root + 1160, 1146, // sd_Deva -> root + 1168, 1146, // sd_Khoj -> root + 1176, 1146, // sd_Sind -> root + 1184, 1146, // shi_Latn -> root + 1193, 1146, // so_Arab -> root + 1201, 1146, // sr_Latn -> root + 1209, 1146, // sw_Arab -> root + 1217, 1146, // tg_Arab -> root + 1225, 1146, // ug_Cyrl -> root + 1233, 1146, // uz_Arab -> root + 1241, 1146, // uz_Cyrl -> root + 1249, 1146, // vai_Latn -> root + 1258, 1146, // wo_Arab -> root + 1266, 1146, // yo_Arab -> root + 1274, 1146, // yue_Hans -> root + 1283, 1146, // zh_Hant -> root + 1302, 1291, // zh_Hant_MO -> zh_Hant_HK }; diff --git a/thirdparty/icu4c/common/locbased.cpp b/thirdparty/icu4c/common/locbased.cpp index 832bc3e88b1..6f35e72210f 100644 --- a/thirdparty/icu4c/common/locbased.cpp +++ b/thirdparty/icu4c/common/locbased.cpp @@ -12,44 +12,84 @@ */ #include "locbased.h" #include "cstring.h" +#include "charstr.h" U_NAMESPACE_BEGIN -Locale 
LocaleBased::getLocale(ULocDataLocaleType type, UErrorCode& status) const { - const char* id = getLocaleID(type, status); +Locale LocaleBased::getLocale(const CharString* valid, const CharString* actual, + ULocDataLocaleType type, UErrorCode& status) { + const char* id = getLocaleID(valid, actual, type, status); return Locale(id != nullptr ? id : ""); } -const char* LocaleBased::getLocaleID(ULocDataLocaleType type, UErrorCode& status) const { +const char* LocaleBased::getLocaleID(const CharString* valid, const CharString* actual, + ULocDataLocaleType type, UErrorCode& status) { if (U_FAILURE(status)) { return nullptr; } switch(type) { case ULOC_VALID_LOCALE: - return valid; + return valid == nullptr ? "" : valid->data(); case ULOC_ACTUAL_LOCALE: - return actual; + return actual == nullptr ? "" : actual->data(); default: status = U_ILLEGAL_ARGUMENT_ERROR; return nullptr; } } -void LocaleBased::setLocaleIDs(const char* validID, const char* actualID) { - if (validID != nullptr) { - uprv_strncpy(valid, validID, ULOC_FULLNAME_CAPACITY); - valid[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate - } - if (actualID != nullptr) { - uprv_strncpy(actual, actualID, ULOC_FULLNAME_CAPACITY); - actual[ULOC_FULLNAME_CAPACITY-1] = 0; // always terminate +void LocaleBased::setLocaleIDs(const CharString* validID, const CharString* actualID, UErrorCode& status) { + setValidLocaleID(validID, status); + setActualLocaleID(actualID,status); +} +void LocaleBased::setLocaleIDs(const char* validID, const char* actualID, UErrorCode& status) { + setValidLocaleID(validID, status); + setActualLocaleID(actualID,status); +} + +void LocaleBased::setLocaleID(const char* id, CharString*& dest, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + if (id == nullptr || *id == 0) { + delete dest; + dest = nullptr; + } else { + if (dest == nullptr) { + dest = new CharString(id, status); + if (dest == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } else { + dest->copyFrom(id, 
status); + } } } -void LocaleBased::setLocaleIDs(const Locale& validID, const Locale& actualID) { - uprv_strcpy(valid, validID.getName()); - uprv_strcpy(actual, actualID.getName()); +void LocaleBased::setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status) { + if (U_FAILURE(status)) { return; } + if (id == nullptr || id->isEmpty()) { + delete dest; + dest = nullptr; + } else { + if (dest == nullptr) { + dest = new CharString(*id, status); + if (dest == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + } else { + dest->copyFrom(*id, status); + } + } +} + +bool LocaleBased::equalIDs(const CharString* left, const CharString* right) { + // true if both are nullptr + if (left == nullptr && right == nullptr) return true; + // false if only one is nullptr + if (left == nullptr || right == nullptr) return false; + return *left == *right; } U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/locbased.h b/thirdparty/icu4c/common/locbased.h index 2d260b52787..9441eb82310 100644 --- a/thirdparty/icu4c/common/locbased.h +++ b/thirdparty/icu4c/common/locbased.h @@ -19,13 +19,14 @@ /** * Macro to declare a locale LocaleBased wrapper object for the given * object, which must have two members named `validLocale' and - * `actualLocale' of size ULOC_FULLNAME_CAPACITY + * `actualLocale' of which are pointers to the internal icu::CharString. */ #define U_LOCALE_BASED(varname, objname) \ LocaleBased varname((objname).validLocale, (objname).actualLocale) U_NAMESPACE_BEGIN +class CharString; /** * A utility class that unifies the implementation of getLocale() by * various ICU services. This class is likely to be removed in the @@ -41,33 +42,35 @@ class U_COMMON_API LocaleBased : public UMemory { * Construct a LocaleBased wrapper around the two pointers. These * will be aliased for the lifetime of this object. */ - inline LocaleBased(char* validAlias, char* actualAlias); - - /** - * Construct a LocaleBased wrapper around the two const pointers. 
- * These will be aliased for the lifetime of this object. - */ - inline LocaleBased(const char* validAlias, const char* actualAlias); + inline LocaleBased(CharString*& validAlias, CharString*& actualAlias); /** * Return locale meta-data for the service object wrapped by this * object. Either the valid or the actual locale may be * retrieved. + * @param valid The valid locale. + * @param actual The actual locale. * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE * @param status input-output error code * @return the indicated locale */ - Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const; + static Locale getLocale( + const CharString* valid, const CharString* actual, + ULocDataLocaleType type, UErrorCode& status); /** * Return the locale ID for the service object wrapped by this * object. Either the valid or the actual locale may be * retrieved. + * @param valid The valid locale. + * @param actual The actual locale. * @param type either ULOC_VALID_LOCALE or ULOC_ACTUAL_LOCALE * @param status input-output error code * @return the indicated locale ID */ - const char* getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; + static const char* getLocaleID( + const CharString* valid, const CharString* actual, + ULocDataLocaleType type, UErrorCode& status); /** * Set the locale meta-data for the service object wrapped by this @@ -75,31 +78,40 @@ class U_COMMON_API LocaleBased : public UMemory { * @param valid the ID of the valid locale * @param actual the ID of the actual locale */ - void setLocaleIDs(const char* valid, const char* actual); + void setLocaleIDs(const char* valid, const char* actual, UErrorCode& status); + void setLocaleIDs(const CharString* valid, const CharString* actual, UErrorCode& status); - /** - * Set the locale meta-data for the service object wrapped by this - * object. 
- * @param valid the ID of the valid locale - * @param actual the ID of the actual locale - */ - void setLocaleIDs(const Locale& valid, const Locale& actual); + static void setLocaleID(const char* id, CharString*& dest, UErrorCode& status); + static void setLocaleID(const CharString* id, CharString*& dest, UErrorCode& status); + + static bool equalIDs(const CharString* left, const CharString* right); private: - char* valid; - - char* actual; + void setValidLocaleID(const CharString* id, UErrorCode& status); + void setActualLocaleID(const CharString* id, UErrorCode& status); + void setValidLocaleID(const char* id, UErrorCode& status); + void setActualLocaleID(const char* id, UErrorCode& status); + + CharString*& valid; + CharString*& actual; }; -inline LocaleBased::LocaleBased(char* validAlias, char* actualAlias) : +inline LocaleBased::LocaleBased(CharString*& validAlias, CharString*& actualAlias) : valid(validAlias), actual(actualAlias) { } -inline LocaleBased::LocaleBased(const char* validAlias, - const char* actualAlias) : - // ugh: cast away const - valid(const_cast(validAlias)), actual(const_cast(actualAlias)) { +inline void LocaleBased::setValidLocaleID(const CharString* id, UErrorCode& status) { + setLocaleID(id, valid, status); +} +inline void LocaleBased::setActualLocaleID(const CharString* id, UErrorCode& status) { + setLocaleID(id, actual, status); +} +inline void LocaleBased::setValidLocaleID(const char* id, UErrorCode& status) { + setLocaleID(id, valid, status); +} +inline void LocaleBased::setActualLocaleID(const char* id, UErrorCode& status) { + setLocaleID(id, actual, status); } U_NAMESPACE_END diff --git a/thirdparty/icu4c/common/locdispnames.cpp b/thirdparty/icu4c/common/locdispnames.cpp index ddf7687a2bf..d3521e879b6 100644 --- a/thirdparty/icu4c/common/locdispnames.cpp +++ b/thirdparty/icu4c/common/locdispnames.cpp @@ -19,6 +19,8 @@ * that then do not depend on resource bundle code and display name data. 
*/ +#include + #include "unicode/utypes.h" #include "unicode/brkiter.h" #include "unicode/locid.h" @@ -359,7 +361,7 @@ _getStringOrCopyKey(const char *path, const char *locale, return u_terminateUChars(dest, destCapacity, length, &errorCode); } -using UDisplayNameGetter = icu::CharString(const char*, UErrorCode&); +using UDisplayNameGetter = icu::CharString(std::string_view, UErrorCode&); int32_t _getDisplayNameForComponent(const char *locale, @@ -377,6 +379,10 @@ _getDisplayNameForComponent(const char *locale, return 0; } + if (locale == nullptr) { + locale = uloc_getDefault(); + } + localStatus = U_ZERO_ERROR; icu::CharString localeBuffer = (*getter)(locale, localStatus); if (U_FAILURE(localStatus)) { diff --git a/thirdparty/icu4c/common/locid.cpp b/thirdparty/icu4c/common/locid.cpp index 4a73f559205..e7e86079ae9 100644 --- a/thirdparty/icu4c/common/locid.cpp +++ b/thirdparty/icu4c/common/locid.cpp @@ -1828,8 +1828,13 @@ ulocimp_isCanonicalizedLocaleForTest(const char* localeName) U_NAMESPACE_BEGIN -/*This function initializes a Locale from a C locale ID*/ Locale& Locale::init(const char* localeID, UBool canonicalize) +{ + return localeID == nullptr ? 
*this = getDefault() : init(StringPiece{localeID}, canonicalize); +} + +/*This function initializes a Locale from a C locale ID*/ +Locale& Locale::init(StringPiece localeID, UBool canonicalize) { fIsBogus = false; /* Free our current storage */ @@ -1854,19 +1859,28 @@ Locale& Locale::init(const char* localeID, UBool canonicalize) int32_t length; UErrorCode err; - if(localeID == nullptr) { - // not an error, just set the default locale - return *this = getDefault(); - } - /* preset all fields to empty */ language[0] = script[0] = country[0] = 0; + const auto parse = [canonicalize](std::string_view localeID, + char* name, + int32_t nameCapacity, + UErrorCode& status) { + return ByteSinkUtil::viaByteSinkToTerminatedChars( + name, nameCapacity, + [&](ByteSink& sink, UErrorCode& status) { + if (canonicalize) { + ulocimp_canonicalize(localeID, sink, status); + } else { + ulocimp_getName(localeID, sink, status); + } + }, + status); + }; + // "canonicalize" the locale ID to ICU/Java format err = U_ZERO_ERROR; - length = canonicalize ? - uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) : - uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err); + length = parse(localeID, fullName, sizeof fullNameBuffer, err); if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast(sizeof(fullNameBuffer))) { U_ASSERT(baseName == nullptr); @@ -1877,9 +1891,7 @@ Locale& Locale::init(const char* localeID, UBool canonicalize) } fullName = newFullName; err = U_ZERO_ERROR; - length = canonicalize ? 
- uloc_canonicalize(localeID, fullName, length+1, &err) : - uloc_getName(localeID, fullName, length+1, &err); + length = parse(localeID, fullName, length + 1, err); } if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) { /* should never occur */ @@ -2200,6 +2212,13 @@ Locale::createFromName (const char *name) } } +Locale U_EXPORT2 +Locale::createFromName(StringPiece name) { + Locale loc(""); + loc.init(name, false); + return loc; +} + Locale U_EXPORT2 Locale::createCanonical(const char* name) { Locale loc(""); diff --git a/thirdparty/icu4c/common/loclikely.cpp b/thirdparty/icu4c/common/loclikely.cpp index ccbcbfa7a5d..f87fd8dd61c 100644 --- a/thirdparty/icu4c/common/loclikely.cpp +++ b/thirdparty/icu4c/common/loclikely.cpp @@ -300,6 +300,9 @@ ulocimp_addLikelySubtags(const char* localeID, icu::ByteSink& sink, UErrorCode& status) { if (U_FAILURE(status)) { return; } + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status); _uloc_addLikelySubtags(localeBuffer.data(), sink, status); } @@ -334,6 +337,9 @@ ulocimp_minimizeSubtags(const char* localeID, bool favorScript, UErrorCode& status) { if (U_FAILURE(status)) { return; } + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } icu::CharString localeBuffer = ulocimp_canonicalize(localeID, status); _uloc_minimizeSubtags(localeBuffer.data(), sink, favorScript, status); } @@ -349,7 +355,9 @@ uloc_isRightToLeft(const char *locale) { UErrorCode errorCode = U_ZERO_ERROR; icu::CharString lang; icu::CharString script; - ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, errorCode); + ulocimp_getSubtags( + locale == nullptr ? uloc_getDefault() : locale, + &lang, &script, nullptr, nullptr, nullptr, errorCode); if (U_FAILURE(errorCode) || script.isEmpty()) { // Fastpath: We know the likely scripts and their writing direction // for some common languages. 
@@ -369,7 +377,7 @@ uloc_isRightToLeft(const char *locale) { if (U_FAILURE(errorCode)) { return false; } - ulocimp_getSubtags(likely.data(), nullptr, &script, nullptr, nullptr, nullptr, errorCode); + ulocimp_getSubtags(likely.toStringPiece(), nullptr, &script, nullptr, nullptr, nullptr, errorCode); if (U_FAILURE(errorCode) || script.isEmpty()) { return false; } @@ -430,7 +438,7 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion, icu::CharString rgBuf = GetRegionFromKey(localeID, "rg", status); if (U_SUCCESS(status) && rgBuf.isEmpty()) { // No valid rg keyword value, try for unicode_region_subtag - rgBuf = ulocimp_getRegion(localeID, status); + rgBuf = ulocimp_getRegion(localeID == nullptr ? uloc_getDefault() : localeID, status); if (U_SUCCESS(status) && rgBuf.isEmpty() && inferRegion) { // Second check for sd keyword value rgBuf = GetRegionFromKey(localeID, "sd", status); @@ -439,7 +447,7 @@ ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion, UErrorCode rgStatus = U_ZERO_ERROR; icu::CharString locBuf = ulocimp_addLikelySubtags(localeID, rgStatus); if (U_SUCCESS(rgStatus)) { - rgBuf = ulocimp_getRegion(locBuf.data(), status); + rgBuf = ulocimp_getRegion(locBuf.toStringPiece(), status); } } } diff --git a/thirdparty/icu4c/common/loclikelysubtags.cpp b/thirdparty/icu4c/common/loclikelysubtags.cpp index 7c6131197d8..7245a779816 100644 --- a/thirdparty/icu4c/common/loclikelysubtags.cpp +++ b/thirdparty/icu4c/common/loclikelysubtags.cpp @@ -527,7 +527,7 @@ LSR LikelySubtags::makeMaximizedLsrFrom(const Locale &locale, return {}; } const char *name = locale.getName(); - if (uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=") + if (!returnInputIfUnmatch && uprv_isAtSign(name[0]) && name[1] == 'x' && name[2] == '=') { // name.startsWith("@x=") // Private use language tag x-subtag-subtag... which CLDR changes to // und-x-subtag-subtag... 
return LSR(name, "", "", LSR::EXPLICIT_LSR); diff --git a/thirdparty/icu4c/common/locresdata.cpp b/thirdparty/icu4c/common/locresdata.cpp index 725e6609159..ba7163fa2db 100644 --- a/thirdparty/icu4c/common/locresdata.cpp +++ b/thirdparty/icu4c/common/locresdata.cpp @@ -161,6 +161,9 @@ _uloc_getOrientationHelper(const char* localeId, if (U_FAILURE(status)) { return result; } + if (localeId == nullptr) { + localeId = uloc_getDefault(); + } icu::CharString localeBuffer = ulocimp_canonicalize(localeId, status); if (U_FAILURE(status)) { return result; } diff --git a/thirdparty/icu4c/common/punycode.cpp b/thirdparty/icu4c/common/punycode.cpp index aa02298c5e6..1868a07a856 100644 --- a/thirdparty/icu4c/common/punycode.cpp +++ b/thirdparty/icu4c/common/punycode.cpp @@ -193,7 +193,7 @@ u_strToPunycode(const char16_t *src, int32_t srcLength, return 0; } - if(src==nullptr || srcLength<-1 || (dest==nullptr && destCapacity!=0)) { + if(src==nullptr || srcLength<-1 || destCapacity<0 || (dest==nullptr && destCapacity!=0)) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return 0; } diff --git a/thirdparty/icu4c/common/putil.cpp b/thirdparty/icu4c/common/putil.cpp index 4cf07797ba3..ea15fdff0b0 100644 --- a/thirdparty/icu4c/common/putil.cpp +++ b/thirdparty/icu4c/common/putil.cpp @@ -76,7 +76,7 @@ #include #ifndef U_COMMON_IMPLEMENTATION -#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/howtouseicu +#error U_COMMON_IMPLEMENTATION not set - must be set for all ICU source files in common/ - see https://unicode-org.github.io/icu/userguide/icu/howtouseicu.html #endif diff --git a/thirdparty/icu4c/common/rbbinode.cpp b/thirdparty/icu4c/common/rbbinode.cpp index 71407b9e684..849ee7180a2 100644 --- a/thirdparty/icu4c/common/rbbinode.cpp +++ b/thirdparty/icu4c/common/rbbinode.cpp @@ -47,7 +47,10 @@ static int gLastSerial = 0; // Constructor. Just set the fields to reasonable default values. 
// //------------------------------------------------------------------------- -RBBINode::RBBINode(NodeType t) : UMemory() { +RBBINode::RBBINode(NodeType t, UErrorCode& status) : UMemory() { + if (U_FAILURE(status)) { + return; + } #ifdef RBBI_DEBUG fSerialNum = ++gLastSerial; #endif @@ -65,10 +68,13 @@ RBBINode::RBBINode(NodeType t) : UMemory() { fVal = 0; fPrecedence = precZero; - UErrorCode status = U_ZERO_ERROR; - fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere + fFirstPosSet = new UVector(status); fLastPosSet = new UVector(status); fFollowPos = new UVector(status); + if (U_SUCCESS(status) && + (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) { + status = U_MEMORY_ALLOCATION_ERROR; + } if (t==opCat) {fPrecedence = precOpCat;} else if (t==opOr) {fPrecedence = precOpOr;} else if (t==opStart) {fPrecedence = precStart;} @@ -77,7 +83,10 @@ RBBINode::RBBINode(NodeType t) : UMemory() { } -RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { +RBBINode::RBBINode(const RBBINode &other, UErrorCode& status) : UMemory(other) { + if (U_FAILURE(status)) { + return; + } #ifdef RBBI_DEBUG fSerialNum = ++gLastSerial; #endif @@ -94,10 +103,13 @@ RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { fVal = other.fVal; fRuleRoot = false; fChainIn = other.fChainIn; - UErrorCode status = U_ZERO_ERROR; fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere fLastPosSet = new UVector(status); fFollowPos = new UVector(status); + if (U_SUCCESS(status) && + (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) { + status = U_MEMORY_ALLOCATION_ERROR; + } } @@ -193,27 +205,54 @@ void RBBINode::NRDeleteNode(RBBINode *node) { // references in preparation for generating the DFA tables. 
// //------------------------------------------------------------------------- -RBBINode *RBBINode::cloneTree() { +constexpr int kRecursiveDepthLimit = 3500; +RBBINode *RBBINode::cloneTree(UErrorCode &status, int depth) { + if (U_FAILURE(status)) { + return nullptr; + } + // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR + // to avoid stack overflow crash. + if (depth > kRecursiveDepthLimit) { + status = U_INPUT_TOO_LONG_ERROR; + return nullptr; + } RBBINode *n; if (fType == RBBINode::varRef) { // If the current node is a variable reference, skip over it // and clone the definition of the variable instead. - n = fLeftChild->cloneTree(); + n = fLeftChild->cloneTree(status, depth+1); + if (U_FAILURE(status)) { + return nullptr; + } } else if (fType == RBBINode::uset) { n = this; } else { - n = new RBBINode(*this); + n = new RBBINode(*this, status); + if (U_FAILURE(status)) { + delete n; + return nullptr; + } // Check for null pointer. - if (n != nullptr) { - if (fLeftChild != nullptr) { - n->fLeftChild = fLeftChild->cloneTree(); - n->fLeftChild->fParent = n; + if (n == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } + if (fLeftChild != nullptr) { + n->fLeftChild = fLeftChild->cloneTree(status, depth+1); + if (U_FAILURE(status)) { + delete n; + return nullptr; } - if (fRightChild != nullptr) { - n->fRightChild = fRightChild->cloneTree(); - n->fRightChild->fParent = n; + n->fLeftChild->fParent = n; + } + if (fRightChild != nullptr) { + n->fRightChild = fRightChild->cloneTree(status, depth+1); + if (U_FAILURE(status)) { + delete n; + return nullptr; } + n->fRightChild->fParent = n; } } return n; @@ -239,7 +278,6 @@ RBBINode *RBBINode::cloneTree() { // nested references are handled by cloneTree(), not here. 
// //------------------------------------------------------------------------- -constexpr int kRecursiveDepthLimit = 3500; RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) { if (U_FAILURE(status)) { return this; @@ -251,21 +289,34 @@ RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) { return this; } if (fType == varRef) { - RBBINode *retNode = fLeftChild->cloneTree(); - if (retNode != nullptr) { - retNode->fRuleRoot = this->fRuleRoot; - retNode->fChainIn = this->fChainIn; + RBBINode *retNode = fLeftChild->cloneTree(status, depth+1); + if (U_FAILURE(status)) { + return this; } + retNode->fRuleRoot = this->fRuleRoot; + retNode->fChainIn = this->fChainIn; delete this; // TODO: undefined behavior. Fix. return retNode; } if (fLeftChild != nullptr) { fLeftChild = fLeftChild->flattenVariables(status, depth+1); + if (fLeftChild == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(status)) { + return this; + } fLeftChild->fParent = this; } if (fRightChild != nullptr) { fRightChild = fRightChild->flattenVariables(status, depth+1); + if (fRightChild == nullptr) { + status = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(status)) { + return this; + } fRightChild->fParent = this; } return this; @@ -280,7 +331,16 @@ RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) { // the left child of the uset node. // //------------------------------------------------------------------------- -void RBBINode::flattenSets() { +void RBBINode::flattenSets(UErrorCode &status, int depth) { + if (U_FAILURE(status)) { + return; + } + // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR + // to avoid stack overflow crash. 
+ if (depth > kRecursiveDepthLimit) { + status = U_INPUT_TOO_LONG_ERROR; + return; + } U_ASSERT(fType != setRef); if (fLeftChild != nullptr) { @@ -288,11 +348,15 @@ void RBBINode::flattenSets() { RBBINode *setRefNode = fLeftChild; RBBINode *usetNode = setRefNode->fLeftChild; RBBINode *replTree = usetNode->fLeftChild; - fLeftChild = replTree->cloneTree(); + fLeftChild = replTree->cloneTree(status, depth+1); + if (U_FAILURE(status)) { + delete setRefNode; + return; + } fLeftChild->fParent = this; delete setRefNode; } else { - fLeftChild->flattenSets(); + fLeftChild->flattenSets(status, depth+1); } } @@ -301,11 +365,15 @@ void RBBINode::flattenSets() { RBBINode *setRefNode = fRightChild; RBBINode *usetNode = setRefNode->fLeftChild; RBBINode *replTree = usetNode->fLeftChild; - fRightChild = replTree->cloneTree(); + fRightChild = replTree->cloneTree(status, depth+1); + if (U_FAILURE(status)) { + delete setRefNode; + return; + } fRightChild->fParent = this; delete setRefNode; } else { - fRightChild->flattenSets(); + fRightChild->flattenSets(status, depth+1); } } } diff --git a/thirdparty/icu4c/common/rbbinode.h b/thirdparty/icu4c/common/rbbinode.h index 497a31b8d09..8fbc9d1b588 100644 --- a/thirdparty/icu4c/common/rbbinode.h +++ b/thirdparty/icu4c/common/rbbinode.h @@ -91,14 +91,14 @@ class RBBINode : public UMemory { UVector *fFollowPos; - RBBINode(NodeType t); - RBBINode(const RBBINode &other); + RBBINode(NodeType t, UErrorCode& status); + RBBINode(const RBBINode &other, UErrorCode& status); ~RBBINode(); static void NRDeleteNode(RBBINode *node); - RBBINode *cloneTree(); + RBBINode *cloneTree(UErrorCode &status, int depth=0); RBBINode *flattenVariables(UErrorCode &status, int depth=0); - void flattenSets(); + void flattenSets(UErrorCode &status, int depth=0); void findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status); #ifdef RBBI_DEBUG diff --git a/thirdparty/icu4c/common/rbbiscan.cpp b/thirdparty/icu4c/common/rbbiscan.cpp index cf2d63cd807..77fc3bcd9b7 
100644 --- a/thirdparty/icu4c/common/rbbiscan.cpp +++ b/thirdparty/icu4c/common/rbbiscan.cpp @@ -767,15 +767,24 @@ void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, Unicode c = s.char32At(0); setToAdopt = new UnicodeSet(c, c); } + if (setToAdopt == nullptr) { + error(U_MEMORY_ALLOCATION_ERROR); + return; + } } // // Make a new uset node to refer to this UnicodeSet // This new uset node becomes the child of the caller's setReference node. // - RBBINode *usetNode = new RBBINode(RBBINode::uset); + UErrorCode localStatus = U_ZERO_ERROR; + RBBINode *usetNode = new RBBINode(RBBINode::uset, localStatus); if (usetNode == nullptr) { - error(U_MEMORY_ALLOCATION_ERROR); + localStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(localStatus)) { + delete usetNode; + error(localStatus); delete setToAdopt; return; } @@ -1191,7 +1200,7 @@ RBBINode *RBBIRuleScanner::pushNewNode(RBBINode::NodeType t) { return nullptr; } fNodeStackPtr++; - fNodeStack[fNodeStackPtr] = new RBBINode(t); + fNodeStack[fNodeStackPtr] = new RBBINode(t, *fRB->fStatus); if (fNodeStack[fNodeStackPtr] == nullptr) { *fRB->fStatus = U_MEMORY_ALLOCATION_ERROR; } diff --git a/thirdparty/icu4c/common/rbbisetb.cpp b/thirdparty/icu4c/common/rbbisetb.cpp index 6c22cf470f8..df94fc8bc4f 100644 --- a/thirdparty/icu4c/common/rbbisetb.cpp +++ b/thirdparty/icu4c/common/rbbisetb.cpp @@ -375,7 +375,11 @@ void RBBISetBuilder::addValToSets(UVector *sets, uint32_t val) { } void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) { - RBBINode *leafNode = new RBBINode(RBBINode::leafChar); + RBBINode *leafNode = new RBBINode(RBBINode::leafChar, *fStatus); + if (U_FAILURE(*fStatus)) { + delete leafNode; + return; + } if (leafNode == nullptr) { *fStatus = U_MEMORY_ALLOCATION_ERROR; return; @@ -388,9 +392,13 @@ void RBBISetBuilder::addValToSet(RBBINode *usetNode, uint32_t val) { // There are already input symbols present for this set. 
// Set up an OR node, with the previous stuff as the left child // and the new value as the right child. - RBBINode *orNode = new RBBINode(RBBINode::opOr); + RBBINode *orNode = new RBBINode(RBBINode::opOr, *fStatus); if (orNode == nullptr) { *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(*fStatus)) { + delete orNode; + delete leafNode; return; } orNode->fLeftChild = usetNode->fLeftChild; diff --git a/thirdparty/icu4c/common/rbbitblb.cpp b/thirdparty/icu4c/common/rbbitblb.cpp index 4d95137601e..b89909807c2 100644 --- a/thirdparty/icu4c/common/rbbitblb.cpp +++ b/thirdparty/icu4c/common/rbbitblb.cpp @@ -99,13 +99,22 @@ void RBBITableBuilder::buildForwardTable() { // {bof} fake character. // if (fRB->fSetBuilder->sawBOF()) { - RBBINode *bofTop = new RBBINode(RBBINode::opCat); - RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar); - // Delete and exit if memory allocation failed. - if (bofTop == nullptr || bofLeaf == nullptr) { + RBBINode *bofTop = new RBBINode(RBBINode::opCat, *fStatus); + if (bofTop == nullptr) { *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(*fStatus)) { delete bofTop; + return; + } + RBBINode *bofLeaf = new RBBINode(RBBINode::leafChar, *fStatus); + // Delete and exit if memory allocation failed. + if (bofLeaf == nullptr) { + *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(*fStatus)) { delete bofLeaf; + delete bofTop; return; } bofTop->fLeftChild = bofLeaf; @@ -120,18 +129,23 @@ void RBBITableBuilder::buildForwardTable() { // Appears as a cat-node, left child being the original tree, // right child being the end marker. // - RBBINode *cn = new RBBINode(RBBINode::opCat); + RBBINode *cn = new RBBINode(RBBINode::opCat, *fStatus); // Exit if memory allocation failed. 
if (cn == nullptr) { *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(*fStatus)) { + delete cn; return; } cn->fLeftChild = fTree; fTree->fParent = cn; - RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark); + RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark, *fStatus); // Delete and exit if memory allocation failed. if (cn->fRightChild == nullptr) { *fStatus = U_MEMORY_ALLOCATION_ERROR; + } + if (U_FAILURE(*fStatus)) { delete cn; return; } @@ -142,7 +156,7 @@ void RBBITableBuilder::buildForwardTable() { // Replace all references to UnicodeSets with the tree for the equivalent // expression. // - fTree->flattenSets(); + fTree->flattenSets(*fStatus, 0); #ifdef RBBI_DEBUG if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { RBBIDebugPuts("\nParse tree after flattening Unicode Set references."); diff --git a/thirdparty/icu4c/common/resbund.cpp b/thirdparty/icu4c/common/resbund.cpp index 41337cdc155..4c14dae133e 100644 --- a/thirdparty/icu4c/common/resbund.cpp +++ b/thirdparty/icu4c/common/resbund.cpp @@ -388,7 +388,7 @@ const Locale &ResourceBundle::getLocale() const { return ncThis->fLocale != nullptr ? 
*ncThis->fLocale : Locale::getDefault(); } -const Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const +Locale ResourceBundle::getLocale(ULocDataLocaleType type, UErrorCode &status) const { return ures_getLocaleByType(fResource, type, &status); } diff --git a/thirdparty/icu4c/common/ucnvmbcs.cpp b/thirdparty/icu4c/common/ucnvmbcs.cpp index f5507043bf3..d65c2847461 100644 --- a/thirdparty/icu4c/common/ucnvmbcs.cpp +++ b/thirdparty/icu4c/common/ucnvmbcs.cpp @@ -3146,11 +3146,8 @@ ucnv_MBCSGetNextUChar(UConverterToUnicodeArgs *pArgs, if(c<0) { if(U_SUCCESS(*pErrorCode) && source==sourceLimit && lastSourcetoUBytes; cnv->toULength = static_cast(source - lastSource); - do { - *bytes++=*lastSource++; - } while(lastSourcetoUBytes, lastSource, cnv->toULength); *pErrorCode=U_TRUNCATED_CHAR_FOUND; } else if(U_FAILURE(*pErrorCode)) { /* callback(illegal) */ diff --git a/thirdparty/icu4c/common/ucurr.cpp b/thirdparty/icu4c/common/ucurr.cpp index b74a80a676a..cccf1130ae8 100644 --- a/thirdparty/icu4c/common/ucurr.cpp +++ b/thirdparty/icu4c/common/ucurr.cpp @@ -372,12 +372,8 @@ struct CReg : public icu::UMemory { CReg(const char16_t* _iso, const char* _id) : next(nullptr) { - int32_t len = static_cast(uprv_strlen(_id)); - if (len > static_cast(sizeof(id) - 1)) { - len = (sizeof(id)-1); - } - uprv_strncpy(id, _id, len); - id[len] = 0; + uprv_strncpy(id, _id, sizeof(id)-1); + id[sizeof(id)-1] = 0; u_memcpy(iso, _iso, ISO_CURRENCY_CODE_LENGTH); iso[ISO_CURRENCY_CODE_LENGTH] = 0; } @@ -682,6 +678,9 @@ ucurr_getName(const char16_t* currency, // this function. UErrorCode ec2 = U_ZERO_ERROR; + if (locale == nullptr) { + locale = uloc_getDefault(); + } CharString loc = ulocimp_getName(locale, ec2); if (U_FAILURE(ec2)) { *ec = U_ILLEGAL_ARGUMENT_ERROR; @@ -780,6 +779,9 @@ ucurr_getPluralName(const char16_t* currency, // this function. 
UErrorCode ec2 = U_ZERO_ERROR; + if (locale == nullptr) { + locale = uloc_getDefault(); + } CharString loc = ulocimp_getName(locale, ec2); if (U_FAILURE(ec2)) { *ec = U_ILLEGAL_ARGUMENT_ERROR; @@ -973,6 +975,9 @@ collectCurrencyNames(const char* locale, // Look up the Currencies resource for the given locale. UErrorCode ec2 = U_ZERO_ERROR; + if (locale == nullptr) { + locale = uloc_getDefault(); + } CharString loc = ulocimp_getName(locale, ec2); if (U_FAILURE(ec2)) { ec = U_ILLEGAL_ARGUMENT_ERROR; diff --git a/thirdparty/icu4c/common/uloc.cpp b/thirdparty/icu4c/common/uloc.cpp index 51887c97c3e..bea4827a049 100644 --- a/thirdparty/icu4c/common/uloc.cpp +++ b/thirdparty/icu4c/common/uloc.cpp @@ -482,8 +482,8 @@ constexpr CanonicalizationMap CANONICALIZE_MAP[] = { /* ### BCP47 Conversion *******************************************/ /* Gets the size of the shortest subtag in the given localeID. */ -int32_t getShortestSubtagLength(const char *localeID) { - int32_t localeIDLength = static_cast(uprv_strlen(localeID)); +int32_t getShortestSubtagLength(std::string_view localeID) { + int32_t localeIDLength = static_cast(localeID.length()); int32_t length = localeIDLength; int32_t tmpLength = 0; int32_t i; @@ -507,8 +507,8 @@ int32_t getShortestSubtagLength(const char *localeID) { return length; } /* Test if the locale id has BCP47 u extension and does not have '@' */ -inline bool _hasBCP47Extension(const char *id) { - return id != nullptr && uprv_strstr(id, "@") == nullptr && getShortestSubtagLength(id) == 1; +inline bool _hasBCP47Extension(std::string_view id) { + return id.find('@') == std::string_view::npos && getShortestSubtagLength(id) == 1; } /* ### Keywords **************************************************/ @@ -523,10 +523,9 @@ inline bool UPRV_OK_VALUE_PUNCTUATION(char c) { return c == '_' || c == '-' || c #define ULOC_MAX_NO_KEYWORDS 25 U_CAPI const char * U_EXPORT2 -locale_getKeywordsStart(const char *localeID) { - const char *result = nullptr; - if((result = 
uprv_strchr(localeID, '@')) != nullptr) { - return result; +locale_getKeywordsStart(std::string_view localeID) { + if (size_t pos = localeID.find('@'); pos != std::string_view::npos) { + return localeID.data() + pos; } #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY) else { @@ -536,8 +535,8 @@ locale_getKeywordsStart(const char *localeID) { static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; const uint8_t *charToFind = ebcdicSigns; while(*charToFind) { - if((result = uprv_strchr(localeID, *charToFind)) != nullptr) { - return result; + if (size_t pos = localeID.find(*charToFind); pos != std::string_view::npos) { + return localeID.data() + pos; } charToFind++; } @@ -590,7 +589,7 @@ compareKeywordStructs(const void * /*context*/, const void *left, const void *ri } // namespace U_EXPORT CharString -ulocimp_getKeywords(const char* localeID, +ulocimp_getKeywords(std::string_view localeID, char prev, bool valuesToo, UErrorCode& status) @@ -607,7 +606,7 @@ ulocimp_getKeywords(const char* localeID, } U_EXPORT void -ulocimp_getKeywords(const char* localeID, +ulocimp_getKeywords(std::string_view localeID, char prev, ByteSink& sink, bool valuesToo, @@ -619,9 +618,8 @@ ulocimp_getKeywords(const char* localeID, int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS; int32_t numKeywords = 0; - const char* pos = localeID; - const char* equalSign = nullptr; - const char* semicolon = nullptr; + size_t equalSign = std::string_view::npos; + size_t semicolon = std::string_view::npos; int32_t i = 0, j, n; if(prev == '@') { /* start of keyword definition */ @@ -629,74 +627,72 @@ ulocimp_getKeywords(const char* localeID, do { bool duplicate = false; /* skip leading spaces */ - while(*pos == ' ') { - pos++; + while (localeID.front() == ' ') { + localeID.remove_prefix(1); } - if (!*pos) { /* handle trailing "; " */ + if (localeID.empty()) { /* handle trailing "; " */ break; } if(numKeywords == maxKeywords) { status = U_INTERNAL_PROGRAM_ERROR; return; } - 
equalSign = uprv_strchr(pos, '='); - semicolon = uprv_strchr(pos, ';'); + equalSign = localeID.find('='); + semicolon = localeID.find(';'); /* lack of '=' [foo@currency] is illegal */ /* ';' before '=' [foo@currency;collation=pinyin] is illegal */ - if(!equalSign || (semicolon && semicolon= ULOC_KEYWORD_BUFFER_LEN) { + if (equalSign >= ULOC_KEYWORD_BUFFER_LEN) { /* keyword name too long for internal buffer */ status = U_INTERNAL_PROGRAM_ERROR; return; } - for(i = 0, n = 0; i < equalSign - pos; ++i) { - if (pos[i] != ' ') { - keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]); + for (i = 0, n = 0; static_cast(i) < equalSign; ++i) { + if (localeID[i] != ' ') { + keywordList[numKeywords].keyword[n++] = uprv_tolower(localeID[i]); } } - /* zero-length keyword is an error. */ - if (n == 0) { - status = U_INVALID_FORMAT_ERROR; - return; - } - keywordList[numKeywords].keyword[n] = 0; keywordList[numKeywords].keywordLen = n; /* now grab the value part. First we skip the '=' */ equalSign++; /* then we leading spaces */ - while(*equalSign == ' ') { + while (equalSign < localeID.length() && localeID[equalSign] == ' ') { equalSign++; } /* Premature end or zero-length value */ - if (!*equalSign || equalSign == semicolon) { + if (equalSign == localeID.length() || equalSign == semicolon) { status = U_INVALID_FORMAT_ERROR; return; } - keywordList[numKeywords].valueStart = equalSign; + keywordList[numKeywords].valueStart = localeID.data() + equalSign; - pos = semicolon; - i = 0; - if(pos) { - while(*(pos - i - 1) == ' ') { - i++; - } - keywordList[numKeywords].valueLen = static_cast(pos - equalSign - i); - pos++; + std::string_view value = localeID; + if (semicolon != std::string_view::npos) { + value.remove_suffix(value.length() - semicolon); + localeID.remove_prefix(semicolon + 1); } else { - i = static_cast(uprv_strlen(equalSign)); - while(i && equalSign[i-1] == ' ') { - i--; - } - keywordList[numKeywords].valueLen = i; + localeID = {}; } + 
value.remove_prefix(equalSign); + if (size_t last = value.find_last_not_of(' '); last != std::string_view::npos) { + value.remove_suffix(value.length() - last - 1); + } + keywordList[numKeywords].valueLen = static_cast(value.length()); + /* If this is a duplicate keyword, then ignore it */ for (j=0; j(locale_getKeywordsStart(buffer)); + char* keywords = const_cast( + locale_getKeywordsStart({buffer, static_cast(bufLen)})); int32_t baseLen = keywords == nullptr ? bufLen : keywords - buffer; // Remove -1 from the capacity so that this function can guarantee NUL termination. CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords, @@ -921,7 +918,7 @@ ulocimp_setKeywordValue(std::string_view keywordName, { if (U_FAILURE(status)) { return; } std::string_view keywords; - if (const char* start = locale_getKeywordsStart(localeID.data()); start != nullptr) { + if (const char* start = locale_getKeywordsStart(localeID.toStringPiece()); start != nullptr) { // This is safe because CharString::truncate() doesn't actually erase any // data, but simply sets the position for where new data will be written. int32_t size = start - localeID.data(); @@ -1138,15 +1135,18 @@ inline bool _isPrefixLetter(char a) { return a == 'x' || a == 'X' || a == 'i' || /*returns true if one of the special prefixes is here (s=string) 'x-' or 'i-' */ -inline bool _isIDPrefix(const char *s) { return _isPrefixLetter(s[0]) && _isIDSeparator(s[1]); } +inline bool _isIDPrefix(std::string_view s) { + return s.size() >= 2 && _isPrefixLetter(s[0]) && _isIDSeparator(s[1]); +} /* Dot terminates it because of POSIX form where dot precedes the codepage * except for variant */ -inline bool _isTerminator(char a) { return a == 0 || a == '.' || a == '@'; } +inline bool _isTerminator(char a) { return a == '.' 
|| a == '@'; } -inline bool _isBCP47Extension(const char* p) { - return p[0] == '-' && +inline bool _isBCP47Extension(std::string_view p) { + return p.size() >= 3 && + p[0] == '-' && (p[1] == 't' || p[1] == 'T' || p[1] == 'u' || p[1] == 'U' || p[1] == 'x' || p[1] == 'X') && @@ -1202,49 +1202,44 @@ namespace { * TODO try to use this in Locale */ -void -_getLanguage(const char* localeID, - ByteSink* sink, - const char** pEnd, - UErrorCode& status) { - U_ASSERT(pEnd != nullptr); - *pEnd = localeID; - - if (uprv_stricmp(localeID, "root") == 0) { - localeID += 4; - } else if (uprv_strnicmp(localeID, "und", 3) == 0 && - (localeID[3] == '\0' || +size_t _getLanguage(std::string_view localeID, ByteSink* sink, UErrorCode& status) { + size_t skip = 0; + if (localeID.size() == 4 && uprv_strnicmp(localeID.data(), "root", 4) == 0) { + skip = 4; + localeID.remove_prefix(skip); + } else if (localeID.size() >= 3 && uprv_strnicmp(localeID.data(), "und", 3) == 0 && + (localeID.size() == 3 || localeID[3] == '-' || localeID[3] == '_' || localeID[3] == '@')) { - localeID += 3; + skip = 3; + localeID.remove_prefix(skip); } constexpr int32_t MAXLEN = ULOC_LANG_CAPACITY - 1; // Minus NUL. /* if it starts with i- or x- then copy that prefix */ - int32_t len = _isIDPrefix(localeID) ? 2 : 0; - while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) { + size_t len = _isIDPrefix(localeID) ? 2 : 0; + while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) { if (len == MAXLEN) { status = U_ILLEGAL_ARGUMENT_ERROR; - return; + return 0; } len++; } - *pEnd = localeID + len; - if (sink == nullptr || len == 0) { return; } + if (sink == nullptr || len == 0) { return skip + len; } - int32_t minCapacity = uprv_max(len, 4); // Minimum 3 letters plus NUL. + int32_t minCapacity = uprv_max(static_cast(len), 4); // Minimum 3 letters plus NUL. 
char scratch[MAXLEN]; int32_t capacity = 0; char* buffer = sink->GetAppendBuffer( minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity); - for (int32_t i = 0; i < len; ++i) { + for (size_t i = 0; i < len; ++i) { buffer[i] = uprv_tolower(localeID[i]); } - if (_isIDSeparator(localeID[1])) { + if (localeID.size() >= 2 && _isIDSeparator(localeID[1])) { buffer[1] = '-'; } @@ -1256,32 +1251,26 @@ _getLanguage(const char* localeID, if (offset.has_value()) { const char* const alias = LANGUAGES[*offset]; sink->Append(alias, static_cast(uprv_strlen(alias))); - return; + return skip + len; } } - sink->Append(buffer, len); + sink->Append(buffer, static_cast(len)); + return skip + len; } -void -_getScript(const char* localeID, - ByteSink* sink, - const char** pEnd) { - U_ASSERT(pEnd != nullptr); - *pEnd = localeID; - +size_t _getScript(std::string_view localeID, ByteSink* sink) { constexpr int32_t LENGTH = 4; - int32_t len = 0; - while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) && + size_t len = 0; + while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) && uprv_isASCIILetter(localeID[len])) { - if (len == LENGTH) { return; } + if (len == LENGTH) { return 0; } len++; } - if (len != LENGTH) { return; } + if (len != LENGTH) { return 0; } - *pEnd = localeID + LENGTH; - if (sink == nullptr) { return; } + if (sink == nullptr) { return len; } char scratch[LENGTH]; int32_t capacity = 0; @@ -1294,27 +1283,21 @@ _getScript(const char* localeID, } sink->Append(buffer, LENGTH); + return len; } -void -_getRegion(const char* localeID, - ByteSink* sink, - const char** pEnd) { - U_ASSERT(pEnd != nullptr); - *pEnd = localeID; - +size_t _getRegion(std::string_view localeID, ByteSink* sink) { constexpr int32_t MINLEN = 2; constexpr int32_t MAXLEN = ULOC_COUNTRY_CAPACITY - 1; // Minus NUL. 
- int32_t len = 0; - while (!_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) { - if (len == MAXLEN) { return; } + size_t len = 0; + while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) { + if (len == MAXLEN) { return 0; } len++; } - if (len < MINLEN) { return; } + if (len < MINLEN) { return 0; } - *pEnd = localeID + len; - if (sink == nullptr) { return; } + if (sink == nullptr) { return len; } char scratch[ULOC_COUNTRY_CAPACITY]; int32_t capacity = 0; @@ -1325,7 +1308,7 @@ _getRegion(const char* localeID, UPRV_LENGTHOF(scratch), &capacity); - for (int32_t i = 0; i < len; ++i) { + for (size_t i = 0; i < len; ++i) { buffer[i] = uprv_toupper(localeID[i]); } @@ -1337,26 +1320,25 @@ _getRegion(const char* localeID, if (offset.has_value()) { const char* const alias = COUNTRIES[*offset]; sink->Append(alias, static_cast(uprv_strlen(alias))); - return; + return len; } } - sink->Append(buffer, len); + sink->Append(buffer, static_cast(len)); + return len; } /** * @param needSeparator if true, then add leading '_' if any variants * are added to 'variant' */ -void -_getVariant(const char* localeID, +size_t +_getVariant(std::string_view localeID, char prev, ByteSink* sink, - const char** pEnd, bool needSeparator, UErrorCode& status) { - if (U_FAILURE(status)) return; - if (pEnd != nullptr) { *pEnd = localeID; } + if (U_FAILURE(status) || localeID.empty()) return 0; // Reasonable upper limit for variants // There are no strict limitation of the syntax of variant in the legacy @@ -1369,63 +1351,82 @@ _getVariant(const char* localeID, constexpr int32_t MAX_VARIANTS_LENGTH = 179; /* get one or more variant tags and separate them with '_' */ - int32_t index = 0; + size_t index = 0; if (_isIDSeparator(prev)) { /* get a variant string after a '-' or '_' */ - for (index=0; !_isTerminator(localeID[index]); index++) { - if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH + for (std::string_view sub = 
localeID;;) { + size_t next = sub.find_first_of(".@_-"); + // For historical reasons, a trailing separator is included in the variant. + bool finished = next == std::string_view::npos || next + 1 == sub.length(); + size_t limit = finished ? sub.length() : next; + index += limit; + if (index > MAX_VARIANTS_LENGTH) { status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (needSeparator) { - if (sink != nullptr) { - sink->Append("_", 1); - } - needSeparator = false; + return 0; } + if (sink != nullptr) { - char c = uprv_toupper(localeID[index]); - if (c == '-') c = '_'; - sink->Append(&c, 1); + if (needSeparator) { + sink->Append("_", 1); + } else { + needSeparator = true; + } + + int32_t length = static_cast(limit); + int32_t minCapacity = uprv_min(length, MAX_VARIANTS_LENGTH); + char scratch[MAX_VARIANTS_LENGTH]; + int32_t capacity = 0; + char* buffer = sink->GetAppendBuffer( + minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity); + + for (size_t i = 0; i < limit; ++i) { + buffer[i] = uprv_toupper(sub[i]); + } + sink->Append(buffer, length); } + + if (finished) { return index; } + sub.remove_prefix(next); + if (_isTerminator(sub.front()) || _isBCP47Extension(sub)) { return index; } + sub.remove_prefix(1); + index++; } - if (pEnd != nullptr) { *pEnd = localeID+index; } } + size_t skip = 0; /* if there is no variant tag after a '-' or '_' then look for '@' */ - if (index == 0) { - if (prev=='@') { - /* keep localeID */ - } else if((localeID=locale_getKeywordsStart(localeID))!=nullptr) { - ++localeID; /* point after the '@' */ - } else { - return; - } - for(; !_isTerminator(localeID[index]); index++) { - if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - if (needSeparator) { - if (sink != nullptr) { - sink->Append("_", 1); - } - needSeparator = false; - } - if (sink != nullptr) { - char c = uprv_toupper(localeID[index]); - if (c == '-' || c == ',') c = '_'; - sink->Append(&c, 
1); - } - } - if (pEnd != nullptr) { *pEnd = localeID + index; } + if (prev == '@') { + /* keep localeID */ + } else if (const char* p = locale_getKeywordsStart(localeID); p != nullptr) { + skip = 1 + p - localeID.data(); /* point after the '@' */ + localeID.remove_prefix(skip); + } else { + return 0; } + for (; index < localeID.size() && !_isTerminator(localeID[index]); index++) { + if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH + status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + if (needSeparator) { + if (sink != nullptr) { + sink->Append("_", 1); + } + needSeparator = false; + } + if (sink != nullptr) { + char c = uprv_toupper(localeID[index]); + if (c == '-' || c == ',') c = '_'; + sink->Append(&c, 1); + } + } + return skip + index; } } // namespace U_EXPORT CharString -ulocimp_getLanguage(const char* localeID, UErrorCode& status) { +ulocimp_getLanguage(std::string_view localeID, UErrorCode& status) { return ByteSinkUtil::viaByteSinkToCharString( [&](ByteSink& sink, UErrorCode& status) { ulocimp_getSubtags( @@ -1441,7 +1442,7 @@ ulocimp_getLanguage(const char* localeID, UErrorCode& status) { } U_EXPORT CharString -ulocimp_getScript(const char* localeID, UErrorCode& status) { +ulocimp_getScript(std::string_view localeID, UErrorCode& status) { return ByteSinkUtil::viaByteSinkToCharString( [&](ByteSink& sink, UErrorCode& status) { ulocimp_getSubtags( @@ -1457,7 +1458,7 @@ ulocimp_getScript(const char* localeID, UErrorCode& status) { } U_EXPORT CharString -ulocimp_getRegion(const char* localeID, UErrorCode& status) { +ulocimp_getRegion(std::string_view localeID, UErrorCode& status) { return ByteSinkUtil::viaByteSinkToCharString( [&](ByteSink& sink, UErrorCode& status) { ulocimp_getSubtags( @@ -1473,7 +1474,7 @@ ulocimp_getRegion(const char* localeID, UErrorCode& status) { } U_EXPORT CharString -ulocimp_getVariant(const char* localeID, UErrorCode& status) { +ulocimp_getVariant(std::string_view localeID, UErrorCode& status) { return 
ByteSinkUtil::viaByteSinkToCharString( [&](ByteSink& sink, UErrorCode& status) { ulocimp_getSubtags( @@ -1490,7 +1491,7 @@ ulocimp_getVariant(const char* localeID, UErrorCode& status) { U_EXPORT void ulocimp_getSubtags( - const char* localeID, + std::string_view localeID, CharString* language, CharString* script, CharString* region, @@ -1521,7 +1522,7 @@ ulocimp_getSubtags( U_EXPORT void ulocimp_getSubtags( - const char* localeID, + std::string_view localeID, ByteSink* language, ByteSink* script, ByteSink* region, @@ -1531,7 +1532,7 @@ ulocimp_getSubtags( if (U_FAILURE(status)) { return; } if (pEnd != nullptr) { - *pEnd = localeID; + *pEnd = localeID.data(); } else if (language == nullptr && script == nullptr && region == nullptr && @@ -1539,62 +1540,94 @@ ulocimp_getSubtags( return; } + if (localeID.empty()) { return; } + bool hasRegion = false; - if (localeID == nullptr) { - localeID = uloc_getDefault(); + { + size_t len = _getLanguage(localeID, language, status); + if (U_FAILURE(status)) { return; } + if (len > 0) { + localeID.remove_prefix(len); + } } - _getLanguage(localeID, language, &localeID, status); - if (U_FAILURE(status)) { return; } - U_ASSERT(localeID != nullptr); - if (pEnd != nullptr) { - *pEnd = localeID; + *pEnd = localeID.data(); } else if (script == nullptr && region == nullptr && variant == nullptr) { return; } - if (_isIDSeparator(*localeID)) { - const char* begin = localeID + 1; - const char* end = nullptr; - _getScript(begin, script, &end); - U_ASSERT(end != nullptr); - if (end != begin) { - localeID = end; - if (pEnd != nullptr) { *pEnd = localeID; } + if (localeID.empty()) { return; } + + if (_isIDSeparator(localeID.front())) { + std::string_view sub = localeID; + sub.remove_prefix(1); + size_t len = _getScript(sub, script); + if (len > 0) { + localeID.remove_prefix(len + 1); + if (pEnd != nullptr) { *pEnd = localeID.data(); } } } - if (region == nullptr && variant == nullptr && pEnd == nullptr) { return; } + if ((region == nullptr && 
variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; } - if (_isIDSeparator(*localeID)) { - const char* begin = localeID + 1; - const char* end = nullptr; - _getRegion(begin, region, &end); - U_ASSERT(end != nullptr); - if (end != begin) { + if (_isIDSeparator(localeID.front())) { + std::string_view sub = localeID; + sub.remove_prefix(1); + size_t len = _getRegion(sub, region); + if (len > 0) { hasRegion = true; - localeID = end; - if (pEnd != nullptr) { *pEnd = localeID; } + localeID.remove_prefix(len + 1); + if (pEnd != nullptr) { *pEnd = localeID.data(); } } } - if (variant == nullptr && pEnd == nullptr) { return; } + if ((variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; } - if (_isIDSeparator(*localeID) && !_isBCP47Extension(localeID)) { + bool hasVariant = false; + + if (_isIDSeparator(localeID.front()) && !_isBCP47Extension(localeID)) { + std::string_view sub = localeID; /* If there was no country ID, skip a possible extra IDSeparator */ - if (!hasRegion && _isIDSeparator(localeID[1])) { - localeID++; - } - const char* begin = localeID + 1; - const char* end = nullptr; - _getVariant(begin, *localeID, variant, &end, false, status); + size_t skip = !hasRegion && localeID.size() > 1 && _isIDSeparator(localeID[1]) ? 
2 : 1; + sub.remove_prefix(skip); + size_t len = _getVariant(sub, localeID[0], variant, false, status); if (U_FAILURE(status)) { return; } - U_ASSERT(end != nullptr); - if (end != begin && pEnd != nullptr) { *pEnd = end; } + if (len > 0) { + hasVariant = true; + localeID.remove_prefix(skip + len); + if (pEnd != nullptr) { *pEnd = localeID.data(); } + } + } + + if ((variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; } + + if (_isBCP47Extension(localeID)) { + localeID.remove_prefix(2); + constexpr char vaposix[] = "-va-posix"; + constexpr size_t length = sizeof vaposix - 1; + for (size_t next;; localeID.remove_prefix(next)) { + next = localeID.find('-', 1); + if (next == std::string_view::npos) { break; } + next = localeID.find('-', next + 1); + bool finished = next == std::string_view::npos; + std::string_view sub = localeID; + if (!finished) { sub.remove_suffix(sub.length() - next); } + + if (sub.length() == length && uprv_strnicmp(sub.data(), vaposix, length) == 0) { + if (variant != nullptr) { + if (hasVariant) { variant->Append("_", 1); } + constexpr char posix[] = "POSIX"; + variant->Append(posix, sizeof posix - 1); + } + if (pEnd != nullptr) { *pEnd = localeID.data() + length; } + } + + if (finished) { break; } + } } } @@ -1700,7 +1733,7 @@ uloc_openKeywords(const char* localeID, CharString tempBuffer; const char* tmpLocaleID; - if (_hasBCP47Extension(localeID)) { + if (localeID != nullptr && _hasBCP47Extension(localeID)) { tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, *status); tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID; } else { @@ -1753,7 +1786,7 @@ constexpr int32_t I_DEFAULT_LENGTH = UPRV_LENGTHOF(i_default); * This is the code underlying uloc_getName and uloc_canonicalize. 
*/ void -_canonicalize(const char* localeID, +_canonicalize(std::string_view localeID, ByteSink& sink, uint32_t options, UErrorCode& err) { @@ -1764,33 +1797,30 @@ _canonicalize(const char* localeID, int32_t j, fieldCount=0; CharString tempBuffer; // if localeID has a BCP47 extension, tmpLocaleID points to this CharString localeIDWithHyphens; // if localeID has a BPC47 extension and have _, tmpLocaleID points to this - const char* origLocaleID; - const char* tmpLocaleID; - const char* keywordAssign = nullptr; - const char* separatorIndicator = nullptr; + std::string_view origLocaleID; + std::string_view tmpLocaleID; + size_t keywordAssign = std::string_view::npos; + size_t separatorIndicator = std::string_view::npos; if (_hasBCP47Extension(localeID)) { - const char* localeIDPtr = localeID; + std::string_view localeIDPtr = localeID; // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string - if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') { - localeIDWithHyphens.append(localeID, -1, err); + if (localeID.size() >= 2 && localeID.find('_') != std::string_view::npos && localeID[1] != '-' && localeID[1] != '_') { + localeIDWithHyphens.append(localeID, err); if (U_SUCCESS(err)) { for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) { if (*p == '_') { *p = '-'; } } - localeIDPtr = localeIDWithHyphens.data(); + localeIDPtr = localeIDWithHyphens.toStringPiece(); } } - tempBuffer = ulocimp_forLanguageTag(localeIDPtr, -1, nullptr, err); - tmpLocaleID = U_SUCCESS(err) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeIDPtr; + tempBuffer = ulocimp_forLanguageTag(localeIDPtr.data(), static_cast(localeIDPtr.size()), nullptr, err); + tmpLocaleID = U_SUCCESS(err) && !tempBuffer.isEmpty() ? 
static_cast(tempBuffer.toStringPiece()) : localeIDPtr; } else { - if (localeID==nullptr) { - localeID=uloc_getDefault(); - } tmpLocaleID=localeID; } @@ -1801,20 +1831,25 @@ _canonicalize(const char* localeID, CharString script; CharString country; CharString variant; + const char* end = nullptr; ulocimp_getSubtags( tmpLocaleID, &tag, &script, &country, &variant, - &tmpLocaleID, + &end, err); if (U_FAILURE(err)) { return; } + U_ASSERT(end != nullptr); + if (end > tmpLocaleID.data()) { + tmpLocaleID.remove_prefix(end - tmpLocaleID.data()); + } - if (tag.length() == I_DEFAULT_LENGTH && - uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) { + if (tag.length() == I_DEFAULT_LENGTH && origLocaleID.length() >= I_DEFAULT_LENGTH && + uprv_strncmp(origLocaleID.data(), i_default, I_DEFAULT_LENGTH) == 0) { tag.clear(); tag.append(uloc_getDefault(), err); } else { @@ -1839,15 +1874,14 @@ _canonicalize(const char* localeID, } /* Copy POSIX-style charset specifier, if any [mr.utf8] */ - if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') { + if (!OPTION_SET(options, _ULOC_CANONICALIZE) && !tmpLocaleID.empty() && tmpLocaleID.front() == '.') { tag.append('.', err); - ++tmpLocaleID; - const char *atPos = nullptr; + tmpLocaleID.remove_prefix(1); size_t length; - if((atPos = uprv_strchr(tmpLocaleID, '@')) != nullptr) { - length = atPos - tmpLocaleID; + if (size_t atPos = tmpLocaleID.find('@'); atPos != std::string_view::npos) { + length = atPos; } else { - length = uprv_strlen(tmpLocaleID); + length = tmpLocaleID.length(); } // The longest charset name we found in IANA charset registry // https://www.iana.org/assignments/character-sets/ is @@ -1859,33 +1893,34 @@ _canonicalize(const char* localeID, err = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */ return; } - tag.append(tmpLocaleID, static_cast(length), err); - tmpLocaleID += length; + if (length > 0) { + tag.append(tmpLocaleID.data(), static_cast(length), err); + 
tmpLocaleID.remove_prefix(length); + } } /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';' - After this, tmpLocaleID either points to '@' or is nullptr */ - if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=nullptr) { - keywordAssign = uprv_strchr(tmpLocaleID, '='); - separatorIndicator = uprv_strchr(tmpLocaleID, ';'); + After this, tmpLocaleID either starts at '@' or is empty. */ + if (const char* start = locale_getKeywordsStart(tmpLocaleID); start != nullptr) { + if (start > tmpLocaleID.data()) { + tmpLocaleID.remove_prefix(start - tmpLocaleID.data()); + } + keywordAssign = tmpLocaleID.find('='); + separatorIndicator = tmpLocaleID.find(';'); + } else { + tmpLocaleID = {}; } /* Copy POSIX-style variant, if any [mr@FOO] */ if (!OPTION_SET(options, _ULOC_CANONICALIZE) && - tmpLocaleID != nullptr && keywordAssign == nullptr) { - for (;;) { - char c = *tmpLocaleID; - if (c == 0) { - break; - } - tag.append(c, err); - ++tmpLocaleID; - } + !tmpLocaleID.empty() && keywordAssign == std::string_view::npos) { + tag.append(tmpLocaleID, err); + tmpLocaleID = {}; } if (OPTION_SET(options, _ULOC_CANONICALIZE)) { /* Handle @FOO variant if @ is present and not followed by = */ - if (tmpLocaleID!=nullptr && keywordAssign==nullptr) { + if (!tmpLocaleID.empty() && keywordAssign == std::string_view::npos) { /* Add missing '_' if needed */ if (fieldCount < 2 || (fieldCount < 3 && !script.isEmpty())) { do { @@ -1895,7 +1930,9 @@ _canonicalize(const char* localeID, } CharStringByteSink s(&tag); - _getVariant(tmpLocaleID+1, '@', &s, nullptr, !variant.isEmpty(), err); + std::string_view sub = tmpLocaleID; + sub.remove_prefix(1); + _getVariant(sub, '@', &s, !variant.isEmpty(), err); if (U_FAILURE(err)) { return; } } @@ -1903,7 +1940,7 @@ _canonicalize(const char* localeID, for (j=0; j keywordAssign)) { + if (!tmpLocaleID.empty() && keywordAssign != std::string_view::npos && + (separatorIndicator == std::string_view::npos || separatorIndicator > 
keywordAssign)) { sink.Append("@", 1); ++fieldCount; - ulocimp_getKeywords(tmpLocaleID+1, '@', sink, true, err); + tmpLocaleID.remove_prefix(1); + ulocimp_getKeywords(tmpLocaleID, '@', sink, true, err); } } } @@ -1989,6 +2027,10 @@ uloc_getLanguage(const char* localeID, int32_t languageCapacity, UErrorCode* err) { + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } + /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/ return ByteSinkUtil::viaByteSinkToTerminatedChars( language, languageCapacity, @@ -2011,6 +2053,10 @@ uloc_getScript(const char* localeID, int32_t scriptCapacity, UErrorCode* err) { + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } + return ByteSinkUtil::viaByteSinkToTerminatedChars( script, scriptCapacity, [&](ByteSink& sink, UErrorCode& status) { @@ -2032,6 +2078,10 @@ uloc_getCountry(const char* localeID, int32_t countryCapacity, UErrorCode* err) { + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } + return ByteSinkUtil::viaByteSinkToTerminatedChars( country, countryCapacity, [&](ByteSink& sink, UErrorCode& status) { @@ -2053,6 +2103,10 @@ uloc_getVariant(const char* localeID, int32_t variantCapacity, UErrorCode* err) { + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } + return ByteSinkUtil::viaByteSinkToTerminatedChars( variant, variantCapacity, [&](ByteSink& sink, UErrorCode& status) { @@ -2074,6 +2128,9 @@ uloc_getName(const char* localeID, int32_t nameCapacity, UErrorCode* err) { + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } return ByteSinkUtil::viaByteSinkToTerminatedChars( name, nameCapacity, [&](ByteSink& sink, UErrorCode& status) { @@ -2083,7 +2140,7 @@ uloc_getName(const char* localeID, } U_EXPORT CharString -ulocimp_getName(const char* localeID, +ulocimp_getName(std::string_view localeID, UErrorCode& err) { return ByteSinkUtil::viaByteSinkToCharString( @@ -2094,7 +2151,7 @@ ulocimp_getName(const char* localeID, } U_EXPORT void 
-ulocimp_getName(const char* localeID, +ulocimp_getName(std::string_view localeID, ByteSink& sink, UErrorCode& err) { @@ -2107,6 +2164,9 @@ uloc_getBaseName(const char* localeID, int32_t nameCapacity, UErrorCode* err) { + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } return ByteSinkUtil::viaByteSinkToTerminatedChars( name, nameCapacity, [&](ByteSink& sink, UErrorCode& status) { @@ -2116,7 +2176,7 @@ uloc_getBaseName(const char* localeID, } U_EXPORT CharString -ulocimp_getBaseName(const char* localeID, +ulocimp_getBaseName(std::string_view localeID, UErrorCode& err) { return ByteSinkUtil::viaByteSinkToCharString( @@ -2127,7 +2187,7 @@ ulocimp_getBaseName(const char* localeID, } U_EXPORT void -ulocimp_getBaseName(const char* localeID, +ulocimp_getBaseName(std::string_view localeID, ByteSink& sink, UErrorCode& err) { @@ -2140,6 +2200,9 @@ uloc_canonicalize(const char* localeID, int32_t nameCapacity, UErrorCode* err) { + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } return ByteSinkUtil::viaByteSinkToTerminatedChars( name, nameCapacity, [&](ByteSink& sink, UErrorCode& status) { @@ -2149,7 +2212,7 @@ uloc_canonicalize(const char* localeID, } U_EXPORT CharString -ulocimp_canonicalize(const char* localeID, +ulocimp_canonicalize(std::string_view localeID, UErrorCode& err) { return ByteSinkUtil::viaByteSinkToCharString( @@ -2160,7 +2223,7 @@ ulocimp_canonicalize(const char* localeID, } U_EXPORT void -ulocimp_canonicalize(const char* localeID, +ulocimp_canonicalize(std::string_view localeID, ByteSink& sink, UErrorCode& err) { diff --git a/thirdparty/icu4c/common/uloc_tag.cpp b/thirdparty/icu4c/common/uloc_tag.cpp index 7b3b1e73a37..b2e9946f48a 100644 --- a/thirdparty/icu4c/common/uloc_tag.cpp +++ b/thirdparty/icu4c/common/uloc_tag.cpp @@ -1043,7 +1043,7 @@ _initializeULanguageTag(ULanguageTag* langtag) { } void -_appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) { 
+_appendLanguageToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) { UErrorCode tmpStatus = U_ZERO_ERROR; if (U_FAILURE(status)) { @@ -1088,7 +1088,7 @@ _appendLanguageToLanguageTag(const char* localeID, icu::ByteSink& sink, bool str } void -_appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) { +_appendScriptToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) { UErrorCode tmpStatus = U_ZERO_ERROR; if (U_FAILURE(status)) { @@ -1118,7 +1118,7 @@ _appendScriptToLanguageTag(const char* localeID, icu::ByteSink& sink, bool stric } void -_appendRegionToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) { +_appendRegionToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, UErrorCode& status) { UErrorCode tmpStatus = U_ZERO_ERROR; if (U_FAILURE(status)) { @@ -1169,7 +1169,7 @@ void _sortVariants(VariantListEntry* first) { } void -_appendVariantsToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) { +_appendVariantsToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, bool& hadPosix, UErrorCode& status) { if (U_FAILURE(status)) { return; } UErrorCode tmpStatus = U_ZERO_ERROR; @@ -1872,7 +1872,7 @@ _appendKeywords(ULanguageTag* langtag, icu::ByteSink& sink, UErrorCode& status) } void -_appendPrivateuseToLanguageTag(const char* localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) { +_appendPrivateuseToLanguageTag(std::string_view localeID, icu::ByteSink& sink, bool strict, bool /*hadPosix*/, UErrorCode& status) { if (U_FAILURE(status)) { return; } UErrorCode tmpStatus = U_ZERO_ERROR; @@ -2596,6 +2596,9 @@ ulocimp_toLanguageTag(const char* localeID, bool hadPosix = false; const char* pKeywordStart; + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } /* Note: 
uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ icu::CharString canonical = ulocimp_canonicalize(localeID, tmpStatus); if (U_FAILURE(tmpStatus)) { @@ -2604,7 +2607,7 @@ ulocimp_toLanguageTag(const char* localeID, } /* For handling special case - private use only tag */ - pKeywordStart = locale_getKeywordsStart(canonical.data()); + pKeywordStart = locale_getKeywordsStart(canonical.toStringPiece()); if (pKeywordStart == canonical.data()) { int kwdCnt = 0; bool done = false; @@ -2642,12 +2645,12 @@ ulocimp_toLanguageTag(const char* localeID, } } - _appendLanguageToLanguageTag(canonical.data(), sink, strict, status); - _appendScriptToLanguageTag(canonical.data(), sink, strict, status); - _appendRegionToLanguageTag(canonical.data(), sink, strict, status); - _appendVariantsToLanguageTag(canonical.data(), sink, strict, hadPosix, status); + _appendLanguageToLanguageTag(canonical.toStringPiece(), sink, strict, status); + _appendScriptToLanguageTag(canonical.toStringPiece(), sink, strict, status); + _appendRegionToLanguageTag(canonical.toStringPiece(), sink, strict, status); + _appendVariantsToLanguageTag(canonical.toStringPiece(), sink, strict, hadPosix, status); _appendKeywordsToLanguageTag(canonical.data(), sink, strict, hadPosix, status); - _appendPrivateuseToLanguageTag(canonical.data(), sink, strict, hadPosix, status); + _appendPrivateuseToLanguageTag(canonical.toStringPiece(), sink, strict, hadPosix, status); } diff --git a/thirdparty/icu4c/common/ulocale.cpp b/thirdparty/icu4c/common/ulocale.cpp index f2f81bc9710..33814713dc1 100644 --- a/thirdparty/icu4c/common/ulocale.cpp +++ b/thirdparty/icu4c/common/ulocale.cpp @@ -10,7 +10,6 @@ #include "unicode/locid.h" #include "bytesinkutil.h" -#include "charstr.h" #include "cmemory.h" U_NAMESPACE_USE @@ -24,9 +23,7 @@ ulocale_openForLocaleID(const char* localeID, int32_t length, UErrorCode* err) { if (length < 0) { return EXTERNAL(icu::Locale::createFromName(localeID).clone()); } - CharString 
str(localeID, length, *err); // Make a NUL terminated copy. - if (U_FAILURE(*err)) { return nullptr; } - return EXTERNAL(icu::Locale::createFromName(str.data()).clone()); + return EXTERNAL(icu::Locale::createFromName(StringPiece{localeID, length}).clone()); } ULocale* diff --git a/thirdparty/icu4c/common/ulocimp.h b/thirdparty/icu4c/common/ulocimp.h index 1887e2a849a..7f09748c8ac 100644 --- a/thirdparty/icu4c/common/ulocimp.h +++ b/thirdparty/icu4c/common/ulocimp.h @@ -68,42 +68,42 @@ U_EXPORT std::optional ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value); U_EXPORT icu::CharString -ulocimp_getKeywords(const char* localeID, +ulocimp_getKeywords(std::string_view localeID, char prev, bool valuesToo, UErrorCode& status); U_EXPORT void -ulocimp_getKeywords(const char* localeID, +ulocimp_getKeywords(std::string_view localeID, char prev, icu::ByteSink& sink, bool valuesToo, UErrorCode& status); U_EXPORT icu::CharString -ulocimp_getName(const char* localeID, +ulocimp_getName(std::string_view localeID, UErrorCode& err); U_EXPORT void -ulocimp_getName(const char* localeID, +ulocimp_getName(std::string_view localeID, icu::ByteSink& sink, UErrorCode& err); U_EXPORT icu::CharString -ulocimp_getBaseName(const char* localeID, +ulocimp_getBaseName(std::string_view localeID, UErrorCode& err); U_EXPORT void -ulocimp_getBaseName(const char* localeID, +ulocimp_getBaseName(std::string_view localeID, icu::ByteSink& sink, UErrorCode& err); U_EXPORT icu::CharString -ulocimp_canonicalize(const char* localeID, +ulocimp_canonicalize(std::string_view localeID, UErrorCode& err); U_EXPORT void -ulocimp_canonicalize(const char* localeID, +ulocimp_canonicalize(std::string_view localeID, icu::ByteSink& sink, UErrorCode& err); @@ -119,16 +119,16 @@ ulocimp_getKeywordValue(const char* localeID, UErrorCode& status); U_EXPORT icu::CharString -ulocimp_getLanguage(const char* localeID, UErrorCode& status); +ulocimp_getLanguage(std::string_view localeID, UErrorCode& 
status); U_EXPORT icu::CharString -ulocimp_getScript(const char* localeID, UErrorCode& status); +ulocimp_getScript(std::string_view localeID, UErrorCode& status); U_EXPORT icu::CharString -ulocimp_getRegion(const char* localeID, UErrorCode& status); +ulocimp_getRegion(std::string_view localeID, UErrorCode& status); U_EXPORT icu::CharString -ulocimp_getVariant(const char* localeID, UErrorCode& status); +ulocimp_getVariant(std::string_view localeID, UErrorCode& status); U_EXPORT void ulocimp_setKeywordValue(std::string_view keywordName, @@ -145,7 +145,7 @@ ulocimp_setKeywordValue(std::string_view keywords, U_EXPORT void ulocimp_getSubtags( - const char* localeID, + std::string_view localeID, icu::CharString* language, icu::CharString* script, icu::CharString* region, @@ -155,7 +155,7 @@ ulocimp_getSubtags( U_EXPORT void ulocimp_getSubtags( - const char* localeID, + std::string_view localeID, icu::ByteSink* language, icu::ByteSink* script, icu::ByteSink* region, @@ -165,7 +165,7 @@ ulocimp_getSubtags( inline void ulocimp_getSubtags( - const char* localeID, + std::string_view localeID, std::nullptr_t, std::nullptr_t, std::nullptr_t, @@ -364,7 +364,7 @@ ulocimp_minimizeSubtags(const char* localeID, UErrorCode& err); U_CAPI const char * U_EXPORT2 -locale_getKeywordsStart(const char *localeID); +locale_getKeywordsStart(std::string_view localeID); bool ultag_isExtensionSubtags(const char* s, int32_t len); diff --git a/thirdparty/icu4c/common/umapfile.cpp b/thirdparty/icu4c/common/umapfile.cpp index b58ac37f4d4..3ba0251df9c 100644 --- a/thirdparty/icu4c/common/umapfile.cpp +++ b/thirdparty/icu4c/common/umapfile.cpp @@ -237,8 +237,13 @@ typedef HANDLE MemoryMap; pData->map = (char *)data + length; pData->pHeader=(const DataHeader *)data; pData->mapAddr = data; -#if U_PLATFORM == U_PF_IPHONE +#if U_PLATFORM == U_PF_IPHONE || U_PLATFORM == U_PF_ANDROID + // Apparently supported from Android 23 and higher: + // https://github.com/ggml-org/llama.cpp/pull/3631 + // Checking for 
the flag itself is safer than checking for __ANDROID_API__. +# ifdef POSIX_MADV_RANDOM posix_madvise(data, length, POSIX_MADV_RANDOM); +# endif #endif return true; } diff --git a/thirdparty/icu4c/common/unicode/brkiter.h b/thirdparty/icu4c/common/unicode/brkiter.h index 30c59c4a94a..d953925bd72 100644 --- a/thirdparty/icu4c/common/unicode/brkiter.h +++ b/thirdparty/icu4c/common/unicode/brkiter.h @@ -58,6 +58,8 @@ U_NAMESPACE_END U_NAMESPACE_BEGIN +class CharString; + /** * The BreakIterator class implements methods for finding the location * of boundaries in text. BreakIterator is an abstract base class. @@ -646,9 +648,9 @@ protected: private: /** @internal (private) */ - char actualLocale[ULOC_FULLNAME_CAPACITY]; - char validLocale[ULOC_FULLNAME_CAPACITY]; - char requestLocale[ULOC_FULLNAME_CAPACITY]; + CharString* actualLocale = nullptr; + CharString* validLocale = nullptr; + CharString* requestLocale = nullptr; }; #ifndef U_HIDE_DEPRECATED_API diff --git a/thirdparty/icu4c/common/unicode/char16ptr.h b/thirdparty/icu4c/common/unicode/char16ptr.h index daf35cd43ba..049de9efee8 100644 --- a/thirdparty/icu4c/common/unicode/char16ptr.h +++ b/thirdparty/icu4c/common/unicode/char16ptr.h @@ -9,10 +9,13 @@ #include "unicode/utypes.h" -#if U_SHOW_CPLUSPLUS_API +#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API #include #include +#include + +#endif /** * \file @@ -21,8 +24,6 @@ * Also conversion functions from char16_t * to UChar * and OldUChar *. */ -U_NAMESPACE_BEGIN - /** * \def U_ALIASING_BARRIER * Barrier for pointer anti-aliasing optimizations even across function boundaries. @@ -36,6 +37,11 @@ U_NAMESPACE_BEGIN # define U_ALIASING_BARRIER(ptr) #endif +// ICU DLL-exported +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + /** * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types. 
* @stable ICU 59 @@ -251,6 +257,60 @@ const char16_t *ConstChar16Ptr::get() const { return u_.cp; } #endif /// \endcond +U_NAMESPACE_END + +#endif // U_SHOW_CPLUSPLUS_API + +// Usable in header-only definitions +#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API + +namespace U_ICU_NAMESPACE_OR_INTERNAL { + +#ifndef U_FORCE_HIDE_INTERNAL_API +/** @internal */ +template>> +inline const char16_t *uprv_char16PtrFromUChar(const T *p) { + if constexpr (std::is_same_v) { + return p; + } else { +#if U_SHOW_CPLUSPLUS_API + return ConstChar16Ptr(p).get(); +#else +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +#endif + } +} +#if !U_CHAR16_IS_TYPEDEF && (!defined(_LIBCPP_VERSION) || _LIBCPP_VERSION < 180000) +/** @internal */ +inline const char16_t *uprv_char16PtrFromUint16(const uint16_t *p) { +#if U_SHOW_CPLUSPLUS_API + return ConstChar16Ptr(p).get(); +#else +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +#endif +} +#endif +#if U_SIZEOF_WCHAR_T==2 +/** @internal */ +inline const char16_t *uprv_char16PtrFromWchar(const wchar_t *p) { +#if U_SHOW_CPLUSPLUS_API + return ConstChar16Ptr(p).get(); +#else +#ifdef U_ALIASING_BARRIER + U_ALIASING_BARRIER(p); +#endif + return reinterpret_cast(p); +#endif +} +#endif +#endif + /** * Converts from const char16_t * to const UChar *. * Includes an aliasing barrier if available. @@ -307,6 +367,15 @@ inline OldUChar *toOldUCharPtr(char16_t *p) { return reinterpret_cast(p); } +} // U_ICU_NAMESPACE_OR_INTERNAL + +#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API + +// ICU DLL-exported +#if U_SHOW_CPLUSPLUS_API + +U_NAMESPACE_BEGIN + #ifndef U_FORCE_HIDE_INTERNAL_API /** * Is T convertible to a std::u16string_view or some other 16-bit string view? 
@@ -379,6 +448,6 @@ inline std::u16string_view toU16StringViewNullable(const T& text) { U_NAMESPACE_END -#endif /* U_SHOW_CPLUSPLUS_API */ +#endif // U_SHOW_CPLUSPLUS_API #endif // __CHAR16PTR_H__ diff --git a/thirdparty/icu4c/common/unicode/locid.h b/thirdparty/icu4c/common/unicode/locid.h index e1afd598cf9..a394cd9347d 100644 --- a/thirdparty/icu4c/common/unicode/locid.h +++ b/thirdparty/icu4c/common/unicode/locid.h @@ -449,6 +449,11 @@ public: */ static Locale U_EXPORT2 createFromName(const char *name); +#ifndef U_HIDE_INTERNAL_API + /** @internal */ + static Locale U_EXPORT2 createFromName(StringPiece name); +#endif /* U_HIDE_INTERNAL_API */ + /** * Creates a locale from the given string after canonicalizing * the string according to CLDR by calling uloc_canonicalize(). @@ -1133,7 +1138,9 @@ private: * @param cLocaleID The new locale name. * @param canonicalize whether to call uloc_canonicalize on cLocaleID */ - Locale& init(const char* cLocaleID, UBool canonicalize); + Locale& init(const char* localeID, UBool canonicalize); + /** @internal */ + Locale& init(StringPiece localeID, UBool canonicalize); /* * Internal constructor to allow construction of a locale object with diff --git a/thirdparty/icu4c/common/unicode/resbund.h b/thirdparty/icu4c/common/unicode/resbund.h index 3965371729d..03ff6faee23 100644 --- a/thirdparty/icu4c/common/unicode/resbund.h +++ b/thirdparty/icu4c/common/unicode/resbund.h @@ -450,7 +450,7 @@ public: * @return a Locale object * @stable ICU 2.8 */ - const Locale + Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const; #ifndef U_HIDE_INTERNAL_API /** diff --git a/thirdparty/icu4c/common/unicode/uchar.h b/thirdparty/icu4c/common/unicode/uchar.h index 0daa7dd2141..82ec63ab524 100644 --- a/thirdparty/icu4c/common/unicode/uchar.h +++ b/thirdparty/icu4c/common/unicode/uchar.h @@ -675,14 +675,14 @@ typedef enum UProperty { * @stable ICU 63 */ UCHAR_VERTICAL_ORIENTATION=0x1018, -#ifndef U_HIDE_DRAFT_API /** * Enumerated property 
Identifier_Status. * Used for UTS #39 General Security Profile for Identifiers * (https://www.unicode.org/reports/tr39/#General_Security_Profile). - * @draft ICU 75 + * @stable ICU 75 */ UCHAR_IDENTIFIER_STATUS=0x1019, +#ifndef U_HIDE_DRAFT_API /** * Enumerated property Indic_Conjunct_Break. * Used in the grapheme cluster break algorithm in UAX #29. @@ -796,7 +796,6 @@ typedef enum UProperty { UCHAR_SCRIPT_EXTENSIONS=0x7000, /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */ UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS, -#ifndef U_HIDE_DRAFT_API /** * Miscellaneous property Identifier_Type. * Used for UTS #39 General Security Profile for Identifiers @@ -808,10 +807,9 @@ typedef enum UProperty { * * @see u_hasIDType * @see u_getIDTypes - * @draft ICU 75 + * @stable ICU 75 */ UCHAR_IDENTIFIER_TYPE=0x7001, -#endif // U_HIDE_DRAFT_API #ifndef U_HIDE_DEPRECATED_API /** * One more than the last constant for Unicode properties with unusual value types. @@ -2791,13 +2789,12 @@ typedef enum UVerticalOrientation { U_VO_UPRIGHT, } UVerticalOrientation; -#ifndef U_HIDE_DRAFT_API /** * Identifier Status constants. * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type. * * @see UCHAR_IDENTIFIER_STATUS - * @draft ICU 75 + * @stable ICU 75 */ typedef enum UIdentifierStatus { /* @@ -2806,9 +2803,9 @@ typedef enum UIdentifierStatus { * U_ID_STATUS_ */ - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_STATUS_RESTRICTED, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_STATUS_ALLOWED, } UIdentifierStatus; @@ -2817,7 +2814,7 @@ typedef enum UIdentifierStatus { * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type. 
* * @see UCHAR_IDENTIFIER_TYPE - * @draft ICU 75 + * @stable ICU 75 */ typedef enum UIdentifierType { /* @@ -2826,32 +2823,31 @@ typedef enum UIdentifierType { * U_ID_TYPE_ */ - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_NOT_CHARACTER, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_DEPRECATED, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_DEFAULT_IGNORABLE, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_NOT_NFKC, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_NOT_XID, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_EXCLUSION, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_OBSOLETE, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_TECHNICAL, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_UNCOMMON_USE, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_LIMITED_USE, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_INCLUSION, - /** @draft ICU 75 */ + /** @stable ICU 75 */ U_ID_TYPE_RECOMMENDED, } UIdentifierType; -#endif // U_HIDE_DRAFT_API /** * Check a binary Unicode property for a code point. @@ -4057,7 +4053,6 @@ u_isIDStart(UChar32 c); U_CAPI UBool U_EXPORT2 u_isIDPart(UChar32 c); -#ifndef U_HIDE_DRAFT_API /** * Does the set of Identifier_Type values code point c contain the given type? * @@ -4069,7 +4064,7 @@ u_isIDPart(UChar32 c); * @param c code point * @param type Identifier_Type to check * @return true if type is in Identifier_Type(c) - * @draft ICU 75 + * @stable ICU 75 */ U_CAPI bool U_EXPORT2 u_hasIDType(UChar32 c, UIdentifierType type); @@ -4104,11 +4099,10 @@ u_hasIDType(UChar32 c, UIdentifierType type); * function chaining. (See User Guide for details.) 
* @return number of values in c's Identifier_Type, * written to types unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity - * @draft ICU 75 + * @stable ICU 75 */ U_CAPI int32_t U_EXPORT2 u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode); -#endif // U_HIDE_DRAFT_API /** * Determines if the specified character should be regarded diff --git a/thirdparty/icu4c/common/unicode/uniset.h b/thirdparty/icu4c/common/unicode/uniset.h index d070fd631a2..6b1ac9ba262 100644 --- a/thirdparty/icu4c/common/unicode/uniset.h +++ b/thirdparty/icu4c/common/unicode/uniset.h @@ -1173,10 +1173,12 @@ public: inline U_HEADER_NESTED_NAMESPACE::USetStrings strings() const { return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet()); } +#endif // U_HIDE_DRAFT_API +#ifndef U_HIDE_DRAFT_API /** * Returns a C++ iterator for iterating over all of the elements of this set. - * Convenient all-in one iteration, but creates a UnicodeString for each + * Convenient all-in one iteration, but creates a std::u16string for each * code point or string. * (Similar to how Java UnicodeSet *is an* Iterable<String>.) * @@ -1185,13 +1187,14 @@ public: * \code * UnicodeSet set(u"[abcçカ🚴{}{abc}{de}]", errorCode); * for (auto el : set) { + * UnicodeString us(el); * std::string u8; - * printf("set.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str()); + * printf("set.element length %ld \"%s\"\n", (long)us.length(), us.toUTF8String(u8).c_str()); * } * \endcode * * @return an all-elements iterator. - * @draft ICU 76 + * @draft ICU 77 * @see end * @see codePoints * @see ranges @@ -1203,7 +1206,7 @@ public: /** * @return an exclusive-end sentinel for iterating over all of the elements of this set. 
- * @draft ICU 76 + * @draft ICU 77 * @see begin * @see codePoints * @see ranges diff --git a/thirdparty/icu4c/common/unicode/uset.h b/thirdparty/icu4c/common/unicode/uset.h index c8f9b5592df..c5e7f23901b 100644 --- a/thirdparty/icu4c/common/unicode/uset.h +++ b/thirdparty/icu4c/common/unicode/uset.h @@ -32,12 +32,13 @@ #include "unicode/utypes.h" #include "unicode/uchar.h" -#if U_SHOW_CPLUSPLUS_API +#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API +#include #include #include "unicode/char16ptr.h" #include "unicode/localpointer.h" -#include "unicode/unistr.h" -#endif // U_SHOW_CPLUSPLUS_API +#include "unicode/utf16.h" +#endif #ifndef USET_DEFINED @@ -1392,8 +1393,8 @@ public: private: friend class USetCodePoints; - USetCodePointIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount) - : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount), + USetCodePointIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount) + : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount), c(U_SENTINEL), end(U_SENTINEL) { // Fetch the first range. operator++(); @@ -1429,7 +1430,7 @@ public: * Constructs a C++ "range" object over the code points of the USet. 
* @draft ICU 76 */ - USetCodePoints(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {} + USetCodePoints(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {} /** @draft ICU 76 */ USetCodePoints(const USetCodePoints &other) = default; @@ -1460,7 +1461,7 @@ struct CodePointRange { /** @draft ICU 76 */ struct iterator { /** @draft ICU 76 */ - iterator(UChar32 c) : c(c) {} + iterator(UChar32 aC) : c(aC) {} /** @draft ICU 76 */ bool operator==(const iterator &other) const { return c == other.c; } @@ -1573,8 +1574,8 @@ public: private: friend class USetRanges; - USetRangeIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount) - : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount) {} + USetRangeIterator(const USet *pUset, int32_t nRangeIndex, int32_t nRangeCount) + : uset(pUset), rangeIndex(nRangeIndex), rangeCount(nRangeCount) {} const USet *uset; int32_t rangeIndex; @@ -1610,7 +1611,7 @@ public: * Constructs a C++ "range" object over the code point ranges of the USet. 
* @draft ICU 76 */ - USetRanges(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {} + USetRanges(const USet *pUset) : uset(pUset), rangeCount(uset_getRangeCount(pUset)) {} /** @draft ICU 76 */ USetRanges(const USetRanges &other) = default; @@ -1657,7 +1658,7 @@ public: int32_t length; const UChar *uchars = uset_getString(uset, index, &length); // assert uchars != nullptr; - return {ConstChar16Ptr(uchars), static_cast(length)}; + return {uprv_char16PtrFromUChar(uchars), static_cast(length)}; } return {}; } @@ -1684,8 +1685,8 @@ public: private: friend class USetStrings; - USetStringIterator(const USet *uset, int32_t index, int32_t count) - : uset(uset), index(index), count(count) {} + USetStringIterator(const USet *pUset, int32_t nIndex, int32_t nCount) + : uset(pUset), index(nIndex), count(nCount) {} const USet *uset; int32_t index; @@ -1699,9 +1700,11 @@ private: * using U_HEADER_NESTED_NAMESPACE::USetStrings; * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode)); * for (auto s : USetStrings(uset.getAlias())) { - * UnicodeString us(s); - * std::string u8; - * printf("uset.string length %ld \"%s\"\n", (long)s.length(), us.toUTF8String(u8).c_str()); + * int32_t len32 = s.length(); + * char utf8[200]; + * u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr, + * s.data(), len32, 0xFFFD, nullptr, errorCode); + * printf("uset.string length %ld \"%s\"\n", long{len32}, utf8); * } * \endcode * @@ -1718,7 +1721,7 @@ public: * Constructs a C++ "range" object over the strings of the USet. * @draft ICU 76 */ - USetStrings(const USet *uset) : uset(uset), count(uset_getStringCount(uset)) {} + USetStrings(const USet *pUset) : uset(pUset), count(uset_getStringCount(pUset)) {} /** @draft ICU 76 */ USetStrings(const USetStrings &other) = default; @@ -1737,17 +1740,19 @@ private: const USet *uset; int32_t count; }; +#endif // U_HIDE_DRAFT_API +#ifndef U_HIDE_DRAFT_API /** * Iterator returned by USetElements. 
- * @draft ICU 76 + * @draft ICU 77 */ class USetElementIterator { public: - /** @draft ICU 76 */ + /** @draft ICU 77 */ USetElementIterator(const USetElementIterator &other) = default; - /** @draft ICU 76 */ + /** @draft ICU 77 */ bool operator==(const USetElementIterator &other) const { // No need to compare rangeCount & end given private constructor // and assuming we don't compare iterators across the set being modified. @@ -1756,26 +1761,28 @@ public: return uset == other.uset && c == other.c && index == other.index; } - /** @draft ICU 76 */ + /** @draft ICU 77 */ bool operator!=(const USetElementIterator &other) const { return !operator==(other); } - /** @draft ICU 76 */ - UnicodeString operator*() const { + /** @draft ICU 77 */ + std::u16string operator*() const { if (c >= 0) { - return UnicodeString(c); + return c <= 0xffff ? + std::u16string({static_cast(c)}) : + std::u16string({U16_LEAD(c), U16_TRAIL(c)}); } else if (index < totalCount) { int32_t length; const UChar *uchars = uset_getString(uset, index - rangeCount, &length); // assert uchars != nullptr; - return UnicodeString(uchars, length); + return {uprv_char16PtrFromUChar(uchars), static_cast(length)}; } else { - return UnicodeString(); + return {}; } } /** * Pre-increment. - * @draft ICU 76 + * @draft ICU 77 */ USetElementIterator &operator++() { if (c < end) { @@ -1800,7 +1807,7 @@ public: /** * Post-increment. 
- * @draft ICU 76 + * @draft ICU 77 */ USetElementIterator operator++(int) { USetElementIterator result(*this); @@ -1811,8 +1818,8 @@ public: private: friend class USetElements; - USetElementIterator(const USet *uset, int32_t index, int32_t rangeCount, int32_t totalCount) - : uset(uset), index(index), rangeCount(rangeCount), totalCount(totalCount), + USetElementIterator(const USet *pUset, int32_t nIndex, int32_t nRangeCount, int32_t nTotalCount) + : uset(pUset), index(nIndex), rangeCount(nRangeCount), totalCount(nTotalCount), c(U_SENTINEL), end(U_SENTINEL) { if (index < rangeCount) { // Fetch the first range. @@ -1840,7 +1847,7 @@ private: /** * A C++ "range" for iterating over all of the elements of a USet. - * Convenient all-in one iteration, but creates a UnicodeString for each + * Convenient all-in one iteration, but creates a std::u16string for each * code point or string. * * Code points are returned first, then empty and multi-character strings. @@ -1849,15 +1856,18 @@ private: * using U_HEADER_NESTED_NAMESPACE::USetElements; * LocalUSetPointer uset(uset_openPattern(u"[abcçカ🚴{}{abc}{de}]", -1, &errorCode)); * for (auto el : USetElements(uset.getAlias())) { - * std::string u8; - * printf("uset.string length %ld \"%s\"\n", (long)el.length(), el.toUTF8String(u8).c_str()); + * int32_t len32 = el.length(); + * char utf8[200]; + * u_strToUTF8WithSub(utf8, int32_t{sizeof(utf8) - 1}, nullptr, + * el.data(), len32, 0xFFFD, nullptr, errorCode); + * printf("uset.element length %ld \"%s\"\n", long{len32}, utf8); * } * \endcode * * C++ UnicodeSet has member functions for iteration, including begin() and end(). * * @return an all-elements iterator. - * @draft ICU 76 + * @draft ICU 77 * @see USetCodePoints * @see USetRanges * @see USetStrings @@ -1866,21 +1876,21 @@ class USetElements { public: /** * Constructs a C++ "range" object over all of the elements of the USet. 
- * @draft ICU 76 + * @draft ICU 77 */ - USetElements(const USet *uset) - : uset(uset), rangeCount(uset_getRangeCount(uset)), - stringCount(uset_getStringCount(uset)) {} + USetElements(const USet *pUset) + : uset(pUset), rangeCount(uset_getRangeCount(pUset)), + stringCount(uset_getStringCount(pUset)) {} - /** @draft ICU 76 */ + /** @draft ICU 77 */ USetElements(const USetElements &other) = default; - /** @draft ICU 76 */ + /** @draft ICU 77 */ USetElementIterator begin() const { return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount); } - /** @draft ICU 76 */ + /** @draft ICU 77 */ USetElementIterator end() const { return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount); } diff --git a/thirdparty/icu4c/common/unicode/utf8.h b/thirdparty/icu4c/common/unicode/utf8.h index 5a07435fcf6..96ad46161aa 100644 --- a/thirdparty/icu4c/common/unicode/utf8.h +++ b/thirdparty/icu4c/common/unicode/utf8.h @@ -124,7 +124,7 @@ * @internal */ U_CAPI UChar32 U_EXPORT2 -utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict); +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, int8_t strict); /** * Function for handling "append code point" with error-checking. @@ -148,7 +148,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool * @internal */ U_CAPI UChar32 U_EXPORT2 -utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict); +utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, int8_t strict); /** * Function for handling "skip backward one code point" with error-checking. 
diff --git a/thirdparty/icu4c/common/unicode/utypes.h b/thirdparty/icu4c/common/unicode/utypes.h index 0151ebd4701..ecdee516431 100644 --- a/thirdparty/icu4c/common/unicode/utypes.h +++ b/thirdparty/icu4c/common/unicode/utypes.h @@ -598,12 +598,13 @@ typedef enum UErrorCode { U_MF_DUPLICATE_DECLARATION_ERROR, /**< The same variable is declared in more than one .local or .input declaration. @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_OPERAND_MISMATCH_ERROR, /**< An operand provided to a function does not have the required form for that function @internal ICU 75 technology preview @deprecated This API is for technology preview only. */ U_MF_DUPLICATE_VARIANT_ERROR, /**< A message includes a variant with the same key list as another variant. @internal ICU 76 technology preview @deprecated This API is for technology preview only. */ + U_MF_BAD_OPTION, /**< An option value provided to a function does not have the required form for that option. @internal ICU 77 technology preview @deprecated This API is for technology preview only. */ #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal formatting API error code. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - U_FMT_PARSE_ERROR_LIMIT = 0x10120, + U_FMT_PARSE_ERROR_LIMIT = 0x10121, #endif // U_HIDE_DEPRECATED_API /* diff --git a/thirdparty/icu4c/common/unicode/uvernum.h b/thirdparty/icu4c/common/unicode/uvernum.h index a3cb882623b..847c49f4cfb 100644 --- a/thirdparty/icu4c/common/unicode/uvernum.h +++ b/thirdparty/icu4c/common/unicode/uvernum.h @@ -53,7 +53,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 76 +#define U_ICU_VERSION_MAJOR_NUM 77 /** The current ICU minor version as an integer. 
* This value will change in the subsequent releases of ICU @@ -79,7 +79,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _76 +#define U_ICU_VERSION_SUFFIX _77 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -132,7 +132,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "76.1" +#define U_ICU_VERSION "77.1" /** * The current ICU library major version number as a string, for library name suffixes. @@ -145,13 +145,13 @@ * * @stable ICU 2.6 */ -#define U_ICU_VERSION_SHORT "76" +#define U_ICU_VERSION_SHORT "77" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "76.1" +#define U_ICU_DATA_VERSION "77.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== diff --git a/thirdparty/icu4c/common/unicode/uversion.h b/thirdparty/icu4c/common/unicode/uversion.h index 25d73a3aeb5..a29bf21efda 100644 --- a/thirdparty/icu4c/common/unicode/uversion.h +++ b/thirdparty/icu4c/common/unicode/uversion.h @@ -125,7 +125,7 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; U_NAMESPACE_USE # endif -#ifndef U_HIDE_DRAFT_API +#ifndef U_FORCE_HIDE_DRAFT_API /** * \def U_HEADER_NESTED_NAMESPACE * Nested namespace used inside U_ICU_NAMESPACE for header-only APIs. @@ -150,22 +150,37 @@ typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]; * @draft ICU 76 */ +/** + * \def U_ICU_NAMESPACE_OR_INTERNAL + * Namespace used for header-only APIs that used to be regular C++ APIs. + * Different when used inside ICU to prevent public use of internal instantiations. + * Similar to U_HEADER_ONLY_NAMESPACE, but the public definition is the same as U_ICU_NAMESPACE. + * "U_ICU_NAMESPACE" or "U_ICU_NAMESPACE::internal". + * + * @draft ICU 77 + */ + // The first test is the same as for defining U_EXPORT for Windows. 
#if defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \ UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__)) # define U_HEADER_NESTED_NAMESPACE header +# define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE #elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \ defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION) # define U_HEADER_NESTED_NAMESPACE internal +# define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE::internal + namespace U_ICU_NAMESPACE_OR_INTERNAL {} + using namespace U_ICU_NAMESPACE_OR_INTERNAL; #else # define U_HEADER_NESTED_NAMESPACE header +# define U_ICU_NAMESPACE_OR_INTERNAL U_ICU_NAMESPACE #endif #define U_HEADER_ONLY_NAMESPACE U_ICU_NAMESPACE::U_HEADER_NESTED_NAMESPACE namespace U_HEADER_ONLY_NAMESPACE {} -#endif // U_HIDE_DRAFT_API +#endif // U_FORCE_HIDE_DRAFT_API #endif /* __cplusplus */ diff --git a/thirdparty/icu4c/common/unistr.cpp b/thirdparty/icu4c/common/unistr.cpp index a720245772e..4e29bad1d3b 100644 --- a/thirdparty/icu4c/common/unistr.cpp +++ b/thirdparty/icu4c/common/unistr.cpp @@ -1945,6 +1945,13 @@ UnicodeString::cloneArrayIfNeeded(int32_t newCapacity, growCapacity = newCapacity; } else if(newCapacity <= US_STACKBUF_SIZE && growCapacity > US_STACKBUF_SIZE) { growCapacity = US_STACKBUF_SIZE; + } else if(newCapacity > growCapacity) { + setToBogus(); + return false; // bad inputs + } + if(growCapacity > kMaxCapacity) { + setToBogus(); + return false; } // save old values diff --git a/thirdparty/icu4c/common/uresbund.cpp b/thirdparty/icu4c/common/uresbund.cpp index 3a09cbf3bca..afda2770fd3 100644 --- a/thirdparty/icu4c/common/uresbund.cpp +++ b/thirdparty/icu4c/common/uresbund.cpp @@ -2716,6 +2716,9 @@ ures_openWithType(UResourceBundle *r, const char* path, const char* localeID, UResourceDataEntry *entry; if(openType != URES_OPEN_DIRECT) { + if (localeID == nullptr) { + localeID = uloc_getDefault(); + } /* first 
"canonicalize" the locale ID */ CharString canonLocaleID = ulocimp_getBaseName(localeID, *status); if(U_FAILURE(*status)) { @@ -3080,6 +3083,9 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, kwVal.clear(); } } + if (locid == nullptr) { + locid = uloc_getDefault(); + } CharString base = ulocimp_getBaseName(locid, subStatus); #if defined(URES_TREE_DEBUG) fprintf(stderr, "getFunctionalEquivalent: \"%s\" [%s=%s] in %s - %s\n", @@ -3244,7 +3250,7 @@ ures_getFunctionalEquivalent(char *result, int32_t resultCapacity, const char *validLoc = ures_getLocaleByType(res, ULOC_VALID_LOCALE, &subStatus); if (U_SUCCESS(subStatus) && validLoc != nullptr && validLoc[0] != 0 && uprv_strcmp(validLoc, "root") != 0) { CharString validLang = ulocimp_getLanguage(validLoc, subStatus); - CharString parentLang = ulocimp_getLanguage(parent.data(), subStatus); + CharString parentLang = ulocimp_getLanguage(parent.toStringPiece(), subStatus); if (U_SUCCESS(subStatus) && validLang != parentLang) { // validLoc is not root and has a different language than parent, use it instead found.clear().append(validLoc, subStatus); diff --git a/thirdparty/icu4c/common/uscript.cpp b/thirdparty/icu4c/common/uscript.cpp index c48a28fd143..ce40d354958 100644 --- a/thirdparty/icu4c/common/uscript.cpp +++ b/thirdparty/icu4c/common/uscript.cpp @@ -59,6 +59,9 @@ getCodesFromLocale(const char *locale, if (U_FAILURE(*err)) { return 0; } icu::CharString lang; icu::CharString script; + if (locale == nullptr) { + locale = uloc_getDefault(); + } ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, *err); if (U_FAILURE(*err)) { return 0; } // Multi-script languages, equivalent to the LocaleScript data diff --git a/thirdparty/icu4c/common/ushape.cpp b/thirdparty/icu4c/common/ushape.cpp index 00125635cb2..b7946dc3ce0 100644 --- a/thirdparty/icu4c/common/ushape.cpp +++ b/thirdparty/icu4c/common/ushape.cpp @@ -28,6 +28,7 @@ #include "ubidi_props.h" #include "uassert.h" +#include /* * This 
implementation is designed for 16-bit Unicode strings. * The main assumption is that the Arabic characters and their @@ -747,6 +748,10 @@ handleGeneratedSpaces(char16_t *dest, int32_t sourceLength, } } + if (static_cast(sourceLength) + 1 > std::numeric_limits::max() / U_SIZEOF_UCHAR) { + *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; + return 0; + } tempbuffer = static_cast(uprv_malloc((sourceLength + 1) * U_SIZEOF_UCHAR)); /* Test for nullptr */ if(tempbuffer == nullptr) { diff --git a/thirdparty/icu4c/common/usprep.cpp b/thirdparty/icu4c/common/usprep.cpp index 477b8f2309d..048b423645f 100644 --- a/thirdparty/icu4c/common/usprep.cpp +++ b/thirdparty/icu4c/common/usprep.cpp @@ -126,7 +126,7 @@ compareEntries(const UHashTok p1, const UHashTok p2) { name2.pointer = b2->name; path1.pointer = b1->path; path2.pointer = b2->path; - return uhash_compareChars(name1, name2) & uhash_compareChars(path1, path2); + return uhash_compareChars(name1, name2) && uhash_compareChars(path1, path2); } static void diff --git a/thirdparty/icu4c/common/utf_impl.cpp b/thirdparty/icu4c/common/utf_impl.cpp index 827a82daf40..7da10c9b2d3 100644 --- a/thirdparty/icu4c/common/utf_impl.cpp +++ b/thirdparty/icu4c/common/utf_impl.cpp @@ -124,11 +124,9 @@ errorValue(int32_t count, int8_t strict) { * >0 Obsolete "strict" behavior of UTF8_NEXT_CHAR_SAFE(..., true): * Same as the obsolete "safe" behavior, but non-characters are also treated * like illegal sequences. - * - * Note that a UBool is the same as an int8_t. */ U_CAPI UChar32 U_EXPORT2 -utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict) { +utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, int8_t strict) { // *pi is one after byte c. int32_t i=*pi; // length can be negative for NUL-terminated strings: Read and validate one byte at a time. 
@@ -233,7 +231,7 @@ utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool } U_CAPI UChar32 U_EXPORT2 -utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict) { +utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, int8_t strict) { // *pi is the index of byte c. int32_t i=*pi; if(U8_IS_TRAIL(c) && i>start) { diff --git a/thirdparty/icu4c/common/utypes.cpp b/thirdparty/icu4c/common/utypes.cpp index 4602314147f..4d4c1f81b5e 100644 --- a/thirdparty/icu4c/common/utypes.cpp +++ b/thirdparty/icu4c/common/utypes.cpp @@ -140,7 +140,8 @@ _uFmtErrorName[U_FMT_PARSE_ERROR_LIMIT - U_FMT_PARSE_ERROR_START] = { "U_MF_MISSING_SELECTOR_ANNOTATION_ERROR", "U_MF_DUPLICATE_DECLARATION_ERROR", "U_MF_OPERAND_MISMATCH_ERROR", - "U_MF_DUPLICATE_VARIANT_ERROR" + "U_MF_DUPLICATE_VARIANT_ERROR", + "U_MF_BAD_OPTION" }; static const char * const diff --git a/thirdparty/icu4c/i18n/scriptset.cpp b/thirdparty/icu4c/i18n/scriptset.cpp index eec1eeb37da..576917e81c4 100644 --- a/thirdparty/icu4c/i18n/scriptset.cpp +++ b/thirdparty/icu4c/i18n/scriptset.cpp @@ -285,19 +285,19 @@ uhash_equalsScriptSet(const UElement key1, const UElement key2) { return (*s1 == *s2); } -U_CAPI int8_t U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uhash_compareScriptSet(UElement key0, UElement key1) { icu::ScriptSet *s0 = static_cast(key0.pointer); icu::ScriptSet *s1 = static_cast(key1.pointer); int32_t diff = s0->countMembers() - s1->countMembers(); - if (diff != 0) return static_cast(diff); + if (diff != 0) return diff; int32_t i0 = s0->nextSetBit(0); int32_t i1 = s1->nextSetBit(0); while ((diff = i0-i1) == 0 && i0 > 0) { i0 = s0->nextSetBit(i0+1); i1 = s1->nextSetBit(i1+1); } - return (int8_t)diff; + return diff; } U_CAPI int32_t U_EXPORT2 diff --git a/thirdparty/icu4c/i18n/scriptset.h b/thirdparty/icu4c/i18n/scriptset.h index df5cfdc7486..d21d0db8a01 100644 --- a/thirdparty/icu4c/i18n/scriptset.h +++ b/thirdparty/icu4c/i18n/scriptset.h @@ 
-74,7 +74,7 @@ class U_I18N_API ScriptSet: public UMemory { U_NAMESPACE_END -U_CAPI UBool U_EXPORT2 +U_CAPI int32_t U_EXPORT2 uhash_compareScriptSet(const UElement key1, const UElement key2); U_CAPI int32_t U_EXPORT2 diff --git a/thirdparty/icu4c/i18n/ucln_in.h b/thirdparty/icu4c/i18n/ucln_in.h index 765cdd559fb..74868891c83 100644 --- a/thirdparty/icu4c/i18n/ucln_in.h +++ b/thirdparty/icu4c/i18n/ucln_in.h @@ -39,6 +39,7 @@ typedef enum ECleanupI18NType { UCLN_I18N_HEBREW_CALENDAR, UCLN_I18N_ASTRO_CALENDAR, UCLN_I18N_DANGI_CALENDAR, + UCLN_I18N_PERSIAN_CALENDAR, UCLN_I18N_CALENDAR, UCLN_I18N_TIMEZONEFORMAT, UCLN_I18N_TZDBTIMEZONENAMES, @@ -62,6 +63,7 @@ typedef enum ECleanupI18NType { UCLN_I18N_REGION, UCLN_I18N_LIST_FORMATTER, UCLN_I18N_NUMSYS, + UCLN_I18N_MF2_UNISETS, UCLN_I18N_COUNT /* This must be last */ } ECleanupI18NType; diff --git a/thirdparty/icu4c/icudt_godot.dat b/thirdparty/icu4c/icudt_godot.dat index 58bead426f0..c6b96c477b3 100644 Binary files a/thirdparty/icu4c/icudt_godot.dat and b/thirdparty/icu4c/icudt_godot.dat differ