1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-10 13:00:37 +00:00

pcre2: Update to upstream version 10.40

Changelog: https://github.com/PCRE2Project/pcre2/blob/pcre2-10.40/ChangeLog
This commit is contained in:
Rémi Verschelde
2022-05-17 16:38:55 +02:00
parent d5c1de784c
commit fd6eb2c2d2
48 changed files with 13966 additions and 9395 deletions

View File

@@ -7,7 +7,11 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2018 University of Cambridge
New API code Copyright (c) 2016-2022 University of Cambridge
This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
a new version of this code.
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -38,31 +42,27 @@ POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.
/* This file contains definitions of the Unicode property values that are
returned by the UCD access macros and used throughout PCRE2.
IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.
ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c. */
IMPORTANT: The specific values of the first two enums (general and particular
character categories) are assumed by the table called catposstab in the file
pcre2_auto_possess.c. They are unlikely to change, but should be checked after
an update. */
/* These are the general character categories. */
enum {
ucp_C, /* Other */
ucp_L, /* Letter */
ucp_M, /* Mark */
ucp_N, /* Number */
ucp_P, /* Punctuation */
ucp_S, /* Symbol */
ucp_Z /* Separator */
ucp_C,
ucp_L,
ucp_M,
ucp_N,
ucp_P,
ucp_S,
ucp_Z,
};
/* These are the particular character categories. */
@@ -97,7 +97,98 @@ enum {
ucp_So, /* Other symbol */
ucp_Zl, /* Line separator */
ucp_Zp, /* Paragraph separator */
ucp_Zs /* Space separator */
ucp_Zs, /* Space separator */
};
/* These are Boolean properties. */
enum {
ucp_ASCII,
ucp_ASCII_Hex_Digit,
ucp_Alphabetic,
ucp_Bidi_Control,
ucp_Bidi_Mirrored,
ucp_Case_Ignorable,
ucp_Cased,
ucp_Changes_When_Casefolded,
ucp_Changes_When_Casemapped,
ucp_Changes_When_Lowercased,
ucp_Changes_When_Titlecased,
ucp_Changes_When_Uppercased,
ucp_Dash,
ucp_Default_Ignorable_Code_Point,
ucp_Deprecated,
ucp_Diacritic,
ucp_Emoji,
ucp_Emoji_Component,
ucp_Emoji_Modifier,
ucp_Emoji_Modifier_Base,
ucp_Emoji_Presentation,
ucp_Extended_Pictographic,
ucp_Extender,
ucp_Grapheme_Base,
ucp_Grapheme_Extend,
ucp_Grapheme_Link,
ucp_Hex_Digit,
ucp_IDS_Binary_Operator,
ucp_IDS_Trinary_Operator,
ucp_ID_Continue,
ucp_ID_Start,
ucp_Ideographic,
ucp_Join_Control,
ucp_Logical_Order_Exception,
ucp_Lowercase,
ucp_Math,
ucp_Noncharacter_Code_Point,
ucp_Pattern_Syntax,
ucp_Pattern_White_Space,
ucp_Prepended_Concatenation_Mark,
ucp_Quotation_Mark,
ucp_Radical,
ucp_Regional_Indicator,
ucp_Sentence_Terminal,
ucp_Soft_Dotted,
ucp_Terminal_Punctuation,
ucp_Unified_Ideograph,
ucp_Uppercase,
ucp_Variation_Selector,
ucp_White_Space,
ucp_XID_Continue,
ucp_XID_Start,
/* This must be last */
ucp_Bprop_Count
};
/* Size of entries in ucd_boolprop_sets[] */
#define ucd_boolprop_sets_item_size 2
/* These are the bidi class values. */
enum {
ucp_bidiAL, /* Arabic letter */
ucp_bidiAN, /* Arabic number */
ucp_bidiB, /* Paragraph separator */
ucp_bidiBN, /* Boundary neutral */
ucp_bidiCS, /* Common separator */
ucp_bidiEN, /* European number */
ucp_bidiES, /* European separator */
ucp_bidiET, /* European terminator */
ucp_bidiFSI, /* First strong isolate */
ucp_bidiL, /* Left to right */
ucp_bidiLRE, /* Left to right embedding */
ucp_bidiLRI, /* Left to right isolate */
ucp_bidiLRO, /* Left to right override */
ucp_bidiNSM, /* Non-spacing mark */
ucp_bidiON, /* Other neutral */
ucp_bidiPDF, /* Pop directional format */
ucp_bidiPDI, /* Pop directional isolate */
ucp_bidiR, /* Right to left */
ucp_bidiRLE, /* Right to left embedding */
ucp_bidiRLI, /* Right to left isolate */
ucp_bidiRLO, /* Right to left override */
ucp_bidiS, /* Segment separator */
ucp_bidiWS, /* White space */
};
/* These are grapheme break properties. The Extended Pictographic property
@@ -115,191 +206,189 @@ enum {
ucp_gbT, /* 8 Hangul syllable type T */
ucp_gbLV, /* 9 Hangul syllable type LV */
ucp_gbLVT, /* 10 Hangul syllable type LVT */
ucp_gbRegionalIndicator, /* 11 */
ucp_gbRegional_Indicator, /* 11 */
ucp_gbOther, /* 12 */
ucp_gbZWJ, /* 13 */
ucp_gbExtended_Pictographic /* 14 */
ucp_gbExtended_Pictographic, /* 14 */
};
/* These are the script identifications. */
enum {
ucp_Unknown,
ucp_Arabic,
ucp_Armenian,
ucp_Bengali,
ucp_Bopomofo,
ucp_Braille,
ucp_Buginese,
ucp_Buhid,
ucp_Canadian_Aboriginal,
ucp_Cherokee,
ucp_Common,
ucp_Coptic,
ucp_Cypriot,
ucp_Cyrillic,
ucp_Deseret,
ucp_Devanagari,
ucp_Ethiopic,
ucp_Georgian,
ucp_Glagolitic,
ucp_Gothic,
ucp_Greek,
ucp_Gujarati,
ucp_Gurmukhi,
ucp_Han,
ucp_Hangul,
ucp_Hanunoo,
ucp_Hebrew,
ucp_Hiragana,
ucp_Inherited,
ucp_Kannada,
ucp_Katakana,
ucp_Kharoshthi,
ucp_Khmer,
ucp_Lao,
/* Scripts which has characters in other scripts. */
ucp_Latin,
ucp_Limbu,
ucp_Linear_B,
ucp_Malayalam,
ucp_Mongolian,
ucp_Myanmar,
ucp_New_Tai_Lue,
ucp_Ogham,
ucp_Old_Italic,
ucp_Old_Persian,
ucp_Oriya,
ucp_Osmanya,
ucp_Runic,
ucp_Shavian,
ucp_Sinhala,
ucp_Syloti_Nagri,
ucp_Greek,
ucp_Cyrillic,
ucp_Arabic,
ucp_Syriac,
ucp_Tagalog,
ucp_Tagbanwa,
ucp_Tai_Le,
ucp_Thaana,
ucp_Devanagari,
ucp_Bengali,
ucp_Gurmukhi,
ucp_Gujarati,
ucp_Oriya,
ucp_Tamil,
ucp_Telugu,
ucp_Thaana,
ucp_Thai,
ucp_Tibetan,
ucp_Tifinagh,
ucp_Ugaritic,
ucp_Kannada,
ucp_Malayalam,
ucp_Sinhala,
ucp_Myanmar,
ucp_Georgian,
ucp_Hangul,
ucp_Mongolian,
ucp_Hiragana,
ucp_Katakana,
ucp_Bopomofo,
ucp_Han,
ucp_Yi,
/* New for Unicode 5.0 */
ucp_Balinese,
ucp_Cuneiform,
ucp_Nko,
ucp_Tagalog,
ucp_Hanunoo,
ucp_Buhid,
ucp_Tagbanwa,
ucp_Limbu,
ucp_Tai_Le,
ucp_Linear_B,
ucp_Cypriot,
ucp_Buginese,
ucp_Coptic,
ucp_Glagolitic,
ucp_Syloti_Nagri,
ucp_Phags_Pa,
ucp_Phoenician,
/* New for Unicode 5.1 */
ucp_Carian,
ucp_Cham,
ucp_Nko,
ucp_Kayah_Li,
ucp_Lepcha,
ucp_Lycian,
ucp_Lydian,
ucp_Ol_Chiki,
ucp_Rejang,
ucp_Saurashtra,
ucp_Sundanese,
ucp_Vai,
/* New for Unicode 5.2 */
ucp_Avestan,
ucp_Bamum,
ucp_Egyptian_Hieroglyphs,
ucp_Imperial_Aramaic,
ucp_Inscriptional_Pahlavi,
ucp_Inscriptional_Parthian,
ucp_Javanese,
ucp_Kaithi,
ucp_Lisu,
ucp_Meetei_Mayek,
ucp_Old_South_Arabian,
ucp_Old_Turkic,
ucp_Samaritan,
ucp_Tai_Tham,
ucp_Tai_Viet,
/* New for Unicode 6.0.0 */
ucp_Batak,
ucp_Brahmi,
ucp_Mandaic,
/* New for Unicode 6.1.0 */
ucp_Chakma,
ucp_Meroitic_Cursive,
ucp_Meroitic_Hieroglyphs,
ucp_Miao,
ucp_Sharada,
ucp_Sora_Sompeng,
ucp_Takri,
/* New for Unicode 7.0.0 */
ucp_Bassa_Vah,
ucp_Caucasian_Albanian,
ucp_Duployan,
ucp_Elbasan,
ucp_Grantha,
ucp_Khojki,
ucp_Khudawadi,
ucp_Linear_A,
ucp_Mahajani,
ucp_Manichaean,
ucp_Mende_Kikakui,
ucp_Modi,
ucp_Mro,
ucp_Nabataean,
ucp_Old_North_Arabian,
ucp_Old_Permic,
ucp_Pahawh_Hmong,
ucp_Palmyrene,
ucp_Psalter_Pahlavi,
ucp_Khudawadi,
ucp_Tirhuta,
ucp_Multani,
ucp_Adlam,
ucp_Masaram_Gondi,
ucp_Dogra,
ucp_Gunjala_Gondi,
ucp_Hanifi_Rohingya,
ucp_Sogdian,
ucp_Nandinagari,
ucp_Yezidi,
ucp_Cypro_Minoan,
ucp_Old_Uyghur,
/* Scripts which has no characters in other scripts. */
ucp_Unknown,
ucp_Common,
ucp_Armenian,
ucp_Hebrew,
ucp_Thai,
ucp_Lao,
ucp_Tibetan,
ucp_Ethiopic,
ucp_Cherokee,
ucp_Canadian_Aboriginal,
ucp_Ogham,
ucp_Runic,
ucp_Khmer,
ucp_Old_Italic,
ucp_Gothic,
ucp_Deseret,
ucp_Inherited,
ucp_Ugaritic,
ucp_Shavian,
ucp_Osmanya,
ucp_Braille,
ucp_New_Tai_Lue,
ucp_Tifinagh,
ucp_Old_Persian,
ucp_Kharoshthi,
ucp_Balinese,
ucp_Cuneiform,
ucp_Phoenician,
ucp_Sundanese,
ucp_Lepcha,
ucp_Ol_Chiki,
ucp_Vai,
ucp_Saurashtra,
ucp_Rejang,
ucp_Lycian,
ucp_Carian,
ucp_Lydian,
ucp_Cham,
ucp_Tai_Tham,
ucp_Tai_Viet,
ucp_Avestan,
ucp_Egyptian_Hieroglyphs,
ucp_Samaritan,
ucp_Lisu,
ucp_Bamum,
ucp_Meetei_Mayek,
ucp_Imperial_Aramaic,
ucp_Old_South_Arabian,
ucp_Inscriptional_Parthian,
ucp_Inscriptional_Pahlavi,
ucp_Old_Turkic,
ucp_Batak,
ucp_Brahmi,
ucp_Meroitic_Cursive,
ucp_Meroitic_Hieroglyphs,
ucp_Miao,
ucp_Sora_Sompeng,
ucp_Caucasian_Albanian,
ucp_Bassa_Vah,
ucp_Elbasan,
ucp_Pahawh_Hmong,
ucp_Mende_Kikakui,
ucp_Mro,
ucp_Old_North_Arabian,
ucp_Nabataean,
ucp_Palmyrene,
ucp_Pau_Cin_Hau,
ucp_Siddham,
ucp_Tirhuta,
ucp_Warang_Citi,
/* New for Unicode 8.0.0 */
ucp_Ahom,
ucp_Anatolian_Hieroglyphs,
ucp_Hatran,
ucp_Multani,
ucp_Old_Hungarian,
ucp_SignWriting,
/* New for Unicode 10.0.0 (no update since 8.0.0) */
ucp_Adlam,
ucp_Bhaiksuki,
ucp_Marchen,
ucp_Newa,
ucp_Osage,
ucp_Tangut,
ucp_Masaram_Gondi,
ucp_Nushu,
ucp_Soyombo,
ucp_Zanabazar_Square,
/* New for Unicode 11.0.0 */
ucp_Dogra,
ucp_Gunjala_Gondi,
ucp_Hanifi_Rohingya,
ucp_Makasar,
ucp_Medefaidrin,
ucp_Old_Sogdian,
ucp_Sogdian,
/* New for Unicode 12.0.0 */
ucp_Elymaic,
ucp_Nandinagari,
ucp_Nyiakeng_Puachue_Hmong,
ucp_Wancho,
/* New for Unicode 13.0.0 */
ucp_Chorasmian,
ucp_Dives_Akuru,
ucp_Khitan_Small_Script,
ucp_Yezidi,
/* New for Unicode 14.0.0 */
ucp_Cypro_Minoan,
ucp_Old_Uyghur,
ucp_Tangsa,
ucp_Toto,
ucp_Vithkuqi
ucp_Vithkuqi,
/* This must be last */
ucp_Script_Count
};
/* Size of entries in ucd_script_sets[] */
#define ucd_script_sets_item_size 3
#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
/* End of pcre2_ucp.h */