1
0
mirror of https://github.com/godotengine/godot.git synced 2025-12-02 16:48:55 +00:00

pcre2: Update to 10.45

This commit is contained in:
Jakub Marcowski
2025-03-23 16:50:13 +01:00
parent 2303ce843a
commit 2c3e302c75
75 changed files with 24071 additions and 12755 deletions

View File

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2023 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -156,6 +156,7 @@ static const uint8_t coptable[] = {
0, /* CLASS */
0, /* NCLASS */
0, /* XCLASS - variable length */
0, /* ECLASS - variable length */
0, /* REF */
0, /* REFI */
0, /* DNREF */
@@ -175,6 +176,7 @@ static const uint8_t coptable[] = {
0, /* Assert behind not */
0, /* NA assert */
0, /* NA assert behind */
0, /* Assert scan substring */
0, /* ONCE */
0, /* SCRIPT_RUN */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
@@ -188,7 +190,7 @@ static const uint8_t coptable[] = {
0, 0, /* COMMIT, COMMIT_ARG */
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */
0, 0 /* \B and \b in UCP mode */
0, 0, /* \B and \b in UCP mode */
};
/* This table identifies those opcodes that inspect a character. It is used to
@@ -234,6 +236,7 @@ static const uint8_t poptable[] = {
1, /* CLASS */
1, /* NCLASS */
1, /* XCLASS - variable length */
1, /* ECLASS - variable length */
0, /* REF */
0, /* REFI */
0, /* DNREF */
@@ -253,6 +256,7 @@ static const uint8_t poptable[] = {
0, /* Assert behind not */
0, /* NA assert */
0, /* NA assert behind */
0, /* Assert scan substring */
0, /* ONCE */
0, /* SCRIPT_RUN */
0, 0, 0, 0, 0, /* BRA, BRAPOS, CBRA, CBRAPOS, COND */
@@ -266,9 +270,13 @@ static const uint8_t poptable[] = {
0, 0, /* COMMIT, COMMIT_ARG */
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */
1, 1 /* \B and \b in UCP mode */
1, 1, /* \B and \b in UCP mode */
};
/* Compile-time check that these tables have the correct size. */
STATIC_ASSERT(sizeof(coptable) == OP_TABLE_LENGTH, coptable);
STATIC_ASSERT(sizeof(poptable) == OP_TABLE_LENGTH, poptable);
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
and \w */
@@ -695,7 +703,6 @@ for (;;)
int i, j;
int clen, dlen;
uint32_t c, d;
int forced_fail = 0;
BOOL partial_newline = FALSE;
BOOL could_continue = reset_could_continue;
reset_could_continue = FALSE;
@@ -841,19 +848,6 @@ for (;;)
switch (codevalue)
{
/* ========================================================================== */
/* These cases are never obeyed. This is a fudge that causes a compile-
time error if the vectors coptable or poptable, which are indexed by
opcode, are not the correct length. It seems to be the only way to do
such a check at compile time, as the sizeof() operator does not work
in the C preprocessor. */
case OP_TABLE_LENGTH:
case OP_TABLE_LENGTH +
((sizeof(coptable) == OP_TABLE_LENGTH) &&
(sizeof(poptable) == OP_TABLE_LENGTH)):
return 0;
/* ========================================================================== */
/* Reached a closing bracket. If not at the end of the pattern, carry
on with the next opcode. For repeating opcodes, also add the repeat
@@ -1179,10 +1173,6 @@ for (;;)
const ucd_record * prop = GET_UCD(c);
switch(code[1])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
chartype = prop->chartype;
OK = chartype == ucp_Lu || chartype == ucp_Ll ||
@@ -1462,10 +1452,6 @@ for (;;)
const ucd_record * prop = GET_UCD(c);
switch(code[2])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
chartype = prop->chartype;
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
@@ -1727,10 +1713,6 @@ for (;;)
const ucd_record * prop = GET_UCD(c);
switch(code[2])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
chartype = prop->chartype;
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
@@ -2017,10 +1999,6 @@ for (;;)
const ucd_record * prop = GET_UCD(c);
switch(code[1 + IMM2_SIZE + 1])
{
case PT_ANY:
OK = TRUE;
break;
case PT_LAMP:
chartype = prop->chartype;
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
@@ -2663,35 +2641,54 @@ for (;;)
case OP_CLASS:
case OP_NCLASS:
#ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS:
case OP_ECLASS:
#endif
{
BOOL isinclass = FALSE;
int next_state_offset;
PCRE2_SPTR ecode;
#ifdef SUPPORT_WIDE_CHARS
/* An extended class may have a table or a list of single characters,
ranges, or both, and it may be positive or negative. There's a
function that sorts all this out. */
if (codevalue == OP_XCLASS)
{
ecode = code + GET(code, 1);
if (clen > 0)
isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE,
(const uint8_t*)mb->start_code, utf);
}
/* A nested set-based class has internal opcodes for performing
set operations. */
else if (codevalue == OP_ECLASS)
{
ecode = code + GET(code, 1);
if (clen > 0)
isinclass = PRIV(eclass)(c, code + 1 + LINK_SIZE, ecode,
(const uint8_t*)mb->start_code, utf);
}
else
#endif /* SUPPORT_WIDE_CHARS */
/* For a simple class, there is always just a 32-byte table, and we
can set isinclass from it. */
if (codevalue != OP_XCLASS)
{
ecode = code + 1 + (32 / sizeof(PCRE2_UCHAR));
if (clen > 0)
{
isinclass = (c > 255)? (codevalue == OP_NCLASS) :
((((uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
((((const uint8_t *)(code + 1))[c/8] & (1u << (c&7))) != 0);
}
}
/* An extended class may have a table or a list of single characters,
ranges, or both, and it may be positive or negative. There's a
function that sorts all this out. */
else
{
ecode = code + GET(code, 1);
if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
}
/* At this point, isinclass is set for all kinds of class, and ecode
points to the byte after the end of the class. If there is a
quantifier, this is where it will be. */
@@ -2784,7 +2781,6 @@ for (;;)
though the other "backtracking verbs" are not supported. */
case OP_FAIL:
forced_fail++; /* Count FAILs for multiple states */
break;
case OP_ASSERT:
@@ -3058,7 +3054,7 @@ for (;;)
if (codevalue == OP_BRAPOSZERO)
{
allow_zero = TRUE;
codevalue = *(++code); /* Codevalue will be one of above BRAs */
++code; /* The following opcode will be one of the above BRAs */
}
else allow_zero = FALSE;
@@ -3271,18 +3267,12 @@ for (;;)
matches that we are going to find. If partial matching has been requested,
check for appropriate conditions.
The "forced_fail" variable counts the number of (*F) encountered for the
character. If it is equal to the original active_count (saved in
workspace[1]) it means that (*F) was found on every active state. In this
case we don't want to give a partial match.
The "could_continue" variable is true if a state could have continued but
for the fact that the end of the subject was reached. */
if (new_count <= 0)
{
if (could_continue && /* Some could go on, and */
forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */
(mb->moptions & PCRE2_PARTIAL_HARD) != 0 /* Hard partial */
|| /* or... */
@@ -3438,7 +3428,7 @@ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
function directly would like to have a way of setting these flags, in the same
way that they can set pcre2_compile() flags like PCRE2_NO_AUTOPOSSESS with
way that they can set pcre2_compile() flags like PCRE2_NO_AUTO_POSSESS with
constructions like (*NO_AUTO_POSSESS). To enable this, (*NOTEMPTY) and
(*NOTEMPTY_ATSTART) set bits in the pattern's "flags" field which can now be
transferred to the options for this function. The bits are guaranteed to be
@@ -3528,8 +3518,7 @@ if (mb->match_limit_depth > re->limit_depth)
if (mb->heap_limit > re->limit_heap)
mb->heap_limit = re->limit_heap;
mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_count * re->name_entry_size;
mb->start_code = (PCRE2_SPTR)((const uint8_t *)re + re->code_start);
mb->tables = re->tables;
mb->start_subject = subject;
mb->end_subject = end_subject;
@@ -3576,7 +3565,9 @@ switch(re->newline_convention)
mb->nltype = NLTYPE_ANYCRLF;
break;
default: return PCRE2_ERROR_INTERNAL;
default:
PCRE2_DEBUG_UNREACHABLE();
return PCRE2_ERROR_INTERNAL;
}
/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
@@ -3705,7 +3696,7 @@ for (;;)
these, for testing and for ensuring that all callouts do actually occur.
The optimizations must also be avoided when restarting a DFA match. */
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
if ((re->optimization_flags & PCRE2_OPTIM_START_OPTIMIZE) != 0 &&
(options & PCRE2_DFA_RESTART) == 0)
{
/* If firstline is TRUE, the start of the match is constrained to the first