1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-28 16:07:14 +00:00

pcre2: Update to 10.45

This commit is contained in:
Jakub Marcowski
2025-03-23 16:50:13 +01:00
parent 2303ce843a
commit 2c3e302c75
75 changed files with 24071 additions and 12755 deletions

View File

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2023 University of Cambridge
New API code Copyright (c) 2016-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -114,7 +114,7 @@ uint32_t once_fudge = 0;
BOOL had_recurse = FALSE;
BOOL dupcapused = (re->flags & PCRE2_DUPCAPUSED) != 0;
PCRE2_SPTR nextbranch = code + GET(code, 1);
PCRE2_UCHAR *cc = (PCRE2_UCHAR *)code + 1 + LINK_SIZE;
PCRE2_SPTR cc = code + 1 + LINK_SIZE;
recurse_check this_recurse;
/* If this is a "could be empty" group, its minimum length is 0. */
@@ -136,12 +136,13 @@ passes 16-bits, reset to that value and skip the rest of the branch. */
for (;;)
{
int d, min, recno;
PCRE2_UCHAR op, *cs, *ce;
PCRE2_UCHAR op;
PCRE2_SPTR cs, ce;
if (branchlength >= UINT16_MAX)
{
branchlength = UINT16_MAX;
cc = (PCRE2_UCHAR *)nextbranch;
cc = nextbranch;
}
op = *cc;
@@ -249,6 +250,7 @@ for (;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ASSERT_NA:
case OP_ASSERT_SCS:
case OP_ASSERTBACK_NA:
do cc += GET(cc, 1); while (*cc == OP_ALT);
/* Fall through */
@@ -417,15 +419,14 @@ for (;;)
case OP_NCLASS:
#ifdef SUPPORT_WIDE_CHARS
case OP_XCLASS:
case OP_ECLASS:
/* The original code caused an unsigned overflow in 64 bit systems,
so now we use a conditional statement. */
if (op == OP_XCLASS)
if (op == OP_XCLASS || op == OP_ECLASS)
cc += GET(cc, 1);
else
cc += PRIV(OP_lengths)[OP_CLASS];
#else
cc += PRIV(OP_lengths)[OP_CLASS];
#endif
cc += PRIV(OP_lengths)[OP_CLASS];
switch (*cc)
{
@@ -479,8 +480,8 @@ for (;;)
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
{
int count = GET2(cc, 1+IMM2_SIZE);
PCRE2_UCHAR *slot =
(PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
PCRE2_SPTR slot =
(PCRE2_SPTR)((const uint8_t *)re + sizeof(pcre2_real_code)) +
GET2(cc, 1) * re->name_entry_size;
d = INT_MAX;
@@ -496,13 +497,12 @@ for (;;)
dd = backref_cache[recno];
else
{
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
ce = cs = PRIV(find_bracket)(startcode, utf, recno);
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
dd = 0;
if (!dupcapused ||
(PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL)
{
if (cc > cs && cc < ce) /* Simple recursion */
{
@@ -539,7 +539,7 @@ for (;;)
}
}
else d = 0;
cc += 1 + 2*IMM2_SIZE;
cc += PRIV(OP_lengths)[*cc];
goto REPEAT_BACK_REFERENCE;
/* Single back reference by number. References by name are converted to by
@@ -557,12 +557,11 @@ for (;;)
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
{
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
ce = cs = PRIV(find_bracket)(startcode, utf, recno);
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
if (!dupcapused ||
(PCRE2_UCHAR *)PRIV(find_bracket)(ce, utf, recno) == NULL)
if (!dupcapused || PRIV(find_bracket)(ce, utf, recno) == NULL)
{
if (cc > cs && cc < ce) /* Simple recursion */
{
@@ -593,7 +592,7 @@ for (;;)
backref_cache[0] = recno;
}
cc += 1 + IMM2_SIZE;
cc += PRIV(OP_lengths)[*cc];
/* Handle repeated back references */
@@ -643,7 +642,7 @@ for (;;)
pattern contains multiple subpatterns with the same number. */
case OP_RECURSE:
cs = ce = (PCRE2_UCHAR *)startcode + GET(cc, 1);
cs = ce = startcode + GET(cc, 1);
recno = GET2(cs, 1+LINK_SIZE);
if (recno == prev_recurse_recno)
{
@@ -755,10 +754,13 @@ for (;;)
new ones get added they are properly considered. */
default:
PCRE2_DEBUG_UNREACHABLE();
return -3;
}
}
/* Control never gets here */
PCRE2_DEBUG_UNREACHABLE(); /* Control should never reach here */
return -3; /* Avoid compiler warnings */
}
@@ -919,6 +921,138 @@ if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
/*************************************************
* Set starting bits for a character list. *
*************************************************/
/* This function sets starting bits for a character list. It enumerates
all characters and character ranges in the character list, and sets
the starting bits accordingly.
Arguments:
code pointer to the code
start_bitmap pointer to the starting bitmap
Returns: nothing
*/
static void
study_char_list(PCRE2_SPTR code, uint8_t *start_bitmap,
const uint8_t *char_lists_end)
{
uint32_t type, list_ind;
uint32_t char_list_add = XCL_CHAR_LIST_LOW_16_ADD;
uint32_t range_start = ~(uint32_t)0, range_end = 0;
const uint8_t *next_char;
PCRE2_UCHAR start_buffer[6], end_buffer[6];
PCRE2_UCHAR start, end;
/* Only needed in 8-bit mode at the moment. */
type = (uint32_t)(code[0] << 8) | code[1];
code += 2;
/* Align characters. */
next_char = char_lists_end - (GET(code, 0) << 1);
type &= XCL_TYPE_MASK;
list_ind = 0;
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
range_start = XCL_CHAR_LIST_LOW_16_START;
while (type > 0)
{
uint32_t item_count = type & XCL_ITEM_COUNT_MASK;
if (item_count == XCL_ITEM_COUNT_MASK)
{
if (list_ind <= 1)
{
item_count = *(const uint16_t*)next_char;
next_char += 2;
}
else
{
item_count = *(const uint32_t*)next_char;
next_char += 4;
}
}
while (item_count > 0)
{
if (list_ind <= 1)
{
range_end = *(const uint16_t*)next_char;
next_char += 2;
}
else
{
range_end = *(const uint32_t*)next_char;
next_char += 4;
}
if ((range_end & XCL_CHAR_END) != 0)
{
range_end = char_list_add + (range_end >> XCL_CHAR_SHIFT);
PRIV(ord2utf)(range_end, end_buffer);
end = end_buffer[0];
if (range_start < range_end)
{
PRIV(ord2utf)(range_start, start_buffer);
for (start = start_buffer[0]; start <= end; start++)
start_bitmap[start / 8] |= (1u << (start & 7));
}
else
start_bitmap[end / 8] |= (1u << (end & 7));
range_start = ~(uint32_t)0;
}
else
range_start = char_list_add + (range_end >> XCL_CHAR_SHIFT);
item_count--;
}
list_ind++;
type >>= XCL_TYPE_BIT_LEN;
if (range_start == ~(uint32_t)0)
{
if ((type & XCL_BEGIN_WITH_RANGE) != 0)
{
/* In 8 bit mode XCL_CHAR_LIST_HIGH_32_START is not possible. */
if (list_ind == 1) range_start = XCL_CHAR_LIST_HIGH_16_START;
else range_start = XCL_CHAR_LIST_LOW_32_START;
}
}
else if ((type & XCL_BEGIN_WITH_RANGE) == 0)
{
PRIV(ord2utf)(range_start, start_buffer);
/* In 8 bit mode XCL_CHAR_LIST_LOW_32_END and
XCL_CHAR_LIST_HIGH_32_END are not possible. */
if (list_ind == 1) range_end = XCL_CHAR_LIST_LOW_16_END;
else range_end = XCL_CHAR_LIST_HIGH_16_END;
PRIV(ord2utf)(range_end, end_buffer);
end = end_buffer[0];
for (start = start_buffer[0]; start <= end; start++)
start_bitmap[start / 8] |= (1u << (start & 7));
range_start = ~(uint32_t)0;
}
/* In 8 bit mode XCL_CHAR_LIST_HIGH_32_ADD is not possible. */
if (list_ind == 1) char_list_add = XCL_CHAR_LIST_HIGH_16_ADD;
else char_list_add = XCL_CHAR_LIST_LOW_32_ADD;
}
}
#endif
/*************************************************
* Create bitmap of starting code units *
*************************************************/
@@ -980,7 +1114,7 @@ do
{
int rc;
PCRE2_SPTR ncode;
uint8_t *classmap = NULL;
const uint8_t *classmap = NULL;
#ifdef SUPPORT_WIDE_CHARS
PCRE2_UCHAR xclassflags;
#endif
@@ -1134,6 +1268,7 @@ do
case OP_ASSERTBACK_NOT:
case OP_ASSERT_NA:
case OP_ASSERTBACK_NA:
case OP_ASSERT_SCS:
ncode += GET(ncode, 1);
while (*ncode == OP_ALT) ncode += GET(ncode, 1);
ncode += 1 + LINK_SIZE;
@@ -1252,12 +1387,14 @@ do
tcode += GET(tcode, 1 + 2*LINK_SIZE);
break;
/* Skip over lookbehind and negative lookahead assertions */
/* Skip over lookbehind, negative lookahead, and scan substring
assertions */
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ASSERTBACK_NA:
case OP_ASSERT_SCS:
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
@@ -1578,6 +1715,13 @@ do
tcode += 2;
break;
/* Set-based ECLASS: treat it the same as a "complex" XCLASS; give up. */
#ifdef SUPPORT_WIDE_CHARS
case OP_ECLASS:
return SSB_FAIL;
#endif
/* Extended class: if there are any property checks, or if this is a
negative XCLASS without a map, give up. If there are no property checks,
there must be wide characters on the XCLASS list, because otherwise an
@@ -1596,7 +1740,7 @@ do
map pointer if there is one, and fall through. */
classmap = ((xclassflags & XCL_MAP) == 0)? NULL :
(uint8_t *)(tcode + 1 + LINK_SIZE + 1);
(const uint8_t *)(tcode + 1 + LINK_SIZE + 1);
/* In UTF-8 mode, scan the character list and set bits for leading bytes,
then jump to handle the map. */
@@ -1608,6 +1752,13 @@ do
PCRE2_SPTR p = tcode + 1 + LINK_SIZE + 1 + ((classmap == NULL)? 0:32);
tcode += GET(tcode, 1);
if (*p >= XCL_LIST)
{
study_char_list(p, re->start_bitmap,
((const uint8_t *)re + re->code_start));
goto HANDLE_CLASSMAP;
}
for (;;) switch (*p++)
{
case XCL_SINGLE:
@@ -1629,6 +1780,7 @@ do
goto HANDLE_CLASSMAP;
default:
PCRE2_DEBUG_UNREACHABLE();
return SSB_UNKNOWN; /* Internal error, should not occur */
}
}
@@ -1665,7 +1817,7 @@ do
case OP_CLASS:
if (*tcode == OP_XCLASS) tcode += GET(tcode, 1); else
{
classmap = (uint8_t *)(++tcode);
classmap = (const uint8_t *)(++tcode);
tcode += 32 / sizeof(PCRE2_UCHAR);
}
@@ -1768,8 +1920,7 @@ BOOL ucp = (re->overall_options & PCRE2_UCP) != 0;
/* Find start of compiled code */
code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_entry_size * re->name_count;
code = (PCRE2_UCHAR *)((uint8_t *)re + re->code_start);
/* For a pattern that has a first code unit, or a multiline pattern that
matches only at "line start", there is no point in seeking a list of starting
@@ -1779,7 +1930,11 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
{
int depth = 0;
int rc = set_start_bits(re, code, utf, ucp, &depth);
if (rc == SSB_UNKNOWN) return 1;
if (rc == SSB_UNKNOWN)
{
PCRE2_DEBUG_UNREACHABLE();
return 1;
}
/* If a list of starting code units was set up, scan the list to see if only
one or two were listed. Having only one listed is rare because usually a
@@ -1852,25 +2007,22 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
}
}
/* Replace the start code unit bits with a first code unit, but only if it
is not the same as a required later code unit. This is because a search for
a required code unit starts after an explicit first code unit, but at a
code unit found from the bitmap. Patterns such as /a*a/ don't work
if both the start unit and required unit are the same. */
/* Replace the start code unit bits with a first code unit. If it is the
same as a required later code unit, then clear the required later code
unit. This is because a search for a required code unit starts after an
explicit first code unit, but at a code unit found from the bitmap.
Patterns such as /a*a/ don't work if both the start unit and required
unit are the same. */
if (a >= 0 &&
(
(re->flags & PCRE2_LASTSET) == 0 ||
(
re->last_codeunit != (uint32_t)a &&
(b < 0 || re->last_codeunit != (uint32_t)b)
)
))
{
if (a >= 0) {
if ((re->flags & PCRE2_LASTSET) && (re->last_codeunit == (uint32_t)a || (b >= 0 && re->last_codeunit == (uint32_t)b))) {
re->flags &= ~(PCRE2_LASTSET | PCRE2_LASTCASELESS);
re->last_codeunit = 0;
}
re->first_codeunit = a;
flags = PCRE2_FIRSTSET;
if (b >= 0) flags |= PCRE2_FIRSTCASELESS;
}
}
DONE:
re->flags |= flags;
@@ -1898,9 +2050,11 @@ if ((re->flags & (PCRE2_MATCH_EMPTY|PCRE2_HASACCEPT)) == 0 &&
break; /* Leave minlength unchanged (will be zero) */
case -2:
PCRE2_DEBUG_UNREACHABLE();
return 2; /* missing capturing bracket */
case -3:
PCRE2_DEBUG_UNREACHABLE();
return 3; /* unrecognized opcode */
default: