You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-19 14:31:59 +00:00
pcre2: Update to upstream version 10.36
Changelog: https://vcs.pcre.org/pcre2/code/tags/pcre2-10.36/ChangeLog?view=markup
This commit is contained in:
94
thirdparty/pcre2/src/pcre2_study.c
vendored
94
thirdparty/pcre2/src/pcre2_study.c
vendored
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -58,7 +58,7 @@ collecting data (e.g. minimum matching length). */
|
||||
|
||||
/* Returns from set_start_bits() */
|
||||
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
|
||||
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN, SSB_TOODEEP };
|
||||
|
||||
|
||||
/*************************************************
|
||||
@@ -772,15 +772,19 @@ Arguments:
|
||||
p points to the first code unit of the character
|
||||
caseless TRUE if caseless
|
||||
utf TRUE for UTF mode
|
||||
ucp TRUE for UCP mode
|
||||
|
||||
Returns: pointer after the character
|
||||
*/
|
||||
|
||||
static PCRE2_SPTR
|
||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf)
|
||||
set_table_bit(pcre2_real_code *re, PCRE2_SPTR p, BOOL caseless, BOOL utf,
|
||||
BOOL ucp)
|
||||
{
|
||||
uint32_t c = *p++; /* First code unit */
|
||||
(void)utf; /* Stop compiler warning when UTF not supported */
|
||||
|
||||
(void)utf; /* Stop compiler warnings when UTF not supported */
|
||||
(void)ucp;
|
||||
|
||||
/* In 16-bit and 32-bit modes, code units greater than 0xff set the bit for
|
||||
0xff. */
|
||||
@@ -810,22 +814,26 @@ if (utf)
|
||||
if (caseless)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
if (utf || ucp)
|
||||
{
|
||||
c = UCD_OTHERCASE(c);
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
PCRE2_UCHAR buff[6];
|
||||
c = UCD_OTHERCASE(c);
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
SET_BIT(buff[0]);
|
||||
if (utf)
|
||||
{
|
||||
PCRE2_UCHAR buff[6];
|
||||
(void)PRIV(ord2utf)(c, buff);
|
||||
SET_BIT(buff[0]);
|
||||
}
|
||||
else if (c < 256) SET_BIT(c);
|
||||
#else /* 16-bit or 32-bit mode */
|
||||
c = UCD_OTHERCASE(c);
|
||||
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
|
||||
#endif
|
||||
}
|
||||
|
||||
else
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
/* Not UTF */
|
||||
/* Not UTF or UCP */
|
||||
|
||||
if (MAX_255(c)) SET_BIT(re->tables[fcc_offset + c]);
|
||||
}
|
||||
@@ -924,19 +932,26 @@ The SSB_CONTINUE return is useful for parenthesized groups in patterns such as
|
||||
must continue at the outer level to find at least one mandatory code unit. At
|
||||
the outermost level, this function fails unless the result is SSB_DONE.
|
||||
|
||||
We restrict recursion (for nested groups) to 1000 to avoid stack overflow
|
||||
issues.
|
||||
|
||||
Arguments:
|
||||
re points to the compiled regex block
|
||||
code points to an expression
|
||||
utf TRUE if in UTF mode
|
||||
ucp TRUE if in UCP mode
|
||||
depthptr pointer to recurse depth
|
||||
|
||||
Returns: SSB_FAIL => Failed to find any starting code units
|
||||
SSB_DONE => Found mandatory starting code units
|
||||
SSB_CONTINUE => Found optional starting code units
|
||||
SSB_UNKNOWN => Hit an unrecognized opcode
|
||||
SSB_TOODEEP => Recursion is too deep
|
||||
*/
|
||||
|
||||
static int
|
||||
set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf)
|
||||
set_start_bits(pcre2_real_code *re, PCRE2_SPTR code, BOOL utf, BOOL ucp,
|
||||
int *depthptr)
|
||||
{
|
||||
uint32_t c;
|
||||
int yield = SSB_DONE;
|
||||
@@ -947,6 +962,9 @@ int table_limit = utf? 16:32;
|
||||
int table_limit = 32;
|
||||
#endif
|
||||
|
||||
*depthptr += 1;
|
||||
if (*depthptr > 1000) return SSB_TOODEEP;
|
||||
|
||||
do
|
||||
{
|
||||
BOOL try_next = TRUE;
|
||||
@@ -1103,13 +1121,17 @@ do
|
||||
case OP_SCRIPT_RUN:
|
||||
case OP_ASSERT:
|
||||
case OP_ASSERT_NA:
|
||||
rc = set_start_bits(re, tcode, utf);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
|
||||
if (rc == SSB_DONE) try_next = FALSE; else
|
||||
rc = set_start_bits(re, tcode, utf, ucp, depthptr);
|
||||
if (rc == SSB_DONE)
|
||||
{
|
||||
try_next = FALSE;
|
||||
}
|
||||
else if (rc == SSB_CONTINUE)
|
||||
{
|
||||
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
}
|
||||
else return rc; /* FAIL, UNKNOWN, or TOODEEP */
|
||||
break;
|
||||
|
||||
/* If we hit ALT or KET, it means we haven't found anything mandatory in
|
||||
@@ -1155,8 +1177,8 @@ do
|
||||
case OP_BRAZERO:
|
||||
case OP_BRAMINZERO:
|
||||
case OP_BRAPOSZERO:
|
||||
rc = set_start_bits(re, ++tcode, utf);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
|
||||
rc = set_start_bits(re, ++tcode, utf, ucp, depthptr);
|
||||
if (rc == SSB_FAIL || rc == SSB_UNKNOWN || rc == SSB_TOODEEP) return rc;
|
||||
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
|
||||
tcode += 1 + LINK_SIZE;
|
||||
break;
|
||||
@@ -1177,7 +1199,7 @@ do
|
||||
case OP_QUERY:
|
||||
case OP_MINQUERY:
|
||||
case OP_POSQUERY:
|
||||
tcode = set_table_bit(re, tcode + 1, FALSE, utf);
|
||||
tcode = set_table_bit(re, tcode + 1, FALSE, utf, ucp);
|
||||
break;
|
||||
|
||||
case OP_STARI:
|
||||
@@ -1186,7 +1208,7 @@ do
|
||||
case OP_QUERYI:
|
||||
case OP_MINQUERYI:
|
||||
case OP_POSQUERYI:
|
||||
tcode = set_table_bit(re, tcode + 1, TRUE, utf);
|
||||
tcode = set_table_bit(re, tcode + 1, TRUE, utf, ucp);
|
||||
break;
|
||||
|
||||
/* Single-char upto sets the bit and tries the next */
|
||||
@@ -1194,13 +1216,13 @@ do
|
||||
case OP_UPTO:
|
||||
case OP_MINUPTO:
|
||||
case OP_POSUPTO:
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf);
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, FALSE, utf, ucp);
|
||||
break;
|
||||
|
||||
case OP_UPTOI:
|
||||
case OP_MINUPTOI:
|
||||
case OP_POSUPTOI:
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf);
|
||||
tcode = set_table_bit(re, tcode + 1 + IMM2_SIZE, TRUE, utf, ucp);
|
||||
break;
|
||||
|
||||
/* At least one single char sets the bit and stops */
|
||||
@@ -1212,7 +1234,7 @@ do
|
||||
case OP_PLUS:
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
(void)set_table_bit(re, tcode + 1, FALSE, utf);
|
||||
(void)set_table_bit(re, tcode + 1, FALSE, utf, ucp);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
@@ -1223,7 +1245,7 @@ do
|
||||
case OP_PLUSI:
|
||||
case OP_MINPLUSI:
|
||||
case OP_POSPLUSI:
|
||||
(void)set_table_bit(re, tcode + 1, TRUE, utf);
|
||||
(void)set_table_bit(re, tcode + 1, TRUE, utf, ucp);
|
||||
try_next = FALSE;
|
||||
break;
|
||||
|
||||
@@ -1652,6 +1674,7 @@ PRIV(study)(pcre2_real_code *re)
|
||||
int count = 0;
|
||||
PCRE2_UCHAR *code;
|
||||
BOOL utf = (re->overall_options & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (re->overall_options & PCRE2_UCP) != 0;
|
||||
|
||||
/* Find start of compiled code */
|
||||
|
||||
@@ -1664,7 +1687,8 @@ code units. */
|
||||
|
||||
if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
||||
{
|
||||
int rc = set_start_bits(re, code, utf);
|
||||
int depth = 0;
|
||||
int rc = set_start_bits(re, code, utf, ucp, &depth);
|
||||
if (rc == SSB_UNKNOWN) return 1;
|
||||
|
||||
/* If a list of starting code units was set up, scan the list to see if only
|
||||
@@ -1712,27 +1736,27 @@ if ((re->flags & (PCRE2_FIRSTSET|PCRE2_STARTLINE)) == 0)
|
||||
}
|
||||
|
||||
/* c contains the code unit value, in the range 0-255. In 8-bit UTF
|
||||
mode, only values < 128 can be used. */
|
||||
mode, only values < 128 can be used. In all the other cases, c is a
|
||||
character value. */
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (c > 127) goto DONE;
|
||||
if (utf && c > 127) goto DONE;
|
||||
#endif
|
||||
if (a < 0) a = c; /* First one found */
|
||||
if (a < 0) a = c; /* First one found, save in a */
|
||||
else if (b < 0) /* Second one found */
|
||||
{
|
||||
int d = TABLE_GET((unsigned int)c, re->tables + fcc_offset, c);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if (utf && UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
#else /* 16-bit or 32-bit */
|
||||
if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
if (utf && c > 127) d = UCD_OTHERCASE(c);
|
||||
#endif /* Code width */
|
||||
if (utf || ucp)
|
||||
{
|
||||
if (UCD_CASESET(c) != 0) goto DONE; /* Multiple case set */
|
||||
if (c > 127) d = UCD_OTHERCASE(c);
|
||||
}
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
|
||||
if (d != a) goto DONE; /* Not other case of a */
|
||||
b = c;
|
||||
if (d != a) goto DONE; /* Not the other case of a */
|
||||
b = c; /* Save second in b */
|
||||
}
|
||||
else goto DONE; /* More than two characters found */
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user