You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-18 14:21:41 +00:00
pcre2: Update to upstream version 10.36
Changelog: https://vcs.pcre.org/pcre2/code/tags/pcre2-10.36/ChangeLog?view=markup
This commit is contained in:
197
thirdparty/pcre2/src/pcre2_compile.c
vendored
197
thirdparty/pcre2/src/pcre2_compile.c
vendored
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -1202,7 +1202,7 @@ in the decoded tables. */
|
||||
|
||||
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
|
||||
{
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
|
||||
(*ref_count)++;
|
||||
}
|
||||
|
||||
@@ -1232,15 +1232,15 @@ if (newcode == NULL) return NULL;
|
||||
memcpy(newcode, code, code->blocksize);
|
||||
newcode->executable_jit = NULL;
|
||||
|
||||
newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
|
||||
newtables = code->memctl.malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE),
|
||||
code->memctl.memory_data);
|
||||
if (newtables == NULL)
|
||||
{
|
||||
code->memctl.free((void *)newcode, code->memctl.memory_data);
|
||||
return NULL;
|
||||
}
|
||||
memcpy(newtables, code->tables, tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(newtables + tables_length);
|
||||
memcpy(newtables, code->tables, TABLES_LENGTH);
|
||||
ref_count = (PCRE2_SIZE *)(newtables + TABLES_LENGTH);
|
||||
*ref_count = 1;
|
||||
|
||||
newcode->tables = newtables;
|
||||
@@ -1270,7 +1270,7 @@ if (code != NULL)
|
||||
be freed when there are no more references to them. The *ref_count should
|
||||
always be > 0. */
|
||||
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
|
||||
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
|
||||
if (*ref_count > 0)
|
||||
{
|
||||
(*ref_count)--;
|
||||
@@ -2344,7 +2344,7 @@ if (ptr > *nameptr + MAX_NAME_SIZE)
|
||||
*errorcodeptr = ERR48;
|
||||
goto FAILED;
|
||||
}
|
||||
*namelenptr = ptr - *nameptr;
|
||||
*namelenptr = (uint32_t)(ptr - *nameptr);
|
||||
|
||||
/* Subpattern names must not be empty, and their terminator is checked here.
|
||||
(What follows a verb or alpha assertion name is checked separately.) */
|
||||
@@ -3653,7 +3653,7 @@ while (ptr < ptrend)
|
||||
if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
|
||||
|
||||
/* If ( is not followed by ? it is either a capture or a special verb or an
|
||||
alpha assertion. */
|
||||
alpha assertion or a positive non-atomic lookahead. */
|
||||
|
||||
if (*ptr != CHAR_QUESTION_MARK)
|
||||
{
|
||||
@@ -3685,10 +3685,10 @@ while (ptr < ptrend)
|
||||
break;
|
||||
|
||||
/* Handle "alpha assertions" such as (*pla:...). Most of these are
|
||||
synonyms for the historical symbolic assertions, but the script run ones
|
||||
are new. They are distinguished by starting with a lower case letter.
|
||||
Checking both ends of the alphabet makes this work in all character
|
||||
codes. */
|
||||
synonyms for the historical symbolic assertions, but the script run and
|
||||
non-atomic lookaround ones are new. They are distinguished by starting
|
||||
with a lower case letter. Checking both ends of the alphabet makes this
|
||||
work in all character codes. */
|
||||
|
||||
else if (CHMAX_255(c) && (cb->ctypes[c] & ctype_lcletter) != 0)
|
||||
{
|
||||
@@ -3747,9 +3747,7 @@ while (ptr < ptrend)
|
||||
goto POSITIVE_LOOK_AHEAD;
|
||||
|
||||
case META_LOOKAHEAD_NA:
|
||||
*parsed_pattern++ = meta;
|
||||
ptr++;
|
||||
goto POST_ASSERTION;
|
||||
goto POSITIVE_NONATOMIC_LOOK_AHEAD;
|
||||
|
||||
case META_LOOKAHEADNOT:
|
||||
goto NEGATIVE_LOOK_AHEAD;
|
||||
@@ -4333,6 +4331,7 @@ while (ptr < ptrend)
|
||||
{
|
||||
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
|
||||
minor = (*ptr++ - CHAR_0) * 10;
|
||||
if (ptr >= ptrend) goto BAD_VERSION_CONDITION;
|
||||
if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
|
||||
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
|
||||
goto BAD_VERSION_CONDITION;
|
||||
@@ -4438,6 +4437,12 @@ while (ptr < ptrend)
|
||||
ptr++;
|
||||
goto POST_ASSERTION;
|
||||
|
||||
case CHAR_ASTERISK:
|
||||
POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (*napla: */
|
||||
*parsed_pattern++ = META_LOOKAHEAD_NA;
|
||||
ptr++;
|
||||
goto POST_ASSERTION;
|
||||
|
||||
case CHAR_EXCLAMATION_MARK:
|
||||
NEGATIVE_LOOK_AHEAD: /* Come from (*nla: */
|
||||
*parsed_pattern++ = META_LOOKAHEADNOT;
|
||||
@@ -4447,20 +4452,23 @@ while (ptr < ptrend)
|
||||
|
||||
/* ---- Lookbehind assertions ---- */
|
||||
|
||||
/* (?< followed by = or ! is a lookbehind assertion. Otherwise (?< is the
|
||||
start of the name of a capturing group. */
|
||||
/* (?< followed by = or ! or * is a lookbehind assertion. Otherwise (?<
|
||||
is the start of the name of a capturing group. */
|
||||
|
||||
case CHAR_LESS_THAN_SIGN:
|
||||
if (ptrend - ptr <= 1 ||
|
||||
(ptr[1] != CHAR_EQUALS_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK))
|
||||
(ptr[1] != CHAR_EQUALS_SIGN &&
|
||||
ptr[1] != CHAR_EXCLAMATION_MARK &&
|
||||
ptr[1] != CHAR_ASTERISK))
|
||||
{
|
||||
terminator = CHAR_GREATER_THAN_SIGN;
|
||||
goto DEFINE_NAME;
|
||||
}
|
||||
*parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)?
|
||||
META_LOOKBEHIND : META_LOOKBEHINDNOT;
|
||||
META_LOOKBEHIND : (ptr[1] == CHAR_EXCLAMATION_MARK)?
|
||||
META_LOOKBEHINDNOT : META_LOOKBEHIND_NA;
|
||||
|
||||
POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
|
||||
POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
|
||||
*has_lookbehind = TRUE;
|
||||
offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2);
|
||||
PUTOFFSET(offset, parsed_pattern);
|
||||
@@ -4633,8 +4641,6 @@ while (ptr < ptrend)
|
||||
*parsed_pattern++ = META_KET;
|
||||
}
|
||||
|
||||
|
||||
|
||||
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
|
||||
else top_nest--;
|
||||
}
|
||||
@@ -4899,7 +4905,7 @@ range. */
|
||||
if ((options & PCRE2_CASELESS) != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if ((options & PCRE2_UTF) != 0)
|
||||
if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
|
||||
{
|
||||
int rc;
|
||||
uint32_t oc, od;
|
||||
@@ -5314,7 +5320,8 @@ dynamically as we process the pattern. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||||
#else /* No UTF support */
|
||||
BOOL ucp = (options & PCRE2_UCP) != 0;
|
||||
#else /* No Unicode support */
|
||||
BOOL utf = FALSE;
|
||||
#endif
|
||||
|
||||
@@ -5559,12 +5566,12 @@ for (;; pptr++)
|
||||
zerofirstcu = firstcu;
|
||||
zerofirstcuflags = firstcuflags;
|
||||
|
||||
/* For caseless UTF mode, check whether this character has more than
|
||||
one other case. If so, generate a special OP_NOTPROP item instead of
|
||||
/* For caseless UTF or UCP mode, check whether this character has more
|
||||
than one other case. If so, generate a special OP_NOTPROP item instead of
|
||||
OP_NOTI. */
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_CASELESS) != 0 &&
|
||||
if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 &&
|
||||
(d = UCD_CASESET(c)) != 0)
|
||||
{
|
||||
*code++ = OP_NOTPROP;
|
||||
@@ -5597,7 +5604,7 @@ for (;; pptr++)
|
||||
uint32_t d;
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && c > 127) d = UCD_OTHERCASE(c); else
|
||||
if ((utf || ucp) && c > 127) d = UCD_OTHERCASE(c); else
|
||||
#endif
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH != 8
|
||||
@@ -6671,23 +6678,11 @@ for (;; pptr++)
|
||||
}
|
||||
|
||||
/* For a back reference, update the back reference map and the
|
||||
maximum back reference. Then, for each group, we must check to
|
||||
see if it is recursive, that is, it is inside the group that it
|
||||
references. A flag is set so that the group can be made atomic.
|
||||
*/
|
||||
maximum back reference. */
|
||||
|
||||
cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;
|
||||
if (groupnumber > cb->top_backref)
|
||||
cb->top_backref = groupnumber;
|
||||
|
||||
for (oc = cb->open_caps; oc != NULL; oc = oc->next)
|
||||
{
|
||||
if (oc->number == groupnumber)
|
||||
{
|
||||
oc->flag = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7081,15 +7076,18 @@ for (;; pptr++)
|
||||
previous[GET(previous, 1)] != OP_ALT)
|
||||
goto END_REPEAT;
|
||||
|
||||
/* There is no sense in actually repeating assertions. The only
|
||||
potential use of repetition is in cases when the assertion is optional.
|
||||
Therefore, if the minimum is greater than zero, just ignore the repeat.
|
||||
If the maximum is not zero or one, set it to 1. */
|
||||
/* Perl allows all assertions to be quantified, and when they contain
|
||||
capturing parentheses and/or are optional there are potential uses for
|
||||
this feature. PCRE2 used to force the maximum quantifier to 1 on the
|
||||
invalid grounds that further repetition was never useful. This was
|
||||
always a bit pointless, since an assertion could be wrapped with a
|
||||
repeated group to achieve the effect. General repetition is now
|
||||
permitted, but if the maximum is unlimited it is set to one more than
|
||||
the minimum. */
|
||||
|
||||
if (op_previous < OP_ONCE) /* Assertion */
|
||||
{
|
||||
if (repeat_min > 0) goto END_REPEAT;
|
||||
if (repeat_max > 1) repeat_max = 1;
|
||||
if (repeat_max == REPEAT_UNLIMITED) repeat_max = repeat_min + 1;
|
||||
}
|
||||
|
||||
/* The case of a zero minimum is special because of the need to stick
|
||||
@@ -7682,19 +7680,6 @@ for (;; pptr++)
|
||||
|
||||
cb->backref_map |= (meta_arg < 32)? (1u << meta_arg) : 1;
|
||||
if (meta_arg > cb->top_backref) cb->top_backref = meta_arg;
|
||||
|
||||
/* Check to see if this back reference is recursive, that is, it
|
||||
is inside the group that it references. A flag is set so that the
|
||||
group can be made atomic. */
|
||||
|
||||
for (oc = cb->open_caps; oc != NULL; oc = oc->next)
|
||||
{
|
||||
if (oc->number == meta_arg)
|
||||
{
|
||||
oc->flag = TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
||||
@@ -7840,11 +7825,12 @@ for (;; pptr++)
|
||||
NORMAL_CHAR_SET: /* Character is already in meta */
|
||||
matched_char = TRUE;
|
||||
|
||||
/* For caseless UTF mode, check whether this character has more than one
|
||||
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
|
||||
/* For caseless UTF or UCP mode, check whether this character has more than
|
||||
one other case. If so, generate a special OP_PROP item instead of OP_CHARI.
|
||||
*/
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_CASELESS) != 0)
|
||||
if ((utf||ucp) && (options & PCRE2_CASELESS) != 0)
|
||||
{
|
||||
uint32_t caseset = UCD_CASESET(meta);
|
||||
if (caseset != 0)
|
||||
@@ -8053,7 +8039,6 @@ if (*code == OP_CBRA)
|
||||
capnumber = GET2(code, 1 + LINK_SIZE);
|
||||
capitem.number = capnumber;
|
||||
capitem.next = cb->open_caps;
|
||||
capitem.flag = FALSE;
|
||||
capitem.assert_depth = cb->assert_depth;
|
||||
cb->open_caps = &capitem;
|
||||
}
|
||||
@@ -8182,26 +8167,9 @@ for (;;)
|
||||
PUT(code, 1, (int)(code - start_bracket));
|
||||
code += 1 + LINK_SIZE;
|
||||
|
||||
/* If it was a capturing subpattern, check to see if it contained any
|
||||
recursive back references. If so, we must wrap it in atomic brackets. In
|
||||
any event, remove the block from the chain. */
|
||||
/* If it was a capturing subpattern, remove the block from the chain. */
|
||||
|
||||
if (capnumber > 0)
|
||||
{
|
||||
if (cb->open_caps->flag)
|
||||
{
|
||||
(void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
|
||||
CU2BYTES(code - start_bracket));
|
||||
*start_bracket = OP_ONCE;
|
||||
code += 1 + LINK_SIZE;
|
||||
PUT(start_bracket, 1, (int)(code - start_bracket));
|
||||
*code = OP_KET;
|
||||
PUT(code, 1, (int)(code - start_bracket));
|
||||
code += 1 + LINK_SIZE;
|
||||
length += 2 + 2*LINK_SIZE;
|
||||
}
|
||||
cb->open_caps = cb->open_caps->next;
|
||||
}
|
||||
if (capnumber > 0) cb->open_caps = cb->open_caps->next;
|
||||
|
||||
/* Set values to pass back */
|
||||
|
||||
@@ -8836,9 +8804,10 @@ memset(slot + IMM2_SIZE + length, 0,
|
||||
|
||||
/* This function is called to skip parts of the parsed pattern when finding the
|
||||
length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find
|
||||
the end of the branch, it is called to skip over an internal lookaround, and it
|
||||
is also called to skip to the end of a class, during which it will never
|
||||
encounter nested groups (but there's no need to have special code for that).
|
||||
the end of the branch, it is called to skip over an internal lookaround or
|
||||
(DEFINE) group, and it is also called to skip to the end of a class, during
|
||||
which it will never encounter nested groups (but there's no need to have
|
||||
special code for that).
|
||||
|
||||
When called to find the end of a branch or group, pptr must point to the first
|
||||
meta code inside the branch, not the branch-starting code. In other cases it
|
||||
@@ -9316,14 +9285,21 @@ for (;; pptr++)
|
||||
itemlength = grouplength;
|
||||
break;
|
||||
|
||||
/* Check nested groups - advance past the initial data for each type and
|
||||
then seek a fixed length with get_grouplength(). */
|
||||
/* A (DEFINE) group is never obeyed inline and so it does not contribute to
|
||||
the length of this branch. Skip from the following item to the next
|
||||
unpaired ket. */
|
||||
|
||||
case META_COND_DEFINE:
|
||||
pptr = parsed_skip(pptr + 1, PSKIP_KET);
|
||||
break;
|
||||
|
||||
/* Check other nested groups - advance past the initial data for each type
|
||||
and then seek a fixed length with get_grouplength(). */
|
||||
|
||||
case META_COND_NAME:
|
||||
case META_COND_NUMBER:
|
||||
case META_COND_RNAME:
|
||||
case META_COND_RNUMBER:
|
||||
case META_COND_DEFINE:
|
||||
pptr += 2 + SIZEOFFSET;
|
||||
goto CHECK_GROUP;
|
||||
|
||||
@@ -9580,6 +9556,10 @@ for (; *pptr != META_END; pptr++)
|
||||
break;
|
||||
|
||||
case META_COND_DEFINE:
|
||||
pptr += SIZEOFFSET;
|
||||
nestlevel++;
|
||||
break;
|
||||
|
||||
case META_COND_NAME:
|
||||
case META_COND_NUMBER:
|
||||
case META_COND_RNAME:
|
||||
@@ -9660,6 +9640,7 @@ pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
|
||||
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
|
||||
{
|
||||
BOOL utf; /* Set TRUE for UTF mode */
|
||||
BOOL ucp; /* Set TRUE for UCP mode */
|
||||
BOOL has_lookbehind = FALSE; /* Set TRUE if a lookbehind is found */
|
||||
BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
|
||||
pcre2_real_code *re = NULL; /* What we will return */
|
||||
@@ -9947,8 +9928,8 @@ if (utf)
|
||||
|
||||
/* Check UCP lockout. */
|
||||
|
||||
if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) ==
|
||||
(PCRE2_UCP|PCRE2_NEVER_UCP))
|
||||
ucp = (cb.external_options & PCRE2_UCP) != 0;
|
||||
if (ucp && (cb.external_options & PCRE2_NEVER_UCP) != 0)
|
||||
{
|
||||
errorcode = ERR75;
|
||||
goto HAD_EARLY_ERROR;
|
||||
@@ -10324,7 +10305,7 @@ function call. */
|
||||
if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0)
|
||||
{
|
||||
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
|
||||
if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
|
||||
if (PRIV(auto_possessify)(temp, &cb) != 0) errorcode = ERR80;
|
||||
}
|
||||
|
||||
/* Failed to compile, or error while post-processing. */
|
||||
@@ -10372,21 +10353,25 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
||||
|
||||
if ((firstcuflags & REQ_CASELESS) != 0)
|
||||
{
|
||||
if (firstcu < 128 || (!utf && firstcu < 255))
|
||||
if (firstcu < 128 || (!utf && !ucp && firstcu < 255))
|
||||
{
|
||||
if (cb.fcc[firstcu] != firstcu) re->flags |= PCRE2_FIRSTCASELESS;
|
||||
}
|
||||
|
||||
/* The first code unit is > 128 in UTF mode, or > 255 otherwise. In
|
||||
8-bit UTF mode, codepoints in the range 128-255 are introductory code
|
||||
points and cannot have another case. In 16-bit and 32-bit modes, we can
|
||||
check wide characters when UTF (and therefore UCP) is supported. */
|
||||
/* The first code unit is >= 128 in UTF or UCP mode, or >= 255 otherwise.
|
||||
In 8-bit UTF mode, codepoints in the range 128-255 are introductory code
|
||||
points and cannot have another case, but if UCP is set they may do. */
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
else if (firstcu <= MAX_UTF_CODE_POINT &&
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
else if (ucp && !utf && UCD_OTHERCASE(firstcu) != firstcu)
|
||||
re->flags |= PCRE2_FIRSTCASELESS;
|
||||
#else
|
||||
else if ((utf || ucp) && firstcu <= MAX_UTF_CODE_POINT &&
|
||||
UCD_OTHERCASE(firstcu) != firstcu)
|
||||
re->flags |= PCRE2_FIRSTCASELESS;
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10435,14 +10420,20 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
||||
|
||||
if ((reqcuflags & REQ_CASELESS) != 0)
|
||||
{
|
||||
if (reqcu < 128 || (!utf && reqcu < 255))
|
||||
if (reqcu < 128 || (!utf && !ucp && reqcu < 255))
|
||||
{
|
||||
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
|
||||
}
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
|
||||
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 8
|
||||
else if (ucp && !utf && UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
#else
|
||||
else if ((utf || ucp) && reqcu <= MAX_UTF_CODE_POINT &&
|
||||
UCD_OTHERCASE(reqcu) != reqcu)
|
||||
re->flags |= PCRE2_LASTCASELESS;
|
||||
#endif
|
||||
#endif /* SUPPORT_UNICODE */
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user