1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-18 14:21:41 +00:00

pcre2: Update to upstream version 10.36

Changelog: https://vcs.pcre.org/pcre2/code/tags/pcre2-10.36/ChangeLog?view=markup
This commit is contained in:
Rémi Verschelde
2021-01-08 14:10:32 +01:00
parent 9241aebecd
commit 951ad29c0f
50 changed files with 8400 additions and 16557 deletions

View File

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -1202,7 +1202,7 @@ in the decoded tables. */
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
{
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
(*ref_count)++;
}
@@ -1232,15 +1232,15 @@ if (newcode == NULL) return NULL;
memcpy(newcode, code, code->blocksize);
newcode->executable_jit = NULL;
newtables = code->memctl.malloc(tables_length + sizeof(PCRE2_SIZE),
newtables = code->memctl.malloc(TABLES_LENGTH + sizeof(PCRE2_SIZE),
code->memctl.memory_data);
if (newtables == NULL)
{
code->memctl.free((void *)newcode, code->memctl.memory_data);
return NULL;
}
memcpy(newtables, code->tables, tables_length);
ref_count = (PCRE2_SIZE *)(newtables + tables_length);
memcpy(newtables, code->tables, TABLES_LENGTH);
ref_count = (PCRE2_SIZE *)(newtables + TABLES_LENGTH);
*ref_count = 1;
newcode->tables = newtables;
@@ -1270,7 +1270,7 @@ if (code != NULL)
be freed when there are no more references to them. The *ref_count should
always be > 0. */
ref_count = (PCRE2_SIZE *)(code->tables + tables_length);
ref_count = (PCRE2_SIZE *)(code->tables + TABLES_LENGTH);
if (*ref_count > 0)
{
(*ref_count)--;
@@ -2344,7 +2344,7 @@ if (ptr > *nameptr + MAX_NAME_SIZE)
*errorcodeptr = ERR48;
goto FAILED;
}
*namelenptr = ptr - *nameptr;
*namelenptr = (uint32_t)(ptr - *nameptr);
/* Subpattern names must not be empty, and their terminator is checked here.
(What follows a verb or alpha assertion name is checked separately.) */
@@ -3653,7 +3653,7 @@ while (ptr < ptrend)
if (ptr >= ptrend) goto UNCLOSED_PARENTHESIS;
/* If ( is not followed by ? it is either a capture or a special verb or an
alpha assertion. */
alpha assertion or a positive non-atomic lookahead. */
if (*ptr != CHAR_QUESTION_MARK)
{
@@ -3685,10 +3685,10 @@ while (ptr < ptrend)
break;
/* Handle "alpha assertions" such as (*pla:...). Most of these are
synonyms for the historical symbolic assertions, but the script run ones
are new. They are distinguished by starting with a lower case letter.
Checking both ends of the alphabet makes this work in all character
codes. */
synonyms for the historical symbolic assertions, but the script run and
non-atomic lookaround ones are new. They are distinguished by starting
with a lower case letter. Checking both ends of the alphabet makes this
work in all character codes. */
else if (CHMAX_255(c) && (cb->ctypes[c] & ctype_lcletter) != 0)
{
@@ -3747,9 +3747,7 @@ while (ptr < ptrend)
goto POSITIVE_LOOK_AHEAD;
case META_LOOKAHEAD_NA:
*parsed_pattern++ = meta;
ptr++;
goto POST_ASSERTION;
goto POSITIVE_NONATOMIC_LOOK_AHEAD;
case META_LOOKAHEADNOT:
goto NEGATIVE_LOOK_AHEAD;
@@ -4333,6 +4331,7 @@ while (ptr < ptrend)
{
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
minor = (*ptr++ - CHAR_0) * 10;
if (ptr >= ptrend) goto BAD_VERSION_CONDITION;
if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
goto BAD_VERSION_CONDITION;
@@ -4438,6 +4437,12 @@ while (ptr < ptrend)
ptr++;
goto POST_ASSERTION;
case CHAR_ASTERISK:
POSITIVE_NONATOMIC_LOOK_AHEAD: /* Come from (?* */
*parsed_pattern++ = META_LOOKAHEAD_NA;
ptr++;
goto POST_ASSERTION;
case CHAR_EXCLAMATION_MARK:
NEGATIVE_LOOK_AHEAD: /* Come from (*nla: */
*parsed_pattern++ = META_LOOKAHEADNOT;
@@ -4447,20 +4452,23 @@ while (ptr < ptrend)
/* ---- Lookbehind assertions ---- */
/* (?< followed by = or ! is a lookbehind assertion. Otherwise (?< is the
start of the name of a capturing group. */
/* (?< followed by = or ! or * is a lookbehind assertion. Otherwise (?<
is the start of the name of a capturing group. */
case CHAR_LESS_THAN_SIGN:
if (ptrend - ptr <= 1 ||
(ptr[1] != CHAR_EQUALS_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK))
(ptr[1] != CHAR_EQUALS_SIGN &&
ptr[1] != CHAR_EXCLAMATION_MARK &&
ptr[1] != CHAR_ASTERISK))
{
terminator = CHAR_GREATER_THAN_SIGN;
goto DEFINE_NAME;
}
*parsed_pattern++ = (ptr[1] == CHAR_EQUALS_SIGN)?
META_LOOKBEHIND : META_LOOKBEHINDNOT;
META_LOOKBEHIND : (ptr[1] == CHAR_EXCLAMATION_MARK)?
META_LOOKBEHINDNOT : META_LOOKBEHIND_NA;
POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
POST_LOOKBEHIND: /* Come from (*plb: (*naplb: and (*nlb: */
*has_lookbehind = TRUE;
offset = (PCRE2_SIZE)(ptr - cb->start_pattern - 2);
PUTOFFSET(offset, parsed_pattern);
@@ -4633,8 +4641,6 @@ while (ptr < ptrend)
*parsed_pattern++ = META_KET;
}
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
else top_nest--;
}
@@ -4899,7 +4905,7 @@ range. */
if ((options & PCRE2_CASELESS) != 0)
{
#ifdef SUPPORT_UNICODE
if ((options & PCRE2_UTF) != 0)
if ((options & (PCRE2_UTF|PCRE2_UCP)) != 0)
{
int rc;
uint32_t oc, od;
@@ -5314,7 +5320,8 @@ dynamically as we process the pattern. */
#ifdef SUPPORT_UNICODE
BOOL utf = (options & PCRE2_UTF) != 0;
#else /* No UTF support */
BOOL ucp = (options & PCRE2_UCP) != 0;
#else /* No Unicode support */
BOOL utf = FALSE;
#endif
@@ -5559,12 +5566,12 @@ for (;; pptr++)
zerofirstcu = firstcu;
zerofirstcuflags = firstcuflags;
/* For caseless UTF mode, check whether this character has more than
one other case. If so, generate a special OP_NOTPROP item instead of
/* For caseless UTF or UCP mode, check whether this character has more
than one other case. If so, generate a special OP_NOTPROP item instead of
OP_NOTI. */
#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_CASELESS) != 0 &&
if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 &&
(d = UCD_CASESET(c)) != 0)
{
*code++ = OP_NOTPROP;
@@ -5597,7 +5604,7 @@ for (;; pptr++)
uint32_t d;
#ifdef SUPPORT_UNICODE
if (utf && c > 127) d = UCD_OTHERCASE(c); else
if ((utf || ucp) && c > 127) d = UCD_OTHERCASE(c); else
#endif
{
#if PCRE2_CODE_UNIT_WIDTH != 8
@@ -6671,23 +6678,11 @@ for (;; pptr++)
}
/* For a back reference, update the back reference map and the
maximum back reference. Then, for each group, we must check to
see if it is recursive, that is, it is inside the group that it
references. A flag is set so that the group can be made atomic.
*/
maximum back reference. */
cb->backref_map |= (groupnumber < 32)? (1u << groupnumber) : 1;
if (groupnumber > cb->top_backref)
cb->top_backref = groupnumber;
for (oc = cb->open_caps; oc != NULL; oc = oc->next)
{
if (oc->number == groupnumber)
{
oc->flag = TRUE;
break;
}
}
}
}
@@ -7081,15 +7076,18 @@ for (;; pptr++)
previous[GET(previous, 1)] != OP_ALT)
goto END_REPEAT;
/* There is no sense in actually repeating assertions. The only
potential use of repetition is in cases when the assertion is optional.
Therefore, if the minimum is greater than zero, just ignore the repeat.
If the maximum is not zero or one, set it to 1. */
/* Perl allows all assertions to be quantified, and when they contain
capturing parentheses and/or are optional there are potential uses for
this feature. PCRE2 used to force the maximum quantifier to 1 on the
invalid grounds that further repetition was never useful. This was
always a bit pointless, since an assertion could be wrapped with a
repeated group to achieve the effect. General repetition is now
permitted, but if the maximum is unlimited it is set to one more than
the minimum. */
if (op_previous < OP_ONCE) /* Assertion */
{
if (repeat_min > 0) goto END_REPEAT;
if (repeat_max > 1) repeat_max = 1;
if (repeat_max == REPEAT_UNLIMITED) repeat_max = repeat_min + 1;
}
/* The case of a zero minimum is special because of the need to stick
@@ -7682,19 +7680,6 @@ for (;; pptr++)
cb->backref_map |= (meta_arg < 32)? (1u << meta_arg) : 1;
if (meta_arg > cb->top_backref) cb->top_backref = meta_arg;
/* Check to see if this back reference is recursive, that is, it
is inside the group that it references. A flag is set so that the
group can be made atomic. */
for (oc = cb->open_caps; oc != NULL; oc = oc->next)
{
if (oc->number == meta_arg)
{
oc->flag = TRUE;
break;
}
}
break;
@@ -7840,11 +7825,12 @@ for (;; pptr++)
NORMAL_CHAR_SET: /* Character is already in meta */
matched_char = TRUE;
/* For caseless UTF mode, check whether this character has more than one
other case. If so, generate a special OP_PROP item instead of OP_CHARI. */
/* For caseless UTF or UCP mode, check whether this character has more than
one other case. If so, generate a special OP_PROP item instead of OP_CHARI.
*/
#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_CASELESS) != 0)
if ((utf||ucp) && (options & PCRE2_CASELESS) != 0)
{
uint32_t caseset = UCD_CASESET(meta);
if (caseset != 0)
@@ -8053,7 +8039,6 @@ if (*code == OP_CBRA)
capnumber = GET2(code, 1 + LINK_SIZE);
capitem.number = capnumber;
capitem.next = cb->open_caps;
capitem.flag = FALSE;
capitem.assert_depth = cb->assert_depth;
cb->open_caps = &capitem;
}
@@ -8182,26 +8167,9 @@ for (;;)
PUT(code, 1, (int)(code - start_bracket));
code += 1 + LINK_SIZE;
/* If it was a capturing subpattern, check to see if it contained any
recursive back references. If so, we must wrap it in atomic brackets. In
any event, remove the block from the chain. */
/* If it was a capturing subpattern, remove the block from the chain. */
if (capnumber > 0)
{
if (cb->open_caps->flag)
{
(void)memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
CU2BYTES(code - start_bracket));
*start_bracket = OP_ONCE;
code += 1 + LINK_SIZE;
PUT(start_bracket, 1, (int)(code - start_bracket));
*code = OP_KET;
PUT(code, 1, (int)(code - start_bracket));
code += 1 + LINK_SIZE;
length += 2 + 2*LINK_SIZE;
}
cb->open_caps = cb->open_caps->next;
}
if (capnumber > 0) cb->open_caps = cb->open_caps->next;
/* Set values to pass back */
@@ -8836,9 +8804,10 @@ memset(slot + IMM2_SIZE + length, 0,
/* This function is called to skip parts of the parsed pattern when finding the
length of a lookbehind branch. It is called after (*ACCEPT) and (*FAIL) to find
the end of the branch, it is called to skip over an internal lookaround, and it
is also called to skip to the end of a class, during which it will never
encounter nested groups (but there's no need to have special code for that).
the end of the branch, it is called to skip over an internal lookaround or
(DEFINE) group, and it is also called to skip to the end of a class, during
which it will never encounter nested groups (but there's no need to have
special code for that).
When called to find the end of a branch or group, pptr must point to the first
meta code inside the branch, not the branch-starting code. In other cases it
@@ -9316,14 +9285,21 @@ for (;; pptr++)
itemlength = grouplength;
break;
/* Check nested groups - advance past the initial data for each type and
then seek a fixed length with get_grouplength(). */
/* A (DEFINE) group is never obeyed inline and so it does not contribute to
the length of this branch. Skip from the following item to the next
unpaired ket. */
case META_COND_DEFINE:
pptr = parsed_skip(pptr + 1, PSKIP_KET);
break;
/* Check other nested groups - advance past the initial data for each type
and then seek a fixed length with get_grouplength(). */
case META_COND_NAME:
case META_COND_NUMBER:
case META_COND_RNAME:
case META_COND_RNUMBER:
case META_COND_DEFINE:
pptr += 2 + SIZEOFFSET;
goto CHECK_GROUP;
@@ -9580,6 +9556,10 @@ for (; *pptr != META_END; pptr++)
break;
case META_COND_DEFINE:
pptr += SIZEOFFSET;
nestlevel++;
break;
case META_COND_NAME:
case META_COND_NUMBER:
case META_COND_RNAME:
@@ -9660,6 +9640,7 @@ pcre2_compile(PCRE2_SPTR pattern, PCRE2_SIZE patlen, uint32_t options,
int *errorptr, PCRE2_SIZE *erroroffset, pcre2_compile_context *ccontext)
{
BOOL utf; /* Set TRUE for UTF mode */
BOOL ucp; /* Set TRUE for UCP mode */
BOOL has_lookbehind = FALSE; /* Set TRUE if a lookbehind is found */
BOOL zero_terminated; /* Set TRUE for zero-terminated pattern */
pcre2_real_code *re = NULL; /* What we will return */
@@ -9947,8 +9928,8 @@ if (utf)
/* Check UCP lockout. */
if ((cb.external_options & (PCRE2_UCP|PCRE2_NEVER_UCP)) ==
(PCRE2_UCP|PCRE2_NEVER_UCP))
ucp = (cb.external_options & PCRE2_UCP) != 0;
if (ucp && (cb.external_options & PCRE2_NEVER_UCP) != 0)
{
errorcode = ERR75;
goto HAD_EARLY_ERROR;
@@ -10324,7 +10305,7 @@ function call. */
if (errorcode == 0 && (re->overall_options & PCRE2_NO_AUTO_POSSESS) == 0)
{
PCRE2_UCHAR *temp = (PCRE2_UCHAR *)codestart;
if (PRIV(auto_possessify)(temp, utf, &cb) != 0) errorcode = ERR80;
if (PRIV(auto_possessify)(temp, &cb) != 0) errorcode = ERR80;
}
/* Failed to compile, or error while post-processing. */
@@ -10372,21 +10353,25 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
if ((firstcuflags & REQ_CASELESS) != 0)
{
if (firstcu < 128 || (!utf && firstcu < 255))
if (firstcu < 128 || (!utf && !ucp && firstcu < 255))
{
if (cb.fcc[firstcu] != firstcu) re->flags |= PCRE2_FIRSTCASELESS;
}
/* The first code unit is >= 128 in UTF mode, or >= 255 otherwise. In
8-bit UTF mode, codepoints in the range 128-255 are introductory code
points and cannot have another case. In 16-bit and 32-bit modes, we can
check wide characters when UTF (and therefore UCP) is supported. */
/* The first code unit is >= 128 in UTF or UCP mode, or >= 255 otherwise.
In 8-bit UTF mode, codepoints in the range 128-255 are introductory code
points and cannot have another case, but if UCP is set they may do. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
else if (firstcu <= MAX_UTF_CODE_POINT &&
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
else if (ucp && !utf && UCD_OTHERCASE(firstcu) != firstcu)
re->flags |= PCRE2_FIRSTCASELESS;
#else
else if ((utf || ucp) && firstcu <= MAX_UTF_CODE_POINT &&
UCD_OTHERCASE(firstcu) != firstcu)
re->flags |= PCRE2_FIRSTCASELESS;
#endif
#endif /* SUPPORT_UNICODE */
}
}
@@ -10435,14 +10420,20 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
if ((reqcuflags & REQ_CASELESS) != 0)
{
if (reqcu < 128 || (!utf && reqcu < 255))
if (reqcu < 128 || (!utf && !ucp && reqcu < 255))
{
if (cb.fcc[reqcu] != reqcu) re->flags |= PCRE2_LASTCASELESS;
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8
else if (reqcu <= MAX_UTF_CODE_POINT && UCD_OTHERCASE(reqcu) != reqcu)
re->flags |= PCRE2_LASTCASELESS;
#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
else if (ucp && !utf && UCD_OTHERCASE(reqcu) != reqcu)
re->flags |= PCRE2_LASTCASELESS;
#else
else if ((utf || ucp) && reqcu <= MAX_UTF_CODE_POINT &&
UCD_OTHERCASE(reqcu) != reqcu)
re->flags |= PCRE2_LASTCASELESS;
#endif
#endif /* SUPPORT_UNICODE */
}
}
}