1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-27 15:57:02 +00:00

pcre2: Update to upstream version 10.36

Changelog: https://vcs.pcre.org/pcre2/code/tags/pcre2-10.36/ChangeLog?view=markup
This commit is contained in:
Rémi Verschelde
2021-01-08 14:10:32 +01:00
parent 9241aebecd
commit 951ad29c0f
50 changed files with 8400 additions and 16557 deletions

View File

@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
New API code Copyright (c) 2016-2019 University of Cambridge
New API code Copyright (c) 2016-2020 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -49,8 +49,9 @@ POSSIBILITY OF SUCH DAMAGE.
#define SUBSTITUTE_OPTIONS \
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
PCRE2_SUBSTITUTE_UNSET_EMPTY)
PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
@@ -194,6 +195,7 @@ overflow, either give an error immediately, or keep on, accumulating the
length. */
#define CHECKMEMCPY(from,length) \
{ \
if (!overflowed && lengthleft < length) \
{ \
if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
@@ -209,7 +211,8 @@ length. */
memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
buff_offset += length; \
lengthleft -= length; \
}
} \
}
/* Here's the function */
@@ -226,11 +229,14 @@ int forcecasereset = 0;
uint32_t ovector_count;
uint32_t goptions = 0;
uint32_t suboptions;
BOOL match_data_created = FALSE;
BOOL literal = FALSE;
pcre2_match_data *internal_match_data = NULL;
BOOL escaped_literal = FALSE;
BOOL overflowed = FALSE;
BOOL use_existing_match;
BOOL replacement_only;
#ifdef SUPPORT_UNICODE
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
BOOL ucp = (code->overall_options & PCRE2_UCP) != 0;
#endif
PCRE2_UCHAR temp[6];
PCRE2_SPTR ptr;
@@ -248,23 +254,54 @@ lengthleft = buff_length = *blength;
*blength = PCRE2_UNSET;
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
/* Partial matching is not valid. This must come after setting *blength to
/* Partial matching is not valid. This must come after setting *blength to
PCRE2_UNSET, so as not to imply an offset in the replacement. */
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
return PCRE2_ERROR_BADOPTION;
/* If no match data block is provided, create one. */
/* Check for using a match that has already happened. Note that the subject
pointer in the match data may be NULL after a no-match. */
use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
/* If starting from an existing match, there must be an externally provided
match data block. We create an internal match_data block in two cases: (a) an
external one is not supplied (and we are not starting from an existing match);
(b) an existing match is to be used for the first substitution. In the latter
case, we copy the existing match into the internal block. This ensures that no
changes are made to the existing match data block. */
if (match_data == NULL)
{
pcre2_general_context *gcontext;
if (use_existing_match) return PCRE2_ERROR_NULL;
gcontext = (mcontext == NULL)?
(pcre2_general_context *)code :
(pcre2_general_context *)mcontext;
match_data = internal_match_data =
pcre2_match_data_create_from_pattern(code, gcontext);
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
}
else if (use_existing_match)
{
pcre2_general_context *gcontext = (mcontext == NULL)?
(pcre2_general_context *)code :
(pcre2_general_context *)mcontext;
match_data = pcre2_match_data_create_from_pattern(code, gcontext);
if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
match_data_created = TRUE;
int pairs = (code->top_bracket + 1 < match_data->oveccount)?
code->top_bracket + 1 : match_data->oveccount;
internal_match_data = pcre2_match_data_create(match_data->oveccount,
gcontext);
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
+ 2*pairs*sizeof(PCRE2_SIZE));
match_data = internal_match_data;
}
/* Remember ovector details */
ovector = pcre2_get_ovector_pointer(match_data);
ovector_count = pcre2_get_ovector_count(match_data);
@@ -286,7 +323,7 @@ repend = replacement + rlength;
#ifdef SUPPORT_UNICODE
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
{
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
if (rc != 0)
{
match_data->leftchar = 0;
@@ -300,7 +337,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
suboptions = options & SUBSTITUTE_OPTIONS;
options &= ~SUBSTITUTE_OPTIONS;
/* Copy up to the start offset */
/* Error if the start match offset is greater than the length of the subject. */
if (start_offset > length)
{
@@ -308,9 +345,13 @@ if (start_offset > length)
rc = PCRE2_ERROR_BADOFFSET;
goto EXIT;
}
CHECKMEMCPY(subject, start_offset);
/* Loop for global substituting. */
/* Copy up to the start offset, unless only the replacement is required. */
if (!replacement_only) CHECKMEMCPY(subject, start_offset);
/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
match is taken from the match_data that was passed in. */
subs = 0;
do
@@ -318,7 +359,12 @@ do
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
uint32_t ptrstackptr = 0;
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
if (use_existing_match)
{
rc = match_data->rc;
use_existing_match = FALSE;
}
else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
match_data, mcontext);
#ifdef SUPPORT_UNICODE
@@ -364,44 +410,44 @@ do
#endif
}
/* Copy what we have advanced past, reset the special global options, and
continue to the next match. */
/* Copy what we have advanced past (unless not required), reset the special
global options, and continue to the next match. */
fraglength = start_offset - save_start;
CHECKMEMCPY(subject + save_start, fraglength);
if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
goptions = 0;
continue;
}
/* Handle a successful match. Matches that use \K to end before they start
or start before the current point in the subject are not supported. */
if (ovector[1] < ovector[0] || ovector[0] < start_offset)
{
rc = PCRE2_ERROR_BADSUBSPATTERN;
goto EXIT;
}
/* Check for the same match as previous. This is legitimate after matching an
/* Check for the same match as previous. This is legitimate after matching an
empty string that starts after the initial match offset. We have tried again
at the match point in case the pattern is one like /(?<=\G.)/ which can never
match at its starting point, so running the match achieves the bumpalong. If
we do get the same (null) match at the original match point, it isn't such a
pattern, so we now do the empty string magic. In all other cases, a repeat
match should never occur. */
if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
{
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
{
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
ovecsave[2] = start_offset;
continue; /* Back to the top of the loop */
{
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
{
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
ovecsave[2] = start_offset;
continue; /* Back to the top of the loop */
}
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
goto EXIT;
}
goto EXIT;
}
/* Count substitutions with a paranoid check for integer overflow; surely no
real call to this function would ever hit this! */
@@ -412,21 +458,30 @@ do
}
subs++;
/* Copy the text leading up to the match, and remember where the insert
begins and how many ovector pairs are set. */
/* Copy the text leading up to the match (unless not required), and remember
where the insert begins and how many ovector pairs are set. */
if (rc == 0) rc = ovector_count;
fraglength = ovector[0] - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
scb.output_offsets[0] = buff_offset;
scb.oveccount = rc;
/* Process the replacement string. Literal mode is set by \Q, but only in
extended mode when backslashes are being interpreted. In extended mode we
must handle nested substrings that are to be reprocessed. */
/* Process the replacement string. If the entire replacement is literal, just
copy it with length check. */
ptr = replacement;
for (;;)
if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
{
CHECKMEMCPY(ptr, rlength);
}
/* Within a non-literal replacement, which must be scanned character by
character, local literal mode can be set by \Q, but only in extended mode
when backslashes are being interpreted. In extended mode we must handle
nested substrings that are to be reprocessed. */
else for (;;)
{
uint32_t ch;
unsigned int chlen;
@@ -443,11 +498,11 @@ do
/* Handle the next character */
if (literal)
if (escaped_literal)
{
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
{
literal = FALSE;
escaped_literal = FALSE;
ptr += 2;
continue;
}
@@ -704,7 +759,7 @@ do
if (forcecase != 0)
{
#ifdef SUPPORT_UNICODE
if (utf)
if (utf || ucp)
{
uint32_t type = UCD_CHARTYPE(ch);
if (PRIV(ucp_gentype)[type] == ucp_L &&
@@ -784,7 +839,7 @@ do
continue;
case ESC_Q:
literal = TRUE;
escaped_literal = TRUE;
continue;
case 0: /* Data character */
@@ -806,7 +861,7 @@ do
if (forcecase != 0)
{
#ifdef SUPPORT_UNICODE
if (utf)
if (utf || ucp)
{
uint32_t type = UCD_CHARTYPE(ch);
if (PRIV(ucp_gentype)[type] == ucp_L &&
@@ -835,53 +890,59 @@ do
} /* End handling a literal code unit */
} /* End of loop for scanning the replacement. */
/* The replacement has been copied to the output, or its size has been
remembered. Do the callout if there is one and we have done an actual
/* The replacement has been copied to the output, or its size has been
remembered. Do the callout if there is one and we have done an actual
replacement. */
if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
{
scb.subscount = subs;
scb.subscount = subs;
scb.output_offsets[1] = buff_offset;
rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
/* A non-zero return means cancel this substitution. Instead, copy the
/* A non-zero return means cancel this substitution. Instead, copy the
matched string fragment. */
if (rc != 0)
{
PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
PCRE2_SIZE oldlength = ovector[1] - ovector[0];
buff_offset -= newlength;
lengthleft += newlength;
CHECKMEMCPY(subject + ovector[0], oldlength);
if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
/* A negative return means do not do any more. */
if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
}
}
}
/* Save the details of this match. See above for how this data is used. If we
matched an empty string, do the magic for global matches. Finally, update the
start offset to point to the rest of the subject string. */
ovecsave[0] = ovector[0];
ovecsave[1] = ovector[1];
matched an empty string, do the magic for global matches. Update the start
offset to point to the rest of the subject string. If we re-used an existing
match for the first match, switch to the internal match data block. */
ovecsave[0] = ovector[0];
ovecsave[1] = ovector[1];
ovecsave[2] = start_offset;
goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
start_offset = ovector[1];
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
/* Copy the rest of the subject. */
/* Copy the rest of the subject unless not required, and terminate the output
with a binary zero. */
if (!replacement_only)
{
fraglength = length - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
}
fraglength = length - start_offset;
CHECKMEMCPY(subject + start_offset, fraglength);
temp[0] = 0;
CHECKMEMCPY(temp , 1);
CHECKMEMCPY(temp, 1);
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
and matching has carried on after a full buffer, in order to compute the length
@@ -903,7 +964,7 @@ else
}
EXIT:
if (match_data_created) pcre2_match_data_free(match_data);
if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
else match_data->rc = rc;
return rc;