You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-27 15:57:02 +00:00
pcre2: Update to upstream version 10.36
Changelog: https://vcs.pcre.org/pcre2/code/tags/pcre2-10.36/ChangeLog?view=markup
This commit is contained in:
195
thirdparty/pcre2/src/pcre2_substitute.c
vendored
195
thirdparty/pcre2/src/pcre2_substitute.c
vendored
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2019 University of Cambridge
|
||||
New API code Copyright (c) 2016-2020 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -49,8 +49,9 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#define SUBSTITUTE_OPTIONS \
|
||||
(PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
|
||||
PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
PCRE2_SUBSTITUTE_LITERAL|PCRE2_SUBSTITUTE_MATCHED| \
|
||||
PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_REPLACEMENT_ONLY| \
|
||||
PCRE2_SUBSTITUTE_UNKNOWN_UNSET|PCRE2_SUBSTITUTE_UNSET_EMPTY)
|
||||
|
||||
|
||||
|
||||
@@ -194,6 +195,7 @@ overflow, either give an error immediately, or keep on, accumulating the
|
||||
length. */
|
||||
|
||||
#define CHECKMEMCPY(from,length) \
|
||||
{ \
|
||||
if (!overflowed && lengthleft < length) \
|
||||
{ \
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
|
||||
@@ -209,7 +211,8 @@ length. */
|
||||
memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
|
||||
buff_offset += length; \
|
||||
lengthleft -= length; \
|
||||
}
|
||||
} \
|
||||
}
|
||||
|
||||
/* Here's the function */
|
||||
|
||||
@@ -226,11 +229,14 @@ int forcecasereset = 0;
|
||||
uint32_t ovector_count;
|
||||
uint32_t goptions = 0;
|
||||
uint32_t suboptions;
|
||||
BOOL match_data_created = FALSE;
|
||||
BOOL literal = FALSE;
|
||||
pcre2_match_data *internal_match_data = NULL;
|
||||
BOOL escaped_literal = FALSE;
|
||||
BOOL overflowed = FALSE;
|
||||
BOOL use_existing_match;
|
||||
BOOL replacement_only;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
|
||||
BOOL ucp = (code->overall_options & PCRE2_UCP) != 0;
|
||||
#endif
|
||||
PCRE2_UCHAR temp[6];
|
||||
PCRE2_SPTR ptr;
|
||||
@@ -248,23 +254,54 @@ lengthleft = buff_length = *blength;
|
||||
*blength = PCRE2_UNSET;
|
||||
ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
|
||||
|
||||
/* Partial matching is not valid. This must come after setting *blength to
|
||||
/* Partial matching is not valid. This must come after setting *blength to
|
||||
PCRE2_UNSET, so as not to imply an offset in the replacement. */
|
||||
|
||||
if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
|
||||
return PCRE2_ERROR_BADOPTION;
|
||||
|
||||
/* If no match data block is provided, create one. */
|
||||
/* Check for using a match that has already happened. Note that the subject
|
||||
pointer in the match data may be NULL after a no-match. */
|
||||
|
||||
use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0);
|
||||
replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0);
|
||||
|
||||
/* If starting from an existing match, there must be an externally provided
|
||||
match data block. We create an internal match_data block in two cases: (a) an
|
||||
external one is not supplied (and we are not starting from an existing match);
|
||||
(b) an existing match is to be used for the first substitution. In the latter
|
||||
case, we copy the existing match into the internal block. This ensures that no
|
||||
changes are made to the existing match data block. */
|
||||
|
||||
if (match_data == NULL)
|
||||
{
|
||||
pcre2_general_context *gcontext;
|
||||
if (use_existing_match) return PCRE2_ERROR_NULL;
|
||||
gcontext = (mcontext == NULL)?
|
||||
(pcre2_general_context *)code :
|
||||
(pcre2_general_context *)mcontext;
|
||||
match_data = internal_match_data =
|
||||
pcre2_match_data_create_from_pattern(code, gcontext);
|
||||
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
}
|
||||
|
||||
else if (use_existing_match)
|
||||
{
|
||||
pcre2_general_context *gcontext = (mcontext == NULL)?
|
||||
(pcre2_general_context *)code :
|
||||
(pcre2_general_context *)mcontext;
|
||||
match_data = pcre2_match_data_create_from_pattern(code, gcontext);
|
||||
if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
match_data_created = TRUE;
|
||||
int pairs = (code->top_bracket + 1 < match_data->oveccount)?
|
||||
code->top_bracket + 1 : match_data->oveccount;
|
||||
internal_match_data = pcre2_match_data_create(match_data->oveccount,
|
||||
gcontext);
|
||||
if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY;
|
||||
memcpy(internal_match_data, match_data, offsetof(pcre2_match_data, ovector)
|
||||
+ 2*pairs*sizeof(PCRE2_SIZE));
|
||||
match_data = internal_match_data;
|
||||
}
|
||||
|
||||
/* Remember ovector details */
|
||||
|
||||
ovector = pcre2_get_ovector_pointer(match_data);
|
||||
ovector_count = pcre2_get_ovector_count(match_data);
|
||||
|
||||
@@ -286,7 +323,7 @@ repend = replacement + rlength;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
{
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
|
||||
rc = PRIV(valid_utf)(replacement, rlength, &(match_data->startchar));
|
||||
if (rc != 0)
|
||||
{
|
||||
match_data->leftchar = 0;
|
||||
@@ -300,7 +337,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
|
||||
suboptions = options & SUBSTITUTE_OPTIONS;
|
||||
options &= ~SUBSTITUTE_OPTIONS;
|
||||
|
||||
/* Copy up to the start offset */
|
||||
/* Error if the start match offset is greater than the length of the subject. */
|
||||
|
||||
if (start_offset > length)
|
||||
{
|
||||
@@ -308,9 +345,13 @@ if (start_offset > length)
|
||||
rc = PCRE2_ERROR_BADOFFSET;
|
||||
goto EXIT;
|
||||
}
|
||||
CHECKMEMCPY(subject, start_offset);
|
||||
|
||||
/* Loop for global substituting. */
|
||||
/* Copy up to the start offset, unless only the replacement is required. */
|
||||
|
||||
if (!replacement_only) CHECKMEMCPY(subject, start_offset);
|
||||
|
||||
/* Loop for global substituting. If PCRE2_SUBSTITUTE_MATCHED is set, the first
|
||||
match is taken from the match_data that was passed in. */
|
||||
|
||||
subs = 0;
|
||||
do
|
||||
@@ -318,7 +359,12 @@ do
|
||||
PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
|
||||
uint32_t ptrstackptr = 0;
|
||||
|
||||
rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
if (use_existing_match)
|
||||
{
|
||||
rc = match_data->rc;
|
||||
use_existing_match = FALSE;
|
||||
}
|
||||
else rc = pcre2_match(code, subject, length, start_offset, options|goptions,
|
||||
match_data, mcontext);
|
||||
|
||||
#ifdef SUPPORT_UNICODE
|
||||
@@ -364,44 +410,44 @@ do
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Copy what we have advanced past, reset the special global options, and
|
||||
continue to the next match. */
|
||||
/* Copy what we have advanced past (unless not required), reset the special
|
||||
global options, and continue to the next match. */
|
||||
|
||||
fraglength = start_offset - save_start;
|
||||
CHECKMEMCPY(subject + save_start, fraglength);
|
||||
if (!replacement_only) CHECKMEMCPY(subject + save_start, fraglength);
|
||||
goptions = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle a successful match. Matches that use \K to end before they start
|
||||
or start before the current point in the subject are not supported. */
|
||||
|
||||
|
||||
if (ovector[1] < ovector[0] || ovector[0] < start_offset)
|
||||
{
|
||||
rc = PCRE2_ERROR_BADSUBSPATTERN;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Check for the same match as previous. This is legitimate after matching an
|
||||
|
||||
/* Check for the same match as previous. This is legitimate after matching an
|
||||
empty string that starts after the initial match offset. We have tried again
|
||||
at the match point in case the pattern is one like /(?<=\G.)/ which can never
|
||||
match at its starting point, so running the match achieves the bumpalong. If
|
||||
we do get the same (null) match at the original match point, it isn't such a
|
||||
pattern, so we now do the empty string magic. In all other cases, a repeat
|
||||
match should never occur. */
|
||||
|
||||
|
||||
if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
|
||||
{
|
||||
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
|
||||
{
|
||||
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
ovecsave[2] = start_offset;
|
||||
continue; /* Back to the top of the loop */
|
||||
{
|
||||
if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
|
||||
{
|
||||
goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
|
||||
ovecsave[2] = start_offset;
|
||||
continue; /* Back to the top of the loop */
|
||||
}
|
||||
rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
goto EXIT;
|
||||
}
|
||||
|
||||
/* Count substitutions with a paranoid check for integer overflow; surely no
|
||||
real call to this function would ever hit this! */
|
||||
|
||||
@@ -412,21 +458,30 @@ do
|
||||
}
|
||||
subs++;
|
||||
|
||||
/* Copy the text leading up to the match, and remember where the insert
|
||||
begins and how many ovector pairs are set. */
|
||||
/* Copy the text leading up to the match (unless not required), and remember
|
||||
where the insert begins and how many ovector pairs are set. */
|
||||
|
||||
if (rc == 0) rc = ovector_count;
|
||||
fraglength = ovector[0] - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
if (!replacement_only) CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
scb.output_offsets[0] = buff_offset;
|
||||
scb.oveccount = rc;
|
||||
|
||||
/* Process the replacement string. Literal mode is set by \Q, but only in
|
||||
extended mode when backslashes are being interpreted. In extended mode we
|
||||
must handle nested substrings that are to be reprocessed. */
|
||||
/* Process the replacement string. If the entire replacement is literal, just
|
||||
copy it with length check. */
|
||||
|
||||
ptr = replacement;
|
||||
for (;;)
|
||||
if ((suboptions & PCRE2_SUBSTITUTE_LITERAL) != 0)
|
||||
{
|
||||
CHECKMEMCPY(ptr, rlength);
|
||||
}
|
||||
|
||||
/* Within a non-literal replacement, which must be scanned character by
|
||||
character, local literal mode can be set by \Q, but only in extended mode
|
||||
when backslashes are being interpreted. In extended mode we must handle
|
||||
nested substrings that are to be reprocessed. */
|
||||
|
||||
else for (;;)
|
||||
{
|
||||
uint32_t ch;
|
||||
unsigned int chlen;
|
||||
@@ -443,11 +498,11 @@ do
|
||||
|
||||
/* Handle the next character */
|
||||
|
||||
if (literal)
|
||||
if (escaped_literal)
|
||||
{
|
||||
if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
|
||||
{
|
||||
literal = FALSE;
|
||||
escaped_literal = FALSE;
|
||||
ptr += 2;
|
||||
continue;
|
||||
}
|
||||
@@ -704,7 +759,7 @@ do
|
||||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
if (utf || ucp)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
@@ -784,7 +839,7 @@ do
|
||||
continue;
|
||||
|
||||
case ESC_Q:
|
||||
literal = TRUE;
|
||||
escaped_literal = TRUE;
|
||||
continue;
|
||||
|
||||
case 0: /* Data character */
|
||||
@@ -806,7 +861,7 @@ do
|
||||
if (forcecase != 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (utf)
|
||||
if (utf || ucp)
|
||||
{
|
||||
uint32_t type = UCD_CHARTYPE(ch);
|
||||
if (PRIV(ucp_gentype)[type] == ucp_L &&
|
||||
@@ -835,53 +890,59 @@ do
|
||||
} /* End handling a literal code unit */
|
||||
} /* End of loop for scanning the replacement. */
|
||||
|
||||
/* The replacement has been copied to the output, or its size has been
|
||||
remembered. Do the callout if there is one and we have done an actual
|
||||
/* The replacement has been copied to the output, or its size has been
|
||||
remembered. Do the callout if there is one and we have done an actual
|
||||
replacement. */
|
||||
|
||||
|
||||
if (!overflowed && mcontext != NULL && mcontext->substitute_callout != NULL)
|
||||
{
|
||||
scb.subscount = subs;
|
||||
scb.subscount = subs;
|
||||
scb.output_offsets[1] = buff_offset;
|
||||
rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
|
||||
rc = mcontext->substitute_callout(&scb, mcontext->substitute_callout_data);
|
||||
|
||||
/* A non-zero return means cancel this substitution. Instead, copy the
|
||||
/* A non-zero return means cancel this substitution. Instead, copy the
|
||||
matched string fragment. */
|
||||
|
||||
if (rc != 0)
|
||||
{
|
||||
PCRE2_SIZE newlength = scb.output_offsets[1] - scb.output_offsets[0];
|
||||
PCRE2_SIZE oldlength = ovector[1] - ovector[0];
|
||||
|
||||
|
||||
buff_offset -= newlength;
|
||||
lengthleft += newlength;
|
||||
CHECKMEMCPY(subject + ovector[0], oldlength);
|
||||
|
||||
if (!replacement_only) CHECKMEMCPY(subject + ovector[0], oldlength);
|
||||
|
||||
/* A negative return means do not do any more. */
|
||||
|
||||
|
||||
if (rc < 0) suboptions &= (~PCRE2_SUBSTITUTE_GLOBAL);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Save the details of this match. See above for how this data is used. If we
|
||||
matched an empty string, do the magic for global matches. Finally, update the
|
||||
start offset to point to the rest of the subject string. */
|
||||
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
matched an empty string, do the magic for global matches. Update the start
|
||||
offset to point to the rest of the subject string. If we re-used an existing
|
||||
match for the first match, switch to the internal match data block. */
|
||||
|
||||
ovecsave[0] = ovector[0];
|
||||
ovecsave[1] = ovector[1];
|
||||
ovecsave[2] = start_offset;
|
||||
|
||||
|
||||
goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
|
||||
PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
|
||||
start_offset = ovector[1];
|
||||
} while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0); /* Repeat "do" loop */
|
||||
|
||||
/* Copy the rest of the subject. */
|
||||
/* Copy the rest of the subject unless not required, and terminate the output
|
||||
with a binary zero. */
|
||||
|
||||
if (!replacement_only)
|
||||
{
|
||||
fraglength = length - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
}
|
||||
|
||||
fraglength = length - start_offset;
|
||||
CHECKMEMCPY(subject + start_offset, fraglength);
|
||||
temp[0] = 0;
|
||||
CHECKMEMCPY(temp , 1);
|
||||
CHECKMEMCPY(temp, 1);
|
||||
|
||||
/* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
|
||||
and matching has carried on after a full buffer, in order to compute the length
|
||||
@@ -903,7 +964,7 @@ else
|
||||
}
|
||||
|
||||
EXIT:
|
||||
if (match_data_created) pcre2_match_data_free(match_data);
|
||||
if (internal_match_data != NULL) pcre2_match_data_free(internal_match_data);
|
||||
else match_data->rc = rc;
|
||||
return rc;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user