You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-18 14:21:41 +00:00
pcre2: Update to upstream version 10.40
Changelog: https://github.com/PCRE2Project/pcre2/blob/pcre2-10.40/ChangeLog
This commit is contained in:
250
thirdparty/pcre2/src/pcre2_compile.c
vendored
250
thirdparty/pcre2/src/pcre2_compile.c
vendored
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2016-2021 University of Cambridge
|
||||
New API code Copyright (c) 2016-2022 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -124,7 +124,7 @@ static unsigned int
|
||||
|
||||
static int
|
||||
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
|
||||
uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *,
|
||||
uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
|
||||
compile_block *, PCRE2_SIZE *);
|
||||
|
||||
static int
|
||||
@@ -385,13 +385,15 @@ compiler is clever with identical subexpressions. */
|
||||
|
||||
#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7)))
|
||||
|
||||
/* Private flags added to firstcu and reqcu. */
|
||||
/* Values and flags for the unsigned xxcuflags variables that accompany xxcu
|
||||
variables, which are concerned with first and required code units. A value
|
||||
greater than or equal to REQ_NONE means "no code unit set"; otherwise the
|
||||
matching xxcu variable is set, and the low valued bits are relevant. */
|
||||
|
||||
#define REQ_CASELESS (1u << 0) /* Indicates caselessness */
|
||||
#define REQ_VARY (1u << 1) /* reqcu followed non-literal item */
|
||||
/* Negative values for the firstcu and reqcu flags */
|
||||
#define REQ_UNSET (-2) /* Not yet found anything */
|
||||
#define REQ_NONE (-1) /* Found not fixed char */
|
||||
#define REQ_UNSET 0xffffffffu /* Not yet found anything */
|
||||
#define REQ_NONE 0xfffffffeu /* Found not fixed character */
|
||||
#define REQ_CASELESS 0x00000001u /* Code unit in xxcu is caseless */
|
||||
#define REQ_VARY 0x00000002u /* Code unit is followed by non-literal */
|
||||
|
||||
/* These flags are used in the groupinfo vector. */
|
||||
|
||||
@@ -2088,7 +2090,9 @@ get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr,
|
||||
PCRE2_UCHAR c;
|
||||
PCRE2_SIZE i, bot, top;
|
||||
PCRE2_SPTR ptr = *ptrptr;
|
||||
PCRE2_UCHAR name[32];
|
||||
PCRE2_UCHAR name[50];
|
||||
PCRE2_UCHAR *vptr = NULL;
|
||||
uint16_t ptscript = PT_NOTSCRIPT;
|
||||
|
||||
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
||||
c = *ptr++;
|
||||
@@ -2100,36 +2104,95 @@ negation. */
|
||||
if (c == CHAR_LEFT_CURLY_BRACKET)
|
||||
{
|
||||
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
||||
|
||||
if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
|
||||
{
|
||||
*negptr = TRUE;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++)
|
||||
{
|
||||
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
||||
c = *ptr++;
|
||||
while (c == '_' || c == '-' || isspace(c))
|
||||
{
|
||||
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
|
||||
c = *ptr++;
|
||||
}
|
||||
if (c == CHAR_NUL) goto ERROR_RETURN;
|
||||
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
|
||||
name[i] = c;
|
||||
name[i] = tolower(c);
|
||||
if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i;
|
||||
}
|
||||
|
||||
if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
|
||||
name[i] = 0;
|
||||
}
|
||||
|
||||
/* Otherwise there is just one following character, which must be an ASCII
|
||||
letter. */
|
||||
/* If { doesn't follow \p or \P there is just one following character, which
|
||||
must be an ASCII letter. */
|
||||
|
||||
else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0)
|
||||
{
|
||||
name[0] = c;
|
||||
name[0] = tolower(c);
|
||||
name[1] = 0;
|
||||
}
|
||||
else goto ERROR_RETURN;
|
||||
|
||||
*ptrptr = ptr;
|
||||
|
||||
/* Search for a recognized property name using binary chop. */
|
||||
/* If the property contains ':' or '=' we have class name and value separately
|
||||
specified. The following are supported:
|
||||
|
||||
. Bidi_Class (synonym bc), for which the property names are "bidi<name>".
|
||||
. Script (synonym sc) for which the property name is the script name
|
||||
. Script_Extensions (synonym scx), ditto
|
||||
|
||||
As this is a small number, we currently just check the names directly. If this
|
||||
grows, a sorted table and a switch will be neater.
|
||||
|
||||
For both the script properties, set a PT_xxx value so that (1) they can be
|
||||
distinguished and (2) invalid script names that happen to be the name of
|
||||
another property can be diagnosed. */
|
||||
|
||||
if (vptr != NULL)
|
||||
{
|
||||
int offset = 0;
|
||||
PCRE2_UCHAR sname[8];
|
||||
|
||||
*vptr = 0; /* Terminate property name */
|
||||
if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 ||
|
||||
PRIV(strcmp_c8)(name, STRING_bc) == 0)
|
||||
{
|
||||
offset = 4;
|
||||
sname[0] = CHAR_b;
|
||||
sname[1] = CHAR_i; /* There is no strcpy_c8 function */
|
||||
sname[2] = CHAR_d;
|
||||
sname[3] = CHAR_i;
|
||||
}
|
||||
|
||||
else if (PRIV(strcmp_c8)(name, STRING_script) == 0 ||
|
||||
PRIV(strcmp_c8)(name, STRING_sc) == 0)
|
||||
ptscript = PT_SC;
|
||||
|
||||
else if (PRIV(strcmp_c8)(name, STRING_scriptextensions) == 0 ||
|
||||
PRIV(strcmp_c8)(name, STRING_scx) == 0)
|
||||
ptscript = PT_SCX;
|
||||
|
||||
else
|
||||
{
|
||||
*errorcodeptr = ERR47;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
/* Adjust the string in name[] as needed */
|
||||
|
||||
memmove(name + offset, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR));
|
||||
if (offset != 0) memmove(name, sname, offset*sizeof(PCRE2_UCHAR));
|
||||
}
|
||||
|
||||
/* Search for a recognized property using binary chop. */
|
||||
|
||||
bot = 0;
|
||||
top = PRIV(utt_size);
|
||||
@@ -2139,15 +2202,37 @@ while (bot < top)
|
||||
int r;
|
||||
i = (bot + top) >> 1;
|
||||
r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
|
||||
|
||||
/* When a matching property is found, some extra checking is needed when the
|
||||
\p{xx:yy} syntax is used and xx is either sc or scx. */
|
||||
|
||||
if (r == 0)
|
||||
{
|
||||
*ptypeptr = PRIV(utt)[i].type;
|
||||
*pdataptr = PRIV(utt)[i].value;
|
||||
return TRUE;
|
||||
if (vptr == NULL || ptscript == PT_NOTSCRIPT)
|
||||
{
|
||||
*ptypeptr = PRIV(utt)[i].type;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
switch (PRIV(utt)[i].type)
|
||||
{
|
||||
case PT_SC:
|
||||
*ptypeptr = PT_SC;
|
||||
return TRUE;
|
||||
|
||||
case PT_SCX:
|
||||
*ptypeptr = ptscript;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
break; /* Non-script found */
|
||||
}
|
||||
|
||||
if (r > 0) bot = i + 1; else top = i;
|
||||
}
|
||||
*errorcodeptr = ERR47; /* Unrecognized name */
|
||||
|
||||
*errorcodeptr = ERR47; /* Unrecognized property */
|
||||
return FALSE;
|
||||
|
||||
ERROR_RETURN: /* Malformed \P or \p */
|
||||
@@ -5285,9 +5370,9 @@ Arguments:
|
||||
pptrptr points to the current parsed pattern pointer
|
||||
errorcodeptr points to error code variable
|
||||
firstcuptr place to put the first required code unit
|
||||
firstcuflagsptr place to put the first code unit flags, or a negative number
|
||||
firstcuflagsptr place to put the first code unit flags
|
||||
reqcuptr place to put the last required code unit
|
||||
reqcuflagsptr place to put the last required code unit flags, or a negative number
|
||||
reqcuflagsptr place to put the last required code unit flags
|
||||
bcptr points to current branch chain
|
||||
cb contains pointers to tables etc.
|
||||
lengthptr NULL during the real compile phase
|
||||
@@ -5300,8 +5385,8 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero
|
||||
|
||||
static int
|
||||
compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
|
||||
int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
|
||||
uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
|
||||
int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr,
|
||||
uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr,
|
||||
compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
int bravalue = 0;
|
||||
@@ -5316,9 +5401,9 @@ uint32_t zeroreqcu, zerofirstcu;
|
||||
uint32_t escape;
|
||||
uint32_t *pptr = *pptrptr;
|
||||
uint32_t meta, meta_arg;
|
||||
int32_t firstcuflags, reqcuflags;
|
||||
int32_t zeroreqcuflags, zerofirstcuflags;
|
||||
int32_t req_caseopt, reqvary, tempreqvary;
|
||||
uint32_t firstcuflags, reqcuflags;
|
||||
uint32_t zeroreqcuflags, zerofirstcuflags;
|
||||
uint32_t req_caseopt, reqvary, tempreqvary;
|
||||
PCRE2_SIZE offset = 0;
|
||||
PCRE2_SIZE length_prevgroup = 0;
|
||||
PCRE2_UCHAR *code = *codeptr;
|
||||
@@ -5374,13 +5459,13 @@ item types that can be repeated set these backoff variables appropriately. */
|
||||
firstcu = reqcu = zerofirstcu = zeroreqcu = 0;
|
||||
firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET;
|
||||
|
||||
/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
|
||||
/* The variable req_caseopt contains either the REQ_CASELESS bit or zero,
|
||||
according to the current setting of the caseless flag. The REQ_CASELESS value
|
||||
leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables
|
||||
to record the case status of the value. This is used only for ASCII characters.
|
||||
*/
|
||||
|
||||
req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
|
||||
req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0;
|
||||
|
||||
/* Switch on next META item until the end of the branch */
|
||||
|
||||
@@ -5395,13 +5480,12 @@ for (;; pptr++)
|
||||
BOOL possessive_quantifier;
|
||||
BOOL note_group_empty;
|
||||
int class_has_8bitchar;
|
||||
int i;
|
||||
uint32_t mclength;
|
||||
uint32_t skipunits;
|
||||
uint32_t subreqcu, subfirstcu;
|
||||
uint32_t groupnumber;
|
||||
uint32_t verbarglen, verbculen;
|
||||
int32_t subreqcuflags, subfirstcuflags; /* Must be signed */
|
||||
uint32_t subreqcuflags, subfirstcuflags;
|
||||
open_capitem *oc;
|
||||
PCRE2_UCHAR mcbuffer[8];
|
||||
|
||||
@@ -5770,9 +5854,9 @@ for (;; pptr++)
|
||||
if (taboffset >= 0)
|
||||
{
|
||||
if (tabopt >= 0)
|
||||
for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
|
||||
for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
|
||||
else
|
||||
for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
|
||||
for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
|
||||
}
|
||||
|
||||
/* Now see if we need to remove any special characters. An option
|
||||
@@ -5786,9 +5870,9 @@ for (;; pptr++)
|
||||
being built and we are done. */
|
||||
|
||||
if (local_negate)
|
||||
for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i];
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]);
|
||||
else
|
||||
for (i = 0; i < 32; i++) classbits[i] |= pbits[i];
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= pbits[i];
|
||||
|
||||
/* Every class contains at least one < 256 character. */
|
||||
|
||||
@@ -5827,21 +5911,23 @@ for (;; pptr++)
|
||||
switch(escape)
|
||||
{
|
||||
case ESC_d:
|
||||
for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
|
||||
break;
|
||||
|
||||
case ESC_D:
|
||||
should_flip_negation = TRUE;
|
||||
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit];
|
||||
for (int i = 0; i < 32; i++)
|
||||
classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
|
||||
break;
|
||||
|
||||
case ESC_w:
|
||||
for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
|
||||
break;
|
||||
|
||||
case ESC_W:
|
||||
should_flip_negation = TRUE;
|
||||
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word];
|
||||
for (int i = 0; i < 32; i++)
|
||||
classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
|
||||
break;
|
||||
|
||||
/* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
|
||||
@@ -5852,12 +5938,13 @@ for (;; pptr++)
|
||||
longer treat \s and \S specially. */
|
||||
|
||||
case ESC_s:
|
||||
for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
|
||||
for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
|
||||
break;
|
||||
|
||||
case ESC_S:
|
||||
should_flip_negation = TRUE;
|
||||
for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
|
||||
for (int i = 0; i < 32; i++)
|
||||
classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
|
||||
break;
|
||||
|
||||
/* When adding the horizontal or vertical space lists to a class, or
|
||||
@@ -6098,7 +6185,7 @@ for (;; pptr++)
|
||||
if (negate_class && !xclass_has_prop)
|
||||
{
|
||||
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
|
||||
for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
|
||||
for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
|
||||
}
|
||||
memcpy(code, classbits, 32);
|
||||
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
|
||||
@@ -6124,7 +6211,7 @@ for (;; pptr++)
|
||||
if (negate_class)
|
||||
{
|
||||
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
|
||||
for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
|
||||
for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
|
||||
}
|
||||
memcpy(code, classbits, 32);
|
||||
}
|
||||
@@ -6198,7 +6285,7 @@ for (;; pptr++)
|
||||
verbarglen = *(++pptr);
|
||||
verbculen = 0;
|
||||
tempcode = code++;
|
||||
for (i = 0; i < (int)verbarglen; i++)
|
||||
for (int i = 0; i < (int)verbarglen; i++)
|
||||
{
|
||||
meta = *(++pptr);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
@@ -6247,6 +6334,7 @@ for (;; pptr++)
|
||||
bravalue = OP_COND;
|
||||
{
|
||||
int count, index;
|
||||
unsigned int i;
|
||||
PCRE2_SPTR name;
|
||||
named_group *ng = cb->named_groups;
|
||||
uint32_t length = *(++pptr);
|
||||
@@ -6286,7 +6374,7 @@ for (;; pptr++)
|
||||
groupnumber = 0;
|
||||
if (meta == META_COND_RNUMBER)
|
||||
{
|
||||
for (i = 1; i < (int)length; i++)
|
||||
for (i = 1; i < length; i++)
|
||||
{
|
||||
groupnumber = groupnumber * 10 + name[i] - CHAR_0;
|
||||
if (groupnumber > MAX_GROUP_NUMBER)
|
||||
@@ -6608,7 +6696,7 @@ for (;; pptr++)
|
||||
|
||||
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
|
||||
{
|
||||
if (subfirstcuflags >= 0)
|
||||
if (subfirstcuflags < REQ_NONE)
|
||||
{
|
||||
firstcu = subfirstcu;
|
||||
firstcuflags = subfirstcuflags;
|
||||
@@ -6622,7 +6710,7 @@ for (;; pptr++)
|
||||
into reqcu if there wasn't one, using the vary flag that was in
|
||||
existence beforehand. */
|
||||
|
||||
else if (subfirstcuflags >= 0 && subreqcuflags < 0)
|
||||
else if (subfirstcuflags < REQ_NONE && subreqcuflags >= REQ_NONE)
|
||||
{
|
||||
subreqcu = subfirstcu;
|
||||
subreqcuflags = subfirstcuflags | tempreqvary;
|
||||
@@ -6631,7 +6719,7 @@ for (;; pptr++)
|
||||
/* If the subpattern set a required code unit (or set a first code unit
|
||||
that isn't really the first code unit - see above), set it. */
|
||||
|
||||
if (subreqcuflags >= 0)
|
||||
if (subreqcuflags < REQ_NONE)
|
||||
{
|
||||
reqcu = subreqcu;
|
||||
reqcuflags = subreqcuflags;
|
||||
@@ -6650,7 +6738,7 @@ for (;; pptr++)
|
||||
in that example, 'X' ends up set for both. */
|
||||
|
||||
else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) &&
|
||||
subreqcuflags >= 0 && subfirstcuflags >= 0)
|
||||
subreqcuflags < REQ_NONE && subfirstcuflags < REQ_NONE)
|
||||
{
|
||||
reqcu = subreqcu;
|
||||
reqcuflags = subreqcuflags;
|
||||
@@ -6680,7 +6768,7 @@ for (;; pptr++)
|
||||
this name is duplicated. */
|
||||
|
||||
groupnumber = 0;
|
||||
for (i = 0; i < cb->names_found; i++, ng++)
|
||||
for (unsigned int i = 0; i < cb->names_found; i++, ng++)
|
||||
{
|
||||
if (length == ng->length &&
|
||||
PRIV(strncmp)(name, ng->name, length) == 0)
|
||||
@@ -6935,14 +7023,19 @@ for (;; pptr++)
|
||||
#endif /* MAYBE_UTF_MULTI */
|
||||
|
||||
/* Handle the case of a single code unit - either with no UTF support, or
|
||||
with UTF disabled, or for a single-code-unit UTF character. */
|
||||
with UTF disabled, or for a single-code-unit UTF character. In the latter
|
||||
case, for a repeated positive match, get the caseless flag for the
|
||||
required code unit from the previous character, because a class like [Aa]
|
||||
sets a caseless A but by now the req_caseopt flag has been reset. */
|
||||
|
||||
{
|
||||
mcbuffer[0] = code[-1];
|
||||
mclength = 1;
|
||||
if (op_previous <= OP_CHARI && repeat_min > 1)
|
||||
{
|
||||
reqcu = mcbuffer[0];
|
||||
reqcuflags = req_caseopt | cb->req_varyopt;
|
||||
reqcuflags = cb->req_varyopt;
|
||||
if (op_previous == OP_CHARI) reqcuflags |= REQ_CASELESS;
|
||||
}
|
||||
}
|
||||
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
|
||||
@@ -7034,7 +7127,7 @@ for (;; pptr++)
|
||||
*lengthptr += delta;
|
||||
}
|
||||
|
||||
else for (i = 0; i < replicate; i++)
|
||||
else for (int i = 0; i < replicate; i++)
|
||||
{
|
||||
memcpy(code, previous, CU2BYTES(1 + LINK_SIZE));
|
||||
previous = code;
|
||||
@@ -7210,12 +7303,12 @@ for (;; pptr++)
|
||||
|
||||
else
|
||||
{
|
||||
if (groupsetfirstcu && reqcuflags < 0)
|
||||
if (groupsetfirstcu && reqcuflags >= REQ_NONE)
|
||||
{
|
||||
reqcu = firstcu;
|
||||
reqcuflags = firstcuflags;
|
||||
}
|
||||
for (i = 1; (uint32_t)i < repeat_min; i++)
|
||||
for (uint32_t i = 1; i < repeat_min; i++)
|
||||
{
|
||||
memcpy(code, previous, CU2BYTES(len));
|
||||
code += len;
|
||||
@@ -7259,14 +7352,14 @@ for (;; pptr++)
|
||||
|
||||
/* This is compiling for real */
|
||||
|
||||
else for (i = repeat_max - 1; i >= 0; i--)
|
||||
else for (uint32_t i = repeat_max; i >= 1; i--)
|
||||
{
|
||||
*code++ = OP_BRAZERO + repeat_type;
|
||||
|
||||
/* All but the final copy start a new nesting, maintaining the
|
||||
chain of brackets outstanding. */
|
||||
|
||||
if (i != 0)
|
||||
if (i != 1)
|
||||
{
|
||||
int linkoffset;
|
||||
*code++ = OP_BRA;
|
||||
@@ -7985,9 +8078,9 @@ Arguments:
|
||||
errorcodeptr -> pointer to error code variable
|
||||
skipunits skip this many code units at start (for brackets and OP_COND)
|
||||
firstcuptr place to put the first required code unit
|
||||
firstcuflagsptr place to put the first code unit flags, or a negative number
|
||||
firstcuflagsptr place to put the first code unit flags
|
||||
reqcuptr place to put the last required code unit
|
||||
reqcuflagsptr place to put the last required code unit flags, or a negative number
|
||||
reqcuflagsptr place to put the last required code unit flags
|
||||
bcptr pointer to the chain of currently open branches
|
||||
cb points to the data block with tables pointers etc.
|
||||
lengthptr NULL during the real compile phase
|
||||
@@ -8001,7 +8094,7 @@ Returns: 0 There has been an error
|
||||
static int
|
||||
compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
|
||||
int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr,
|
||||
int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr,
|
||||
uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr,
|
||||
branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr)
|
||||
{
|
||||
PCRE2_UCHAR *code = *codeptr;
|
||||
@@ -8014,9 +8107,9 @@ int okreturn = 1;
|
||||
uint32_t *pptr = *pptrptr;
|
||||
uint32_t firstcu, reqcu;
|
||||
uint32_t lookbehindlength;
|
||||
int32_t firstcuflags, reqcuflags;
|
||||
uint32_t firstcuflags, reqcuflags;
|
||||
uint32_t branchfirstcu, branchreqcu;
|
||||
int32_t branchfirstcuflags, branchreqcuflags;
|
||||
uint32_t branchfirstcuflags, branchreqcuflags;
|
||||
PCRE2_SIZE length;
|
||||
branch_chain bc;
|
||||
|
||||
@@ -8135,9 +8228,9 @@ for (;;)
|
||||
|
||||
if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu)
|
||||
{
|
||||
if (firstcuflags >= 0)
|
||||
if (firstcuflags < REQ_NONE)
|
||||
{
|
||||
if (reqcuflags < 0)
|
||||
if (reqcuflags >= REQ_NONE)
|
||||
{
|
||||
reqcu = firstcu;
|
||||
reqcuflags = firstcuflags;
|
||||
@@ -8149,8 +8242,8 @@ for (;;)
|
||||
/* If we (now or from before) have no firstcu, a firstcu from the
|
||||
branch becomes a reqcu if there isn't a branch reqcu. */
|
||||
|
||||
if (firstcuflags < 0 && branchfirstcuflags >= 0 &&
|
||||
branchreqcuflags < 0)
|
||||
if (firstcuflags >= REQ_NONE && branchfirstcuflags < REQ_NONE &&
|
||||
branchreqcuflags >= REQ_NONE)
|
||||
{
|
||||
branchreqcu = branchfirstcu;
|
||||
branchreqcuflags = branchfirstcuflags;
|
||||
@@ -8298,7 +8391,7 @@ Returns: TRUE or FALSE
|
||||
*/
|
||||
|
||||
static BOOL
|
||||
is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
|
||||
is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb,
|
||||
int atomcount, BOOL inassert)
|
||||
{
|
||||
do {
|
||||
@@ -8321,7 +8414,7 @@ do {
|
||||
op == OP_SCBRA || op == OP_SCBRAPOS)
|
||||
{
|
||||
int n = GET2(scode, 1+LINK_SIZE);
|
||||
int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
|
||||
uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1);
|
||||
if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
|
||||
}
|
||||
|
||||
@@ -8681,15 +8774,15 @@ Returns: the fixed first code unit, or 0 with REQ_NONE in flags
|
||||
*/
|
||||
|
||||
static uint32_t
|
||||
find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
|
||||
find_firstassertedcu(PCRE2_SPTR code, uint32_t *flags, uint32_t inassert)
|
||||
{
|
||||
uint32_t c = 0;
|
||||
int cflags = REQ_NONE;
|
||||
uint32_t cflags = REQ_NONE;
|
||||
|
||||
*flags = REQ_NONE;
|
||||
do {
|
||||
uint32_t d;
|
||||
int dflags;
|
||||
uint32_t dflags;
|
||||
int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
|
||||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
|
||||
PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
|
||||
@@ -8712,9 +8805,8 @@ do {
|
||||
case OP_SCRIPT_RUN:
|
||||
d = find_firstassertedcu(scode, &dflags, inassert +
|
||||
((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0));
|
||||
if (dflags < 0)
|
||||
return 0;
|
||||
if (cflags < 0) { c = d; cflags = dflags; }
|
||||
if (dflags >= REQ_NONE) return 0;
|
||||
if (cflags >= REQ_NONE) { c = d; cflags = dflags; }
|
||||
else if (c != d || cflags != dflags) return 0;
|
||||
break;
|
||||
|
||||
@@ -8727,7 +8819,7 @@ do {
|
||||
case OP_MINPLUS:
|
||||
case OP_POSPLUS:
|
||||
if (inassert == 0) return 0;
|
||||
if (cflags < 0) { c = scode[1]; cflags = 0; }
|
||||
if (cflags >= REQ_NONE) { c = scode[1]; cflags = 0; }
|
||||
else if (c != scode[1]) return 0;
|
||||
break;
|
||||
|
||||
@@ -8753,7 +8845,7 @@ do {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
|
||||
if (cflags >= REQ_NONE) { c = scode[1]; cflags = REQ_CASELESS; }
|
||||
else if (c != scode[1]) return 0;
|
||||
break;
|
||||
}
|
||||
@@ -9689,7 +9781,7 @@ PCRE2_SIZE re_blocksize; /* Size of memory block */
|
||||
PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
|
||||
PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
|
||||
|
||||
int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
|
||||
uint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
|
||||
uint32_t firstcu, reqcu; /* Value of first/req code unit */
|
||||
uint32_t setflags = 0; /* NL and BSR set flags */
|
||||
|
||||
@@ -10369,13 +10461,13 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
||||
(these are not saved during the compile because they can cause conflicts with
|
||||
actual literals that follow). */
|
||||
|
||||
if (firstcuflags < 0)
|
||||
if (firstcuflags >= REQ_NONE)
|
||||
firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
|
||||
|
||||
/* Save the data for a first code unit. The existence of one means the
|
||||
minimum length must be at least 1. */
|
||||
|
||||
if (firstcuflags >= 0)
|
||||
if (firstcuflags < REQ_NONE)
|
||||
{
|
||||
re->first_codeunit = firstcu;
|
||||
re->flags |= PCRE2_FIRSTSET;
|
||||
@@ -10422,16 +10514,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
|
||||
different character and not a non-starting code unit of the first character,
|
||||
because the minimum length count is in characters, not code units. */
|
||||
|
||||
if (reqcuflags >= 0)
|
||||
if (reqcuflags < REQ_NONE)
|
||||
{
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 16
|
||||
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
|
||||
firstcuflags < 0 || /* First not set */
|
||||
firstcuflags >= REQ_NONE || /* First not set */
|
||||
(firstcu & 0xf800) != 0xd800 || /* First not surrogate */
|
||||
(reqcu & 0xfc00) != 0xdc00) /* Req not low surrogate */
|
||||
#elif PCRE2_CODE_UNIT_WIDTH == 8
|
||||
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
|
||||
firstcuflags < 0 || /* First not set */
|
||||
firstcuflags >= REQ_NONE || /* First not set */
|
||||
(firstcu & 0x80) == 0 || /* First is ASCII */
|
||||
(reqcu & 0x80) == 0) /* Req is ASCII */
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user