1
0
mirror of https://github.com/godotengine/godot.git synced 2025-11-20 14:45:44 +00:00

pcre2: Sync with upstream 10.32

This commit is contained in:
Rémi Verschelde
2019-03-04 14:25:49 +01:00
parent 2bc981948d
commit 0455bc64b6
37 changed files with 4929 additions and 4094 deletions

View File

@@ -181,7 +181,8 @@ static const uint8_t coptable[] = {
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, /* COMMIT, COMMIT_ARG */
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
};
@@ -254,7 +255,8 @@ static const uint8_t poptable[] = {
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG */
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */
0, 0, 0, 0, /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, /* COMMIT, COMMIT_ARG */
0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */
0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */
};
@@ -292,6 +294,35 @@ typedef struct stateblock {
#define INTS_PER_STATEBLOCK (int)(sizeof(stateblock)/sizeof(int))
/* Before version 10.32 the recursive calls of internal_dfa_match() were passed
local working space and output vectors that were created on the stack. This has
caused issues for some patterns, especially in small-stack environments such as
Windows. A new scheme is now in use which sets up a vector on the stack, but if
this is too small, heap memory is used, up to the heap_limit. The main
parameters are all numbers of ints because the workspace is a vector of ints.
The size of the starting stack vector, DFA_START_RWS_SIZE, is in bytes, and is
defined in pcre2_internal.h so as to be available to pcre2test when it is
finding the minimum heap requirement for a match. */
#define OVEC_UNIT (sizeof(PCRE2_SIZE)/sizeof(int))
#define RWS_BASE_SIZE (DFA_START_RWS_SIZE/sizeof(int)) /* Stack vector */
#define RWS_RSIZE 1000 /* Work size for recursion */
#define RWS_OVEC_RSIZE (1000*OVEC_UNIT) /* Ovector for recursion */
#define RWS_OVEC_OSIZE (2*OVEC_UNIT) /* Ovector in other cases */
/* This structure is at the start of each workspace block. */
typedef struct RWS_anchor {
struct RWS_anchor *next;
unsigned int size; /* Number of ints */
unsigned int free; /* Number of ints */
} RWS_anchor;
#define RWS_ANCHOR_SIZE (sizeof(RWS_anchor)/sizeof(int))
/*************************************************
* Process a callout *
@@ -353,6 +384,61 @@ return (mb->callout)(cb, mb->callout_data);
/*************************************************
* Expand local workspace memory *
*************************************************/
/* This function is called when internal_dfa_match() is about to be called
recursively and there is insufficient working space left in the current
workspace block. If there's an existing next block, use it; otherwise get a new
block unless the heap limit is reached.
Arguments:
rwsptr pointer to block pointer (updated)
ovecsize space needed for an ovector
mb the match block
Returns: 0 rwsptr has been updated
!0 an error code
*/
static int
more_workspace(RWS_anchor **rwsptr, unsigned int ovecsize, dfa_match_block *mb)
{
RWS_anchor *rws = *rwsptr;
RWS_anchor *new;
if (rws->next != NULL)
{
new = rws->next;
}
/* All sizes are in units of sizeof(int), except for mb->heaplimit, which is in
kibibytes. */
else
{
unsigned int newsize = rws->size * 2;
unsigned int heapleft = (unsigned int)
(((1024/sizeof(int))*mb->heap_limit - mb->heap_used));
if (newsize > heapleft) newsize = heapleft;
if (newsize < RWS_RSIZE + ovecsize + RWS_ANCHOR_SIZE)
return PCRE2_ERROR_HEAPLIMIT;
new = mb->memctl.malloc(newsize*sizeof(int), mb->memctl.memory_data);
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
mb->heap_used += newsize;
new->next = NULL;
new->size = newsize;
rws->next = new;
}
new->free = new->size - RWS_ANCHOR_SIZE;
*rwsptr = new;
return 0;
}
/*************************************************
* Match a Regular Expression - DFA engine *
*************************************************/
@@ -431,7 +517,8 @@ internal_dfa_match(
uint32_t offsetcount,
int *workspace,
int wscount,
uint32_t rlevel)
uint32_t rlevel,
int *RWS)
{
stateblock *active_states, *new_states, *temp_states;
stateblock *next_active_state, *next_new_state;
@@ -788,7 +875,7 @@ for (;;)
else if (match_count > 0 && ++match_count * 2 > (int)offsetcount)
match_count = 0;
count = ((match_count == 0)? (int)offsetcount : match_count * 2) - 2;
if (count > 0) memmove(offsets + 2, offsets,
if (count > 0) (void)memmove(offsets + 2, offsets,
(size_t)count * sizeof(PCRE2_SIZE));
if (offsetcount >= 2)
{
@@ -2587,10 +2674,22 @@ for (;;)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
{
PCRE2_SPTR endasscode = code + GET(code, 1);
PCRE2_SIZE local_offsets[2];
int rc;
int local_workspace[1000];
int *local_workspace;
PCRE2_SIZE *local_offsets;
PCRE2_SPTR endasscode = code + GET(code, 1);
RWS_anchor *rws = (RWS_anchor *)RWS;
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
@@ -2600,10 +2699,13 @@ for (;;)
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
@@ -2615,8 +2717,6 @@ for (;;)
case OP_COND:
case OP_SCOND:
{
PCRE2_SIZE local_offsets[1000];
int local_workspace[1000];
int codelink = (int)GET(code, 1);
PCRE2_UCHAR condcode;
@@ -2673,8 +2773,22 @@ for (;;)
else
{
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
PCRE2_SPTR asscode = code + LINK_SIZE + 1;
PCRE2_SPTR endasscode = asscode + GET(asscode, 1);
RWS_anchor *rws = (RWS_anchor *)RWS;
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
@@ -2684,10 +2798,13 @@ for (;;)
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc < 0 && rc != PCRE2_ERROR_NOMATCH) return rc;
if ((rc >= 0) ==
@@ -2702,13 +2819,25 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_RECURSE:
{
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
RWS_anchor *rws = (RWS_anchor *)RWS;
dfa_recursion_info *ri;
PCRE2_SIZE local_offsets[1000];
int local_workspace[1000];
PCRE2_SPTR callpat = start_code + GET(code, 1);
uint32_t recno = (callpat == mb->start_code)? 0 :
GET2(callpat, 1 + LINK_SIZE);
int rc;
if (rws->free < RWS_RSIZE + RWS_OVEC_RSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_RSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_RSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE;
/* Check for repeating a recursion without advancing the subject
pointer. This should catch convoluted mutual recursions. (Some simple
@@ -2732,11 +2861,13 @@ for (;;)
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_RSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_RSIZE;
mb->recursive = new_recursive.prevrec; /* Done this recursion */
/* Ran out of internal offsets */
@@ -2782,10 +2913,25 @@ for (;;)
case OP_SCBRAPOS:
case OP_BRAPOSZERO:
{
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
PCRE2_SIZE charcount, matched_count;
PCRE2_SPTR local_ptr = ptr;
RWS_anchor *rws = (RWS_anchor *)RWS;
BOOL allow_zero;
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
if (codevalue == OP_BRAPOSZERO)
{
allow_zero = TRUE;
@@ -2798,19 +2944,17 @@ for (;;)
for (matched_count = 0;; matched_count++)
{
PCRE2_SIZE local_offsets[2];
int local_workspace[1000];
int rc = internal_dfa_match(
rc = internal_dfa_match(
mb, /* fixed match data */
code, /* this subexpression's code */
local_ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
/* Failed to match */
@@ -2827,6 +2971,8 @@ for (;;)
local_ptr += charcount; /* Advance temporary position ptr */
}
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
/* At this point we have matched the subpattern matched_count
times, and local_ptr is pointing to the character after the end of the
last match. */
@@ -2869,19 +3015,35 @@ for (;;)
/*-----------------------------------------------------------------*/
case OP_ONCE:
{
PCRE2_SIZE local_offsets[2];
int local_workspace[1000];
int rc;
int *local_workspace;
PCRE2_SIZE *local_offsets;
RWS_anchor *rws = (RWS_anchor *)RWS;
int rc = internal_dfa_match(
if (rws->free < RWS_RSIZE + RWS_OVEC_OSIZE)
{
rc = more_workspace(&rws, RWS_OVEC_OSIZE, mb);
if (rc != 0) return rc;
RWS = (int *)rws;
}
local_offsets = (PCRE2_SIZE *)(RWS + rws->size - rws->free);
local_workspace = ((int *)local_offsets) + RWS_OVEC_OSIZE;
rws->free -= RWS_RSIZE + RWS_OVEC_OSIZE;
rc = internal_dfa_match(
mb, /* fixed match data */
code, /* this subexpression's code */
ptr, /* where we currently are */
(PCRE2_SIZE)(ptr - start_subject), /* start offset */
local_offsets, /* offset vector */
sizeof(local_offsets)/sizeof(PCRE2_SIZE), /* size of same */
RWS_OVEC_OSIZE/OVEC_UNIT, /* size of same */
local_workspace, /* workspace vector */
sizeof(local_workspace)/sizeof(int), /* size of same */
rlevel); /* function recursion level */
RWS_RSIZE, /* size of same */
rlevel, /* function recursion level */
RWS); /* recursion workspace */
rws->free += RWS_RSIZE + RWS_OVEC_OSIZE;
if (rc >= 0)
{
@@ -3063,6 +3225,7 @@ pcre2_dfa_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount)
{
int rc;
const pcre2_real_code *re = (const pcre2_real_code *)code;
PCRE2_SPTR start_match;
@@ -3071,9 +3234,9 @@ PCRE2_SPTR bumpalong_limit;
PCRE2_SPTR req_cu_ptr;
BOOL utf, anchored, startline, firstline;
BOOL has_first_cu = FALSE;
BOOL has_req_cu = FALSE;
PCRE2_UCHAR first_cu = 0;
PCRE2_UCHAR first_cu2 = 0;
PCRE2_UCHAR req_cu = 0;
@@ -3088,6 +3251,17 @@ pcre2_callout_block cb;
dfa_match_block actual_match_block;
dfa_match_block *mb = &actual_match_block;
/* Set up a starting block of memory for use during recursive calls to
internal_dfa_match(). By putting this on the stack, it minimizes resource use
in the case when it is not needed. If this is too small, more memory is
obtained from the heap. At the start of each block is an anchor structure.*/
int base_recursion_workspace[RWS_BASE_SIZE];
RWS_anchor *rws = (RWS_anchor *)base_recursion_workspace;
rws->next = NULL;
rws->size = RWS_BASE_SIZE;
rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
subject string. */
@@ -3184,6 +3358,7 @@ if (mcontext == NULL)
mb->memctl = re->memctl;
mb->match_limit = PRIV(default_match_context).match_limit;
mb->match_limit_depth = PRIV(default_match_context).depth_limit;
mb->heap_limit = PRIV(default_match_context).heap_limit;
}
else
{
@@ -3198,6 +3373,7 @@ else
mb->memctl = mcontext->memctl;
mb->match_limit = mcontext->match_limit;
mb->match_limit_depth = mcontext->depth_limit;
mb->heap_limit = mcontext->heap_limit;
}
if (mb->match_limit > re->limit_match)
@@ -3206,6 +3382,9 @@ if (mb->match_limit > re->limit_match)
if (mb->match_limit_depth > re->limit_depth)
mb->match_limit_depth = re->limit_depth;
if (mb->heap_limit > re->limit_heap)
mb->heap_limit = re->limit_heap;
mb->start_code = (PCRE2_UCHAR *)((uint8_t *)re + sizeof(pcre2_real_code)) +
re->name_count * re->name_entry_size;
mb->tables = re->tables;
@@ -3215,6 +3394,7 @@ mb->start_offset = start_offset;
mb->moptions = options;
mb->poptions = re->overall_options;
mb->match_call_count = 0;
mb->heap_used = 0;
/* Process the \R and newline settings. */
@@ -3351,8 +3531,6 @@ a match. */
for (;;)
{
int rc;
/* ----------------- Start of match optimizations ---------------- */
/* There are some optimizations that avoid running the match if a known
@@ -3544,7 +3722,7 @@ for (;;)
in characters, we treat it as code units to avoid spending too much time
in this optimization. */
if (end_subject - start_match < re->minlength) return PCRE2_ERROR_NOMATCH;
if (end_subject - start_match < re->minlength) goto NOMATCH_EXIT;
/* If req_cu is set, we know that that code unit must appear in the
subject for the match to succeed. If the first code unit is set, req_cu
@@ -3621,7 +3799,8 @@ for (;;)
(uint32_t)match_data->oveccount * 2, /* actual size of same */
workspace, /* workspace vector */
(int)wscount, /* size of same */
0); /* function recurse level */
0, /* function recurse level */
base_recursion_workspace); /* initial workspace for recursion */
/* Anything other than "no match" means we are done, always; otherwise, carry
on only if not anchored. */
@@ -3637,7 +3816,7 @@ for (;;)
match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject);
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
match_data->rc = rc;
return rc;
goto EXIT;
}
/* Advance to the next subject character unless we are at the end of a line
@@ -3668,8 +3847,18 @@ for (;;)
} /* "Bumpalong" loop */
NOMATCH_EXIT:
rc = PCRE2_ERROR_NOMATCH;
return PCRE2_ERROR_NOMATCH;
EXIT:
while (rws->next != NULL)
{
RWS_anchor *next = rws->next;
rws->next = next->next;
mb->memctl.free(next, mb->memctl.memory_data);
}
return rc;
}
/* End of pcre2_dfa_match.c */