You've already forked godot
mirror of
https://github.com/godotengine/godot.git
synced 2025-11-14 13:41:12 +00:00
pcre2: Update to upstream version 10.40
Changelog: https://github.com/PCRE2Project/pcre2/blob/pcre2-10.40/ChangeLog
This commit is contained in:
332
thirdparty/pcre2/src/pcre2_match.c
vendored
332
thirdparty/pcre2/src/pcre2_match.c
vendored
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||||
|
||||
Written by Philip Hazel
|
||||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||||
New API code Copyright (c) 2015-2021 University of Cambridge
|
||||
New API code Copyright (c) 2015-2022 University of Cambridge
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -49,7 +49,7 @@ POSSIBILITY OF SUCH DAMAGE.
|
||||
/* #define DEBUG_SHOW_OPS */
|
||||
/* #define DEBUG_SHOW_RMATCH */
|
||||
|
||||
#ifdef DEBUG_FRAME_DISPLAY
|
||||
#ifdef DEBUG_FRAMES_DISPLAY
|
||||
#include <stdarg.h>
|
||||
#endif
|
||||
|
||||
@@ -159,7 +159,8 @@ enum { RM100=100, RM101 };
|
||||
#ifdef SUPPORT_UNICODE
|
||||
enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207,
|
||||
RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215,
|
||||
RM216, RM217, RM218, RM219, RM220, RM221, RM222 };
|
||||
RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223,
|
||||
RM224, RM225 };
|
||||
#endif
|
||||
|
||||
/* Define short names for general fields in the current backtrack frame, which
|
||||
@@ -2421,40 +2422,49 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
{
|
||||
const uint32_t *cp;
|
||||
const ucd_record *prop = GET_UCD(fc);
|
||||
BOOL notmatch = Fop == OP_NOTPROP;
|
||||
|
||||
switch(Fecode[1])
|
||||
{
|
||||
case PT_ANY:
|
||||
if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
|
||||
if (notmatch) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_LAMP:
|
||||
if ((prop->chartype == ucp_Lu ||
|
||||
prop->chartype == ucp_Ll ||
|
||||
prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP))
|
||||
prop->chartype == ucp_Lt) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_GC:
|
||||
if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP))
|
||||
if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_PC:
|
||||
if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP))
|
||||
if ((Fecode[2] == prop->chartype) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_SC:
|
||||
if ((Fecode[2] != prop->script) == (Fop == OP_PROP))
|
||||
if ((Fecode[2] == prop->script) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
{
|
||||
BOOL ok = (Fecode[2] == prop->script ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0);
|
||||
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
|
||||
/* These are specials */
|
||||
|
||||
case PT_ALNUM:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP))
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
@@ -2468,12 +2478,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
|
||||
if (notmatch) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
|
||||
(Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@@ -2481,7 +2491,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
case PT_WORD:
|
||||
if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
|
||||
PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
|
||||
fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP))
|
||||
fc == CHAR_UNDERSCORE) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
@@ -2490,19 +2500,32 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
for (;;)
|
||||
{
|
||||
if (fc < *cp)
|
||||
{ if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
|
||||
{ if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } }
|
||||
if (fc == *cp++)
|
||||
{ if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
|
||||
{ if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; }
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_UCNC:
|
||||
if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
|
||||
fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
|
||||
fc >= 0xe000) == (Fop == OP_NOTPROP))
|
||||
fc >= 0xe000) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
{
|
||||
BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), Fecode[2]) != 0;
|
||||
if (ok == notmatch) RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
|
||||
/* This should never occur */
|
||||
|
||||
default:
|
||||
@@ -2616,18 +2639,20 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
|
||||
/* First, ensure the minimum number of matches is present. Use inline
|
||||
code for maximizing the speed, and do the type test once at the start
|
||||
(i.e. keep it out of the loop). The code for UTF mode is separated out for
|
||||
tidiness, except for Unicode property tests. */
|
||||
(i.e. keep it out of the loops). As there are no calls to RMATCH in the
|
||||
loops, we can use an ordinary variable for "notmatch". The code for UTF
|
||||
mode is separated out for tidiness, except for Unicode property tests. */
|
||||
|
||||
if (Lmin > 0)
|
||||
{
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (proptype >= 0) /* Property tests in all modes */
|
||||
{
|
||||
BOOL notmatch = Lctype == OP_NOTPROP;
|
||||
switch(proptype)
|
||||
{
|
||||
case PT_ANY:
|
||||
if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
|
||||
if (notmatch) RRETURN(MATCH_NOMATCH);
|
||||
for (i = 1; i <= Lmin; i++)
|
||||
{
|
||||
if (Feptr >= mb->end_subject)
|
||||
@@ -2652,7 +2677,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
chartype = UCD_CHARTYPE(fc);
|
||||
if ((chartype == ucp_Lu ||
|
||||
chartype == ucp_Ll ||
|
||||
chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
|
||||
chartype == ucp_Lt) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
@@ -2666,7 +2691,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
@@ -2680,7 +2705,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
@@ -2694,7 +2719,26 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
for (i = 1; i <= Lmin; i++)
|
||||
{
|
||||
BOOL ok;
|
||||
const ucd_record *prop;
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
prop = GET_UCD(fc);
|
||||
ok = (prop->script == Lpropvalue ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
|
||||
if (ok == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
@@ -2710,7 +2754,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
category = UCD_CATEGORY(fc);
|
||||
if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
|
||||
if ((category == ucp_L || category == ucp_N) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
@@ -2733,11 +2777,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
|
||||
if (notmatch) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
|
||||
if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
@@ -2756,7 +2800,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
category = UCD_CATEGORY(fc);
|
||||
if ((category == ucp_L || category == ucp_N ||
|
||||
fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
|
||||
fc == CHAR_UNDERSCORE) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
@@ -2776,12 +2820,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
{
|
||||
if (fc < *cp)
|
||||
{
|
||||
if (Lctype == OP_NOTPROP) break;
|
||||
if (notmatch) break;
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
if (fc == *cp++)
|
||||
{
|
||||
if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
|
||||
if (notmatch) RRETURN(MATCH_NOMATCH);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -2799,7 +2843,40 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
|
||||
fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
|
||||
fc >= 0xe000) == (Lctype == OP_NOTPROP))
|
||||
fc >= 0xe000) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
for (i = 1; i <= Lmin; i++)
|
||||
{
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
for (i = 1; i <= Lmin; i++)
|
||||
{
|
||||
BOOL ok;
|
||||
const ucd_record *prop;
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
prop = GET_UCD(fc);
|
||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
|
||||
if (ok == notmatch)
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
break;
|
||||
@@ -3343,7 +3420,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
if (Lmin == Lmax) continue;
|
||||
|
||||
/* If minimizing, we have to test the rest of the pattern before each
|
||||
subsequent match. */
|
||||
subsequent match. This means we cannot use a local "notmatch" variable as
|
||||
in the other cases. As all 4 temporary 32-bit values in the frame are
|
||||
already in use, just test the type each time. */
|
||||
|
||||
if (reptype == REPTYPE_MIN)
|
||||
{
|
||||
@@ -3440,6 +3519,28 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
case PT_SCX:
|
||||
for (;;)
|
||||
{
|
||||
BOOL ok;
|
||||
const ucd_record *prop;
|
||||
RMATCH(Fecode, RM225);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
prop = GET_UCD(fc);
|
||||
ok = (prop->script == Lpropvalue
|
||||
|| MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
|
||||
if (ok == (Lctype == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
case PT_ALNUM:
|
||||
for (;;)
|
||||
{
|
||||
@@ -3454,8 +3555,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
category = UCD_CATEGORY(fc);
|
||||
if ((category == ucp_L || category == ucp_N) ==
|
||||
(Lctype == OP_NOTPROP))
|
||||
if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
/* Control never gets here */
|
||||
@@ -3562,6 +3662,45 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
case PT_BIDICL:
|
||||
for (;;)
|
||||
{
|
||||
RMATCH(Fecode, RM224);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
case PT_BOOL:
|
||||
for (;;)
|
||||
{
|
||||
BOOL ok;
|
||||
const ucd_record *prop;
|
||||
RMATCH(Fecode, RM223);
|
||||
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
|
||||
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
prop = GET_UCD(fc);
|
||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
|
||||
if (ok == (Lctype == OP_NOTPROP))
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
/* Control never gets here */
|
||||
|
||||
/* This should never occur */
|
||||
default:
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
@@ -3870,7 +4009,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
}
|
||||
|
||||
/* If maximizing, it is worth using inline code for speed, doing the type
|
||||
test once at the start (i.e. keep it out of the loop). */
|
||||
test once at the start (i.e. keep it out of the loops). Once again,
|
||||
"notmatch" can be an ordinary local variable because the loops do not call
|
||||
RMATCH. */
|
||||
|
||||
else
|
||||
{
|
||||
@@ -3879,6 +4020,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
#ifdef SUPPORT_UNICODE
|
||||
if (proptype >= 0)
|
||||
{
|
||||
BOOL notmatch = Lctype == OP_NOTPROP;
|
||||
switch(proptype)
|
||||
{
|
||||
case PT_ANY:
|
||||
@@ -3891,7 +4033,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
if (Lctype == OP_NOTPROP) break;
|
||||
if (notmatch) break;
|
||||
Feptr+= len;
|
||||
}
|
||||
break;
|
||||
@@ -3910,7 +4052,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
chartype = UCD_CHARTYPE(fc);
|
||||
if ((chartype == ucp_Lu ||
|
||||
chartype == ucp_Ll ||
|
||||
chartype == ucp_Lt) == (Lctype == OP_NOTPROP))
|
||||
chartype == ucp_Lt) == notmatch)
|
||||
break;
|
||||
Feptr+= len;
|
||||
}
|
||||
@@ -3926,8 +4068,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
break;
|
||||
if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break;
|
||||
Feptr+= len;
|
||||
}
|
||||
break;
|
||||
@@ -3942,8 +4083,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
break;
|
||||
if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break;
|
||||
Feptr+= len;
|
||||
}
|
||||
break;
|
||||
@@ -3958,8 +4098,27 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP))
|
||||
if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break;
|
||||
Feptr+= len;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_SCX:
|
||||
for (i = Lmin; i < Lmax; i++)
|
||||
{
|
||||
BOOL ok;
|
||||
const ucd_record *prop;
|
||||
int len = 1;
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
prop = GET_UCD(fc);
|
||||
ok = (prop->script == Lpropvalue ||
|
||||
MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0);
|
||||
if (ok == notmatch) break;
|
||||
Feptr+= len;
|
||||
}
|
||||
break;
|
||||
@@ -3976,8 +4135,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
category = UCD_CATEGORY(fc);
|
||||
if ((category == ucp_L || category == ucp_N) ==
|
||||
(Lctype == OP_NOTPROP))
|
||||
if ((category == ucp_L || category == ucp_N) == notmatch)
|
||||
break;
|
||||
Feptr+= len;
|
||||
}
|
||||
@@ -4002,11 +4160,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
{
|
||||
HSPACE_CASES:
|
||||
VSPACE_CASES:
|
||||
if (Lctype == OP_NOTPROP) goto ENDLOOP99; /* Break the loop */
|
||||
if (notmatch) goto ENDLOOP99; /* Break the loop */
|
||||
break;
|
||||
|
||||
default:
|
||||
if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP))
|
||||
if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch)
|
||||
goto ENDLOOP99; /* Break the loop */
|
||||
break;
|
||||
}
|
||||
@@ -4028,7 +4186,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
category = UCD_CATEGORY(fc);
|
||||
if ((category == ucp_L || category == ucp_N ||
|
||||
fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
|
||||
fc == CHAR_UNDERSCORE) == notmatch)
|
||||
break;
|
||||
Feptr+= len;
|
||||
}
|
||||
@@ -4049,9 +4207,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
for (;;)
|
||||
{
|
||||
if (fc < *cp)
|
||||
{ if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; }
|
||||
{ if (notmatch) break; else goto GOT_MAX; }
|
||||
if (fc == *cp++)
|
||||
{ if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; }
|
||||
{ if (notmatch) goto GOT_MAX; else break; }
|
||||
}
|
||||
Feptr += len;
|
||||
}
|
||||
@@ -4070,12 +4228,47 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT ||
|
||||
fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) ||
|
||||
fc >= 0xe000) == (Lctype == OP_NOTPROP))
|
||||
fc >= 0xe000) == notmatch)
|
||||
break;
|
||||
Feptr += len;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_BIDICL:
|
||||
for (i = Lmin; i < Lmax; i++)
|
||||
{
|
||||
int len = 1;
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break;
|
||||
Feptr+= len;
|
||||
}
|
||||
break;
|
||||
|
||||
case PT_BOOL:
|
||||
for (i = Lmin; i < Lmax; i++)
|
||||
{
|
||||
BOOL ok;
|
||||
const ucd_record *prop;
|
||||
int len = 1;
|
||||
if (Feptr >= mb->end_subject)
|
||||
{
|
||||
SCHECK_PARTIAL();
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
prop = GET_UCD(fc);
|
||||
ok = MAPBIT(PRIV(ucd_boolprop_sets) +
|
||||
UCD_BPROPS_PROP(prop), Lpropvalue) != 0;
|
||||
if (ok == notmatch) break;
|
||||
Feptr+= len;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
return PCRE2_ERROR_INTERNAL;
|
||||
}
|
||||
@@ -6066,7 +6259,7 @@ switch (Freturn_id)
|
||||
LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206)
|
||||
LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213)
|
||||
LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220)
|
||||
LBL(221) LBL(222)
|
||||
LBL(221) LBL(222) LBL(223) LBL(224) LBL(225)
|
||||
#endif
|
||||
|
||||
default:
|
||||
@@ -6129,8 +6322,8 @@ PCRE2_UCHAR req_cu2 = 0;
|
||||
PCRE2_SPTR bumpalong_limit;
|
||||
PCRE2_SPTR end_subject;
|
||||
PCRE2_SPTR true_end_subject;
|
||||
PCRE2_SPTR start_match = subject + start_offset;
|
||||
PCRE2_SPTR req_cu_ptr = start_match - 1;
|
||||
PCRE2_SPTR start_match;
|
||||
PCRE2_SPTR req_cu_ptr;
|
||||
PCRE2_SPTR start_partial;
|
||||
PCRE2_SPTR match_partial;
|
||||
|
||||
@@ -6170,9 +6363,18 @@ PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
|
||||
PCRE2_KEEP_UNINITIALIZED;
|
||||
mb->stack_frames = (heapframe *)stack_frames_vector;
|
||||
|
||||
/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated
|
||||
subject string. */
|
||||
/* Treat a NULL subject with a length of 0 as an empty string. */
|
||||
|
||||
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
|
||||
|
||||
/* Plausibility checks */
|
||||
|
||||
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
||||
if (code == NULL || subject == NULL || match_data == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
|
||||
start_match = subject + start_offset;
|
||||
req_cu_ptr = start_match - 1;
|
||||
if (length == PCRE2_ZERO_TERMINATED)
|
||||
{
|
||||
length = PRIV(strlen)(subject);
|
||||
@@ -6180,11 +6382,6 @@ if (length == PCRE2_ZERO_TERMINATED)
|
||||
}
|
||||
true_end_subject = end_subject = subject + length;
|
||||
|
||||
/* Plausibility checks */
|
||||
|
||||
if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
|
||||
if (code == NULL || subject == NULL || match_data == NULL)
|
||||
return PCRE2_ERROR_NULL;
|
||||
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
|
||||
|
||||
/* Check that the first field in the block is the magic number. */
|
||||
@@ -6482,7 +6679,7 @@ if (utf &&
|
||||
/* If the end precedes start_match, it means there is invalid UTF in the
|
||||
extra code units we reversed over because of a lookbehind. Advance past the
|
||||
first bad code unit, and then skip invalid character starting code units in
|
||||
8-bit and 16-bit modes, and try again. */
|
||||
8-bit and 16-bit modes, and try again with the original end point. */
|
||||
|
||||
if (end_subject < start_match)
|
||||
{
|
||||
@@ -6491,6 +6688,7 @@ if (utf &&
|
||||
while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject))
|
||||
mb->check_subject++;
|
||||
#endif
|
||||
end_subject = true_end_subject;
|
||||
}
|
||||
|
||||
/* Otherwise, set the not end of line option, and do the match. */
|
||||
@@ -6601,10 +6799,16 @@ the pattern. It is not used at all if there are no capturing parentheses.
|
||||
|
||||
The last of these is changed within the match() function if the frame vector
|
||||
has to be expanded. We therefore put it into the match block so that it is
|
||||
correct when calling match() more than once for non-anchored patterns. */
|
||||
correct when calling match() more than once for non-anchored patterns.
|
||||
|
||||
frame_size = offsetof(heapframe, ovector) +
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE);
|
||||
We must also pad frame_size for alignment to ensure subsequent frames are as
|
||||
aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
|
||||
array, that does not guarantee it is suitably aligned for pointers, as some
|
||||
architectures have pointers that are larger than a size_t. */
|
||||
|
||||
frame_size = (offsetof(heapframe, ovector) +
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) &
|
||||
~(HEAPFRAME_ALIGNMENT - 1);
|
||||
|
||||
/* Limits set in the pattern override the match context only if they are
|
||||
smaller. */
|
||||
@@ -6648,7 +6852,7 @@ mb->match_frames_top =
|
||||
to avoid uninitialized memory read errors when it is copied to a new frame. */
|
||||
|
||||
memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
|
||||
re->top_bracket * 2 * sizeof(PCRE2_SIZE));
|
||||
frame_size - offsetof(heapframe, ovector));
|
||||
|
||||
/* Pointers to the individual character tables */
|
||||
|
||||
|
||||
Reference in New Issue
Block a user