Index: pcre/configure.ac =================================================================== --- pcre/configure.ac (revision 1553) +++ pcre/configure.ac (working copy) @@ -9,9 +9,9 @@ dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre_major, [8]) -m4_define(pcre_minor, [37]) -m4_define(pcre_prerelease, []) -m4_define(pcre_date, [2015-04-28]) +m4_define(pcre_minor, [38]) +m4_define(pcre_prerelease, [-RC1]) +m4_define(pcre_date, [2015-05-03]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. Index: pcre/sljit/sljitConfig.h =================================================================== --- pcre/sljit/sljitConfig.h (revision 1553) +++ pcre/sljit/sljitConfig.h (working copy) @@ -96,6 +96,15 @@ #define SLJIT_EXECUTABLE_ALLOCATOR 1 #endif +/* Force cdecl calling convention even if a better calling + convention (e.g. fastcall) is supported by the C compiler. + If this option is enabled, C functions without + SLJIT_CALL can also be called from JIT code. */ +#ifndef SLJIT_USE_CDECL_CALLING_CONVENTION +/* Disabled by default */ +#define SLJIT_USE_CDECL_CALLING_CONVENTION 0 +#endif + /* Return with error when an invalid argument is passed. */ #ifndef SLJIT_ARGUMENT_CHECKS /* Disabled by default */ Index: pcre/sljit/sljitLir.c =================================================================== --- pcre/sljit/sljitLir.c (revision 1553) +++ pcre/sljit/sljitLir.c (working copy) @@ -845,8 +845,8 @@ } static SLJIT_CONST char* op0_names[] = { - (char*)"breakpoint", (char*)"nop", - (char*)"lumul", (char*)"lsmul", (char*)"ludiv", (char*)"lsdiv", + (char*)"breakpoint", (char*)"nop", (char*)"lumul", (char*)"lsmul", + (char*)"udivmod", (char*)"sdivmod", (char*)"udivi", (char*)"sdivi" }; static SLJIT_CONST char* op1_names[] = { @@ -1036,7 +1036,7 @@ { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LSMUL) - || ((op & ~SLJIT_INT_OP) >= SLJIT_LUDIV && (op & ~SLJIT_INT_OP) <= SLJIT_LSDIV)); + || ((op & ~SLJIT_INT_OP) >= SLJIT_UDIVMOD && (op & ~SLJIT_INT_OP) <= SLJIT_SDIVI)); CHECK_ARGUMENT(op < SLJIT_LUMUL || compiler->scratches >= 2); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) @@ -1447,6 +1447,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset) { + SLJIT_UNUSED_ARG(offset); + #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); #endif @@ -1462,6 +1464,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value) { + SLJIT_UNUSED_ARG(init_value); + #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_DST(dst, dstw); #endif Index: pcre/sljit/sljitNativeMIPS_common.c =================================================================== --- pcre/sljit/sljitNativeMIPS_common.c (revision 1553) +++ pcre/sljit/sljitNativeMIPS_common.c (working copy) @@ -1053,8 +1053,11 @@ #endif FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); - case SLJIT_LUDIV: - case SLJIT_LSDIV: + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); #if !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); @@ -1062,15 +1065,15 @@ #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (int_op) - FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); else - FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DDIVU : DDIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? DIVU : DIV) | S(SLJIT_R0) | T(SLJIT_R1), MOVABLE_INS)); #endif FAIL_IF(push_inst(compiler, MFLO | D(SLJIT_R0), DR(SLJIT_R0))); - return push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); + return (op >= SLJIT_UDIVI) ? SLJIT_SUCCESS : push_inst(compiler, MFHI | D(SLJIT_R1), DR(SLJIT_R1)); } return SLJIT_SUCCESS; Index: pcre/sljit/sljitNativeSPARC_common.c =================================================================== --- pcre/sljit/sljitNativeSPARC_common.c (revision 1553) +++ pcre/sljit/sljitNativeSPARC_common.c (working copy) @@ -777,20 +777,25 @@ #else #error "Implementation required" #endif - case SLJIT_LUDIV: - case SLJIT_LSDIV: + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) - if (op == SLJIT_LUDIV) + if ((op | 0x2) == SLJIT_UDIVI) FAIL_IF(push_inst(compiler, WRY | S1(0), MOVABLE_INS)); else { FAIL_IF(push_inst(compiler, SRA | D(TMP_REG1) | S1(SLJIT_R0) | IMM(31), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, WRY | S1(TMP_REG1), MOVABLE_INS)); } - FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2))); - FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + if (op <= SLJIT_SDIVMOD) + FAIL_IF(push_inst(compiler, OR | D(TMP_REG2) | S1(0) | S2(SLJIT_R0), DR(TMP_REG2))); + FAIL_IF(push_inst(compiler, ((op | 0x2) == SLJIT_UDIVI ? UDIV : SDIV) | D(SLJIT_R0) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R0))); + if (op >= SLJIT_UDIVI) + return SLJIT_SUCCESS; FAIL_IF(push_inst(compiler, SMUL | D(SLJIT_R1) | S1(SLJIT_R0) | S2(SLJIT_R1), DR(SLJIT_R1))); - FAIL_IF(push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1))); - return SLJIT_SUCCESS; + return push_inst(compiler, SUB | D(SLJIT_R1) | S1(TMP_REG2) | S2(SLJIT_R1), DR(SLJIT_R1)); #else #error "Implementation required" #endif Index: pcre/sljit/sljitNativeARM_32.c =================================================================== --- pcre/sljit/sljitNativeARM_32.c (revision 1553) +++ pcre/sljit/sljitNativeARM_32.c (working copy) @@ -1833,18 +1833,33 @@ | (reg_map[SLJIT_R0] << 8) | reg_map[TMP_REG1]); #endif - case SLJIT_LUDIV: - case SLJIT_LSDIV: - if (compiler->scratches >= 3) + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); + SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping); + + if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) { FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */)); + FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */)); + } + else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3)) + FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */)); + #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - (op == SLJIT_LUDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_UDIVI ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif - if (compiler->scratches >= 3) - return push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */); + + if ((op >= SLJIT_UDIVI) && (compiler->scratches >= 3)) { + FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */)); + FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */)); + } + else if ((op >= SLJIT_UDIVI) || (compiler->scratches >= 3)) + return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_UDIVI ? 0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */); return SLJIT_SUCCESS; } Index: pcre/sljit/sljitLir.h =================================================================== --- pcre/sljit/sljitLir.h (revision 1553) +++ pcre/sljit/sljitLir.h (working copy) @@ -687,7 +687,7 @@ #define SLJIT_OP0_BASE 0 /* Flags: - (never set any flags) - Note: breakpoint instruction is not supported by all architectures (namely ppc) + Note: breakpoint instruction is not supported by all architectures (e.g. ppc) It falls back to SLJIT_NOP in those cases. */ #define SLJIT_BREAKPOINT (SLJIT_OP0_BASE + 0) /* Flags: - (never set any flags) @@ -696,24 +696,42 @@ #define SLJIT_NOP (SLJIT_OP0_BASE + 1) /* Flags: - (may destroy flags) Unsigned multiplication of SLJIT_R0 and SLJIT_R1. - Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */ + Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ #define SLJIT_LUMUL (SLJIT_OP0_BASE + 2) /* Flags: - (may destroy flags) Signed multiplication of SLJIT_R0 and SLJIT_R1. - Result goes to SLJIT_R1:SLJIT_R0 (high:low) word */ + Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ #define SLJIT_LSMUL (SLJIT_OP0_BASE + 3) /* Flags: I - (may destroy flags) Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. - The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1. - Note: if SLJIT_R1 contains 0, the behaviour is undefined. */ -#define SLJIT_LUDIV (SLJIT_OP0_BASE + 4) -#define SLJIT_ILUDIV (SLJIT_LUDIV | SLJIT_INT_OP) + The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. + Note: if SLJIT_R1 is 0, the behaviour is undefined. */ +#define SLJIT_UDIVMOD (SLJIT_OP0_BASE + 4) +#define SLJIT_IUDIVMOD (SLJIT_UDIVMOD | SLJIT_INT_OP) /* Flags: I - (may destroy flags) Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. - The result is placed in SLJIT_R0 and the remainder goes to SLJIT_R1. - Note: if SLJIT_R1 contains 0, the behaviour is undefined. */ -#define SLJIT_LSDIV (SLJIT_OP0_BASE + 5) -#define SLJIT_ILSDIV (SLJIT_LSDIV | SLJIT_INT_OP) + The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. + Note: if SLJIT_R1 is 0, the behaviour is undefined. + Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), + the behaviour is undefined. */ +#define SLJIT_SDIVMOD (SLJIT_OP0_BASE + 5) +#define SLJIT_ISDIVMOD (SLJIT_SDIVMOD | SLJIT_INT_OP) +/* Flags: I - (may destroy flags) + Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. + Note: if SLJIT_R1 is 0, the behaviour is undefined. + Note: SLJIT_SDIV is single precision divide. */ +#define SLJIT_UDIVI (SLJIT_OP0_BASE + 6) +#define SLJIT_IUDIVI (SLJIT_UDIVI | SLJIT_INT_OP) +/* Flags: I - (may destroy flags) + Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. + The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. + Note: if SLJIT_R1 is 0, the behaviour is undefined. + Note: if SLJIT_R1 is -1 and SLJIT_R0 is integer min (0x800..00), + the behaviour is undefined. + Note: SLJIT_SDIV is single precision divide. */ +#define SLJIT_SDIVI (SLJIT_OP0_BASE + 7) +#define SLJIT_ISDIVI (SLJIT_SDIVI | SLJIT_INT_OP) SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op); Index: pcre/sljit/sljitNativeARM_T2_32.c =================================================================== --- pcre/sljit/sljitNativeARM_T2_32.c (revision 1553) +++ pcre/sljit/sljitNativeARM_T2_32.c (working copy) @@ -1239,6 +1239,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op) { + sljit_sw saved_reg_list[3]; + sljit_sw saved_reg_count; + CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); @@ -1255,24 +1258,53 @@ | (reg_map[SLJIT_R0] << 12) | (reg_map[SLJIT_R0] << 16) | reg_map[SLJIT_R1]); - case SLJIT_LUDIV: - case SLJIT_LSDIV: - if (compiler->scratches >= 4) { - FAIL_IF(push_inst32(compiler, 0xf84d2d04 /* str r2, [sp, #-4]! */)); - FAIL_IF(push_inst32(compiler, 0xf84dcd04 /* str ip, [sp, #-4]! */)); - } else if (compiler->scratches >= 3) - FAIL_IF(push_inst32(compiler, 0xf84d2d08 /* str r2, [sp, #-8]! */)); + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); + SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12, bad_register_mapping); + + saved_reg_count = 0; + if (compiler->scratches >= 4) + saved_reg_list[saved_reg_count++] = 12; + if (compiler->scratches >= 3) + saved_reg_list[saved_reg_count++] = 2; + if (op >= SLJIT_UDIVI) + saved_reg_list[saved_reg_count++] = 1; + + if (saved_reg_count > 0) { + FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */)); + } + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */)); + } + } + #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, - (op == SLJIT_LUDIV ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); + ((op | 0x2) == SLJIT_UDIVI ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod)))); #else #error "Software divmod functions are needed" #endif - if (compiler->scratches >= 4) { - FAIL_IF(push_inst32(compiler, 0xf85dcb04 /* ldr ip, [sp], #4 */)); - return push_inst32(compiler, 0xf85d2b04 /* ldr r2, [sp], #4 */); - } else if (compiler->scratches >= 3) - return push_inst32(compiler, 0xf85d2b08 /* ldr r2, [sp], #8 */); + + if (saved_reg_count > 0) { + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */)); + } + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */)); + } + return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); + } return SLJIT_SUCCESS; } Index: pcre/sljit/sljitNativeARM_64.c =================================================================== --- pcre/sljit/sljitNativeARM_64.c (revision 1553) +++ pcre/sljit/sljitNativeARM_64.c (working copy) @@ -1087,14 +1087,20 @@ saved_regs_size += sizeof(sljit_sw); } local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET; - FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); + if (saved_regs_size > 0) + FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); } tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; prev = -1; for (i = SLJIT_S0; i >= tmp; i--) { if (prev == -1) { - prev = i; + if (!(offs & (1 << 15))) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); + offs += 1 << 15; continue; } FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); @@ -1104,7 +1110,12 @@ for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { if (prev == -1) { - prev = i; + if (!(offs & (1 << 15))) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5))); + offs += 1 << 15; continue; } FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); @@ -1112,8 +1123,7 @@ prev = -1; } - if (prev != -1) - FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(TMP_SP) | (offs >> 5))); + SLJIT_ASSERT(prev == -1); if (compiler->local_size > (63 * sizeof(sljit_sw))) { /* The local_size is already adjusted by the saved registers. */ @@ -1188,7 +1198,12 @@ prev = -1; for (i = SLJIT_S0; i >= tmp; i--) { if (prev == -1) { - prev = i; + if (!(offs & (1 << 15))) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); + offs += 1 << 15; continue; } FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); @@ -1198,7 +1213,12 @@ for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) { if (prev == -1) { - prev = i; + if (!(offs & (1 << 15))) { + prev = i; + continue; + } + FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5))); + offs += 1 << 15; continue; } FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs)); @@ -1206,13 +1226,12 @@ prev = -1; } - if (prev != -1) - FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(TMP_SP) | (offs >> 5))); + SLJIT_ASSERT(prev == -1); if (compiler->local_size <= (63 * sizeof(sljit_sw))) { FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR) | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15))); - } else { + } else if (saved_regs_size > 0) { FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10))); } @@ -1242,12 +1261,15 @@ FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); return push_inst(compiler, (op == SLJIT_LUMUL ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); - case SLJIT_LUDIV: - case SLJIT_LSDIV: + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); - FAIL_IF(push_inst(compiler, ((op == SLJIT_LUDIV ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); + FAIL_IF(push_inst(compiler, ((op == SLJIT_UDIVMOD ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); + case SLJIT_UDIVI: + case SLJIT_SDIVI: + return push_inst(compiler, ((op == SLJIT_UDIVI ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)); } return SLJIT_SUCCESS; Index: pcre/sljit/sljitNativePPC_common.c =================================================================== --- pcre/sljit/sljitNativePPC_common.c (revision 1553) +++ pcre/sljit/sljitNativePPC_common.c (working copy) @@ -1267,22 +1267,23 @@ FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); return push_inst(compiler, (op == SLJIT_LUMUL ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1)); #endif - case SLJIT_LUDIV: - case SLJIT_LSDIV: + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0))); #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (int_op) { - FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); - FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); - } else { - FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVDU : DIVD) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); - FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); - } - return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); + FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_UDIVMOD ? DIVWU : DIVW) : (op == SLJIT_UDIVMOD ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); #else - FAIL_IF(push_inst(compiler, (op == SLJIT_LUDIV ? DIVWU : DIVW) | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1))); + FAIL_IF(push_inst(compiler, (op == SLJIT_UDIVMOD ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1))); FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1))); +#endif return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1)); + case SLJIT_UDIVI: + case SLJIT_SDIVI: +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + return push_inst(compiler, (int_op ? (op == SLJIT_UDIVI ? DIVWU : DIVW) : (op == SLJIT_UDIVI ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); +#else + return push_inst(compiler, (op == SLJIT_UDIVI ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)); #endif } Index: pcre/sljit/sljitNativeX86_common.c =================================================================== --- pcre/sljit/sljitNativeX86_common.c (revision 1553) +++ pcre/sljit/sljitNativeX86_common.c (working copy) @@ -742,8 +742,10 @@ break; case SLJIT_LUMUL: case SLJIT_LSMUL: - case SLJIT_LUDIV: - case SLJIT_LSDIV: + case SLJIT_UDIVMOD: + case SLJIT_SDIVMOD: + case SLJIT_UDIVI: + case SLJIT_SDIVI: compiler->flags_saved = 0; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #ifdef _WIN64 @@ -761,9 +763,10 @@ #endif compiler->mode32 = op & SLJIT_INT_OP; #endif + SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments); op = GET_OPCODE(op); - if (op == SLJIT_LUDIV) { + if ((op | 0x2) == SLJIT_UDIVI) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0); @@ -774,7 +777,7 @@ *inst = XOR_r_rm; } - if (op == SLJIT_LSDIV) { + if ((op | 0x2) == SLJIT_SDIVI) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64) EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0); #endif @@ -805,10 +808,10 @@ FAIL_IF(!inst); INC_SIZE(2); *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); + *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]); #else #ifdef _WIN64 - size = (!compiler->mode32 || op >= SLJIT_LUDIV) ? 3 : 2; + size = (!compiler->mode32 || op >= SLJIT_UDIVMOD) ? 3 : 2; #else size = (!compiler->mode32) ? 3 : 2; #endif @@ -817,11 +820,11 @@ INC_SIZE(size); #ifdef _WIN64 if (!compiler->mode32) - *inst++ = REX_W | ((op >= SLJIT_LUDIV) ? REX_B : 0); - else if (op >= SLJIT_LUDIV) + *inst++ = REX_W | ((op >= SLJIT_UDIVMOD) ? REX_B : 0); + else if (op >= SLJIT_UDIVMOD) *inst++ = REX_B; *inst++ = GROUP_F7; - *inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); + *inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]); #else if (!compiler->mode32) *inst++ = REX_W; @@ -836,15 +839,21 @@ case SLJIT_LSMUL: *inst |= IMUL; break; - case SLJIT_LUDIV: + case SLJIT_UDIVMOD: + case SLJIT_UDIVI: *inst |= DIV; break; - case SLJIT_LSDIV: + case SLJIT_SDIVMOD: + case SLJIT_SDIVI: *inst |= IDIV; break; } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64) - EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); + if (op <= SLJIT_SDIVMOD) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); +#else + if (op >= SLJIT_UDIVI) + EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0); #endif break; } @@ -1905,60 +1914,62 @@ return SLJIT_SUCCESS; } - if (FAST_IS_REG(src1)) { + if (!(src1 & SLJIT_IMM)) { if (src2 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src2w) || compiler->mode32) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); FAIL_IF(!inst); *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w); FAIL_IF(!inst); *inst = TEST_rm_r; } #else - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w); FAIL_IF(!inst); *inst = GROUP_F7; #endif + return SLJIT_SUCCESS; } - else { + else if (FAST_IS_REG(src1)) { inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w); FAIL_IF(!inst); *inst = TEST_rm_r; + return SLJIT_SUCCESS; } - return SLJIT_SUCCESS; } - if (FAST_IS_REG(src2)) { + if (!(src2 & SLJIT_IMM)) { if (src1 & SLJIT_IMM) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (IS_HALFWORD(src1w) || compiler->mode32) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0); + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w); FAIL_IF(!inst); *inst = GROUP_F7; } else { FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w); FAIL_IF(!inst); *inst = TEST_rm_r; } #else - inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0); + inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w); FAIL_IF(!inst); *inst = GROUP_F7; #endif + return SLJIT_SUCCESS; } - else { + else if (FAST_IS_REG(src2)) { inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w); FAIL_IF(!inst); *inst = TEST_rm_r; + return SLJIT_SUCCESS; } - return SLJIT_SUCCESS; } EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); Index: pcre/sljit/sljitConfigInternal.h =================================================================== --- pcre/sljit/sljitConfigInternal.h (revision 1553) +++ pcre/sljit/sljitConfigInternal.h (working copy) @@ -468,7 +468,12 @@ #ifndef SLJIT_CALL -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#if (defined SLJIT_USE_CDECL_CALLING_CONVENTION && SLJIT_USE_CDECL_CALLING_CONVENTION) + +/* Force cdecl. */ +#define SLJIT_CALL + +#elif (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if defined(__GNUC__) && !defined(__APPLE__) Index: pcre/ChangeLog =================================================================== --- pcre/ChangeLog (revision 1553) +++ pcre/ChangeLog (working copy) @@ -1,6 +1,46 @@ ChangeLog for PCRE ------------------ +Note that the PCRE 8.xx series (PCRE1) is now in a bugfix-only state. All +development is happening in the PCRE2 10.xx series. + +Version 8.38 xx-xxx-xxxx +------------------------ + +1. If a group that contained a recursive back reference also contained a + forward reference subroutine call followed by a non-forward-reference + subroutine call, for example /.((?2)(?R)\1)()/, pcre2_compile() failed to + compile correct code, leading to undefined behaviour or an internally + detected error. This bug was discovered by the LLVM fuzzer. + +2. Quantification of certain items (e.g. atomic back references) could cause + incorrect code to be compiled when recursive forward references were + involved. For example, in this pattern: /(?1)()((((((\1++))\x85)+)|))/. + This bug was discovered by the LLVM fuzzer. + +3. A repeated conditional group whose condition was a reference by name caused + a buffer overflow if there was more than one group with the given name. + This bug was discovered by the LLVM fuzzer. + +4. A recursive back reference by name within a group that had the same name as + another group caused a buffer overflow. For example: + /(?J)(?'d'(?'d'\g{d}))/. This bug was discovered by the LLVM fuzzer. + +5. A forward reference by name to a group whose number is the same as the + current group, for example in this pattern: /(?|(\k'Pm')|(?'Pm'))/, caused + a buffer overflow at compile time. This bug was discovered by the LLVM + fuzzer. + +6. A lookbehind assertion within a set of mutually recursive subpatterns could + provoke a buffer overflow. This bug was discovered by the LLVM fuzzer. + +7. Another buffer overflow bug involved duplicate named groups with a + reference between their definition, with a group that reset capture + numbers, for example: /(?J:(?|(?'R')(\k'R')|((?'R'))))/. This has been + fixed by always allowing for more memory, even if not needed. (A proper fix + is implemented in PCRE2, but it involves more refactoring.) + + Version 8.37 28-April-2015 -------------------------- Index: pcre/testdata/testoutput1 =================================================================== --- pcre/testdata/testoutput1 (revision 1553) +++ pcre/testdata/testoutput1 (working copy) @@ -9429,4 +9429,9 @@ 0: aaaaaaaaa 1: a +"(?|(\k'Pm')|(?'Pm'))" + abcd + 0: + 1: + /-- End of testinput1 --/ Index: pcre/testdata/testoutput2 =================================================================== --- pcre/testdata/testoutput2 (revision 1553) +++ pcre/testdata/testoutput2 (working copy) @@ -14423,4 +14423,42 @@ /((?2){73}(?2))((?1))/ +/.((?2)(?R)\1)()/BZ +------------------------------------------------------------------ + Bra + Any + Once + CBra 1 + Recurse + Recurse + \1 + Ket + Ket + CBra 2 + Ket + Ket + End +------------------------------------------------------------------ + +/(?1)()((((((\1++))\x85)+)|))/ + +/(\9*+(?2);\3++()2|)++{/ +Failed: reference to non-existent subpattern at offset 22 + +/\V\x85\9*+((?2)\3++()2)*:2/ +Failed: reference to non-existent subpattern at offset 26 + +/(((?(R)){0,2}) (?''((?'R')((?'R')))))/J + +/(((?(X)){0,2}) (?''((?'X')((?'X')))))/J + +/(((?(R)){0,2}) (?''((?'X')((?'R')))))/ + +"(?J)(?'d'(?'d'\g{d}))" + +".*?\h.+.\.+\R*?\xd(?i)(?=!(?=b`b`b`\`b\xa9b!)`\a`bbbbbbbbbbbbb`bbbbbbbbbbbb*R\x85bbbbbbb\C?{((?2)(?))(( +\H){8(?<=(?1){29}\xa8bbbb\x16\xd\xc6^($(?a)...(?P=a)bbb(?P>a)d/BM -Memory allocation (code space): 61 +Memory allocation (code space): 77 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 @@ -748,4 +748,21 @@ 22 End ------------------------------------------------------------------ +/.((?2)(?R)\1)()/B +------------------------------------------------------------------ + 0 23 Bra + 2 Any + 3 13 Once + 5 9 CBra 1 + 8 18 Recurse + 10 0 Recurse + 12 \1 + 14 9 Ket + 16 13 Ket + 18 3 CBra 2 + 21 3 Ket + 23 23 Ket + 25 End +------------------------------------------------------------------ + /-- End of testinput11 --/ Index: pcre/testdata/testinput11 =================================================================== --- pcre/testdata/testinput11 (revision 1553) +++ pcre/testdata/testinput11 (working copy) @@ -136,4 +136,6 @@ /((?+1)(\1))/B +/.((?2)(?R)\1)()/B + /-- End of testinput11 --/ Index: pcre/testdata/testoutput11-8 =================================================================== --- pcre/testdata/testoutput11-8 (revision 1553) +++ pcre/testdata/testoutput11-8 (working copy) @@ -231,7 +231,7 @@ ------------------------------------------------------------------ /(?Pa)...(?P=a)bbb(?P>a)d/BM -Memory allocation (code space): 38 +Memory allocation (code space): 50 ------------------------------------------------------------------ 0 30 Bra 3 7 CBra 1 @@ -748,4 +748,21 @@ 34 End ------------------------------------------------------------------ +/.((?2)(?R)\1)()/B +------------------------------------------------------------------ + 0 35 Bra + 3 Any + 4 20 Once + 7 14 CBra 1 + 12 27 Recurse + 15 0 Recurse + 18 \1 + 21 14 Ket + 24 20 Ket + 27 5 CBra 2 + 32 5 Ket + 35 35 Ket + 38 End +------------------------------------------------------------------ + /-- End of testinput11 --/ Index: pcre/testdata/testinput1 =================================================================== --- pcre/testdata/testinput1 (revision 1553) +++ pcre/testdata/testinput1 (working copy) @@ -5730,4 +5730,7 @@ "(?1)(?#?'){8}(a)" baaaaaaaaac +"(?|(\k'Pm')|(?'Pm'))" + abcd + /-- End of testinput1 --/ Index: pcre/testdata/testinput2 =================================================================== --- pcre/testdata/testinput2 (revision 1553) +++ pcre/testdata/testinput2 (working copy) @@ -4152,4 +4152,25 @@ /((?2){73}(?2))((?1))/ +/.((?2)(?R)\1)()/BZ + +/(?1)()((((((\1++))\x85)+)|))/ + +/(\9*+(?2);\3++()2|)++{/ + +/\V\x85\9*+((?2)\3++()2)*:2/ + +/(((?(R)){0,2}) (?''((?'R')((?'R')))))/J + +/(((?(X)){0,2}) (?''((?'X')((?'X')))))/J + +/(((?(R)){0,2}) (?''((?'X')((?'R')))))/ + +"(?J)(?'d'(?'d'\g{d}))" + +".*?\h.+.\.+\R*?\xd(?i)(?=!(?=b`b`b`\`b\xa9b!)`\a`bbbbbbbbbbbbb`bbbbbbbbbbbb*R\x85bbbbbbb\C?{((?2)(?))(( +\H){8(?<=(?1){29}\xa8bbbb\x16\xd\xc6^($(?a)...(?P=a)bbb(?P>a)d/BM -Memory allocation (code space): 125 +Memory allocation (code space): 157 ------------------------------------------------------------------ 0 24 Bra 2 5 CBra 1 @@ -748,4 +748,21 @@ 22 End ------------------------------------------------------------------ +/.((?2)(?R)\1)()/B +------------------------------------------------------------------ + 0 23 Bra + 2 Any + 3 13 Once + 5 9 CBra 1 + 8 18 Recurse + 10 0 Recurse + 12 \1 + 14 9 Ket + 16 13 Ket + 18 3 CBra 2 + 21 3 Ket + 23 23 Ket + 25 End +------------------------------------------------------------------ + /-- End of testinput11 --/ Index: pcre/pcre_compile.c =================================================================== --- pcre/pcre_compile.c (revision 1553) +++ pcre/pcre_compile.c (working copy) @@ -1799,7 +1799,7 @@ case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += PRIV(OP_lengths)[*cc]; + cc += 1 + LINK_SIZE; break; /* Skip over things that don't match chars */ @@ -3985,11 +3985,12 @@ is called, the partially compiled regex must be temporarily terminated with OP_END. -This function has been extended with the possibility of forward references for -recursions and subroutine calls. It must also check the list of such references -for the group we are dealing with. If it finds that one of the recursions in -the current group is on this list, it adjusts the offset in the list, not the -value in the reference (which is a group number). +This function has been extended to cope with forward references for recursions +and subroutine calls. It must check the list of such references for the +group we are dealing with. If it finds that one of the recursions in the +current group is on this list, it does not adjust the value in the reference +(which is a group number). After the group has been scanned, all the offsets in +the forward reference list for the group are adjusted. Arguments: group points to the start of the group @@ -4005,29 +4006,21 @@ adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd, size_t save_hwm_offset) { +int offset; +pcre_uchar *hc; pcre_uchar *ptr = group; while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL) { - int offset; - pcre_uchar *hc; - - /* See if this recursion is on the forward reference list. If so, adjust the - reference. */ - for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm; hc += LINK_SIZE) { offset = (int)GET(hc, 0); - if (cd->start_code + offset == ptr + 1) - { - PUT(hc, 0, offset + adjust); - break; - } + if (cd->start_code + offset == ptr + 1) break; } - /* Otherwise, adjust the recursion offset if it's after the start of this - group. */ + /* If we have not found this recursion on the forward reference list, adjust + the recursion's offset if it's after the start of this group. */ if (hc >= cd->hwm) { @@ -4037,6 +4030,15 @@ ptr += 1 + LINK_SIZE; } + +/* Now adjust all forward reference offsets for the group. */ + +for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm; + hc += LINK_SIZE) + { + offset = (int)GET(hc, 0); + PUT(hc, 0, offset + adjust); + } } @@ -4465,7 +4467,7 @@ const pcre_uchar *nestptr = NULL; pcre_uchar *previous = NULL; pcre_uchar *previous_callout = NULL; -size_t save_hwm_offset = 0; +size_t item_hwm_offset = 0; pcre_uint8 classbits[32]; /* We can fish out the UTF-8 setting once and for all into a BOOL, but we @@ -4767,6 +4769,7 @@ zeroreqchar = reqchar; zeroreqcharflags = reqcharflags; previous = code; + item_hwm_offset = cd->hwm - cd->start_workspace; *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY; break; @@ -4818,6 +4821,7 @@ /* Handle a real character class. */ previous = code; + item_hwm_offset = cd->hwm - cd->start_workspace; /* PCRE supports POSIX class stuff inside a class. Perl gives an error if they are encountered at the top level, so we'll do that too. */ @@ -5930,7 +5934,7 @@ { register int i; int len = (int)(code - previous); - size_t base_hwm_offset = save_hwm_offset; + size_t base_hwm_offset = item_hwm_offset; pcre_uchar *bralink = NULL; pcre_uchar *brazeroptr = NULL; @@ -5985,7 +5989,7 @@ if (repeat_max <= 1) /* Covers 0, 1, and unlimited */ { *code = OP_END; - adjust_recurse(previous, 1, utf, cd, save_hwm_offset); + adjust_recurse(previous, 1, utf, cd, item_hwm_offset); memmove(previous + 1, previous, IN_UCHARS(len)); code++; if (repeat_max == 0) @@ -6009,7 +6013,7 @@ { int offset; *code = OP_END; - adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset); + adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, item_hwm_offset); memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len)); code += 2 + LINK_SIZE; *previous++ = OP_BRAZERO + repeat_type; @@ -6267,7 +6271,7 @@ { int nlen = (int)(code - bracode); *code = OP_END; - adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset); + adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, item_hwm_offset); memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen)); code += 1 + LINK_SIZE; nlen += 1 + LINK_SIZE; @@ -6401,7 +6405,7 @@ else { *code = OP_END; - adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset); + adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset); memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); code += 1 + LINK_SIZE; len += 1 + LINK_SIZE; @@ -6450,7 +6454,7 @@ default: *code = OP_END; - adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset); + adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset); memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len)); code += 1 + LINK_SIZE; len += 1 + LINK_SIZE; @@ -6623,7 +6627,7 @@ newoptions = options; skipbytes = 0; bravalue = OP_CBRA; - save_hwm_offset = cd->hwm - cd->start_workspace; + item_hwm_offset = cd->hwm - cd->start_workspace; reset_bracount = FALSE; /* Deal with the extended parentheses; all are introduced by '?', and the @@ -6769,7 +6773,7 @@ ptr++; } namelen = (int)(ptr - name); - if (lengthptr != NULL) *lengthptr += IMM2_SIZE; + if (lengthptr != NULL) skipbytes += IMM2_SIZE; } /* Check the terminator */ @@ -7173,14 +7177,26 @@ number. If the name is not found, set the value to 0 for a forward reference. */ + recno = 0; ng = cd->named_groups; for (i = 0; i < cd->names_found; i++, ng++) { if (namelen == ng->length && STRNCMP_UC_UC(name, ng->name, namelen) == 0) - break; + { + open_capitem *oc; + recno = ng->number; + if (is_recurse) break; + for (oc = cd->open_caps; oc != NULL; oc = oc->next) + { + if (oc->number == recno) + { + oc->flag = TRUE; + break; + } + } + } } - recno = (i < cd->names_found)? ng->number : 0; /* Count named back references. */ @@ -7191,6 +7207,19 @@ 16-bit data item. */ *lengthptr += IMM2_SIZE; + + /* If this is a forward reference and we are within a (?|...) group, + the reference may end up as the number of a group which we are + currently inside, that is, it could be a recursive reference. In the + real compile this will be picked up and the reference wrapped with + OP_ONCE to make it atomic, so we must space in case this occurs. */ + + /* In fact, this can happen for a non-forward reference because + another group with the same number might be created later. This + issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance + only mode, we finesse the bug by allowing more memory always. */ + + /* if (recno == 0) */ *lengthptr += 2 + 2*LINK_SIZE; } /* In the real compile, search the name table. We check the name @@ -7247,6 +7276,7 @@ { if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; previous = code; + item_hwm_offset = cd->hwm - cd->start_workspace; *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF; PUT2INC(code, 0, index); PUT2INC(code, 0, count); @@ -7360,6 +7390,7 @@ HANDLE_RECURSION: previous = code; + item_hwm_offset = cd->hwm - cd->start_workspace; called = cd->start_code; /* When we are actually compiling, find the bracket that is being @@ -7561,7 +7592,11 @@ previous = NULL; cd->iscondassert = FALSE; } - else previous = code; + else + { + previous = code; + item_hwm_offset = cd->hwm - cd->start_workspace; + } *code = bravalue; tempcode = code; @@ -7809,7 +7844,7 @@ const pcre_uchar *p; pcre_uint32 cf; - save_hwm_offset = cd->hwm - cd->start_workspace; /* Normally this is set when '(' is read */ + item_hwm_offset = cd->hwm - cd->start_workspace; /* Normally this is set when '(' is read */ terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)? CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE; @@ -7877,6 +7912,7 @@ HANDLE_REFERENCE: if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE; previous = code; + item_hwm_offset = cd->hwm - cd->start_workspace; *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF; PUT2INC(code, 0, recno); cd->backref_map |= (recno < 32)? (1 << recno) : 1; @@ -7906,6 +7942,7 @@ if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr)) goto FAILED; previous = code; + item_hwm_offset = cd->hwm - cd->start_workspace; *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP; *code++ = ptype; *code++ = pdata; @@ -7946,6 +7983,7 @@ { previous = (escape > ESC_b && escape < ESC_Z)? code : NULL; + item_hwm_offset = cd->hwm - cd->start_workspace; *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape; } } @@ -7989,6 +8027,7 @@ ONE_CHAR: previous = code; + item_hwm_offset = cd->hwm - cd->start_workspace; /* For caseless UTF-8 mode when UCP support is available, check whether this character has more than one other case. If so, generate a special