Browse Source

Minor regexp executor optimization

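Share the end-of-input check by moving it into duk__inp_get_cp(), which now returns a negative value when out of input, and eliminate a few unnecessary comparisons in the callers.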
pull/977/head
Sami Vaarala 8 years ago
parent
commit
4ca79a71b8
  1. 56
      src-input/duk_regexp_executor.c

56
src-input/duk_regexp_executor.c

@ -108,10 +108,15 @@ DUK_LOCAL const duk_uint8_t *duk__utf8_advance(duk_hthread *thr, const duk_uint8
/* Get a (possibly canonicalized) input character from current sp. The input /* Get a (possibly canonicalized) input character from current sp. The input
* itself is never modified, and captures always record non-canonicalized * itself is never modified, and captures always record non-canonicalized
* characters even in case-insensitive matching. * characters even in case-insensitive matching. Return <0 if out of input.
*/ */
DUK_LOCAL duk_codepoint_t duk__inp_get_cp(duk_re_matcher_ctx *re_ctx, const duk_uint8_t **sp) { DUK_LOCAL duk_codepoint_t duk__inp_get_cp(duk_re_matcher_ctx *re_ctx, const duk_uint8_t **sp) {
duk_codepoint_t res = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(re_ctx->thr, sp, re_ctx->input, re_ctx->input_end); duk_codepoint_t res;
if (*sp >= re_ctx->input_end) {
return -1;
}
res = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(re_ctx->thr, sp, re_ctx->input, re_ctx->input_end);
if (re_ctx->re_flags & DUK_RE_FLAG_IGNORE_CASE) { if (re_ctx->re_flags & DUK_RE_FLAG_IGNORE_CASE) {
res = duk_unicode_re_canonicalize_char(re_ctx->thr, res); res = duk_unicode_re_canonicalize_char(re_ctx->thr, res);
} }
@ -194,10 +199,17 @@ DUK_LOCAL const duk_uint8_t *duk__match_regexp(duk_re_matcher_ctx *re_ctx, const
c1 = (duk_codepoint_t) duk__bc_get_u32(re_ctx, &pc); c1 = (duk_codepoint_t) duk__bc_get_u32(re_ctx, &pc);
DUK_ASSERT(!(re_ctx->re_flags & DUK_RE_FLAG_IGNORE_CASE) || DUK_ASSERT(!(re_ctx->re_flags & DUK_RE_FLAG_IGNORE_CASE) ||
c1 == duk_unicode_re_canonicalize_char(re_ctx->thr, c1)); /* canonicalized by compiler */ c1 == duk_unicode_re_canonicalize_char(re_ctx->thr, c1)); /* canonicalized by compiler */
if (sp >= re_ctx->input_end) { c2 = duk__inp_get_cp(re_ctx, &sp);
/* No need to check for c2 < 0 (end of input): because c1 >= 0, it
* will fail the match below automatically and cause goto fail.
*/
#if 0
if (c2 < 0) {
goto fail; goto fail;
} }
c2 = duk__inp_get_cp(re_ctx, &sp); #endif
DUK_ASSERT(c1 >= 0);
DUK_DDD(DUK_DDDPRINT("char match, c1=%ld, c2=%ld", (long) c1, (long) c2)); DUK_DDD(DUK_DDDPRINT("char match, c1=%ld, c2=%ld", (long) c1, (long) c2));
if (c1 != c2) { if (c1 != c2) {
goto fail; goto fail;
@ -207,11 +219,8 @@ DUK_LOCAL const duk_uint8_t *duk__match_regexp(duk_re_matcher_ctx *re_ctx, const
case DUK_REOP_PERIOD: { case DUK_REOP_PERIOD: {
duk_codepoint_t c; duk_codepoint_t c;
if (sp >= re_ctx->input_end) {
goto fail;
}
c = duk__inp_get_cp(re_ctx, &sp); c = duk__inp_get_cp(re_ctx, &sp);
if (duk_unicode_is_line_terminator(c)) { if (c < 0 || duk_unicode_is_line_terminator(c)) {
/* E5 Sections 15.10.2.8, 7.3 */ /* E5 Sections 15.10.2.8, 7.3 */
goto fail; goto fail;
} }
@ -224,10 +233,10 @@ DUK_LOCAL const duk_uint8_t *duk__match_regexp(duk_re_matcher_ctx *re_ctx, const
duk_small_int_t match; duk_small_int_t match;
n = duk__bc_get_u32(re_ctx, &pc); n = duk__bc_get_u32(re_ctx, &pc);
if (sp >= re_ctx->input_end) { c = duk__inp_get_cp(re_ctx, &sp);
if (c < 0) {
goto fail; goto fail;
} }
c = duk__inp_get_cp(re_ctx, &sp);
match = 0; match = 0;
while (n) { while (n) {
@ -278,14 +287,14 @@ DUK_LOCAL const duk_uint8_t *duk__match_regexp(duk_re_matcher_ctx *re_ctx, const
duk_codepoint_t c; duk_codepoint_t c;
const duk_uint8_t *tmp_sp; const duk_uint8_t *tmp_sp;
if (sp >= re_ctx->input_end) { tmp_sp = sp;
c = duk__inp_get_cp(re_ctx, &tmp_sp);
if (c < 0) {
break; break;
} }
if (!(re_ctx->re_flags & DUK_RE_FLAG_MULTILINE)) { if (!(re_ctx->re_flags & DUK_RE_FLAG_MULTILINE)) {
goto fail; goto fail;
} }
tmp_sp = sp;
c = duk__inp_get_cp(re_ctx, &tmp_sp);
if (duk_unicode_is_line_terminator(c)) { if (duk_unicode_is_line_terminator(c)) {
/* E5 Sections 15.10.2.8, 7.3 */ /* E5 Sections 15.10.2.8, 7.3 */
break; break;
@ -622,11 +631,17 @@ DUK_LOCAL const duk_uint8_t *duk__match_regexp(duk_re_matcher_ctx *re_ctx, const
* valid compiled regexps cannot write a saved[] entry * valid compiled regexps cannot write a saved[] entry
* which points to outside the string. * which points to outside the string.
*/ */
if (sp >= re_ctx->input_end) {
goto fail;
}
c1 = duk__inp_get_cp(re_ctx, &p); c1 = duk__inp_get_cp(re_ctx, &p);
DUK_ASSERT(c1 >= 0);
c2 = duk__inp_get_cp(re_ctx, &sp); c2 = duk__inp_get_cp(re_ctx, &sp);
/* No need for an explicit c2 < 0 check: because c1 >= 0,
* the comparison will always fail if c2 < 0.
*/
#if 0
if (c2 < 0) {
goto fail;
}
#endif
if (c1 != c2) { if (c1 != c2) {
goto fail; goto fail;
} }
@ -929,22 +944,17 @@ DUK_LOCAL void duk__regexp_match_helper(duk_hthread *thr, duk_small_int_t force_
* as 'undefined'. The same is done when saved[] pointers are insane * as 'undefined'. The same is done when saved[] pointers are insane
* (this should, of course, never happen in practice). * (this should, of course, never happen in practice).
*/ */
if (re_ctx.saved[i] && re_ctx.saved[i+1] && re_ctx.saved[i+1] >= re_ctx.saved[i]) { if (re_ctx.saved[i] && re_ctx.saved[i + 1] && re_ctx.saved[i + 1] >= re_ctx.saved[i]) {
duk_hstring *h_saved;
duk_push_lstring(ctx, duk_push_lstring(ctx,
(const char *) re_ctx.saved[i], (const char *) re_ctx.saved[i],
(duk_size_t) (re_ctx.saved[i+1] - re_ctx.saved[i])); (duk_size_t) (re_ctx.saved[i+1] - re_ctx.saved[i]));
h_saved = duk_get_hstring(ctx, -1);
DUK_ASSERT(h_saved != NULL);
if (i == 0) { if (i == 0) {
/* Assumes that saved[0] and saved[1] are always /* Assumes that saved[0] and saved[1] are always
* set by regexp bytecode (if not, char_end_offset * set by regexp bytecode (if not, char_end_offset
* will be zero). Also assumes clen reflects the * will be zero). Also assumes clen reflects the
* correct char length. * correct char length.
*/ */
char_end_offset = char_offset + DUK_HSTRING_GET_CHARLEN(h_saved); char_end_offset = char_offset + (duk_uint32_t) duk_get_length(ctx, -1); /* add charlen */
} }
} else { } else {
duk_push_undefined(ctx); duk_push_undefined(ctx);

Loading…
Cancel
Save