Browse Source

Merge branch 'crazyjul-feature-accept_literal_curly_brace'

pull/539/head
Sami Vaarala 9 years ago
parent
commit
809176512a
  1. 1
      AUTHORS.rst
  2. 10
      config/config-options/DUK_USE_NONSTD_REGEXP_BRACES.yaml
  3. 54
      src/duk_lexer.c
  4. 75
      tests/ecmascript/test-regexp-non-std-brace.js

1
AUTHORS.rst

@ -65,6 +65,7 @@ bugs, provided ideas, etc; roughly in order of appearance):
* https://github.com/chris-y
* Laurent Zubiaur (https://github.com/lzubiaur)
* Ole André Vadla Ravnås (https://github.com/oleavr)
* Julien Hamaide (https://github.com/crazyjul)
If you are accidentally missing from this list, send me an e-mail
(``sami.vaarala@iki.fi``) and I'll fix the omission.

10
config/config-options/DUK_USE_NONSTD_REGEXP_BRACES.yaml

@ -0,0 +1,10 @@
define: DUK_USE_NONSTD_REGEXP_BRACES
feature_enables: DUK_OPT_NONSTD_REGEXP_BRACES
introduced: 1.3.2
default: true
tags:
- ecmascript
description: >
Enable support for a non-standard literal '{' in regular expressions.
Ecmascript requires curly braces to be escaped, but most regexp engines
accept them unescaped when they do not form a valid quantifier; with this
option such braces are matched as literal characters. This option is recommended.

54
src/duk_lexer.c

@ -1609,26 +1609,34 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
duk_uint_fast32_t val1 = 0;
duk_uint_fast32_t val2 = DUK_RE_QUANTIFIER_INFINITE;
duk_small_int_t digits = 0;
/*
* Store lexer position, restoring if quantifier is invalid
*/
#ifdef DUK_USE_NONSTD_REGEXP_BRACES
duk_lexer_point lex_pt;
DUK_LEXER_GETPOINT(lex_ctx, &lex_pt);
#endif
for (;;) {
DUK__ADVANCECHARS(lex_ctx, 1); /* eat '{' on entry */
DUK__ADVANCECHARS(lex_ctx, 1); /* eat '{' on entry */
x = DUK__L0();
if (DUK__ISDIGIT(x)) {
digits++;
val1 = val1 * 10 + (duk_uint_fast32_t) duk__hexval(lex_ctx, x);
} else if (x == ',') {
if (digits >= DUK__MAX_RE_QUANT_DIGITS) {
DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
"invalid regexp quantifier (too many digits)");
}
digits++;
val1 = val1 * 10 + (duk_uint_fast32_t) duk__hexval(lex_ctx, x);
} else if (x == ',') {
if (val2 != DUK_RE_QUANTIFIER_INFINITE) {
DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
"invalid regexp quantifier (double comma)");
goto invalid_quantifier;
}
if (DUK__L1() == '}') {
if ( DUK__L1() == '}') {
/* form: { DecimalDigits , }, val1 = min count */
if (digits == 0) {
DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
"invalid regexp quantifier (missing digits)");
goto invalid_quantifier;
}
out_token->qmin = val1;
out_token->qmax = DUK_RE_QUANTIFIER_INFINITE;
@ -1639,9 +1647,12 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
val1 = 0;
digits = 0; /* not strictly necessary because of lookahead '}' above */
} else if (x == '}') {
if (digits == 0) {
if (digits >= DUK__MAX_RE_QUANT_DIGITS) {
DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
"invalid regexp quantifier (missing digits)");
"invalid regexp quantifier (too many digits)");
}
if (digits == 0) {
goto invalid_quantifier;
}
if (val2 != DUK_RE_QUANTIFIER_INFINITE) {
/* val2 = min count, val1 = max count */
@ -1655,8 +1666,7 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
DUK__ADVANCECHARS(lex_ctx, 1);
break;
} else {
DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
"invalid regexp quantifier (unknown char)");
goto invalid_quantifier;
}
}
if (DUK__L0() == '?') {
@ -1667,6 +1677,18 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
}
advtok = DUK__ADVTOK(0, DUK_RETOK_QUANTIFIER);
break;
invalid_quantifier:
#ifdef DUK_USE_NONSTD_REGEXP_BRACES
/* Failed to match the quantifier, restore lexer */
DUK_LEXER_SETPOINT(lex_ctx, &lex_pt);
advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_CHAR);
out_token->num = '{';
#else
DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
"invalid regexp quantifier");
#endif
break;
}
case '.': {
advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_PERIOD);
@ -1806,8 +1828,10 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
}
break;
}
case ']':
case '}': {
#ifndef DUK_USE_NONSTD_REGEXP_BRACES
case '}':
#endif
case ']': {
/* Although these could be parsed as PatternCharacters unambiguously (here),
* E5 Section 15.10.1 grammar explicitly forbids these as PatternCharacters.
*/

75
tests/ecmascript/test-regexp-non-std-brace.js

@ -0,0 +1,75 @@
/*
 *  Non-standard regexp braces: a '{' which does not start a valid
 *  quantifier, and an unescaped '}', are matched as literal characters.
 */

/* Execute 're' against 'input' and print the matched substring. */
function printMatch(re, input) {
    var m = re.exec(input);
    print(m[0]);
}

/*===
a{abc}
a{1b}
a{2,b}
===*/

/* Any character that cannot appear in a quantifier cancels quantifier
 * parsing, and the '{' is then treated as a literal.
 */

printMatch(/a{.*}/, "aa{abc}");
printMatch(/a{1.}/, "aa{1b}");
printMatch(/a{2,.}/, "aa{2,b}");

/*===
a{abc}
===*/

/* An unescaped closing brace is accepted as a literal. */

printMatch(/a\{.*}/, "aa{abc}");

/*===
a{1}
a{1,2}
===*/

/* Would be a valid quantifier, except that the closing brace is escaped. */

printMatch(/a{1\}/, "aa{1}");
printMatch(/a{1,2\}/, "aa{1,2}");

/*===
{1111111111111111111111111
===*/

/* A long digit run which is never followed by ',' or '}' is not an error;
 * the brace and the digits are matched literally.
 */

printMatch(/{1111111111111111111111111/, '{1111111111111111111111111');

/*===
a{}
a{,}
a{1,2,3}
===*/

/* When quantifier parsing fails, the '{' is treated as a literal brace. */

printMatch(/a{}/, 'a{}');
printMatch(/a{,}/, 'a{,}');
printMatch(/a{1,2,3}/, 'a{1,2,3}');

/*===
SyntaxError
===*/

/* Not every malformed quantifier falls back to a literal brace: an
 * otherwise well-formed quantifier with too many digits is still rejected
 * with a SyntaxError.
 */

try {
    eval("/{1111111111111111111111111}/.exec('foo');");
    print("no exception");
} catch (err) {
    print(err.name);
}
Loading…
Cancel
Save