Merge branch 'crazyjul-feature-accept_literal_curly_brace'

9 years ago · 809176512a
4 changed files with 125 additions and 15 deletions
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@ -65,6 +65,7 @@ bugs, provided ideas, etc; roughly in order of appearance):
 * https://github.com/chris-y
 * Laurent Zubiaur (https://github.com/lzubiaur)
 * Ole André Vadla Ravnås (https://github.com/oleavr)
+* Julien Hamaide (https://github.com/crazyjul)

 If you are accidentally missing from this list, send me an e-mail
 (``sami.vaarala@iki.fi``) and I'll fix the omission.
--- a/config/config-options/DUK_USE_NONSTD_REGEXP_BRACES.yaml
+++ b/config/config-options/DUK_USE_NONSTD_REGEXP_BRACES.yaml
@ -0,0 +1,10 @@
+define: DUK_USE_NONSTD_REGEXP_BRACES
+feature_enables: DUK_OPT_NONSTD_REGEXP_BRACES
+introduced: 1.3.2
+default: true
+tags:
+  - ecmascript
+description: >
+  Enable support for non-standard literal '{' and '}'. Ecmascript requires
+  curly braces to be escaped, but most regexp engines accept them unescaped
+  when they do not form a valid quantifier. This option is recommended.
--- a/src/duk_lexer.c
+++ b/src/duk_lexer.c
@ -1609,26 +1609,34 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
 		duk_uint_fast32_t val1 = 0;
 		duk_uint_fast32_t val2 = DUK_RE_QUANTIFIER_INFINITE;
 		duk_small_int_t digits = 0;
+
+		/*
+		 *  Store lexer position, restoring if quantifier is invalid
+		 */
+
+#ifdef DUK_USE_NONSTD_REGEXP_BRACES
+		duk_lexer_point lex_pt;
+		DUK_LEXER_GETPOINT(lex_ctx, &lex_pt);
+#endif
+
 		for (;;) {
-			DUK__ADVANCECHARS(lex_ctx, 1);  /* eat '{' on entry */
+			DUK__ADVANCECHARS(lex_ctx, 1); /* eat '{' on entry */
 			x = DUK__L0();
 			if (DUK__ISDIGIT(x)) {
+				digits++;
+				val1 = val1 * 10 + (duk_uint_fast32_t) duk__hexval(lex_ctx, x);
+			} else if (x == ',') {
 				if (digits >= DUK__MAX_RE_QUANT_DIGITS) {
 					DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
 					          "invalid regexp quantifier (too many digits)");
 				}
-				digits++;
-				val1 = val1 * 10 + (duk_uint_fast32_t) duk__hexval(lex_ctx, x);
-			} else if (x == ',') {
 				if (val2 != DUK_RE_QUANTIFIER_INFINITE) {
-					DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
-					          "invalid regexp quantifier (double comma)");
+					goto invalid_quantifier;
 				}
-				if (DUK__L1() == '}') {
+				if ( DUK__L1() == '}') {
 					/* form: { DecimalDigits , }, val1 = min count */
 					if (digits == 0) {
-						DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
-						          "invalid regexp quantifier (missing digits)");
+						goto invalid_quantifier;
 					}
 					out_token->qmin = val1;
 					out_token->qmax = DUK_RE_QUANTIFIER_INFINITE;
@ -1639,9 +1647,12 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
 				val1 = 0;
 				digits = 0;  /* not strictly necessary because of lookahead '}' above */
 			} else if (x == '}') {
-				if (digits == 0) {
+				if (digits >= DUK__MAX_RE_QUANT_DIGITS) {
 					DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
-					          "invalid regexp quantifier (missing digits)");
+						"invalid regexp quantifier (too many digits)");
+				}
+				if (digits == 0) {
+					goto invalid_quantifier;
 				}
 				if (val2 != DUK_RE_QUANTIFIER_INFINITE) {
 					/* val2 = min count, val1 = max count */
@ -1655,8 +1666,7 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
 				DUK__ADVANCECHARS(lex_ctx, 1);
 				break;
 			} else {
-				DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
-				          "invalid regexp quantifier (unknown char)");
+				goto invalid_quantifier;
 			}
 		}
 		if (DUK__L0() == '?') {
@ -1667,6 +1677,18 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
 		}
 		advtok = DUK__ADVTOK(0, DUK_RETOK_QUANTIFIER);
 		break;
+invalid_quantifier:
+#ifdef DUK_USE_NONSTD_REGEXP_BRACES
+
+		/* Failed to match the quantifier, restore lexer */
+		DUK_LEXER_SETPOINT(lex_ctx, &lex_pt);
+		advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_CHAR);
+		out_token->num = '{';
+#else
+		DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR,
+				"invalid regexp quantifier");
+#endif
+		break;
 	}
 	case '.': {
 		advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_PERIOD);
@ -1806,8 +1828,10 @@ DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token
 		}
 		break;
 	}
-	case ']':
-	case '}': {
+#ifndef DUK_USE_NONSTD_REGEXP_BRACES
+	case '}':
+#endif
+	case ']': {
 		/* Although these could be parsed as PatternCharacters unambiguously (here),
 		 * E5 Section 15.10.1 grammar explicitly forbids these as PatternCharacters.
 		 */
--- a/tests/ecmascript/test-regexp-non-std-brace.js
+++ b/tests/ecmascript/test-regexp-non-std-brace.js
@ -0,0 +1,75 @@
+var t;
+
+/*===
+a{abc}
+a{1b}
+a{2,b}
+===*/
+
+// Any invalid character cancels quantifier parsing
+
+t = /a{.*}/.exec("aa{abc}");
+print(t[0]);
+t = /a{1.}/.exec("aa{1b}");
+print(t[0]);
+t = /a{2,.}/.exec("aa{2,b}");
+print(t[0]);
+
+/*===
+a{abc}
+===*/
+
+// Closing brace is allowed
+t = /a\{.*}/.exec("aa{abc}");
+print(t[0]);
+
+/*===
+a{1}
+a{1,2}
+===*/
+
+// Would be a valid quantifier, except that the closing brace is escaped
+t = /a{1\}/.exec("aa{1}");
+print(t[0]);
+t = /a{1,2\}/.exec("aa{1,2}");
+print(t[0]);
+
+/*===
+{1111111111111111111111111
+===*/
+
+// A long run of digits is not an error unless it is terminated by ',' or '}'
+t = /{1111111111111111111111111/.exec('{1111111111111111111111111');
+print(t[0]);
+
+/*===
+a{}
+a{,}
+a{1,2,3}
+===*/
+
+// On quantifier parsing failure, treat '{' as a literal brace
+
+t = /a{}/.exec('a{}');
+print(t[0]);
+
+t = /a{,}/.exec('a{,}');
+print(t[0]);
+
+t = /a{1,2,3}/.exec('a{1,2,3}');
+print(t[0]);
+
+
+/*===
+SyntaxError
+===*/
+
+// The current implementation still rejects some malformed quantifiers
+
+// Too many numbers
+try {
+    eval("/{1111111111111111111111111}/.exec('foo');");
+    print("no exception");
+} catch (e) {
+    print(e.name);
+}