duktape/src-input/duk_lexer.c


								/*

								 *  Lexer for source files, ToNumber() string conversions, RegExp expressions,

								 *  and JSON.

								 *

								 *  Provides a stream of Ecmascript tokens from a UTF-8/CESU-8 buffer.  The

								 *  caller can also rewind the token stream to a certain position which is

								 *  needed by the compiler part for multi-pass scanning.  Tokens are

								 *  represented as duk_token structures, and contain line number information.

								 *  Token types are identified with DUK_TOK_* defines.

								 *

								 *  Characters are decoded into a fixed size lookup window consisting of

								 *  decoded Unicode code points, with window positions past the end of the

								 *  input filled with an invalid codepoint (-1).  The tokenizer can thus

								 *  perform multiple character lookups efficiently and with few sanity

								 *  checks (such as access outside the end of the input), which keeps the

								 *  tokenization code small at the cost of performance.

								 *

								 *  Character data in tokens, such as identifier names and string literals,

								 *  is encoded into CESU-8 format on-the-fly while parsing the token in

								 *  question.  The string data is made reachable to garbage collection by

								 *  placing the token-related values in value stack entries allocated for

								 *  this purpose by the caller.  The characters exist in Unicode code point

								 *  form only in the fixed size lookup window, which keeps character data

								 *  expansion (of especially ASCII data) low.

								 *

								 *  Token parsing supports the full range of Unicode characters as described

								 *  in the E5 specification.  Parsing has been optimized for ASCII characters

								 *  because ordinary Ecmascript code consists almost entirely of ASCII

								 *  characters.  Matching of complex Unicode codepoint sets (such as in the

								 *  IdentifierStart and IdentifierPart productions) is optimized for size,

								 *  and is done using a linear scan of a bit-packed list of ranges.  This is

								 *  very slow, but should never be entered unless the source code actually

								 *  contains Unicode characters.

								 *

								 *  Ecmascript tokenization is partially context sensitive.  First,

								 *  additional future reserved words are recognized in strict mode (see E5

								 *  Section 7.6.1.2).  Second, a forward slash character ('/') can be

								 *  recognized either as starting a RegExp literal or as a division operator,

								 *  depending on context.  The caller must provide necessary context flags

								 *  when requesting a new token.

								 *

								 *  Future work:

								 *

								 *    * Make line number tracking optional, as it consumes space.

								 *

								 *    * Add a feature flag for disabling UTF-8 decoding of input, as most

								 *      source code is ASCII.  Because of Unicode escapes written in ASCII,

								 *      this does not allow Unicode support to be removed from e.g.

								 *      duk_unicode_is_identifier_start() nor does it allow removal of CESU-8

								 *      encoding of e.g. string literals.

								 *

								 *    * Add a feature flag for disabling Unicode compliance of e.g. identifier

								 *      names.  This allows for a build more than a kilobyte smaller, because

								 *      Unicode ranges needed by duk_unicode_is_identifier_start() and

								 *      duk_unicode_is_identifier_part() can be dropped.  String literals

								 *      should still be allowed to contain escaped Unicode, so this still does

								 *      not allow removal of CESU-8 encoding of e.g. string literals.

								 *

								 *    * Character lookup tables for codepoints above BMP could be stripped.

								 *

								 *    * Strictly speaking, E5 specification requires that source code consists

								 *      of 16-bit code units, and if not, must be conceptually converted to

								 *      that format first.  The current lexer processes Unicode code points

								 *      and allows characters outside the BMP.  These should be converted to

								 *      surrogate pairs while reading the source characters into the window,

								 *      not after tokens have been formed (as is done now).  However, the fix

								 *      is not trivial because two characters are decoded from one codepoint.

								 *

								 *    * Optimize for speed as well as size.  Large if-else ladders are (at

								 *      least potentially) slow.

								 */


								#include "duk_internal.h"


								/*

								 *  Various defines and file specific helper macros

								 */


								/* Digit count limits.  Judging by the names these apply to RegExp decimal
								 * escapes and quantifiers; the users are outside this part of the file.
								 */
								#define DUK__MAX_RE_DECESC_DIGITS     9

								#define DUK__MAX_RE_QUANT_DIGITS      9   /* Does not allow e.g. 2**31-1, but one more would allow overflows of u32. */


								/* Character classification helpers; whether to use macros or helper
								 * function depends on call count.  The range check macros evaluate their
								 * argument twice, so avoid arguments with side effects.  DUK__ISHEXDIGIT()
								 * delegates to duk__is_hex_digit().
								 */

								#define DUK__ISDIGIT(x)          ((x) >= DUK_ASC_0 && (x) <= DUK_ASC_9)

								#define DUK__ISHEXDIGIT(x)       duk__is_hex_digit((x))

								#define DUK__ISOCTDIGIT(x)       ((x) >= DUK_ASC_0 && (x) <= DUK_ASC_7)

								#define DUK__ISDIGIT03(x)        ((x) >= DUK_ASC_0 && (x) <= DUK_ASC_3)

								#define DUK__ISDIGIT47(x)        ((x) >= DUK_ASC_4 && (x) <= DUK_ASC_7)


								/* Lexer character window helpers.  DUK__LOOKUP() reads the codepoint at a
								 * window index.  DUK__ADVANCECHARS() takes a character count and converts
								 * it to bytes by multiplying with sizeof(duk_lexer_codepoint), while
								 * DUK__ADVANCEBYTES() passes its byte count through unchanged.
								 */

								#define DUK__LOOKUP(lex_ctx,idx)          ((lex_ctx)->window[(idx)].codepoint)

								#define DUK__ADVANCECHARS(lex_ctx,count)  duk__advance_bytes((lex_ctx), (count) * sizeof(duk_lexer_codepoint))

								#define DUK__ADVANCEBYTES(lex_ctx,count)  duk__advance_bytes((lex_ctx), (count))

								#define DUK__INITBUFFER(lex_ctx)          duk__initbuffer((lex_ctx))

								#define DUK__APPENDBUFFER(lex_ctx,x)      duk__appendbuffer((lex_ctx), (duk_codepoint_t) (x))


								/* Lookup shorthands for window positions 0-5 (note: assume context
								 * variable is named 'lex_ctx').
								 */

								#define DUK__L0()  DUK__LOOKUP(lex_ctx, 0)

								#define DUK__L1()  DUK__LOOKUP(lex_ctx, 1)

								#define DUK__L2()  DUK__LOOKUP(lex_ctx, 2)

								#define DUK__L3()  DUK__LOOKUP(lex_ctx, 3)

								#define DUK__L4()  DUK__LOOKUP(lex_ctx, 4)

								#define DUK__L5()  DUK__LOOKUP(lex_ctx, 5)


								/* Packed advance/token number macro used by multiple functions.  Despite
								 * its name, the 'advbytes' argument is a character count: it is multiplied
								 * by sizeof(duk_lexer_codepoint) here, so the packed value is
								 * (advance in bytes << 8) + token number.
								 */

								#define DUK__ADVTOK(advbytes,tok)  ((((advbytes) * sizeof(duk_lexer_codepoint)) << 8) + (tok))


								/*

								 *  Advance lookup window by N characters, filling in new characters as

								 *  necessary.  After returning caller is guaranteed a character window of

								 *  at least DUK_LEXER_WINDOW_SIZE characters.

								 *

								 *  The main function duk__advance_bytes() is called at least once per every

								 *  token so it has a major lexer/compiler performance impact.  There are two

								 *  variants for the main duk__advance_bytes() algorithm: a sliding window

								 *  approach which is slightly faster at the cost of larger code footprint,

								 *  and a simple copying one.

								 *

								 *  Decoding directly from the source string would be another lexing option.

								 *  But the lookup window based approach has the advantage of hiding the

								 *  source string and its encoding effectively which gives more flexibility

								 *  going forward to e.g. support chunked streaming of source from flash.

								 *

								 *  Decodes UTF-8/CESU-8 leniently with support for code points from U+0000 to

								 *  U+10FFFF, causing an error if the input is unparseable.  Leniency means:

								 *

								 *    * Unicode code point validation is intentionally not performed,

								 *      except to check that the codepoint does not exceed 0x10ffff.

								 *

								 *    * In particular, surrogate pairs are allowed and not combined, which

								 *      allows source files to represent all SourceCharacters with CESU-8.

								 *      Broken surrogate pairs are allowed, as Ecmascript does not mandate

								 *      their validation.

								 *

								 *    * Allow non-shortest UTF-8 encodings.

								 *

								 *  Leniency here causes few security concerns because all character data is

								 *  decoded into Unicode codepoints before lexer processing, and is then

								 *  re-encoded into CESU-8.  The source can be parsed as strict UTF-8 with

								 *  a compiler option.  However, Ecmascript source characters include -all-

								 *  16-bit unsigned integer codepoints, so leniency seems to be appropriate.

								 *

								 *  Note that codepoints above the BMP are not strictly SourceCharacters,

								 *  but the lexer still accepts them as such.  Before ending up in a string

								 *  or an identifier name, codepoints above BMP are converted into surrogate

								 *  pairs and then CESU-8 encoded, resulting in 16-bit Unicode data as

								 *  expected by Ecmascript.

								 *

								 *  An alternative approach to dealing with invalid or partial sequences

								 *  would be to skip them and replace them with e.g. the Unicode replacement

								 *  character U+FFFD.  This has limited utility because a replacement character

								 *  will most likely cause a parse error, unless it occurs inside a string.

								 *  Further, Ecmascript source is typically pure ASCII.

								 *

								 *  See:

								 *

								 *     http://en.wikipedia.org/wiki/UTF-8

								 *     http://en.wikipedia.org/wiki/CESU-8

								 *     http://tools.ietf.org/html/rfc3629

								 *     http://en.wikipedia.org/wiki/UTF-8#Invalid_byte_sequences

								 *

								 *  Future work:

								 *

								 *    * Reject other invalid Unicode sequences (see Wikipedia entry for examples)

								 *      in strict UTF-8 mode.

								 *

								 *    * Size optimize.  An attempt to use a 16-byte lookup table for the first

								 *      byte resulted in a code increase though.

								 *

								 *    * Is checking against maximum 0x10ffff really useful?  4-byte encoding

								 *      imposes a certain limit anyway.

								 *

								 *    * Support chunked streaming of source code.  Can be implemented either

								 *      by streaming chunks of bytes or chunks of codepoints.

								 */


								#if defined(DUK_USE_LEXER_SLIDING_WINDOW)

								/* Decode input bytes into lexer buffer entries, starting at byte offset
								 * 'start_offset_bytes' within lex_ctx->buffer and continuing to the end of
								 * the buffer.  Each entry records the input byte offset and line number
								 * in effect before its codepoint is decoded.  Entries past the end of the
								 * input get codepoint -1.  On a decode failure the input offset and line
								 * are written back to lex_ctx and a SyntaxError is thrown.
								 */
								DUK_LOCAL void duk__fill_lexer_buffer(duk_lexer_ctx *lex_ctx, duk_small_uint_t start_offset_bytes) {
									duk_lexer_codepoint *slot;
									duk_lexer_codepoint *slot_end;
									duk_ucodepoint_t cpval;
									duk_small_uint_t n_cont;  /* continuation bytes still to read */
									const duk_uint8_t *src;
									const duk_uint8_t *src_end;
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
									duk_ucodepoint_t min_allowed;  /* smallest codepoint valid for the sequence length */
								#endif
									duk_int_t line;

									/* Work on local copies; lex_ctx is written back only on exit. */
									line = lex_ctx->input_line;
									src = lex_ctx->input + lex_ctx->input_offset;
									src_end = lex_ctx->input + lex_ctx->input_length;

									slot = (duk_lexer_codepoint *) (void *) ((duk_uint8_t *) lex_ctx->buffer + start_offset_bytes);
									slot_end = lex_ctx->buffer + DUK_LEXER_BUFFER_SIZE;

									for (; slot != slot_end; slot++) {
										slot->offset = (duk_size_t) (src - lex_ctx->input);
										slot->line = line;

										/* XXX: potential issue with signed pointers, src_end < src. */
										if (DUK_UNLIKELY(src >= src_end)) {
											/* Input exhausted (a bogus, very large input_offset also
											 * lands here): mark the entry with the invalid codepoint
											 * without touching the input.
											 */
											slot->codepoint = -1;
											continue;
										}

										cpval = (duk_ucodepoint_t) (*src++);

										/* Single byte (ASCII) fast path. */
										if (DUK_LIKELY(cpval < 0x80UL)) {
											DUK_ASSERT(cpval != 0x2028UL && cpval != 0x2029UL);  /* not LS/PS */
											if (DUK_UNLIKELY(cpval <= 0x000dUL)) {
												/* E5 Section 7.3 line terminators reachable here are LF
												 * and CR.  A CR directly followed by LF does not bump
												 * the line number; the LF does it instead.  The LF
												 * lookahead assumes shortest encoding.
												 */
												if (cpval == 0x000aUL) {
													line++;
												} else if (cpval == 0x000dUL) {
													if (src >= src_end || *src != 0x000aUL) {
														line++;
													}
												}
											}

											slot->codepoint = (duk_codepoint_t) cpval;
											continue;
										}

										/* Multibyte slow path: classify the lead byte. */
										if (cpval < 0xc0UL || cpval >= 0xf8UL) {
											/* 10xx xxxx is a stray continuation byte; 1111 1xxx would
											 * start a sequence of 5 or more bytes, which is not supported.
											 */
											goto decode_failed;
										}
										if (cpval >= 0xf0UL) {
											/* 1111 0xxx   10xx xxxx   10xx xxxx   10xx xxxx */
											n_cont = 3;
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
											min_allowed = 0x10000UL;
								#endif
											cpval &= 0x07UL;
										} else if (cpval >= 0xe0UL) {
											/* 1110 xxxx   10xx xxxx   10xx xxxx */
											n_cont = 2;
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
											min_allowed = 0x800UL;
								#endif
											cpval &= 0x0fUL;
										} else {
											/* 110x xxxx   10xx xxxx */
											n_cont = 1;
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
											min_allowed = 0x80UL;
								#endif
											cpval &= 0x1fUL;
										}

										/* Sequence clipped by end of input? */
										DUK_ASSERT(src_end >= src);
										if ((duk_size_t) n_cont > (duk_size_t) (src_end - src)) {
											goto decode_failed;
										}

										for (; n_cont > 0; n_cont--) {
											duk_small_uint_t cont_byte;

											cont_byte = *src++;
											if ((cont_byte & 0xc0U) != 0x80U) {
												/* continuation bytes must have the form 10xx xxxx */
												goto decode_failed;
											}
											cpval <<= 6;
											cpval += cont_byte & 0x3fUL;
										}

										/* Validate the decoded codepoint. */
										if (cpval > 0x10ffffUL) {
											goto decode_failed;
										}
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
										if (cpval < min_allowed || (cpval >= 0xd800UL && cpval <= 0xdfffUL) || cpval == 0xfffeUL) {
											goto decode_failed;
										}
								#endif

										/* LS and PS also count as line terminators. */
										DUK_ASSERT(cpval != 0x000aUL && cpval != 0x000dUL);
										if ((cpval == 0x2028UL) || (cpval == 0x2029UL)) {
											line++;
										}

										slot->codepoint = (duk_codepoint_t) cpval;
									}

									lex_ctx->input_offset = (duk_size_t) (src - lex_ctx->input);
									lex_ctx->input_line = line;
									return;

								 decode_failed:  /* clipped sequence, invalid encoding, or invalid codepoint */
									lex_ctx->input_offset = (duk_size_t) (src - lex_ctx->input);
									lex_ctx->input_line = line;

									DUK_ERROR_SYNTAX(lex_ctx->thr, "utf-8 decode failed");
								}


								/* Sliding window variant: move the window forward by 'count_bytes' bytes
								 * (zero is allowed).  If fewer than a full window's worth of decoded
								 * entries remain in the buffer, the remaining entries are moved to the
								 * start of the buffer and the rest of the buffer is refilled.
								 */
								DUK_LOCAL void duk__advance_bytes(duk_lexer_ctx *lex_ctx, duk_small_uint_t count_bytes) {
									duk_small_uint_t consumed;
									duk_small_uint_t remaining;

									DUK_ASSERT_DISABLE(count_bytes >= 0);  /* unsigned */
									DUK_ASSERT(count_bytes <= (duk_small_uint_t) (DUK_LEXER_WINDOW_SIZE * sizeof(duk_lexer_codepoint)));
									DUK_ASSERT(lex_ctx->window >= lex_ctx->buffer);
									DUK_ASSERT(lex_ctx->window < lex_ctx->buffer + DUK_LEXER_BUFFER_SIZE);
									DUK_ASSERT((duk_uint8_t *) lex_ctx->window + count_bytes <= (duk_uint8_t *) lex_ctx->buffer + DUK_LEXER_BUFFER_SIZE * sizeof(duk_lexer_codepoint));

									/* Byte based pointer arithmetic avoids a multiply and generates
									 * better code in GCC.
									 */
									lex_ctx->window = (duk_lexer_codepoint *) (void *) ((duk_uint8_t *) lex_ctx->window + count_bytes);

									consumed = (duk_small_uint_t) ((duk_uint8_t *) lex_ctx->window - (duk_uint8_t *) lex_ctx->buffer);
									remaining = DUK_LEXER_BUFFER_SIZE * sizeof(duk_lexer_codepoint) - consumed;
									if (remaining >= (duk_small_uint_t) (DUK_LEXER_WINDOW_SIZE * sizeof(duk_lexer_codepoint))) {
										/* A full window is still available, nothing more to do. */
										return;
									}

									/* Scroll the leftover entries to the buffer start and decode new
									 * entries after them.
									 */
									DUK_MEMMOVE((void *) lex_ctx->buffer,
									            (const void *) lex_ctx->window,
									            (size_t) remaining);
									lex_ctx->window = lex_ctx->buffer;
									duk__fill_lexer_buffer(lex_ctx, remaining);
								}


								/* Sliding window variant: point the lookup window at the start of the
								 * buffer and fill the buffer starting from byte offset 0.
								 */
								DUK_LOCAL void duk__init_lexer_window(duk_lexer_ctx *lex_ctx) {

									lex_ctx->window = lex_ctx->buffer;

									duk__fill_lexer_buffer(lex_ctx, 0);

								}

								#else  /* DUK_USE_LEXER_SLIDING_WINDOW */

								/* Decode one codepoint from the input at lex_ctx->input_offset and advance
								 * the input offset past it, bumping lex_ctx->input_line on line
								 * terminators.  Returns -1 when the input offset is at or past the end of
								 * the input.  Throws a SyntaxError when the byte sequence cannot be
								 * decoded.
								 */
								DUK_LOCAL duk_codepoint_t duk__read_char(duk_lexer_ctx *lex_ctx) {
									duk_ucodepoint_t cpval;
									duk_small_uint_t seq_len;  /* total sequence length in bytes */
									duk_small_uint_t k;
									const duk_uint8_t *src;
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
									duk_ucodepoint_t min_allowed;  /* smallest codepoint valid for the sequence length */
								#endif
									duk_size_t start_off;

									start_off = lex_ctx->input_offset;
									if (DUK_UNLIKELY(start_off >= lex_ctx->input_length)) {
										/* End of input.  A bogus, very large input_offset also ends up
										 * here, so no out-of-bounds read happens.
										 */
										return -1;
									}

									src = lex_ctx->input + start_off;
									cpval = (duk_ucodepoint_t) src[0];

									/* Single byte (ASCII) fast path: 0xxx xxxx. */
									if (DUK_LIKELY(cpval < 0x80UL)) {
										lex_ctx->input_offset++;

										DUK_ASSERT(cpval != 0x2028UL && cpval != 0x2029UL);  /* not LS/PS */
										if (DUK_UNLIKELY(cpval <= 0x000dUL)) {
											/* E5 Section 7.3 line terminators reachable here are LF and
											 * CR.  A CR directly followed by LF does not bump the line
											 * number; the LF does it instead.  The LF lookahead assumes
											 * shortest encoding.
											 */
											if (cpval == 0x000aUL) {
												lex_ctx->input_line++;
											} else if (cpval == 0x000dUL) {
												if (lex_ctx->input_offset >= lex_ctx->input_length ||
												    lex_ctx->input[lex_ctx->input_offset] != 0x000aUL) {
													lex_ctx->input_line++;
												}
											}
										}

										return (duk_codepoint_t) cpval;
									}

									/* Multibyte slow path: classify the lead byte. */
									if (cpval < 0xc0UL || cpval >= 0xf8UL) {
										/* 10xx xxxx is a stray continuation byte; 1111 1xxx would start
										 * a sequence of 5 or more bytes, which is not supported.
										 */
										goto decode_failed;
									}
									if (cpval >= 0xf0UL) {
										/* 1111 0xxx   10xx xxxx   10xx xxxx   10xx xxxx */
										seq_len = 4;
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
										min_allowed = 0x10000UL;
								#endif
										cpval &= 0x07UL;
									} else if (cpval >= 0xe0UL) {
										/* 1110 xxxx   10xx xxxx   10xx xxxx */
										seq_len = 3;
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
										min_allowed = 0x800UL;
								#endif
										cpval &= 0x0fUL;
									} else {
										/* 110x xxxx   10xx xxxx */
										seq_len = 2;
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
										min_allowed = 0x80UL;
								#endif
										cpval &= 0x1fUL;
									}

									/* Sequence clipped by end of input? */
									DUK_ASSERT(lex_ctx->input_length >= lex_ctx->input_offset);
									if ((duk_size_t) seq_len > (duk_size_t) (lex_ctx->input_length - lex_ctx->input_offset)) {
										goto decode_failed;
									}

									/* Fold in the continuation bytes following the lead byte. */
									for (k = 1; k < seq_len; k++) {
										duk_small_uint_t cont_byte;

										cont_byte = src[k];
										if ((cont_byte & 0xc0U) != 0x80U) {
											/* continuation bytes must have the form 10xx xxxx */
											goto decode_failed;
										}
										cpval <<= 6;
										cpval += cont_byte & 0x3fUL;
									}

									/* Validate the decoded codepoint. */
									if (cpval > 0x10ffffUL) {
										goto decode_failed;
									}
								#if defined(DUK_USE_STRICT_UTF8_SOURCE)
									if (cpval < min_allowed || (cpval >= 0xd800UL && cpval <= 0xdfffUL) || cpval == 0xfffeUL) {
										goto decode_failed;
									}
								#endif

									/* Consume the whole sequence. */
									lex_ctx->input_offset += seq_len;

									/* LS and PS also count as line terminators. */
									DUK_ASSERT(cpval != 0x000aUL && cpval != 0x000dUL);
									if ((cpval == 0x2028UL) || (cpval == 0x2029UL)) {
										lex_ctx->input_line++;
									}

									return (duk_codepoint_t) cpval;

								 decode_failed:  /* clipped sequence, invalid encoding, or invalid codepoint */
									DUK_ERROR_SYNTAX(lex_ctx->thr, "utf-8 decode failed");
									return 0;
								}


								/* Copying variant: drop 'count_bytes' bytes worth of entries from the
								 * front of the window (zero is allowed), move the remaining entries to
								 * the window start, and decode new entries into the freed tail.
								 */
								DUK_LOCAL void duk__advance_bytes(duk_lexer_ctx *lex_ctx, duk_small_uint_t count_bytes) {
									duk_small_uint_t retained;
									duk_lexer_codepoint *slot;
									duk_lexer_codepoint *window_end;

									DUK_ASSERT_DISABLE(count_bytes >= 0);  /* unsigned */
									DUK_ASSERT(count_bytes <= (duk_small_uint_t) (DUK_LEXER_WINDOW_SIZE * sizeof(duk_lexer_codepoint)));

									/* Bytes at the end of the window that survive the advance. */
									retained = DUK_LEXER_WINDOW_SIZE * sizeof(duk_lexer_codepoint) - count_bytes;
									DUK_MEMMOVE((void *) lex_ctx->window,
									            (const void *) ((duk_uint8_t *) lex_ctx->window + count_bytes),
									            (size_t) retained);

									slot = (duk_lexer_codepoint *) ((duk_uint8_t *) lex_ctx->window + retained);
									window_end = lex_ctx->window + DUK_LEXER_WINDOW_SIZE;
									while (slot != window_end) {
										/* Record the position before duk__read_char() advances it. */
										slot->offset = lex_ctx->input_offset;
										slot->line = lex_ctx->input_line;
										slot->codepoint = duk__read_char(lex_ctx);
										slot++;
									}
								}


								/* Copying variant: (re)fill the whole lookup window by advancing a full
								 * window's worth of bytes, which keeps no old entries and decodes
								 * DUK_LEXER_WINDOW_SIZE new ones from the current input position.
								 */
								DUK_LOCAL void duk__init_lexer_window(duk_lexer_ctx *lex_ctx) {

									/* Call with count == DUK_LEXER_WINDOW_SIZE to fill buffer initially. */

									duk__advance_bytes(lex_ctx, DUK_LEXER_WINDOW_SIZE * sizeof(duk_lexer_codepoint));  /* fill window */

								}

								#endif  /* DUK_USE_LEXER_SLIDING_WINDOW */


								/*

								 *  (Re)initialize the temporary byte buffer.  May be called extra times

								 *  with little impact.

								 */


								/* Prepare the temporary buffer for a new token: shrink it back to
								 * DUK_LEXER_TEMP_BUF_LIMIT if it has grown to that size or beyond, then
								 * (re)initialize the buffer writer on top of it.
								 */
								DUK_LOCAL void duk__initbuffer(duk_lexer_ctx *lex_ctx) {
									if (DUK_HBUFFER_DYNAMIC_GET_SIZE(lex_ctx->buf) >= DUK_LEXER_TEMP_BUF_LIMIT) {
										/* Buffer has grown large, cut it back to the limit.  Smaller
										 * buffers are reused as is.
										 */
										duk_hbuffer_resize(lex_ctx->thr, lex_ctx->buf, DUK_LEXER_TEMP_BUF_LIMIT);
									}

									DUK_BW_INIT_WITHBUF(lex_ctx->thr, &lex_ctx->bw, lex_ctx->buf);
								}


								/*

								 *  Append a Unicode codepoint to the temporary byte buffer.  Performs

								 *  CESU-8 surrogate pair encoding for codepoints above the BMP.

								 *  Existing surrogate pairs are allowed and also encoded into CESU-8.

								 */


								DUK_LOCAL void duk__appendbuffer(duk_lexer_ctx *lex_ctx, duk_codepoint_t x) {

									/*

									 *  Since character data is only generated by decoding the source or by

									 *  the compiler itself, we rely on the input codepoints being correct

									 *  and avoid a check here.

									 *

									 *  Character data can also come here through decoding of Unicode

									 *  escapes ("\udead\ubeef") so all 16-bit unsigned values can be

									 *  present, even when the source file itself is strict UTF-8.

									 */


									/* Range is only asserted, not checked at runtime (see above). */

									DUK_ASSERT(x >= 0 && x <= 0x10ffff);


									/* Append 'x' through the lexer's buffer writer (lex_ctx->bw). */

									DUK_BW_WRITE_ENSURE_CESU8(lex_ctx->thr, &lex_ctx->bw, (duk_ucodepoint_t) x);

								}


								/*

								 *  Intern the temporary byte buffer into a valstack slot

								 *  (in practice, slot1 or slot2).

								 */


								/* Push the buffer writer contents as a string and move that string into
								 * the value stack slot 'valstack_idx', which must be one of the two slots
								 * reserved for the lexer (slot1_idx or slot2_idx).
								 */
								DUK_LOCAL void duk__internbuffer(duk_lexer_ctx *lex_ctx, duk_idx_t valstack_idx) {
									DUK_ASSERT(valstack_idx == lex_ctx->slot1_idx || valstack_idx == lex_ctx->slot2_idx);

									DUK_BW_PUSH_AS_STRING(lex_ctx->thr, &lex_ctx->bw);
									duk_replace((duk_context *) lex_ctx->thr, valstack_idx);
								}


								/*

								 *  Init lexer context

								 */


								/* Reset a lexer context to an all-zero state.  'lex_ctx' must not be NULL
								 * (asserted only).
								 */
								DUK_INTERNAL void duk_lexer_initctx(duk_lexer_ctx *lex_ctx) {

									DUK_ASSERT(lex_ctx != NULL);


									DUK_MEMZERO(lex_ctx, sizeof(*lex_ctx));

								#if defined(DUK_USE_EXPLICIT_NULL_INIT)

									/* With this option pointer fields are set to NULL explicitly
									 * instead of relying on the zero fill above.
									 */

								#if defined(DUK_USE_LEXER_SLIDING_WINDOW)

									lex_ctx->window = NULL;

								#endif

									lex_ctx->thr = NULL;

									lex_ctx->input = NULL;

									lex_ctx->buf = NULL;

								#endif

								}


								/*

								 *  Set lexer input position and reinitialize lookup window.

								 */


								/* NB: duk_lexer_getpoint() is a macro only */


								/* Restore the input byte offset and line number from 'pt' and rebuild the
								 * lookup window from that position.  pt->line must be at least 1
								 * (asserted only).
								 */
								DUK_INTERNAL void duk_lexer_setpoint(duk_lexer_ctx *lex_ctx, duk_lexer_point *pt) {

									DUK_ASSERT_DISABLE(pt->offset >= 0);  /* unsigned */

									DUK_ASSERT(pt->line >= 1);

									lex_ctx->input_offset = pt->offset;

									lex_ctx->input_line = pt->line;

									/* The window contents belong to the old position, so decode again. */

									duk__init_lexer_window(lex_ctx);

								}


								/*

								 *  Lexing helpers

								 */


								/* numeric value of a hex digit (also covers octal and decimal digits) */

								/* Map codepoint 'x' to its digit value using duk_hex_dectab.  Codepoints
								 * outside 0x00-0xff, or ones with a negative table entry, cause a
								 * SyntaxError to be thrown.
								 */
								DUK_LOCAL duk_codepoint_t duk__hexval(duk_lexer_ctx *lex_ctx, duk_codepoint_t x) {
									duk_small_int_t digit;

									/* 'x' is a Unicode codepoint; the table only covers 0x00-0xff. */
									if (DUK_UNLIKELY(x < 0 || x > 0xff)) {
										goto not_a_digit;
									}

									digit = duk_hex_dectab[x];
									if (DUK_UNLIKELY(digit < 0)) {
										goto not_a_digit;
									}
									return digit;

								 not_a_digit:
									/* Throwing an error this deep makes the error rather vague, but
									 * saves hundreds of bytes of code.
									 */
									DUK_ERROR_SYNTAX(lex_ctx->thr, "decode error");
									return 0;
								}


								/* having this as a separate function provided a size benefit */

								/* Return nonzero if codepoint 'x' is within 0x00-0xff and has a
								 * non-negative entry in duk_hex_dectab, zero otherwise.
								 */
								DUK_LOCAL duk_bool_t duk__is_hex_digit(duk_codepoint_t x) {
									if (DUK_UNLIKELY(x < 0 || x > 0xff)) {
										/* Outside the lookup table range. */
										return 0;
									}

									return (duk_hex_dectab[x] >= 0);
								}


								/* Combine the two hex digits at window positions 'lookup_offset' and
								 * 'lookup_offset + 1' into one value (first digit is the high nibble).
								 * Digit validation, including throwing on a bad digit, is done by
								 * duk__hexval().
								 */
								DUK_LOCAL duk_codepoint_t duk__decode_hexesc_from_window(duk_lexer_ctx *lex_ctx, duk_small_int_t lookup_offset) {
									duk_codepoint_t hi_nibble;
									duk_codepoint_t lo_nibble;

									hi_nibble = duk__hexval(lex_ctx, lex_ctx->window[lookup_offset].codepoint);
									lo_nibble = duk__hexval(lex_ctx, lex_ctx->window[lookup_offset + 1].codepoint);

									return (hi_nibble << 4) | lo_nibble;
								}


								/* Combine the four hex digits at window positions 'lookup_offset' through
								 * 'lookup_offset + 3' into one 16-bit value (first digit is the most
								 * significant).  Digit validation, including throwing on a bad digit, is
								 * done by duk__hexval().
								 */
								DUK_LOCAL duk_codepoint_t duk__decode_uniesc_from_window(duk_lexer_ctx *lex_ctx, duk_small_int_t lookup_offset) {
									duk_codepoint_t result = 0;
									duk_small_int_t i;

									for (i = 0; i < 4; i++) {
										/* Shift in one nibble at a time, most significant first. */
										result = (result << 4) | duk__hexval(lex_ctx, lex_ctx->window[lookup_offset + i].codepoint);
									}

									return result;
								}


								/*

								 *  Parse Ecmascript source InputElementDiv or InputElementRegExp

								 *  (E5 Section 7), skipping whitespace, comments, and line terminators.

								 *

								 *  Possible results are:

								 *    (1) a token

								 *    (2) a line terminator (skipped)

								 *    (3) a comment (skipped)

								 *    (4) EOF

								 *

								 *  White space is automatically skipped from the current position (but

								 *  not after the input element).  If input has already ended, returns

								 *  DUK_TOK_EOF indefinitely.  If a parse error occurs, uses a DUK_ERROR()

								 *  macro call (and hence a longjmp through current heap longjmp context).

								 *  Comments and line terminator tokens are automatically skipped.

								 *

								 *  The input element being matched is determined by regexp_mode; if set,

								 *  parses an InputElementRegExp, otherwise an InputElementDiv.  The

								 *  difference between these is the handling of productions starting with a

								 *  forward slash.

								 *

								 *  If strict_mode is set, recognizes additional future reserved words

								 *  specific to strict mode, and refuses to parse octal literals.

								 *

								 *  The matching strategy below is to (currently) use a six character

								 *  lookup window to quickly determine which production is the -longest-

								 *  matching one, and then parse that.  The top-level if-else clauses

								 *  match the first character, and the code blocks for each clause

								 *  handle -all- alternatives for that first character.  Ecmascript

								 *  specification uses the "longest match wins" semantics, so the order

								 *  of the if-clauses matters.

								 *

								 *  Misc notes:

								 *

								 *    * Ecmascript numeric literals do not accept a sign character.

								 *      Consequently e.g. "-1.0" is parsed as two tokens: a negative

								 *      sign and a positive numeric literal.  The compiler performs

								 *      the negation during compilation, so this has no adverse impact.

								 *

								 *    * There is no token for "undefined": it is just a value available

								 *      from the global object (or simply established by doing a reference

								 *      to an undefined value).

								 *

								 *    * Some contexts want Identifier tokens, which are IdentifierNames

								 *      excluding reserved words, while some contexts want IdentifierNames

								 *      directly.  In the latter case e.g. "while" is interpreted as an

								 *      identifier name, not a DUK_TOK_WHILE token.  The solution here is

								 *      to provide both token types: DUK_TOK_WHILE goes to 't' while

								 *      DUK_TOK_IDENTIFIER goes to 't_nores', and 'slot1' always contains

								 *      the identifier / keyword name.

								 *

								 *    * Directive prologue needs to identify string literals such as

								 *      "use strict" and 'use strict', which are sensitive to line

								 *      continuations and escape sequences.  For instance, "use\u0020strict"

								 *      is a valid directive but is distinct from "use strict".  The solution

								 *      here is to decode escapes while tokenizing, but to keep track of the

								 *      number of escapes.  Directive detection can then check that the

								 *      number of escapes is zero.

								 *

								 *    * Multi-line comments with one or more internal LineTerminator are

								 *      treated like a line terminator to comply with automatic semicolon

								 *      insertion.

								 */


								/*

								 *  Parse the next Ecmascript input element (token) from the lexer window

								 *  into 'out_token'.  Whitespace, line terminators and comments preceding

								 *  the token are skipped; if a line terminator (or a multi-line comment

								 *  containing one) was skipped, out_token->lineterm is set.

								 *

								 *  Arguments:

								 *

								 *    lex_ctx      Lexer state.  token_count is incremented on every call and

								 *                 a RangeError is thrown once it reaches token_limit.  Value

								 *                 stack slots slot1_idx / slot2_idx receive the string or

								 *                 number value(s) of the token.

								 *    out_token    Output token.  t, t_nores, start_line, start_offset,

								 *                 num_escapes, lineterm and allow_auto_semi are always

								 *                 written; str1 / str2 / num only for token types that

								 *                 carry such a value.

								 *    strict_mode  If non-zero, escapes beginning with a digit (other than

								 *                 the zero escape) are rejected in string literals, the

								 *                 legacy octal integer literal handling is disabled, and

								 *                 the reserved word scan extends to DUK_STRIDX_END_RESERVED.

								 *    regexp_mode  If non-zero, a '/' which does not start a comment is

								 *                 parsed as a RegExp literal instead of '/' or '/='.

								 *

								 *  Errors are thrown using DUK_ERROR_SYNTAX() / DUK_ERROR_RANGE(), which

								 *  do not return.

								 */

								DUK_INTERNAL

								void duk_lexer_parse_js_input_element(duk_lexer_ctx *lex_ctx,

								                                      duk_token *out_token,

								                                      duk_bool_t strict_mode,

								                                      duk_bool_t regexp_mode) {

									duk_codepoint_t x;           /* temporary, must be signed and 32-bit to hold Unicode code points */

									duk_small_uint_t advtok = 0; /* (advance << 8) + token_type, updated at function end,

									                              * init is unnecessary but suppresses "may be used uninitialized" warnings.

									                              */

									duk_bool_t got_lineterm = 0;  /* got lineterm preceding non-whitespace, non-lineterm token */


									if (++lex_ctx->token_count >= lex_ctx->token_limit) {

										DUK_ERROR_RANGE(lex_ctx->thr, "token limit");

										return;  /* unreachable */

									}


									out_token->t = DUK_TOK_EOF;

									out_token->t_nores = -1;  /* marker: copy t if not changed */

								#if 0  /* not necessary to init, disabled for faster parsing */

									out_token->num = DUK_DOUBLE_NAN;

									out_token->str1 = NULL;

									out_token->str2 = NULL;

								#endif

									out_token->num_escapes = 0;

									/* out_token->lineterm is set in the shared exit path at the end of this function */


									/* This would be nice, but parsing is faster without resetting the

									 * value slots.  The only side effect is that references to temporary

									 * string values may linger until lexing is finished; they're then

									 * freed normally.

									 */

								#if 0

									duk_to_undefined((duk_context *) lex_ctx->thr, lex_ctx->slot1_idx);

									duk_to_undefined((duk_context *) lex_ctx->thr, lex_ctx->slot2_idx);

								#endif


									/* 'advtok' indicates how much to advance and which token id to assign

									 * at the end.  This shared functionality minimizes code size.  All

									 * code paths reaching the shared exit path are required to set 'advtok'

									 * to some value; the zero init in the declaration only silences

									 * compiler warnings.  Code paths calling DUK_ERROR() never return so

									 * they don't need to set advtok.

									 */


									/*

									 *  Matching order:

									 *

									 *    Punctuator first chars, also covers comments, regexps

									 *    LineTerminator

									 *    Identifier or reserved word, also covers null/true/false literals

									 *    NumericLiteral

									 *    StringLiteral

									 *    EOF

									 *

									 *  The order does not matter as long as the longest match is

									 *  always correctly identified.  There are order dependencies

									 *  in the clauses, so it's not trivial to convert to a switch.

									 *

									 *  In the code below, a switch on the first character handles

									 *  space, tab, LF, punctuators, comments, regexps, and string

									 *  literals; everything else is handled at the 'slow_path' label.

									 */


									/* Re-entry point after consuming input which may have changed the

									 * current line (line terminators, multi-line comments): the token

									 * start line is re-read from the window.

									 */

								 restart_lineupdate:

									out_token->start_line = lex_ctx->window[0].line;


									/* Re-entry point after consuming input on the same line (whitespace,

									 * single-line comment contents): only the start offset is updated.

									 */

								 restart:

									out_token->start_offset = lex_ctx->window[0].offset;


									x = DUK__L0();


									switch (x) {

									case DUK_ASC_SPACE:

									case DUK_ASC_HT:  /* fast paths for space and tab */

										DUK__ADVANCECHARS(lex_ctx, 1);

										goto restart;

									case DUK_ASC_LF:  /* LF line terminator; CR LF and Unicode lineterms are handled in slow path */

										DUK__ADVANCECHARS(lex_ctx, 1);

										got_lineterm = 1;

										goto restart_lineupdate;

									case DUK_ASC_SLASH:  /* '/' */

										if (DUK__L1() == '/') {

											/*

											 *  E5 Section 7.4, allow SourceCharacter (which is any 16-bit

											 *  code point).

											 */


											/* DUK__ADVANCECHARS(lex_ctx, 2) would be correct here, but is

											 * unnecessary: the loop below consumes the two slashes like

											 * any other comment character.

											 */

											for (;;) {

												x = DUK__L0();

												if (x < 0 || duk_unicode_is_line_terminator(x)) {

													break;

												}

												DUK__ADVANCECHARS(lex_ctx, 1);

											}

											goto restart;  /* line terminator will be handled on next round */

										} else if (DUK__L1() == '*') {

											/*

											 *  E5 Section 7.4.  If the multi-line comment contains a newline,

											 *  it is treated like a single line terminator for automatic

											 *  semicolon insertion.

											 */


											/* 'last_asterisk' tracks whether the previously consumed

											 * character was '*'.  It starts as zero after the opening

											 * slash-asterisk has been skipped, so a '/' immediately

											 * following the opener does not terminate the comment.

											 */

											duk_bool_t last_asterisk = 0;

											DUK__ADVANCECHARS(lex_ctx, 2);

											for (;;) {

												x = DUK__L0();

												if (x < 0) {

													DUK_ERROR_SYNTAX(lex_ctx->thr, "eof in multiline comment");

												}

												DUK__ADVANCECHARS(lex_ctx, 1);

												if (last_asterisk && x == '/') {

													break;

												}

												if (duk_unicode_is_line_terminator(x)) {

													got_lineterm = 1;

												}

												last_asterisk = (x == '*');

											}

											goto restart_lineupdate;

										} else if (regexp_mode) {

								#if defined(DUK_USE_REGEXP_SUPPORT)

											/*

											 *  "/" followed by something in regexp mode.  See E5 Section 7.8.5.

											 *

											 *  RegExp parsing is a bit complex.  First, the regexp body is delimited

											 *  by forward slashes, but the body may also contain forward slashes as

											 *  part of an escape sequence or inside a character class (delimited by

											 *  square brackets).  A mini state machine is used to implement these.

											 *

											 *  Further, an early (parse time) error must be thrown if the regexp

											 *  would cause a run-time error when used in the expression new RegExp(...).

											 *  Parsing here simply extracts the (candidate) regexp, and also accepts

											 *  invalid regular expressions (which are delimited properly).  The caller

											 *  (compiler) must perform final validation and regexp compilation.

											 *

											 *  RegExp first char may not be '/' (single line comment) or '*' (multi-

											 *  line comment).  These have already been checked above, so there is no

											 *  need below for special handling of the first regexp character as in

											 *  the E5 productions.

											 *

											 *  About unicode escapes within regexp literals:

											 *

											 *      E5 Section 7.8.5 grammar does NOT accept \uHHHH escapes.

											 *      However, Section 6 states that regexps accept the escapes,

											 *      see paragraph starting with "In string literals...".

											 *      The regexp grammar, which sees the decoded regexp literal

											 *      (after lexical parsing) DOES have a \uHHHH unicode escape.

											 *      So, for instance:

											 *

											 *          /\u1234/

											 *

											 *      should first be parsed by the lexical grammar as:

											 *

											 *          '\' 'u'      RegularExpressionBackslashSequence

											 *          '1'          RegularExpressionNonTerminator

											 *          '2'          RegularExpressionNonTerminator

											 *          '3'          RegularExpressionNonTerminator

											 *          '4'          RegularExpressionNonTerminator

											 *

											 *      and the escape itself is then parsed by the regexp engine.

											 *      This is the current implementation.

											 *

											 *  Minor spec inconsistency:

											 *

											 *      E5 Section 7.8.5 RegularExpressionBackslashSequence is:

											 *

											 *         \ RegularExpressionNonTerminator

											 *

											 *      while Section A.1 RegularExpressionBackslashSequence is:

											 *

											 *         \ NonTerminator

											 *

											 *      The latter is not normative and a typo.

											 *

											 */


											/* first, parse regexp body roughly */


											duk_small_int_t state = 0;  /* 0=base, 1=esc, 2=class, 3=class+esc */


											DUK__INITBUFFER(lex_ctx);

											for (;;) {

												/* The advance is done at the top of the loop so that the

												 * previously handled character (or the opening slash) is

												 * consumed only after it has been processed.

												 */

												DUK__ADVANCECHARS(lex_ctx, 1);  /* skip opening slash on first loop */

												x = DUK__L0();

												if (x < 0 || duk_unicode_is_line_terminator(x)) {

													DUK_ERROR_SYNTAX(lex_ctx->thr, "eof or line terminator in regexp");

												}

												x = DUK__L0();  /* re-read to avoid spill / fetch */

												if (state == 0) {

													if (x == '/') {

														/* Closing slash is consumed but not appended to the body. */

														DUK__ADVANCECHARS(lex_ctx, 1);  /* eat closing slash */

														break;

													} else if (x == '\\') {

														state = 1;

													} else if (x == '[') {

														state = 2;

													}

												} else if (state == 1) {

													state = 0;

												} else if (state == 2) {

													if (x == ']') {

														state = 0;

													} else if (x == '\\') {

														state = 3;

													}

												} else { /* state == 3 */

													state = 2;

												}

												/* Body characters (including backslashes) are appended as is. */

												DUK__APPENDBUFFER(lex_ctx, x);

											}

											duk__internbuffer(lex_ctx, lex_ctx->slot1_idx);

											out_token->str1 = duk_get_hstring((duk_context *) lex_ctx->thr, lex_ctx->slot1_idx);


											/* second, parse flags */


											DUK__INITBUFFER(lex_ctx);

											for (;;) {

												x = DUK__L0();

												if (!duk_unicode_is_identifier_part(x)) {

													break;

												}

												x = DUK__L0();  /* re-read to avoid spill / fetch */

												DUK__APPENDBUFFER(lex_ctx, x);

												DUK__ADVANCECHARS(lex_ctx, 1);

											}

											duk__internbuffer(lex_ctx, lex_ctx->slot2_idx);

											out_token->str2 = duk_get_hstring((duk_context *) lex_ctx->thr, lex_ctx->slot2_idx);


											DUK__INITBUFFER(lex_ctx);  /* free some memory */


											/* validation of the regexp is caller's responsibility */


											/* Advance is zero: the whole literal was already consumed above. */

											advtok = DUK__ADVTOK(0, DUK_TOK_REGEXP);

								#else

											DUK_ERROR_SYNTAX(lex_ctx->thr, "regexp support disabled");

								#endif

										} else if (DUK__L1() == '=') {

											/* "/=" and not in regexp mode */

											advtok = DUK__ADVTOK(2, DUK_TOK_DIV_EQ);

										} else {

											/* "/" and not in regexp mode */

											advtok = DUK__ADVTOK(1, DUK_TOK_DIV);

										}

										break;

									case DUK_ASC_LCURLY:  /* '{' */

										advtok = DUK__ADVTOK(1, DUK_TOK_LCURLY);

										break;

									case DUK_ASC_RCURLY:  /* '}' */

										advtok = DUK__ADVTOK(1, DUK_TOK_RCURLY);

										break;

									case DUK_ASC_LPAREN:  /* '(' */

										advtok = DUK__ADVTOK(1, DUK_TOK_LPAREN);

										break;

									case DUK_ASC_RPAREN:  /* ')' */

										advtok = DUK__ADVTOK(1, DUK_TOK_RPAREN);

										break;

									case DUK_ASC_LBRACKET:  /* '[' */

										advtok = DUK__ADVTOK(1, DUK_TOK_LBRACKET);

										break;

									case DUK_ASC_RBRACKET:  /* ']' */

										advtok = DUK__ADVTOK(1, DUK_TOK_RBRACKET);

										break;

									case DUK_ASC_PERIOD:  /* '.' */

										if (DUK__ISDIGIT(DUK__L1())) {

											/* Period followed by a digit can only start DecimalLiteral

											 * (handled in slow path).  We could jump straight into the

											 * DecimalLiteral handling but should avoid goto to inside

											 * a block.

											 */

											goto slow_path;

										}

										advtok = DUK__ADVTOK(1, DUK_TOK_PERIOD);

										break;

									case DUK_ASC_SEMICOLON:  /* ';' */

										advtok = DUK__ADVTOK(1, DUK_TOK_SEMICOLON);

										break;

									case DUK_ASC_COMMA:  /* ',' */

										advtok = DUK__ADVTOK(1, DUK_TOK_COMMA);

										break;

									/* For the multi-character punctuators below, the longest

									 * candidate is always tested first so that the longest match wins.

									 */

									case DUK_ASC_LANGLE:  /* '<' */

										if (DUK__L1() == '<' && DUK__L2() == '=') {

											advtok = DUK__ADVTOK(3, DUK_TOK_ALSHIFT_EQ);

										} else if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_LE);

										} else if (DUK__L1() == '<') {

											advtok = DUK__ADVTOK(2, DUK_TOK_ALSHIFT);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_LT);

										}

										break;

									case DUK_ASC_RANGLE:  /* '>' */

										if (DUK__L1() == '>' && DUK__L2() == '>' && DUK__L3() == '=') {

											advtok = DUK__ADVTOK(4, DUK_TOK_RSHIFT_EQ);

										} else if (DUK__L1() == '>' && DUK__L2() == '>') {

											advtok = DUK__ADVTOK(3, DUK_TOK_RSHIFT);

										} else if (DUK__L1() == '>' && DUK__L2() == '=') {

											advtok = DUK__ADVTOK(3, DUK_TOK_ARSHIFT_EQ);

										} else if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_GE);

										} else if (DUK__L1() == '>') {

											advtok = DUK__ADVTOK(2, DUK_TOK_ARSHIFT);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_GT);

										}

										break;

									case DUK_ASC_EQUALS:  /* '=' */

										if (DUK__L1() == '=' && DUK__L2() == '=') {

											advtok = DUK__ADVTOK(3, DUK_TOK_SEQ);

										} else if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_EQ);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_EQUALSIGN);

										}

										break;

									case DUK_ASC_EXCLAMATION:  /* '!' */

										if (DUK__L1() == '=' && DUK__L2() == '=') {

											advtok = DUK__ADVTOK(3, DUK_TOK_SNEQ);

										} else if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_NEQ);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_LNOT);

										}

										break;

									case DUK_ASC_PLUS:  /* '+' */

										if (DUK__L1() == '+') {

											advtok = DUK__ADVTOK(2, DUK_TOK_INCREMENT);

										} else if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_ADD_EQ);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_ADD);

										}

										break;

									case DUK_ASC_MINUS:  /* '-' */

										if (DUK__L1() == '-') {

											advtok = DUK__ADVTOK(2, DUK_TOK_DECREMENT);

										} else if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_SUB_EQ);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_SUB);

										}

										break;

									case DUK_ASC_STAR:  /* '*' */

										if (DUK__L1() == '*' && DUK__L2() == '=') {

											advtok = DUK__ADVTOK(3, DUK_TOK_EXP_EQ);

										} else if (DUK__L1() == '*') {

											advtok = DUK__ADVTOK(2, DUK_TOK_EXP);

										} else if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_MUL_EQ);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_MUL);

										}

										break;

									case DUK_ASC_PERCENT:  /* '%' */

										if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_MOD_EQ);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_MOD);

										}

										break;

									case DUK_ASC_AMP:  /* '&' */

										if (DUK__L1() == '&') {

											advtok = DUK__ADVTOK(2, DUK_TOK_LAND);

										} else if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_BAND_EQ);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_BAND);

										}

										break;

									case DUK_ASC_PIPE:  /* '|' */

										if (DUK__L1() == '|') {

											advtok = DUK__ADVTOK(2, DUK_TOK_LOR);

										} else if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_BOR_EQ);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_BOR);

										}

										break;

									case DUK_ASC_CARET:  /* '^' */

										if (DUK__L1() == '=') {

											advtok = DUK__ADVTOK(2, DUK_TOK_BXOR_EQ);

										} else {

											advtok = DUK__ADVTOK(1, DUK_TOK_BXOR);

										}

										break;

									case DUK_ASC_TILDE:  /* '~' */

										advtok = DUK__ADVTOK(1, DUK_TOK_BNOT);

										break;

									case DUK_ASC_QUESTION:  /* '?' */

										advtok = DUK__ADVTOK(1, DUK_TOK_QUESTION);

										break;

									case DUK_ASC_COLON:  /* ':' */

										advtok = DUK__ADVTOK(1, DUK_TOK_COLON);

										break;

									case DUK_ASC_DOUBLEQUOTE:    /* '"' */

									case DUK_ASC_SINGLEQUOTE: {  /* '\'' */

										/* StringLiteral: the decoded string value (escapes resolved,

										 * quotes excluded) is accumulated into the lexer buffer and

										 * interned into slot1.

										 */

										duk_small_int_t quote = x;  /* Note: duk_uint8_t type yields larger code */

										duk_small_int_t adv;


										DUK__INITBUFFER(lex_ctx);

										for (;;) {

											DUK__ADVANCECHARS(lex_ctx, 1);  /* eat opening quote on first loop */

											x = DUK__L0();

											if (x < 0 || duk_unicode_is_line_terminator(x)) {

												DUK_ERROR_SYNTAX(lex_ctx->thr, "eof or line terminator in string literal");

											}

											if (x == quote) {

												DUK__ADVANCECHARS(lex_ctx, 1);  /* eat closing quote */

												break;

											}

											if (x == '\\') {

												/* DUK__L0        -> '\' char

												 * DUK__L1 ... DUK__L5 -> more lookup

												 */


												x = DUK__L1();


												/* How much to advance before next loop; note that next loop

												 * will advance by 1 anyway, so -1 from the total escape

												 * length (e.g. len('\uXXXX') - 1 = 6 - 1).  As a default,

												 * 1 is good.

												 */

												adv = 2 - 1;  /* note: long live range */


												if (x < 0) {

													DUK_ERROR_SYNTAX(lex_ctx->thr, "eof or line terminator in string literal");

												}

												if (duk_unicode_is_line_terminator(x)) {

													/* line continuation: nothing is appended to the string value */

													if (x == 0x000d && DUK__L2() == 0x000a) {

														/* CR LF again a special case */

														adv = 3 - 1;

													}

												} else if (x == '\'') {

													DUK__APPENDBUFFER(lex_ctx, 0x0027);

												} else if (x == '"') {

													DUK__APPENDBUFFER(lex_ctx, 0x0022);

												} else if (x == '\\') {

													DUK__APPENDBUFFER(lex_ctx, 0x005c);

												} else if (x == 'b') {

													DUK__APPENDBUFFER(lex_ctx, 0x0008);

												} else if (x == 'f') {

													DUK__APPENDBUFFER(lex_ctx, 0x000c);

												} else if (x == 'n') {

													DUK__APPENDBUFFER(lex_ctx, 0x000a);

												} else if (x == 'r') {

													DUK__APPENDBUFFER(lex_ctx, 0x000d);

												} else if (x == 't') {

													DUK__APPENDBUFFER(lex_ctx, 0x0009);

												} else if (x == 'v') {

													DUK__APPENDBUFFER(lex_ctx, 0x000b);

												} else if (x == 'x') {

													adv = 4 - 1;

													DUK__APPENDBUFFER(lex_ctx, duk__decode_hexesc_from_window(lex_ctx, 2));

												} else if (x == 'u') {

													adv = 6 - 1;

													DUK__APPENDBUFFER(lex_ctx, duk__decode_uniesc_from_window(lex_ctx, 2));

												} else if (DUK__ISDIGIT(x)) {

													duk_codepoint_t ch = 0;  /* initialized to avoid warnings of unused var */


													/*

													 *  Octal escape or zero escape:

													 *    \0                                     (lookahead not DecimalDigit)

													 *    \1 ... \7                              (lookahead not DecimalDigit)

													 *    \ZeroToThree OctalDigit                (lookahead not DecimalDigit)

													 *    \FourToSeven OctalDigit                (no lookahead restrictions)

													 *    \ZeroToThree OctalDigit OctalDigit     (no lookahead restrictions)

													 *

													 *  Zero escape is part of the standard syntax.  Octal escapes are

													 *  defined in E5 Section B.1.2, and are only allowed in non-strict mode.

													 *  Any other productions starting with a decimal digit are invalid.

													 */


													if (x == '0' && !DUK__ISDIGIT(DUK__L2())) {

														/* Zero escape (allowed in both strict and non-strict mode;

														 * this check precedes the strict mode check below).

														 */

														ch = 0;

														/* adv = 2 - 1 default OK */

								#if defined(DUK_USE_OCTAL_SUPPORT)

													} else if (strict_mode) {

														/* No other escape beginning with a digit in strict mode */

														DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid escape in string literal");

													} else if (DUK__ISDIGIT03(x) && DUK__ISOCTDIGIT(DUK__L2()) && DUK__ISOCTDIGIT(DUK__L3())) {

														/* Three digit octal escape, digits validated. */

														adv = 4 - 1;

														ch = (duk__hexval(lex_ctx, x) << 6) +

														     (duk__hexval(lex_ctx, DUK__L2()) << 3) +

														     duk__hexval(lex_ctx, DUK__L3());

													} else if (((DUK__ISDIGIT03(x) && !DUK__ISDIGIT(DUK__L3())) || DUK__ISDIGIT47(x)) &&

													           DUK__ISOCTDIGIT(DUK__L2())) {

														/* Two digit octal escape, digits validated.

														 *

														 * The if-condition is a bit tricky.  We could catch e.g.

														 * '\039' in the three-digit escape and fail it there (by

														 * validating the digits), but we want to avoid

														 * additional validation code.

														 */

														adv = 3 - 1;

														ch = (duk__hexval(lex_ctx, x) << 3) +

														     duk__hexval(lex_ctx, DUK__L2());

													} else if (DUK__ISDIGIT(x) && !DUK__ISDIGIT(DUK__L2())) {

														/* One digit octal escape. */

														/* NOTE(review): the condition tests DUK__ISDIGIT(x) rather

														 * than an octal digit check, so presumably '\8' and '\9'

														 * are also accepted here (decoding to codepoints 8 and 9)

														 * -- confirm whether this is intended.

														 */

														/* adv = 2 - 1 default OK */

														ch = duk__hexval(lex_ctx, x);

								#else

													/* fall through to error */

								#endif

													} else {

														DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid escape in string literal");

													}


													DUK__APPENDBUFFER(lex_ctx, ch);

												} else {

													/* escaped NonEscapeCharacter */

													DUK__APPENDBUFFER(lex_ctx, x);

												}

												DUK__ADVANCECHARS(lex_ctx, adv);


												/* Track number of escapes; count not really needed but directive

												 * prologues need to detect whether there were any escapes or line

												 * continuations or not.

												 */

												out_token->num_escapes++;

											} else {

												/* part of string */

												DUK__APPENDBUFFER(lex_ctx, x);

											}

										}


										duk__internbuffer(lex_ctx, lex_ctx->slot1_idx);

										out_token->str1 = duk_get_hstring((duk_context *) lex_ctx->thr, lex_ctx->slot1_idx);


										DUK__INITBUFFER(lex_ctx);  /* free some memory */


										/* Advance is zero: the whole literal was already consumed above. */

										advtok = DUK__ADVTOK(0, DUK_TOK_STRING);

										break;

									}

									default:

										goto slow_path;

									}  /* switch */


									goto skip_slow_path;


									/* Slow path: reached from the switch above with 'x' still holding

									 * the first (unconsumed) character, i.e. DUK__L0().

									 */

								 slow_path:

									if (duk_unicode_is_line_terminator(x)) {

										if (x == 0x000d && DUK__L1() == 0x000a) {

											/*

											 *  E5 Section 7.3: CR LF is detected as a single line terminator for

											 *  line numbers.  Here we also detect it as a single line terminator

											 *  token.

											 */

											DUK__ADVANCECHARS(lex_ctx, 2);

										} else {

											DUK__ADVANCECHARS(lex_ctx, 1);

										}

										got_lineterm = 1;

										goto restart_lineupdate;

									} else if (duk_unicode_is_identifier_start(x) || x == '\\') {

										/*

										 *  Parse an identifier and then check whether it is:

										 *    - reserved word (keyword or other reserved word)

										 *    - "null"  (NullLiteral)

										 *    - "true"  (BooleanLiteral)

										 *    - "false" (BooleanLiteral)

										 *    - anything else => identifier

										 *

										 *  This does not follow the E5 productions cleanly, but is

										 *  useful and compact.

										 *

										 *  Note that identifiers may contain Unicode escapes,

										 *  see E5 Sections 6 and 7.6.  They must be decoded first,

										 *  and the result checked against allowed characters.

										 *  The above if-clause accepts an identifier start and an

										 *  '\' character -- no other token can begin with a '\'.

										 *

										 *  Note that "get" and "set" are not reserved words in E5

										 *  specification so they are recognized as plain identifiers

										 *  (the tokens DUK_TOK_GET and DUK_TOK_SET are actually not

										 *  used now).  The compiler needs to work around this.

										 *

										 *  Strictly speaking, following Ecmascript longest match

										 *  specification, an invalid escape for the first character

										 *  should cause a syntax error.  However, an invalid escape

										 *  for IdentifierParts should just terminate the identifier

										 *  early (longest match), and let the next tokenization

										 *  fail.  For instance Rhino croaks with 'foo\z' when

										 *  parsing the identifier.  This has little practical impact.

										 */


										duk_small_int_t i, i_end;

										duk_bool_t first = 1;

										duk_hstring *str;


										DUK__INITBUFFER(lex_ctx);

										for (;;) {

											/* re-lookup first char on first loop */

											if (DUK__L0() == '\\') {

												duk_codepoint_t ch;

												if (DUK__L1() != 'u') {

													DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid unicode escape in identifier");

												}


												ch = duk__decode_uniesc_from_window(lex_ctx, 2);


												/* IdentifierStart is stricter than IdentifierPart, so if the first

												 * character is escaped, must have a stricter check here.

												 */

												if (!(first ? duk_unicode_is_identifier_start(ch) : duk_unicode_is_identifier_part(ch))) {

													DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid unicode escape in identifier");

												}

												DUK__APPENDBUFFER(lex_ctx, ch);

												DUK__ADVANCECHARS(lex_ctx, 6);


												/* Track number of escapes: necessary for proper keyword

												 * detection.

												 */

												out_token->num_escapes++;

											} else {

												/* Note: first character is checked against this.  But because

												 * IdentifierPart includes all IdentifierStart characters, and

												 * the first character (if unescaped) has already been checked

												 * in the if condition, this is OK.

												 */

												if (!duk_unicode_is_identifier_part(DUK__L0())) {

													break;

												}

												DUK__APPENDBUFFER(lex_ctx, DUK__L0());

												DUK__ADVANCECHARS(lex_ctx, 1);

											}

											first = 0;

										}


										duk__internbuffer(lex_ctx, lex_ctx->slot1_idx);

										out_token->str1 = duk_get_hstring((duk_context *) lex_ctx->thr, lex_ctx->slot1_idx);

										str = out_token->str1;

										DUK_ASSERT(str != NULL);

										/* t_nores is always DUK_TOK_IDENTIFIER here, even if 't' ends up

										 * being a reserved word token below.

										 */

										out_token->t_nores = DUK_TOK_IDENTIFIER;


										DUK__INITBUFFER(lex_ctx);  /* free some memory */


										/*

										 *  Interned identifier is compared against reserved words, which are

										 *  currently interned into the heap context.  See genbuiltins.py.

										 *

										 *  Note that an escape in the identifier disables recognition of

										 *  keywords; e.g. "\u0069f = 1;" is a valid statement (assigns to

										 *  identifier named "if").  This is not necessarily compliant,

										 *  see test-dec-escaped-char-in-keyword.js.

										 *

										 *  Note: "get" and "set" are awkward.  They are not officially

										 *  ReservedWords (and indeed e.g. "var set = 1;" is valid), and

										 *  must come out as DUK_TOK_IDENTIFIER.  The compiler needs to

										 *  work around this a bit.

										 */


										/* XXX: optimize by adding the token numbers directly into the

										 * always interned duk_hstring objects (there should be enough

										 * flag bits free for that)?

										 */


										/* In strict mode the scan also covers the strict mode reserved words. */

										i_end = (strict_mode ? DUK_STRIDX_END_RESERVED : DUK_STRIDX_START_STRICT_RESERVED);


										advtok = DUK__ADVTOK(0, DUK_TOK_IDENTIFIER);

										if (out_token->num_escapes == 0) {

											for (i = DUK_STRIDX_START_RESERVED; i < i_end; i++) {

												DUK_ASSERT(i >= 0 && i < DUK_HEAP_NUM_STRINGS);

												/* Pointer comparison is used to match the interned strings. */

												if (DUK_HTHREAD_GET_STRING(lex_ctx->thr, i) == str) {

													advtok = DUK__ADVTOK(0, DUK_STRIDX_TO_TOK(i));

													break;

												}

											}

										}

									} else if (DUK__ISDIGIT(x) || (x == '.')) {

										/* Note: decimal number may start with a period, but must be followed by a digit */


										/*

										 *  DecimalLiteral, HexIntegerLiteral, OctalIntegerLiteral

										 *  "pre-parsing", followed by an actual, accurate parser step.

										 *

										 *  Note: the leading sign character ('+' or '-') is -not- part of

										 *  the production in E5 grammar, and that a DecimalLiteral

										 *  starting with a '0' must be followed by a non-digit.  Leading

										 *  zeroes are syntax errors and must be checked for.

										 *

										 *  XXX: the two step parsing process is quite awkward, it would

										 *  be more straightforward to allow numconv to parse the longest

										 *  valid prefix (it already does that, it only needs to indicate

										 *  where the input ended).  However, the lexer decodes characters

										 *  using a lookup window, so this is not a trivial change.

										 */


										/* XXX: because of the final check below (that the literal is not

										 * followed by a digit), this could maybe be simplified, if we bail

										 * out early from a leading zero (and if there are no periods etc).

										 * Maybe too complex.

										 */


										duk_double_t val;

										duk_bool_t int_only = 0;

										duk_bool_t allow_hex = 0;

										duk_small_int_t state;  /* 0=before period/exp,

										                         * 1=after period, before exp

										                         * 2=after exp, allow '+' or '-'

										                         * 3=after exp and exp sign

										                         */

										duk_small_uint_t s2n_flags;

										duk_codepoint_t y;


										DUK__INITBUFFER(lex_ctx);

										y = DUK__L1();

										if (x == '0' && (y == 'x' || y == 'X')) {

											/* Hex prefix: both prefix characters are kept in the buffer. */

											DUK__APPENDBUFFER(lex_ctx, x);

											DUK__APPENDBUFFER(lex_ctx, y);

											DUK__ADVANCECHARS(lex_ctx, 2);

											int_only = 1;

											allow_hex = 1;

								#if defined(DUK_USE_OCTAL_SUPPORT)

										} else if (!strict_mode && x == '0' && DUK__ISDIGIT(y)) {

											/* Note: if DecimalLiteral starts with a '0', it can only be

											 * followed by a period or an exponent indicator which starts

											 * with 'e' or 'E'.  Hence the if-check above ensures that

											 * OctalIntegerLiteral is the only valid NumericLiteral

											 * alternative at this point (even if y is, say, '9').

											 */


											DUK__APPENDBUFFER(lex_ctx, x);

											DUK__ADVANCECHARS(lex_ctx, 1);

											int_only = 1;

								#endif

										}


										/* Pre-parse: collect the characters which may belong to the

										 * literal into the buffer; 'int_only' stops the scan at a period

										 * or an exponent indicator.

										 */

										state = 0;

										for (;;) {

											x = DUK__L0();  /* re-lookup curr char on first round */

											if (DUK__ISDIGIT(x)) {

												/* Note: intentionally allow leading zeroes here, as the

												 * actual parser will check for them.

												 */

												if (state == 2) {

													state = 3;

												}

											} else if (allow_hex && DUK__ISHEXDIGIT(x)) {

												/* Note: 'e' and 'E' are also accepted here. */

												;

											} else if (x == '.') {

												if (state >= 1 || int_only) {

													break;

												} else {

													state = 1;

												}

											} else if (x == 'e' || x == 'E') {

												if (state >= 2 || int_only) {

													break;

												} else {

													state = 2;

												}

											} else if (x == '-' || x == '+') {

												if (state != 2) {

													break;

												} else {

													state = 3;

												}

											} else {

												break;

											}

											DUK__APPENDBUFFER(lex_ctx, x);

											DUK__ADVANCECHARS(lex_ctx, 1);

										}


										/* XXX: better coercion */

										duk__internbuffer(lex_ctx, lex_ctx->slot1_idx);


										s2n_flags = DUK_S2N_FLAG_ALLOW_EXP |

										            DUK_S2N_FLAG_ALLOW_FRAC |

										            DUK_S2N_FLAG_ALLOW_NAKED_FRAC |

										            DUK_S2N_FLAG_ALLOW_EMPTY_FRAC |

								#if defined(DUK_USE_OCTAL_SUPPORT)

										            (strict_mode ? 0 : DUK_S2N_FLAG_ALLOW_AUTO_OCT_INT) |

								#endif

										            DUK_S2N_FLAG_ALLOW_AUTO_HEX_INT;


										/* Accurate parse step: a copy of the interned literal text is

										 * pushed and converted; a NaN result is treated as an invalid

										 * literal.  The converted value then replaces slot1.

										 */

										duk_dup((duk_context *) lex_ctx->thr, lex_ctx->slot1_idx);

										duk_numconv_parse((duk_context *) lex_ctx->thr, 10 /*radix*/, s2n_flags);

										val = duk_to_number((duk_context *) lex_ctx->thr, -1);

										if (DUK_ISNAN(val)) {

											DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid numeric literal");

										}

										duk_replace((duk_context *) lex_ctx->thr, lex_ctx->slot1_idx);  /* could also just pop? */


										DUK__INITBUFFER(lex_ctx);  /* free some memory */


										/* Section 7.8.3 (note): NumericLiteral must be followed by something other than

										 * IdentifierStart or DecimalDigit.

										 */


										if (DUK__ISDIGIT(DUK__L0()) || duk_unicode_is_identifier_start(DUK__L0())) {

											DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid numeric literal");

										}


										out_token->num = val;

										advtok = DUK__ADVTOK(0, DUK_TOK_NUMBER);

									} else if (duk_unicode_is_whitespace(DUK__LOOKUP(lex_ctx, 0))) {

										/* Whitespace other than the space / tab fast paths in the switch. */

										DUK__ADVANCECHARS(lex_ctx, 1);

										goto restart;

									} else if (x < 0) {

										/* Negative codepoint: treated as end of input. */

										advtok = DUK__ADVTOK(0, DUK_TOK_EOF);

									} else {

										DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid token");

									}

								 skip_slow_path:


									/*

									 *  Shared exit path

									 *

									 *  'advtok' holds the advance amount in its upper bits and the

									 *  token type in its low 8 bits.

									 */


									DUK__ADVANCEBYTES(lex_ctx, advtok >> 8);

									out_token->t = advtok & 0xff;

									if (out_token->t_nores < 0) {

										/* t_nores still has the -1 marker: same as t. */

										out_token->t_nores = out_token->t;

									}

									out_token->lineterm = got_lineterm;


									/* Automatic semicolon insertion is allowed if a token is preceded

									 * by line terminator(s), or terminates a statement list (right curly

									 * or EOF).

									 */

									if (got_lineterm || out_token->t == DUK_TOK_RCURLY || out_token->t == DUK_TOK_EOF) {

										out_token->allow_auto_semi = 1;

									} else {

										out_token->allow_auto_semi = 0;

									}

								}


								#if defined(DUK_USE_REGEXP_SUPPORT)


								/*

								 *  Parse a RegExp token.  The grammar is described in E5 Section 15.10.

								 *  Terminal constructions (such as quantifiers) are parsed directly here.

								 *

								 *  0xffffffffU is used as a marker for "infinity" in quantifiers.  Further,

								 *  DUK__MAX_RE_QUANT_DIGITS limits the maximum number of digits that

								 *  will be accepted for a quantifier.

								 */


								/* Parse a single RegExp token at the current lexer position.
								 *
								 * lex_ctx:   lexer state; input is inspected through the lookup window
								 *            (DUK__L0(), DUK__L1(), DUK__L2()) and the position is advanced
								 *            past whatever the token consumed.
								 * out_token: zeroed on entry, then filled in.  'out_token->t' always gets
								 *            the token type; 'num', 'qmin', 'qmax' and 'greedy' are set
								 *            only by the token types that use them.
								 *
								 * Raises a range error if the token limit is reached and a syntax error
								 * for malformed escapes, groups and (without DUK_USE_ES6_REGEXP_SYNTAX)
								 * malformed quantifiers and stray '}' / ']'.
								 */
								DUK_INTERNAL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token *out_token) {
									duk_small_int_t advtok = 0;  /* init is unnecessary but suppresses "may be used uninitialized" warnings */
									duk_codepoint_t x, y;

									if (++lex_ctx->token_count >= lex_ctx->token_limit) {
										DUK_ERROR_RANGE(lex_ctx->thr, "token limit");
										return;  /* unreachable */
									}

									DUK_MEMZERO(out_token, sizeof(*out_token));

									x = DUK__L0();
									y = DUK__L1();

									DUK_DDD(DUK_DDDPRINT("parsing regexp token, L0=%ld, L1=%ld", (long) x, (long) y));

									/* Each case stores in 'advtok' both the amount of input still to be
									 * consumed and the token type; the two are split apart again in the
									 * shared exit path at the end of the function.
									 */
									switch (x) {
									case '|': {
										advtok = DUK__ADVTOK(1, DUK_RETOK_DISJUNCTION);
										break;
									}
									case '^': {
										advtok = DUK__ADVTOK(1, DUK_RETOK_ASSERT_START);
										break;
									}
									case '$': {
										advtok = DUK__ADVTOK(1, DUK_RETOK_ASSERT_END);
										break;
									}
									case '?': {
										/* '?' is {0,1}; a trailing '?' makes it non-greedy. */
										out_token->qmin = 0;
										out_token->qmax = 1;
										if (y == '?') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_QUANTIFIER);
											out_token->greedy = 0;
										} else {
											advtok = DUK__ADVTOK(1, DUK_RETOK_QUANTIFIER);
											out_token->greedy = 1;
										}
										break;
									}
									case '*': {
										/* '*' is {0,inf}; a trailing '?' makes it non-greedy. */
										out_token->qmin = 0;
										out_token->qmax = DUK_RE_QUANTIFIER_INFINITE;
										if (y == '?') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_QUANTIFIER);
											out_token->greedy = 0;
										} else {
											advtok = DUK__ADVTOK(1, DUK_RETOK_QUANTIFIER);
											out_token->greedy = 1;
										}
										break;
									}
									case '+': {
										/* '+' is {1,inf}; a trailing '?' makes it non-greedy. */
										out_token->qmin = 1;
										out_token->qmax = DUK_RE_QUANTIFIER_INFINITE;
										if (y == '?') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_QUANTIFIER);
											out_token->greedy = 0;
										} else {
											advtok = DUK__ADVTOK(1, DUK_RETOK_QUANTIFIER);
											out_token->greedy = 1;
										}
										break;
									}
									case '{': {
										/* Production allows 'DecimalDigits', including leading zeroes */
										duk_uint_fast32_t val1 = 0;
										/* val2 doubles as a "comma not yet seen" marker while it still
										 * holds DUK_RE_QUANTIFIER_INFINITE.
										 */
										duk_uint_fast32_t val2 = DUK_RE_QUANTIFIER_INFINITE;
										duk_small_int_t digits = 0;
								#if defined(DUK_USE_ES6_REGEXP_SYNTAX)
										duk_lexer_point lex_pt;
								#endif

								#if defined(DUK_USE_ES6_REGEXP_SYNTAX)
										/* Store lexer position, restoring if quantifier is invalid. */
										DUK_LEXER_GETPOINT(lex_ctx, &lex_pt);
								#endif

										/* NOTE(review): a form such as "{,5}" reaches the ',' branch with
										 * digits == 0 and is then accepted as {0,5}; the grammar quoted
										 * above requires DecimalDigits before the comma.  Behavior left
										 * as is -- confirm whether this leniency is intentional.
										 */
										for (;;) {
											DUK__ADVANCECHARS(lex_ctx, 1);  /* eat '{' on entry */
											x = DUK__L0();
											if (DUK__ISDIGIT(x)) {
												/* Digit count is validated only at ',' or '}', so val1
												 * may wrap here; such values are rejected below before
												 * being used.
												 */
												digits++;
												val1 = val1 * 10 + (duk_uint_fast32_t) duk__hexval(lex_ctx, x);
											} else if (x == ',') {
												if (digits > DUK__MAX_RE_QUANT_DIGITS) {
													goto invalid_quantifier;
												}
												if (val2 != DUK_RE_QUANTIFIER_INFINITE) {
													/* second comma */
													goto invalid_quantifier;
												}
												if (DUK__L1() == '}') {
													/* form: { DecimalDigits , }, val1 = min count */
													if (digits == 0) {
														goto invalid_quantifier;
													}
													out_token->qmin = val1;
													out_token->qmax = DUK_RE_QUANTIFIER_INFINITE;
													DUK__ADVANCECHARS(lex_ctx, 2);
													break;
												}
												val2 = val1;
												val1 = 0;
												digits = 0;  /* not strictly necessary because of lookahead '}' above */
											} else if (x == '}') {
												if (digits > DUK__MAX_RE_QUANT_DIGITS) {
													goto invalid_quantifier;
												}
												if (digits == 0) {
													goto invalid_quantifier;
												}
												if (val2 != DUK_RE_QUANTIFIER_INFINITE) {
													/* val2 = min count, val1 = max count */
													out_token->qmin = val2;
													out_token->qmax = val1;
												} else {
													/* val1 = count */
													out_token->qmin = val1;
													out_token->qmax = val1;
												}
												DUK__ADVANCECHARS(lex_ctx, 1);
												break;
											} else {
												goto invalid_quantifier;
											}
										}
										if (DUK__L0() == '?') {
											out_token->greedy = 0;
											DUK__ADVANCECHARS(lex_ctx, 1);
										} else {
											out_token->greedy = 1;
										}
										/* Everything was consumed by the loop above, so advance by 0. */
										advtok = DUK__ADVTOK(0, DUK_RETOK_QUANTIFIER);
										break;
								 invalid_quantifier:
								#if defined(DUK_USE_ES6_REGEXP_SYNTAX)
										/* Failed to match the quantifier, restore lexer and parse
										 * opening brace as a literal.
										 */
										DUK_LEXER_SETPOINT(lex_ctx, &lex_pt);
										advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_CHAR);
										out_token->num = '{';
								#else
										DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp quantifier");
								#endif
										break;
									}
									case '.': {
										advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_PERIOD);
										break;
									}
									case '\\': {
										/* The E5.1 specification does not seem to allow IdentifierPart characters
										 * to be used as identity escapes.  Unfortunately this includes '$', which
										 * cannot be escaped as '\$'; it needs to be escaped e.g. as '\u0024'.
										 * Many other implementations (including V8 and Rhino, for instance) do
										 * accept '\$' as a valid identity escape, which is quite pragmatic.
										 * See: test-regexp-identity-escape-dollar.js.
										 */

										advtok = DUK__ADVTOK(2, DUK_RETOK_ATOM_CHAR);  /* default: char escape (two chars) */
										if (y == 'b') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_ASSERT_WORD_BOUNDARY);
										} else if (y == 'B') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_ASSERT_NOT_WORD_BOUNDARY);
										} else if (y == 'f') {
											out_token->num = 0x000c;
										} else if (y == 'n') {
											out_token->num = 0x000a;
										} else if (y == 't') {
											out_token->num = 0x0009;
										} else if (y == 'r') {
											out_token->num = 0x000d;
										} else if (y == 'v') {
											out_token->num = 0x000b;
										} else if (y == 'c') {
											/* Control escape: \cA..\cZ or \ca..\cz, value is letter mod 32. */
											x = DUK__L2();
											if ((x >= 'a' && x <= 'z') ||
											    (x >= 'A' && x <= 'Z')) {
												out_token->num = (x % 32);
												advtok = DUK__ADVTOK(3, DUK_RETOK_ATOM_CHAR);
											} else {
												DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp escape");
											}
										} else if (y == 'x') {
											/* \xHH: digits are decoded from window offset 2 onwards. */
											out_token->num = duk__decode_hexesc_from_window(lex_ctx, 2);
											advtok = DUK__ADVTOK(4, DUK_RETOK_ATOM_CHAR);
										} else if (y == 'u') {
											/* \uHHHH: digits are decoded from window offset 2 onwards. */
											out_token->num = duk__decode_uniesc_from_window(lex_ctx, 2);
											advtok = DUK__ADVTOK(6, DUK_RETOK_ATOM_CHAR);
										} else if (y == 'd') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_ATOM_DIGIT);
										} else if (y == 'D') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_ATOM_NOT_DIGIT);
										} else if (y == 's') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_ATOM_WHITE);
										} else if (y == 'S') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_ATOM_NOT_WHITE);
										} else if (y == 'w') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_ATOM_WORD_CHAR);
										} else if (y == 'W') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_ATOM_NOT_WORD_CHAR);
										} else if (DUK__ISDIGIT(y)) {
											/* E5 Section 15.10.2.11 */
											if (y == '0') {
												/* \0 is a NUL character and must not be followed by a digit. */
												if (DUK__ISDIGIT(DUK__L2())) {
													DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp escape");
												}
												out_token->num = 0x0000;
												advtok = DUK__ADVTOK(2, DUK_RETOK_ATOM_CHAR);
											} else {
												/* Backreference: accumulate the decimal digits that
												 * follow the backslash, rejecting overlong sequences.
												 */
												/* XXX: shared parsing? */
												duk_uint_fast32_t val = 0;
												duk_small_int_t i;
												for (i = 0; ; i++) {
													if (i >= DUK__MAX_RE_DECESC_DIGITS) {
														DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp escape");
													}
													DUK__ADVANCECHARS(lex_ctx, 1);  /* eat backslash on entry */
													x = DUK__L0();
													if (!DUK__ISDIGIT(x)) {
														break;
													}
													val = val * 10 + (duk_uint_fast32_t) duk__hexval(lex_ctx, x);
												}
												/* DUK__L0() cannot be a digit, because the loop doesn't terminate if it is */
												advtok = DUK__ADVTOK(0, DUK_RETOK_ATOM_BACKREFERENCE);
												out_token->num = val;
											}
								#if defined(DUK_USE_ES6_REGEXP_SYNTAX)
										} else if (y >= 0) {
											/* For ES6 Annex B, accept any source character as identity
											 * escape except 'c' which is used for control characters.
											 * http://www.ecma-international.org/ecma-262/6.0/#sec-regular-expressions-patterns
											 * Careful not to match end-of-buffer (<0) here.
											 * This is not yet full ES6 Annex B because cases above
											 * (like hex escape) won't backtrack.
											 */
											DUK_ASSERT(y != DUK_ASC_LC_C);  /* covered above */
								#else  /* DUK_USE_ES6_REGEXP_SYNTAX */
										} else if ((y >= 0 && !duk_unicode_is_identifier_part(y)) ||
										           y == DUK_UNICODE_CP_ZWNJ ||
										           y == DUK_UNICODE_CP_ZWJ) {
											/* For ES5.1 identity escapes are not allowed for identifier
											 * parts.  This conflicts with a lot of real world code as this
											 * doesn't e.g. allow escaping a dollar sign as /\$/, see
											 * test-regexp-identity-escape-dollar.js.
											 */
								#endif  /* DUK_USE_ES6_REGEXP_SYNTAX */
											out_token->num = y;
										} else {
											DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp escape");
										}
										break;
									}
									case '(': {
										/* XXX: naming is inconsistent: ATOM_END_GROUP ends an ASSERT_START_LOOKAHEAD */

										if (y == '?') {
											if (DUK__L2() == '=') {
												/* (?= */
												advtok = DUK__ADVTOK(3, DUK_RETOK_ASSERT_START_POS_LOOKAHEAD);
											} else if (DUK__L2() == '!') {
												/* (?! */
												advtok = DUK__ADVTOK(3, DUK_RETOK_ASSERT_START_NEG_LOOKAHEAD);
											} else if (DUK__L2() == ':') {
												/* (?: */
												advtok = DUK__ADVTOK(3, DUK_RETOK_ATOM_START_NONCAPTURE_GROUP);
											} else {
												/* "(?" followed by anything else is not a valid group
												 * opener.  Without this branch 'advtok' would keep its
												 * initial value 0, i.e. nothing is consumed and the
												 * token type becomes 0 (presumably indistinguishable
												 * from the EOF token -- verify against the token
												 * defines), so the malformed group would go unreported.
												 */
												DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp group");
											}
										} else {
											/* ( */
											advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_START_CAPTURE_GROUP);
										}
										break;
									}
									case ')': {
										advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_END_GROUP);
										break;
									}
									case '[': {
										/*
										 *  To avoid creating a heavy intermediate value for the list of ranges,
										 *  only the start token ('[' or '[^') is parsed here.  The regexp
										 *  compiler parses the ranges itself.
										 */

										/* XXX: with DUK_USE_ES6_REGEXP_SYNTAX we should allow left bracket
										 * literal too, but it's not easy to parse without backtracking.
										 */

										advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_START_CHARCLASS);
										if (y == '^') {
											advtok = DUK__ADVTOK(2, DUK_RETOK_ATOM_START_CHARCLASS_INVERTED);
										}
										break;
									}
								#if !defined(DUK_USE_ES6_REGEXP_SYNTAX)
									case '}':
									case ']': {
										/* Although these could be parsed as PatternCharacters unambiguously (here),
										 * E5 Section 15.10.1 grammar explicitly forbids these as PatternCharacters.
										 */
										DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp character");
										break;
									}
								#endif
									case -1: {
										/* EOF */
										advtok = DUK__ADVTOK(0, DUK_TOK_EOF);
										break;
									}
									default: {
										/* PatternCharacter, all excluded characters are matched by cases above */
										advtok = DUK__ADVTOK(1, DUK_RETOK_ATOM_CHAR);
										out_token->num = x;
										break;
									}
									}

									/*
									 *  Shared exit path
									 */

									/* Upper bits of 'advtok' carry the remaining advance amount, the low
									 * 8 bits carry the token type.
									 */
									DUK__ADVANCEBYTES(lex_ctx, advtok >> 8);
									out_token->t = advtok & 0xff;
								}


								/*

								 *  Special parser for character classes; calls callback for every

								 *  range parsed.  The parser itself does not return a range count.

								 */


								/* XXX: this duplicates functionality in duk_regexp.c where a similar loop is

								 * required anyway.  We could use that BUT we need to update the regexp compiler

								 * 'nranges' too.  Work this out a bit more cleanly to save space.

								 */


								/* XXX: the handling of character range detection is a bit convoluted.

								 * Try to simplify and make smaller.

								 */


								/* XXX: logic for handling character ranges is now incorrect, it will accept

								 * e.g. [\d-z] whereas it should croak from it?  SMJS accepts this too, though.

								 *

								 * Needs a read through and a lot of additional tests.

								 */


								/* Feed a flat table of 16-bit [start,end] pairs to the range callback.
								 *
								 * lex_ctx:   unused, kept for a uniform helper signature.
								 * gen_range: callback invoked once per pair.
								 * userdata:  opaque pointer passed through to the callback.
								 * ranges:    table laid out as start0, end0, start1, end1, ...
								 * num:       number of duk_uint16_t entries in 'ranges' (not pairs).
								 */
								DUK_LOCAL
								void duk__emit_u16_direct_ranges(duk_lexer_ctx *lex_ctx,
								                                 duk_re_range_callback gen_range,
								                                 void *userdata,
								                                 const duk_uint16_t *ranges,
								                                 duk_small_int_t num) {
									duk_small_int_t idx;

									DUK_UNREF(lex_ctx);

									/* Step through the table one pair at a time. */
									for (idx = 0; idx < num; idx += 2) {
										/* Final argument 1 marks the range as 'direct', which
										 * bypasses canonicalization (see Wiki).
										 */
										gen_range(userdata,
										          (duk_codepoint_t) ranges[idx],
										          (duk_codepoint_t) ranges[idx + 1],
										          1);
									}
								}


								/* Parse the body of a character class up to and including the closing ']',
								 * reporting every range found through 'gen_range'.
								 *
								 * lex_ctx:   lexer state; parsing starts at the current position (the class
								 *            start token '[' or '[^' is parsed by the RegExp token parser,
								 *            so it is presumably consumed already -- verify against caller).
								 * gen_range: called as gen_range(userdata, first, last, direct).  Literal
								 *            characters and explicit 'a-z' style ranges are reported with
								 *            direct == 0; the predefined \d \D \s \S \w \W tables are
								 *            reported with direct == 1.
								 * userdata:  opaque pointer handed through to 'gen_range'.
								 *
								 * Raises a syntax error on EOF inside the class, on an invalid escape, and
								 * on an invalid range (start above end, or a multi-character class escape
								 * used as a range endpoint).
								 */
								DUK_INTERNAL void duk_lexer_parse_re_ranges(duk_lexer_ctx *lex_ctx, duk_re_range_callback gen_range, void *userdata) {
									duk_codepoint_t start = -1;  /* pending character not yet emitted, -1 if none */
									duk_codepoint_t ch;
									duk_codepoint_t x;
									duk_bool_t dash = 0;  /* set when a '-' range indicator follows 'start' */

									DUK_DD(DUK_DDPRINT("parsing regexp ranges"));

									for (;;) {
										x = DUK__L0();
										DUK__ADVANCECHARS(lex_ctx, 1);

										ch = -1;  /* not strictly necessary, but avoids "uninitialized variable" warnings */
										DUK_UNREF(ch);

										if (x < 0) {
											DUK_ERROR_SYNTAX(lex_ctx->thr, "eof in character class");
										} else if (x == ']') {
											DUK_ASSERT(!dash);  /* lookup should prevent this */
											/* Flush a pending single character before finishing. */
											if (start >= 0) {
												gen_range(userdata, start, start, 0);
											}
											break;
										} else if (x == '-') {
											if (start >= 0 && !dash && DUK__L0() != ']') {
												/* '-' as a range indicator */
												dash = 1;
												continue;
											} else {
												/* '-' verbatim */
												ch = x;
											}
										} else if (x == '\\') {
											/*
											 *  The escapes are same as outside a character class, except that \b has a
											 *  different meaning, and \B and backreferences are prohibited (see E5
											 *  Section 15.10.2.19).  However, it's difficult to share code because we
											 *  handle e.g. "\n" very differently: here we generate a single character
											 *  range for it.
											 */

											x = DUK__L0();
											DUK__ADVANCECHARS(lex_ctx, 1);

											if (x == 'b') {
												/* Note: '\b' in char class is different than outside (assertion).
												 * '\B' has no branch of its own: without DUK_USE_ES6_REGEXP_SYNTAX
												 * it is rejected by the duk_unicode_is_identifier_part() check
												 * below; with DUK_USE_ES6_REGEXP_SYNTAX the identity escape
												 * branch below accepts it as a literal 'B'.
												 */
												ch = 0x0008;
											} else if (x == 'f') {
												ch = 0x000c;
											} else if (x == 'n') {
												ch = 0x000a;
											} else if (x == 't') {
												ch = 0x0009;
											} else if (x == 'r') {
												ch = 0x000d;
											} else if (x == 'v') {
												ch = 0x000b;
											} else if (x == 'c') {
												/* Control escape: \cA..\cZ or \ca..\cz, value is letter mod 32. */
												x = DUK__L0();
												DUK__ADVANCECHARS(lex_ctx, 1);
												if ((x >= 'a' && x <= 'z') ||
												    (x >= 'A' && x <= 'Z')) {
													ch = (x % 32);
												} else {
													DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp escape");
													return;  /* never reached, but avoids warnings of
													          * potentially unused variables.
													          */
												}
											} else if (x == 'x') {
												/* \xHH: backslash and 'x' already consumed, digits at offset 0. */
												ch = duk__decode_hexesc_from_window(lex_ctx, 0);
												DUK__ADVANCECHARS(lex_ctx, 2);
											} else if (x == 'u') {
												/* \uHHHH: backslash and 'u' already consumed, digits at offset 0. */
												ch = duk__decode_uniesc_from_window(lex_ctx, 0);
												DUK__ADVANCECHARS(lex_ctx, 4);
											} else if (x == 'd') {
												duk__emit_u16_direct_ranges(lex_ctx,
												                            gen_range,
												                            userdata,
												                            duk_unicode_re_ranges_digit,
												                            sizeof(duk_unicode_re_ranges_digit) / sizeof(duk_uint16_t));
												ch = -1;
											} else if (x == 'D') {
												duk__emit_u16_direct_ranges(lex_ctx,
												                            gen_range,
												                            userdata,
												                            duk_unicode_re_ranges_not_digit,
												                            sizeof(duk_unicode_re_ranges_not_digit) / sizeof(duk_uint16_t));
												ch = -1;
											} else if (x == 's') {
												duk__emit_u16_direct_ranges(lex_ctx,
												                            gen_range,
												                            userdata,
												                            duk_unicode_re_ranges_white,
												                            sizeof(duk_unicode_re_ranges_white) / sizeof(duk_uint16_t));
												ch = -1;
											} else if (x == 'S') {
												duk__emit_u16_direct_ranges(lex_ctx,
												                            gen_range,
												                            userdata,
												                            duk_unicode_re_ranges_not_white,
												                            sizeof(duk_unicode_re_ranges_not_white) / sizeof(duk_uint16_t));
												ch = -1;
											} else if (x == 'w') {
												duk__emit_u16_direct_ranges(lex_ctx,
												                            gen_range,
												                            userdata,
												                            duk_unicode_re_ranges_wordchar,
												                            sizeof(duk_unicode_re_ranges_wordchar) / sizeof(duk_uint16_t));
												ch = -1;
											} else if (x == 'W') {
												duk__emit_u16_direct_ranges(lex_ctx,
												                            gen_range,
												                            userdata,
												                            duk_unicode_re_ranges_not_wordchar,
												                            sizeof(duk_unicode_re_ranges_not_wordchar) / sizeof(duk_uint16_t));
												ch = -1;
											} else if (DUK__ISDIGIT(x)) {
												/* DecimalEscape, only \0 is allowed, no leading zeroes are allowed */
												if (x == '0' && !DUK__ISDIGIT(DUK__L0())) {
													ch = 0x0000;
												} else {
													DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp escape");
												}
								#if defined(DUK_USE_ES6_REGEXP_SYNTAX)
											} else if (x >= 0) {
												/* IdentityEscape: ES6 Annex B allows almost all
												 * source characters here.  Match anything except
												 * EOF here.
												 */
												ch = x;
								#else  /* DUK_USE_ES6_REGEXP_SYNTAX */
											} else if (!duk_unicode_is_identifier_part(x) ||
											           x == DUK_UNICODE_CP_ZWNJ ||
											           x == DUK_UNICODE_CP_ZWJ) {
												/* IdentityEscape: ES5.1 doesn't allow identity escape
												 * for identifier part characters, which conflicts with
												 * some real world code.  For example, it doesn't allow
												 * /[\$]/ which is awkward.
												 *
												 * ZWNJ and ZWJ are explicitly allowed by the ES5.1
												 * IdentityEscape production, so accept them here the
												 * same way the RegExp token parser does for escapes
												 * outside a character class.
												 */
												ch = x;
								#endif  /* DUK_USE_ES6_REGEXP_SYNTAX */
											} else {
												DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid regexp escape");
											}
										} else {
											/* character represents itself */
											ch = x;
										}

										/* ch is a literal character here or -1 if parsed entity was
										 * an escape such as "\s".
										 */

										if (ch < 0) {
											/* multi-character sets not allowed as part of ranges, see
											 * E5 Section 15.10.2.15, abstract operation CharacterRange.
											 */
											if (start >= 0) {
												if (dash) {
													DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid range");
												} else {
													/* Flush the pending character; the class escape's own
													 * ranges were already emitted above.
													 */
													gen_range(userdata, start, start, 0);
													start = -1;
													/* dash is already 0 */
												}
											}
										} else {
											if (start >= 0) {
												if (dash) {
													/* Completed 'start-ch' range. */
													if (start > ch) {
														DUK_ERROR_SYNTAX(lex_ctx->thr, "invalid range");
													}
													gen_range(userdata, start, ch, 0);
													start = -1;
													dash = 0;
												} else {
													/* Previous character stands alone; 'ch' becomes the
													 * new pending character.
													 */
													gen_range(userdata, start, start, 0);
													start = ch;
													/* dash is already 0 */
												}
											} else {
												start = ch;
											}
										}
									}

									return;
								}


								#endif  /* DUK_USE_REGEXP_SUPPORT */