Browse Source

Treat negative lexer input_offset as an internal error

Before this change, a negative input offset would lead to memory-unsafe
behavior.  The old code was correct as such, because a negative input offset
should never happen (so an assert is sufficient).  However, the compiler
saves and restores lexer "points" to and from duk_tval numbers, so if the
duk_tval representation is chosen improperly or something similar happens,
the input offset could be corrupted (again, this is only the case if
something is pretty badly wrong anyway).

The new behavior is to throw an internal error for negative input offsets.
This is explicit and memory-safe, and it protects against potential compiler
bugs too.
pull/20/head
Sami Vaarala 11 years ago
parent
commit
1c4869b163
  1. 41
      src/duk_lexer.c
  2. 2
      src/duk_lexer.h

41
src/duk_lexer.c

@ -168,14 +168,26 @@ static int duk__read_char(duk_lexer_ctx *lex_ctx) {
#ifdef DUK_USE_STRICT_UTF8_SOURCE #ifdef DUK_USE_STRICT_UTF8_SOURCE
int mincp; int mincp;
#endif #endif
duk_int_t input_offset;
if (lex_ctx->input_offset >= lex_ctx->input_length) { /* The case where input_offset < 0 should never happen, but it's
* worth checking because lexer "points" are stored and restored
* to/from duk_tvals by the compiler. It's nice if any duk_tval issue
* is caught cleanly rather than leading to memory unsafe behavior.
*/
input_offset = lex_ctx->input_offset;
if (DUK_UNLIKELY(input_offset >= lex_ctx->input_length ||
input_offset < 0)) {
if (input_offset < 0) {
/* Log negative offset to ease detection w/o asserts. */
DUK_D(DUK_DPRINT("negative input_offset, should never happen"));
goto error_internal;
}
DUK_ASSERT(input_offset >= 0);
return -1; return -1;
} }
DUK_ASSERT(lex_ctx->input_offset >= 0); p = lex_ctx->input + input_offset;
DUK_ASSERT(lex_ctx->input_offset < lex_ctx->input_length);
p = lex_ctx->input + lex_ctx->input_offset;
x = (int) *p++; x = (int) *p++;
if (x < 0x80) { if (x < 0x80) {
@ -184,7 +196,7 @@ static int duk__read_char(duk_lexer_ctx *lex_ctx) {
goto fastpath; goto fastpath;
} else if (x < 0xc0) { } else if (x < 0xc0) {
/* 10xx xxxx -> invalid */ /* 10xx xxxx -> invalid */
goto error; goto error_encoding;
} else if (x < 0xe0) { } else if (x < 0xe0) {
/* 110x xxxx 10xx xxxx */ /* 110x xxxx 10xx xxxx */
len = 2; len = 2;
@ -208,18 +220,18 @@ static int duk__read_char(duk_lexer_ctx *lex_ctx) {
x = x & 0x07; x = x & 0x07;
} else { } else {
/* no point in supporting encodings of 5 or more bytes */ /* no point in supporting encodings of 5 or more bytes */
goto error; goto error_encoding;
} }
if (len > lex_ctx->input_length - lex_ctx->input_offset) { if (len > lex_ctx->input_length - lex_ctx->input_offset) {
goto error; goto error_clipped;
} }
for (i = 1; i < len; i++) { for (i = 1; i < len; i++) {
int y = *p++; int y = *p++;
if ((y & 0xc0) != 0x80) { if ((y & 0xc0) != 0x80) {
/* check that byte has the form 10xx xxxx */ /* check that byte has the form 10xx xxxx */
goto error; goto error_encoding;
} }
x = x << 6; x = x << 6;
x += y & 0x3f; x += y & 0x3f;
@ -228,11 +240,11 @@ static int duk__read_char(duk_lexer_ctx *lex_ctx) {
/* check final character validity */ /* check final character validity */
if (x > 0x10ffff) { if (x > 0x10ffff) {
goto error; goto error_encoding;
} }
#ifdef DUK_USE_STRICT_UTF8_SOURCE #ifdef DUK_USE_STRICT_UTF8_SOURCE
if (x < mincp || (x >= 0xd800 && x <= 0xdfff) || x == 0xfffe) { if (x < mincp || (x >= 0xd800 && x <= 0xdfff) || x == 0xfffe) {
goto error; goto error_encoding;
} }
#endif #endif
@ -264,8 +276,13 @@ static int duk__read_char(duk_lexer_ctx *lex_ctx) {
return x; return x;
error: error_internal: /* internal error */
DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR, "invalid char encoding in source"); DUK_ERROR(lex_ctx->thr, DUK_ERR_INTERNAL_ERROR, "internal error");
return 0;
error_clipped: /* clipped codepoint */
error_encoding: /* invalid codepoint encoding or codepoint */
DUK_ERROR(lex_ctx->thr, DUK_ERR_SYNTAX_ERROR, "char decode failed");
return 0; return 0;
} }

2
src/duk_lexer.h

@ -385,7 +385,7 @@ struct duk_lexer_ctx {
duk_hthread *thr; /* thread; minimizes argument passing */ duk_hthread *thr; /* thread; minimizes argument passing */
duk_uint8_t *input; duk_uint8_t *input;
int input_length; duk_int_t input_length;
int window[DUK_LEXER_WINDOW_SIZE]; /* window of unicode code points */ int window[DUK_LEXER_WINDOW_SIZE]; /* window of unicode code points */
int offsets[DUK_LEXER_WINDOW_SIZE]; /* input byte offset for each char */ int offsets[DUK_LEXER_WINDOW_SIZE]; /* input byte offset for each char */
int lines[DUK_LEXER_WINDOW_SIZE]; /* input lines for each char */ int lines[DUK_LEXER_WINDOW_SIZE]; /* input lines for each char */

Loading…
Cancel
Save