diff --git a/py/lexer.c b/py/lexer.c index d4205236c3..f7f9c631f3 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -299,8 +299,15 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs // backslash (outside string literals) must appear just before a physical newline next_char(lex); if (!is_physical_newline(lex)) { - // TODO SyntaxError - assert(0); + // SyntaxError: unexpected character after line continuation character + tok->src_name = lex->name; + tok->src_line = lex->line; + tok->src_column = lex->column; + tok->kind = MP_TOKEN_BAD_LINE_CONTINUATION; + vstr_reset(&lex->vstr); + tok->str = vstr_str(&lex->vstr); + tok->len = 0; + return; } else { next_char(lex); } diff --git a/py/lexer.h b/py/lexer.h index 9dfcb128c5..428ff03c5b 100644 --- a/py/lexer.h +++ b/py/lexer.h @@ -10,19 +10,20 @@ typedef enum _mp_token_kind_t { MP_TOKEN_INVALID, MP_TOKEN_DEDENT_MISMATCH, MP_TOKEN_LONELY_STRING_OPEN, + MP_TOKEN_BAD_LINE_CONTINUATION, - MP_TOKEN_NEWLINE, // 4 - MP_TOKEN_INDENT, // 5 - MP_TOKEN_DEDENT, // 6 + MP_TOKEN_NEWLINE, // 5 + MP_TOKEN_INDENT, // 6 + MP_TOKEN_DEDENT, // 7 - MP_TOKEN_NAME, // 7 + MP_TOKEN_NAME, // 8 MP_TOKEN_NUMBER, MP_TOKEN_STRING, MP_TOKEN_BYTES, MP_TOKEN_ELLIPSIS, - MP_TOKEN_KW_FALSE, // 12 + MP_TOKEN_KW_FALSE, // 13 MP_TOKEN_KW_NONE, MP_TOKEN_KW_TRUE, MP_TOKEN_KW_AND, @@ -31,7 +32,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_KW_BREAK, MP_TOKEN_KW_CLASS, MP_TOKEN_KW_CONTINUE, - MP_TOKEN_KW_DEF, // 21 + MP_TOKEN_KW_DEF, // 22 MP_TOKEN_KW_DEL, MP_TOKEN_KW_ELIF, MP_TOKEN_KW_ELSE, @@ -41,7 +42,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_KW_FROM, MP_TOKEN_KW_GLOBAL, MP_TOKEN_KW_IF, - MP_TOKEN_KW_IMPORT, // 31 + MP_TOKEN_KW_IMPORT, // 32 MP_TOKEN_KW_IN, MP_TOKEN_KW_IS, MP_TOKEN_KW_LAMBDA, @@ -51,12 +52,12 @@ typedef enum _mp_token_kind_t { MP_TOKEN_KW_PASS, MP_TOKEN_KW_RAISE, MP_TOKEN_KW_RETURN, - MP_TOKEN_KW_TRY, // 41 + MP_TOKEN_KW_TRY, // 42 MP_TOKEN_KW_WHILE, MP_TOKEN_KW_WITH, MP_TOKEN_KW_YIELD, - MP_TOKEN_OP_PLUS, // 45 + MP_TOKEN_OP_PLUS, // 46 MP_TOKEN_OP_MINUS, MP_TOKEN_OP_STAR, MP_TOKEN_OP_DBL_STAR, @@ -66,7 +67,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_OP_LESS, MP_TOKEN_OP_DBL_LESS, MP_TOKEN_OP_MORE, - MP_TOKEN_OP_DBL_MORE, // 55 + MP_TOKEN_OP_DBL_MORE, // 56 MP_TOKEN_OP_AMPERSAND, MP_TOKEN_OP_PIPE, MP_TOKEN_OP_CARET, @@ -76,7 +77,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_OP_DBL_EQUAL, MP_TOKEN_OP_NOT_EQUAL, - MP_TOKEN_DEL_PAREN_OPEN, // 64 + MP_TOKEN_DEL_PAREN_OPEN, // 65 MP_TOKEN_DEL_PAREN_CLOSE, MP_TOKEN_DEL_BRACKET_OPEN, MP_TOKEN_DEL_BRACKET_CLOSE, @@ -86,7 +87,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_DEL_COLON, MP_TOKEN_DEL_PERIOD, MP_TOKEN_DEL_SEMICOLON, - MP_TOKEN_DEL_AT, // 74 + MP_TOKEN_DEL_AT, // 75 MP_TOKEN_DEL_EQUAL, MP_TOKEN_DEL_PLUS_EQUAL, MP_TOKEN_DEL_MINUS_EQUAL, @@ -96,7 +97,7 @@ typedef enum _mp_token_kind_t { MP_TOKEN_DEL_PERCENT_EQUAL, MP_TOKEN_DEL_AMPERSAND_EQUAL, MP_TOKEN_DEL_PIPE_EQUAL, - MP_TOKEN_DEL_CARET_EQUAL, // 84 + MP_TOKEN_DEL_CARET_EQUAL, // 85 MP_TOKEN_DEL_DBL_MORE_EQUAL, MP_TOKEN_DEL_DBL_LESS_EQUAL, MP_TOKEN_DEL_DBL_STAR_EQUAL, diff --git a/py/parse.c b/py/parse.c index d3786ba956..a619c90507 100644 --- a/py/parse.c +++ b/py/parse.c @@ -88,6 +88,7 @@ typedef struct _parser_t { uint rule_stack_top; rule_stack_t *rule_stack; + uint result_stack_alloc; uint result_stack_top; mp_parse_node_t *result_stack; } parser_t; @@ -121,7 +122,7 @@ mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) { int num_parse_nodes_allocated = 0; mp_parse_node_struct_t *parse_node_new_struct(int rule_id, int num_args) { - mp_parse_node_struct_t *pn = m_malloc(sizeof(mp_parse_node_struct_t) + num_args * sizeof(mp_parse_node_t)); + mp_parse_node_struct_t *pn = m_new_obj_var(mp_parse_node_struct_t, mp_parse_node_t, num_args); pn->source = 0; // TODO pn->kind_num_nodes = (rule_id & 0xff) | (num_args << 8); num_parse_nodes_allocated += 1; @@ -180,6 +181,10 @@ static mp_parse_node_t peek_result(parser_t *parser, int pos) { } static void push_result_node(parser_t *parser, mp_parse_node_t pn) { + if (parser->result_stack_top >= parser->result_stack_alloc) { + parser->result_stack = m_renew(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc * 2); + parser->result_stack_alloc *= 2; + } parser->result_stack[parser->result_stack_top++] = pn; } @@ -252,14 +257,20 @@ static void push_result_rule(parser_t *parser, const rule_t *rule, int num_args) } mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { - parser_t *parser = m_new(parser_t, 1); + + // allocate memory for the parser and its stacks + + parser_t *parser = m_new_obj(parser_t); + parser->rule_stack_alloc = 64; parser->rule_stack_top = 0; parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc); - parser->result_stack = m_new(mp_parse_node_t, 1000); + parser->result_stack_alloc = 64; parser->result_stack_top = 0; + parser->result_stack = m_new(mp_parse_node_t, parser->result_stack_alloc); + // work out the top-level rule to use, and push it on the stack int top_level_rule; switch (input_kind) { case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break; @@ -268,6 +279,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { } push_rule(parser, rules[top_level_rule], 0); + // parse! + uint n, i; bool backtrack = false; const rule_t *rule; @@ -558,12 +571,25 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { //printf("--------------\n"); //result_stack_show(parser); - assert(parser->result_stack_top == 1); - //printf("maximum depth: %d\n", parser->rule_stack_alloc); + //printf("rule stack alloc: %d\n", parser->rule_stack_alloc); + //printf("result stack alloc: %d\n", parser->result_stack_alloc); //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated); - return parser->result_stack[0]; + + // get the root parse node that we created + assert(parser->result_stack_top == 1); + mp_parse_node_t result = parser->result_stack[0]; + +finished: + // free the memory that we don't need anymore + m_del(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc); + m_del(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc); + m_del_obj(parser_t, parser); + + // return the result + return result; syntax_error: + // TODO these should raise a proper exception if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) { mp_lexer_show_error_pythonic(lex, "IndentationError: unexpected indent"); } else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) { @@ -575,5 +601,6 @@ syntax_error: #endif mp_token_show(mp_lexer_cur(lex)); } - return MP_PARSE_NODE_NULL; + result = MP_PARSE_NODE_NULL; + goto finished; }