From 8721087661ea607ea68d45b4e518e53607cadbd3 Mon Sep 17 00:00:00 2001 From: Damien George Date: Sun, 13 Apr 2014 00:30:32 +0100 Subject: [PATCH] py: Big improvements to inline assembler. Improved the Thumb assembler back end. Added many more Thumb instructions to the inline assembler. Improved parsing of assembler instructions and arguments. Assembler functions can now be passed the address of any object that supports the buffer protocol (to get the address of the buffer). Added an example of how to sum numbers from an array in assembler. --- examples/asmled.py | 34 ++++--- examples/asmsum.py | 57 +++++++++++ py/asmthumb.c | 84 +++++++--------- py/asmthumb.h | 93 ++++++++++++++++-- py/emitinlinethumb.c | 228 +++++++++++++++++++++++++++++++------------ py/emitnative.c | 8 +- py/objfun.c | 45 +++++---- 7 files changed, 390 insertions(+), 159 deletions(-) create mode 100644 examples/asmsum.py diff --git a/examples/asmled.py b/examples/asmled.py index e0d6c73ce4..917d9ba03c 100644 --- a/examples/asmled.py +++ b/examples/asmled.py @@ -1,4 +1,5 @@ # flash LED #1 using inline assembler +# this version is overly verbose and uses word stores @micropython.asm_thumb def flash_led(r0): movw(r1, (stm.GPIOA + stm.GPIO_BSRRL) & 0xffff) @@ -13,69 +14,72 @@ def flash_led(r0): label(loop1) # turn LED on - str(r2, r1, 0) + str(r2, [r1, 0]) # delay for a bit movw(r4, 5599900 & 0xffff) movt(r4, (5599900 >> 16) & 0xffff) label(delay_on) - subs(r4, r4, 1) + sub(r4, r4, 1) cmp(r4, 0) bgt(delay_on) # turn LED off - str(r3, r1, 0) + str(r3, [r1, 0]) # delay for a bit movw(r4, 5599900 & 0xffff) movt(r4, (5599900 >> 16) & 0xffff) label(delay_off) - subs(r4, r4, 1) + sub(r4, r4, 1) cmp(r4, 0) bgt(delay_off) # loop r0 times - subs(r0, r0, 1) + sub(r0, r0, 1) label(loop_entry) cmp(r0, 0) bgt(loop1) -# flash LED #1 using inline assembler -# this version uses the convenience assembler operation 'movwt' +# flash LED #2 using inline assembler +# this version uses half-word stores, and the 
convenience assembler operation 'movwt' @micropython.asm_thumb def flash_led_v2(r0): - movwt(r1, stm.GPIOA + stm.GPIO_BSRRL) - movwt(r2, 1 << 13) - movwt(r3, 1 << (16 + 13)) + # get the GPIOA address in r1 + movwt(r1, stm.GPIOA) + + # get the bit mask for PA14 (the pin LED #2 is on) + movw(r2, 1 << 14) b(loop_entry) label(loop1) # turn LED on - str(r2, r1, 0) + strh(r2, [r1, stm.GPIO_BSRRL]) # delay for a bit movwt(r4, 5599900) label(delay_on) - subs(r4, r4, 1) + sub(r4, r4, 1) cmp(r4, 0) bgt(delay_on) # turn LED off - str(r3, r1, 0) + strh(r2, [r1, stm.GPIO_BSRRH]) # delay for a bit movwt(r4, 5599900) label(delay_off) - subs(r4, r4, 1) + sub(r4, r4, 1) cmp(r4, 0) bgt(delay_off) # loop r0 times - subs(r0, r0, 1) + sub(r0, r0, 1) label(loop_entry) cmp(r0, 0) bgt(loop1) +flash_led(5) flash_led_v2(5) diff --git a/examples/asmsum.py b/examples/asmsum.py new file mode 100644 index 0000000000..07e71c7384 --- /dev/null +++ b/examples/asmsum.py @@ -0,0 +1,57 @@ +@micropython.asm_thumb +def asm_sum_words(r0, r1): + + # r0 = len + # r1 = ptr + # r2 = sum + # r3 = dummy + mov(r2, 0) + + b(loop_entry) + + label(loop1) + ldr(r3, [r1, 0]) + add(r2, r2, r3) + + add(r1, r1, 4) + sub(r0, r0, 1) + + label(loop_entry) + cmp(r0, 0) + bgt(loop1) + + mov(r0, r2) + +@micropython.asm_thumb +def asm_sum_bytes(r0, r1): + + # r0 = len + # r1 = ptr + # r2 = sum + # r3 = dummy + mov(r2, 0) + + b(loop_entry) + + label(loop1) + ldrb(r3, [r1, 0]) + add(r2, r2, r3) + + add(r1, r1, 1) + sub(r0, r0, 1) + + label(loop_entry) + cmp(r0, 0) + bgt(loop1) + + mov(r0, r2) + +import array + +b = array.array('l', (100, 200, 300, 400)) +n = asm_sum_words(len(b), b) +print(b, n) + +b = array.array('b', (10, 20, 30, 40, 50, 60, 70, 80)) +n = asm_sum_bytes(len(b), b) +print(b, n) diff --git a/py/asmthumb.c b/py/asmthumb.c index 1cd971c76b..6bf6d66584 100644 --- a/py/asmthumb.c +++ b/py/asmthumb.c @@ -230,33 +230,33 @@ STATIC int get_label_dest(asm_thumb_t *as, uint label) { return as->label_offsets[label]; } 
-#define OP_MOVS_RLO_I8(rlo_dest, i8_src) (0x2000 | ((rlo_dest) << 8) | (i8_src)) +#define OP_FORMAT_2(op, rlo_dest, rlo_src, src_b) ((op) | ((src_b) << 6) | ((rlo_src) << 3) | (rlo_dest)) -// the i8_src value will be zero extended into the r32 register! -void asm_thumb_movs_rlo_i8(asm_thumb_t *as, uint rlo_dest, int i8_src) { +void asm_thumb_format_2(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_src, int src_b) { assert(rlo_dest < REG_R8); - // movs rlo_dest, #i8_src - asm_thumb_write_op16(as, OP_MOVS_RLO_I8(rlo_dest, i8_src)); + assert(rlo_src < REG_R8); + asm_thumb_write_op16(as, OP_FORMAT_2(op, rlo_dest, rlo_src, src_b)); } -#define OP_MOVW (0xf240) -#define OP_MOVT (0xf2c0) +#define OP_FORMAT_3(op, rlo, i8) ((op) | ((rlo) << 8) | (i8)) -// if loading lo half with movw, the i16 value will be zero extended into the r32 register! -STATIC void asm_thumb_mov_reg_i16(asm_thumb_t *as, uint mov_op, uint reg_dest, int i16_src) { - assert(reg_dest < REG_R15); - // mov[wt] reg_dest, #i16_src - asm_thumb_write_op32(as, mov_op | ((i16_src >> 1) & 0x0400) | ((i16_src >> 12) & 0xf), ((i16_src << 4) & 0x7000) | (reg_dest << 8) | (i16_src & 0xff)); +void asm_thumb_format_3(asm_thumb_t *as, uint op, uint rlo, int i8) { + assert(rlo < REG_R8); + asm_thumb_write_op16(as, OP_FORMAT_3(op, rlo, i8)); } -// the i16_src value will be zero extended into the r32 register! -void asm_thumb_movw_reg_i16(asm_thumb_t *as, uint reg_dest, int i16_src) { - asm_thumb_mov_reg_i16(as, OP_MOVW, reg_dest, i16_src); +#define OP_FORMAT_4(op, rlo_dest, rlo_src) ((op) | ((rlo_src) << 3) | (rlo_dest)) + +void asm_thumb_format_4(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_src) { + assert(rlo_dest < REG_R8); + assert(rlo_src < REG_R8); + asm_thumb_write_op16(as, OP_FORMAT_4(op, rlo_dest, rlo_src)); } -// the i16_src value will be zero extended into the r32 register! 
-void asm_thumb_movt_reg_i16(asm_thumb_t *as, uint reg_dest, int i16_src) { - asm_thumb_mov_reg_i16(as, OP_MOVT, reg_dest, i16_src); +#define OP_FORMAT_9_10(op, rlo_dest, rlo_base, offset) ((op) | (((offset) << 6) & 0x07c0) | ((rlo_base) << 3) | (rlo_dest)) + +void asm_thumb_format_9_10(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_base, uint offset) { + asm_thumb_write_op16(as, OP_FORMAT_9_10(op, rlo_dest, rlo_base, offset)); } void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) { @@ -275,42 +275,24 @@ void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) { asm_thumb_write_op16(as, 0x4600 | op_lo); } -#define OP_ADD_RLO_RLO_RLO(rlo_dest, rlo_src_a, rlo_src_b) (0x1800 | ((rlo_src_b) << 6) | ((rlo_src_a) << 3) | (rlo_dest)) - -void asm_thumb_add_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b) { - asm_thumb_write_op16(as, OP_ADD_RLO_RLO_RLO(rlo_dest, rlo_src_a, rlo_src_b)); -} - -#define OP_SUBS_RLO_RLO_I3(rlo_dest, rlo_src, i3_src) (0x1e00 | ((i3_src) << 6) | ((rlo_src) << 3) | (rlo_dest)) - -void asm_thumb_subs_rlo_rlo_i3(asm_thumb_t *as, uint rlo_dest, uint rlo_src, int i3_src) { - assert(rlo_dest < REG_R8); - assert(rlo_src < REG_R8); - asm_thumb_write_op16(as, OP_SUBS_RLO_RLO_I3(rlo_dest, rlo_src, i3_src)); -} - -#define OP_CMP_REG_REG(rlo_a, rlo_b) (0x4280 | ((rlo_b) << 3) | (rlo_a)) - -void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b) { - asm_thumb_write_op16(as, OP_CMP_REG_REG(rlo_a, rlo_b)); -} - -#define OP_CMP_RLO_I8(rlo, i8) (0x2800 | ((rlo) << 8) | (i8)) +#define OP_MOVW (0xf240) +#define OP_MOVT (0xf2c0) -void asm_thumb_cmp_rlo_i8(asm_thumb_t *as, uint rlo, int i8) { - assert(rlo < REG_R8); - asm_thumb_write_op16(as, OP_CMP_RLO_I8(rlo, i8)); +// if loading lo half with movw, the i16 value will be zero extended into the r32 register! 
+STATIC void asm_thumb_mov_reg_i16(asm_thumb_t *as, uint mov_op, uint reg_dest, int i16_src) { + assert(reg_dest < REG_R15); + // mov[wt] reg_dest, #i16_src + asm_thumb_write_op32(as, mov_op | ((i16_src >> 1) & 0x0400) | ((i16_src >> 12) & 0xf), ((i16_src << 4) & 0x7000) | (reg_dest << 8) | (i16_src & 0xff)); } -#define OP_LDR_RLO_RLO_I5(rlo_dest, rlo_base, word_offset) (0x6800 | (((word_offset) << 6) & 0x07c0) | ((rlo_base) << 3) | (rlo_dest)) -#define OP_STR_RLO_RLO_I5(rlo_dest, rlo_base, word_offset) (0x6000 | (((word_offset) << 6) & 0x07c0) | ((rlo_base) << 3) | (rlo_dest)) - -void asm_thumb_ldr_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint word_offset) { - asm_thumb_write_op16(as, OP_LDR_RLO_RLO_I5(rlo_dest, rlo_base, word_offset)); +// the i16_src value will be zero extended into the r32 register! +void asm_thumb_movw_reg_i16(asm_thumb_t *as, uint reg_dest, int i16_src) { + asm_thumb_mov_reg_i16(as, OP_MOVW, reg_dest, i16_src); } -void asm_thumb_str_rlo_rlo_i5(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint word_offset) { - asm_thumb_write_op16(as, OP_STR_RLO_RLO_I5(rlo_src, rlo_base, word_offset)); +// the i16_src value will be zero extended into the r32 register! 
+void asm_thumb_movt_reg_i16(asm_thumb_t *as, uint reg_dest, int i16_src) { + asm_thumb_mov_reg_i16(as, OP_MOVT, reg_dest, i16_src); } void asm_thumb_ite_ge(asm_thumb_t *as) { @@ -353,7 +335,7 @@ void asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, machine_uint_t i32) { void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) { if (reg_dest < 8 && UNSIGNED_FIT8(i32)) { - asm_thumb_movs_rlo_i8(as, reg_dest, i32); + asm_thumb_mov_rlo_i8(as, reg_dest, i32); } else if (UNSIGNED_FIT16(i32)) { asm_thumb_mov_reg_i16(as, OP_MOVW, reg_dest, i32); } else { @@ -452,7 +434,7 @@ void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp asm_thumb_mov_reg_i32(as, reg_temp, (machine_uint_t)fun_ptr); asm_thumb_write_op16(as, OP_BLX(reg_temp)); } else if (1) { - asm_thumb_write_op16(as, OP_LDR_RLO_RLO_I5(reg_temp, REG_R7, fun_id)); + asm_thumb_write_op16(as, OP_FORMAT_9_10(ASM_THUMB_FORMAT_9_LDR | ASM_THUMB_FORMAT_9_WORD_TRANSFER, reg_temp, REG_R7, fun_id)); asm_thumb_write_op16(as, OP_BLX(reg_temp)); } else { // use SVC diff --git a/py/asmthumb.h b/py/asmthumb.h index de376fd2ce..6b4f5506b6 100644 --- a/py/asmthumb.h +++ b/py/asmthumb.h @@ -58,16 +58,93 @@ void asm_thumb_label_assign(asm_thumb_t *as, uint label); // argument order follows ARM, in general dest is first // note there is a difference between movw and mov.w, and many others! 
-void asm_thumb_movs_rlo_i8(asm_thumb_t *as, uint rlo_dest, int i8_src); +// FORMAT 2: add/subtract + +#define ASM_THUMB_FORMAT_2_ADD (0x1800) +#define ASM_THUMB_FORMAT_2_SUB (0x1a00) +#define ASM_THUMB_FORMAT_2_REG_OPERAND (0x0000) +#define ASM_THUMB_FORMAT_2_IMM_OPERAND (0x0400) + +void asm_thumb_format_2(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_src, int src_b); + +static inline void asm_thumb_add_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b) + { asm_thumb_format_2(as, ASM_THUMB_FORMAT_2_ADD | ASM_THUMB_FORMAT_2_REG_OPERAND, rlo_dest, rlo_src_a, rlo_src_b); } +static inline void asm_thumb_add_rlo_rlo_i3(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, int i3_src) + { asm_thumb_format_2(as, ASM_THUMB_FORMAT_2_ADD | ASM_THUMB_FORMAT_2_IMM_OPERAND, rlo_dest, rlo_src_a, i3_src); } +static inline void asm_thumb_sub_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b) + { asm_thumb_format_2(as, ASM_THUMB_FORMAT_2_SUB | ASM_THUMB_FORMAT_2_REG_OPERAND, rlo_dest, rlo_src_a, rlo_src_b); } +static inline void asm_thumb_sub_rlo_rlo_i3(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, int i3_src) + { asm_thumb_format_2(as, ASM_THUMB_FORMAT_2_SUB | ASM_THUMB_FORMAT_2_IMM_OPERAND, rlo_dest, rlo_src_a, i3_src); } + +// FORMAT 3: move/compare/add/subtract immediate +// These instructions all do zero extension of the i8 value + +#define ASM_THUMB_FORMAT_3_MOV (0x2000) +#define ASM_THUMB_FORMAT_3_CMP (0x2800) +#define ASM_THUMB_FORMAT_3_ADD (0x3000) +#define ASM_THUMB_FORMAT_3_SUB (0x3800) + +void asm_thumb_format_3(asm_thumb_t *as, uint op, uint rlo, int i8); + +static inline void asm_thumb_mov_rlo_i8(asm_thumb_t *as, uint rlo, int i8) { asm_thumb_format_3(as, ASM_THUMB_FORMAT_3_MOV, rlo, i8); } +static inline void asm_thumb_cmp_rlo_i8(asm_thumb_t *as, uint rlo, int i8) { asm_thumb_format_3(as, ASM_THUMB_FORMAT_3_CMP, rlo, i8); } +static inline void asm_thumb_add_rlo_i8(asm_thumb_t *as, uint rlo, int i8) { 
asm_thumb_format_3(as, ASM_THUMB_FORMAT_3_ADD, rlo, i8); } +static inline void asm_thumb_sub_rlo_i8(asm_thumb_t *as, uint rlo, int i8) { asm_thumb_format_3(as, ASM_THUMB_FORMAT_3_SUB, rlo, i8); } + +// FORMAT 4: ALU operations + +#define ASM_THUMB_FORMAT_4_AND (0x4000) +#define ASM_THUMB_FORMAT_4_EOR (0x4040) +#define ASM_THUMB_FORMAT_4_LSL (0x4080) +#define ASM_THUMB_FORMAT_4_LSR (0x40c0) +#define ASM_THUMB_FORMAT_4_ASR (0x4100) +#define ASM_THUMB_FORMAT_4_ADC (0x4140) +#define ASM_THUMB_FORMAT_4_SBC (0x4180) +#define ASM_THUMB_FORMAT_4_ROR (0x41c0) +#define ASM_THUMB_FORMAT_4_TST (0x4200) +#define ASM_THUMB_FORMAT_4_NEG (0x4240) +#define ASM_THUMB_FORMAT_4_CMP (0x4280) +#define ASM_THUMB_FORMAT_4_CMN (0x42c0) +#define ASM_THUMB_FORMAT_4_ORR (0x4300) +#define ASM_THUMB_FORMAT_4_MUL (0x4340) +#define ASM_THUMB_FORMAT_4_BIC (0x4380) +#define ASM_THUMB_FORMAT_4_MVN (0x43c0) + +void asm_thumb_format_4(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_src); + +static inline void asm_thumb_cmp_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_src) { asm_thumb_format_4(as, ASM_THUMB_FORMAT_4_CMP, rlo_dest, rlo_src); } + +// FORMAT 9: load/store with immediate offset +// For word transfers the offset must be aligned, and >>2 + +// FORMAT 10: load/store halfword +// The offset must be aligned, and >>1 +// The load is zero extended into the register + +#define ASM_THUMB_FORMAT_9_STR (0x6000) +#define ASM_THUMB_FORMAT_9_LDR (0x6800) +#define ASM_THUMB_FORMAT_9_WORD_TRANSFER (0x0000) +#define ASM_THUMB_FORMAT_9_BYTE_TRANSFER (0x1000) + +#define ASM_THUMB_FORMAT_10_STRH (0x8000) +#define ASM_THUMB_FORMAT_10_LDRH (0x8800) + +void asm_thumb_format_9_10(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_base, uint offset); + +static inline void asm_thumb_str_rlo_rlo_i5(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint word_offset) + { asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_9_STR | ASM_THUMB_FORMAT_9_WORD_TRANSFER, rlo_src, rlo_base, word_offset); } +static inline void 
asm_thumb_strb_rlo_rlo_i5(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint byte_offset) + { asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_9_STR | ASM_THUMB_FORMAT_9_BYTE_TRANSFER, rlo_src, rlo_base, byte_offset); } +static inline void asm_thumb_ldr_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint word_offset) + { asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_9_LDR | ASM_THUMB_FORMAT_9_WORD_TRANSFER, rlo_dest, rlo_base, word_offset); } +static inline void asm_thumb_ldrb_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint byte_offset) + { asm_thumb_format_9_10(as, ASM_THUMB_FORMAT_9_LDR | ASM_THUMB_FORMAT_9_BYTE_TRANSFER , rlo_dest, rlo_base, byte_offset); } + +// TODO convert these to above format style + +void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src); void asm_thumb_movw_reg_i16(asm_thumb_t *as, uint reg_dest, int i16_src); void asm_thumb_movt_reg_i16(asm_thumb_t *as, uint reg_dest, int i16_src); -void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src); -void asm_thumb_add_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_src_a, uint rlo_src_b); -void asm_thumb_subs_rlo_rlo_i3(asm_thumb_t *as, uint rlo_dest, uint rlo_src, int i3_src); -void asm_thumb_cmp_reg_reg(asm_thumb_t *as, uint rlo_a, uint rlo_b); -void asm_thumb_cmp_rlo_i8(asm_thumb_t *as, uint rlo, int i8); -void asm_thumb_ldr_rlo_rlo_i5(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint word_offset); -void asm_thumb_str_rlo_rlo_i5(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint word_offset); void asm_thumb_ite_ge(asm_thumb_t *as); void asm_thumb_b_n(asm_thumb_t *as, uint label); void asm_thumb_bcc_n(asm_thumb_t *as, int cond, uint label); diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c index 58aeed1f64..fc881f3d18 100644 --- a/py/emitinlinethumb.c +++ b/py/emitinlinethumb.c @@ -17,6 +17,14 @@ #if MICROPY_EMIT_INLINE_THUMB +typedef enum { + PN_none = 0, +#define DEF_RULE(rule, comp, kind, ...) 
PN_##rule, +#include "grammar.h" +#undef DEF_RULE + PN_maximum_number_of, +} pn_kind_t; + struct _emit_inline_asm_t { uint16_t pass; uint16_t success; @@ -120,15 +128,15 @@ STATIC const reg_name_t reg_name_table[] = { {15, "pc\0"}, }; -STATIC uint get_arg_reg(emit_inline_asm_t *emit, const char *op, mp_parse_node_t *pn_args, uint wanted_arg_num, uint max_reg) { - if (MP_PARSE_NODE_IS_ID(pn_args[wanted_arg_num])) { - qstr reg_qstr = MP_PARSE_NODE_LEAF_ARG(pn_args[wanted_arg_num]); +STATIC uint get_arg_reg(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn, uint max_reg) { + if (MP_PARSE_NODE_IS_ID(pn)) { + qstr reg_qstr = MP_PARSE_NODE_LEAF_ARG(pn); const char *reg_str = qstr_str(reg_qstr); for (uint i = 0; i < sizeof(reg_name_table) / sizeof(reg_name_table[0]); i++) { const reg_name_t *r = &reg_name_table[i]; if (reg_str[0] == r->name[0] && reg_str[1] == r->name[1] && reg_str[2] == r->name[2] && (reg_str[2] == '\0' || reg_str[3] == '\0')) { if (r->reg > max_reg) { - emit_inline_thumb_error(emit, "'%s' expects at most r%d in position %d\n", op, max_reg, wanted_arg_num); + emit_inline_thumb_error(emit, "'%s' expects at most r%d\n", op, max_reg); return 0; } else { return r->reg; @@ -136,16 +144,16 @@ STATIC uint get_arg_reg(emit_inline_asm_t *emit, const char *op, mp_parse_node_t } } } - emit_inline_thumb_error(emit, "'%s' expects a register in position %d\n", op, wanted_arg_num); + emit_inline_thumb_error(emit, "'%s' expects a register\n", op); return 0; } -STATIC int get_arg_i(emit_inline_asm_t *emit, const char *op, mp_parse_node_t *pn_args, int wanted_arg_num, int fit_mask) { - if (!MP_PARSE_NODE_IS_SMALL_INT(pn_args[wanted_arg_num])) { - emit_inline_thumb_error(emit, "'%s' expects an integer in position %d\n", op, wanted_arg_num); +STATIC int get_arg_i(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn, int fit_mask) { + if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) { + emit_inline_thumb_error(emit, "'%s' expects an integer\n", op); return 0; } - int i = 
MP_PARSE_NODE_LEAF_SMALL_INT(pn_args[wanted_arg_num]); + int i = MP_PARSE_NODE_LEAF_SMALL_INT(pn); if ((i & (~fit_mask)) != 0) { emit_inline_thumb_error(emit, "'%s' integer 0x%x does not fit in mask 0x%x\n", op, i, fit_mask); return 0; @@ -153,12 +161,34 @@ STATIC int get_arg_i(emit_inline_asm_t *emit, const char *op, mp_parse_node_t *p return i; } -STATIC int get_arg_label(emit_inline_asm_t *emit, const char *op, mp_parse_node_t *pn_args, int wanted_arg_num) { - if (!MP_PARSE_NODE_IS_ID(pn_args[wanted_arg_num])) { - emit_inline_thumb_error(emit, "'%s' expects a label in position %d\n", op, wanted_arg_num); +STATIC bool get_arg_addr(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn, mp_parse_node_t *pn_base, mp_parse_node_t *pn_offset) { + if (!MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_atom_bracket)) { + goto bad_arg; + } + mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; + if (!MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_testlist_comp)) { + goto bad_arg; + } + pns = (mp_parse_node_struct_t*)pns->nodes[0]; + if (MP_PARSE_NODE_STRUCT_NUM_NODES(pns) != 2) { + goto bad_arg; + } + + *pn_base = pns->nodes[0]; + *pn_offset = pns->nodes[1]; + return true; + +bad_arg: + emit_inline_thumb_error(emit, "'%s' expects an address of the form [a, b]\n", op); + return false; +} + +STATIC int get_arg_label(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn) { + if (!MP_PARSE_NODE_IS_ID(pn)) { + emit_inline_thumb_error(emit, "'%s' expects a label\n", op); return 0; } - qstr label_qstr = MP_PARSE_NODE_LEAF_ARG(pn_args[wanted_arg_num]); + qstr label_qstr = MP_PARSE_NODE_LEAF_ARG(pn); for (int i = 0; i < emit->max_num_labels; i++) { if (emit->label_lookup[i] == label_qstr) { return i; @@ -212,7 +242,7 @@ STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, int n_args, m } else if (n_args == 1) { if (strcmp(op_str, "b") == 0) { - int label_num = get_arg_label(emit, op_str, pn_args, 0); + int label_num = get_arg_label(emit, op_str, pn_args[0]); // 
TODO check that this succeeded, ie branch was within range asm_thumb_b_n(emit->as, label_num); } else if (op_str[0] == 'b' && op_len == 3) { @@ -225,7 +255,7 @@ STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, int n_args, m if (cc == -1) { goto unknown_op; } - int label_num = get_arg_label(emit, op_str, pn_args, 0); + int label_num = get_arg_label(emit, op_str, pn_args[0]); // TODO check that this succeeded, ie branch was within range asm_thumb_bcc_n(emit->as, cc, label_num); } else { @@ -233,59 +263,131 @@ STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, int n_args, m } } else if (n_args == 2) { - if (strcmp(op_str, "mov") == 0) { - uint rlo_dest = get_arg_reg(emit, op_str, pn_args, 0, 7); - uint rlo_src = get_arg_reg(emit, op_str, pn_args, 1, 7); - asm_thumb_mov_reg_reg(emit->as, rlo_dest, rlo_src); - } else if (strcmp(op_str, "movs") == 0) { - uint rlo_dest = get_arg_reg(emit, op_str, pn_args, 0, 7); - int i_src = get_arg_i(emit, op_str, pn_args, 1, 0xff); - asm_thumb_movs_rlo_i8(emit->as, rlo_dest, i_src); - } else if (strcmp(op_str, "movw") == 0) { - uint reg_dest = get_arg_reg(emit, op_str, pn_args, 0, 15); - int i_src = get_arg_i(emit, op_str, pn_args, 1, 0xffff); - asm_thumb_movw_reg_i16(emit->as, reg_dest, i_src); - } else if (strcmp(op_str, "movt") == 0) { - uint reg_dest = get_arg_reg(emit, op_str, pn_args, 0, 15); - int i_src = get_arg_i(emit, op_str, pn_args, 1, 0xffff); - asm_thumb_movt_reg_i16(emit->as, reg_dest, i_src); - } else if (strcmp(op_str, "movwt") == 0) { - // this is a convenience instruction - // we clear the MSB since it might be set from extracting the small int value - uint reg_dest = get_arg_reg(emit, op_str, pn_args, 0, 15); - int i_src = get_arg_i(emit, op_str, pn_args, 1, 0xffffffff); - asm_thumb_movw_reg_i16(emit->as, reg_dest, i_src & 0xffff); - asm_thumb_movt_reg_i16(emit->as, reg_dest, (i_src >> 16) & 0x7fff); - } else if (strcmp(op_str, "cmp") == 0) { - uint rlo = get_arg_reg(emit, op_str, 
pn_args, 0, 7); - int i8 = get_arg_i(emit, op_str, pn_args, 1, 0xff); - asm_thumb_cmp_rlo_i8(emit->as, rlo, i8); + if (MP_PARSE_NODE_IS_ID(pn_args[1])) { + // second arg is a register (or should be) + uint op_code; + if (strcmp(op_str, "mov") == 0) { + uint reg_dest = get_arg_reg(emit, op_str, pn_args[0], 15); + uint reg_src = get_arg_reg(emit, op_str, pn_args[1], 15); + asm_thumb_mov_reg_reg(emit->as, reg_dest, reg_src); + } else if (strcmp(op_str, "and") == 0) { + op_code = ASM_THUMB_FORMAT_4_AND; + uint reg_dest, reg_src; + op_format_4: + reg_dest = get_arg_reg(emit, op_str, pn_args[0], 7); + reg_src = get_arg_reg(emit, op_str, pn_args[1], 7); + asm_thumb_format_4(emit->as, op_code, reg_dest, reg_src); + // TODO probably uses less ROM if these ops are in a lookup table + } else if (strcmp(op_str, "and") == 0) { op_code = ASM_THUMB_FORMAT_4_AND; goto op_format_4; + } else if (strcmp(op_str, "eor") == 0) { op_code = ASM_THUMB_FORMAT_4_EOR; goto op_format_4; + } else if (strcmp(op_str, "lsl") == 0) { op_code = ASM_THUMB_FORMAT_4_LSL; goto op_format_4; + } else if (strcmp(op_str, "lsr") == 0) { op_code = ASM_THUMB_FORMAT_4_LSR; goto op_format_4; + } else if (strcmp(op_str, "asr") == 0) { op_code = ASM_THUMB_FORMAT_4_ASR; goto op_format_4; + } else if (strcmp(op_str, "adc") == 0) { op_code = ASM_THUMB_FORMAT_4_ADC; goto op_format_4; + } else if (strcmp(op_str, "sbc") == 0) { op_code = ASM_THUMB_FORMAT_4_SBC; goto op_format_4; + } else if (strcmp(op_str, "ror") == 0) { op_code = ASM_THUMB_FORMAT_4_ROR; goto op_format_4; + } else if (strcmp(op_str, "tst") == 0) { op_code = ASM_THUMB_FORMAT_4_TST; goto op_format_4; + } else if (strcmp(op_str, "neg") == 0) { op_code = ASM_THUMB_FORMAT_4_NEG; goto op_format_4; + } else if (strcmp(op_str, "cmp") == 0) { op_code = ASM_THUMB_FORMAT_4_CMP; goto op_format_4; + } else if (strcmp(op_str, "cmn") == 0) { op_code = ASM_THUMB_FORMAT_4_CMN; goto op_format_4; + } else if (strcmp(op_str, "orr") == 0) { op_code = ASM_THUMB_FORMAT_4_ORR; 
goto op_format_4; + } else if (strcmp(op_str, "mul") == 0) { op_code = ASM_THUMB_FORMAT_4_MUL; goto op_format_4; + } else if (strcmp(op_str, "bic") == 0) { op_code = ASM_THUMB_FORMAT_4_BIC; goto op_format_4; + } else if (strcmp(op_str, "mvn") == 0) { op_code = ASM_THUMB_FORMAT_4_MVN; goto op_format_4; + } else { + goto unknown_op; + } } else { - goto unknown_op; + // second arg is not a register + uint op_code; + if (strcmp(op_str, "mov") == 0) { + op_code = ASM_THUMB_FORMAT_3_MOV; + uint rlo_dest, i8_src; + op_format_3: + rlo_dest = get_arg_reg(emit, op_str, pn_args[0], 7); + i8_src = get_arg_i(emit, op_str, pn_args[1], 0xff); + asm_thumb_format_3(emit->as, op_code, rlo_dest, i8_src); + } else if (strcmp(op_str, "cmp") == 0) { + op_code = ASM_THUMB_FORMAT_3_CMP; + goto op_format_3; + } else if (strcmp(op_str, "add") == 0) { + op_code = ASM_THUMB_FORMAT_3_ADD; + goto op_format_3; + } else if (strcmp(op_str, "sub") == 0) { + op_code = ASM_THUMB_FORMAT_3_SUB; + goto op_format_3; + } else if (strcmp(op_str, "movw") == 0) { + uint reg_dest = get_arg_reg(emit, op_str, pn_args[0], 15); + int i_src = get_arg_i(emit, op_str, pn_args[1], 0xffff); + asm_thumb_movw_reg_i16(emit->as, reg_dest, i_src); + } else if (strcmp(op_str, "movt") == 0) { + uint reg_dest = get_arg_reg(emit, op_str, pn_args[0], 15); + int i_src = get_arg_i(emit, op_str, pn_args[1], 0xffff); + asm_thumb_movt_reg_i16(emit->as, reg_dest, i_src); + } else if (strcmp(op_str, "movwt") == 0) { + // this is a convenience instruction + // we clear the MSB since it might be set from extracting the small int value + uint reg_dest = get_arg_reg(emit, op_str, pn_args[0], 15); + int i_src = get_arg_i(emit, op_str, pn_args[1], 0xffffffff); + asm_thumb_movw_reg_i16(emit->as, reg_dest, i_src & 0xffff); + asm_thumb_movt_reg_i16(emit->as, reg_dest, (i_src >> 16) & 0x7fff); + } else if (strcmp(op_str, "ldr") == 0) { + op_code = ASM_THUMB_FORMAT_9_LDR | ASM_THUMB_FORMAT_9_WORD_TRANSFER; + uint rlo_dest, rlo_base, i5; + 
mp_parse_node_t pn_base, pn_offset; + op_format_9_10: + rlo_dest = get_arg_reg(emit, op_str, pn_args[0], 7); + if (get_arg_addr(emit, op_str, pn_args[1], &pn_base, &pn_offset)) { + rlo_base = get_arg_reg(emit, op_str, pn_base, 7); + if (op_code & ASM_THUMB_FORMAT_9_BYTE_TRANSFER) { + i5 = get_arg_i(emit, op_str, pn_offset, 0x1f); + } else if (op_code & ASM_THUMB_FORMAT_10_STRH) { // also catches LDRH + i5 = get_arg_i(emit, op_str, pn_offset, 0x3e) >> 1; + } else { + i5 = get_arg_i(emit, op_str, pn_offset, 0x7c) >> 2; + } + asm_thumb_format_9_10(emit->as, op_code, rlo_dest, rlo_base, i5); + } + } else if (strcmp(op_str, "ldrb") == 0) { + op_code = ASM_THUMB_FORMAT_9_LDR | ASM_THUMB_FORMAT_9_BYTE_TRANSFER; + goto op_format_9_10; + } else if (strcmp(op_str, "ldrh") == 0) { + op_code = ASM_THUMB_FORMAT_10_LDRH; + goto op_format_9_10; + } else if (strcmp(op_str, "str") == 0) { + op_code = ASM_THUMB_FORMAT_9_STR | ASM_THUMB_FORMAT_9_WORD_TRANSFER; + goto op_format_9_10; + } else if (strcmp(op_str, "strb") == 0) { + op_code = ASM_THUMB_FORMAT_9_STR | ASM_THUMB_FORMAT_9_BYTE_TRANSFER; + goto op_format_9_10; + } else if (strcmp(op_str, "strh") == 0) { + op_code = ASM_THUMB_FORMAT_10_STRH; + goto op_format_9_10; + } else { + goto unknown_op; + } } } else if (n_args == 3) { + uint op_code; if (strcmp(op_str, "add") == 0) { - uint rlo_dest = get_arg_reg(emit, op_str, pn_args, 0, 7); - uint rlo_src_a = get_arg_reg(emit, op_str, pn_args, 1, 7); - uint rlo_src_b = get_arg_reg(emit, op_str, pn_args, 2, 7); - asm_thumb_add_rlo_rlo_rlo(emit->as, rlo_dest, rlo_src_a, rlo_src_b); - } else if (strcmp(op_str, "subs") == 0) { - uint rlo_dest = get_arg_reg(emit, op_str, pn_args, 0, 7); - uint rlo_src = get_arg_reg(emit, op_str, pn_args, 1, 7); - int i3_src = get_arg_i(emit, op_str, pn_args, 2, 0x7); - asm_thumb_subs_rlo_rlo_i3(emit->as, rlo_dest, rlo_src, i3_src); - } else if (strcmp(op_str, "ldr") == 0) { - // TODO maybe use ldr(rd, [rb, 4]) syntax? 
- uint rlo_dest = get_arg_reg(emit, op_str, pn_args, 0, 7); - uint rlo_base = get_arg_reg(emit, op_str, pn_args, 1, 7); - int i5 = get_arg_i(emit, op_str, pn_args, 2, 0x7c); - asm_thumb_ldr_rlo_rlo_i5(emit->as, rlo_dest, rlo_base, i5 >> 2); - } else if (strcmp(op_str, "str") == 0) { - uint rlo_src = get_arg_reg(emit, op_str, pn_args, 0, 7); - uint rlo_base = get_arg_reg(emit, op_str, pn_args, 1, 7); - int i5 = get_arg_i(emit, op_str, pn_args, 2, 0x7c); - asm_thumb_str_rlo_rlo_i5(emit->as, rlo_src, rlo_base, i5 >> 2); + op_code = ASM_THUMB_FORMAT_2_ADD; + uint rlo_dest, rlo_src; + op_format_2: + rlo_dest = get_arg_reg(emit, op_str, pn_args[0], 7); + rlo_src = get_arg_reg(emit, op_str, pn_args[1], 7); + int src_b; + if (MP_PARSE_NODE_IS_ID(pn_args[2])) { + op_code |= ASM_THUMB_FORMAT_2_REG_OPERAND; + src_b = get_arg_reg(emit, op_str, pn_args[2], 7); + } else { + op_code |= ASM_THUMB_FORMAT_2_IMM_OPERAND; + src_b = get_arg_i(emit, op_str, pn_args[2], 0x7); + } + asm_thumb_format_2(emit->as, op_code, rlo_dest, rlo_src, src_b); + } else if (strcmp(op_str, "sub") == 0) { + op_code = ASM_THUMB_FORMAT_2_SUB; + goto op_format_2; } else { goto unknown_op; } diff --git a/py/emitnative.c b/py/emitnative.c index 29da5b8c99..b5a3acc231 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -1020,7 +1020,7 @@ STATIC void emit_native_for_iter(emit_t *emit, uint label) { asm_x64_cmp_r64_with_r64(emit->as, REG_RET, REG_TEMP1); asm_x64_jcc_label(emit->as, JCC_JE, label); #elif N_THUMB - asm_thumb_cmp_reg_reg(emit->as, REG_RET, REG_TEMP1); + asm_thumb_cmp_rlo_rlo(emit->as, REG_RET, REG_TEMP1); asm_thumb_bcc_label(emit->as, THUMB_CC_EQ, label); #endif emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET); @@ -1067,10 +1067,10 @@ STATIC void emit_native_binary_op(emit_t *emit, mp_binary_op_t op) { asm_x64_cmp_r64_with_r64(emit->as, REG_ARG_3, REG_ARG_2); asm_x64_setcc_r8(emit->as, JCC_JL, REG_RET); #elif N_THUMB - asm_thumb_cmp_reg_reg(emit->as, REG_ARG_2, REG_ARG_3); + 
asm_thumb_cmp_rlo_rlo(emit->as, REG_ARG_2, REG_ARG_3); asm_thumb_ite_ge(emit->as); - asm_thumb_movs_rlo_i8(emit->as, REG_RET, 0); // if r0 >= r1 - asm_thumb_movs_rlo_i8(emit->as, REG_RET, 1); // if r0 < r1 + asm_thumb_mov_rlo_i8(emit->as, REG_RET, 0); // if r0 >= r1 + asm_thumb_mov_rlo_i8(emit->as, REG_RET, 1); // if r0 < r1 #endif emit_post_push_reg(emit, VTYPE_BOOL, REG_RET); } else { diff --git a/py/objfun.c b/py/objfun.c index dd4b7347ca..4ef92c0256 100644 --- a/py/objfun.c +++ b/py/objfun.c @@ -422,26 +422,35 @@ STATIC machine_uint_t convert_obj_for_inline_asm(mp_obj_t obj) { // pointer to the string (it's probably constant though!) uint l; return (machine_uint_t)mp_obj_str_get_data(obj, &l); + } else { + mp_obj_type_t *type = mp_obj_get_type(obj); + if (0) { #if MICROPY_ENABLE_FLOAT - } else if (MP_OBJ_IS_TYPE(obj, &mp_type_float)) { - // convert float to int (could also pass in float registers) - return (machine_int_t)mp_obj_float_get(obj); + } else if (type == &mp_type_float) { + // convert float to int (could also pass in float registers) + return (machine_int_t)mp_obj_float_get(obj); #endif - } else if (MP_OBJ_IS_TYPE(obj, &mp_type_tuple)) { - // pointer to start of tuple (could pass length, but then could use len(x) for that) - uint len; - mp_obj_t *items; - mp_obj_tuple_get(obj, &len, &items); - return (machine_uint_t)items; - } else if (MP_OBJ_IS_TYPE(obj, &mp_type_list)) { - // pointer to start of list (could pass length, but then could use len(x) for that) - uint len; - mp_obj_t *items; - mp_obj_list_get(obj, &len, &items); - return (machine_uint_t)items; - } else { - // just pass along a pointer to the object - return (machine_uint_t)obj; + } else if (type == &mp_type_tuple) { + // pointer to start of tuple (could pass length, but then could use len(x) for that) + uint len; + mp_obj_t *items; + mp_obj_tuple_get(obj, &len, &items); + return (machine_uint_t)items; + } else if (type == &mp_type_list) { + // pointer to start of list (could pass length, 
but then could use len(x) for that) + uint len; + mp_obj_t *items; + mp_obj_list_get(obj, &len, &items); + return (machine_uint_t)items; + } else if (type->buffer_p.get_buffer != NULL) { + // supports the buffer protocol, get a pointer to the data + buffer_info_t bufinfo; + type->buffer_p.get_buffer(obj, &bufinfo, BUFFER_READ); + return (machine_uint_t)bufinfo.buf; + } else { + // just pass along a pointer to the object + return (machine_uint_t)obj; + } } }