diff --git a/py/emitinlinethumb.c b/py/emitinlinethumb.c
index 17db2d8414..391d057891 100644
--- a/py/emitinlinethumb.c
+++ b/py/emitinlinethumb.c
@@ -196,6 +196,35 @@ STATIC mp_uint_t get_arg_reg(emit_inline_asm_t *emit, const char *op, mp_parse_n
     return 0;
 }
 
+#if MICROPY_EMIT_INLINE_THUMB_FLOAT
+STATIC mp_uint_t get_arg_vfpreg(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn) {
+    const char *reg_str = get_arg_str(pn);
+    if (reg_str[0] == 's' && reg_str[1] != '\0') {
+        mp_uint_t regno = 0;
+        for (++reg_str; *reg_str; ++reg_str) {
+            mp_uint_t v = *reg_str;
+            if (!('0' <= v && v <= '9')) {
+                goto malformed;
+            }
+            regno = 10 * regno + v - '0';
+        }
+        if (regno > 31) {
+            emit_inline_thumb_error_exc(emit,
+                mp_obj_new_exception_msg_varg(&mp_type_SyntaxError,
+                    "'%s' expects at most r%d", op, 31));
+            return 0;
+        } else {
+            return regno;
+        }
+    }
+malformed:
+    emit_inline_thumb_error_exc(emit,
+        mp_obj_new_exception_msg_varg(&mp_type_SyntaxError,
+            "'%s' expects an FPU register", op));
+    return 0;
+}
+#endif
+
 STATIC mp_uint_t get_arg_reglist(emit_inline_asm_t *emit, const char *op, mp_parse_node_t pn) {
     // a register list looks like {r0, r1, r2} and is parsed as a Python set
 
@@ -352,6 +381,17 @@ STATIC const format_9_10_op_t format_9_10_op_table[] = {
 };
 #undef X
 
+#if MICROPY_EMIT_INLINE_THUMB_FLOAT
+// actual opcodes are: 0xee00 | op.hi_nibble, 0x0a00 | op.lo_nibble
+typedef struct _format_vfp_op_t { byte op; char name[3]; } format_vfp_op_t;
+STATIC const format_vfp_op_t format_vfp_op_table[] = {
+    { 0x30, "add" },
+    { 0x34, "sub" },
+    { 0x20, "mul" },
+    { 0x80, "div" },
+};
+#endif
+
 STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_args, mp_parse_node_t *pn_args) {
     // TODO perhaps make two tables:
     // one_args =
@@ -366,6 +406,102 @@ STATIC void emit_inline_thumb_op(emit_inline_asm_t *emit, qstr op, mp_uint_t n_a
     mp_uint_t op_len;
     const char *op_str = (const char*)qstr_data(op, &op_len);
 
+    #if MICROPY_EMIT_INLINE_THUMB_FLOAT
+    if (op_str[0] == 'v') {
+        // floating point operations
+        if (n_args == 2) {
+            mp_uint_t op_code = 0x0ac0, op_code_hi;
+            if (strcmp(op_str, "vcmp") == 0) {
+                op_code_hi = 0xeeb4;
+                op_vfp_twoargs:;
+                mp_uint_t vd = get_arg_vfpreg(emit, op_str, pn_args[0]);
+                mp_uint_t vm = get_arg_vfpreg(emit, op_str, pn_args[1]);
+                asm_thumb_op32(emit->as,
+                    op_code_hi | ((vd & 1) << 6),
+                    op_code | ((vd & 0x1e) << 11) | ((vm & 1) << 5) | (vm & 0x1e) >> 1);
+            } else if (strcmp(op_str, "vsqrt") == 0) {
+                op_code_hi = 0xeeb1;
+                goto op_vfp_twoargs;
+            } else if (strcmp(op_str, "vneg") == 0) {
+                op_code_hi = 0xeeb1;
+                op_code = 0x0a40;
+                goto op_vfp_twoargs;
+            } else if (strcmp(op_str, "vcvt_f32_s32") == 0) {
+                op_code_hi = 0xeeb8; // int to float
+                goto op_vfp_twoargs;
+            } else if (strcmp(op_str, "vcvt_s32_f32") == 0) {
+                op_code_hi = 0xeebd; // float to int
+                goto op_vfp_twoargs;
+            } else if (strcmp(op_str, "vmrs") == 0) {
+                mp_uint_t reg_dest;
+                const char *reg_str0 = get_arg_str(pn_args[0]);
+                if (strcmp(reg_str0, "APSR_nzcv") == 0) {
+                    reg_dest = 15;
+                } else {
+                    reg_dest = get_arg_reg(emit, op_str, pn_args[0], 15);
+                }
+                const char *reg_str1 = get_arg_str(pn_args[1]);
+                if (strcmp(reg_str1, "FPSCR") == 0) {
+                    // FP status to ARM reg
+                    asm_thumb_op32(emit->as, 0xeef1, 0x0a10 | (reg_dest << 12));
+                } else {
+                    goto unknown_op;
+                }
+            } else if (strcmp(op_str, "vmov") == 0) {
+                op_code_hi = 0xee00;
+                mp_uint_t r_arm, vm;
+                const char *reg_str = get_arg_str(pn_args[0]);
+                if (reg_str[0] == 'r') {
+                    r_arm = get_arg_reg(emit, op_str, pn_args[0], 15);
+                    vm = get_arg_vfpreg(emit, op_str, pn_args[1]);
+                    op_code_hi |= 0x10;
+                } else {
+                    vm = get_arg_vfpreg(emit, op_str, pn_args[0]);
+                    r_arm = get_arg_reg(emit, op_str, pn_args[1], 15);
+                }
+                asm_thumb_op32(emit->as,
+                    op_code_hi | ((vm & 0x1e) >> 1),
+                    0x0a10 | (r_arm << 12) | ((vm & 1) << 7));
+            } else if (strcmp(op_str, "vldr") == 0) {
+                op_code_hi = 0xed90;
+                op_vldr_vstr:;
+                mp_uint_t vd = get_arg_vfpreg(emit, op_str, pn_args[0]);
+                mp_parse_node_t pn_base, pn_offset;
+                if (get_arg_addr(emit, op_str, pn_args[1], &pn_base, &pn_offset)) {
+                    mp_uint_t rlo_base = get_arg_reg(emit, op_str, pn_base, 7);
+                    mp_uint_t i8;
+                    i8 = get_arg_i(emit, op_str, pn_offset, 0xff);
+                    asm_thumb_op32(emit->as,
+                        op_code_hi | rlo_base | ((vd & 1) << 6),
+                        0x0a00 | ((vd & 0x1e) << 11) | i8);
+                }
+            } else if (strcmp(op_str, "vstr") == 0) {
+                op_code_hi = 0xed80;
+                goto op_vldr_vstr;
+            } else {
+                goto unknown_op;
+            }
+        } else if (n_args == 3) {
+            // search table for arith ops
+            for (mp_uint_t i = 0; i < MP_ARRAY_SIZE(format_vfp_op_table); i++) {
+                if (strncmp(op_str + 1, format_vfp_op_table[i].name, 3) == 0 && op_str[4] == '\0') {
+                    mp_uint_t op_code_hi = 0xee00 | (format_vfp_op_table[i].op & 0xf0);
+                    mp_uint_t op_code = 0x0a00 | ((format_vfp_op_table[i].op & 0x0f) << 4);
+                    mp_uint_t vd = get_arg_vfpreg(emit, op_str, pn_args[0]);
+                    mp_uint_t vn = get_arg_vfpreg(emit, op_str, pn_args[1]);
+                    mp_uint_t vm = get_arg_vfpreg(emit, op_str, pn_args[2]);
+                    asm_thumb_op32(emit->as,
+                        op_code_hi | ((vd & 1) << 6) | (vn >> 1),
+                        op_code | (vm >> 1) | ((vm & 1) << 5) | ((vd & 0x1e) << 11) | ((vn & 1) << 7));
+                    return;
+                }
+            }
+            goto unknown_op;
+        } else {
+            goto unknown_op;
+        }
+    } else
+    #endif
     if (n_args == 0) {
         if (strcmp(op_str, "nop") == 0) {
             asm_thumb_op16(emit->as, ASM_THUMB_OP_NOP);
diff --git a/py/mpconfig.h b/py/mpconfig.h
index c78221f3a5..a403a66633 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -186,6 +186,11 @@
 #define MICROPY_EMIT_INLINE_THUMB (0)
 #endif
 
+// Whether to enable float support in the Thumb2 inline assembler
+#ifndef MICROPY_EMIT_INLINE_THUMB_FLOAT
+#define MICROPY_EMIT_INLINE_THUMB_FLOAT (1)
+#endif
+
 // Whether to emit ARM native code
 #ifndef MICROPY_EMIT_ARM
 #define MICROPY_EMIT_ARM (0)
diff --git a/tests/inlineasm/asmfpaddsub.py b/tests/inlineasm/asmfpaddsub.py
new file mode 100644
index 0000000000..b5fcecb6ce
--- /dev/null
+++ b/tests/inlineasm/asmfpaddsub.py
@@ -0,0 +1,15 @@
+@micropython.asm_thumb # r0 = r0+r1-r2
+def add_sub(r0, r1, r2):
+    vmov(s0, r0)
+    vcvt_f32_s32(s0, s0)
+    vmov(s1, r1)
+    vcvt_f32_s32(s1, s1)
+    vmov(s2, r2)
+    vcvt_f32_s32(s2, s2)
+    vadd(s0, s0, s1)
+    vsub(s0, s0, s2)
+    vcvt_s32_f32(s31, s0)
+    vmov(r0, s31)
+
+print(add_sub(100, 20, 30))
+
diff --git a/tests/inlineasm/asmfpaddsub.py.exp b/tests/inlineasm/asmfpaddsub.py.exp
new file mode 100644
index 0000000000..d61f00d8ca
--- /dev/null
+++ b/tests/inlineasm/asmfpaddsub.py.exp
@@ -0,0 +1 @@
+90
diff --git a/tests/inlineasm/asmfpcmp.py b/tests/inlineasm/asmfpcmp.py
new file mode 100644
index 0000000000..d4fa1f2410
--- /dev/null
+++ b/tests/inlineasm/asmfpcmp.py
@@ -0,0 +1,14 @@
+@micropython.asm_thumb # test vcmp, vmrs
+def f(r0, r1):
+    vmov(s0, r0)
+    vcvt_f32_s32(s0, s0)
+    vmov(s1, r1)
+    vcvt_f32_s32(s1, s1)
+    vcmp(s1, s0)
+    vmrs(r0, FPSCR)
+    mov(r1, 28)
+    lsr(r0, r1)
+
+print(f(0,1))
+print(f(1,1))
+print(f(1,0))
diff --git a/tests/inlineasm/asmfpcmp.py.exp b/tests/inlineasm/asmfpcmp.py.exp
new file mode 100644
index 0000000000..104b3580f7
--- /dev/null
+++ b/tests/inlineasm/asmfpcmp.py.exp
@@ -0,0 +1,3 @@
+2
+6
+8
diff --git a/tests/inlineasm/asmfpldrstr.py b/tests/inlineasm/asmfpldrstr.py
new file mode 100644
index 0000000000..75054a6796
--- /dev/null
+++ b/tests/inlineasm/asmfpldrstr.py
@@ -0,0 +1,12 @@
+import array
+@micropython.asm_thumb # test vldr, vstr
+def arrayadd(r0):
+    vldr(s0, [r0, 0])
+    vldr(s1, [r0, 1])
+    vadd(s2, s0, s1)
+    vstr(s2, [r0, 2])
+
+z = array.array("f", [2, 4, 10])
+arrayadd(z)
+print(z[2])
+
diff --git a/tests/inlineasm/asmfpldrstr.py.exp b/tests/inlineasm/asmfpldrstr.py.exp
new file mode 100644
index 0000000000..e0ea36feef
--- /dev/null
+++ b/tests/inlineasm/asmfpldrstr.py.exp
@@ -0,0 +1 @@
+6.0
diff --git a/tests/inlineasm/asmfpmuldiv.py b/tests/inlineasm/asmfpmuldiv.py
new file mode 100644
index 0000000000..edf9511bcd
--- /dev/null
+++ b/tests/inlineasm/asmfpmuldiv.py
@@ -0,0 +1,15 @@
+@micropython.asm_thumb # r0 = (int)(r0*r1/r2)
+def muldiv(r0, r1, r2):
+    vmov(s0, r0)
+    vcvt_f32_s32(s0, s0)
+    vmov(s1, r1)
+    vcvt_f32_s32(s1, s1)
+    vmov(s2, r2)
+    vcvt_f32_s32(s2, s2)
+    vmul(s7, s0, s1)
+    vdiv(s8, s7, s2)
+    vcvt_s32_f32(s31, s8)
+    vmov(r0, s31)
+
+print(muldiv(100, 10, 50))
+
diff --git a/tests/inlineasm/asmfpmuldiv.py.exp b/tests/inlineasm/asmfpmuldiv.py.exp
new file mode 100644
index 0000000000..209e3ef4b6
--- /dev/null
+++ b/tests/inlineasm/asmfpmuldiv.py.exp
@@ -0,0 +1 @@
+20
diff --git a/tests/inlineasm/asmfpsqrt.py b/tests/inlineasm/asmfpsqrt.py
new file mode 100644
index 0000000000..f2c2d3a954
--- /dev/null
+++ b/tests/inlineasm/asmfpsqrt.py
@@ -0,0 +1,15 @@
+# test vsqrt, vneg
+@micropython.asm_thumb # r0 = -(int)(sqrt(r0)*r1)
+def sqrt_test(r0, r1):
+    vmov(s1, r0)
+    vcvt_f32_s32(s1, s1)
+    vsqrt(s1, s1)
+    vmov(s2, r1)
+    vcvt_f32_s32(s2, s2)
+    vmul(s0, s1, s2)
+    vneg(s7, s0)
+    vcvt_s32_f32(s31, s7)
+    vmov(r0, s31)
+
+print(sqrt_test(256, 10))
+
diff --git a/tests/inlineasm/asmfpsqrt.py.exp b/tests/inlineasm/asmfpsqrt.py.exp
new file mode 100644
index 0000000000..88a1e93bab
--- /dev/null
+++ b/tests/inlineasm/asmfpsqrt.py.exp
@@ -0,0 +1 @@
+-160
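
Note (not part of the patch): the format_vfp_op_table comment says the data-processing opcodes are built as 0xee00 | op.hi_nibble for the first halfword and 0x0a00 | (op.lo_nibble << 4) for the second, with the single-precision register number split so that its top four bits fill the 4-bit Vd/Vn/Vm field and its low bit becomes the D/N/M bit. The standalone Python sketch below mirrors that packing from the n_args == 3 branch so the emitted halfwords can be sanity-checked against the ARMv7-M encoding tables; the helper name encode_vfp_arith and the dict form of the table are illustrative only, not part of the MicroPython source.

    # Mirrors the halfword packing of the three-operand VFP path in emit_inline_thumb_op().
    FORMAT_VFP_OP_TABLE = {"add": 0x30, "sub": 0x34, "mul": 0x20, "div": 0x80}

    def encode_vfp_arith(name, vd, vn, vm):
        op = FORMAT_VFP_OP_TABLE[name]
        op_code_hi = 0xEE00 | (op & 0xF0)           # first halfword base
        op_code = 0x0A00 | ((op & 0x0F) << 4)       # second halfword base
        # s<k> splits into a 4-bit field (k >> 1) and a 1-bit D/N/M field (k & 1)
        hw1 = op_code_hi | ((vd & 1) << 6) | (vn >> 1)
        hw2 = op_code | (vm >> 1) | ((vm & 1) << 5) | ((vd & 0x1E) << 11) | ((vn & 1) << 7)
        return hw1, hw2

    # vadd(s2, s0, s1) should match the Thumb-2 VADD.F32 s2, s0, s1 encoding: 0xee30, 0x1a20
    print([hex(h) for h in encode_vfp_arith("add", 2, 0, 1)])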