From 0f96ec826811e3cec3703141684445bea639e2bc Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Tue, 18 Feb 2014 21:21:22 +0200
Subject: [PATCH 1/2] Bytecode uint varlen encoding: support arbitrary values.

---
Review notes (not part of the commit message):
 * Line breaks and indentation were restored from a whitespace-collapsed
   copy; apply with --ignore-whitespace if context whitespace differs.
 * DECODE_UINT / DECODE_QSTR are wrapped in do { } while (0), like the
   macros they replace, so "DECODE_UINT;" stays a single statement and
   is safe inside an unbraced if/else.
 * Comment typo fixed: "little-ending" -> "little-endian".

 py/emitbc.c | 26 +++++++++++++-------------
 py/vm.c     | 14 ++++++++++++--
 2 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/py/emitbc.c b/py/emitbc.c
index 269fcdeb7e..4fe7ae8cd0 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -108,19 +108,19 @@ STATIC void emit_write_byte_code_byte_byte(emit_t* emit, byte b1, uint b2) {
 }
 
 STATIC void emit_write_byte_code_uint(emit_t* emit, uint num) {
-    if (num <= 127) { // fits in 0x7f
-        // fit argument in single byte
-        byte* c = emit_get_cur_to_write_byte_code(emit, 1);
-        c[0] = num;
-    } else if (num <= 16383) { // fits in 0x3fff
-        // fit argument in two bytes
-        byte* c = emit_get_cur_to_write_byte_code(emit, 2);
-        c[0] = (num >> 8) | 0x80;
-        c[1] = num;
-    } else {
-        // larger numbers not implemented/supported
-        assert(0);
-    }
+    // We store each 7 bits in a separate byte, and that's how many bytes are needed
+    byte buf[(BYTES_PER_WORD * 8 + 7) / 7];
+    byte *p = buf + sizeof(buf);
+    // We encode in little-endian order, but store in big-endian, to help decoding
+    do {
+        *--p = num & 0x7f;
+        num >>= 7;
+    } while (num != 0);
+    byte* c = emit_get_cur_to_write_byte_code(emit, buf + sizeof(buf) - p);
+    while (p != buf + sizeof(buf) - 1) {
+        *c++ = *p++ | 0x80;
+    }
+    *c = *p;
 }
 
 // integers (for small ints) are stored as 24 bits, in excess
diff --git a/py/vm.c b/py/vm.c
index 573167b57d..0fdf5d4e31 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -38,10 +38,20 @@ typedef enum {
     UNWIND_JUMP,
 } mp_unwind_reason_t;
 
-#define DECODE_UINT do { unum = *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0)
+#define DECODE_UINT do { \
+    unum = 0; \
+    do { \
+        unum = (unum << 7) + (*ip & 0x7f); \
+    } while ((*ip++ & 0x80) != 0); \
+} while (0)
 #define DECODE_ULABEL do { unum = (ip[0] | (ip[1] << 8)); ip += 2; } while (0)
 #define DECODE_SLABEL do { unum = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2; } while (0)
-#define DECODE_QSTR do { qst = *ip++; if (qst > 127) { qst = ((qst & 0x3f) << 8) | (*ip++); } } while (0)
+#define DECODE_QSTR do { \
+    qst = 0; \
+    do { \
+        qst = (qst << 7) + (*ip & 0x7f); \
+    } while ((*ip++ & 0x80) != 0); \
+} while (0)
 #define PUSH(val) *++sp = (val)
 #define POP() (*sp--)
 #define TOP() (*sp)

From 047cd40313e39b662650bbf6c8059ab0910e5986 Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Wed, 19 Feb 2014 15:47:59 +0200
Subject: [PATCH 2/2] Bytecode int varlen encoding: support arbitrary values for signed ints too.

---
Review notes (not part of the commit message):
 * Line breaks and indentation were restored from a whitespace-collapsed
   copy; apply with --ignore-whitespace if context whitespace differs.
 * vm.c now accumulates the decoded value in machine_int_t, the type the
   encoder takes, instead of int, so values wider than int survive.
 * Decoders shift on the unsigned type: left-shifting a negative signed
   value (num == -1 for negative constants) is undefined behaviour in C.
 * showbc.c still prints through int / %d; values wider than int are
   truncated in the disassembly listing only. Left as a follow-up.
 * Comment typo fixed: "little-ending" -> "little-endian".

 py/emitbc.c | 32 ++++++++++++++++++++++++--------
 py/showbc.c | 17 ++++++++++++-----
 py/vm.c     | 15 +++++++++++----
 3 files changed, 47 insertions(+), 17 deletions(-)

diff --git a/py/emitbc.c b/py/emitbc.c
index 4fe7ae8cd0..9fab977909 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -123,15 +123,31 @@ STATIC void emit_write_byte_code_uint(emit_t* emit, uint num) {
     *c = *p;
 }
 
-// integers (for small ints) are stored as 24 bits, in excess
+// Similar to emit_write_byte_code_uint(), just some extra handling to encode sign
 STATIC void emit_write_byte_code_byte_int(emit_t* emit, byte b1, machine_int_t num) {
-    num += 0x800000;
-    assert(0 <= num && num <= 0xffffff);
-    byte* c = emit_get_cur_to_write_byte_code(emit, 4);
-    c[0] = b1;
-    c[1] = num;
-    c[2] = num >> 8;
-    c[3] = num >> 16;
+    emit_write_byte_code_byte(emit, b1);
+
+    // We store each 7 bits in a separate byte, and that's how many bytes are needed
+    byte buf[(BYTES_PER_WORD * 8 + 7) / 7];
+    byte *p = buf + sizeof(buf);
+    // We encode in little-endian order, but store in big-endian, to help decoding
+    do {
+        *--p = num & 0x7f;
+        num >>= 7;
+    } while (num != 0 && num != -1);
+    // Make sure that highest bit we stored (mask 0x40) matches sign
+    // of the number. If not, store extra byte just to encode sign
+    if (num == -1 && (*p & 0x40) == 0) {
+        *--p = 0x7f;
+    } else if (num == 0 && (*p & 0x40) != 0) {
+        *--p = 0;
+    }
+
+    byte* c = emit_get_cur_to_write_byte_code(emit, buf + sizeof(buf) - p);
+    while (p != buf + sizeof(buf) - 1) {
+        *c++ = *p++ | 0x80;
+    }
+    *c = *p;
 }
 
 STATIC void emit_write_byte_code_byte_uint(emit_t* emit, byte b, uint num) {
diff --git a/py/showbc.c b/py/showbc.c
index e3387dbe27..837ee7611f 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -57,11 +57,18 @@ void mp_byte_code_print(const byte *ip, int len) {
                 printf("LOAD_CONST_ELLIPSIS");
                 break;
 
-            case MP_BC_LOAD_CONST_SMALL_INT:
-                unum = (ip[0] | (ip[1] << 8) | (ip[2] << 16)) - 0x800000;
-                ip += 3;
-                printf("LOAD_CONST_SMALL_INT %d", (int)unum);
-                break;
+            case MP_BC_LOAD_CONST_SMALL_INT: {
+                int num = 0;
+                if ((ip[0] & 0x40) != 0) {
+                    // Number is negative
+                    num--;
+                }
+                do {
+                    num = (int)(((unsigned int)num << 7) | (*ip & 0x7f));
+                } while ((*ip++ & 0x80) != 0);
+                printf("LOAD_CONST_SMALL_INT %d", num);
+                break;
+            }
 
             case MP_BC_LOAD_CONST_INT:
                 DECODE_QSTR;
diff --git a/py/vm.c b/py/vm.c
index 0fdf5d4e31..cab340ff38 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -156,11 +156,18 @@ dispatch_loop:
                         PUSH(mp_const_ellipsis);
                         break;
 
-                    case MP_BC_LOAD_CONST_SMALL_INT:
-                        unum = (ip[0] | (ip[1] << 8) | (ip[2] << 16)) - 0x800000;
-                        ip += 3;
-                        PUSH(MP_OBJ_NEW_SMALL_INT(unum));
+                    case MP_BC_LOAD_CONST_SMALL_INT: {
+                        machine_int_t num = 0;
+                        if ((ip[0] & 0x40) != 0) {
+                            // Number is negative
+                            num--;
+                        }
+                        do {
+                            num = (machine_int_t)(((machine_uint_t)num << 7) | (*ip & 0x7f));
+                        } while ((*ip++ & 0x80) != 0);
+                        PUSH(MP_OBJ_NEW_SMALL_INT(num));
                         break;
+                    }
 
                     case MP_BC_LOAD_CONST_INT:
                         DECODE_QSTR;