diff --git a/py/mpconfig.h b/py/mpconfig.h index 17c5a770c4..56495d9156 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -4,6 +4,20 @@ #include +#ifndef INT_FMT +// printf format spec to use for machine_int_t and friends +#ifdef __LP64__ +// Archs where machine_int_t == long, long != int +#define UINT_FMT "%lu" +#define INT_FMT "%ld" +#else +// Archs where machine_int_t == int +#define UINT_FMT "%u" +#define INT_FMT "%d" +#endif +#endif //INT_FMT + + // Any options not explicitly set in mpconfigport.h will get default // values below. @@ -11,3 +25,9 @@ #ifndef MICROPY_MEM_STATS #define MICROPY_MEM_STATS (1) #endif + +// Whether to support slice object and correspondingly +// slice subscript operators +#ifndef MICROPY_ENABLE_SLICE +#define MICROPY_ENABLE_SLICE (1) +#endif diff --git a/py/obj.h b/py/obj.h index 86234bea27..b39b84239f 100644 --- a/py/obj.h +++ b/py/obj.h @@ -146,6 +146,7 @@ mp_obj_t mp_obj_new_list(uint n, mp_obj_t *items); mp_obj_t mp_obj_new_list_reverse(uint n, mp_obj_t *items); mp_obj_t mp_obj_new_dict(int n_args); mp_obj_t mp_obj_new_set(int n_args, mp_obj_t *items); +mp_obj_t mp_obj_new_slice(mp_obj_t start, mp_obj_t stop, mp_obj_t step); mp_obj_t mp_obj_new_bound_meth(mp_obj_t self, mp_obj_t meth); mp_obj_t mp_obj_new_class(struct _mp_map_t *class_locals); mp_obj_t mp_obj_new_instance(mp_obj_t clas); @@ -216,6 +217,10 @@ mp_obj_t mp_obj_dict_store(mp_obj_t self_in, mp_obj_t key, mp_obj_t value); // set void mp_obj_set_store(mp_obj_t self_in, mp_obj_t item); +// slice +extern const mp_obj_type_t slice_type; +void mp_obj_slice_get(mp_obj_t self_in, machine_int_t *start, machine_int_t *stop, machine_int_t *step); + // functions typedef struct _mp_obj_fun_native_t { // need this so we can define const objects (to go in ROM) mp_obj_base_t base; diff --git a/py/objslice.c b/py/objslice.c new file mode 100644 index 0000000000..03607e4c3e --- /dev/null +++ b/py/objslice.c @@ -0,0 +1,74 @@ +#include +#include +#include +#include + +#include "nlr.h" 
+#include "misc.h"
+#include "mpconfig.h"
+#include "obj.h"
+#include "runtime0.h"
+
+#if MICROPY_ENABLE_SLICE
+
+// TODO: This implements only the variant of slice with 2 integer args.
+// CPython supports 3rd arg (step), plus args can be arbitrary Python objects.
+typedef struct _mp_obj_slice_t {
+    mp_obj_base_t base;
+    machine_int_t start; // 0 when the start was omitted (None)
+    machine_int_t stop;  // 0 means "end of sequence"; see mp_obj_new_slice
+} mp_obj_slice_t;
+
+void slice_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t o_in) {
+    mp_obj_slice_t *o = o_in;
+    print(env, "slice(" INT_FMT ", " INT_FMT ")", o->start, o->stop);
+}
+
+const mp_obj_type_t slice_type = {
+    { &mp_const_type },
+    "slice",
+    slice_print,
+    NULL, // call_n
+    NULL, // unary_op
+    NULL, // binary_op
+    NULL, // getiter
+    NULL, // iternext
+    { { NULL, NULL }, }, // method list
+};
+
+// TODO: Make sure to handle "empty" values, which are signified by None in CPython
+mp_obj_t mp_obj_new_slice(mp_obj_t ostart, mp_obj_t ostop, mp_obj_t ostep) {
+    assert(ostep == NULL); // only 2-argument slices are supported so far
+    machine_int_t start = 0, stop = 0;
+    if (ostart != mp_const_none) {
+        start = mp_obj_get_int(ostart);
+    }
+    if (ostop != mp_const_none) {
+        stop = mp_obj_get_int(ostop);
+        if (stop == 0) {
+            // [x:0] is a special case - in our slice object, stop = 0 means
+            // "end of sequence". Fortunately, [x:0] is an empty sequence for
+            // any x (including negative). [x:x] is also always an empty sequence,
+            // but x also can be 0. But note that b""[x:x] is b"" for any x (i.e.
+            // no IndexError, at least in Python 3.3.3). So, we just use -1's to
+            // signify that. -1 is a catchy "special" number in case someone
+            // ever tries to print a [x:0] slice.
+            start = stop = -1;
+        }
+    }
+    mp_obj_slice_t *o = m_new(mp_obj_slice_t, 1);
+    o->base.type = &slice_type;
+    o->start = start;
+    o->stop = stop;
+    return (mp_obj_t)o;
+}
+
+void mp_obj_slice_get(mp_obj_t self_in, machine_int_t *start, machine_int_t *stop, machine_int_t *step) {
+    assert(MP_OBJ_IS_TYPE(self_in, &slice_type));
+    mp_obj_slice_t *self = self_in;
+    *start = self->start;
+    *stop = self->stop;
+    *step = 1; // step is not stored in the object; it is always reported as 1
+}
+
+#endif
diff --git a/py/objstr.c b/py/objstr.c
index 48abf4951d..59547e3cd6 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -27,9 +27,47 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
     const char *lhs_str = qstr_str(lhs->qstr);
     switch (op) {
         case RT_BINARY_OP_SUBSCR:
-            // string access
-            // XXX a massive hack!
-            return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]);
+            // TODO: need predicate to check for int-like type (bools are such for example)
+            // ["no", "yes"][1 == 2] is common idiom
+            if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
+                // TODO: This implements byte string access for single index so far
+                // TODO: Handle negative indexes.
+                return mp_obj_new_int(lhs_str[mp_obj_get_int(rhs_in)]);
+#if MICROPY_ENABLE_SLICE
+            } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
+                machine_int_t start, stop, step;
+                mp_obj_slice_get(rhs_in, &start, &stop, &step);
+                assert(step == 1);
+                machine_int_t len = strlen(lhs_str);
+                // Negative start counts back from the end of the string.
+                if (start < 0) {
+                    start = len + start;
+                }
+                // stop == 0 encodes "end of sequence"; negative stop counts from the end.
+                if (stop <= 0) {
+                    stop = len + stop;
+                }
+                // Clamp both bounds into [0, len] and never let stop precede start, so
+                // out-of-range slices (b"123"[:-4], b"123"[0:10], b""[1:1]) produce a
+                // short or empty result instead of a negative length or a read outside
+                // lhs_str.
+                if (start < 0) {
+                    start = 0;
+                } else if (start > len) {
+                    start = len;
+                }
+                if (stop < start) {
+                    stop = start;
+                } else if (stop > len) {
+                    stop = len;
+                }
+                return mp_obj_new_str(qstr_from_strn_copy(lhs_str + start, stop - start));
+#endif
+            } else {
+                // Message doesn't match CPython, but we don't have as many bytes as
+                // they do to spend on verbose wording
+                nlr_jump(mp_obj_new_exception_msg(rt_q_TypeError, "index must be int"));
+            }
 
         case RT_BINARY_OP_ADD:
         case RT_BINARY_OP_INPLACE_ADD:
diff --git a/py/vm.c b/py/vm.c
index c549e2b490..382780640b 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -410,6 +410,20 @@ bool mp_execute_byte_code_2(const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t **
                         sp++;
                         break;
 
+#if MICROPY_ENABLE_SLICE
+                    case MP_BC_BUILD_SLICE:
+                        DECODE_UINT;
+                        if (unum == 2) {
+                            obj2 = POP(); // stop
+                            obj1 = TOP(); // start
+                            SET_TOP(mp_obj_new_slice(obj1, obj2, NULL));
+                        } else {
+                            printf("3-argument slice is not supported\n");
+                            assert(0);
+                        }
+                        break;
+#endif
+
                     case MP_BC_UNPACK_SEQUENCE:
                         DECODE_UINT;
                         rt_unpack_sequence(sp[0], unum, sp - unum + 1);
diff --git a/stm/Makefile b/stm/Makefile
index d6c77e2bd7..e84e21eae2 100644
--- a/stm/Makefile
+++ b/stm/Makefile
@@ -78,6 +78,7 @@ PY_O = \
 	objnone.o \
 	objrange.o \
 	objset.o \
+	objslice.o \
 	objstr.o \
 	objtuple.o \
 	objtype.o \
diff --git a/tests/basics/tests/slice-bstr1.py b/tests/basics/tests/slice-bstr1.py
new file mode 100644
index 0000000000..0bed959141
--- /dev/null
+++ b/tests/basics/tests/slice-bstr1.py
@@ -0,0 +1,27 @@
+print(b"123"[0:1])
+
+print(b"123"[0:2])
+
+print(b"123"[:1])
+
+print(b"123"[1:])
+
+# Idiom for copying sequence
+print(b"123"[:])
+
+print(b"123"[:-1])
+
+# Weird cases
+print(b"123"[0:0])
+print(b"123"[1:0])
+print(b"123"[1:1])
+print(b"123"[-1:-1])
+print(b"123"[-3:])
+print(b"123"[-3:3])
+print(b"123"[0:]) +print(b"123"[:0]) +print(b"123"[:-3]) +print(b"123"[:-4]) +# No IndexError! +print(b""[1:1]) +print(b""[-1:-1]) diff --git a/unix-cpy/Makefile b/unix-cpy/Makefile index 48c3179460..7fee3438fb 100644 --- a/unix-cpy/Makefile +++ b/unix-cpy/Makefile @@ -43,6 +43,7 @@ PY_O = \ objnone.o \ objrange.o \ objset.o \ + objslice.o \ objstr.o \ objtuple.o \ objtype.o \ diff --git a/unix-cpy/mpconfigport.h b/unix-cpy/mpconfigport.h index db72b31455..983b166a55 100644 --- a/unix-cpy/mpconfigport.h +++ b/unix-cpy/mpconfigport.h @@ -11,15 +11,11 @@ #ifdef __LP64__ typedef long machine_int_t; // must be pointer size typedef unsigned long machine_uint_t; // must be pointer size -#define UINT_FMT "%lu" -#define INT_FMT "%ld" #else // These are definitions for machines where sizeof(int) == sizeof(void*), // regardless for actual size. typedef int machine_int_t; // must be pointer size typedef unsigned int machine_uint_t; // must be pointer size -#define UINT_FMT "%u" -#define INT_FMT "%d" #endif #define BYTES_PER_WORD sizeof(machine_int_t) diff --git a/unix/Makefile b/unix/Makefile index fd5b6b43e0..38d6ba8e15 100644 --- a/unix/Makefile +++ b/unix/Makefile @@ -50,6 +50,7 @@ PY_O = \ objnone.o \ objrange.o \ objset.o \ + objslice.o \ objstr.o \ objtuple.o \ objtype.o \ diff --git a/unix/mpconfigport.h b/unix/mpconfigport.h index 3d0dc8a567..36cf138c17 100644 --- a/unix/mpconfigport.h +++ b/unix/mpconfigport.h @@ -11,15 +11,11 @@ #ifdef __LP64__ typedef long machine_int_t; // must be pointer size typedef unsigned long machine_uint_t; // must be pointer size -#define UINT_FMT "%lu" -#define INT_FMT "%ld" #else // These are definitions for machines where sizeof(int) == sizeof(void*), // regardless for actual size. typedef int machine_int_t; // must be pointer size typedef unsigned int machine_uint_t; // must be pointer size -#define UINT_FMT "%u" -#define INT_FMT "%d" #endif #define BYTES_PER_WORD sizeof(machine_int_t)