@ -41,6 +41,26 @@ STATIC mp_obj_t str_modulo_format(mp_obj_t pattern, size_t n_args, const mp_obj_
STATIC mp_obj_t mp_obj_new_bytes_iterator ( mp_obj_t str , mp_obj_iter_buf_t * iter_buf ) ;
STATIC NORETURN void bad_implicit_conversion ( mp_obj_t self_in ) ;
STATIC void str_check_arg_type ( const mp_obj_type_t * self_type , const mp_obj_t arg ) {
// String operations generally need the args type to match the object they're called on,
// e.g. str.find(str), byte.startswith(byte)
// with the exception that bytes may be used for bytearray and vice versa.
const mp_obj_type_t * arg_type = mp_obj_get_type ( arg ) ;
# if MICROPY_PY_BUILTINS_BYTEARRAY
if ( arg_type = = & mp_type_bytearray ) {
arg_type = & mp_type_bytes ;
}
if ( self_type = = & mp_type_bytearray ) {
self_type = & mp_type_bytes ;
}
# endif
if ( arg_type ! = self_type ) {
bad_implicit_conversion ( arg ) ;
}
}
/******************************************************************************/
/* str */
@ -452,6 +472,7 @@ STATIC mp_obj_t bytes_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
STATIC mp_obj_t str_join ( mp_obj_t self_in , mp_obj_t arg ) {
mp_check_self ( mp_obj_is_str_or_bytes ( self_in ) ) ;
const mp_obj_type_t * self_type = mp_obj_get_type ( self_in ) ;
const mp_obj_type_t * ret_type = self_type ;
// get separation string
GET_STR_DATA_LEN ( self_in , sep_str , sep_len ) ;
@ -469,8 +490,19 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
// count required length
size_t required_len = 0 ;
# if MICROPY_PY_BUILTINS_BYTEARRAY
if ( self_type = = & mp_type_bytearray ) {
self_type = & mp_type_bytes ;
}
# endif
for ( size_t i = 0 ; i < seq_len ; i + + ) {
if ( mp_obj_get_type ( seq_items [ i ] ) ! = self_type ) {
const mp_obj_type_t * seq_type = mp_obj_get_type ( seq_items [ i ] ) ;
# if MICROPY_PY_BUILTINS_BYTEARRAY
if ( seq_type = = & mp_type_bytearray ) {
seq_type = & mp_type_bytes ;
}
# endif
if ( seq_type ! = self_type ) {
mp_raise_TypeError (
MP_ERROR_TEXT ( " join expects a list of str/bytes objects consistent with self object " ) ) ;
}
@ -496,7 +528,7 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
}
// return joined string
return mp_obj_new_str_from_vstr ( self _type, & vstr ) ;
return mp_obj_new_str_from_vstr ( ret _type, & vstr ) ;
}
MP_DEFINE_CONST_FUN_OBJ_2 ( str_join_obj , str_join ) ;
@ -545,9 +577,7 @@ mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
} else {
// sep given
if ( mp_obj_get_type ( sep ) ! = self_type ) {
bad_implicit_conversion ( sep ) ;
}
str_check_arg_type ( self_type , sep ) ;
size_t sep_len ;
const char * sep_str = mp_obj_str_get_data ( sep , & sep_len ) ;
@ -699,9 +729,7 @@ STATIC mp_obj_t str_finder(size_t n_args, const mp_obj_t *args, int direction, b
mp_check_self ( mp_obj_is_str_or_bytes ( args [ 0 ] ) ) ;
// check argument type
if ( mp_obj_get_type ( args [ 1 ] ) ! = self_type ) {
bad_implicit_conversion ( args [ 1 ] ) ;
}
str_check_arg_type ( self_type , args [ 1 ] ) ;
GET_STR_DATA_LEN ( args [ 0 ] , haystack , haystack_len ) ;
GET_STR_DATA_LEN ( args [ 1 ] , needle , needle_len ) ;
@ -805,9 +833,7 @@ STATIC mp_obj_t str_uni_strip(int type, size_t n_args, const mp_obj_t *args) {
chars_to_del = whitespace ;
chars_to_del_len = sizeof ( whitespace ) - 1 ;
} else {
if ( mp_obj_get_type ( args [ 1 ] ) ! = self_type ) {
bad_implicit_conversion ( args [ 1 ] ) ;
}
str_check_arg_type ( self_type , args [ 1 ] ) ;
GET_STR_DATA_LEN ( args [ 1 ] , s , l ) ;
chars_to_del = s ;
chars_to_del_len = l ;
@ -1633,13 +1659,8 @@ STATIC mp_obj_t str_replace(size_t n_args, const mp_obj_t *args) {
const mp_obj_type_t * self_type = mp_obj_get_type ( args [ 0 ] ) ;
if ( mp_obj_get_type ( args [ 1 ] ) ! = self_type ) {
bad_implicit_conversion ( args [ 1 ] ) ;
}
if ( mp_obj_get_type ( args [ 2 ] ) ! = self_type ) {
bad_implicit_conversion ( args [ 2 ] ) ;
}
str_check_arg_type ( self_type , args [ 1 ] ) ;
str_check_arg_type ( self_type , args [ 2 ] ) ;
// extract string data
@ -1726,9 +1747,7 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
mp_check_self ( mp_obj_is_str_or_bytes ( args [ 0 ] ) ) ;
// check argument type
if ( mp_obj_get_type ( args [ 1 ] ) ! = self_type ) {
bad_implicit_conversion ( args [ 1 ] ) ;
}
str_check_arg_type ( self_type , args [ 1 ] ) ;
GET_STR_DATA_LEN ( args [ 0 ] , haystack , haystack_len ) ;
GET_STR_DATA_LEN ( args [ 1 ] , needle , needle_len ) ;
@ -1767,9 +1786,7 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
STATIC mp_obj_t str_partitioner ( mp_obj_t self_in , mp_obj_t arg , int direction ) {
mp_check_self ( mp_obj_is_str_or_bytes ( self_in ) ) ;
const mp_obj_type_t * self_type = mp_obj_get_type ( self_in ) ;
if ( self_type ! = mp_obj_get_type ( arg ) ) {
bad_implicit_conversion ( arg ) ;
}
str_check_arg_type ( self_type , arg ) ;
GET_STR_DATA_LEN ( self_in , str , str_len ) ;
GET_STR_DATA_LEN ( arg , sep , sep_len ) ;
@ -1795,6 +1812,12 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, int direction) {
result [ 2 ] = self_in ;
}
# if MICROPY_PY_BUILTINS_BYTEARRAY
if ( mp_obj_get_type ( arg ) ! = self_type ) {
arg = mp_obj_new_str_of_type ( self_type , sep , sep_len ) ;
}
# endif
const byte * position_ptr = find_subbytes ( str , str_len , sep , sep_len , direction ) ;
if ( position_ptr ! = NULL ) {
size_t position = position_ptr - str ;
@ -1940,17 +1963,15 @@ mp_int_t mp_obj_str_get_buffer(mp_obj_t self_in, mp_buffer_info_t *bufinfo, mp_u
}
}
STATIC const mp_rom_map_elem_t str8_locals_dict_table [ ] = {
// This locals table is used for the following types: str, bytes, bytearray, array.array.
// Each type takes a different section (start to end offset) of this table.
STATIC const mp_rom_map_elem_t array_bytearray_str_bytes_locals_table [ ] = {
# if MICROPY_PY_ARRAY || MICROPY_PY_BUILTINS_BYTEARRAY
{ MP_ROM_QSTR ( MP_QSTR_append ) , MP_ROM_PTR ( & mp_obj_array_append_obj ) } ,
{ MP_ROM_QSTR ( MP_QSTR_extend ) , MP_ROM_PTR ( & mp_obj_array_extend_obj ) } ,
# endif
# if MICROPY_CPYTHON_COMPAT
{ MP_ROM_QSTR ( MP_QSTR_decode ) , MP_ROM_PTR ( & bytes_decode_obj ) } ,
# if !MICROPY_PY_BUILTINS_STR_UNICODE
// If we have separate unicode type, then here we have methods only
// for bytes type, and it should not have encode() methods. Otherwise,
// we have non-compliant-but-practical bytestring type, which shares
// method table with bytes, so they both have encode() and decode()
// methods (which should do type checking at runtime).
{ MP_ROM_QSTR ( MP_QSTR_encode ) , MP_ROM_PTR ( & str_encode_obj ) } ,
# endif
# endif
{ MP_ROM_QSTR ( MP_QSTR_find ) , MP_ROM_PTR ( & str_find_obj ) } ,
{ MP_ROM_QSTR ( MP_QSTR_rfind ) , MP_ROM_PTR ( & str_rfind_obj ) } ,
@ -1986,9 +2007,46 @@ STATIC const mp_rom_map_elem_t str8_locals_dict_table[] = {
{ MP_ROM_QSTR ( MP_QSTR_isdigit ) , MP_ROM_PTR ( & str_isdigit_obj ) } ,
{ MP_ROM_QSTR ( MP_QSTR_isupper ) , MP_ROM_PTR ( & str_isupper_obj ) } ,
{ MP_ROM_QSTR ( MP_QSTR_islower ) , MP_ROM_PTR ( & str_islower_obj ) } ,
# if MICROPY_CPYTHON_COMPAT
{ MP_ROM_QSTR ( MP_QSTR_encode ) , MP_ROM_PTR ( & str_encode_obj ) } ,
# endif
} ;
STATIC MP_DEFINE_CONST_DICT ( str8_locals_dict , str8_locals_dict_table ) ;
# if MICROPY_CPYTHON_COMPAT
# define TABLE_ENTRIES_COMPAT 1
# else
# define TABLE_ENTRIES_COMPAT 0
# endif
# if MICROPY_PY_ARRAY || MICROPY_PY_BUILTINS_BYTEARRAY
# define TABLE_ENTRIES_ARRAY 2
# else
# define TABLE_ENTRIES_ARRAY 0
# endif
MP_DEFINE_CONST_DICT_WITH_SIZE ( mp_obj_str_locals_dict ,
array_bytearray_str_bytes_locals_table + TABLE_ENTRIES_ARRAY + TABLE_ENTRIES_COMPAT ,
MP_ARRAY_SIZE ( array_bytearray_str_bytes_locals_table ) - ( TABLE_ENTRIES_ARRAY + TABLE_ENTRIES_COMPAT ) ) ;
# if TABLE_ENTRIES_COMPAT == 0
# define mp_obj_bytes_locals_dict mp_obj_str_locals_dict
# else
MP_DEFINE_CONST_DICT_WITH_SIZE ( mp_obj_bytes_locals_dict ,
array_bytearray_str_bytes_locals_table + TABLE_ENTRIES_ARRAY ,
MP_ARRAY_SIZE ( array_bytearray_str_bytes_locals_table ) - ( TABLE_ENTRIES_ARRAY + TABLE_ENTRIES_COMPAT ) ) ;
# endif
# if MICROPY_PY_BUILTINS_BYTEARRAY
MP_DEFINE_CONST_DICT_WITH_SIZE ( mp_obj_bytearray_locals_dict ,
array_bytearray_str_bytes_locals_table ,
MP_ARRAY_SIZE ( array_bytearray_str_bytes_locals_table ) - TABLE_ENTRIES_COMPAT ) ;
# endif
# if MICROPY_PY_ARRAY
MP_DEFINE_CONST_DICT_WITH_SIZE ( mp_obj_array_locals_dict ,
array_bytearray_str_bytes_locals_table ,
TABLE_ENTRIES_ARRAY ) ;
# endif
# if !MICROPY_PY_BUILTINS_STR_UNICODE
STATIC mp_obj_t mp_obj_new_str_iterator ( mp_obj_t str , mp_obj_iter_buf_t * iter_buf ) ;
@ -2002,9 +2060,9 @@ const mp_obj_type_t mp_type_str = {
. subscr = bytes_subscr ,
. getiter = mp_obj_new_str_iterator ,
. buffer_p = { . get_buffer = mp_obj_str_get_buffer } ,
. locals_dict = ( mp_obj_dict_t * ) & str8 _locals_dict ,
. locals_dict = ( mp_obj_dict_t * ) & mp_obj_ str_locals_dict,
} ;
# endif
# endif // !MICROPY_PY_BUILTINS_STR_UNICODE
// Reuses most of methods from str
const mp_obj_type_t mp_type_bytes = {
@ -2016,7 +2074,7 @@ const mp_obj_type_t mp_type_bytes = {
. subscr = bytes_subscr ,
. getiter = mp_obj_new_bytes_iterator ,
. buffer_p = { . get_buffer = mp_obj_str_get_buffer } ,
. locals_dict = ( mp_obj_dict_t * ) & str8 _locals_dict,
. locals_dict = ( mp_obj_dict_t * ) & mp_obj_bytes _locals_dict,
} ;
// The zero-length bytes object, with data that includes a null-terminating byte
@ -2044,6 +2102,10 @@ mp_obj_t mp_obj_new_str_copy(const mp_obj_type_t *type, const byte *data, size_t
mp_obj_t mp_obj_new_str_of_type ( const mp_obj_type_t * type , const byte * data , size_t len ) {
if ( type = = & mp_type_str ) {
return mp_obj_new_str ( ( const char * ) data , len ) ;
# if MICROPY_PY_BUILTINS_BYTEARRAY
} else if ( type = = & mp_type_bytearray ) {
return mp_obj_new_bytearray ( len , data ) ;
# endif
} else {
return mp_obj_new_bytes ( data , len ) ;
}
@ -2068,18 +2130,24 @@ mp_obj_t mp_obj_new_str_from_vstr(const mp_obj_type_t *type, vstr_t *vstr) {
}
}
// make a new str/bytes object
mp_obj_str_t * o = mp_obj_malloc ( mp_obj_str_t , type ) ;
o - > len = vstr - > len ;
o - > hash = qstr_compute_hash ( ( byte * ) vstr - > buf , vstr - > len ) ;
byte * data ;
if ( vstr - > len + 1 = = vstr - > alloc ) {
o - > data = ( byte * ) vstr - > buf ;
data = ( byte * ) vstr - > buf ;
} else {
o - > data = ( byte * ) m_renew ( char , vstr - > buf , vstr - > alloc , vstr - > len + 1 ) ;
data = ( byte * ) m_renew ( char , vstr - > buf , vstr - > alloc , vstr - > len + 1 ) ;
}
( ( byte * ) o - > data ) [ o - > len ] = ' \0 ' ; // add null byte
data [ vstr - > len ] = ' \0 ' ; // add null byte
vstr - > buf = NULL ;
vstr - > alloc = 0 ;
# if MICROPY_PY_BUILTINS_BYTEARRAY
if ( type = = & mp_type_bytearray ) {
return mp_obj_new_bytearray_by_ref ( vstr - > len , data ) ;
}
# endif
mp_obj_str_t * o = mp_obj_malloc ( mp_obj_str_t , type ) ;
o - > len = vstr - > len ;
o - > hash = qstr_compute_hash ( data , vstr - > len ) ;
o - > data = data ;
return MP_OBJ_FROM_PTR ( o ) ;
}
@ -2179,6 +2247,7 @@ const byte *mp_obj_str_get_data_no_check(mp_obj_t self_in, size_t *len) {
if ( mp_obj_is_qstr ( self_in ) ) {
return qstr_data ( MP_OBJ_QSTR_VALUE ( self_in ) , len ) ;
} else {
MP_STATIC_ASSERT_STR_ARRAY_COMPATIBLE ;
* len = ( ( mp_obj_str_t * ) MP_OBJ_TO_PTR ( self_in ) ) - > len ;
return ( ( mp_obj_str_t * ) MP_OBJ_TO_PTR ( self_in ) ) - > data ;
}