From ac2f7a7f6aab135e90dd12d30b51d857628b0a59 Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Sat, 4 Apr 2015 00:09:23 +0300
Subject: [PATCH] objstr: Add .splitlines() method.

splitlines() occurs ~179 times in CPython3 standard library, so was
deemed worthy to implement. The method has subtle semantic differences
from just .split("\n"). It is also defined as working for any end-of-line
combination, but this is currently not implemented - it works only with
LF line-endings (which should be OK for text strings on any platform,
but not OK for bytes).
---
 py/mpconfig.h       |  5 +++++
 py/objstr.c         | 36 ++++++++++++++++++++++++++++++++++--
 py/objstr.h         |  1 +
 py/objstrunicode.c  |  3 +++
 py/qstrdefs.h       |  5 +++++
 unix/mpconfigport.h |  1 +
 6 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/py/mpconfig.h b/py/mpconfig.h
index 27ec5dd3ef..b2f37e99c7 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -407,6 +407,11 @@ typedef double mp_float_t;
 #define MICROPY_PY_BUILTINS_STR_UNICODE (0)
 #endif
 
+// Whether str.splitlines() method is provided
+#ifndef MICROPY_PY_BUILTINS_STR_SPLITLINES
+#define MICROPY_PY_BUILTINS_STR_SPLITLINES (0)
+#endif
+
 // Whether to support bytearray object
 #ifndef MICROPY_PY_BUILTINS_BYTEARRAY
 #define MICROPY_PY_BUILTINS_BYTEARRAY (1)
diff --git a/py/objstr.c b/py/objstr.c
index a2309364d7..84f872fe28 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -455,8 +455,9 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
 }
 
 #define is_ws(c) ((c) == ' ' || (c) == '\t')
+enum {SPLIT = 0, KEEP = 1, SPLITLINES = 2}; // bit flags for the 'type' argument of str_split_internal
 
-mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
+STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args, int type) {
     const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
     mp_int_t splits = -1;
     mp_obj_t sep = mp_const_none;
@@ -517,7 +518,13 @@ mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
                 }
                 s++;
             }
-            mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
+            mp_uint_t len = s - start; // length of this piece, separator excluded
+            if (MP_LIKELY(!(len == 0 && s == top && (type & SPLITLINES)))) {
+                if (start + len != top && (type & KEEP)) {
+                    len++; // keep the line terminator (assumes a 1-byte separator)
+                }
+                mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, len));
+            } // otherwise: splitlines() omits an empty final piece
             if (s >= top) {
                 break;
             }
@@ -531,6 +538,25 @@ mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
     return res;
 }
 
+mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
+    return str_split_internal(n_args, args, SPLIT);
+}
+
+#if MICROPY_PY_BUILTINS_STR_SPLITLINES
+STATIC mp_obj_t str_splitlines(mp_uint_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_keepends, MP_ARG_BOOL, {.u_bool = false} },
+    };
+
+    // parse args
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t new_args[2] = {pos_args[0], MP_OBJ_NEW_QSTR(MP_QSTR__backslash_n)};
+    return str_split_internal(2, new_args, SPLITLINES | (args[0].u_bool ? KEEP : 0));
+}
+#endif
+
 STATIC mp_obj_t str_rsplit(mp_uint_t n_args, const mp_obj_t *args) {
     if (n_args < 3) {
         // If we don't have split limit, it doesn't matter from which side
@@ -1763,6 +1789,9 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index);
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
 MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, mp_obj_str_split);
+#if MICROPY_PY_BUILTINS_STR_SPLITLINES
+MP_DEFINE_CONST_FUN_OBJ_KW(str_splitlines_obj, 1, str_splitlines);
+#endif
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit);
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith);
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith);
@@ -1800,6 +1829,9 @@ STATIC const mp_map_elem_t str8_locals_dict_table[] = {
     { MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
+    #if MICROPY_PY_BUILTINS_STR_SPLITLINES
+    { MP_OBJ_NEW_QSTR(MP_QSTR_splitlines), (mp_obj_t)&str_splitlines_obj },
+    #endif
     { MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },
diff --git a/py/objstr.h b/py/objstr.h
index 7cba6193d6..d028b09f0c 100644
--- a/py/objstr.h
+++ b/py/objstr.h
@@ -73,6 +73,7 @@ MP_DECLARE_CONST_FUN_OBJ(str_index_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_rindex_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_join_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_split_obj);
+MP_DECLARE_CONST_FUN_OBJ(str_splitlines_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_rsplit_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_startswith_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_endswith_obj);
diff --git a/py/objstrunicode.c b/py/objstrunicode.c
index 1cf4ed4743..4e7f770c30 100644
--- a/py/objstrunicode.c
+++ b/py/objstrunicode.c
@@ -245,6 +245,9 @@ STATIC const mp_map_elem_t struni_locals_dict_table[] = {
     { MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
+    #if MICROPY_PY_BUILTINS_STR_SPLITLINES
+    { MP_OBJ_NEW_QSTR(MP_QSTR_splitlines), (mp_obj_t)&str_splitlines_obj },
+    #endif
     { MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },
diff --git a/py/qstrdefs.h b/py/qstrdefs.h
index 560b16d6a8..446d86cb84 100644
--- a/py/qstrdefs.h
+++ b/py/qstrdefs.h
@@ -299,6 +299,11 @@ Q(find)
 Q(rfind)
 Q(rindex)
 Q(split)
+#if MICROPY_PY_BUILTINS_STR_SPLITLINES
+Q(splitlines)
+Q(keepends)
+Q(\n)
+#endif
 Q(rsplit)
 Q(startswith)
 Q(endswith)
diff --git a/unix/mpconfigport.h b/unix/mpconfigport.h
index 891ba82e5e..00cb12139d 100644
--- a/unix/mpconfigport.h
+++ b/unix/mpconfigport.h
@@ -60,6 +60,7 @@
 #define MICROPY_PY_FUNCTION_ATTRS (1)
 #define MICROPY_PY_DESCRIPTORS (1)
 #define MICROPY_PY_BUILTINS_STR_UNICODE (1)
+#define MICROPY_PY_BUILTINS_STR_SPLITLINES (1)
 #define MICROPY_PY_BUILTINS_MEMORYVIEW (1)
 #define MICROPY_PY_BUILTINS_FROZENSET (1)
 #define MICROPY_PY_BUILTINS_COMPILE (1)