Browse Source

objstr: Add .splitlines() method.

splitlines() occurs ~179 times in the CPython 3 standard library, so it was
deemed worth implementing. The method has subtle semantic differences
from plain .split("\n"). It is also defined to work with any end-of-line
combination, but this is not currently implemented - it works only with
LF line endings (which should be OK for text strings on any platform,
but not for bytes).
pull/1174/merge
Paul Sokolovsky 10 years ago
parent
commit
ac2f7a7f6a
  1. 5
      py/mpconfig.h
  2. 36
      py/objstr.c
  3. 1
      py/objstr.h
  4. 3
      py/objstrunicode.c
  5. 5
      py/qstrdefs.h
  6. 1
      unix/mpconfigport.h

5
py/mpconfig.h

@ -407,6 +407,11 @@ typedef double mp_float_t;
#define MICROPY_PY_BUILTINS_STR_UNICODE (0)
#endif
// Whether the str.splitlines() method is provided (disabled by default;
// a port enables it by defining this macro to 1)
#ifndef MICROPY_PY_BUILTINS_STR_SPLITLINES
#define MICROPY_PY_BUILTINS_STR_SPLITLINES (0)
#endif
// Whether to support bytearray object
#ifndef MICROPY_PY_BUILTINS_BYTEARRAY
#define MICROPY_PY_BUILTINS_BYTEARRAY (1)

36
py/objstr.c

@ -455,8 +455,9 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
}
// True if c is a space or a tab. Note: c is evaluated twice, so avoid
// passing an argument with side effects.
#define is_ws(c) ((c) == ' ' || (c) == '\t')
// Mode passed to str_split_internal(). SPLIT selects plain split();
// KEEP and SPLITLINES are bit flags that may be OR-ed together.
enum {
    SPLIT = 0,
    KEEP = 1,
    SPLITLINES = 2,
};
mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args, int type) {
const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
mp_int_t splits = -1;
mp_obj_t sep = mp_const_none;
@ -517,7 +518,13 @@ mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
}
s++;
}
mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
mp_uint_t len = s - start;
// In splitlines mode an empty piece located exactly at the end of the
// string is dropped; in every other case the piece is appended.
// SPLITLINES is a bit flag, so it is tested with bitwise AND (a logical
// && would also be true for any other non-zero mode, e.g. KEEP alone).
if (MP_LIKELY(!(len == 0 && s == top && (type & SPLITLINES)))) {
    // With KEEP set, extend the piece by one byte so it retains the
    // separator that follows it (str_splitlines passes the one-byte "\n"),
    // unless the piece already runs to the end of the string.
    if (start + len != top && (type & KEEP)) {
        len++;
    }
    mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, len));
}
if (s >= top) {
break;
}
@ -531,6 +538,25 @@ mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
return res;
}
// Public entry point for str.split(): forwards to the shared splitter in
// plain SPLIT mode.
mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
    mp_obj_t parts = str_split_internal(n_args, args, SPLIT);
    return parts;
}
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
// str.splitlines([keepends]): split the string on "\n" using the shared
// splitter in SPLITLINES mode, optionally keeping the line terminators.
STATIC mp_obj_t str_splitlines(mp_uint_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
    // The only accepted option is "keepends", which defaults to false.
    static const mp_arg_t allowed_args[] = {
        { MP_QSTR_keepends, MP_ARG_BOOL, {.u_bool = false} },
    };

    // pos_args[0] is the string itself; parse whatever follows it.
    mp_arg_val_t parsed[MP_ARRAY_SIZE(allowed_args)];
    mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args,
        MP_ARRAY_SIZE(allowed_args), allowed_args, parsed);

    // Always splitlines mode; add KEEP when keepends was requested.
    int mode = SPLITLINES;
    if (parsed[0].u_bool) {
        mode |= KEEP;
    }

    // Delegate to the splitter with "\n" as the explicit separator.
    mp_obj_t split_args[2] = {pos_args[0], MP_OBJ_NEW_QSTR(MP_QSTR__backslash_n)};
    return str_split_internal(2, split_args, mode);
}
#endif
STATIC mp_obj_t str_rsplit(mp_uint_t n_args, const mp_obj_t *args) {
if (n_args < 3) {
// If we don't have split limit, it doesn't matter from which side
@ -1763,6 +1789,9 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, mp_obj_str_split);
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
MP_DEFINE_CONST_FUN_OBJ_KW(str_splitlines_obj, 1, str_splitlines);
#endif
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith);
@ -1800,6 +1829,9 @@ STATIC const mp_map_elem_t str8_locals_dict_table[] = {
{ MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
{ MP_OBJ_NEW_QSTR(MP_QSTR_splitlines), (mp_obj_t)&str_splitlines_obj },
#endif
{ MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },

1
py/objstr.h

@ -73,6 +73,7 @@ MP_DECLARE_CONST_FUN_OBJ(str_index_obj);
MP_DECLARE_CONST_FUN_OBJ(str_rindex_obj);
MP_DECLARE_CONST_FUN_OBJ(str_join_obj);
MP_DECLARE_CONST_FUN_OBJ(str_split_obj);
MP_DECLARE_CONST_FUN_OBJ(str_splitlines_obj);
MP_DECLARE_CONST_FUN_OBJ(str_rsplit_obj);
MP_DECLARE_CONST_FUN_OBJ(str_startswith_obj);
MP_DECLARE_CONST_FUN_OBJ(str_endswith_obj);

3
py/objstrunicode.c

@ -245,6 +245,9 @@ STATIC const mp_map_elem_t struni_locals_dict_table[] = {
{ MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
{ MP_OBJ_NEW_QSTR(MP_QSTR_splitlines), (mp_obj_t)&str_splitlines_obj },
#endif
{ MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },

5
py/qstrdefs.h

@ -299,6 +299,11 @@ Q(find)
Q(rfind)
Q(rindex)
Q(split)
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
Q(splitlines)
Q(keepends)
Q(\n)
#endif
Q(rsplit)
Q(startswith)
Q(endswith)

1
unix/mpconfigport.h

@ -60,6 +60,7 @@
#define MICROPY_PY_FUNCTION_ATTRS (1)
#define MICROPY_PY_DESCRIPTORS (1)
#define MICROPY_PY_BUILTINS_STR_UNICODE (1)
#define MICROPY_PY_BUILTINS_STR_SPLITLINES (1)
#define MICROPY_PY_BUILTINS_MEMORYVIEW (1)
#define MICROPY_PY_BUILTINS_FROZENSET (1)
#define MICROPY_PY_BUILTINS_COMPILE (1)

Loading…
Cancel
Save