From 4c316552c16fa2bcb77e007d330dc32beaf6e652 Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky
Date: Tue, 21 Jan 2014 05:00:21 +0200
Subject: [PATCH] Implement str.split(None).

Note that splitting by explicit string is not implemented so far.
---
 py/objstr.c                  | 50 ++++++++++++++++++++++++++++++++++++
 tests/basics/string_split.py |  8 ++++++
 2 files changed, 58 insertions(+)
 create mode 100644 tests/basics/string_split.py

diff --git a/py/objstr.c b/py/objstr.c
index 4adfef6f80..758e8c2938 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -175,6 +175,54 @@ bad_arg:
     nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's"));
 }
 
+// Whitespace as understood by str.split() with no separator: the six ASCII
+// whitespace characters. Note: the argument is evaluated more than once.
+#define is_ws(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r' || (c) == '\v' || (c) == '\f')
+
+// str.split([sep[, maxsplit]]): split the string on runs of whitespace.
+//   args[0] - the string to split
+//   args[1] - separator; only None (the default) is supported so far
+//   args[2] - max number of splits (read as a small int, not type-checked); negative (the default) means no limit
+// Returns a new list of the resulting substrings.
+static mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
+    int splits = -1;
+    mp_obj_t sep = mp_const_none;
+    if (n_args > 1) {
+        sep = args[1];
+        if (n_args > 2) {
+            splits = MP_OBJ_SMALL_INT_VALUE(args[2]);
+        }
+    }
+    // Splitting by an explicit separator string is not implemented yet
+    assert(sep == mp_const_none);
+    mp_obj_t res = mp_obj_new_list(0, NULL);
+    const char *s = qstr_str(mp_obj_str_get(args[0]));
+    const char *start;
+
+    // Initial whitespace is not counted as split, so we pre-do it
+    while (is_ws(*s)) s++;
+    while (*s && splits != 0) {
+        start = s;
+        while (*s != 0 && !is_ws(*s)) s++;
+        rt_list_append(res, MP_OBJ_NEW_QSTR(qstr_from_strn_copy(start, s - start)));
+        if (*s == 0) {
+            break;
+        }
+        while (is_ws(*s)) s++;
+        if (splits > 0) {
+            splits--;
+        }
+    }
+
+    // Split limit reached with text left over: the remainder (including any
+    // trailing whitespace) becomes the last element
+    if (*s != 0) {
+        rt_list_append(res, MP_OBJ_NEW_QSTR(qstr_from_strn_copy(s, strlen(s))));
+    }
+
+    return res;
+}
+
 static bool chr_in_str(const char* const str, const size_t str_len, const char c) {
     for (size_t i = 0; i < str_len; i++) {
         if (str[i] == c) {
@@ -293,12 +341,14 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
 
 static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
 static MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
+static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, str_split);
 static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
 static MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format);
 
 static const mp_method_t str_type_methods[] = {
     { "find", &str_find_obj },
     { "join", &str_join_obj },
+    { "split", &str_split_obj },
     { "strip", &str_strip_obj },
     { "format", &str_format_obj },
     { NULL, NULL }, // end-of-list sentinel
diff --git a/tests/basics/string_split.py b/tests/basics/string_split.py
new file mode 100644
index 0000000000..f73cb4291e
--- /dev/null
+++ b/tests/basics/string_split.py
@@ -0,0 +1,8 @@
+print("a b".split())
+print(" a b ".split(None))
+print(" a b ".split(None, 1))
+print(" a b ".split(None, 2))
+print(" a b c ".split(None, 1))
+print(" a b c ".split(None, 0))
+print(" a b c ".split(None, -1))
+print("a\nb\tc\r\nd".split())