diff --git a/py/objstr.c b/py/objstr.c
index 3d3845f4a4e5d952b9dda86e6b9f1bac1bf83200..aa10294f53cf9927eb15f35f674c9cbe6d0bd97e 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -464,9 +464,7 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
     return mp_obj_new_str_from_vstr(self_type, &vstr);
 }
 
-enum {SPLIT = 0, KEEP = 1, SPLITLINES = 2};
-
-STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args, int type) {
+mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
     const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
     mp_int_t splits = -1;
     mp_obj_t sep = mp_const_none;
@@ -527,13 +525,7 @@ STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args
                 }
                 s++;
             }
-            mp_uint_t sub_len = s - start;
-            if (MP_LIKELY(!(sub_len == 0 && s == top && (type && SPLITLINES)))) {
-                if (start + sub_len != top && (type & KEEP)) {
-                    sub_len++;
-                }
-                mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, sub_len));
-            }
+            mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
             if (s >= top) {
                 break;
             }
@@ -547,25 +539,54 @@ STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args
     return res;
 }
 
-mp_obj_t mp_obj_str_split(size_t n_args, const mp_obj_t *args) {
-    return str_split_internal(n_args, args, SPLIT);
-}
-
 #if MICROPY_PY_BUILTINS_STR_SPLITLINES
+// splitlines([keepends]): split the str/bytes object in pos_args[0] at "\n", "\r" or "\r\n"
+// and return a list of the pieces; a terminator is kept on its piece only if keepends is true.
 STATIC mp_obj_t str_splitlines(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
+    enum { ARG_keepends };
     static const mp_arg_t allowed_args[] = {
         { MP_QSTR_keepends, MP_ARG_BOOL, {.u_bool = false} },
     };
 
     // parse args
-    struct {
-        mp_arg_val_t keepends;
-    } args;
-    mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args,
-        MP_ARRAY_SIZE(allowed_args), allowed_args, (mp_arg_val_t*)&args);
-
-    mp_obj_t new_args[2] = {pos_args[0], MP_OBJ_NEW_QSTR(MP_QSTR__0x0a_)};
-    return str_split_internal(2, new_args, SPLITLINES | (args.keepends.u_bool ? KEEP : 0));
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    const mp_obj_type_t *self_type = mp_obj_get_type(pos_args[0]);
+    mp_obj_t res = mp_obj_new_list(0, NULL);
+
+    GET_STR_DATA_LEN(pos_args[0], s, len);
+    const byte *top = s + len;
+
+    while (s < top) {
+        const byte *start = s;
+        size_t match = 0;
+        // scan to the next terminator; match is its length in bytes (0 if the data ran out first)
+        while (s < top) {
+            if (*s == '\n') {
+                match = 1;
+                break;
+            } else if (*s == '\r') {
+                // only inspect s[1] while it still lies inside the len bytes of data
+                if (s + 1 < top && s[1] == '\n') {
+                    match = 2;
+                } else {
+                    match = 1;
+                }
+                break;
+            }
+            s++;
+        }
+        size_t sub_len = s - start;
+        if (args[ARG_keepends].u_bool) {
+            sub_len += match;
+        }
+        mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, sub_len));
+        // step over the terminator; if it was the last thing in the data the outer loop ends here
+        s += match;
+    }
+
+    return res;
 }
 #endif
 
diff --git a/tests/basics/string_splitlines.py b/tests/basics/string_splitlines.py
index cb4dacef9fd9d2dddfb5bd3582b4d2e1aa25158f..79d22b1852d4b8d569a3ceaad5fd990062629323 100644
--- a/tests/basics/string_splitlines.py
+++ b/tests/basics/string_splitlines.py
@@ -1,13 +1,31 @@
-try:
-    str.splitlines
-except:
-    import sys
-    print("SKIP")
-    sys.exit()
+# test string.splitlines() method
 
+# test \n as newline
 print("foo\nbar".splitlines())
 print("foo\nbar\n".splitlines())
+print("foo and\nbar\n".splitlines())
+print("foo\nbar\n\n".splitlines())
+print("foo\n\nbar\n\n".splitlines())
+print("\nfoo\nbar\n".splitlines())
+
+# test \r as newline
+print("foo\rbar\r".splitlines())
+print("\rfoo and\r\rbar\r".splitlines())
+
+# test \r\n as newline
+print("foo\r\nbar\r\n".splitlines())
+print("\r\nfoo and\r\n\r\nbar\r\n".splitlines())
+
+# test keepends arg
 print("foo\nbar".splitlines(True))
 print("foo\nbar\n".splitlines(True))
-print("foo\nbar".splitlines(keepends=True))
-print("foo\nbar\n".splitlines(keepends=True))
+print("foo\nbar\n\n".splitlines(True))
+print("foo\rbar".splitlines(keepends=True))
+print("foo\rbar\r\r".splitlines(keepends=True))
+print("foo\r\nbar".splitlines(keepends=True))
+print("foo\r\nbar\r\n\r\n".splitlines(keepends=True))
+
+# test splitting bytes objects
+print(b"foo\nbar".splitlines())
+print(b"foo\nbar\n".splitlines())
+print(b"foo\r\nbar\r\n\r\n".splitlines(True))