Skip to content
Snippets Groups Projects
Commit ac2f7a7f authored by Paul Sokolovsky's avatar Paul Sokolovsky
Browse files

objstr: Add .splitlines() method.

splitlines() occurs ~179 times in the CPython 3 standard library, so it was
deemed worth implementing. The method has subtle semantic differences
from a plain .split("\n"). It is also defined to work with any end-of-line
combination, but this is not implemented yet - it currently works only with
LF line endings (which should be OK for text strings on any platform,
but is not OK for bytes).
parent 82f37bf0
No related branches found
No related tags found
No related merge requests found
......@@ -407,6 +407,11 @@ typedef double mp_float_t;
#define MICROPY_PY_BUILTINS_STR_UNICODE (0)
#endif
// Whether to provide the str.splitlines() method; off by default unless
// this macro has already been defined before this point
#ifndef MICROPY_PY_BUILTINS_STR_SPLITLINES
#define MICROPY_PY_BUILTINS_STR_SPLITLINES (0)
#endif
// Whether to support bytearray object
#ifndef MICROPY_PY_BUILTINS_BYTEARRAY
#define MICROPY_PY_BUILTINS_BYTEARRAY (1)
......
......@@ -455,8 +455,9 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
}
// Non-zero when c is a space or a horizontal tab.
#define is_ws(c) (' ' == (c) || '\t' == (c))

// Mode values handed to the shared split routine. KEEP and SPLITLINES are
// distinct bits so that they can be OR-ed together.
enum {
    SPLIT = 0,      // plain split(), no extra behaviour
    KEEP = 1,       // bit: keep the character that follows each piece
    SPLITLINES = 2, // bit: splitlines() semantics
};
mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args, int type) {
const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
mp_int_t splits = -1;
mp_obj_t sep = mp_const_none;
......@@ -517,7 +518,13 @@ mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
}
s++;
}
mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
// Length of the piece that has just been scanned (from start up to s).
mp_uint_t len = s - start;
// In splitlines mode an empty piece sitting exactly at the end of the input
// is not appended. The mode is a bit mask, so SPLITLINES has to be tested
// with bitwise '&': a logical '&&' against the non-zero constant would be
// true for every non-zero mode, including KEEP on its own.
if (MP_LIKELY(!(len == 0 && s == top && (type & SPLITLINES)))) {
    // With KEEP set, grow the piece by one so that it also covers the
    // character following it, unless the piece already reaches top.
    if (start + len != top && (type & KEEP)) {
        len++;
    }
    mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, len));
}
if (s >= top) {
break;
}
......@@ -531,6 +538,25 @@ mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
return res;
}
// split() entry point: hands its arguments, untouched, to the shared
// str_split_internal() routine in plain SPLIT mode and returns its result.
mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
    mp_obj_t pieces = str_split_internal(n_args, args, SPLIT);
    return pieces;
}
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
// splitlines(keepends=False)
//
// pos_args[0] is the object the method was called on; every remaining
// positional/keyword argument is matched against the single "keepends"
// boolean. The work is then delegated to str_split_internal() with the
// "\n" qstr as second argument, in SPLITLINES mode (plus KEEP when keepends
// is true).
STATIC mp_obj_t str_splitlines(mp_uint_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
    enum { ARG_keepends, NUM_ARGS };
    static const mp_arg_t arg_spec[NUM_ARGS] = {
        [ARG_keepends] = { MP_QSTR_keepends, MP_ARG_BOOL, {.u_bool = false} },
    };

    // Skip the receiver when parsing: it is not one of the declared arguments.
    mp_arg_val_t parsed[NUM_ARGS];
    mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, NUM_ARGS, arg_spec, parsed);

    int mode = SPLITLINES;
    if (parsed[ARG_keepends].u_bool) {
        mode |= KEEP;
    }

    mp_obj_t split_args[2] = {pos_args[0], MP_OBJ_NEW_QSTR(MP_QSTR__backslash_n)};
    return str_split_internal(2, split_args, mode);
}
#endif
STATIC mp_obj_t str_rsplit(mp_uint_t n_args, const mp_obj_t *args) {
if (n_args < 3) {
// If we don't have split limit, it doesn't matter from which side
......@@ -1763,6 +1789,9 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, mp_obj_str_split);
// splitlines is only built when enabled in the configuration. It uses the
// _KW definition macro because str_splitlines() takes a keyword-argument map
// in addition to its positional arguments.
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
MP_DEFINE_CONST_FUN_OBJ_KW(str_splitlines_obj, 1, str_splitlines);
#endif
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith);
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith);
......@@ -1800,6 +1829,9 @@ STATIC const mp_map_elem_t str8_locals_dict_table[] = {
{ MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
{ MP_OBJ_NEW_QSTR(MP_QSTR_splitlines), (mp_obj_t)&str_splitlines_obj },
#endif
{ MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },
......
......@@ -73,6 +73,7 @@ MP_DECLARE_CONST_FUN_OBJ(str_index_obj);
MP_DECLARE_CONST_FUN_OBJ(str_rindex_obj);
MP_DECLARE_CONST_FUN_OBJ(str_join_obj);
MP_DECLARE_CONST_FUN_OBJ(str_split_obj);
// NOTE: declared unconditionally, whereas the object is defined only when
// MICROPY_PY_BUILTINS_STR_SPLITLINES is enabled; references to it must be
// guarded by the same macro.
MP_DECLARE_CONST_FUN_OBJ(str_splitlines_obj);
MP_DECLARE_CONST_FUN_OBJ(str_rsplit_obj);
MP_DECLARE_CONST_FUN_OBJ(str_startswith_obj);
MP_DECLARE_CONST_FUN_OBJ(str_endswith_obj);
......
......@@ -245,6 +245,9 @@ STATIC const mp_map_elem_t struni_locals_dict_table[] = {
{ MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
{ MP_OBJ_NEW_QSTR(MP_QSTR_splitlines), (mp_obj_t)&str_splitlines_obj },
#endif
{ MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
{ MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },
......
......@@ -299,6 +299,11 @@ Q(find)
Q(rfind)
Q(rindex)
Q(split)
// Interned strings needed only by str.splitlines(): the method name, its
// "keepends" keyword argument, and the newline string it splits on
// (presumably what MP_QSTR__backslash_n refers to - TODO confirm).
#if MICROPY_PY_BUILTINS_STR_SPLITLINES
Q(splitlines)
Q(keepends)
Q(\n)
#endif
Q(rsplit)
Q(startswith)
Q(endswith)
......
......@@ -60,6 +60,7 @@
#define MICROPY_PY_FUNCTION_ATTRS (1)
#define MICROPY_PY_DESCRIPTORS (1)
#define MICROPY_PY_BUILTINS_STR_UNICODE (1)
// Turn on the optional str.splitlines() method in this configuration
#define MICROPY_PY_BUILTINS_STR_SPLITLINES (1)
#define MICROPY_PY_BUILTINS_MEMORYVIEW (1)
#define MICROPY_PY_BUILTINS_FROZENSET (1)
#define MICROPY_PY_BUILTINS_COMPILE (1)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment