From ac2f7a7f6aab135e90dd12d30b51d857628b0a59 Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky <pfalcon@users.sourceforge.net>
Date: Sat, 4 Apr 2015 00:09:23 +0300
Subject: [PATCH] objstr: Add .splitlines() method.

splitlines() occurs ~179 times in the CPython3 standard library, so it was
deemed worth implementing. The method has subtle semantic differences
from plain .split("\n"). It is also defined as working for any end-of-line
combination, but this is not currently implemented - it works only with
LF line endings (which should be OK for text strings on any platform,
but not OK for bytes).
---
 py/mpconfig.h       |  5 +++++
 py/objstr.c         | 36 ++++++++++++++++++++++++++++++++++--
 py/objstr.h         |  1 +
 py/objstrunicode.c  |  3 +++
 py/qstrdefs.h       |  5 +++++
 unix/mpconfigport.h |  1 +
 6 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/py/mpconfig.h b/py/mpconfig.h
index 27ec5dd3e..b2f37e99c 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -407,6 +407,11 @@ typedef double mp_float_t;
 #define MICROPY_PY_BUILTINS_STR_UNICODE (0)
 #endif
 
+// Whether the str.splitlines() method is provided
+#ifndef MICROPY_PY_BUILTINS_STR_SPLITLINES
+#define MICROPY_PY_BUILTINS_STR_SPLITLINES (0)
+#endif
+
 // Whether to support bytearray object
 #ifndef MICROPY_PY_BUILTINS_BYTEARRAY
 #define MICROPY_PY_BUILTINS_BYTEARRAY (1)
diff --git a/py/objstr.c b/py/objstr.c
index a2309364d..84f872fe2 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -455,8 +455,9 @@ STATIC mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
 }
 
 #define is_ws(c) ((c) == ' ' || (c) == '\t')
+enum {SPLIT = 0, KEEP = 1, SPLITLINES = 2}; // values for str_split_internal's 'type'; KEEP and SPLITLINES are bit flags that may be OR-ed together
 
-mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
+STATIC inline mp_obj_t str_split_internal(mp_uint_t n_args, const mp_obj_t *args, int type) {
     const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
     mp_int_t splits = -1;
     mp_obj_t sep = mp_const_none;
@@ -517,7 +518,13 @@ mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
                 }
                 s++;
             }
-            mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
+            mp_uint_t len = s - start;
+            if (MP_LIKELY(!(len == 0 && s == top && (type & SPLITLINES)))) { // SPLITLINES: drop an empty piece found at the very end of the input
+                if (start + len != top && (type & KEEP)) {
+                    len++; // KEEP: piece ended before 'top', so take one more byte with it (presumably the separator)
+                }
+                mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, len));
+            }
             if (s >= top) {
                 break;
             }
@@ -531,6 +538,25 @@ mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) {
     return res;
 }
 
+mp_obj_t mp_obj_str_split(mp_uint_t n_args, const mp_obj_t *args) { // entry point kept under its original name; forwards to the shared helper
+    return str_split_internal(n_args, args, SPLIT); // SPLIT (0): neither KEEP nor SPLITLINES is set
+}
+
+#if MICROPY_PY_BUILTINS_STR_SPLITLINES
+STATIC mp_obj_t str_splitlines(mp_uint_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) { // splitlines method: accepts one optional 'keepends' argument
+    static const mp_arg_t allowed_args[] = {
+        { MP_QSTR_keepends, MP_ARG_BOOL, {.u_bool = false} }, // boolean, defaults to false
+    };
+
+    // parse the arguments that follow pos_args[0] (the string object itself)
+    mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
+    mp_arg_parse_all(n_args - 1, pos_args + 1, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
+
+    mp_obj_t new_args[2] = {pos_args[0], MP_OBJ_NEW_QSTR(MP_QSTR__backslash_n)}; // (string, separator) pair; the qstr is presumably "\n" - only LF is handled
+    return str_split_internal(2, new_args, SPLITLINES | (args[0].u_bool ? KEEP : 0)); // add KEEP only when keepends is true
+}
+#endif
+
 STATIC mp_obj_t str_rsplit(mp_uint_t n_args, const mp_obj_t *args) {
     if (n_args < 3) {
         // If we don't have split limit, it doesn't matter from which side
@@ -1763,6 +1789,9 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_index_obj, 2, 4, str_index);
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
 MP_DEFINE_CONST_FUN_OBJ_2(str_join_obj, str_join);
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_split_obj, 1, 3, mp_obj_str_split);
+#if MICROPY_PY_BUILTINS_STR_SPLITLINES
+MP_DEFINE_CONST_FUN_OBJ_KW(str_splitlines_obj, 1, str_splitlines);
+#endif
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rsplit_obj, 1, 3, str_rsplit);
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith);
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith);
@@ -1800,6 +1829,9 @@ STATIC const mp_map_elem_t str8_locals_dict_table[] = {
     { MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
+    #if MICROPY_PY_BUILTINS_STR_SPLITLINES
+    { MP_OBJ_NEW_QSTR(MP_QSTR_splitlines), (mp_obj_t)&str_splitlines_obj },
+    #endif
     { MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },
diff --git a/py/objstr.h b/py/objstr.h
index 7cba6193d..d028b09f0 100644
--- a/py/objstr.h
+++ b/py/objstr.h
@@ -73,6 +73,7 @@ MP_DECLARE_CONST_FUN_OBJ(str_index_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_rindex_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_join_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_split_obj);
+MP_DECLARE_CONST_FUN_OBJ(str_splitlines_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_rsplit_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_startswith_obj);
 MP_DECLARE_CONST_FUN_OBJ(str_endswith_obj);
diff --git a/py/objstrunicode.c b/py/objstrunicode.c
index 1cf4ed474..4e7f770c3 100644
--- a/py/objstrunicode.c
+++ b/py/objstrunicode.c
@@ -245,6 +245,9 @@ STATIC const mp_map_elem_t struni_locals_dict_table[] = {
     { MP_OBJ_NEW_QSTR(MP_QSTR_rindex), (mp_obj_t)&str_rindex_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_join), (mp_obj_t)&str_join_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t)&str_split_obj },
+    #if MICROPY_PY_BUILTINS_STR_SPLITLINES
+    { MP_OBJ_NEW_QSTR(MP_QSTR_splitlines), (mp_obj_t)&str_splitlines_obj },
+    #endif
     { MP_OBJ_NEW_QSTR(MP_QSTR_rsplit), (mp_obj_t)&str_rsplit_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_startswith), (mp_obj_t)&str_startswith_obj },
     { MP_OBJ_NEW_QSTR(MP_QSTR_endswith), (mp_obj_t)&str_endswith_obj },
diff --git a/py/qstrdefs.h b/py/qstrdefs.h
index 560b16d6a..446d86cb8 100644
--- a/py/qstrdefs.h
+++ b/py/qstrdefs.h
@@ -299,6 +299,11 @@ Q(find)
 Q(rfind)
 Q(rindex)
 Q(split)
+#if MICROPY_PY_BUILTINS_STR_SPLITLINES
+Q(splitlines)
+Q(keepends)
+Q(\n)
+#endif
 Q(rsplit)
 Q(startswith)
 Q(endswith)
diff --git a/unix/mpconfigport.h b/unix/mpconfigport.h
index 891ba82e5..00cb12139 100644
--- a/unix/mpconfigport.h
+++ b/unix/mpconfigport.h
@@ -60,6 +60,7 @@
 #define MICROPY_PY_FUNCTION_ATTRS   (1)
 #define MICROPY_PY_DESCRIPTORS      (1)
 #define MICROPY_PY_BUILTINS_STR_UNICODE (1)
+#define MICROPY_PY_BUILTINS_STR_SPLITLINES (1)
 #define MICROPY_PY_BUILTINS_MEMORYVIEW (1)
 #define MICROPY_PY_BUILTINS_FROZENSET (1)
 #define MICROPY_PY_BUILTINS_COMPILE (1)
-- 
GitLab