From f600a6a0851bc2023e9322bac807f5d2217e8451 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Sun, 25 May 2014 22:34:34 +0100
Subject: [PATCH] py: Slightly improve efficiency of mp_obj_new_str; rename
 str_new.

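Reorder the interning logic in mp_obj_new_str to be more efficient: when
make_qstr_if_not_already is true, call qstr_from_strn directly (it uses
an existing qstr or makes a new one) instead of calling qstr_find_strn
first. qstr_find_strn is now only called when no new qstr may be made.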

str_new is globally accessible, so it should be prefixed with mp_obj_;
rename it to mp_obj_new_str_of_type and update all callers.
---
 py/objstr.c      | 45 +++++++++++++++++++++++----------------------
 py/objstr.h      |  2 +-
 py/objstringio.c |  2 +-
 py/stream.c      |  6 +++---
 4 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/py/objstr.c b/py/objstr.c
index 48d824f07..83fd002d1 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -54,7 +54,6 @@ const mp_obj_t mp_const_empty_bytes;
 
 STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
 STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
-mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
 STATIC NORETURN void bad_implicit_conversion(mp_obj_t self_in);
 STATIC NORETURN void arg_type_mixup();
 
@@ -143,7 +142,7 @@ STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_
             }
             GET_STR_DATA_LEN(args[0], str_data, str_len);
             GET_STR_HASH(args[0], str_hash);
-            mp_obj_str_t *o = str_new(&mp_type_str, NULL, str_len);
+            mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_str, NULL, str_len);
             o->data = str_data;
             o->hash = str_hash;
             return o;
@@ -171,7 +170,7 @@ STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
         }
         GET_STR_DATA_LEN(args[0], str_data, str_len);
         GET_STR_HASH(args[0], str_hash);
-        mp_obj_str_t *o = str_new(&mp_type_bytes, NULL, str_len);
+        mp_obj_str_t *o = mp_obj_new_str_of_type(&mp_type_bytes, NULL, str_len);
         o->data = str_data;
         o->hash = str_hash;
         return o;
@@ -356,7 +355,7 @@ STATIC mp_obj_t str_subscr(mp_obj_t self_in, mp_obj_t index, mp_obj_t value) {
                 nlr_raise(mp_obj_new_exception_msg(&mp_type_NotImplementedError,
                     "Only slices with step=1 (aka None) are supported"));
             }
-            return str_new(type, self_data + slice.start, slice.stop - slice.start);
+            return mp_obj_new_str_of_type(type, self_data + slice.start, slice.stop - slice.start);
         }
 #endif
         uint index_val = mp_get_index(type, self_len, index, false);
@@ -447,7 +446,7 @@ STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
         while (s < top && splits != 0) {
             const byte *start = s;
             while (s < top && !is_ws(*s)) s++;
-            mp_obj_list_append(res, str_new(self_type, start, s - start));
+            mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
             if (s >= top) {
                 break;
             }
@@ -458,7 +457,7 @@ STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
         }
 
         if (s < top) {
-            mp_obj_list_append(res, str_new(self_type, s, top - s));
+            mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, s, top - s));
         }
 
     } else {
@@ -482,7 +481,7 @@ STATIC mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
                 }
                 s++;
             }
-            mp_obj_list_append(res, str_new(self_type, start, s - start));
+            mp_obj_list_append(res, mp_obj_new_str_of_type(self_type, start, s - start));
             if (s >= top) {
                 break;
             }
@@ -537,10 +536,10 @@ STATIC mp_obj_t str_rsplit(uint n_args, const mp_obj_t *args) {
                 s--;
             }
             if (s < beg || splits == 0) {
-                res->items[idx] = str_new(self_type, beg, last - beg);
+                res->items[idx] = mp_obj_new_str_of_type(self_type, beg, last - beg);
                 break;
             }
-            res->items[idx--] = str_new(self_type, s + sep_len, last - s - sep_len);
+            res->items[idx--] = mp_obj_new_str_of_type(self_type, s + sep_len, last - s - sep_len);
             last = s;
             if (splits > 0) {
                 splits--;
@@ -692,7 +691,7 @@ STATIC mp_obj_t str_uni_strip(int type, uint n_args, const mp_obj_t *args) {
     assert(last_good_char_pos >= first_good_char_pos);
     //+1 to accomodate the last character
     machine_uint_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
-    return str_new(self_type, orig_str + first_good_char_pos, stripped_len);
+    return mp_obj_new_str_of_type(self_type, orig_str + first_good_char_pos, stripped_len);
 }
 
 STATIC mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
@@ -1455,9 +1454,9 @@ STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t di
     const byte *position_ptr = find_subbytes(str, str_len, sep, sep_len, direction);
     if (position_ptr != NULL) {
         machine_uint_t position = position_ptr - str;
-        result[0] = str_new(self_type, str, position);
+        result[0] = mp_obj_new_str_of_type(self_type, str, position);
         result[1] = arg;
-        result[2] = str_new(self_type, str + position + sep_len, str_len - position - sep_len);
+        result[2] = mp_obj_new_str_of_type(self_type, str + position + sep_len, str_len - position - sep_len);
     }
 
     return mp_obj_new_tuple(3, result);
@@ -1641,7 +1640,7 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
     return o;
 }
 
-mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) {
+mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uint len) {
     mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
     o->base.type = type;
     o->len = len;
@@ -1656,21 +1655,23 @@ mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) {
 }
 
 mp_obj_t mp_obj_new_str(const char* data, uint len, bool make_qstr_if_not_already) {
-    qstr q = qstr_find_strn(data, len);
-    if (q != MP_QSTR_NULL) {
-        // qstr with this data already exists
-        return MP_OBJ_NEW_QSTR(q);
-    } else if (make_qstr_if_not_already) {
-        // no existing qstr, make a new one
+    if (make_qstr_if_not_already) {
+        // use existing, or make a new qstr
         return MP_OBJ_NEW_QSTR(qstr_from_strn(data, len));
     } else {
-        // no existing qstr, don't make one
-        return str_new(&mp_type_str, (const byte*)data, len);
+        qstr q = qstr_find_strn(data, len);
+        if (q != MP_QSTR_NULL) {
+            // qstr with this data already exists
+            return MP_OBJ_NEW_QSTR(q);
+        } else {
+            // no existing qstr, don't make one
+            return mp_obj_new_str_of_type(&mp_type_str, (const byte*)data, len);
+        }
     }
 }
 
 mp_obj_t mp_obj_new_bytes(const byte* data, uint len) {
-    return str_new(&mp_type_bytes, data, len);
+    return mp_obj_new_str_of_type(&mp_type_bytes, data, len);
 }
 
 bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) {
diff --git a/py/objstr.h b/py/objstr.h
index c0ef701a8..5be137d36 100644
--- a/py/objstr.h
+++ b/py/objstr.h
@@ -36,4 +36,4 @@ typedef struct _mp_obj_str_t {
 #define MP_DEFINE_STR_OBJ(obj_name, str) mp_obj_str_t obj_name = {{&mp_type_str}, 0, sizeof(str) - 1, (const byte*)str};
 
 mp_obj_t mp_obj_str_format(uint n_args, const mp_obj_t *args);
-mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
+mp_obj_t mp_obj_new_str_of_type(const mp_obj_type_t *type, const byte* data, uint len);
diff --git a/py/objstringio.c b/py/objstringio.c
index 7a3e0f893..d9ad604b4 100644
--- a/py/objstringio.c
+++ b/py/objstringio.c
@@ -83,7 +83,7 @@ STATIC machine_int_t stringio_write(mp_obj_t o_in, const void *buf, machine_uint
 
 STATIC mp_obj_t stringio_getvalue(mp_obj_t self_in) {
     mp_obj_stringio_t *self = self_in;
-    return str_new(STREAM_TO_CONTENT_TYPE(self), (byte*)self->vstr->buf, self->vstr->len);
+    return mp_obj_new_str_of_type(STREAM_TO_CONTENT_TYPE(self), (byte*)self->vstr->buf, self->vstr->len);
 }
 STATIC MP_DEFINE_CONST_FUN_OBJ_1(stringio_getvalue_obj, stringio_getvalue);
 
diff --git a/py/stream.c b/py/stream.c
index b1a64cdcf..71aaa4e99 100644
--- a/py/stream.c
+++ b/py/stream.c
@@ -81,7 +81,7 @@ STATIC mp_obj_t stream_read(uint n_args, const mp_obj_t *args) {
         }
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_OSError, "[Errno %d]", error));
     } else {
-        mp_obj_t s = str_new(STREAM_CONTENT_TYPE(o->type->stream_p), buf, out_sz); // will reallocate to use exact size
+        mp_obj_t s = mp_obj_new_str_of_type(STREAM_CONTENT_TYPE(o->type->stream_p), buf, out_sz); // will reallocate to use exact size
         m_free(buf, sz);
         return s;
     }
@@ -158,7 +158,7 @@ STATIC mp_obj_t stream_readall(mp_obj_t self_in) {
         }
     }
 
-    mp_obj_t s = str_new(STREAM_CONTENT_TYPE(o->type->stream_p), (byte*)vstr->buf, total_size);
+    mp_obj_t s = mp_obj_new_str_of_type(STREAM_CONTENT_TYPE(o->type->stream_p), (byte*)vstr->buf, total_size);
     vstr_free(vstr);
     return s;
 }
@@ -207,7 +207,7 @@ STATIC mp_obj_t stream_unbuffered_readline(uint n_args, const mp_obj_t *args) {
         }
     }
     // TODO need a string creation API that doesn't copy the given data
-    mp_obj_t ret = str_new(STREAM_CONTENT_TYPE(o->type->stream_p), (byte*)vstr->buf, vstr->len);
+    mp_obj_t ret = mp_obj_new_str_of_type(STREAM_CONTENT_TYPE(o->type->stream_p), (byte*)vstr->buf, vstr->len);
     vstr_free(vstr);
     return ret;
 }
-- 
GitLab