From 5fa93b67557f21c22a41449c3266571c427f6798 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Wed, 22 Jan 2014 14:35:10 +0000
Subject: [PATCH] Second stage of qstr revamp: uPy str object can be qstr or
 not.

---
 py/builtin.c         |  16 ++-
 py/builtineval.c     |   2 +-
 py/builtinimport.c   |   7 +-
 py/obj.c             |  52 ++------
 py/obj.h             |  18 ++-
 py/objarray.c        |   6 +-
 py/objfun.c          |   8 +-
 py/objint.c          |  35 +++--
 py/objint_longlong.c |   8 +-
 py/objmodule.c       |   2 +-
 py/objstr.c          | 298 ++++++++++++++++++++++++++++---------------
 py/objtype.c         |   6 +-
 py/qstr.c            |  10 +-
 py/qstr.h            |   3 +
 py/runtime.c         |  85 ++++++------
 py/stream.c          |  17 +--
 stm/lcd.c            |   4 +-
 stm/main.c           |  39 +++---
 stm/string0.c        |   2 +-
 stm/usart.c          |  11 +-
 unix-cpy/main.c      |   1 +
 unix/file.c          |   4 +-
 unix/main.c          |   2 +-
 unix/socket.c        |  14 +-
 24 files changed, 376 insertions(+), 274 deletions(-)

diff --git a/py/builtin.c b/py/builtin.c
index 9cbc03767..56cb49de7 100644
--- a/py/builtin.c
+++ b/py/builtin.c
@@ -139,8 +139,8 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_callable_obj, mp_builtin_callable);
 static mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
     int ord = mp_obj_get_int(o_in);
     if (0 <= ord && ord <= 0x10ffff) {
-        char str[1] = {ord};
-        return mp_obj_new_str(qstr_from_strn(str, 1));
+        byte str[1] = {ord};
+        return mp_obj_new_str(str, 1, true);
     } else {
         nlr_jump(mp_obj_new_exception_msg(MP_QSTR_ValueError, "chr() arg not in range(0x110000)"));
     }
@@ -258,7 +258,7 @@ MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_next_obj, mp_builtin_next);
 
 static mp_obj_t mp_builtin_ord(mp_obj_t o_in) {
     uint len;
-    const byte *str = qstr_data(mp_obj_get_qstr(o_in), &len);
+    const byte *str = mp_obj_str_get_data(o_in, &len);
     if (len == 1) {
         return mp_obj_new_int(str[0]);
     } else {
@@ -305,8 +305,9 @@ MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_range_obj, 1, 3, mp_builtin_range
 static mp_obj_t mp_builtin_repr(mp_obj_t o_in) {
     vstr_t *vstr = vstr_new();
     mp_obj_print_helper((void (*)(void *env, const char *fmt, ...))vstr_printf, vstr, o_in, PRINT_REPR);
-    // TODO don't intern this string
-    return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len));
+    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false); // false: do not create a new qstr for the repr text; an existing qstr is still reused
+    vstr_free(vstr); // safe to release: with false, mp_obj_new_str either copied the bytes or returned an existing qstr
+    return s;
 }
 
 MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_repr_obj, mp_builtin_repr);
@@ -345,8 +346,9 @@ MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted);
 static mp_obj_t mp_builtin_str(mp_obj_t o_in) {
     vstr_t *vstr = vstr_new();
     mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, o_in, PRINT_STR);
-    // TODO don't intern this string
-    return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len));
+    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false); // false: do not create a new qstr for the converted text; an existing qstr is still reused
+    vstr_free(vstr); // safe to release: with false, mp_obj_new_str either copied the bytes or returned an existing qstr
+    return s;
 }
 
 MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str);
diff --git a/py/builtineval.c b/py/builtineval.c
index 67072a0fa..0e8f9e31d 100644
--- a/py/builtineval.c
+++ b/py/builtineval.c
@@ -21,7 +21,7 @@
 
 static mp_obj_t mp_builtin_eval(mp_obj_t o_in) {
     uint str_len;
-    const byte *str = qstr_data(mp_obj_get_qstr(o_in), &str_len);
+    const byte *str = mp_obj_str_get_data(o_in, &str_len);
 
     // create the lexer
     mp_lexer_t *lex = mp_lexer_new_from_str_len("<string>", (const char*)str, str_len, 0);
diff --git a/py/builtinimport.c b/py/builtinimport.c
index 4cdad4e24..3cfd64e88 100644
--- a/py/builtinimport.c
+++ b/py/builtinimport.c
@@ -29,7 +29,10 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) {
     }
     */
 
-    qstr mod_name = mp_obj_get_qstr(args[0]);
+    uint mod_name_l;
+    const byte *mod_name_s = mp_obj_str_get_data(args[0], &mod_name_l);
+    qstr mod_name = qstr_from_strn((const char*)mod_name_s, mod_name_l);
+
     mp_obj_t loaded = mp_obj_module_get(mod_name);
     if (loaded != MP_OBJ_NULL) {
         return loaded;
@@ -43,7 +46,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) {
     }
 
     // create a new module object
-    mp_obj_t module_obj = mp_obj_new_module(mp_obj_get_qstr(args[0]));
+    mp_obj_t module_obj = mp_obj_new_module(mod_name);
 
     // save the old context
     mp_map_t *old_locals = rt_locals_get();
diff --git a/py/obj.c b/py/obj.c
index a78af595a..c2f726bb9 100644
--- a/py/obj.c
+++ b/py/obj.c
@@ -14,7 +14,7 @@
 #include "runtime.h"
 #include "map.h"
 
-mp_obj_t mp_obj_get_type(mp_obj_t o_in) {
+mp_obj_type_t *mp_obj_get_type(mp_obj_t o_in) {
     if (MP_OBJ_IS_SMALL_INT(o_in)) {
         return (mp_obj_t)&int_type;
     } else if (MP_OBJ_IS_QSTR(o_in)) {
@@ -26,14 +26,7 @@ mp_obj_t mp_obj_get_type(mp_obj_t o_in) {
 }
 
 const char *mp_obj_get_type_str(mp_obj_t o_in) {
-    if (MP_OBJ_IS_SMALL_INT(o_in)) {
-        return "int";
-    } else if (MP_OBJ_IS_QSTR(o_in)) {
-        return "str";
-    } else {
-        mp_obj_base_t *o = o_in;
-        return o->type->name;
-    }
+    return mp_obj_get_type(o_in)->name; // mp_obj_get_type has its own branches for tagged small ints and qstrs, so they need no special-casing here
 }
 
 void printf_wrapper(void *env, const char *fmt, ...) {
@@ -44,17 +37,11 @@ void printf_wrapper(void *env, const char *fmt, ...) {
 }
 
 void mp_obj_print_helper(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t o_in, mp_print_kind_t kind) {
-    if (MP_OBJ_IS_SMALL_INT(o_in)) {
-        print(env, "%d", (int)MP_OBJ_SMALL_INT_VALUE(o_in));
-    } else if (MP_OBJ_IS_QSTR(o_in)) {
-        mp_obj_str_print_qstr(print, env, MP_OBJ_QSTR_VALUE(o_in), kind);
+    mp_obj_type_t *type = mp_obj_get_type(o_in); // one lookup for every kind of object, including tagged small ints and qstrs
+    if (type->print != NULL) { // dispatch to the print slot of the type when it has one
+        type->print(print, env, o_in, kind);
     } else {
-        mp_obj_base_t *o = o_in;
-        if (o->type->print != NULL) {
-            o->type->print(print, env, o_in, kind);
-        } else {
-            print(env, "<%s>", o->type->name);
-        }
+        print(env, "<%s>", type->name); // no print slot: fall back to the type name in angle brackets
     }
 }
 
@@ -94,12 +81,10 @@ machine_int_t mp_obj_hash(mp_obj_t o_in) {
         return 1; // needs to hash to same as the integer 1, since True==1
     } else if (MP_OBJ_IS_SMALL_INT(o_in)) {
         return MP_OBJ_SMALL_INT_VALUE(o_in);
-    } else if (MP_OBJ_IS_QSTR(o_in)) {
-        return MP_OBJ_QSTR_VALUE(o_in);
+    } else if (MP_OBJ_IS_STR(o_in)) {
+        return mp_obj_str_get_hash(o_in);
     } else if (MP_OBJ_IS_TYPE(o_in, &none_type)) {
         return (machine_int_t)o_in;
-    } else if (MP_OBJ_IS_TYPE(o_in, &str_type)) {
-        return mp_obj_str_get(o_in);
     } else {
         assert(0);
         return 0;
@@ -138,10 +123,8 @@ bool mp_obj_equal(mp_obj_t o1, mp_obj_t o2) {
             }
             return false;
         }
-    } else if (MP_OBJ_IS_QSTR(o1) || MP_OBJ_IS_QSTR(o2)) {
-        return false;
-    } else if (MP_OBJ_IS_TYPE(o1, &str_type) && MP_OBJ_IS_TYPE(o2, &str_type)) {
-        return mp_obj_str_get(o1) == mp_obj_str_get(o2);
+    } else if (MP_OBJ_IS_STR(o1) && MP_OBJ_IS_STR(o2)) {
+        return mp_obj_str_equal(o1, o2);
     } else {
         mp_obj_base_t *o = o1;
         if (o->type->binary_op != NULL) {
@@ -218,17 +201,6 @@ void mp_obj_get_complex(mp_obj_t arg, mp_float_t *real, mp_float_t *imag) {
 }
 #endif
 
-qstr mp_obj_get_qstr(mp_obj_t arg) {
-    if (MP_OBJ_IS_QSTR(arg)) {
-        return MP_OBJ_QSTR_VALUE(arg);
-    } else if (MP_OBJ_IS_TYPE(arg, &str_type)) {
-        return mp_obj_str_get(arg);
-    } else {
-        assert(0);
-        return 0;
-    }
-}
-
 mp_obj_t *mp_obj_get_array_fixed_n(mp_obj_t o_in, machine_int_t n) {
     if (MP_OBJ_IS_TYPE(o_in, &tuple_type) || MP_OBJ_IS_TYPE(o_in, &list_type)) {
         uint seq_len;
@@ -266,8 +238,8 @@ uint mp_get_index(const mp_obj_type_t *type, machine_uint_t len, mp_obj_t index)
 // may return MP_OBJ_NULL
 mp_obj_t mp_obj_len_maybe(mp_obj_t o_in) {
     mp_small_int_t len = 0;
-    if (MP_OBJ_IS_TYPE(o_in, &str_type)) {
-        len = qstr_len(mp_obj_str_get(o_in));
+    if (MP_OBJ_IS_STR(o_in)) {
+        len = mp_obj_str_get_len(o_in);
     } else if (MP_OBJ_IS_TYPE(o_in, &tuple_type)) {
         uint seq_len;
         mp_obj_t *seq_items;
diff --git a/py/obj.h b/py/obj.h
index e98cc552e..05ccb2757 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -40,6 +40,7 @@ typedef struct _mp_obj_base_t mp_obj_base_t;
 #define MP_OBJ_IS_QSTR(o) ((((mp_small_int_t)(o)) & 3) == 2)
 #define MP_OBJ_IS_OBJ(o) ((((mp_small_int_t)(o)) & 3) == 0)
 #define MP_OBJ_IS_TYPE(o, t) (MP_OBJ_IS_OBJ(o) && (((mp_obj_base_t*)(o))->type == (t)))
+#define MP_OBJ_IS_STR(o) (MP_OBJ_IS_QSTR(o) || MP_OBJ_IS_TYPE(o, &str_type))
 
 #define MP_OBJ_SMALL_INT_VALUE(o) (((mp_small_int_t)(o)) >> 1)
 #define MP_OBJ_NEW_SMALL_INT(small_int) ((mp_obj_t)(((small_int) << 1) | 1))
@@ -199,14 +200,14 @@ extern const mp_obj_t mp_const_stop_iteration; // special object indicating end
 
 // General API for objects
 
-mp_obj_t mp_obj_new_type(qstr name, mp_obj_t bases_tuple, mp_obj_t locals_dict);
+mp_obj_t mp_obj_new_type(const char *name, mp_obj_t bases_tuple, mp_obj_t locals_dict);
 mp_obj_t mp_obj_new_none(void);
 mp_obj_t mp_obj_new_bool(bool value);
 mp_obj_t mp_obj_new_cell(mp_obj_t obj);
 mp_obj_t mp_obj_new_int(machine_int_t value);
 mp_obj_t mp_obj_new_int_from_uint(machine_uint_t value);
 mp_obj_t mp_obj_new_int_from_long_str(const char *s);
-mp_obj_t mp_obj_new_str(qstr qstr);
+mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already);
 #if MICROPY_ENABLE_FLOAT
 mp_obj_t mp_obj_new_float(mp_float_t val);
 mp_obj_t mp_obj_new_complex(mp_float_t real, mp_float_t imag);
@@ -231,7 +232,7 @@ mp_obj_t mp_obj_new_slice(mp_obj_t start, mp_obj_t stop, mp_obj_t step);
 mp_obj_t mp_obj_new_bound_meth(mp_obj_t meth, mp_obj_t self);
 mp_obj_t mp_obj_new_module(qstr module_name);
 
-mp_obj_t mp_obj_get_type(mp_obj_t o_in);
+mp_obj_type_t *mp_obj_get_type(mp_obj_t o_in);
 const char *mp_obj_get_type_str(mp_obj_t o_in);
 
 void mp_obj_print_helper(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t o_in, mp_print_kind_t kind);
@@ -248,7 +249,7 @@ machine_int_t mp_obj_get_int(mp_obj_t arg);
 mp_float_t mp_obj_get_float(mp_obj_t self_in);
 void mp_obj_get_complex(mp_obj_t self_in, mp_float_t *real, mp_float_t *imag);
 #endif
-qstr mp_obj_get_qstr(mp_obj_t arg);
+//qstr mp_obj_get_qstr(mp_obj_t arg);
 mp_obj_t *mp_obj_get_array_fixed_n(mp_obj_t o, machine_int_t n);
 uint mp_get_index(const mp_obj_type_t *type, machine_uint_t len, mp_obj_t index);
 mp_obj_t mp_obj_len_maybe(mp_obj_t o_in); /* may return NULL */
@@ -279,8 +280,13 @@ void mp_obj_exception_get_traceback(mp_obj_t self_in, machine_uint_t *n, machine
 
 // str
 extern const mp_obj_type_t str_type;
-qstr mp_obj_str_get(mp_obj_t self_in);
-void mp_obj_str_print_qstr(void (*print)(void *env, const char *fmt, ...), void *env, qstr q, mp_print_kind_t kind);
+mp_obj_t mp_obj_str_builder_start(uint len, byte **data);
+mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in);
+bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2);
+uint mp_obj_str_get_hash(mp_obj_t self_in);
+uint mp_obj_str_get_len(mp_obj_t self_in);
+const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated
+const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len);
 
 #if MICROPY_ENABLE_FLOAT
 // float
diff --git a/py/objarray.c b/py/objarray.c
index 343a3f6e9..4f3656115 100644
--- a/py/objarray.c
+++ b/py/objarray.c
@@ -167,9 +167,11 @@ static mp_obj_t array_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const m
     switch (n_args) {
         case 2:
         {
-            const char *code = qstr_str(mp_obj_str_get(args[0]));
+            // TODO check args
+            uint l;
+            const byte *s = mp_obj_str_get_data(args[0], &l);
             mp_obj_t initializer = args[1];
-            return array_construct(*code, initializer);
+            return array_construct(*s, initializer);
         }
 
         default:
diff --git a/py/objfun.c b/py/objfun.c
index b749860c2..1f6ad68ea 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -56,8 +56,7 @@ mp_obj_t fun_native_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_
         // TODO if n_kw==0 then don't allocate any memory for map (either pass NULL or allocate it on the heap)
         mp_map_t *kw_args = mp_map_new(n_kw);
         for (int i = 0; i < 2 * n_kw; i += 2) {
-            qstr name = mp_obj_str_get(args[n_args + i]);
-            mp_map_lookup(kw_args, MP_OBJ_NEW_QSTR(name), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = args[n_args + i + 1];
+            mp_map_lookup(kw_args, args[n_args + i], MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = args[n_args + i + 1];
         }
         mp_obj_t res = ((mp_fun_kw_t)self->fun)(n_args, args, kw_args);
         // TODO clean up kw_args
@@ -214,9 +213,10 @@ machine_uint_t convert_obj_for_inline_asm(mp_obj_t obj) {
         return 0;
     } else if (obj == mp_const_true) {
         return 1;
-    } else if (MP_OBJ_IS_TYPE(obj, &str_type)) {
+    } else if (MP_OBJ_IS_STR(obj)) {
         // pointer to the string (it's probably constant though!)
-        return (machine_uint_t)qstr_str(mp_obj_str_get(obj));
+        uint l;
+        return (machine_uint_t)mp_obj_str_get_data(obj, &l);
 #if MICROPY_ENABLE_FLOAT
     } else if (MP_OBJ_IS_TYPE(obj, &float_type)) {
         // convert float to int (could also pass in float registers)
diff --git a/py/objint.c b/py/objint.c
index 02628b7ef..1305f1900 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -20,34 +20,35 @@ static mp_obj_t int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_
             return MP_OBJ_NEW_SMALL_INT(0);
 
         case 1:
-            if (MP_OBJ_IS_TYPE(args[0], &str_type)) {
+            if (MP_OBJ_IS_STR(args[0])) {
                 // a string, parse it
-                return MP_OBJ_NEW_SMALL_INT(strtonum(qstr_str(mp_obj_get_qstr(args[0])), 0));
+                uint l;
+                const byte *s = mp_obj_str_get_data(args[0], &l);
+                return MP_OBJ_NEW_SMALL_INT(strtonum((const char*)s, 0));
             } else {
                 return MP_OBJ_NEW_SMALL_INT(mp_obj_get_int(args[0]));
             }
 
         case 2:
+        {
             // should be a string, parse it
             // TODO proper error checking of argument types
-            return MP_OBJ_NEW_SMALL_INT(strtonum(qstr_str(mp_obj_get_qstr(args[0])), mp_obj_get_int(args[1])));
+            uint l;
+            const byte *s = mp_obj_str_get_data(args[0], &l);
+            return MP_OBJ_NEW_SMALL_INT(strtonum((const char*)s, mp_obj_get_int(args[1])));
+        }
 
         default:
             nlr_jump(mp_obj_new_exception_msg_1_arg(MP_QSTR_TypeError, "int takes at most 2 arguments, %d given", (void*)(machine_int_t)n_args));
     }
 }
 
-const mp_obj_type_t int_type = {
-    { &mp_const_type },
-    "int",
-    .print = int_print,
-    .make_new = int_make_new,
-    .binary_op = int_binary_op,
-};
-
 #if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
-// This is called only for non-SMALL_INT
+
 void int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
+    if (MP_OBJ_IS_SMALL_INT(self_in)) { // in this MICROPY_LONGINT_IMPL_NONE build only small ints are printed; any other value prints nothing
+        print(env, "%d", (int)MP_OBJ_SMALL_INT_VALUE(self_in));
+    }
 }
 
 // This is called only for non-SMALL_INT
@@ -88,4 +89,12 @@ machine_int_t mp_obj_int_get_checked(mp_obj_t self_in) {
     return MP_OBJ_SMALL_INT_VALUE(self_in);
 }
 
-#endif
+#endif // MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
+
+const mp_obj_type_t int_type = {
+    { &mp_const_type },
+    "int",
+    .print = int_print,
+    .make_new = int_make_new,
+    .binary_op = int_binary_op,
+};
diff --git a/py/objint_longlong.c b/py/objint_longlong.c
index fd13a038b..7eaee3bc9 100644
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@@ -24,8 +24,12 @@ static mp_obj_t mp_obj_new_int_from_ll(long long val);
 #endif
 
 void int_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
-    mp_obj_int_t *self = self_in;
-    print(env, "%lld" SUFFIX, self->val);
+    if (MP_OBJ_IS_SMALL_INT(self_in)) { // small ints are tagged values, not mp_obj_int_t pointers, so they are handled before any dereference
+        print(env, "%d", (int)MP_OBJ_SMALL_INT_VALUE(self_in));
+    } else {
+        mp_obj_int_t *self = self_in; // everything else is treated as a long-long int object
+        print(env, "%lld" SUFFIX, self->val);
+    }
 }
 
 mp_obj_t int_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
diff --git a/py/objmodule.c b/py/objmodule.c
index 73f146131..749d345bc 100644
--- a/py/objmodule.c
+++ b/py/objmodule.c
@@ -64,7 +64,7 @@ mp_obj_t mp_obj_new_module(qstr module_name) {
     o->name = module_name;
     o->globals = mp_map_new(1);
     el->value = o;
-    mp_map_lookup(o->globals, MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = mp_obj_new_str(module_name);
+    mp_map_lookup(o->globals, MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = MP_OBJ_NEW_QSTR(module_name);
     return o;
 }
 
diff --git a/py/objstr.c b/py/objstr.c
index f4dc85739..09d4958fb 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -14,28 +14,35 @@
 
 typedef struct _mp_obj_str_t {
     mp_obj_base_t base;
-    qstr qstr;
+    machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c)
+    machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
+    byte data[]; // flexible array member: the len string bytes followed by the appended null byte, stored inline after the header
 } mp_obj_str_t;
 
-static mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur);
+// use this macro to extract the string hash; declares 'uint str_hash' in the enclosing scope and works for both qstr and non-qstr str objects
+#define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)(str_obj_in))->hash; }
+
+// use this macro to extract the string length; declares 'uint str_len' in the enclosing scope and works for both qstr and non-qstr str objects
+#define GET_STR_LEN(str_obj_in, str_len) uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_len = qstr_len(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_len = ((mp_obj_str_t*)(str_obj_in))->len; }
+
+// use this macro to extract the string data and length; declares 'const byte *str_data' and 'uint str_len' in the enclosing scope (the argument is parenthesised so the cast applies to the whole expression)
+#define GET_STR_DATA_LEN(str_obj_in, str_data, str_len) const byte *str_data; uint str_len; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_data = qstr_data(MP_OBJ_QSTR_VALUE(str_obj_in), &str_len); } else { str_len = ((mp_obj_str_t*)(str_obj_in))->len; str_data = ((mp_obj_str_t*)(str_obj_in))->data; }
+
+static mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur);
 
 /******************************************************************************/
 /* str                                                                        */
 
-void mp_obj_str_print_qstr(void (*print)(void *env, const char *fmt, ...), void *env, qstr q, mp_print_kind_t kind) {
+void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
+    GET_STR_DATA_LEN(self_in, str_data, str_len); // declares str_data/str_len; handles both qstr and non-qstr strings
     if (kind == PRINT_STR) {
-        print(env, "%s", qstr_str(q));
+        print(env, "%.*s", str_len, str_data); // precision bounds the output to at most str_len bytes
     } else {
         // TODO need to escape chars etc
-        print(env, "'%s'", qstr_str(q));
+        print(env, "'%.*s'", str_len, str_data); // repr form: same bytes wrapped in single quotes
     }
 }
 
-void str_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
-    mp_obj_str_t *self = self_in;
-    mp_obj_str_print_qstr(print, env, self->qstr, kind);
-}
-
 // like strstr but with specified length and allows \0 bytes
 // TODO replace with something more efficient/standard
 static const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
@@ -57,16 +64,14 @@ static const byte *find_subbytes(const byte *haystack, uint hlen, const byte *ne
 }
 
 mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
-    mp_obj_str_t *lhs = lhs_in;
-    uint lhs_len;
-    const byte *lhs_data = qstr_data(lhs->qstr, &lhs_len);
+    GET_STR_DATA_LEN(lhs_in, lhs_data, lhs_len);
     switch (op) {
         case RT_BINARY_OP_SUBSCR:
             // TODO: need predicate to check for int-like type (bools are such for example)
             // ["no", "yes"][1 == 2] is common idiom
             if (MP_OBJ_IS_SMALL_INT(rhs_in)) {
-                uint index = mp_get_index(lhs->base.type, lhs_len, rhs_in);
-                return mp_obj_new_str(qstr_from_strn((const char*)lhs_data + index, 1));
+                uint index = mp_get_index(mp_obj_get_type(lhs_in), lhs_len, rhs_in);
+                return mp_obj_new_str(lhs_data + index, 1, true);
 #if MICROPY_ENABLE_SLICE
             } else if (MP_OBJ_IS_TYPE(rhs_in, &slice_type)) {
                 machine_int_t start, stop, step;
@@ -89,7 +94,7 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
                 } else if (stop > lhs_len) {
                     stop = lhs_len;
                 }
-                return mp_obj_new_str(qstr_from_strn((const char*)lhs_data + start, stop - start));
+                return mp_obj_new_str(lhs_data + start, stop - start, false);
 #endif
             } else {
                 // Message doesn't match CPython, but we don't have so much bytes as they
@@ -99,37 +104,48 @@ mp_obj_t str_binary_op(int op, mp_obj_t lhs_in, mp_obj_t rhs_in) {
 
         case RT_BINARY_OP_ADD:
         case RT_BINARY_OP_INPLACE_ADD:
-            if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
+            if (MP_OBJ_IS_STR(rhs_in)) {
                 // add 2 strings
-                uint rhs_len;
-                const byte *rhs_data = qstr_data(((mp_obj_str_t*)rhs_in)->qstr, &rhs_len);
+
+                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                 int alloc_len = lhs_len + rhs_len;
+
+                /* code for making qstr
                 byte *q_ptr;
                 byte *val = qstr_build_start(alloc_len, &q_ptr);
                 memcpy(val, lhs_data, lhs_len);
                 memcpy(val + lhs_len, rhs_data, rhs_len);
-                return mp_obj_new_str(qstr_build_end(q_ptr));
+                return MP_OBJ_NEW_QSTR(qstr_build_end(q_ptr));
+                */
+
+                // code for non-qstr
+                byte *data;
+                mp_obj_t s = mp_obj_str_builder_start(alloc_len, &data);
+                memcpy(data, lhs_data, lhs_len);
+                memcpy(data + lhs_len, rhs_data, rhs_len);
+                return mp_obj_str_builder_end(s);
             }
             break;
+
         case RT_COMPARE_OP_IN:
         case RT_COMPARE_OP_NOT_IN:
             /* NOTE `a in b` is `b.__contains__(a)` */
-            if (MP_OBJ_IS_TYPE(rhs_in, &str_type)) {
-                uint rhs_len;
-                const byte *rhs_data = qstr_data(((mp_obj_str_t*)rhs_in)->qstr, &rhs_len);
+            if (MP_OBJ_IS_STR(rhs_in)) {
+                GET_STR_DATA_LEN(rhs_in, rhs_data, rhs_len);
                 return MP_BOOL((op == RT_COMPARE_OP_IN) ^ (find_subbytes(lhs_data, lhs_len, rhs_data, rhs_len) == NULL));
-                return mp_const_false;
             }
             break;
+
         case RT_BINARY_OP_MULTIPLY:
         {
             if (!MP_OBJ_IS_SMALL_INT(rhs_in)) {
                 return NULL;
             }
             int n = MP_OBJ_SMALL_INT_VALUE(rhs_in);
-            char *s = m_new(char, lhs_len * n);
-            mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, s);
-            return MP_OBJ_NEW_QSTR(qstr_from_strn_take(s, lhs_len * n, lhs_len * n));
+            byte *data;
+            mp_obj_t s = mp_obj_str_builder_start(lhs_len * n, &data);
+            mp_seq_multiply(lhs_data, sizeof(*lhs_data), lhs_len, n, data);
+            return mp_obj_str_builder_end(s);
         }
     }
 
@@ -141,12 +157,10 @@ static mp_obj_t str_getiter(mp_obj_t o_in) {
 }
 
 mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
-    assert(MP_OBJ_IS_TYPE(self_in, &str_type));
-    mp_obj_str_t *self = self_in;
+    assert(MP_OBJ_IS_STR(self_in));
 
     // get separation string
-    const char *sep_str = qstr_str(self->qstr);
-    size_t sep_len = strlen(sep_str);
+    GET_STR_DATA_LEN(self_in, sep_str, sep_len);
 
     // process args
     uint seq_len;
@@ -162,32 +176,31 @@ mp_obj_t str_join(mp_obj_t self_in, mp_obj_t arg) {
     // count required length
     int required_len = 0;
     for (int i = 0; i < seq_len; i++) {
-        if (!MP_OBJ_IS_TYPE(seq_items[i], &str_type)) {
+        if (!MP_OBJ_IS_STR(seq_items[i])) {
             goto bad_arg;
         }
         if (i > 0) {
             required_len += sep_len;
         }
-        required_len += strlen(qstr_str(mp_obj_str_get(seq_items[i])));
+        GET_STR_LEN(seq_items[i], l);
+        required_len += l;
     }
 
     // make joined string
-    byte *q_ptr;
-    byte *s_dest = qstr_build_start(required_len, &q_ptr);
+    byte *data;
+    mp_obj_t joined_str = mp_obj_str_builder_start(required_len, &data);
     for (int i = 0; i < seq_len; i++) {
         if (i > 0) {
-            memcpy(s_dest, sep_str, sep_len);
-            s_dest += sep_len;
+            memcpy(data, sep_str, sep_len);
+            data += sep_len;
         }
-        uint s2_len;
-        const byte *s2 = qstr_data(mp_obj_str_get(seq_items[i]), &s2_len);
-        memcpy(s_dest, s2, s2_len);
-        s_dest += s2_len;
+        GET_STR_DATA_LEN(seq_items[i], s, l);
+        memcpy(data, s, l);
+        data += l;
     }
-    qstr q = qstr_build_end(q_ptr);
 
     // return joined string
-    return mp_obj_new_str(q);
+    return mp_obj_str_builder_end(joined_str);
 
 bad_arg:
     nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "?str.join expecting a list of str's"));
@@ -207,47 +220,39 @@ static mp_obj_t str_split(uint n_args, const mp_obj_t *args) {
     assert(sep == mp_const_none);
     (void)sep; // unused; to hush compiler warning
     mp_obj_t res = mp_obj_new_list(0, NULL);
-    const char *s = qstr_str(mp_obj_str_get(args[0]));
-    const char *start;
+    GET_STR_DATA_LEN(args[0], s, len);
+    const byte *top = s + len;
+    const byte *start;
 
     // Initial whitespace is not counted as split, so we pre-do it
-    while (is_ws(*s)) s++;
-    while (*s && splits != 0) {
+    while (s < top && is_ws(*s)) s++;
+    while (s < top && splits != 0) {
         start = s;
-        while (*s != 0 && !is_ws(*s)) s++;
-        rt_list_append(res, MP_OBJ_NEW_QSTR(qstr_from_strn(start, s - start)));
-        if (*s == 0) {
+        while (s < top && !is_ws(*s)) s++;
+        rt_list_append(res, mp_obj_new_str(start, s - start, false));
+        if (s >= top) {
             break;
         }
-        while (is_ws(*s)) s++;
+        while (s < top && is_ws(*s)) s++;
         if (splits > 0) {
             splits--;
         }
     }
 
-    if (*s != 0) {
-        rt_list_append(res, MP_OBJ_NEW_QSTR(qstr_from_str(s)));
+    if (s < top) {
+        rt_list_append(res, mp_obj_new_str(s, top - s, false));
     }
 
     return res;
 }
 
-static bool chr_in_str(const char* const str, const size_t str_len, const char c) {
-    for (size_t i = 0; i < str_len; i++) {
-        if (str[i] == c) {
-            return true;
-        }
-    }
-    return false;
-}
-
 static mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
     assert(2 <= n_args && n_args <= 4);
-    const char* haystack = qstr_str(mp_obj_str_get(args[0]));
-    const char* needle = qstr_str(mp_obj_str_get(args[1]));
+    assert(MP_OBJ_IS_STR(args[0]));
+    assert(MP_OBJ_IS_STR(args[1]));
 
-    size_t haystack_len = strlen(haystack);
-    size_t needle_len = strlen(needle);
+    GET_STR_DATA_LEN(args[0], haystack, haystack_len);
+    GET_STR_DATA_LEN(args[1], needle, needle_len);
 
     size_t start = 0;
     size_t end = haystack_len;
@@ -259,7 +264,7 @@ static mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
         end = mp_get_index(&str_type, haystack_len, args[3]);
     }
 
-    char *p = strstr(haystack + start, needle);
+    const byte *p = find_subbytes(haystack + start, haystack_len - start, needle, needle_len);
     if (p == NULL) {
         // not found
         return MP_OBJ_NEW_SMALL_INT(-1);
@@ -273,21 +278,34 @@ static mp_obj_t str_find(uint n_args, const mp_obj_t *args) {
     }
 }
 
+static bool chr_in_str(const byte* const str, const size_t str_len, int c) {
+    // scan exactly str_len bytes; the length is explicit, so no null terminator is consulted
+    const byte *end = str + str_len;
+    for (const byte *p = str; p < end; p++) {
+        if (*p == c) return true;
+    }
+    return false;
+}
+
 mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
     assert(1 <= n_args && n_args <= 2);
-    assert(MP_OBJ_IS_TYPE(args[0], &str_type));
-    const char *chars_to_del;
-    static const char whitespace[] = " \t\n\r\v\f";
+    assert(MP_OBJ_IS_STR(args[0]));
+
+    const byte *chars_to_del;
+    uint chars_to_del_len;
+    static const byte whitespace[] = " \t\n\r\v\f";
 
     if (n_args == 1) {
         chars_to_del = whitespace;
+        chars_to_del_len = sizeof(whitespace) - 1; // -1: sizeof also counts the terminating null byte of the literal, which is not whitespace and must not be stripped
     } else {
-        chars_to_del = qstr_str(mp_obj_str_get(args[1]));
+        assert(MP_OBJ_IS_STR(args[1]));
+        GET_STR_DATA_LEN(args[1], s, l); // explicit data pointer and length of the caller-supplied strip set
+        chars_to_del = s;
+        chars_to_del_len = l;
     }
 
-    const size_t chars_to_del_len = strlen(chars_to_del);
-    const char *orig_str = qstr_str(mp_obj_str_get(args[0]));
-    const size_t orig_str_len = strlen(orig_str);
+    GET_STR_DATA_LEN(args[0], orig_str, orig_str_len);
 
     size_t first_good_char_pos = 0;
     bool first_good_char_pos_set = false;
@@ -303,30 +321,29 @@ mp_obj_t str_strip(uint n_args, const mp_obj_t *args) {
     }
 
     if (first_good_char_pos == 0 && last_good_char_pos == 0) {
-        //string is all whitespace, return ''
-        return mp_obj_new_str(MP_QSTR_);
+        // string is all whitespace, return ''
+        return MP_OBJ_NEW_QSTR(MP_QSTR_);
     }
 
     assert(last_good_char_pos >= first_good_char_pos);
     //+1 to accomodate the last character
     size_t stripped_len = last_good_char_pos - first_good_char_pos + 1;
-    return mp_obj_new_str(qstr_from_strn(orig_str + first_good_char_pos, stripped_len));
+    return mp_obj_new_str(orig_str + first_good_char_pos, stripped_len, false);
 }
 
 mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
-    assert(MP_OBJ_IS_TYPE(args[0], &str_type));
-    mp_obj_str_t *self = args[0];
+    assert(MP_OBJ_IS_STR(args[0]));
 
-    const char *str = qstr_str(self->qstr);
+    GET_STR_DATA_LEN(args[0], str, len);
     int arg_i = 1;
     vstr_t *vstr = vstr_new();
-    for (; *str; str++) {
+    for (const byte *top = str + len; str < top; str++) {
         if (*str == '{') {
             str++;
-            if (*str == '{') {
+            if (str < top && *str == '{') {
                 vstr_add_char(vstr, '{');
             } else {
-                while (*str != '}') str++;
+                while (str < top && *str != '}') str++;
                 if (arg_i >= n_args) {
                     nlr_jump(mp_obj_new_exception_msg(MP_QSTR_IndexError, "tuple index out of range"));
                 }
@@ -339,7 +356,9 @@ mp_obj_t str_format(uint n_args, const mp_obj_t *args) {
         }
     }
 
-    return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, vstr->len));
+    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
+    vstr_free(vstr);
+    return s;
 }
 
 static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_find_obj, 2, 4, str_find);
@@ -366,23 +385,102 @@ const mp_obj_type_t str_type = {
     .methods = str_type_methods,
 };
 
-mp_obj_t mp_obj_new_str(qstr qstr) {
-    mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
+mp_obj_t mp_obj_str_builder_start(uint len, byte **data) {
+    mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
     o->base.type = &str_type;
-    o->qstr = qstr;
+    o->len = len;
+    *data = o->data;
+    return o;
+}
+
+mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
+    assert(MP_OBJ_IS_STR(o_in));
+    mp_obj_str_t *o = o_in;
+    o->hash = qstr_compute_hash(o->data, o->len);
+    o->data[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
     return o;
 }
 
-qstr mp_obj_str_get(mp_obj_t self_in) {
-    if (MP_OBJ_IS_QSTR(self_in)) {
-        return MP_OBJ_QSTR_VALUE(self_in);
+mp_obj_t mp_obj_new_str(const byte* data, uint len, bool make_qstr_if_not_already) {
+    qstr q = qstr_find_strn(data, len);
+    if (q != MP_QSTR_NULL) {
+        // qstr with this data already exists
+        return MP_OBJ_NEW_QSTR(q);
+    } else if (make_qstr_if_not_already) {
+        // no existing qstr, make a new one
+        return MP_OBJ_NEW_QSTR(qstr_from_strn((const char*)data, len));
+    } else {
+        // no existing qstr, don't make one
+        mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
+        o->base.type = &str_type;
+        o->hash = qstr_compute_hash(data, len);
+        o->len = len;
+        memcpy(o->data, data, len * sizeof(byte));
+        o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
+        return o;
+    }
+}
+
+bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2) { // true iff both strs hold the same bytes
+    if (MP_OBJ_IS_QSTR(s1) && MP_OBJ_IS_QSTR(s2)) {
+        return s1 == s2; // two qstr values: compare the values directly
+    } else {
+        GET_STR_HASH(s1, h1);
+        GET_STR_HASH(s2, h2);
+        if (h1 != h2) { // cheap reject before looking at the data
+            return false;
+        }
+        GET_STR_DATA_LEN(s1, d1, l1);
+        GET_STR_DATA_LEN(s2, d2, l2);
+        if (l1 != l2) {
+            return false;
+        }
+        return memcmp(d1, d2, l1) == 0; // not strncmp: it would stop at an embedded '\0'
+    }
+}
+
+uint mp_obj_str_get_hash(mp_obj_t self_in) {
+    if (MP_OBJ_IS_STR(self_in)) {
+        GET_STR_HASH(self_in, h);
+        return h;
+    } else {
+        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
+                 mp_obj_get_type_str(self_in)));
     }
-    if (MP_OBJ_IS_TYPE(self_in, &str_type)) {
-        mp_obj_str_t *self = self_in;
-        return self->qstr;
+}
+
+uint mp_obj_str_get_len(mp_obj_t self_in) {
+    if (MP_OBJ_IS_STR(self_in)) {
+        GET_STR_LEN(self_in, l);
+        return l;
+    } else {
+        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
+                 mp_obj_get_type_str(self_in)));
+    }
+}
+
+// only use this function if you need the str data to be zero terminated
+// at the moment all strings are zero terminated to help with C ASCIIZ compatibility
+const char *mp_obj_str_get_str(mp_obj_t self_in) {
+    if (MP_OBJ_IS_STR(self_in)) {
+        GET_STR_DATA_LEN(self_in, s, l);
+        (void)l; // len unused
+        return (const char*)s;
+    } else {
+        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
+                 mp_obj_get_type_str(self_in)));
+    }
+}
+
+const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len) {
+    if (MP_OBJ_IS_STR(self_in)) {
+        GET_STR_DATA_LEN(self_in, s, l);
+        *len = l;
+        return s;
+    } else {
+        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
+                 mp_obj_get_type_str(self_in)));
     }
-    nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
-             mp_obj_get_type_str(self_in)));
 }
 
 /******************************************************************************/
@@ -390,15 +488,15 @@ qstr mp_obj_str_get(mp_obj_t self_in) {
 
 typedef struct _mp_obj_str_it_t {
     mp_obj_base_t base;
-    mp_obj_str_t *str;
+    mp_obj_t str;
     machine_uint_t cur;
 } mp_obj_str_it_t;
 
 mp_obj_t str_it_iternext(mp_obj_t self_in) {
     mp_obj_str_it_t *self = self_in;
-    const char *str = qstr_str(self->str->qstr);
-    if (self->cur < strlen(str)) {
-        mp_obj_t o_out = mp_obj_new_str(qstr_from_strn(str + self->cur, 1));
+    GET_STR_DATA_LEN(self->str, str, len);
+    if (self->cur < len) {
+        mp_obj_t o_out = mp_obj_new_str(str + self->cur, 1, true);
         self->cur += 1;
         return o_out;
     } else {
@@ -412,7 +510,7 @@ static const mp_obj_type_t str_it_type = {
     .iternext = str_it_iternext,
 };
 
-mp_obj_t mp_obj_new_str_iterator(mp_obj_str_t *str, int cur) {
+mp_obj_t mp_obj_new_str_iterator(mp_obj_t str, int cur) {
     mp_obj_str_it_t *o = m_new_obj(mp_obj_str_it_t);
     o->base.type = &str_it_type;
     o->str = str;
diff --git a/py/objtype.c b/py/objtype.c
index 5dea6938d..75755f4fb 100644
--- a/py/objtype.c
+++ b/py/objtype.c
@@ -251,7 +251,7 @@ static mp_obj_t type_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp
             // args[0] = name
             // args[1] = bases tuple
             // args[2] = locals dict
-            return mp_obj_new_type(mp_obj_get_qstr(args[0]), args[1], args[2]);
+            return mp_obj_new_type(mp_obj_str_get_str(args[0]), args[1], args[2]);
 
         default:
             nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "type takes at 1 or 3 arguments"));
@@ -323,12 +323,12 @@ const mp_obj_type_t mp_const_type = {
     .store_attr = type_store_attr,
 };
 
-mp_obj_t mp_obj_new_type(qstr name, mp_obj_t bases_tuple, mp_obj_t locals_dict) {
+mp_obj_t mp_obj_new_type(const char *name, mp_obj_t bases_tuple, mp_obj_t locals_dict) {
     assert(MP_OBJ_IS_TYPE(bases_tuple, &tuple_type)); // Micro Python restriction, for now
     assert(MP_OBJ_IS_TYPE(locals_dict, &dict_type)); // Micro Python restriction, for now
     mp_obj_type_t *o = m_new0(mp_obj_type_t, 1);
     o->base.type = &mp_const_type;
-    o->name = qstr_str(name);
+    o->name = name;
     o->print = class_print;
     o->make_new = class_make_new;
     o->binary_op = class_binary_op;
diff --git a/py/qstr.c b/py/qstr.c
index 2830341a2..6ce9e8be5 100644
--- a/py/qstr.c
+++ b/py/qstr.c
@@ -29,7 +29,7 @@
 #define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8))
 #define Q_GET_DATA(q)   ((q) + 4)
 
-static machine_uint_t compute_hash(const byte *data, uint len) {
+machine_uint_t qstr_compute_hash(const byte *data, uint len) {
     machine_uint_t hash = 0;
     for (const byte *top = data + len; data < top; data++) {
         hash += *data;
@@ -99,9 +99,9 @@ static qstr qstr_add(const byte *q_ptr) {
     return last_pool->total_prev_len + last_pool->len - 1;
 }
 
-static qstr qstr_find_strn(const byte *str, uint str_len) {
+qstr qstr_find_strn(const byte *str, uint str_len) {
     // work out hash of str
-    machine_uint_t str_hash = compute_hash((const byte*)str, str_len);
+    machine_uint_t str_hash = qstr_compute_hash((const byte*)str, str_len);
 
     // search pools for the data
     for (qstr_pool_t *pool = last_pool; pool != NULL; pool = pool->prev) {
@@ -123,7 +123,7 @@ qstr qstr_from_str(const char *str) {
 qstr qstr_from_strn(const char *str, uint len) {
     qstr q = qstr_find_strn((const byte*)str, len);
     if (q == 0) {
-        machine_uint_t hash = compute_hash((const byte*)str, len);
+        machine_uint_t hash = qstr_compute_hash((const byte*)str, len);
         byte *q_ptr = m_new(byte, 4 + len + 1);
         q_ptr[0] = hash;
         q_ptr[1] = hash >> 8;
@@ -154,7 +154,7 @@ qstr qstr_build_end(byte *q_ptr) {
     qstr q = qstr_find_strn(Q_GET_DATA(q_ptr), Q_GET_LENGTH(q_ptr));
     if (q == 0) {
         machine_uint_t len = Q_GET_LENGTH(q_ptr);
-        machine_uint_t hash = compute_hash(Q_GET_DATA(q_ptr), len);
+        machine_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len);
         q_ptr[0] = hash;
         q_ptr[1] = hash >> 8;
         q_ptr[4 + len] = '\0';
diff --git a/py/qstr.h b/py/qstr.h
index 5c331c34a..271e2117c 100644
--- a/py/qstr.h
+++ b/py/qstr.h
@@ -20,6 +20,9 @@ typedef machine_uint_t qstr;
 
 void qstr_init(void);
 
+machine_uint_t qstr_compute_hash(const byte *data, uint len);
+qstr qstr_find_strn(const byte *str, uint str_len); // returns MP_QSTR_NULL if not found
+
 qstr qstr_from_str(const char *str);
 qstr qstr_from_strn(const char *str, uint len);
 //qstr qstr_from_str_static(const char *str);
diff --git a/py/runtime.c b/py/runtime.c
index ccd3d7d0a..7937a65b8 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -18,6 +18,7 @@
 #include "map.h"
 #include "builtin.h"
 #include "objarray.h"
+#include "bc.h"
 
 #if 0 // print debugging info
 #define DEBUG_PRINT (1)
@@ -77,7 +78,7 @@ static void mp_map_add_qstr(mp_map_t *map, qstr qstr, mp_obj_t value) {
 void rt_init(void) {
     // locals = globals for outer module (see Objects/frameobject.c/PyFrame_New())
     map_locals = map_globals = mp_map_new(1);
-    mp_map_add_qstr(map_globals, MP_QSTR___name__, mp_obj_new_str(MP_QSTR___main__));
+    mp_map_add_qstr(map_globals, MP_QSTR___name__, MP_OBJ_NEW_QSTR(MP_QSTR___main__));
 
     // init built-in hash table
     mp_map_init(&map_builtins, 3);
@@ -306,12 +307,8 @@ int rt_is_true(mp_obj_t arg) {
         return 0;
     } else if (arg == mp_const_true) {
         return 1;
-    } else if (MP_OBJ_IS_QSTR(arg)) {
-        // TODO: \0
-        return *qstr_str(MP_OBJ_QSTR_VALUE(arg)) != 0;
-    } else if (MP_OBJ_IS_TYPE(arg, &str_type)) {
-        // TODO: \0
-        return *qstr_str(mp_obj_str_get(arg)) != 0;
+    } else if (MP_OBJ_IS_STR(arg)) {
+        return mp_obj_str_get_len(arg) != 0;
     } else if (MP_OBJ_IS_TYPE(arg, &list_type)) {
         uint len;
         mp_obj_t *dummy;
@@ -404,7 +401,7 @@ mp_obj_t rt_load_const_dec(qstr qstr) {
 
 mp_obj_t rt_load_const_str(qstr qstr) {
     DEBUG_OP_printf("load '%s'\n", qstr_str(qstr));
-    return mp_obj_new_str(qstr);
+    return MP_OBJ_NEW_QSTR(qstr);
 }
 
 mp_obj_t rt_load_name(qstr qstr) {
@@ -616,25 +613,23 @@ mp_obj_t rt_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) {
      * needs to go below
      */
     if (op == RT_COMPARE_OP_IN || op == RT_COMPARE_OP_NOT_IN) {
-        if (!MP_OBJ_IS_SMALL_INT(rhs)) {
-            mp_obj_base_t *o = rhs;
-            if (o->type->binary_op != NULL) {
-                mp_obj_t res = o->type->binary_op(op, rhs, lhs);
-                if (res != NULL) {
-                    return res;
-                }
+        mp_obj_type_t *type = mp_obj_get_type(rhs);
+        if (type->binary_op != NULL) {
+            mp_obj_t res = type->binary_op(op, rhs, lhs);
+            if (res != NULL) {
+                return res;
             }
-            if (o->type->getiter != NULL) {
-                /* second attempt, walk the iterator */
-                mp_obj_t next = NULL;
-                mp_obj_t iter = rt_getiter(rhs);
-                while ((next = rt_iternext(iter)) != mp_const_stop_iteration) {
-                    if (mp_obj_equal(next, lhs)) {
-                        return MP_BOOL(op == RT_COMPARE_OP_IN);
-                    }
+        }
+        if (type->getiter != NULL) {
+            /* second attempt, walk the iterator */
+            mp_obj_t next = NULL;
+            mp_obj_t iter = rt_getiter(rhs);
+            while ((next = rt_iternext(iter)) != mp_const_stop_iteration) {
+                if (mp_obj_equal(next, lhs)) {
+                    return MP_BOOL(op == RT_COMPARE_OP_IN);
                 }
-                return MP_BOOL(op != RT_COMPARE_OP_IN);
             }
+            return MP_BOOL(op != RT_COMPARE_OP_IN);
         }
 
         nlr_jump(mp_obj_new_exception_msg_varg(
@@ -643,17 +638,17 @@ mp_obj_t rt_binary_op(int op, mp_obj_t lhs, mp_obj_t rhs) {
         return mp_const_none;
     }
 
-    if (MP_OBJ_IS_OBJ(lhs)) {
-        mp_obj_base_t *o = lhs;
-        if (o->type->binary_op != NULL) {
-            mp_obj_t result = o->type->binary_op(op, lhs, rhs);
-            if (result != NULL) {
-                return result;
-            }
+    // generic binary_op supplied by type
+    mp_obj_type_t *type = mp_obj_get_type(lhs);
+    if (type->binary_op != NULL) {
+        mp_obj_t result = type->binary_op(op, lhs, rhs);
+        if (result != MP_OBJ_NULL) {
+            return result;
         }
-        // TODO implement dispatch for reverse binary ops
     }
 
+    // TODO implement dispatch for reverse binary ops
+
     // TODO specify in error message what the operator is
     nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError,
         "unsupported operand types for binary operator: '%s', '%s'",
@@ -900,35 +895,27 @@ void rt_store_subscr(mp_obj_t base, mp_obj_t index, mp_obj_t value) {
 }
 
 mp_obj_t rt_getiter(mp_obj_t o_in) {
-    if (MP_OBJ_IS_SMALL_INT(o_in)) {
-        nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "'int' object is not iterable"));
+    mp_obj_type_t *type = mp_obj_get_type(o_in);
+    if (type->getiter != NULL) {
+        return type->getiter(o_in);
     } else {
-        mp_obj_base_t *o = o_in;
-        if (o->type->getiter != NULL) {
-            return o->type->getiter(o_in);
-        } else {
-            nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not iterable", o->type->name));
-        }
+        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not iterable", type->name));
     }
 }
 
 mp_obj_t rt_iternext(mp_obj_t o_in) {
-    if (MP_OBJ_IS_SMALL_INT(o_in)) {
-        nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "'int' object is not an iterator"));
+    mp_obj_type_t *type = mp_obj_get_type(o_in);
+    if (type->iternext != NULL) {
+        return type->iternext(o_in);
     } else {
-        mp_obj_base_t *o = o_in;
-        if (o->type->iternext != NULL) {
-            return o->type->iternext(o_in);
-        } else {
-            nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not an iterator", o->type->name));
-        }
+        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not an iterator", type->name));
     }
 }
 
 mp_obj_t rt_import_name(qstr name, mp_obj_t fromlist, mp_obj_t level) {
     // build args array
     mp_obj_t args[5];
-    args[0] = mp_obj_new_str(name);
+    args[0] = MP_OBJ_NEW_QSTR(name);
     args[1] = mp_const_none; // TODO should be globals
     args[2] = mp_const_none; // TODO should be locals
     args[3] = fromlist;
diff --git a/py/stream.c b/py/stream.c
index be560d3c2..d47d7e419 100644
--- a/py/stream.c
+++ b/py/stream.c
@@ -23,15 +23,15 @@ static mp_obj_t stream_read(uint n_args, const mp_obj_t *args) {
     if (n_args == 1 || ((sz = mp_obj_get_int(args[1])) == -1)) {
         return stream_readall(args[0]);
     }
-    char *buf = m_new(char, sz);
+    byte *buf = m_new(byte, sz);
     int error;
     machine_int_t out_sz = o->type->stream_p.read(o, buf, sz, &error);
     if (out_sz == -1) {
         nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_OSError, "[Errno %d]", error));
     } else {
-        // TODO don't intern this string
-        buf = m_realloc(buf, sz, out_sz);
-        return mp_obj_new_str(qstr_from_strn_take(buf, out_sz, out_sz));
+        mp_obj_t s = mp_obj_new_str(buf, out_sz, false); // will reallocate to use exact size
+        m_free(buf, sz);
+        return s;
     }
 }
 
@@ -43,7 +43,7 @@ static mp_obj_t stream_write(mp_obj_t self_in, mp_obj_t arg) {
     }
 
     uint sz;
-    const byte *buf = qstr_data(mp_obj_get_qstr(arg), &sz);
+    const byte *buf = mp_obj_str_get_data(arg, &sz);
     int error;
     machine_int_t out_sz = o->type->stream_p.write(self_in, buf, sz, &error);
     if (out_sz == -1) {
@@ -92,9 +92,10 @@ static mp_obj_t stream_readall(mp_obj_t self_in) {
             }
         }
     }
-    // TODO don't intern this string
-    vstr_set_size(vstr, total_size);
-    return mp_obj_new_str(qstr_from_strn_take(vstr->buf, vstr->alloc, total_size));
+
+    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, total_size, false);
+    vstr_free(vstr);
+    return s;
 }
 
 // Unbuffered, inefficient implementation of readline() for raw I/O files.
diff --git a/stm/lcd.c b/stm/lcd.c
index 56f0ffe64..0567b4333 100644
--- a/stm/lcd.c
+++ b/stm/lcd.c
@@ -165,7 +165,9 @@ mp_obj_t lcd_pix_show(void) {
 }
 
 mp_obj_t lcd_print(mp_obj_t text) {
-    lcd_print_str(qstr_str(mp_obj_get_qstr(text)));
+    uint len;
+    const byte *data = mp_obj_str_get_data(text, &len);
+    lcd_print_strn((const char*)data, len);
     return mp_const_none;
 }
 
diff --git a/stm/main.c b/stm/main.c
index 990450202..8d2c2d6b3 100644
--- a/stm/main.c
+++ b/stm/main.c
@@ -69,16 +69,20 @@ void __fatal_error(const char *msg) {
     }
 }
 
-static qstr pyb_config_source_dir = 0;
-static qstr pyb_config_main = 0;
+static mp_obj_t pyb_config_source_dir = MP_OBJ_NULL;
+static mp_obj_t pyb_config_main = MP_OBJ_NULL;
 
 mp_obj_t pyb_source_dir(mp_obj_t source_dir) {
-    pyb_config_source_dir = mp_obj_get_qstr(source_dir);
+    if (MP_OBJ_IS_STR(source_dir)) {
+        pyb_config_source_dir = source_dir;
+    }
     return mp_const_none;
 }
 
 mp_obj_t pyb_main(mp_obj_t main) {
-    pyb_config_main = mp_obj_get_qstr(main);
+    if (MP_OBJ_IS_STR(main)) {
+        pyb_config_main = main;
+    }
     return mp_const_none;
 }
 
@@ -482,7 +486,7 @@ mp_obj_t pyb_gc(void) {
 mp_obj_t pyb_gpio(uint n_args, mp_obj_t *args) {
     //assert(1 <= n_args && n_args <= 2);
 
-    const char *pin_name = qstr_str(mp_obj_get_qstr(args[0]));
+    const char *pin_name = mp_obj_str_get_str(args[0]);
     GPIO_TypeDef *port;
     switch (pin_name[0]) {
         case 'A': case 'a': port = GPIOA; break;
@@ -630,21 +634,22 @@ void file_obj_print(void (*print)(void *env, const char *fmt, ...), void *env, m
 mp_obj_t file_obj_read(mp_obj_t self_in, mp_obj_t arg) {
     pyb_file_obj_t *self = self_in;
     int n = mp_obj_get_int(arg);
-    char *buf = m_new(char, n);
+    byte *buf = m_new(byte, n);
     UINT n_out;
     f_read(&self->fp, buf, n, &n_out);
-    return mp_obj_new_str(qstr_from_strn_take(buf, n, n_out));
+    return mp_obj_new_str(buf, n_out, false);
 }
 
 mp_obj_t file_obj_write(mp_obj_t self_in, mp_obj_t arg) {
     pyb_file_obj_t *self = self_in;
-    const char *s = qstr_str(mp_obj_get_qstr(arg));
+    uint l;
+    const byte *s = mp_obj_str_get_data(arg, &l);
     UINT n_out;
-    FRESULT res = f_write(&self->fp, s, strlen(s), &n_out);
+    FRESULT res = f_write(&self->fp, s, l, &n_out);
     if (res != FR_OK) {
         printf("File error: could not write to file; error code %d\n", res);
-    } else if (n_out != strlen(s)) {
-        printf("File error: could not write all data to file; wrote %d / %d bytes\n", n_out, strlen(s));
+    } else if (n_out != l) {
+        printf("File error: could not write all data to file; wrote %d / %d bytes\n", n_out, l);
     }
     return mp_const_none;
 }
@@ -676,8 +681,8 @@ static const mp_obj_type_t file_obj_type = {
 };
 
 mp_obj_t pyb_io_open(mp_obj_t o_filename, mp_obj_t o_mode) {
-    const char *filename = qstr_str(mp_obj_get_qstr(o_filename));
-    const char *mode = qstr_str(mp_obj_get_qstr(o_mode));
+    const char *filename = mp_obj_str_get_str(o_filename);
+    const char *mode = mp_obj_str_get_str(o_mode);
     pyb_file_obj_t *self = m_new_obj(pyb_file_obj_t);
     self->base.type = &file_obj_type;
     if (mode[0] == 'r') {
@@ -931,16 +936,16 @@ soft_reset:
     {
         vstr_t *vstr = vstr_new();
         vstr_add_str(vstr, "0:/");
-        if (pyb_config_source_dir == 0) {
+        if (pyb_config_source_dir == MP_OBJ_NULL) {
             vstr_add_str(vstr, "src");
         } else {
-            vstr_add_str(vstr, qstr_str(pyb_config_source_dir));
+            vstr_add_str(vstr, mp_obj_str_get_str(pyb_config_source_dir));
         }
         vstr_add_char(vstr, '/');
-        if (pyb_config_main == 0) {
+        if (pyb_config_main == MP_OBJ_NULL) {
             vstr_add_str(vstr, "main.py");
         } else {
-            vstr_add_str(vstr, qstr_str(pyb_config_main));
+            vstr_add_str(vstr, mp_obj_str_get_str(pyb_config_main));
         }
         if (!do_file(vstr_str(vstr))) {
             flash_error(3);
diff --git a/stm/string0.c b/stm/string0.c
index d67c5f2b1..4899e7b0f 100644
--- a/stm/string0.c
+++ b/stm/string0.c
@@ -34,7 +34,7 @@ void *memset(void *s, int c, size_t n) {
     return s;
 }
 
-int strlen(const char *str) {
+size_t strlen(const char *str) {
     int len = 0;
     for (const char *s = str; *s; s++) {
         len += 1;
diff --git a/stm/usart.c b/stm/usart.c
index c687cff05..17ff146d5 100644
--- a/stm/usart.c
+++ b/stm/usart.c
@@ -151,6 +151,12 @@ void usart_tx_str(pyb_usart_t usart_id, const char *str) {
     }
 }
 
+void usart_tx_bytes(pyb_usart_t usart_id, const byte *data, uint len) {
+    for (; len > 0; data++, len--) {
+        usart_tx_char(usart_id, *data);
+    }
+}
+
 void usart_tx_strn_cooked(pyb_usart_t usart_id, const char *str, int len) {
     for (const char *top = str + len; str < top; str++) {
         if (*str == '\n') {
@@ -201,8 +207,9 @@ static mp_obj_t usart_obj_tx_str(mp_obj_t self_in, mp_obj_t s) {
     pyb_usart_obj_t *self = self_in;
     if (self->is_enabled) {
-        if (MP_OBJ_IS_TYPE(s, &str_type)) {
-            const char *str = qstr_str(mp_obj_get_qstr(s));
-            usart_tx_str(self->usart_id, str);
+        if (MP_OBJ_IS_STR(s)) { // a str may be a qstr value, not only a str_type object
+            uint len;
+            const byte *data = mp_obj_str_get_data(s, &len);
+            usart_tx_bytes(self->usart_id, data, len);
         }
     }
     return mp_const_none;
diff --git a/unix-cpy/main.c b/unix-cpy/main.c
index 0da1fca29..4bd924954 100644
--- a/unix-cpy/main.c
+++ b/unix-cpy/main.c
@@ -5,6 +5,7 @@
 #include "nlr.h"
 #include "misc.h"
 #include "mpconfig.h"
+#include "qstr.h"
 #include "lexer.h"
 #include "lexerunix.h"
 #include "parse.h"
diff --git a/unix/file.c b/unix/file.c
index 3181c08b6..5249b5bba 100644
--- a/unix/file.c
+++ b/unix/file.c
@@ -64,10 +64,10 @@ static mp_obj_t fdfile_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const
         return o;
     }
 
-    const char *fname = qstr_str(mp_obj_get_qstr(args[0]));
+    const char *fname = mp_obj_str_get_str(args[0]);
     const char *mode_s;
     if (n_args > 1) {
-        mode_s = qstr_str(mp_obj_get_qstr(args[1]));
+        mode_s = mp_obj_str_get_str(args[1]);
     } else {
         mode_s = "r";
     }
diff --git a/unix/main.c b/unix/main.c
index facc250a7..c93610670 100644
--- a/unix/main.c
+++ b/unix/main.c
@@ -235,7 +235,7 @@ int main(int argc, char **argv) {
     // test_obj = TestClass()
     // test_obj.attr = 42
     mp_obj_t test_class_type, test_class_instance;
-    test_class_type = mp_obj_new_type(QSTR_FROM_STR_STATIC("TestClass"), mp_const_empty_tuple, mp_obj_new_dict(0));
+    test_class_type = mp_obj_new_type("TestClass", mp_const_empty_tuple, mp_obj_new_dict(0));
     rt_store_name(QSTR_FROM_STR_STATIC("test_obj"), test_class_instance = rt_call_function_0(test_class_type));
     rt_store_attr(test_class_instance, QSTR_FROM_STR_STATIC("attr"), mp_obj_new_int(42));
 
diff --git a/unix/socket.c b/unix/socket.c
index ae87ba465..36b848468 100644
--- a/unix/socket.c
+++ b/unix/socket.c
@@ -153,8 +153,8 @@ static mp_obj_t socket_send(uint n_args, const mp_obj_t *args) {
         flags = MP_OBJ_SMALL_INT_VALUE(args[2]);
     }
 
-    const char *buf = qstr_str(mp_obj_str_get(args[1]));
-    int sz = strlen(buf);
+    uint sz;
+    const byte *buf = mp_obj_str_get_data(args[1], &sz);
     int out_sz = send(self->fd, buf, sz, flags);
     RAISE_ERRNO(out_sz, errno);
 
@@ -225,7 +225,7 @@ static MP_DEFINE_CONST_FUN_OBJ_1(mod_socket_htons_obj, mod_socket_htons);
 
 static mp_obj_t mod_socket_inet_aton(mp_obj_t arg) {
-    assert(MP_OBJ_IS_TYPE(arg, &str_type));
-    const char *s = qstr_str(mp_obj_str_get(arg));
+    assert(MP_OBJ_IS_STR(arg)); // a str may be a qstr value, not only a str_type object
+    const char *s = mp_obj_str_get_str(arg);
     struct in_addr addr;
     if (!inet_aton(s, &addr)) {
         nlr_jump(mp_obj_new_exception_msg(MP_QSTR_OSError, "Invalid IP address"));
@@ -238,7 +238,7 @@ static MP_DEFINE_CONST_FUN_OBJ_1(mod_socket_inet_aton_obj, mod_socket_inet_aton)
 #if MICROPY_SOCKET_EXTRA
 static mp_obj_t mod_socket_gethostbyname(mp_obj_t arg) {
-    assert(MP_OBJ_IS_TYPE(arg, &str_type));
-    const char *s = qstr_str(mp_obj_str_get(arg));
+    assert(MP_OBJ_IS_STR(arg)); // a str may be a qstr value, not only a str_type object
+    const char *s = mp_obj_str_get_str(arg);
     struct hostent *h = gethostbyname(s);
     if (h == NULL) {
         nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_OSError, "[Errno %d]", errno));
@@ -252,9 +252,9 @@ static MP_DEFINE_CONST_FUN_OBJ_1(mod_socket_gethostbyname_obj, mod_socket_gethos
 static mp_obj_t mod_socket_getaddrinfo(uint n_args, const mp_obj_t *args) {
     // TODO: Implement all args
     assert(n_args == 2);
-    assert(MP_OBJ_IS_TYPE(args[0], &str_type));
+    assert(MP_OBJ_IS_STR(args[0]));
 
-    const char *host = qstr_str(mp_obj_str_get(args[0]));
+    const char *host = mp_obj_str_get_str(args[0]);
     const char *serv = NULL;
     // getaddrinfo accepts port in string notation, so however
     // it may seem stupid, we need to convert int to str
@@ -264,7 +264,7 @@ static mp_obj_t mod_socket_getaddrinfo(uint n_args, const mp_obj_t *args) {
         sprintf(buf, "%d", port);
         serv = buf;
     } else {
-        serv = qstr_str(mp_obj_str_get(args[1]));
+        serv = mp_obj_str_get_str(args[1]);
     }
 
     struct addrinfo hints;
-- 
GitLab