diff --git a/py/obj.h b/py/obj.h
index 0f99e5dfc85e8b0a62ba06f86befd484ce2be71c..56e4d96df678957dd594eb8f208a7330abf359de 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -223,7 +223,7 @@ mp_obj_t mp_obj_new_range(int start, int stop, int step);
 mp_obj_t mp_obj_new_range_iterator(int cur, int stop, int step);
 mp_obj_t mp_obj_new_fun_bc(int n_args, uint n_state, const byte *code);
 mp_obj_t mp_obj_new_fun_asm(uint n_args, void *fun);
-mp_obj_t mp_obj_new_gen_wrap(uint n_locals, uint n_stack, mp_obj_t fun);
+mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun);
 mp_obj_t mp_obj_new_gen_instance(const byte *bytecode, uint n_state, int n_args, const mp_obj_t *args);
 mp_obj_t mp_obj_new_closure(mp_obj_t fun, mp_obj_t closure_tuple);
 mp_obj_t mp_obj_new_tuple(uint n, const mp_obj_t *items);
diff --git a/py/objgenerator.c b/py/objgenerator.c
index 91bbbceb2f42dcf1fc3a8269c6eecc410e2d8e2d..67f8eed59c25ec1f3f8a96141f45472a801787d2 100644
--- a/py/objgenerator.c
+++ b/py/objgenerator.c
@@ -16,7 +16,6 @@
 
 typedef struct _mp_obj_gen_wrap_t {
     mp_obj_base_t base;
-    uint n_state;
     mp_obj_t *fun;
 } mp_obj_gen_wrap_t;
 
@@ -35,7 +34,7 @@ mp_obj_t gen_wrap_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t
         nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "function does not take keyword arguments"));
     }
 
-    return mp_obj_new_gen_instance(bc_code, self->n_state, n_args, args);
+    return mp_obj_new_gen_instance(bc_code, bc_n_state, n_args, args);
 }
 
 const mp_obj_type_t gen_wrap_type = {
@@ -44,11 +43,9 @@ const mp_obj_type_t gen_wrap_type = {
     .call = gen_wrap_call,
 };
 
-mp_obj_t mp_obj_new_gen_wrap(uint n_locals, uint n_stack, mp_obj_t fun) {
+mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun) { // allocates a gen_wrap object, sets its type and stores fun in it
     mp_obj_gen_wrap_t *o = m_new_obj(mp_obj_gen_wrap_t);
     o->base.type = &gen_wrap_type;
-    // we have at least 3 locals so the bc can write back fast[0,1,2] safely; should improve how this is done
-    o->n_state = (n_locals < 3 ? 3 : n_locals) + n_stack;
     o->fun = fun;
     return o;
 }
diff --git a/py/runtime.c b/py/runtime.c
index 3c97505bb75ac2039b4ca02e05c843e469391d91..6dd6921599f9c77382a94f61dbd35b07babfb424 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -44,11 +44,14 @@ typedef enum {
 } mp_code_kind_t;
 
 typedef struct _mp_code_t {
-    mp_code_kind_t kind;
-    int n_args;
-    int n_locals;
-    int n_stack;
-    bool is_generator;
+    struct {
+        mp_code_kind_t kind : 8;
+        bool is_generator : 1;
+    };
+    struct {
+        uint n_args : 16;
+        uint n_state : 16;
+    };
     union {
         struct {
             byte *code;
@@ -63,7 +66,7 @@ typedef struct _mp_code_t {
     };
 } mp_code_t;
 
-static int next_unique_code_id;
+static uint next_unique_code_id;
 static machine_uint_t unique_codes_alloc = 0;
 static mp_code_t *unique_codes = NULL;
 
@@ -187,30 +190,30 @@ void rt_deinit(void) {
 #endif
 }
 
-int rt_get_unique_code_id(void) {
+uint rt_get_unique_code_id(void) { // returns the current counter value, then advances the counter by one
     return next_unique_code_id++;
 }
 
 static void alloc_unique_codes(void) {
     if (next_unique_code_id > unique_codes_alloc) {
+        DEBUG_printf("allocate more unique codes: " UINT_FMT " -> %u\n", unique_codes_alloc, next_unique_code_id); // logs old table size -> new table size
         // increase size of unique_codes table
         unique_codes = m_renew(mp_code_t, unique_codes, unique_codes_alloc, next_unique_code_id);
-        for (int i = unique_codes_alloc; i < next_unique_code_id; i++) {
+        for (uint i = unique_codes_alloc; i < next_unique_code_id; i++) { // only the newly added entries get kind = MP_CODE_NONE
             unique_codes[i].kind = MP_CODE_NONE;
         }
         unique_codes_alloc = next_unique_code_id;
     }
 }
 
-void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args, int n_locals, int n_stack, bool is_generator) {
+void rt_assign_byte_code(uint unique_code_id, byte *code, uint len, int n_args, int n_locals, int n_stack, bool is_generator) {
     alloc_unique_codes();
 
     assert(1 <= unique_code_id && unique_code_id < next_unique_code_id && unique_codes[unique_code_id].kind == MP_CODE_NONE);
     unique_codes[unique_code_id].kind = MP_CODE_BYTE;
-    unique_codes[unique_code_id].n_args = n_args;
-    unique_codes[unique_code_id].n_locals = n_locals;
-    unique_codes[unique_code_id].n_stack = n_stack;
     unique_codes[unique_code_id].is_generator = is_generator;
+    unique_codes[unique_code_id].n_args = n_args;
+    unique_codes[unique_code_id].n_state = n_locals + n_stack;
     unique_codes[unique_code_id].u_byte.code = code;
     unique_codes[unique_code_id].u_byte.len = len;
 
@@ -238,15 +241,14 @@ void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args, i
 #endif
 }
 
-void rt_assign_native_code(int unique_code_id, void *fun, uint len, int n_args) {
+void rt_assign_native_code(uint unique_code_id, void *fun, uint len, int n_args) {
     alloc_unique_codes();
 
     assert(1 <= unique_code_id && unique_code_id < next_unique_code_id && unique_codes[unique_code_id].kind == MP_CODE_NONE);
     unique_codes[unique_code_id].kind = MP_CODE_NATIVE;
-    unique_codes[unique_code_id].n_args = n_args;
-    unique_codes[unique_code_id].n_locals = 0;
-    unique_codes[unique_code_id].n_stack = 0;
     unique_codes[unique_code_id].is_generator = false;
+    unique_codes[unique_code_id].n_args = n_args;
+    unique_codes[unique_code_id].n_state = 0;
     unique_codes[unique_code_id].u_native.fun = fun;
 
     //printf("native code: %d bytes\n", len);
@@ -271,15 +273,14 @@ void rt_assign_native_code(int unique_code_id, void *fun, uint len, int n_args)
 #endif
 }
 
-void rt_assign_inline_asm_code(int unique_code_id, void *fun, uint len, int n_args) {
+void rt_assign_inline_asm_code(uint unique_code_id, void *fun, uint len, int n_args) {
     alloc_unique_codes();
 
     assert(1 <= unique_code_id && unique_code_id < next_unique_code_id && unique_codes[unique_code_id].kind == MP_CODE_NONE);
     unique_codes[unique_code_id].kind = MP_CODE_INLINE_ASM;
-    unique_codes[unique_code_id].n_args = n_args;
-    unique_codes[unique_code_id].n_locals = 0;
-    unique_codes[unique_code_id].n_stack = 0;
     unique_codes[unique_code_id].is_generator = false;
+    unique_codes[unique_code_id].n_args = n_args;
+    unique_codes[unique_code_id].n_state = 0;
     unique_codes[unique_code_id].u_inline_asm.fun = fun;
 
 #ifdef DEBUG_PRINT
@@ -678,7 +679,7 @@ mp_obj_t rt_make_function_from_id(int unique_code_id) {
     mp_obj_t fun;
     switch (c->kind) {
         case MP_CODE_BYTE:
-            fun = mp_obj_new_fun_bc(c->n_args, c->n_locals + c->n_stack, c->u_byte.code);
+            fun = mp_obj_new_fun_bc(c->n_args, c->n_state, c->u_byte.code);
             break;
         case MP_CODE_NATIVE:
             fun = rt_make_function_n(c->n_args, c->u_native.fun);
@@ -693,7 +694,7 @@ mp_obj_t rt_make_function_from_id(int unique_code_id) {
 
     // check for generator functions and if so wrap in generator object
     if (c->is_generator) {
-        fun = mp_obj_new_gen_wrap(c->n_locals, c->n_stack, fun);
+        fun = mp_obj_new_gen_wrap(fun);
     }
 
     return fun;
diff --git a/py/runtime0.h b/py/runtime0.h
index b7b5afaeb679d5aabfa1c867c30029ea403137b4..cd82b1412bb73ff129378ea793764ea0a0618628 100644
--- a/py/runtime0.h
+++ b/py/runtime0.h
@@ -78,7 +78,7 @@ extern void *const rt_fun_table[RT_F_NUMBER_OF];
 
 void rt_init(void);
 void rt_deinit(void);
-int rt_get_unique_code_id(void);
-void rt_assign_byte_code(int unique_code_id, byte *code, uint len, int n_args, int n_locals, int n_stack, bool is_generator);
-void rt_assign_native_code(int unique_code_id, void *f, uint len, int n_args);
-void rt_assign_inline_asm_code(int unique_code_id, void *f, uint len, int n_args);
+uint rt_get_unique_code_id(void); // returns the current id counter value and then increments the counter
+void rt_assign_byte_code(uint unique_code_id, byte *code, uint len, int n_args, int n_locals, int n_stack, bool is_generator); // records byte code for the id; n_locals + n_stack is stored as a single n_state value
+void rt_assign_native_code(uint unique_code_id, void *f, uint len, int n_args); // records f as native code for the id; n_state is stored as 0
+void rt_assign_inline_asm_code(uint unique_code_id, void *f, uint len, int n_args); // records f as inline-asm code for the id; n_state is stored as 0
diff --git a/py/vm.c b/py/vm.c
index cb4c6a8f73bf0013b52efbbbe6cb3adbc4582b41..8f5bb1ee5741fcef573c5ea79d0138b936aa6f63 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -80,7 +80,6 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob
     machine_uint_t unum;
     qstr qst;
     mp_obj_t obj1, obj2;
-    mp_obj_t fast0 = fastn[0], fast1 = fastn[-1], fast2 = fastn[-2];
     nlr_buf_t nlr;
 
     volatile machine_uint_t currently_in_except_block = 0; // 0 or 1, to detect nested exceptions
@@ -88,8 +87,6 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob
     machine_uint_t *volatile exc_sp = &exc_stack[0] - 1; // stack grows up, exc_sp points to top of stack
     const byte *volatile save_ip = ip; // this is so we can access ip in the exception handler without making ip volatile (which means the compiler can't keep it in a register in the main loop)
 
-    // TODO if an exception occurs, do fast[0,1,2] become invalid??
-
     // outer exception handling loop
     for (;;) {
         if (nlr_push(&nlr) == 0) {
@@ -146,15 +143,15 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob
                         break;
 
                     case MP_BC_LOAD_FAST_0:
-                        PUSH(fast0);
+                        PUSH(fastn[0]);
                         break;
 
                     case MP_BC_LOAD_FAST_1:
-                        PUSH(fast1);
+                        PUSH(fastn[-1]);
                         break;
 
                     case MP_BC_LOAD_FAST_2:
-                        PUSH(fast2);
+                        PUSH(fastn[-2]);
                         break;
 
                     case MP_BC_LOAD_FAST_N:
@@ -164,16 +161,7 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob
 
                     case MP_BC_LOAD_DEREF:
                         DECODE_UINT;
-                        if (unum == 0) {
-                            obj1 = fast0;
-                        } else if (unum == 1) {
-                            obj1 = fast1;
-                        } else if (unum == 2) {
-                            obj1 = fast2;
-                        } else {
-                            obj1 = fastn[-unum];
-                        }
-                        PUSH(rt_get_cell(obj1));
+                        PUSH(rt_get_cell(fastn[-unum]));
                         break;
 
                     case MP_BC_LOAD_NAME:
@@ -202,15 +190,15 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob
                         break;
 
                     case MP_BC_STORE_FAST_0:
-                        fast0 = POP();
+                        fastn[0] = POP();
                         break;
 
                     case MP_BC_STORE_FAST_1:
-                        fast1 = POP();
+                        fastn[-1] = POP();
                         break;
 
                     case MP_BC_STORE_FAST_2:
-                        fast2 = POP();
+                        fastn[-2] = POP();
                         break;
 
                     case MP_BC_STORE_FAST_N:
@@ -220,16 +208,7 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob
 
                     case MP_BC_STORE_DEREF:
                         DECODE_UINT;
-                        if (unum == 0) {
-                            obj1 = fast0;
-                        } else if (unum == 1) {
-                            obj1 = fast1;
-                        } else if (unum == 2) {
-                            obj1 = fast2;
-                        } else {
-                            obj1 = fastn[-unum];
-                        }
-                        rt_set_cell(obj1, POP());
+                        rt_set_cell(fastn[-unum], POP());
                         break;
 
                     case MP_BC_STORE_NAME:
@@ -511,9 +490,6 @@ bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_ob
                     case MP_BC_YIELD_VALUE:
                         nlr_pop();
                         *ip_in_out = ip;
-                        fastn[0] = fast0;
-                        fastn[-1] = fast1;
-                        fastn[-2] = fast2;
                         *sp_in_out = sp;
                         return true;