diff --git a/py/bc.h b/py/bc.h
index 153851be8737206e9568f09a721cf55502f17466..065daece470ca6cfe9774faa78184e2f1f614060 100644
--- a/py/bc.h
+++ b/py/bc.h
@@ -14,7 +14,7 @@ typedef struct _mp_exc_stack {
     byte opcode;
 } mp_exc_stack;
 
-mp_vm_return_kind_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_args, const mp_obj_t *args2, uint n_args2, uint n_state, mp_obj_t *ret);
+mp_vm_return_kind_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_args, const mp_obj_t *args2, uint n_args2, mp_obj_t *ret);
 mp_vm_return_kind_t mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t **sp_in_out, mp_exc_stack *exc_stack, mp_exc_stack **exc_sp_in_out, volatile mp_obj_t inject_exc);
 void mp_byte_code_print(const byte *code, int len);
 
diff --git a/py/emitbc.c b/py/emitbc.c
index fcaa9b7fae0cd2f03480cea69e2ad0f562532c92..1516b41e5bea7579e304754e04bad223cccd4e28 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -224,11 +224,14 @@ STATIC void emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) {
     emit_write_code_info_qstr(emit, scope->source_file);
     emit_write_code_info_qstr(emit, scope->simple_name);
 
-    // bytecode prelude: exception stack size; 16 bit uint for now
+    // bytecode prelude: state size (num_locals + stack_size), then exception stack size; each stored as a 16-bit little-endian uint for now
     {
-        byte* c = emit_get_cur_to_write_byte_code(emit, 2);
-        c[0] = scope->exc_stack_size & 0xff;
-        c[1] = (scope->exc_stack_size >> 8) & 0xff;
+        byte* c = emit_get_cur_to_write_byte_code(emit, 4);
+        uint n_state = scope->num_locals + scope->stack_size;
+        c[0] = n_state & 0xff;
+        c[1] = (n_state >> 8) & 0xff;
+        c[2] = scope->exc_stack_size & 0xff;
+        c[3] = (scope->exc_stack_size >> 8) & 0xff;
     }
 
     // bytecode prelude: initialise closed over variables
diff --git a/py/obj.h b/py/obj.h
index 12f8cabf48455565efa27b48098cff8de615265a..f816b8544ded294ab486ae96a055ff2ed4bdfeff 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -257,10 +257,10 @@ mp_obj_t mp_obj_new_exception_msg(const mp_obj_type_t *exc_type, const char *msg
 mp_obj_t mp_obj_new_exception_msg_varg(const mp_obj_type_t *exc_type, const char *fmt, ...); // counts args by number of % symbols in fmt, excluding %%; can only handle void* sizes (ie no float/double!)
 mp_obj_t mp_obj_new_range(int start, int stop, int step);
 mp_obj_t mp_obj_new_range_iterator(int cur, int stop, int step);
-mp_obj_t mp_obj_new_fun_bc(uint scope_flags, qstr *args, uint n_args, mp_obj_t def_args, uint n_state, const byte *code);
+mp_obj_t mp_obj_new_fun_bc(uint scope_flags, qstr *args, uint n_args, mp_obj_t def_args, const byte *code);
 mp_obj_t mp_obj_new_fun_asm(uint n_args, void *fun);
 mp_obj_t mp_obj_new_gen_wrap(mp_obj_t fun);
-mp_obj_t mp_obj_new_gen_instance(const byte *bytecode, uint n_state, int n_args, const mp_obj_t *args);
+mp_obj_t mp_obj_new_gen_instance(const byte *bytecode, int n_args, const mp_obj_t *args);
 mp_obj_t mp_obj_new_closure(mp_obj_t fun, mp_obj_t closure_tuple);
 mp_obj_t mp_obj_new_tuple(uint n, const mp_obj_t *items);
 mp_obj_t mp_obj_new_list(uint n, mp_obj_t *items);
@@ -419,7 +419,7 @@ typedef struct _mp_obj_fun_native_t { // need this so we can define const object
 
 extern const mp_obj_type_t fun_native_type;
 extern const mp_obj_type_t fun_bc_type;
-void mp_obj_fun_bc_get(mp_obj_t self_in, int *n_args, uint *n_state, const byte **code);
+void mp_obj_fun_bc_get(mp_obj_t self_in, int *n_args, const byte **code);
 
 mp_obj_t mp_identity(mp_obj_t self);
 MP_DECLARE_CONST_FUN_OBJ(mp_identity_obj);
diff --git a/py/objfun.c b/py/objfun.c
index 7d49f18da4faea49afed78ab27e57148ce26eabe..e626c152a241ab5c545734af5ea47b3890b6f820 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -147,7 +147,6 @@ typedef struct _mp_obj_fun_bc_t {
     machine_uint_t n_def_args : 15;     // number of default arguments
     machine_uint_t takes_var_args : 1;  // set if this function takes variable args
     machine_uint_t takes_kw_args : 1;   // set if this function takes keyword args
-    uint n_state;           // total state size for the executing function (incl args, locals, stack)
     const byte *bytecode;   // bytecode for the function
     qstr *args;             // argument names (needed to resolve positional args passed as keywords)
     mp_obj_t extra_args[];  // values of default args (if any), plus a slot at the end for var args and/or kw args (if it takes them)
@@ -285,7 +284,7 @@ continue2:;
     DEBUG_printf("Calling: args=%p, n_args=%d, extra_args=%p, n_extra_args=%d\n", args, n_args, extra_args, n_extra_args);
     dump_args(args, n_args);
     dump_args(extra_args, n_extra_args);
-    mp_vm_return_kind_t vm_return_kind = mp_execute_byte_code(self->bytecode, args, n_args, extra_args, n_extra_args, self->n_state, &result);
+    mp_vm_return_kind_t vm_return_kind = mp_execute_byte_code(self->bytecode, args, n_args, extra_args, n_extra_args, &result);
     rt_globals_set(old_globals);
 
     if (vm_return_kind == MP_VM_RETURN_NORMAL) {
@@ -304,7 +303,7 @@ const mp_obj_type_t fun_bc_type = {
     .call = fun_bc_call,
 };
 
-mp_obj_t mp_obj_new_fun_bc(uint scope_flags, qstr *args, uint n_args, mp_obj_t def_args_in, uint n_state, const byte *code) {
+mp_obj_t mp_obj_new_fun_bc(uint scope_flags, qstr *args, uint n_args, mp_obj_t def_args_in, const byte *code) {
     uint n_def_args = 0;
     uint n_extra_args = 0;
     mp_obj_tuple_t *def_args = def_args_in;
@@ -326,7 +325,6 @@ mp_obj_t mp_obj_new_fun_bc(uint scope_flags, qstr *args, uint n_args, mp_obj_t d
     o->n_def_args = n_def_args;
     o->takes_var_args = (scope_flags & MP_SCOPE_FLAG_VARARGS) != 0;
     o->takes_kw_args = (scope_flags & MP_SCOPE_FLAG_VARKEYWORDS) != 0;
-    o->n_state = n_state;
     o->bytecode = code;
     if (def_args != MP_OBJ_NULL) {
         memcpy(o->extra_args, def_args->items, n_def_args * sizeof(mp_obj_t));
@@ -334,11 +332,10 @@ mp_obj_t mp_obj_new_fun_bc(uint scope_flags, qstr *args, uint n_args, mp_obj_t d
     return o;
 }
 
-void mp_obj_fun_bc_get(mp_obj_t self_in, int *n_args, uint *n_state, const byte **code) {
+void mp_obj_fun_bc_get(mp_obj_t self_in, int *n_args, const byte **code) {
     assert(MP_OBJ_IS_TYPE(self_in, &fun_bc_type));
     mp_obj_fun_bc_t *self = self_in;
     *n_args = self->n_args;
-    *n_state = self->n_state;
     *code = self->bytecode;
 }
 
diff --git a/py/objgenerator.c b/py/objgenerator.c
index 29ef4e235f95c454b69c46a15cd693fd2b938965..aeb5f6219a03b8b4d6d0f814cb864015512813db 100644
--- a/py/objgenerator.c
+++ b/py/objgenerator.c
@@ -24,9 +24,8 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp
     mp_obj_t self_fun = self->fun;
     assert(MP_OBJ_IS_TYPE(self_fun, &fun_bc_type));
     int bc_n_args;
-    uint bc_n_state;
     const byte *bc_code;
-    mp_obj_fun_bc_get(self_fun, &bc_n_args, &bc_n_state, &bc_code);
+    mp_obj_fun_bc_get(self_fun, &bc_n_args, &bc_code);
     if (n_args != bc_n_args) {
         nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "function takes %d positional arguments but %d were given", bc_n_args, n_args));
     }
@@ -34,7 +33,7 @@ STATIC mp_obj_t gen_wrap_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp
         nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "function does not take keyword arguments"));
     }
 
-    return mp_obj_new_gen_instance(bc_code, bc_n_state, n_args, args);
+    return mp_obj_new_gen_instance(bc_code, n_args, args);
 }
 
 const mp_obj_type_t gen_wrap_type = {
@@ -210,14 +209,15 @@ const mp_obj_type_t gen_instance_type = {
     .locals_dict = (mp_obj_t)&gen_instance_locals_dict,
 };
 
-mp_obj_t mp_obj_new_gen_instance(const byte *bytecode, uint n_state, int n_args, const mp_obj_t *args) {
+mp_obj_t mp_obj_new_gen_instance(const byte *bytecode, int n_args, const mp_obj_t *args) {
     // get code info size, and skip the line number table
     machine_uint_t code_info_size = bytecode[0] | (bytecode[1] << 8) | (bytecode[2] << 16) | (bytecode[3] << 24);
     bytecode += code_info_size;
 
-    // bytecode prelude: get exception stack size
-    machine_uint_t n_exc_stack = bytecode[0] | (bytecode[1] << 8);
-    bytecode += 2;
+    // bytecode prelude: get state size, then exception stack size (two 16-bit little-endian uints)
+    machine_uint_t n_state = bytecode[0] | (bytecode[1] << 8);
+    machine_uint_t n_exc_stack = bytecode[2] | (bytecode[3] << 8);
+    bytecode += 4;
 
     // bytecode prelude: initialise closed over variables
     // TODO
diff --git a/py/runtime.c b/py/runtime.c
index 3fc7d6ac083e017a478905e8a6b713ab12300099..60e975687dc5dcb0ebb8902b6e605e6a170beb78 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -48,7 +48,6 @@ typedef struct _mp_code_t {
     mp_code_kind_t kind : 8;
     uint scope_flags : 8;
     uint n_args : 16;
-    uint n_state : 16;
     union {
         struct {
             byte *code;
@@ -147,7 +146,6 @@ void rt_assign_byte_code(uint unique_code_id, byte *code, uint len, int n_args,
     unique_codes[unique_code_id].kind = MP_CODE_BYTE;
     unique_codes[unique_code_id].scope_flags = scope_flags;
     unique_codes[unique_code_id].n_args = n_args;
-    unique_codes[unique_code_id].n_state = n_locals + n_stack;
     unique_codes[unique_code_id].u_byte.code = code;
     unique_codes[unique_code_id].u_byte.len = len;
     unique_codes[unique_code_id].arg_names = arg_names;
@@ -176,7 +174,6 @@ void rt_assign_native_code(uint unique_code_id, void *fun, uint len, int n_args)
     unique_codes[unique_code_id].kind = MP_CODE_NATIVE;
     unique_codes[unique_code_id].scope_flags = 0;
     unique_codes[unique_code_id].n_args = n_args;
-    unique_codes[unique_code_id].n_state = 0;
     unique_codes[unique_code_id].u_native.fun = fun;
 
     //printf("native code: %d bytes\n", len);
@@ -208,7 +205,6 @@ void rt_assign_inline_asm_code(uint unique_code_id, void *fun, uint len, int n_a
     unique_codes[unique_code_id].kind = MP_CODE_INLINE_ASM;
     unique_codes[unique_code_id].scope_flags = 0;
     unique_codes[unique_code_id].n_args = n_args;
-    unique_codes[unique_code_id].n_state = 0;
     unique_codes[unique_code_id].u_inline_asm.fun = fun;
 
 #ifdef DEBUG_PRINT
@@ -662,7 +658,7 @@ mp_obj_t rt_make_function_from_id(int unique_code_id, mp_obj_t def_args) {
     mp_obj_t fun;
     switch (c->kind) {
         case MP_CODE_BYTE:
-            fun = mp_obj_new_fun_bc(c->scope_flags, c->arg_names, c->n_args, def_args, c->n_state, c->u_byte.code);
+            fun = mp_obj_new_fun_bc(c->scope_flags, c->arg_names, c->n_args, def_args, c->u_byte.code);
             break;
         case MP_CODE_NATIVE:
             fun = rt_make_function_n(c->n_args, c->u_native.fun);
diff --git a/py/vm.c b/py/vm.c
index 7e515a2279ce1bc3b5b4f5bb8df7c4f01ac8159b..f939a7fdfd9205374b3756d38004720d342adaef 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -44,16 +44,17 @@ typedef enum {
 #define TOP() (*sp)
 #define SET_TOP(val) *sp = (val)
 
-mp_vm_return_kind_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_args, const mp_obj_t *args2, uint n_args2, uint n_state, mp_obj_t *ret) {
+mp_vm_return_kind_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_args, const mp_obj_t *args2, uint n_args2, mp_obj_t *ret) {
     const byte *ip = code;
 
     // get code info size, and skip line number table
     machine_uint_t code_info_size = ip[0] | (ip[1] << 8) | (ip[2] << 16) | (ip[3] << 24);
     ip += code_info_size;
 
-    // bytecode prelude: exception stack size; 16 bit uint for now
-    machine_uint_t n_exc_stack = ip[0] | (ip[1] << 8);
-    ip += 2;
+    // bytecode prelude: state size, then exception stack size; 16-bit little-endian uints
+    machine_uint_t n_state = ip[0] | (ip[1] << 8);
+    machine_uint_t n_exc_stack = ip[2] | (ip[3] << 8);
+    ip += 4;
 
     // allocate state for locals and stack
     mp_obj_t temp_state[10];