diff --git a/py/asmarm.h b/py/asmarm.h
index f72a7f732f55389938e17e4a338b1e32beba7616..3ee633c221ea99b0466a0ce3a1b3c95593f34788 100644
--- a/py/asmarm.h
+++ b/py/asmarm.h
@@ -124,6 +124,9 @@ void asm_arm_b_label(asm_arm_t *as, uint label);
 void asm_arm_bl_ind(asm_arm_t *as, void *fun_ptr, uint fun_id, uint reg_temp);
 void asm_arm_bx_reg(asm_arm_t *as, uint reg_src);
 
+// Holds a pointer to mp_fun_table
+#define ASM_ARM_REG_FUN_TABLE ASM_ARM_REG_R7
+
 #if GENERIC_ASM_API
 
 // The following macros provide a (mostly) arch-independent API to
@@ -146,6 +149,9 @@ void asm_arm_bx_reg(asm_arm_t *as, uint reg_src);
 #define REG_LOCAL_3 ASM_ARM_REG_R6
 #define REG_LOCAL_NUM (3)
 
+// Holds a pointer to mp_fun_table
+#define REG_FUN_TABLE ASM_ARM_REG_FUN_TABLE
+
 #define ASM_T               asm_arm_t
 #define ASM_END_PASS        asm_arm_end_pass
 #define ASM_ENTRY           asm_arm_entry
diff --git a/py/asmthumb.c b/py/asmthumb.c
index 54b539a8d9e1d7ee2713d8ee88530992af2d8d7e..1ef09c78e2c4b32006aec48600c0b17936a21145 100644
--- a/py/asmthumb.c
+++ b/py/asmthumb.c
@@ -383,7 +383,7 @@ void asm_thumb_bcc_label(asm_thumb_t *as, int cond, uint label) {
 
 void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp) {
     // Load ptr to function from table, indexed by fun_id, then call it
-    asm_thumb_ldr_reg_reg_i12_optimised(as, reg_temp, ASM_THUMB_REG_R7, fun_id);
+    asm_thumb_ldr_reg_reg_i12_optimised(as, reg_temp, ASM_THUMB_REG_FUN_TABLE, fun_id);
     asm_thumb_op16(as, OP_BLX(reg_temp));
 }
 
diff --git a/py/asmthumb.h b/py/asmthumb.h
index 83aec0287bc939137ae1ed4f81767e9fbf20fabe..0fd39120e5465a310f5569bcd4e20aaaa03a3217 100644
--- a/py/asmthumb.h
+++ b/py/asmthumb.h
@@ -261,6 +261,9 @@ void asm_thumb_b_label(asm_thumb_t *as, uint label); // convenience: picks narro
 void asm_thumb_bcc_label(asm_thumb_t *as, int cc, uint label); // convenience: picks narrow or wide branch
 void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp); // convenience
 
+// Holds a pointer to mp_fun_table
+#define ASM_THUMB_REG_FUN_TABLE ASM_THUMB_REG_R7
+
 #if GENERIC_ASM_API
 
 // The following macros provide a (mostly) arch-independent API to
@@ -284,6 +287,9 @@ void asm_thumb_bl_ind(asm_thumb_t *as, void *fun_ptr, uint fun_id, uint reg_temp
 #define REG_LOCAL_3 ASM_THUMB_REG_R6
 #define REG_LOCAL_NUM (3)
 
+// Holds a pointer to mp_fun_table
+#define REG_FUN_TABLE ASM_THUMB_REG_FUN_TABLE
+
 #define ASM_T               asm_thumb_t
 #define ASM_END_PASS        asm_thumb_end_pass
 #define ASM_ENTRY           asm_thumb_entry
diff --git a/py/asmx64.c b/py/asmx64.c
index 9cd2fc64cd0e1d3102e271d88ea36b8d1737a00b..3609f49d30706e065b47978d2fa5e69bfe73efa3 100644
--- a/py/asmx64.c
+++ b/py/asmx64.c
@@ -623,7 +623,7 @@ void asm_x64_call_i1(asm_x64_t *as, void* func, int i1) {
 
 void asm_x64_call_ind(asm_x64_t *as, size_t fun_id, int temp_r64) {
     assert(temp_r64 < 8);
-    asm_x64_mov_mem64_to_r64(as, ASM_X64_REG_RBP, fun_id * WORD_SIZE, temp_r64);
+    asm_x64_mov_mem64_to_r64(as, ASM_X64_REG_FUN_TABLE, fun_id * WORD_SIZE, temp_r64);
     asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64));
 }
 
diff --git a/py/asmx64.h b/py/asmx64.h
index f40b127e52a2a4f776d9f209b479f900e03cec53..76e3ad55662b4a0a74b579dd77319faf0dbbf7c9 100644
--- a/py/asmx64.h
+++ b/py/asmx64.h
@@ -116,6 +116,9 @@ void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64);
 void asm_x64_mov_reg_pcrel(asm_x64_t *as, int dest_r64, mp_uint_t label);
 void asm_x64_call_ind(asm_x64_t* as, size_t fun_id, int temp_r32);
 
+// Holds a pointer to mp_fun_table
+#define ASM_X64_REG_FUN_TABLE ASM_X64_REG_RBP
+
 #if GENERIC_ASM_API
 
 // The following macros provide a (mostly) arch-independent API to
@@ -141,6 +144,9 @@ void asm_x64_call_ind(asm_x64_t* as, size_t fun_id, int temp_r32);
 #define REG_LOCAL_3 ASM_X64_REG_R13
 #define REG_LOCAL_NUM (3)
 
+// Holds a pointer to mp_fun_table
+#define REG_FUN_TABLE ASM_X64_REG_FUN_TABLE
+
 #define ASM_T               asm_x64_t
 #define ASM_END_PASS        asm_x64_end_pass
 #define ASM_ENTRY           asm_x64_entry
diff --git a/py/asmx86.c b/py/asmx86.c
index 60917fdeb76a3bc64e560a3478335deb20b17ea5..8ce576ac89dfcfb4f7be04e3a5a0baf44537c615 100644
--- a/py/asmx86.c
+++ b/py/asmx86.c
@@ -514,7 +514,7 @@ void asm_x86_call_ind(asm_x86_t *as, size_t fun_id, mp_uint_t n_args, int temp_r
     }
 
     // Load the pointer to the function and make the call
-    asm_x86_mov_mem32_to_r32(as, ASM_X86_REG_EBP, fun_id * WORD_SIZE, temp_r32);
+    asm_x86_mov_mem32_to_r32(as, ASM_X86_REG_FUN_TABLE, fun_id * WORD_SIZE, temp_r32);
     asm_x86_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R32(2) | MODRM_RM_REG | MODRM_RM_R32(temp_r32));
 
     // the caller must clean up the stack
diff --git a/py/asmx86.h b/py/asmx86.h
index a5535b5488fb6beecfe4af07613bdac0de3629da..1e3d3170a843a9d7ab6f6317276822ea9832cceb 100644
--- a/py/asmx86.h
+++ b/py/asmx86.h
@@ -114,6 +114,9 @@ void asm_x86_mov_local_addr_to_r32(asm_x86_t* as, int local_num, int dest_r32);
 void asm_x86_mov_reg_pcrel(asm_x86_t *as, int dest_r64, mp_uint_t label);
 void asm_x86_call_ind(asm_x86_t* as, size_t fun_id, mp_uint_t n_args, int temp_r32);
 
+// Holds a pointer to mp_fun_table
+#define ASM_X86_REG_FUN_TABLE ASM_X86_REG_EBP
+
 #if GENERIC_ASM_API
 
 // The following macros provide a (mostly) arch-independent API to
@@ -139,6 +142,9 @@ void asm_x86_call_ind(asm_x86_t* as, size_t fun_id, mp_uint_t n_args, int temp_r
 #define REG_LOCAL_3 ASM_X86_REG_EDI
 #define REG_LOCAL_NUM (3)
 
+// Holds a pointer to mp_fun_table
+#define REG_FUN_TABLE ASM_X86_REG_FUN_TABLE
+
 #define ASM_T               asm_x86_t
 #define ASM_END_PASS        asm_x86_end_pass
 #define ASM_ENTRY           asm_x86_entry
diff --git a/py/asmxtensa.c b/py/asmxtensa.c
index 6a3a874f1668e3ed3a6b9387437c60dc60ddeaeb..8da56ffe30ef377c358f40cda25a8d2ea03e0459 100644
--- a/py/asmxtensa.c
+++ b/py/asmxtensa.c
@@ -213,9 +213,9 @@ void asm_xtensa_mov_reg_pcrel(asm_xtensa_t *as, uint reg_dest, uint label) {
 
 void asm_xtensa_call_ind(asm_xtensa_t *as, uint idx) {
     if (idx < 16) {
-        asm_xtensa_op_l32i_n(as, ASM_XTENSA_REG_A0, ASM_XTENSA_REG_A15, idx);
+        asm_xtensa_op_l32i_n(as, ASM_XTENSA_REG_A0, ASM_XTENSA_REG_FUN_TABLE, idx);
     } else {
-        asm_xtensa_op_l32i(as, ASM_XTENSA_REG_A0, ASM_XTENSA_REG_A15, idx);
+        asm_xtensa_op_l32i(as, ASM_XTENSA_REG_A0, ASM_XTENSA_REG_FUN_TABLE, idx);
     }
     asm_xtensa_op_callx0(as, ASM_XTENSA_REG_A0);
 }
diff --git a/py/asmxtensa.h b/py/asmxtensa.h
index 07c3aa8192e4c35c7bcd3bde93b00baaf25d650f..c348b854b809700bdb28272759458a0e6d437701 100644
--- a/py/asmxtensa.h
+++ b/py/asmxtensa.h
@@ -245,6 +245,9 @@ void asm_xtensa_mov_reg_local_addr(asm_xtensa_t *as, uint reg_dest, int local_nu
 void asm_xtensa_mov_reg_pcrel(asm_xtensa_t *as, uint reg_dest, uint label);
 void asm_xtensa_call_ind(asm_xtensa_t *as, uint idx);
 
+// Holds a pointer to mp_fun_table
+#define ASM_XTENSA_REG_FUN_TABLE ASM_XTENSA_REG_A15
+
 #if GENERIC_ASM_API
 
 // The following macros provide a (mostly) arch-independent API to
@@ -268,6 +271,9 @@ void asm_xtensa_call_ind(asm_xtensa_t *as, uint idx);
 #define REG_LOCAL_3 ASM_XTENSA_REG_A14
 #define REG_LOCAL_NUM (3)
 
+// Holds a pointer to mp_fun_table
+#define REG_FUN_TABLE ASM_XTENSA_REG_FUN_TABLE
+
 #define ASM_T               asm_xtensa_t
 #define ASM_END_PASS        asm_xtensa_end_pass
 #define ASM_ENTRY           asm_xtensa_entry
diff --git a/py/emitnative.c b/py/emitnative.c
index 4d6c3445f45a036a135a7fcfb7870860a4311dd8..26af7f94786c04d6e9a3b31e38319b16a6722c02 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -287,7 +287,7 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
     emit->pass = pass;
     emit->do_viper_types = scope->emit_options == MP_EMIT_OPT_VIPER;
     emit->stack_size = 0;
-    emit->const_table_cur_obj = 0;
+    emit->const_table_cur_obj = 1; // first entry is for mp_fun_table
     emit->const_table_cur_raw_code = 0;
     emit->last_emit_was_return_value = false;
     emit->scope = scope;
@@ -372,24 +372,16 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
         // Entry to function
         ASM_ENTRY(emit->as, emit->stack_start + emit->n_state - num_locals_in_regs);
 
-        // TODO don't load r7 if we don't need it
-        #if N_THUMB
-        asm_thumb_mov_reg_i32(emit->as, ASM_THUMB_REG_R7, (mp_uint_t)mp_fun_table);
-        #elif N_ARM
-        asm_arm_mov_reg_i32(emit->as, ASM_ARM_REG_R7, (mp_uint_t)mp_fun_table);
-        #elif N_XTENSA
-        ASM_MOV_REG_IMM(emit->as, ASM_XTENSA_REG_A15, (uint32_t)mp_fun_table);
-        #elif N_X86
-        asm_x86_mov_i32_to_r32(emit->as, (intptr_t)mp_fun_table, ASM_X86_REG_EBP);
-        #elif N_X64
-        asm_x64_mov_i64_to_r64_optimised(emit->as, (intptr_t)mp_fun_table, ASM_X64_REG_RBP);
+        #if N_X86
+        asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1);
         #endif
 
+        // Load REG_FUN_TABLE with a pointer to mp_fun_table, found in the const_table
+        ASM_LOAD_REG_REG_OFFSET(emit->as, REG_LOCAL_3, REG_ARG_1, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t));
+        ASM_LOAD_REG_REG_OFFSET(emit->as, REG_FUN_TABLE, REG_LOCAL_3, 0);
+
         // Store function object (passed as first arg) to stack if needed
         if (NEED_FUN_OBJ(emit)) {
-            #if N_X86
-            asm_x86_mov_arg_to_r32(emit->as, 0, REG_ARG_1);
-            #endif
             ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_FUN_OBJ(emit), REG_ARG_1);
         }
 
@@ -458,28 +450,18 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
             asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_2);
             #endif
             ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_VAL(emit), REG_ARG_2);
+
+            // Load REG_FUN_TABLE with a pointer to mp_fun_table, found in the const_table
+            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_TEMP0, REG_GENERATOR_STATE, LOCAL_IDX_FUN_OBJ(emit));
+            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_TEMP0, REG_TEMP0, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t));
+            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_FUN_TABLE, REG_TEMP0, emit->scope->num_pos_args + emit->scope->num_kwonly_args);
         } else {
             // The locals and stack start after the code_state structure
             emit->stack_start = emit->code_state_start + sizeof(mp_code_state_t) / sizeof(mp_uint_t);
 
             // Allocate space on C-stack for code_state structure, which includes state
             ASM_ENTRY(emit->as, emit->stack_start + emit->n_state);
-        }
-
-        // TODO don't load r7 if we don't need it
-        #if N_THUMB
-        asm_thumb_mov_reg_i32(emit->as, ASM_THUMB_REG_R7, (mp_uint_t)mp_fun_table);
-        #elif N_ARM
-        asm_arm_mov_reg_i32(emit->as, ASM_ARM_REG_R7, (mp_uint_t)mp_fun_table);
-        #elif N_XTENSA
-        ASM_MOV_REG_IMM(emit->as, ASM_XTENSA_REG_A15, (uint32_t)mp_fun_table);
-        #elif N_X86
-        asm_x86_mov_i32_to_r32(emit->as, (intptr_t)mp_fun_table, ASM_X86_REG_EBP);
-        #elif N_X64
-        asm_x64_mov_i64_to_r64_optimised(emit->as, (intptr_t)mp_fun_table, ASM_X64_REG_RBP);
-        #endif
 
-        if (!(emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR)) {
             // Prepare incoming arguments for call to mp_setup_code_state
 
             #if N_X86
@@ -489,6 +471,10 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
             asm_x86_mov_arg_to_r32(emit->as, 3, REG_ARG_4);
             #endif
 
+            // Load REG_FUN_TABLE with a pointer to mp_fun_table, found in the const_table
+            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_LOCAL_3, REG_ARG_1, offsetof(mp_obj_fun_bc_t, const_table) / sizeof(uintptr_t));
+            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_FUN_TABLE, REG_LOCAL_3, emit->scope->num_pos_args + emit->scope->num_kwonly_args);
+
             // Set code_state.fun_bc
             ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_FUN_OBJ(emit), REG_ARG_1);
 
@@ -591,11 +577,15 @@ STATIC void emit_native_end_pass(emit_t *emit) {
     emit->const_table_num_obj = emit->const_table_cur_obj;
     if (emit->pass == MP_PASS_CODE_SIZE) {
         size_t const_table_alloc = emit->const_table_num_obj + emit->const_table_cur_raw_code;
+        size_t nqstr = 0;
         if (!emit->do_viper_types) {
             // Add room for qstr names of arguments
-            const_table_alloc += emit->scope->num_pos_args + emit->scope->num_kwonly_args;
+            nqstr = emit->scope->num_pos_args + emit->scope->num_kwonly_args;
+            const_table_alloc += nqstr;
         }
         emit->const_table = m_new(mp_uint_t, const_table_alloc);
+        // Store mp_fun_table pointer just after qstrs
+        emit->const_table[nqstr] = (mp_uint_t)(uintptr_t)mp_fun_table;
     }
 
     if (emit->pass == MP_PASS_EMIT) {