diff --git a/py/emitbc.c b/py/emitbc.c
index 7fd0f8abcb3a56af57e377b8d99aeb7113161be6..f242828e621456e56c329ae534fd26253f09834e 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -13,7 +13,7 @@
 #include "scope.h"
 #include "runtime.h"
 #include "emit.h"
-#include "vm.h"
+#include "bc.h"
 
 struct _emit_t {
     pass_kind_t pass;
diff --git a/py/emitnative.c b/py/emitnative.c
index 2687e4fba659c368ba3c82abf51d1ed8d86f222c..db375181af89d1758de2a737d7817a77cd4eac37 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -59,7 +59,7 @@
 #define REG_TEMP2 (REG_RSI)
 #define ASM_MOV_REG_TO_LOCAL(reg, local_num) asm_x64_mov_r64_to_local(emit->as, (reg), (local_num))
 #define ASM_MOV_IMM_TO_REG(imm, reg) asm_x64_mov_i64_to_r64_optimised(emit->as, (imm), (reg))
-#define ASM_MOV_IMM_TO_LOCAL(imm, local_num) do { asm_x64_mov_i64_to_r64_optimised(emit->as, (imm), REG_RAX); asm_x64_mov_r64_to_local(emit->as, REG_RAX, (local_num)); } while (false)
+#define ASM_MOV_IMM_TO_LOCAL_USING(imm, local_num, reg_temp) do { asm_x64_mov_i64_to_r64_optimised(emit->as, (imm), (reg_temp)); asm_x64_mov_r64_to_local(emit->as, (reg_temp), (local_num)); } while (false)
 #define ASM_MOV_LOCAL_TO_REG(local_num, reg) asm_x64_mov_local_to_r64(emit->as, (local_num), (reg))
 #define ASM_MOV_REG_TO_REG(reg_src, reg_dest) asm_x64_mov_r64_to_r64(emit->as, (reg_src), (reg_dest))
 #define ASM_MOV_LOCAL_ADDR_TO_REG(local_num, reg) asm_x64_mov_local_addr_to_r64(emit->as, (local_num), (reg))
@@ -82,7 +82,7 @@
 #define REG_TEMP2 (REG_R2)
 #define ASM_MOV_REG_TO_LOCAL(reg, local_num) asm_thumb_mov_local_reg(emit->as, (local_num), (reg))
 #define ASM_MOV_IMM_TO_REG(imm, reg) asm_thumb_mov_reg_i32_optimised(emit->as, (reg), (imm))
-#define ASM_MOV_IMM_TO_LOCAL(imm, local_num) do { asm_thumb_mov_reg_i32_optimised(emit->as, REG_R0, (imm)); asm_thumb_mov_local_reg(emit->as, (local_num), REG_R0); } while (false)
+#define ASM_MOV_IMM_TO_LOCAL_USING(imm, local_num, reg_temp) do { asm_thumb_mov_reg_i32_optimised(emit->as, (reg_temp), (imm)); asm_thumb_mov_local_reg(emit->as, (local_num), (reg_temp)); } while (false)
 #define ASM_MOV_LOCAL_TO_REG(local_num, reg) asm_thumb_mov_reg_local(emit->as, (reg), (local_num))
 #define ASM_MOV_REG_TO_REG(reg_src, reg_dest) asm_thumb_mov_reg_reg(emit->as, (reg_dest), (reg_src))
 #define ASM_MOV_LOCAL_ADDR_TO_REG(local_num, reg) asm_thumb_mov_reg_local_addr(emit->as, (reg), (local_num))
@@ -344,7 +344,7 @@ static void need_reg_single(emit_t *emit, int reg_needed) {
     }
 }
 
-static void need_reg_all(emit_t *emit, int num_stack_top_that_must_be_value) {
+static void need_reg_all(emit_t *emit) {
     for (int i = 0; i < emit->stack_size; i++) {
         stack_info_t *si = &emit->stack_info[i];
         if (si->kind == STACK_REG) {
@@ -352,14 +352,6 @@ static void need_reg_all(emit_t *emit, int num_stack_top_that_must_be_value) {
             ASM_MOV_REG_TO_LOCAL(si->u_reg, emit->stack_start + i);
         }
     }
-    // must do this after making all registers available because ASM_MOV_IMM_TO_LOCAL uses a temporary register
-    for (int i = 0; i < num_stack_top_that_must_be_value; i++) {
-        stack_info_t *si = &emit->stack_info[emit->stack_size - 1 - i];
-        if (si->kind == STACK_IMM) {
-            si->kind = STACK_VALUE;
-            ASM_MOV_IMM_TO_LOCAL(si->u_imm, emit->stack_start + emit->stack_size - 1 - i);
-        }
-    }
 }
 
 static void emit_pre_pop_reg(emit_t *emit, vtype_kind_t *vtype, int reg_dest) {
@@ -434,11 +426,18 @@ static void emit_post_push_reg_reg_reg_reg(emit_t *emit, vtype_kind_t vtypea, in
 }
 
 // vtype of all n_pop objects is VTYPE_PYOBJ
+// does not use any temporary registers (but may use reg_dest before loading it with stack pointer)
 static void emit_get_stack_pointer_to_reg_for_pop(emit_t *emit, int reg_dest, int n_pop) {
-    need_reg_all(emit, n_pop);
+    need_reg_all(emit);
     for (int i = 0; i < n_pop; i++) {
-        assert(emit->stack_info[emit->stack_size - 1 - i].kind == STACK_VALUE);
-        assert(emit->stack_info[emit->stack_size - 1 - i].vtype == VTYPE_PYOBJ);
+        stack_info_t *si = &emit->stack_info[emit->stack_size - 1 - i];
+        // every immediate must be written out to its stack slot; reg_dest is free to serve as the scratch register here
+        if (si->kind == STACK_IMM) {
+            si->kind = STACK_VALUE;
+            ASM_MOV_IMM_TO_LOCAL_USING(si->u_imm, emit->stack_start + emit->stack_size - 1 - i, reg_dest);
+        }
+        assert(si->kind == STACK_VALUE);
+        assert(si->vtype == VTYPE_PYOBJ);
     }
     ASM_MOV_LOCAL_ADDR_TO_REG(emit->stack_start + emit->stack_size - 1, reg_dest);
     adjust_stack(emit, -n_pop);
@@ -446,7 +445,7 @@ static void emit_get_stack_pointer_to_reg_for_pop(emit_t *emit, int reg_dest, in
 
 // vtype of all n_push objects is VTYPE_PYOBJ
 static void emit_get_stack_pointer_to_reg_for_push(emit_t *emit, int reg_dest, int n_push) {
-    need_reg_all(emit, 0);
+    need_reg_all(emit);
     for (int i = 0; i < n_push; i++) {
         emit->stack_info[emit->stack_size + i].kind = STACK_VALUE;
         emit->stack_info[emit->stack_size + i].vtype = VTYPE_PYOBJ;
@@ -464,7 +463,7 @@ static void emit_call(emit_t *emit, rt_fun_kind_t fun_kind, void *fun) {
 }
 
 static void emit_call_with_imm_arg(emit_t *emit, rt_fun_kind_t fun_kind, void *fun, machine_int_t arg_val, int arg_reg) {
-    need_reg_all(emit, 0);
+    need_reg_all(emit);
     ASM_MOV_IMM_TO_REG(arg_val, arg_reg);
     emit_call(emit, fun_kind, fun);
 }
@@ -1049,6 +1048,7 @@ static void emit_native_make_closure(emit_t *emit, scope_t *scope, int n_dict_pa
 static void emit_native_call_function(emit_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
     // call special viper runtime routine with type info for args, and wanted type info for return
     assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    /*
     if (n_positional == 0) {
         vtype_kind_t vtype_fun;
         emit_pre_pop_reg(emit, &vtype_fun, REG_ARG_1); // the function
@@ -1068,13 +1068,22 @@ static void emit_native_call_function(emit_t *emit, int n_positional, int n_keyw
         assert(vtype_arg2 == VTYPE_PYOBJ);
         emit_call(emit, RT_F_CALL_FUNCTION_2, rt_call_function_2);
     } else {
-        assert(0);
-    }
+    */
+        emit_pre(emit);
+        if (n_positional != 0) {
+            emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_3, n_positional); // pointer to args in reverse order
+        }
+        vtype_kind_t vtype_fun;
+        emit_pre_pop_reg(emit, &vtype_fun, REG_ARG_1); // the function
+        assert(vtype_fun == VTYPE_PYOBJ);
+        emit_call_with_imm_arg(emit, RT_F_CALL_FUNCTION_N, rt_call_function_n, n_positional, REG_ARG_2);
+    //}
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
 }
 
 static void emit_native_call_method(emit_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) {
     assert(n_keyword == 0 && !have_star_arg && !have_dbl_star_arg);
+    /*
     if (n_positional == 0) {
         vtype_kind_t vtype_meth, vtype_self;
         emit_pre_pop_reg_reg(emit, &vtype_self, REG_ARG_2, &vtype_meth, REG_ARG_1); // the self object (or NULL), the method
@@ -1089,10 +1098,11 @@ static void emit_native_call_method(emit_t *emit, int n_positional, int n_keywor
         assert(vtype_arg1 == VTYPE_PYOBJ);
         emit_call(emit, RT_F_CALL_METHOD_2, rt_call_method_2);
     } else {
+    */
         emit_pre(emit);
         emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_positional + 2); // pointer to items in reverse order, including meth and self
         emit_call_with_imm_arg(emit, RT_F_CALL_METHOD_N, rt_call_method_n, n_positional, REG_ARG_1);
-    }
+    //}
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
 }
 
diff --git a/py/runtime.c b/py/runtime.c
index e9a861438919cc4a5a1493a69c329c73ae2673df..81487c2b687f3e3f738bed217b86bafc101fcc18 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -7,7 +7,7 @@
 #include "misc.h"
 #include "machine.h"
 #include "runtime.h"
-#include "vm.h"
+#include "bc.h"
 
 #if 0 // print debugging info
 #define DEBUG_printf(args...) printf(args)
@@ -120,8 +120,9 @@ py_obj_t py_const_none;
 py_obj_t py_const_false;
 py_obj_t py_const_true;
 
-py_map_t map_locals;
-py_map_t map_globals;
+// map_locals and map_globals are pointers so that both can refer to the same map, which is the case at outer module scope (see rt_init)
+py_map_t *map_locals;
+py_map_t *map_globals;
 py_map_t map_builtins;
 
 // approximatelly doubling primes; made with Mathematica command: Table[Prime[Floor[(1.7)^n]], {n, 3, 24}]
@@ -339,9 +340,9 @@ py_obj_t py_builtin_len(py_obj_t o_in) {
 
 py_obj_t py_builtin___build_class__(py_obj_t o_class_fun, py_obj_t o_class_name) {
     // we differ from CPython: we set the new __locals__ object here
-    py_map_t old_locals = map_locals;
+    py_map_t *old_locals = map_locals;
     py_map_t *class_locals = py_map_new(MAP_QSTR, 0);
-    map_locals = *class_locals;
+    map_locals = class_locals;
 
     // call the class code
     rt_call_function_1(o_class_fun, (py_obj_t)0xdeadbeef);
@@ -368,10 +369,9 @@ void rt_init() {
     py_const_false = py_obj_new_const("False");
     py_const_true = py_obj_new_const("True");
 
-    py_map_init(&map_locals, MAP_QSTR, 0);
-
-    py_map_init(&map_globals, MAP_QSTR, 1);
-    py_qstr_map_lookup(&map_globals, qstr_from_str_static("__name__"), true)->value = py_obj_new_str(qstr_from_str_static("__main__"));
+    // locals = globals for outer module (see Objects/frameobject.c/PyFrame_New())
+    map_locals = map_globals = py_map_new(MAP_QSTR, 1);
+    py_qstr_map_lookup(map_globals, qstr_from_str_static("__name__"), true)->value = py_obj_new_str(qstr_from_str_static("__main__"));
 
     py_map_init(&map_builtins, MAP_QSTR, 3);
     py_qstr_map_lookup(&map_builtins, q_print, true)->value = rt_make_function_1(py_builtin_print);
@@ -476,6 +476,7 @@ bool py_obj_is_callable(py_obj_t o_in) {
             case O_FUN_N:
             case O_FUN_BC:
             case O_FUN_ASM:
+            // what about O_CLASS, and an O_OBJ that has a __call__ method?
                 return true;
             default:
                 return false;
@@ -633,9 +634,9 @@ py_obj_t rt_load_const_str(qstr qstr) {
 py_obj_t rt_load_name(qstr qstr) {
     // logic: search locals, globals, builtins
     DEBUG_OP_printf("load name %s\n", qstr_str(qstr));
-    py_map_elem_t *elem = py_qstr_map_lookup(&map_locals, qstr, false);
+    py_map_elem_t *elem = py_qstr_map_lookup(map_locals, qstr, false);
     if (elem == NULL) {
-        elem = py_qstr_map_lookup(&map_globals, qstr, false);
+        elem = py_qstr_map_lookup(map_globals, qstr, false);
         if (elem == NULL) {
             elem = py_qstr_map_lookup(&map_builtins, qstr, false);
             if (elem == NULL) {
@@ -650,7 +651,7 @@ py_obj_t rt_load_name(qstr qstr) {
 py_obj_t rt_load_global(qstr qstr) {
     // logic: search globals, builtins
     DEBUG_OP_printf("load global %s\n", qstr_str(qstr));
-    py_map_elem_t *elem = py_qstr_map_lookup(&map_globals, qstr, false);
+    py_map_elem_t *elem = py_qstr_map_lookup(map_globals, qstr, false);
     if (elem == NULL) {
         elem = py_qstr_map_lookup(&map_builtins, qstr, false);
         if (elem == NULL) {
@@ -673,12 +674,12 @@ py_obj_t rt_load_build_class() {
 
 void rt_store_name(qstr qstr, py_obj_t obj) {
     DEBUG_OP_printf("store name %s <- %p\n", qstr_str(qstr), obj);
-    py_qstr_map_lookup(&map_locals, qstr, true)->value = obj;
+    py_qstr_map_lookup(map_locals, qstr, true)->value = obj;
 }
 
 void rt_store_global(qstr qstr, py_obj_t obj) {
     DEBUG_OP_printf("store global %s <- %p\n", qstr_str(qstr), obj);
-    py_qstr_map_lookup(&map_globals, qstr, true)->value = obj;
+    py_qstr_map_lookup(map_globals, qstr, true)->value = obj;
 }
 
 py_obj_t rt_unary_op(int op, py_obj_t arg) {
@@ -809,6 +810,26 @@ py_obj_t rt_make_function(int n_args, py_fun_t code) {
     return o;
 }
 
+py_obj_t rt_call_function_0(py_obj_t fun) { // convenience wrapper: call fun with no arguments
+    return rt_call_function_n(fun, 0, NULL); // zero arguments, so no argument array is supplied
+}
+
+py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg) { // convenience wrapper: call fun with one argument
+    return rt_call_function_n(fun, 1, &arg); // the parameter itself serves as the one-element argument array
+}
+
+py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2) {
+    // rt_call_function_n takes its arguments in reverse order, so the
+    // last argument (arg2) goes in slot 0 and the first (arg1) in slot 1
+    py_obj_t reversed_args[2] = {arg2, arg1};
+    return rt_call_function_n(fun, 2, reversed_args);
+}
+
+typedef machine_uint_t (*inline_asm_fun_0_t)(void); // (void), not (): empty parentheses would leave the parameter list unspecified
+typedef machine_uint_t (*inline_asm_fun_1_t)(machine_uint_t);
+typedef machine_uint_t (*inline_asm_fun_2_t)(machine_uint_t, machine_uint_t);
+typedef machine_uint_t (*inline_asm_fun_3_t)(machine_uint_t, machine_uint_t, machine_uint_t);
+
 // convert a Python object to a sensible value for inline asm
 machine_uint_t rt_convert_obj_for_inline_asm(py_obj_t obj) {
     // TODO for byte_array, pass pointer to the array
@@ -847,130 +868,109 @@ py_obj_t rt_convert_val_from_inline_asm(machine_uint_t val) {
     return TO_SMALL_INT(val);
 }
 
-typedef machine_uint_t (*inline_asm_fun_0_t)();
-typedef machine_uint_t (*inline_asm_fun_1_t)(machine_uint_t);
-typedef machine_uint_t (*inline_asm_fun_2_t)(machine_uint_t, machine_uint_t);
-
-py_obj_t rt_call_function_0(py_obj_t fun) {
+// args holds the arguments in reverse order: args[0] is the last argument and args[n_args - 1] is the first
+py_obj_t rt_call_function_n(py_obj_t fun, int n_args, const py_obj_t *args) {
+    int n_args_fun = 0;
     if (IS_O(fun, O_FUN_0)) {
         py_obj_base_t *o = fun;
-        DEBUG_OP_printf("calling native %p with no args\n", o->u_fun.fun);
+        if (n_args != 0) {
+            n_args_fun = 0;
+            goto bad_n_args;
+        }
+        DEBUG_OP_printf("calling native %p()\n", o->u_fun.fun);
         return ((py_fun_0_t)o->u_fun.fun)();
+
+    } else if (IS_O(fun, O_FUN_1)) {
+        py_obj_base_t *o = fun;
+        if (n_args != 1) {
+            n_args_fun = 1;
+            goto bad_n_args;
+        }
+        DEBUG_OP_printf("calling native %p(%p)\n", o->u_fun.fun, args[0]);
+        return ((py_fun_1_t)o->u_fun.fun)(args[0]);
+
+    } else if (IS_O(fun, O_FUN_2)) {
+        py_obj_base_t *o = fun;
+        if (n_args != 2) {
+            n_args_fun = 2;
+            goto bad_n_args;
+        }
+        DEBUG_OP_printf("calling native %p(%p, %p)\n", o->u_fun.fun, args[1], args[0]);
+        return ((py_fun_2_t)o->u_fun.fun)(args[1], args[0]);
+
+    // TODO: O_FUN_N is not handled yet; such a call currently falls through to the final else branch and asserts
+
     } else if (IS_O(fun, O_FUN_BC)) {
         py_obj_base_t *o = fun;
-        assert(o->u_fun_bc.n_args == 0);
-        DEBUG_OP_printf("calling byte code %p with no args\n", o->u_fun_bc.code);
-        return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, NULL, 0);
+        if (n_args != o->u_fun_bc.n_args) {
+            n_args_fun = o->u_fun_bc.n_args;
+            goto bad_n_args;
+        }
+        DEBUG_OP_printf("calling byte code %p(n_args=%d)\n", o->u_fun_bc.code, n_args);
+        return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, args, n_args);
+
     } else if (IS_O(fun, O_FUN_ASM)) {
         py_obj_base_t *o = fun;
-        assert(o->u_fun_asm.n_args == 0);
-        DEBUG_OP_printf("calling inline asm %p with no args\n", o->u_fun_asm.fun);
-        return rt_convert_val_from_inline_asm(((inline_asm_fun_0_t)o->u_fun_asm.fun)());
+        if (n_args != o->u_fun_asm.n_args) {
+            n_args_fun = o->u_fun_asm.n_args;
+            goto bad_n_args;
+        }
+        DEBUG_OP_printf("calling inline asm %p(n_args=%d)\n", o->u_fun_asm.fun, n_args);
+        machine_uint_t ret;
+        if (n_args == 0) {
+            ret = ((inline_asm_fun_0_t)o->u_fun_asm.fun)();
+        } else if (n_args == 1) {
+            ret = ((inline_asm_fun_1_t)o->u_fun_asm.fun)(rt_convert_obj_for_inline_asm(args[0]));
+        } else if (n_args == 2) {
+            ret = ((inline_asm_fun_2_t)o->u_fun_asm.fun)(rt_convert_obj_for_inline_asm(args[1]), rt_convert_obj_for_inline_asm(args[0]));
+        } else if (n_args == 3) {
+            ret = ((inline_asm_fun_3_t)o->u_fun_asm.fun)(rt_convert_obj_for_inline_asm(args[2]), rt_convert_obj_for_inline_asm(args[1]), rt_convert_obj_for_inline_asm(args[0]));
+        } else {
+            assert(0);
+            ret = 0;
+        }
+        return rt_convert_val_from_inline_asm(ret);
+
     } else if (IS_O(fun, O_BOUND_METH)) {
         py_obj_base_t *o = fun;
-        DEBUG_OP_printf("calling bound method %p with self and no args\n", o->u_bound_meth.meth);
-        return rt_call_function_1(o->u_bound_meth.meth, o->u_bound_meth.self);
+        DEBUG_OP_printf("calling bound method %p(self=%p, n_args=%d)\n", o->u_bound_meth.meth, o->u_bound_meth.self, n_args);
+        if (n_args == 0) {
+            return rt_call_function_n(o->u_bound_meth.meth, 1, &o->u_bound_meth.self);
+        } else if (n_args == 1) {
+            py_obj_t args2[2];
+            args2[1] = o->u_bound_meth.self;
+            args2[0] = args[0];
+            return rt_call_function_n(o->u_bound_meth.meth, 2, args2);
+        } else {
+            // TODO: calling a bound method with 2 or more arguments is not implemented
+            assert(0);
+            return py_const_none;
+            //return rt_call_function_2(o->u_bound_meth.meth, n_args + 1, o->u_bound_meth.self + args);
+        }
+
     } else if (IS_O(fun, O_CLASS)) {
         // instantiate an instance of a class
+        if (n_args != 0) {
+            n_args_fun = 0;
+            goto bad_n_args;
+        }
         DEBUG_OP_printf("instantiate object of class %p with no args\n", fun);
         py_obj_base_t *o = m_new(py_obj_base_t, 1);
         o->kind = O_OBJ;
         o->u_obj.class = fun;
         o->u_obj.members = py_map_new(MAP_QSTR, 0);
         return o;
-    } else {
-        printf("fun0:%p\n", fun);
-        assert(0);
-        return py_const_none;
-    }
-}
 
-py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg) {
-    if (IS_O(fun, O_FUN_1)) {
-        py_obj_base_t *o = fun;
-        DEBUG_OP_printf("calling native %p with 1 arg\n", o->u_fun.fun);
-        return ((py_fun_1_t)o->u_fun.fun)(arg);
-    } else if (IS_O(fun, O_FUN_BC)) {
-        py_obj_base_t *o = fun;
-        if (o->u_fun_bc.n_args != 1) {
-            printf("rt_call_function_1: trying to pass 1 argument to a function that takes %d arguments\n", o->u_fun_bc.n_args);
-            assert(0);
-        }
-        DEBUG_OP_printf("calling byte code %p with 1 arg\n", o->u_fun_bc.code);
-        return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, &arg, 1);
-    } else if (IS_O(fun, O_FUN_ASM)) {
-        py_obj_base_t *o = fun;
-        assert(o->u_fun_asm.n_args == 1);
-        DEBUG_OP_printf("calling inline asm %p with 1 arg\n", o->u_fun_asm.fun);
-        return rt_convert_val_from_inline_asm(((inline_asm_fun_1_t)o->u_fun_asm.fun)(rt_convert_obj_for_inline_asm(arg)));
-    } else if (IS_O(fun, O_BOUND_METH)) {
-        py_obj_base_t *o = fun;
-        return rt_call_function_2(o->u_bound_meth.meth, o->u_bound_meth.self, arg);
     } else {
-        printf("fun1:%p\n", fun);
+        printf("fun %p %d\n", fun, ((py_obj_base_t*)fun)->kind);
         assert(0);
         return py_const_none;
     }
-}
 
-py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2) {
-    if (IS_O(fun, O_FUN_2)) {
-        py_obj_base_t *o = fun;
-        DEBUG_OP_printf("calling native %p(%p, %p)\n", o->u_fun.fun, arg1, arg2);
-        return ((py_fun_2_t)o->u_fun.fun)(arg1, arg2);
-    } else if (IS_O(fun, O_FUN_BC)) {
-        py_obj_base_t *o = fun;
-        assert(o->u_fun_bc.n_args == 2);
-        DEBUG_OP_printf("calling byte code %p with 2 args\n", o->u_fun_bc.code);
-        py_obj_t args[2];
-        args[1] = arg1;
-        args[0] = arg2;
-        return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, &args[0], 2);
-    } else if (IS_O(fun, O_FUN_ASM)) {
-        py_obj_base_t *o = fun;
-        assert(o->u_fun_asm.n_args == 2);
-        DEBUG_OP_printf("calling inline asm %p with 2 args\n", o->u_fun_asm.fun);
-        return rt_convert_val_from_inline_asm(((inline_asm_fun_2_t)o->u_fun_asm.fun)(rt_convert_obj_for_inline_asm(arg1), rt_convert_obj_for_inline_asm(arg2)));
-    } else {
-        assert(0);
-        return py_const_none;
-    }
-}
-
-// args are in reverse order in the array
-py_obj_t rt_call_function_n(py_obj_t fun, int n_args, const py_obj_t *args) {
-    if (IS_O(fun, O_FUN_2)) {
-        assert(n_args == 2);
-        py_obj_base_t *o = fun;
-        DEBUG_OP_printf("calling native %p(%p, %p)\n", o->u_fun.fun, args[1], args[0]);
-        return ((py_fun_2_t)o->u_fun.fun)(args[1], args[0]);
-    } else if (IS_O(fun, O_FUN_BC)) {
-        py_obj_base_t *o = fun;
-        assert(o->u_fun_bc.n_args == n_args);
-        DEBUG_OP_printf("calling byte code %p with %d args\n", o->u_fun_bc.code, n_args);
-        return py_execute_byte_code(o->u_fun_bc.code, o->u_fun_bc.len, args, n_args);
-    } else {
-        assert(0);
-        return py_const_none;
-    }
-}
-
-py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self) {
-    DEBUG_OP_printf("call method %p(self=%p)\n", fun, self);
-    if (self == NULL) {
-        return rt_call_function_0(fun);
-    } else {
-        return rt_call_function_1(fun, self);
-    }
-}
-
-py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg) {
-    DEBUG_OP_printf("call method %p(self=%p, %p)\n", fun, self, arg);
-    if (self == NULL) {
-        return rt_call_function_1(fun, arg);
-    } else {
-        return rt_call_function_2(fun, self, arg);
-    }
+bad_n_args:
+    printf("TypeError: function takes %d positional arguments but %d were given\n", n_args_fun, n_args);
+    assert(0);
+    return py_const_none;
 }
 
 // args contains: arg(n_args-1)  arg(n_args-2)  ...  arg(0)  self/NULL  fun
@@ -1206,11 +1206,7 @@ void *rt_fun_table[RT_F_NUMBER_OF] = {
     rt_store_map,
     rt_build_set,
     rt_make_function_from_id,
-    rt_call_function_0,
-    rt_call_function_1,
-    rt_call_function_2,
-    rt_call_method_1,
-    rt_call_method_2,
+    rt_call_function_n,
     rt_call_method_n,
     rt_binary_op,
     rt_compare_op,
diff --git a/py/runtime.h b/py/runtime.h
index f1832be247adfc277054ce9482c63283a3775b0a..2823ba187d54f31a59f27647c7f369098b786362 100644
--- a/py/runtime.h
+++ b/py/runtime.h
@@ -64,11 +64,7 @@ typedef enum {
     RT_F_STORE_MAP,
     RT_F_BUILD_SET,
     RT_F_MAKE_FUNCTION_FROM_ID,
-    RT_F_CALL_FUNCTION_0,
-    RT_F_CALL_FUNCTION_1,
-    RT_F_CALL_FUNCTION_2,
-    RT_F_CALL_METHOD_1,
-    RT_F_CALL_METHOD_2,
+    RT_F_CALL_FUNCTION_N,
     RT_F_CALL_METHOD_N,
     RT_F_BINARY_OP,
     RT_F_COMPARE_OP,
@@ -115,6 +111,7 @@ py_obj_t rt_make_function(int n_args, py_fun_t code);
 py_obj_t rt_call_function_0(py_obj_t fun);
 py_obj_t rt_call_function_1(py_obj_t fun, py_obj_t arg);
 py_obj_t rt_call_function_2(py_obj_t fun, py_obj_t arg1, py_obj_t arg2);
+py_obj_t rt_call_function_n(py_obj_t fun, int n_args, const py_obj_t *args);
 py_obj_t rt_call_method_1(py_obj_t fun, py_obj_t self);
 py_obj_t rt_call_method_2(py_obj_t fun, py_obj_t self, py_obj_t arg);
 py_obj_t rt_call_method_n(int n_args, const py_obj_t *args);
diff --git a/py/vm.c b/py/vm.c
index ac76089a164d48ea9a562577abf6094848dcf15e..e672ef799ec5a66af7ff42e6c2dfd55a79d82799 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -7,7 +7,7 @@
 #include "misc.h"
 #include "machine.h"
 #include "runtime.h"
-#include "vm.h"
+#include "bc.h"
 
 #define DECODE_UINT do { unum = *ip++; if (unum > 127) { unum = ((unum & 0x3f) << 8) | (*ip++); } } while (0)
 #define DECODE_QSTR do { qstr = *ip++; if (qstr > 127) { qstr = ((qstr & 0x3f) << 8) | (*ip++); } } while (0)
@@ -236,38 +236,18 @@ py_obj_t py_execute_byte_code(const byte *code, uint len, const py_obj_t *args,
             case PYBC_CALL_FUNCTION:
                 DECODE_UINT;
                 assert((unum & 0xff00) == 0); // n_keyword
-                // switch on n_positional
-                if ((unum & 0xff) == 0) {
-                    *sp = rt_call_function_0(*sp);
-                } else if ((unum & 0xff) == 1) {
-                    obj1 = *sp++; // the single argument
-                    *sp = rt_call_function_1(*sp, obj1);
-                } else if ((unum & 0xff) == 2) {
-                    obj2 = *sp++; // the second argument
-                    obj1 = *sp++; // the first argument
-                    *sp = rt_call_function_2(*sp, obj1, obj2);
-                } else {
-                    assert(0);
-                }
+                unum &= 0xff; // n_positional
+                sp += unum;
+                *sp = rt_call_function_n(*sp, unum, sp - unum);
                 break;
 
             case PYBC_CALL_METHOD:
                 DECODE_UINT;
                 assert((unum & 0xff00) == 0); // n_keyword
-                // switch on n_positional
-                if ((unum & 0xff) == 0) {
-                    obj1 = *sp++; // the self object (or NULL)
-                    *sp = rt_call_method_1(*sp, obj1);
-                } else if ((unum & 0xff) == 1) {
-                    obj2 = *sp++; // the first argument
-                    obj1 = *sp++; // the self object (or NULL)
-                    *sp = rt_call_method_2(*sp, obj1, obj2);
-                } else {
-                    unum = unum & 0xff;
-                    obj1 = rt_call_method_n(unum, sp);
-                    sp += unum + 1;
-                    *sp = obj1;
-                }
+                unum &= 0xff;
+                obj1 = rt_call_method_n(unum, sp);
+                sp += unum + 1;
+                *sp = obj1;
                 break;
 
             case PYBC_RETURN_VALUE: