diff --git a/py/compile.c b/py/compile.c
index 30355a11cf71b5860bdcada864f8fb8cb610e3d2..ec6b463c05ea5cc208dc2818603dc8830fa927ca 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -2938,7 +2938,7 @@ STATIC void compile_scope(compiler_t *comp, scope_t *scope, pass_kind_t pass) {
     comp->scope_cur = scope;
     comp->next_label = 0;
     EMIT_ARG(start_pass, pass, scope);
-    reserve_labels_for_native(comp, 4); // used by native's start_pass
+    reserve_labels_for_native(comp, 6); // used by native's start_pass
 
     if (comp->pass == MP_PASS_SCOPE) {
         // reset maximum stack sizes in scope
diff --git a/py/emitglue.c b/py/emitglue.c
index f75a57437fa3987308bbba03606dcc9722a6d54b..f99631450ba7715dbb68172a2475caf7a0816ca7 100644
--- a/py/emitglue.c
+++ b/py/emitglue.c
@@ -134,10 +134,8 @@ mp_obj_t mp_make_function_from_raw_code(const mp_raw_code_t *rc, mp_obj_t def_ar
     switch (rc->kind) {
         #if MICROPY_EMIT_NATIVE
         case MP_CODE_NATIVE_PY:
-            fun = mp_obj_new_fun_native(def_args, def_kw_args, rc->data.u_native.fun_data, rc->data.u_native.const_table);
-            break;
         case MP_CODE_NATIVE_VIPER:
-            fun = mp_obj_new_fun_viper(rc->n_pos_args, rc->data.u_native.fun_data, rc->data.u_native.type_sig);
+            fun = mp_obj_new_fun_native(def_args, def_kw_args, rc->data.u_native.fun_data, rc->data.u_native.const_table);
             break;
         #endif
         #if MICROPY_EMIT_INLINE_ASM
diff --git a/py/emitnative.c b/py/emitnative.c
index c95ef889b6c30d066d84e0b0a1083c6128175323..b26beb4075d12f00e4f0de28440fdbefa8dfdc45 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -207,7 +207,6 @@ struct _emit_t {
     ASM_T *as;
 };
 
-STATIC const uint8_t reg_arg_table[REG_ARG_NUM] = {REG_ARG_1, REG_ARG_2, REG_ARG_3, REG_ARG_4};
 STATIC const uint8_t reg_local_table[REG_LOCAL_NUM] = {REG_LOCAL_1, REG_LOCAL_2, REG_LOCAL_3};
 
 STATIC void emit_native_global_exc_entry(emit_t *emit);
@@ -237,6 +236,7 @@ void EXPORT_FUN(free)(emit_t *emit) {
 
 STATIC void emit_pre_pop_reg(emit_t *emit, vtype_kind_t *vtype, int reg_dest);
 STATIC void emit_post_push_reg(emit_t *emit, vtype_kind_t vtype, int reg);
+STATIC void emit_call_with_imm_arg(emit_t *emit, mp_fun_kind_t fun_kind, mp_int_t arg_val, int arg_reg);
 STATIC void emit_native_load_fast(emit_t *emit, qstr qst, mp_uint_t local_num);
 STATIC void emit_native_store_fast(emit_t *emit, qstr qst, mp_uint_t local_num);
 
@@ -311,6 +311,10 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
             if (num_locals_in_regs > REG_LOCAL_NUM) {
                 num_locals_in_regs = REG_LOCAL_NUM;
             }
+            // Need a stack spot for the local kept in REG_LOCAL_3 if 4 or more args, as that reg holds the args array on entry (see below)
+            if (scope->num_pos_args >= 4) {
+                --num_locals_in_regs;
+            }
         }
 
         // The locals and stack start at the beginning of the C stack
@@ -326,26 +330,45 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
         asm_arm_mov_reg_i32(emit->as, ASM_ARM_REG_R7, (mp_uint_t)mp_fun_table);
         #endif
 
-        // Store arguments into locals
+        // Put n_args in REG_ARG_1, n_kw in REG_ARG_2, args array in REG_LOCAL_3
         #if N_X86
-        for (int i = 0; i < scope->num_pos_args; i++) {
-            if (i < REG_LOCAL_NUM && CAN_USE_REGS_FOR_LOCALS(emit)) {
-                asm_x86_mov_arg_to_r32(emit->as, i, reg_local_table[i]);
-            } else {
-                asm_x86_mov_arg_to_r32(emit->as, i, REG_TEMP0);
-                asm_x86_mov_r32_to_local(emit->as, REG_TEMP0, LOCAL_IDX_LOCAL_VAR(emit, i));
-            }
-        }
+        asm_x86_mov_arg_to_r32(emit->as, 1, REG_ARG_1);
+        asm_x86_mov_arg_to_r32(emit->as, 2, REG_ARG_2);
+        asm_x86_mov_arg_to_r32(emit->as, 3, REG_LOCAL_3);
         #else
-        for (int i = 0; i < scope->num_pos_args; i++) {
-            if (i < REG_LOCAL_NUM && CAN_USE_REGS_FOR_LOCALS(emit)) {
-                ASM_MOV_REG_REG(emit->as, reg_local_table[i], reg_arg_table[i]);
+        ASM_MOV_REG_REG(emit->as, REG_ARG_1, REG_ARG_2);
+        ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_ARG_3);
+        ASM_MOV_REG_REG(emit->as, REG_LOCAL_3, REG_ARG_4);
+        #endif
+
+        // Check that no keyword args were passed and that n_args matches this function; call mp_arg_check_num_sig if not
+        ASM_JUMP_IF_REG_NONZERO(emit->as, REG_ARG_2, *emit->label_slot + 4, true);
+        ASM_MOV_REG_IMM(emit->as, REG_ARG_3, scope->num_pos_args);
+        ASM_JUMP_IF_REG_EQ(emit->as, REG_ARG_1, REG_ARG_3, *emit->label_slot + 5);
+        mp_asm_base_label_assign(&emit->as->base, *emit->label_slot + 4);
+        ASM_MOV_REG_IMM(emit->as, REG_ARG_3, MP_OBJ_FUN_MAKE_SIG(scope->num_pos_args, scope->num_pos_args, false));
+        ASM_CALL_IND(emit->as, mp_fun_table[MP_F_ARG_CHECK_NUM_SIG], MP_F_ARG_CHECK_NUM_SIG);
+        mp_asm_base_label_assign(&emit->as->base, *emit->label_slot + 5);
+
+        // Store arguments into locals (reg or stack), converting to native if needed
+        for (int i = 0; i < emit->scope->num_pos_args; i++) {
+            int r = REG_ARG_1;
+            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_ARG_1, REG_LOCAL_3, i);
+            if (emit->local_vtype[i] != VTYPE_PYOBJ) {
+                emit_call_with_imm_arg(emit, MP_F_CONVERT_OBJ_TO_NATIVE, emit->local_vtype[i], REG_ARG_2);
+                r = REG_RET;
+            }
+            // REG_LOCAL_3 points to the args array so be sure not to overwrite it if it's still needed
+            if (i < REG_LOCAL_NUM && CAN_USE_REGS_FOR_LOCALS(emit) && (i != 2 || emit->scope->num_pos_args == 3)) {
+                ASM_MOV_REG_REG(emit->as, reg_local_table[i], r);
             } else {
-                assert(i < REG_ARG_NUM); // should be true; max args is checked above
-                ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_LOCAL_VAR(emit, i), reg_arg_table[i]);
+                ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_LOCAL_VAR(emit, i), r);
             }
         }
-        #endif
+        // Get 3rd local from the stack back into REG_LOCAL_3 if this reg couldn't be written to above
+        if (emit->scope->num_pos_args >= 4 && CAN_USE_REGS_FOR_LOCALS(emit)) {
+            ASM_MOV_REG_LOCAL(emit->as, REG_LOCAL_3, LOCAL_IDX_LOCAL_VAR(emit, 2));
+        }
 
         emit_native_global_exc_entry(emit);
 
@@ -477,17 +500,10 @@ STATIC void emit_native_end_pass(emit_t *emit) {
         void *f = mp_asm_base_get_code(&emit->as->base);
         mp_uint_t f_len = mp_asm_base_get_code_size(&emit->as->base);
 
-        // compute type signature
-        // note that the lower 4 bits of a vtype are tho correct MP_NATIVE_TYPE_xxx
-        mp_uint_t type_sig = emit->scope->scope_flags >> MP_SCOPE_FLAG_VIPERRET_POS;
-        for (mp_uint_t i = 0; i < emit->scope->num_pos_args; i++) {
-            type_sig |= (emit->local_vtype[i] & 0xf) << (i * 4 + 4);
-        }
-
         mp_emit_glue_assign_native(emit->scope->raw_code,
             emit->do_viper_types ? MP_CODE_NATIVE_VIPER : MP_CODE_NATIVE_PY,
             f, f_len, (mp_uint_t*)((byte*)f + emit->const_table_offset),
-            emit->scope->num_pos_args, emit->scope->scope_flags, type_sig);
+            emit->scope->num_pos_args, emit->scope->scope_flags, 0);
     }
 }
 
@@ -2409,17 +2425,20 @@ STATIC void emit_native_return_value(emit_t *emit) {
             if (return_vtype == VTYPE_PYOBJ) {
                 ASM_MOV_REG_IMM(emit->as, REG_RET, (mp_uint_t)mp_const_none);
             } else {
-                ASM_MOV_REG_IMM(emit->as, REG_RET, 0);
+                ASM_MOV_REG_IMM(emit->as, REG_ARG_1, 0);
             }
         } else {
             vtype_kind_t vtype;
-            emit_pre_pop_reg(emit, &vtype, REG_RET);
+            emit_pre_pop_reg(emit, &vtype, return_vtype == VTYPE_PYOBJ ? REG_RET : REG_ARG_1);
             if (vtype != return_vtype) {
                 EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
                     "return expected '%q' but got '%q'",
                     vtype_to_qstr(return_vtype), vtype_to_qstr(vtype));
             }
         }
+        if (return_vtype != VTYPE_PYOBJ) {
+            emit_call_with_imm_arg(emit, MP_F_CONVERT_NATIVE_TO_OBJ, return_vtype, REG_ARG_2);
+        }
     } else {
         vtype_kind_t vtype;
         emit_pre_pop_reg(emit, &vtype, REG_RET);
diff --git a/py/emitnx86.c b/py/emitnx86.c
index a536b9851eda0019ee6fdc7a5d087b417fc34fff..597a0fd4a84064dc383498540649b5b67d5b8e33 100644
--- a/py/emitnx86.c
+++ b/py/emitnx86.c
@@ -62,6 +62,7 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
     [MP_F_DELETE_GLOBAL] = 1,
     [MP_F_NEW_CELL] = 1,
     [MP_F_MAKE_CLOSURE_FROM_RAW_CODE] = 3,
+    [MP_F_ARG_CHECK_NUM_SIG] = 3,
     [MP_F_SETUP_CODE_STATE] = 4,
     [MP_F_SMALL_INT_FLOOR_DIVIDE] = 2,
     [MP_F_SMALL_INT_MODULO] = 2,
diff --git a/py/nativeglue.c b/py/nativeglue.c
index 7ff8273f9c70f35085e24db976a28c547b7075f6..a15a2eae31d9d081f6c5633460a0411f43fd2522 100644
--- a/py/nativeglue.c
+++ b/py/nativeglue.c
@@ -185,6 +185,7 @@ void *const mp_fun_table[MP_F_NUMBER_OF] = {
     mp_delete_global,
     mp_obj_new_cell,
     mp_make_closure_from_raw_code,
+    mp_arg_check_num_sig,
     mp_setup_code_state,
     mp_small_int_floor_divide,
     mp_small_int_modulo,
diff --git a/py/objfun.c b/py/objfun.c
index e7f2b79ada7805cb3ee159898f78bdb2c11b2864..b03d4194fc32239c481521b8d3bbfa7b6213f487 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -415,72 +415,6 @@ mp_obj_t mp_obj_new_fun_native(mp_obj_t def_args_in, mp_obj_t def_kw_args, const
 
 #endif // MICROPY_EMIT_NATIVE
 
-/******************************************************************************/
-/* viper functions                                                            */
-
-#if MICROPY_EMIT_NATIVE
-
-typedef struct _mp_obj_fun_viper_t {
-    mp_obj_base_t base;
-    size_t n_args;
-    void *fun_data; // GC must be able to trace this pointer
-    mp_uint_t type_sig;
-} mp_obj_fun_viper_t;
-
-typedef mp_uint_t (*viper_fun_0_t)(void);
-typedef mp_uint_t (*viper_fun_1_t)(mp_uint_t);
-typedef mp_uint_t (*viper_fun_2_t)(mp_uint_t, mp_uint_t);
-typedef mp_uint_t (*viper_fun_3_t)(mp_uint_t, mp_uint_t, mp_uint_t);
-typedef mp_uint_t (*viper_fun_4_t)(mp_uint_t, mp_uint_t, mp_uint_t, mp_uint_t);
-
-STATIC mp_obj_t fun_viper_call(mp_obj_t self_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
-    mp_obj_fun_viper_t *self = self_in;
-
-    mp_arg_check_num(n_args, n_kw, self->n_args, self->n_args, false);
-
-    void *fun = MICROPY_MAKE_POINTER_CALLABLE(self->fun_data);
-
-    mp_uint_t ret;
-    if (n_args == 0) {
-        ret = ((viper_fun_0_t)fun)();
-    } else if (n_args == 1) {
-        ret = ((viper_fun_1_t)fun)(mp_convert_obj_to_native(args[0], self->type_sig >> 4));
-    } else if (n_args == 2) {
-        ret = ((viper_fun_2_t)fun)(mp_convert_obj_to_native(args[0], self->type_sig >> 4), mp_convert_obj_to_native(args[1], self->type_sig >> 8));
-    } else if (n_args == 3) {
-        ret = ((viper_fun_3_t)fun)(mp_convert_obj_to_native(args[0], self->type_sig >> 4), mp_convert_obj_to_native(args[1], self->type_sig >> 8), mp_convert_obj_to_native(args[2], self->type_sig >> 12));
-    } else {
-        // compiler allows at most 4 arguments
-        assert(n_args == 4);
-        ret = ((viper_fun_4_t)fun)(
-            mp_convert_obj_to_native(args[0], self->type_sig >> 4),
-            mp_convert_obj_to_native(args[1], self->type_sig >> 8),
-            mp_convert_obj_to_native(args[2], self->type_sig >> 12),
-            mp_convert_obj_to_native(args[3], self->type_sig >> 16)
-        );
-    }
-
-    return mp_convert_native_to_obj(ret, self->type_sig);
-}
-
-STATIC const mp_obj_type_t mp_type_fun_viper = {
-    { &mp_type_type },
-    .name = MP_QSTR_function,
-    .call = fun_viper_call,
-    .unary_op = mp_generic_unary_op,
-};
-
-mp_obj_t mp_obj_new_fun_viper(size_t n_args, void *fun_data, mp_uint_t type_sig) {
-    mp_obj_fun_viper_t *o = m_new_obj(mp_obj_fun_viper_t);
-    o->base.type = &mp_type_fun_viper;
-    o->n_args = n_args;
-    o->fun_data = fun_data;
-    o->type_sig = type_sig;
-    return o;
-}
-
-#endif // MICROPY_EMIT_NATIVE
-
 /******************************************************************************/
 /* inline assembler functions                                                 */
 
diff --git a/py/runtime0.h b/py/runtime0.h
index f26b701bf12219edb04cb7ef7a3354a34ae88efd..652204b67c8203a85297305f7c2c16f8b3cbb66e 100644
--- a/py/runtime0.h
+++ b/py/runtime0.h
@@ -191,6 +191,7 @@ typedef enum {
     MP_F_DELETE_GLOBAL,
     MP_F_NEW_CELL,
     MP_F_MAKE_CLOSURE_FROM_RAW_CODE,
+    MP_F_ARG_CHECK_NUM_SIG,
     MP_F_SETUP_CODE_STATE,
     MP_F_SMALL_INT_FLOOR_DIVIDE,
     MP_F_SMALL_INT_MODULO,