diff --git a/py/compile.c b/py/compile.c
index 58cf7de1c2b334e95f5c53204374f08ee53fffca..d8e175bb6e779639a2eaa7ff3732ec005455cc57 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -567,6 +567,11 @@ STATIC void close_over_variables_etc(compiler_t *comp, scope_t *this_scope, int
     }
     this_scope->num_def_pos_args = n_pos_defaults;
 
+    #if MICROPY_EMIT_NATIVE
+    // When creating a function/closure it will take a reference to the current globals
+    comp->scope_cur->scope_flags |= MP_SCOPE_FLAG_REFGLOBALS;
+    #endif
+
     // make closed over variables, if any
     // ensure they are closed over in the order defined in the outer scope (mainly to agree with CPython)
     int nfree = 0;
@@ -3304,6 +3309,12 @@ STATIC void scope_compute_things(scope_t *scope) {
         if (SCOPE_IS_FUNC_LIKE(scope->kind) && id->kind == ID_INFO_KIND_GLOBAL_IMPLICIT) {
             id->kind = ID_INFO_KIND_GLOBAL_EXPLICIT;
         }
+        #if MICROPY_EMIT_NATIVE
+        if (id->kind == ID_INFO_KIND_GLOBAL_EXPLICIT) {
+            // This function makes a reference to a global variable
+            scope->scope_flags |= MP_SCOPE_FLAG_REFGLOBALS;
+        }
+        #endif
         // params always count for 1 local, even if they are a cell
         if (id->kind == ID_INFO_KIND_LOCAL || (id->flags & ID_FLAG_IS_PARAM)) {
             id->local_num = scope->num_locals++;
diff --git a/py/emitnative.c b/py/emitnative.c
index 73899b9e90bfabbb870d3f002751550ad4f50e1e..eb402c06b08cd734b178577bd5798fe8429b3020 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -75,7 +75,8 @@
 #define NLR_BUF_IDX_RET_VAL (1)
 
 // Whether the native/viper function needs to be wrapped in an exception handler
-#define NEED_GLOBAL_EXC_HANDLER(emit) ((emit)->scope->exc_stack_size > 0)
+#define NEED_GLOBAL_EXC_HANDLER(emit) ((emit)->scope->exc_stack_size > 0 \
+    || (!(emit)->do_viper_types && ((emit)->scope->scope_flags & MP_SCOPE_FLAG_REFGLOBALS)))
 
 // Whether registers can be used to store locals (only true if there are no
 // exception handlers, because otherwise an nlr_jump will restore registers to
@@ -928,30 +929,56 @@ STATIC void emit_native_global_exc_entry(emit_t *emit) {
         mp_uint_t start_label = *emit->label_slot + 2;
         mp_uint_t global_except_label = *emit->label_slot + 3;
 
-        // Clear the unwind state
-        ASM_XOR_REG_REG(emit->as, REG_TEMP0, REG_TEMP0);
-        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_TEMP0);
+        if (!emit->do_viper_types) {
+            // Set new globals
+            ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, offsetof(mp_code_state_t, fun_bc) / sizeof(uintptr_t));
+            ASM_LOAD_REG_REG_OFFSET(emit->as, REG_ARG_1, REG_ARG_1, offsetof(mp_obj_fun_bc_t, globals) / sizeof(uintptr_t));
+            emit_call(emit, MP_F_NATIVE_SWAP_GLOBALS);
 
-        // Put PC of start code block into REG_LOCAL_1
-        ASM_MOV_REG_PCREL(emit->as, REG_LOCAL_1, start_label);
+            // Save old globals (or NULL if globals didn't change)
+            ASM_MOV_LOCAL_REG(emit->as, offsetof(mp_code_state_t, old_globals) / sizeof(uintptr_t), REG_RET);
+        }
 
-        // Wrap everything in an nlr context
-        emit_native_label_assign(emit, nlr_label);
-        ASM_MOV_REG_LOCAL(emit->as, REG_LOCAL_2, LOCAL_IDX_EXC_HANDLER_UNWIND(emit));
-        emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_1, sizeof(nlr_buf_t) / sizeof(uintptr_t));
-        emit_call(emit, MP_F_NLR_PUSH);
-        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_LOCAL_2);
-        ASM_JUMP_IF_REG_NONZERO(emit->as, REG_RET, global_except_label, true);
+        if (emit->scope->exc_stack_size == 0) {
+            // Optimisation: if globals didn't change don't push the nlr context
+            ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, start_label, false);
 
-        // Clear PC of current code block, and jump there to resume execution
-        ASM_XOR_REG_REG(emit->as, REG_TEMP0, REG_TEMP0);
-        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_TEMP0);
-        ASM_JUMP_REG(emit->as, REG_LOCAL_1);
+            // Wrap everything in an nlr context
+            emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_1, sizeof(nlr_buf_t) / sizeof(uintptr_t));
+            emit_call(emit, MP_F_NLR_PUSH);
+            ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, start_label, true);
+        } else {
+            // Clear the unwind state
+            ASM_XOR_REG_REG(emit->as, REG_TEMP0, REG_TEMP0);
+            ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_TEMP0);
+
+            // Put PC of start code block into REG_LOCAL_1
+            ASM_MOV_REG_PCREL(emit->as, REG_LOCAL_1, start_label);
+
+            // Wrap everything in an nlr context
+            emit_native_label_assign(emit, nlr_label);
+            ASM_MOV_REG_LOCAL(emit->as, REG_LOCAL_2, LOCAL_IDX_EXC_HANDLER_UNWIND(emit));
+            emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_1, sizeof(nlr_buf_t) / sizeof(uintptr_t));
+            emit_call(emit, MP_F_NLR_PUSH);
+            ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_LOCAL_2);
+            ASM_JUMP_IF_REG_NONZERO(emit->as, REG_RET, global_except_label, true);
+
+            // Clear PC of current code block, and jump there to resume execution
+            ASM_XOR_REG_REG(emit->as, REG_TEMP0, REG_TEMP0);
+            ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_TEMP0);
+            ASM_JUMP_REG(emit->as, REG_LOCAL_1);
+
+            // Global exception handler: check for valid exception handler
+            emit_native_label_assign(emit, global_except_label);
+            ASM_MOV_REG_LOCAL(emit->as, REG_LOCAL_1, LOCAL_IDX_EXC_HANDLER_PC(emit));
+            ASM_JUMP_IF_REG_NONZERO(emit->as, REG_LOCAL_1, nlr_label, false);
+        }
 
-        // Global exception handler: check for valid exception handler
-        emit_native_label_assign(emit, global_except_label);
-        ASM_MOV_REG_LOCAL(emit->as, REG_LOCAL_1, LOCAL_IDX_EXC_HANDLER_PC(emit));
-        ASM_JUMP_IF_REG_NONZERO(emit->as, REG_LOCAL_1, nlr_label, false);
+        if (!emit->do_viper_types) {
+            // Restore old globals
+            ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, offsetof(mp_code_state_t, old_globals) / sizeof(uintptr_t));
+            emit_call(emit, MP_F_NATIVE_SWAP_GLOBALS);
+        }
 
         // Re-raise exception out to caller
         ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, LOCAL_IDX_EXC_VAL(emit));
@@ -967,10 +994,28 @@ STATIC void emit_native_global_exc_exit(emit_t *emit) {
     emit_native_label_assign(emit, emit->exit_label);
 
     if (NEED_GLOBAL_EXC_HANDLER(emit)) {
+        if (!emit->do_viper_types) {
+            // Get old globals
+            ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, offsetof(mp_code_state_t, old_globals) / sizeof(uintptr_t));
+
+            if (emit->scope->exc_stack_size == 0) {
+                // Optimisation: if globals didn't change then don't restore them and don't do nlr_pop
+                ASM_JUMP_IF_REG_ZERO(emit->as, REG_ARG_1, emit->exit_label + 1, false);
+            }
+
+            // Restore old globals
+            emit_call(emit, MP_F_NATIVE_SWAP_GLOBALS);
+        }
+
         // Pop the nlr context
         emit_call(emit, MP_F_NLR_POP);
         adjust_stack(emit, -(mp_int_t)(sizeof(nlr_buf_t) / sizeof(uintptr_t)));
 
+        if (emit->scope->exc_stack_size == 0) {
+            // Destination label for above optimisation
+            emit_native_label_assign(emit, emit->exit_label + 1);
+        }
+
         // Load return value
         ASM_MOV_REG_LOCAL(emit->as, REG_RET, LOCAL_IDX_RET_VAL(emit));
     }
diff --git a/py/emitnx86.c b/py/emitnx86.c
index 056c3f052df0a7a28f320ced77ed0ec2e057071c..a536b9851eda0019ee6fdc7a5d087b417fc34fff 100644
--- a/py/emitnx86.c
+++ b/py/emitnx86.c
@@ -18,6 +18,7 @@
 STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
     [MP_F_CONVERT_OBJ_TO_NATIVE] = 2,
     [MP_F_CONVERT_NATIVE_TO_OBJ] = 2,
+    [MP_F_NATIVE_SWAP_GLOBALS] = 1,
     [MP_F_LOAD_NAME] = 1,
     [MP_F_LOAD_GLOBAL] = 1,
     [MP_F_LOAD_BUILD_CLASS] = 0,
diff --git a/py/nativeglue.c b/py/nativeglue.c
index b87da6931e3915b2cf8723946251324855a44c7b..7ff8273f9c70f35085e24db976a28c547b7075f6 100644
--- a/py/nativeglue.c
+++ b/py/nativeglue.c
@@ -83,6 +83,20 @@ mp_obj_t mp_convert_native_to_obj(mp_uint_t val, mp_uint_t type) {
 
 #if MICROPY_EMIT_NATIVE
 
+mp_obj_dict_t *mp_native_swap_globals(mp_obj_dict_t *new_globals) {
+    if (new_globals == NULL) {
+        // Globals were originally the same so there is nothing to restore
+        return NULL;
+    }
+    mp_obj_dict_t *old_globals = mp_globals_get();
+    if (old_globals == new_globals) {
+        // Don't set globals if they are the same, and return NULL to indicate this
+        return NULL;
+    }
+    mp_globals_set(new_globals);
+    return old_globals;
+}
+
 // wrapper that accepts n_args and n_kw in one argument
 // (native emitter can only pass at most 3 arguments to a function)
 mp_obj_t mp_native_call_function_n_kw(mp_obj_t fun_in, size_t n_args_kw, const mp_obj_t *args) {
@@ -127,6 +141,7 @@ STATIC mp_obj_t mp_native_iternext(mp_obj_iter_buf_t *iter) {
 void *const mp_fun_table[MP_F_NUMBER_OF] = {
     mp_convert_obj_to_native,
     mp_convert_native_to_obj,
+    mp_native_swap_globals,
     mp_load_name,
     mp_load_global,
     mp_load_build_class,
diff --git a/py/runtime.h b/py/runtime.h
index ad65f3f46df9a3ff65e63abd3ff5950f5ecd4cf3..99a2204aaf09b3673f9b7017f899bd52cdbb3556 100644
--- a/py/runtime.h
+++ b/py/runtime.h
@@ -168,6 +168,7 @@ NORETURN void mp_raise_recursion_depth(void);
 // helper functions for native/viper code
 mp_uint_t mp_convert_obj_to_native(mp_obj_t obj, mp_uint_t type);
 mp_obj_t mp_convert_native_to_obj(mp_uint_t val, mp_uint_t type);
+mp_obj_dict_t *mp_native_swap_globals(mp_obj_dict_t *new_globals);
 mp_obj_t mp_native_call_function_n_kw(mp_obj_t fun_in, size_t n_args_kw, const mp_obj_t *args);
 void mp_native_raise(mp_obj_t o);
 
diff --git a/py/runtime0.h b/py/runtime0.h
index 2e89de9f4167f03c527149e4406d6fa2f9588cbc..b47a10ea22a0486cf5157bdc870985b5fe230d8b 100644
--- a/py/runtime0.h
+++ b/py/runtime0.h
@@ -31,6 +31,7 @@
 #define MP_SCOPE_FLAG_VARKEYWORDS  (0x02)
 #define MP_SCOPE_FLAG_GENERATOR    (0x04)
 #define MP_SCOPE_FLAG_DEFKWARGS    (0x08)
+#define MP_SCOPE_FLAG_REFGLOBALS   (0x10) // used only if native emitter enabled
 
 // types for native (viper) function signature
 #define MP_NATIVE_TYPE_OBJ  (0x00)
@@ -145,6 +146,7 @@ typedef enum {
 typedef enum {
     MP_F_CONVERT_OBJ_TO_NATIVE = 0,
     MP_F_CONVERT_NATIVE_TO_OBJ,
+    MP_F_NATIVE_SWAP_GLOBALS,
     MP_F_LOAD_NAME,
     MP_F_LOAD_GLOBAL,
     MP_F_LOAD_BUILD_CLASS,
diff --git a/tests/run-tests b/tests/run-tests
index 07d326811909537e5aa28743bc57fddf56bf09b5..8c087f9f58bd80482350c2f31e2c34db440ee88f 100755
--- a/tests/run-tests
+++ b/tests/run-tests
@@ -376,7 +376,6 @@ def run_tests(pyb, tests, args, base_path="."):
         skip_tests.add('micropython/schedule.py') # native code doesn't check pending events
         skip_tests.add('stress/gc_trace.py') # requires yield
         skip_tests.add('stress/recursive_gen.py') # requires yield
-        skip_tests.add('extmod/vfs_userfs.py') # because native doesn't properly handle globals across different modules
 
     for test_file in tests:
         test_file = test_file.replace('\\', '/')