diff --git a/py/compile.c b/py/compile.c
index 8ef05d2388705bd89d29fd573572ecbb7bc58f5b..89505c85a9f0af30691fb5f8f04e4e170e84f096 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -1595,6 +1595,7 @@ STATIC void compile_try_except(compiler_t *comp, mp_parse_node_t pn_body, int n_
 
             compile_decrease_except_level(comp);
             EMIT(end_finally);
+            reserve_labels_for_native(comp, 1); // used by native's end_finally
         }
         EMIT_ARG(jump, l2);
         EMIT_ARG(label_assign, end_finally_label);
@@ -1603,6 +1604,7 @@ STATIC void compile_try_except(compiler_t *comp, mp_parse_node_t pn_body, int n_
 
     compile_decrease_except_level(comp);
     EMIT(end_finally);
+    reserve_labels_for_native(comp, 1); // used by native's end_finally
     EMIT(end_except_handler);
 
     EMIT_ARG(label_assign, success_label);
@@ -1631,6 +1633,7 @@ STATIC void compile_try_finally(compiler_t *comp, mp_parse_node_t pn_body, int n
 
     compile_decrease_except_level(comp);
     EMIT(end_finally);
+    reserve_labels_for_native(comp, 1); // used by native's end_finally
 }
 
 STATIC void compile_try_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
@@ -1683,9 +1686,10 @@ STATIC void compile_with_stmt_helper(compiler_t *comp, int n, mp_parse_node_t *n
         compile_with_stmt_helper(comp, n - 1, nodes + 1, body);
         // finish this with block
         EMIT_ARG(with_cleanup, l_end);
-        reserve_labels_for_native(comp, 2); // used by native's with_cleanup
+        reserve_labels_for_native(comp, 3); // used by native's with_cleanup (third label: __exit__ entry for unwind jumps)
         compile_decrease_except_level(comp);
         EMIT(end_finally);
+        reserve_labels_for_native(comp, 1); // used by native's end_finally
     }
 }
 
@@ -1752,6 +1756,7 @@ STATIC void compile_async_for_stmt(compiler_t *comp, mp_parse_node_struct_t *pns
     EMIT_ARG(adjust_stack_size, 1); // if we jump here, the exc is on the stack
     compile_decrease_except_level(comp);
     EMIT(end_finally);
+    reserve_labels_for_native(comp, 1); // used by native's end_finally
     EMIT(end_except_handler);
 
     EMIT_ARG(label_assign, try_else_label);
@@ -1879,6 +1884,7 @@ STATIC void compile_async_with_stmt_helper(compiler_t *comp, int n, mp_parse_nod
         EMIT_ARG(label_assign, l_end);
         compile_decrease_except_level(comp);
         EMIT(end_finally);
+        reserve_labels_for_native(comp, 1); // used by native's end_finally
     }
 }
 
diff --git a/py/emitnarm.c b/py/emitnarm.c
index 89467052cbe9cdd8e36cf2b268beb3683d59c055..8297ad61921a816564a2f87a37702b96846f1ac9 100644
--- a/py/emitnarm.c
+++ b/py/emitnarm.c
@@ -8,8 +8,10 @@
 #define GENERIC_ASM_API (1)
 #include "py/asmarm.h"
 
-// Word index of REG_LOCAL_1(=r4) in nlr_buf_t
-#define NLR_BUF_IDX_LOCAL_1 (3)
+// Word indices of REG_LOCAL_x in nlr_buf_t (consumed by the LOCAL_IDX_xxx macros in emitnative.c)
+#define NLR_BUF_IDX_LOCAL_1 (3) // r4
+#define NLR_BUF_IDX_LOCAL_2 (4) // r5
+#define NLR_BUF_IDX_LOCAL_3 (5) // r6
 
 #define N_ARM (1)
 #define EXPORT_FUN(name) emit_native_arm_##name
diff --git a/py/emitnative.c b/py/emitnative.c
index a5075eead5d24bcd55db17179a7dfe1e234d8d1d..6a5bcd7ee03568837f914bec07f16a62c8588d15 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -85,6 +85,8 @@
 // Indices within the local C stack for various variables
 #define LOCAL_IDX_EXC_VAL(emit) ((emit)->stack_start + NLR_BUF_IDX_RET_VAL)
 #define LOCAL_IDX_EXC_HANDLER_PC(emit) ((emit)->stack_start + NLR_BUF_IDX_LOCAL_1)
+#define LOCAL_IDX_EXC_HANDLER_UNWIND(emit) ((emit)->stack_start + NLR_BUF_IDX_LOCAL_2) // target PC of a pending unwind jump, zero if none
+#define LOCAL_IDX_RET_VAL(emit) ((emit)->stack_start + NLR_BUF_IDX_LOCAL_3) // return value stored by emit_native_return_value
 #define LOCAL_IDX_LOCAL_VAR(emit, local_num) ((emit)->stack_start + (emit)->n_state - 1 - (local_num))
 
 // number of arguments to viper functions are limited to this value
@@ -148,9 +150,14 @@ typedef struct _stack_info_t {
     } data;
 } stack_info_t;
 
+#define UNWIND_LABEL_UNUSED (0x7fff) // unwind_label sentinel: no unwind jump passes through this handler
+#define UNWIND_LABEL_DO_FINAL_UNWIND (0x7ffe) // unwind_label sentinel: last finally of a chain, jumps to the saved target PC
+
 typedef struct _exc_stack_entry_t {
     uint16_t label : 15;
     uint16_t is_finally : 1;
+    uint16_t unwind_label : 15; // label of the next finally to run during an unwind jump, or an UNWIND_LABEL_xxx sentinel
+    uint16_t is_active : 1; // set by emit_native_push_exc_stack, cleared by emit_native_leave_exc_stack
 } exc_stack_entry_t;
 
 struct _emit_t {
@@ -843,27 +850,44 @@ STATIC void emit_native_push_exc_stack(emit_t *emit, uint label, bool is_finally
     exc_stack_entry_t *e = &emit->exc_stack[emit->exc_stack_size++];
     e->label = label;
     e->is_finally = is_finally;
+    e->unwind_label = UNWIND_LABEL_UNUSED; // may be overwritten later by emit_native_unwind_jump
+    e->is_active = true; // stays active until emit_native_leave_exc_stack is called for this entry
 
     ASM_MOV_REG_PCREL(emit->as, REG_RET, label);
     ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_RET);
 }
 
-STATIC void emit_native_pop_exc_stack(emit_t *emit, bool do_pop) {
+STATIC void emit_native_leave_exc_stack(emit_t *emit, bool start_of_handler) { // start_of_handler: true when emitting at a handler's entry point
     assert(emit->exc_stack_size > 0);
-    if (emit->exc_stack_size == 1) {
-        if (do_pop) {
-            --emit->exc_stack_size;
+
+    // Deactivate the innermost handler; its entry stays on exc_stack until emit_native_pop_exc_stack
+    exc_stack_entry_t *e = &emit->exc_stack[emit->exc_stack_size - 1];
+    e->is_active = false;
+
+    // Find next innermost active exception handler, to restore as current handler
+    for (--e; e >= emit->exc_stack && !e->is_active; --e) {
+    } // NOTE(review): e can end one element before exc_stack[0]; it is then only compared, never dereferenced
+
+    // Update the PC of the new exception handler
+    if (e < emit->exc_stack) {
+        // No active handler, clear handler PC to zero
+        if (start_of_handler) {
+            // Optimisation: PC is already cleared by global exc handler
             return;
         }
         ASM_XOR_REG_REG(emit->as, REG_RET, REG_RET);
     } else {
-        uint label = emit->exc_stack[emit->exc_stack_size - 2].label;
-        ASM_MOV_REG_PCREL(emit->as, REG_RET, label);
+        // Found new active handler, get its PC
+        ASM_MOV_REG_PCREL(emit->as, REG_RET, e->label);
     }
     ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_RET);
-    if (do_pop) {
-        --emit->exc_stack_size;
-    }
+}
+
+STATIC exc_stack_entry_t *emit_native_pop_exc_stack(emit_t *emit) { // removes the top exc_stack entry and returns a pointer to it
+    assert(emit->exc_stack_size > 0);
+    exc_stack_entry_t *e = &emit->exc_stack[--emit->exc_stack_size]; // this slot is reused by the next emit_native_push_exc_stack
+    assert(e->is_active == false); // the handler must already have been left via emit_native_leave_exc_stack
+    return e;
 }
 
 STATIC void emit_native_label_assign(emit_t *emit, mp_uint_t l) {
@@ -890,7 +914,7 @@ STATIC void emit_native_label_assign(emit_t *emit, mp_uint_t l) {
 
     if (is_finally) {
         // Label is at start of finally handler: pop exception stack
+        emit_native_leave_exc_stack(emit, true); // only deactivates the handler; its entry is popped in emit_native_end_finally
     }
 }
 
@@ -904,13 +928,19 @@ STATIC void emit_native_global_exc_entry(emit_t *emit) {
         mp_uint_t start_label = *emit->label_slot + 2;
         mp_uint_t global_except_label = *emit->label_slot + 3;
 
+        // Clear the unwind state (zero means no unwind jump is pending, see emit_native_end_finally)
+        ASM_XOR_REG_REG(emit->as, REG_TEMP0, REG_TEMP0);
+        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_TEMP0);
+
         // Put PC of start code block into REG_LOCAL_1
         ASM_MOV_REG_PCREL(emit->as, REG_LOCAL_1, start_label);
 
         // Wrap everything in an nlr context
         emit_native_label_assign(emit, nlr_label);
+        ASM_MOV_REG_LOCAL(emit->as, REG_LOCAL_2, LOCAL_IDX_EXC_HANDLER_UNWIND(emit)); // NOTE(review): presumably nlr_push overwrites this nlr_buf slot with REG_LOCAL_2, hence the preload - confirm
         emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_1, sizeof(nlr_buf_t) / sizeof(uintptr_t));
         emit_call(emit, MP_F_NLR_PUSH);
+        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_LOCAL_2); // write the unwind state back to its local slot after nlr_push
         ASM_JUMP_IF_REG_NONZERO(emit->as, REG_RET, global_except_label, true);
 
         // Clear PC of current code block, and jump there to resume execution
@@ -937,15 +967,12 @@ STATIC void emit_native_global_exc_exit(emit_t *emit) {
     emit_native_label_assign(emit, emit->exit_label);
 
     if (NEED_GLOBAL_EXC_HANDLER(emit)) {
-        // Save return value
-        ASM_MOV_REG_REG(emit->as, REG_LOCAL_1, REG_RET);
-
         // Pop the nlr context
         emit_call(emit, MP_F_NLR_POP);
         adjust_stack(emit, -(mp_int_t)(sizeof(nlr_buf_t) / sizeof(uintptr_t)));
 
-        // Restore return value
-        ASM_MOV_REG_REG(emit->as, REG_RET, REG_LOCAL_1);
+        // Load return value (stored in its local slot by emit_native_return_value)
+        ASM_MOV_REG_LOCAL(emit->as, REG_RET, LOCAL_IDX_RET_VAL(emit));
     }
 
     ASM_EXIT(emit->as);
@@ -1717,8 +1744,46 @@ STATIC void emit_native_jump_if_or_pop(emit_t *emit, bool cond, mp_uint_t label)
 }
 
 STATIC void emit_native_unwind_jump(emit_t *emit, mp_uint_t label, mp_uint_t except_depth) {
-    (void)except_depth;
-    emit_native_jump(emit, label & ~MP_EMIT_BREAK_FROM_FOR); // TODO properly
+    if (except_depth > 0) { // except_depth: number of exc_stack entries (from the top) that the jump leaves
+        exc_stack_entry_t *first_finally = NULL;
+        exc_stack_entry_t *prev_finally = NULL;
+        exc_stack_entry_t *e = &emit->exc_stack[emit->exc_stack_size - 1];
+        for (; except_depth > 0; --except_depth, --e) { // NOTE(review): e can end one element before exc_stack[0]; it is then only compared below
+            if (e->is_finally && e->is_active) {
+                // Found an active finally handler
+                if (first_finally == NULL) {
+                    first_finally = e;
+                }
+                if (prev_finally != NULL) {
+                    // Mark prev finally as needed to unwind a jump
+                    prev_finally->unwind_label = e->label;
+                }
+                prev_finally = e;
+            }
+        }
+        if (prev_finally == NULL) {
+            // No finally, handle the jump ourselves
+            // First, restore the exception handler address for the jump
+            if (e < emit->exc_stack) { // the jump leaves every handler on the stack
+                ASM_XOR_REG_REG(emit->as, REG_RET, REG_RET);
+            } else {
+                ASM_MOV_REG_PCREL(emit->as, REG_RET, e->label);
+            }
+            ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_PC(emit), REG_RET);
+        } else {
+            // Last finally should do our jump for us
+            // Mark finally as needing to decide the type of jump
+            prev_finally->unwind_label = UNWIND_LABEL_DO_FINAL_UNWIND;
+            ASM_MOV_REG_PCREL(emit->as, REG_RET, label & ~MP_EMIT_BREAK_FROM_FOR);
+            ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_HANDLER_UNWIND(emit), REG_RET);
+            // Cancel any active exception (see also emit_native_pop_except)
+            ASM_MOV_REG_IMM(emit->as, REG_RET, (mp_uint_t)mp_const_none);
+            ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_EXC_VAL(emit), REG_RET);
+            // Jump to the innermost active finally
+            label = first_finally->label;
+        }
+    }
+    emit_native_jump(emit, label & ~MP_EMIT_BREAK_FROM_FOR); // label is first_finally's label when a finally has to run first
 }
 
 STATIC void emit_native_setup_with(emit_t *emit, mp_uint_t label) {
@@ -1754,7 +1819,7 @@ STATIC void emit_native_setup_with(emit_t *emit, mp_uint_t label) {
 
     // need to commit stack because we may jump elsewhere
     need_stack_settled(emit);
-    emit_native_push_exc_stack(emit, label, false);
+    emit_native_push_exc_stack(emit, label, true); // is_finally=true so that emit_native_unwind_jump routes jumps through this handler
 
     emit_native_dup_top(emit);
     // stack: (..., __exit__, self, as_value, as_value)
@@ -1773,14 +1838,17 @@ STATIC void emit_native_setup_block(emit_t *emit, mp_uint_t label, int kind) {
 }
 
 STATIC void emit_native_with_cleanup(emit_t *emit, mp_uint_t label) {
-    // Note: 2 labels are reserved for this function, starting at *emit->label_slot
+    // Note: 3 labels are reserved for this function, starting at *emit->label_slot
 
     // stack: (..., __exit__, self, as_value)
     emit_native_pre(emit);
-    emit_native_pop_exc_stack(emit, false);
+    emit_native_leave_exc_stack(emit, false); // deactivate the with's handler before __exit__ is called
     adjust_stack(emit, -1);
     // stack: (..., __exit__, self)
 
+    // Label for case where __exit__ is called from an unwind jump
+    emit_native_label_assign(emit, *emit->label_slot + 2);
+
     // call __exit__
     emit_post_push_imm(emit, VTYPE_PYOBJ, (mp_uint_t)mp_const_none);
     emit_post_push_imm(emit, VTYPE_PYOBJ, (mp_uint_t)mp_const_none);
@@ -1792,16 +1860,22 @@ STATIC void emit_native_with_cleanup(emit_t *emit, mp_uint_t label) {
     emit_native_jump(emit, *emit->label_slot);
 
     // nlr_catch
-    emit_native_label_assign(emit, label);
+    // Don't use emit_native_label_assign because this isn't a real finally label
+    mp_asm_base_label_assign(&emit->as->base, label);
 
-    // Pop with's exception handler
-    emit_native_pop_exc_stack(emit, true);
+    // Leave with's exception handler
+    emit_native_leave_exc_stack(emit, true);
 
     // Adjust stack counter for: __exit__, self (implicitly discard as_value which is above self)
     emit_native_adjust_stack_size(emit, 2);
     // stack: (..., __exit__, self)
 
     ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, LOCAL_IDX_EXC_VAL(emit)); // get exc
+
+    // Check if exc is None (as stored by emit_native_unwind_jump) and jump to non-exc handler if it is
+    ASM_MOV_REG_IMM(emit->as, REG_ARG_2, (mp_uint_t)mp_const_none);
+    ASM_JUMP_IF_REG_EQ(emit->as, REG_ARG_1, REG_ARG_2, *emit->label_slot + 2); // label_slot + 2 is assigned just before the normal call to __exit__
+
     ASM_LOAD_REG_REG_OFFSET(emit->as, REG_ARG_2, REG_ARG_1, 0); // get type(exc)
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_ARG_2); // push type(exc)
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_ARG_1); // push exc value
@@ -1840,6 +1914,20 @@ STATIC void emit_native_end_finally(emit_t *emit) {
     emit_native_pre(emit);
     ASM_MOV_REG_LOCAL(emit->as, REG_ARG_1, LOCAL_IDX_EXC_VAL(emit));
     emit_call(emit, MP_F_NATIVE_RAISE);
+
+    // Get state for this finally; unwind code is emitted only if an unwind jump was routed through it
+    exc_stack_entry_t *e = emit_native_pop_exc_stack(emit);
+    if (e->unwind_label != UNWIND_LABEL_UNUSED) {
+        ASM_MOV_REG_LOCAL(emit->as, REG_RET, LOCAL_IDX_EXC_HANDLER_UNWIND(emit));
+        ASM_JUMP_IF_REG_ZERO(emit->as, REG_RET, *emit->label_slot, false); // zero: no unwind jump pending, continue after the finally
+        if (e->unwind_label == UNWIND_LABEL_DO_FINAL_UNWIND) {
+            ASM_JUMP_REG(emit->as, REG_RET); // last finally of the chain: jump to the saved target PC
+        } else {
+            emit_native_jump(emit, e->unwind_label); // chain to the next finally recorded by emit_native_unwind_jump
+        }
+        emit_native_label_assign(emit, *emit->label_slot);
+    }
+
     emit_post(emit);
 }
 
@@ -1886,7 +1974,7 @@ STATIC void emit_native_for_iter_end(emit_t *emit) {
 STATIC void emit_native_pop_block(emit_t *emit) {
     emit_native_pre(emit);
     if (!emit->exc_stack[emit->exc_stack_size - 1].is_finally) {
-        emit_native_pop_exc_stack(emit, false);
+        emit_native_leave_exc_stack(emit, false); // is_finally entries are left elsewhere (at their handler label or in with_cleanup)
     }
     emit_post(emit);
 }
@@ -2314,8 +2402,12 @@ STATIC void emit_native_return_value(emit_t *emit) {
         emit_pre_pop_reg(emit, &vtype, REG_RET);
         assert(vtype == VTYPE_PYOBJ);
     }
+    if (NEED_GLOBAL_EXC_HANDLER(emit)) {
+        // Save return value for the global exception handler to use
+        ASM_MOV_LOCAL_REG(emit->as, LOCAL_IDX_RET_VAL(emit), REG_RET);
+    }
+    emit_native_unwind_jump(emit, emit->exit_label, emit->exc_stack_size); // depth is the whole exc_stack, so every active finally runs before exit
     emit->last_emit_was_return_value = true;
-    ASM_JUMP(emit->as, emit->exit_label);
 }
 
 STATIC void emit_native_raise_varargs(emit_t *emit, mp_uint_t n_args) {
@@ -2337,8 +2429,8 @@ STATIC void emit_native_yield(emit_t *emit, int kind) {
 }
 
 STATIC void emit_native_start_except_handler(emit_t *emit) {
-    // Protected block has finished so pop the exception stack
-    emit_native_pop_exc_stack(emit, true);
+    // Protected block has finished so leave the current exception handler
+    emit_native_leave_exc_stack(emit, true); // true: this code is emitted at the start of the handler
 
     // Get and push nlr_buf.ret_val
     ASM_MOV_REG_LOCAL(emit->as, REG_TEMP0, LOCAL_IDX_EXC_VAL(emit));
diff --git a/py/emitnthumb.c b/py/emitnthumb.c
index e1dc4976d7382ac2a4ba4daa5562e28f5083a3d9..1c33e7a68b59e68fbbe9e2577eb330096440f2d6 100644
--- a/py/emitnthumb.c
+++ b/py/emitnthumb.c
@@ -8,8 +8,10 @@
 #define GENERIC_ASM_API (1)
 #include "py/asmthumb.h"
 
-// Word index of REG_LOCAL_1(=r4) in nlr_buf_t
-#define NLR_BUF_IDX_LOCAL_1 (3)
+// Word indices of REG_LOCAL_x in nlr_buf_t (consumed by the LOCAL_IDX_xxx macros in emitnative.c)
+#define NLR_BUF_IDX_LOCAL_1 (3) // r4
+#define NLR_BUF_IDX_LOCAL_2 (4) // r5
+#define NLR_BUF_IDX_LOCAL_3 (5) // r6
 
 #define N_THUMB (1)
 #define EXPORT_FUN(name) emit_native_thumb_##name
diff --git a/py/emitnx64.c b/py/emitnx64.c
index 5b04a50f546dd74a117f36d3b269c40885e6ea8a..4abb3ecad3a32685a8c3430b9959967ebf22edb9 100644
--- a/py/emitnx64.c
+++ b/py/emitnx64.c
@@ -8,8 +8,10 @@
 #define GENERIC_ASM_API (1)
 #include "py/asmx64.h"
 
-// Word index of REG_LOCAL_1(=rbx) in nlr_buf_t
-#define NLR_BUF_IDX_LOCAL_1 (5)
+// Word indices of REG_LOCAL_x in nlr_buf_t (consumed by the LOCAL_IDX_xxx macros in emitnative.c)
+#define NLR_BUF_IDX_LOCAL_1 (5) // rbx
+#define NLR_BUF_IDX_LOCAL_2 (6) // r12
+#define NLR_BUF_IDX_LOCAL_3 (7) // r13
 
 #define N_X64 (1)
 #define EXPORT_FUN(name) emit_native_x64_##name
diff --git a/py/emitnx86.c b/py/emitnx86.c
index e94634d27e87d110745dd71c051e1d1d121d539d..056c3f052df0a7a28f320ced77ed0ec2e057071c 100644
--- a/py/emitnx86.c
+++ b/py/emitnx86.c
@@ -9,8 +9,10 @@
 #define GENERIC_ASM_API (1)
 #include "py/asmx86.h"
 
-// Word index of REG_LOCAL_1(=ebx) in nlr_buf_t
-#define NLR_BUF_IDX_LOCAL_1 (5)
+// Word indices of REG_LOCAL_x in nlr_buf_t (consumed by the LOCAL_IDX_xxx macros in emitnative.c)
+#define NLR_BUF_IDX_LOCAL_1 (5) // ebx
+#define NLR_BUF_IDX_LOCAL_2 (7) // esi (note: higher index than LOCAL_3, the indices are not in LOCAL_x order here)
+#define NLR_BUF_IDX_LOCAL_3 (6) // edi
 
 // x86 needs a table to know how many args a given function has
 STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
diff --git a/py/emitnxtensa.c b/py/emitnxtensa.c
index 89ecb34de56954f4e10b4356ab077455fb4e9bb5..34089e90dc27b7a78023c368f2db142ddbab89cf 100644
--- a/py/emitnxtensa.c
+++ b/py/emitnxtensa.c
@@ -8,8 +8,10 @@
 #define GENERIC_ASM_API (1)
 #include "py/asmxtensa.h"
 
-// Word index of REG_LOCAL_1(=a12) in nlr_buf_t
-#define NLR_BUF_IDX_LOCAL_1 (8)
+// Word indices of REG_LOCAL_x in nlr_buf_t (consumed by the LOCAL_IDX_xxx macros in emitnative.c)
+#define NLR_BUF_IDX_LOCAL_1 (8) // a12
+#define NLR_BUF_IDX_LOCAL_2 (9) // a13
+#define NLR_BUF_IDX_LOCAL_3 (10) // a14
 
 #define N_XTENSA (1)
 #define EXPORT_FUN(name) emit_native_xtensa_##name