diff --git a/py/nlrx86.S b/py/nlrx86.S
index 8a96af81ce5ca9a765ba4d41ff93b6e8aefebf12..8c538ba1760aa7b8c105a7df23a2156f56557d89 100644
--- a/py/nlrx86.S
+++ b/py/nlrx86.S
@@ -37,9 +37,18 @@
 
 #if defined(_WIN32) || defined(__CYGWIN__)
 #define NLR_OS_WINDOWS
+#endif
+
+#if defined(__APPLE__) && defined(__MACH__)
+#define NLR_OS_MAC
+#endif
+
+#if defined(NLR_OS_WINDOWS) || defined(NLR_OS_MAC)
 #define NLR_TOP (_mp_state_ctx + NLR_TOP_OFFSET)
+#define MP_THREAD_GET_STATE _mp_thread_get_state
 #else
 #define NLR_TOP (mp_state_ctx + NLR_TOP_OFFSET)
+#define MP_THREAD_GET_STATE mp_thread_get_state
 #endif
 
 // offset of nlr_top within mp_state_thread_t structure
@@ -55,6 +64,9 @@
     .globl  _nlr_push
     .def    _nlr_push; .scl 2; .type 32; .endef
 _nlr_push:
+#elif defined(NLR_OS_MAC)
+    .globl  _nlr_push
+_nlr_push:
 #else
     .globl  nlr_push
     .type   nlr_push, @function
@@ -75,7 +87,7 @@ nlr_push:
     mov     %edx, NLR_TOP           # stor new nlr_buf (to make linked list)
 #else
     // to check: stack is aligned to 16-byte boundary before this call
-    call    mp_thread_get_state     # get mp_state_thread ptr into eax
+    call    MP_THREAD_GET_STATE     # get mp_state_thread ptr into eax
     mov     4(%esp), %edx           # load nlr_buf argument into edx (edx clobbered by call)
     mov     NLR_TOP_TH_OFF(%eax), %ecx # get thread.nlr_top (last nlr_buf)
     mov     %ecx, (%edx)            # store it
@@ -84,7 +96,7 @@ nlr_push:
 
     xor     %eax, %eax              # return 0, normal return
     ret                             # return
-#if !defined(NLR_OS_WINDOWS)
+#if !defined(NLR_OS_WINDOWS) && !defined(NLR_OS_MAC)
     .size   nlr_push, .-nlr_push
 #endif
 
@@ -95,6 +107,9 @@ nlr_push:
     .globl  _nlr_pop
     .def    _nlr_pop; .scl 2; .type 32; .endef
 _nlr_pop:
+#elif defined(NLR_OS_MAC)
+    .globl  _nlr_pop
+_nlr_pop:
 #else
     .globl  nlr_pop
     .type   nlr_pop, @function
@@ -106,14 +121,14 @@ nlr_pop:
     mov     (%eax), %eax            # load prev nlr_buf
     mov     %eax, NLR_TOP           # store nlr_top (to unlink list)
 #else
-    call    mp_thread_get_state     # get mp_state_thread ptr into eax
+    call    MP_THREAD_GET_STATE     # get mp_state_thread ptr into eax
     mov     NLR_TOP_TH_OFF(%eax), %ecx # get thread.nlr_top (last nlr_buf)
     mov     (%ecx), %ecx            # load prev nlr_buf
     mov     %ecx, NLR_TOP_TH_OFF(%eax) # store prev nlr_buf (to unlink list)
 #endif
 
     ret                             # return
-#if !defined(NLR_OS_WINDOWS)
+#if !defined(NLR_OS_WINDOWS) && !defined(NLR_OS_MAC)
     .size   nlr_pop, .-nlr_pop
 #endif
 
@@ -124,6 +139,9 @@ nlr_pop:
     .globl  _nlr_jump
     .def    _nlr_jump; .scl 2; .type 32; .endef
 _nlr_jump:
+#elif defined(NLR_OS_MAC)
+    .globl  _nlr_jump
+_nlr_jump:
 #else
     .globl  nlr_jump
     .type   nlr_jump, @function
@@ -133,7 +151,7 @@ nlr_jump:
 #if !MICROPY_PY_THREAD
     mov     NLR_TOP, %edx           # load nlr_top
     test    %edx, %edx              # check for nlr_top being NULL
-#if defined(NLR_OS_WINDOWS)
+#if defined(NLR_OS_WINDOWS) || defined(NLR_OS_MAC)
     je      _nlr_jump_fail           # fail if nlr_top is NULL
 #else
     je      nlr_jump_fail           # fail if nlr_top is NULL
@@ -143,10 +161,10 @@ nlr_jump:
     mov     (%edx), %eax            # load prev nlr_top
     mov     %eax, NLR_TOP           # store nlr_top (to unlink list)
 #else
-    call    mp_thread_get_state     # get mp_state_thread ptr into eax
+    call    MP_THREAD_GET_STATE     # get mp_state_thread ptr into eax
     mov     NLR_TOP_TH_OFF(%eax), %edx # get thread.nlr_top (last nlr_buf)
     test    %edx, %edx              # check for nlr_top being NULL
-#if defined(NLR_OS_WINDOWS)
+#if defined(NLR_OS_WINDOWS) || defined(NLR_OS_MAC)
     je      _nlr_jump_fail          # fail if nlr_top is NULL
 #else
     je      nlr_jump_fail           # fail if nlr_top is NULL
@@ -167,7 +185,7 @@ nlr_jump:
     xor     %eax, %eax              # clear return register
     inc     %al                     # increase to make 1, non-local return
     ret                             # return
-#if !defined(NLR_OS_WINDOWS)
+#if !defined(NLR_OS_WINDOWS) && !defined(NLR_OS_MAC)
     .size   nlr_jump, .-nlr_jump
 #endif
 
diff --git a/unix/Makefile b/unix/Makefile
index d5ad928b348d3362af015e9f611c3e2fe045d1a5..fd98d2ced228370946d1e3427585f2a77fb614b6 100644
--- a/unix/Makefile
+++ b/unix/Makefile
@@ -58,10 +58,14 @@ endif
 
 # On OSX, 'gcc' is a symlink to clang unless a real gcc is installed.
 # The unix port of micropython on OSX must be compiled with clang,
-# while cross-compile ports require gcc, so we test here for OSX and 
+# while cross-compile ports require gcc, so we test here for OSX and
 # if necessary override the value of 'CC' set in py/mkenv.mk
 ifeq ($(UNAME_S),Darwin)
+ifeq ($(MICROPY_FORCE_32BIT),1)
+CC = clang -m32
+else
 CC = clang
+endif
 # Use clang syntax for map file
 LDFLAGS_ARCH = -Wl,-map,$@.map -Wl,-dead_strip
 else
diff --git a/unix/gccollect.c b/unix/gccollect.c
index 397c4ffe1c1b465158ac9e554996020023ca7715..4ec8c2bf549c0bc332bfd0b5f2f3909ef470f69d 100644
--- a/unix/gccollect.c
+++ b/unix/gccollect.c
@@ -80,6 +80,18 @@ STATIC void gc_helper_get_regs(regs_t arr) {
     register long esi asm ("esi");
     register long edi asm ("edi");
     register long ebp asm ("ebp");
+#ifdef __clang__
+    // TODO:
+    // This is a dirty workaround for Clang. It tries to get around
+    // Clang's non-compliant (w.r.t. GCC) handling of register variables.
+    // The placement of this workaround here is arbitrary, done only to
+    // unbreak the macOS build. Better, cross-arch ways to deal with Clang
+    // issues should be found.
+    asm("" : "=r"(ebx));
+    asm("" : "=r"(esi));
+    asm("" : "=r"(edi));
+    asm("" : "=r"(ebp));
+#endif
     arr[0] = ebx;
     arr[1] = esi;
     arr[2] = edi;