diff --git a/py/asmx64.c b/py/asmx64.c
index 2a11de207f894f2b9067b3822a7ba254e41fd8fe..c023900a8fa8f8940306d9ad5b7477e10aab50f2 100644
--- a/py/asmx64.c
+++ b/py/asmx64.c
@@ -5,6 +5,7 @@
 #include <string.h>
 
 #include "misc.h"
+#include "mpconfig.h"
 #include "asmx64.h"
 
 /* all offsets are measured in multiples of 8 bytes */
@@ -602,7 +603,7 @@ void asm_x64_call_i1(asm_x64_t* as, void* func, int i1)
 */
 
 void asm_x64_call_ind(asm_x64_t* as, void *ptr, int temp_r64) {
-    asm_x64_mov_i64_to_r64_optimised(as, (int64_t)ptr, temp_r64);
+    asm_x64_mov_i64_to_r64_optimised(as, (machine_int_t)ptr, temp_r64);
     asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64));
     // this reduces code size by 2 bytes per call, but doesn't seem to speed it up at all
     // doesn't work anymore because calls are 64 bits away
diff --git a/py/nlrthumb.s b/py/nlrthumb.S
similarity index 94%
rename from py/nlrthumb.s
rename to py/nlrthumb.S
index d4d1bff233502f5aaff97fe19146cc0f1e63832f..22e0b6ba4d8dba1607c4d01561c8c526f74653de 100644
--- a/py/nlrthumb.s
+++ b/py/nlrthumb.S
@@ -1,4 +1,5 @@
-@ thumb callee save: bx, bp, sp, r12, r14, r14, r15
+#ifdef __thumb2__
+/* thumb callee save: r4-r11, r13=sp */
 
     .syntax unified
     .cpu cortex-m4
@@ -6,7 +7,7 @@
     .text
     .align  2
 
-@ uint nlr_push(r0=nlr_buf_t *nlr)
+/* uint nlr_push(r0=nlr_buf_t *nlr) */
     .global nlr_push
     .thumb
     .thumb_func
@@ -51,7 +52,7 @@ nlr_pop:
     .word    .LANCHOR0
     .size   nlr_pop, .-nlr_pop
 
-@ void nlr_jump(r0=uint val)
+/* void nlr_jump(r0=uint val) */
     .global nlr_jump
     .thumb
     .thumb_func
@@ -81,7 +82,7 @@ nlr_jump:
     .word    .LANCHOR0
     .size   nlr_jump, .-nlr_jump
 
-@ local variable nlr_top
+/* local variable nlr_top */
     .bss
     .align  2
     .set    .LANCHOR0,. + 0
@@ -89,3 +90,4 @@ nlr_jump:
     .size   nlr_top, 4
 nlr_top:
     .space  4
+#endif
diff --git a/py/nlrx64.s b/py/nlrx64.S
similarity index 93%
rename from py/nlrx64.s
rename to py/nlrx64.S
index cf0eb3cd718231fa0f169359e8d0030e60aab02d..6d0e2118f724bd3ab7b1262189885c4d9017edaf 100644
--- a/py/nlrx64.s
+++ b/py/nlrx64.S
@@ -1,9 +1,10 @@
-# x64 callee save: bx, bp, sp, r12, r14, r14, r15
+#ifdef __x86_64__
+/* x64 callee save: bx, bp, sp, r12, r13, r14, r15 */
 
     .file   "nlr.s"
     .text
 
-# uint nlr_push(rdi=nlr_buf_t *nlr)
+/* uint nlr_push(rdi=nlr_buf_t *nlr) */
     .globl  nlr_push
     .type   nlr_push, @function
 nlr_push:
@@ -23,7 +24,7 @@ nlr_push:
     ret                             # return
     .size   nlr_push, .-nlr_push
 
-# void nlr_pop()
+/* void nlr_pop() */
     .globl  nlr_pop
     .type   nlr_pop, @function
 nlr_pop:
@@ -33,7 +34,7 @@ nlr_pop:
     ret                             # return
     .size   nlr_pop, .-nlr_pop
 
-# void nlr_jump(rdi=uint val)
+/* void nlr_jump(rdi=uint val) */
     .globl  nlr_jump
     .type   nlr_jump, @function
 nlr_jump:
@@ -58,3 +59,4 @@ nlr_jump:
 
     .local  nlr_top
     .comm   nlr_top,8,8
+#endif
diff --git a/py/nlrx86.s b/py/nlrx86.S
similarity index 92%
rename from py/nlrx86.s
rename to py/nlrx86.S
index 364766da706ae24f2a81bc22c5ff07ce51e53263..3e2eec7a7b879af20119a088b9c5bb29b8ada873 100644
--- a/py/nlrx86.s
+++ b/py/nlrx86.S
@@ -1,9 +1,10 @@
-# x86 callee save: bx, di, si, bp, sp
+#ifdef __i386__
+/* x86 callee save: bx, di, si, bp, sp */
 
     .file   "nlr.s"
     .text
 
-# uint nlr_push(4(%esp)=nlr_buf_t *nlr)
+/* uint nlr_push(4(%esp)=nlr_buf_t *nlr) */
     .globl  nlr_push
     .type   nlr_push, @function
 nlr_push:
@@ -22,7 +23,7 @@ nlr_push:
     ret                             # return
     .size   nlr_push, .-nlr_push
 
-# void nlr_pop()
+/* void nlr_pop() */
     .globl  nlr_pop
     .type   nlr_pop, @function
 nlr_pop:
@@ -32,7 +33,7 @@ nlr_pop:
     ret                             # return
     .size   nlr_pop, .-nlr_pop
 
-# void nlr_jump(4(%esp)=uint val)
+/* void nlr_jump(4(%esp)=uint val) */
     .globl  nlr_jump
     .type   nlr_jump, @function
 nlr_jump:
@@ -55,3 +56,4 @@ nlr_jump:
 
     .local  nlr_top
     .comm   nlr_top,4,4
+#endif
diff --git a/py/showbc.c b/py/showbc.c
index 20a9790f99da0fc8b877eb5bb5d9da003c6f21e1..4bdbee89f0c7f5b36c3d1e836a1aefc0c9104a58 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -70,7 +70,7 @@ void mp_show_byte_code(const byte *ip, int len) {
 
             case MP_BC_LOAD_FAST_N:
                 DECODE_UINT;
-                printf("LOAD_FAST_N %lu", unum);
+                printf("LOAD_FAST_N " UINT_FMT, unum);
                 break;
 
             case MP_BC_LOAD_NAME:
@@ -111,7 +111,7 @@ void mp_show_byte_code(const byte *ip, int len) {
 
             case MP_BC_STORE_FAST_N:
                 DECODE_UINT;
-                printf("STORE_FAST_N %lu", unum);
+                printf("STORE_FAST_N " UINT_FMT, unum);
                 break;
 
             case MP_BC_STORE_NAME:
@@ -161,17 +161,17 @@ void mp_show_byte_code(const byte *ip, int len) {
 
             case MP_BC_JUMP:
                 DECODE_SLABEL;
-                printf("JUMP %lu", ip + unum - ip_start);
+                printf("JUMP " UINT_FMT, ip + unum - ip_start);
                 break;
 
             case MP_BC_POP_JUMP_IF_TRUE:
                 DECODE_SLABEL;
-                printf("POP_JUMP_IF_TRUE %lu", ip + unum - ip_start);
+                printf("POP_JUMP_IF_TRUE " UINT_FMT, ip + unum - ip_start);
                 break;
 
             case MP_BC_POP_JUMP_IF_FALSE:
                 DECODE_SLABEL;
-                printf("POP_JUMP_IF_FALSE %lu", ip + unum - ip_start);
+                printf("POP_JUMP_IF_FALSE " UINT_FMT, ip + unum - ip_start);
                 break;
 
                 /*
@@ -196,7 +196,7 @@ void mp_show_byte_code(const byte *ip, int len) {
 
             case MP_BC_SETUP_EXCEPT:
                 DECODE_ULABEL; // except labels are always forward
-                printf("SETUP_EXCEPT %lu", ip + unum - ip_start);
+                printf("SETUP_EXCEPT " UINT_FMT, ip + unum - ip_start);
                 break;
 
             case MP_BC_END_FINALLY:
@@ -213,7 +213,7 @@ void mp_show_byte_code(const byte *ip, int len) {
 
             case MP_BC_FOR_ITER:
                 DECODE_ULABEL; // the jump offset if iteration finishes; for labels are always forward
-                printf("FOR_ITER %lu", ip + unum - ip_start);
+                printf("FOR_ITER " UINT_FMT, ip + unum - ip_start);
                 break;
 
             case MP_BC_POP_BLOCK:
@@ -235,22 +235,22 @@ void mp_show_byte_code(const byte *ip, int len) {
 
             case MP_BC_BINARY_OP:
                 unum = *ip++;
-                printf("BINARY_OP %lu", unum);
+                printf("BINARY_OP " UINT_FMT, unum);
                 break;
 
             case MP_BC_COMPARE_OP:
                 unum = *ip++;
-                printf("COMPARE_OP %lu", unum);
+                printf("COMPARE_OP " UINT_FMT, unum);
                 break;
 
             case MP_BC_BUILD_TUPLE:
                 DECODE_UINT;
-                printf("BUILD_TUPLE %lu", unum);
+                printf("BUILD_TUPLE " UINT_FMT, unum);
                 break;
 
             case MP_BC_BUILD_LIST:
                 DECODE_UINT;
-                printf("BUILD_LIST %lu", unum);
+                printf("BUILD_LIST " UINT_FMT, unum);
                 break;
 
                 /*
@@ -264,7 +264,7 @@ void mp_show_byte_code(const byte *ip, int len) {
 
             case MP_BC_BUILD_MAP:
                 DECODE_UINT;
-                printf("BUILD_MAP %lu", unum);
+                printf("BUILD_MAP " UINT_FMT, unum);
                 break;
 
                 /*
@@ -297,22 +297,22 @@ void mp_show_byte_code(const byte *ip, int len) {
 
             case MP_BC_UNPACK_SEQUENCE:
                 DECODE_UINT;
-                printf("UNPACK_SEQUENCE %lu", unum);
+                printf("UNPACK_SEQUENCE " UINT_FMT, unum);
                 break;
 
             case MP_BC_MAKE_FUNCTION:
                 DECODE_UINT;
-                printf("MAKE_FUNCTION %lu", unum);
+                printf("MAKE_FUNCTION " UINT_FMT, unum);
                 break;
 
             case MP_BC_CALL_FUNCTION:
                 DECODE_UINT;
-                printf("CALL_FUNCTION n=%lu nkw=%lu", unum & 0xff, (unum >> 8) & 0xff);
+                printf("CALL_FUNCTION n=" UINT_FMT " nkw=" UINT_FMT, unum & 0xff, (unum >> 8) & 0xff);
                 break;
 
             case MP_BC_CALL_METHOD:
                 DECODE_UINT;
-                printf("CALL_METHOD n=%lu nkw=%lu", unum & 0xff, (unum >> 8) & 0xff);
+                printf("CALL_METHOD n=" UINT_FMT " nkw=" UINT_FMT, unum & 0xff, (unum >> 8) & 0xff);
                 break;
 
             case MP_BC_RETURN_VALUE:
diff --git a/unix/Makefile b/unix/Makefile
index 91e26e524722b22f25c489672fbed06aee279d11..91d05a2ec2b1078d64d67c951bde290b63a225e3 100644
--- a/unix/Makefile
+++ b/unix/Makefile
@@ -9,7 +9,9 @@ SRC_C = \
 	main.c \
 
 PY_O = \
+	nlrx86.o \
 	nlrx64.o \
+	nlrthumb.o \
 	malloc.o \
 	qstr.o \
 	vstr.o \
@@ -73,6 +75,9 @@ $(BUILD)/%.o: %.c
 $(BUILD)/%.o: $(PYSRC)/%.s
 	$(AS) -o $@ $<
 
+$(BUILD)/%.o: $(PYSRC)/%.S
+	$(CC) $(CFLAGS) -c -o $@ $<
+
 $(BUILD)/%.o: $(PYSRC)/%.c mpconfig.h
 	$(CC) $(CFLAGS) -c -o $@ $<
 
diff --git a/unix/mpconfig.h b/unix/mpconfig.h
index a358d7db814afdc66d1df13c4a982521d980f273..3d0dc8a5677c87657f98eb482bd7e1b8256ce606 100644
--- a/unix/mpconfig.h
+++ b/unix/mpconfig.h
@@ -8,10 +8,22 @@
 
 // type definitions for the specific machine
 
-#define BYTES_PER_WORD (8)
+#ifdef __LP64__
+typedef long machine_int_t; // must be pointer size
+typedef unsigned long machine_uint_t; // must be pointer size
+#define UINT_FMT "%lu"
+#define INT_FMT "%ld"
+#else
+// These are definitions for machines where sizeof(int) == sizeof(void*),
+// regardless of the actual size.
+typedef int machine_int_t; // must be pointer size
+typedef unsigned int machine_uint_t; // must be pointer size
+#define UINT_FMT "%u"
+#define INT_FMT "%d"
+#endif
+
+#define BYTES_PER_WORD sizeof(machine_int_t)
 
-typedef int64_t machine_int_t; // must be pointer size
-typedef uint64_t machine_uint_t; // must be pointer size
 typedef void *machine_ptr_t; // must be of pointer size
 typedef const void *machine_const_ptr_t; // must be of pointer size
 typedef double machine_float_t;