diff --git a/py/builtin.h b/py/builtin.h
index 349f49682e2a2f575411b50e04121bad3fe0f06f..cd1be3ab88be3fbf9807dcae30357866d2353c9e 100644
--- a/py/builtin.h
+++ b/py/builtin.h
@@ -94,6 +94,7 @@ extern const mp_obj_module_t mp_module_micropython;
 extern const mp_obj_module_t mp_module_ustruct;
 extern const mp_obj_module_t mp_module_sys;
 extern const mp_obj_module_t mp_module_gc;
+extern const mp_obj_module_t mp_module_thread;
 
 extern const mp_obj_dict_t mp_module_builtins_globals;
 
diff --git a/py/modthread.c b/py/modthread.c
new file mode 100644
index 0000000000000000000000000000000000000000..5104ef1275f1487dc3f0ea7faaab620b19640ab7
--- /dev/null
+++ b/py/modthread.c
@@ -0,0 +1,148 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2016 Damien P. George on behalf of Pycom Ltd
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "py/runtime.h"
+#include "py/stackctrl.h"
+
+#if MICROPY_PY_THREAD
+
+#include "py/mpthread.h"
+
+// Debug tracing switch for this file: change the 0 below to 1 to enable it.
+#if 0 // print debugging info
+#define DEBUG_PRINT (1)
+#define DEBUG_printf DEBUG_printf // calls are left as real calls (assumes a DEBUG_printf function exists elsewhere -- TODO confirm)
+#else // don't print debugging info
+#define DEBUG_PRINT (0)
+#define DEBUG_printf(...) (void)0 // calls expand to a no-op expression
+#endif
+
+/****************************************************************/
+// _thread module
+
+// _thread.get_ident(): return an integer identifying the calling thread.
+// The identifier is the address of the thread's state structure, as returned
+// by mp_thread_get_state(), wrapped by mp_obj_new_int_from_uint().
+STATIC mp_obj_t mod_thread_get_ident(void) {
+    uintptr_t ident = (uintptr_t)mp_thread_get_state();
+    return mp_obj_new_int_from_uint(ident);
+}
+STATIC MP_DEFINE_CONST_FUN_OBJ_0(mod_thread_get_ident_obj, mod_thread_get_ident);
+
+// Arguments handed from start_new_thread() to thread_entry(): everything
+// needed for the mp_call_function_n_kw() call made in the new thread.
+typedef struct _thread_entry_args_t {
+    mp_obj_t fun;           // object to call
+    size_t n_args;          // number of positional arguments at the start of args
+    size_t n_kw;            // number of keyword arguments (key/value pairs) after them
+    const mp_obj_t *args;   // n_args positional values, then n_kw key/value pairs
+} thread_entry_args_t;
+
+// Body of every thread created by start_new_thread().
+//
+// args_in points to the thread_entry_args_t describing the call to make.  The
+// function registers a mp_state_thread_t for this thread, calls the callable,
+// and prints any exception that escapes it (SystemExit is dropped silently).
+// The return value is always NULL.
+STATIC void *thread_entry(void *args_in) {
+    thread_entry_args_t *call = (thread_entry_args_t*)args_in;
+
+    // the thread state lives in this frame and is registered as this thread's state
+    mp_state_thread_t thread_state;
+    mp_thread_set_state(&thread_state);
+
+    // the stack top is placed just past thread_state so that the state itself
+    // is included in the root-pointer scan
+    mp_stack_set_top(&thread_state + 1);
+    // fixed stack limit for now
+    mp_stack_set_limit(16 * 1024);
+
+    // TODO set more thread-specific state here:
+    //  mp_pending_exception? (root pointer)
+    //  cur_exception (root pointer)
+    //  dict_locals? (root pointer) uPy doesn't make a new locals dict for functions, just for classes, so it's different to CPy
+
+    DEBUG_printf("[thread] start ts=%p args=%p stack=%p\n", &thread_state, &call, MP_STATE_THREAD(stack_top));
+
+    nlr_buf_t nlr;
+    if (nlr_push(&nlr) != 0) {
+        // an exception propagated out of the callable
+        mp_obj_t exc = (mp_obj_t)nlr.ret_val;
+        if (!mp_obj_is_subclass_fast(mp_obj_get_type(exc), &mp_type_SystemExit)) {
+            // anything other than SystemExit is reported on the platform printer
+            mp_printf(&mp_plat_print, "Unhandled exception in thread started by ");
+            mp_obj_print_helper(&mp_plat_print, call->fun, PRINT_REPR);
+            mp_printf(&mp_plat_print, "\n");
+            mp_obj_print_exception(&mp_plat_print, exc);
+        }
+    } else {
+        // normal path: make the call, then unlink the nlr buffer
+        mp_call_function_n_kw(call->fun, call->n_args, call->n_kw, call->args);
+        nlr_pop();
+    }
+
+    DEBUG_printf("[thread] finish ts=%p\n", &thread_state);
+
+    return NULL;
+}
+
+// _thread.start_new_thread(function, args[, kwargs])
+//
+// Starts a new thread, via mp_thread_create(), whose entry point calls
+// function with the positional arguments taken from the sequence args and, if
+// given, the keyword arguments taken from the dict kwargs.
+//
+// args[0] is the callable, args[1] the positional-argument sequence (anything
+// accepted by mp_obj_get_array) and args[2] the optional kwargs dict.  Raises
+// TypeError if kwargs is given but is not a dict.  Always returns None.
+STATIC mp_obj_t mod_thread_start_new_thread(size_t n_args, const mp_obj_t *args) {
+    // get positional arguments
+    mp_uint_t pos_args_len;
+    mp_obj_t *pos_args_items;
+    mp_obj_get_array(args[1], &pos_args_len, &pos_args_items);
+
+    // validate the keyword arguments before allocating anything, so that a
+    // rejected call does not leave an unused allocation behind
+    mp_map_t *kw_map = NULL;
+    if (n_args > 2) {
+        if (mp_obj_get_type(args[2]) != &mp_type_dict) {
+            nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "expecting a dict for keyword args"));
+        }
+        kw_map = &((mp_obj_dict_t*)MP_OBJ_TO_PTR(args[2]))->map;
+    }
+
+    thread_entry_args_t *th_args = m_new_obj(thread_entry_args_t);
+    th_args->fun = args[0];
+    th_args->n_args = pos_args_len;
+    th_args->n_kw = kw_map == NULL ? 0 : kw_map->used;
+    th_args->args = pos_args_items; // only kept when there is nothing to copy
+
+    // Copy the arguments into a buffer of their own, laid out as the positional
+    // values followed by the key/value pairs.  The positional values are copied
+    // even when there are no keyword arguments: mp_obj_get_array hands back a
+    // pointer rather than a copy, and the new thread must not keep reading
+    // storage that the caller can still modify (e.g. by changing a list that
+    // was passed as args) after this function returns.
+    size_t n_total = th_args->n_args + 2 * th_args->n_kw;
+    if (n_total > 0) {
+        mp_obj_t *all_args = m_new(mp_obj_t, n_total);
+        memcpy(all_args, pos_args_items, pos_args_len * sizeof(mp_obj_t));
+        if (kw_map != NULL) {
+            // only filled slots of the map's table hold key/value pairs
+            for (size_t i = 0, n = pos_args_len; i < kw_map->alloc; ++i) {
+                if (MP_MAP_SLOT_IS_FILLED(kw_map, i)) {
+                    all_args[n++] = kw_map->table[i].key;
+                    all_args[n++] = kw_map->table[i].value;
+                }
+            }
+        }
+        th_args->args = all_args;
+    }
+
+    // TODO implement setting thread stack size
+    mp_thread_create(thread_entry, th_args);
+    return mp_const_none;
+}
+STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_thread_start_new_thread_obj, 2, 3, mod_thread_start_new_thread);
+
+// Names exported by the _thread module: its __name__ and the two functions
+// defined in this file.
+STATIC const mp_rom_map_elem_t mp_module_thread_globals_table[] = {
+    { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR__thread) },
+    { MP_ROM_QSTR(MP_QSTR_get_ident), MP_ROM_PTR(&mod_thread_get_ident_obj) },
+    { MP_ROM_QSTR(MP_QSTR_start_new_thread), MP_ROM_PTR(&mod_thread_start_new_thread_obj) },
+};
+
+// constant dict built from the table above; used as the module's globals
+STATIC MP_DEFINE_CONST_DICT(mp_module_thread_globals, mp_module_thread_globals_table);
+
+// The _thread module object.  It is declared in py/builtin.h and entered in the
+// builtin module table in py/objmodule.c when MICROPY_PY_THREAD is enabled.
+const mp_obj_module_t mp_module_thread = {
+    .base = { &mp_type_module },
+    .name = MP_QSTR__thread,
+    .globals = (mp_obj_dict_t*)&mp_module_thread_globals, // cast drops const from the constant dict
+};
+
+#endif // MICROPY_PY_THREAD
diff --git a/py/mpconfig.h b/py/mpconfig.h
index 25aac8b7e3733a1df847e5eeadbf6832f9954fd0..998d1b6924cfcf95b8934d751714b38b50798599 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -824,6 +824,11 @@ typedef double mp_float_t;
 #define MICROPY_PY_UERRNO (0)
 #endif
 
+// Whether to provide "_thread" module.  When enabled, py/mpthread.h includes the
+// port's thread header (MICROPY_MPTHREADPORT_H if defined, else <mpthreadport.h>)
+// and MP_STATE_THREAD() goes through mp_thread_get_state().
+#ifndef MICROPY_PY_THREAD
+#define MICROPY_PY_THREAD (0)
+#endif
+
 // Extended modules
 
 #ifndef MICROPY_PY_UCTYPES
diff --git a/py/mpstate.h b/py/mpstate.h
index 824aba1ff980e8ce7e7278888d0201d5b0d4bfee..18bec09e42dbd6edb2c49dcf7a7dbc4aa6d711e1 100644
--- a/py/mpstate.h
+++ b/py/mpstate.h
@@ -201,6 +201,11 @@ extern mp_state_ctx_t mp_state_ctx;
 #define MP_STATE_VM(x) (mp_state_ctx.vm.x)
 #define MP_STATE_MEM(x) (mp_state_ctx.mem.x)
 
+// MP_STATE_THREAD(x) accesses field x of the thread state.  With threading
+// enabled the state is obtained by calling mp_thread_get_state() on every
+// access; otherwise it is the single state embedded in mp_state_ctx.
+#if MICROPY_PY_THREAD
+extern mp_state_thread_t *mp_thread_get_state(void);
+#define MP_STATE_THREAD(x) (mp_thread_get_state()->x)
+#else
 #define MP_STATE_THREAD(x) (mp_state_ctx.thread.x)
+#endif
 
 #endif // __MICROPY_INCLUDED_PY_MPSTATE_H__
diff --git a/py/mpthread.h b/py/mpthread.h
new file mode 100644
index 0000000000000000000000000000000000000000..6c6de7988610c37a24ecde07b8d472fbcccd9e0a
--- /dev/null
+++ b/py/mpthread.h
@@ -0,0 +1,45 @@
+/*
+ * This file is part of the MicroPython project, http://micropython.org/
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2016 Damien P. George on behalf of Pycom Ltd
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef __MICROPY_INCLUDED_PY_MPTHREAD_H__
+#define __MICROPY_INCLUDED_PY_MPTHREAD_H__
+
+#include "py/mpconfig.h"
+
+#if MICROPY_PY_THREAD
+
+#ifdef MICROPY_MPTHREADPORT_H
+#include MICROPY_MPTHREADPORT_H
+#else
+#include <mpthreadport.h>
+#endif
+
+// Thread primitives used by the core.  NOTE(review): no definitions are visible
+// alongside these declarations; presumably each port implements them -- confirm.
+
+// Return the thread state used by MP_STATE_THREAD() for the calling thread.
+mp_state_thread_t *mp_thread_get_state(void);
+// Set the calling thread's state; py/modthread.c passes the address of a
+// mp_state_thread_t located on the new thread's own stack.
+void mp_thread_set_state(void *state);
+// Expected to start a new thread that runs entry(arg); py/modthread.c passes
+// thread_entry together with its heap-allocated argument structure.
+void mp_thread_create(void *(*entry)(void*), void *arg);
+
+#endif // MICROPY_PY_THREAD
+
+#endif // __MICROPY_INCLUDED_PY_MPTHREAD_H__
diff --git a/py/nlrx64.S b/py/nlrx64.S
index 8dda025cadab86024733101b2752c36dcc569b98..ad2b66fdb269d65cdf22500fce8ab9a17de6747d 100644
--- a/py/nlrx64.S
+++ b/py/nlrx64.S
@@ -41,6 +41,9 @@
 #define NLR_TOP (mp_state_ctx + NLR_TOP_OFFSET)
 #endif
 
+// offset of nlr_top within mp_state_thread_t structure
+#define NLR_TOP_TH_OFF (0)
+
 #if defined(_WIN32) || defined(__CYGWIN__)
 #define NLR_OS_WINDOWS
 #endif
@@ -77,9 +80,20 @@ _nlr_push:
     movq    %r13, 56(%rdi)          # store %r13 into nlr_buf
     movq    %r14, 64(%rdi)          # store %r14 into nlr_buf
     movq    %r15, 72(%rdi)          # store %r15 into nlr_buf
+
+#if !MICROPY_PY_THREAD
     movq    NLR_TOP(%rip), %rax     # get last nlr_buf
     movq    %rax, (%rdi)            # store it
     movq    %rdi, NLR_TOP(%rip)     # stor new nlr_buf (to make linked list)
+#else
+    movq    %rdi, %rbp              # since we make a call, must save rdi in rbp
+    callq   mp_thread_get_state     # get mp_state_thread ptr into rax
+    movq    NLR_TOP_TH_OFF(%rax), %rsi # get thread.nlr_top (last nlr_buf)
+    movq    %rsi, (%rbp)            # store it
+    movq    %rbp, NLR_TOP_TH_OFF(%rax) # store new nlr_buf (to make linked list)
+    movq    24(%rbp), %rbp          # restore rbp
+#endif
+
     xorq    %rax, %rax              # return 0, normal return
     ret                             # return
 #if !(defined(__APPLE__) && defined(__MACH__))
@@ -97,9 +111,18 @@ nlr_pop:
     .globl  _nlr_pop
 _nlr_pop:
 #endif
+
+#if !MICROPY_PY_THREAD
     movq    NLR_TOP(%rip), %rax     # get nlr_top into %rax
     movq    (%rax), %rax            # load prev nlr_buf
     movq    %rax, NLR_TOP(%rip)     # store prev nlr_buf (to unlink list)
+#else
+    callq   mp_thread_get_state     # get mp_state_thread ptr into rax
+    movq    NLR_TOP_TH_OFF(%rax), %rdi # get thread.nlr_top (last nlr_buf)
+    movq    (%rdi), %rdi            # load prev nlr_buf
+    movq    %rdi, NLR_TOP_TH_OFF(%rax) # store prev nlr_buf (to unlink list)
+#endif
+
     ret                             # return
 #if !(defined(__APPLE__) && defined(__MACH__))
     .size   nlr_pop, .-nlr_pop
@@ -116,6 +139,8 @@ nlr_jump:
     .globl  _nlr_jump
     _nlr_jump:
 #endif
+
+#if !MICROPY_PY_THREAD
     movq    %rdi, %rax              # put return value in %rax
     movq    NLR_TOP(%rip), %rdi     # get nlr_top into %rdi
     test    %rdi, %rdi              # check for nlr_top being NULL
@@ -123,6 +148,19 @@ nlr_jump:
     movq    %rax, 8(%rdi)           # store return value
     movq    (%rdi), %rax            # load prev nlr_buf
     movq    %rax, NLR_TOP(%rip)     # store prev nlr_buf (to unlink list)
+#else
+    movq    %rdi, %rbp              # put return value in rbp
+    callq   mp_thread_get_state     # get thread ptr in rax
+    movq    %rax, %rsi              # put thread ptr in rsi
+    movq    %rbp, %rax              # put return value to rax (for je .fail)
+    movq    NLR_TOP_TH_OFF(%rsi), %rdi # get thread.nlr_top in rdi
+    test    %rdi, %rdi              # check for nlr_top being NULL
+    je      .fail                   # fail if nlr_top is NULL
+    movq    %rax, 8(%rdi)           # store return value
+    movq    (%rdi), %rax            # load prev nlr_buf
+    movq    %rax, NLR_TOP_TH_OFF(%rsi) # store prev nlr_buf (to unlink list)
+#endif
+
     movq    72(%rdi), %r15          # load saved %r15
     movq    64(%rdi), %r14          # load saved %r14
     movq    56(%rdi), %r13          # load saved %r13
diff --git a/py/nlrx86.S b/py/nlrx86.S
index a6ec4b73c7ab169906b48ab8730ec8a1736b4a3e..8a96af81ce5ca9a765ba4d41ff93b6e8aefebf12 100644
--- a/py/nlrx86.S
+++ b/py/nlrx86.S
@@ -42,6 +42,9 @@
 #define NLR_TOP (mp_state_ctx + NLR_TOP_OFFSET)
 #endif
 
+// offset of nlr_top within mp_state_thread_t structure
+#define NLR_TOP_TH_OFF (0)
+
     .file   "nlr.s"
     .text
 
@@ -65,9 +68,20 @@ nlr_push:
     mov     %ebx, 20(%edx)          # store %bx into nlr_buf+20
     mov     %edi, 24(%edx)          # store %di into nlr_buf
     mov     %esi, 28(%edx)          # store %si into nlr_buf
+
+#if !MICROPY_PY_THREAD
     mov     NLR_TOP, %eax           # load nlr_top
     mov     %eax, (%edx)            # store it
     mov     %edx, NLR_TOP           # stor new nlr_buf (to make linked list)
+#else
+    // to check: stack is aligned to 16-byte boundary before this call
+    call    mp_thread_get_state     # get mp_state_thread ptr into eax
+    mov     4(%esp), %edx           # load nlr_buf argument into edx (edx clobbered by call)
+    mov     NLR_TOP_TH_OFF(%eax), %ecx # get thread.nlr_top (last nlr_buf)
+    mov     %ecx, (%edx)            # store it
+    mov     %edx, NLR_TOP_TH_OFF(%eax) # store new nlr_buf (to make linked list)
+#endif
+
     xor     %eax, %eax              # return 0, normal return
     ret                             # return
 #if !defined(NLR_OS_WINDOWS)
@@ -86,9 +100,18 @@ _nlr_pop:
     .type   nlr_pop, @function
 nlr_pop:
 #endif
+
+#if !MICROPY_PY_THREAD
     mov     NLR_TOP, %eax           # load nlr_top
     mov     (%eax), %eax            # load prev nlr_buf
     mov     %eax, NLR_TOP           # store nlr_top (to unlink list)
+#else
+    call    mp_thread_get_state     # get mp_state_thread ptr into eax
+    mov     NLR_TOP_TH_OFF(%eax), %ecx # get thread.nlr_top (last nlr_buf)
+    mov     (%ecx), %ecx            # load prev nlr_buf
+    mov     %ecx, NLR_TOP_TH_OFF(%eax) # store prev nlr_buf (to unlink list)
+#endif
+
     ret                             # return
 #if !defined(NLR_OS_WINDOWS)
     .size   nlr_pop, .-nlr_pop
@@ -106,6 +129,8 @@ _nlr_jump:
     .type   nlr_jump, @function
 nlr_jump:
 #endif
+
+#if !MICROPY_PY_THREAD
     mov     NLR_TOP, %edx           # load nlr_top
     test    %edx, %edx              # check for nlr_top being NULL
 #if defined(NLR_OS_WINDOWS)
@@ -117,6 +142,21 @@ nlr_jump:
     mov     %eax, 4(%edx)           # store return value
     mov     (%edx), %eax            # load prev nlr_top
     mov     %eax, NLR_TOP           # store nlr_top (to unlink list)
+#else
+    call    mp_thread_get_state     # get mp_state_thread ptr into eax
+    mov     NLR_TOP_TH_OFF(%eax), %edx # get thread.nlr_top (last nlr_buf)
+    test    %edx, %edx              # check for nlr_top being NULL
+#if defined(NLR_OS_WINDOWS)
+    je      _nlr_jump_fail          # fail if nlr_top is NULL
+#else
+    je      nlr_jump_fail           # fail if nlr_top is NULL
+#endif
+    mov     4(%esp), %ecx           # load return value
+    mov     %ecx, 4(%edx)           # store return value
+    mov     (%edx), %ecx            # load prev nlr_top
+    mov     %ecx, NLR_TOP_TH_OFF(%eax) # store nlr_top (to unlink list)
+#endif
+
     mov     28(%edx), %esi          # load saved %si
     mov     24(%edx), %edi          # load saved %di
     mov     20(%edx), %ebx          # load saved %bx
diff --git a/py/objmodule.c b/py/objmodule.c
index 6d4b47958c965e6c34054d9501c1e66d3bf9a0fb..dc2ce787b4d7f73bb2da3ccb5ee033d8e869766c 100644
--- a/py/objmodule.c
+++ b/py/objmodule.c
@@ -160,6 +160,9 @@ STATIC const mp_rom_map_elem_t mp_builtin_module_table[] = {
 #if MICROPY_PY_GC && MICROPY_ENABLE_GC
     { MP_ROM_QSTR(MP_QSTR_gc), MP_ROM_PTR(&mp_module_gc) },
 #endif
+#if MICROPY_PY_THREAD
+    { MP_ROM_QSTR(MP_QSTR__thread), MP_ROM_PTR(&mp_module_thread) },
+#endif
 
     // extmod modules
 
diff --git a/py/py.mk b/py/py.mk
index 984519baa388afcd899915cbd1bac61329f8229a..16622f43ee6e3ce75494a393bab32fe3a280532b 100644
--- a/py/py.mk
+++ b/py/py.mk
@@ -186,6 +186,7 @@ PY_O_BASENAME = \
 	modstruct.o \
 	modsys.o \
 	moduerrno.o \
+	modthread.o \
 	vm.o \
 	bc.o \
 	showbc.o \