From 05a4859585c4e0a55fca2e7467ba70da6453fdcb Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Mon, 6 Feb 2017 15:13:30 +1100
Subject: [PATCH] stmhal: Implement a proper thread scheduler.

This patch changes the threading implementation from simple round-robin
with busy waits on mutexs, to proper scheduling whereby threads that are
waiting on a mutex are only scheduled when the mutex becomes available.
---
 stmhal/main.c         |   4 +
 stmhal/modmachine.c   |   5 ++
 stmhal/mpconfigport.h |  14 ++++
 stmhal/mpthreadport.c |  35 +--------
 stmhal/mpthreadport.h |  14 +++-
 stmhal/pybthread.c    | 179 ++++++++++++++++++++++++++++++++++++------
 stmhal/pybthread.h    |  26 ++++--
 stmhal/stm32_it.c     |  12 ++-
 stmhal/systick.c      |  10 +++
 9 files changed, 236 insertions(+), 63 deletions(-)

diff --git a/stmhal/main.c b/stmhal/main.c
index 7bfdc52c3..bcc429df2 100644
--- a/stmhal/main.c
+++ b/stmhal/main.c
@@ -695,6 +695,10 @@ soft_reset_exit:
     can_deinit();
 #endif
 
+    #if MICROPY_PY_THREAD
+    pyb_thread_deinit();
+    #endif
+
     first_soft_reset = false;
     goto soft_reset;
 }
diff --git a/stmhal/modmachine.c b/stmhal/modmachine.c
index 4a3fe1ec9..16c50079d 100644
--- a/stmhal/modmachine.c
+++ b/stmhal/modmachine.c
@@ -41,6 +41,7 @@
 #include "extmod/vfs_fat.h"
 #include "gccollect.h"
 #include "irq.h"
+#include "pybthread.h"
 #include "rng.h"
 #include "storage.h"
 #include "pin.h"
@@ -159,6 +160,10 @@ STATIC mp_obj_t machine_info(mp_uint_t n_args, const mp_obj_t *args) {
         }
     }
 
+    #if MICROPY_PY_THREAD
+    pyb_thread_dump();
+    #endif
+
     if (n_args == 1) {
         // arg given means dump gc allocation table
         gc_dump_alloc_table();
diff --git a/stmhal/mpconfigport.h b/stmhal/mpconfigport.h
index 5828f07c7..38a5ac5da 100644
--- a/stmhal/mpconfigport.h
+++ b/stmhal/mpconfigport.h
@@ -299,7 +299,21 @@ static inline mp_uint_t disable_irq(void) {
 
 #define MICROPY_BEGIN_ATOMIC_SECTION()     disable_irq()
 #define MICROPY_END_ATOMIC_SECTION(state)  enable_irq(state)
+
+#if MICROPY_PY_THREAD
+#define MICROPY_EVENT_POLL_HOOK \
+    do { \
+        if (pyb_thread_enabled) { \
+            MP_THREAD_GIL_EXIT(); \
+            pyb_thread_yield(); \
+            MP_THREAD_GIL_ENTER(); \
+        } else { \
+            __WFI(); \
+        } \
+    } while (0);
+#else
 #define MICROPY_EVENT_POLL_HOOK            __WFI();
+#endif
 
 // There is no classical C heap in bare-metal ports, only Python
 // garbage-collected heap. For completeness, emulate C heap via
diff --git a/stmhal/mpthreadport.c b/stmhal/mpthreadport.c
index 97c19647c..d7c5b569b 100644
--- a/stmhal/mpthreadport.c
+++ b/stmhal/mpthreadport.c
@@ -44,15 +44,13 @@ void mp_thread_init(void) {
 
 void mp_thread_gc_others(void) {
     mp_thread_mutex_lock(&thread_mutex, 1);
-    gc_collect_root((void**)&pyb_thread_cur, 1);
-    for (pyb_thread_t *th = pyb_thread_cur;; th = th->next) {
+    for (pyb_thread_t *th = pyb_thread_all; th != NULL; th = th->all_next) {
+        gc_collect_root((void**)&th, 1);
         gc_collect_root(&th->arg, 1);
+        gc_collect_root(&th->stack, 1);
         if (th != pyb_thread_cur) {
             gc_collect_root(th->stack, th->stack_len);
         }
-        if (th->next == pyb_thread_cur) {
-            break;
-        }
     }
     mp_thread_mutex_unlock(&thread_mutex);
 }
@@ -93,31 +91,4 @@ void mp_thread_start(void) {
 void mp_thread_finish(void) {
 }
 
-void mp_thread_mutex_init(mp_thread_mutex_t *mutex) {
-    *mutex = 0;
-}
-
-int mp_thread_mutex_lock(mp_thread_mutex_t *mutex, int wait) {
-    uint32_t irq_state = disable_irq();
-    if (*mutex) {
-        // mutex is locked
-        if (!wait) {
-            enable_irq(irq_state);
-            return 0; // failed to lock mutex
-        }
-        while (*mutex) {
-            enable_irq(irq_state);
-            pyb_thread_yield();
-            irq_state = disable_irq();
-        }
-    }
-    *mutex = 1;
-    enable_irq(irq_state);
-    return 1; // have mutex
-}
-
-void mp_thread_mutex_unlock(mp_thread_mutex_t *mutex) {
-    *mutex = 0;
-}
-
 #endif // MICROPY_PY_THREAD
diff --git a/stmhal/mpthreadport.h b/stmhal/mpthreadport.h
index 4fef323eb..3d8b4ef01 100644
--- a/stmhal/mpthreadport.h
+++ b/stmhal/mpthreadport.h
@@ -29,7 +29,7 @@
 #include "py/mpthread.h"
 #include "pybthread.h"
 
-typedef uint32_t mp_thread_mutex_t;
+typedef pyb_mutex_t mp_thread_mutex_t;
 
 void mp_thread_init(void);
 void mp_thread_gc_others(void);
@@ -42,4 +42,16 @@ static inline struct _mp_state_thread_t *mp_thread_get_state(void) {
     return pyb_thread_get_local();
 }
 
+static inline void mp_thread_mutex_init(mp_thread_mutex_t *m) {
+    pyb_mutex_init(m);
+}
+
+static inline int mp_thread_mutex_lock(mp_thread_mutex_t *m, int wait) {
+    return pyb_mutex_lock(m, wait);
+}
+
+static inline void mp_thread_mutex_unlock(mp_thread_mutex_t *m) {
+    pyb_mutex_unlock(m);
+}
+
 #endif // __MICROPY_INCLUDED_STMHAL_MPTHREADPORT_H__
diff --git a/stmhal/pybthread.c b/stmhal/pybthread.c
index 9f9f82a45..51e9a738d 100644
--- a/stmhal/pybthread.c
+++ b/stmhal/pybthread.c
@@ -34,29 +34,80 @@
 
 #if MICROPY_PY_THREAD
 
-int pyb_thread_enabled;
-pyb_thread_t *pyb_thread_cur;
+#define PYB_MUTEX_UNLOCKED ((void*)0)
+#define PYB_MUTEX_LOCKED ((void*)1)
+
+extern void __fatal_error(const char*);
+
+volatile int pyb_thread_enabled;
+pyb_thread_t *volatile pyb_thread_all;
+pyb_thread_t *volatile pyb_thread_cur;
+
+static inline void pyb_thread_add_to_runable(pyb_thread_t *thread) {
+    thread->run_prev = pyb_thread_cur->run_prev;
+    thread->run_next = pyb_thread_cur;
+    pyb_thread_cur->run_prev->run_next = thread;
+    pyb_thread_cur->run_prev = thread;
+}
+
+static inline void pyb_thread_remove_from_runable(pyb_thread_t *thread) {
+    if (thread->run_next == thread) {
+        __fatal_error("deadlock");
+    }
+    thread->run_prev->run_next = thread->run_next;
+    thread->run_next->run_prev = thread->run_prev;
+}
 
 void pyb_thread_init(pyb_thread_t *thread) {
+    pyb_thread_enabled = 0;
+    pyb_thread_all = thread;
     pyb_thread_cur = thread;
-    pyb_thread_cur->sp = NULL; // will be set when this thread switches out
-    pyb_thread_cur->local_state = 0; // will be set by mp_thread_init
-    pyb_thread_cur->arg = NULL;
-    pyb_thread_cur->stack = &_heap_end;
-    pyb_thread_cur->stack_len = ((uint32_t)&_estack - (uint32_t)&_heap_end) / sizeof(uint32_t);
-    pyb_thread_cur->prev = thread;
-    pyb_thread_cur->next = thread;
+    thread->sp = NULL; // will be set when this thread switches out
+    thread->local_state = 0; // will be set by mp_thread_init
+    thread->arg = NULL;
+    thread->stack = &_heap_end;
+    thread->stack_len = ((uint32_t)&_estack - (uint32_t)&_heap_end) / sizeof(uint32_t);
+    thread->all_next = NULL;
+    thread->run_prev = thread;
+    thread->run_next = thread;
+    thread->queue_next = NULL;
+}
+
+void pyb_thread_deinit() {
+    uint32_t irq_state = disable_irq();
+    pyb_thread_enabled = 0;
+    pyb_thread_all = pyb_thread_cur;
+    pyb_thread_cur->all_next = NULL;
+    pyb_thread_cur->run_prev = pyb_thread_cur;
+    pyb_thread_cur->run_next = pyb_thread_cur;
+    enable_irq(irq_state);
 }
 
 STATIC void pyb_thread_terminate(void) {
-    uint32_t irq_state = raise_irq_pri(IRQ_PRI_PENDSV);
-    pyb_thread_cur->prev->next = pyb_thread_cur->next;
-    pyb_thread_cur->next->prev = pyb_thread_cur->prev;
-    if (pyb_thread_cur->next == pyb_thread_cur->prev) {
+    uint32_t irq_state = disable_irq();
+    pyb_thread_t *thread = pyb_thread_cur;
+    // take current thread off the run list
+    pyb_thread_remove_from_runable(thread);
+    // take current thread off the list of all threads
+    for (pyb_thread_t **n = (pyb_thread_t**)&pyb_thread_all;; n = &(*n)->all_next) {
+        if (*n == thread) {
+            *n = thread->all_next;
+            break;
+        }
+    }
+    // clean pointers as much as possible to help GC
+    thread->all_next = NULL;
+    thread->queue_next = NULL;
+    thread->stack = NULL;
+    if (pyb_thread_all->all_next == NULL) {
+        // only 1 thread left
         pyb_thread_enabled = 0;
     }
-    restore_irq_pri(irq_state);
-    pyb_thread_yield(); // should not return
+    // thread switch will occur after we enable irqs
+    SCB->ICSR = SCB_ICSR_PENDSVSET_Msk;
+    enable_irq(irq_state);
+    // should not return
+    __fatal_error("could not terminate");
 }
 
 uint32_t pyb_thread_new(pyb_thread_t *thread, void *stack, size_t stack_len, void *entry, void *arg) {
@@ -77,21 +128,105 @@ uint32_t pyb_thread_new(pyb_thread_t *thread, void *stack, size_t stack_len, voi
     thread->arg = arg;
     thread->stack = stack;
     thread->stack_len = stack_len;
-    uint32_t irq_state = raise_irq_pri(IRQ_PRI_PENDSV);
+    thread->queue_next = NULL;
+    uint32_t irq_state = disable_irq();
     pyb_thread_enabled = 1;
-    thread->next = pyb_thread_cur->next;
-    thread->prev = pyb_thread_cur;
-    pyb_thread_cur->next->prev = thread;
-    pyb_thread_cur->next = thread;
-    restore_irq_pri(irq_state);
+    thread->all_next = pyb_thread_all;
+    pyb_thread_all = thread;
+    pyb_thread_add_to_runable(thread);
+    enable_irq(irq_state);
     return (uint32_t)thread; // success
 }
 
+void pyb_thread_dump(void) {
+    if (!pyb_thread_enabled) {
+        printf("THREAD: only main thread\n");
+    } else {
+        printf("THREAD:\n");
+        for (pyb_thread_t *th = pyb_thread_all; th != NULL; th = th->all_next) {
+            bool runable = false;
+            for (pyb_thread_t *th2 = pyb_thread_cur;; th2 = th2->run_next) {
+                if (th == th2) {
+                    runable = true;
+                    break;
+                }
+                if (th2->run_next == pyb_thread_cur) {
+                    break;
+                }
+            }
+            printf("    id=%p sp=%p sz=%u", th, th->stack, th->stack_len);
+            if (runable) {
+                printf(" (runable)");
+            }
+            printf("\n");
+        }
+    }
+}
+
 // should only be called from pendsv_isr_handler
 void *pyb_thread_next(void *sp) {
     pyb_thread_cur->sp = sp;
-    pyb_thread_cur = pyb_thread_cur->next;
+    pyb_thread_cur = pyb_thread_cur->run_next;
+    pyb_thread_cur->timeslice = 4; // in milliseconds
     return pyb_thread_cur->sp;
 }
 
+void pyb_mutex_init(pyb_mutex_t *m) {
+    *m = PYB_MUTEX_UNLOCKED;
+}
+
+int pyb_mutex_lock(pyb_mutex_t *m, int wait) {
+    uint32_t irq_state = disable_irq();
+    if (*m == PYB_MUTEX_UNLOCKED) {
+        // mutex is available
+        *m = PYB_MUTEX_LOCKED;
+        enable_irq(irq_state);
+    } else {
+        // mutex is locked
+        if (!wait) {
+            enable_irq(irq_state);
+            return 0; // failed to lock mutex
+        }
+        if (*m == PYB_MUTEX_LOCKED) {
+            *m = pyb_thread_cur;
+        } else {
+            for (pyb_thread_t *n = *m;; n = n->queue_next) {
+                if (n->queue_next == NULL) {
+                    n->queue_next = pyb_thread_cur;
+                    break;
+                }
+            }
+        }
+        pyb_thread_cur->queue_next = NULL;
+        // take current thread off the run list
+        pyb_thread_remove_from_runable(pyb_thread_cur);
+        // thread switch will occur after we enable irqs
+        SCB->ICSR = SCB_ICSR_PENDSVSET_Msk;
+        enable_irq(irq_state);
+        // when we come back we have the mutex
+    }
+    return 1; // have mutex
+}
+
+void pyb_mutex_unlock(pyb_mutex_t *m) {
+    uint32_t irq_state = disable_irq();
+    if (*m == PYB_MUTEX_LOCKED) {
+        // no threads are blocked on the mutex
+        *m = PYB_MUTEX_UNLOCKED;
+    } else {
+        // at least one thread is blocked on this mutex
+        pyb_thread_t *th = *m;
+        if (th->queue_next == NULL) {
+            // no other threads are blocked
+            *m = PYB_MUTEX_LOCKED;
+        } else {
+            // at least one other thread is still blocked
+            *m = th->queue_next;
+        }
+        // put unblocked thread on runable list
+        pyb_thread_add_to_runable(th);
+    }
+    enable_irq(irq_state);
+}
+
 #endif // MICROPY_PY_THREAD
diff --git a/stmhal/pybthread.h b/stmhal/pybthread.h
index d4310c66a..6edb2400e 100644
--- a/stmhal/pybthread.h
+++ b/stmhal/pybthread.h
@@ -33,15 +33,23 @@ typedef struct _pyb_thread_t {
     void *arg;                  // thread Python args, a GC root pointer
     void *stack;                // pointer to the stack
     size_t stack_len;           // number of words in the stack
-    struct _pyb_thread_t *prev;
-    struct _pyb_thread_t *next;
+    uint32_t timeslice;
+    struct _pyb_thread_t *all_next;
+    struct _pyb_thread_t *run_prev;
+    struct _pyb_thread_t *run_next;
+    struct _pyb_thread_t *queue_next;
 } pyb_thread_t;
 
-extern int pyb_thread_enabled;
-extern pyb_thread_t *pyb_thread_cur;
+typedef pyb_thread_t *pyb_mutex_t;
+
+extern volatile int pyb_thread_enabled;
+extern pyb_thread_t *volatile pyb_thread_all;
+extern pyb_thread_t *volatile pyb_thread_cur;
 
 void pyb_thread_init(pyb_thread_t *th);
+void pyb_thread_deinit();
 uint32_t pyb_thread_new(pyb_thread_t *th, void *stack, size_t stack_len, void *entry, void *arg);
+void pyb_thread_dump(void);
 
 static inline uint32_t pyb_thread_get_id(void) {
     return (uint32_t)pyb_thread_cur;
@@ -56,7 +64,15 @@ static inline void *pyb_thread_get_local(void) {
 }
 
 static inline void pyb_thread_yield(void) {
-    SCB->ICSR = SCB_ICSR_PENDSVSET_Msk;
+    if (pyb_thread_cur->run_next == pyb_thread_cur) {
+        __WFI();
+    } else {
+        SCB->ICSR = SCB_ICSR_PENDSVSET_Msk;
+    }
 }
 
+void pyb_mutex_init(pyb_mutex_t *m);
+int pyb_mutex_lock(pyb_mutex_t *m, int wait);
+void pyb_mutex_unlock(pyb_mutex_t *m);
+
 #endif // MICROPY_INCLUDED_STMHAL_PYBTHREAD_H
diff --git a/stmhal/stm32_it.c b/stmhal/stm32_it.c
index 4152050a9..d8fcc59ce 100644
--- a/stmhal/stm32_it.c
+++ b/stmhal/stm32_it.c
@@ -70,6 +70,7 @@
 #include "stm32_it.h"
 #include STM32_HAL_H
 
+#include "py/mpstate.h"
 #include "py/obj.h"
 #include "py/mphal.h"
 #include "pendsv.h"
@@ -315,9 +316,14 @@ void SysTick_Handler(void) {
     }
 
     #if MICROPY_PY_THREAD
-    // signal a thread switch at 4ms=250Hz
-    if (pyb_thread_enabled && (uwTick & 0x03) == 0x03) {
-        SCB->ICSR = SCB_ICSR_PENDSVSET_Msk;
+    if (pyb_thread_enabled) {
+        if (pyb_thread_cur->timeslice == 0) {
+            if (pyb_thread_cur->run_next != pyb_thread_cur) {
+                SCB->ICSR = SCB_ICSR_PENDSVSET_Msk;
+            }
+        } else {
+            --pyb_thread_cur->timeslice;
+        }
     }
     #endif
 }
diff --git a/stmhal/systick.c b/stmhal/systick.c
index aed5b9690..ade05d74d 100644
--- a/stmhal/systick.c
+++ b/stmhal/systick.c
@@ -29,9 +29,11 @@
 #include "py/obj.h"
 #include "irq.h"
 #include "systick.h"
+#include "pybthread.h"
 
 // We provide our own version of HAL_Delay that calls __WFI while waiting, in
 // order to reduce power consumption.
+// Note: Upon entering this function we may or may not have the GIL.
 void HAL_Delay(uint32_t Delay) {
     if (query_irq() == IRQ_STATE_ENABLED) {
         // IRQs enabled, so can use systick counter to do the delay
@@ -40,7 +42,15 @@ void HAL_Delay(uint32_t Delay) {
         // Wraparound of tick is taken care of by 2's complement arithmetic.
         while (uwTick - start < Delay) {
             // Enter sleep mode, waiting for (at least) the SysTick interrupt.
+            #if MICROPY_PY_THREAD
+            if (pyb_thread_enabled) {
+                pyb_thread_yield();
+            } else {
+                __WFI();
+            }
+            #else
             __WFI();
+            #endif
         }
     } else {
         // IRQs disabled, so need to use a busy loop for the delay.
-- 
GitLab