diff --git a/components/st3m/CMakeLists.txt b/components/st3m/CMakeLists.txt
index a2531db558ec53bb2ed8215482faec91435b5fb0..f651a48db589bb3eea551ef5cfbe0716bf8e72f9 100644
--- a/components/st3m/CMakeLists.txt
+++ b/components/st3m/CMakeLists.txt
@@ -44,6 +44,7 @@ idf_component_register(
         audio_mod
         audio_mp3
         video_mpeg
+        bl00mbox
 )
 
 idf_component_get_property(tusb_lib tinyusb COMPONENT_LIB)
diff --git a/components/st3m/st3m_audio.c b/components/st3m/st3m_audio.c
index 4d8805dbc5648cffda900bf841fcd7041d4154f6..41a607b656c520498f4460454b04ed0304dd991c 100644
--- a/components/st3m/st3m_audio.c
+++ b/components/st3m/st3m_audio.c
@@ -13,8 +13,47 @@
 #include "freertos/semphr.h"
 #include "freertos/task.h"
 
+#include "bl00mbox.h"
+#include "st3m_media.h"
+
 static const char *TAG = "st3m-audio";
 
+// TODO: clean up. Adapt bl00mbox's API to the st3m_audio_engine_t callbacks.
+static void bl00mbox_init_wrapper(uint32_t sample_rate, uint16_t max_len) {
+    bl00mbox_init();  // sample_rate and max_len are not forwarded
+}
+static bool bl00mbox_audio_render_wrapper(int16_t *rx, int16_t *tx,
+                                          uint16_t len) {
+    bl00mbox_audio_render(rx, tx, len);
+    return true;  // unconditionally reports tx as filled
+}
+
+/* You can add your own audio engine here by simply adding a valid struct to
+ * this list! For details about the fields check out st3m_audio.h.
+ */
+
+static const st3m_audio_engine_t engines[] = {
+    {
+        .name = "bl00mbox",
+        .render_fun = bl00mbox_audio_render_wrapper,
+        .init_fun = bl00mbox_init_wrapper,
+    },
+    {
+        .name = "media_audio",
+        .render_fun = st3m_media_audio_render,
+        .init_fun = NULL,
+    }
+};
+
+static const uint8_t num_engines =
+    (sizeof(engines)) / (sizeof(st3m_audio_engine_t));
+
+typedef struct {
+    int32_t volume;
+    bool mute;
+    bool active;  // whether the engine has been filling tx in the last run
+} _engine_data_t;
+
 #define TIMEOUT_MS 1000
 
 static void _audio_player_task(void *data);
@@ -203,13 +242,12 @@ typedef struct {
     st3m_audio_input_source_t thru_target_source;
     st3m_audio_input_source_t source;
 
+    _engine_data_t *engines_data;
+
     // Software-based audio pipe settings.
     int32_t input_thru_vol;
     int32_t input_thru_vol_int;
     bool input_thru_mute;
-
-    // Main player function callback.
-    st3m_audio_player_function_t function;
 } st3m_audio_state_t;
 
 SemaphoreHandle_t state_mutex;
@@ -265,7 +303,6 @@ static st3m_audio_state_t state = {
     .thru_source = st3m_audio_input_source_none,
     .thru_target_source = st3m_audio_input_source_none,
     .source = st3m_audio_input_source_none,
-    .function = st3m_audio_player_function_dummy,
 };
 
 // Returns whether we should be outputting audio through headphones. If not,
@@ -480,18 +517,29 @@ static void _update_routing() {
     _update_thru_source();
 }
 
-void st3m_audio_player_function_dummy(int16_t *rx, int16_t *tx, uint16_t len) {
-    for (uint16_t i = 0; i < len; i++) {
-        tx[i] = 0;
-    }
-}
-
 void st3m_audio_init(void) {
     state_mutex = xSemaphoreCreateRecursiveMutex();
     assert(state_mutex != NULL);
-    state.function = st3m_audio_player_function_dummy;
 
     flow3r_bsp_audio_init();
+    // The player task dereferences engines_data on every buffer, so a failed
+    // allocation is fatal here, same as a failed mutex creation above.
+    LOCK;
+    state.engines_data = calloc(num_engines, sizeof(*state.engines_data));
+    assert(state.engines_data != NULL);
+    UNLOCK;
+
+    for (uint8_t i = 0; i < num_engines; i++) {
+        LOCK;
+        state.engines_data[i].volume = 4096;
+        state.engines_data[i].mute = false;
+        state.engines_data[i].active = false;  // is ignored by engine anyways
+        UNLOCK;
+        if (engines[i].init_fun != NULL) {
+            (*engines[i].init_fun)(FLOW3R_BSP_AUDIO_SAMPLE_RATE,
+                                   FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE);
+        }
+    }
 
     _update_routing();
     _output_apply(&state.speaker);
@@ -511,11 +559,13 @@ static void _audio_player_task(void *data) {
     int16_t buffer_tx[FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2];
     int16_t buffer_rx[FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2];
     int16_t buffer_rx_dummy[FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2];
+    int32_t output_acc[FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2];
+    int16_t engine_tx[FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2];
     memset(buffer_tx, 0, sizeof(buffer_tx));
     memset(buffer_rx, 0, sizeof(buffer_rx));
-    memset(buffer_rx_dummy, 0, sizeof(buffer_rx));
-    size_t count;
+    memset(buffer_rx_dummy, 0, sizeof(buffer_rx_dummy));
 
+    size_t count;
     st3m_audio_input_source_t source_prev = st3m_audio_input_source_none;
 
     while (true) {
@@ -532,12 +582,19 @@ static void _audio_player_task(void *data) {
             continue;
         }
 
+        int32_t engines_vol[num_engines];
+        bool engines_mute[num_engines];
+        bool engines_active[num_engines];
+
         LOCK;
+        for (uint8_t e = 0; e < num_engines; e++) {
+            engines_vol[e] = state.engines_data[e].volume;
+            engines_mute[e] = state.engines_data[e].mute;
+        }
         st3m_audio_input_source_t source = state.source;
         st3m_audio_input_source_t engine_source = state.engine_source;
         st3m_audio_input_source_t thru_source = state.thru_source;
         bool headphones = _headphones_connected();
-        st3m_audio_player_function_t function = state.function;
         int32_t software_volume = headphones ? state.headphones.volume_software
                                              : state.speaker.volume_software;
         bool input_thru_mute = state.input_thru_mute;
@@ -552,21 +609,19 @@ static void _audio_player_task(void *data) {
             // state change: throw away buffer
             source_prev = source;
             memset(buffer_rx, 0, sizeof(buffer_rx));
-        } else if (source == st3m_audio_input_source_headset_mic) {
-            // headset has its own gain thing going on, leave at unity
-            rx_chan = 1;
-        } else if (source == st3m_audio_input_source_line_in) {
-            LOCK;
-            int16_t gain = state.line_in_gain_software;
-            UNLOCK;
-            rx_gain = gain;
-            rx_chan = 0;
-        } else if (source == st3m_audio_input_source_onboard_mic) {
+        } else {
             LOCK;
-            int16_t gain = state.onboard_mic_gain_software;
+            if (source == st3m_audio_input_source_headset_mic) {
+                rx_gain = state.headset_mic_gain_software;
+                rx_chan = 0;  // not sure, don't have one here, need to test
+            } else if (source == st3m_audio_input_source_line_in) {
+                rx_gain = state.line_in_gain_software;
+                rx_chan = 0;
+            } else if (source == st3m_audio_input_source_onboard_mic) {
+                rx_gain = state.onboard_mic_gain_software;
+                rx_chan = 1;
+            }
             UNLOCK;
-            rx_gain = gain;
-            rx_chan = 1;
         }
 
         if (rx_chan == 0) {
@@ -592,33 +647,67 @@ static void _audio_player_task(void *data) {
         } else {
             engine_rx = buffer_rx;
         }
+        // </RX SIGNAL PREPROCESSING>
+
+        // <ACCUMULATING ENGINES>
+
+        bool output_acc_uninit = true;
+        for (uint8_t e = 0; e < num_engines; e++) {
+            // always run function even when muted, else the engine
+            // might suffer from being deprived of the passage of time
+            engines_active[e] = (*engines[e].render_fun)(
+                engine_rx, engine_tx, FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2);
+            if ((!engines_active[e]) || (!engines_vol[e]) || engines_mute[e])
+                continue;
+            if (output_acc_uninit) {
+                for (uint16_t i = 0; i < FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2;
+                     i++) {
+                    output_acc[i] = (engine_tx[i] * engines_vol[e]) >> 12;
+                }
+            } else {
+                for (uint16_t i = 0; i < FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2;
+                     i++) {
+                    output_acc[i] += (engine_tx[i] * engines_vol[e]) >> 12;
+                }
+            }
+            output_acc_uninit = false;
+        }
+        if (output_acc_uninit) {
+            memset(output_acc, 0, sizeof(output_acc));
+        }
 
-        // <ACTUAL ENGINE CALL>
+        LOCK;
+        for (uint8_t e = 0; e < num_engines; e++) {
+            state.engines_data[e].active = engines_active[e];
+        }
+        UNLOCK;
 
-        (*function)(engine_rx, buffer_tx, FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2);
+        // </ACCUMULATING ENGINES>
 
-        // </ACTUAL ENGINE CALL>
+        // <VOLUME AND THRU>
 
         for (uint16_t i = 0; i < FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE; i++) {
-            st3m_scope_write(
-                (buffer_tx[2 * i] + (uint32_t)buffer_tx[2 * i + 1]) >> 3);
+            st3m_scope_write((output_acc[2 * i] + output_acc[2 * i + 1]) >> 3);
         }
 
         for (int i = 0; i < (FLOW3R_BSP_AUDIO_DMA_BUFFER_SIZE * 2); i += 1) {
-            int32_t acc = buffer_tx[i];
-
             if ((thru_source != st3m_audio_input_source_none) &&
                 ((engine_source == thru_source) ||
                  (engine_source == st3m_audio_input_source_none)) &&
                 (!input_thru_mute)) {
-                acc += (buffer_rx[i] * input_thru_vol_int) >> 15;
+                output_acc[i] += (buffer_rx[i] * input_thru_vol_int) >> 15;
             }
 
-            acc = (acc * software_volume) >> 15;
+            output_acc[i] = (output_acc[i] * software_volume) >> 15;
+
+            if (output_acc[i] > 32767) output_acc[i] = 32767;
+            if (output_acc[i] < -32767) output_acc[i] = -32767;
 
-            buffer_tx[i] = acc;
+            buffer_tx[i] = output_acc[i];
         }
 
+        // </VOLUME AND THRU>
+
         flow3r_bsp_audio_write(buffer_tx, sizeof(buffer_tx), &count, 1000);
         if (count != sizeof(buffer_tx)) {
             ESP_LOGE(TAG, "audio_write: count (%d) != length (%d)\n", count,
@@ -782,12 +871,6 @@ float st3m_audio_input_thru_set_volume_dB(float vol_dB) {
     return vol_dB;
 }
 
-void st3m_audio_set_player_function(st3m_audio_player_function_t fun) {
-    LOCK;
-    state.function = fun;
-    UNLOCK;
-}
-
 bool st3m_audio_headphones_are_connected(void) {
     LOCK;
     bool res = _headphones_connected();
diff --git a/components/st3m/st3m_audio.h b/components/st3m/st3m_audio.h
index b8bb4bd8ef4adffcf3a25a4bb9cee9f7dc6694f8..28d38ecfe46edbdbdb3a3428cf2e0b7c53b98967 100644
--- a/components/st3m/st3m_audio.h
+++ b/components/st3m/st3m_audio.h
@@ -16,20 +16,47 @@ typedef enum {
     st3m_audio_input_source_auto = 4
 } st3m_audio_input_source_t;
 
-typedef void (*st3m_audio_player_function_t)(int16_t* tx, int16_t* rx,
-                                             uint16_t len);
-
-/* The default audio task takes a function of prototype
- * &st3m_audio_player_function_t, loops it and sets software volume/adds
- * software thru. tx is the stereo zipped l/r output, rx is the stereo zipped
- * input, each buffer the size of len.
+/* Initializes the audio engine and passes sample rate as well as max buffer
+ * length. At this point those values are always 48000/128, but this might
+ * become variable in the future. However, we see flow3r primarily as a real
+ * time instrument, and longer buffers introduce latency; the current buffer
+ * length corresponds to 1.3ms latency which isn't much, but given the up to
+ * 10ms captouch latency on top we shouldn't be super careless here.
  */
-void st3m_audio_set_player_function(st3m_audio_player_function_t fun);
-
-/* Dummy for st3m_audio_set_player_function that just writes zeros to the
- * output. Default state.
+typedef void (*st3m_audio_engine_init_function_t)(uint32_t sample_rate,
+                                                  uint16_t max_len);
+
+/* Renders the output of the audio engine and returns whether or not it has
+ * overwritten tx. Always called for each buffer, no exceptions. This means you
+ * can keep track of time within the engine easily and use the audio player task
+ * to handle musical events (the current 1.3ms buffer rate is well paced for
+ * this), but it also puts the burden on you of exiting early if there's nothing
+ * to do.
+ *
+ * rx (input) and tx (output) are both stereo interlaced, i.e. the even indices
+ * represent the left channel, the odd indices represent the right channel. The
+ * length is the total length of the buffer so that each channel has len/2 data
+ * points. len is always even.
+ *
+ * The function must never modify rx. This is so that we can pass the same
+ * buffer to all the engines without having to memcpy by default, so if you need
+ * to modify rx please do your own memcpy of it.
+ *
+ * In a similar manner, tx is not cleaned up when calling the function, it
+ * carries random junk data that is not supposed to be read by the user. The
+ * return value indicates whether tx should be used or if tx should be ignored
+ * and treated as if you had written all zeroes into it (without you actually
+ * doing so). If you choose to return true please make sure you have
+ * overwritten the entirety of tx with valid data.
  */
-void st3m_audio_player_function_dummy(int16_t* rx, int16_t* tx, uint16_t len);
+typedef bool (*st3m_audio_engine_render_function_t)(int16_t* rx, int16_t* tx,
+                                                    uint16_t len);
+
+typedef struct {
+    const char* name;  // used for UI, no longer than 14 characters
+    st3m_audio_engine_render_function_t render_fun;
+    st3m_audio_engine_init_function_t init_fun;  // optional, else NULL
+} st3m_audio_engine_t;
 
 /* Initializes I2S bus, the audio task and required data structures.
  * Expects an initialized I2C bus, will fail ungracefully otherwise (TODO).
diff --git a/components/st3m/st3m_media.c b/components/st3m/st3m_media.c
index 5538212f7a0462cbffe5593c4166c2e298b012f8..d40b9b35612f0a8a2d5012ff170a1fac5a76891a 100644
--- a/components/st3m/st3m_media.c
+++ b/components/st3m/st3m_media.c
@@ -17,13 +17,10 @@
 static st3m_media *audio_media = NULL;
 static TaskHandle_t media_task;
 static bool media_pending_destroy = false;
+static bool audio_inactive = true;
 
 static int16_t *audio_buffer = NULL;
 
-// XXX : it would be better to be able to push and pop the
-//       st3m_audio_player_function
-void bl00mbox_audio_render(int16_t *rx, int16_t *tx, uint16_t len);
-
 static inline int16_t mix_and_clip(int16_t a, int16_t b, int16_t gain) {
     if (a == 0 && gain == 4096) return b;
     int32_t val = a;
@@ -36,20 +33,26 @@ static inline int16_t mix_and_clip(int16_t a, int16_t b, int16_t gain) {
     return val;
 }
 
-void st3m_media_audio_render(int16_t *rx, int16_t *tx, uint16_t len) {
-    bl00mbox_audio_render(rx, tx, len);
-    if (!audio_media) return;
+// Audio engine render callback for the media player. Returns false (tx left
+// untouched) while audio is inactive or no media is set; otherwise writes all
+// of tx, using silence wherever the ring buffer has no queued sample.
+bool st3m_media_audio_render(int16_t *rx, int16_t *tx, uint16_t len) {
+    if (audio_inactive || (!audio_media)) return false;
     for (int i = 0; i < len; i++) {
         if ((audio_media->audio_r + 1 != audio_media->audio_w) &&
             (audio_media->audio_r + 1 - AUDIO_BUF_SIZE !=
              audio_media->audio_w)) {
             tx[i] = mix_and_clip(
-                tx[i], audio_media->audio_buffer[audio_media->audio_r++],
+                0, audio_media->audio_buffer[audio_media->audio_r++],
                 audio_media->volume);
             if (audio_media->audio_r >= AUDIO_BUF_SIZE)
                 audio_media->audio_r = 0;
-        }
+        } else {
+            // Ring buffer underrun: tx holds junk on entry, so emit silence.
+            tx[i] = 0;
+        }
     }
+    return true;
 }
 int st3m_media_samples_queued(void) {
     if (!audio_media) return 0;
@@ -70,7 +73,7 @@ static void st3m_media_task(void *_arg) {
     }
     if (audio_media->destroy) audio_media->destroy(audio_media);
     audio_media = 0;
-    st3m_audio_set_player_function(bl00mbox_audio_render);
+    audio_inactive = true;
     if (audio_buffer) {
         free(audio_buffer);
         audio_buffer = NULL;
@@ -241,7 +244,7 @@ int st3m_media_load(const char *path) {
 
     if (!audio_buffer)
         audio_buffer = heap_caps_malloc(AUDIO_BUF_SIZE * 2, MALLOC_CAP_DMA);
-    st3m_audio_set_player_function(st3m_media_audio_render);
+    audio_inactive = false;
     audio_media->audio_buffer = audio_buffer;
     audio_media->audio_r = 0;
     audio_media->audio_w = 1;
diff --git a/components/st3m/st3m_media.h b/components/st3m/st3m_media.h
index a0286395f2e506b89d855ef0a956155aaa7feeed..3254096355bff99fee3d3ebf56059b9fe69c1c4c 100644
--- a/components/st3m/st3m_media.h
+++ b/components/st3m/st3m_media.h
@@ -106,3 +106,5 @@ void st3m_media_set(const char *key, float value);
 // API for use in implementations
 // query how manu audio samples have been queued in the pcm output buffer
 int st3m_media_samples_queued(void);
+
+bool st3m_media_audio_render(int16_t *rx, int16_t *tx, uint16_t len);
diff --git a/docs/api/audio.rst b/docs/api/audio.rst
index a53c4d211bf7e3b5475b155ab8dedfa95bcf97d2..6df70e7393406490ec9fe9f9ce72b2e7e4dcc7ad 100644
--- a/docs/api/audio.rst
+++ b/docs/api/audio.rst
@@ -4,7 +4,9 @@
 ================
 
 The audio module provides the backbone for handling basic audio bookkeeping such as volume and signal routing.
-Actual sound is created by the engines, i.e. bl00mbox and media player at the moment.
+Actual sound is created by the engines, i.e. bl00mbox and media player at the moment. If you wish to add your
+own C-based sound engine to flow3r, please check out the framework we have set up for that in
+``components/st3m/st3m_audio.*``.
 
 Jack Detection
 --------------
diff --git a/main/main.c b/main/main.c
index 8d9fb86a8d9f739f3f0a5d9625767d529359d109..c08bf1c774066a69c65ace5e1e71b68f90ba92c7 100644
--- a/main/main.c
+++ b/main/main.c
@@ -1,4 +1,3 @@
-#include "bl00mbox.h"
 #include "flow3r_bsp.h"
 #include "st3m_audio.h"
 #include "st3m_badgenet.h"
@@ -113,8 +112,6 @@ void flow3r_startup(void) {
 
     st3m_scope_init();
     st3m_audio_init();
-    bl00mbox_init();
-    st3m_audio_set_player_function(bl00mbox_audio_render);
     st3m_badgenet_init();
 
     st3m_mode_set(st3m_mode_kind_starting, "micropython");
diff --git a/recovery/components/bl00mbox/CMakeLists.txt b/recovery/components/bl00mbox/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..2c66391aecb94ffea0d24a936014edec561044c5
--- /dev/null
+++ b/recovery/components/bl00mbox/CMakeLists.txt
@@ -0,0 +1,5 @@
+idf_component_register(
+    SRCS
+    INCLUDE_DIRS
+        .
+)
diff --git a/recovery/components/bl00mbox/bl00mbox.h b/recovery/components/bl00mbox/bl00mbox.h
new file mode 100644
index 0000000000000000000000000000000000000000..e2481c4c0f229000da1c6e8c43e75530de022e24
--- /dev/null
+++ b/recovery/components/bl00mbox/bl00mbox.h
@@ -0,0 +1,7 @@
+// Declarations only: this component's CMakeLists.txt registers no sources.
+#pragma once
+
+#include <stdint.h>  // int16_t, uint16_t used in the prototypes below
+
+void bl00mbox_init(void);
+void bl00mbox_audio_render(int16_t *rx, int16_t *tx, uint16_t len);