diff --git a/components/st3m/st3m_gfx.c b/components/st3m/st3m_gfx.c
index cbe1e6463d38e703b84d2e301665c2c0b9e74bb1..fd0e131afb447e2d4ebe616c8cf4916959de7fef 100644
--- a/components/st3m/st3m_gfx.c
+++ b/components/st3m/st3m_gfx.c
@@ -76,7 +76,9 @@ static QueueHandle_t user_ctx_rastq = NULL;
 static st3m_counter_rate_t rast_rate;
 static TaskHandle_t graphics_task;
 
-//////////////////////////
+static int _st3m_gfx_low_latency = 0;
+
+///////////////////////////////////////////////////////
 
 static Ctx *st3m_ctx_int(st3m_gfx_mode mode) {
     if (mode == st3m_gfx_osd) {
@@ -84,13 +86,18 @@ static Ctx *st3m_ctx_int(st3m_gfx_mode mode) {
             _st3m_gfx_mode ? _st3m_gfx_mode : ST3M_GFX_DEFAULT_MODE;
         switch (set_mode) {
             case st3m_gfx_default:  // overriden
+            case st3m_gfx_low_latency:
             case st3m_gfx_16bpp:
             case st3m_gfx_16bpp_osd:
+            case st3m_gfx_16bpp_low_latency:
                 return fb_RGBA8_ctx;
             case st3m_gfx_32bpp:
             case st3m_gfx_32bpp_osd:
+            case st3m_gfx_32bpp_low_latency:
             case st3m_gfx_24bpp:
+            case st3m_gfx_24bpp_low_latency:
             case st3m_gfx_8bpp:
+            case st3m_gfx_8bpp_low_latency:
             case st3m_gfx_8bpp_osd:
             case st3m_gfx_osd:
             case st3m_gfx_4bpp:
@@ -149,16 +156,6 @@ void st3m_gfx_set_palette(uint8_t *pal_in, int count) {
     for (int i = 0; i < count * 3; i++) pal[i] = pal_in[i];
 }
 
-static void grayscale_palette(void) {
-    uint8_t pal[256 * 3];
-    for (int i = 0; i < 256; i++) {
-        pal[i * 3 + 0] = i;
-        pal[i * 3 + 1] = i;
-        pal[i * 3 + 2] = i;
-    }
-    st3m_gfx_set_palette(pal, 256);
-}
-
 static void ega_palette(void) {
     uint8_t pal[16 * 3];
     int idx = 0;
@@ -183,46 +180,36 @@ static void fire_palette(void) {
     st3m_gfx_set_palette(pal, 256);
 }
 
-static void ice_palette(void) {
-    uint8_t pal[256 * 3];
-    for (int i = 0; i < 256; i++) {
-        pal[i * 3 + 0] = (i / 255.0) * (i / 255.0) * (i / 255.0) * 255;
-        pal[i * 3 + 1] = (i / 255.0) * (i / 255.0) * 255;
-        pal[i * 3 + 2] = i;
-    }
-    st3m_gfx_set_palette(pal, 256);
-}
-
 void st3m_gfx_set_mode(st3m_gfx_mode mode) {
     if (mode == _st3m_gfx_mode) return;
 
     memset(fb, 0, sizeof(fb));
     memset(st3m_osd_fb, 0, sizeof(st3m_osd_fb));
 
-    if (mode == st3m_gfx_default) mode = ST3M_GFX_DEFAULT_MODE;
+    if (mode == st3m_gfx_default)
+        mode = ST3M_GFX_DEFAULT_MODE;
+    else if (mode == st3m_gfx_low_latency)
+        mode = ST3M_GFX_DEFAULT_MODE + st3m_gfx_low_latency;
 
-    switch ((int)mode) {
+    switch (((int)mode) & ~3) {
         case st3m_gfx_4bpp:
             ega_palette();
             break;
         case st3m_gfx_8bpp:
         case st3m_gfx_8bpp_osd:
+        case st3m_gfx_8bpp_low_latency:
             fire_palette();
             break;
-        case 10:
-            ice_palette();
-            mode = 8;
-            break;
-        case 12:
-            grayscale_palette();
-            mode = 8;
-            break;
     }
 
     if (mode == ST3M_GFX_DEFAULT_MODE)
         _st3m_gfx_mode = st3m_gfx_default;
+    else if (mode == (ST3M_GFX_DEFAULT_MODE + st3m_gfx_low_latency))
+        _st3m_gfx_mode = st3m_gfx_low_latency;
     else
         _st3m_gfx_mode = mode;
+
+    _st3m_gfx_low_latency = ((mode & st3m_gfx_low_latency) != 0);
 }
 
 st3m_gfx_mode st3m_gfx_get_mode(void) { return _st3m_gfx_mode; }
@@ -233,14 +220,19 @@ uint8_t *st3m_gfx_fb(st3m_gfx_mode mode) {
     if (mode == st3m_gfx_default) {
         switch (set_mode) {
             case st3m_gfx_default:
+            case st3m_gfx_low_latency:
             case st3m_gfx_16bpp:
             case st3m_gfx_16bpp_osd:
+            case st3m_gfx_16bpp_low_latency:
                 return (uint8_t *)fb;
             case st3m_gfx_32bpp:
             case st3m_gfx_32bpp_osd:
+            case st3m_gfx_32bpp_low_latency:
             case st3m_gfx_24bpp:
+            case st3m_gfx_24bpp_low_latency:
             case st3m_gfx_8bpp:
             case st3m_gfx_8bpp_osd:
+            case st3m_gfx_8bpp_low_latency:
             case st3m_gfx_osd:
             case st3m_gfx_4bpp:
                 return st3m_osd_fb;
@@ -249,15 +241,20 @@ uint8_t *st3m_gfx_fb(st3m_gfx_mode mode) {
     if (mode == st3m_gfx_osd) {
         switch (set_mode) {
             case st3m_gfx_default:
+            case st3m_gfx_low_latency:
             case st3m_gfx_16bpp:
             case st3m_gfx_16bpp_osd:
+            case st3m_gfx_16bpp_low_latency:
             case st3m_gfx_osd:
                 return st3m_osd_fb;
             case st3m_gfx_32bpp:
             case st3m_gfx_32bpp_osd:
+            case st3m_gfx_32bpp_low_latency:
             case st3m_gfx_24bpp:
+            case st3m_gfx_24bpp_low_latency:
             case st3m_gfx_8bpp:
             case st3m_gfx_8bpp_osd:
+            case st3m_gfx_8bpp_low_latency:
             case st3m_gfx_4bpp:
                 return (uint8_t *)fb;
         }
@@ -265,15 +262,20 @@ uint8_t *st3m_gfx_fb(st3m_gfx_mode mode) {
 
     switch (set_mode) {
         case st3m_gfx_default:
+        case st3m_gfx_low_latency:
         case st3m_gfx_16bpp:
         case st3m_gfx_16bpp_osd:
+        case st3m_gfx_16bpp_low_latency:
             return (uint8_t *)fb;
         case st3m_gfx_4bpp:
         case st3m_gfx_32bpp:
         case st3m_gfx_32bpp_osd:
+        case st3m_gfx_32bpp_low_latency:
         case st3m_gfx_24bpp:
+        case st3m_gfx_24bpp_low_latency:
         case st3m_gfx_8bpp:
         case st3m_gfx_8bpp_osd:
+        case st3m_gfx_8bpp_low_latency:
         case st3m_gfx_osd:
             return st3m_osd_fb;
     }
@@ -282,7 +284,7 @@ uint8_t *st3m_gfx_fb(st3m_gfx_mode mode) {
 
 static void st3m_gfx_task(void *_arg) {
     (void)_arg;
-    st3m_gfx_set_mode(0);
+    st3m_gfx_set_mode(st3m_gfx_default);
 
     while (true) {
         int desc_no = 0;
@@ -305,13 +307,16 @@ static void st3m_gfx_task(void *_arg) {
                 break;
             case st3m_gfx_8bpp:
             case st3m_gfx_8bpp_osd:
+            case st3m_gfx_8bpp_low_latency:
                 ctx_render_ctx(user_ctx[desc_no], fb_GRAY8_ctx);
                 flow3r_bsp_display_send_fb(st3m_osd_fb, 8);
                 break;
             case st3m_gfx_24bpp:
+            case st3m_gfx_24bpp_low_latency:
                 flow3r_bsp_display_send_fb(st3m_osd_fb, 24);
                 break;
             case st3m_gfx_32bpp:
+            case st3m_gfx_32bpp_low_latency:
                 ctx_render_ctx(user_ctx[desc_no], fb_RGBA8_ctx);
                 flow3r_bsp_display_send_fb(st3m_osd_fb, 32);
                 break;
@@ -323,6 +328,8 @@ static void st3m_gfx_task(void *_arg) {
                 flow3r_bsp_display_send_fb(st3m_osd_fb, 32);
                 break;
             case st3m_gfx_16bpp:
+            case st3m_gfx_16bpp_low_latency:
+            case st3m_gfx_low_latency:
                 ctx_render_ctx(user_ctx[desc_no], fb_RGB565_BS_ctx);
                 flow3r_bsp_display_send_fb(fb, 16);
                 break;
@@ -341,13 +348,13 @@ static void st3m_gfx_task(void *_arg) {
                         _st3m_osd_x0[desc_no], _st3m_osd_y0[desc_no],
                         _st3m_osd_x1[desc_no] - _st3m_osd_x0[desc_no] + 1,
                         _st3m_osd_y1[desc_no] - _st3m_osd_y0[desc_no] + 1);
+                    pthread_mutex_unlock(&osd_mutex);
                     flow3r_bsp_display_send_fb(fb, 16);
                     st3m_ctx_unmerge_osd(
                         fb, st3m_osd_backup, _st3m_osd_x0[desc_no],
                         _st3m_osd_y0[desc_no],
                         _st3m_osd_x1[desc_no] - _st3m_osd_x0[desc_no] + 1,
                         _st3m_osd_y1[desc_no] - _st3m_osd_y0[desc_no] + 1);
-                    pthread_mutex_unlock(&osd_mutex);
                 } else
                     flow3r_bsp_display_send_fb(fb, 16);
                 break;
@@ -686,7 +693,7 @@ void st3m_ctx_end_frame(Ctx *ctx) {
 }
 
 uint8_t st3m_gfx_drawctx_pipe_full(void) {
-    return uxQueueMessagesWaiting(user_ctx_freeq) == 0;
+    return uxQueueMessagesWaiting(user_ctx_freeq) <= _st3m_gfx_low_latency;
 }
 
 void st3m_gfx_flush(int timeout_ms) {
diff --git a/components/st3m/st3m_gfx.h b/components/st3m/st3m_gfx.h
index 36992db64c251619b6022270c177243d48cdf0cc..bd79c8bb38c1cc92f8986cb1f630880faff54621 100644
--- a/components/st3m/st3m_gfx.h
+++ b/components/st3m/st3m_gfx.h
@@ -7,34 +7,53 @@
 #include "ctx.h"
 // clang-format on
 
-// There are three separate graphics modes that can be set, on application
-// exit RGBA8_over_RGB565_BYTESWAPPED should be restored.
-//
-// The two other modes cause a scan-out of without compositing/clipping
 typedef enum {
     st3m_gfx_default = 0,
+    // bitmask flag over base bpp to turn on OSD, only 16bpp for now
     st3m_gfx_osd = 1,
+    // shallower pipeline, in the future might mean immediate mode
+    st3m_gfx_low_latency = 2,
+    // 4 and 8bpp modes use the configured palette, the palette resides
+    // in video ram and is lost upon mode change
     st3m_gfx_4bpp = 4,
-
     st3m_gfx_8bpp = 8,
     st3m_gfx_8bpp_osd,
-
+    st3m_gfx_8bpp_low_latency,
+    // 16bpp modes have the lowest blit overhead - no osd for now
     st3m_gfx_16bpp = 16,
     st3m_gfx_16bpp_osd,
-
+    st3m_gfx_16bpp_low_latency,
+    // for pixel poking 24bpp might be a little faster than 32bpp
+    // for now there is no ctx drawing support in 24bpp mode.
     st3m_gfx_24bpp = 24,
-
+    st3m_gfx_24bpp_low_latency = 26,
     st3m_gfx_32bpp = 32,
-    st3m_gfx_32bpp_osd
+    // 32bpp modes are faster at compositing; for solid text/fills
+    // 16bpp is probably faster.
+    st3m_gfx_32bpp_osd,
+    st3m_gfx_32bpp_low_latency,
 } st3m_gfx_mode;
 
 // sets the current graphics mode
 void st3m_gfx_set_mode(st3m_gfx_mode mode);
 
+// gets the current graphics mode
 st3m_gfx_mode st3m_gfx_get_mode(void);
 
+// returns a ctx for drawing at the specified mode/target
+// should be paired with a st3m_ctx_end_frame
+// normal values are st3m_gfx_default for the base framebuffer of the
+// current mode and st3m_gfx_osd for the overlay drawing context.
+Ctx *st3m_ctx(st3m_gfx_mode mode);
+
+// get the framebuffer associated with graphics mode
+// if you ask for st3m_gfx_default you get the current mode's fb
+// and if you ask for st3m_gfx_osd you get the current mode's overlay fb
 uint8_t *st3m_gfx_fb(st3m_gfx_mode mode);
 
+// sets the palette, pal_in is an array with 3 uint8_t's per entry,
+// supported values for count are 1-256, used only in 4bpp and 8bpp
+// graphics modes.
 void st3m_gfx_set_palette(uint8_t *pal_in, int count);
 
 // specifies the corners of the clipping rectangle
@@ -44,11 +63,9 @@ void st3m_gfx_overlay_clip(int x0, int y0, int x1, int y1);
 // returns a running average of fps
 float st3m_gfx_fps(void);
 
-// returns a ctx for drawing at the specified mode/target
-Ctx *st3m_ctx(st3m_gfx_mode mode);
-
-void st3m_ctx_end_frame(Ctx *ctx);  // temporary, signature compatible
-                                    // with ctx_end_frame()
+// temporary, signature compatible
+// with ctx_end_frame()
+void st3m_ctx_end_frame(Ctx *ctx);
 
 // Initialize the gfx subsystem of st3m, includng the rasterization and
 // crtx/blitter pipeline.
diff --git a/components/video_mpeg/video_mpeg.c b/components/video_mpeg/video_mpeg.c
index 2985322154fb68275270d69a800cf18c990e27f7..d6e33ce14a6ee126047cf0d1504cd1103cc2a584 100644
--- a/components/video_mpeg/video_mpeg.c
+++ b/components/video_mpeg/video_mpeg.c
@@ -191,7 +191,7 @@ static void mpg1_destroy(st3m_media *media) {
     plm_destroy(self->plm);
     free(self->frame_data);
     free(self);
-    st3m_gfx_set_mode(0);
+    st3m_gfx_set_mode(st3m_gfx_default);
 }
 
 st3m_media *st3m_media_load_mpg1(const char *path) {
@@ -238,7 +238,7 @@ st3m_media *st3m_media_load_mpg1(const char *path) {
         (uint8_t *)malloc(plm_get_width(self->plm) * plm_get_height(self->plm) *
                           2);  // XXX : this is not quite right
 
-    st3m_gfx_set_mode(32);
+    st3m_gfx_set_mode(st3m_gfx_32bpp_low_latency);
     mpg1_think((st3m_media *)self, 0);  // the frame is constructed in think
     return (st3m_media *)self;
 }