diff --git a/components/micropython/usermodule/mp_sys_display.c b/components/micropython/usermodule/mp_sys_display.c
index 2515ddd9e708444abbf08bcef0dc06bad086c861..1ce01ddd45de51175f2a12ab2381ecceb0aac020 100644
--- a/components/micropython/usermodule/mp_sys_display.c
+++ b/components/micropython/usermodule/mp_sys_display.c
@@ -143,8 +143,9 @@ STATIC const mp_rom_map_elem_t mp_module_sys_display_globals_table[] = {
     { MP_ROM_QSTR(MP_QSTR_cool), MP_ROM_INT((int)st3m_gfx_cool) },
     { MP_ROM_QSTR(MP_QSTR_low_latency), MP_ROM_INT((int)st3m_gfx_low_latency) },
     { MP_ROM_QSTR(MP_QSTR_direct_ctx), MP_ROM_INT((int)st3m_gfx_direct_ctx) },
-    { MP_ROM_QSTR(MP_QSTR_unset), MP_ROM_INT((int)st3m_gfx_unset) },
     { MP_ROM_QSTR(MP_QSTR_lock), MP_ROM_INT((int)st3m_gfx_lock) },
+    { MP_ROM_QSTR(MP_QSTR_EXPERIMENTAL_think_per_draw),
+      MP_ROM_INT((int)st3m_gfx_EXPERIMENTAL_think_per_draw) },
     { MP_ROM_QSTR(MP_QSTR_x2), MP_ROM_INT((int)st3m_gfx_2x) },
     { MP_ROM_QSTR(MP_QSTR_x3), MP_ROM_INT((int)st3m_gfx_3x) },
     { MP_ROM_QSTR(MP_QSTR_x4), MP_ROM_INT((int)st3m_gfx_4x) },
diff --git a/components/st3m/st3m_gfx.c b/components/st3m/st3m_gfx.c
index 56914b64f2dd7d4cb1700a4d67f3e7096d25cd8a..b4542fcf6ae11d70f649b7fc218a9fde9c2347a5 100644
--- a/components/st3m/st3m_gfx.c
+++ b/components/st3m/st3m_gfx.c
@@ -223,18 +223,7 @@ void st3m_gfx_set_palette(uint8_t *pal_in, int count) {
 }
 
 void st3m_gfx_set_default_mode(st3m_gfx_mode mode) {
-    if (mode & st3m_gfx_unset) {
-        if (mode & st3m_gfx_lock)
-            default_mode &= ~st3m_gfx_lock;
-        else if (mode & st3m_gfx_4x)
-            default_mode &= ~st3m_gfx_4x;
-        else if (mode & st3m_gfx_osd)
-            default_mode &= ~st3m_gfx_osd;
-        else if (mode & st3m_gfx_low_latency)
-            default_mode &= ~st3m_gfx_low_latency;
-        else if (mode & st3m_gfx_direct_ctx)
-            default_mode &= ~st3m_gfx_direct_ctx;
-    } else if ((mode & (1 | 2 | 4 | 8 | 16 | 32)) == mode) {
+    if ((mode & (1 | 2 | 4 | 8 | 16 | 32)) == mode) {
         default_mode &= ~(1 | 2 | 4 | 8 | 16 | 32);
         default_mode |= mode;
     } else if (mode == st3m_gfx_2x) {
@@ -279,12 +268,13 @@ static void st3m_gfx_init_palette(st3m_gfx_mode mode) {
             break;
         case 2:
             for (int i = 0; i < 4; i++) {
-                st3m_pal[i * 3 + 0] = i * 63;
-                st3m_pal[i * 3 + 1] = i * 63;
-                st3m_pal[i * 3 + 2] = i * 63;
+                st3m_pal[i * 3 + 0] = (i * 255) / 3;
+                st3m_pal[i * 3 + 1] = (i * 255) / 3;
+                st3m_pal[i * 3 + 2] = (i * 255) / 3;
             }
             break;
-        case 4: {  // ega palette
+        case 4: {
+            // ega palette
             int idx = 0;
             for (int i = 0; i < 2; i++)
                 for (int r = 0; r < 2; r++)
@@ -899,10 +889,18 @@ void st3m_gfx_end_frame(Ctx *ctx) {
 }
 
 uint8_t st3m_gfx_pipe_available(void) {
+    st3m_gfx_mode set_mode = _st3m_gfx_mode ? _st3m_gfx_mode : default_mode;
+    if ((set_mode & st3m_gfx_EXPERIMENTAL_think_per_draw) &&
+        (smoothed_fps > 13.0))  // at 13 fps or below, fall through
+        return 1;  // always report ready; free queue is not consulted
     return uxQueueMessagesWaiting(user_ctx_freeq) > _st3m_gfx_low_latency;
 }
 
 uint8_t st3m_gfx_pipe_full(void) {
+    st3m_gfx_mode set_mode = _st3m_gfx_mode ? _st3m_gfx_mode : default_mode;
+    if ((set_mode & st3m_gfx_EXPERIMENTAL_think_per_draw) &&
+        (smoothed_fps > 13.0))  // at 13 fps or below, fall through
+        return 0;  // never report full; raster queue is not consulted
     return uxQueueSpacesAvailable(user_ctx_rastq) == 0;
 }
 
diff --git a/components/st3m/st3m_gfx.h b/components/st3m/st3m_gfx.h
index 611e0b0c7c618513f9fcd6387895023497617013..5e9afde03ad9b53b36ae34b31e0ca2153cb1cd71 100644
--- a/components/st3m/st3m_gfx.h
+++ b/components/st3m/st3m_gfx.h
@@ -13,17 +13,29 @@ typedef enum {
     // become available for other bitdepths as grayscale rather than color
     // overlays.
 
+    // lock the graphics mode, this makes st3m_gfx_set_mode() a no-op
+    st3m_gfx_lock = 1 << 8,
+
     // directly manipulate target framebuffer instead of having
-    // separate rasterization task
-    st3m_gfx_direct_ctx = 1 << 7,
-    // enable osd compositing
-    st3m_gfx_osd = 1 << 8,
-    // shallower pipeline
-    st3m_gfx_low_latency = 1 << 9,
-    st3m_gfx_unset = 1 << 10,
-    st3m_gfx_lock = 1 << 11,
-    st3m_gfx_2x = 1 << 12,
-    st3m_gfx_3x = 1 << 13,
+    // separate rasterization task - this causes the rasterization overhead
+    // to occur in the micropython task rather than the graphics rasterization
+    // task.
+    st3m_gfx_direct_ctx = 1 << 9,
+
+    // enable osd compositing, for a small performance boost
+    st3m_gfx_osd = 1 << 10,
+
+    // shallower pipeline, prioritize short time from drawing until shown on
+    // screen over frame rate
+    st3m_gfx_low_latency = 1 << 11,
+
+    // boost FPS by always reporting readiness for drawing; this is disabled
+    // dynamically while the smoothed FPS is 13 or lower
+    st3m_gfx_EXPERIMENTAL_think_per_draw = 1 << 12,
+
+    // pixel-doubling
+    st3m_gfx_2x = 1 << 13,
+    st3m_gfx_3x = 1 << 14,
     st3m_gfx_4x = st3m_gfx_2x | st3m_gfx_3x,
 
     // 4 and 8bpp modes use the configured palette, the palette resides
@@ -38,11 +50,11 @@ typedef enum {
     st3m_gfx_palette = 15,
     // 16bpp modes have the lowest blit overhead - no osd for now
     st3m_gfx_16bpp = 16,
-    // for pixel poking 24bpp might be a little faster than 32bpp
-    // for now there is no ctx drawing support in 24bpp mode.
     st3m_gfx_24bpp = 24,
-    // 32bpp modes - are faster at doing compositing, for solid text/fills
-    // 16bpp is probably faster.
+    // 32bpp modes are slightly faster at compositing, but the memory overhead
+    // of higher bitdepths cancels out the gain from avoiding conversion back
+    // and forth between rgb565 and RGBA8. The 24 and 32bit modes are here
+    // mostly because they lead to nicer math in python.
     st3m_gfx_32bpp = 32,
 } st3m_gfx_mode;
 
diff --git a/python_payload/apps/graphics_mode/__init__.py b/python_payload/apps/graphics_mode/__init__.py
index 2422b0ef1363925182af638e8628f34692eb3e29..6318cbf616b2ed2cc6fffe14a60bf64b7f4135a2 100644
--- a/python_payload/apps/graphics_mode/__init__.py
+++ b/python_payload/apps/graphics_mode/__init__.py
@@ -33,8 +33,8 @@ class App(Application):
                 self.left_pressed = False
             elif self.right_pressed:
                 self.focused_widget += 1
-                if self.focused_widget > 7:
-                    self.focused_widget = 7
+                if self.focused_widget > 8:
+                    self.focused_widget = 8
                 self.right_pressed = False
         if self.widget_no == self.focused_widget and not self.active:
             ctx.rectangle(-130, int(self.y - self.font_size * 0.8), 260, self.font_size)
@@ -129,6 +129,7 @@ class App(Application):
         direct_ctx = (curmode & sys_display.direct_ctx) != 0
         lock = (curmode & sys_display.lock) != 0
         osd = (curmode & sys_display.osd) != 0
+        think_per_draw = (curmode & sys_display.EXPERIMENTAL_think_per_draw) != 0
         scale = 0
         if (curmode & sys_display.x4) == sys_display.x2:
             scale = 1
@@ -176,6 +177,7 @@ class App(Application):
         scale = self.draw_choice("scale", ["1x", "2x", "3x", "4x"], scale)
         low_latency = self.draw_boolean("low latency", low_latency)
         direct_ctx = self.draw_boolean("direct ctx", direct_ctx)
+        think_per_draw = self.draw_boolean("think per draw", think_per_draw)
         osd = self.draw_boolean("osd", osd)
         lock = self.draw_boolean("lock", lock)
         if direct_ctx:
@@ -209,6 +211,7 @@ class App(Application):
         mode += low_latency * sys_display.low_latency
         mode += direct_ctx * sys_display.direct_ctx
         mode += lock * sys_display.lock
+        mode += think_per_draw * sys_display.EXPERIMENTAL_think_per_draw
 
         if scale == 1:
             mode += sys_display.x2