diff --git a/components/ctx/ctx.c b/components/ctx/ctx.c
index f21e2e7d512af9f113cdfe3e8d8681745fd49684..6ad182f041177c234369ea615d2a087b73ad14c3 100644
--- a/components/ctx/ctx.c
+++ b/components/ctx/ctx.c
@@ -10,3 +10,65 @@
 
 #define CTX_IMPLEMENTATION
 #include "ctx.h"
+
+#define FB_WIDTH  240  /* presumably the framebuffer width in pixels */
+#define FB_HEIGHT 240  /* presumably the framebuffer height in pixels */
+/* Forward declarations of the RGB565 pack/unpack helpers used further down. */
+static inline uint16_t
+ctx_565_pack (uint8_t  red,
+              uint8_t  green,
+              uint8_t  blue,
+              const int      byteswap);
+static inline void
+ctx_565_unpack (const uint16_t pixel,
+                uint8_t *red,
+                uint8_t *green,
+                uint8_t *blue,
+                const int byteswap);
+
+/* Alpha-blend an RGBA8 overlay onto the framebuffer, saving what it covers.
+ * fb rows are FB_WIDTH pixels of byteswapped RGB565; overlay holds w*h pixels
+ * (bytes R,G,B,A) that the blend treats as premultiplied; the fb pixels under
+ * the w*h rectangle at (x0,y0) go to overlay_backup. No bounds are checked. */
+void st3m_ctx_merge_overlay(uint16_t *fb, uint8_t *overlay,
+                            uint16_t *overlay_backup, int x0, int y0, int w, int h)
+{
+  for (int row = 0; row < h; row++)
+  {
+     uint16_t *fb_p = &fb[(y0 + row) * FB_WIDTH + x0];
+     const uint8_t *src_p = &overlay[row * w * 4];
+     uint16_t *backup_p = &overlay_backup[row * w];
+     for (int x = 0; x < w; x++, fb_p++, backup_p++, src_p += 4)
+     {
+       uint8_t r, g, b;
+       *backup_p = *fb_p;
+       ctx_565_unpack(*fb_p, &r, &g, &b, 1);
+       /* Build both words from bytes: no aliasing or alignment assumptions. */
+       uint32_t dst = r | ((uint32_t)g << 8) | ((uint32_t)b << 16) | 0xff000000u;
+       uint32_t src = src_p[0] | ((uint32_t)src_p[1] << 8) |
+                      ((uint32_t)src_p[2] << 16) | ((uint32_t)src_p[3] << 24);
+       uint32_t si_ga = (src & 0xff00ff00) >> 8;
+       uint32_t si_rb = src & 0x00ff00ff;
+       uint32_t racov = (si_ga >> 16) ^ 255; /* 255 - overlay alpha */
+       /* Two channels per multiply: (src * 255 + 255 + dst * racov) >> 8. */
+       dst = (((si_rb*255+0xff00ff+((dst&0x00ff00ff)*racov))>>8)&0x00ff00ff)|
+             ((si_ga*255+0xff00ff+(((dst&0xff00ff00)>>8)*racov))&0xff00ff00);
+       *fb_p = ctx_565_pack(dst & 0xff, (dst >> 8) & 0xff, (dst >> 16) & 0xff, 1);
+     }
+  }
+}
+
+/* Undo st3m_ctx_merge_overlay(): copy the w*h pixels saved in overlay_backup
+ * back into the fb rectangle at (x0,y0); fb rows are FB_WIDTH pixels apart. */
+void st3m_ctx_unmerge_overlay(uint16_t *fb, uint16_t *overlay_backup, int x0, int y0, int w, int h)
+{
+  for (int row = 0; row < h; row++)
+  {
+     uint16_t *fb_p = &fb[(y0 + row) * FB_WIDTH + x0];
+     const uint16_t *backup_p = &overlay_backup[row * w];
+     for (int x = 0; x < w; x++)
+     {
+       fb_p[x] = backup_p[x];
+     }
+  }
+}
diff --git a/components/micropython/usermodule/mp_sys_display.c b/components/micropython/usermodule/mp_sys_display.c
index 2fc105133e50485978b5de825743f06948808a4a..80378aefa73f3c3b2039eaf8bd3174bb12d2015f 100644
--- a/components/micropython/usermodule/mp_sys_display.c
+++ b/components/micropython/usermodule/mp_sys_display.c
@@ -18,6 +18,14 @@
 
 #include "mp_uctx.h"
 
+// sys_display.set_overlay_height(height): forwards to st3m_gfx, returns None.
+STATIC mp_obj_t mp_set_overlay_height(mp_obj_t height_in) {
+    st3m_gfx_set_overlay_height(mp_obj_get_int(height_in));
+    return mp_const_none;
+}
+STATIC MP_DEFINE_CONST_FUN_OBJ_1(mp_set_overlay_height_obj,
+                                 mp_set_overlay_height);
+
 STATIC mp_obj_t mp_set_backlight(mp_obj_t percent_in) {
     uint8_t percent = mp_obj_get_int(percent_in);
     flow3r_bsp_display_set_backlight(percent);
@@ -33,6 +41,11 @@ STATIC mp_obj_t mp_get_ctx(void) {
 }
 STATIC MP_DEFINE_CONST_FUN_OBJ_0(mp_get_ctx_obj, mp_get_ctx);
 
+STATIC mp_obj_t mp_get_overlay_ctx(void) {  // sys_display.get_overlay_ctx()
+    return mp_ctx_from_ctx(st3m_overlay_ctx());  // wrap the overlay Ctx
+}
+STATIC MP_DEFINE_CONST_FUN_OBJ_0(mp_get_overlay_ctx_obj, mp_get_overlay_ctx);
+
 STATIC mp_obj_t mp_update(mp_obj_t ctx_in) {
     mp_ctx_obj_t *self = MP_OBJ_TO_PTR(ctx_in);
     if (self->base.type != &mp_ctx_type) {
@@ -66,8 +79,12 @@ STATIC const mp_rom_map_elem_t mp_module_sys_display_globals_table[] = {
     { MP_ROM_QSTR(MP_QSTR_pipe_full), MP_ROM_PTR(&mp_pipe_full_obj) },
     { MP_ROM_QSTR(MP_QSTR_pipe_flush), MP_ROM_PTR(&mp_pipe_flush_obj) },
     { MP_ROM_QSTR(MP_QSTR_set_backlight), MP_ROM_PTR(&mp_set_backlight_obj) },
+    { MP_ROM_QSTR(MP_QSTR_set_overlay_height),
+      MP_ROM_PTR(&mp_set_overlay_height_obj) },
     { MP_ROM_QSTR(MP_QSTR_update), MP_ROM_PTR(&mp_update_obj) },
     { MP_ROM_QSTR(MP_QSTR_get_ctx), MP_ROM_PTR(&mp_get_ctx_obj) },
+    { MP_ROM_QSTR(MP_QSTR_get_overlay_ctx),
+      MP_ROM_PTR(&mp_get_overlay_ctx_obj) },
 };
 
 STATIC MP_DEFINE_CONST_DICT(mp_module_sys_display_globals,
diff --git a/components/st3m/st3m_gfx.c b/components/st3m/st3m_gfx.c
index f1ec58a6cda833b39a2aa69780b4229661240a00..7600f5201271bf82261273cea93dc97197b38dbc 100644
--- a/components/st3m/st3m_gfx.c
+++ b/components/st3m/st3m_gfx.c
@@ -54,6 +54,17 @@ static const char *TAG = "st3m-gfx";
 EXT_RAM_BSS_ATTR static st3m_framebuffer_desc_t
     framebuffer_descs[ST3M_GFX_NBUFFERS];
 
+#define OVERLAY_WIDTH 120   // overlay buffer width in pixels
+#define OVERLAY_HEIGHT 160  // overlay buffer height in pixels
+#define OVERLAY_X 60        // x0 passed to merge/unmerge in the crtc task
+#define OVERLAY_Y 0         // y0 passed to merge/unmerge in the crtc task
+// Overlay state. NOTE(review): st3m_overlay_backup is not static - intended?
+static int _st3m_overlay_height = 0;  // rows merged per frame; 0 = no merge
+EXT_RAM_BSS_ATTR static uint8_t
+    st3m_overlay_fb[OVERLAY_WIDTH * OVERLAY_HEIGHT * 4];
+EXT_RAM_BSS_ATTR uint16_t st3m_overlay_backup[OVERLAY_WIDTH * OVERLAY_HEIGHT];
+static Ctx *_st3m_overlay_ctx = NULL;  // created by st3m_overlay_ctx()
+
 static st3m_ctx_desc_t dctx_descs[ST3M_GFX_NCTX];
 
 // Queue of free framebuffer descriptors, written into by crtc once rendered,
@@ -95,6 +106,12 @@ static void xQueueReceiveNotifyStarved(QueueHandle_t q, void *dst,
     }
 }
 
+void st3m_ctx_merge_overlay(uint16_t *fb, uint8_t *overlay,
+                            uint16_t *overlay_backup, int x0, int y0, int w,
+                            int h);  // defined in components/ctx/ctx.c
+void st3m_ctx_unmerge_overlay(uint16_t *fb, uint16_t *overlay_backup, int x0,
+                              int y0, int w, int h);  // defined in ctx.c
+
 static void st3m_gfx_crtc_task(void *_arg) {
     (void)_arg;
 
@@ -108,7 +125,18 @@ static void st3m_gfx_crtc_task(void *_arg) {
         st3m_counter_timer_sample(&blit_read_time, end - start);
 
         start = esp_timer_get_time();
+        st3m_overlay_ctx();
+        const int overlay_height = _st3m_overlay_height;  // sample once
+        if (overlay_height)
+            st3m_ctx_merge_overlay(framebuffer_descs[descno].buffer,
+                                   st3m_overlay_fb, st3m_overlay_backup,
+                                   OVERLAY_X, OVERLAY_Y, OVERLAY_WIDTH,
+                                   overlay_height);
         flow3r_bsp_display_send_fb(framebuffer_descs[descno].buffer);
+        if (overlay_height)  // restore before the buffer is released
+            st3m_ctx_unmerge_overlay(framebuffer_descs[descno].buffer,
+                                     st3m_overlay_backup, OVERLAY_X, OVERLAY_Y,
+                                     OVERLAY_WIDTH, overlay_height);
         end = esp_timer_get_time();
         st3m_counter_timer_sample(&blit_work_time, end - start);
 
@@ -560,6 +588,49 @@ Ctx *st3m_ctx(TickType_t ticks_to_wait) {
     return foo->ctx;
 }
 
+void st3m_overlay_clear(void) {  // wipe the overlay using COMPOSITE_CLEAR
+    Ctx *overlay = st3m_overlay_ctx();
+    ctx_save(overlay);  // the mode and source set below are undone on restore
+    ctx_compositing_mode(overlay, CTX_COMPOSITE_CLEAR);
+    ctx_gray(overlay, 0);  // BUG(ctx) - alpha=0 should be allowed here
+    ctx_rectangle(overlay, -120, -120, 240, 240);
+    ctx_fill(overlay);
+    ctx_restore(overlay);
+}
+
+// Return the Ctx that renders into the RGBA8 overlay framebuffer, creating it
+// on first use.
+//
+// The first call builds a context over st3m_overlay_fb, translates it by
+// (60, 120) and zeroes the buffer. The offset presumably places the origin at
+// the centre of the 240x240 screen as seen from an overlay whose top-left
+// corner is at (OVERLAY_X, OVERLAY_Y) - TODO confirm.
+//
+// NOTE(review): this lazy initialisation is not synchronised although the
+// function is called from the crtc task as well as from other callers. The
+// global pointer is therefore assigned last, once the context is set up,
+// which narrows the window but is not a substitute for real locking.
+Ctx *st3m_overlay_ctx(void) {
+    if (_st3m_overlay_ctx) {
+        return _st3m_overlay_ctx;
+    }
+
+    Ctx *ctx = ctx_new_for_framebuffer(st3m_overlay_fb, OVERLAY_WIDTH,
+                                       OVERLAY_HEIGHT, OVERLAY_WIDTH * 4,
+                                       CTX_FORMAT_RGBA8);
+    ctx_translate(ctx, 60, 120);
+    memset(st3m_overlay_fb, 0, sizeof(st3m_overlay_fb));
+    // Publish only now that the context and its buffer are ready.
+    _st3m_overlay_ctx = ctx;
+    return ctx;
+}
+
 void st3m_ctx_end_frame(Ctx *ctx) {
     xQueueSend(dctx_rastq, &st3m_dctx_no, portMAX_DELAY);
 }
+
+void st3m_gfx_set_overlay_height(int height) {  // clamp to 0..OVERLAY_HEIGHT
+    const int below = height < 0 ? 0 : height;
+    const int rows = below > OVERLAY_HEIGHT ? OVERLAY_HEIGHT : below;
+    _st3m_overlay_height = rows;
+}
diff --git a/components/st3m/st3m_gfx.h b/components/st3m/st3m_gfx.h
index 698f07dfcd8745532d44b73165e95e08fd8386ec..61cf47d9d86a14139a2872ae426e09a21fe2cced 100644
--- a/components/st3m/st3m_gfx.h
+++ b/components/st3m/st3m_gfx.h
@@ -7,6 +7,8 @@
 #include "ctx.h"
 // clang-format on
 
+Ctx *st3m_overlay_ctx(void);  // Ctx drawing into the overlay framebuffer
+
 Ctx *st3m_ctx(TickType_t ticks_to_wait);
 void st3m_ctx_end_frame(Ctx *ctx);  // temporary, signature compatible
                                     // with ctx_end_frame()
@@ -53,3 +55,7 @@ void st3m_gfx_splash(const char *text);
 // Draw the flow3r multi-coloured logo at coordinates x,y and with given
 // dimension (approx. bounding box size).
 void st3m_gfx_flow3r_logo(Ctx *ctx, float x, float y, float dim);
+
+// Set how many rows of the overlay are composited onto every frame. More rows
+// mean more per-frame overhead; 0 disables the compositing step entirely.
+void st3m_gfx_set_overlay_height(int height);
diff --git a/python_payload/st3m/ui/elements/overlays.py b/python_payload/st3m/ui/elements/overlays.py
index 39275300a5bd42038199caee7409c3229515384f..1e9b60a6be1c138784194de6b49e7ea90ab15ea1 100644
--- a/python_payload/st3m/ui/elements/overlays.py
+++ b/python_payload/st3m/ui/elements/overlays.py
@@ -18,6 +18,7 @@ import st3m.wifi
 import math
 import audio
 import sys_kernel
+import sys_display
 import network
 
 
@@ -37,6 +38,8 @@ _all_kinds = [
     OverlayKind.Toast,
 ]
 
+_max_y = 0  # overlay rows in use; raised in overlays' draw(), reset by Compositor
+
 
 class Overlay(Responder):
     """
@@ -60,6 +63,7 @@ class Compositor(Responder):
             OverlayKind.Debug: True,
             OverlayKind.Toast: True,
         }
+        self._frame_skip = 0
 
     def _enabled_overlays(self) -> List[Responder]:
         res: List[Responder] = []
@@ -74,13 +78,25 @@ class Compositor(Responder):
 
     def think(self, ins: InputState, delta_ms: int) -> None:
         self.main.think(ins, delta_ms)
-        for overlay in self._enabled_overlays():
-            overlay.think(ins, delta_ms)
+        if self._frame_skip <= 0:  # only on frames where draw() redraws overlays
+            for overlay in self._enabled_overlays():
+                overlay.think(ins, delta_ms)  # NOTE(review): one-frame delta only
 
     def draw(self, ctx: Context) -> None:
+        global _max_y
         self.main.draw(ctx)
-        for overlay in self._enabled_overlays():
-            overlay.draw(ctx)
+        if self._frame_skip <= 0:  # overlays are re-rendered every 8th frame
+            _max_y = 0  # overlays raise this in draw() to the rows they need
+            octx = sys_display.get_overlay_ctx()
+            octx.save()
+            octx.compositing_mode = octx.CLEAR  # wipe the previous overlay image
+            octx.rectangle(-120, -120, 240, 240).fill()
+            octx.restore()
+            for overlay in self._enabled_overlays():
+                overlay.draw(octx)  # overlays draw into the overlay ctx, not ctx
+            self._frame_skip = 8
+            sys_display.set_overlay_height(_max_y)  # composite only the used rows
+        self._frame_skip -= 1
 
     def add_overlay(self, ov: Overlay) -> None:
         """
@@ -196,7 +212,6 @@ class OverlayCaptouch(Overlay):
             offs_x = self.phi / 1000
             offs_y = -self.rad / 1000
             ctx.rectangle(-5 + offs_x, -5 + offs_y, 10, 10)
-            ctx.rgb(1, 0, 1)
             ctx.fill()
 
     def __init__(self) -> None:
@@ -207,6 +222,7 @@ class OverlayCaptouch(Overlay):
             dot.think(ins, delta_ms)
 
     def draw(self, ctx: Context) -> None:
+        ctx.rgb(1, 0, 1)
         for dot in self.dots:
             ctx.save()
             dot.draw(ctx)
@@ -272,13 +288,15 @@ class OverlayVolume(Overlay):
 
         if self._showing is None:
             return
-        self._showing -= delta_ms
+        self._showing -= delta_ms * 8
         if self._showing < 0:
             self._showing = None
 
     def draw(self, ctx: Context) -> None:
+        global _max_y
         if self._showing is None:
             return
+        _max_y = max(_max_y, 160)
 
         opacity = self._showing / 200
         opacity = min(opacity, 0.8)
@@ -467,8 +485,10 @@ class IconTray(Overlay):
             v.think(ins, delta_ms)
 
     def draw(self, ctx: Context) -> None:
+        global _max_y
         if len(self.visible) < 1:
             return
+        _max_y = max(_max_y, 32)
         width = 0
         for icon in self.visible:
             width += icon.WIDTH