From bbab73f6a0ede36dabd79b117e332275d855a5b5 Mon Sep 17 00:00:00 2001
From: Damien Tournoud <damien@platform.sh>
Date: Thu, 15 Dec 2022 14:09:19 -0800
Subject: [PATCH] py/gc: Speed up incremental GC cycles by tracking the last
 used block.

In applications that use little memory and run GC regularly, the cost of
the sweep phase quickly becomes prohibitive as the amount of RAM
increases.

On an ESP32-S3 with 2 MB of external SPIRAM, for example, a trivial GC
cycle takes a minimum of 40 ms, virtually all of it in the sweep phase.

Similarly, on the UNIX port with 1 GB of heap, a trivial GC takes 47 ms,
again virtually all of it in the sweep phase.

This commit speeds up the sweep phase in the case where most of the heap
is empty by keeping track of the ID of the highest block we allocated in
an area since the last GC.

The performance benchmark run on PYBV10 shows between +0 and +2%
improvement across the existing performance tests.  These tests don't
really stress the GC, so they were also run with gc.threshold(30000) and
gc.threshold(10000).  For the 30000 case, performance improved by up to
+10% with this commit.  For the 10000 case, performance improved by at
least +10% on 6 tests, and up to +25%.

Signed-off-by: Damien George <damien@micropython.org>
---
 components/micropython/vendor/py/gc.c      | 22 ++++++++++++++++++++--
 components/micropython/vendor/py/mpstate.h |  1 +
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/components/micropython/vendor/py/gc.c b/components/micropython/vendor/py/gc.c
index ba5c569d50..1430ca3281 100644
--- a/components/micropython/vendor/py/gc.c
+++ b/components/micropython/vendor/py/gc.c
@@ -158,6 +158,7 @@ STATIC void gc_setup_area(mp_state_mem_area_t *area, void *start, void *end) {
     #endif
 
     area->gc_last_free_atb_index = 0;
+    area->gc_last_used_block = 0;
 
     #if MICROPY_GC_SPLIT_HEAP
     area->next = NULL;
@@ -380,7 +381,14 @@ STATIC void gc_sweep(void) {
     // free unmarked heads and their tails
     int free_tail = 0;
     for (mp_state_mem_area_t *area = &MP_STATE_MEM(area); area != NULL; area = NEXT_AREA(area)) {
-        for (size_t block = 0; block < area->gc_alloc_table_byte_len * BLOCKS_PER_ATB; block++) {
+        size_t end_block = area->gc_alloc_table_byte_len * BLOCKS_PER_ATB;
+        if (area->gc_last_used_block < end_block) {
+            end_block = area->gc_last_used_block + 1;
+        }
+
+        size_t last_used_block = 0;
+
+        for (size_t block = 0; block < end_block; block++) {
             MICROPY_GC_HOOK_LOOP
             switch (ATB_GET_KIND(area, block)) {
                 case AT_HEAD:
@@ -420,15 +428,20 @@ STATIC void gc_sweep(void) {
                         #if CLEAR_ON_SWEEP
                         memset((void *)PTR_FROM_BLOCK(area, block), 0, BYTES_PER_BLOCK);
                         #endif
+                    } else {
+                        last_used_block = block;
                     }
                     break;
 
                 case AT_MARK:
                     ATB_MARK_TO_HEAD(area, block);
                     free_tail = 0;
+                    last_used_block = block;
                     break;
             }
         }
+
+        area->gc_last_used_block = last_used_block;
     }
 }
 
@@ -680,6 +693,8 @@ found:
         area->gc_last_free_atb_index = (i + 1) / BLOCKS_PER_ATB;
     }
 
+    area->gc_last_used_block = MAX(area->gc_last_used_block, end_block);
+
     // mark first block as used head
     ATB_FREE_TO_HEAD(area, start_block);
 
@@ -969,11 +984,14 @@ void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
     // check if we can expand in place
     if (new_blocks <= n_blocks + n_free) {
         // mark few more blocks as used tail
-        for (size_t bl = block + n_blocks; bl < block + new_blocks; bl++) {
+        size_t end_block = block + new_blocks;
+        for (size_t bl = block + n_blocks; bl < end_block; bl++) {
             assert(ATB_GET_KIND(area, bl) == AT_FREE);
             ATB_FREE_TO_TAIL(area, bl);
         }
 
+        area->gc_last_used_block = MAX(area->gc_last_used_block, end_block);
+
         GC_EXIT();
 
         #if MICROPY_GC_CONSERVATIVE_CLEAR
diff --git a/components/micropython/vendor/py/mpstate.h b/components/micropython/vendor/py/mpstate.h
index f6b911af56..11caeba055 100644
--- a/components/micropython/vendor/py/mpstate.h
+++ b/components/micropython/vendor/py/mpstate.h
@@ -87,6 +87,7 @@ typedef struct _mp_state_mem_area_t {
     byte *gc_pool_end;
 
     size_t gc_last_free_atb_index;
+    size_t gc_last_used_block; // The block ID of the highest block allocated in the area
 } mp_state_mem_area_t;
 
 // This structure hold information about the memory allocation system.
-- 
GitLab