diff --git a/minimal/frozentest.mpy b/minimal/frozentest.mpy
index 5cb356d611968fc8169bb3758b365928ec5982f3..87f9581bfe7290b67f473a49441fd25db204f459 100644
Binary files a/minimal/frozentest.mpy and b/minimal/frozentest.mpy differ
diff --git a/py/bc.c b/py/bc.c
index 4c0eb5391c126eabefdc41ab60e2c93979a72281..fc1794683949b69325950d080622550a1a4a3c79 100644
--- a/py/bc.c
+++ b/py/bc.c
@@ -304,7 +304,7 @@ STATIC const byte opcode_format_table[64] = {
     OC4(U, U, U, U), // 0x0c-0x0f
     OC4(B, B, B, U), // 0x10-0x13
     OC4(V, U, Q, V), // 0x14-0x17
-    OC4(B, U, V, V), // 0x18-0x1b
+    OC4(B, V, V, Q), // 0x18-0x1b
     OC4(Q, Q, Q, Q), // 0x1c-0x1f
     OC4(B, B, V, V), // 0x20-0x23
     OC4(Q, Q, Q, B), // 0x24-0x27
diff --git a/py/bc0.h b/py/bc0.h
index c2b019f1a9587b3e27b2641b46d8cbd844d1812b..b5650abe4159df26ce7568841ef40ac81007de63 100644
--- a/py/bc0.h
+++ b/py/bc0.h
@@ -37,12 +37,13 @@
 #define MP_BC_LOAD_CONST_OBJ     (0x17) // ptr
 #define MP_BC_LOAD_NULL          (0x18)
 
-#define MP_BC_LOAD_FAST_N        (0x1a) // uint
-#define MP_BC_LOAD_DEREF         (0x1b) // uint
-#define MP_BC_LOAD_NAME          (0x1c) // qstr
-#define MP_BC_LOAD_GLOBAL        (0x1d) // qstr
-#define MP_BC_LOAD_ATTR          (0x1e) // qstr
-#define MP_BC_LOAD_METHOD        (0x1f) // qstr
+#define MP_BC_LOAD_FAST_N        (0x19) // uint
+#define MP_BC_LOAD_DEREF         (0x1a) // uint
+#define MP_BC_LOAD_NAME          (0x1b) // qstr
+#define MP_BC_LOAD_GLOBAL        (0x1c) // qstr
+#define MP_BC_LOAD_ATTR          (0x1d) // qstr
+#define MP_BC_LOAD_METHOD        (0x1e) // qstr
+#define MP_BC_LOAD_SUPER_METHOD  (0x1f) // qstr
 #define MP_BC_LOAD_BUILD_CLASS   (0x20)
 #define MP_BC_LOAD_SUBSCR        (0x21)
 
diff --git a/py/compile.c b/py/compile.c
index 42c2cc3a243d4bd404ab38962cda9c4f27256ccf..8533e0528fe25fc8311d92e2c0e5a430cfde1b33 100644
--- a/py/compile.c
+++ b/py/compile.c
@@ -1694,7 +1694,7 @@ STATIC void compile_yield_from(compiler_t *comp) {
 
 #if MICROPY_PY_ASYNC_AWAIT
 STATIC void compile_await_object_method(compiler_t *comp, qstr method) {
-    EMIT_ARG(load_method, method);
+    EMIT_ARG(load_method, method, false); // false = plain (non-super) method load
     EMIT_ARG(call_method, 0, 0, 0);
     compile_yield_from(comp);
 }
@@ -1785,7 +1785,7 @@ STATIC void compile_async_with_stmt_helper(compiler_t *comp, int n, mp_parse_nod
         }
 
         compile_load_id(comp, context);
-        EMIT_ARG(load_method, MP_QSTR___aexit__);
+        EMIT_ARG(load_method, MP_QSTR___aexit__, false);
 
         EMIT_ARG(setup_except, try_exception_label);
         compile_increase_except_level(comp);
@@ -2219,9 +2219,20 @@ STATIC void compile_atom_expr_normal(compiler_t *comp, mp_parse_node_struct_t *p
             return;
         }
 
-        // a super() call
-        EMIT_ARG(call_function, 2, 0, 0);
-        i = 1;
+        if (num_trail >= 3 // the fast path needs trailers [1] and [2] to exist
+            && MP_PARSE_NODE_STRUCT_KIND(pns_trail[1]) == PN_trailer_period
+            && MP_PARSE_NODE_STRUCT_KIND(pns_trail[2]) == PN_trailer_paren) {
+            // optimisation for method calls super().f(...), to eliminate heap allocation
+            mp_parse_node_struct_t *pns_period = pns_trail[1];
+            mp_parse_node_struct_t *pns_paren = pns_trail[2];
+            EMIT_ARG(load_method, MP_PARSE_NODE_LEAF_ARG(pns_period->nodes[0]), true); // true = super variant of load_method
+            compile_trailer_paren_helper(comp, pns_paren->nodes[0], true, 0);
+            i = 3; // trailers [1] and [2] were handled here; the remaining ones start at index 3
+        } else {
+            // a super() call
+            EMIT_ARG(call_function, 2, 0, 0);
+            i = 1;
+        }
     }
 
     // compile the remaining trailers
@@ -2232,7 +2243,7 @@ STATIC void compile_atom_expr_normal(compiler_t *comp, mp_parse_node_struct_t *p
             // optimisation for method calls a.f(...), following PyPy
             mp_parse_node_struct_t *pns_period = pns_trail[i];
             mp_parse_node_struct_t *pns_paren = pns_trail[i + 1];
-            EMIT_ARG(load_method, MP_PARSE_NODE_LEAF_ARG(pns_period->nodes[0]));
+            EMIT_ARG(load_method, MP_PARSE_NODE_LEAF_ARG(pns_period->nodes[0]), false);
             compile_trailer_paren_helper(comp, pns_paren->nodes[0], true, 0);
             i += 1;
         } else {
diff --git a/py/emit.h b/py/emit.h
index 64bb957f60960fac7ddaccc07a6cef5d3967ec34..0236a9b8d0eba2afc6ade8c39cca0065e3ecad55 100644
--- a/py/emit.h
+++ b/py/emit.h
@@ -88,7 +88,7 @@ typedef struct _emit_method_table_t {
     void (*load_const_obj)(emit_t *emit, mp_obj_t obj);
     void (*load_null)(emit_t *emit);
     void (*load_attr)(emit_t *emit, qstr qst);
-    void (*load_method)(emit_t *emit, qstr qst);
+    void (*load_method)(emit_t *emit, qstr qst, bool is_super);
     void (*load_build_class)(emit_t *emit);
     void (*load_subscr)(emit_t *emit);
     void (*store_attr)(emit_t *emit, qstr qst);
@@ -205,7 +205,7 @@ void mp_emit_bc_load_const_str(emit_t *emit, qstr qst);
 void mp_emit_bc_load_const_obj(emit_t *emit, mp_obj_t obj);
 void mp_emit_bc_load_null(emit_t *emit);
 void mp_emit_bc_load_attr(emit_t *emit, qstr qst);
-void mp_emit_bc_load_method(emit_t *emit, qstr qst);
+void mp_emit_bc_load_method(emit_t *emit, qstr qst, bool is_super);
 void mp_emit_bc_load_build_class(emit_t *emit);
 void mp_emit_bc_load_subscr(emit_t *emit);
 void mp_emit_bc_store_attr(emit_t *emit, qstr qst);
diff --git a/py/emitbc.c b/py/emitbc.c
index 673cd405f99bdd475b968760e8c68547ade95e69..6d8db81bc66f4598f450ca199893bbeca0bbc8d0 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -594,9 +594,9 @@ void mp_emit_bc_load_attr(emit_t *emit, qstr qst) {
     }
 }
 
-void mp_emit_bc_load_method(emit_t *emit, qstr qst) {
-    emit_bc_pre(emit, 1);
-    emit_write_bytecode_byte_qstr(emit, MP_BC_LOAD_METHOD, qst);
+void mp_emit_bc_load_method(emit_t *emit, qstr qst, bool is_super) { // is_super selects LOAD_SUPER_METHOD instead of LOAD_METHOD
+    emit_bc_pre(emit, 1 - 2 * is_super); // passes +1 for a normal load, -1 for the super variant
+    emit_write_bytecode_byte_qstr(emit, is_super ? MP_BC_LOAD_SUPER_METHOD : MP_BC_LOAD_METHOD, qst); // chosen opcode, with qst as its qstr operand
 }
 
 void mp_emit_bc_load_build_class(emit_t *emit) {
diff --git a/py/emitnative.c b/py/emitnative.c
index 3ab001f8d0187902dea665d0e497bdfdac1d41d8..99adc809c75e1612ecb519c381c728b3177a0f78 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -85,6 +85,7 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
     [MP_F_LOAD_BUILD_CLASS] = 0,
     [MP_F_LOAD_ATTR] = 2,
     [MP_F_LOAD_METHOD] = 3,
+    [MP_F_LOAD_SUPER_METHOD] = 2,
     [MP_F_STORE_NAME] = 2,
     [MP_F_STORE_GLOBAL] = 2,
     [MP_F_STORE_ATTR] = 3,
@@ -1065,12 +1066,18 @@ STATIC void emit_native_load_attr(emit_t *emit, qstr qst) {
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
 }
 
-STATIC void emit_native_load_method(emit_t *emit, qstr qst) {
-    vtype_kind_t vtype_base;
-    emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = base
-    assert(vtype_base == VTYPE_PYOBJ);
-    emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
-    emit_call_with_imm_arg(emit, MP_F_LOAD_METHOD, qst, REG_ARG_2); // arg2 = method name
+STATIC void emit_native_load_method(emit_t *emit, qstr qst, bool is_super) { // is_super picks MP_F_LOAD_SUPER_METHOD over MP_F_LOAD_METHOD
+    if (is_super) {
+        emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, 3); // arg2 = dest ptr; NOTE(review): presumably releases the 3 input slots -- confirm
+        emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_2, 2); // arg2 = dest ptr again; NOTE(review): presumably reserves the 2 result slots -- confirm
+        emit_call_with_imm_arg(emit, MP_F_LOAD_SUPER_METHOD, qst, REG_ARG_1); // arg1 = method name, matching mp_load_super_method(attr, dest)
+    } else {
+        vtype_kind_t vtype_base;
+        emit_pre_pop_reg(emit, &vtype_base, REG_ARG_1); // arg1 = base
+        assert(vtype_base == VTYPE_PYOBJ); // only a VTYPE_PYOBJ base is accepted here
+        emit_get_stack_pointer_to_reg_for_push(emit, REG_ARG_3, 2); // arg3 = dest ptr
+        emit_call_with_imm_arg(emit, MP_F_LOAD_METHOD, qst, REG_ARG_2); // arg2 = method name, matching mp_load_method(base, attr, dest)
+    }
 }
 
 STATIC void emit_native_load_build_class(emit_t *emit) {
diff --git a/py/nativeglue.c b/py/nativeglue.c
index 694dfca74abd44bc56f54e007196e4713cde1be5..c75e5ec0476315c995e608f95a5e79f9635d7e9a 100644
--- a/py/nativeglue.c
+++ b/py/nativeglue.c
@@ -133,6 +133,7 @@ void *const mp_fun_table[MP_F_NUMBER_OF] = {
     mp_load_build_class,
     mp_load_attr,
     mp_load_method,
+    mp_load_super_method,
     mp_store_name,
     mp_store_global,
     mp_store_attr,
diff --git a/py/objtype.c b/py/objtype.c
index de1ee8c421ee5ffa35f8a5e6878dce1c93418181..2a119e40fb3416745567e8d97af1095917b3c5bd 100644
--- a/py/objtype.c
+++ b/py/objtype.c
@@ -1070,6 +1070,11 @@ const mp_obj_type_t mp_type_super = {
     .attr = super_attr,
 };
 
+void mp_load_super_method(qstr attr, mp_obj_t *dest) { // looks up attr through a temporary super object built from dest[1] and dest[2]
+    mp_obj_super_t super = {{&mp_type_super}, dest[1], dest[2]}; // automatic (non-heap) object; NOTE(review): presumably dest[1] = class, dest[2] = instance -- confirm
+    mp_load_method(MP_OBJ_FROM_PTR(&super), attr, dest); // &super is only valid during this call; dest[1]/dest[2] were copied above, so dest is passed on as-is
+}
+
 /******************************************************************************/
 // subclassing and built-ins specific to types
 
diff --git a/py/persistentcode.c b/py/persistentcode.c
index 2a9a5b7cc0b2052a6f53e0fc0e4b5f513b277e9f..a71045a290d3265849bdaf2f8a14a75958cc6fc0 100644
--- a/py/persistentcode.c
+++ b/py/persistentcode.c
@@ -39,7 +39,7 @@
 #include "py/smallint.h"
 
 // The current version of .mpy files
-#define MPY_VERSION (1)
+#define MPY_VERSION (2)
 
 // The feature flags byte encodes the compile-time config options that
 // affect the generate bytecode.
diff --git a/py/runtime.h b/py/runtime.h
index 177869145400b06f122c5200c41a4d983fa1aa49..d75d23ff1802f8d12e3540e80c383d1f463e698b 100644
--- a/py/runtime.h
+++ b/py/runtime.h
@@ -131,6 +131,7 @@ mp_obj_t mp_load_attr(mp_obj_t base, qstr attr);
 void mp_convert_member_lookup(mp_obj_t obj, const mp_obj_type_t *type, mp_obj_t member, mp_obj_t *dest);
 void mp_load_method(mp_obj_t base, qstr attr, mp_obj_t *dest);
 void mp_load_method_maybe(mp_obj_t base, qstr attr, mp_obj_t *dest);
+void mp_load_super_method(qstr attr, mp_obj_t *dest);
 void mp_store_attr(mp_obj_t base, qstr attr, mp_obj_t val);
 
 mp_obj_t mp_getiter(mp_obj_t o, mp_obj_iter_buf_t *iter_buf);
diff --git a/py/runtime0.h b/py/runtime0.h
index b1ed71026255c7240148d7cc8b78803da8ce0eba..720fe6a23bf2d8950af18b1f4514f97fe6a044f5 100644
--- a/py/runtime0.h
+++ b/py/runtime0.h
@@ -107,6 +107,7 @@ typedef enum {
     MP_F_LOAD_BUILD_CLASS,
     MP_F_LOAD_ATTR,
     MP_F_LOAD_METHOD,
+    MP_F_LOAD_SUPER_METHOD,
     MP_F_STORE_NAME,
     MP_F_STORE_GLOBAL,
     MP_F_STORE_ATTR,
diff --git a/py/showbc.c b/py/showbc.c
index b52905f6774ccb2c53063cbc57628c62792f5cda..0bccf8427f5ea02e7c43430d02a2bf9a1e82bd40 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -245,6 +245,11 @@ const byte *mp_bytecode_print_str(const byte *ip) {
             printf("LOAD_METHOD %s", qstr_str(qst));
             break;
 
+        case MP_BC_LOAD_SUPER_METHOD:
+            DECODE_QSTR;
+            printf("LOAD_SUPER_METHOD %s", qstr_str(qst));
+            break;
+
         case MP_BC_LOAD_BUILD_CLASS:
             printf("LOAD_BUILD_CLASS");
             break;
diff --git a/py/vm.c b/py/vm.c
index 8ce635ca87374a6a93c309c0bf32a49e2deee41e..469528df41008604c162582dc25f6073fc675864 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -376,6 +376,14 @@ dispatch_loop:
                     DISPATCH();
                 }
 
+                ENTRY(MP_BC_LOAD_SUPER_METHOD): {
+                    MARK_EXC_IP_SELECTIVE();
+                    DECODE_QSTR; // qstr operand of the opcode, used below as qst
+                    sp -= 1; // sp ends one element below its value on entry
+                    mp_load_super_method(qst, sp - 1); // dest = entry sp - 2, so dest[1]/dest[2] are the two highest slots on entry
+                    DISPATCH();
+                }
+
                 ENTRY(MP_BC_LOAD_BUILD_CLASS):
                     MARK_EXC_IP_SELECTIVE();
                     PUSH(mp_load_build_class());
diff --git a/py/vmentrytable.h b/py/vmentrytable.h
index 8731c3d4c4e0de051beaffb590157f05b690d5cc..dd9789e34851df0fbf030c97b3184d34ce88c66a 100644
--- a/py/vmentrytable.h
+++ b/py/vmentrytable.h
@@ -44,6 +44,7 @@ static const void *const entry_table[256] = {
     [MP_BC_LOAD_GLOBAL] = &&entry_MP_BC_LOAD_GLOBAL,
     [MP_BC_LOAD_ATTR] = &&entry_MP_BC_LOAD_ATTR,
     [MP_BC_LOAD_METHOD] = &&entry_MP_BC_LOAD_METHOD,
+    [MP_BC_LOAD_SUPER_METHOD] = &&entry_MP_BC_LOAD_SUPER_METHOD,
     [MP_BC_LOAD_BUILD_CLASS] = &&entry_MP_BC_LOAD_BUILD_CLASS,
     [MP_BC_LOAD_SUBSCR] = &&entry_MP_BC_LOAD_SUBSCR,
     [MP_BC_STORE_FAST_N] = &&entry_MP_BC_STORE_FAST_N,
diff --git a/tools/mpy-tool.py b/tools/mpy-tool.py
index d14e0f4ea4c8ec202b077f316b6316ca8ef0c971..d2a1c67ad9e0f7f84e9ff23e034ebd180467145b 100755
--- a/tools/mpy-tool.py
+++ b/tools/mpy-tool.py
@@ -57,7 +57,7 @@ class FreezeError(Exception):
         return 'error while freezing %s: %s' % (self.rawcode.source_file, self.msg)
 
 class Config:
-    MPY_VERSION = 1
+    MPY_VERSION = 2
     MICROPY_LONGINT_IMPL_NONE = 0
     MICROPY_LONGINT_IMPL_LONGLONG = 1
     MICROPY_LONGINT_IMPL_MPZ = 2
@@ -94,7 +94,7 @@ def make_opcode_format():
     OC4(U, U, U, U), # 0x0c-0x0f
     OC4(B, B, B, U), # 0x10-0x13
     OC4(V, U, Q, V), # 0x14-0x17
-    OC4(B, U, V, V), # 0x18-0x1b
+    OC4(B, V, V, Q), # 0x18-0x1b
     OC4(Q, Q, Q, Q), # 0x1c-0x1f
     OC4(B, B, V, V), # 0x20-0x23
     OC4(Q, Q, Q, B), # 0x24-0x27