From 8e4b4bac7079caafbe40646b9303dc9e3ce23fbc Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Sat, 13 Oct 2018 12:57:32 +1100
Subject: [PATCH] py/asmx64: Change indirect calls to load fun ptr from the
 native table.

Instead of storing the function pointer directly in the assembly code.
This makes the generated code more independent of the runtime (so easier
to relocate the code), and reduces the generated code size.
---
 py/asmx64.c     | 15 ++-------------
 py/asmx64.h     |  4 ++--
 py/emitnative.c |  4 ++++
 3 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/py/asmx64.c b/py/asmx64.c
index 9a3d5fb0d..9cd2fc64c 100644
--- a/py/asmx64.c
+++ b/py/asmx64.c
@@ -621,21 +621,10 @@ void asm_x64_call_i1(asm_x64_t *as, void* func, int i1) {
 }
 */
 
-void asm_x64_call_ind(asm_x64_t *as, void *ptr, int temp_r64) {
+void asm_x64_call_ind(asm_x64_t *as, size_t fun_id, int temp_r64) {
     assert(temp_r64 < 8);
-#ifdef __LP64__
-    asm_x64_mov_i64_to_r64_optimised(as, (int64_t)ptr, temp_r64);
-#else
-    // If we get here, sizeof(int) == sizeof(void*).
-    asm_x64_mov_i64_to_r64_optimised(as, (int64_t)(unsigned int)ptr, temp_r64);
-#endif
+    asm_x64_mov_mem64_to_r64(as, ASM_X64_REG_RBP, fun_id * WORD_SIZE, temp_r64);
     asm_x64_write_byte_2(as, OPCODE_CALL_RM32, MODRM_R64(2) | MODRM_RM_REG | MODRM_RM_R64(temp_r64));
-    // this reduces code size by 2 bytes per call, but doesn't seem to speed it up at all
-    // doesn't work anymore because calls are 64 bits away
-    /*
-    asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
-    asm_x64_write_word32(as, ptr - (void*)(as->code_base + as->code_offset + 4));
-    */
 }
 
 #endif // MICROPY_EMIT_X64
diff --git a/py/asmx64.h b/py/asmx64.h
index e2ab1f855..f40b127e5 100644
--- a/py/asmx64.h
+++ b/py/asmx64.h
@@ -114,7 +114,7 @@ void asm_x64_mov_local_to_r64(asm_x64_t* as, int src_local_num, int dest_r64);
 void asm_x64_mov_r64_to_local(asm_x64_t* as, int src_r64, int dest_local_num);
 void asm_x64_mov_local_addr_to_r64(asm_x64_t* as, int local_num, int dest_r64);
 void asm_x64_mov_reg_pcrel(asm_x64_t *as, int dest_r64, mp_uint_t label);
-void asm_x64_call_ind(asm_x64_t* as, void* ptr, int temp_r32);
+void asm_x64_call_ind(asm_x64_t* as, size_t fun_id, int temp_r32);
 
 #if GENERIC_ASM_API
 
@@ -171,7 +171,7 @@ void asm_x64_call_ind(asm_x64_t* as, void* ptr, int temp_r32);
         asm_x64_jcc_label(as, ASM_X64_CC_JE, label); \
     } while (0)
 #define ASM_JUMP_REG(as, reg) asm_x64_jmp_reg((as), (reg))
-#define ASM_CALL_IND(as, ptr, idx) asm_x64_call_ind(as, ptr, ASM_X64_REG_RAX)
+#define ASM_CALL_IND(as, ptr, idx) asm_x64_call_ind(as, idx, ASM_X64_REG_RAX)
 
 #define ASM_MOV_LOCAL_REG(as, local_num, reg_src) asm_x64_mov_r64_to_local((as), (reg_src), (local_num))
 #define ASM_MOV_REG_IMM(as, reg_dest, imm) asm_x64_mov_i64_to_r64_optimised((as), (imm), (reg_dest))
diff --git a/py/emitnative.c b/py/emitnative.c
index 1abdb6792..81669af7c 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -379,6 +379,8 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
         asm_arm_mov_reg_i32(emit->as, ASM_ARM_REG_R7, (mp_uint_t)mp_fun_table);
         #elif N_XTENSA
         ASM_MOV_REG_IMM(emit->as, ASM_XTENSA_REG_A15, (uint32_t)mp_fun_table);
+        #elif N_X64
+        asm_x64_mov_i64_to_r64_optimised(emit->as, (intptr_t)mp_fun_table, ASM_X64_REG_RBP);
         #endif
 
         // Store function object (passed as first arg) to stack if needed
@@ -469,6 +471,8 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
         asm_arm_mov_reg_i32(emit->as, ASM_ARM_REG_R7, (mp_uint_t)mp_fun_table);
         #elif N_XTENSA
         ASM_MOV_REG_IMM(emit->as, ASM_XTENSA_REG_A15, (uint32_t)mp_fun_table);
+        #elif N_X64
+        asm_x64_mov_i64_to_r64_optimised(emit->as, (intptr_t)mp_fun_table, ASM_X64_REG_RBP);
         #endif
 
         if (!(emit->scope->scope_flags & MP_SCOPE_FLAG_GENERATOR)) {
--
GitLab
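
Illustration (not part of the commit): a minimal C sketch of the calling pattern that the new asm_x64_call_ind emits. The names fun_table and call_via_table below are hypothetical and exist only to show the addressing scheme; in the patch the table is mp_fun_table, its base is loaded into RBP once in the function prologue, and each call site then encodes only a small index, roughly "mov temp_r64, [rbp + fun_id*WORD_SIZE]" followed by "call temp_r64", so no absolute runtime address appears in the generated code.

/* Hypothetical sketch of the table-indexed indirect call pattern. */
#include <stdio.h>
#include <stddef.h>

typedef void (*fun_t)(void);

static void say_hello(void) { puts("hello"); }

/* Stand-in for mp_fun_table; in the emitted native code its base address
 * is placed in a fixed register (RBP) by the prologue. */
static const fun_t fun_table[] = { say_hello };

/* Equivalent of the sequence emitted by the new asm_x64_call_ind:
 *   mov temp_r64, [rbp + fun_id*WORD_SIZE]   ; asm_x64_mov_mem64_to_r64
 *   call temp_r64                            ; OPCODE_CALL_RM32       */
static void call_via_table(const fun_t *table_base, size_t fun_id) {
    fun_t f = table_base[fun_id];  /* load pointer from the table */
    f();                           /* indirect call through the register */
}

int main(void) {
    call_via_table(fun_table, 0);
    return 0;
}

Because only the index is fixed at code-generation time, the emitted code stays valid if the runtime's functions live at different addresses, which is what makes the native code easier to relocate.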