From b8f9ac54111ad0962401c764112c9a5669699deb Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Tue, 13 Oct 2015 00:50:17 +0100
Subject: [PATCH] py: Implement ptr32 load and store in viper emitter.

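Add a ptr32 type to the viper code emitter, alongside the existing ptr,
ptr8 and ptr16 types.  Subscript loads and stores through a ptr32 access
32-bit memory directly, addressing base + 4*index.  New 32-bit load/store
helpers are added to the x64 assembler; the x86, Thumb and ARM backends
reuse their existing 32-bit word load/store routines.  Tests for ptr32
load and store are included.

A minimal sketch of how ptr32 is used from viper code, adapted from the
new tests (the example assumes the bytearray data is 4-byte aligned):

    @micropython.viper
    def memadd(src: ptr32, n: int) -> int:
        total = 0
        for i in range(n):
            total += src[i]  # 32-bit load from address src + 4*i
        return total

    buf = bytearray(b'\x12\x12\x12\x12\x34\x34\x34\x34')
    print(hex(memadd(buf, 2)))  # 0x46464646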
---
 py/asmx64.c                                | 20 ++++++
 py/asmx64.h                                |  2 +
 py/emitnative.c                            | 82 +++++++++++++++++++++-
 py/qstrdefs.h                              |  1 +
 tests/micropython/viper_ptr32_load.py      | 21 ++++++
 tests/micropython/viper_ptr32_load.py.exp  |  3 +
 tests/micropython/viper_ptr32_store.py     | 26 +++++++
 tests/micropython/viper_ptr32_store.py.exp |  4 ++
 8 files changed, 156 insertions(+), 3 deletions(-)
 create mode 100644 tests/micropython/viper_ptr32_load.py
 create mode 100644 tests/micropython/viper_ptr32_load.py.exp
 create mode 100644 tests/micropython/viper_ptr32_store.py
 create mode 100644 tests/micropython/viper_ptr32_store.py.exp

diff --git a/py/asmx64.c b/py/asmx64.c
index 0b7c87415..1eab585ac 100644
--- a/py/asmx64.c
+++ b/py/asmx64.c
@@ -340,6 +340,16 @@ void asm_x64_mov_r16_to_mem16(asm_x64_t *as, int src_r64, int dest_r64, int dest
     asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
 }
 
+void asm_x64_mov_r32_to_mem32(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp) {
+    assert(dest_r64 < 8);
+    if (src_r64 < 8) {
+        asm_x64_write_byte_1(as, OPCODE_MOV_R64_TO_RM64);
+    } else {
+        asm_x64_write_byte_2(as, REX_PREFIX | REX_R, OPCODE_MOV_R64_TO_RM64);
+    }
+    asm_x64_write_r64_disp(as, src_r64, dest_r64, dest_disp);
+}
+
 void asm_x64_mov_r64_to_mem64(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp) {
     // use REX prefix for 64 bit operation
     asm_x64_write_byte_2(as, REX_PREFIX | REX_W | (src_r64 < 8 ? 0 : REX_R) | (dest_r64 < 8 ? 0 : REX_B), OPCODE_MOV_R64_TO_RM64);
@@ -366,6 +376,16 @@ void asm_x64_mov_mem16_to_r64zx(asm_x64_t *as, int src_r64, int src_disp, int de
     asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
 }
 
+void asm_x64_mov_mem32_to_r64zx(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
+    assert(src_r64 < 8);
+    if (dest_r64 < 8) {
+        asm_x64_write_byte_1(as, OPCODE_MOV_RM64_TO_R64);
+    } else {
+        asm_x64_write_byte_2(as, REX_PREFIX | REX_R, OPCODE_MOV_RM64_TO_R64);
+    }
+    asm_x64_write_r64_disp(as, dest_r64, src_r64, src_disp);
+}
+
 void asm_x64_mov_mem64_to_r64(asm_x64_t *as, int src_r64, int src_disp, int dest_r64) {
     // use REX prefix for 64 bit operation
     asm_x64_write_byte_2(as, REX_PREFIX | REX_W | (dest_r64 < 8 ? 0 : REX_R) | (src_r64 < 8 ? 0 : REX_B), OPCODE_MOV_RM64_TO_R64);
diff --git a/py/asmx64.h b/py/asmx64.h
index 67ff2b457..6fbc2c906 100644
--- a/py/asmx64.h
+++ b/py/asmx64.h
@@ -94,9 +94,11 @@ void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r
 void asm_x64_mov_i64_to_r64_aligned(asm_x64_t *as, int64_t src_i64, int dest_r64);
 void asm_x64_mov_r8_to_mem8(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp);
 void asm_x64_mov_r16_to_mem16(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp);
+void asm_x64_mov_r32_to_mem32(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp);
 void asm_x64_mov_r64_to_mem64(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp);
 void asm_x64_mov_mem8_to_r64zx(asm_x64_t *as, int src_r64, int src_disp, int dest_r64);
 void asm_x64_mov_mem16_to_r64zx(asm_x64_t *as, int src_r64, int src_disp, int dest_r64);
+void asm_x64_mov_mem32_to_r64zx(asm_x64_t *as, int src_r64, int src_disp, int dest_r64);
 void asm_x64_mov_mem64_to_r64(asm_x64_t *as, int src_r64, int src_disp, int dest_r64);
 void asm_x64_and_r64_r64(asm_x64_t *as, int dest_r64, int src_r64);
 void asm_x64_or_r64_r64(asm_x64_t *as, int dest_r64, int src_r64);
diff --git a/py/emitnative.c b/py/emitnative.c
index 7c87b0389..78988aa75 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -152,11 +152,13 @@
 #define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_x64_mov_mem64_to_r64((as), (reg_base), 8 * (word_offset), (reg_dest))
 #define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_x64_mov_mem8_to_r64zx((as), (reg_base), 0, (reg_dest))
 #define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_x64_mov_mem16_to_r64zx((as), (reg_base), 0, (reg_dest))
+#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_x64_mov_mem32_to_r64zx((as), (reg_base), 0, (reg_dest))
 
 #define ASM_STORE_REG_REG(as, reg_src, reg_base) asm_x64_mov_r64_to_mem64((as), (reg_src), (reg_base), 0)
 #define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_x64_mov_r64_to_mem64((as), (reg_src), (reg_base), 8 * (word_offset))
 #define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_x64_mov_r8_to_mem8((as), (reg_src), (reg_base), 0)
 #define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_x64_mov_r16_to_mem16((as), (reg_src), (reg_base), 0)
+#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_x64_mov_r32_to_mem32((as), (reg_src), (reg_base), 0)
 
 #elif N_X86
 
@@ -295,11 +297,13 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
 #define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_x86_mov_mem32_to_r32((as), (reg_base), 4 * (word_offset), (reg_dest))
 #define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_x86_mov_mem8_to_r32zx((as), (reg_base), 0, (reg_dest))
 #define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_x86_mov_mem16_to_r32zx((as), (reg_base), 0, (reg_dest))
+#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_x86_mov_mem32_to_r32((as), (reg_base), 0, (reg_dest))
 
 #define ASM_STORE_REG_REG(as, reg_src, reg_base) asm_x86_mov_r32_to_mem32((as), (reg_src), (reg_base), 0)
 #define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_x86_mov_r32_to_mem32((as), (reg_src), (reg_base), 4 * (word_offset))
 #define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_x86_mov_r8_to_mem8((as), (reg_src), (reg_base), 0)
 #define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_x86_mov_r16_to_mem16((as), (reg_src), (reg_base), 0)
+#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_x86_mov_r32_to_mem32((as), (reg_src), (reg_base), 0)
 
 #elif N_THUMB
 
@@ -388,11 +392,13 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
 #define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_thumb_ldr_rlo_rlo_i5((as), (reg_dest), (reg_base), (word_offset))
 #define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_thumb_ldrb_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
 #define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_thumb_ldrh_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
+#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_thumb_ldr_rlo_rlo_i5((as), (reg_dest), (reg_base), 0)
 
 #define ASM_STORE_REG_REG(as, reg_src, reg_base) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
 #define ASM_STORE_REG_REG_OFFSET(as, reg_src, reg_base, word_offset) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), (word_offset))
 #define ASM_STORE8_REG_REG(as, reg_src, reg_base) asm_thumb_strb_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
 #define ASM_STORE16_REG_REG(as, reg_src, reg_base) asm_thumb_strh_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
+#define ASM_STORE32_REG_REG(as, reg_src, reg_base) asm_thumb_str_rlo_rlo_i5((as), (reg_src), (reg_base), 0)
 
 #elif N_ARM
 
@@ -480,11 +486,13 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
 #define ASM_LOAD_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_arm_ldr_reg_reg((as), (reg_dest), (reg_base), 4 * (word_offset))
 #define ASM_LOAD8_REG_REG(as, reg_dest, reg_base) asm_arm_ldrb_reg_reg((as), (reg_dest), (reg_base))
 #define ASM_LOAD16_REG_REG(as, reg_dest, reg_base) asm_arm_ldrh_reg_reg((as), (reg_dest), (reg_base))
+#define ASM_LOAD32_REG_REG(as, reg_dest, reg_base) asm_arm_ldr_reg_reg((as), (reg_dest), (reg_base))
 
 #define ASM_STORE_REG_REG(as, reg_value, reg_base) asm_arm_str_reg_reg((as), (reg_value), (reg_base), 0)
 #define ASM_STORE_REG_REG_OFFSET(as, reg_dest, reg_base, word_offset) asm_arm_str_reg_reg((as), (reg_dest), (reg_base), 4 * (word_offset))
 #define ASM_STORE8_REG_REG(as, reg_value, reg_base) asm_arm_strb_reg_reg((as), (reg_value), (reg_base))
 #define ASM_STORE16_REG_REG(as, reg_value, reg_base) asm_arm_strh_reg_reg((as), (reg_value), (reg_base))
+#define ASM_STORE32_REG_REG(as, reg_value, reg_base) asm_arm_str_reg_reg((as), (reg_value), (reg_base))
 
 #else
 
@@ -513,10 +521,11 @@ typedef enum {
     VTYPE_PTR = 0x10 | MP_NATIVE_TYPE_UINT, // pointer to word sized entity
     VTYPE_PTR8 = 0x20 | MP_NATIVE_TYPE_UINT,
     VTYPE_PTR16 = 0x30 | MP_NATIVE_TYPE_UINT,
-    VTYPE_PTR_NONE = 0x40 | MP_NATIVE_TYPE_UINT,
+    VTYPE_PTR32 = 0x40 | MP_NATIVE_TYPE_UINT,
+    VTYPE_PTR_NONE = 0x50 | MP_NATIVE_TYPE_UINT,
 
-    VTYPE_UNBOUND = 0x50 | MP_NATIVE_TYPE_OBJ,
-    VTYPE_BUILTIN_CAST = 0x60 | MP_NATIVE_TYPE_OBJ,
+    VTYPE_UNBOUND = 0x60 | MP_NATIVE_TYPE_OBJ,
+    VTYPE_BUILTIN_CAST = 0x70 | MP_NATIVE_TYPE_OBJ,
 } vtype_kind_t;
 
 STATIC qstr vtype_to_qstr(vtype_kind_t vtype) {
@@ -528,6 +537,7 @@ STATIC qstr vtype_to_qstr(vtype_kind_t vtype) {
         case VTYPE_PTR: return MP_QSTR_ptr;
         case VTYPE_PTR8: return MP_QSTR_ptr8;
         case VTYPE_PTR16: return MP_QSTR_ptr16;
+        case VTYPE_PTR32: return MP_QSTR_ptr32;
         case VTYPE_PTR_NONE: default: return MP_QSTR_None;
     }
 }
@@ -600,6 +610,7 @@ STATIC void emit_native_set_native_type(emit_t *emit, mp_uint_t op, mp_uint_t ar
                 case MP_QSTR_ptr: type = VTYPE_PTR; break;
                 case MP_QSTR_ptr8: type = VTYPE_PTR8; break;
                 case MP_QSTR_ptr16: type = VTYPE_PTR16; break;
+                case MP_QSTR_ptr32: type = VTYPE_PTR32; break;
                 default: EMIT_NATIVE_VIPER_TYPE_ERROR(emit, "unknown type '%q'", arg2); return;
             }
             if (op == MP_EMIT_NATIVE_TYPE_RETURN) {
@@ -1391,6 +1402,8 @@ STATIC void emit_native_load_global(emit_t *emit, qstr qst) {
         emit_post_push_imm(emit, VTYPE_BUILTIN_CAST, VTYPE_PTR8);
     } else if (emit->do_viper_types && qst == MP_QSTR_ptr16) {
         emit_post_push_imm(emit, VTYPE_BUILTIN_CAST, VTYPE_PTR16);
+    } else if (emit->do_viper_types && qst == MP_QSTR_ptr32) {
+        emit_post_push_imm(emit, VTYPE_BUILTIN_CAST, VTYPE_PTR32);
     } else {
         emit_call_with_imm_arg(emit, MP_F_LOAD_GLOBAL, qst, REG_ARG_1);
         emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
@@ -1494,6 +1507,23 @@ STATIC void emit_native_load_subscr(emit_t *emit) {
                     ASM_LOAD16_REG_REG(emit->as, REG_RET, reg_base); // load from (base+2*index)
                     break;
                 }
+                case VTYPE_PTR32: {
+                    // pointer to 32-bit memory
+                    if (index_value != 0) {
+                        // index is a non-zero immediate
+                        #if N_THUMB
+                        if (index_value > 0 && index_value < 32) {
+                            asm_thumb_ldr_rlo_rlo_i5(emit->as, REG_RET, reg_base, index_value);
+                            break;
+                        }
+                        #endif
+                        ASM_MOV_IMM_TO_REG(emit->as, index_value << 2, reg_index);
+                        ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 4*index to base
+                        reg_base = reg_index;
+                    }
+                    ASM_LOAD32_REG_REG(emit->as, REG_RET, reg_base); // load from (base+4*index)
+                    break;
+                }
                 default:
                     EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
                         "can't load from '%q'", vtype_to_qstr(vtype_base));
@@ -1521,6 +1551,16 @@ STATIC void emit_native_load_subscr(emit_t *emit) {
                     ASM_LOAD16_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+2*index)
                     break;
                 }
+                case VTYPE_PTR32: {
+                    // pointer to 32-bit memory
+                    assert(vtype_index == VTYPE_INT);
+                    ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
+                    ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
+                    ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
+                    ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
+                    ASM_LOAD32_REG_REG(emit->as, REG_RET, REG_ARG_1); // load from (base+4*index)
+                    break;
+                }
                 default:
                     EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
                         "can't load from '%q'", vtype_to_qstr(vtype_base));
@@ -1690,6 +1730,27 @@ STATIC void emit_native_store_subscr(emit_t *emit) {
                     ASM_STORE16_REG_REG(emit->as, reg_value, reg_base); // store value to (base+2*index)
                     break;
                 }
+                case VTYPE_PTR32: {
+                    // pointer to 32-bit memory
+                    if (index_value != 0) {
+                        // index is a non-zero immediate
+                        #if N_THUMB
+                        if (index_value > 0 && index_value < 32) {
+                            asm_thumb_str_rlo_rlo_i5(emit->as, reg_value, reg_base, index_value);
+                            break;
+                        }
+                        #endif
+                        ASM_MOV_IMM_TO_REG(emit->as, index_value << 2, reg_index);
+                        #if N_ARM
+                        asm_arm_str_reg_reg_reg(emit->as, reg_value, reg_base, reg_index);
+                        return;
+                        #endif
+                        ASM_ADD_REG_REG(emit->as, reg_index, reg_base); // add 4*index to base
+                        reg_base = reg_index;
+                    }
+                    ASM_STORE32_REG_REG(emit->as, reg_value, reg_base); // store value to (base+4*index)
+                    break;
+                }
                 default:
                     EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
                         "can't store to '%q'", vtype_to_qstr(vtype_base));
@@ -1732,6 +1793,20 @@ STATIC void emit_native_store_subscr(emit_t *emit) {
                     ASM_STORE16_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+2*index)
                     break;
                 }
+                case VTYPE_PTR32: {
+                    // pointer to 32-bit memory
+                    assert(vtype_index == VTYPE_INT);
+                    #if N_ARM
+                    asm_arm_str_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
+                    break;
+                    #endif
+                    ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
+                    ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
+                    ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
+                    ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
+                    ASM_STORE32_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+4*index)
+                    break;
+                }
                 default:
                     EMIT_NATIVE_VIPER_TYPE_ERROR(emit,
                         "can't store to '%q'", vtype_to_qstr(vtype_base));
@@ -2311,6 +2386,7 @@ STATIC void emit_native_call_function(emit_t *emit, mp_uint_t n_positional, mp_u
             case VTYPE_PTR:
             case VTYPE_PTR8:
             case VTYPE_PTR16:
+            case VTYPE_PTR32:
             case VTYPE_PTR_NONE:
                 emit_fold_stack_top(emit, REG_ARG_1);
                 emit_post_top_set_vtype(emit, vtype_cast);
diff --git a/py/qstrdefs.h b/py/qstrdefs.h
index e91c665c2..dcde1b5c1 100644
--- a/py/qstrdefs.h
+++ b/py/qstrdefs.h
@@ -103,6 +103,7 @@ Q(uint)
 Q(ptr)
 Q(ptr8)
 Q(ptr16)
+Q(ptr32)
 #endif
 
 #if MICROPY_EMIT_INLINE_THUMB
diff --git a/tests/micropython/viper_ptr32_load.py b/tests/micropython/viper_ptr32_load.py
new file mode 100644
index 000000000..d552b9df6
--- /dev/null
+++ b/tests/micropython/viper_ptr32_load.py
@@ -0,0 +1,21 @@
+# test loading from ptr32 type
+
+@micropython.viper
+def get(src:ptr32) -> int:
+    return src[0]
+
+@micropython.viper
+def get1(src:ptr32) -> int:
+    return src[1]
+
+@micropython.viper
+def memadd(src:ptr32, n:int) -> int:
+    sum = 0
+    for i in range(n):
+        sum += src[i]
+    return sum
+
+b = bytearray(b'\x12\x12\x12\x12\x34\x34\x34\x34')
+print(b)
+print(hex(get(b)), hex(get1(b)))
+print(hex(memadd(b, 2)))
diff --git a/tests/micropython/viper_ptr32_load.py.exp b/tests/micropython/viper_ptr32_load.py.exp
new file mode 100644
index 000000000..e7ce2d972
--- /dev/null
+++ b/tests/micropython/viper_ptr32_load.py.exp
@@ -0,0 +1,3 @@
+bytearray(b'\x12\x12\x12\x124444')
+0x12121212 0x34343434
+0x46464646
diff --git a/tests/micropython/viper_ptr32_store.py b/tests/micropython/viper_ptr32_store.py
new file mode 100644
index 000000000..b63bac9ee
--- /dev/null
+++ b/tests/micropython/viper_ptr32_store.py
@@ -0,0 +1,26 @@
+# test store to ptr32 type
+
+@micropython.viper
+def set(dest:ptr32, val:int):
+    dest[0] = val
+
+@micropython.viper
+def set1(dest:ptr32, val:int):
+    dest[1] = val
+
+@micropython.viper
+def memset(dest:ptr32, val:int, n:int):
+    for i in range(n):
+        dest[i] = val
+
+b = bytearray(8)
+print(b)
+
+set(b, 0x42424242)
+print(b)
+
+set1(b, 0x43434343)
+print(b)
+
+memset(b, 0x44444444, len(b) // 4)
+print(b)
diff --git a/tests/micropython/viper_ptr32_store.py.exp b/tests/micropython/viper_ptr32_store.py.exp
new file mode 100644
index 000000000..13b9f418f
--- /dev/null
+++ b/tests/micropython/viper_ptr32_store.py.exp
@@ -0,0 +1,4 @@
+bytearray(b'\x00\x00\x00\x00\x00\x00\x00\x00')
+bytearray(b'BBBB\x00\x00\x00\x00')
+bytearray(b'BBBBCCCC')
+bytearray(b'DDDDDDDD')
-- 
GitLab