From 953074315e594f5a30f455dc6a1a67340a3e6ea7 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Wed, 10 Sep 2014 22:10:33 +0100
Subject: [PATCH] py: Enable struct/binary-helper to parse q and Q sized ints.

Addresses issue #848.
---
 py/binary.c          | 27 ++++++++++++++++++++-------
 py/binary.h          |  2 +-
 py/mpz.c             |  8 ++++----
 py/mpz.h             |  4 ++--
 py/obj.h             |  1 +
 py/objint.c          |  6 ++++++
 py/objint_longlong.c | 11 ++++++++++-
 py/objint_mpz.c      |  8 +++++++-
 8 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/py/binary.c b/py/binary.c
index 919ba8784..835ba8aa2 100644
--- a/py/binary.c
+++ b/py/binary.c
@@ -34,6 +34,7 @@
 #include "misc.h"
 #include "qstr.h"
 #include "obj.h"
+#include "smallint.h"
 #include "binary.h"
 
 // Helpers to work with binary-encoded data
@@ -136,7 +137,10 @@ mp_obj_t mp_binary_get_val_array(char typecode, void *p, int index) {
     return MP_OBJ_NEW_SMALL_INT(val);
 }
 
-mp_int_t mp_binary_get_int(mp_uint_t size, bool is_signed, bool big_endian, byte *p) {
+// The long long type is guaranteed to hold at least 64 bits, and size is at
+// most 8 (for q and Q), so we will always be able to parse the given data
+// and fit it into a long long.
+long long mp_binary_get_int(mp_uint_t size, bool is_signed, bool big_endian, byte *p) {
     int delta;
     if (!big_endian) {
         delta = -1;
@@ -145,7 +149,7 @@ mp_int_t mp_binary_get_int(mp_uint_t size, bool is_signed, bool big_endian, byte
         delta = 1;
     }
 
-    mp_int_t val = 0;
+    long long val = 0;
     if (is_signed && *p & 0x80) {
         val = -1;
     }
@@ -175,16 +179,25 @@ mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte **ptr) {
     }
     *ptr = p + size;
 
-    mp_int_t val = mp_binary_get_int(size, is_signed(val_type), (struct_type == '>'), p);
+    long long val = mp_binary_get_int(size, is_signed(val_type), (struct_type == '>'), p);
 
     if (val_type == 'O') {
-        return (mp_obj_t)val;
+        return (mp_obj_t)(mp_uint_t)val;
     } else if (val_type == 'S') {
-        return mp_obj_new_str((char*)val, strlen((char*)val), false);
+        const char *s_val = (const char*)(mp_uint_t)val;
+        return mp_obj_new_str(s_val, strlen(s_val), false);
     } else if (is_signed(val_type)) {
-        return mp_obj_new_int(val);
+        if ((long long)MP_SMALL_INT_MIN <= val && val <= (long long)MP_SMALL_INT_MAX) {
+            return mp_obj_new_int((mp_int_t)val);
+        } else {
+            return mp_obj_new_int_from_ll(val);
+        }
     } else {
-        return mp_obj_new_int_from_uint(val);
+        if ((unsigned long long)val <= (unsigned long long)MP_SMALL_INT_MAX) {
+            return mp_obj_new_int_from_uint((mp_uint_t)val);
+        } else {
+            return mp_obj_new_int_from_ull(val);
+        }
     }
 }
 
diff --git a/py/binary.h b/py/binary.h
index 3e8861b0c..5f577da02 100644
--- a/py/binary.h
+++ b/py/binary.h
@@ -34,5 +34,5 @@ void mp_binary_set_val_array(char typecode, void *p, int index, mp_obj_t val_in)
 void mp_binary_set_val_array_from_int(char typecode, void *p, int index, mp_int_t val);
 mp_obj_t mp_binary_get_val(char struct_type, char val_type, byte **ptr);
 void mp_binary_set_val(char struct_type, char val_type, mp_obj_t val_in, byte **ptr);
-mp_int_t mp_binary_get_int(mp_uint_t size, bool is_signed, bool big_endian, byte *p);
+long long mp_binary_get_int(mp_uint_t size, bool is_signed, bool big_endian, byte *p);
 void mp_binary_set_int(mp_uint_t val_sz, bool big_endian, byte *p, byte *val_ptr);
diff --git a/py/mpz.c b/py/mpz.c
index 186229569..0599656ea 100644
--- a/py/mpz.c
+++ b/py/mpz.c
@@ -589,9 +589,9 @@ mpz_t *mpz_from_int(mp_int_t val) {
     return z;
 }
 
-mpz_t *mpz_from_ll(long long val) {
+mpz_t *mpz_from_ll(long long val, bool is_signed) {
     mpz_t *z = mpz_zero();
-    mpz_set_from_ll(z, val);
+    mpz_set_from_ll(z, val, is_signed); // is_signed is forwarded as-is; when false, mpz_set_from_ll does not negate a negative val
     return z;
 }
 
@@ -668,11 +668,11 @@ void mpz_set_from_int(mpz_t *z, mp_int_t val) {
     }
 }
 
-void mpz_set_from_ll(mpz_t *z, long long val) {
+void mpz_set_from_ll(mpz_t *z, long long val, bool is_signed) {
     mpz_need_dig(z, MPZ_NUM_DIG_FOR_LL);
 
     unsigned long long uval;
-    if (val < 0) {
+    if (is_signed && val < 0) {
         z->neg = 1;
         uval = -val;
     } else {
diff --git a/py/mpz.h b/py/mpz.h
index 79e5ea231..a2d8923b9 100644
--- a/py/mpz.h
+++ b/py/mpz.h
@@ -71,7 +71,7 @@ void mpz_deinit(mpz_t *z);
 
 mpz_t *mpz_zero();
 mpz_t *mpz_from_int(mp_int_t i);
-mpz_t *mpz_from_ll(long long i);
+mpz_t *mpz_from_ll(long long i, bool is_signed);
 mpz_t *mpz_from_str(const char *str, mp_uint_t len, bool neg, mp_uint_t base);
 void mpz_free(mpz_t *z);
 
@@ -79,7 +79,7 @@ mpz_t *mpz_clone(const mpz_t *src);
 
 void mpz_set(mpz_t *dest, const mpz_t *src);
 void mpz_set_from_int(mpz_t *z, mp_int_t src);
-void mpz_set_from_ll(mpz_t *z, long long i);
+void mpz_set_from_ll(mpz_t *z, long long i, bool is_signed);
 mp_uint_t mpz_set_from_str(mpz_t *z, const char *str, mp_uint_t len, bool neg, mp_uint_t base);
 
 bool mpz_is_zero(const mpz_t *z);
diff --git a/py/obj.h b/py/obj.h
index bb0de7976..eff4a4eaf 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -369,6 +369,7 @@ mp_obj_t mp_obj_new_int(mp_int_t value);
 mp_obj_t mp_obj_new_int_from_uint(mp_uint_t value);
 mp_obj_t mp_obj_new_int_from_str_len(const char **str, mp_uint_t len, bool neg, mp_uint_t base);
 mp_obj_t mp_obj_new_int_from_ll(long long val); // this must return a multi-precision integer object (or raise an overflow exception)
+mp_obj_t mp_obj_new_int_from_ull(unsigned long long val); // this must return a multi-precision integer object (or raise an overflow exception)
 mp_obj_t mp_obj_new_str(const char* data, mp_uint_t len, bool make_qstr_if_not_already);
 mp_obj_t mp_obj_new_bytes(const byte* data, mp_uint_t len);
 #if MICROPY_PY_BUILTINS_FLOAT
diff --git a/py/objint.c b/py/objint.c
index a5392273c..240be3283 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -246,6 +246,12 @@ mp_obj_t mp_obj_new_int_from_ll(long long val) {
     return mp_const_none;
 }
 
+// Called when an integer larger than a SMALL_INT is needed; this variant never inspects val and always raises OverflowError
+mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) {
+    nlr_raise(mp_obj_new_exception_msg(&mp_type_OverflowError, "small int overflow"));
+    return mp_const_none; // presumably unreachable (nlr_raise is expected not to return) - TODO confirm
+}
+
 mp_obj_t mp_obj_new_int_from_uint(mp_uint_t value) {
     // SMALL_INT accepts only signed numbers, of one bit less size
     // then word size, which totals 2 bits less for unsigned numbers.
diff --git a/py/objint_longlong.c b/py/objint_longlong.c
index 43bdcabdc..8d47308b0 100644
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@@ -178,7 +178,16 @@ mp_obj_t mp_obj_new_int_from_ll(long long val) {
     return o;
 }
 
-mp_obj_t mp_obj_new_int_from_str_len(const char **str, uint len, bool neg, uint base) {
+mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) {
+    // TODO raise an exception if the unsigned long long won't fit; until then only this assert (a no-op when NDEBUG is defined) rejects values with the top bit set
+    assert(val >> (sizeof(unsigned long long) * 8 - 1) == 0);
+    mp_obj_int_t *o = m_new_obj(mp_obj_int_t);
+    o->base.type = &mp_type_int;
+    o->val = val; // NOTE(review): presumably o->val is a signed long long, hence the top-bit check above - confirm
+    return o;
+}
+
+mp_obj_t mp_obj_new_int_from_str_len(const char **str, mp_uint_t len, bool neg, mp_uint_t base) {
     // TODO this does not honor the given length of the string, but it all cases it should anyway be null terminated
     // TODO check overflow
     mp_obj_int_t *o = m_new_obj(mp_obj_int_t);
diff --git a/py/objint_mpz.c b/py/objint_mpz.c
index 440b4f318..39806bb27 100644
--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c
@@ -279,7 +279,13 @@ mp_obj_t mp_obj_new_int(mp_int_t value) {
 
 mp_obj_t mp_obj_new_int_from_ll(long long val) {
     mp_obj_int_t *o = mp_obj_int_new_mpz();
-    mpz_set_from_ll(&o->mpz, val);
+    mpz_set_from_ll(&o->mpz, val, true); // true: a negative val is negated and stored with the mpz sign flag set
+    return o;
+}
+
+mp_obj_t mp_obj_new_int_from_ull(unsigned long long val) {
+    mp_obj_int_t *o = mp_obj_int_new_mpz();
+    mpz_set_from_ll(&o->mpz, val, false); // false: skips the negation branch, so val is not treated as negative even when its top bit is set
     return o;
 }
 
-- 
GitLab