From c3bd9415cc8e6ba3619fe4b7a2bdac20260b0236 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Mon, 20 Jul 2015 11:03:13 +0000
Subject: [PATCH] py: Make qstr hash size configurable, defaults to 2 bytes.

This patch makes configurable, via MICROPY_QSTR_BYTES_IN_HASH, the
number of bytes used for a qstr hash.  It was originally fixed at 2
bytes, and now defaults to 2 bytes.  Setting it to 1 byte will save
ROM and RAM at a small expense of hash collisions.
---
 bare-arm/mpconfigport.h |  1 +
 py/makeqstrdata.py      | 12 +++++----
 py/mpconfig.h           |  5 ++++
 py/qstr.c               | 55 +++++++++++++++++++++++------------------
 py/qstrdefs.h           |  1 +
 5 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/bare-arm/mpconfigport.h b/bare-arm/mpconfigport.h
index cae27cfd9..b35f9ab46 100644
--- a/bare-arm/mpconfigport.h
+++ b/bare-arm/mpconfigport.h
@@ -2,6 +2,7 @@
 
 // options to control how Micro Python is built
 
+#define MICROPY_QSTR_BYTES_IN_HASH  (1)
 #define MICROPY_ALLOC_PATH_MAX      (512)
 #define MICROPY_EMIT_X64            (0)
 #define MICROPY_EMIT_THUMB          (0)
diff --git a/py/makeqstrdata.py b/py/makeqstrdata.py
index 6b7df39cb..234e65d46 100644
--- a/py/makeqstrdata.py
+++ b/py/makeqstrdata.py
@@ -37,12 +37,12 @@ codepoint2name[ord('!')] = 'bang'
 codepoint2name[ord('\\')] = 'backslash'
 
 # this must match the equivalent function in qstr.c
-def compute_hash(qstr):
+def compute_hash(qstr, bytes_hash):
     hash = 5381
     for char in qstr:
         hash = (hash * 33) ^ ord(char)
     # Make sure that valid hash is never zero, zero means "hash not computed"
-    return (hash & 0xffff) or 1
+    return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
 
 def do_work(infiles):
     # read the qstrs in from the input files
@@ -81,6 +81,7 @@ def do_work(infiles):
 
     # get config variables
     cfg_bytes_len = int(qcfgs['BYTES_IN_LEN'])
+    cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH'])
     cfg_max_len = 1 << (8 * cfg_bytes_len)
 
     # print out the starte of the generated C header file
@@ -88,11 +89,11 @@ def do_work(infiles):
     print('')
 
     # add NULL qstr with no hash or data
-    print('QDEF(MP_QSTR_NULL, (const byte*)"\\x00\\x00%s" "")' % ('\\x00' * cfg_bytes_len))
+    print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % ('\\x00' * cfg_bytes_hash, '\\x00' * cfg_bytes_len))
 
     # go through each qstr and print it out
     for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
-        qhash = compute_hash(qstr)
+        qhash = compute_hash(qstr, cfg_bytes_hash)
         # Calculate len of str, taking escapes into account
         qlen = len(qstr.replace("\\\\", "-").replace("\\", ""))
         qdata = qstr.replace('"', '\\"')
@@ -100,7 +101,8 @@ def do_work(infiles):
             print('qstr is too long:', qstr)
             assert False
         qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
-        print('QDEF(MP_QSTR_%s, (const byte*)"\\x%02x\\x%02x%s" "%s")' % (ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen_str, qdata))
+        qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
+        print('QDEF(MP_QSTR_%s, (const byte*)"%s%s" "%s")' % (ident, qhash_str, qlen_str, qdata))
 
 if __name__ == "__main__":
     do_work(sys.argv[1:])
diff --git a/py/mpconfig.h b/py/mpconfig.h
index 0b86ec435..9f0e909f5 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -152,6 +152,11 @@
 #define MICROPY_QSTR_BYTES_IN_LEN (1)
 #endif
 
+// Number of bytes used to store qstr hash
+#ifndef MICROPY_QSTR_BYTES_IN_HASH
+#define MICROPY_QSTR_BYTES_IN_HASH (2)
+#endif
+
 // Avoid using C stack when making Python function calls. C stack still
 // may be used if there's no free heap.
 #ifndef MICROPY_STACKLESS
diff --git a/py/qstr.c b/py/qstr.c
index dd04ae8f7..afbf0ff9b 100644
--- a/py/qstr.c
+++ b/py/qstr.c
@@ -43,22 +43,31 @@
 #endif
 
 // A qstr is an index into the qstr pool.
-// The data for a qstr contains (hash, length, data).
-// For now we use very simple encoding, just to get the framework correct:
-//  - hash is 2 bytes (see function below)
-//  - length is 2 bytes
-//  - data follows
-//  - \0 terminated (for now, so they can be printed using printf)
-
-#define Q_GET_HASH(q)   ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8))
-#define Q_GET_ALLOC(q)  (2 + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1)
-#define Q_GET_DATA(q)   ((q) + 2 + MICROPY_QSTR_BYTES_IN_LEN)
+// The data for a qstr contains (hash, length, data):
+//  - hash (configurable number of bytes)
+//  - length (configurable number of bytes)
+//  - data ("length" number of bytes)
+//  - \0 terminated (so they can be printed using printf)
+
+#if MICROPY_QSTR_BYTES_IN_HASH == 1
+    #define Q_HASH_MASK (0xff)
+    #define Q_GET_HASH(q) ((mp_uint_t)(q)[0])
+    #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); } while (0)
+#elif MICROPY_QSTR_BYTES_IN_HASH == 2
+    #define Q_HASH_MASK (0xffff)
+    #define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8))
+    #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); (q)[1] = (hash) >> 8; } while (0)
+#else
+    #error unimplemented qstr hash decoding
+#endif
+#define Q_GET_ALLOC(q)  (MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1)
+#define Q_GET_DATA(q)   ((q) + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN)
 #if MICROPY_QSTR_BYTES_IN_LEN == 1
-    #define Q_GET_LENGTH(q) ((q)[2])
-    #define Q_SET_LENGTH(q, len) do { (q)[2] = (len); } while (0)
+    #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH])
+    #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); } while (0)
 #elif MICROPY_QSTR_BYTES_IN_LEN == 2
-    #define Q_GET_LENGTH(q) ((q)[2] | ((q)[3] << 8))
-    #define Q_SET_LENGTH(q, len) do { (q)[2] = (len); (q)[3] = (len) >> 8; } while (0)
+    #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH] | ((q)[MICROPY_QSTR_BYTES_IN_HASH + 1] << 8))
+    #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); (q)[MICROPY_QSTR_BYTES_IN_HASH + 1] = (len) >> 8; } while (0)
 #else
     #error unimplemented qstr length decoding
 #endif
@@ -70,7 +79,7 @@ mp_uint_t qstr_compute_hash(const byte *data, mp_uint_t len) {
     for (const byte *top = data + len; data < top; data++) {
         hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data
     }
-    hash &= 0xffff;
+    hash &= Q_HASH_MASK;
     // Make sure that valid hash is never zero, zero means "hash not computed"
     if (hash == 0) {
         hash++;
@@ -156,7 +165,7 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
         // qstr does not exist in interned pool so need to add it
 
         // compute number of bytes needed to intern this string
-        mp_uint_t n_bytes = 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
+        mp_uint_t n_bytes = MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
 
         if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
             // not enough room at end of previously interned string so try to grow
@@ -193,11 +202,10 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
 
         // store the interned strings' data
         mp_uint_t hash = qstr_compute_hash((const byte*)str, len);
-        q_ptr[0] = hash;
-        q_ptr[1] = hash >> 8;
+        Q_SET_HASH(q_ptr, hash);
         Q_SET_LENGTH(q_ptr, len);
-        memcpy(q_ptr + 2 + MICROPY_QSTR_BYTES_IN_LEN, str, len);
-        q_ptr[2 + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
+        memcpy(q_ptr + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN, str, len);
+        q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
         q = qstr_add(q_ptr);
     }
     return q;
@@ -205,7 +213,7 @@ qstr qstr_from_strn(const char *str, mp_uint_t len) {
 
 byte *qstr_build_start(mp_uint_t len, byte **q_ptr) {
     assert(len < (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN)));
-    *q_ptr = m_new(byte, 2 + MICROPY_QSTR_BYTES_IN_LEN + len + 1);
+    *q_ptr = m_new(byte, MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1);
     Q_SET_LENGTH(*q_ptr, len);
     return Q_GET_DATA(*q_ptr);
 }
@@ -215,9 +223,8 @@ qstr qstr_build_end(byte *q_ptr) {
     if (q == 0) {
         mp_uint_t len = Q_GET_LENGTH(q_ptr);
         mp_uint_t hash = qstr_compute_hash(Q_GET_DATA(q_ptr), len);
-        q_ptr[0] = hash;
-        q_ptr[1] = hash >> 8;
-        q_ptr[2 + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
+        Q_SET_HASH(q_ptr, hash);
+        q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
         q = qstr_add(q_ptr);
     } else {
         m_del(byte, q_ptr, Q_GET_ALLOC(q_ptr));
diff --git a/py/qstrdefs.h b/py/qstrdefs.h
index 48131636f..0f066e324 100644
--- a/py/qstrdefs.h
+++ b/py/qstrdefs.h
@@ -31,6 +31,7 @@
 
 // qstr configuration passed to makeqstrdata.py of the form QCFG(key, value)
 QCFG(BYTES_IN_LEN, MICROPY_QSTR_BYTES_IN_LEN)
+QCFG(BYTES_IN_HASH, MICROPY_QSTR_BYTES_IN_HASH)
 
 Q()
 Q(*)
-- 
GitLab