Skip to content
Snippets Groups Projects
Commit f127bef3 authored by Damien George
Browse files

py/makeqstrdata.py: Compute the qstr hash from bytes, not characters.

parent f98bb2dd
Branches
No related tags found
No related merge requests found
...@@ -14,11 +14,13 @@ import sys ...@@ -14,11 +14,13 @@ import sys
# - codepoint2name lives in a different module # - codepoint2name lives in a different module
import platform import platform
if platform.python_version_tuple()[0] == '2': if platform.python_version_tuple()[0] == '2':
ord_bytes = ord bytes_cons = lambda val, enc=None: bytearray(val)
from htmlentitydefs import codepoint2name from htmlentitydefs import codepoint2name
elif platform.python_version_tuple()[0] == '3': elif platform.python_version_tuple()[0] == '3':
ord_bytes = lambda x:x bytes_cons = bytes
from html.entities import codepoint2name from html.entities import codepoint2name
# end compatibility code
codepoint2name[ord('-')] = 'hyphen'; codepoint2name[ord('-')] = 'hyphen';
# add some custom names to map characters that aren't in HTML # add some custom names to map characters that aren't in HTML
...@@ -52,8 +54,8 @@ codepoint2name[ord('~')] = 'tilde' ...@@ -52,8 +54,8 @@ codepoint2name[ord('~')] = 'tilde'
# this must match the equivalent function in qstr.c # this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash): def compute_hash(qstr, bytes_hash):
hash = 5381 hash = 5381
for char in qstr: for b in qstr:
hash = (hash * 33) ^ ord(char) hash = (hash * 33) ^ b
# Make sure that valid hash is never zero, zero means "hash not computed" # Make sure that valid hash is never zero, zero means "hash not computed"
return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1 return (hash & ((1 << (8 * bytes_hash)) - 1)) or 1
...@@ -115,16 +117,15 @@ def parse_input_headers(infiles): ...@@ -115,16 +117,15 @@ def parse_input_headers(infiles):
return qcfgs, qstrs return qcfgs, qstrs
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr): def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
qhash = compute_hash(qstr, cfg_bytes_hash) qbytes = bytes_cons(qstr, 'utf8')
qlen = len(qbytes)
qhash = compute_hash(qbytes, cfg_bytes_hash)
if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr): if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr):
# qstr is all printable ASCII so render it as-is (for easier debugging) # qstr is all printable ASCII so render it as-is (for easier debugging)
qlen = len(qstr)
qdata = qstr qdata = qstr
else: else:
# qstr contains non-printable codes so render entire thing as hex pairs # qstr contains non-printable codes so render entire thing as hex pairs
qbytes = qstr.encode('utf8') qdata = ''.join(('\\x%02x' % b) for b in qbytes)
qlen = len(qbytes)
qdata = ''.join(('\\x%02x' % ord_bytes(b)) for b in qbytes)
if qlen >= (1 << (8 * cfg_bytes_len)): if qlen >= (1 << (8 * cfg_bytes_len)):
print('qstr is too long:', qstr) print('qstr is too long:', qstr)
assert False assert False
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment