From 5bdf1650de782d766a648f992270306269cc985a Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Wed, 16 Nov 2016 18:27:20 +1100
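Subject: [PATCH] py/lexer: Make lexer use an mp_reader as its source.

mp_lexer_new() now takes a single mp_reader_t (by value) in place of the
separate stream_data / stream_next_byte / stream_close arguments, and the
lexer stores that reader in mp_lexer_t.  The mp_lexer_stream_next_byte_t
and mp_lexer_stream_close_t typedefs are removed from py/lexer.h, and
MP_LEXER_EOF becomes private to py/lexer.c, defined in terms of
MP_READER_EOF.

Note that the lexer now calls reader.close unconditionally (the previous
NULL check on stream_close is gone), so every reader passed to
mp_lexer_new() must supply a close function.

The esp8266 str32 lexer is updated to the new interface.
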
---
 esp8266/lexerstr32.c | 11 +++++++----
 py/lexer.c           | 31 +++++++++++++------------------
 py/lexer.h           | 15 +++------------
 3 files changed, 23 insertions(+), 34 deletions(-)

diff --git a/esp8266/lexerstr32.c b/esp8266/lexerstr32.c
index 669df8ea7..3fc62399e 100644
--- a/esp8266/lexerstr32.c
+++ b/esp8266/lexerstr32.c
@@ -35,10 +35,11 @@ typedef struct _mp_lexer_str32_buf_t {
     uint8_t byte_off;
 } mp_lexer_str32_buf_t;
 
-STATIC mp_uint_t str32_buf_next_byte(mp_lexer_str32_buf_t *sb) {
+STATIC mp_uint_t str32_buf_next_byte(void *sb_in) {
+    mp_lexer_str32_buf_t *sb = (mp_lexer_str32_buf_t*)sb_in;
     byte c = sb->val & 0xff;
     if (c == 0) {
-        return MP_LEXER_EOF;
+        return MP_READER_EOF;
     }
 
     if (++sb->byte_off > 3) {
@@ -51,7 +52,8 @@ STATIC mp_uint_t str32_buf_next_byte(mp_lexer_str32_buf_t *sb) {
     return c;
 }
 
-STATIC void str32_buf_free(mp_lexer_str32_buf_t *sb) {
+STATIC void str32_buf_free(void *sb_in) { // reader close callback: releases the str32 buffer state
+    mp_lexer_str32_buf_t *sb = (mp_lexer_str32_buf_t*)sb_in; // data arrives as void* so the function fits mp_reader_t without a cast
     m_del_obj(mp_lexer_str32_buf_t, sb);
 }
 
@@ -63,7 +65,8 @@ mp_lexer_t *mp_lexer_new_from_str32(qstr src_name, const char *str, mp_uint_t le
     sb->byte_off = (uint32_t)str & 3;
     sb->src_cur = (uint32_t*)(str - sb->byte_off);
     sb->val = *sb->src_cur++ >> sb->byte_off * 8;
-    return mp_lexer_new(src_name, sb, (mp_lexer_stream_next_byte_t)str32_buf_next_byte, (mp_lexer_stream_close_t)str32_buf_free);
+    mp_reader_t reader = {sb, str32_buf_next_byte, str32_buf_free};
+    return mp_lexer_new(src_name, reader);
 }
 
 #endif // MICROPY_ENABLE_COMPILER
diff --git a/py/lexer.c b/py/lexer.c
index 9342ce8cc..c6ecdf1f8 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -52,6 +52,7 @@ STATIC bool str_strn_equal(const char *str, const char *strn, mp_uint_t len) {
     return i == len && *str == 0;
 }
 
+#define MP_LEXER_EOF ((unichar)MP_READER_EOF)
 #define CUR_CHAR(lex) ((lex)->chr0)
 
 STATIC bool is_end(mp_lexer_t *lex) {
@@ -145,7 +146,7 @@ STATIC void next_char(mp_lexer_t *lex) {
 
     lex->chr0 = lex->chr1;
     lex->chr1 = lex->chr2;
-    lex->chr2 = lex->stream_next_byte(lex->stream_data);
+    lex->chr2 = lex->reader.readbyte(lex->reader.data);
 
     if (lex->chr0 == '\r') {
         // CR is a new line, converted to LF
@@ -153,7 +154,7 @@ STATIC void next_char(mp_lexer_t *lex) {
         if (lex->chr1 == '\n') {
             // CR LF is a single new line
             lex->chr1 = lex->chr2;
-            lex->chr2 = lex->stream_next_byte(lex->stream_data);
+            lex->chr2 = lex->reader.readbyte(lex->reader.data);
         }
     }
 
@@ -689,21 +690,17 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
     }
 }
 
-mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close) {
+mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
     mp_lexer_t *lex = m_new_obj_maybe(mp_lexer_t);
 
     // check for memory allocation error
     if (lex == NULL) {
-        if (stream_close) {
-            stream_close(stream_data);
-        }
+        reader.close(reader.data);
         return NULL;
     }
 
     lex->source_name = src_name;
-    lex->stream_data = stream_data;
-    lex->stream_next_byte = stream_next_byte;
-    lex->stream_close = stream_close;
+    lex->reader = reader;
     lex->line = 1;
     lex->column = 1;
     lex->emit_dent = 0;
@@ -724,9 +721,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_
     lex->indent_level[0] = 0;
 
     // preload characters
-    lex->chr0 = stream_next_byte(stream_data);
-    lex->chr1 = stream_next_byte(stream_data);
-    lex->chr2 = stream_next_byte(stream_data);
+    lex->chr0 = reader.readbyte(reader.data);
+    lex->chr1 = reader.readbyte(reader.data);
+    lex->chr2 = reader.readbyte(reader.data);
 
     // if input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
     if (lex->chr0 == MP_LEXER_EOF) {
@@ -756,7 +753,7 @@ mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t
     if (!mp_reader_new_mem(&reader, (const byte*)str, len, free_len)) {
         return NULL;
     }
-    return mp_lexer_new(src_name, reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close);
+    return mp_lexer_new(src_name, reader);
 }
 
 #if MICROPY_READER_POSIX || MICROPY_READER_FATFS
@@ -767,7 +764,7 @@ mp_lexer_t *mp_lexer_new_from_file(const char *filename) {
     if (ret != 0) {
         return NULL;
     }
-    return mp_lexer_new(qstr_from_str(filename), reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close);
+    return mp_lexer_new(qstr_from_str(filename), reader);
 }
 
 #if MICROPY_HELPER_LEXER_UNIX
@@ -778,7 +775,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
     if (ret != 0) {
         return NULL;
     }
-    return mp_lexer_new(filename, reader.data, (mp_lexer_stream_next_byte_t)reader.readbyte, (mp_lexer_stream_close_t)reader.close);
+    return mp_lexer_new(filename, reader);
 }
 
 #endif
@@ -787,9 +784,7 @@ mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd) {
 
 void mp_lexer_free(mp_lexer_t *lex) {
     if (lex) {
-        if (lex->stream_close) {
-            lex->stream_close(lex->stream_data);
-        }
+        lex->reader.close(lex->reader.data);
         vstr_clear(&lex->vstr);
         m_del(uint16_t, lex->indent_level, lex->alloc_indent_level);
         m_del_obj(mp_lexer_t, lex);
diff --git a/py/lexer.h b/py/lexer.h
index 463be5fff..1461f9c8c 100644
--- a/py/lexer.h
+++ b/py/lexer.h
@@ -30,6 +30,7 @@
 
 #include "py/mpconfig.h"
 #include "py/qstr.h"
+#include "py/reader.h"
 
 /* lexer.h -- simple tokeniser for Micro Python
  *
@@ -142,21 +143,11 @@ typedef enum _mp_token_kind_t {
     MP_TOKEN_DEL_MINUS_MORE,
 } mp_token_kind_t;
 
-// the next-byte function must return the next byte in the stream
-// it must return MP_LEXER_EOF if end of stream
-// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
-#define MP_LEXER_EOF ((unichar)(-1))
-
-typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
-typedef void (*mp_lexer_stream_close_t)(void*);
-
 // this data structure is exposed for efficiency
 // public members are: source_name, tok_line, tok_column, tok_kind, vstr
 typedef struct _mp_lexer_t {
     qstr source_name;           // name of source
-    void *stream_data;          // data for stream
-    mp_lexer_stream_next_byte_t stream_next_byte;   // stream callback to get next byte
-    mp_lexer_stream_close_t stream_close;           // stream callback to free
+    mp_reader_t reader;         // stream source
 
     unichar chr0, chr1, chr2;   // current cached characters from source
 
@@ -176,7 +167,7 @@ typedef struct _mp_lexer_t {
     vstr_t vstr;                // token data
 } mp_lexer_t;
 
-mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close);
+mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader);
 mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
 
 void mp_lexer_free(mp_lexer_t *lex);
-- 
GitLab