From a5185f4bc8f60fb2be82580e6f2d93fec53d0a33 Mon Sep 17 00:00:00 2001
From: Damien <damien.p.george@gmail.com>
Date: Sun, 20 Oct 2013 14:41:27 +0100
Subject: [PATCH] Abstract out back-end stream functionality from lexer.

---
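Note: after this change the lexer no longer needs the whole source in memory
up front; it pulls characters one at a time through a pair of callbacks
handed to py_lexer_new(). A minimal sketch of driving the new interface (the
cstr_stream_t type is made up for illustration; py_lexer_new,
PY_LEXER_CHAR_EOF, unichar, m_new and m_free are the names used in this
patch):

    #include "misc.h"
    #include "lexer.h"

    // hypothetical stream that feeds the lexer from a NUL-terminated C string
    typedef struct { const char *p; } cstr_stream_t;

    static unichar cstr_next_char(void *data) {
        cstr_stream_t *s = data;
        if (*s->p == '\0') {
            return PY_LEXER_CHAR_EOF; // contract: keep returning EOF once exhausted
        }
        return *s->p++;
    }

    static void cstr_stream_free(void *data) {
        m_free(data);
    }

    // usage:
    //   cstr_stream_t *s = m_new(cstr_stream_t, 1);
    //   s->p = "print(42)\n";
    //   py_lexer_t *lex = py_lexer_new("<test>", s, cstr_next_char, cstr_stream_free);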
 py/emitcpy.c     |  10 +--
 py/lexer.c       | 212 +++++++++++++++++++++++------------------------
 py/lexer.h       |  21 +++--
 py/lexerfile.c   |  23 -----
 unix/Makefile    |   3 +-
 unix/lexerunix.c |  55 ++++++++++++
 unix/lexerunix.h |   2 +
 unix/main.c      |  11 ++-
 unix/mpyconfig.h |   2 +-
 9 files changed, 186 insertions(+), 153 deletions(-)
 delete mode 100644 py/lexerfile.c
 create mode 100644 unix/lexerunix.c
 create mode 100644 unix/lexerunix.h

diff --git a/py/emitcpy.c b/py/emitcpy.c
index 6e3543da3..089352c0f 100644
--- a/py/emitcpy.c
+++ b/py/emitcpy.c
@@ -211,7 +211,6 @@ static void emit_cpy_load_const_verbatim_strn(emit_t *emit, const char *str, int
 }
 
 static void emit_cpy_load_const_verbatim_quoted_str(emit_t *emit, qstr qstr, bool bytes) {
-    // TODO strings should be escaped before we get here
     if (emit->pass == PASS_3) {
         const char *str = qstr_str(qstr);
         int len = strlen(str);
@@ -237,13 +236,8 @@ static void emit_cpy_load_const_verbatim_quoted_str(emit_t *emit, qstr qstr, boo
         for (int i = 0; i < len; i++) {
             if (str[i] == '\n') {
                 printf("\\n");
-            } else if (str[i] == '\\' && str[i + 1] == '\'') {
-                i += 1;
-                if (quote_single) {
-                    printf("\\'");
-                } else {
-                    printf("'");
-                }
+            } else if (str[i] == '\\') {
+                printf("\\\\");
             } else if (str[i] == '\'' && quote_single) {
                 printf("\\'");
             } else {
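
This hunk follows from the lexer change below: escape sequences are now
decoded during lexing, so the token text reaching the emitter contains raw
bytes (a real backslash, a real quote) and must be re-escaped when printed
as a CPython-style quoted literal. Condensed, the output loop now behaves
like this (a sketch of the logic in this hunk; the final branch is assumed
to print the byte unchanged):

    for (int i = 0; i < len; i++) {
        if (str[i] == '\n') {
            printf("\\n");      // newline byte -> \n
        } else if (str[i] == '\\') {
            printf("\\\\");     // backslash byte -> \\
        } else if (str[i] == '\'' && quote_single) {
            printf("\\'");      // quote byte -> \' inside a '...' literal
        } else {
            printf("%c", str[i]);
        }
    }

So lexing 'a\nb' now yields the three bytes a, 0x0a, b, and the emitter
prints them back out as a\nb.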
diff --git a/py/lexer.c b/py/lexer.c
index 7167b9327..56f1ed0df 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -9,48 +9,43 @@
 #include "lexer.h"
 
 #define TAB_SIZE (8)
-#define CHR_EOF (-1)
 
 struct _py_lexer_t {
-    const char *name;           // (file) name of source
-    bool free;                  // free source when done with it
+    const char *name;           // name of source
+    void *stream_data;          // data for stream
+    py_lexer_stream_next_char_t stream_next_char;   // stream callback to get next char
+    py_lexer_stream_free_t stream_free;             // stream callback to free
 
-    const char *src_beg;        // beginning of source
-    const char *src_cur;        // current location in source; points to chr0
-    const char *src_end;        // end (exclusive) of source
-    unichar chr0, chr1, chr2;   // current characters from source
+    unichar chr0, chr1, chr2;   // current cached characters from source
 
     uint line;                  // source line
     uint column;                // source column
 
-    uint cont_line;             // continued line
-
-    int emit_dent;
-    int nested_bracket_level;
+    int emit_dent;              // non-zero when there are INDENT/DEDENT tokens to emit
+    int nested_bracket_level;   // >0 when there are nested brackets over multiple lines
 
     uint alloc_indent_level;
     uint num_indent_level;
     uint16_t *indent_level;
 
+    vstr_t vstr;
     py_token_t tok_cur;
-    py_token_t tok_next;
 };
 
-static bool py_token_is_str(const py_token_t *tok, const char *str) {
+bool str_strn_equal(const char *str, const char *strn, int len) {
     uint i = 0;
-    const char *tstr = tok->str;
 
-    while (i < tok->len && *tstr == *str) {
+    while (i < len && *str == *strn) {
         ++i;
-        ++tstr;
         ++str;
+        ++strn;
     }
 
-    return i == tok->len && *str == 0;
+    return i == len && *str == 0;
 }
 
 void py_token_show(const py_token_t *tok) {
-    printf("(%s:%d:%d) kind:%d cont_line:%d str:%p len:%d", tok->src_name, tok->src_line, tok->src_column, tok->kind, tok->cont_line, tok->str, tok->len);
+    printf("(%s:%d:%d) kind:%d str:%p len:%d", tok->src_name, tok->src_line, tok->src_column, tok->kind, tok->str, tok->len);
     if (tok->str != NULL && tok->len > 0) {
         const char *i = tok->str;
         const char *j = i + tok->len;
@@ -77,8 +72,10 @@ bool py_token_show_error(const py_token_t *tok, const char *msg) {
     return false;
 }
 
+#define CUR_CHAR(lex) ((lex)->chr0)
+
 static bool is_end(py_lexer_t *lex) {
-    return lex->chr0 == CHR_EOF;
+    return lex->chr0 == PY_LEXER_CHAR_EOF;
 }
 
 static bool is_physical_newline(py_lexer_t *lex) {
@@ -142,7 +139,7 @@ static bool is_tail_of_identifier(py_lexer_t *lex) {
 }
 
 static void next_char(py_lexer_t *lex) {
-    if (lex->chr0 == CHR_EOF) {
+    if (lex->chr0 == PY_LEXER_CHAR_EOF) {
         return;
     }
 
@@ -152,12 +149,10 @@ static void next_char(py_lexer_t *lex) {
         // LF is a new line
         ++lex->line;
         lex->column = 1;
-        lex->cont_line = lex->line;
     } else if (lex->chr0 == '\r') {
         // CR is a new line
         ++lex->line;
         lex->column = 1;
-        lex->cont_line = lex->line;
         if (lex->chr1 == '\n') {
             // CR LF is a single new line
             advance = 2;
@@ -173,15 +168,11 @@ static void next_char(py_lexer_t *lex) {
     for (; advance > 0; advance--) {
         lex->chr0 = lex->chr1;
         lex->chr1 = lex->chr2;
-        lex->src_cur++;
-        if (lex->src_cur + 2 < lex->src_end) {
-            lex->chr2 = lex->src_cur[2];
-        } else {
+        lex->chr2 = lex->stream_next_char(lex->stream_data);
+        if (lex->chr2 == PY_LEXER_CHAR_EOF) {
             // EOF
-            if (lex->chr1 != CHR_EOF && lex->chr1 != '\n' && lex->chr1 != '\r') {
+            if (lex->chr1 != PY_LEXER_CHAR_EOF && lex->chr1 != '\n' && lex->chr1 != '\r') {
                 lex->chr2 = '\n'; // insert newline at end of file
-            } else {
-                lex->chr2 = CHR_EOF;
             }
         }
     }
@@ -286,9 +277,9 @@ static const char *tok_kw[] = {
     NULL,
 };
 
-static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
+static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok, bool first_token) {
+    // skip white space and comments
     bool had_physical_newline = false;
-
     while (!is_end(lex)) {
         if (is_physical_newline(lex)) {
             had_physical_newline = true;
@@ -315,15 +306,22 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
         }
     }
 
+    // set token source information
     tok->src_name = lex->name;
     tok->src_line = lex->line;
     tok->src_column = lex->column;
-    tok->kind = PY_TOKEN_INVALID;
-    tok->cont_line = lex->cont_line;
-    tok->str = lex->src_cur;
-    tok->len = 0;
 
-    if (lex->emit_dent < 0) {
+    // start new token text
+    vstr_reset(&lex->vstr);
+
+    if (first_token && lex->line == 1 && lex->column != 1) {
+        // check that the first token is in the first column
+        // if the first token is not on the first line, a physical newline precedes
+        // it and this check is done by the normal indent/dedent handling below
+        // (done to get equivalence with CPython)
+        tok->kind = PY_TOKEN_INDENT;
+
+    } else if (lex->emit_dent < 0) {
         tok->kind = PY_TOKEN_DEDENT;
         lex->emit_dent += 1;
 
@@ -414,19 +412,42 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
             num_quotes = 1;
         }
 
-        // set start of token
-        tok->str = lex->src_cur;
-
         // parse the literal
-        // TODO proper escaping
         int n_closing = 0;
         while (!is_end(lex) && (num_quotes > 1 || !is_char(lex, '\n')) && n_closing < num_quotes) {
             if (is_char(lex, quote_char)) {
                 n_closing += 1;
+                vstr_add_char(&lex->vstr, CUR_CHAR(lex));
             } else {
                 n_closing = 0;
                 if (!is_raw && is_char(lex, '\\')) {
                     next_char(lex);
+                    unichar c = CUR_CHAR(lex);
+                    switch (c) {
+                        case PY_LEXER_CHAR_EOF: break; // TODO a proper error message?
+                        case '\n': c = PY_LEXER_CHAR_EOF; break; // TODO check this works correctly (we are supposed to ignore the newline)
+                        case '\\': break;
+                        case '\'': break;
+                        case '"': break;
+                        case 'a': c = 0x07; break;
+                        case 'b': c = 0x08; break;
+                        case 't': c = 0x09; break;
+                        case 'n': c = 0x0a; break;
+                        case 'v': c = 0x0b; break;
+                        case 'f': c = 0x0c; break;
+                        case 'r': c = 0x0d; break;
+                        // TODO \ooo octal
+                        case 'x': // TODO \xhh
+                        case 'N': // TODO \N{name} only in strings
+                        case 'u': // TODO \uxxxx only in strings
+                        case 'U': // TODO \Uxxxxxxxx only in strings
+                        default: break; // TODO error message
+                    }
+                    if (c != PY_LEXER_CHAR_EOF) {
+                        vstr_add_char(&lex->vstr, c);
+                    }
+                } else {
+                    vstr_add_char(&lex->vstr, CUR_CHAR(lex));
                 }
             }
             next_char(lex);
@@ -437,33 +458,40 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
             tok->kind = PY_TOKEN_LONELY_STRING_OPEN;
         }
 
-        // set token string (byte) length
-        tok->len = lex->src_cur - tok->str - n_closing;
-
-        // we set the length, return now so it's not set incorrectly below
-        return;
+        // cut off the end quotes from the token text
+        vstr_cut_tail(&lex->vstr, n_closing);
 
     } else if (is_head_of_identifier(lex)) {
         tok->kind = PY_TOKEN_NAME;
 
+        // get first char
+        vstr_add_char(&lex->vstr, CUR_CHAR(lex));
         next_char(lex);
 
+        // get tail chars
         while (!is_end(lex) && is_tail_of_identifier(lex)) {
+            vstr_add_char(&lex->vstr, CUR_CHAR(lex));
             next_char(lex);
         }
 
     } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
         tok->kind = PY_TOKEN_NUMBER;
 
+        // get first char
+        vstr_add_char(&lex->vstr, CUR_CHAR(lex));
         next_char(lex);
 
+        // get tail chars
         while (!is_end(lex)) {
             if (is_char_or(lex, 'e', 'E')) {
+                vstr_add_char(&lex->vstr, 'e');
                 next_char(lex);
                 if (is_char(lex, '+') || is_char(lex, '-')) {
+                    vstr_add_char(&lex->vstr, CUR_CHAR(lex));
                     next_char(lex);
                 }
             } else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) {
+                vstr_add_char(&lex->vstr, CUR_CHAR(lex));
                 next_char(lex);
             } else {
                 break;
@@ -546,13 +574,14 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
         }
     }
 
-    // set token string (byte) length
-    tok->len = lex->src_cur - tok->str;
+    // point token text to vstr buffer
+    tok->str = vstr_str(&lex->vstr);
+    tok->len = vstr_len(&lex->vstr);
 
-    // check for keywords (must be done after setting token string length)
+    // check for keywords
     if (tok->kind == PY_TOKEN_NAME) {
         for (int i = 0; tok_kw[i] != NULL; i++) {
-            if (py_token_is_str(tok, tok_kw[i])) {
+            if (str_strn_equal(tok_kw[i], tok->str, tok->len)) {
                 tok->kind = PY_TOKEN_KW_FALSE + i;
                 break;
             }
@@ -560,83 +589,58 @@ static void py_lexer_next_token_into(py_lexer_t *lex, py_token_t *tok) {
     }
 }
 
-py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str) {
-    py_lexer_t *lex;
+py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_free_t stream_free) {
+    py_lexer_t *lex = m_new(py_lexer_t, 1);
 
-    lex = m_new(py_lexer_t, 1);
-
-    //lex->name = g_strdup(src_name); // TODO
-    lex->name = src_name;
-    lex->free = free_str;
-    lex->src_beg = str;
-    lex->src_cur = str;
-    lex->src_end = str + len;
+    lex->name = src_name; // TODO do we need to strdup this?
+    lex->stream_data = stream_data;
+    lex->stream_next_char = stream_next_char;
+    lex->stream_free = stream_free;
     lex->line = 1;
     lex->column = 1;
-    lex->cont_line = lex->line;
     lex->emit_dent = 0;
     lex->nested_bracket_level = 0;
     lex->alloc_indent_level = 16;
     lex->num_indent_level = 1;
     lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
     lex->indent_level[0] = 0;
+    vstr_init(&lex->vstr);
 
     // preload characters
-    // TODO unicode
-    if (len == 0) {
-        lex->chr0 = '\n'; // insert newline at end of file
-        lex->chr1 = CHR_EOF;
-        lex->chr2 = CHR_EOF;
-    } else if (len == 1) {
-        lex->chr0 = str[0];
+    lex->chr0 = stream_next_char(stream_data);
+    lex->chr1 = stream_next_char(stream_data);
+    lex->chr2 = stream_next_char(stream_data);
+
+    // if the input stream is 0, 1 or 2 characters long and doesn't end in a newline, then insert a newline at the end
+    if (lex->chr0 == PY_LEXER_CHAR_EOF) {
+        lex->chr0 = '\n';
+    } else if (lex->chr1 == PY_LEXER_CHAR_EOF) {
         if (lex->chr0 != '\n' && lex->chr0 != '\r') {
-            lex->chr1 = '\n'; // insert newline at end of file
-        } else {
-            lex->chr1 = CHR_EOF;
+            lex->chr1 = '\n';
         }
-        lex->chr2 = CHR_EOF;
-    } else if (len == 2) {
-        lex->chr0 = str[0];
-        lex->chr1 = str[1];
+    } else if (lex->chr2 == PY_LEXER_CHAR_EOF) {
         if (lex->chr1 != '\n' && lex->chr1 != '\r') {
-            lex->chr2 = '\n'; // insert newline at end of file
-        } else {
-            lex->chr2 = CHR_EOF;
+            lex->chr2 = '\n';
         }
-    } else {
-        lex->chr0 = str[0];
-        lex->chr1 = str[1];
-        lex->chr2 = str[2];
     }
 
-    py_lexer_next_token_into(lex, &lex->tok_cur);
-
-    // check that the first token is in the first column
-    // (done to get equivalence with CPython)
-    if (lex->tok_cur.src_line == 1 && lex->tok_cur.src_column != 1) {
-        lex->tok_next = lex->tok_cur;
-        lex->tok_cur.kind = PY_TOKEN_INDENT;
-    } else {
-        py_lexer_next_token_into(lex, &lex->tok_next);
-    }
+    // preload first token
+    py_lexer_next_token_into(lex, &lex->tok_cur, true);
 
     return lex;
 }
 
 void py_lexer_free(py_lexer_t *lex) {
-    if (lex == NULL) {
-        return;
-    }
-    //m_free(lex->name);
-    if (lex->free) {
-        m_free((char*)lex->src_beg);
+    if (lex) {
+        if (lex->stream_free) {
+            lex->stream_free(lex->stream_data);
+        }
+        m_free(lex);
     }
-    m_free(lex);
 }
 
 void py_lexer_to_next(py_lexer_t *lex) {
-    lex->tok_cur = lex->tok_next;
-    py_lexer_next_token_into(lex, &lex->tok_next);
+    py_lexer_next_token_into(lex, &lex->tok_cur, false);
 }
 
 const py_token_t *py_lexer_cur(const py_lexer_t *lex) {
@@ -652,14 +656,6 @@ bool py_lexer_is_str(py_lexer_t *lex, const char *str) {
     return py_token_is_str(&lex->tok_cur, str);
 }
 
-bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind) {
-    return lex->tok_next.kind == kind;
-}
-
-bool py_lexer_is_next_str(py_lexer_t *lex, const char *str) {
-    return py_token_is_str(&lex->tok_next, str);
-}
-
 bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind) {
     if (py_lexer_is_kind(lex, kind)) {
         py_lexer_to_next(lex);
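
One consequence of the vstr change: tok->str now points into the lexer's
internal buffer and is overwritten on the next py_lexer_to_next(), as the
comment added to lexer.h below notes. Callers that need the text to outlive
the token must copy it first; a sketch:

    const py_token_t *tok = py_lexer_cur(lex);
    char *copy = m_new(char, tok->len + 1);
    memcpy(copy, tok->str, tok->len);   // take a private copy (memcpy from <string.h>)
    copy[tok->len] = '\0';
    py_lexer_to_next(lex);              // tok->str is no longer valid past here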
diff --git a/py/lexer.h b/py/lexer.h
index 948901259..889a55e2b 100644
--- a/py/lexer.h
+++ b/py/lexer.h
@@ -108,32 +108,35 @@ typedef enum _py_token_kind_t {
 } py_token_kind_t;
 
 typedef struct _py_token_t {
-    const char *src_name;       // (file) name of source
-    uint src_line;              // actual source line
-    uint src_column;            // actual source column
+    const char *src_name;       // name of source
+    uint src_line;              // source line
+    uint src_column;            // source column
 
     py_token_kind_t kind;       // kind of token
-    uint cont_line;             // token belongs to this line in a continued line
-    const char *str;            // string of token
+    const char *str;            // string of token (valid only while this token is the current token)
     uint len;                   // (byte) length of string of token
 } py_token_t;
 
+// the next-char function must return the next character in the stream
+// it must return PY_LEXER_CHAR_EOF at the end of the stream
+// it may be called again after returning PY_LEXER_CHAR_EOF, and must then keep returning PY_LEXER_CHAR_EOF
+#define PY_LEXER_CHAR_EOF (-1)
+typedef unichar (*py_lexer_stream_next_char_t)(void*);
+typedef void (*py_lexer_stream_free_t)(void*);
+
 typedef struct _py_lexer_t py_lexer_t;
 
 void py_token_show(const py_token_t *tok);
 void py_token_show_error_prefix(const py_token_t *tok);
 bool py_token_show_error(const py_token_t *tok, const char *msg);
 
-py_lexer_t *py_lexer_from_file(const char *filename);
-py_lexer_t *py_lexer_from_str_len(const char *src_name, const char *str, uint len, bool free_str);
+py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_free_t stream_free);
 void py_lexer_free(py_lexer_t *lex);
 void py_lexer_to_next(py_lexer_t *lex);
 const py_token_t *py_lexer_cur(const py_lexer_t *lex);
 bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
 /* unused
 bool py_lexer_is_str(py_lexer_t *lex, const char *str);
-bool py_lexer_is_next_kind(py_lexer_t *lex, py_token_kind_t kind);
-bool py_lexer_is_next_str(py_lexer_t *lex, const char *str);
 bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
 bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
 */
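
The contract above is small enough that a stream need not buffer anything.
For instance, a getc()-backed stream would satisfy it directly, since getc()
keeps returning EOF once the file is exhausted (hypothetical sketch; this
patch does not add such a stream, and unix/lexerunix.c below still loads the
whole file into memory):

    #include <stdio.h>

    static unichar file_next_char(void *data) {
        int c = getc((FILE*)data);
        return c == EOF ? PY_LEXER_CHAR_EOF : c;
    }

    static void file_stream_free(void *data) {
        fclose((FILE*)data);
    }

    // py_lexer_t *lex = py_lexer_new(name, fopen(name, "r"), file_next_char, file_stream_free);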
diff --git a/py/lexerfile.c b/py/lexerfile.c
deleted file mode 100644
index 74bb5a061..000000000
--- a/py/lexerfile.c
+++ /dev/null
@@ -1,23 +0,0 @@
-#include <stdint.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <fcntl.h>
-
-#include "misc.h"
-#include "lexer.h"
-
-py_lexer_t *py_lexer_from_file(const char *filename) {
-    // TODO abstract away file functionality
-    int fd = open(filename, O_RDONLY);
-    if (fd < 0) {
-        printf("cannot open file %s\n", filename);
-        return NULL;
-    }
-    uint size = lseek(fd, 0, SEEK_END);
-    lseek(fd, 0, SEEK_SET);
-    char *data = m_new(char, size);
-    read(fd, data, size);
-    close(fd);
-
-    return py_lexer_from_str_len(filename, data, size, true);
-}
diff --git a/unix/Makefile b/unix/Makefile
index a2c9b9f5f..7c8b5a2b9 100644
--- a/unix/Makefile
+++ b/unix/Makefile
@@ -7,14 +7,15 @@ LDFLAGS =
 
 SRC_C = \
 	main.c \
+	lexerunix.c \
 
 PY_O = \
 	nlrx64.o \
 	malloc.o \
 	qstr.o \
+	vstr.o \
 	misc.o \
 	lexer.o \
-	lexerfile.o \
 	parse.o \
 	scope.o \
 	compile.o \
diff --git a/unix/lexerunix.c b/unix/lexerunix.c
new file mode 100644
index 000000000..617d92bb8
--- /dev/null
+++ b/unix/lexerunix.c
@@ -0,0 +1,55 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "misc.h"
+#include "lexer.h"
+
+typedef struct _str_buf_t {
+    bool free;                  // free src_beg when done
+    const char *src_beg;        // beginning of source
+    const char *src_cur;        // current location in source
+    const char *src_end;        // end (exclusive) of source
+} str_buf_t;
+
+unichar str_buf_next_char(str_buf_t *sb) {
+    if (sb->src_cur < sb->src_end) {
+        return *sb->src_cur++;
+    } else {
+        return PY_LEXER_CHAR_EOF;
+    }
+}
+
+void str_buf_free(str_buf_t *sb) {
+    if (sb) {
+        if (sb->free) {
+            m_free((char*)sb->src_beg);
+        }
+        m_free(sb);
+    }
+}
+
+py_lexer_t *py_lexer_new_from_str_len(const char *src_name, const char *str, uint len, bool free_str) {
+    str_buf_t *sb = m_new(str_buf_t, 1);
+    sb->free = free_str;
+    sb->src_beg = str;
+    sb->src_cur = str;
+    sb->src_end = str + len;
+    return py_lexer_new(src_name, sb, (py_lexer_stream_next_char_t)str_buf_next_char, (py_lexer_stream_free_t)str_buf_free);
+}
+
+py_lexer_t *py_lexer_new_from_file(const char *filename) {
+    int fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        printf("cannot open file %s\n", filename);
+        return NULL;
+    }
+    uint size = lseek(fd, 0, SEEK_END);
+    lseek(fd, 0, SEEK_SET);
+    char *data = m_new(char, size);
+    read(fd, data, size);
+    close(fd);
+
+    return py_lexer_new_from_str_len(filename, data, size, true);
+}
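
py_lexer_new_from_file() keeps the old slurp-the-file behaviour and does not
check the result of read(). A variant with basic error checking might look
like this (illustrative sketch only; the error handling is an assumption,
not part of this patch):

    py_lexer_t *py_lexer_new_from_file(const char *filename) {
        int fd = open(filename, O_RDONLY);
        if (fd < 0) {
            printf("cannot open file %s\n", filename);
            return NULL;
        }
        uint size = lseek(fd, 0, SEEK_END);
        lseek(fd, 0, SEEK_SET);
        char *data = m_new(char, size);
        if (read(fd, data, size) != (ssize_t)size) { // failed or short read
            printf("error reading file %s\n", filename);
            m_free(data);
            close(fd);
            return NULL;
        }
        close(fd);
        return py_lexer_new_from_str_len(filename, data, size, true);
    }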
diff --git a/unix/lexerunix.h b/unix/lexerunix.h
new file mode 100644
index 000000000..aa7631cb0
--- /dev/null
+++ b/unix/lexerunix.h
@@ -0,0 +1,2 @@
+py_lexer_t *py_lexer_new_from_str_len(const char *src_name, const char *str, uint len, bool free_str);
+py_lexer_t *py_lexer_new_from_file(const char *filename);
diff --git a/unix/main.c b/unix/main.c
index 018e1a970..e3999db68 100644
--- a/unix/main.c
+++ b/unix/main.c
@@ -6,6 +6,7 @@
 #include "misc.h"
 #include "mpyconfig.h"
 #include "lexer.h"
+#include "lexerunix.h"
 #include "parse.h"
 #include "compile.h"
 #include "runtime.h"
@@ -67,7 +68,7 @@ void do_repl() {
                 line = line3;
             }
         }
-        py_lexer_t *lex = py_lexer_from_str_len("<stdin>", line, strlen(line), false);
+        py_lexer_t *lex = py_lexer_new_from_str_len("<stdin>", line, strlen(line), false);
         py_parse_node_t pn = py_parse(lex, PY_PARSE_SINGLE_INPUT);
         if (pn != PY_PARSE_NODE_NULL) {
             //py_parse_node_show(pn, 0);
@@ -91,7 +92,7 @@ void do_repl() {
 }
 
 void do_file(const char *file) {
-    py_lexer_t *lex = py_lexer_from_file(file);
+    py_lexer_t *lex = py_lexer_new_from_file(file);
     //const char *pysrc = "def f():\n  x=x+1\n  print(42)\n";
     //py_lexer_t *lex = py_lexer_from_str_len("<>", pysrc, strlen(pysrc), false);
     if (lex == NULL) {
@@ -119,7 +120,11 @@ void do_file(const char *file) {
 
             py_lexer_free(lex);
 
-#if !MICROPY_EMIT_CPYTHON
+#if MICROPY_EMIT_CPYTHON
+            if (!comp_ok) {
+                printf("compile error\n");
+            }
+#else
             if (1 && comp_ok) {
                 // execute it
                 py_obj_t module_fun = rt_make_function_from_id(1);
diff --git a/unix/mpyconfig.h b/unix/mpyconfig.h
index 3ab17e6ca..587b09b16 100644
--- a/unix/mpyconfig.h
+++ b/unix/mpyconfig.h
@@ -1,7 +1,7 @@
 // options to control how Micro Python is built
 
 #define MICROPY_ENABLE_FLOAT        (1)
-#define MICROPY_EMIT_CPYTHON        (0)
+#define MICROPY_EMIT_CPYTHON        (1)
 #define MICROPY_EMIT_X64            (1)
 #define MICROPY_EMIT_THUMB          (0)
 #define MICROPY_EMIT_INLINE_THUMB   (0)
-- 
GitLab