From 9528cd66d7c94d7376884a53c2080b29e9bc3a0a Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Wed, 15 Jan 2014 21:23:31 +0000
Subject: [PATCH] Convert parse errors to exceptions.

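The parser no longer prints parse errors itself.  On failure, mp_parse
returns MP_PARSE_NODE_NULL and passes back an exception ID (a qstr) and a
message through two new out-parameters, so that each caller can print the
error or raise it as an exception.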
---
 py/builtinimport.c |  9 ++++++---
 py/lexer.c         | 15 ++++-----------
 py/lexer.h         | 10 ++--------
 py/mpqstrraw.h     |  3 ++-
 py/parse.c         | 18 +++++++++++-------
 py/parse.h         |  3 ++-
 stm/main.c         | 31 ++++++++++++++++++++++++-------
 unix-cpy/main.c    | 17 +++++++++++++++--
 unix/main.c        | 10 ++++++++--
 9 files changed, 74 insertions(+), 42 deletions(-)

diff --git a/py/builtinimport.c b/py/builtinimport.c
index 33576e3f0..2eca36fbc 100644
--- a/py/builtinimport.c
+++ b/py/builtinimport.c
@@ -48,16 +48,19 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) {
     rt_globals_set(mp_obj_module_get_globals(module_obj));
 
     // parse the imported script
-    mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT);
+    qstr parse_exc_id;
+    const char *parse_exc_msg;
+    mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT, &parse_exc_id, &parse_exc_msg);
     mp_lexer_free(lex);
 
     if (pn == MP_PARSE_NODE_NULL) {
-        // TODO handle parse error correctly
+        // parse error; clean up and raise exception
         rt_locals_set(old_locals);
         rt_globals_set(old_globals);
-        return mp_const_none;
+        nlr_jump(mp_obj_new_exception_msg(parse_exc_id, parse_exc_msg));
     }
 
+    // compile the imported script
     mp_obj_t module_fun = mp_compile(pn, false);
 
     if (module_fun == mp_const_none) {
diff --git a/py/lexer.c b/py/lexer.c
index da8967b16..6feb231e0 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -35,6 +35,7 @@ struct _mp_lexer_t {
     mp_token_t tok_cur;
 };
 
+// TODO replace with a call to a standard function
 bool str_strn_equal(const char *str, const char *strn, int len) {
     uint i = 0;
 
@@ -66,15 +67,6 @@ void mp_token_show(const mp_token_t *tok) {
     printf("\n");
 }
 
-void mp_token_show_error_prefix(const mp_token_t *tok) {
-    printf("(%s:%d:%d) ", tok->src_name, tok->src_line, tok->src_column);
-}
-
-bool mp_token_show_error(const mp_token_t *tok, const char *msg) {
-    printf("(%s:%d:%d) %s\n", tok->src_name, tok->src_line, tok->src_column, msg);
-    return false;
-}
-
 #define CUR_CHAR(lex) ((lex)->chr0)
 
 static bool is_end(mp_lexer_t *lex) {
@@ -684,8 +676,9 @@ bool mp_lexer_opt_str(mp_lexer_t *lex, const char *str) {
 }
 */
 
-bool mp_lexer_show_error(mp_lexer_t *lex, const char *msg) {
-    return mp_token_show_error(&lex->tok_cur, msg);
+bool mp_lexer_show_error_pythonic_prefix(mp_lexer_t *lex) {
+    printf("  File \"%s\", line %d column %d\n", lex->tok_cur.src_name, lex->tok_cur.src_line, lex->tok_cur.src_column);
+    return false;
 }
 
 bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg) {
diff --git a/py/lexer.h b/py/lexer.h
index 428ff03c5..ea928c77f 100644
--- a/py/lexer.h
+++ b/py/lexer.h
@@ -124,8 +124,6 @@ typedef void (*mp_lexer_stream_close_t)(void*);
 typedef struct _mp_lexer_t mp_lexer_t;
 
 void mp_token_show(const mp_token_t *tok);
-void mp_token_show_error_prefix(const mp_token_t *tok);
-bool mp_token_show_error(const mp_token_t *tok, const char *msg);
 
 mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
 mp_lexer_t *mp_lexer_new_from_str_len(const char *src_name, const char *str, uint len, uint free_len);
@@ -134,12 +132,8 @@ void mp_lexer_free(mp_lexer_t *lex);
 void mp_lexer_to_next(mp_lexer_t *lex);
 const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex);
 bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind);
-/* unused
-bool mp_lexer_is_str(mp_lexer_t *lex, const char *str);
-bool mp_lexer_opt_kind(mp_lexer_t *lex, mp_token_kind_t kind);
-bool mp_lexer_opt_str(mp_lexer_t *lex, const char *str);
-*/
-bool mp_lexer_show_error(mp_lexer_t *lex, const char *msg);
+
+bool mp_lexer_show_error_pythonic_prefix(mp_lexer_t *lex);
 bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg);
 
 // used to import a module; must be implemented for a specific port
diff --git a/py/mpqstrraw.h b/py/mpqstrraw.h
index c3cda84b4..84b24f274 100644
--- a/py/mpqstrraw.h
+++ b/py/mpqstrraw.h
@@ -24,13 +24,14 @@ Q(StopIteration)
 
 Q(AssertionError)
 Q(AttributeError)
+Q(IndentationError)
 Q(IndexError)
 Q(KeyError)
 Q(NameError)
+Q(OSError)
 Q(SyntaxError)
 Q(TypeError)
 Q(ValueError)
-Q(OSError)
 
 Q(abs)
 Q(all)
diff --git a/py/parse.c b/py/parse.c
index e2c952073..e26e235aa 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -8,6 +8,7 @@
 
 #include "misc.h"
 #include "mpconfig.h"
+#include "mpqstr.h"
 #include "lexer.h"
 #include "parse.h"
 
@@ -265,7 +266,7 @@ static void push_result_rule(parser_t *parser, const rule_t *rule, int num_args)
     push_result_node(parser, (mp_parse_node_t)pn);
 }
 
-mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
+mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr *exc_id_out, const char **exc_msg_out) {
 
     // allocate memory for the parser and its stacks
 
@@ -598,17 +599,20 @@ finished:
     return result;
 
 syntax_error:
-    // TODO these should raise a proper exception
     if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) {
-        mp_lexer_show_error_pythonic(lex, "IndentationError: unexpected indent");
+        *exc_id_out = MP_QSTR_IndentationError;
+        *exc_msg_out = "unexpected indent";
     } else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) {
-        mp_lexer_show_error_pythonic(lex, "IndentationError: unindent does not match any outer indentation level");
+        *exc_id_out = MP_QSTR_IndentationError;
+        *exc_msg_out = "unindent does not match any outer indentation level";
     } else {
-        mp_lexer_show_error_pythonic(lex, "syntax error:");
+        *exc_id_out = MP_QSTR_SyntaxError;
+        *exc_msg_out = "invalid syntax";
 #ifdef USE_RULE_NAME
-        mp_lexer_show_error(lex, rule->rule_name);
-#endif
+        // debugging: print the rule name that failed and the token
+        mp_lexer_show_error_pythonic(lex, rule->rule_name);
         mp_token_show(mp_lexer_cur(lex));
+#endif
     }
     result = MP_PARSE_NODE_NULL;
     goto finished;
diff --git a/py/parse.h b/py/parse.h
index 7326243ea..be2073ae5 100644
--- a/py/parse.h
+++ b/py/parse.h
@@ -62,4 +62,5 @@ typedef enum {
     MP_PARSE_EVAL_INPUT,
 } mp_parse_input_kind_t;
 
-mp_parse_node_t mp_parse(struct _mp_lexer_t *lex, mp_parse_input_kind_t input_kind);
+// returns MP_PARSE_NODE_NULL on error; in that case *exc_id_out and *exc_msg_out hold the exception type and message
+mp_parse_node_t mp_parse(struct _mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr *exc_id_out, const char **exc_msg_out);
diff --git a/stm/main.c b/stm/main.c
index ab8cced6a..dce7f6a74 100644
--- a/stm/main.c
+++ b/stm/main.c
@@ -418,10 +418,18 @@ void do_repl(void) {
         }
 
         mp_lexer_t *lex = mp_lexer_new_from_str_len("<stdin>", vstr_str(&line), vstr_len(&line), 0);
-        mp_parse_node_t pn = mp_parse(lex, MP_PARSE_SINGLE_INPUT);
-        mp_lexer_free(lex);
-
-        if (pn != MP_PARSE_NODE_NULL) {
+        qstr parse_exc_id;
+        const char *parse_exc_msg;
+        mp_parse_node_t pn = mp_parse(lex, MP_PARSE_SINGLE_INPUT, &parse_exc_id, &parse_exc_msg);
+
+        if (pn == MP_PARSE_NODE_NULL) {
+            // parse error
+            mp_lexer_show_error_pythonic_prefix(lex);
+            printf("%s: %s\n", qstr_str(parse_exc_id), parse_exc_msg);
+            mp_lexer_free(lex);
+        } else {
+            // parse okay
+            mp_lexer_free(lex);
             mp_obj_t module_fun = mp_compile(pn, true);
             if (module_fun != mp_const_none) {
                 nlr_buf_t nlr;
@@ -455,13 +463,20 @@ bool do_file(const char *filename) {
         return false;
     }
 
-    mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT);
-    mp_lexer_free(lex);
+    qstr parse_exc_id;
+    const char *parse_exc_msg;
+    mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT, &parse_exc_id, &parse_exc_msg);
 
     if (pn == MP_PARSE_NODE_NULL) {
+        // parse error
+        mp_lexer_show_error_pythonic_prefix(lex);
+        printf("%s: %s\n", qstr_str(parse_exc_id), parse_exc_msg);
+        mp_lexer_free(lex);
         return false;
     }
 
+    mp_lexer_free(lex);
+
     mp_obj_t module_fun = mp_compile(pn, false);
     if (module_fun == mp_const_none) {
         return false;
@@ -1073,7 +1088,9 @@ soft_reset:
         // nalloc=1740;6340;6836 -> 140;4600;496 bytes for lexer, parser, compiler
         printf("lex; al=%u\n", m_get_total_bytes_allocated());
         sys_tick_delay_ms(1000);
-        mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT);
+        qstr parse_exc_id;
+        const char *parse_exc_msg;
+        mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT, &parse_exc_id, &parse_exc_msg);
         mp_lexer_free(lex);
         if (pn != MP_PARSE_NODE_NULL) {
             printf("pars;al=%u\n", m_get_total_bytes_allocated());
diff --git a/unix-cpy/main.c b/unix-cpy/main.c
index ea85e3275..7d56ceaf3 100644
--- a/unix-cpy/main.c
+++ b/unix-cpy/main.c
@@ -27,16 +27,29 @@ void do_file(const char *file) {
         mp_lexer_free(lex);
 
     } else {
-        // compile
+        // parse
+        qstr parse_exc_id;
+        const char *parse_exc_msg;
+        mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT, &parse_exc_id, &parse_exc_msg);
+
+        if (pn == MP_PARSE_NODE_NULL) {
+            // parse error
+            mp_lexer_show_error_pythonic_prefix(lex);
+            printf("%s: %s\n", qstr_str(parse_exc_id), parse_exc_msg);
+            mp_lexer_free(lex);
+            return;
+        }
 
-        mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT);
         mp_lexer_free(lex);
 
         if (pn != MP_PARSE_NODE_NULL) {
             //printf("----------------\n");
             //parse_node_show(pn, 0);
             //printf("----------------\n");
+
+            // compile
             mp_obj_t module_fun = mp_compile(pn, false);
+
             //printf("----------------\n");
 
             if (module_fun == mp_const_none) {
diff --git a/unix/main.c b/unix/main.c
index 15a4000ab..d74247e59 100644
--- a/unix/main.c
+++ b/unix/main.c
@@ -37,14 +37,20 @@ static void execute_from_lexer(mp_lexer_t *lex, mp_parse_input_kind_t input_kind
         return;
     }
 
-    mp_parse_node_t pn = mp_parse(lex, input_kind);
-    mp_lexer_free(lex);
+    qstr parse_exc_id;
+    const char *parse_exc_msg;
+    mp_parse_node_t pn = mp_parse(lex, input_kind, &parse_exc_id, &parse_exc_msg);
 
     if (pn == MP_PARSE_NODE_NULL) {
         // parse error
+        mp_lexer_show_error_pythonic_prefix(lex);
+        printf("%s: %s\n", qstr_str(parse_exc_id), parse_exc_msg);
+        mp_lexer_free(lex);
         return;
     }
 
+    mp_lexer_free(lex);
+
     //printf("----------------\n");
     //parse_node_show(pn, 0);
     //printf("----------------\n");
-- 
GitLab