From 58ba4c3b4c12e9bf6b8731fd26e0c9cac527122f Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Thu, 10 Apr 2014 14:27:31 +0000
Subject: [PATCH] py: Check explicitly for memory allocation failure in parser.

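Previously, a failed malloc/realloc in the parser would throw an
exception, which was not caught.  I think it's better to keep the
parser free from NLR (exception throwing), hence this patch.

The parser's stack growth and parse-node allocation now use the
non-throwing m_renew_maybe/m_new_obj_var_maybe allocators.  On failure
the parser sets a had_memory_error flag, stops processing rules, and
mp_parse reports the new MP_PARSE_ERROR_MEMORY error kind, which the
parse helpers turn into a MemoryError.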
---
 py/malloc.c      | 20 +++++++++++++++
 py/misc.h        |  3 +++
 py/objexcept.c   |  4 +--
 py/parse.c       | 66 ++++++++++++++++++++++++++++++++++++++----------
 py/parse.h       |  1 +
 py/parsehelper.c |  8 ++++++
 6 files changed, 86 insertions(+), 16 deletions(-)

diff --git a/py/malloc.c b/py/malloc.c
index 1d18a9a64..db2578d9a 100644
--- a/py/malloc.c
+++ b/py/malloc.c
@@ -118,6 +118,26 @@ void *m_realloc(void *ptr, int old_num_bytes, int new_num_bytes) {
     return new_ptr;
 }
 
+void *m_realloc_maybe(void *ptr, int old_num_bytes, int new_num_bytes) {
+    void *new_ptr = realloc(ptr, new_num_bytes);
+    if (new_ptr == NULL) {
+        return NULL;
+    }
+#if MICROPY_MEM_STATS
+    // At first thought, "Total bytes allocated" should only grow,
+    // after all, it's *total*. But consider for example 2K block
+    // shrunk to 1K and then grown to 2K again. It's still 2K
+    // allocated total. If we process only positive increments,
+    // we'll count 3K.
+    int diff = new_num_bytes - old_num_bytes;
+    total_bytes_allocated += diff;
+    current_bytes_allocated += diff;
+    UPDATE_PEAK();
+#endif
+    DEBUG_printf("realloc %p, %d, %d : %p\n", ptr, old_num_bytes, new_num_bytes, new_ptr);
+    return new_ptr;
+}
+
 void m_free(void *ptr, int num_bytes) {
     if (ptr != NULL) {
         free(ptr);
diff --git a/py/misc.h b/py/misc.h
index 3f538b98e..002a97ffe 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -27,12 +27,14 @@ typedef unsigned int uint;
 #define m_new0(type, num) ((type*)(m_malloc0(sizeof(type) * (num))))
 #define m_new_obj(type) (m_new(type, 1))
 #define m_new_obj_var(obj_type, var_type, var_num) ((obj_type*)m_malloc(sizeof(obj_type) + sizeof(var_type) * (var_num)))
+#define m_new_obj_var_maybe(obj_type, var_type, var_num) ((obj_type*)m_malloc_maybe(sizeof(obj_type) + sizeof(var_type) * (var_num)))
 #if MICROPY_ENABLE_FINALISER
 #define m_new_obj_with_finaliser(type) ((type*)(m_malloc_with_finaliser(sizeof(type))))
 #else
 #define m_new_obj_with_finaliser(type) m_new_obj(type)
 #endif
 #define m_renew(type, ptr, old_num, new_num) ((type*)(m_realloc((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num))))
+#define m_renew_maybe(type, ptr, old_num, new_num) ((type*)(m_realloc_maybe((ptr), sizeof(type) * (old_num), sizeof(type) * (new_num))))
 #define m_del(type, ptr, num) m_free(ptr, sizeof(type) * (num))
 #define m_del_obj(type, ptr) (m_del(type, ptr, 1))
 #define m_del_var(obj_type, var_type, var_num, ptr) (m_free(ptr, sizeof(obj_type) + sizeof(var_type) * (var_num)))
@@ -42,6 +44,7 @@ void *m_malloc_maybe(int num_bytes);
 void *m_malloc_with_finaliser(int num_bytes);
 void *m_malloc0(int num_bytes);
 void *m_realloc(void *ptr, int old_num_bytes, int new_num_bytes);
+void *m_realloc_maybe(void *ptr, int old_num_bytes, int new_num_bytes);
 void m_free(void *ptr, int num_bytes);
 void *m_malloc_fail(int num_bytes);
 
diff --git a/py/objexcept.c b/py/objexcept.c
index 1ef3e31dc..781a00405 100644
--- a/py/objexcept.c
+++ b/py/objexcept.c
@@ -54,7 +54,7 @@ STATIC mp_obj_t mp_obj_exception_make_new(mp_obj_t type_in, uint n_args, uint n_
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "%s does not take keyword arguments", mp_obj_get_type_str(type_in)));
     }
 
-    mp_obj_exception_t *o = m_malloc_maybe(sizeof(mp_obj_exception_t) + n_args * sizeof(mp_obj_t));
+    mp_obj_exception_t *o = m_new_obj_var_maybe(mp_obj_exception_t, mp_obj_t, n_args);
     if (o == NULL) {
         // Couldn't allocate heap memory; use local data instead.
         o = &mp_emergency_exception_obj;
@@ -205,7 +205,7 @@ mp_obj_t mp_obj_new_exception_msg_varg(const mp_obj_type_t *exc_type, const char
     assert(exc_type->make_new == mp_obj_exception_make_new);
 
     // make exception object
-    mp_obj_exception_t *o = m_malloc_maybe(sizeof(mp_obj_exception_t) + 1 * sizeof(mp_obj_t));
+    mp_obj_exception_t *o = m_new_obj_var_maybe(mp_obj_exception_t, mp_obj_t, 1);
     if (o == NULL) {
         // Couldn't allocate heap memory; use local data instead.
         // Unfortunately, we won't be able to format the string...
diff --git a/py/parse.c b/py/parse.c
index 1381f1293..6c899ce56 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -86,6 +86,8 @@ typedef struct _rule_stack_t {
 } rule_stack_t;
 
 typedef struct _parser_t {
+    bool had_memory_error;
+
     uint rule_stack_alloc;
     uint rule_stack_top;
     rule_stack_t *rule_stack;
@@ -97,9 +99,21 @@ typedef struct _parser_t {
     mp_lexer_t *lexer;
 } parser_t;
 
+STATIC inline void memory_error(parser_t *parser) {
+    parser->had_memory_error = true;
+}
+
 STATIC void push_rule(parser_t *parser, int src_line, const rule_t *rule, int arg_i) {
+    if (parser->had_memory_error) {
+        return;
+    }
     if (parser->rule_stack_top >= parser->rule_stack_alloc) {
-        parser->rule_stack = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc * 2);
+        rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc * 2);
+        if (rs == NULL) {
+            memory_error(parser);
+            return;
+        }
+        parser->rule_stack = rs;
         parser->rule_stack_alloc *= 2;
     }
     rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++];
@@ -116,6 +130,7 @@ STATIC void push_rule_from_arg(parser_t *parser, uint arg) {
 }
 
 STATIC void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *src_line) {
+    assert(!parser->had_memory_error);
     parser->rule_stack_top -= 1;
     *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
     *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
@@ -129,15 +144,6 @@ mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
     return (mp_parse_node_t)(kind | (arg << 5));
 }
 
-//int num_parse_nodes_allocated = 0;
-mp_parse_node_struct_t *parse_node_new_struct(int src_line, int rule_id, int num_args) {
-    mp_parse_node_struct_t *pn = m_new_obj_var(mp_parse_node_struct_t, mp_parse_node_t, num_args);
-    pn->source_line = src_line;
-    pn->kind_num_nodes = (rule_id & 0xff) | (num_args << 8);
-    //num_parse_nodes_allocated += 1;
-    return pn;
-}
-
 uint mp_parse_node_free(mp_parse_node_t pn) {
     uint cnt = 0;
     if (MP_PARSE_NODE_IS_STRUCT(pn)) {
@@ -211,18 +217,32 @@ STATIC void result_stack_show(parser_t *parser) {
 */
 
 STATIC mp_parse_node_t pop_result(parser_t *parser) {
+    if (parser->had_memory_error) {
+        return MP_PARSE_NODE_NULL;
+    }
     assert(parser->result_stack_top > 0);
     return parser->result_stack[--parser->result_stack_top];
 }
 
 STATIC mp_parse_node_t peek_result(parser_t *parser, int pos) {
+    if (parser->had_memory_error) {
+        return MP_PARSE_NODE_NULL;
+    }
     assert(parser->result_stack_top > pos);
     return parser->result_stack[parser->result_stack_top - 1 - pos];
 }
 
 STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
+    if (parser->had_memory_error) {
+        return;
+    }
     if (parser->result_stack_top >= parser->result_stack_alloc) {
-        parser->result_stack = m_renew(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc * 2);
+        mp_parse_node_t *pn = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc * 2);
+        if (pn == NULL) {
+            memory_error(parser);
+            return;
+        }
+        parser->result_stack = pn;
         parser->result_stack_alloc *= 2;
     }
     parser->result_stack[parser->result_stack_top++] = pn;
@@ -283,7 +303,13 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
 }
 
 STATIC void push_result_rule(parser_t *parser, int src_line, const rule_t *rule, int num_args) {
-    mp_parse_node_struct_t *pn = parse_node_new_struct(src_line, rule->rule_id, num_args);
+    mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
+    if (pn == NULL) {
+        memory_error(parser);
+        return;
+    }
+    pn->source_line = src_line;
+    pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);
     for (int i = num_args; i > 0; i--) {
         pn->nodes[i - 1] = pop_result(parser);
     }
@@ -296,6 +322,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
 
     parser_t *parser = m_new_obj(parser_t);
 
+    parser->had_memory_error = false;
+
     parser->rule_stack_alloc = 64;
     parser->rule_stack_top = 0;
     parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc);
@@ -327,7 +355,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
 
     for (;;) {
         next_rule:
-        if (parser->rule_stack_top == 0) {
+        if (parser->rule_stack_top == 0 || parser->had_memory_error) {
             break;
         }
 
@@ -596,6 +624,16 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
         }
     }
 
+    mp_parse_node_t result;
+
+    // check if we had a memory error
+    if (parser->had_memory_error) {
+        *parse_error_kind_out = MP_PARSE_ERROR_MEMORY;
+        result = MP_PARSE_NODE_NULL;
+        goto finished;
+
+    }
+
     // check we are at the end of the token stream
     if (!mp_lexer_is_kind(lex, MP_TOKEN_END)) {
         goto syntax_error;
@@ -609,7 +647,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
 
     // get the root parse node that we created
     assert(parser->result_stack_top == 1);
-    mp_parse_node_t result = parser->result_stack[0];
+    result = parser->result_stack[0];
 
 finished:
     // free the memory that we don't need anymore
diff --git a/py/parse.h b/py/parse.h
index 135de47d1..29bcf36e7 100644
--- a/py/parse.h
+++ b/py/parse.h
@@ -67,6 +67,7 @@ typedef enum {
 } mp_parse_input_kind_t;
 
 typedef enum {
+    MP_PARSE_ERROR_MEMORY,
     MP_PARSE_ERROR_UNEXPECTED_INDENT,
     MP_PARSE_ERROR_UNMATCHED_UNINDENT,
     MP_PARSE_ERROR_INVALID_SYNTAX,
diff --git a/py/parsehelper.c b/py/parsehelper.c
index 3177e9a34..e069657b1 100644
--- a/py/parsehelper.c
+++ b/py/parsehelper.c
@@ -11,6 +11,7 @@
 #include "obj.h"
 #include "parsehelper.h"
 
+#define STR_MEMORY "parser could not allocate enough memory"
 #define STR_UNEXPECTED_INDENT "unexpected indent"
 #define STR_UNMATCHED_UNINDENT "unindent does not match any outer indentation level"
 #define STR_INVALID_SYNTAX "invalid syntax"
@@ -18,6 +19,10 @@
 void mp_parse_show_exception(mp_lexer_t *lex, mp_parse_error_kind_t parse_error_kind) {
     printf("  File \"%s\", line %d, column %d\n", qstr_str(mp_lexer_source_name(lex)), mp_lexer_cur(lex)->src_line, mp_lexer_cur(lex)->src_column);
     switch (parse_error_kind) {
+        case MP_PARSE_ERROR_MEMORY:
+            printf("MemoryError: %s\n", STR_MEMORY);
+            break;
+
         case MP_PARSE_ERROR_UNEXPECTED_INDENT:
             printf("IndentationError: %s\n", STR_UNEXPECTED_INDENT);
             break;
@@ -36,6 +41,9 @@ void mp_parse_show_exception(mp_lexer_t *lex, mp_parse_error_kind_t parse_error_
 mp_obj_t mp_parse_make_exception(mp_parse_error_kind_t parse_error_kind) {
     // TODO add source file and line number to exception?
     switch (parse_error_kind) {
+        case MP_PARSE_ERROR_MEMORY:
+            return mp_obj_new_exception_msg(&mp_type_MemoryError, STR_MEMORY);
+
         case MP_PARSE_ERROR_UNEXPECTED_INDENT:
             return mp_obj_new_exception_msg(&mp_type_IndentationError, STR_UNEXPECTED_INDENT);
 
-- 
GitLab