From 845511af257cd33f1397c5dd05b1c9198d2be96a Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Sat, 23 Dec 2017 23:36:08 +1100
Subject: [PATCH] py/parse: Break rule data into separate act and arg arrays.

Instead of each rule being stored in ROM as a struct with rule_id, act and
arg, the act and arg parts are now in separate arrays and the rule_id part
is removed because it's not needed.  This reduces code size, by roughly one
byte per grammar rule, around 150 bytes.
---
 py/parse.c | 84 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 27 deletions(-)

diff --git a/py/parse.c b/py/parse.c
index 7fcdf2c43..bc2afb494 100644
--- a/py/parse.c
+++ b/py/parse.c
@@ -61,7 +61,7 @@
 typedef struct _rule_t {
     byte rule_id;
     byte act;
-    uint16_t arg[];
+    const uint16_t *arg;
 } rule_t;
 
 enum {
@@ -81,6 +81,8 @@ enum {
 #undef DEF_RULE_NC
 };
 
+// Define an array of actions corresponding to each rule
+STATIC const uint8_t rule_act_table[] = {
 #define or(n)                   (RULE_ACT_OR | n)
 #define and(n)                  (RULE_ACT_AND | n)
 #define and_ident(n)            (RULE_ACT_AND | n | RULE_ACT_ALLOW_IDENT)
@@ -88,35 +90,55 @@ enum {
 #define one_or_more             (RULE_ACT_LIST | 2)
 #define list                    (RULE_ACT_LIST | 1)
 #define list_with_end           (RULE_ACT_LIST | 3)
-#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
-#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
-#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)
-#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
-#define DEF_RULE_NC(rule, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
+
+#define DEF_RULE(rule, comp, kind, ...) kind,
+#define DEF_RULE_NC(rule, kind, ...)
 #include "py/grammar.h"
+#undef DEF_RULE
+#undef DEF_RULE_NC
+
+    0, // RULE_const_object
+
+#define DEF_RULE(rule, comp, kind, ...)
+#define DEF_RULE_NC(rule, kind, ...) kind,
+#include "py/grammar.h"
+#undef DEF_RULE
+#undef DEF_RULE_NC
+
 #undef or
 #undef and
+#undef and_ident
+#undef and_blank
+#undef one_or_more
 #undef list
 #undef list_with_end
+};
+
+// Define the argument data for each rule
+#define tok(t)                  (RULE_ARG_TOK | MP_TOKEN_##t)
+#define rule(r)                 (RULE_ARG_RULE | RULE_##r)
+#define opt_rule(r)             (RULE_ARG_OPT_RULE | RULE_##r)
+
+#define DEF_RULE(rule, comp, kind, ...) static const uint16_t const rule_arg_##rule[] = { __VA_ARGS__ };
+#define DEF_RULE_NC(rule, kind, ...) static const uint16_t const rule_arg_##rule[] = { __VA_ARGS__ };
+#include "py/grammar.h"
+#undef DEF_RULE
+#undef DEF_RULE_NC
+
 #undef tok
 #undef rule
 #undef opt_rule
-#undef one_or_more
-#undef DEF_RULE
-#undef DEF_RULE_NC
 
-STATIC const rule_t *const rules[] = {
-// define rules with a compile function
-#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
+// Define an array of pointers to corresponding rule data
+STATIC const uint16_t *const rule_arg_table[] = {
+#define DEF_RULE(rule, comp, kind, ...) &rule_arg_##rule[0],
 #define DEF_RULE_NC(rule, kind, ...)
 #include "py/grammar.h"
 #undef DEF_RULE
 #undef DEF_RULE_NC
     NULL, // RULE_const_object
-
-// define rules without a compile function
 #define DEF_RULE(rule, comp, kind, ...)
-#define DEF_RULE_NC(rule, kind, ...) &rule_##rule,
+#define DEF_RULE_NC(rule, kind, ...) &rule_arg_##rule[0],
 #include "py/grammar.h"
 #undef DEF_RULE
 #undef DEF_RULE_NC
@@ -139,6 +161,10 @@ STATIC const char *const rule_name_table[] = {
 };
 #endif
 
+#if (MICROPY_COMP_CONST_FOLDING && MICROPY_COMP_CONST) || !MICROPY_ENABLE_DOC_STRING
+static const rule_t rule_pass_stmt = {RULE_pass_stmt, (RULE_ACT_AND | 1), &rule_arg_pass_stmt[0]};
+#endif
+
 typedef struct _rule_stack_t {
     size_t src_line : 8 * sizeof(size_t) - 8; // maximum bits storing source line number
     size_t rule_id : 8; // this must be large enough to fit largest rule number
@@ -214,7 +240,7 @@ STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
     return ret;
 }
 
-STATIC void push_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t arg_i) {
+STATIC void push_rule(parser_t *parser, size_t src_line, uint8_t rule_id, size_t arg_i) {
     if (parser->rule_stack_top >= parser->rule_stack_alloc) {
         rule_stack_t *rs = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC);
         parser->rule_stack = rs;
@@ -222,19 +248,21 @@ STATIC void push_rule(parser_t *parser, size_t src_line, const rule_t *rule, siz
     }
     rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++];
     rs->src_line = src_line;
-    rs->rule_id = rule->rule_id;
+    rs->rule_id = rule_id;
     rs->arg_i = arg_i;
 }
 
 STATIC void push_rule_from_arg(parser_t *parser, size_t arg) {
     assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
     size_t rule_id = arg & RULE_ARG_ARG_MASK;
-    push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0);
+    push_rule(parser, parser->lexer->tok_line, rule_id, 0);
 }
 
-STATIC void pop_rule(parser_t *parser, const rule_t **rule, size_t *arg_i, size_t *src_line) {
+STATIC void pop_rule(parser_t *parser, rule_t *rule, size_t *arg_i, size_t *src_line) {
     parser->rule_stack_top -= 1;
-    *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
+    rule->rule_id = parser->rule_stack[parser->rule_stack_top].rule_id;
+    rule->act = rule_act_table[rule->rule_id];
+    rule->arg = rule_arg_table[rule->rule_id];
     *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
     *src_line = parser->rule_stack[parser->rule_stack_top].src_line;
 }
@@ -656,7 +684,7 @@ STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args
                 if (qstr_str(id)[0] == '_') {
                     pop_result(parser); // pop const(value)
                     pop_result(parser); // pop id
-                    push_result_rule(parser, 0, rules[RULE_pass_stmt], 0); // replace with "pass"
+                    push_result_rule(parser, 0, &rule_pass_stmt, 0); // replace with "pass"
                     return true;
                 }
 
@@ -782,7 +810,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
         case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
         default: top_level_rule = RULE_file_input;
     }
-    push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
+    push_rule(&parser, lex->tok_line, top_level_rule, 0);
 
     // parse!
 
@@ -797,7 +825,9 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
             break;
         }
 
-        pop_rule(&parser, &rule, &i, &rule_src_line);
+        rule_t rule_data;
+        pop_rule(&parser, &rule_data, &i, &rule_src_line);
+        rule = &rule_data;
         n = rule->act & RULE_ACT_ARG_MASK;
 
         /*
@@ -827,7 +857,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                     } else {
                         assert(kind == RULE_ARG_RULE);
                         if (i + 1 < n) {
-                            push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
+                            push_rule(&parser, rule_src_line, rule->rule_id, i + 1); // save this or-rule
                         }
                         push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
                         goto next_rule;
@@ -879,7 +909,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                             }
                         }
                     } else {
-                        push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
+                        push_rule(&parser, rule_src_line, rule->rule_id, i + 1); // save this and-rule
                         push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
                         goto next_rule;
                     }
@@ -900,7 +930,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                         // Pushing the "pass" rule here will overwrite any RULE_const_object
                         // entry that was on the result stack, allowing the GC to reclaim
                         // the memory from the const object when needed.
-                        push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
+                        push_result_rule(&parser, rule_src_line, &rule_pass_stmt, 0);
                         break;
                     }
                 }
@@ -1009,7 +1039,7 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
                             }
                         } else {
                             assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE);
-                            push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
+                            push_rule(&parser, rule_src_line, rule->rule_id, i + 1); // save this list-rule
                             push_rule_from_arg(&parser, arg); // push child of list-rule
                             goto next_rule;
                         }
-- 
GitLab