Skip to content
Snippets Groups Projects
Commit f88fc7bd authored by Damien George
Browse files

Merge branch 'pfalcon-keep-strings-uninterned'

parents 5fd5af98 5042bce8
Branches
Tags
No related merge requests found
...@@ -56,6 +56,7 @@ typedef enum { ...@@ -56,6 +56,7 @@ typedef enum {
#include "grammar.h" #include "grammar.h"
#undef DEF_RULE #undef DEF_RULE
PN_maximum_number_of, PN_maximum_number_of,
PN_string, // special node for non-interned string
} pn_kind_t; } pn_kind_t;
#define EMIT(fun) (comp->emit_method_table->fun(comp->emit)) #define EMIT(fun) (comp->emit_method_table->fun(comp->emit))
...@@ -177,6 +178,8 @@ STATIC mp_parse_node_t fold_constants(compiler_t *comp, mp_parse_node_t pn, mp_m ...@@ -177,6 +178,8 @@ STATIC mp_parse_node_t fold_constants(compiler_t *comp, mp_parse_node_t pn, mp_m
} }
break; break;
#endif #endif
case PN_string:
return pn;
} }
// fold arguments // fold arguments
...@@ -426,6 +429,9 @@ void compile_generic_all_nodes(compiler_t *comp, mp_parse_node_struct_t *pns) { ...@@ -426,6 +429,9 @@ void compile_generic_all_nodes(compiler_t *comp, mp_parse_node_struct_t *pns) {
#if MICROPY_EMIT_CPYTHON #if MICROPY_EMIT_CPYTHON
STATIC bool cpython_c_tuple_is_const(mp_parse_node_t pn) { STATIC bool cpython_c_tuple_is_const(mp_parse_node_t pn) {
if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_string)) {
return true;
}
if (!MP_PARSE_NODE_IS_LEAF(pn)) { if (!MP_PARSE_NODE_IS_LEAF(pn)) {
return false; return false;
} }
...@@ -435,9 +441,7 @@ STATIC bool cpython_c_tuple_is_const(mp_parse_node_t pn) { ...@@ -435,9 +441,7 @@ STATIC bool cpython_c_tuple_is_const(mp_parse_node_t pn) {
return true; return true;
} }
STATIC void cpython_c_print_quoted_str(vstr_t *vstr, qstr qstr, bool bytes) { STATIC void cpython_c_print_quoted_str(vstr_t *vstr, const char *str, uint len, bool bytes) {
uint len;
const byte *str = qstr_data(qstr, &len);
bool has_single_quote = false; bool has_single_quote = false;
bool has_double_quote = false; bool has_double_quote = false;
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
...@@ -476,6 +480,12 @@ STATIC void cpython_c_print_quoted_str(vstr_t *vstr, qstr qstr, bool bytes) { ...@@ -476,6 +480,12 @@ STATIC void cpython_c_print_quoted_str(vstr_t *vstr, qstr qstr, bool bytes) {
} }
STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vstr_t *vstr) { STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vstr_t *vstr) {
if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_string)) {
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
cpython_c_print_quoted_str(vstr, (const char*)pns->nodes[0], (machine_uint_t)pns->nodes[1], false);
return;
}
assert(MP_PARSE_NODE_IS_LEAF(pn)); assert(MP_PARSE_NODE_IS_LEAF(pn));
if (MP_PARSE_NODE_IS_SMALL_INT(pn)) { if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
vstr_printf(vstr, INT_FMT, MP_PARSE_NODE_LEAF_SMALL_INT(pn)); vstr_printf(vstr, INT_FMT, MP_PARSE_NODE_LEAF_SMALL_INT(pn));
...@@ -487,8 +497,13 @@ STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vst ...@@ -487,8 +497,13 @@ STATIC void cpython_c_tuple_emit_const(compiler_t *comp, mp_parse_node_t pn, vst
case MP_PARSE_NODE_ID: assert(0); case MP_PARSE_NODE_ID: assert(0);
case MP_PARSE_NODE_INTEGER: vstr_printf(vstr, "%s", qstr_str(arg)); break; case MP_PARSE_NODE_INTEGER: vstr_printf(vstr, "%s", qstr_str(arg)); break;
case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break; case MP_PARSE_NODE_DECIMAL: vstr_printf(vstr, "%s", qstr_str(arg)); break;
case MP_PARSE_NODE_STRING: cpython_c_print_quoted_str(vstr, arg, false); break; case MP_PARSE_NODE_STRING:
case MP_PARSE_NODE_BYTES: cpython_c_print_quoted_str(vstr, arg, true); break; case MP_PARSE_NODE_BYTES: {
uint len;
const byte *str = qstr_data(arg, &len);
cpython_c_print_quoted_str(vstr, (const char*)str, len, MP_PARSE_NODE_LEAF_KIND(pn) == MP_PARSE_NODE_BYTES);
break;
}
case MP_PARSE_NODE_TOKEN: case MP_PARSE_NODE_TOKEN:
switch (arg) { switch (arg) {
case MP_TOKEN_KW_FALSE: vstr_printf(vstr, "False"); break; case MP_TOKEN_KW_FALSE: vstr_printf(vstr, "False"); break;
...@@ -2058,7 +2073,8 @@ void compile_expr_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) { ...@@ -2058,7 +2073,8 @@ void compile_expr_stmt(compiler_t *comp, mp_parse_node_struct_t *pns) {
} else { } else {
// for non-REPL, evaluate then discard the expression // for non-REPL, evaluate then discard the expression
if (MP_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !MP_PARSE_NODE_IS_ID(pns->nodes[0])) { if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0]) && !MP_PARSE_NODE_IS_ID(pns->nodes[0]))
|| MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)) {
// do nothing with a lonely constant // do nothing with a lonely constant
} else { } else {
compile_node(comp, pns->nodes[0]); // just an expression compile_node(comp, pns->nodes[0]); // just an expression
...@@ -2498,26 +2514,40 @@ void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) { ...@@ -2498,26 +2514,40 @@ void compile_atom_string(compiler_t *comp, mp_parse_node_struct_t *pns) {
int n_bytes = 0; int n_bytes = 0;
int string_kind = MP_PARSE_NODE_NULL; int string_kind = MP_PARSE_NODE_NULL;
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
assert(MP_PARSE_NODE_IS_LEAF(pns->nodes[i])); int pn_kind;
int pn_kind = MP_PARSE_NODE_LEAF_KIND(pns->nodes[i]); if (MP_PARSE_NODE_IS_LEAF(pns->nodes[i])) {
pn_kind = MP_PARSE_NODE_LEAF_KIND(pns->nodes[i]);
assert(pn_kind == MP_PARSE_NODE_STRING || pn_kind == MP_PARSE_NODE_BYTES); assert(pn_kind == MP_PARSE_NODE_STRING || pn_kind == MP_PARSE_NODE_BYTES);
n_bytes += qstr_len(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
} else {
assert(MP_PARSE_NODE_IS_STRUCT(pns->nodes[i]));
mp_parse_node_struct_t *pns_string = (mp_parse_node_struct_t*)pns->nodes[i];
assert(MP_PARSE_NODE_STRUCT_KIND(pns_string) == PN_string);
pn_kind = MP_PARSE_NODE_STRING;
n_bytes += (machine_uint_t)pns_string->nodes[1];
}
if (i == 0) { if (i == 0) {
string_kind = pn_kind; string_kind = pn_kind;
} else if (pn_kind != string_kind) { } else if (pn_kind != string_kind) {
compile_syntax_error(comp, (mp_parse_node_t)pns, "cannot mix bytes and nonbytes literals"); compile_syntax_error(comp, (mp_parse_node_t)pns, "cannot mix bytes and nonbytes literals");
return; return;
} }
n_bytes += qstr_len(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]));
} }
// concatenate string/bytes // concatenate string/bytes
byte *q_ptr; byte *q_ptr;
byte *s_dest = qstr_build_start(n_bytes, &q_ptr); byte *s_dest = qstr_build_start(n_bytes, &q_ptr);
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
if (MP_PARSE_NODE_IS_LEAF(pns->nodes[i])) {
uint s_len; uint s_len;
const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len); const byte *s = qstr_data(MP_PARSE_NODE_LEAF_ARG(pns->nodes[i]), &s_len);
memcpy(s_dest, s, s_len); memcpy(s_dest, s, s_len);
s_dest += s_len; s_dest += s_len;
} else {
mp_parse_node_struct_t *pns_string = (mp_parse_node_struct_t*)pns->nodes[i];
memcpy(s_dest, (const char*)pns_string->nodes[0], (machine_uint_t)pns_string->nodes[1]);
s_dest += (machine_uint_t)pns_string->nodes[1];
}
} }
qstr q = qstr_build_end(q_ptr); qstr q = qstr_build_end(q_ptr);
...@@ -2848,6 +2878,9 @@ void compile_node(compiler_t *comp, mp_parse_node_t pn) { ...@@ -2848,6 +2878,9 @@ void compile_node(compiler_t *comp, mp_parse_node_t pn) {
} else { } else {
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
EMIT_ARG(set_line_number, pns->source_line); EMIT_ARG(set_line_number, pns->source_line);
if (MP_PARSE_NODE_STRUCT_KIND(pns) == PN_string) {
EMIT_ARG(load_const_str, qstr_from_strn((const char*)pns->nodes[0], (machine_uint_t)pns->nodes[1]), false);
} else {
compile_function_t f = compile_function[MP_PARSE_NODE_STRUCT_KIND(pns)]; compile_function_t f = compile_function[MP_PARSE_NODE_STRUCT_KIND(pns)];
if (f == NULL) { if (f == NULL) {
printf("node %u cannot be compiled\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns)); printf("node %u cannot be compiled\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns));
...@@ -2860,6 +2893,7 @@ void compile_node(compiler_t *comp, mp_parse_node_t pn) { ...@@ -2860,6 +2893,7 @@ void compile_node(compiler_t *comp, mp_parse_node_t pn) {
} }
} }
} }
}
void compile_scope_func_lambda_param(compiler_t *comp, mp_parse_node_t pn, pn_kind_t pn_name, pn_kind_t pn_star, pn_kind_t pn_dbl_star, bool allow_annotations) { void compile_scope_func_lambda_param(compiler_t *comp, mp_parse_node_t pn, pn_kind_t pn_name, pn_kind_t pn_star, pn_kind_t pn_dbl_star, bool allow_annotations) {
// TODO verify that *k and **k are last etc // TODO verify that *k and **k are last etc
...@@ -3033,15 +3067,15 @@ STATIC void check_for_doc_string(compiler_t *comp, mp_parse_node_t pn) { ...@@ -3033,15 +3067,15 @@ STATIC void check_for_doc_string(compiler_t *comp, mp_parse_node_t pn) {
// check the first statement for a doc string // check the first statement for a doc string
if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_expr_stmt)) { if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, PN_expr_stmt)) {
mp_parse_node_struct_t* pns = (mp_parse_node_struct_t*)pn; mp_parse_node_struct_t* pns = (mp_parse_node_struct_t*)pn;
if (MP_PARSE_NODE_IS_LEAF(pns->nodes[0])) { if ((MP_PARSE_NODE_IS_LEAF(pns->nodes[0])
int kind = MP_PARSE_NODE_LEAF_KIND(pns->nodes[0]); && MP_PARSE_NODE_LEAF_KIND(pns->nodes[0]) == MP_PARSE_NODE_STRING)
if (kind == MP_PARSE_NODE_STRING) { || MP_PARSE_NODE_IS_STRUCT_KIND(pns->nodes[0], PN_string)) {
compile_node(comp, pns->nodes[0]); // a doc string // compile the doc string
// store doc string compile_node(comp, pns->nodes[0]);
// store the doc string
EMIT_ARG(store_id, MP_QSTR___doc__); EMIT_ARG(store_id, MP_QSTR___doc__);
} }
} }
}
#endif #endif
} }
......
...@@ -66,6 +66,11 @@ ...@@ -66,6 +66,11 @@
#define MICROPY_ALLOC_PARSE_RESULT_INC (16) #define MICROPY_ALLOC_PARSE_RESULT_INC (16)
#endif #endif
// Strings this length or less will be interned by the parser.
// push_result_token compares the string token's length against this value:
// a literal at or below the limit is always turned into a qstr, while a
// longer literal becomes a qstr only if an identical string is already
// interned; otherwise it is kept as a non-interned string node.
// Can be overridden by defining it before this point.
#ifndef MICROPY_ALLOC_PARSE_INTERN_STRING_LEN
#define MICROPY_ALLOC_PARSE_INTERN_STRING_LEN (10)
#endif
// Initial amount for ids in a scope // Initial amount for ids in a scope
#ifndef MICROPY_ALLOC_SCOPE_ID_INIT #ifndef MICROPY_ALLOC_SCOPE_ID_INIT
#define MICROPY_ALLOC_SCOPE_ID_INIT (4) #define MICROPY_ALLOC_SCOPE_ID_INIT (4)
......
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <assert.h> #include <assert.h>
#include <string.h>
#include "misc.h" #include "misc.h"
#include "mpconfig.h" #include "mpconfig.h"
...@@ -70,6 +71,7 @@ enum { ...@@ -70,6 +71,7 @@ enum {
#include "grammar.h" #include "grammar.h"
#undef DEF_RULE #undef DEF_RULE
RULE_maximum_number_of, RULE_maximum_number_of,
RULE_string, // special node for non-interned string
}; };
#define or(n) (RULE_ACT_OR | n) #define or(n) (RULE_ACT_OR | n)
...@@ -170,26 +172,26 @@ mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) { ...@@ -170,26 +172,26 @@ mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
return (mp_parse_node_t)(kind | (arg << 5)); return (mp_parse_node_t)(kind | (arg << 5));
} }
uint mp_parse_node_free(mp_parse_node_t pn) { void mp_parse_node_free(mp_parse_node_t pn) {
uint cnt = 0;
if (MP_PARSE_NODE_IS_STRUCT(pn)) { if (MP_PARSE_NODE_IS_STRUCT(pn)) {
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn; mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns); uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns); uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns);
if (rule_id == RULE_string) {
return;
}
bool adjust = ADD_BLANK_NODE(rule_id); bool adjust = ADD_BLANK_NODE(rule_id);
if (adjust) { if (adjust) {
n--; n--;
} }
for (uint i = 0; i < n; i++) { for (uint i = 0; i < n; i++) {
cnt += mp_parse_node_free(pns->nodes[i]); mp_parse_node_free(pns->nodes[i]);
} }
if (adjust) { if (adjust) {
n++; n++;
} }
m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns); m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns);
cnt++;
} }
return cnt;
} }
#if MICROPY_DEBUG_PRINTERS #if MICROPY_DEBUG_PRINTERS
...@@ -219,7 +221,11 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) { ...@@ -219,7 +221,11 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) {
default: assert(0); default: assert(0);
} }
} else { } else {
// node must be a mp_parse_node_struct_t
mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
} else {
uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns); uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
#ifdef USE_RULE_NAME #ifdef USE_RULE_NAME
printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n); printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n);
...@@ -231,6 +237,7 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) { ...@@ -231,6 +237,7 @@ void mp_parse_node_print(mp_parse_node_t pn, int indent) {
} }
} }
} }
}
#endif // MICROPY_DEBUG_PRINTERS #endif // MICROPY_DEBUG_PRINTERS
/* /*
...@@ -274,6 +281,21 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) { ...@@ -274,6 +281,21 @@ STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
parser->result_stack[parser->result_stack_top++] = pn; parser->result_stack[parser->result_stack_top++] = pn;
} }
// Push a RULE_string struct node holding a private copy of a string literal
// that was not interned as a qstr.
//   parser   - parser whose result stack receives the new node
//   src_line - source line number stored in the node
//   str, len - bytes of the literal; exactly len bytes are copied and no
//              terminator is appended
// Node layout: nodes[0] is the address of the copied bytes, nodes[1] is the
// length.  The node count (2) is stored above the low 8 bits of
// kind_num_nodes, next to the rule id.
// If the node struct cannot be allocated, memory_error() is called and
// nothing is pushed.
// NOTE(review): mp_parse_node_free returns immediately for RULE_string nodes,
// so neither the copy made here nor the node appears to be released there --
// confirm that this is intended.
STATIC void push_result_string(parser_t *parser, int src_line, const char *str, uint len) {
    mp_parse_node_struct_t *node = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2);
    if (node != NULL) {
        // fill in the header: rule id plus a node count of 2
        node->kind_num_nodes = RULE_string | (2 << 8);
        node->source_line = src_line;

        // take a copy of the literal's bytes
        char *copy = m_new(char, len);
        memcpy(copy, str, len);

        // the two "child" slots carry the data pointer and the length
        node->nodes[1] = len;
        node->nodes[0] = (machine_int_t)copy;

        push_result_node(parser, (mp_parse_node_t)node);
    } else {
        // could not allocate the node struct
        memory_error(parser);
    }
}
STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) { STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
const mp_token_t *tok = mp_lexer_cur(lex); const mp_token_t *tok = mp_lexer_cur(lex);
mp_parse_node_t pn; mp_parse_node_t pn;
...@@ -319,7 +341,24 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) { ...@@ -319,7 +341,24 @@ STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len)); pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
} }
} else if (tok->kind == MP_TOKEN_STRING) { } else if (tok->kind == MP_TOKEN_STRING) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn(tok->str, tok->len)); // Don't automatically intern all strings. doc strings (which are usually large)
// will be discarded by the compiler, and so we shouldn't intern them.
qstr qst = MP_QSTR_NULL;
if (tok->len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
// intern short strings
qst = qstr_from_strn(tok->str, tok->len);
} else {
// check if this string is already interned
qst = qstr_find_strn((const byte*)tok->str, tok->len);
}
if (qst != MP_QSTR_NULL) {
// qstr exists, make a leaf node
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qst);
} else {
// not interned, make a node holding a pointer to the string data
push_result_string(parser, mp_lexer_cur(lex)->src_line, tok->str, tok->len);
return;
}
} else if (tok->kind == MP_TOKEN_BYTES) { } else if (tok->kind == MP_TOKEN_BYTES) {
pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len)); pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
} else { } else {
...@@ -516,14 +555,13 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p ...@@ -516,14 +555,13 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_p
} }
} }
#if 0 && !MICROPY_ENABLE_DOC_STRING #if !MICROPY_EMIT_CPYTHON && !MICROPY_ENABLE_DOC_STRING
// this code discards lonely statement, such as doc strings // this code discards lonely statements, such as doc strings
// problem is that doc strings have already been interned, so this doesn't really help reduce RAM usage
if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) { if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
mp_parse_node_t p = peek_result(&parser, 1); mp_parse_node_t p = peek_result(&parser, 1);
if (MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) { if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
pop_result(parser); pop_result(&parser);
pop_result(parser); pop_result(&parser);
push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0); push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
break; break;
} }
......
...@@ -82,7 +82,7 @@ typedef struct _mp_parse_node_struct_t { ...@@ -82,7 +82,7 @@ typedef struct _mp_parse_node_struct_t {
#define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8) #define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg); mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg);
uint mp_parse_node_free(mp_parse_node_t pn); void mp_parse_node_free(mp_parse_node_t pn);
void mp_parse_node_print(mp_parse_node_t pn, int indent); void mp_parse_node_print(mp_parse_node_t pn, int indent);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment