diff --git a/py/bc.h b/py/bc.h index 35847f458944ede0b6e1371cc4a7e9558ba0e0e2..6a7b91690f6bb53046ff174288a8605764c15412 100644 --- a/py/bc.h +++ b/py/bc.h @@ -1,2 +1,2 @@ mp_obj_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_args, uint n_state); -bool mp_execute_byte_code_2(const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t **sp_in_out); +bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t **sp_in_out); diff --git a/py/builtineval.c b/py/builtineval.c index a8a7a476a3bed7e66b8431ce5c1bfa4b3aa5f53b..c7bd6b6298e3e4ea6ba09beab817907b52bae875 100644 --- a/py/builtineval.c +++ b/py/builtineval.c @@ -28,6 +28,7 @@ static mp_obj_t mp_builtin_eval(mp_obj_t o_in) { qstr parse_exc_id; const char *parse_exc_msg; mp_parse_node_t pn = mp_parse(lex, MP_PARSE_EVAL_INPUT, &parse_exc_id, &parse_exc_msg); + qstr source_name = mp_lexer_source_name(lex); mp_lexer_free(lex); if (pn == MP_PARSE_NODE_NULL) { @@ -36,7 +37,7 @@ static mp_obj_t mp_builtin_eval(mp_obj_t o_in) { } // compile the string - mp_obj_t module_fun = mp_compile(pn, false); + mp_obj_t module_fun = mp_compile(pn, source_name, false); if (module_fun == mp_const_none) { // TODO handle compile error correctly diff --git a/py/builtinimport.c b/py/builtinimport.c index 2eca36fbc679bd1f3e8bb811363abe2f2f20582a..e3c630a0a4ac2ab56d1d4e81a7890adf9447e449 100644 --- a/py/builtinimport.c +++ b/py/builtinimport.c @@ -51,6 +51,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) { qstr parse_exc_id; const char *parse_exc_msg; mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT, &parse_exc_id, &parse_exc_msg); + qstr source_name = mp_lexer_source_name(lex); mp_lexer_free(lex); if (pn == MP_PARSE_NODE_NULL) { @@ -61,7 +62,7 @@ mp_obj_t mp_builtin___import__(int n_args, mp_obj_t *args) { } // compile the imported script - mp_obj_t module_fun = mp_compile(pn, false); + mp_obj_t module_fun = mp_compile(pn, source_name, false); if (module_fun == mp_const_none) { // TODO handle compile error correctly diff --git a/py/compile.c b/py/compile.c index b24e94a8dc4eee9682d01bb9c60300cb9b43e7e2..cf87e018d9117e77dff5eed15963d5d12f81b37c 100644 --- a/py/compile.c +++ b/py/compile.c @@ -2505,6 +2505,7 @@ void compile_node(compiler_t *comp, mp_parse_node_t pn) { } } else { mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn; + EMIT(set_line_number, pns->source_line); compile_function_t f = compile_function[MP_PARSE_NODE_STRUCT_KIND(pns)]; if (f == NULL) { printf("node %u cannot be compiled\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns)); @@ -3024,7 +3025,7 @@ void compile_scope_compute_things(compiler_t *comp, scope_t *scope) { } } -mp_obj_t mp_compile(mp_parse_node_t pn, bool is_repl) { +mp_obj_t mp_compile(mp_parse_node_t pn, qstr source_file, bool is_repl) { compiler_t *comp = m_new(compiler_t, 1); comp->is_repl = is_repl; @@ -3131,7 +3132,7 @@ mp_obj_t mp_compile(mp_parse_node_t pn, bool is_repl) { default: if (emit_bc == NULL) { - emit_bc = emit_bc_new(max_num_labels); + emit_bc = emit_bc_new(source_file, max_num_labels); } comp->emit = emit_bc; comp->emit_method_table = &emit_bc_method_table; diff --git a/py/compile.h b/py/compile.h index 770c2524dae39f40a23c6038ffc6901d3cfe8ae2..552d36fa5a7d71f1369e84519e79e24d83b2fb12 100644 --- a/py/compile.h +++ b/py/compile.h @@ -1 +1 @@ -mp_obj_t mp_compile(mp_parse_node_t pn, bool is_repl); +mp_obj_t mp_compile(mp_parse_node_t pn, qstr source_file, bool is_repl); diff --git a/py/emit.h b/py/emit.h index fd41764d40de09bb8179f1f51e9d5726574a42eb..521b43cae2b66e5ad7e0adef5e1aad3fa728ad12 100644 --- a/py/emit.h +++ b/py/emit.h @@ -23,6 +23,7 @@ typedef struct _emit_method_table_t { bool (*last_emit_was_return_value)(emit_t *emit); int (*get_stack_size)(emit_t *emit); void (*set_stack_size)(emit_t *emit, int size); + void (*set_line_number)(emit_t *emit, int line); void (*load_id)(emit_t *emit, qstr qstr); void (*store_id)(emit_t *emit, qstr qstr); @@ -119,7 +120,7 @@ extern const emit_method_table_t emit_native_thumb_method_table; emit_t *emit_pass1_new(qstr qstr___class__); void emit_pass1_free(emit_t *emit); emit_t *emit_cpython_new(uint max_num_labels); -emit_t *emit_bc_new(uint max_num_labels); +emit_t *emit_bc_new(qstr source_file, uint max_num_labels); emit_t *emit_native_x64_new(uint max_num_labels); emit_t *emit_native_thumb_new(uint max_num_labels); diff --git a/py/emitbc.c b/py/emitbc.c index fdef070a5017f8a76c0c9386599188d9ed7d14e2..e1a81adac1616d8ddd9b07c9f5b361ba6935cf8d 100644 --- a/py/emitbc.c +++ b/py/emitbc.c @@ -21,79 +21,104 @@ struct _emit_t { scope_t *scope; + qstr source_file; + uint last_source_line_offset; + uint last_source_line; + uint max_num_labels; uint *label_offsets; - uint code_offset; - uint code_size; - byte *code_base; + uint code_info_offset; + uint code_info_size; + uint byte_code_offset; + uint byte_code_size; + byte *code_base; // stores both byte code and code info byte dummy_data[8]; }; -emit_t *emit_bc_new(uint max_num_labels) { - emit_t *emit = m_new(emit_t, 1); +emit_t *emit_bc_new(qstr source_file, uint max_num_labels) { + emit_t *emit = m_new0(emit_t, 1); + emit->source_file = source_file; emit->max_num_labels = max_num_labels; emit->label_offsets = m_new(uint, emit->max_num_labels); - emit->code_offset = 0; - emit->code_size = 0; - emit->code_base = NULL; return emit; } -uint emit_bc_get_code_size(emit_t* emit) { - return emit->code_size; +// all functions must go through this one to emit code info +static byte* emit_get_cur_to_write_code_info(emit_t* emit, int num_bytes_to_write) { + //printf("emit %d\n", num_bytes_to_write); + if (emit->pass < PASS_3) { + emit->code_info_offset += num_bytes_to_write; + return emit->dummy_data; + } else { + assert(emit->code_info_offset + num_bytes_to_write <= emit->code_info_size); + byte *c = emit->code_base + emit->code_info_offset; + emit->code_info_offset += num_bytes_to_write; + return c; + } } -void* emit_bc_get_code(emit_t* emit) { - return emit->code_base; +static void emit_write_code_info_qstr(emit_t* emit, qstr qstr) { + byte* c = emit_get_cur_to_write_code_info(emit, 4); + // TODO variable length encoding for qstr + c[0] = qstr & 0xff; + c[1] = (qstr >> 8) & 0xff; + c[2] = (qstr >> 16) & 0xff; + c[3] = (qstr >> 24) & 0xff; } -// all functions must go through this one to emit bytes -static byte* emit_get_cur_to_write_bytes(emit_t* emit, int num_bytes_to_write) { +static void emit_write_code_info_byte_byte(emit_t* emit, byte b1, uint b2) { + byte* c = emit_get_cur_to_write_code_info(emit, 2); + c[0] = b1; + c[1] = b2; +} + +// all functions must go through this one to emit byte code +static byte* emit_get_cur_to_write_byte_code(emit_t* emit, int num_bytes_to_write) { //printf("emit %d\n", num_bytes_to_write); if (emit->pass < PASS_3) { - emit->code_offset += num_bytes_to_write; + emit->byte_code_offset += num_bytes_to_write; return emit->dummy_data; } else { - assert(emit->code_offset + num_bytes_to_write <= emit->code_size); - byte *c = emit->code_base + emit->code_offset; - emit->code_offset += num_bytes_to_write; + assert(emit->byte_code_offset + num_bytes_to_write <= emit->byte_code_size); + byte *c = emit->code_base + emit->code_info_size + emit->byte_code_offset; + emit->byte_code_offset += num_bytes_to_write; return c; } } -static void emit_write_byte_1(emit_t* emit, byte b1) { - byte* c = emit_get_cur_to_write_bytes(emit, 1); +static void emit_write_byte_code_byte(emit_t* emit, byte b1) { + byte* c = emit_get_cur_to_write_byte_code(emit, 1); c[0] = b1; } -static void emit_write_byte_1_byte(emit_t* emit, byte b1, uint b2) { +static void emit_write_byte_code_byte_byte(emit_t* emit, byte b1, uint b2) { assert((b2 & (~0xff)) == 0); - byte* c = emit_get_cur_to_write_bytes(emit, 2); + byte* c = emit_get_cur_to_write_byte_code(emit, 2); c[0] = b1; c[1] = b2; } // integers (for small ints) are stored as 24 bits, in excess -static void emit_write_byte_1_int(emit_t* emit, byte b1, int num) { +static void emit_write_byte_code_byte_int(emit_t* emit, byte b1, int num) { num += 0x800000; assert(0 <= num && num <= 0xffffff); - byte* c = emit_get_cur_to_write_bytes(emit, 4); + byte* c = emit_get_cur_to_write_byte_code(emit, 4); c[0] = b1; c[1] = num; c[2] = num >> 8; c[3] = num >> 16; } -static void emit_write_byte_1_uint(emit_t* emit, byte b1, uint num) { +static void emit_write_byte_code_byte_uint(emit_t* emit, byte b1, uint num) { if (num <= 127) { // fits in 0x7f // fit argument in single byte - byte* c = emit_get_cur_to_write_bytes(emit, 2); + byte* c = emit_get_cur_to_write_byte_code(emit, 2); c[0] = b1; c[1] = num; } else if (num <= 16383) { // fits in 0x3fff // fit argument in two bytes - byte* c = emit_get_cur_to_write_bytes(emit, 3); + byte* c = emit_get_cur_to_write_byte_code(emit, 3); c[0] = b1; c[1] = (num >> 8) | 0x80; c[2] = num; @@ -103,36 +128,36 @@ static void emit_write_byte_1_uint(emit_t* emit, byte b1, uint num) { } } -static void emit_write_byte_1_qstr(emit_t* emit, byte b1, qstr qstr) { - emit_write_byte_1_uint(emit, b1, qstr); +static void emit_write_byte_code_byte_qstr(emit_t* emit, byte b1, qstr qstr) { + emit_write_byte_code_byte_uint(emit, b1, qstr); } // unsigned labels are relative to ip following this instruction, stored as 16 bits -static void emit_write_byte_1_unsigned_label(emit_t* emit, byte b1, int label) { - uint code_offset; +static void emit_write_byte_code_byte_unsigned_label(emit_t* emit, byte b1, int label) { + uint byte_code_offset; if (emit->pass < PASS_3) { - code_offset = 0; + byte_code_offset = 0; } else { - code_offset = emit->label_offsets[label] - emit->code_offset - 3; + byte_code_offset = emit->label_offsets[label] - emit->byte_code_offset - 3; } - byte* c = emit_get_cur_to_write_bytes(emit, 3); + byte* c = emit_get_cur_to_write_byte_code(emit, 3); c[0] = b1; - c[1] = code_offset; - c[2] = code_offset >> 8; + c[1] = byte_code_offset; + c[2] = byte_code_offset >> 8; } // signed labels are relative to ip following this instruction, stored as 16 bits, in excess -static void emit_write_byte_1_signed_label(emit_t* emit, byte b1, int label) { - int code_offset; +static void emit_write_byte_code_byte_signed_label(emit_t* emit, byte b1, int label) { + int byte_code_offset; if (emit->pass < PASS_3) { - code_offset = 0; + byte_code_offset = 0; } else { - code_offset = emit->label_offsets[label] - emit->code_offset - 3 + 0x8000; + byte_code_offset = emit->label_offsets[label] - emit->byte_code_offset - 3 + 0x8000; } - byte* c = emit_get_cur_to_write_bytes(emit, 3); + byte* c = emit_get_cur_to_write_byte_code(emit, 3); c[0] = b1; - c[1] = code_offset; - c[2] = code_offset >> 8; + c[1] = byte_code_offset; + c[2] = byte_code_offset >> 8; } static void emit_bc_set_native_types(emit_t *emit, bool do_native_types) { @@ -143,10 +168,26 @@ static void emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { emit->stack_size = 0; emit->last_emit_was_return_value = false; emit->scope = scope; + emit->last_source_line_offset = 0; + emit->last_source_line = 1; if (pass == PASS_2) { memset(emit->label_offsets, -1, emit->max_num_labels * sizeof(uint)); } - emit->code_offset = 0; + emit->byte_code_offset = 0; + emit->code_info_offset = 0; + + // write code info size (don't know size at this stage in PASS_2 so need to use maximum space (4 bytes) to write it) + { + byte* c = emit_get_cur_to_write_code_info(emit, 4); + machine_uint_t s = emit->code_info_size; + c[0] = s & 0xff; + c[1] = (s >> 8) & 0xff; + c[2] = (s >> 16) & 0xff; + c[3] = (s >> 24) & 0xff; + } + + // code info + emit_write_code_info_qstr(emit, emit->source_file); // prelude for initialising closed over variables int num_cell = 0; @@ -157,11 +198,11 @@ static void emit_bc_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scope) { } } assert(num_cell <= 255); - emit_write_byte_1(emit, num_cell); // write number of locals that are cells + emit_write_byte_code_byte(emit, num_cell); // write number of locals that are cells for (int i = 0; i < scope->id_info_len; i++) { id_info_t *id = &scope->id_info[i]; if (id->kind == ID_INFO_KIND_CELL) { - emit_write_byte_1(emit, id->local_num); // write the local which should be converted to a cell + emit_write_byte_code_byte(emit, id->local_num); // write the local which should be converted to a cell } } } @@ -172,13 +213,16 @@ static void emit_bc_end_pass(emit_t *emit) { printf("ERROR: stack size not back to zero; got %d\n", emit->stack_size); } + emit_write_code_info_byte_byte(emit, 0, 0); // end of line number info + if (emit->pass == PASS_2) { // calculate size of code in bytes - emit->code_size = emit->code_offset; - emit->code_base = m_new(byte, emit->code_size); + emit->code_info_size = emit->code_info_offset; + emit->byte_code_size = emit->byte_code_offset; + emit->code_base = m_new(byte, emit->code_info_size + emit->byte_code_size); } else if (emit->pass == PASS_3) { - rt_assign_byte_code(emit->scope->unique_code_id, emit->code_base, emit->code_size, emit->scope->num_params, emit->scope->num_locals, emit->scope->stack_size, (emit->scope->flags & SCOPE_FLAG_GENERATOR) != 0); + rt_assign_byte_code(emit->scope->unique_code_id, emit->code_base, emit->code_info_size + emit->byte_code_size, emit->scope->num_params, emit->scope->num_locals, emit->scope->stack_size, (emit->scope->flags & SCOPE_FLAG_GENERATOR) != 0); } } @@ -194,6 +238,22 @@ static void emit_bc_set_stack_size(emit_t *emit, int size) { emit->stack_size = size; } +static void emit_bc_set_source_line(emit_t *emit, int source_line) { + if (source_line > emit->last_source_line) { + int bytes_to_skip = emit->byte_code_offset - emit->last_source_line_offset; + for (; bytes_to_skip > 255; bytes_to_skip -= 255) { + emit_write_code_info_byte_byte(emit, 255, 0); + } + int lines_to_skip = source_line - emit->last_source_line; + for (; lines_to_skip > 255; lines_to_skip -= 255) { + emit_write_code_info_byte_byte(emit, 0, 255); + } + emit_write_code_info_byte_byte(emit, bytes_to_skip, lines_to_skip); + emit->last_source_line_offset = emit->byte_code_offset; + emit->last_source_line = source_line; + } +} + static void emit_bc_load_id(emit_t *emit, qstr qstr) { emit_common_load_id(emit, &emit_bc_method_table, emit->scope, qstr); } @@ -220,66 +280,66 @@ static void emit_bc_label_assign(emit_t *emit, int l) { if (emit->pass == PASS_2) { // assign label offset assert(emit->label_offsets[l] == -1); - emit->label_offsets[l] = emit->code_offset; + emit->label_offsets[l] = emit->byte_code_offset; } else if (emit->pass == PASS_3) { // ensure label offset has not changed from PASS_2 to PASS_3 - //printf("l%d: (at %d vs %d)\n", l, emit->code_offset, emit->label_offsets[l]); - assert(emit->label_offsets[l] == emit->code_offset); + //printf("l%d: (at %d vs %d)\n", l, emit->byte_code_offset, emit->label_offsets[l]); + assert(emit->label_offsets[l] == emit->byte_code_offset); } } static void emit_bc_import_name(emit_t *emit, qstr qstr) { emit_pre(emit, -1); - emit_write_byte_1_qstr(emit, MP_BC_IMPORT_NAME, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_IMPORT_NAME, qstr); } static void emit_bc_import_from(emit_t *emit, qstr qstr) { emit_pre(emit, 1); - emit_write_byte_1_qstr(emit, MP_BC_IMPORT_FROM, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_IMPORT_FROM, qstr); } static void emit_bc_import_star(emit_t *emit) { emit_pre(emit, -1); - emit_write_byte_1(emit, MP_BC_IMPORT_STAR); + emit_write_byte_code_byte(emit, MP_BC_IMPORT_STAR); } static void emit_bc_load_const_tok(emit_t *emit, mp_token_kind_t tok) { emit_pre(emit, 1); switch (tok) { - case MP_TOKEN_KW_FALSE: emit_write_byte_1(emit, MP_BC_LOAD_CONST_FALSE); break; - case MP_TOKEN_KW_NONE: emit_write_byte_1(emit, MP_BC_LOAD_CONST_NONE); break; - case MP_TOKEN_KW_TRUE: emit_write_byte_1(emit, MP_BC_LOAD_CONST_TRUE); break; - case MP_TOKEN_ELLIPSIS: emit_write_byte_1(emit, MP_BC_LOAD_CONST_ELLIPSIS); break; + case MP_TOKEN_KW_FALSE: emit_write_byte_code_byte(emit, MP_BC_LOAD_CONST_FALSE); break; + case MP_TOKEN_KW_NONE: emit_write_byte_code_byte(emit, MP_BC_LOAD_CONST_NONE); break; + case MP_TOKEN_KW_TRUE: emit_write_byte_code_byte(emit, MP_BC_LOAD_CONST_TRUE); break; + case MP_TOKEN_ELLIPSIS: emit_write_byte_code_byte(emit, MP_BC_LOAD_CONST_ELLIPSIS); break; default: assert(0); } } static void emit_bc_load_const_small_int(emit_t *emit, int arg) { emit_pre(emit, 1); - emit_write_byte_1_int(emit, MP_BC_LOAD_CONST_SMALL_INT, arg); + emit_write_byte_code_byte_int(emit, MP_BC_LOAD_CONST_SMALL_INT, arg); } static void emit_bc_load_const_int(emit_t *emit, qstr qstr) { emit_pre(emit, 1); - emit_write_byte_1_qstr(emit, MP_BC_LOAD_CONST_INT, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_CONST_INT, qstr); } static void emit_bc_load_const_dec(emit_t *emit, qstr qstr) { emit_pre(emit, 1); - emit_write_byte_1_qstr(emit, MP_BC_LOAD_CONST_DEC, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_CONST_DEC, qstr); } static void emit_bc_load_const_id(emit_t *emit, qstr qstr) { emit_pre(emit, 1); - emit_write_byte_1_qstr(emit, MP_BC_LOAD_CONST_ID, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_CONST_ID, qstr); } static void emit_bc_load_const_str(emit_t *emit, qstr qstr, bool bytes) { emit_pre(emit, 1); if (bytes) { - emit_write_byte_1_qstr(emit, MP_BC_LOAD_CONST_BYTES, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_CONST_BYTES, qstr); } else { - emit_write_byte_1_qstr(emit, MP_BC_LOAD_CONST_STRING, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_CONST_STRING, qstr); } } @@ -292,16 +352,16 @@ static void emit_bc_load_fast(emit_t *emit, qstr qstr, int local_num) { assert(local_num >= 0); emit_pre(emit, 1); switch (local_num) { - case 0: emit_write_byte_1(emit, MP_BC_LOAD_FAST_0); break; - case 1: emit_write_byte_1(emit, MP_BC_LOAD_FAST_1); break; - case 2: emit_write_byte_1(emit, MP_BC_LOAD_FAST_2); break; - default: emit_write_byte_1_uint(emit, MP_BC_LOAD_FAST_N, local_num); break; + case 0: emit_write_byte_code_byte(emit, MP_BC_LOAD_FAST_0); break; + case 1: emit_write_byte_code_byte(emit, MP_BC_LOAD_FAST_1); break; + case 2: emit_write_byte_code_byte(emit, MP_BC_LOAD_FAST_2); break; + default: emit_write_byte_code_byte_uint(emit, MP_BC_LOAD_FAST_N, local_num); break; } } static void emit_bc_load_deref(emit_t *emit, qstr qstr, int local_num) { emit_pre(emit, 1); - emit_write_byte_1_uint(emit, MP_BC_LOAD_DEREF, local_num); + emit_write_byte_code_byte_uint(emit, MP_BC_LOAD_DEREF, local_num); } static void emit_bc_load_closure(emit_t *emit, qstr qstr, int local_num) { @@ -311,200 +371,200 @@ static void emit_bc_load_closure(emit_t *emit, qstr qstr, int local_num) { static void emit_bc_load_name(emit_t *emit, qstr qstr) { emit_pre(emit, 1); - emit_write_byte_1_qstr(emit, MP_BC_LOAD_NAME, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_NAME, qstr); } static void emit_bc_load_global(emit_t *emit, qstr qstr) { emit_pre(emit, 1); - emit_write_byte_1_qstr(emit, MP_BC_LOAD_GLOBAL, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_GLOBAL, qstr); } static void emit_bc_load_attr(emit_t *emit, qstr qstr) { emit_pre(emit, 0); - emit_write_byte_1_qstr(emit, MP_BC_LOAD_ATTR, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_ATTR, qstr); } static void emit_bc_load_method(emit_t *emit, qstr qstr) { emit_pre(emit, 0); - emit_write_byte_1_qstr(emit, MP_BC_LOAD_METHOD, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_LOAD_METHOD, qstr); } static void emit_bc_load_build_class(emit_t *emit) { emit_pre(emit, 1); - emit_write_byte_1(emit, MP_BC_LOAD_BUILD_CLASS); + emit_write_byte_code_byte(emit, MP_BC_LOAD_BUILD_CLASS); } static void emit_bc_store_fast(emit_t *emit, qstr qstr, int local_num) { assert(local_num >= 0); emit_pre(emit, -1); switch (local_num) { - case 0: emit_write_byte_1(emit, MP_BC_STORE_FAST_0); break; - case 1: emit_write_byte_1(emit, MP_BC_STORE_FAST_1); break; - case 2: emit_write_byte_1(emit, MP_BC_STORE_FAST_2); break; - default: emit_write_byte_1_uint(emit, MP_BC_STORE_FAST_N, local_num); break; + case 0: emit_write_byte_code_byte(emit, MP_BC_STORE_FAST_0); break; + case 1: emit_write_byte_code_byte(emit, MP_BC_STORE_FAST_1); break; + case 2: emit_write_byte_code_byte(emit, MP_BC_STORE_FAST_2); break; + default: emit_write_byte_code_byte_uint(emit, MP_BC_STORE_FAST_N, local_num); break; } } static void emit_bc_store_deref(emit_t *emit, qstr qstr, int local_num) { emit_pre(emit, -1); - emit_write_byte_1_uint(emit, MP_BC_STORE_DEREF, local_num); + emit_write_byte_code_byte_uint(emit, MP_BC_STORE_DEREF, local_num); } static void emit_bc_store_name(emit_t *emit, qstr qstr) { emit_pre(emit, -1); - emit_write_byte_1_qstr(emit, MP_BC_STORE_NAME, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_STORE_NAME, qstr); } static void emit_bc_store_global(emit_t *emit, qstr qstr) { emit_pre(emit, -1); - emit_write_byte_1_qstr(emit, MP_BC_STORE_GLOBAL, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_STORE_GLOBAL, qstr); } static void emit_bc_store_attr(emit_t *emit, qstr qstr) { emit_pre(emit, -2); - emit_write_byte_1_qstr(emit, MP_BC_STORE_ATTR, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_STORE_ATTR, qstr); } static void emit_bc_store_subscr(emit_t *emit) { emit_pre(emit, -3); - emit_write_byte_1(emit, MP_BC_STORE_SUBSCR); + emit_write_byte_code_byte(emit, MP_BC_STORE_SUBSCR); } static void emit_bc_store_locals(emit_t *emit) { // not needed emit_pre(emit, -1); - emit_write_byte_1(emit, MP_BC_POP_TOP); + emit_write_byte_code_byte(emit, MP_BC_POP_TOP); } static void emit_bc_delete_fast(emit_t *emit, qstr qstr, int local_num) { assert(local_num >= 0); emit_pre(emit, 0); - emit_write_byte_1_uint(emit, MP_BC_DELETE_FAST_N, local_num); + emit_write_byte_code_byte_uint(emit, MP_BC_DELETE_FAST_N, local_num); } static void emit_bc_delete_deref(emit_t *emit, qstr qstr, int local_num) { emit_pre(emit, 0); - emit_write_byte_1_qstr(emit, MP_BC_DELETE_DEREF, local_num); + emit_write_byte_code_byte_qstr(emit, MP_BC_DELETE_DEREF, local_num); } static void emit_bc_delete_name(emit_t *emit, qstr qstr) { emit_pre(emit, 0); - emit_write_byte_1_qstr(emit, MP_BC_DELETE_NAME, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_DELETE_NAME, qstr); } static void emit_bc_delete_global(emit_t *emit, qstr qstr) { emit_pre(emit, 0); - emit_write_byte_1_qstr(emit, MP_BC_DELETE_GLOBAL, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_DELETE_GLOBAL, qstr); } static void emit_bc_delete_attr(emit_t *emit, qstr qstr) { emit_pre(emit, -1); - emit_write_byte_1_qstr(emit, MP_BC_DELETE_ATTR, qstr); + emit_write_byte_code_byte_qstr(emit, MP_BC_DELETE_ATTR, qstr); } static void emit_bc_delete_subscr(emit_t *emit) { emit_pre(emit, -2); - emit_write_byte_1(emit, MP_BC_DELETE_SUBSCR); + emit_write_byte_code_byte(emit, MP_BC_DELETE_SUBSCR); } static void emit_bc_dup_top(emit_t *emit) { emit_pre(emit, 1); - emit_write_byte_1(emit, MP_BC_DUP_TOP); + emit_write_byte_code_byte(emit, MP_BC_DUP_TOP); } static void emit_bc_dup_top_two(emit_t *emit) { emit_pre(emit, 2); - emit_write_byte_1(emit, MP_BC_DUP_TOP_TWO); + emit_write_byte_code_byte(emit, MP_BC_DUP_TOP_TWO); } static void emit_bc_pop_top(emit_t *emit) { emit_pre(emit, -1); - emit_write_byte_1(emit, MP_BC_POP_TOP); + emit_write_byte_code_byte(emit, MP_BC_POP_TOP); } static void emit_bc_rot_two(emit_t *emit) { emit_pre(emit, 0); - emit_write_byte_1(emit, MP_BC_ROT_TWO); + emit_write_byte_code_byte(emit, MP_BC_ROT_TWO); } static void emit_bc_rot_three(emit_t *emit) { emit_pre(emit, 0); - emit_write_byte_1(emit, MP_BC_ROT_THREE); + emit_write_byte_code_byte(emit, MP_BC_ROT_THREE); } static void emit_bc_jump(emit_t *emit, int label) { emit_pre(emit, 0); - emit_write_byte_1_signed_label(emit, MP_BC_JUMP, label); + emit_write_byte_code_byte_signed_label(emit, MP_BC_JUMP, label); } static void emit_bc_pop_jump_if_true(emit_t *emit, int label) { emit_pre(emit, -1); - emit_write_byte_1_signed_label(emit, MP_BC_POP_JUMP_IF_TRUE, label); + emit_write_byte_code_byte_signed_label(emit, MP_BC_POP_JUMP_IF_TRUE, label); } static void emit_bc_pop_jump_if_false(emit_t *emit, int label) { emit_pre(emit, -1); - emit_write_byte_1_signed_label(emit, MP_BC_POP_JUMP_IF_FALSE, label); + emit_write_byte_code_byte_signed_label(emit, MP_BC_POP_JUMP_IF_FALSE, label); } static void emit_bc_jump_if_true_or_pop(emit_t *emit, int label) { emit_pre(emit, -1); - emit_write_byte_1_signed_label(emit, MP_BC_JUMP_IF_TRUE_OR_POP, label); + emit_write_byte_code_byte_signed_label(emit, MP_BC_JUMP_IF_TRUE_OR_POP, label); } static void emit_bc_jump_if_false_or_pop(emit_t *emit, int label) { emit_pre(emit, -1); - emit_write_byte_1_signed_label(emit, MP_BC_JUMP_IF_FALSE_OR_POP, label); + emit_write_byte_code_byte_signed_label(emit, MP_BC_JUMP_IF_FALSE_OR_POP, label); } static void emit_bc_setup_loop(emit_t *emit, int label) { emit_pre(emit, 0); - emit_write_byte_1_unsigned_label(emit, MP_BC_SETUP_LOOP, label); + emit_write_byte_code_byte_unsigned_label(emit, MP_BC_SETUP_LOOP, label); } static void emit_bc_break_loop(emit_t *emit, int label) { emit_pre(emit, 0); - emit_write_byte_1_unsigned_label(emit, MP_BC_BREAK_LOOP, label); + emit_write_byte_code_byte_unsigned_label(emit, MP_BC_BREAK_LOOP, label); } static void emit_bc_continue_loop(emit_t *emit, int label) { emit_pre(emit, 0); - emit_write_byte_1_unsigned_label(emit, MP_BC_CONTINUE_LOOP, label); + emit_write_byte_code_byte_unsigned_label(emit, MP_BC_CONTINUE_LOOP, label); } static void emit_bc_setup_with(emit_t *emit, int label) { emit_pre(emit, 7); - emit_write_byte_1_unsigned_label(emit, MP_BC_SETUP_WITH, label); + emit_write_byte_code_byte_unsigned_label(emit, MP_BC_SETUP_WITH, label); } static void emit_bc_with_cleanup(emit_t *emit) { emit_pre(emit, -7); - emit_write_byte_1(emit, MP_BC_WITH_CLEANUP); + emit_write_byte_code_byte(emit, MP_BC_WITH_CLEANUP); } static void emit_bc_setup_except(emit_t *emit, int label) { emit_pre(emit, 6); - emit_write_byte_1_unsigned_label(emit, MP_BC_SETUP_EXCEPT, label); + emit_write_byte_code_byte_unsigned_label(emit, MP_BC_SETUP_EXCEPT, label); } static void emit_bc_setup_finally(emit_t *emit, int label) { emit_pre(emit, 6); - emit_write_byte_1_unsigned_label(emit, MP_BC_SETUP_FINALLY, label); + emit_write_byte_code_byte_unsigned_label(emit, MP_BC_SETUP_FINALLY, label); } static void emit_bc_end_finally(emit_t *emit) { emit_pre(emit, -1); - emit_write_byte_1(emit, MP_BC_END_FINALLY); + emit_write_byte_code_byte(emit, MP_BC_END_FINALLY); } static void emit_bc_get_iter(emit_t *emit) { emit_pre(emit, 0); - emit_write_byte_1(emit, MP_BC_GET_ITER); + emit_write_byte_code_byte(emit, MP_BC_GET_ITER); } static void emit_bc_for_iter(emit_t *emit, int label) { emit_pre(emit, 1); - emit_write_byte_1_unsigned_label(emit, MP_BC_FOR_ITER, label); + emit_write_byte_code_byte_unsigned_label(emit, MP_BC_FOR_ITER, label); } static void emit_bc_for_iter_end(emit_t *emit) { @@ -513,99 +573,99 @@ static void emit_bc_for_iter_end(emit_t *emit) { static void emit_bc_pop_block(emit_t *emit) { emit_pre(emit, 0); - emit_write_byte_1(emit, MP_BC_POP_BLOCK); + emit_write_byte_code_byte(emit, MP_BC_POP_BLOCK); } static void emit_bc_pop_except(emit_t *emit) { emit_pre(emit, 0); - emit_write_byte_1(emit, MP_BC_POP_EXCEPT); + emit_write_byte_code_byte(emit, MP_BC_POP_EXCEPT); } static void emit_bc_unary_op(emit_t *emit, rt_unary_op_t op) { emit_pre(emit, 0); - emit_write_byte_1_byte(emit, MP_BC_UNARY_OP, op); + emit_write_byte_code_byte_byte(emit, MP_BC_UNARY_OP, op); } static void emit_bc_binary_op(emit_t *emit, rt_binary_op_t op) { emit_pre(emit, -1); - emit_write_byte_1_byte(emit, MP_BC_BINARY_OP, op); + emit_write_byte_code_byte_byte(emit, MP_BC_BINARY_OP, op); } static void emit_bc_build_tuple(emit_t *emit, int n_args) { assert(n_args >= 0); emit_pre(emit, 1 - n_args); - emit_write_byte_1_uint(emit, MP_BC_BUILD_TUPLE, n_args); + emit_write_byte_code_byte_uint(emit, MP_BC_BUILD_TUPLE, n_args); } static void emit_bc_build_list(emit_t *emit, int n_args) { assert(n_args >= 0); emit_pre(emit, 1 - n_args); - emit_write_byte_1_uint(emit, MP_BC_BUILD_LIST, n_args); + emit_write_byte_code_byte_uint(emit, MP_BC_BUILD_LIST, n_args); } static void emit_bc_list_append(emit_t *emit, int list_stack_index) { assert(list_stack_index >= 0); emit_pre(emit, -1); - emit_write_byte_1_uint(emit, MP_BC_LIST_APPEND, list_stack_index); + emit_write_byte_code_byte_uint(emit, MP_BC_LIST_APPEND, list_stack_index); } static void emit_bc_build_map(emit_t *emit, int n_args) { assert(n_args >= 0); emit_pre(emit, 1); - emit_write_byte_1_uint(emit, MP_BC_BUILD_MAP, n_args); + emit_write_byte_code_byte_uint(emit, MP_BC_BUILD_MAP, n_args); } static void emit_bc_store_map(emit_t *emit) { emit_pre(emit, -2); - emit_write_byte_1(emit, MP_BC_STORE_MAP); + emit_write_byte_code_byte(emit, MP_BC_STORE_MAP); } static void emit_bc_map_add(emit_t *emit, int map_stack_index) { assert(map_stack_index >= 0); emit_pre(emit, -2); - emit_write_byte_1_uint(emit, MP_BC_MAP_ADD, map_stack_index); + emit_write_byte_code_byte_uint(emit, MP_BC_MAP_ADD, map_stack_index); } static void emit_bc_build_set(emit_t *emit, int n_args) { assert(n_args >= 0); emit_pre(emit, 1 - n_args); - emit_write_byte_1_uint(emit, MP_BC_BUILD_SET, n_args); + emit_write_byte_code_byte_uint(emit, MP_BC_BUILD_SET, n_args); } static void emit_bc_set_add(emit_t *emit, int set_stack_index) { assert(set_stack_index >= 0); emit_pre(emit, -1); - emit_write_byte_1_uint(emit, MP_BC_SET_ADD, set_stack_index); + emit_write_byte_code_byte_uint(emit, MP_BC_SET_ADD, set_stack_index); } static void emit_bc_build_slice(emit_t *emit, int n_args) { assert(n_args >= 0); emit_pre(emit, 1 - n_args); - emit_write_byte_1_uint(emit, MP_BC_BUILD_SLICE, n_args); + emit_write_byte_code_byte_uint(emit, MP_BC_BUILD_SLICE, n_args); } static void emit_bc_unpack_sequence(emit_t *emit, int n_args) { assert(n_args >= 0); emit_pre(emit, -1 + n_args); - emit_write_byte_1_uint(emit, MP_BC_UNPACK_SEQUENCE, n_args); + emit_write_byte_code_byte_uint(emit, MP_BC_UNPACK_SEQUENCE, n_args); } static void emit_bc_unpack_ex(emit_t *emit, int n_left, int n_right) { assert(n_left >=0 && n_right >= 0); emit_pre(emit, -1 + n_left + n_right + 1); - emit_write_byte_1_uint(emit, MP_BC_UNPACK_EX, n_left | (n_right << 8)); + emit_write_byte_code_byte_uint(emit, MP_BC_UNPACK_EX, n_left | (n_right << 8)); } static void emit_bc_make_function(emit_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { assert(n_default_params == 0 && n_dict_params == 0); emit_pre(emit, 1); - emit_write_byte_1_uint(emit, MP_BC_MAKE_FUNCTION, scope->unique_code_id); + emit_write_byte_code_byte_uint(emit, MP_BC_MAKE_FUNCTION, scope->unique_code_id); } static void emit_bc_make_closure(emit_t *emit, scope_t *scope, int n_dict_params, int n_default_params) { assert(n_default_params == 0 && n_dict_params == 0); emit_pre(emit, 0); - emit_write_byte_1_uint(emit, MP_BC_MAKE_CLOSURE, scope->unique_code_id); + emit_write_byte_code_byte_uint(emit, MP_BC_MAKE_CLOSURE, scope->unique_code_id); } static void emit_bc_call_function(emit_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { @@ -631,7 +691,7 @@ static void emit_bc_call_function(emit_t *emit, int n_positional, int n_keyword, op = MP_BC_CALL_FUNCTION; } } - emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints + emit_write_byte_code_byte_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints } static void emit_bc_call_method(emit_t *emit, int n_positional, int n_keyword, bool have_star_arg, bool have_dbl_star_arg) { @@ -657,19 +717,19 @@ static void emit_bc_call_method(emit_t *emit, int n_positional, int n_keyword, b op = MP_BC_CALL_METHOD; } } - emit_write_byte_1_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints + emit_write_byte_code_byte_uint(emit, op, (n_keyword << 8) | n_positional); // TODO make it 2 separate uints } static void emit_bc_return_value(emit_t *emit) { emit_pre(emit, -1); emit->last_emit_was_return_value = true; - emit_write_byte_1(emit, MP_BC_RETURN_VALUE); + emit_write_byte_code_byte(emit, MP_BC_RETURN_VALUE); } static void emit_bc_raise_varargs(emit_t *emit, int n_args) { assert(0 <= n_args && n_args <= 2); emit_pre(emit, -n_args); - emit_write_byte_1_byte(emit, MP_BC_RAISE_VARARGS, n_args); + emit_write_byte_code_byte_byte(emit, MP_BC_RAISE_VARARGS, n_args); } static void emit_bc_yield_value(emit_t *emit) { @@ -677,7 +737,7 @@ static void emit_bc_yield_value(emit_t *emit) { if (emit->pass == PASS_2) { emit->scope->flags |= SCOPE_FLAG_GENERATOR; } - emit_write_byte_1(emit, MP_BC_YIELD_VALUE); + emit_write_byte_code_byte(emit, MP_BC_YIELD_VALUE); } static void emit_bc_yield_from(emit_t *emit) { @@ -685,7 +745,7 @@ static void emit_bc_yield_from(emit_t *emit) { if (emit->pass == PASS_2) { emit->scope->flags |= SCOPE_FLAG_GENERATOR; } - emit_write_byte_1(emit, MP_BC_YIELD_FROM); + emit_write_byte_code_byte(emit, MP_BC_YIELD_FROM); } const emit_method_table_t emit_bc_method_table = { @@ -695,6 +755,7 @@ const emit_method_table_t emit_bc_method_table = { emit_bc_last_emit_was_return_value, emit_bc_get_stack_size, emit_bc_set_stack_size, + emit_bc_set_source_line, emit_bc_load_id, emit_bc_store_id, diff --git a/py/emitcpy.c b/py/emitcpy.c index ee96e589a3a91d713a327aa420f7f8486fdb7327..42eef91d11d661180f1d5be7eaab5044c4cf4922 100644 --- a/py/emitcpy.c +++ b/py/emitcpy.c @@ -68,6 +68,9 @@ static void emit_cpy_set_stack_size(emit_t *emit, int size) { emit->stack_size = size; } +static void emit_cpy_set_source_line(emit_t *emit, int source_line) { +} + static void emit_cpy_load_id(emit_t *emit, qstr qstr) { emit_common_load_id(emit, &emit_cpython_method_table, emit->scope, qstr); } @@ -798,6 +801,7 @@ const emit_method_table_t emit_cpython_method_table = { emit_cpy_last_emit_was_return_value, emit_cpy_get_stack_size, emit_cpy_set_stack_size, + emit_cpy_set_source_line, emit_cpy_load_id, emit_cpy_store_id, diff --git a/py/emitnative.c b/py/emitnative.c index aea25ac36d010e9f0ee2c0c00e58a7eaedf61d2a..a80cd2cf10f8cc04434b384f2e13be57df258013 100644 --- a/py/emitnative.c +++ b/py/emitnative.c @@ -281,6 +281,9 @@ static void emit_native_set_stack_size(emit_t *emit, int size) { emit->stack_size = size; } +static void emit_native_set_source_line(emit_t *emit, int source_line) { +} + static void adjust_stack(emit_t *emit, int stack_size_delta) { emit->stack_size += stack_size_delta; assert(emit->stack_size >= 0); @@ -1228,6 +1231,7 @@ const emit_method_table_t EXPORT_FUN(method_table) = { emit_native_last_emit_was_return_value, emit_native_get_stack_size, emit_native_set_stack_size, + emit_native_set_source_line, emit_native_load_id, emit_native_store_id, diff --git a/py/emitpass1.c b/py/emitpass1.c index 60fdd0b8251739301f5e76b73a5634dbbe17493a..c73522e47480900f1db5196fa71f903feb282104 100644 --- a/py/emitpass1.c +++ b/py/emitpass1.c @@ -103,6 +103,7 @@ const emit_method_table_t emit_pass1_method_table = { (void*)emit_pass1_dummy, (void*)emit_pass1_dummy, (void*)emit_pass1_dummy, + (void*)emit_pass1_dummy, emit_pass1_load_id, emit_pass1_store_id, diff --git a/py/lexer.c b/py/lexer.c index 6feb231e0cd1f79562b552a317d00842001d2d92..e8c6bc3082e3695560689e2aa87ca2c90be34e8b 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -3,6 +3,7 @@ #include <stdint.h> #include <stdio.h> +#include <string.h> #include <assert.h> #include "misc.h" @@ -14,7 +15,7 @@ // don't know if that's intentional or not, but we don't allow it struct _mp_lexer_t { - const char *name; // name of source + qstr source_name; // name of source void *stream_data; // data for stream mp_lexer_stream_next_char_t stream_next_char; // stream callback to get next char mp_lexer_stream_close_t stream_close; // stream callback to free @@ -49,7 +50,7 @@ bool str_strn_equal(const char *str, const char *strn, int len) { } void mp_token_show(const mp_token_t *tok) { - printf("(%s:%d:%d) kind:%d str:%p len:%d", tok->src_name, tok->src_line, tok->src_column, tok->kind, tok->str, tok->len); + printf("(%d:%d) kind:%d str:%p len:%d", tok->src_line, tok->src_column, tok->kind, tok->str, tok->len); if (tok->str != NULL && tok->len > 0) { const char *i = tok->str; const char *j = i + tok->len; @@ -292,7 +293,6 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs next_char(lex); if (!is_physical_newline(lex)) { // SyntaxError: unexpected character after line continuation character - tok->src_name = lex->name; tok->src_line = lex->line; tok->src_column = lex->column; tok->kind = MP_TOKEN_BAD_LINE_CONTINUATION; @@ -309,7 +309,6 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs } // set token source information - tok->src_name = lex->name; tok->src_line = lex->line; tok->src_column = lex->column; @@ -594,7 +593,7 @@ static void mp_lexer_next_token_into(mp_lexer_t *lex, mp_token_t *tok, bool firs mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) { mp_lexer_t *lex = m_new(mp_lexer_t, 1); - lex->name = src_name; // TODO do we need to strdup this? + lex->source_name = qstr_from_strn_copy(src_name, strlen(src_name)); lex->stream_data = stream_data; lex->stream_next_char = stream_next_char; lex->stream_close = stream_close; @@ -642,6 +641,10 @@ void mp_lexer_free(mp_lexer_t *lex) { } } +qstr mp_lexer_source_name(mp_lexer_t *lex) { + return lex->source_name; +} + void mp_lexer_to_next(mp_lexer_t *lex) { mp_lexer_next_token_into(lex, &lex->tok_cur, false); } @@ -677,11 +680,11 @@ bool mp_lexer_opt_str(mp_lexer_t *lex, const char *str) { */ bool mp_lexer_show_error_pythonic_prefix(mp_lexer_t *lex) { - printf(" File \"%s\", line %d column %d\n", lex->tok_cur.src_name, lex->tok_cur.src_line, lex->tok_cur.src_column); + printf(" File \"%s\", line %d column %d\n", qstr_str(lex->source_name), lex->tok_cur.src_line, lex->tok_cur.src_column); return false; } bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg) { - printf(" File \"%s\", line %d column %d\n%s\n", lex->tok_cur.src_name, lex->tok_cur.src_line, lex->tok_cur.src_column, msg); + printf(" File \"%s\", line %d column %d\n%s\n", qstr_str(lex->source_name), lex->tok_cur.src_line, lex->tok_cur.src_column, msg); return false; } diff --git a/py/lexer.h b/py/lexer.h index ea928c77f8e293200d89674bab4b1dce3a46b859..69e97329b668834d22854019ebc0b8eaace27841 100644 --- a/py/lexer.h +++ b/py/lexer.h @@ -105,7 +105,6 @@ typedef enum _mp_token_kind_t { } mp_token_kind_t; typedef struct _mp_token_t { - const char *src_name; // name of source uint src_line; // source line uint src_column; // source column @@ -129,6 +128,7 @@ mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_strea mp_lexer_t *mp_lexer_new_from_str_len(const char *src_name, const char *str, uint len, uint free_len); void mp_lexer_free(mp_lexer_t *lex); +qstr mp_lexer_source_name(mp_lexer_t *lex); void mp_lexer_to_next(mp_lexer_t *lex); const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex); bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind); diff --git a/py/obj.h b/py/obj.h index 5691f09be369bfac3c9699acad539b5ceee0d7c2..4690f6bf0eb041c3f298d2dacfca9588900dc873 100644 --- a/py/obj.h +++ b/py/obj.h @@ -273,6 +273,8 @@ machine_int_t mp_obj_int_get_checked(mp_obj_t self_in); // exception extern const mp_obj_type_t exception_type; qstr mp_obj_exception_get_type(mp_obj_t self_in); +void mp_obj_exception_set_source_info(mp_obj_t self_in, qstr file, machine_uint_t line); +void mp_obj_exception_get_source_info(mp_obj_t self_in, qstr *file, machine_uint_t *line); // str extern const mp_obj_type_t str_type; diff --git a/py/objexcept.c b/py/objexcept.c index 1d30758dd25b275172b976d9c7a924cb1550a83c..7f87478a8986df5cfc6984de0672414eb97914b3 100644 --- a/py/objexcept.c +++ b/py/objexcept.c @@ -17,6 +17,8 @@ // have args tuple (or otherwise have it as NULL). typedef struct mp_obj_exception_t { mp_obj_base_t base; + qstr source_file; + machine_uint_t source_line; qstr id; qstr msg; mp_obj_tuple_t args; @@ -87,6 +89,8 @@ mp_obj_t mp_obj_new_exception_msg_varg(qstr id, const char *fmt, ...) { // make exception object mp_obj_exception_t *o = m_new_obj_var(mp_obj_exception_t, mp_obj_t*, 0); o->base.type = &exception_type; + o->source_file = 0; + o->source_line = 0; o->id = id; o->args.len = 0; if (fmt == NULL) { @@ -109,3 +113,23 @@ qstr mp_obj_exception_get_type(mp_obj_t self_in) { mp_obj_exception_t *self = self_in; return self->id; } + +void mp_obj_exception_set_source_info(mp_obj_t self_in, qstr file, machine_uint_t line) { + assert(MP_OBJ_IS_TYPE(self_in, &exception_type)); + mp_obj_exception_t *self = self_in; + // TODO make a list of file/line pairs for the traceback + // for now, just keep the first one + if (file != 0 && self->source_file == 0) { + self->source_file = file; + } + if (line != 0 && self->source_line == 0) { + self->source_line = line; + } +} + +void mp_obj_exception_get_source_info(mp_obj_t self_in, qstr *file, machine_uint_t *line) { + assert(MP_OBJ_IS_TYPE(self_in, &exception_type)); + mp_obj_exception_t *self = self_in; + *file = self->source_file; + *line = self->source_line; +} diff --git a/py/objgenerator.c b/py/objgenerator.c index d58480f9e7e715ac14b55f49e19462dfc74023f1..2e8bd3d32834e08d68d897d43cfa21c5a700aaea 100644 --- a/py/objgenerator.c +++ b/py/objgenerator.c @@ -58,6 +58,7 @@ mp_obj_t mp_obj_new_gen_wrap(uint n_locals, uint n_stack, mp_obj_t fun) { typedef struct _mp_obj_gen_instance_t { mp_obj_base_t base; + const byte *code_info; const byte *ip; mp_obj_t *sp; uint n_state; @@ -74,7 +75,7 @@ mp_obj_t gen_instance_getiter(mp_obj_t self_in) { mp_obj_t gen_instance_iternext(mp_obj_t self_in) { mp_obj_gen_instance_t *self = self_in; - bool yield = mp_execute_byte_code_2(&self->ip, &self->state[self->n_state - 1], &self->sp); + bool yield = mp_execute_byte_code_2(self->code_info, &self->ip, &self->state[self->n_state - 1], &self->sp); if (yield) { return *self->sp; } else { @@ -98,6 +99,7 @@ const mp_obj_type_t gen_instance_type = { mp_obj_t mp_obj_new_gen_instance(const byte *bytecode, uint n_state, int n_args, const mp_obj_t *args) { mp_obj_gen_instance_t *o = m_new_obj_var(mp_obj_gen_instance_t, mp_obj_t, n_state); o->base.type = &gen_instance_type; + o->code_info = bytecode; o->ip = bytecode; o->sp = &o->state[0] - 1; // sp points to top of stack, which starts off 1 below the state o->n_state = n_state; @@ -111,6 +113,10 @@ mp_obj_t mp_obj_new_gen_instance(const byte *bytecode, uint n_state, int n_args, // prelude for making cells (closed over variables) // for now we just make sure there are no cells variables // need to work out how to implement closed over variables in generators + + // get code info size + machine_uint_t code_info_size = bytecode[0] | (bytecode[1] << 8) | (bytecode[2] << 16) | (bytecode[3] << 24); + o->ip += code_info_size; assert(o->ip[0] == 0); o->ip += 1; diff --git a/py/parse.c b/py/parse.c index 49b42e5d771c694dd060a766cc6fc86c31b60b89..fc74a5fa507a8ae931737a8e08525c7b69ffdd2f 100644 --- a/py/parse.c +++ b/py/parse.c @@ -80,7 +80,8 @@ static const rule_t *rules[] = { }; typedef struct _rule_stack_t { - byte rule_id; + unsigned int src_line : 24; + unsigned int rule_id : 8; int32_t arg_i; // what should be the size and signedness? } rule_stack_t; @@ -92,45 +93,54 @@ typedef struct _parser_t { uint result_stack_alloc; uint result_stack_top; mp_parse_node_t *result_stack; + + mp_lexer_t *lexer; } parser_t; -static void push_rule(parser_t *parser, const rule_t *rule, int arg_i) { +static void push_rule(parser_t *parser, int src_line, const rule_t *rule, int arg_i) { if (parser->rule_stack_top >= parser->rule_stack_alloc) { parser->rule_stack = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc * 2); parser->rule_stack_alloc *= 2; } - parser->rule_stack[parser->rule_stack_top].rule_id = rule->rule_id; - parser->rule_stack[parser->rule_stack_top].arg_i = arg_i; - parser->rule_stack_top += 1; + rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++]; + rs->src_line = src_line; + rs->rule_id = rule->rule_id; + rs->arg_i = arg_i; } static void push_rule_from_arg(parser_t *parser, uint arg) { assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE); uint rule_id = arg & RULE_ARG_ARG_MASK; assert(rule_id < RULE_maximum_number_of); - push_rule(parser, rules[rule_id], 0); + push_rule(parser, mp_lexer_cur(parser->lexer)->src_line, rules[rule_id], 0); } -static void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i) { +static void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *src_line) { parser->rule_stack_top -= 1; *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id]; *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i; + *src_line = parser->rule_stack[parser->rule_stack_top].src_line; } mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) { return (mp_parse_node_t)(kind | (arg << 4)); } -int num_parse_nodes_allocated = 0; -mp_parse_node_struct_t *parse_node_new_struct(int rule_id, int num_args) { +//int num_parse_nodes_allocated = 0; +mp_parse_node_struct_t *parse_node_new_struct(int src_line, int rule_id, int num_args) { mp_parse_node_struct_t *pn = m_new_obj_var(mp_parse_node_struct_t, mp_parse_node_t, num_args); - pn->source = 0; // TODO + pn->source_line = src_line; pn->kind_num_nodes = (rule_id & 0xff) | (num_args << 8); - num_parse_nodes_allocated += 1; + //num_parse_nodes_allocated += 1; return pn; } void mp_parse_node_show(mp_parse_node_t pn, int indent) { + if (MP_PARSE_NODE_IS_STRUCT(pn)) { + printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line); + } else { + printf(" "); + } for (int i = 0; i < indent; i++) { printf(" "); } @@ -258,8 +268,8 @@ static void push_result_token(parser_t *parser, const mp_lexer_t *lex) { push_result_node(parser, pn); } -static void push_result_rule(parser_t *parser, const rule_t *rule, int num_args) { - mp_parse_node_struct_t *pn = parse_node_new_struct(rule->rule_id, num_args); +static void push_result_rule(parser_t *parser, int src_line, const rule_t *rule, int num_args) { + mp_parse_node_struct_t *pn = parse_node_new_struct(src_line, rule->rule_id, num_args); for (int i = num_args; i > 0; i--) { pn->nodes[i - 1] = pop_result(parser); } @@ -280,6 +290,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr parser->result_stack_top = 0; parser->result_stack = m_new(mp_parse_node_t, parser->result_stack_alloc); + parser->lexer = lex; + // work out the top-level rule to use, and push it on the stack int top_level_rule; switch (input_kind) { @@ -287,13 +299,14 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break; default: top_level_rule = RULE_file_input; } - push_rule(parser, rules[top_level_rule], 0); + push_rule(parser, mp_lexer_cur(lex)->src_line, rules[top_level_rule], 0); // parse! - uint n, i; + uint n, i; // state for the current rule + uint rule_src_line; // source line for the first token matched by the current rule bool backtrack = false; - const rule_t *rule; + const rule_t *rule = NULL; mp_token_kind_t tok_kind; bool emit_rule; bool had_trailing_sep; @@ -304,7 +317,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr break; } - pop_rule(parser, &rule, &i); + pop_rule(parser, &rule, &i, &rule_src_line); n = rule->act & RULE_ACT_ARG_MASK; /* @@ -333,8 +346,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr } break; case RULE_ARG_RULE: - push_rule(parser, rule, i + 1); - push_rule_from_arg(parser, rule->arg[i]); + push_rule(parser, rule_src_line, rule, i + 1); // save this or-rule + push_rule_from_arg(parser, rule->arg[i]); // push child of or-rule goto next_rule; default: assert(0); @@ -398,14 +411,9 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr } break; case RULE_ARG_RULE: - //if (i + 1 < n) { - push_rule(parser, rule, i + 1); - //} - push_rule_from_arg(parser, rule->arg[i]); - goto next_rule; case RULE_ARG_OPT_RULE: - push_rule(parser, rule, i + 1); - push_rule_from_arg(parser, rule->arg[i]); + push_rule(parser, rule_src_line, rule, i + 1); // save this and-rule + push_rule_from_arg(parser, rule->arg[i]); // push child of and-rule goto next_rule; default: assert(0); @@ -462,9 +470,9 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr } //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil); if (emit_rule) { - push_result_rule(parser, rule, i); + push_result_rule(parser, rule_src_line, rule, i); } else if (num_not_nil == 0) { - push_result_rule(parser, rule, i); // needed for, eg, atom_paren, testlist_comp_3b + push_result_rule(parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b //result_stack_show(parser); //assert(0); } else if (num_not_nil == 1) { @@ -478,7 +486,7 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr } push_result_node(parser, pn); } else { - push_result_rule(parser, rule, i); + push_result_rule(parser, rule_src_line, rule, i); } break; @@ -538,8 +546,8 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr } break; case RULE_ARG_RULE: - push_rule(parser, rule, i + 1); - push_rule_from_arg(parser, arg); + push_rule(parser, rule_src_line, rule, i + 1); // save this list-rule + push_rule_from_arg(parser, arg); // push child of list-rule goto next_rule; default: assert(0); @@ -559,13 +567,13 @@ mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, qstr // list matched single item if (had_trailing_sep) { // if there was a trailing separator, make a list of a single item - push_result_rule(parser, rule, i); + push_result_rule(parser, rule_src_line, rule, i); } else { // just leave single item on stack (ie don't wrap in a list) } } else { //printf("done list %s %d %d\n", rule->rule_name, n, i); - push_result_rule(parser, rule, i); + push_result_rule(parser, rule_src_line, rule, i); } break; diff --git a/py/parse.h b/py/parse.h index be2073ae5dd9a43d2f43b22a7bbad736a73a5efe..e1e7f730223c22f511795bb3d1b3ea300d4ed372 100644 --- a/py/parse.h +++ b/py/parse.h @@ -28,7 +28,7 @@ struct _mp_lexer_t; typedef machine_uint_t mp_parse_node_t; // must be pointer size typedef struct _mp_parse_node_struct_t { - uint32_t source; // file identifier, and line number + uint32_t source_line; // line number in source file uint32_t kind_num_nodes; // parse node kind, and number of nodes mp_parse_node_t nodes[]; // nodes } mp_parse_node_struct_t; diff --git a/py/showbc.c b/py/showbc.c index 36393ce0cfe3e79645ad0802977f24da97f4abcc..ba7c309b31b6f163ed5cc390ae42b2a2c2c6c8de 100644 --- a/py/showbc.c +++ b/py/showbc.c @@ -18,6 +18,10 @@ void mp_show_byte_code(const byte *ip, int len) { const byte *ip_start = ip; + // get code info size + machine_uint_t code_info_size = ip[0] | (ip[1] << 8) | (ip[2] << 16) | (ip[3] << 24); + ip += code_info_size; + // decode prelude { uint n_local = *ip++; diff --git a/py/vm.c b/py/vm.c index f352b12262c398e316bab774c66493886b504851..cacb6aca0fba86c65f64f0c36efa0dbe0c8f1d13 100644 --- a/py/vm.c +++ b/py/vm.c @@ -38,8 +38,13 @@ mp_obj_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_arg assert(i < 8); state[n_state - 1 - i] = args[i]; } + const byte *ip = code; + // get code info size + machine_uint_t code_info_size = ip[0] | (ip[1] << 8) | (ip[2] << 16) | (ip[3] << 24); + ip += code_info_size; + // execute prelude to make any cells (closed over variables) { for (uint n_local = *ip++; n_local > 0; n_local--) { @@ -53,7 +58,7 @@ mp_obj_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_arg } // execute the byte code - if (mp_execute_byte_code_2(&ip, &state[n_state - 1], &sp)) { + if (mp_execute_byte_code_2(code, &ip, &state[n_state - 1], &sp)) { // it shouldn't yield assert(0); } @@ -65,7 +70,7 @@ mp_obj_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_arg // fastn has items in reverse order (fastn[0] is local[0], fastn[-1] is local[1], etc) // sp points to bottom of stack which grows up -bool mp_execute_byte_code_2(const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t **sp_in_out) { +bool mp_execute_byte_code_2(const byte *code_info, const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t **sp_in_out) { // careful: be sure to declare volatile any variables read in the exception handler (written is ok, I think) const byte *ip = *ip_in_out; @@ -79,12 +84,14 @@ bool mp_execute_byte_code_2(const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t ** volatile machine_uint_t currently_in_except_block = 0; // 0 or 1, to detect nested exceptions machine_uint_t exc_stack[8]; // on the exception stack we store (ip, sp | X) for each block, X = previous value of currently_in_except_block machine_uint_t *volatile exc_sp = &exc_stack[0] - 1; // stack grows up, exc_sp points to top of stack + const byte *volatile save_ip = ip; // this is so we can access ip in the exception handler without making ip volatile (which means the compiler can't keep it in a register in the main loop) // outer exception handling loop for (;;) { if (nlr_push(&nlr) == 0) { // loop to execute byte code for (;;) { + save_ip = ip; int op = *ip++; switch (op) { case MP_BC_LOAD_CONST_FALSE: @@ -518,6 +525,23 @@ bool mp_execute_byte_code_2(const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t ** } else { // exception occurred + // set file and line number that the exception occurred at + if (MP_OBJ_IS_TYPE(nlr.ret_val, &exception_type)) { + machine_uint_t code_info_size = code_info[0] | (code_info[1] << 8) | (code_info[2] << 16) | (code_info[3] << 24); + qstr = code_info[4] | (code_info[5] << 8) | (code_info[6] << 16) | (code_info[7] << 24); + machine_uint_t source_line = 1; + machine_uint_t bc = save_ip - code_info - code_info_size; + //printf("find %lu %d %d\n", bc, code_info[8], code_info[9]); + for (const byte* ci = code_info + 8; bc > ci[0]; ci += 2) { + bc -= ci[0]; + source_line += ci[1]; + if (ci[0] == 0 && ci[1] == 0) { + break; + } + } + mp_obj_exception_set_source_info(nlr.ret_val, qstr, source_line); + } + while (currently_in_except_block) { // nested exception diff --git a/stm/main.c b/stm/main.c index 4b6aaf76f94eb7da3b4b4db6e219adbcbe0796d0..c49fa42ff9a8c82673a003197db225519ea4641a 100644 --- a/stm/main.c +++ b/stm/main.c @@ -421,6 +421,7 @@ void do_repl(void) { qstr parse_exc_id; const char *parse_exc_msg; mp_parse_node_t pn = mp_parse(lex, MP_PARSE_SINGLE_INPUT, &parse_exc_id, &parse_exc_msg); + qstr source_name = mp_lexer_source_name(lex); if (pn == MP_PARSE_NODE_NULL) { // parse error @@ -430,7 +431,7 @@ void do_repl(void) { } else { // parse okay mp_lexer_free(lex); - mp_obj_t module_fun = mp_compile(pn, true); + mp_obj_t module_fun = mp_compile(pn, source_name, true); if (module_fun != mp_const_none) { nlr_buf_t nlr; uint32_t start = sys_tick_counter; @@ -465,6 +466,7 @@ bool do_file(const char *filename) { qstr parse_exc_id; const char *parse_exc_msg; mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT, &parse_exc_id, &parse_exc_msg); + qstr source_name = mp_lexer_source_name(lex); if (pn == MP_PARSE_NODE_NULL) { // parse error @@ -476,7 +478,7 @@ bool do_file(const char *filename) { mp_lexer_free(lex); - mp_obj_t module_fun = mp_compile(pn, false); + mp_obj_t module_fun = mp_compile(pn, source_name, false); if (module_fun == mp_const_none) { return false; } @@ -1095,7 +1097,7 @@ soft_reset: printf("pars;al=%u\n", m_get_total_bytes_allocated()); sys_tick_delay_ms(1000); //parse_node_show(pn, 0); - mp_obj_t module_fun = mp_compile(pn, false); + mp_obj_t module_fun = mp_compile(pn, 0, false); printf("comp;al=%u\n", m_get_total_bytes_allocated()); sys_tick_delay_ms(1000); diff --git a/teensy/main.c b/teensy/main.c index d5860cd9d48040c778f476ea9dad1047d7d6980e..0bc085aed9b69bdf1ef796c5a630bfe764ddc3ec 100644 --- a/teensy/main.c +++ b/teensy/main.c @@ -310,13 +310,14 @@ bool do_file(const char *filename) { } mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT); + qstr source_name = mp_lexer_source_name(lex); mp_lexer_free(lex); if (pn == MP_PARSE_NODE_NULL) { return false; } - mp_obj_t module_fun = mp_compile(pn, false); + mp_obj_t module_fun = mp_compile(pn, source_name, false); if (module_fun == mp_const_none) { return false; } diff --git a/unix-cpy/main.c b/unix-cpy/main.c index 7d56ceaf343ef57e354884f63ab61822aff1decc..1f54b6f479e6a5134f3342a1ddd633343cbc1178 100644 --- a/unix-cpy/main.c +++ b/unix-cpy/main.c @@ -48,7 +48,7 @@ void do_file(const char *file) { //printf("----------------\n"); // compile - mp_obj_t module_fun = mp_compile(pn, false); + mp_obj_t module_fun = mp_compile(pn, 0, false); //printf("----------------\n"); diff --git a/unix/main.c b/unix/main.c index d89f39da70e23728f7a0c978f8feaf44aa3268db..c33a4393769c27acf8e07f93c981b2470a5b9125 100644 --- a/unix/main.c +++ b/unix/main.c @@ -49,13 +49,14 @@ static void execute_from_lexer(mp_lexer_t *lex, mp_parse_input_kind_t input_kind return; } + qstr source_name = mp_lexer_source_name(lex); mp_lexer_free(lex); //printf("----------------\n"); - //parse_node_show(pn, 0); + //mp_parse_node_show(pn, 0); //printf("----------------\n"); - mp_obj_t module_fun = mp_compile(pn, is_repl); + mp_obj_t module_fun = mp_compile(pn, source_name, is_repl); if (module_fun == mp_const_none) { // compile error @@ -69,7 +70,14 @@ static void execute_from_lexer(mp_lexer_t *lex, mp_parse_input_kind_t input_kind nlr_pop(); } else { // uncaught exception - mp_obj_print((mp_obj_t)nlr.ret_val, PRINT_REPR); + mp_obj_t exc = (mp_obj_t)nlr.ret_val; + if (MP_OBJ_IS_TYPE(exc, &exception_type)) { + qstr file; + machine_uint_t line; + mp_obj_exception_get_source_info(exc, &file, &line); + printf("File \"%s\", line %d\n", qstr_str(file), (int)line); + } + mp_obj_print(exc, PRINT_REPR); printf("\n"); } } diff --git a/unix/mpconfigport.h b/unix/mpconfigport.h index daa18961c06e3bea8f06f8d63bdee2304436bd72..b36c1719690c10d8da0c57c71e2d6edc927bdb2a 100644 --- a/unix/mpconfigport.h +++ b/unix/mpconfigport.h @@ -13,6 +13,7 @@ #define MICROPY_ENABLE_LEXER_UNIX (1) #define MICROPY_ENABLE_FLOAT (1) #define MICROPY_LONGINT_IMPL (MICROPY_LONGINT_IMPL_LONGLONG) +#define MICROPY_SHOW_BC (0) // type definitions for the specific machine