Skip to content

Commit 9b1e97b

Browse files
hasumikinyui-knk
authored and committed
[Universal parser] DeVALUE of p->debug_lines and ast->body.script_lines
This patch is part of universal parser work.

## Summary
- Decouple VALUE from members below:
  - `(struct parser_params *)->debug_lines`
  - `(rb_ast_t *)->body.script_lines`
- Instead, they are now `rb_parser_ary_t *`
  - They can also be a `(VALUE)FIXNUM` as before to hold line count
- `ISEQ_BODY(iseq)->variable.script_lines` remains VALUE
  - In order to do this,
    - Add `VALUE script_lines` param to `rb_iseq_new_with_opt()`
    - Introduce `rb_parser_build_script_lines_from()` to convert `rb_parser_ary_t *` into `VALUE`

## Other details
- Extend `rb_parser_ary_t *`. It previously could only store `rb_parser_ast_token *`, now can store script_lines, too
- Change tactics of building the top-level `SCRIPT_LINES__` in `yycompile0()`
  - Before: While parsing, each line of the script is added to `SCRIPT_LINES__[path]`
  - After: After `yyparse(p)`, `SCRIPT_LINES__[path]` will be built from `p->debug_lines`
- Remove the second parameter of `rb_parser_set_script_lines()` to make it simple
- Introduce `script_lines_free()` to be called from `rb_ast_free()` because the GC no longer takes care of the script_lines
- Introduce `rb_parser_string_deep_copy()` in parse.y to maintain script_lines when `rb_ruby_parser_free()` is called
  - With regard to this, please see *Future tasks* below

## Future tasks
- Decouple IMEMO from `rb_ast_t *`
  - This lifts the five-members-restriction of Ruby object,
  - So we will be able to move the ownership of the `lex.string_buffer` from parser to AST
  - Then we remove `rb_parser_string_deep_copy()` to make the whole thing simple
1 parent bb1c341 commit 9b1e97b

File tree

17 files changed

+234
-110
lines changed

17 files changed

+234
-110
lines changed

ast.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ rb_ast_parse_str(VALUE str, VALUE keep_script_lines, VALUE error_tolerant, VALUE
9797

9898
StringValue(str);
9999
VALUE vparser = ast_parse_new();
100-
if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser, Qtrue);
100+
if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser);
101101
if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
102102
if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
103103
ast = rb_parser_compile_string_path(vparser, Qnil, str, 1);
@@ -120,7 +120,7 @@ rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VAL
120120
f = rb_file_open_str(path, "r");
121121
rb_funcall(f, rb_intern("set_encoding"), 2, rb_enc_from_encoding(enc), rb_str_new_cstr("-"));
122122
VALUE vparser = ast_parse_new();
123-
if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser, Qtrue);
123+
if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser);
124124
if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
125125
if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
126126
ast = rb_parser_compile_file_path(vparser, Qnil, f, 1);
@@ -148,7 +148,7 @@ rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, V
148148

149149
array = rb_check_array_type(array);
150150
VALUE vparser = ast_parse_new();
151-
if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser, Qtrue);
151+
if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser);
152152
if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
153153
if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
154154
ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1);
@@ -806,9 +806,9 @@ ast_node_script_lines(rb_execution_context_t *ec, VALUE self)
806806
{
807807
struct ASTNodeData *data;
808808
TypedData_Get_Struct(self, struct ASTNodeData, &rb_node_type, data);
809-
VALUE ret = data->ast->body.script_lines;
810-
if (!RB_TYPE_P(ret, T_ARRAY)) return Qnil;
811-
return ret;
809+
rb_parser_ary_t *ret = data->ast->body.script_lines;
810+
if (!ret || FIXNUM_P((VALUE)ret)) return Qnil;
811+
return rb_parser_build_script_lines_from(ret);
812812
}
813813

814814
#include "ast.rbinc"

compile.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1483,15 +1483,16 @@ new_child_iseq(rb_iseq_t *iseq, const NODE *const node,
14831483
ast.root = node;
14841484
ast.frozen_string_literal = -1;
14851485
ast.coverage_enabled = -1;
1486-
ast.script_lines = ISEQ_BODY(iseq)->variable.script_lines;
1486+
ast.script_lines = NULL;
14871487

14881488
debugs("[new_child_iseq]> ---------------------------------------\n");
14891489
int isolated_depth = ISEQ_COMPILE_DATA(iseq)->isolated_depth;
14901490
ret_iseq = rb_iseq_new_with_opt(&ast, name,
14911491
rb_iseq_path(iseq), rb_iseq_realpath(iseq),
14921492
line_no, parent,
14931493
isolated_depth ? isolated_depth + 1 : 0,
1494-
type, ISEQ_COMPILE_DATA(iseq)->option);
1494+
type, ISEQ_COMPILE_DATA(iseq)->option,
1495+
ISEQ_BODY(iseq)->variable.script_lines);
14951496
debugs("[new_child_iseq]< ---------------------------------------\n");
14961497
return ret_iseq;
14971498
}
@@ -8740,14 +8741,15 @@ compile_builtin_mandatory_only_method(rb_iseq_t *iseq, const NODE *node, const N
87408741
.root = RNODE(&scope_node),
87418742
.frozen_string_literal = -1,
87428743
.coverage_enabled = -1,
8743-
.script_lines = ISEQ_BODY(iseq)->variable.script_lines,
8744+
.script_lines = NULL
87448745
};
87458746

87468747
ISEQ_BODY(iseq)->mandatory_only_iseq =
87478748
rb_iseq_new_with_opt(&ast, rb_iseq_base_label(iseq),
87488749
rb_iseq_path(iseq), rb_iseq_realpath(iseq),
87498750
nd_line(line_node), NULL, 0,
8750-
ISEQ_TYPE_METHOD, ISEQ_COMPILE_DATA(iseq)->option);
8751+
ISEQ_TYPE_METHOD, ISEQ_COMPILE_DATA(iseq)->option,
8752+
ISEQ_BODY(iseq)->variable.script_lines);
87518753

87528754
ALLOCV_END(idtmp);
87538755
return COMPILE_OK;

imemo.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -274,7 +274,7 @@ rb_imemo_mark_and_move(VALUE obj, bool reference_updating)
274274
{
275275
switch (imemo_type(obj)) {
276276
case imemo_ast:
277-
rb_ast_mark_and_move((rb_ast_t *)obj, reference_updating);
277+
// TODO: Make AST decoupled from IMEMO
278278

279279
break;
280280
case imemo_callcache: {

internal/parse.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ size_t rb_ruby_parser_memsize(const void *ptr);
5151

5252
void rb_ruby_parser_set_options(rb_parser_t *p, int print, int loop, int chomp, int split);
5353
rb_parser_t *rb_ruby_parser_set_context(rb_parser_t *p, const struct rb_iseq_struct *base, int main);
54-
void rb_ruby_parser_set_script_lines(rb_parser_t *p, VALUE lines_array);
54+
void rb_ruby_parser_set_script_lines(rb_parser_t *p);
5555
void rb_ruby_parser_error_tolerant(rb_parser_t *p);
5656
rb_ast_t* rb_ruby_parser_compile_file_path(rb_parser_t *p, VALUE fname, VALUE file, int start);
5757
void rb_ruby_parser_keep_tokens(rb_parser_t *p);

internal/ruby_parser.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,11 @@ RUBY_SYMBOL_EXPORT_END
3939
VALUE rb_parser_end_seen_p(VALUE);
4040
VALUE rb_parser_encoding(VALUE);
4141
VALUE rb_parser_set_yydebug(VALUE, VALUE);
42+
VALUE rb_parser_build_script_lines_from(rb_parser_ary_t *script_lines);
43+
void rb_parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *script_lines);
4244
void rb_parser_set_options(VALUE, int, int, int, int);
4345
void *rb_parser_load_file(VALUE parser, VALUE name);
44-
void rb_parser_set_script_lines(VALUE vparser, VALUE lines_array);
46+
void rb_parser_set_script_lines(VALUE vparser);
4547
void rb_parser_error_tolerant(VALUE vparser);
4648
void rb_parser_keep_tokens(VALUE vparser);
4749

iseq.c

Lines changed: 24 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -839,20 +839,21 @@ rb_iseq_new(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath,
839839
const rb_iseq_t *parent, enum rb_iseq_type type)
840840
{
841841
return rb_iseq_new_with_opt(ast, name, path, realpath, 0, parent,
842-
0, type, &COMPILE_OPTION_DEFAULT);
842+
0, type, &COMPILE_OPTION_DEFAULT,
843+
Qnil);
843844
}
844845

845846
static int
846847
ast_line_count(const rb_ast_body_t *ast)
847848
{
848-
if (ast->script_lines == Qfalse) {
849+
if (ast->script_lines == NULL) {
849850
// this occurs when failed to parse the source code with a syntax error
850851
return 0;
851852
}
852-
if (RB_TYPE_P(ast->script_lines, T_ARRAY)){
853-
return (int)RARRAY_LEN(ast->script_lines);
853+
if (!FIXNUM_P((VALUE)ast->script_lines)) {
854+
return (int)ast->script_lines->len;
854855
}
855-
return FIX2INT(ast->script_lines);
856+
return FIX2INT((VALUE)ast->script_lines);
856857
}
857858

858859
static VALUE
@@ -888,7 +889,8 @@ rb_iseq_new_top(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath
888889
iseq_new_setup_coverage(path, ast, 0);
889890

890891
return rb_iseq_new_with_opt(ast, name, path, realpath, 0, parent, 0,
891-
ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT);
892+
ISEQ_TYPE_TOP, &COMPILE_OPTION_DEFAULT,
893+
Qnil);
892894
}
893895

894896
/**
@@ -910,7 +912,8 @@ rb_iseq_new_main(const rb_ast_body_t *ast, VALUE path, VALUE realpath, const rb_
910912

911913
return rb_iseq_new_with_opt(ast, rb_fstring_lit("<main>"),
912914
path, realpath, 0,
913-
parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE);
915+
parent, 0, ISEQ_TYPE_MAIN, opt ? &COMPILE_OPTION_DEFAULT : &COMPILE_OPTION_FALSE,
916+
Qnil);
914917
}
915918

916919
/**
@@ -938,7 +941,8 @@ rb_iseq_new_eval(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpat
938941
}
939942

940943
return rb_iseq_new_with_opt(ast, name, path, realpath, first_lineno,
941-
parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT);
944+
parent, isolated_depth, ISEQ_TYPE_EVAL, &COMPILE_OPTION_DEFAULT,
945+
Qnil);
942946
}
943947

944948
rb_iseq_t *
@@ -966,7 +970,8 @@ iseq_translate(rb_iseq_t *iseq)
966970
rb_iseq_t *
967971
rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE realpath,
968972
int first_lineno, const rb_iseq_t *parent, int isolated_depth,
969-
enum rb_iseq_type type, const rb_compile_option_t *option)
973+
enum rb_iseq_type type, const rb_compile_option_t *option,
974+
VALUE script_lines)
970975
{
971976
const NODE *node = ast ? ast->root : 0;
972977
/* TODO: argument check */
@@ -979,10 +984,11 @@ rb_iseq_new_with_opt(const rb_ast_body_t *ast, VALUE name, VALUE path, VALUE rea
979984
option = set_compile_option_from_ast(&new_opt, ast);
980985
}
981986

982-
VALUE script_lines = Qnil;
983-
984-
if (ast && !FIXNUM_P(ast->script_lines) && ast->script_lines) {
985-
script_lines = ast->script_lines;
987+
if (!NIL_P(script_lines)) {
988+
// noop
989+
}
990+
else if (ast && !FIXNUM_P((VALUE)ast->script_lines) && ast->script_lines) {
991+
script_lines = rb_parser_build_script_lines_from(ast->script_lines);
986992
}
987993
else if (parent) {
988994
script_lines = ISEQ_BODY(parent)->variable.script_lines;
@@ -1225,7 +1231,7 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, V
12251231
const rb_iseq_t *outer_scope = rb_iseq_new(NULL, name, name, Qnil, 0, ISEQ_TYPE_TOP);
12261232
VALUE outer_scope_v = (VALUE)outer_scope;
12271233
rb_parser_set_context(parser, outer_scope, FALSE);
1228-
rb_parser_set_script_lines(parser, RBOOL(ruby_vm_keep_script_lines));
1234+
if (ruby_vm_keep_script_lines) rb_parser_set_script_lines(parser);
12291235
RB_GC_GUARD(outer_scope_v);
12301236
ast = (*parse)(parser, file, src, ln);
12311237
}
@@ -1236,7 +1242,8 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, V
12361242
}
12371243
else {
12381244
iseq = rb_iseq_new_with_opt(&ast->body, name, file, realpath, ln,
1239-
NULL, 0, ISEQ_TYPE_TOP, &option);
1245+
NULL, 0, ISEQ_TYPE_TOP, &option,
1246+
Qnil);
12401247
rb_ast_dispose(ast);
12411248
}
12421249

@@ -1627,7 +1634,8 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
16271634
ret = iseqw_new(rb_iseq_new_with_opt(&ast->body, rb_fstring_lit("<main>"),
16281635
file,
16291636
rb_realpath_internal(Qnil, file, 1),
1630-
1, NULL, 0, ISEQ_TYPE_TOP, &option));
1637+
1, NULL, 0, ISEQ_TYPE_TOP, &option,
1638+
Qnil));
16311639
rb_ast_dispose(ast);
16321640

16331641
rb_vm_pop_frame(ec);

mini_builtin.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ builtin_iseq_load(const char *feature_name, const struct rb_builtin_function *ta
3939
.coverage_enabled = FALSE,
4040
.debug_level = 0,
4141
};
42-
const rb_iseq_t *iseq = rb_iseq_new_with_opt(&ast->body, name_str, name_str, Qnil, 0, NULL, 0, ISEQ_TYPE_TOP, &optimization);
42+
const rb_iseq_t *iseq = rb_iseq_new_with_opt(&ast->body, name_str, name_str, Qnil, 0, NULL, 0, ISEQ_TYPE_TOP, &optimization, Qnil);
4343
GET_VM()->builtin_function_table = NULL;
4444

4545
rb_ast_dispose(ast);

node.c

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,13 @@
2020

2121
#include "internal.h"
2222
#include "internal/hash.h"
23-
#include "internal/variable.h"
2423
#include "ruby/ruby.h"
2524
#include "vm_core.h"
2625

2726
#endif
2827

28+
#include "internal/variable.h"
29+
2930
#define NODE_BUF_DEFAULT_SIZE (sizeof(struct RNode) * 16)
3031

3132
static void
@@ -344,18 +345,24 @@ iterate_node_values(rb_ast_t *ast, node_buffer_list_t *nb, node_itr_t * func, vo
344345
}
345346
}
346347

347-
void
348-
rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating)
348+
static void
349+
script_lines_free(rb_ast_t *ast, rb_parser_ary_t *script_lines)
349350
{
350-
if (ast->node_buffer) {
351-
if (ast->body.script_lines) rb_gc_mark_and_move(&ast->body.script_lines);
351+
for (long i = 0; i < script_lines->len; i++) {
352+
parser_string_free(ast, (rb_parser_string_t *)script_lines->data[i]);
352353
}
354+
xfree(script_lines->data);
355+
xfree(script_lines);
353356
}
354357

355358
void
356359
rb_ast_free(rb_ast_t *ast)
357360
{
358361
if (ast->node_buffer) {
362+
if (ast->body.script_lines && !FIXNUM_P((VALUE)ast->body.script_lines)) {
363+
script_lines_free(ast, ast->body.script_lines);
364+
ast->body.script_lines = NULL;
365+
}
359366
rb_node_buffer_free(ast, ast->node_buffer);
360367
ast->node_buffer = 0;
361368
}

node.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,6 @@ void rb_ast_dispose(rb_ast_t*);
5656
const char *ruby_node_name(int node);
5757
void rb_node_init(NODE *n, enum node_type type);
5858

59-
void rb_ast_mark_and_move(rb_ast_t *ast, bool reference_updating);
6059
void rb_ast_update_references(rb_ast_t*);
6160
void rb_ast_free(rb_ast_t*);
6261
NODE *rb_ast_newnode(rb_ast_t*, enum node_type type, size_t size, size_t alignment);

0 commit comments

Comments
 (0)