Skip to content

Commit

Permalink
add gc roots and gc finalist roots to fix unrooted nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
JianFangAtRai committed Dec 21, 2023
1 parent dad5588 commit 1639884
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 14 deletions.
140 changes: 134 additions & 6 deletions src/gc-heap-snapshot.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
#include <vector>
#include <string>
#include <sstream>
#include <iostream>
#include <set>

using std::vector;
using std::string;
using std::set;
using std::ostringstream;
using std::pair;
using std::make_pair;
Expand Down Expand Up @@ -70,7 +73,7 @@ struct Node {
size_t id; // This should be a globally-unique counter, but we use the memory address
size_t self_size;
size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots.
// whether the from_node is attached or dettached from the main application state
// whether the from_node is attached or detached from the main application state
// https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745
int detachedness; // 0 - unknown, 1 - attached, 2 - detached
vector<Edge> edges;
Expand Down Expand Up @@ -115,6 +118,8 @@ struct HeapSnapshot {
DenseMap<void *, size_t> node_ptr_to_index_map;

size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes.
size_t _gc_root_idx = 1; // node index of the GC roots node
size_t _gc_finlist_root_idx = 2; // node index of the GC finlist roots node
};

// global heap snapshot, mutated by garbage collector
Expand All @@ -127,13 +132,13 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one
static inline void _record_gc_edge(const char *edge_type,
jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT;
void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT;
void _add_internal_root(HeapSnapshot *snapshot);
void _add_synthetic_root_entries(HeapSnapshot *snapshot);


JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one)
{
HeapSnapshot snapshot;
_add_internal_root(&snapshot);
_add_synthetic_root_entries(&snapshot);

jl_mutex_lock(&heapsnapshot_lock);

Expand All @@ -155,10 +160,12 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one)
serialize_heap_snapshot((ios_t*)stream, snapshot, all_one);
}

// adds a node at id 0 which is the "uber root":
// a synthetic node which points to all the GC roots.
void _add_internal_root(HeapSnapshot *snapshot)
// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L212
// add synthetic nodes for the uber root, the GC roots, and the GC finalizer list roots
void _add_synthetic_root_entries(HeapSnapshot *snapshot)
{
// adds a node at id 0 which is the "uber root":
// a synthetic node which points to all the GC roots.
Node internal_root{
snapshot->node_types.find_or_create_string_id("synthetic"),
snapshot->names.find_or_create_string_id(""), // name
Expand All @@ -169,6 +176,44 @@ void _add_internal_root(HeapSnapshot *snapshot)
vector<Edge>() // outgoing edges
};
snapshot->nodes.push_back(internal_root);

// Add a node for the GC roots
snapshot->_gc_root_idx = snapshot->nodes.size();
Node gc_roots{
snapshot->node_types.find_or_create_string_id("synthetic"),
snapshot->names.find_or_create_string_id("GC roots"), // name
snapshot->_gc_root_idx, // id
0, // size
0, // size_t trace_node_id (unused)
0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached
vector<Edge>() // outgoing edges
};
snapshot->nodes.push_back(gc_roots);
snapshot->nodes.front().edges.push_back(Edge{
snapshot->edge_types.find_or_create_string_id("internal"),
snapshot->names.find_or_create_string_id("GC roots"), // edge label
snapshot->_gc_root_idx // to
});
snapshot->num_edges += 1;

// add a node for the gc finalizer list roots
snapshot->_gc_finlist_root_idx = snapshot->nodes.size();
Node gc_finlist_roots{
snapshot->node_types.find_or_create_string_id("synthetic"),
snapshot->names.find_or_create_string_id("GC finlist roots"), // name
snapshot->_gc_finlist_root_idx, // id
0, // size
0, // size_t trace_node_id (unused)
0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached
vector<Edge>() // outgoing edges
};
snapshot->nodes.push_back(gc_finlist_roots);
snapshot->nodes.front().edges.push_back(Edge{
snapshot->edge_types.find_or_create_string_id("internal"),
snapshot->names.find_or_create_string_id("GC finlist roots"), // edge label
snapshot->_gc_finlist_root_idx // to
});
snapshot->num_edges += 1;
}

// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597
Expand Down Expand Up @@ -326,6 +371,26 @@ void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT
_record_gc_just_edge("internal", internal_root, to_node_idx, edge_label);
}

// Add an "internal" edge from the synthetic GC roots node to `root`.
//
// root: object handed to record_node_to_gc_snapshot to obtain its node index.
// name: text interned in the snapshot's `names` table and used as the edge label.
void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT
{
    auto from_node_idx = g_snapshot->_gc_root_idx;
    // A single call suffices: its return value is the node index for `root`.
    auto to_node_idx = record_node_to_gc_snapshot(root);
    auto edge_label = g_snapshot->names.find_or_create_string_id(name);
    // The source node is looked up only after the target has been recorded
    // (presumably recording can grow `nodes` -- TODO confirm).
    _record_gc_just_edge("internal", g_snapshot->nodes[from_node_idx], to_node_idx, edge_label);
}

// Add an "internal" edge from the synthetic GC finlist roots node to `obj`.
//
// obj:   object handed to record_node_to_gc_snapshot to obtain its node index.
// index: number appended to the edge label, which has the form "finlist-<index>".
void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEPOINT
{
    // Resolve the edge target first.
    auto target_idx = record_node_to_gc_snapshot(obj);

    // Build the label text and intern it in the names table.
    ostringstream label;
    label << "finlist-" << index;
    auto label_id = g_snapshot->names.find_or_create_string_id(label.str());

    // The edge originates at the finlist roots node.
    Node &finlist_root = g_snapshot->nodes[g_snapshot->_gc_finlist_root_idx];
    _record_gc_just_edge("internal", finlist_root, target_idx, label_id);
}

// Add a node to the heap snapshot representing a Julia stack frame.
// Each task points at a stack frame, which points at the stack frame of
// the function it's currently calling, forming a linked list.
Expand Down Expand Up @@ -468,6 +533,28 @@ void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx,
g_snapshot->num_edges += 1;
}

// Render a std::set as a JSON-style array of quoted elements, e.g. ["1","2"].
// Elements are written with operator<< in the set's iteration order and are
// wrapped in double quotes as-is; no escaping is applied.
template <typename T>
std::string to_json(const std::set<T>& set) {
    std::stringstream out;
    out << "[";

    // Empty before the first element, a comma before every later one.
    const char *separator = "";
    for (auto it = set.begin(); it != set.end(); ++it) {
        out << separator << "\"" << *it << "\"";
        separator = ",";
    }

    out << "]";
    return out.str();
}

// Return the entry at position `id` of `table.strings`, converted with .str().
// The index is used as given; no bounds check is performed here.
std::string get_string(StringTable &table, size_t id) {
    auto &&entry = table.strings[id];
    return entry.str();
}

void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one)
{
// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567
Expand All @@ -490,6 +577,8 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one

ios_printf(stream, "\"nodes\":[");
bool first_node = true;
// use a set to track the nodes that do not have parents
set<size_t> orphans;
for (const auto &from_node : snapshot.nodes) {
if (first_node) {
first_node = false;
Expand All @@ -506,6 +595,14 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one
from_node.edges.size(),
from_node.trace_node_id,
from_node.detachedness);
if (from_node.id != snapshot._gc_root_idx && from_node.id != snapshot._gc_finlist_root_idx) {
// find the node index from the node object pointer
void * ptr = (void*)from_node.id;
size_t n_id = snapshot.node_ptr_to_index_map[ptr];
orphans.insert(n_id);
} else {
orphans.insert(from_node.id);
}
}
ios_printf(stream, "],\n");

Expand All @@ -523,6 +620,12 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one
edge.type,
edge.name_or_index,
edge.to_node * k_node_number_of_fields);
auto n_id = edge.to_node;
auto it = orphans.find(n_id);
if (it != orphans.end()) {
// remove the node from the orphans if it has at least one incoming edge
orphans.erase(it);
}
}
}
ios_printf(stream, "],\n"); // end "edges"
Expand All @@ -532,4 +635,29 @@ void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one
snapshot.names.print_json_array(stream, true);

ios_printf(stream, "}");

// remove the uber node from the orphans
orphans.erase(0);
// print out the orphans in case that we have any
std::cout << "node count: " << snapshot.nodes.size() << "\n";
std::cout << "edge count: " << snapshot.num_edges << "\n";
std::cout << "orphan node count: " << orphans.size() << "\n";
std::cout << "orphan nodes: " << to_json(orphans) << "\n";
for (const auto &from_node : snapshot.nodes) {
size_t n_id = from_node.id;
if (from_node.id != snapshot._gc_root_idx && from_node.id != snapshot._gc_finlist_root_idx) {
void * ptr = (void*)from_node.id;
n_id = snapshot.node_ptr_to_index_map[ptr];
}
if (orphans.find(n_id) != orphans.end()) {
std::cout << "orphan node: {type:(" << from_node.type << "," << get_string(snapshot.node_types, from_node.type) << ")"
<< ", name:(" << from_node.name << "," << get_string(snapshot.names, from_node.name) << ")"
<< ", id:(" << std::showbase << std::hex << from_node.id << "," << std::dec << n_id << ")"
<< ", self_size:" << (all_one ? (size_t)1 : from_node.self_size)
<< ", edge_count:" << from_node.edges.size()
<< ", trace_node_id:" << from_node.trace_node_id
<< ", detachedness:" << from_node.detachedness
<< "}\n";
}
}
}
25 changes: 24 additions & 1 deletion src/gc-heap-snapshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *
// Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the
// size of the object, even though we're never going to mark that object.
void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT;

// Used for objects that are reachable from the GC roots
void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT;
// Used for objects that are reachable from the finalizer list
void _gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT;

extern int gc_heap_snapshot_enabled;
extern int prev_sweep_full;
Expand Down Expand Up @@ -60,6 +63,12 @@ static inline void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL
_gc_heap_snapshot_record_root(root, name);
}
}
// Forward (from, to, index) to _gc_heap_snapshot_record_array_edge.
// Nothing is recorded unless gc_heap_snapshot_enabled and prev_sweep_full are
// both nonzero and both `from` and `to` are non-NULL.
static inline void gc_heap_snapshot_record_array_edge_index(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT
{
    const int should_record = gc_heap_snapshot_enabled && prev_sweep_full && from != NULL && to != NULL;
    // The recording branch is hinted as the unlikely one.
    if (__unlikely(should_record))
        _gc_heap_snapshot_record_array_edge(from, to, index);
}
static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t **to) JL_NOTSAFEPOINT
{
if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) {
Expand Down Expand Up @@ -94,6 +103,20 @@ static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* t
}
}

// Forward (root, name) to _gc_heap_snapshot_record_gc_roots.
// Nothing is recorded unless gc_heap_snapshot_enabled and prev_sweep_full are
// both nonzero and `root` is non-NULL.
static inline void gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT
{
    const int should_record = gc_heap_snapshot_enabled && prev_sweep_full && root != NULL;
    // The recording branch is hinted as the unlikely one.
    if (__unlikely(should_record))
        _gc_heap_snapshot_record_gc_roots(root, name);
}

// Forward (finlist, index) to _gc_heap_snapshot_record_finlist.
// Nothing is recorded unless gc_heap_snapshot_enabled and prev_sweep_full are
// both nonzero and `finlist` is non-NULL.
static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t index) JL_NOTSAFEPOINT
{
    const int should_record = gc_heap_snapshot_enabled && prev_sweep_full && finlist != NULL;
    // The recording branch is hinted as the unlikely one.
    if (__unlikely(should_record))
        _gc_heap_snapshot_record_finlist(finlist, index);
}

// ---------------------------------------------------------------------
// Functions to call from Julia to take heap snapshot
// ---------------------------------------------------------------------
Expand Down
31 changes: 25 additions & 6 deletions src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2352,9 +2352,10 @@ STATIC_INLINE void gc_mark_chunk(jl_ptls_t ptls, jl_gc_markqueue_t *mq, jl_gc_ch
break;
}
case GC_finlist_chunk: {
jl_value_t *fl_parent = c->parent;
jl_value_t **fl_begin = c->begin;
jl_value_t **fl_end = c->end;
gc_mark_finlist_(mq, fl_begin, fl_end);
gc_mark_finlist_(mq, fl_parent, fl_begin, fl_end);
break;
}
default: {
Expand Down Expand Up @@ -2491,7 +2492,7 @@ STATIC_INLINE void gc_mark_module_binding(jl_ptls_t ptls, jl_module_t *mb_parent
}
}

void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end)
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end)
{
jl_value_t *new_obj;
// Decide whether need to chunk finlist
Expand All @@ -2501,8 +2502,10 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
gc_chunkqueue_push(mq, &c);
fl_end = fl_begin + GC_CHUNK_BATCH_SIZE;
}
size_t i = 0;
for (; fl_begin < fl_end; fl_begin++) {
new_obj = *fl_begin;
jl_value_t **slot = fl_begin;
new_obj = *slot;
if (__unlikely(!new_obj))
continue;
if (gc_ptr_tag(new_obj, 1)) {
Expand All @@ -2513,6 +2516,13 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
if (gc_ptr_tag(new_obj, 2))
continue;
gc_try_claim_and_push(mq, new_obj, NULL);
if (fl_parent != NULL) {
gc_heap_snapshot_record_array_edge(fl_parent, slot);
} else {
// `fl_parent` is NULL here, so this is a list of objects that
// follows the same format as a finlist
gc_heap_snapshot_record_finlist(new_obj, ++i);
}
}
}

Expand All @@ -2524,7 +2534,7 @@ void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start)
return;
jl_value_t **fl_begin = (jl_value_t **)list->items + start;
jl_value_t **fl_end = (jl_value_t **)list->items + len;
gc_mark_finlist_(mq, fl_begin, fl_end);
gc_mark_finlist_(mq, NULL, fl_begin, fl_end);
}

JL_DLLEXPORT int jl_gc_mark_queue_obj(jl_ptls_t ptls, jl_value_t *obj)
Expand Down Expand Up @@ -3187,27 +3197,36 @@ static void gc_mark_roots(jl_gc_markqueue_t *mq)
{
// modules
gc_try_claim_and_push(mq, jl_main_module, NULL);
gc_heap_snapshot_record_root((jl_value_t*)jl_main_module, "main_module");
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_main_module, "main_module");
// invisible builtin values
gc_try_claim_and_push(mq, jl_an_empty_vec_any, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_an_empty_vec_any, "an_empty_vec_any");
gc_try_claim_and_push(mq, jl_module_init_order, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_module_init_order, "module_init_order");
for (size_t i = 0; i < jl_current_modules.size; i += 2) {
if (jl_current_modules.table[i + 1] != HT_NOTFOUND) {
gc_try_claim_and_push(mq, jl_current_modules.table[i], NULL);
gc_heap_snapshot_record_root((jl_value_t*)jl_current_modules.table[i], "top level module");
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_current_modules.table[i], "top level module");
}
}
gc_try_claim_and_push(mq, jl_anytuple_type_type, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_anytuple_type_type, "anytuple_type_type");
for (size_t i = 0; i < N_CALL_CACHE; i++) {
jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]);
gc_try_claim_and_push(mq, v, NULL);
gc_heap_snapshot_record_array_edge_index((jl_value_t*)jl_anytuple_type_type, (jl_value_t*)v, i);
}
gc_try_claim_and_push(mq, jl_all_methods, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_all_methods, "all_methods");
gc_try_claim_and_push(mq, _jl_debug_method_invalidation, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)_jl_debug_method_invalidation, "debug_method_invalidation");
// constants
gc_try_claim_and_push(mq, jl_emptytuple_type, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_emptytuple_type, "emptytuple_type");
gc_try_claim_and_push(mq, cmpswap_names, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)cmpswap_names, "cmpswap_names");
gc_try_claim_and_push(mq, jl_global_roots_table, NULL);
gc_heap_snapshot_record_gc_roots((jl_value_t*)jl_global_roots_table, "global_roots_table");
}

// find unmarked objects that need to be finalized from the finalizer list "list".
Expand Down
2 changes: 1 addition & 1 deletion src/gc.h
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ extern uv_cond_t gc_threads_cond;
extern _Atomic(int) gc_n_threads_marking;
extern _Atomic(int) gc_n_threads_sweeping;
void gc_mark_queue_all_roots(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;
void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t *fl_parent, jl_value_t **fl_begin, jl_value_t **fl_end) JL_NOTSAFEPOINT;
void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
void gc_mark_loop_serial(jl_ptls_t ptls);
Expand Down

0 comments on commit 1639884

Please sign in to comment.