Skip to content

Commit

Permalink
gc: improve mallocarrays locality
Browse files Browse the repository at this point in the history
small_arraylist_t has much better memory locality and space utilization
than a linked list with individually malloc'd elements
  • Loading branch information
vtjnash committed Dec 12, 2024
1 parent 9118ea7 commit ed028da
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 46 deletions.
13 changes: 2 additions & 11 deletions src/gc-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -557,17 +557,8 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
// tracking Memory objects with malloc'd storage
void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
// This is **NOT** a GC safe point.
mallocmemory_t *ma;
if (ptls->gc_tls_common.heap.mafreelist == NULL) {
ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t));
}
else {
ma = ptls->gc_tls_common.heap.mafreelist;
ptls->gc_tls_common.heap.mafreelist = ma->next;
}
ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned);
ma->next = ptls->gc_tls_common.heap.mallocarrays;
ptls->gc_tls_common.heap.mallocarrays = ma;
void *a = (void*)((uintptr_t)m | !!isaligned);
small_arraylist_push(&ptls->gc_tls_common.heap.mallocarrays, a);
}

// =========================================================================== //
Expand Down
6 changes: 0 additions & 6 deletions src/gc-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,6 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
// malloc wrappers, aligned allocation
// =========================================================================== //

// data structure for tracking malloc'd genericmemory.
typedef struct _mallocmemory_t {
jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory
struct _mallocmemory_t *next;
} mallocmemory_t;

#if defined(_OS_WINDOWS_)
STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
{
Expand Down
10 changes: 4 additions & 6 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -1025,12 +1025,11 @@ void gc_stats_big_obj(void)
v = v->next;
}

mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays;
while (ma != NULL) {
uint8_t bits =jl_astaggedvalue(ma->a)->bits.gc;
void **lst = ptls2->gc_tls.heap.mallocarrays.items;
for (size_t i = 0, l = ptls2->gc_tls.heap.mallocarrays.len; i < l; i++) {
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[i] & ~(uintptr_t)1);
uint8_t bits = jl_astaggedvalue(m)->bits.gc;
if (gc_marked(bits)) {
jl_genericmemory_t *m = (jl_genericmemory_t*)ma->a;
m = (jl_genericmemory_t*)((uintptr_t)m & ~(uintptr_t)1);
size_t sz = jl_genericmemory_nbytes(m);
if (gc_old(bits)) {
assert(bits == GC_OLD_MARKED);
Expand All @@ -1042,7 +1041,6 @@ void gc_stats_big_obj(void)
stat.nbytes_used += sz;
}
}
ma = ma->next;
}
}
jl_safe_printf("%lld kB (%lld%% old) in %lld large objects (%lld%% old)\n",
Expand Down
36 changes: 16 additions & 20 deletions src/gc-stock.c
Original file line number Diff line number Diff line change
Expand Up @@ -629,10 +629,9 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
reset_thread_gc_counts();
}

static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
static void jl_gc_free_memory(jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT
{
assert(jl_is_genericmemory(v));
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
assert(jl_is_genericmemory(m));
assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
char *d = (char*)m->ptr;
size_t freed_bytes = memory_block_usable_size(d, isaligned);
Expand All @@ -654,25 +653,23 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays;
mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays;
while (ma != NULL) {
mallocmemory_t *nxt = ma->next;
jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
int bits = jl_astaggedvalue(a)->bits.gc;
if (gc_marked(bits)) {
pma = &ma->next;
size_t n = 0;
size_t l = ptls2->gc_tls_common.heap.mallocarrays.len;
void **lst = ptls2->gc_tls_common.heap.mallocarrays.items;
// filter in place without preserving order: a freed entry is overwritten by the last entry
while (n < l) {
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
if (gc_marked(jl_astaggedvalue(m)->bits.gc)) {
n++;
}
else {
*pma = nxt;
int isaligned = (uintptr_t)ma->a & 1;
jl_gc_free_memory(a, isaligned);
ma->next = ptls2->gc_tls_common.heap.mafreelist;
ptls2->gc_tls_common.heap.mafreelist = ma;
int isaligned = (uintptr_t)lst[n] & 1;
jl_gc_free_memory(m, isaligned);
l--;
lst[n] = lst[l];
}
gc_time_count_mallocd_memory(bits);
ma = nxt;
}
ptls2->gc_tls_common.heap.mallocarrays.len = l;
}
}
gc_time_mallocd_memory_end();
Expand Down Expand Up @@ -3439,8 +3436,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
small_arraylist_new(&common_heap->live_tasks, 0);
for (int i = 0; i < JL_N_STACK_POOLS; i++)
small_arraylist_new(&common_heap->free_stacks[i], 0);
common_heap->mallocarrays = NULL;
common_heap->mafreelist = NULL;
small_arraylist_new(&common_heap->mallocarrays, 0);
heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized
heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag;
Expand Down
5 changes: 2 additions & 3 deletions src/gc-tls-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ typedef struct {
// that are holding onto a stack from the pool
small_arraylist_t live_tasks;

// variables for tracking malloc'd arrays
struct _mallocmemory_t *mallocarrays;
struct _mallocmemory_t *mafreelist;
// variable for tracking malloc'd arrays
small_arraylist_t mallocarrays;

#define JL_N_STACK_POOLS 16
small_arraylist_t free_stacks[JL_N_STACK_POOLS];
Expand Down

0 comments on commit ed028da

Please sign in to comment.