Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gc: improve mallocarrays locality #56801

Merged
merged 1 commit into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions src/gc-common.c
Original file line number Diff line number Diff line change
Expand Up @@ -557,17 +557,8 @@ size_t jl_genericmemory_nbytes(jl_genericmemory_t *m) JL_NOTSAFEPOINT
// tracking Memorys with malloc'd storage
//
// Records `m` in the `mallocarrays` collection kept in `ptls->gc_tls_common.heap`.
// The lowest bit of the stored pointer is set when `isaligned` is non-zero.
//
// NOTE(review): this is a diff hunk rendered without +/- markers, so two
// versions of the body are interleaved. Going by the "2 additions & 11 deletions"
// summary, the `mallocmemory_t` linked-list statements look like the removed
// version and the two lines ending in `small_arraylist_push` like the added one.
void jl_gc_track_malloced_genericmemory(jl_ptls_t ptls, jl_genericmemory_t *m, int isaligned){
// This is **NOT** a GC safe point.
mallocmemory_t *ma;
// Linked-list version: reuse a node from the `mafreelist` free list when one is
// available, otherwise allocate a fresh node.
if (ptls->gc_tls_common.heap.mafreelist == NULL) {
ma = (mallocmemory_t*)malloc_s(sizeof(mallocmemory_t));
}
else {
ma = ptls->gc_tls_common.heap.mafreelist;
ptls->gc_tls_common.heap.mafreelist = ma->next;
}
// `!!isaligned` normalizes the flag to 0 or 1 before it is OR'd into the pointer.
ma->a = (jl_genericmemory_t*)((uintptr_t)m | !!isaligned);
// Link the node in at the head of the `mallocarrays` list.
ma->next = ptls->gc_tls_common.heap.mallocarrays;
ptls->gc_tls_common.heap.mallocarrays = ma;
// Array-list version: tag the pointer the same way and hand it to small_arraylist_push.
void *a = (void*)((uintptr_t)m | !!isaligned);
small_arraylist_push(&ptls->gc_tls_common.heap.mallocarrays, a);
}

// =========================================================================== //
Expand Down
6 changes: 0 additions & 6 deletions src/gc-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,6 @@ extern jl_gc_callback_list_t *gc_cblist_notify_gc_pressure;
// malloc wrappers, aligned allocation
// =========================================================================== //

// data structure for tracking malloc'd genericmemory.
// One node of a singly linked list: `a` is the tracked memory object and `next`
// points to the following node.
// NOTE(review): the hunk summary for this file reads "0 additions & 6 deletions",
// so this typedef appears to be removed by the change shown here.
typedef struct _mallocmemory_t {
jl_genericmemory_t *a; // lowest bit is tagged if this is aligned memory
struct _mallocmemory_t *next;
} mallocmemory_t;

#if defined(_OS_WINDOWS_)
STATIC_INLINE void *jl_malloc_aligned(size_t sz, size_t align)
{
Expand Down
10 changes: 4 additions & 6 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -1025,12 +1025,11 @@ void gc_stats_big_obj(void)
v = v->next;
}

mallocmemory_t *ma = ptls2->gc_tls.heap.mallocarrays;
while (ma != NULL) {
uint8_t bits =jl_astaggedvalue(ma->a)->bits.gc;
void **lst = ptls2->gc_tls.heap.mallocarrays.items;
for (size_t i = 0, l = ptls2->gc_tls.heap.mallocarrays.len; i < l; i++) {
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[i] & ~(uintptr_t)1);
uint8_t bits = jl_astaggedvalue(m)->bits.gc;
if (gc_marked(bits)) {
jl_genericmemory_t *m = (jl_genericmemory_t*)ma->a;
m = (jl_genericmemory_t*)((uintptr_t)m & ~(uintptr_t)1);
size_t sz = jl_genericmemory_nbytes(m);
if (gc_old(bits)) {
assert(bits == GC_OLD_MARKED);
Expand All @@ -1042,7 +1041,6 @@ void gc_stats_big_obj(void)
stat.nbytes_used += sz;
}
}
ma = ma->next;
}
}
jl_safe_printf("%lld kB (%lld%% old) in %lld large objects (%lld%% old)\n",
Expand Down
36 changes: 16 additions & 20 deletions src/gc-stock.c
Original file line number Diff line number Diff line change
Expand Up @@ -629,10 +629,9 @@ void jl_gc_reset_alloc_count(void) JL_NOTSAFEPOINT
reset_thread_gc_counts();
}

static void jl_gc_free_memory(jl_value_t *v, int isaligned) JL_NOTSAFEPOINT
static void jl_gc_free_memory(jl_genericmemory_t *m, int isaligned) JL_NOTSAFEPOINT
{
assert(jl_is_genericmemory(v));
jl_genericmemory_t *m = (jl_genericmemory_t*)v;
assert(jl_is_genericmemory(m));
assert(jl_genericmemory_how(m) == 1 || jl_genericmemory_how(m) == 2);
char *d = (char*)m->ptr;
size_t freed_bytes = memory_block_usable_size(d, isaligned);
Expand All @@ -654,25 +653,23 @@ static void sweep_malloced_memory(void) JL_NOTSAFEPOINT
for (int t_i = 0; t_i < gc_n_threads; t_i++) {
jl_ptls_t ptls2 = gc_all_tls_states[t_i];
if (ptls2 != NULL) {
mallocmemory_t *ma = ptls2->gc_tls_common.heap.mallocarrays;
mallocmemory_t **pma = &ptls2->gc_tls_common.heap.mallocarrays;
while (ma != NULL) {
mallocmemory_t *nxt = ma->next;
jl_value_t *a = (jl_value_t*)((uintptr_t)ma->a & ~1);
int bits = jl_astaggedvalue(a)->bits.gc;
if (gc_marked(bits)) {
pma = &ma->next;
size_t n = 0;
size_t l = ptls2->gc_tls_common.heap.mallocarrays.len;
void **lst = ptls2->gc_tls_common.heap.mallocarrays.items;
// filter without preserving order
while (n < l) {
jl_genericmemory_t *m = (jl_genericmemory_t*)((uintptr_t)lst[n] & ~1);
if (gc_marked(jl_astaggedvalue(m)->bits.gc)) {
n++;
}
else {
*pma = nxt;
int isaligned = (uintptr_t)ma->a & 1;
jl_gc_free_memory(a, isaligned);
ma->next = ptls2->gc_tls_common.heap.mafreelist;
ptls2->gc_tls_common.heap.mafreelist = ma;
int isaligned = (uintptr_t)lst[n] & 1;
jl_gc_free_memory(m, isaligned);
l--;
lst[n] = lst[l];
}
gc_time_count_mallocd_memory(bits);
ma = nxt;
}
ptls2->gc_tls_common.heap.mallocarrays.len = l;
}
}
gc_time_mallocd_memory_end();
Expand Down Expand Up @@ -3439,8 +3436,7 @@ void jl_init_thread_heap(jl_ptls_t ptls)
small_arraylist_new(&common_heap->live_tasks, 0);
for (int i = 0; i < JL_N_STACK_POOLS; i++)
small_arraylist_new(&common_heap->free_stacks[i], 0);
common_heap->mallocarrays = NULL;
common_heap->mafreelist = NULL;
small_arraylist_new(&common_heap->mallocarrays, 0);
heap->young_generation_of_bigvals = (bigval_t*)calloc_s(sizeof(bigval_t)); // sentinel
assert(gc_bigval_sentinel_tag != 0); // make sure the sentinel is initialized
heap->young_generation_of_bigvals->header = gc_bigval_sentinel_tag;
Expand Down
5 changes: 2 additions & 3 deletions src/gc-tls-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ typedef struct {
// that are holding onto a stack from the pool
small_arraylist_t live_tasks;

// variables for tracking malloc'd arrays
struct _mallocmemory_t *mallocarrays;
struct _mallocmemory_t *mafreelist;
// variable for tracking malloc'd arrays
small_arraylist_t mallocarrays;

#define JL_N_STACK_POOLS 16
small_arraylist_t free_stacks[JL_N_STACK_POOLS];
Expand Down
4 changes: 2 additions & 2 deletions src/mtarraylist.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ extern "C" {
// but there can be any number of observers

// NOTE(review): this is a diff hunk rendered without +/- markers
// ("2 additions & 2 deletions"). The `uint32_t` `len`/`max` pair is presumably
// the old declaration and the `size_t` pair its replacement; only one pair
// exists in the real struct.
typedef struct {
_Atomic(uint32_t) len;
uint32_t max;
_Atomic(size_t) len;
size_t max;
_Atomic(_Atomic(void*)*) items;
// inline storage for SMALL_AL_N_INLINE elements
_Atomic(void*) _space[SMALL_AL_N_INLINE];
} small_mtarraylist_t;
Expand Down
10 changes: 5 additions & 5 deletions src/support/arraylist.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@

#define AL_N_INLINE 29

#define SMALL_AL_N_INLINE 6
#define SMALL_AL_N_INLINE 5

#ifdef __cplusplus
extern "C" {
#endif

#include "analyzer_annotations.h"

typedef struct {
typedef struct { // 32 words
size_t len;
size_t max;
void **items;
Expand All @@ -27,9 +27,9 @@ JL_DLLEXPORT void arraylist_push(arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
JL_DLLEXPORT void *arraylist_pop(arraylist_t *a) JL_NOTSAFEPOINT;
JL_DLLEXPORT void arraylist_grow(arraylist_t *a, size_t n) JL_NOTSAFEPOINT;

// NOTE(review): this is a diff hunk rendered without +/- markers. The bare
// `typedef struct {` with `uint32_t` fields is presumably the old declaration;
// the `// 8 words` header with `size_t` fields is presumably the new one.
// With SMALL_AL_N_INLINE set to 5, the size_t layout is len + max + items +
// 5 inline slots = 8 fields, which matches the "8 words" remark.
typedef struct {
uint32_t len;
uint32_t max;
typedef struct { // 8 words
size_t len;
size_t max;
void **items;
// inline storage for SMALL_AL_N_INLINE elements
void *_space[SMALL_AL_N_INLINE];
} small_arraylist_t;
Expand Down
Loading