From d0d8f4c4eb7e40c392368b3d5b949c516772a79b Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 20 Mar 2017 18:35:02 +0100 Subject: [PATCH] refactor gc to only need to allocate virtual addresses on demand --- src/gc-debug.c | 59 ++++++---- src/gc-pages.c | 289 ++++++++++++++++++++++++++++++------------------- src/gc.c | 127 +++++++++++++++------- src/gc.h | 145 +++++++++++++++++-------- 4 files changed, 403 insertions(+), 217 deletions(-) diff --git a/src/gc-debug.c b/src/gc-debug.c index d1502b6459542f..9ea1025c64fdd7 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -20,11 +20,6 @@ jl_gc_pagemeta_t *jl_gc_page_metadata(void *data) return page_metadata(data); } -region_t *jl_gc_find_region(void *ptr) -{ - return find_region(ptr); -} - // Find the memory block in the pool that owns the byte pointed to by p. // For end of object pointer (which is always the case for pointer to a // singleton object), this usually returns the same pointer which points to @@ -32,21 +27,19 @@ region_t *jl_gc_find_region(void *ptr) // the end of the page. JL_DLLEXPORT jl_taggedvalue_t *jl_gc_find_taggedvalue_pool(char *p, size_t *osize_p) { - region_t *r = find_region(p); - // Not in the pool - if (!r) + if (!page_metadata(p)) + // Not in the pool return NULL; + struct jl_gc_metadata_ext info = page_metadata_ext(p); char *page_begin = gc_page_data(p) + GC_PAGE_OFFSET; // In the page header if (p < page_begin) return NULL; size_t ofs = p - page_begin; - int pg_idx = page_index(r, page_begin); // Check if this is a free page - if (!(r->allocmap[pg_idx / 32] & (uint32_t)(1 << (pg_idx % 32)))) + if (!(info.region0->allocmap[info.region0_i32 / 32] & (uint32_t)(1 << info.region0_i))) return NULL; - jl_gc_pagemeta_t *pagemeta = &r->meta[pg_idx]; - int osize = pagemeta->osize; + int osize = info.meta->osize; // Shouldn't be needed, just in case if (osize == 0) return NULL; @@ -1016,14 +1009,42 @@ static void gc_count_pool_page(jl_gc_pagemeta_t *pg) } } -static void gc_count_pool_region(region_t *region) +static void gc_count_pool_region0(region0_t *region0) +{ + for (int pg_i = 0; pg_i < REGION0_PG_COUNT / 32; pg_i++) { + uint32_t line = region0->allocmap[pg_i]; + if (line) { + for (int j = 0; j < 32; j++) { + if ((line >> j) & 1) { + gc_count_pool_page(region0->meta[pg_i * 32 + j]); + } + } + } + } +} + +static void gc_count_pool_region1(region1_t *region1) { - for (int pg_i = 0; pg_i < region->pg_cnt / 32; pg_i++) { - uint32_t line = region->allocmap[pg_i]; + for (int pg_i = 0; pg_i < REGION1_PG_COUNT / 32; pg_i++) { + uint32_t line = region1->allocmap0[pg_i]; if (line) { for (int j = 0; j < 32; j++) { if ((line >> j) & 1) { - gc_count_pool_page(®ion->meta[pg_i*32 + j]); + gc_count_pool_region0(region1->meta0[pg_i * 32 + j]); + } + } + } + } +} + +static void gc_count_pool_regions(void) +{ + for (int pg_i = 0; pg_i < (REGION2_PG_COUNT + 31) / 32; pg_i++) { + uint32_t line = memory_map.allocmap1[pg_i]; + if (line) { + for (int j = 0; j < 32; j++) { + if ((line >> j) & 1) { + gc_count_pool_region1(memory_map.meta1[pg_i * 32 + j]); } } } @@ -1034,11 +1055,7 @@ void gc_count_pool(void) { memset(&poolobj_sizes, 0, sizeof(poolobj_sizes)); empty_pages = 0; - for (int i = 0; i < REGION_COUNT; i++) { - if (!regions[i].pages) - break; - gc_count_pool_region(®ions[i]); - } + gc_count_pool_regions(); jl_safe_printf("****** Pool stat: ******\n"); for (int i = 0;i < 4;i++) jl_safe_printf("bits(%d): %" PRId64 "\n", i, poolobj_sizes[i]); diff --git a/src/gc-pages.c b/src/gc-pages.c index ca549e81074bcb..4b8adb1b52222e 100644 
--- a/src/gc-pages.c +++ b/src/gc-pages.c @@ -9,14 +9,12 @@ extern "C" { #endif -// A region is contiguous storage for up to DEFAULT_REGION_PG_COUNT naturally aligned GC_PAGE_SZ pages -// It uses a very naive allocator (see jl_gc_alloc_page & jl_gc_free_page) -#if defined(_P64) -#define DEFAULT_REGION_PG_COUNT (16 * 8 * 4096) // 8 GB +#ifdef _P64 +#define DEFAULT_REGION_PG_COUNT (4096) // 64 MB #else -#define DEFAULT_REGION_PG_COUNT (8 * 4096) // 512 MB +#define DEFAULT_REGION_PG_COUNT (1024) // 16 MB #endif -#define MIN_REGION_PG_COUNT 64 // 1 MB +#define MIN_REGION_PG_COUNT (32) // same as the size of allocmap/freemap granularity static int region_pg_cnt = DEFAULT_REGION_PG_COUNT; static jl_mutex_t pagealloc_lock; @@ -24,17 +22,8 @@ static size_t current_pg_count = 0; void jl_gc_init_page(void) { -#ifndef _OS_WINDOWS_ - struct rlimit rl; - if (getrlimit(RLIMIT_AS, &rl) == 0) { - // This is not 100% precise and not the most efficient implementation - // but should be close enough and fast enough for the normal case. - while (rl.rlim_cur < region_pg_cnt * sizeof(jl_gc_page_t) * 2 && - region_pg_cnt >= MIN_REGION_PG_COUNT) { - region_pg_cnt /= 2; - } - } -#endif + if (GC_PAGE_SZ * region_pg_cnt < jl_page_size) + region_pg_cnt = jl_page_size / GC_PAGE_SZ; // exact division } #ifndef MAP_NORESERVE // not defined in POSIX, FreeBSD, etc. @@ -43,156 +32,229 @@ void jl_gc_init_page(void) // Try to allocate a memory block for a region with `pg_cnt` pages. // Return `NULL` if allocation failed. Result is aligned to `GC_PAGE_SZ`. -static char *jl_gc_try_alloc_region(int pg_cnt) +static char *jl_gc_try_alloc_pages(int pg_cnt) { - const size_t pages_sz = sizeof(jl_gc_page_t) * pg_cnt; - const size_t freemap_sz = sizeof(uint32_t) * pg_cnt / 32; - const size_t meta_sz = sizeof(jl_gc_pagemeta_t) * pg_cnt; - size_t alloc_size = pages_sz + freemap_sz + meta_sz; + size_t pages_sz = GC_PAGE_SZ * pg_cnt; #ifdef _OS_WINDOWS_ - char *mem = (char*)VirtualAlloc(NULL, alloc_size + GC_PAGE_SZ, + char *mem = (char*)VirtualAlloc(NULL, pages_sz + GC_PAGE_SZ, MEM_RESERVE, PAGE_READWRITE); if (mem == NULL) return NULL; #else if (GC_PAGE_SZ > jl_page_size) - alloc_size += GC_PAGE_SZ; - char *mem = (char*)mmap(0, alloc_size, PROT_READ | PROT_WRITE, + pages_sz += GC_PAGE_SZ; + char *mem = (char*)mmap(0, pages_sz, PROT_READ | PROT_WRITE, MAP_NORESERVE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (mem == MAP_FAILED) return NULL; #endif - if (GC_PAGE_SZ > jl_page_size) { + if (GC_PAGE_SZ > jl_page_size) // round data pointer up to the nearest gc_page_data-aligned // boundary if mmap didn't already do so. mem = (char*)gc_page_data(mem + GC_PAGE_SZ - 1); - } return mem; } -// Allocate the memory for a `region_t`. Starts with `region_pg_cnt` number +// Allocate the memory for a new page. Starts with `region_pg_cnt` number // of pages. Decrease 4x every time so that there are enough space for a few. // more regions (or other allocations). The final page count is recorded // and will be used as the starting count next time. If the page count is // smaller `MIN_REGION_PG_COUNT` a `jl_memory_exception` is thrown. -// Assume `pagealloc_lock` is acquired, the lock is released before the +// Assumes `pagealloc_lock` is acquired, the lock is released before the // exception is thrown. 
-static void jl_gc_alloc_region(region_t *region) +static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) { - int pg_cnt = region_pg_cnt; + unsigned pg, pg_cnt = region_pg_cnt; char *mem = NULL; while (1) { - if (__likely((mem = jl_gc_try_alloc_region(pg_cnt)))) + if (__likely((mem = jl_gc_try_alloc_pages(pg_cnt)))) break; - if (pg_cnt >= MIN_REGION_PG_COUNT * 4) { + size_t min_region_pg_count = MIN_REGION_PG_COUNT; + if (GC_PAGE_SZ * min_region_pg_count < jl_page_size) + min_region_pg_count = jl_page_size / GC_PAGE_SZ; // exact division + if (pg_cnt >= 4 * min_region_pg_count) { pg_cnt /= 4; region_pg_cnt = pg_cnt; } - else if (pg_cnt > MIN_REGION_PG_COUNT) { - region_pg_cnt = pg_cnt = MIN_REGION_PG_COUNT; + else if (pg_cnt > min_region_pg_count) { + region_pg_cnt = pg_cnt = min_region_pg_count; } else { JL_UNLOCK_NOGC(&pagealloc_lock); jl_throw(jl_memory_exception); } } - const size_t pages_sz = sizeof(jl_gc_page_t) * pg_cnt; - const size_t allocmap_sz = sizeof(uint32_t) * pg_cnt / 32; - region->pages = (jl_gc_page_t*)mem; - region->allocmap = (uint32_t*)(mem + pages_sz); - region->meta = (jl_gc_pagemeta_t*)(mem + pages_sz +allocmap_sz); - region->lb = 0; - region->ub = 0; - region->pg_cnt = pg_cnt; -#ifdef _OS_WINDOWS_ - VirtualAlloc(region->allocmap, pg_cnt / 8, MEM_COMMIT, PAGE_READWRITE); - VirtualAlloc(region->meta, pg_cnt * sizeof(jl_gc_pagemeta_t), - MEM_COMMIT, PAGE_READWRITE); -#endif + + jl_gc_pagemeta_t *page_meta = (jl_gc_pagemeta_t*)calloc(pg_cnt, sizeof(jl_gc_pagemeta_t)); + if (!page_meta) + abort(); + for (pg = 0; pg < pg_cnt; pg++) { + struct jl_gc_metadata_ext info; + uint32_t msk; + unsigned i; + region1_t **pregion1; + region0_t **pregion0; + jl_gc_pagemeta_t **pmeta; + + char *ptr = mem + (GC_PAGE_SZ * pg); + page_meta[pg].data = ptr; + + i = REGION_INDEX(ptr); + info.region_i = i % 32; + info.region_i32 = i / 32; + msk = (1 << info.region_i); + if ((memory_map.freemap1[info.region_i32] & msk) == 0) + memory_map.freemap1[info.region_i32] |= msk; // has free + info.region1 = *(pregion1 = &memory_map.meta1[i]); + if (!info.region1) { + info.region1 = (*pregion1 = (region1_t*)calloc(1, sizeof(region1_t))); + if (!info.region1) + abort(); + } + + i = REGION1_INDEX(ptr); + info.region1_i = i % 32; + info.region1_i32 = i / 32; + msk = (1 << info.region1_i); + if ((info.region1->freemap0[info.region1_i32] & msk) == 0) + info.region1->freemap0[info.region1_i32] |= msk; // has free + info.region0 = *(pregion0 = &info.region1->meta0[i]); + if (!info.region0) { + info.region0 = (*pregion0 = (region0_t*)calloc(1, sizeof(region0_t))); + if (!info.region0) + abort(); + } + + i = REGION0_INDEX(ptr); + info.region0_i = i % 32; + info.region0_i32 = i / 32; + msk = (1 << info.region0_i); + info.region0->freemap[info.region0_i32] |= msk; // is free + pmeta = &info.region0->meta[i]; + info.meta = (*pmeta = &page_meta[pg]); + } + return page_meta; } -NOINLINE void *jl_gc_alloc_page(void) +NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) { - int i; - region_t *region; - int region_i = 0; + struct jl_gc_metadata_ext info; JL_LOCK_NOGC(&pagealloc_lock); - while (region_i < REGION_COUNT) { - region = ®ions[region_i]; - if (region->pages == NULL) - jl_gc_alloc_region(region); - for (i = region->lb; i < region->pg_cnt / 32; i++) { - if (~region->allocmap[i]) - break; - } - if (i == region->pg_cnt / 32) { - // region full - region_i++; - continue; + + // scan over memory_map for existing allocated but unused pages + for (info.region_i32 = memory_map.lb; info.region_i32 < (REGION2_PG_COUNT + 31) / 
32; info.region_i32++) { + uint32_t freemap1 = memory_map.freemap1[info.region_i32]; + for (info.region_i = 0; freemap1; info.region_i++, freemap1 >>= 1) { + unsigned next = ffs_u32(freemap1); + info.region_i += next; + freemap1 >>= next; + info.region1 = memory_map.meta1[info.region_i + info.region_i32 * 32]; + // repeat over region1 + for (info.region1_i32 = info.region1->lb; info.region1_i32 < REGION1_PG_COUNT / 32; info.region1_i32++) { + uint32_t freemap0 = info.region1->freemap0[info.region1_i32]; + for (info.region1_i = 0; freemap0; info.region1_i++, freemap0 >>= 1) { + unsigned next = ffs_u32(freemap0); + info.region1_i += next; + freemap0 >>= next; + info.region0 = info.region1->meta0[info.region1_i + info.region1_i32 * 32]; + // repeat over region0 + for (info.region0_i32 = info.region0->lb; info.region0_i32 < REGION0_PG_COUNT / 32; info.region0_i32++) { + uint32_t freemap = info.region0->freemap[info.region0_i32]; + if (freemap) { + info.region0_i = ffs_u32(freemap); + info.meta = info.region0->meta[info.region0_i + info.region0_i32 * 32]; + assert(info.meta->data); + // new pages available starting at min of lb and region_i32 + if (memory_map.lb < info.region_i32) + memory_map.lb = info.region_i32; + if (info.region1->lb < info.region1_i32) + info.region1->lb = info.region1_i32; + if (info.region0->lb < info.region0_i32) + info.region0->lb = info.region0_i32; + goto have_free_page; + } + } + info.region1->freemap0[info.region1_i32] &= ~(uint32_t)(1 << info.region1_i); // record that this was full + } + } + memory_map.freemap1[info.region_i32] &= ~(uint32_t)(1 << info.region_i); // record that this was full } - break; } - if (__unlikely(region_i >= REGION_COUNT)) { - JL_UNLOCK_NOGC(&pagealloc_lock); - jl_throw(jl_memory_exception); + + // no existing pages found, allocate a new one + { + jl_gc_pagemeta_t *meta = jl_gc_alloc_new_page(); + info = page_metadata_ext(meta->data); + assert(meta == info.meta); } - if (region->lb < i) - region->lb = i; - if (region->ub < i) - region->ub = i; - -#if defined(_COMPILER_MINGW_) - int j = __builtin_ffs(~region->allocmap[i]) - 1; -#elif defined(_COMPILER_MICROSOFT_) - unsigned long j; - _BitScanForward(&j, ~region->allocmap[i]); -#else - int j = ffs(~region->allocmap[i]) - 1; -#endif + // new pages available starting at max of lb and region_i32 + if (memory_map.lb > info.region_i32) + memory_map.lb = info.region_i32; + if (info.region1->lb > info.region1_i32) + info.region1->lb = info.region1_i32; + if (info.region0->lb > info.region0_i32) + info.region0->lb = info.region0_i32; + +have_free_page: + if (memory_map.ub < info.region_i32) + memory_map.ub = info.region_i32; + if (info.region1->ub < info.region1_i32) + info.region1->ub = info.region1_i32; + if (info.region0->ub < info.region0_i32) + info.region0->ub = info.region0_i32; + + // mark this entry as in-use and not free + info.region0->freemap[info.region0_i32] &= ~(uint32_t)(1 << info.region0_i); + info.region0->allocmap[info.region0_i32] |= (uint32_t)(1 << info.region0_i); + info.region1->allocmap0[info.region1_i32] |= (uint32_t)(1 << info.region1_i); + memory_map.allocmap1[info.region_i32] |= (uint32_t)(1 << info.region_i); - region->allocmap[i] |= (uint32_t)(1 << j); - void *ptr = region->pages[i * 32 + j].data; #ifdef _OS_WINDOWS_ - VirtualAlloc(ptr, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); + VirtualAlloc(info.meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE); #endif current_pg_count++; gc_final_count_page(current_pg_count); JL_UNLOCK_NOGC(&pagealloc_lock); - return ptr; + return 
info.meta; } void jl_gc_free_page(void *p) { - int pg_idx = -1; - int i; - region_t *region = regions; - for (i = 0; i < REGION_COUNT && regions[i].pages != NULL; i++) { - region = ®ions[i]; - pg_idx = page_index(region, p); - if (pg_idx >= 0 && pg_idx < region->pg_cnt) { - break; - } - } - assert(i < REGION_COUNT && region->pages != NULL); - uint32_t msk = (uint32_t)(1 << (pg_idx % 32)); - assert(region->allocmap[pg_idx/32] & msk); - region->allocmap[pg_idx/32] ^= msk; - free(region->meta[pg_idx].ages); + // update the allocmap and freemap to indicate this contains a free entry + struct jl_gc_metadata_ext info = page_metadata_ext(p); + uint32_t msk; + msk = (uint32_t)(1 << info.region0_i); + assert(!(info.region0->freemap[info.region0_i32] & msk)); + assert(info.region0->allocmap[info.region0_i32] & msk); + info.region0->allocmap[info.region0_i32] &= ~msk; + info.region0->freemap[info.region0_i32] |= msk; + + msk = (uint32_t)(1 << info.region1_i); + assert(info.region1->allocmap0[info.region1_i32] & msk); + if ((info.region1->freemap0[info.region1_i32] & msk) == 0) + info.region1->freemap0[info.region1_i32] |= msk; + + msk = (uint32_t)(1 << info.region_i); + assert(memory_map.allocmap1[info.region_i32] & msk); + if ((memory_map.freemap1[info.region_i32] & msk) == 0) + memory_map.freemap1[info.region_i32] |= msk; + + free(info.meta->ages); + info.meta->ages = NULL; + // tell the OS we don't need these pages right now size_t decommit_size = GC_PAGE_SZ; if (GC_PAGE_SZ < jl_page_size) { // ensure so we don't release more memory than intended - size_t n_pages = (GC_PAGE_SZ + jl_page_size - 1) / GC_PAGE_SZ; + size_t n_pages = jl_page_size / GC_PAGE_SZ; // exact division decommit_size = jl_page_size; - p = (void*)((uintptr_t)region->pages[pg_idx].data & ~(jl_page_size - 1)); // round down to the nearest page - pg_idx = page_index(region, p); - if (pg_idx + n_pages > region->pg_cnt) - goto no_decommit; - for (; n_pages--; pg_idx++) { - msk = (uint32_t)(1 << ((pg_idx % 32))); - if (region->allocmap[pg_idx / 32] & msk) { + p = (void*)((uintptr_t)p & ~(jl_page_size - 1)); // round down to the nearest physical page + while (n_pages--) { + struct jl_gc_metadata_ext info = page_metadata_ext(p); + msk = (uint32_t)(1 << info.region0_i); + if (info.region0->allocmap[info.region0_i32] & msk) goto no_decommit; - } + p = (void*)((char*)p + GC_PAGE_SZ); } } #ifdef _OS_WINDOWS_ @@ -200,9 +262,14 @@ void jl_gc_free_page(void *p) #else madvise(p, decommit_size, MADV_DONTNEED); #endif + no_decommit: - if (region->lb > pg_idx / 32) - region->lb = pg_idx / 32; + if (memory_map.lb > info.region_i32) + memory_map.lb = info.region_i32; + if (info.region1->lb > info.region1_i32) + info.region1->lb = info.region1_i32; + if (info.region0->lb > info.region0_i32) + info.region0->lb = info.region0_i32; current_pg_count--; } diff --git a/src/gc.c b/src/gc.c index 471669a4afee94..11ae9a095391f0 100644 --- a/src/gc.c +++ b/src/gc.c @@ -48,7 +48,7 @@ static jl_mutex_t gc_cache_lock; jl_gc_num_t gc_num = {0,0,0,0,0,0,0,0,0,0,0,0,0,0}; static size_t last_long_collect_interval; -region_t regions[REGION_COUNT]; +region_t memory_map; // List of marked big objects. Not per-thread. Accessed only by master thread. 
bigval_t *big_objects_marked = NULL; @@ -404,7 +404,7 @@ static int mark_reset_age = 0; #define PROMOTE_AGE 1 // this cannot be increased as is without changing : // - sweep_page which is specialized for 1bit age -// - the size of the age storage in region_t +// - the size of the age storage in jl_gc_pagemeta_t static int64_t scanned_bytes; // young bytes scanned while marking @@ -590,7 +590,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o, STATIC_INLINE void gc_setmark_pool(jl_ptls_t ptls, jl_taggedvalue_t *o, uint8_t mark_mode) { - gc_setmark_pool_(ptls, o, mark_mode, page_metadata(o)); + gc_setmark_pool_(ptls, o, mark_mode, page_metadata_ext(o).meta); } STATIC_INLINE void gc_setmark(jl_ptls_t ptls, jl_taggedvalue_t *o, @@ -618,7 +618,7 @@ inline void gc_setmark_buf(jl_ptls_t ptls, void *o, // sure. if (__likely(gc_setmark_tag(buf, mark_mode, tag, &bits)) && !gc_verifying) { if (minsz <= GC_MAX_SZCLASS) { - jl_gc_pagemeta_t *page = page_metadata(o); + jl_gc_pagemeta_t *page = page_metadata(buf); if (page) { gc_setmark_pool_(ptls, buf, bits, page); return; @@ -875,11 +875,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) // Do not pass in `ptls` as argument. This slows down the fast path // in pool_alloc significantly jl_ptls_t ptls = jl_get_ptls_states(); - char *data = (char*)jl_gc_alloc_page(); - if (data == NULL) - jl_throw(jl_memory_exception); - jl_gc_pagemeta_t *pg = page_metadata(data + GC_PAGE_OFFSET); - pg->data = data; + jl_gc_pagemeta_t *pg = jl_gc_alloc_page(); pg->osize = p->osize; pg->ages = (uint8_t*)malloc(GC_PAGE_SZ / 8 / p->osize + 1); pg->thread_n = ptls->tid; @@ -920,7 +916,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, if (__unlikely(gc_page_data(v) != gc_page_data(next))) { // we only update pg's fields when the freelist changes page // since pg's metadata is likely not in cache - jl_gc_pagemeta_t *pg = page_metadata(v); + jl_gc_pagemeta_t *pg = page_metadata_ext(v).meta; assert(pg->osize == p->osize); pg->nfree = 0; pg->has_young = 1; @@ -937,7 +933,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset, if (v) { // like the freelist case, // but only update the page metadata when it is full - jl_gc_pagemeta_t *pg = page_metadata((char*)v - 1); + jl_gc_pagemeta_t *pg = page_metadata_ext((char*)v - 1).meta; assert(pg->osize == p->osize); pg->nfree = 0; pg->has_young = 1; @@ -1082,35 +1078,88 @@ static jl_taggedvalue_t **sweep_page(jl_gc_pool_t *p, jl_gc_pagemeta_t *pg, jl_t return pfl; } -static void sweep_pool_region(jl_taggedvalue_t ***pfl, int region_i, int sweep_full) +// the actual sweeping over all allocated pages in all regions +static inline void sweep_pool_page(jl_taggedvalue_t ***pfl, jl_gc_pagemeta_t *pg, int sweep_full) { - region_t *region = ®ions[region_i]; + int p_n = pg->pool_n; + int t_n = pg->thread_n; + jl_ptls_t ptls2 = jl_all_tls_states[t_n]; + jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; + int osize = pg->osize; + pfl[t_n * JL_GC_N_POOLS + p_n] = sweep_page(p, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); +} - // the actual sweeping - int ub = 0; - int lb = region->lb; - for (int pg_i = 0; pg_i <= region->ub; pg_i++) { - uint32_t line = region->allocmap[pg_i]; - if (line) { +static inline int sweep_pool_region0(jl_taggedvalue_t ***pfl, region0_t *region0, int sweep_full) +{ + unsigned ub = 0; + unsigned alloc = 0; + for (unsigned pg_i = 0; pg_i <= region0->ub; pg_i++) { + uint32_t line = region0->allocmap[pg_i]; + unsigned j; + if 
(!line) + continue; + ub = pg_i; + alloc = 1; + for (j = 0; line; j++, line >>= 1) { + unsigned next = ffs_u32(line); + j += next; + line >>= next; + jl_gc_pagemeta_t *pg = region0->meta[pg_i * 32 + j]; + sweep_pool_page(pfl, pg, sweep_full); + } + } + region0->ub = ub; + return alloc; +} + +static inline int sweep_pool_region1(jl_taggedvalue_t ***pfl, region1_t *region1, int sweep_full) +{ + unsigned ub = 0; + unsigned alloc = 0; + for (unsigned pg_i = 0; pg_i <= region1->ub; pg_i++) { + uint32_t line = region1->allocmap0[pg_i]; + unsigned j; + for (j = 0; line; j++, line >>= 1) { + unsigned next = ffs_u32(line); + j += next; + line >>= next; + region0_t *region0 = region1->meta0[pg_i * 32 + j]; + if (region0 && !sweep_pool_region0(pfl, region0, sweep_full)) + region1->allocmap0[pg_i] &= ~(1 << j); + } + if (region1->allocmap0[pg_i]) { ub = pg_i; - for (int j = 0; j < 32; j++) { - if ((line >> j) & 1) { - jl_gc_pagemeta_t *pg = ®ion->meta[pg_i*32 + j]; - int p_n = pg->pool_n; - int t_n = pg->thread_n; - jl_ptls_t ptls2 = jl_all_tls_states[t_n]; - jl_gc_pool_t *p = &ptls2->heap.norm_pools[p_n]; - int osize = pg->osize; - pfl[t_n * JL_GC_N_POOLS + p_n] = sweep_page(p, pg, pfl[t_n * JL_GC_N_POOLS + p_n], sweep_full, osize); - } - } + alloc = 1; + } + } + region1->ub = ub; + return alloc; +} + +static void sweep_pool_regions(jl_taggedvalue_t ***pfl, int sweep_full) +{ + if (REGION2_PG_COUNT == 1) { // compile-time optimization + region1_t *region1 = memory_map.meta1[0]; + sweep_pool_region1(pfl, region1, sweep_full); + return; + } + unsigned ub = 0; + for (unsigned pg_i = 0; pg_i <= memory_map.ub; pg_i++) { + uint32_t line = memory_map.allocmap1[pg_i]; + unsigned j; + for (j = 0; line; j++, line >>= 1) { + unsigned next = ffs_u32(line); + j += next; + line >>= next; + region1_t *region1 = memory_map.meta1[pg_i * 32 + j]; + if (region1 && !sweep_pool_region1(pfl, region1, sweep_full)) + memory_map.allocmap1[pg_i] &= ~(1 << j); } - else if (pg_i < lb) { - lb = pg_i; + if (memory_map.allocmap1[pg_i]) { + ub = pg_i; } } - region->ub = ub; - region->lb = lb; + memory_map.ub = ub; } static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) @@ -1150,7 +1199,7 @@ static void gc_sweep_pool(int sweep_full) jl_gc_pool_t *p = &ptls2->heap.norm_pools[i]; jl_taggedvalue_t *last = p->freelist; if (last) { - jl_gc_pagemeta_t *pg = page_metadata(last); + jl_gc_pagemeta_t *pg = page_metadata_ext(last).meta; gc_pool_sync_nfree(pg, last); pg->has_young = 1; } @@ -1160,7 +1209,7 @@ static void gc_sweep_pool(int sweep_full) last = p->newpages; if (last) { char *last_p = (char*)last; - jl_gc_pagemeta_t *pg = page_metadata(last_p - 1); + jl_gc_pagemeta_t *pg = page_metadata_ext(last_p - 1).meta; assert(last_p - gc_page_data(last_p - 1) >= GC_PAGE_OFFSET); pg->nfree = (GC_PAGE_SZ - (last_p - gc_page_data(last_p - 1))) / p->osize; pg->has_young = 1; @@ -1169,11 +1218,7 @@ static void gc_sweep_pool(int sweep_full) } } - for (int i = 0; i < REGION_COUNT; i++) { - if (!regions[i].pages) - break; - sweep_pool_region(pfl, i, sweep_full); - } + sweep_pool_regions(pfl, sweep_full); // null out terminal pointers of free lists diff --git a/src/gc.h b/src/gc.h index f49bc4f13be875..fe6a13d291e2c7 100644 --- a/src/gc.h +++ b/src/gc.h @@ -147,33 +147,77 @@ typedef struct { uint8_t *ages; } jl_gc_pagemeta_t; -typedef struct { - char data[GC_PAGE_SZ]; -} jl_gc_page_t -#if !defined(_COMPILER_MICROSOFT_) && !(defined(_COMPILER_MINGW_) && defined(_COMPILER_CLANG_)) -__attribute__((aligned(GC_PAGE_SZ))) +// Page layout: +// Newpage 
freelist: sizeof(void*) +// Padding: GC_PAGE_OFFSET - sizeof(void*) +// Blocks: osize * n +// Tag: sizeof(jl_taggedvalue_t) +// Data: <= osize - sizeof(jl_taggedvalue_t) + +// these plus GC_PAGE_LG2 must sum to sizeof(void*) +// and should be multiples of 32 (MIN_REGION_PG_COUNT), except REGION2_PG_COUNT can be 1 +#ifdef _P64 +#define REGION0_PG_COUNT (1 << 16) +#define REGION1_PG_COUNT (1 << 16) +#define REGION2_PG_COUNT (1 << 18) +#define REGION0_INDEX(p) (((uintptr_t)(p) >> 14) & 0xFFFF) // shift by GC_PAGE_LG2 +#define REGION1_INDEX(p) (((uintptr_t)(p) >> 30) & 0xFFFF) +#define REGION_INDEX(p) (((uintptr_t)(p) >> 46) & 0x3FFFF) +#else +#define REGION0_PG_COUNT (1 << 8) +#define REGION1_PG_COUNT (1 << 10) +#define REGION2_PG_COUNT (1 << 0) +#define REGION0_INDEX(p) (((uintptr_t)(p) >> 14) & 0xFF) // shift by GC_PAGE_LG2 +#define REGION1_INDEX(p) (((uintptr_t)(p) >> 22) & 0x3FF) +#define REGION_INDEX(p) (0) #endif -; + + +typedef struct { + jl_gc_pagemeta_t *meta[REGION0_PG_COUNT]; + uint32_t allocmap[REGION0_PG_COUNT / 32]; + uint32_t freemap[REGION0_PG_COUNT / 32]; + // store a lower bound of the first free page in each region + int lb; + // an upper bound of the last non-free page + int ub; +} region0_t; + +typedef struct { + region0_t *meta0[REGION1_PG_COUNT]; + uint32_t allocmap0[REGION1_PG_COUNT / 32]; + uint32_t freemap0[REGION1_PG_COUNT / 32]; + // store a lower bound of the first free page in each region + int lb; + // an upper bound of the last non-free page + int ub; +} region1_t; typedef struct { - // Page layout: - // Newpage freelist: sizeof(void*) - // Padding: GC_PAGE_OFFSET - sizeof(void*) - // Blocks: osize * n - // Tag: sizeof(jl_taggedvalue_t) - // Data: <= osize - sizeof(jl_taggedvalue_t) - jl_gc_page_t *pages; // [pg_cnt]; must be first, to preserve page alignment - uint32_t *allocmap; // [pg_cnt / 32] - jl_gc_pagemeta_t *meta; // [pg_cnt] - int pg_cnt; + region1_t *meta1[REGION2_PG_COUNT]; + uint32_t allocmap1[(REGION2_PG_COUNT + 31) / 32]; + uint32_t freemap1[(REGION2_PG_COUNT + 31) / 32]; // store a lower bound of the first free page in each region int lb; // an upper bound of the last non-free page int ub; } region_t; +STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) +{ +#if defined(_COMPILER_MINGW_) + return __builtin_ffs(bitvec) - 1; +#elif defined(_COMPILER_MICROSOFT_) + unsigned long j; + _BitScanForward(&j, bitvec); + return j; +#else + return ffs(bitvec) - 1; +#endif +} + extern jl_gc_num_t gc_num; -extern region_t regions[REGION_COUNT]; +extern region_t memory_map; extern bigval_t *big_objects_marked; extern arraylist_t finalizer_list_marked; extern arraylist_t to_finalize; @@ -200,11 +244,6 @@ STATIC_INLINE jl_taggedvalue_t *page_pfl_end(jl_gc_pagemeta_t *p) return (jl_taggedvalue_t*)(p->data + p->fl_end_offset); } -STATIC_INLINE int page_index(region_t *region, void *data) -{ - return (gc_page_data(data) - region->pages->data) / GC_PAGE_SZ; -} - STATIC_INLINE int gc_marked(uintptr_t bits) { return (bits & GC_MARKED) != 0; @@ -232,31 +271,49 @@ STATIC_INLINE void *gc_ptr_clear_tag(void *v, uintptr_t mask) NOINLINE uintptr_t gc_get_stack_ptr(void); -STATIC_INLINE region_t *find_region(void *ptr) +STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) { - for (int i = 0; i < REGION_COUNT && regions[i].pages; i++) { - region_t *region = ®ions[i]; - char *begin = region->pages->data; - char *end = begin + region->pg_cnt * sizeof(jl_gc_page_t); - if ((char*)ptr >= begin && (char*)ptr <= end) { - return region; - } - } - return NULL; + uintptr_t data = 
((uintptr_t)_data); + unsigned i; + i = REGION_INDEX(data); + region1_t *r1 = memory_map.meta1[i]; + if (!r1) + return NULL; + i = REGION1_INDEX(data); + region0_t *r0 = r1->meta0[i]; + if (!r0) + return NULL; + i = REGION0_INDEX(data); + return r0->meta[i]; } -STATIC_INLINE jl_gc_pagemeta_t *page_metadata(void *_data) +struct jl_gc_metadata_ext { + region1_t *region1; + region0_t *region0; + jl_gc_pagemeta_t *meta; + unsigned region_i32, region1_i32, region0_i32; + unsigned region_i, region1_i, region0_i; +}; + +STATIC_INLINE struct jl_gc_metadata_ext page_metadata_ext(void *_data) { - uintptr_t data = ((uintptr_t)_data) - 1; - for (int i = 0; i < REGION_COUNT && regions[i].pages; i++) { - region_t *region = ®ions[i]; - uintptr_t begin = (uintptr_t)region->pages->data; - uintptr_t offset = data - begin; - if (offset < region->pg_cnt * sizeof(jl_gc_page_t)) { - return ®ion->meta[offset >> GC_PAGE_LG2]; - } - } - return NULL; + uintptr_t data = (uintptr_t)_data; + struct jl_gc_metadata_ext info; + unsigned i; + i = REGION_INDEX(data); + info.region_i = i % 32; + info.region_i32 = i / 32; + info.region1 = memory_map.meta1[i]; + i = REGION1_INDEX(data); + info.region1_i = i % 32; + info.region1_i32 = i / 32; + info.region0 = info.region1->meta0[i]; + i = REGION0_INDEX(data); + info.region0_i = i % 32; + info.region0_i32 = i / 32; + info.meta = info.region0->meta[i]; + assert(info.meta); + return info; } STATIC_INLINE void gc_big_object_unlink(const bigval_t *hdr) @@ -285,7 +342,7 @@ void jl_mark_box_caches(jl_ptls_t ptls); // GC pages void jl_gc_init_page(void); -NOINLINE void *jl_gc_alloc_page(void); +NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void); void jl_gc_free_page(void *p); // GC debug
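
For reference, the REGION_INDEX / REGION1_INDEX / REGION0_INDEX macros added in gc.h split a virtual address into pagetable indices, and page_metadata() / page_metadata_ext() walk memory_map.meta1 -> meta0 -> meta with those indices, stopping at the first missing level. The standalone sketch below is not part of the patch; it only illustrates that decomposition, assuming GC_PAGE_LG2 == 14 (16 kB GC pages, inferred from the 14-bit shift in REGION0_INDEX) and an arbitrary sample pointer.

/* illustration only: 64-bit address -> three pagetable indices (14 + 16 + 16 + 18 bits) */
#include <stdint.h>
#include <stdio.h>

#define REGION0_INDEX(p) (((uintptr_t)(p) >> 14) & 0xFFFF)   /* leaf level   */
#define REGION1_INDEX(p) (((uintptr_t)(p) >> 30) & 0xFFFF)   /* middle level */
#define REGION_INDEX(p)  (((uintptr_t)(p) >> 46) & 0x3FFFF)  /* top level    */

int main(void)
{
    /* hypothetical GC-page-aligned address, chosen only for the example */
    uintptr_t p = (uintptr_t)0x00007f3a5c7e4000ULL;
    printf("top-level (region)  index: %zu\n", (size_t)REGION_INDEX(p));
    printf("mid-level (region1) index: %zu\n", (size_t)REGION1_INDEX(p));
    printf("leaf      (region0) index: %zu\n", (size_t)REGION0_INDEX(p));
    /* a lookup then reads memory_map.meta1[REGION_INDEX(p)]->meta0[REGION1_INDEX(p)]
     * ->meta[REGION0_INDEX(p)], returning NULL if an intermediate table is absent */
    return 0;
}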
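The allocmap/freemap pairs used by jl_gc_alloc_page and jl_gc_free_page follow one bitmap convention at every level: a set freemap bit means "a free page is reachable through this slot", a set allocmap bit means "this slot is in use", and the first candidate is found with a find-first-set scan. The sketch below is a simplified, single-level illustration of that bookkeeping, not code from the patch: SLOT_COUNT, find_free_slot and release_slot are made-up names, and POSIX ffs() stands in for the patch's ffs_u32 wrapper.

/* illustration only: one-level freemap/allocmap bookkeeping */
#include <stdint.h>
#include <strings.h>   /* ffs (POSIX) */
#include <assert.h>
#include <stdio.h>

#define SLOT_COUNT 64

static uint32_t freemap[SLOT_COUNT / 32] = { ~0u, ~0u };  /* all slots start free */
static uint32_t allocmap[SLOT_COUNT / 32];

static int find_free_slot(void)
{
    for (unsigned i32 = 0; i32 < SLOT_COUNT / 32; i32++) {
        uint32_t line = freemap[i32];
        if (!line)
            continue;                     /* every slot in this word is taken */
        unsigned i = ffs(line) - 1;       /* lowest set bit == lowest free slot */
        freemap[i32] &= ~(1u << i);       /* no longer free ...                */
        allocmap[i32] |= (1u << i);       /* ... now allocated                 */
        return (int)(i32 * 32 + i);
    }
    return -1;  /* the real allocator would map a fresh block of pages here */
}

static void release_slot(int slot)
{
    unsigned i32 = (unsigned)slot / 32, i = (unsigned)slot % 32;
    assert(allocmap[i32] & (1u << i));
    allocmap[i32] &= ~(1u << i);          /* clear in-use bit   */
    freemap[i32] |= (1u << i);            /* mark free again    */
}

int main(void)
{
    int a = find_free_slot();  /* 0 */
    int b = find_free_slot();  /* 1 */
    release_slot(a);
    int c = find_free_slot();  /* 0 again: the lowest free slot is reused */
    printf("%d %d %d\n", a, b, c);
    return 0;
}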