From bdb29cf1eea49672029bbde2d98ef02eaf13c667 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Fri, 4 Oct 2024 17:32:27 -0300 Subject: [PATCH] make memorynew intrinsic Co-authored-by: Jameson Nash Co-authored-by: Jeff Bezanson Co-authored-by: Gabriel Baraldi --- Compiler/src/tfuncs.jl | 21 ++- Compiler/test/irpasses.jl | 7 +- base/boot.jl | 7 +- base/essentials.jl | 13 +- base/genericmemory.jl | 7 +- doc/src/manual/performance-tips.md | 22 +-- src/array.c | 13 +- src/builtin_proto.h | 1 + src/builtins.c | 10 ++ src/ccall.cpp | 27 ---- src/cgutils.cpp | 206 ++++++++++++++++++++++++++++- src/codegen.cpp | 64 ++++++++- src/common_symbols2.inc | 2 + src/genericmemory.c | 61 +++++---- src/julia.h | 2 + src/llvm-alloc-helpers.cpp | 4 +- src/llvm-alloc-opt.cpp | 9 +- src/pipeline.cpp | 1 + src/rtutils.c | 7 + src/staticdata.c | 5 +- test/arrayops.jl | 41 ++++++ test/core.jl | 9 ++ 22 files changed, 431 insertions(+), 108 deletions(-) diff --git a/Compiler/src/tfuncs.jl b/Compiler/src/tfuncs.jl index af8863c1b3a3a..539844e0a4cf9 100644 --- a/Compiler/src/tfuncs.jl +++ b/Compiler/src/tfuncs.jl @@ -2017,6 +2017,12 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any}) return anyinfo ? PartialStruct(𝕃, typ, argtypes) : typ end +@nospecs function memorynew_tfunc(𝕃::AbstractLattice, memtype, m) + hasintersect(widenconst(m), Int) || return Bottom + return tmeet(𝕃, instanceof_tfunc(memtype, true)[1], GenericMemory) +end +add_tfunc(Core.memorynew, 2, 2, memorynew_tfunc, 10) + @nospecs function memoryrefget_tfunc(𝕃::AbstractLattice, mem, order, boundscheck) memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom return memoryref_elemtype(mem) @@ -2244,7 +2250,16 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt @nospecialize(rt)) ⊑ = partialorder(𝕃) na = length(argtypes) - if f === memoryrefnew + if f === Core.memorynew + argtypes[1] isa Const && argtypes[2] isa Const || return false + MemT = argtypes[1].val + isconcretetype(MemT) && MemT <: GenericMemory || return false + len = argtypes[2].val + len isa Int && 0 <= len < typemax(Int) || return false + elsz = datatype_layoutsize(MemT) + checked_smul_int(len, elsz)[2] && return false + return true + elseif f === memoryrefnew return memoryref_builtin_common_nothrow(argtypes) elseif f === memoryrefoffset length(argtypes) == 1 || return false @@ -2347,6 +2362,7 @@ const _EFFECT_FREE_BUILTINS = [ isa, UnionAll, getfield, + Core.memorynew, memoryrefnew, memoryrefoffset, memoryrefget, @@ -2381,6 +2397,7 @@ const _INACCESSIBLEMEM_BUILTINS = Any[ compilerbarrier, Core._typevar, donotdelete, + Core.memorynew, ] const _ARGMEM_BUILTINS = Any[ @@ -2543,7 +2560,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty consistent = ALWAYS_TRUE elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY - elseif f === Core._typevar + elseif f === Core._typevar || f === Core.memorynew consistent = CONSISTENT_IF_NOTRETURNED else consistent = ALWAYS_FALSE diff --git a/Compiler/test/irpasses.jl b/Compiler/test/irpasses.jl index e9d6f57337530..90d8c9dc6ab3d 100644 --- a/Compiler/test/irpasses.jl +++ b/Compiler/test/irpasses.jl @@ -1083,12 +1083,13 @@ end # test `flags_for_effects` and DCE # ================================ -let # effect-freeness computation for array allocation +@testset "effect-freeness computation for array allocation" begin # should eliminate dead allocations good_dims = [1, 2, 3, 4, 10] Ns = [1, 2, 3, 4, 10] - for dim = good_dims, N = Ns + Ts = Any[Int, Union{Missing,Nothing}, Nothing, Any] + @testset "$dim, $N" for dim in good_dims, N in Ns Int64(dim)^N > typemax(Int) && continue dims = ntuple(i->dim, N) @test @eval fully_eliminated() do @@ -1099,7 +1100,7 @@ let # effect-freeness computation for array allocation # shouldn't eliminate erroneous dead allocations bad_dims = [-1, typemax(Int)] - for dim in bad_dims, N in [1, 2, 3, 4, 10], T in Any[Int, Union{Missing,Nothing}, Nothing, Any] + @testset "$dim, $N, $T" for dim in bad_dims, N in Ns, T in Ts dims = ntuple(i->dim, N) @test @eval !fully_eliminated() do Array{$T,$N}(undef, $(dims...)) diff --git a/base/boot.jl b/base/boot.jl index 4badedae3cfb7..b5ee4aa843444 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -575,12 +575,7 @@ struct UndefInitializer end const undef = UndefInitializer() # type and dimensionality specified -(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} = - if isdefined(self, :instance) && m === 0 - self.instance - else - ccall(:jl_alloc_genericmemory, Ref{GenericMemory{kind,T,addrspace}}, (Any, Int), self, m) - end +(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} = Core.memorynew(self, m) (self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, d::NTuple{1,Int}) where {T,kind,addrspace} = self(undef, getfield(d,1)) # empty vector constructor (self::Type{GenericMemory{kind,T,addrspace}})() where {T,kind,addrspace} = self(undef, 0) diff --git a/base/essentials.jl b/base/essentials.jl index 3574116261968..0cdf3a810f553 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -382,8 +382,17 @@ default_access_order(a::GenericMemory{:atomic}) = :monotonic default_access_order(a::GenericMemoryRef{:not_atomic}) = :not_atomic default_access_order(a::GenericMemoryRef{:atomic}) = :monotonic -getindex(A::GenericMemory, i::Int) = (@_noub_if_noinbounds_meta; - memoryrefget(memoryrefnew(memoryrefnew(A), i, @_boundscheck), default_access_order(A), false)) +# bootstrap version for Memory{Any} +#getindex(A::Memory{Any}, i::Int) = (@_noub_if_noinbounds_meta; +# memoryrefget(memoryrefnew(memoryrefnew(A), i, @_boundscheck), default_access_order(A), false)) + +function getindex(A::GenericMemory, i::Int) + @_noub_if_noinbounds_meta + if @_boundscheck + ult_int(bitcast(UInt, sub_int(i, 1)), bitcast(UInt, A.length)) || throw_boundserror(A, (i,)) + end + memoryrefget(memoryrefnew(memoryrefnew(A), i, false), default_access_order(A), false) +end getindex(A::GenericMemoryRef) = memoryrefget(A, default_access_order(A), @_boundscheck) """ diff --git a/base/genericmemory.jl b/base/genericmemory.jl index fbf60255935a3..6e220552dd5e9 100644 --- a/base/genericmemory.jl +++ b/base/genericmemory.jl @@ -241,9 +241,10 @@ getindex(A::Memory, c::Colon) = copy(A) ## Indexing: setindex! ## -function _setindex!(A::Memory{T}, x::T, i1::Int) where {T} - ref = memoryrefnew(memoryref(A), i1, @_boundscheck) - memoryrefset!(ref, x, :not_atomic, @_boundscheck) +function _setindex!(A::Memory{T}, x::T, i::Int) where {T} + @boundscheck Core.Intrinsics.ult_int(i, A.length) + ref = memoryrefnew(memoryref(A), i, false) + memoryrefset!(ref, x, :not_atomic, false) return A end diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md index 038000f55e761..1166164241c60 100644 --- a/doc/src/manual/performance-tips.md +++ b/doc/src/manual/performance-tips.md @@ -1058,12 +1058,12 @@ the output. As a trivial example, compare ```jldoctest prealloc julia> function xinc(x) - return [x, x+1, x+2] + return [x + i for i in 1:3000] end; julia> function loopinc() y = 0 - for i = 1:10^7 + for i = 1:10^5 ret = xinc(i) y += ret[2] end @@ -1075,16 +1075,16 @@ with ```jldoctest prealloc julia> function xinc!(ret::AbstractVector{T}, x::T) where T - ret[1] = x - ret[2] = x+1 - ret[3] = x+2 + for i in 1:3000 + ret[i] = x+i + end nothing end; julia> function loopinc_prealloc() - ret = Vector{Int}(undef, 3) + ret = Vector{Int}(undef, 3000) y = 0 - for i = 1:10^7 + for i = 1:10^5 xinc!(ret, i) y += ret[2] end @@ -1096,12 +1096,12 @@ Timing results: ```jldoctest prealloc; filter = r"[0-9\.]+ seconds \(.*?\)" julia> @time loopinc() - 0.529894 seconds (40.00 M allocations: 1.490 GiB, 12.14% gc time) -50000015000000 + 0.297454 seconds (200.00 k allocations: 2.239 GiB, 39.80% gc time) +5000250000 julia> @time loopinc_prealloc() - 0.030850 seconds (6 allocations: 288 bytes) -50000015000000 + 0.009410 seconds (2 allocations: 23.477 KiB) +5000250000 ``` Preallocation has other advantages, for example by allowing the caller to control the "output" diff --git a/src/array.c b/src/array.c index f0051ec17565a..da9cb24b4d0e9 100644 --- a/src/array.c +++ b/src/array.c @@ -16,12 +16,6 @@ extern "C" { #endif -#if defined(_P64) && defined(UINT128MAX) -typedef __uint128_t wideint_t; -#else -typedef uint64_t wideint_t; -#endif - #define MAXINTVAL (((size_t)-1)>>1) JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dims) @@ -30,10 +24,9 @@ JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dim size_t _nel = 1; for (i = 0; i < ndims; i++) { size_t di = dims[i]; - wideint_t prod = (wideint_t)_nel * (wideint_t)di; - if (prod >= (wideint_t) MAXINTVAL || di >= MAXINTVAL) + int overflow = __builtin_mul_overflow(_nel, di, &_nel); + if (overflow || di >= MAXINTVAL) return 1; - _nel = prod; } *nel = _nel; return 0; @@ -204,7 +197,7 @@ JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc) int isbitsunion = jl_genericmemory_isbitsunion(a->ref.mem); size_t newnrows = n + inc; if (!isbitsunion && elsz == 0) { - jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 1); + jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 2); a->ref.mem = newmem; jl_gc_wb(a, newmem); a->dimsize[0] = newnrows; diff --git a/src/builtin_proto.h b/src/builtin_proto.h index 7fbd555758675..70ad67040991d 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -46,6 +46,7 @@ DECLARE_BUILTIN(is); DECLARE_BUILTIN(isa); DECLARE_BUILTIN(isdefined); DECLARE_BUILTIN(issubtype); +DECLARE_BUILTIN(memorynew); DECLARE_BUILTIN(memoryref); DECLARE_BUILTIN(memoryref_isassigned); DECLARE_BUILTIN(memoryrefget); diff --git a/src/builtins.c b/src/builtins.c index 4b9c1ad6043c2..06dbd84aff6c0 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1675,6 +1675,15 @@ JL_CALLABLE(jl_f__typevar) } // genericmemory --------------------------------------------------------------------- +JL_CALLABLE(jl_f_memorynew) +{ + JL_NARGS(memorynew, 2, 2); + jl_datatype_t *jl_genericmemory_type_type = jl_datatype_type; + JL_TYPECHK(memorynew, genericmemory_type, args[0]); + JL_TYPECHK(memorynew, long, args[1]); + size_t nel = jl_unbox_long(args[1]); + return (jl_value_t*)jl_alloc_genericmemory(args[0], nel); +} JL_CALLABLE(jl_f_memoryref) { @@ -2441,6 +2450,7 @@ void jl_init_primitives(void) JL_GC_DISABLED jl_builtin_setglobalonce = add_builtin_func("setglobalonce!", jl_f_setglobalonce); // memory primitives + jl_builtin_memorynew = add_builtin_func("memorynew", jl_f_memorynew); jl_builtin_memoryref = add_builtin_func("memoryrefnew", jl_f_memoryref); jl_builtin_memoryrefoffset = add_builtin_func("memoryrefoffset", jl_f_memoryrefoffset); jl_builtin_memoryrefget = add_builtin_func("memoryrefget", jl_f_memoryrefget); diff --git a/src/ccall.cpp b/src/ccall.cpp index 9aa1c56e2e78f..3937570896f82 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -1877,33 +1877,6 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs) JL_GC_POP(); return mark_julia_type(ctx, obj, true, jl_any_type); } - else if (is_libjulia_func(jl_alloc_genericmemory)) { - ++CCALL_STAT(jl_alloc_genericmemory); - assert(lrt == ctx.types().T_prjlvalue); - assert(!isVa && !llvmcall && nccallargs == 2); - const jl_cgval_t &typ = argv[0]; - const jl_cgval_t &nel = argv[1]; - auto arg_typename = [&] JL_NOTSAFEPOINT { - auto istyp = argv[0].constant; - std::string type_str; - if (istyp && jl_is_datatype(istyp) && jl_is_genericmemory_type(istyp)){ - auto eltype = jl_tparam1(istyp); - if (jl_is_datatype(eltype)) - type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name); - else if (jl_is_uniontype(eltype)) - type_str = "Union"; - else - type_str = ""; - } - else - type_str = ""; - return "Memory{" + type_str + "}[]"; - }; - auto alloc = ctx.builder.CreateCall(prepare_call(jl_allocgenericmemory), { boxed(ctx,typ), emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_ulong_type)}); - setName(ctx.emission_context, alloc, arg_typename); - JL_GC_POP(); - return mark_julia_type(ctx, alloc, true, jl_any_type); - } else if (is_libjulia_func(memcpy) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) { ++CCALL_STAT(memcpy); const jl_cgval_t &dst = argv[0]; diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 9e170555e6149..74a236017e02b 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1548,19 +1548,24 @@ static void emit_error(jl_codectx_t &ctx, const Twine &txt) } // DO NOT PASS IN A CONST CONDITION! -static void error_unless(jl_codectx_t &ctx, Value *cond, const Twine &msg) +static void error_unless(jl_codectx_t &ctx, Function *F, Value *cond, const Twine &msg) { ++EmittedConditionalErrors; BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f); BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass"); ctx.builder.CreateCondBr(cond, passBB, failBB); ctx.builder.SetInsertPoint(failBB); - just_emit_error(ctx, prepare_call(jlerror_func), msg); + just_emit_error(ctx, F, msg); ctx.builder.CreateUnreachable(); passBB->insertInto(ctx.f); ctx.builder.SetInsertPoint(passBB); } +static void error_unless(jl_codectx_t &ctx, Value *cond, const Twine &msg) +{ + error_unless(ctx, prepare_call(jlerror_func), cond, msg); +} + static void raise_exception(jl_codectx_t &ctx, Value *exc, BasicBlock *contBB=nullptr) { @@ -3316,7 +3321,7 @@ static Value *emit_genericmemoryptr(jl_codectx_t &ctx, Value *mem, const jl_data LoadInst *LI = ctx.builder.CreateAlignedLoad(PPT, addr, Align(sizeof(char*))); LI->setOrdering(AtomicOrdering::NotAtomic); LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None)); - jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + jl_aliasinfo_t aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr); aliasinfo.decorateInst(LI); Value *ptr = LI; if (AS) { @@ -3342,7 +3347,7 @@ static Value *emit_genericmemoryowner(jl_codectx_t &ctx, Value *t) return emit_guarded_test(ctx, foreign, t, [&] { addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_jlgenericmemory, m, 1); LoadInst *owner = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(void*))); - jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); + jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr); ai.decorateInst(owner); return ctx.builder.CreateSelect(ctx.builder.CreateIsNull(owner), t, owner); }); @@ -4427,6 +4432,199 @@ static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b) } #endif + +static jl_cgval_t emit_const_len_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, size_t nel, jl_genericmemory_t *inst) +{ + if (nel == 0) { + Value *empty_alloc = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)inst)); + return mark_julia_type(ctx, empty_alloc, true, typ); + } + const jl_datatype_layout_t *layout = ((jl_datatype_t*)typ)->layout; + assert(((jl_datatype_t*)typ)->has_concrete_subtype && layout != NULL); + size_t elsz = layout->size; + int isboxed = layout->flags.arrayelem_isboxed; + int isunion = layout->flags.arrayelem_isunion; + int zi = ((jl_datatype_t*)typ)->zeroinit; + if (isboxed) + elsz = sizeof(void*); + + size_t nbytes; + bool overflow = __builtin_mul_overflow(nel, elsz, &nbytes); + if (isunion) { + // an extra byte for each isbits union memory element, stored at m->ptr + m->length + overflow |= __builtin_add_overflow(nbytes, nel, &nbytes); + } + // overflow if signed size is too big or nel is too big (the latter matters iff elsz==0) + ssize_t tmp=1; + overflow |= __builtin_add_overflow(nel, 1, &tmp) || __builtin_add_overflow(nbytes, 1, &tmp); + if (overflow) + emit_error(ctx, prepare_call(jlargumenterror_func), "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); + + auto T_size = ctx.types().T_size; + auto int8t = getInt8Ty(ctx.builder.getContext()); + auto cg_typ = literal_pointer_val(ctx, (jl_value_t*) typ); + auto cg_nbytes = ConstantInt::get(T_size, nbytes); + auto cg_nel = ConstantInt::get(T_size, nel); + + // else actually allocate mem + auto arg_typename = [&] JL_NOTSAFEPOINT { + std::string type_str; + auto eltype = jl_tparam1(typ); + if (jl_is_datatype(eltype)) + type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name); + else if (jl_is_uniontype(eltype)) + type_str = "Union"; + else + type_str = ""; + return "Memory{" + type_str + "}[]"; + }; + size_t tot = nbytes + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT); + + int pooled = tot <= GC_MAX_SZCLASS; + Value *alloc, *decay_alloc, *memory_ptr; + jl_aliasinfo_t aliasinfo; + if (pooled) { + alloc = emit_allocobj(ctx, tot, cg_typ, false, JL_SMALL_BYTE_ALIGNMENT); + decay_alloc = decay_derived(ctx, alloc); + memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1); + setName(ctx.emission_context, memory_ptr, "memory_ptr"); + auto objref = emit_pointer_from_objref(ctx, alloc); + Value *memory_data = emit_ptrgep(ctx, objref, JL_SMALL_BYTE_ALIGNMENT); + auto *store = ctx.builder.CreateAlignedStore(memory_data, memory_ptr, Align(sizeof(void*))); + aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr); + aliasinfo.decorateInst(store); + setName(ctx.emission_context, memory_data, "memory_data"); + } else { // just use the dynamic length version since the malloc will be slow anyway + auto ptls = get_current_ptls(ctx); + auto call = prepare_call(jl_alloc_genericmemory_unchecked_func); + alloc = ctx.builder.CreateCall(call, { ptls, cg_nbytes, cg_typ}); + decay_alloc = maybe_decay_tracked(ctx, alloc); + } + // set length (jl_alloc_genericmemory_unchecked_func doesn't have it) + Value *len_field = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 0); + auto *len_store = ctx.builder.CreateAlignedStore(cg_nel, len_field, Align(sizeof(void*))); + aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen); + aliasinfo.decorateInst(len_store); + // The array length must be initialized so add a compiler fence + // If the memory is pooled sweeping does not need the length so we can avoid the barrier. + // This is dependent on the implementation of the GC + if (!pooled) + ctx.builder.CreateFence(AtomicOrdering::Release, SyncScope::SingleThread); + + // zeroinit pointers and unions + if (zi) { + memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1); + auto *load = ctx.builder.CreateAlignedLoad(ctx.types().T_ptr, memory_ptr, Align(sizeof(void*))); + aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr); + aliasinfo.decorateInst(load); + ctx.builder.CreateMemSet(load, ConstantInt::get(int8t, 0), cg_nbytes, Align(sizeof(void*))); + } + + setName(ctx.emission_context, alloc, arg_typename); + return mark_julia_type(ctx, alloc, true, typ); +} + +static jl_cgval_t emit_memorynew(jl_codectx_t &ctx, jl_datatype_t *typ, jl_cgval_t nel, jl_genericmemory_t *inst) +{ + emit_typecheck(ctx, nel, (jl_value_t*)jl_long_type, "memorynew"); + nel = update_julia_type(ctx, nel, (jl_value_t*)jl_long_type); + if (nel.typ == jl_bottom_type) + return jl_cgval_t(); + + const jl_datatype_layout_t *layout = ((jl_datatype_t*)typ)->layout; + assert(((jl_datatype_t*)typ)->has_concrete_subtype && layout != NULL); + size_t elsz = layout->size; + int isboxed = layout->flags.arrayelem_isboxed; + int isunion = layout->flags.arrayelem_isunion; + int zi = ((jl_datatype_t*)typ)->zeroinit; + if (isboxed) + elsz = sizeof(void*); + + auto ptls = get_current_ptls(ctx); + auto T_size = ctx.types().T_size; + auto int8t = getInt8Ty(ctx.builder.getContext()); + BasicBlock *emptymemBB, *nonemptymemBB, *retvalBB; + emptymemBB = BasicBlock::Create(ctx.builder.getContext(), "emptymem"); + nonemptymemBB = BasicBlock::Create(ctx.builder.getContext(), "nonemptymem"); + retvalBB = BasicBlock::Create(ctx.builder.getContext(), "retval"); + auto nel_unboxed = emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_long_type); + Value *memorynew_empty = ctx.builder.CreateICmpEQ(nel_unboxed, ConstantInt::get(T_size, 0)); + setName(ctx.emission_context, memorynew_empty, "memorynew_empty"); + ctx.builder.CreateCondBr(memorynew_empty, emptymemBB, nonemptymemBB); + // if nel == 0 + emptymemBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(emptymemBB); + auto emptyalloc = track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)inst)); + ctx.builder.CreateBr(retvalBB); + nonemptymemBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(nonemptymemBB); + // else actually allocate mem + auto arg_typename = [&] JL_NOTSAFEPOINT { + std::string type_str; + auto eltype = jl_tparam1(typ); + if (jl_is_datatype(eltype)) + type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name); + else if (jl_is_uniontype(eltype)) + type_str = "Union"; + else + type_str = ""; + return "Memory{" + type_str + "}[]"; + }; + auto cg_typ = literal_pointer_val(ctx, (jl_value_t*) typ); + auto cg_elsz = ConstantInt::get(T_size, elsz); + + FunctionCallee intr = Intrinsic::getDeclaration(jl_Module, Intrinsic::smul_with_overflow, ArrayRef(T_size)); + // compute nbytes with possible overflow + Value *prod_with_overflow = ctx.builder.CreateCall(intr, {nel_unboxed, cg_elsz}); + Value *nbytes = ctx.builder.CreateExtractValue(prod_with_overflow, 0); + Value *overflow = ctx.builder.CreateExtractValue(prod_with_overflow, 1); + if (isunion) { + // if isunion, we need to allocate the union selector bytes as well + intr = Intrinsic::getDeclaration(jl_Module, Intrinsic::sadd_with_overflow, ArrayRef(T_size)); + Value *add_with_overflow = ctx.builder.CreateCall(intr, {nel_unboxed, nbytes}); + nbytes = ctx.builder.CreateExtractValue(add_with_overflow, 0); + Value *overflow1 = ctx.builder.CreateExtractValue(add_with_overflow, 1); + overflow = ctx.builder.CreateOr(overflow, overflow1); + } + Value *negnel = ctx.builder.CreateICmpSLT(nel_unboxed, ConstantInt::get(T_size, 0)); + overflow = ctx.builder.CreateOr(overflow, negnel); + auto cg_typemax_int = ConstantInt::get(T_size, (((size_t)-1)>>1)-1); + Value *tobignel = ctx.builder.CreateICmpSLT(cg_typemax_int, elsz == 0 ? nel_unboxed: nbytes); + overflow = ctx.builder.CreateOr(overflow, tobignel); + Value *notoverflow = ctx.builder.CreateNot(overflow); + error_unless(ctx, prepare_call(jlargumenterror_func), notoverflow, "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); + // actually allocate + auto call = prepare_call(jl_alloc_genericmemory_unchecked_func); + Value *alloc = ctx.builder.CreateCall(call, { ptls, nbytes, cg_typ}); + // set length (jl_alloc_genericmemory_unchecked_func doesn't have it) + Value *decay_alloc = decay_derived(ctx, alloc); + Value *len_field = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 0); + auto len_store = ctx.builder.CreateAlignedStore(nel_unboxed, len_field, Align(sizeof(void*))); + auto aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memorylen); + aliasinfo.decorateInst(len_store); + //This avoids the length store from being deleted which is illegal + ctx.builder.CreateFence(AtomicOrdering::Release, SyncScope::SingleThread); + // zeroinit pointers and unions + if (zi) { + Value *memory_ptr = ctx.builder.CreateStructGEP(ctx.types().T_jlgenericmemory, decay_alloc, 1); + auto *load = ctx.builder.CreateAlignedLoad(ctx.types().T_ptr, memory_ptr, Align(sizeof(void*))); + aliasinfo = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_memoryptr); + aliasinfo.decorateInst(load); + ctx.builder.CreateMemSet(load, ConstantInt::get(int8t, 0), nbytes, Align(sizeof(void*))); + } + + setName(ctx.emission_context, alloc, arg_typename); + ctx.builder.CreateBr(retvalBB); + nonemptymemBB = ctx.builder.GetInsertBlock(); + // phi node to choose which side of branch + retvalBB->insertInto(ctx.f); + ctx.builder.SetInsertPoint(retvalBB); + auto phi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2); + phi->addIncoming(emptyalloc, emptymemBB); + phi->addIncoming(alloc, nonemptymemBB); + return mark_julia_type(ctx, phi, true, typ); +} + static jl_cgval_t _emit_memoryref(jl_codectx_t &ctx, Value *mem, Value *data, const jl_datatype_layout_t *layout, jl_value_t *typ) { //jl_cgval_t argv[] = { diff --git a/src/codegen.cpp b/src/codegen.cpp index 21591acedc632..2894afbedbffd 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -408,8 +408,8 @@ struct jl_tbaacache_t { tbaa_arrayptr = tbaa_make_child(mbuilder, "jtbaa_arrayptr", tbaa_array_scalar).first; tbaa_arraysize = tbaa_make_child(mbuilder, "jtbaa_arraysize", tbaa_array_scalar).first; tbaa_arrayselbyte = tbaa_make_child(mbuilder, "jtbaa_arrayselbyte", tbaa_array_scalar).first; - tbaa_memoryptr = tbaa_make_child(mbuilder, "jtbaa_memoryptr", tbaa_array_scalar, true).first; - tbaa_memorylen = tbaa_make_child(mbuilder, "jtbaa_memorylen", tbaa_array_scalar, true).first; + tbaa_memoryptr = tbaa_make_child(mbuilder, "jtbaa_memoryptr", tbaa_array_scalar).first; + tbaa_memorylen = tbaa_make_child(mbuilder, "jtbaa_memorylen", tbaa_array_scalar).first; tbaa_memoryown = tbaa_make_child(mbuilder, "jtbaa_memoryown", tbaa_array_scalar, true).first; tbaa_const = tbaa_make_child(mbuilder, "jtbaa_const", nullptr, true).first; } @@ -801,6 +801,12 @@ static const auto jlerror_func = new JuliaFunction<>{ {getPointerTy(C)}, false); }, get_attrs_noreturn, }; +static const auto jlargumenterror_func = new JuliaFunction<>{ + XSTR(jl_argument_error), + [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), + {getPointerTy(C)}, false); }, + get_attrs_noreturn, +}; static const auto jlatomicerror_func = new JuliaFunction<>{ XSTR(jl_atomic_error), [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), @@ -1153,6 +1159,30 @@ static const auto jl_alloc_obj_func = new JuliaFunction{ None); }, }; +static const auto jl_alloc_genericmemory_unchecked_func = new JuliaFunction{ + XSTR(jl_alloc_genericmemory_unchecked), + [](LLVMContext &C, Type *T_size) { + auto T_jlvalue = JuliaType::get_jlvalue_ty(C); + auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked); + auto T_pjlvalue = PointerType::get(T_jlvalue, 0); + return FunctionType::get(T_prjlvalue, + {T_pjlvalue, T_size, T_pjlvalue}, false); + }, + [](LLVMContext &C) { + auto FnAttrs = AttrBuilder(C); + FnAttrs.addMemoryAttr(MemoryEffects::argMemOnly(ModRefInfo::Ref) | MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef)); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoUnwind); + auto RetAttrs = AttrBuilder(C); + RetAttrs.addAttribute(Attribute::NoAlias); + RetAttrs.addAttribute(Attribute::NonNull); + return AttributeList::get(C, + AttributeSet::get(C, FnAttrs), + AttributeSet::get(C, RetAttrs), + None); + }, +}; + static const auto jl_newbits_func = new JuliaFunction<>{ XSTR(jl_new_bits), [](LLVMContext &C) { @@ -1461,6 +1491,10 @@ static const auto pointer_from_objref_func = new JuliaFunction<>{ AttrBuilder FnAttrs(C); FnAttrs.addMemoryAttr(MemoryEffects::none()); FnAttrs.addAttribute(Attribute::NoUnwind); + FnAttrs.addAttribute(Attribute::Speculatable); + FnAttrs.addAttribute(Attribute::WillReturn); + FnAttrs.addAttribute(Attribute::NoRecurse); + FnAttrs.addAttribute(Attribute::NoSync); return AttributeList::get(C, AttributeSet::get(C, FnAttrs), Attributes(C, {Attribute::NonNull}), @@ -1574,6 +1608,7 @@ static const auto &builtin_func_map() { { jl_f_nfields_addr, new JuliaFunction<>{XSTR(jl_f_nfields), get_func_sig, get_func_attrs} }, { jl_f__expr_addr, new JuliaFunction<>{XSTR(jl_f__expr), get_func_sig, get_func_attrs} }, { jl_f__typevar_addr, new JuliaFunction<>{XSTR(jl_f__typevar), get_func_sig, get_func_attrs} }, + { jl_f_memorynew_addr, new JuliaFunction<>{XSTR(jl_f_memorynew), get_func_sig, get_func_attrs} }, { jl_f_memoryref_addr, new JuliaFunction<>{XSTR(jl_f_memoryref), get_func_sig, get_func_attrs} }, { jl_f_memoryrefoffset_addr, new JuliaFunction<>{XSTR(jl_f_memoryrefoffset), get_func_sig, get_func_attrs} }, { jl_f_memoryrefset_addr, new JuliaFunction<>{XSTR(jl_f_memoryrefset), get_func_sig, get_func_attrs} }, @@ -4421,6 +4456,30 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f, return true; } + else if (f == jl_builtin_memorynew && (nargs == 2)) { + const jl_cgval_t &memty = argv[1]; + if (!memty.constant) + return false; + jl_datatype_t *typ = (jl_datatype_t*) memty.constant; + if (!jl_is_concrete_type((jl_value_t*)typ) || !jl_is_genericmemory_type(typ)) + return false; + jl_genericmemory_t *inst = (jl_genericmemory_t*)((jl_datatype_t*)typ)->instance; + if (inst == NULL) + return false; + if (argv[2].constant) { + if (!jl_is_long(argv[2].constant)) + return false; + size_t nel = jl_unbox_long(argv[2].constant); + if (nel < 0) + return false; + *ret = emit_const_len_memorynew(ctx, typ, nel, inst); + } + else { + *ret = emit_memorynew(ctx, typ, argv[2], inst); + } + return true; + } + else if (f == jl_builtin_memoryref && nargs == 1) { const jl_cgval_t &mem = argv[1]; jl_datatype_t *mty_dt = (jl_datatype_t*)jl_unwrap_unionall(mem.typ); @@ -10220,6 +10279,7 @@ static void init_jit_functions(void) add_named_global(jltypeassert_func, &jl_typeassert); add_named_global(jlapplytype_func, &jl_instantiate_type_in_env); add_named_global(jl_object_id__func, &jl_object_id_); + add_named_global(jl_alloc_genericmemory_unchecked_func, &jl_alloc_genericmemory_unchecked); add_named_global(jl_alloc_obj_func, (void*)NULL); add_named_global(jl_newbits_func, (void*)jl_new_bits); add_named_global(jl_typeof_func, (void*)NULL); diff --git a/src/common_symbols2.inc b/src/common_symbols2.inc index 2a6990bac52ff..f905f88094b4e 100644 --- a/src/common_symbols2.inc +++ b/src/common_symbols2.inc @@ -246,3 +246,5 @@ jl_symbol("invokelatest"), jl_symbol("jl_array_del_end"), jl_symbol("_mod64"), jl_symbol("parameters"), +jl_symbol("monotonic"), +jl_symbol("regex.jl"), diff --git a/src/genericmemory.c b/src/genericmemory.c index 2b02f021ccdd0..e435ec3b63c9f 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -24,49 +24,51 @@ JL_DLLEXPORT char *jl_genericmemory_typetagdata(jl_genericmemory_t *m) JL_NOTSAF return (char*)m->ptr + m->length * layout->size; } -#if defined(_P64) && defined(UINT128MAX) -typedef __uint128_t wideint_t; -#else -typedef uint64_t wideint_t; -#endif - #define MAXINTVAL (((size_t)-1)>>1) -jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz) +// ONLY USE FROM CODEGEN. It only partially initializes the mem +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory_unchecked(jl_ptls_t ptls, size_t nbytes, jl_datatype_t *mtype) { - jl_task_t *ct = jl_current_task; - char *data; - jl_genericmemory_t *m; - if (nel == 0) // zero-sized allocation optimization - return (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance; - wideint_t prod = (wideint_t)nel * elsz; - if (isunion) { - // an extra byte for each isbits union memory element, stored at m->ptr + m->length - prod += nel; - } - if (nel >= MAXINTVAL || prod >= (wideint_t) MAXINTVAL) - jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); - size_t tot = (size_t)prod + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT); + size_t tot = nbytes + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT); int pooled = tot <= GC_MAX_SZCLASS; + char *data; + jl_genericmemory_t *m; if (!pooled) { - data = (char*)jl_gc_managed_malloc(prod); + data = (char*)jl_gc_managed_malloc(nbytes); tot = sizeof(jl_genericmemory_t) + sizeof(void*); } - m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tot, mtype); + m = (jl_genericmemory_t*)jl_gc_alloc(ptls, tot, mtype); if (pooled) { data = (char*)m + JL_SMALL_BYTE_ALIGNMENT; } else { int isaligned = 1; // jl_gc_managed_malloc is always aligned - jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned); + jl_gc_track_malloced_genericmemory(ptls, m, isaligned); jl_genericmemory_data_owner_field(m) = (jl_value_t*)m; } - m->length = nel; + // length set by codegen m->ptr = data; + return m; +} +jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz) +{ + if (nel == 0) // zero-sized allocation optimization + return (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance; + size_t nbytes; + int overflow = __builtin_mul_overflow(nel, elsz, &nbytes); + if (isunion) { + // an extra byte for each isbits union memory element, stored at m->ptr + m->length + overflow |= __builtin_add_overflow(nel, nbytes, &nbytes); + } + if ((nel >= MAXINTVAL-1) || (nbytes >= MAXINTVAL-1) || overflow) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); + jl_task_t *ct = jl_current_task; + jl_genericmemory_t *m = jl_alloc_genericmemory_unchecked((jl_ptls_t) ct->ptls, nbytes, (jl_datatype_t*)mtype); + m->length = nel; if (zeroinit) - memset(data, 0, (size_t)prod); + memset((char*)m->ptr, 0, nbytes); return m; } @@ -150,13 +152,14 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1)) jl_exceptionf(jl_argumenterror_type, "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align); - wideint_t prod = (wideint_t)nel * elsz; + size_t nbytes; + int overflow = __builtin_mul_overflow(nel, elsz, &nbytes); if (isunion) { // an extra byte for each isbits union memory element, stored at m->ptr + m->length - prod += nel; + overflow |= __builtin_add_overflow(nel, nbytes, &nbytes); } - if (nel >= MAXINTVAL || prod >= (wideint_t) MAXINTVAL) - jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: too large for system address width"); + if ((nel >= MAXINTVAL) || (nbytes >= MAXINTVAL) || overflow) + jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width"); int tsz = sizeof(jl_genericmemory_t) + sizeof(void*); m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, mtype); m->ptr = data; diff --git a/src/julia.h b/src/julia.h index a9864aad16ccc..049416a33336f 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1949,6 +1949,7 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void size_t nel, int own_buffer); JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory(jl_value_t *mtype, size_t nel); JL_DLLEXPORT jl_genericmemory_t *jl_pchar_to_memory(const char *str, size_t len); +JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory_unchecked(jl_ptls_t ptls, size_t nbytes, jl_datatype_t *mtype); JL_DLLEXPORT jl_value_t *jl_genericmemory_to_string(jl_genericmemory_t *m, size_t len); JL_DLLEXPORT jl_genericmemory_t *jl_alloc_memory_any(size_t n); JL_DLLEXPORT jl_value_t *jl_genericmemoryref(jl_genericmemory_t *m, size_t i); // 0-indexed @@ -2068,6 +2069,7 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error_rt(const char *fname, jl_value_t *got JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var, jl_value_t *scope JL_MAYBE_UNROOTED); JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_datatype_t *t, jl_sym_t *var); +JL_DLLEXPORT void JL_NORETURN jl_argument_error(char *str); JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str); JL_DLLEXPORT void JL_NORETURN jl_bounds_error(jl_value_t *v JL_MAYBE_UNROOTED, jl_value_t *t JL_MAYBE_UNROOTED); diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp index 59fce1235e14e..194c6837860ca 100644 --- a/src/llvm-alloc-helpers.cpp +++ b/src/llvm-alloc-helpers.cpp @@ -250,8 +250,8 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r return true; } if (required.pass.gc_loaded_func == callee) { - required.use_info.haspreserve = true; - required.use_info.hasload = true; + // TODO add manual load->store forwarding + push_inst(inst); return true; } if (required.pass.typeof_func == callee) { diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index a9e1b1e02da42..3f73a44088ace 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -752,11 +752,10 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF call->eraseFromParent(); return; } - //if (pass.gc_loaded_func == callee) { - // call->replaceAllUsesWith(new_i); - // call->eraseFromParent(); - // return; - //} + if (pass.gc_loaded_func == callee) { + user->replaceUsesOfWith(orig_i, Constant::getNullValue(orig_i->getType())); + return; + } if (pass.typeof_func == callee) { ++RemovedTypeofs; call->replaceAllUsesWith(tag); diff --git a/src/pipeline.cpp b/src/pipeline.cpp index 8c9054c0d65ff..39f896ba656d2 100644 --- a/src/pipeline.cpp +++ b/src/pipeline.cpp @@ -527,6 +527,7 @@ static void buildIntrinsicLoweringPipeline(ModulePassManager &MPM, PassBuilder * JULIA_PASS(FPM.addPass(LateLowerGCPass())); JULIA_PASS(FPM.addPass(FinalLowerGCPass())); if (O.getSpeedupLevel() >= 2) { + FPM.addPass(DSEPass()); FPM.addPass(GVNPass()); FPM.addPass(SCCPPass()); FPM.addPass(DCEPass()); diff --git a/src/rtutils.c b/src/rtutils.c index fcc4a489d3f38..00a5b639d8683 100644 --- a/src/rtutils.c +++ b/src/rtutils.c @@ -157,6 +157,13 @@ JL_DLLEXPORT void JL_NORETURN jl_has_no_field_error(jl_datatype_t *t, jl_sym_t * jl_throw(jl_new_struct(jl_fielderror_type, t, var)); } +JL_DLLEXPORT void JL_NORETURN jl_argument_error(char *str) // == jl_exceptionf(jl_argumenterror_type, "%s", str) +{ + jl_value_t *msg = jl_pchar_to_string((char*)str, strlen(str)); + JL_GC_PUSH1(&msg); + jl_throw(jl_new_struct(jl_argumenterror_type, msg)); +} + JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str) // == jl_exceptionf(jl_atomicerror_type, "%s", str) { jl_value_t *msg = jl_pchar_to_string((char*)str, strlen(str)); diff --git a/src/staticdata.c b/src/staticdata.c index 1665b5be4ed0f..a0d56f3e38f54 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -101,7 +101,7 @@ extern "C" { // TODO: put WeakRefs on the weak_refs list during deserialization // TODO: handle finalizers -#define NUM_TAGS 193 +#define NUM_TAGS 194 // An array of references that need to be restored from the sysimg // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C. @@ -290,6 +290,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_builtin_replacefield); INSERT_TAG(jl_builtin_setfieldonce); INSERT_TAG(jl_builtin_fieldtype); + INSERT_TAG(jl_builtin_memorynew); INSERT_TAG(jl_builtin_memoryref); INSERT_TAG(jl_builtin_memoryrefoffset); INSERT_TAG(jl_builtin_memoryrefget); @@ -508,7 +509,7 @@ static const jl_fptr_args_t id_to_fptrs[] = { &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined, &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, &jl_f_getfield, &jl_f_setfield, &jl_f_swapfield, &jl_f_modifyfield, &jl_f_setfieldonce, - &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields, &jl_f_apply_type, + &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields, &jl_f_apply_type, &jl_f_memorynew, &jl_f_memoryref, &jl_f_memoryrefoffset, &jl_f_memoryrefget, &jl_f_memoryref_isassigned, &jl_f_memoryrefset, &jl_f_memoryrefswap, &jl_f_memoryrefmodify, &jl_f_memoryrefreplace, &jl_f_memoryrefsetonce, &jl_f_applicable, &jl_f_invoke, &jl_f_sizeof, &jl_f__expr, &jl_f__typevar, diff --git a/test/arrayops.jl b/test/arrayops.jl index ca378c3f3036b..f793987ed1ff5 100644 --- a/test/arrayops.jl +++ b/test/arrayops.jl @@ -3295,3 +3295,44 @@ end ref = memoryref(mem, 2) @test parent(ref) === mem end + +# some tests marked broken because CI runs with check-bounds=yes which impedes escape analysis +@testset "Array/Memory escape analysis" begin + function no_allocate(T::Type{<:Union{Memory, Vector}}) + v = T(undef, 2) + v[1] = 2 + v[2] = 3 + return v[1] + v[2] + end + function test_alloc(T; broken=false) + @test (@allocated no_allocate(T)) == 0 broken=broken + end + @testset "$T" for T in [Memory, Vector] + @testset "$ET" for ET in [Int, Union{Int, Float64}] + no_allocate(T{ET}) #compile + test_alloc(T{ET}, broken=(ET==Union{Int, Float64})) + end + end + function f() # this was causing a bug on an in progress version of #55913. + m = Memory{Float64}(undef, 4) + m .= 1.0 + s = 0.0 + for x ∈ m + s += x + end + s + end + @test f() === 4.0 + function confuse_alias_analysis() + mem0 = Memory{Int}(undef, 1) + mem1 = Memory{Int}(undef, 1) + @inbounds mem0[1] = 3 + for width in 1:2 + @inbounds mem1[1] = mem0[1] + mem0 = mem1 + end + mem0[1] + end + @test confuse_alias_analysis() == 3 + @test_broken (@allocated confuse_alias_analysis()) == 0 +end diff --git a/test/core.jl b/test/core.jl index 7e4f655222ea5..4bbb2ca368019 100644 --- a/test/core.jl +++ b/test/core.jl @@ -4505,6 +4505,15 @@ for T in (Any, ValueWrapper) end end +#test grow_end ccall directly since it's used in the C source +for ET in [Nothing, Int, Union{Int, Nothing}, Any] + for n in [0, 1, 10] + arr = Vector{ET}(undef, n) + ccall(:jl_array_grow_end, Cvoid, (Any, UInt), arr, 1) + @test length(arr) == n+1 + end +end + # check if we can run multiple finalizers at the same time # Use a `@noinline` function to make sure the inefficient gc root generation # doesn't keep the object alive.