Skip to content

Commit

Permalink
make memorynew intrinsic
Browse files Browse the repository at this point in the history
Co-authored-by: Jameson Nash <[email protected]>
Co-authored-by: Jeff Bezanson <[email protected]>
Co-authored-by: Gabriel Baraldi  <[email protected]>
  • Loading branch information
gbaraldi authored and oscardssmith committed Dec 11, 2024
1 parent d269d7d commit 69d40b6
Show file tree
Hide file tree
Showing 15 changed files with 112 additions and 95 deletions.
21 changes: 19 additions & 2 deletions Compiler/src/tfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2017,6 +2017,12 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
return anyinfo ? PartialStruct(𝕃, typ, argtypes) : typ
end

# Return-type inference for the `Core.memorynew(memtype, m)` builtin.
@nospecs function memorynew_tfunc(𝕃::AbstractLattice, memtype, m)
    # When the length argument can never be an `Int`, the result is inferred as `Bottom`.
    if !hasintersect(widenconst(m), Int)
        return Bottom
    end
    # NOTE(review): presumably `instanceof_tfunc(...)[1]` is the type that `memtype`
    # denotes — confirm; it is then narrowed by meeting it with `GenericMemory`.
    requested = instanceof_tfunc(memtype, true)[1]
    return tmeet(𝕃, requested, GenericMemory)
end
add_tfunc(Core.memorynew, 2, 2, memorynew_tfunc, 10)

@nospecs function memoryrefget_tfunc(𝕃::AbstractLattice, mem, order, boundscheck)
memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom
return memoryref_elemtype(mem)
Expand Down Expand Up @@ -2244,7 +2250,16 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt
@nospecialize(rt))
= partialorder(𝕃)
na = length(argtypes)
if f === memoryrefnew
if f === Core.memorynew
argtypes[1] isa Const && argtypes[2] isa Const || return false
MemT = argtypes[1].val
isconcretetype(MemT) && MemT <: GenericMemory || return false
len = argtypes[2].val
len isa Int && 0 <= len < typemax(Int) || return false
elsz = datatype_layoutsize(MemT)
checked_smul_int(len, elsz)[2] && return false
return true
elseif f === memoryrefnew
return memoryref_builtin_common_nothrow(argtypes)
elseif f === memoryrefoffset
length(argtypes) == 1 || return false
Expand Down Expand Up @@ -2347,6 +2362,7 @@ const _EFFECT_FREE_BUILTINS = [
isa,
UnionAll,
getfield,
Core.memorynew,
memoryrefnew,
memoryrefoffset,
memoryrefget,
Expand Down Expand Up @@ -2381,6 +2397,7 @@ const _INACCESSIBLEMEM_BUILTINS = Any[
compilerbarrier,
Core._typevar,
donotdelete,
Core.memorynew,
]

const _ARGMEM_BUILTINS = Any[
Expand Down Expand Up @@ -2543,7 +2560,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
consistent = ALWAYS_TRUE
elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned
consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
elseif f === Core._typevar
elseif f === Core._typevar || f === Core.memorynew
consistent = CONSISTENT_IF_NOTRETURNED
else
consistent = ALWAYS_FALSE
Expand Down
7 changes: 4 additions & 3 deletions Compiler/test/irpasses.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1083,12 +1083,13 @@ end
# test `flags_for_effects` and DCE
# ================================

let # effect-freeness computation for array allocation
@testset "effect-freeness computation for array allocation" begin

# should eliminate dead allocations
good_dims = [1, 2, 3, 4, 10]
Ns = [1, 2, 3, 4, 10]
for dim = good_dims, N = Ns
Ts = Any[Int, Union{Missing,Nothing}, Nothing, Any]
@testset "$dim, $N" for dim in good_dims, N in Ns
Int64(dim)^N > typemax(Int) && continue
dims = ntuple(i->dim, N)
@test @eval fully_eliminated() do
Expand All @@ -1099,7 +1100,7 @@ let # effect-freeness computation for array allocation

# shouldn't eliminate erroneous dead allocations
bad_dims = [-1, typemax(Int)]
for dim in bad_dims, N in [1, 2, 3, 4, 10], T in Any[Int, Union{Missing,Nothing}, Nothing, Any]
@testset "$dim, $N, $T" for dim in bad_dims, N in Ns, T in Ts
dims = ntuple(i->dim, N)
@test @eval !fully_eliminated() do
Array{$T,$N}(undef, $(dims...))
Expand Down
7 changes: 1 addition & 6 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -575,12 +575,7 @@ struct UndefInitializer end
const undef = UndefInitializer()

# type and dimensionality specified
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} =
if isdefined(self, :instance) && m === 0
self.instance
else
ccall(:jl_alloc_genericmemory, Ref{GenericMemory{kind,T,addrspace}}, (Any, Int), self, m)
end
# uninitialized memory of length `m`: the allocation is forwarded to the `Core.memorynew` builtin
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} = Core.memorynew(self, m)
# a 1-tuple of dimensions is unwrapped with `getfield` and passed on as a plain length
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, d::NTuple{1,Int}) where {T,kind,addrspace} = self(undef, getfield(d,1))
# empty vector constructor
(self::Type{GenericMemory{kind,T,addrspace}})() where {T,kind,addrspace} = self(undef, 0)
Expand Down
13 changes: 11 additions & 2 deletions base/essentials.jl
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,17 @@ default_access_order(a::GenericMemory{:atomic}) = :monotonic
default_access_order(a::GenericMemoryRef{:not_atomic}) = :not_atomic
default_access_order(a::GenericMemoryRef{:atomic}) = :monotonic

getindex(A::GenericMemory, i::Int) = (@_noub_if_noinbounds_meta;
memoryrefget(memoryrefnew(memoryrefnew(A), i, @_boundscheck), default_access_order(A), false))
# bootstrap version for Memory{Any}
#getindex(A::Memory{Any}, i::Int) = (@_noub_if_noinbounds_meta;
# memoryrefget(memoryrefnew(memoryrefnew(A), i, @_boundscheck), default_access_order(A), false))

# Read element `i` of `A` using the default access order for `A`.
# Throws via `throw_boundserror(A, (i,))` when bounds checking is enabled and the check fails.
function getindex(A::GenericMemory, i::Int)
    @_noub_if_noinbounds_meta
    if @_boundscheck
        # Compare the zero-based index against the length as unsigned integers, so that
        # a single `ult_int` rejects an index below 1 (which wraps to a huge UInt) as
        # well as an index past the end.
        zero_based = bitcast(UInt, sub_int(i, 1))
        len = bitcast(UInt, A.length)
        ult_int(zero_based, len) || throw_boundserror(A, (i,))
    end
    # The index was validated above (or checks are elided), so the builtins run unchecked.
    ref = memoryrefnew(memoryrefnew(A), i, false)
    return memoryrefget(ref, default_access_order(A), false)
end
getindex(A::GenericMemoryRef) = memoryrefget(A, default_access_order(A), @_boundscheck)

"""
Expand Down
7 changes: 4 additions & 3 deletions base/genericmemory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,10 @@ getindex(A::Memory, c::Colon) = copy(A)

## Indexing: setindex! ##

function _setindex!(A::Memory{T}, x::T, i1::Int) where {T}
ref = memoryrefnew(memoryref(A), i1, @_boundscheck)
memoryrefset!(ref, x, :not_atomic, @_boundscheck)
# Store `x` at 1-based index `i` of `A` (non-atomic) and return `A`.
# Throws a bounds error for `i` outside `1:A.length` unless the caller elides bounds
# checks with `@inbounds`.
function _setindex!(A::Memory{T}, x::T, i::Int) where {T}
    # The builtins below are called with bounds checking disabled, so this is the only
    # guard. The result of the comparison must be acted on: comparing the zero-based
    # index with the length as unsigned integers rejects both `i < 1` (which wraps to a
    # huge UInt) and `i > A.length` in one test.
    @boundscheck (i - 1) % UInt < A.length % UInt || throw_boundserror(A, (i,))
    ref = memoryrefnew(memoryref(A), i, false)
    memoryrefset!(ref, x, :not_atomic, false)
    return A
end
end

Expand Down
22 changes: 11 additions & 11 deletions doc/src/manual/performance-tips.md
Original file line number Diff line number Diff line change
Expand Up @@ -1058,12 +1058,12 @@ the output. As a trivial example, compare

```jldoctest prealloc
julia> function xinc(x)
return [x, x+1, x+2]
return [x + i for i in 1:3000]
end;
julia> function loopinc()
y = 0
for i = 1:10^7
for i = 1:10^5
ret = xinc(i)
y += ret[2]
end
Expand All @@ -1075,16 +1075,16 @@ with

```jldoctest prealloc
julia> function xinc!(ret::AbstractVector{T}, x::T) where T
ret[1] = x
ret[2] = x+1
ret[3] = x+2
for i in 1:3000
ret[i] = x+i
end
nothing
end;
julia> function loopinc_prealloc()
ret = Vector{Int}(undef, 3)
ret = Vector{Int}(undef, 3000)
y = 0
for i = 1:10^7
for i = 1:10^5
xinc!(ret, i)
y += ret[2]
end
Expand All @@ -1096,12 +1096,12 @@ Timing results:

```jldoctest prealloc; filter = r"[0-9\.]+ seconds \(.*?\)"
julia> @time loopinc()
0.529894 seconds (40.00 M allocations: 1.490 GiB, 12.14% gc time)
50000015000000
0.297454 seconds (200.00 k allocations: 2.239 GiB, 39.80% gc time)
5000250000
julia> @time loopinc_prealloc()
0.030850 seconds (6 allocations: 288 bytes)
50000015000000
0.009410 seconds (2 allocations: 23.477 KiB)
5000250000
```

Preallocation has other advantages, for example by allowing the caller to control the "output"
Expand Down
13 changes: 3 additions & 10 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@
extern "C" {
#endif

#if defined(_P64) && defined(UINT128MAX)
typedef __uint128_t wideint_t;
#else
typedef uint64_t wideint_t;
#endif

#define MAXINTVAL (((size_t)-1)>>1)

JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dims)
Expand All @@ -30,10 +24,9 @@ JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dim
size_t _nel = 1;
for (i = 0; i < ndims; i++) {
size_t di = dims[i];
wideint_t prod = (wideint_t)_nel * (wideint_t)di;
if (prod >= (wideint_t) MAXINTVAL || di >= MAXINTVAL)
int overflow = __builtin_mul_overflow(_nel, di, &_nel);
if (overflow || di >= MAXINTVAL)
return 1;
_nel = prod;
}
*nel = _nel;
return 0;
Expand Down Expand Up @@ -204,7 +197,7 @@ JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc)
int isbitsunion = jl_genericmemory_isbitsunion(a->ref.mem);
size_t newnrows = n + inc;
if (!isbitsunion && elsz == 0) {
jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 1);
jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 2);
a->ref.mem = newmem;
jl_gc_wb(a, newmem);
a->dimsize[0] = newnrows;
Expand Down
1 change: 1 addition & 0 deletions src/builtin_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ DECLARE_BUILTIN(is);
DECLARE_BUILTIN(isa);
DECLARE_BUILTIN(isdefined);
DECLARE_BUILTIN(issubtype);
DECLARE_BUILTIN(memorynew);
DECLARE_BUILTIN(memoryref);
DECLARE_BUILTIN(memoryref_isassigned);
DECLARE_BUILTIN(memoryrefget);
Expand Down
10 changes: 10 additions & 0 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1675,6 +1675,15 @@ JL_CALLABLE(jl_f__typevar)
}

// genericmemory ---------------------------------------------------------------------
// Builtin that allocates a new GenericMemory: args[0] is the memory type and
// args[1] is the element count. The result of jl_alloc_genericmemory is returned
// as a jl_value_t*.
JL_CALLABLE(jl_f_memorynew)
{
// NOTE(review): presumably (min, max) argument counts, i.e. exactly two arguments — confirm against JL_NARGS
JL_NARGS(memorynew, 2, 2);
// NOTE(review): this local is not referenced directly in this function; it appears to exist
// so that the JL_TYPECHK(memorynew, genericmemory_type, ...) expansion below can refer to an
// expected-type variable of this name — confirm against the macro definition.
jl_datatype_t *jl_genericmemory_type_type = jl_datatype_type;
JL_TYPECHK(memorynew, genericmemory_type, args[0]);
JL_TYPECHK(memorynew, long, args[1]);
// The unboxed signed length is stored in a size_t, so a negative length becomes a very large value.
// NOTE(review): presumably that is then rejected by the size validation inside the allocator — confirm.
size_t nel = jl_unbox_long(args[1]);
return (jl_value_t*)jl_alloc_genericmemory(args[0], nel);
}

JL_CALLABLE(jl_f_memoryref)
{
Expand Down Expand Up @@ -2441,6 +2450,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
jl_builtin_setglobalonce = add_builtin_func("setglobalonce!", jl_f_setglobalonce);

// memory primitives
jl_builtin_memorynew = add_builtin_func("memorynew", jl_f_memorynew);
jl_builtin_memoryref = add_builtin_func("memoryrefnew", jl_f_memoryref);
jl_builtin_memoryrefoffset = add_builtin_func("memoryrefoffset", jl_f_memoryrefoffset);
jl_builtin_memoryrefget = add_builtin_func("memoryrefget", jl_f_memoryrefget);
Expand Down
27 changes: 0 additions & 27 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1877,33 +1877,6 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
JL_GC_POP();
return mark_julia_type(ctx, obj, true, jl_any_type);
}
else if (is_libjulia_func(jl_alloc_genericmemory)) {
++CCALL_STAT(jl_alloc_genericmemory);
assert(lrt == ctx.types().T_prjlvalue);
assert(!isVa && !llvmcall && nccallargs == 2);
const jl_cgval_t &typ = argv[0];
const jl_cgval_t &nel = argv[1];
auto arg_typename = [&] JL_NOTSAFEPOINT {
auto istyp = argv[0].constant;
std::string type_str;
if (istyp && jl_is_datatype(istyp) && jl_is_genericmemory_type(istyp)){
auto eltype = jl_tparam1(istyp);
if (jl_is_datatype(eltype))
type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name);
else if (jl_is_uniontype(eltype))
type_str = "Union";
else
type_str = "<unknown type>";
}
else
type_str = "<unknown type>";
return "Memory{" + type_str + "}[]";
};
auto alloc = ctx.builder.CreateCall(prepare_call(jl_allocgenericmemory), { boxed(ctx,typ), emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_ulong_type)});
setName(ctx.emission_context, alloc, arg_typename);
JL_GC_POP();
return mark_julia_type(ctx, alloc, true, jl_any_type);
}
else if (is_libjulia_func(memcpy) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) {
++CCALL_STAT(memcpy);
const jl_cgval_t &dst = argv[0];
Expand Down
2 changes: 2 additions & 0 deletions src/common_symbols2.inc
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,5 @@ jl_symbol("invokelatest"),
jl_symbol("jl_array_del_end"),
jl_symbol("_mod64"),
jl_symbol("parameters"),
jl_symbol("monotonic"),
jl_symbol("regex.jl"),
61 changes: 32 additions & 29 deletions src/genericmemory.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,49 +24,51 @@ JL_DLLEXPORT char *jl_genericmemory_typetagdata(jl_genericmemory_t *m) JL_NOTSAF
return (char*)m->ptr + m->length * layout->size;
}

#if defined(_P64) && defined(UINT128MAX)
typedef __uint128_t wideint_t;
#else
typedef uint64_t wideint_t;
#endif

#define MAXINTVAL (((size_t)-1)>>1)

jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz)
// ONLY USE FROM CODEGEN. It only partially initializes the mem
JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory_unchecked(jl_ptls_t ptls, size_t nbytes, jl_datatype_t *mtype)
{
jl_task_t *ct = jl_current_task;
char *data;
jl_genericmemory_t *m;
if (nel == 0) // zero-sized allocation optimization
return (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance;
wideint_t prod = (wideint_t)nel * elsz;
if (isunion) {
// an extra byte for each isbits union memory element, stored at m->ptr + m->length
prod += nel;
}
if (nel >= MAXINTVAL || prod >= (wideint_t) MAXINTVAL)
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width");
size_t tot = (size_t)prod + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT);
size_t tot = nbytes + LLT_ALIGN(sizeof(jl_genericmemory_t),JL_SMALL_BYTE_ALIGNMENT);

int pooled = tot <= GC_MAX_SZCLASS;
char *data;
jl_genericmemory_t *m;
if (!pooled) {
data = (char*)jl_gc_managed_malloc(prod);
data = (char*)jl_gc_managed_malloc(nbytes);
tot = sizeof(jl_genericmemory_t) + sizeof(void*);
}
m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tot, mtype);
m = (jl_genericmemory_t*)jl_gc_alloc(ptls, tot, mtype);
if (pooled) {
data = (char*)m + JL_SMALL_BYTE_ALIGNMENT;
}
else {
int isaligned = 1; // jl_gc_managed_malloc is always aligned
jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned);
jl_gc_track_malloced_genericmemory(ptls, m, isaligned);
jl_genericmemory_data_owner_field(m) = (jl_value_t*)m;
}
m->length = nel;
// length set by codegen
m->ptr = data;
return m;
}

jl_genericmemory_t *_new_genericmemory_(jl_value_t *mtype, size_t nel, int8_t isunion, int8_t zeroinit, size_t elsz)
{
if (nel == 0) // zero-sized allocation optimization
return (jl_genericmemory_t*)((jl_datatype_t*)mtype)->instance;
size_t nbytes;
int overflow = __builtin_mul_overflow(nel, elsz, &nbytes);
if (isunion) {
// an extra byte for each isbits union memory element, stored at m->ptr + m->length
overflow |= __builtin_add_overflow(nel, nbytes, &nbytes);
}
if ((nel >= MAXINTVAL-1) || (nbytes >= MAXINTVAL-1) || overflow)
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width");
jl_task_t *ct = jl_current_task;
jl_genericmemory_t *m = jl_alloc_genericmemory_unchecked((jl_ptls_t) ct->ptls, nbytes, (jl_datatype_t*)mtype);
m->length = nel;
if (zeroinit)
memset(data, 0, (size_t)prod);
memset((char*)m->ptr, 0, nbytes);
return m;
}

Expand Down Expand Up @@ -150,13 +152,14 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void
if (((uintptr_t)data) & ((align > JL_HEAP_ALIGNMENT ? JL_HEAP_ALIGNMENT : align) - 1))
jl_exceptionf(jl_argumenterror_type,
"unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
wideint_t prod = (wideint_t)nel * elsz;
size_t nbytes;
int overflow = __builtin_mul_overflow(nel, elsz, &nbytes);
if (isunion) {
// an extra byte for each isbits union memory element, stored at m->ptr + m->length
prod += nel;
overflow |= __builtin_add_overflow(nel, nbytes, &nbytes);
}
if (nel >= MAXINTVAL || prod >= (wideint_t) MAXINTVAL)
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: too large for system address width");
if ((nel >= MAXINTVAL) || (nbytes >= MAXINTVAL) || overflow)
jl_exceptionf(jl_argumenterror_type, "invalid GenericMemory size: the number of elements is either negative or too large for system address width");
int tsz = sizeof(jl_genericmemory_t) + sizeof(void*);
m = (jl_genericmemory_t*)jl_gc_alloc(ct->ptls, tsz, mtype);
m->ptr = data;
Expand Down
Loading

0 comments on commit 69d40b6

Please sign in to comment.