Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize memorynew intrinsic for constant length Memory #55913

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions Compiler/src/tfuncs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2017,6 +2017,12 @@ function tuple_tfunc(𝕃::AbstractLattice, argtypes::Vector{Any})
return anyinfo ? PartialStruct(𝕃, typ, argtypes) : typ
end

# Return-type function for the `Core.memorynew(memtype, m)` builtin.
#
# Yields `Bottom` when the widened type of the length argument `m` has no
# intersection with `Int`; otherwise yields the lattice meet of the type
# requested through `memtype` with `GenericMemory`.
@nospecs function memorynew_tfunc(𝕃::AbstractLattice, memtype, m)
    if !hasintersect(widenconst(m), Int)
        return Bottom
    end
    # First component of `instanceof_tfunc`'s result is the type being instantiated.
    requested = instanceof_tfunc(memtype, true)[1]
    return tmeet(𝕃, requested, GenericMemory)
end
add_tfunc(Core.memorynew, 2, 2, memorynew_tfunc, 10)

@nospecs function memoryrefget_tfunc(𝕃::AbstractLattice, mem, order, boundscheck)
memoryref_builtin_common_errorcheck(mem, order, boundscheck) || return Bottom
return memoryref_elemtype(mem)
Expand Down Expand Up @@ -2244,7 +2250,16 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt
@nospecialize(rt))
= partialorder(𝕃)
na = length(argtypes)
if f === memoryrefnew
if f === Core.memorynew
argtypes[1] isa Const && argtypes[2] isa Const || return false
MemT = argtypes[1].val
isconcretetype(MemT) && MemT <: GenericMemory || return false
len = argtypes[2].val
len isa Int && 0 <= len < typemax(Int) || return false
elsz = datatype_layoutsize(MemT)
checked_smul_int(len, elsz)[2] && return false
return true
elseif f === memoryrefnew
return memoryref_builtin_common_nothrow(argtypes)
elseif f === memoryrefoffset
length(argtypes) == 1 || return false
Expand Down Expand Up @@ -2347,6 +2362,7 @@ const _EFFECT_FREE_BUILTINS = [
isa,
UnionAll,
getfield,
Core.memorynew,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is memoryrefnew available in Base by default, but memorynew is not?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

didn't seem needful to import.

memoryrefnew,
memoryrefoffset,
memoryrefget,
Expand Down Expand Up @@ -2381,6 +2397,7 @@ const _INACCESSIBLEMEM_BUILTINS = Any[
compilerbarrier,
Core._typevar,
donotdelete,
Core.memorynew,
]

const _ARGMEM_BUILTINS = Any[
Expand Down Expand Up @@ -2543,7 +2560,7 @@ function builtin_effects(𝕃::AbstractLattice, @nospecialize(f::Builtin), argty
consistent = ALWAYS_TRUE
elseif f === memoryrefget || f === memoryrefset! || f === memoryref_isassigned
consistent = CONSISTENT_IF_INACCESSIBLEMEMONLY
elseif f === Core._typevar
elseif f === Core._typevar || f === Core.memorynew
consistent = CONSISTENT_IF_NOTRETURNED
else
consistent = ALWAYS_FALSE
Expand Down
7 changes: 4 additions & 3 deletions Compiler/test/irpasses.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1083,12 +1083,13 @@ end
# test `flags_for_effects` and DCE
# ================================

let # effect-freeness computation for array allocation
@testset "effect-freeness computation for array allocation" begin

# should eliminate dead allocations
good_dims = [1, 2, 3, 4, 10]
Ns = [1, 2, 3, 4, 10]
for dim = good_dims, N = Ns
Ts = Any[Int, Union{Missing,Nothing}, Nothing, Any]
@testset "$dim, $N" for dim in good_dims, N in Ns
Int64(dim)^N > typemax(Int) && continue
dims = ntuple(i->dim, N)
@test @eval fully_eliminated() do
Expand All @@ -1099,7 +1100,7 @@ let # effect-freeness computation for array allocation

# shouldn't eliminate erroneous dead allocations
bad_dims = [-1, typemax(Int)]
for dim in bad_dims, N in [1, 2, 3, 4, 10], T in Any[Int, Union{Missing,Nothing}, Nothing, Any]
@testset "$dim, $N, $T" for dim in bad_dims, N in Ns, T in Ts
dims = ntuple(i->dim, N)
@test @eval !fully_eliminated() do
Array{$T,$N}(undef, $(dims...))
Expand Down
7 changes: 1 addition & 6 deletions base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -575,12 +575,7 @@ struct UndefInitializer end
const undef = UndefInitializer()

# type and dimensionality specified
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} =
if isdefined(self, :instance) && m === 0
self.instance
else
ccall(:jl_alloc_genericmemory, Ref{GenericMemory{kind,T,addrspace}}, (Any, Int), self, m)
end
# uninitialized constructor: forwards the concrete memory type and the length `m`
# straight to the `Core.memorynew` builtin
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, m::Int) where {T,addrspace,kind} = Core.memorynew(self, m)
# same constructor with the length supplied as a 1-tuple; unpacks it with `getfield`
# and defers to the `m::Int` method above
(self::Type{GenericMemory{kind,T,addrspace}})(::UndefInitializer, d::NTuple{1,Int}) where {T,kind,addrspace} = self(undef, getfield(d,1))
# empty vector constructor
(self::Type{GenericMemory{kind,T,addrspace}})() where {T,kind,addrspace} = self(undef, 0)
Expand Down
13 changes: 11 additions & 2 deletions base/essentials.jl
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,17 @@ default_access_order(a::GenericMemory{:atomic}) = :monotonic
default_access_order(a::GenericMemoryRef{:not_atomic}) = :not_atomic
default_access_order(a::GenericMemoryRef{:atomic}) = :monotonic

getindex(A::GenericMemory, i::Int) = (@_noub_if_noinbounds_meta;
memoryrefget(memoryrefnew(memoryrefnew(A), i, @_boundscheck), default_access_order(A), false))
# bootstrap version for Memory{Any}
#getindex(A::Memory{Any}, i::Int) = (@_noub_if_noinbounds_meta;
# memoryrefget(memoryrefnew(memoryrefnew(A), i, @_boundscheck), default_access_order(A), false))

# Load the element at 1-based index `i` of `A`.
#
# When bounds checking is active, an out-of-range `i` raises through
# `throw_boundserror(A, (i,))`. The range test is done once here, so the
# memoryref construction and the load below are both told to skip their checks.
function getindex(A::GenericMemory, i::Int)
    @_noub_if_noinbounds_meta
    if @_boundscheck
        # Unsigned compare of `i - 1` against the length: an `i` below 1 wraps
        # to a huge unsigned value, so one test covers both ends of the range.
        inrange = ult_int(bitcast(UInt, sub_int(i, 1)), bitcast(UInt, A.length))
        inrange || throw_boundserror(A, (i,))
    end
    ref = memoryrefnew(memoryrefnew(A), i, false)
    return memoryrefget(ref, default_access_order(A), false)
end
getindex(A::GenericMemoryRef) = memoryrefget(A, default_access_order(A), @_boundscheck)

"""
Expand Down
7 changes: 4 additions & 3 deletions base/genericmemory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,10 @@ getindex(A::Memory, c::Colon) = copy(A)

## Indexing: setindex! ##

function _setindex!(A::Memory{T}, x::T, i1::Int) where {T}
ref = memoryrefnew(memoryref(A), i1, @_boundscheck)
memoryrefset!(ref, x, :not_atomic, @_boundscheck)
# Store `x` at 1-based index `i` of `A` and return `A`.
#
# Throws a `BoundsError` (via `throw_boundserror`) when `i` lies outside
# `1:A.length`, unless the bounds check is elided by an enclosing `@inbounds`.
#
# The explicit check is the only guard: the memoryref construction and the store
# below are both invoked with bounds checking disabled, so the check must throw
# rather than merely compute a comparison.
function _setindex!(A::Memory{T}, x::T, i::Int) where {T}
    # `(i - 1) % UInt` maps every `i < 1` to a huge unsigned value, so a single
    # unsigned comparison rejects indices on both sides of the valid range.
    @boundscheck ((i - 1) % UInt < A.length % UInt || throw_boundserror(A, (i,)))
    ref = memoryrefnew(memoryref(A), i, false)
    memoryrefset!(ref, x, :not_atomic, false)
    return A
end

Expand Down
22 changes: 11 additions & 11 deletions doc/src/manual/performance-tips.md
Original file line number Diff line number Diff line change
Expand Up @@ -1058,12 +1058,12 @@ the output. As a trivial example, compare

```jldoctest prealloc
julia> function xinc(x)
return [x, x+1, x+2]
return [x + i for i in 1:3000]
end;

julia> function loopinc()
y = 0
for i = 1:10^7
for i = 1:10^5
ret = xinc(i)
y += ret[2]
end
Expand All @@ -1075,16 +1075,16 @@ with

```jldoctest prealloc
julia> function xinc!(ret::AbstractVector{T}, x::T) where T
ret[1] = x
ret[2] = x+1
ret[3] = x+2
for i in 1:3000
ret[i] = x+i
end
nothing
end;

julia> function loopinc_prealloc()
ret = Vector{Int}(undef, 3)
ret = Vector{Int}(undef, 3000)
y = 0
for i = 1:10^7
for i = 1:10^5
xinc!(ret, i)
y += ret[2]
end
Expand All @@ -1096,12 +1096,12 @@ Timing results:

```jldoctest prealloc; filter = r"[0-9\.]+ seconds \(.*?\)"
julia> @time loopinc()
0.529894 seconds (40.00 M allocations: 1.490 GiB, 12.14% gc time)
50000015000000
0.297454 seconds (200.00 k allocations: 2.239 GiB, 39.80% gc time)
5000250000

julia> @time loopinc_prealloc()
0.030850 seconds (6 allocations: 288 bytes)
50000015000000
0.009410 seconds (2 allocations: 23.477 KiB)
5000250000
```

Preallocation has other advantages, for example by allowing the caller to control the "output"
Expand Down
13 changes: 3 additions & 10 deletions src/array.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,6 @@
extern "C" {
#endif

#if defined(_P64) && defined(UINT128MAX)
typedef __uint128_t wideint_t;
#else
typedef uint64_t wideint_t;
#endif

#define MAXINTVAL (((size_t)-1)>>1)

JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dims)
Expand All @@ -30,10 +24,9 @@ JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, uint32_t ndims, size_t *dim
size_t _nel = 1;
for (i = 0; i < ndims; i++) {
size_t di = dims[i];
wideint_t prod = (wideint_t)_nel * (wideint_t)di;
if (prod >= (wideint_t) MAXINTVAL || di >= MAXINTVAL)
int overflow = __builtin_mul_overflow(_nel, di, &_nel);
if (overflow || di >= MAXINTVAL)
return 1;
_nel = prod;
}
*nel = _nel;
return 0;
Expand Down Expand Up @@ -204,7 +197,7 @@ JL_DLLEXPORT void jl_array_grow_end(jl_array_t *a, size_t inc)
int isbitsunion = jl_genericmemory_isbitsunion(a->ref.mem);
size_t newnrows = n + inc;
if (!isbitsunion && elsz == 0) {
jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 1);
jl_genericmemory_t *newmem = jl_alloc_genericmemory(mtype, MAXINTVAL - 2);
oscardssmith marked this conversation as resolved.
Show resolved Hide resolved
a->ref.mem = newmem;
jl_gc_wb(a, newmem);
a->dimsize[0] = newnrows;
Expand Down
1 change: 1 addition & 0 deletions src/builtin_proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ DECLARE_BUILTIN(is);
DECLARE_BUILTIN(isa);
DECLARE_BUILTIN(isdefined);
DECLARE_BUILTIN(issubtype);
DECLARE_BUILTIN(memorynew);
DECLARE_BUILTIN(memoryref);
DECLARE_BUILTIN(memoryref_isassigned);
DECLARE_BUILTIN(memoryrefget);
Expand Down
10 changes: 10 additions & 0 deletions src/builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1675,6 +1675,15 @@ JL_CALLABLE(jl_f__typevar)
}

// genericmemory ---------------------------------------------------------------------
// Builtin `memorynew(type, nel)`: validates its two arguments and returns a
// freshly allocated genericmemory obtained from jl_alloc_genericmemory.
JL_CALLABLE(jl_f_memorynew)
{
// exactly two arguments are accepted (min 2, max 2)
JL_NARGS(memorynew, 2, 2);
// NOTE(review): presumably JL_TYPECHK pastes `jl_` / `_type` around its second
// argument, so this local alias makes the `genericmemory_type` check below
// resolve to jl_datatype_type -- confirm against the macro definition.
jl_datatype_t *jl_genericmemory_type_type = jl_datatype_type;
JL_TYPECHK(memorynew, genericmemory_type, args[0]);
JL_TYPECHK(memorynew, long, args[1]);
// The boxed signed length is stored into a size_t here.
// NOTE(review): negative lengths are presumably rejected inside
// jl_alloc_genericmemory as oversized requests -- confirm.
size_t nel = jl_unbox_long(args[1]);
return (jl_value_t*)jl_alloc_genericmemory(args[0], nel);
}

JL_CALLABLE(jl_f_memoryref)
{
Expand Down Expand Up @@ -2441,6 +2450,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
jl_builtin_setglobalonce = add_builtin_func("setglobalonce!", jl_f_setglobalonce);

// memory primitives
jl_builtin_memorynew = add_builtin_func("memorynew", jl_f_memorynew);
jl_builtin_memoryref = add_builtin_func("memoryrefnew", jl_f_memoryref);
jl_builtin_memoryrefoffset = add_builtin_func("memoryrefoffset", jl_f_memoryrefoffset);
jl_builtin_memoryrefget = add_builtin_func("memoryrefget", jl_f_memoryrefget);
Expand Down
27 changes: 0 additions & 27 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1877,33 +1877,6 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
JL_GC_POP();
return mark_julia_type(ctx, obj, true, jl_any_type);
}
else if (is_libjulia_func(jl_alloc_genericmemory)) {
++CCALL_STAT(jl_alloc_genericmemory);
assert(lrt == ctx.types().T_prjlvalue);
assert(!isVa && !llvmcall && nccallargs == 2);
const jl_cgval_t &typ = argv[0];
const jl_cgval_t &nel = argv[1];
auto arg_typename = [&] JL_NOTSAFEPOINT {
auto istyp = argv[0].constant;
std::string type_str;
if (istyp && jl_is_datatype(istyp) && jl_is_genericmemory_type(istyp)){
auto eltype = jl_tparam1(istyp);
if (jl_is_datatype(eltype))
type_str = jl_symbol_name(((jl_datatype_t*)eltype)->name->name);
else if (jl_is_uniontype(eltype))
type_str = "Union";
else
type_str = "<unknown type>";
}
else
type_str = "<unknown type>";
return "Memory{" + type_str + "}[]";
};
auto alloc = ctx.builder.CreateCall(prepare_call(jl_allocgenericmemory), { boxed(ctx,typ), emit_unbox(ctx, ctx.types().T_size, nel, (jl_value_t*)jl_ulong_type)});
setName(ctx.emission_context, alloc, arg_typename);
JL_GC_POP();
return mark_julia_type(ctx, alloc, true, jl_any_type);
}
else if (is_libjulia_func(memcpy) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) {
++CCALL_STAT(memcpy);
const jl_cgval_t &dst = argv[0];
Expand Down
Loading
Loading