From e67ae7c604a9e9787a7d63e2e38f3214a5180318 Mon Sep 17 00:00:00 2001 From: Ian Atol Date: Wed, 29 Sep 2021 18:33:29 -0400 Subject: [PATCH] Implement maybecopy in BoundsError, start optimization, refactor memory_opt! --- base/abstractarray.jl | 4 + base/array.jl | 54 +++- base/boot.jl | 13 +- base/broadcast.jl | 13 + base/compiler/optimize.jl | 1 + base/compiler/ssair/passes.jl | 161 +++++++++-- base/pointer.jl | 1 + src/builtin_proto.h | 1 + src/builtins.c | 14 + src/codegen.cpp | 4 +- src/staticdata.c | 5 +- test/compiler/immutablearray.jl | 456 +++++++++++++++++++++++++++++++- 12 files changed, 685 insertions(+), 42 deletions(-) diff --git a/base/abstractarray.jl b/base/abstractarray.jl index d5d47fe855bd53..20d49e0bfbcf38 100644 --- a/base/abstractarray.jl +++ b/base/abstractarray.jl @@ -1073,6 +1073,10 @@ function copy(a::AbstractArray) copymutable(a) end +function copy(a::Core.ImmutableArray) + a +end + function copyto!(B::AbstractVecOrMat{R}, ir_dest::AbstractRange{Int}, jr_dest::AbstractRange{Int}, A::AbstractVecOrMat{S}, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int}) where {R,S} if length(ir_dest) != length(ir_src) diff --git a/base/array.jl b/base/array.jl index f38e2e10c08e2e..c1fb85bf9b48f8 100644 --- a/base/array.jl +++ b/base/array.jl @@ -118,6 +118,35 @@ Union type of [`DenseVector{T}`](@ref) and [`DenseMatrix{T}`](@ref). """ const DenseVecOrMat{T} = Union{DenseVector{T}, DenseMatrix{T}} +""" + ImmutableArray + +Dynamically allocated, immutable array. + +""" +const ImmutableArray = Core.ImmutableArray + +""" + IMArray{T,N} + +Union type of [`Array{T,N}`](@ref) and [`ImmutableArray{T,N}`](@ref) +""" +const IMArray{T,N} = Union{Array{T, N}, ImmutableArray{T,N}} + +""" + IMVector{T} + +One-dimensional [`ImmutableArray`](@ref) or [`Array`](@ref) with elements of type `T`. Alias for `IMArray{T, 1}`. +""" +const IMVector{T} = IMArray{T, 1} + +""" + IMMatrix{T} + +Two-dimensional [`ImmutableArray`](@ref) or [`Array`](@ref) with elements of type `T`. Alias for `IMArray{T,2}`. +""" +const IMMatrix{T} = IMArray{T, 2} + ## Basic functions ## import Core: arraysize, arrayset, arrayref, const_arrayref @@ -147,14 +176,13 @@ function vect(X...) return copyto!(Vector{T}(undef, length(X)), X) end -const ImmutableArray = Core.ImmutableArray -const IMArray{T,N} = Union{Array{T, N}, ImmutableArray{T,N}} -const IMVector{T} = IMArray{T, 1} -const IMMatrix{T} = IMArray{T, 2} - +# Freeze and thaw constructors ImmutableArray(a::Array) = Core.arrayfreeze(a) Array(a::ImmutableArray) = Core.arraythaw(a) +ImmutableArray(a::AbstractArray{T,N}) where {T,N} = ImmutableArray{T,N}(a) + +# Size functions for arrays, both mutable and immutable size(a::IMArray, d::Integer) = arraysize(a, convert(Int, d)) size(a::IMVector) = (arraysize(a,1),) size(a::IMMatrix) = (arraysize(a,1), arraysize(a,2)) @@ -393,6 +421,19 @@ similar(a::Array{T}, m::Int) where {T} = Vector{T}(undef, m) similar(a::Array, T::Type, dims::Dims{N}) where {N} = Array{T,N}(undef, dims) similar(a::Array{T}, dims::Dims{N}) where {T,N} = Array{T,N}(undef, dims) +ImmutableArray{T}(undef::UndefInitializer, m::Int) where T = ImmutableArray(Array{T}(undef, m)) +ImmutableArray{T}(undef::UndefInitializer, dims::Dims) where T = ImmutableArray(Array{T}(undef, dims)) + +""" + maybecopy(x) + +`maybecopy` provides access to `x` while ensuring it does not escape. +To do so, the optimizer decides whether to create a copy of `x` or not based on +if the `maybecopy` is passed to an escaping function. +That is, `maybecopy` will either be transformed to [`copy`](@ref) or just a reference to x. +""" +maybecopy = Core.maybecopy + # T[x...] constructs Array{T,1} """ getindex(type[, elements...]) @@ -626,8 +667,8 @@ oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x) ## Conversions ## -convert(::Type{T}, a::AbstractArray) where {T<:Array} = a isa T ? a : T(a) convert(::Type{Union{}}, a::AbstractArray) = throw(MethodError(convert, (Union{}, a))) +convert(T::Union{Type{<:Array},Type{<:Core.ImmutableArray}}, a::AbstractArray) = a isa T ? a : T(a) promote_rule(a::Type{Array{T,n}}, b::Type{Array{S,n}}) where {T,n,S} = el_same(promote_type(T,S), a, b) @@ -637,6 +678,7 @@ if nameof(@__MODULE__) === :Base # avoid method overwrite # constructors should make copies Array{T,N}(x::AbstractArray{S,N}) where {T,N,S} = copyto_axcheck!(Array{T,N}(undef, size(x)), x) AbstractArray{T,N}(A::AbstractArray{S,N}) where {T,N,S} = copyto_axcheck!(similar(A,T), A) +ImmutableArray{T,N}(Ar::AbstractArray{S,N}) where {T,N,S} = Core.arrayfreeze(copyto_axcheck!(Array{T,N}(undef, size(Ar)), Ar)) end ## copying iterators to containers diff --git a/base/boot.jl b/base/boot.jl index 98b8cf2e9cf40e..d59af90d54f13e 100644 --- a/base/boot.jl +++ b/base/boot.jl @@ -279,8 +279,17 @@ struct BoundsError <: Exception a::Any i::Any BoundsError() = new() - BoundsError(@nospecialize(a)) = (@_noinline_meta; new(a)) - BoundsError(@nospecialize(a), i) = (@_noinline_meta; new(a,i)) + # For now, always copy arrays to avoid escaping them + # Eventually, we want to figure out if the copy is needed to save the performance of copying + # (i.e., if a escapes elsewhere, don't bother to make a copy) + + BoundsError(@nospecialize(a)) = a isa Array ? + (@_noinline_meta; new(Core.maybecopy(a))) : + (@_noinline_meta; new(a)) + + BoundsError(@nospecialize(a), i) = a isa Array ? + (@_noinline_meta; new(Core.maybecopy(a), i)) : + (@_noinline_meta; new(a, i)) end struct DivideError <: Exception end struct OutOfMemoryError <: Exception end diff --git a/base/broadcast.jl b/base/broadcast.jl index b34a73041708b0..83067ed46f5e48 100644 --- a/base/broadcast.jl +++ b/base/broadcast.jl @@ -1385,4 +1385,17 @@ function Base.show(io::IO, op::BroadcastFunction) end Base.show(io::IO, ::MIME"text/plain", op::BroadcastFunction) = show(io, op) +struct IMArrayStyle <: Broadcast.AbstractArrayStyle{Any} end +BroadcastStyle(::Type{<:Core.ImmutableArray}) = IMArrayStyle() + +#similar has to return mutable array +function Base.similar(bc::Broadcasted{IMArrayStyle}, ::Type{ElType}) where ElType + similar(Array{ElType}, axes(bc)) +end + +@inline function copy(bc::Broadcasted{IMArrayStyle}) + ElType = combine_eltypes(bc.f, bc.args) + return Core.ImmutableArray(copyto!(similar(bc, ElType), bc)) +end + end # module diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 62f4e32b90aea8..de6294393d3c15 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -307,6 +307,7 @@ function run_passes(ci::CodeInfo, sv::OptimizationState) ir = adce_pass!(ir) #@Base.show ("after_adce", ir) @timeit "type lift" ir = type_lift_pass!(ir) + #@timeit "compact 3" ir = compact!(ir) ir = memory_opt!(ir) #@Base.show ir if JLOptions().debug_level == 2 diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 54d4d46b2c5abe..119c2ea8a4189b 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1256,76 +1256,183 @@ function cfg_simplify!(ir::IRCode) return finish(compact) end -function is_allocation(stmt) +# function is_known_fcall(stmt::Expr, @nospecialize(func)) +# isexpr(stmt, :foreigncall) || return false +# s = stmt.args[1] +# isa(s, QuoteNode) && (s = s.value) +# return s === func +# end + +function is_known_fcall(stmt::Expr, funcs::Vector{Symbol}) isexpr(stmt, :foreigncall) || return false s = stmt.args[1] isa(s, QuoteNode) && (s = s.value) - return s === :jl_alloc_array_1d + # return any(e -> s === e, funcs) + return true in map(e -> s === e, funcs) +end + +function is_allocation(stmt::Expr) + isexpr(stmt, :foreigncall) || return false + s = stmt.args[1] + isa(s, QuoteNode) && (s = s.value) + return (s === :jl_alloc_array_1d + || s === :jl_alloc_array_2d + || s === :jl_alloc_array_3d + || s === :jl_new_array) end function memory_opt!(ir::IRCode) compact = IncrementalCompact(ir, false) uses = IdDict{Int, Vector{Int}}() - relevant = IdSet{Int}() - revisit = Int[] - function mark_val(val) + relevant = IdSet{Int}() # allocations + revisit = Int[] # potential targets for a mutating_arrayfreeze drop-in + maybecopies = Int[] # calls to maybecopy + + function mark_escape(@nospecialize val) isa(val, SSAValue) || return + #println(val.id, " escaped.") val.id in relevant && pop!(relevant, val.id) end + + function mark_use(val::SSAValue, idx) + id = val.id + id in relevant || return + (haskey(uses, id)) || (uses[id] = Int[]) + push!(uses[id], idx) + end + for ((_, idx), stmt) in compact + + #println("idx: ", idx, " = ", stmt) + if isa(stmt, ReturnNode) isdefined(stmt, :val) || continue val = stmt.val - if isa(val, SSAValue) && val.id in relevant - (haskey(uses, val.id)) || (uses[val.id] = Int[]) - push!(uses[val.id], idx) - end + mark_use(val, idx) continue + + # check for phinodes that are possibly allocations + elseif isa(stmt, PhiNode) + + # ensure all of the phinode values are defined + defined = true + for i = 1:length(stmt.values) + if !isassigned(stmt.values, i) + defined = false + end + end + + defined || continue + + for val in stmt.values + if isa(val, SSAValue) && val.id in relevant + push!(relevant, idx) + end + end end + (isexpr(stmt, :call) || isexpr(stmt, :foreigncall)) || continue + + if is_known_call(stmt, Core.maybecopy, compact) + push!(maybecopies, idx) + continue + end + if is_allocation(stmt) push!(relevant, idx) # TODO: Mark everything else here continue end - # TODO: Replace this by interprocedural escape analysis - if is_known_call(stmt, arrayset, compact) + + if is_known_call(stmt, arrayset, compact) && length(stmt.args) >= 5 # The value being set escapes, everything else doesn't - mark_val(stmt.args[4]) + mark_escape(stmt.args[4]) arr = stmt.args[3] - if isa(arr, SSAValue) && arr.id in relevant - (haskey(uses, arr.id)) || (uses[arr.id] = Int[]) - push!(uses[arr.id], idx) - end + mark_use(arr, idx) + + elseif is_known_call(stmt, arrayref, compact) && length(stmt.args) == 4 + arr = stmt.args[3] + mark_use(arr, idx) + + elseif is_known_call(stmt, setindex!, compact) && length(stmt.args) == 4 + # handle similarly to arrayset + val = stmt.args[3] + mark_escape(val) + + arr = stmt.args[2] + mark_use(arr, idx) + + elseif is_known_call(stmt, (===), compact) && length(stmt.args) == 3 + arr1 = stmt.args[2] + arr2 = stmt.args[3] + + mark_use(arr1, idx) + mark_use(arr2, idx) + + # these foreigncalls have similar structure and don't escape our array, so handle them all at once + elseif is_known_fcall(stmt, [:jl_array_ptr, :jl_array_copy]) && length(stmt.args) == 6 + arr = stmt.args[6] + mark_use(arr, idx) + + elseif is_known_call(stmt, arraysize, compact) && isa(stmt.args[2], SSAValue) + arr = stmt.args[2] + mark_use(arr, idx) + elseif is_known_call(stmt, Core.arrayfreeze, compact) && isa(stmt.args[2], SSAValue) + # mark these for potential replacement with mutating_arrayfreeze push!(revisit, idx) + else - # For now we assume everything escapes - # TODO: We could handle PhiNodes specially and improve this + # Assume everything else escapes for ur in userefs(stmt) - mark_val(ur[]) + mark_escape(ur[]) end end end + ir = finish(compact) - isempty(revisit) && return ir + isempty(revisit) && isempty(maybecopies) && return ir + domtree = construct_domtree(ir.cfg.blocks) + for idx in revisit # Make sure that the value we reference didn't escape - id = ir.stmts[idx][:inst].args[2].id + stmt = ir.stmts[idx][:inst]::Expr + id = (stmt.args[2]::SSAValue).id (id in relevant) || continue + #println("Revisiting ", stmt) + # We're ok to steal the memory if we don't dominate any uses ok = true - for use in uses[id] - if ssadominates(ir, domtree, idx, use) - ok = false - break + if haskey(uses, id) + for use in uses[id] + if ssadominates(ir, domtree, idx, use) + ok = false + break + end end end ok || continue - - ir.stmts[idx][:inst].args[1] = Core.mutating_arrayfreeze + stmt.args[1] = Core.mutating_arrayfreeze end + + # TODO: Use escape analysis info to determine if maybecopy should copy + + # for idx in maybecopies + # stmt = ir.stmts[idx][:inst]::Expr + # #println(stmt.args) + # arr = stmt.args[2] + # id = isa(arr, SSAValue) ? arr.id : arr.n # SSAValue or Core.Argument + + # if (id in relevant) # didn't escape elsewhere, so make a copy to keep it un-escaped + # #println("didn't escape maybecopy") + # stmt.args[1] = Main.Base.copy + # else # already escaped, so save the cost of copying and just pass the actual object + # #println("escaped maybecopy") + # ir.stmts[idx][:inst] = arr + # end + # end + return ir end diff --git a/base/pointer.jl b/base/pointer.jl index b315e589ffd9a6..2c526a7063b2eb 100644 --- a/base/pointer.jl +++ b/base/pointer.jl @@ -63,6 +63,7 @@ cconvert(::Type{Ptr{UInt8}}, s::AbstractString) = String(s) cconvert(::Type{Ptr{Int8}}, s::AbstractString) = String(s) unsafe_convert(::Type{Ptr{T}}, a::Array{T}) where {T} = ccall(:jl_array_ptr, Ptr{T}, (Any,), a) +unsafe_convert(::Type{Ptr{T}}, a::Core.ImmutableArray{T}) where {T} = ccall(:jl_array_ptr, Ptr{T}, (Any,), a) unsafe_convert(::Type{Ptr{S}}, a::AbstractArray{T}) where {S,T} = convert(Ptr{S}, unsafe_convert(Ptr{T}, a)) unsafe_convert(::Type{Ptr{T}}, a::AbstractArray{T}) where {T} = error("conversion to pointer not defined for $(typeof(a))") diff --git a/src/builtin_proto.h b/src/builtin_proto.h index f781658ed55f95..b04fc1c671b14d 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -54,6 +54,7 @@ DECLARE_BUILTIN(_typevar); DECLARE_BUILTIN(arrayfreeze); DECLARE_BUILTIN(arraythaw); DECLARE_BUILTIN(mutating_arrayfreeze); +DECLARE_BUILTIN(maybecopy); JL_CALLABLE(jl_f_invoke_kwsorter); JL_CALLABLE(jl_f__structtype); diff --git a/src/builtins.c b/src/builtins.c index 7bccc23cd56df9..be9fb45f2226af 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1395,6 +1395,19 @@ JL_CALLABLE(jl_f_arrayset) return args[1]; } +JL_CALLABLE(jl_f_maybecopy) +{ + // maybecopy --- this builtin is never actually supposed to be executed + // instead, calls to it are analyzed and replaced with either a call to copy + // or directly replaced with the object itself that is the target of the maybecopy + // therefore, we just check that there is one argument and do a no-op + JL_NARGSV(maybecopy, 1); + JL_TYPECHK(maybecopy, array, args[0]); + jl_array_t *a = (jl_array_t*)args[0]; + jl_array_t *na = jl_array_copy(a); + return (jl_value_t*)na; +} + // type definition ------------------------------------------------------------ JL_CALLABLE(jl_f__structtype) @@ -1877,6 +1890,7 @@ void jl_init_primitives(void) JL_GC_DISABLED add_builtin_func("_setsuper!", jl_f__setsuper); jl_builtin__typebody = add_builtin_func("_typebody!", jl_f__typebody); add_builtin_func("_equiv_typedef", jl_f__equiv_typedef); + jl_builtin_maybecopy = add_builtin_func("maybecopy", jl_f_maybecopy); // builtin types add_builtin("Any", (jl_value_t*)jl_any_type); diff --git a/src/codegen.cpp b/src/codegen.cpp index 127e4a5cf8adc1..bca74ca6a5fe39 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -663,12 +663,13 @@ static const auto jl_newbits_func = new JuliaFunction{ // `julia.typeof` does read memory, but it is effectively readnone before we lower // the allocation function. This is OK as long as we lower `julia.typeof` no later than // `julia.gc_alloc_obj`. +// Updated to argmemonly due to C++ deconstructor style usage in jl_f_arrayfreeze / mutating_arrayfreeze static const auto jl_typeof_func = new JuliaFunction{ "julia.typeof", [](LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue}, false); }, [](LLVMContext &C) { return AttributeList::get(C, - Attributes(C, {Attribute::ReadNone, Attribute::NoUnwind, Attribute::NoRecurse}), + Attributes(C, {Attribute::ArgMemOnly, Attribute::NoUnwind, Attribute::NoRecurse}), Attributes(C, {Attribute::NonNull}), None); }, }; @@ -906,6 +907,7 @@ static const std::map builtin_func_map = { { &jl_f_arrayfreeze, new JuliaFunction{"jl_f_arrayfreeze", get_func_sig, get_func_attrs} }, { &jl_f_arraythaw, new JuliaFunction{"jl_f_arraythaw", get_func_sig, get_func_attrs} }, { &jl_f_mutating_arrayfreeze,new JuliaFunction{"jl_f_mutating_arrayfreeze", get_func_sig, get_func_attrs} }, + { &jl_f_maybecopy, new JuliaFunction{"jl_f_maybecopy", get_func_sig, get_func_attrs} }, }; static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction{"jl_new_opaque_closure_jlcall", get_func_sig, get_func_attrs}; diff --git a/src/staticdata.c b/src/staticdata.c index 53e83a0aa2df99..8be7d95f2e717b 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -30,7 +30,7 @@ extern "C" { // TODO: put WeakRefs on the weak_refs list during deserialization // TODO: handle finalizers -#define NUM_TAGS 155 +#define NUM_TAGS 156 // An array of references that need to be restored from the sysimg // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C. @@ -205,6 +205,7 @@ jl_value_t **const*const get_tags(void) { INSERT_TAG(jl_builtin_arrayfreeze); INSERT_TAG(jl_builtin_mutating_arrayfreeze); INSERT_TAG(jl_builtin_arraythaw); + INSERT_TAG(jl_builtin_maybecopy); // All optional tags must be placed at the end, so that we // don't accidentally have a `NULL` in the middle @@ -255,7 +256,7 @@ static const jl_fptr_args_t id_to_fptrs[] = { &jl_f_applicable, &jl_f_invoke, &jl_f_sizeof, &jl_f__expr, &jl_f__typevar, &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype, &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_opaque_closure_call, - &jl_f_arrayfreeze, &jl_f_mutating_arrayfreeze, &jl_f_arraythaw, + &jl_f_arrayfreeze, &jl_f_mutating_arrayfreeze, &jl_f_arraythaw, &jl_f_maybecopy, NULL }; diff --git a/test/compiler/immutablearray.jl b/test/compiler/immutablearray.jl index 474e5dfc0f6578..d4dc0d85da3a52 100644 --- a/test/compiler/immutablearray.jl +++ b/test/compiler/immutablearray.jl @@ -1,12 +1,460 @@ using Base.Experimental: ImmutableArray -function simple() +using Test + +function test_allocate1() a = Vector{Float64}(undef, 5) for i = 1:5 a[i] = i end - ImmutableArray(a) + Core.ImmutableArray(a) end + +function test_allocate2() + a = [1,2,3,4,5] + Core.ImmutableArray(a) +end + +function test_allocate3() + a = Matrix{Float64}(undef, 5, 2) + for i = 1:5 + for j = 1:2 + a[i, j] = i + j + end + end + Core.ImmutableArray(a) +end + +function test_allocate4() + a = Core.ImmutableArray{Float64}(undef, 5) +end + +function test_broadcast1() + a = Core.ImmutableArray([1,2,3]) + typeof(a .+ a) <: Core.ImmutableArray +end + +function test_allocate5() # test that throwing boundserror doesn't escape + a = [1,2,3] + try + getindex(a, 4) + catch end + Core.ImmutableArray(a) +end + +# function test_maybecopy1() +# a = Vector{Int64}(undef, 5) +# b = Core.maybecopy(a) # doesn't escape in this function - so a !=== b +# @test !(a === b) +# end + +# function test_maybecopy2() +# a = Vector{Int64}(undef, 5) +# try +# a[6] +# catch e +# @test !(e.a === a) +# end +# end + +# function test_maybecopy3() +# @noinline function escaper(arr) +# return arr +# end + +# a = Vector{Int64}(undef, 5) +# escaper(a) +# b = Core.maybecopy(a) +# @test a === b # this time, it does escape, so we give back the actual object +# end + +# function test_maybecopy4() +# @noinline function escaper(arr) +# return arr +# end + +# a = Vector{Int64}(undef, 5) +# escaper(a) +# try +# a[6] +# catch e +# if isa(e, BoundsError) +# @test e.a === a # already escaped so we don't copy +# end +# end +# end + + + + let - @allocated(simple()) - @test @allocated(simple()) < 100 + # warmup for @allocated + a,b,c,d,e = test_allocate1(), test_allocate2(), test_allocate3(), test_allocate4(), test_allocate5() + + # these magic values are ~ what the mutable array version would allocate + @test @allocated(test_allocate1()) < 100 + @test @allocated(test_allocate2()) < 100 + @test @allocated(test_allocate3()) < 150 + @test @allocated(test_allocate4()) < 100 + @test @allocated(test_allocate5()) < 170 + @test test_broadcast1() == true + + # test_maybecopy1() + # test_maybecopy2() + # test_maybecopy3() + # test_maybecopy4() end + + +# DiffEq Performance Tests + +# using DifferentialEquations +# using StaticArrays + +# function _build_atsit5_caches(::Type{T}) where {T} + +# cs = SVector{6, T}(0.161, 0.327, 0.9, 0.9800255409045097, 1.0, 1.0) + +# as = SVector{21, T}( +# #=a21=# convert(T,0.161), +# #=a31=# convert(T,-0.008480655492356989), +# #=a32=# convert(T,0.335480655492357), +# #=a41=# convert(T,2.8971530571054935), +# #=a42=# convert(T,-6.359448489975075), +# #=a43=# convert(T,4.3622954328695815), +# #=a51=# convert(T,5.325864828439257), +# #=a52=# convert(T,-11.748883564062828), +# #=a53=# convert(T,7.4955393428898365), +# #=a54=# convert(T,-0.09249506636175525), +# #=a61=# convert(T,5.86145544294642), +# #=a62=# convert(T,-12.92096931784711), +# #=a63=# convert(T,8.159367898576159), +# #=a64=# convert(T,-0.071584973281401), +# #=a65=# convert(T,-0.028269050394068383), +# #=a71=# convert(T,0.09646076681806523), +# #=a72=# convert(T,0.01), +# #=a73=# convert(T,0.4798896504144996), +# #=a74=# convert(T,1.379008574103742), +# #=a75=# convert(T,-3.290069515436081), +# #=a76=# convert(T,2.324710524099774) +# ) + +# btildes = SVector{7,T}( +# convert(T,-0.00178001105222577714), +# convert(T,-0.0008164344596567469), +# convert(T,0.007880878010261995), +# convert(T,-0.1447110071732629), +# convert(T,0.5823571654525552), +# convert(T,-0.45808210592918697), +# convert(T,0.015151515151515152) +# ) +# rs = SVector{22, T}( +# #=r11=# convert(T,1.0), +# #=r12=# convert(T,-2.763706197274826), +# #=r13=# convert(T,2.9132554618219126), +# #=r14=# convert(T,-1.0530884977290216), +# #=r22=# convert(T,0.13169999999999998), +# #=r23=# convert(T,-0.2234), +# #=r24=# convert(T,0.1017), +# #=r32=# convert(T,3.9302962368947516), +# #=r33=# convert(T,-5.941033872131505), +# #=r34=# convert(T,2.490627285651253), +# #=r42=# convert(T,-12.411077166933676), +# #=r43=# convert(T,30.33818863028232), +# #=r44=# convert(T,-16.548102889244902), +# #=r52=# convert(T,37.50931341651104), +# #=r53=# convert(T,-88.1789048947664), +# #=r54=# convert(T,47.37952196281928), +# #=r62=# convert(T,-27.896526289197286), +# #=r63=# convert(T,65.09189467479366), +# #=r64=# convert(T,-34.87065786149661), +# #=r72=# convert(T,1.5), +# #=r73=# convert(T,-4), +# #=r74=# convert(T,2.5), +# ) +# return cs, as, btildes, rs +# end + +# function test_imarrays() +# function lorenz(u, p, t) +# a,b,c = u +# x,y,z = p +# dx_dt = x * (b - a) +# dy_dt = a*(y - c) - b +# dz_dt = a*b - z * c +# res = Vector{Float64}(undef, 3) +# res[1], res[2], res[3] = dx_dt, dy_dt, dz_dt +# Core.ImmutableArray(res) +# end + +# _u0 = Core.ImmutableArray([1.0, 1.0, 1.0]) +# _tspan = (0.0, 100.0) +# _p = (10.0, 28.0, 8.0/3.0) +# prob = ODEProblem(lorenz, _u0, _tspan, _p) + +# u0 = prob.u0 +# tspan = prob.tspan +# f = prob.f +# p = prob.p + +# dt = 0.1f0 +# saveat = nothing +# save_everystep = true +# abstol = 1f-6 +# reltol = 1f-3 + +# t = tspan[1] +# tf = prob.tspan[2] + +# beta1 = 7/50 +# beta2 = 2/25 +# qmax = 10.0 +# qmin = 1/5 +# gamma = 9/10 +# qoldinit = 1e-4 + +# if saveat === nothing +# ts = Vector{eltype(dt)}(undef,1) +# ts[1] = prob.tspan[1] +# us = Vector{typeof(u0)}(undef,0) +# push!(us,recursivecopy(u0)) +# else +# ts = saveat +# cur_t = 1 +# us = MVector{length(ts),typeof(u0)}(undef) +# if prob.tspan[1] == ts[1] +# cur_t += 1 +# us[1] = u0 +# end +# end + +# u = u0 +# qold = 1e-4 +# k7 = f(u, p, t) + +# cs, as, btildes, rs = _build_atsit5_caches(eltype(u0)) +# c1, c2, c3, c4, c5, c6 = cs +# a21, a31, a32, a41, a42, a43, a51, a52, a53, a54, +# a61, a62, a63, a64, a65, a71, a72, a73, a74, a75, a76 = as +# btilde1, btilde2, btilde3, btilde4, btilde5, btilde6, btilde7 = btildes + +# # FSAL +# while t < tspan[2] +# uprev = u +# k1 = k7 +# EEst = Inf + +# while EEst > 1 +# dt < 1e-14 && error("dt 1 +# dt = dt/min(inv(qmin),q11/gamma) +# else # EEst <= 1 +# @fastmath q = max(inv(qmax),min(inv(qmin),q/gamma)) +# qold = max(EEst,qoldinit) +# dtold = dt +# dt = dt/q #dtnew +# dt = min(abs(dt),abs(tf-t-dtold)) +# told = t + +# if (tf - t - dtold) < 1e-14 +# t = tf +# else +# t += dtold +# end + +# if saveat === nothing && save_everystep +# push!(us,recursivecopy(u)) +# push!(ts,t) +# else saveat !== nothing +# while cur_t <= length(ts) && ts[cur_t] <= t +# savet = ts[cur_t] +# θ = (savet - told)/dtold +# b1θ, b2θ, b3θ, b4θ, b5θ, b6θ, b7θ = bθs(rs, θ) +# us[cur_t] = uprev + dtold*( +# b1θ*k1 + b2θ*k2 + b3θ*k3 + b4θ*k4 + b5θ*k5 + b6θ*k6 + b7θ*k7) +# cur_t += 1 +# end +# end +# end +# end +# end + +# if saveat === nothing && !save_everystep +# push!(us,u) +# push!(ts,t) +# end + +# sol = DiffEqBase.build_solution(prob,Tsit5(),ts,us,calculate_error = false) + +# DiffEqBase.has_analytic(prob.f) && DiffEqBase.calculate_solution_errors!(sol;timeseries_errors=true,dense_errors=false) + +# sol +# end + +# function test_marrays() +# function lorenz(u, p, t) +# a,b,c = u +# x,y,z = p +# dx_dt = x * (b - a) +# dy_dt = a*(y - c) - b +# dz_dt = a*b - z * c +# res = Vector{Float64}(undef, 3) +# res[1], res[2], res[3] = dx_dt, dy_dt, dz_dt +# res +# end + +# _u0 = [1.0, 1.0, 1.0] +# _tspan = (0.0, 100.0) +# _p = (10.0, 28.0, 8.0/3.0) +# prob = ODEProblem(lorenz, _u0, _tspan, _p) + +# u0 = prob.u0 +# tspan = prob.tspan +# f = prob.f +# p = prob.p + +# dt = 0.1f0 +# saveat = nothing +# save_everystep = true +# abstol = 1f-6 +# reltol = 1f-3 + +# t = tspan[1] +# tf = prob.tspan[2] + +# beta1 = 7/50 +# beta2 = 2/25 +# qmax = 10.0 +# qmin = 1/5 +# gamma = 9/10 +# qoldinit = 1e-4 + +# if saveat === nothing +# ts = Vector{eltype(dt)}(undef,1) +# ts[1] = prob.tspan[1] +# us = Vector{typeof(u0)}(undef,0) +# push!(us,recursivecopy(u0)) +# else +# ts = saveat +# cur_t = 1 +# us = MVector{length(ts),typeof(u0)}(undef) +# if prob.tspan[1] == ts[1] +# cur_t += 1 +# us[1] = u0 +# end +# end + +# u = u0 +# qold = 1e-4 +# k7 = f(u, p, t) + +# cs, as, btildes, rs = _build_atsit5_caches(eltype(u0)) +# c1, c2, c3, c4, c5, c6 = cs +# a21, a31, a32, a41, a42, a43, a51, a52, a53, a54, +# a61, a62, a63, a64, a65, a71, a72, a73, a74, a75, a76 = as +# btilde1, btilde2, btilde3, btilde4, btilde5, btilde6, btilde7 = btildes + +# # FSAL +# while t < tspan[2] +# uprev = u +# k1 = k7 +# EEst = Inf + +# while EEst > 1 +# dt < 1e-14 && error("dt 1 +# dt = dt/min(inv(qmin),q11/gamma) +# else # EEst <= 1 +# @fastmath q = max(inv(qmax),min(inv(qmin),q/gamma)) +# qold = max(EEst,qoldinit) +# dtold = dt +# dt = dt/q #dtnew +# dt = min(abs(dt),abs(tf-t-dtold)) +# told = t + +# if (tf - t - dtold) < 1e-14 +# t = tf +# else +# t += dtold +# end + +# if saveat === nothing && save_everystep +# push!(us,recursivecopy(u)) +# push!(ts,t) +# else saveat !== nothing +# while cur_t <= length(ts) && ts[cur_t] <= t +# savet = ts[cur_t] +# θ = (savet - told)/dtold +# b1θ, b2θ, b3θ, b4θ, b5θ, b6θ, b7θ = bθs(rs, θ) +# us[cur_t] = uprev + dtold*( +# b1θ*k1 + b2θ*k2 + b3θ*k3 + b4θ*k4 + b5θ*k5 + b6θ*k6 + b7θ*k7) +# cur_t += 1 +# end +# end +# end +# end +# end + +# if saveat === nothing && !save_everystep +# push!(us,u) +# push!(ts,t) +# end + +# sol = DiffEqBase.build_solution(prob,Tsit5(),ts,us,calculate_error = false) + +# DiffEqBase.has_analytic(prob.f) && DiffEqBase.calculate_solution_errors!(sol;timeseries_errors=true,dense_errors=false) + +# sol +# end +