From 3864f50746d249301214b46fb5f20811ad3aadac Mon Sep 17 00:00:00 2001 From: Mikel Date: Wed, 11 Oct 2023 21:32:16 +0200 Subject: [PATCH] complex equations --- ... Complex Numbers (Github)-checkpoint.ipynb | 425 +++++++++ ...ample using Complex Numbers (Github).ipynb | 425 +++++++++ src/IRKCoefficients.jl | 114 +-- src/IRKGL16AuxFunctions.jl | 50 +- src/IRKGL16Solver.jl | 839 +++++++++--------- src/IRKGL16step_adaptive_par.jl | 389 ++++---- src/IRKGL16step_adaptive_seq.jl | 390 ++++---- src/IRKGL16step_fixed_par.jl | 377 ++++---- src/IRKGL16step_fixed_seq.jl | 377 ++++---- 9 files changed, 2205 insertions(+), 1181 deletions(-) create mode 100644 Examples/.ipynb_checkpoints/Burrau example using Complex Numbers (Github)-checkpoint.ipynb create mode 100644 Examples/Burrau example using Complex Numbers (Github).ipynb diff --git a/Examples/.ipynb_checkpoints/Burrau example using Complex Numbers (Github)-checkpoint.ipynb b/Examples/.ipynb_checkpoints/Burrau example using Complex Numbers (Github)-checkpoint.ipynb new file mode 100644 index 0000000..54734d8 --- /dev/null +++ b/Examples/.ipynb_checkpoints/Burrau example using Complex Numbers (Github)-checkpoint.ipynb @@ -0,0 +1,425 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Burrau example using Complex Numbers (Github)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "using IRKGaussLegendre\n", + "using Plots, LinearAlgebra, LaTeXStrings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Defining the Problem\n", + "To solve this numerically, we define a problem type by giving it the equation, the initial condition, and the timespan to solve over:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "NbodyODE_Complex! (generic function with 1 method)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function NbodyODE_Complex!(du,u,Gm,t)\n", + " \n", + " N = length(Gm)\n", + " du[1,:] .= zero(eltype(u))\n", + " for i in 1:N\n", + " qi = u[2,i]\n", + " Gmi = Gm[i]\n", + " du[2,i] = u[1,i]\n", + " for j in (i+1):N\n", + " qj = u[2,j]\n", + " Gmj = Gm[j]\n", + " qij = qi - qj\n", + " auxij=abs(qij)^(-3)\n", + " du[1,i] -= Gmj*auxij*qij\n", + " du[1,j] += Gmi*auxij*qij\n", + " end\n", + " end\n", + "\n", + " return\n", + " \n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "u0=[1.0+0.0im,0.0+0.0im];\n", + "\n", + "Gm = [5, 4, 3]\n", + "N=length(Gm)\n", + "q0=[1.0+-1.0im, -2.0+-1.0im,1.0+3.0im]\n", + "v0=zero(q0)\n", + "u0 = Array{ComplexF64}(undef,2,N)\n", + "u0[1,:]=v0\n", + "u0[2,:]=q0\n", + "tspan = (0.0,63.0)\n", + "prob=ODEProblem(NbodyODE_Complex!,u0,tspan,Gm);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Solving the problem\n", + "After defining a problem, you solve it using solve" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "sol1=solve(prob,IRKGL16(),adaptive=true, reltol=1e-12, abstol=1e-12);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Orbits" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bodylist = [\"Body-1\", \"Body-2\", \"Body-3\"]\n", + "pl = plot(title=\"Burrau problem\",xlabel=\"x\", ylabel=\"y\",aspect_ratio=1)\n", + "\n", + "ulist1 = sol1.u[1:end]\n", + "tlist1 = sol1.t[1:end]\n", + "\n", + "for j = 1:3\n", + " xlist = map(u->real(u[2,j]), ulist1)\n", + " ylist = map(u->imag(u[2,j]), ulist1)\n", + " pl = plot!(xlist,ylist, label = bodylist[j]) \n", + "end \n", + "plot(pl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step size" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "plot(xlabel=\"t\", ylabel=\"step size\",title=\"Adaptive step size\")\n", + "steps1 =sol1.t[2:end]-sol1.t[1:end-1]\n", + "plot!(sol1.t[2:end],steps1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Energy-Error" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "NbodyEnergy_Complex (generic function with 1 method)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function NbodyEnergy_Complex(u,Gm)\n", + " \n", + " N = length(Gm)\n", + " zerouel = zero(eltype(u))\n", + " T = real(zerouel)\n", + " U = real(zerouel)\n", + " for i in 1:N\n", + " qi = u[2,i]\n", + " vi = u[1,i]\n", + " Gmi = Gm[i]\n", + " T += Gmi*(vi*conj(vi))\n", + " for j in (i+1):N\n", + " qj = u[2,j] \n", + " Gmj = Gm[j]\n", + " qij = qi - qj\n", + " U -= Gmi*Gmj/norm(qij)\n", + " end\n", + " end\n", + " 1/2*T + U\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "setprecision(BigFloat, 256)\n", + "u0Big=convert.(Complex{BigFloat},u0)\n", + "GmBig=BigFloat.(Gm)\n", + "\n", + "E0=NbodyEnergy_Complex(u0Big,GmBig)\n", + "ΔE1 = map(x->NbodyEnergy_Complex(convert.(Complex{BigFloat},x),GmBig), sol1.u)./E0.-1\n", + "plot(title=\"Energy error\", xlabel=\"t\", ylabel=L\"\\Delta E\")\n", + "plot!(sol1.t,log10.(abs.(ΔE1)), label=\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "@webio": { + "lastCommId": null, + "lastKernelId": null + }, + "kernelspec": { + "display_name": "Julia 1.6.6", + "language": "julia", + "name": "julia-1.6" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Examples/Burrau example using Complex Numbers (Github).ipynb b/Examples/Burrau example using Complex Numbers (Github).ipynb new file mode 100644 index 0000000..54734d8 --- /dev/null +++ b/Examples/Burrau example using Complex Numbers (Github).ipynb @@ -0,0 +1,425 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Burrau example using Complex Numbers (Github)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "using IRKGaussLegendre\n", + "using Plots, LinearAlgebra, LaTeXStrings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Defining the Problem\n", + "To solve this numerically, we define a problem type by giving it the equation, the initial condition, and the timespan to solve over:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "NbodyODE_Complex! (generic function with 1 method)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function NbodyODE_Complex!(du,u,Gm,t)\n", + " \n", + " N = length(Gm)\n", + " du[1,:] .= zero(eltype(u))\n", + " for i in 1:N\n", + " qi = u[2,i]\n", + " Gmi = Gm[i]\n", + " du[2,i] = u[1,i]\n", + " for j in (i+1):N\n", + " qj = u[2,j]\n", + " Gmj = Gm[j]\n", + " qij = qi - qj\n", + " auxij=abs(qij)^(-3)\n", + " du[1,i] -= Gmj*auxij*qij\n", + " du[1,j] += Gmi*auxij*qij\n", + " end\n", + " end\n", + "\n", + " return\n", + " \n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "u0=[1.0+0.0im,0.0+0.0im];\n", + "\n", + "Gm = [5, 4, 3]\n", + "N=length(Gm)\n", + "q0=[1.0+-1.0im, -2.0+-1.0im,1.0+3.0im]\n", + "v0=zero(q0)\n", + "u0 = Array{ComplexF64}(undef,2,N)\n", + "u0[1,:]=v0\n", + "u0[2,:]=q0\n", + "tspan = (0.0,63.0)\n", + "prob=ODEProblem(NbodyODE_Complex!,u0,tspan,Gm);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Solving the problem\n", + "After defining a problem, you solve it using solve" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "sol1=solve(prob,IRKGL16(),adaptive=true, reltol=1e-12, abstol=1e-12);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Orbits" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bodylist = [\"Body-1\", \"Body-2\", \"Body-3\"]\n", + "pl = plot(title=\"Burrau problem\",xlabel=\"x\", ylabel=\"y\",aspect_ratio=1)\n", + "\n", + "ulist1 = sol1.u[1:end]\n", + "tlist1 = sol1.t[1:end]\n", + "\n", + "for j = 1:3\n", + " xlist = map(u->real(u[2,j]), ulist1)\n", + " ylist = map(u->imag(u[2,j]), ulist1)\n", + " pl = plot!(xlist,ylist, label = bodylist[j]) \n", + "end \n", + "plot(pl)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step size" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "plot(xlabel=\"t\", ylabel=\"step size\",title=\"Adaptive step size\")\n", + "steps1 =sol1.t[2:end]-sol1.t[1:end-1]\n", + "plot!(sol1.t[2:end],steps1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Energy-Error" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "NbodyEnergy_Complex (generic function with 1 method)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "function NbodyEnergy_Complex(u,Gm)\n", + " \n", + " N = length(Gm)\n", + " zerouel = zero(eltype(u))\n", + " T = real(zerouel)\n", + " U = real(zerouel)\n", + " for i in 1:N\n", + " qi = u[2,i]\n", + " vi = u[1,i]\n", + " Gmi = Gm[i]\n", + " T += Gmi*(vi*conj(vi))\n", + " for j in (i+1):N\n", + " qj = u[2,j] \n", + " Gmj = Gm[j]\n", + " qij = qi - qj\n", + " U -= Gmi*Gmj/norm(qij)\n", + " end\n", + " end\n", + " 1/2*T + U\n", + "end" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + " \n", + " \n", + " \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "setprecision(BigFloat, 256)\n", + "u0Big=convert.(Complex{BigFloat},u0)\n", + "GmBig=BigFloat.(Gm)\n", + "\n", + "E0=NbodyEnergy_Complex(u0Big,GmBig)\n", + "ΔE1 = map(x->NbodyEnergy_Complex(convert.(Complex{BigFloat},x),GmBig), sol1.u)./E0.-1\n", + "plot(title=\"Energy error\", xlabel=\"t\", ylabel=L\"\\Delta E\")\n", + "plot!(sol1.t,log10.(abs.(ΔE1)), label=\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "@webio": { + "lastCommId": null, + "lastKernelId": null + }, + "kernelspec": { + "display_name": "Julia 1.6.6", + "language": "julia", + "name": "julia-1.6" + }, + "language_info": { + "file_extension": ".jl", + "mimetype": "application/julia", + "name": "julia", + "version": "1.6.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/IRKCoefficients.jl b/src/IRKCoefficients.jl index cbe15f1..cca7946 100644 --- a/src/IRKCoefficients.jl +++ b/src/IRKCoefficients.jl @@ -6,8 +6,8 @@ # EstimateCoeffs! function PolInterp(X::AbstractVector{ctype}, - Y::AbstractMatrix{ctype}, - Z::AbstractVector{ctype}) where {ctype} + Y::AbstractMatrix{ctype}, + Z::AbstractVector{ctype}) where {ctype} N = length(X) M = length(Z) K = size(Y, 1) @@ -16,26 +16,28 @@ function PolInterp(X::AbstractVector{ctype}, end pz = zeros(ctype, K, M) - @inbounds begin for i in 1:N - lag = 1.0 - for j in 1:N - if (j != i) - lag *= X[i] - X[j] - end - end - lag = 1 / lag - for m in 1:M - liz = lag + @inbounds begin + for i in 1:N + lag = 1.0 for j in 1:N if (j != i) - liz *= Z[m] - X[j] + lag *= X[i] - X[j] end end - for k in 1:K - pz[k, m] += Y[k, i] * liz + lag = 1 / lag + for m in 1:M + liz = lag + for j in 1:N + if (j != i) + liz *= Z[m] - X[j] + end + end + for k in 1:K + pz[k, m] += Y[k, i] * liz + end end end - end end + end return pz end @@ -85,11 +87,13 @@ function MuCoefficients!(mu, T::Type{<:CompiledFloats}) mu[8, 8] = convert(T, 0.5) s = 8 - @inbounds begin for i in 1:s - for j in (i + 1):s - mu[i, j] = 1 - mu[j, i] + @inbounds begin + for i in 1:s + for j in (i + 1):s + mu[i, j] = 1 - mu[j, i] + end end - end end + end end function MuCoefficients!(mu, T) @@ -141,37 +145,39 @@ function MuCoefficients!(mu, T) mu[8, 8] = parse(T, "0.5") s = 8 - @inbounds begin for i in 1:s - for j in (i + 1):s - mu[i, j] = 1 - mu[j, i] + @inbounds begin + for i in 1:s + for j in (i + 1):s + mu[i, j] = 1 - mu[j, i] + end end - end end + end end function HCoefficients!(mu, hc, hb, nu, h, hprev, T::Type{<:CompiledFloats}) hb .= convert.(T, - [ - +5.0614268145188129576265677154981094e-02, - +1.1119051722668723527217799721312045e-01, - +1.5685332293894364366898110099330067e-01, - +1.8134189168918099148257522463859781e-01, - +1.8134189168918099148257522463859781e-01, - +1.5685332293894364366898110099330067e-01, - +1.1119051722668723527217799721312045e-01, - +5.0614268145188129576265677154981094e-02, - ]) + [ + +5.0614268145188129576265677154981094e-02, + +1.1119051722668723527217799721312045e-01, + +1.5685332293894364366898110099330067e-01, + +1.8134189168918099148257522463859781e-01, + +1.8134189168918099148257522463859781e-01, + +1.5685332293894364366898110099330067e-01, + +1.1119051722668723527217799721312045e-01, + +5.0614268145188129576265677154981094e-02, + ]) hc .= convert.(T, - [ - +1.9855071751231884158219565715263505e-02, - +1.0166676129318663020422303176208480e-01, - +2.3723379504183550709113047540537686e-01, - +4.0828267875217509753026192881990801e-01, - +5.9171732124782490246973807118009203e-01, - +7.6276620495816449290886952459462321e-01, - +8.9833323870681336979577696823791522e-01, - +9.8014492824876811584178043428473653e-01, - ]) + [ + +1.9855071751231884158219565715263505e-02, + +1.0166676129318663020422303176208480e-01, + +2.3723379504183550709113047540537686e-01, + +4.0828267875217509753026192881990801e-01, + +5.9171732124782490246973807118009203e-01, + +7.6276620495816449290886952459462321e-01, + +8.9833323870681336979577696823791522e-01, + +9.8014492824876811584178043428473653e-01, + ]) s = length(hb) @@ -253,16 +259,16 @@ end function EstimateCoeffs!(alpha, T::Type{<:CompiledFloats}) c = convert.(T, - [ - +1.9855071751231884158219565715263505e-02, - +1.0166676129318663020422303176208480e-01, - +2.3723379504183550709113047540537686e-01, - +4.0828267875217509753026192881990801e-01, - +5.9171732124782490246973807118009203e-01, - +7.6276620495816449290886952459462321e-01, - +8.9833323870681336979577696823791522e-01, - +9.8014492824876811584178043428473653e-01, - ]) + [ + +1.9855071751231884158219565715263505e-02, + +1.0166676129318663020422303176208480e-01, + +2.3723379504183550709113047540537686e-01, + +4.0828267875217509753026192881990801e-01, + +5.9171732124782490246973807118009203e-01, + +7.6276620495816449290886952459462321e-01, + +8.9833323870681336979577696823791522e-01, + +9.8014492824876811584178043428473653e-01, + ]) s = length(c) diff --git a/src/IRKGL16AuxFunctions.jl b/src/IRKGL16AuxFunctions.jl index 5dc0b68..a11592b 100644 --- a/src/IRKGL16AuxFunctions.jl +++ b/src/IRKGL16AuxFunctions.jl @@ -6,41 +6,57 @@ function ErrorEst(U, F, dt, beta, abstol, reltol) uiType = eltype(U[1]) + realuiType = real(uiType) (s,) = size(F) D = length(U[1]) - est = zero(uiType) + est = zero(realuiType) - @inbounds begin for k in eachindex(U[1]) - sum = zero(uiType) - maxU = zero(uiType) - for is in 1:s - sum += beta[is] * F[is][k] - maxU = max(maxU, abs(U[is][k])) - end + @inbounds begin + for k in eachindex(U[1]) + sum = zero(realuiType) + maxU = zero(realuiType) + for is in 1:s + sum += beta[is] * F[is][k] + maxU = max(maxU, abs(U[is][k])) + end - est += (abs(dt * sum))^2 / (abstol + maxU^2 * reltol) - end end + est += (abs(dt * sum))^2 / (abstol + maxU^2 * reltol) + end + end return (est / D) end function MyNorm(u, abstol, reltol) uiType = eltype(u) - norm = zero(uiType) - @inbounds begin for k in eachindex(u) - aux = u[k] / (abstol + abs(u[k]) * reltol) - norm += aux * aux - end end + realuiType = real(uiType) + norm = zero(realuiType) + + if uiType <: Complex + @inbounds begin + for k in eachindex(u) + aux = u[k] / (abstol + abs(u[k]) * reltol) + norm += aux * conj(aux) + end + end + else + @inbounds begin + for k in eachindex(u) + aux = u[k] / (abstol + abs(u[k]) * reltol) + norm += aux * aux + end + end + end norm = sqrt(norm / (length(u))) - return (norm) + return (real(norm)) end -function Rdigits(x::Real, r::Integer) +function Rdigits(x, r::Integer) mx = r * x mxx = mx + x return (mxx - mx) diff --git a/src/IRKGL16Solver.jl b/src/IRKGL16Solver.jl index 667f253..a2b30df 100644 --- a/src/IRKGL16Solver.jl +++ b/src/IRKGL16Solver.jl @@ -7,33 +7,35 @@ struct tcoeffs{T} alpha::Array{T, 1} end -struct tcache{uType, elTypeu, uLowType, low_prec_type} +struct tcache{uType, realuType, realuiType, uLowType, low_prec_type} U::Array{uType, 1} Uz::Array{uType, 1} L::Array{uType, 1} Lz::Array{uType, 1} F::Array{uType, 1} - Dmin::Array{uType, 1} + # Dmin::Array{uType, 1} + Dmin::Array{realuType, 1} Eval::Array{Bool, 1} - DY::Array{elTypeu, 1} + DY::Array{realuiType, 1} rejects::Array{Int64, 1} nfcn::Array{Int64, 1} - lambdas::Array{elTypeu, 1} + lambdas::Array{realuiType, 1} nrmdigits::Array{Int64, 0} end -struct tcacheMix{uType, elTypeu, uLowType, low_prec_type} +struct tcacheMix{uType, realuType, realuiType, uLowType, low_prec_type} U::Array{uType, 1} Uz::Array{uType, 1} L::Array{uType, 1} Lz::Array{uType, 1} F::Array{uType, 1} - Dmin::Array{uType, 1} + # Dmin::Array{uType, 1} + Dmin::Array{realuType, 1} Eval::Array{Bool, 1} - DY::Array{elTypeu, 1} + DY::Array{realuiType, 1} rejects::Array{Int64, 1} nfcn::Array{Int64, 1} - lambdas::Array{elTypeu, 1} + lambdas::Array{realuiType, 1} Ulow::Array{uLowType, 1} DU::Array{uLowType, 1} DF::Array{uLowType, 1} @@ -49,86 +51,86 @@ struct tcacheMix{uType, elTypeu, uLowType, low_prec_type} end abstract type IRKAlgorithm{ - mstep, - maxtrials, - initial_interp, - myoutputs, - threading, - mixed_precision, - low_prec_type, - nrmbits - } <: OrdinaryDiffEqAlgorithm end + mstep, + maxtrials, + initial_interp, + myoutputs, + threading, + mixed_precision, + low_prec_type, + nrmbits, +} <: OrdinaryDiffEqAlgorithm end struct IRKGL16{ - mstep, - maxtrials, - initial_interp, - myoutputs, - threading, - mixed_precision, - low_prec_type, - nrmbits - } <: IRKAlgorithm{ - mstep, - maxtrials, - initial_interp, - myoutputs, - threading, - mixed_precision, - low_prec_type, - nrmbits - } end + mstep, + maxtrials, + initial_interp, + myoutputs, + threading, + mixed_precision, + low_prec_type, + nrmbits, +} <: IRKAlgorithm{ + mstep, + maxtrials, + initial_interp, + myoutputs, + threading, + mixed_precision, + low_prec_type, + nrmbits, +} end function IRKGL16(; - mstep = 1, - maxtrials = 5, - initial_interp = true, - myoutputs = false, - threading = false, - mixed_precision = false, - low_prec_type = Float64, - nrmbits = 6) + mstep = 1, + maxtrials = 5, + initial_interp = true, + myoutputs = false, + threading = false, + mixed_precision = false, + low_prec_type = Float64, + nrmbits = 6) IRKGL16{ - mstep, - maxtrials, - initial_interp, - myoutputs, - threading, - mixed_precision, - low_prec_type, - nrmbits - }() + mstep, + maxtrials, + initial_interp, + myoutputs, + threading, + mixed_precision, + low_prec_type, + nrmbits, + }() end function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, isinplace}, - alg::IRKGL16{ - mstep, - maxtrials, - initial_interp, - myoutputs, - threading, - mixed_precision, - low_prec_type, - nrmbits - }, - args...; - dt = 0.0, - maxiters = 100, - save_everystep = true, - adaptive = true, - reltol = 1e-6, - abstol = 1e-6, - kwargs...) where { - uType, - tType, - isinplace, - mstep, - maxtrials, - initial_interp, - myoutputs, - threading, - mixed_precision, - low_prec_type, - nrmbits - } + alg::IRKGL16{ + mstep, + maxtrials, + initial_interp, + myoutputs, + threading, + mixed_precision, + low_prec_type, + nrmbits, + }, + args...; + dt = 0.0, + maxiters = 100, + save_everystep = true, + adaptive = true, + reltol = 1e-6, + abstol = 1e-6, + kwargs...) where { + uType, + tType, + isinplace, + mstep, + maxtrials, + initial_interp, + myoutputs, + threading, + mixed_precision, + low_prec_type, + nrmbits, +} s = 8 stats = DiffEqBase.Stats(0) @@ -151,6 +153,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is tf = tspan[2] tType2 = eltype(tspan) uiType = eltype(u0) + realuType = typeof(real(u0)) + realuiType = real(uiType) nrmdig = Array{Int64, 0}(undef) if (nrmbits > 0) @@ -159,15 +163,20 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is nrmdig[] = 0 end - lu0 = convert.(low_prec_type, u0) - uLowType = typeof(lu0) + if uiType <: Complex + lu0 = convert.(Complex{low_prec_type}, u0) + uLowType = typeof(lu0) + else + lu0 = convert.(low_prec_type, u0) + uLowType = typeof(lu0) + end - coeffs = tcoeffs{uiType}(zeros(s, s), zeros(s), zeros(s), zeros(s, s), zeros(s)) + coeffs = tcoeffs{realuiType}(zeros(s, s), zeros(s), zeros(s), zeros(s, s), zeros(s)) @unpack mu, hc, hb, nu, alpha = coeffs - Treltol = convert(uiType, reltol) - Tabstol = convert(uiType, abstol) + Treltol = convert(realuiType, reltol) + Tabstol = convert(realuiType, abstol) if (dt == 0) d0 = MyNorm(u0, Tabstol, Treltol) @@ -188,8 +197,8 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is dt = min(dt, tf - t0) - EstimateCoeffs!(alpha, uiType) - MuCoefficients!(mu, uiType) + EstimateCoeffs!(alpha, realuiType) + MuCoefficients!(mu, realuiType) dts = Array{tType2}(undef, 1) @@ -201,7 +210,7 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is dts = [dt, dtprev] sdt = sign(dt) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) # m: output saved at every m steps # n: Number of macro-steps (Output is saved for n+1 time values) @@ -219,14 +228,16 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is U3 = Array{uType}(undef, s) U4 = Array{uType}(undef, s) U5 = Array{uType}(undef, s) - U6 = Array{uType}(undef, s) + # U6 = Array{uType}(undef, s) + U6 = Array{realuType}(undef, s) for i in 1:s U1[i] = zero(u0) U2[i] = zero(u0) U3[i] = zero(u0) U4[i] = zero(u0) U5[i] = zero(u0) - U6[i] = zero(u0) + # U6[i] = zero(u0) + U6[i] = zero(real(u0)) end if (mixed_precision == true && typeof(prob.f) <: ODEFunction) @@ -250,36 +261,36 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is lmu = convert.(low_prec_type, mu) lhb = convert.(low_prec_type, hb) - if typeof(p) != SciMLBase.NullParameters + if (typeof(p) != SciMLBase.NullParameters && p != nothing) Plow = convert.(low_prec_type, p) else Plow = [] end - cache = tcacheMix{uType, uiType, uLowType, low_prec_type}(U1, - U2, - U3, - U4, - U5, - U6, - fill(true, s), - fill(zero(uiType), s), - [0], - [0, 0], - fill(zero(uiType), 2), - U11, - U12, - U13, - U14, - U15, - U16, - U17, - Plow, - fill(zero(low_prec_type), - s), - lhb, - lmu, - nrmdig) + cache = tcacheMix{uType, realuType, realuiType, uLowType, low_prec_type}(U1, + U2, + U3, + U4, + U5, + U6, + fill(true, s), + fill(zero(realuiType), s), + [0], + [0, 0], + fill(zero(realuiType), 2), + U11, + U12, + U13, + U14, + U15, + U16, + U17, + Plow, + fill(zero(low_prec_type), + s), + lhb, + lmu, + nrmdig) @unpack U, Uz, @@ -306,18 +317,18 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is nrmdigits = cache else - cache = tcache{uType, uiType, uLowType, low_prec_type}(U1, - U2, - U3, - U4, - U5, - U6, - fill(true, s), - fill(zero(uiType), s), - [0], - [0, 0], - fill(zero(uiType), 2), - nrmdig) + cache = tcache{uType, realuType, realuiType, uLowType, low_prec_type}(U1, + U2, + U3, + U4, + U5, + U6, + fill(true, s), + fill(zero(realuiType), s), + [0], + [0, 0], + fill(zero(realuiType), 2), + nrmdig) @unpack U, Uz, L, Lz, F, Dmin, Eval, DY, rejects, nfcn, lambdas, nrmdigits = cache end @@ -344,40 +355,42 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is it = 0 k = 0 - @inbounds begin for i in 1:m - j += 1 - k += 1 - (status, it) = IRKStep_par!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - Tabstol, - Treltol, - adaptive, - threading, - mixed_precision, - low_prec_type) - - if (status == "Failure") - # println("Fail") - sol = DiffEqBase.build_solution(prob, alg, tt, uu, - retcode = ReturnCode.Failure) - return (sol) - end - tit += it - - if (dts[1] == 0) - break + @inbounds begin + for i in 1:m + j += 1 + k += 1 + (status, it) = IRKStep_par!(s, + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + Tabstol, + Treltol, + adaptive, + threading, + mixed_precision, + low_prec_type) + + if (status == "Failure") + # println("Fail") + sol = DiffEqBase.build_solution(prob, alg, tt, uu, + retcode = ReturnCode.Failure) + return (sol) + end + tit += it + + if (dts[1] == 0) + break + end end - end end + end cont = (sdt * (tj[1] + tj[2]) < sdt * tf) && (j < n * m) @@ -398,40 +411,42 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is it = 0 k = 0 - @inbounds begin for i in 1:m - j += 1 - k += 1 - (status, it) = IRKStep_seq!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - Tabstol, - Treltol, - adaptive, - threading, - mixed_precision, - low_prec_type) - - if (status == "Failure") - # println("Fail") - sol = DiffEqBase.build_solution(prob, alg, tt, uu, - retcode = ReturnCode.Failure) - return (sol) - end - tit += it - - if (dts[1] == 0) - break + @inbounds begin + for i in 1:m + j += 1 + k += 1 + (status, it) = IRKStep_seq!(s, + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + Tabstol, + Treltol, + adaptive, + threading, + mixed_precision, + low_prec_type) + + if (status == "Failure") + # println("Fail") + sol = DiffEqBase.build_solution(prob, alg, tt, uu, + retcode = ReturnCode.Failure) + return (sol) + end + tit += it + + if (dts[1] == 0) + break + end end - end end + end cont = (sdt * (tj[1] + tj[2]) < sdt * tf) && (j < n * m) @@ -448,7 +463,7 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is end sol = DiffEqBase.build_solution(prob, alg, tt, uu, stats = stats, - retcode = ReturnCode.Success) + retcode = ReturnCode.Success) sol.stats.nf = nfcn[1] sol.stats.nf2 = nfcn[2] @@ -463,134 +478,134 @@ function DiffEqBase.__solve(prob::DiffEqBase.AbstractODEProblem{uType, tType, is end function IRKStep_seq!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) if (typeof(prob.f) <: ODEFunction) if (adaptive == true) if (mixed_precision == true) (status, it) = IRKstep_adaptive_Mix!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) else (status, it) = IRKstep_adaptive!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading) end else if (mixed_precision == true) (status, it) = IRKstep_fixed_Mix!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) else (status, it) = IRKstep_fixed!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading) end end else # (typeof(prob.f<:DynamicalODEFunction)) if (adaptive == true) (status, it) = IRKstepDynODE_adaptive!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading) else (status, it) = IRKstepDynODE_fixed!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading) end end @@ -598,134 +613,134 @@ function IRKStep_seq!(s, end function IRKStep_par!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) if (typeof(prob.f) <: ODEFunction) if (adaptive == true) if (mixed_precision == true) (status, it) = IRKstep_par_adaptive_Mix!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) else (status, it) = IRKstep_par_adaptive!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading) end else if (mixed_precision == true) (status, it) = IRKstep_par_fixed_Mix!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) else (status, it) = IRKstep_par_fixed!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading) end end else # (typeof(prob.f<:DynamicalODEFunction)) if (adaptive == true) (status, it) = IRKstepDynODE_par_adaptive!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading) else (status, it) = IRKstepDynODE_par_fixed!(s, - j, - tj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + tj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading) end end diff --git a/src/IRKGL16step_adaptive_par.jl b/src/IRKGL16step_adaptive_par.jl index 8bec48b..ff0decd 100644 --- a/src/IRKGL16step_adaptive_par.jl +++ b/src/IRKGL16step_adaptive_par.jl @@ -5,26 +5,27 @@ # IRKstepDynODE_par_adaptive! function IRKstep_par_adaptive!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading) @unpack mu, hc, hb, nu, alpha = coeffs @unpack f, u0, p, tspan = prob @unpack U, Uz, L, Lz, F, Dmin, Eval, DY, rejects, nfcn, lambdas, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) lambda = lambdas[1] lambdaprev = lambdas[2] @@ -34,7 +35,7 @@ function IRKstep_par_adaptive!(s, tf = tspan[2] elems = s * length(uj) - pow = eltype(uj)(1 / (2 * s)) + pow = realuiType(1 / (2 * s)) tj = ttj[1] te = ttj[2] @@ -57,31 +58,37 @@ function IRKstep_par_adaptive!(s, while (!accept && ntrials < maxtrialsj) if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * Lz[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * Lz[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end - @inbounds begin Threads.@threads for is in 1:s - nfcn[1] += 1 - f(F[is], U[is], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + Threads.@threads for is in 1:s + nfcn[1] += 1 + f(F[is], U[is], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end iter = true plusIt = true @@ -96,18 +103,20 @@ function IRKstep_par_adaptive!(s, iter = false D0 = 0 - @inbounds begin for is in 1:s - Uz[is] .= U[is] - DiffEqBase.@.. U[is] = uj + (ej + - mu[is, 1] * L[1] + - mu[is, 2] * L[2] + - mu[is, 3] * L[3] + - mu[is, 4] * L[4] + - mu[is, 5] * L[5] + - mu[is, 6] * L[6] + - mu[is, 7] * L[7] + - mu[is, 8] * L[8]) - end end #inbound + @inbounds begin + for is in 1:s + Uz[is] .= U[is] + DiffEqBase.@.. U[is] = uj + (ej + + mu[is, 1] * L[1] + + mu[is, 2] * L[2] + + mu[is, 3] * L[3] + + mu[is, 4] * L[4] + + mu[is, 5] * L[5] + + mu[is, 6] * L[6] + + mu[is, 7] * L[7] + + mu[is, 8] * L[8]) + end + end #inbound Threads.@threads for is in 1:s Eval[is] = false @@ -155,8 +164,8 @@ function IRKstep_par_adaptive!(s, if (!accept && ntrials == maxtrialsj) println("Failure (adaptive step): maximum number of trials=", maxtrialsj, - " at step=", j, - " dt=", dts[1]) + " at step=", j, + " dt=", dts[1]) return ("Failure", 0) end @@ -165,18 +174,20 @@ function IRKstep_par_adaptive!(s, # ~Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi @@ -205,23 +216,23 @@ function IRKstep_par_adaptive!(s, end function IRKstep_par_adaptive_Mix!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) @unpack mu, hc, hb, nu, alpha = coeffs @unpack f, u0, p, tspan, kwargs = prob @@ -250,6 +261,7 @@ function IRKstep_par_adaptive_Mix!(s, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) lambda = lambdas[1] lambdaprev = lambdas[2] @@ -259,7 +271,7 @@ function IRKstep_par_adaptive_Mix!(s, tf = tspan[2] elems = s * length(uj) - pow = eltype(uj)(1 / (2 * s)) + pow = realuiType(1 / (2 * s)) tj = ttj[1] te = ttj[2] @@ -282,32 +294,38 @@ function IRKstep_par_adaptive_Mix!(s, while (!accept && ntrials < maxtrialsj) if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs lhb .= hb end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * Lz[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * Lz[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end - @inbounds begin Threads.@threads for is in 1:s - nfcn[1] += 1 - f(F[is], U[is], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + Threads.@threads for is in 1:s + nfcn[1] += 1 + f(F[is], U[is], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end lmax = 1 iter = true @@ -323,20 +341,22 @@ function IRKstep_par_adaptive_Mix!(s, iter = false D0 = 0 - @inbounds begin for is in 1:s - Uz[is] .= U[is] - DiffEqBase.@.. U[is] = uj + (ej + - mu[is, 1] * L[1] + - mu[is, 2] * L[2] + - mu[is, 3] * L[3] + - mu[is, 4] * L[4] + - mu[is, 5] * L[5] + - mu[is, 6] * L[6] + - mu[is, 7] * L[7] + - mu[is, 8] * L[8]) - Ulow[is] .= U[is] - normU[is] = copy(norm(Ulow[is])) - end end #inbound + @inbounds begin + for is in 1:s + Uz[is] .= U[is] + DiffEqBase.@.. U[is] = uj + (ej + + mu[is, 1] * L[1] + + mu[is, 2] * L[2] + + mu[is, 3] * L[3] + + mu[is, 4] * L[4] + + mu[is, 5] * L[5] + + mu[is, 6] * L[6] + + mu[is, 7] * L[7] + + mu[is, 8] * L[8]) + Ulow[is] .= U[is] + normU[is] = copy(norm(Ulow[is])) + end + end #inbound Threads.@threads for is in 1:s Eval[is] = false @@ -427,8 +447,8 @@ function IRKstep_par_adaptive_Mix!(s, if (!accept && ntrials == maxtrialsj) println("Failure (adaptive step): maximum number of trials=", maxtrialsj, - " at step=", j, - " dt=", dts[1]) + " at step=", j, + " dt=", dts[1]) return ("Failure", 0) end @@ -438,18 +458,20 @@ function IRKstep_par_adaptive_Mix!(s, indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi @@ -479,21 +501,21 @@ function IRKstep_par_adaptive_Mix!(s, end function IRKstepDynODE_par_adaptive!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading) @unpack mu, hc, hb, nu, alpha = coeffs @unpack tspan, p = prob f1 = prob.f.f1 @@ -501,6 +523,7 @@ function IRKstepDynODE_par_adaptive!(s, @unpack U, Uz, L, Lz, F, Dmin, Eval, DY, rejects, nfcn, lambdas, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) lambda = lambdas[1] lambdaprev = lambdas[2] @@ -510,7 +533,7 @@ function IRKstepDynODE_par_adaptive!(s, tf = tspan[2] elems = s * length(uj) - pow = eltype(uj)(1 / (2 * s)) + pow = realuiType(1 / (2 * s)) tj = ttj[1] te = ttj[2] @@ -533,19 +556,21 @@ function IRKstepDynODE_par_adaptive!(s, while (!accept && ntrials < maxtrialsj) if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs end - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * Lz[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * Lz[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end iter = true plusIt = true @@ -567,18 +592,20 @@ function IRKstepDynODE_par_adaptive!(s, D0 = 0 # First part - @inbounds begin for is in 1:s - Uz[is].x[1] .= U[is].x[1] - DiffEqBase.@.. U[is].x[1] = uj.x[1] + (ej.x[1] + - mu[is, 1] * L[1].x[1] + - mu[is, 2] * L[2].x[1] + - mu[is, 3] * L[3].x[1] + - mu[is, 4] * L[4].x[1] + - mu[is, 5] * L[5].x[1] + - mu[is, 6] * L[6].x[1] + - mu[is, 7] * L[7].x[1] + - mu[is, 8] * L[8].x[1]) - end end #inbound + @inbounds begin + for is in 1:s + Uz[is].x[1] .= U[is].x[1] + DiffEqBase.@.. U[is].x[1] = uj.x[1] + (ej.x[1] + + mu[is, 1] * L[1].x[1] + + mu[is, 2] * L[2].x[1] + + mu[is, 3] * L[3].x[1] + + mu[is, 4] * L[4].x[1] + + mu[is, 5] * L[5].x[1] + + mu[is, 6] * L[6].x[1] + + mu[is, 7] * L[7].x[1] + + mu[is, 8] * L[8].x[1]) + end + end #inbound Threads.@threads for is in 1:s Eval[is] = false @@ -606,18 +633,20 @@ function IRKstepDynODE_par_adaptive!(s, # Second part - @inbounds begin for is in 1:s - Uz[is].x[2] .= U[is].x[2] - DiffEqBase.@.. U[is].x[2] = uj.x[2] + (ej.x[2] + - mu[is, 1] * L[1].x[2] + - mu[is, 2] * L[2].x[2] + - mu[is, 3] * L[3].x[2] + - mu[is, 4] * L[4].x[2] + - mu[is, 5] * L[5].x[2] + - mu[is, 6] * L[6].x[2] + - mu[is, 7] * L[7].x[2] + - mu[is, 8] * L[8].x[2]) - end end #inbound + @inbounds begin + for is in 1:s + Uz[is].x[2] .= U[is].x[2] + DiffEqBase.@.. U[is].x[2] = uj.x[2] + (ej.x[2] + + mu[is, 1] * L[1].x[2] + + mu[is, 2] * L[2].x[2] + + mu[is, 3] * L[3].x[2] + + mu[is, 4] * L[4].x[2] + + mu[is, 5] * L[5].x[2] + + mu[is, 6] * L[6].x[2] + + mu[is, 7] * L[7].x[2] + + mu[is, 8] * L[8].x[2]) + end + end #inbound Threads.@threads for is in 1:s Eval[is] = false @@ -665,8 +694,8 @@ function IRKstepDynODE_par_adaptive!(s, if (!accept && ntrials == maxtrialsj) println("Failure (adaptive step): maximum number of trials=", maxtrialsj, - " at step=", j, - " dt=", dts[1]) + " at step=", j, + " dt=", dts[1]) return ("Failure", 0) end @@ -674,18 +703,20 @@ function IRKstepDynODE_par_adaptive!(s, # ~ Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi ttj[2] = res.lo diff --git a/src/IRKGL16step_adaptive_seq.jl b/src/IRKGL16step_adaptive_seq.jl index 13bb55f..2fed519 100644 --- a/src/IRKGL16step_adaptive_seq.jl +++ b/src/IRKGL16step_adaptive_seq.jl @@ -5,26 +5,27 @@ # IRKstepDynODE_adaptive! function IRKstep_adaptive!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading) @unpack mu, hc, hb, nu, alpha = coeffs @unpack f, u0, p, tspan = prob @unpack U, Uz, L, Lz, F, Dmin, Eval, DY, rejects, nfcn, lambdas, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) lambda = lambdas[1] lambdaprev = lambdas[2] @@ -34,7 +35,7 @@ function IRKstep_adaptive!(s, tf = tspan[2] elems = s * length(uj) - pow = eltype(uj)(1 / (2 * s)) + pow = realuiType(1 / (2 * s)) tj = ttj[1] te = ttj[2] @@ -57,31 +58,37 @@ function IRKstep_adaptive!(s, while (!accept && ntrials < maxtrialsj) if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * Lz[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * Lz[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end - @inbounds begin for is in 1:s - nfcn[1] += 1 - f(F[is], U[is], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + for is in 1:s + nfcn[1] += 1 + f(F[is], U[is], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end iter = true plusIt = true @@ -96,18 +103,20 @@ function IRKstep_adaptive!(s, iter = false D0 = 0 - @inbounds begin for is in 1:s - Uz[is] .= U[is] - DiffEqBase.@.. U[is] = uj + (ej + - mu[is, 1] * L[1] + - mu[is, 2] * L[2] + - mu[is, 3] * L[3] + - mu[is, 4] * L[4] + - mu[is, 5] * L[5] + - mu[is, 6] * L[6] + - mu[is, 7] * L[7] + - mu[is, 8] * L[8]) - end end #inbound + @inbounds begin + for is in 1:s + Uz[is] .= U[is] + DiffEqBase.@.. U[is] = uj + (ej + + mu[is, 1] * L[1] + + mu[is, 2] * L[2] + + mu[is, 3] * L[3] + + mu[is, 4] * L[4] + + mu[is, 5] * L[5] + + mu[is, 6] * L[6] + + mu[is, 7] * L[7] + + mu[is, 8] * L[8]) + end + end #inbound for is in 1:s Eval[is] = false @@ -145,6 +154,7 @@ function IRKstep_adaptive!(s, estimate = ErrorEst(U, F, dt, alpha, abstol, reltol) lambda = (estimate)^pow + if (estimate < 2 && nit < maxiters) accept = true else @@ -155,8 +165,8 @@ function IRKstep_adaptive!(s, if (!accept && ntrials == maxtrialsj) println("Failure (adaptive step): maximum number of trials=", maxtrialsj, - " at step=", j, - " dt=", dts[1]) + " at step=", j, + " dt=", dts[1]) return ("Failure", 0) end @@ -165,18 +175,20 @@ function IRKstep_adaptive!(s, # ~Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi @@ -205,23 +217,23 @@ function IRKstep_adaptive!(s, end function IRKstep_adaptive_Mix!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) @unpack mu, hc, hb, nu, alpha = coeffs @unpack f, u0, p, tspan, kwargs = prob @@ -250,6 +262,7 @@ function IRKstep_adaptive_Mix!(s, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) lambda = lambdas[1] lambdaprev = lambdas[2] @@ -259,7 +272,7 @@ function IRKstep_adaptive_Mix!(s, tf = tspan[2] elems = s * length(uj) - pow = eltype(uj)(1 / (2 * s)) + pow = realuiType(1 / (2 * s)) tj = ttj[1] te = ttj[2] @@ -282,32 +295,38 @@ function IRKstep_adaptive_Mix!(s, while (!accept && ntrials < maxtrialsj) if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs lhb .= hb end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * Lz[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * Lz[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end - @inbounds begin for is in 1:s - nfcn[1] += 1 - f(F[is], U[is], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + for is in 1:s + nfcn[1] += 1 + f(F[is], U[is], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end lmax = 1 iter = true @@ -323,20 +342,22 @@ function IRKstep_adaptive_Mix!(s, iter = false D0 = 0 - @inbounds begin for is in 1:s - Uz[is] .= U[is] - DiffEqBase.@.. U[is] = uj + (ej + - mu[is, 1] * L[1] + - mu[is, 2] * L[2] + - mu[is, 3] * L[3] + - mu[is, 4] * L[4] + - mu[is, 5] * L[5] + - mu[is, 6] * L[6] + - mu[is, 7] * L[7] + - mu[is, 8] * L[8]) - Ulow[is] .= U[is] - normU[is] = copy(norm(Ulow[is])) - end end #inbound + @inbounds begin + for is in 1:s + Uz[is] .= U[is] + DiffEqBase.@.. U[is] = uj + (ej + + mu[is, 1] * L[1] + + mu[is, 2] * L[2] + + mu[is, 3] * L[3] + + mu[is, 4] * L[4] + + mu[is, 5] * L[5] + + mu[is, 6] * L[6] + + mu[is, 7] * L[7] + + mu[is, 8] * L[8]) + Ulow[is] .= U[is] + normU[is] = copy(norm(Ulow[is])) + end + end #inbound for is in 1:s Eval[is] = false @@ -427,8 +448,8 @@ function IRKstep_adaptive_Mix!(s, if (!accept && ntrials == maxtrialsj) println("Failure (adaptive step): maximum number of trials=", maxtrialsj, - " at step=", j, - " dt=", dts[1]) + " at step=", j, + " dt=", dts[1]) return ("Failure", 0) end @@ -438,18 +459,20 @@ function IRKstep_adaptive_Mix!(s, indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi @@ -479,21 +502,21 @@ function IRKstep_adaptive_Mix!(s, end function IRKstepDynODE_adaptive!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - maxtrials, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + maxtrials, + initial_interp, + abstol, + reltol, + adaptive, + threading) @unpack mu, hc, hb, nu, alpha = coeffs @unpack tspan, p = prob f1 = prob.f.f1 @@ -501,6 +524,7 @@ function IRKstepDynODE_adaptive!(s, @unpack U, Uz, L, Lz, F, Dmin, Eval, DY, rejects, nfcn, lambdas, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) lambda = lambdas[1] lambdaprev = lambdas[2] @@ -510,7 +534,7 @@ function IRKstepDynODE_adaptive!(s, tf = tspan[2] elems = s * length(uj) - pow = eltype(uj)(1 / (2 * s)) + pow = realuiType(1 / (2 * s)) tj = ttj[1] te = ttj[2] @@ -533,19 +557,21 @@ function IRKstepDynODE_adaptive!(s, while (!accept && ntrials < maxtrialsj) if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs end - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * Lz[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * Lz[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end iter = true plusIt = true @@ -567,18 +593,20 @@ function IRKstepDynODE_adaptive!(s, D0 = 0 # First part - @inbounds begin for is in 1:s - Uz[is].x[1] .= U[is].x[1] - DiffEqBase.@.. U[is].x[1] = uj.x[1] + (ej.x[1] + - mu[is, 1] * L[1].x[1] + - mu[is, 2] * L[2].x[1] + - mu[is, 3] * L[3].x[1] + - mu[is, 4] * L[4].x[1] + - mu[is, 5] * L[5].x[1] + - mu[is, 6] * L[6].x[1] + - mu[is, 7] * L[7].x[1] + - mu[is, 8] * L[8].x[1]) - end end #inbound + @inbounds begin + for is in 1:s + Uz[is].x[1] .= U[is].x[1] + DiffEqBase.@.. U[is].x[1] = uj.x[1] + (ej.x[1] + + mu[is, 1] * L[1].x[1] + + mu[is, 2] * L[2].x[1] + + mu[is, 3] * L[3].x[1] + + mu[is, 4] * L[4].x[1] + + mu[is, 5] * L[5].x[1] + + mu[is, 6] * L[6].x[1] + + mu[is, 7] * L[7].x[1] + + mu[is, 8] * L[8].x[1]) + end + end #inbound for is in 1:s Eval[is] = false @@ -606,18 +634,20 @@ function IRKstepDynODE_adaptive!(s, # Second part - @inbounds begin for is in 1:s - Uz[is].x[2] .= U[is].x[2] - DiffEqBase.@.. U[is].x[2] = uj.x[2] + (ej.x[2] + - mu[is, 1] * L[1].x[2] + - mu[is, 2] * L[2].x[2] + - mu[is, 3] * L[3].x[2] + - mu[is, 4] * L[4].x[2] + - mu[is, 5] * L[5].x[2] + - mu[is, 6] * L[6].x[2] + - mu[is, 7] * L[7].x[2] + - mu[is, 8] * L[8].x[2]) - end end #inbound + @inbounds begin + for is in 1:s + Uz[is].x[2] .= U[is].x[2] + DiffEqBase.@.. U[is].x[2] = uj.x[2] + (ej.x[2] + + mu[is, 1] * L[1].x[2] + + mu[is, 2] * L[2].x[2] + + mu[is, 3] * L[3].x[2] + + mu[is, 4] * L[4].x[2] + + mu[is, 5] * L[5].x[2] + + mu[is, 6] * L[6].x[2] + + mu[is, 7] * L[7].x[2] + + mu[is, 8] * L[8].x[2]) + end + end #inbound for is in 1:s Eval[is] = false @@ -666,8 +696,8 @@ function IRKstepDynODE_adaptive!(s, if (!accept && ntrials == maxtrialsj) println("Failure (adaptive step): maximum number of trials=", maxtrialsj, - " at step=", j, - " dt=", dts[1]) + " at step=", j, + " dt=", dts[1]) return ("Failure", 0) end @@ -675,18 +705,20 @@ function IRKstepDynODE_adaptive!(s, # ~ Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi ttj[2] = res.lo diff --git a/src/IRKGL16step_fixed_par.jl b/src/IRKGL16step_fixed_par.jl index 1694f90..94517ce 100644 --- a/src/IRKGL16step_fixed_par.jl +++ b/src/IRKGL16step_fixed_par.jl @@ -5,25 +5,26 @@ # IRKstepDynODE_par_fixed! function IRKstep_par_fixed!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading) @unpack mu, hc, hb, nu, alpha = coeffs @unpack f, u0, p, tspan = prob @unpack U, Uz, L, Lz, F, Dmin, Eval, DY, rejects, nfcn, lambdas, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) dt = dts[1] dtprev = dts[2] @@ -37,39 +38,47 @@ function IRKstep_par_fixed!(s, nit = 0 if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * L[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * L[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end iter = true plusIt = true nit = 0 - @inbounds begin for is in 1:s - Dmin[is] .= Inf - end end + @inbounds begin + for is in 1:s + Dmin[is] .= Inf + end + end - @inbounds begin Threads.@threads for is in 1:s - nfcn[1] += 1 - f(F[is], U[is], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + Threads.@threads for is in 1:s + nfcn[1] += 1 + f(F[is], U[is], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end while (nit < maxiters && iter) nit += 1 @@ -77,18 +86,20 @@ function IRKstep_par_fixed!(s, iter = false D0 = 0 - @inbounds begin for is in 1:s - Uz[is] .= U[is] - DiffEqBase.@.. U[is] = uj + (ej + - mu[is, 1] * L[1] + - mu[is, 2] * L[2] + - mu[is, 3] * L[3] + - mu[is, 4] * L[4] + - mu[is, 5] * L[5] + - mu[is, 6] * L[6] + - mu[is, 7] * L[7] + - mu[is, 8] * L[8]) - end end #inbounds + @inbounds begin + for is in 1:s + Uz[is] .= U[is] + DiffEqBase.@.. U[is] = uj + (ej + + mu[is, 1] * L[1] + + mu[is, 2] * L[2] + + mu[is, 3] * L[3] + + mu[is, 4] * L[4] + + mu[is, 5] * L[5] + + mu[is, 6] * L[6] + + mu[is, 7] * L[7] + + mu[is, 8] * L[8]) + end + end #inbounds Threads.@threads for is in 1:s Eval[is] = false @@ -131,18 +142,20 @@ function IRKstep_par_fixed!(s, #~Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi ttj[2] = res.lo @@ -158,22 +171,22 @@ function IRKstep_par_fixed!(s, end function IRKstep_par_fixed_Mix!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) @unpack mu, hc, hb, nu, alpha = coeffs @unpack f, u0, p, tspan, kwargs = prob @@ -202,6 +215,7 @@ function IRKstep_par_fixed_Mix!(s, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) dt = dts[1] dtprev = dts[2] @@ -215,40 +229,48 @@ function IRKstep_par_fixed_Mix!(s, nit = 0 if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs lhb .= hb end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * L[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * L[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end iter = true plusIt = true nit = 0 - @inbounds begin for is in 1:s - Dmin[is] .= Inf - end end + @inbounds begin + for is in 1:s + Dmin[is] .= Inf + end + end - @inbounds begin Threads.@threads for is in 1:s - nfcn[1] += 1 - f(F[is], U[is], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + Threads.@threads for is in 1:s + nfcn[1] += 1 + f(F[is], U[is], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end lmax = 1 @@ -257,20 +279,22 @@ function IRKstep_par_fixed_Mix!(s, iter = false D0 = 0 - @inbounds begin for is in 1:s - Uz[is] .= U[is] - DiffEqBase.@.. U[is] = uj + (ej + - mu[is, 1] * L[1] + - mu[is, 2] * L[2] + - mu[is, 3] * L[3] + - mu[is, 4] * L[4] + - mu[is, 5] * L[5] + - mu[is, 6] * L[6] + - mu[is, 7] * L[7] + - mu[is, 8] * L[8]) - Ulow[is] .= U[is] - normU[is] = copy(norm(Ulow[is])) - end end # inbound + @inbounds begin + for is in 1:s + Uz[is] .= U[is] + DiffEqBase.@.. U[is] = uj + (ej + + mu[is, 1] * L[1] + + mu[is, 2] * L[2] + + mu[is, 3] * L[3] + + mu[is, 4] * L[4] + + mu[is, 5] * L[5] + + mu[is, 6] * L[6] + + mu[is, 7] * L[7] + + mu[is, 8] * L[8]) + Ulow[is] .= U[is] + normU[is] = copy(norm(Ulow[is])) + end + end # inbound Threads.@threads for is in 1:s Eval[is] = false @@ -357,18 +381,20 @@ function IRKstep_par_fixed_Mix!(s, # ~ Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi @@ -389,20 +415,20 @@ end # function IRKstepDynODE_par_fixed!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading) @unpack mu, hc, hb, nu, alpha = coeffs @unpack tspan, p = prob f1 = prob.f.f1 @@ -410,6 +436,7 @@ function IRKstepDynODE_par_fixed!(s, @unpack U, Uz, L, Lz, F, Dmin, Eval, DY, rejects, nfcn, lambdas, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) dt = dts[1] dtprev = dts[2] @@ -423,24 +450,28 @@ function IRKstepDynODE_par_fixed!(s, nit = 0 if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * L[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * L[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end iter = true @@ -451,12 +482,14 @@ function IRKstepDynODE_par_fixed!(s, Dmin[is] .= Inf end - @inbounds begin Threads.@threads for is in 1:s - nfcn[1] += 1 - f1(F[is].x[1], U[is].x[1], U[is].x[2], p, tj + hc[is]) - f2(F[is].x[2], U[is].x[1], U[is].x[2], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + Threads.@threads for is in 1:s + nfcn[1] += 1 + f1(F[is].x[1], U[is].x[1], U[is].x[2], p, tj + hc[is]) + f2(F[is].x[2], U[is].x[1], U[is].x[2], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end while (nit < maxiters && iter) nit += 1 @@ -465,18 +498,20 @@ function IRKstepDynODE_par_fixed!(s, # First part - @inbounds begin for is in 1:s - Uz[is].x[1] .= U[is].x[1] - DiffEqBase.@.. U[is].x[1] = uj.x[1] + (ej.x[1] + - mu[is, 1] * L[1].x[1] + - mu[is, 2] * L[2].x[1] + - mu[is, 3] * L[3].x[1] + - mu[is, 4] * L[4].x[1] + - mu[is, 5] * L[5].x[1] + - mu[is, 6] * L[6].x[1] + - mu[is, 7] * L[7].x[1] + - mu[is, 8] * L[8].x[1]) - end end #inbounds + @inbounds begin + for is in 1:s + Uz[is].x[1] .= U[is].x[1] + DiffEqBase.@.. U[is].x[1] = uj.x[1] + (ej.x[1] + + mu[is, 1] * L[1].x[1] + + mu[is, 2] * L[2].x[1] + + mu[is, 3] * L[3].x[1] + + mu[is, 4] * L[4].x[1] + + mu[is, 5] * L[5].x[1] + + mu[is, 6] * L[6].x[1] + + mu[is, 7] * L[7].x[1] + + mu[is, 8] * L[8].x[1]) + end + end #inbounds Threads.@threads for is in 1:s Eval[is] = false @@ -559,18 +594,20 @@ function IRKstepDynODE_par_fixed!(s, # ~Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi diff --git a/src/IRKGL16step_fixed_seq.jl b/src/IRKGL16step_fixed_seq.jl index 5df3c4c..9dc1a8d 100644 --- a/src/IRKGL16step_fixed_seq.jl +++ b/src/IRKGL16step_fixed_seq.jl @@ -5,25 +5,26 @@ # IRKstepDynODE_fixed! function IRKstep_fixed!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading) @unpack mu, hc, hb, nu, alpha = coeffs @unpack f, u0, p, tspan = prob @unpack U, Uz, L, Lz, F, Dmin, Eval, DY, rejects, nfcn, lambdas, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) dt = dts[1] dtprev = dts[2] @@ -37,39 +38,47 @@ function IRKstep_fixed!(s, nit = 0 if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * L[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * L[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end iter = true plusIt = true nit = 0 - @inbounds begin for is in 1:s - Dmin[is] .= Inf - end end + @inbounds begin + for is in 1:s + Dmin[is] .= Inf + end + end - @inbounds begin for is in 1:s - nfcn[1] += 1 - f(F[is], U[is], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + for is in 1:s + nfcn[1] += 1 + f(F[is], U[is], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end while (nit < maxiters && iter) nit += 1 @@ -77,18 +86,20 @@ function IRKstep_fixed!(s, iter = false D0 = 0 - @inbounds begin for is in 1:s - Uz[is] .= U[is] - DiffEqBase.@.. U[is] = uj + (ej + - mu[is, 1] * L[1] + - mu[is, 2] * L[2] + - mu[is, 3] * L[3] + - mu[is, 4] * L[4] + - mu[is, 5] * L[5] + - mu[is, 6] * L[6] + - mu[is, 7] * L[7] + - mu[is, 8] * L[8]) - end end #inbounds + @inbounds begin + for is in 1:s + Uz[is] .= U[is] + DiffEqBase.@.. U[is] = uj + (ej + + mu[is, 1] * L[1] + + mu[is, 2] * L[2] + + mu[is, 3] * L[3] + + mu[is, 4] * L[4] + + mu[is, 5] * L[5] + + mu[is, 6] * L[6] + + mu[is, 7] * L[7] + + mu[is, 8] * L[8]) + end + end #inbounds for is in 1:s Eval[is] = false @@ -131,18 +142,20 @@ function IRKstep_fixed!(s, #~Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi ttj[2] = res.lo @@ -158,22 +171,22 @@ function IRKstep_fixed!(s, end function IRKstep_fixed_Mix!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading, - mixed_precision, - low_prec_type) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading, + mixed_precision, + low_prec_type) @unpack mu, hc, hb, nu, alpha = coeffs @unpack f, u0, p, tspan, kwargs = prob @@ -202,6 +215,7 @@ function IRKstep_fixed_Mix!(s, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) dt = dts[1] dtprev = dts[2] @@ -215,40 +229,48 @@ function IRKstep_fixed_Mix!(s, nit = 0 if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs lhb .= hb end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * L[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * L[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end iter = true plusIt = true nit = 0 - @inbounds begin for is in 1:s - Dmin[is] .= Inf - end end + @inbounds begin + for is in 1:s + Dmin[is] .= Inf + end + end - @inbounds begin for is in 1:s - nfcn[1] += 1 - f(F[is], U[is], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + for is in 1:s + nfcn[1] += 1 + f(F[is], U[is], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end lmax = 1 @@ -257,20 +279,22 @@ function IRKstep_fixed_Mix!(s, iter = false D0 = 0 - @inbounds begin for is in 1:s - Uz[is] .= U[is] - DiffEqBase.@.. U[is] = uj + (ej + - mu[is, 1] * L[1] + - mu[is, 2] * L[2] + - mu[is, 3] * L[3] + - mu[is, 4] * L[4] + - mu[is, 5] * L[5] + - mu[is, 6] * L[6] + - mu[is, 7] * L[7] + - mu[is, 8] * L[8]) - Ulow[is] .= U[is] - normU[is] = copy(norm(Ulow[is])) - end end # inbound + @inbounds begin + for is in 1:s + Uz[is] .= U[is] + DiffEqBase.@.. U[is] = uj + (ej + + mu[is, 1] * L[1] + + mu[is, 2] * L[2] + + mu[is, 3] * L[3] + + mu[is, 4] * L[4] + + mu[is, 5] * L[5] + + mu[is, 6] * L[6] + + mu[is, 7] * L[7] + + mu[is, 8] * L[8]) + Ulow[is] .= U[is] + normU[is] = copy(norm(Ulow[is])) + end + end # inbound for is in 1:s Eval[is] = false @@ -357,18 +381,20 @@ function IRKstep_fixed_Mix!(s, # ~ Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi @@ -389,20 +415,20 @@ end # function IRKstepDynODE_fixed!(s, - j, - ttj, - uj, - ej, - prob, - dts, - coeffs, - cache, - maxiters, - initial_interp, - abstol, - reltol, - adaptive, - threading) + j, + ttj, + uj, + ej, + prob, + dts, + coeffs, + cache, + maxiters, + initial_interp, + abstol, + reltol, + adaptive, + threading) @unpack mu, hc, hb, nu, alpha = coeffs @unpack tspan, p = prob f1 = prob.f.f1 @@ -410,6 +436,7 @@ function IRKstepDynODE_fixed!(s, @unpack U, Uz, L, Lz, F, Dmin, Eval, DY, rejects, nfcn, lambdas, nrmdigits = cache uiType = eltype(uj) + realuiType = real(uiType) dt = dts[1] dtprev = dts[2] @@ -423,24 +450,28 @@ function IRKstepDynODE_fixed!(s, nit = 0 if (dt != dtprev) - HCoefficients!(mu, hc, hb, nu, dt, dtprev, uiType) + HCoefficients!(mu, hc, hb, nu, dt, dtprev, realuiType) @unpack mu, hc, hb, nu, alpha = coeffs end if initial_interp - @inbounds begin for is in 1:s - for k in eachindex(uj) - aux = zero(eltype(uj)) - for js in 1:s - aux += nu[is, js] * L[js][k] + @inbounds begin + for is in 1:s + for k in eachindex(uj) + aux = zero(eltype(uj)) + for js in 1:s + aux += nu[is, js] * L[js][k] + end + U[is][k] = (uj[k] + ej[k]) + aux end - U[is][k] = (uj[k] + ej[k]) + aux end - end end + end else - @inbounds begin for is in 1:s - @. U[is] = uj + ej - end end + @inbounds begin + for is in 1:s + @. U[is] = uj + ej + end + end end iter = true @@ -451,12 +482,14 @@ function IRKstepDynODE_fixed!(s, Dmin[is] .= Inf end - @inbounds begin for is in 1:s - nfcn[1] += 1 - f1(F[is].x[1], U[is].x[1], U[is].x[2], p, tj + hc[is]) - f2(F[is].x[2], U[is].x[1], U[is].x[2], p, tj + hc[is]) - @. L[is] = hb[is] * F[is] - end end + @inbounds begin + for is in 1:s + nfcn[1] += 1 + f1(F[is].x[1], U[is].x[1], U[is].x[2], p, tj + hc[is]) + f2(F[is].x[2], U[is].x[1], U[is].x[2], p, tj + hc[is]) + @. L[is] = hb[is] * F[is] + end + end while (nit < maxiters && iter) nit += 1 @@ -465,18 +498,20 @@ function IRKstepDynODE_fixed!(s, # First part - @inbounds begin for is in 1:s - Uz[is].x[1] .= U[is].x[1] - DiffEqBase.@.. U[is].x[1] = uj.x[1] + (ej.x[1] + - mu[is, 1] * L[1].x[1] + - mu[is, 2] * L[2].x[1] + - mu[is, 3] * L[3].x[1] + - mu[is, 4] * L[4].x[1] + - mu[is, 5] * L[5].x[1] + - mu[is, 6] * L[6].x[1] + - mu[is, 7] * L[7].x[1] + - mu[is, 8] * L[8].x[1]) - end end #inbounds + @inbounds begin + for is in 1:s + Uz[is].x[1] .= U[is].x[1] + DiffEqBase.@.. U[is].x[1] = uj.x[1] + (ej.x[1] + + mu[is, 1] * L[1].x[1] + + mu[is, 2] * L[2].x[1] + + mu[is, 3] * L[3].x[1] + + mu[is, 4] * L[4].x[1] + + mu[is, 5] * L[5].x[1] + + mu[is, 6] * L[6].x[1] + + mu[is, 7] * L[7].x[1] + + mu[is, 8] * L[8].x[1]) + end + end #inbounds for is in 1:s Eval[is] = false @@ -559,18 +594,20 @@ function IRKstepDynODE_fixed!(s, # ~Compensated summation indices = eachindex(uj) - @inbounds begin for k in indices - e0 = ej[k] - for is in 1:s - e0 += muladd(F[is][k], hb[is], -L[is][k]) - end - res = Base.TwicePrecision(uj[k], e0) - for is in 1:s - res += L[is][k] + @inbounds begin + for k in indices + e0 = ej[k] + for is in 1:s + e0 += muladd(F[is][k], hb[is], -L[is][k]) + end + res = Base.TwicePrecision(uj[k], e0) + for is in 1:s + res += L[is][k] + end + uj[k] = res.hi + ej[k] = res.lo end - uj[k] = res.hi - ej[k] = res.lo - end end + end res = Base.TwicePrecision(tj, te) + dt ttj[1] = res.hi