diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 837686c5a..81069a001 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -18,7 +18,7 @@ steps:
       RETESTITEMS_NWORKERS: 1 # These tests require quite a lot of GPU memory
       GROUP: CUDA
       DATADEPS_ALWAYS_ACCEPT: 'true'
-      JULIA_PKG_SERVER: "" # it often struggles with our large artifacts
+      JULIA_PKG_SERVER: ""
 
   - label: "Documentation"
     plugins:
@@ -42,5 +42,6 @@ steps:
       DATADEPS_ALWAYS_ACCEPT: true
       JULIA_DEBUG: "Documenter"
       SECRET_DOCUMENTER_KEY: "AdqcYtp4x3U5j1ELurHIoOwURqXcOan+qmihqVjsjhoGUzS/snTyZNQ5fxaJr8Yawm9CyyGvh+Q5O98St1LJ9S+pi9C5TFSbPWnNp/vXabMmeUEVLHVYHUeR2wgMCciSnM/oLw5sNAEj3hrWFjLslEGKQSptUCTWuU5WRizhQONDxeA3tz9biZUYvKanP8GjsHUkD3te15n1t6o78T1+EJxb1znrBSd9aK1Y4UaVjBEfVtLtTD8Z6VP1L4SVXVipxrDdzwzbzUDaTpvjo3z3e9qx2u6Xn5qa/os6JY81jRa5ZTWFkev73DYhoFmordSI85grOPwNpvrNWqOAs5kTDg==;U2FsdGVkX1+TXM0w98SRH5lY0Dw+nmRJ1xtJmffK+GVWHdMjhiIxQoYGGoP065hgl1VOf+oLzqWWFoYIfcz5i/VKD5F7O3EhfLdcmG2Y15u4sxr/hLKKMedSCueiusNd4N9EGGmJ4LLY0I1K6vA8Pa2eRwE+yDE+wfSpqpTC3yxMo40Xk7wra6ZwAybGpSHzOItV+2QGHttr/WLntRbx7GD8HDBC/LGNhtmFzFhAo/2CiQ0qgMDHBxyVqZlAdCWdu9xGmD9FC2+HDGv6QyNge1Ajmg6TNd6tuRhAP6VfDidEtaohcGl2TxbuUcd1OSrSbwmqxcw0IhVriRN8FgB8pKYmol7J71za9ljViyzgAjhQFvute/PYB2nw9MB8yCoNu6X0hqoLdmSxbzerpeYh0yRdi6SedESBJV3PgL7uahnyHbhC4dudFPavobeP9nU0okzKXG7fYfR1aiqgeeed4WFi48u+pciWv6Uo8J4lbBTUXu4xI+yWCpt01LNXyTmsIPYUvqbE8U0DkJNsNie9lw4of3UYkKhtVkLBoQg8++uc6i0w70+/sKZDp2OA5Y1jMFrRQaWUHyaRfpX3pXqvghAfVLEybiSWnpE0JiAnBsDcI3zajc4Cp+lui9G0+E8Lc+NbXOMbjiYHejjN46/03sIHNu0YPlU5p7o2xrGpa3cw6o3yHhBE7yVTcBc7A0AFPGZQxTLOEw1lYf7+B6J5AEpDhxR9+gUhmL+S+2kUw+nxsMxdD0Tunfeg4CIoeB9Tl7uIBrZDQ23uVRrcEyE6t3zf7skBcW3wlrHpAY61CxuGuMolcTl0JaeYFTJPYzOgPa+nD/vKaICsrRDkaSUUHcGufbTgqdJLjIkh+M9a7+DPKpfoT7H4gp4VrocqClFmmPoZZAIKjxXAEnEHWILBw0a9cOar1DKfJVoyN1vQIdVeux90a50Ao62m3sHoYiXY3DeutHkAmfWWDl/5zcU2h1T0XWHmRnjjmAW0fZPL+E38rKXYQECeHMDFEYYfbGyZMJx9T9pwfvxTM4Yzd4nB0qspOXMdeGvnVbzqlnaGJpxs/M5zyxILMQzq979bwSI0TPFRqLojhNezOhZHZaJdFoWq6UqW1kFDyzNIIRCQak0kGuhpCeFqqxiFFtC1M1vskpZ5UfqtCQSgILU+XbJWAxZOqrAxy2T7+h9JMS/jLPW+tyjCJx/bhqSGF9fBt6Q1R4ZL1MjaBSocnMj/5H7IZI2TdH6ulTyigZv7OEMQZRSyTrZgBNPLAiMHVoZVLkZF2NZse/4tHG/7i0Lio+m2z8WsjSQa04LPjtnlCSpYrug8EoGN4NruaRBDBIlTV9w6Rvz4YYB4iDht14ifF6XJVl/uo4jWKHAJC6Bc5IwFD80A/jYmx1vbLwvwVgYGCOW80WSFUGSnBSwVDLsLufXWt9ct8Kql3ICl2/iLO1ZZofELOgddV8yds1vrBdhn8jP1QCrTqtS0ITgLOabDEJMAma7St5R+Oa7kAj2zlVh/A0WXolGD04ReHnuiNN7S6C/ePpSTP/fMSa5bCrmQkw93fEgHdNOpQq9DwYNa9wEijtdEJsw5Kl+B73SNIhG+X7h1sN1DsCao2v1EPtxzaXw51kfJHzhtdCKnKXYap8Lk+twZ6KKH3QZaSsq2LSL7E7da3ZXwo4oRMjV5OTkWaklGKZmzJaMrUnpbJMQUfb4tyNDBJ/52arcTLOn9b72d+927qUfKNCwm+Ma5tUJZ9PkOUxObbXgguXvgVVBL5Li0kfcCyYQC8HcNu1ZkmP7FbJzYo4m/e8v+jASNorC+49BvDE8WlSw+6dJQoP29S0u2OyZ4CSLYvqVDz5WIiLYR9OH2Dk7dB6n69jgngYkEtTF+1TWwQOR0d+6xungqU86W+4JvPkBx4YwVHmnw+iFRNqJd/OTmBVVDYpEkC6N38SuCRAuZwjcVfl6ERm0C7FEJOlmXqs6UeUuPNxuCE7yKcD4J0JGhVjSxN4c3dbV0aipIt9/ob+I2rXA3TUVOU7G+svsboOo1bHlUfoL1HQcasUHwst6ScsrlzJtSLActVb8QMh2iOw5zlxHGyq/MqU+tcquLZc3ctYZwzXatMjEqNqP4nHF1HHkYavwrhFr4U6lbnPZ6ZlUCdrXKDsn7BrRnn2MQ4My7k/Cau6174Zln9RRB4LRs45P3oDUug3Xc8/erekvf8L1HRFsiHzv+8ssvO/dfOEkD1hUTacNejaWt6HXCPC3zXhnwsk/lw3TLXSuxWO3hDpxBuAy8gRmCVZq4HPLAod/lEIpXQY3Rij8mmU28tCnhXrjxTTCe1ci9F4dy2IigN/1YeA5k+6gVpNmV9NvSDlPKN5vkGymFXokBiHkBKOd/uIkNORPYbjEGq17mO4CTfbJ8uDKneibC22VOaqB5Xk3/Xp4zz4TVCCr4xznVui7OOeuNaUptrypsMRtWfYiJ52s8TYiOsQ4SyKmXdCy51k16wGT9/ZrPpEktYFsC6g4SdY04MvBaowPYsudy9uOyUnZFuxPX+SLusRfLHlgkZU2SxrPMlHbOFpFVHKgCiKXLBoDWL3Bkn/9SUfoFYWf65KEfTeKJc7FCxsH/r6ngLMlJidn/fxI0D5FyVh0FVJnpdhGgv8jXo28nVCRLTNwRl5TzUjdz4EIvSxugmFR38qbpdkYeOlCdggE8safaxy+xXpYX0diec3x9MYvV7cWMtOCmTwKkevlPUTL+UuQiWKNf7fOzmGG67wNKfFtwmMxCbQ20nT2eAT6TjZ9eowW/E7s8i3cpCycJjeshuz7Y1G5ECLjSKe2UjCEGhBqRR8T/cBObQBErrilsRgjdO6w4UvBy4FkuzVpvB2cKhMt3HocBLNMlhUHaIgQfh1C6PMRT5S10+e659+qvF0GZOCJ7Y6b5Z9X9XipJEJo8ISC5uR7Z/WmEqgVtTdfezJvnGoibun2FjFriTP1W+HqHcGTpgwfWrVwHuGXYreckkzZaP//Q5uHajxj4AYQhEmjcQ6jcRNAWxOYLKZWqy7d2sDw79wdvKT6kfKg3wiKB/Arl1Mv33b1FkCfr+MRu1nzWeuAG72su6L91T0CVymOQQCZpA2ubPYrIH3vyKmRLvFIgwIwrWRZtsY8dvznOGmHvlEgUw+C2Ln0loc6fDDQ4Jk20LSXzRtsCeJ+PUahaDT9aW4xgWXHTAjGXjaZUak+5rs3ck7ZH3vbvVtbfg3/PKiCKWKXtt/2ZIZTxgdQVlGhZcz/LVzjIPOSTfKgcyz+pVXFq+hC2fubtsgTTm6DkXzP5pFdens05f27nXdr5FYNoYSJcU7w58gHTZOIez7oiMdkG+FObg9u/cpz923hOeBOmYaoQ9JePCQzQkWyPwHVD1IFsGBKtVIwahdKnpmjxubdr2ehdYbC7SsMD772YdPKOx07ipmz56pGBVqG7nUqYDxX2Mr5hvrHKbnLmYQykBSfazMUmK5/c+dNngn7QU8kN0fxFDmDzaUeQcZwbBNVlfp51Gsc+LvYoyXMUhBu53wQgNmq+ZJCSO4V+XBIgJAKIOSlVDq0GXe1VtyfNC4XcU0ey84K0/mD3RmtNGc2YYNp6OPqwzJAIexQcSr8pehN53fqGuGrRX3EqPzxNZwM7W+VzzpT+Ky2jpLl2YrQyuROIAMV09P8HoDxBorSHAQXkijs2ByIAGbQqJhwtbcovSPIMqvHPeKKMuFfNzKnmCkUysklNxQynM2clwKwbOchghZOBNH2sQ4atfhHjdo66dXtzSmngyPujZcwinq1b1VUbG1n9BuusgdUrhpt/28MJRYLt4tJFVBqYGu98Ewa1oX+7xqCmhEe2us43fg7EYBpwLBAVDNsohVO69upLR+Yy2C1lhqJSSbO+JLKg70/7onpMI8JcCtiNYOMFYMix9ynkpBf8gN+cM/VgL4cldHYwbaAJXgnD7PxdmDIy7r8oZnGOHE//a3iDyB+Xqy0t9c41OYYn6PkB32BqRHFUvbzU+6kaDpQD/gk0EBTb51SLmy3IBBLKpKw1R0CVfS2wY5XX7vYYpgAMQzsoZpL3Ep0NpcRqtutcec0o0VXkd3B9wXJhDG+en0MaY9vc6V4g+nT8Z2jZw0A6lXnbDxlQN/CmvvrcsexHGGIj6vjpQs/oSyvOYaD1gVTWdQgcPhCYZGVH5O/llnKfxsRFVU3g6XvL4ND0oQ7S98eHRhz+8TqOx9Se47vEEC1O3bNDf6Rnnm+aB9vD0GKQ6iAETWI74yF5HrCpZY2XIDK8OgOQJoiWpDWaDxfNjK8nWMSjV8bnEdIzLP08p8fLWP/+JPeJkUB91mxmi5mhMjKFpKo/lXtvM2E8zmzkjo/VyLGYaij7EgF1XNIWRC5LWlsrrPiqVfQlmftDzjaG+jCx/47NLws="
+      JULIA_PKG_SERVER: ""
     if: build.message !~ /\[skip docs\]/ && !build.pull_request.draft
     timeout_in_minutes: 1000
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 839ed6a4a..935e8c8de 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -42,8 +42,6 @@ jobs:
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
-        with:
-          coverage: false
         env:
           GROUP: ${{ matrix.group }}
       - uses: julia-actions/julia-processcoverage@v1
diff --git a/Project.toml b/Project.toml
index 72a79b18c..aee97ae05 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "DiffEqFlux"
 uuid = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
 authors = ["Chris Rackauckas "]
-version = "3.6.0"
+version = "4.0.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -18,6 +18,7 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
+Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 
 [weakdeps]
 DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
@@ -29,7 +30,7 @@ DiffEqFluxDataInterpolationsExt = "DataInterpolations"
 ADTypes = "1.5"
 Aqua = "0.8.7"
 BenchmarkTools = "1.5.0"
-Boltz = "0.4.2"
+Boltz = "1"
 ChainRulesCore = "1"
 ComponentArrays = "0.15.17"
 ConcreteStructs = "0.2"
@@ -46,11 +47,10 @@ ForwardDiff = "0.10"
 Hwloc = "3"
 InteractiveUtils = "<0.0.1, 1"
 LinearAlgebra = "1.10"
-Lux = "0.5.65"
+Lux = "1"
 LuxCUDA = "0.3.2"
-LuxCore = "0.1"
-LuxLib = "0.3.50"
-MLDatasets = "0.7.14"
+LuxCore = "1"
+LuxLib = "1.2"
 NNlib = "0.9.22"
 OneHotArrays = "0.2.5"
 Optimisers = "0.3"
@@ -65,6 +65,7 @@ Reexport = "0.2, 1"
 SciMLBase = "2"
 SciMLSensitivity = "7"
 Setfield = "1.1.1"
+Static = "1.1.1"
 Statistics = "1.10"
 StochasticDiffEq = "6.68.0"
 Test = "1.10"
@@ -87,7 +88,6 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
-MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
@@ -105,4 +105,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Aqua", "BenchmarkTools", "ComponentArrays", "DataInterpolations", "DelayDiffEq", "DiffEqCallbacks", "Distances", "Distributed", "DistributionsAD", "ExplicitImports", "ForwardDiff", "Flux", "Hwloc", "InteractiveUtils", "LuxCUDA", "MLDatasets", "NNlib", "OneHotArrays", "Optimisers", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "OrdinaryDiffEq", "Printf", "Random", "ReTestItems", "Reexport", "Statistics", "StochasticDiffEq", "Test", "Zygote"]
+test = ["Aqua", "BenchmarkTools", "ComponentArrays", "DataInterpolations", "DelayDiffEq", "DiffEqCallbacks", "Distances", "Distributed", "DistributionsAD", "ExplicitImports", "ForwardDiff", "Flux", "Hwloc", "InteractiveUtils", "LuxCUDA", "NNlib", "OneHotArrays", "Optimisers", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "OrdinaryDiffEq", "Printf", "Random", "ReTestItems", "Reexport", "Statistics", "StochasticDiffEq", "Test", "Zygote"]
diff --git a/README.md b/README.md
index 0824a4a52..708612e47 100644
--- a/README.md
+++ b/README.md
@@ -63,17 +63,18 @@ explore various ways to integrate the two methodologies:
 
 ## Breaking Changes
 
-### v4 (upcoming)
+### v4
 
   - `TensorLayer` has been removed, use `Boltz.Layers.TensorProductLayer` instead.
   - Basis functions in DiffEqFlux have been removed in favor of `Boltz.Basis` module.
   - `SplineLayer` has been removed, use `Boltz.Layers.SplineLayer` instead.
   - `NeuralHamiltonianDE` has been removed, use `NeuralODE` with `Layers.HamiltonianNN` instead.
   - `HamiltonianNN` has been removed in favor of `Layers.HamiltonianNN`.
+  - `Lux` and `Boltz` are updated to v1.
 
 ### v3
 
-  - Flux dependency is dropped. If a non Lux `AbstractExplicitLayer` is passed we try to automatically convert it to a Lux model with `FromFluxAdaptor()(model)`.
+  - Flux dependency is dropped. If a model that is not a Lux `AbstractLuxLayer` is passed, we try to automatically convert it to a Lux model with `FromFluxAdaptor()(model)`.
  - `Flux` is no longer re-exported from `DiffEqFlux`. Instead we reexport `Lux`.
  - `NeuralDAE` now allows an optional `du0` as input.
  - `TensorLayer` is now a Lux Neural Network.
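For readers migrating, here is a minimal before/after sketch of the renames listed above, assembled from the documentation updates further down in this patch. It is illustrative only: `Layers` and `Basis` are assumed to come from the re-exported Boltz, and the save grid `ts` is a placeholder introduced for this sketch.

```julia
using DiffEqFlux, OrdinaryDiffEq, ComponentArrays, Random

ts = 0.0f0:0.1f0:1.0f0  # hypothetical save points, only for this sketch

# v3 API (removed in v4):
# hnn   = HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote())
# model = NeuralHamiltonianDE(hnn, (0.0f0, 1.0f0), Tsit5(); saveat = ts)
# nn    = TensorLayer([LegendreBasis(10), LegendreBasis(10)], 1)

# v4 replacements, matching the doc updates in this patch:
hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (64, 1)); autodiff = AutoZygote())
model = NeuralODE(hnn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, saveat = ts)
nn = Layers.TensorProductLayer([Basis.Legendre(10), Basis.Legendre(10)], 1)

# Parameter/state setup is unchanged from v3:
ps, st = Lux.setup(Xoshiro(0), hnn)
ps = ComponentArray(ps)
```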
diff --git a/docs/Project.toml b/docs/Project.toml
index 87d3a86d0..09aa6590e 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -42,7 +42,7 @@ CUDA = "5"
 ComponentArrays = "0.15"
 DataDeps = "0.7"
 DataFrames = "1"
-DiffEqFlux = "3"
+DiffEqFlux = "4"
 Distances = "0.10.7"
 Distributions = "0.25.78"
 Documenter = "1"
@@ -50,9 +50,9 @@ Flux = "0.14"
 ForwardDiff = "0.10"
 IterTools = "1"
 LinearAlgebra = "1"
-Lux = "0.5.5"
+Lux = "1"
 LuxCUDA = "0.3"
-MLDatasets = "0.7"
+MLDatasets = "0.7.18"
 MLUtils = "0.4"
 NNlib = "0.9"
 OneHotArrays = "0.2"
diff --git a/docs/src/examples/hamiltonian_nn.md b/docs/src/examples/hamiltonian_nn.md
index dc359b0c0..9c1716bad 100644
--- a/docs/src/examples/hamiltonian_nn.md
+++ b/docs/src/examples/hamiltonian_nn.md
@@ -33,7 +33,7 @@ dataloader = ncycle(
      for i in 1:(size(data, 2) ÷ B)), NEPOCHS)
 
-hnn = HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote())
+hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (64, 1)); autodiff = AutoZygote())
 ps, st = Lux.setup(Xoshiro(0), hnn)
 ps_c = ps |> ComponentArray
 
@@ -57,7 +57,7 @@ res = Optimization.solve(opt_prob, opt, dataloader; callback)
 
 ps_trained = res.u
 
-model = NeuralHamiltonianDE(
+model = NeuralODE(
     hnn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, save_start = true, saveat = t)
 
 pred = Array(first(model(data[:, 1], ps_trained, st)))
@@ -97,10 +97,10 @@ dataloader = ncycle(
 
 ### Training the HamiltonianNN
 
-We parameterize the HamiltonianNN with a small MultiLayered Perceptron. HNNs are trained by optimizing the gradients of the Neural Network. Zygote currently doesn't support nesting itself, so we will be using ForwardDiff in the training loop to compute the gradients of the HNN Layer for Optimization.
+We parameterize the Hamiltonian NN with a small multilayer perceptron. HNNs are trained by optimizing the gradients of the neural network. Zygote currently doesn't support nesting itself, so we will be using ForwardDiff in the training loop to compute the gradients of the HNN layer for optimization.
 
 ```@example hamiltonian
-hnn = HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote())
+hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (64, 1)); autodiff = AutoZygote())
 ps, st = Lux.setup(Xoshiro(0), hnn)
 ps_c = ps |> ComponentArray
 
@@ -127,10 +127,11 @@ ps_trained = res.u
 
 ### Solving the ODE using trained HNN
 
-In order to visualize the learned trajectories, we need to solve the ODE. We will use the `NeuralHamiltonianDE` layer, which is essentially a wrapper over `HamiltonianNN` layer, and solves the ODE.
+In order to visualize the learned trajectories, we need to solve the ODE. We will use the
+`NeuralODE` layer with the `HamiltonianNN` layer, which solves the ODE.
 
 ```@example hamiltonian
-model = NeuralHamiltonianDE(
+model = NeuralODE(
     hnn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, save_start = true, saveat = t)
 
 pred = Array(first(model(data[:, 1], ps_trained, st)))
diff --git a/docs/src/examples/neural_gde.md b/docs/src/examples/neural_gde.md
index 11643b7c8..e50c70245 100644
--- a/docs/src/examples/neural_gde.md
+++ b/docs/src/examples/neural_gde.md
@@ -14,7 +14,7 @@ using GraphNeuralNetworks, DifferentialEquations
 using DiffEqFlux: NeuralODE
 using GraphNeuralNetworks.GNNGraphs: normalized_adjacency
 using Lux, NNlib, Optimisers, Zygote, Random, ComponentArrays
-using Lux: AbstractExplicitLayer, glorot_normal, zeros32
+using Lux: AbstractLuxLayer, glorot_normal, zeros32
 import Lux: initialparameters, initialstates
 using SciMLSensitivity
 using Statistics: mean
@@ -46,7 +46,7 @@ nout = length(classes)
 epochs = 20
 
 # Define the graph neural network
-struct ExplicitGCNConv{F1, F2, F3, F4} <: AbstractExplicitLayer
+struct ExplicitGCNConv{F1, F2, F3, F4} <: AbstractLuxLayer
     in_chs::Int
     out_chs::Int
     activation::F1
@@ -152,7 +152,7 @@ using GraphNeuralNetworks, DifferentialEquations
 using DiffEqFlux: NeuralODE
 using GraphNeuralNetworks.GNNGraphs: normalized_adjacency
 using Lux, NNlib, Optimisers, Zygote, Random, ComponentArrays
-using Lux: AbstractExplicitLayer, glorot_normal, zeros32
+using Lux: AbstractLuxLayer, glorot_normal, zeros32
 import Lux: initialparameters, initialstates
 using SciMLSensitivity
 using Statistics: mean
@@ -207,10 +207,10 @@ epochs = 20
 
 ## Define the Graph Neural Network
 
-Here, we define a type of graph neural networks called `GCNConv`. We use the name `ExplicitGCNConv` to avoid naming conflicts with `GraphNeuralNetworks`. For more information on defining a layer with `Lux`, please consult to the [doc](http://lux.csail.mit.edu/dev/introduction/overview/#AbstractExplicitLayer-API).
+Here, we define a type of graph neural network called `GCNConv`. We use the name `ExplicitGCNConv` to avoid naming conflicts with `GraphNeuralNetworks`. For more information on defining a layer with `Lux`, please consult the [documentation](http://lux.csail.mit.edu/dev/introduction/overview/#AbstractLuxLayer-API).
 
 ```julia
-struct ExplicitGCNConv{F1, F2, F3} <: AbstractExplicitLayer
+struct ExplicitGCNConv{F1, F2, F3} <: AbstractLuxLayer
     Ã::AbstractMatrix # nomalized_adjacency matrix
     in_chs::Int
     out_chs::Int
diff --git a/docs/src/examples/tensor_layer.md b/docs/src/examples/tensor_layer.md
index ed717cb98..4bb7e3cae 100644
--- a/docs/src/examples/tensor_layer.md
+++ b/docs/src/examples/tensor_layer.md
@@ -33,8 +33,8 @@ Now, we create a TensorLayer that will be able to perform 10th order expansions
 a Legendre Basis:
 
 ```@example tensor
-A = [LegendreBasis(10), LegendreBasis(10)]
-nn = TensorLayer(A, 1)
+A = [Basis.Legendre(10), Basis.Legendre(10)]
+nn = Layers.TensorProductLayer(A, 1)
 ps, st = Lux.setup(Xoshiro(0), nn)
 ps = ComponentArray(ps)
 nn = StatefulLuxLayer{true}(nn, nothing, st)
diff --git a/src/DiffEqFlux.jl b/src/DiffEqFlux.jl
index f21e657a7..dc1b9d2a2 100644
--- a/src/DiffEqFlux.jl
+++ b/src/DiffEqFlux.jl
@@ -6,7 +6,7 @@ using ConcreteStructs: @concrete
 using Distributions: Distributions, ContinuousMultivariateDistribution, Distribution, logpdf
 using LinearAlgebra: LinearAlgebra, Diagonal, det, tr, mul!
 using Lux: Lux, Chain, Dense, StatefulLuxLayer, FromFluxAdaptor
-using LuxCore: LuxCore, AbstractExplicitLayer, AbstractExplicitContainerLayer
+using LuxCore: LuxCore, AbstractLuxLayer, AbstractLuxContainerLayer, AbstractLuxWrapperLayer
 using LuxLib: batched_matmul
 using Random: Random, AbstractRNG, randn!
 using Reexport: @reexport
@@ -20,13 +20,15 @@ using SciMLSensitivity: SciMLSensitivity, AdjointLSS, BacksolveAdjoint, EnzymeVJ
                         SteadyStateAdjoint, TrackerAdjoint, TrackerVJP, ZygoteAdjoint,
                         ZygoteVJP
 using Setfield: @set!
+using Static: True, False
 
 const CRC = ChainRulesCore
 
 @reexport using ADTypes, Lux, Boltz
 
 fixed_state_type(_) = true
-fixed_state_type(::Layers.HamiltonianNN{FST}) where {FST} = FST
+fixed_state_type(::Layers.HamiltonianNN{True}) = true
+fixed_state_type(::Layers.HamiltonianNN{False}) = false
 
 include("ffjord.jl")
 include("neural_de.jl")
@@ -34,8 +36,6 @@ include("neural_de.jl")
 include("collocation.jl")
 include("multiple_shooting.jl")
 
-include("deprecated.jl")
-
 export NeuralODE, NeuralDSDE, NeuralSDE, NeuralCDDE, NeuralDAE, AugmentedNDELayer,
        NeuralODEMM
 export FFJORD, FFJORDDistribution
diff --git a/src/deprecated.jl b/src/deprecated.jl
deleted file mode 100644
index 485cb5aad..000000000
--- a/src/deprecated.jl
+++ /dev/null
@@ -1,47 +0,0 @@
-# Tensor Layer
-Base.@deprecate TensorProductBasisFunction(f, n) Basis.GeneralBasisFunction{:none}(f, n, 1)
-
-for B in (:Chebyshev, :Sin, :Cos, :Fourier, :Legendre, :Polynomial)
-    Bold = Symbol(B, :Basis)
-    @eval Base.@deprecate $(Bold)(n) Basis.$(B)(n)
-end
-
-Base.@deprecate TensorLayer(model, out_dim::Int, init_p::F = randn) where {F <: Function} Boltz.Layers.TensorProductLayer(
-    model, out_dim; init_weight = init_p)
-
-# Spline Layer
-function SplineLayer(tspan, tstep, spline_basis; init_saved_points::F = nothing) where {F}
-    Base.depwarn(
-        "SplineLayer is deprecated and will be removed in the next major release. Refer to \
-         Boltz.jl `Layers.SplineLayer` for the newer version.",
-        :SplineLayer)
-
-    init_saved_points_corrected = if init_saved_points === nothing
-        nothing
-    else
-        let init_saved_points = init_saved_points
-            (rng, _, grid_min, grid_max, grid_step) -> begin
-                return init_saved_points(rng, (grid_min, grid_max), grid_step)
-            end
-        end
-    end
-
-    return Layers.SplineLayer((), first(tspan), last(tspan), tstep, spline_basis;
-        init_saved_points = init_saved_points_corrected)
-end
-
-export SplineLayer
-
-# Hamiltonian Neural Network
-Base.@deprecate HamiltonianNN(model; ad = AutoZygote()) Layers.HamiltonianNN{true}(
-    model; autodiff = ad)
-
-function NeuralHamiltonianDE(model, tspan, args...; ad = AutoForwardDiff(), kwargs...)
-    Base.depwarn(
-        "NeuralHamiltonianDE is deprecated, use `NeuralODE` with `Layers.HamiltonianNN` instead.",
-        :NeuralHamiltonianDE)
-    hnn = model isa Layers.HamiltonianNN ? model : HamiltonianNN(model; ad)
-    return NeuralODE(hnn, tspan, args, kwargs)
-end
-
-export NeuralHamiltonianDE
diff --git a/src/ffjord.jl b/src/ffjord.jl
index 8d89ea755..c8fe77b25 100644
--- a/src/ffjord.jl
+++ b/src/ffjord.jl
@@ -1,4 +1,4 @@
-abstract type CNFLayer <: LuxCore.AbstractExplicitContainerLayer{(:model,)} end
+abstract type CNFLayer <: AbstractLuxWrapperLayer{:model} end
 
 """
     FFJORD(model, tspan, input_dims, args...; ad = nothing, basedist = nothing, kwargs...)
@@ -21,7 +21,7 @@ for new values of x.
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
     dynamics of the model.
   - `basedist`: Distribution of the base variable. Set to the unit normal by default.
   - `input_dims`: Input Dimensions of the model.
@@ -49,7 +49,7 @@ Information Processing Systems, pp. 6572-6583. 2018.
 preprint arXiv:1810.01367 (2018).
 """
 @concrete struct FFJORD <: CNFLayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     basedist <: Union{Nothing, Distribution}
     ad
     input_dims
@@ -65,7 +65,7 @@ end
 
 function FFJORD(
         model, tspan, input_dims, args...; ad = nothing, basedist = nothing, kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return FFJORD(model, basedist, ad, input_dims, tspan, args, kwargs)
 end
diff --git a/src/neural_de.jl b/src/neural_de.jl
index fcdfdab0b..cb69e960e 100644
--- a/src/neural_de.jl
+++ b/src/neural_de.jl
@@ -1,5 +1,5 @@
-abstract type NeuralDELayer <: AbstractExplicitContainerLayer{(:model,)} end
-abstract type NeuralSDELayer <: AbstractExplicitContainerLayer{(:drift, :diffusion)} end
+abstract type NeuralDELayer <: AbstractLuxWrapperLayer{:model} end
+abstract type NeuralSDELayer <: AbstractLuxContainerLayer{(:drift, :diffusion)} end
 
 basic_tgrad(u, p, t) = zero(u)
 basic_dde_tgrad(u, h, p, t) = zero(u)
@@ -15,7 +15,7 @@ derivatives of the loss backwards in time.
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
     ẋ.
   - `tspan`: The timespan to be solved on.
   - `alg`: The algorithm used to solve the ODE. Defaults to `nothing`, i.e. the
@@ -33,14 +33,14 @@ References:
 
 [1] Pontryagin, Lev Semenovich. Mathematical theory of optimal processes. CRC press, 1987.
 """
 @concrete struct NeuralODE <: NeuralDELayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     tspan
     args
     kwargs
 end
 
 function NeuralODE(model, tspan, args...; kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return NeuralODE(model, tspan, args, kwargs)
 end
@@ -65,9 +65,9 @@ Constructs a neural stochastic differential equation (neural SDE) with diagonal
 
 Arguments:
 
-  - `drift`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `drift`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
     drift function.
-  - `diffusion`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines
+  - `diffusion`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines
     the diffusion function. Should output a vector of the same size as the input.
   - `tspan`: The timespan to be solved on.
   - `alg`: The algorithm used to solve the ODE. Defaults to `nothing`, i.e. the
@@ -78,16 +78,16 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralDSDE <: NeuralSDELayer
-    drift <: AbstractExplicitLayer
-    diffusion <: AbstractExplicitLayer
+    drift <: AbstractLuxLayer
+    diffusion <: AbstractLuxLayer
     tspan
     args
     kwargs
 end
 
 function NeuralDSDE(drift, diffusion, tspan, args...; kwargs...)
-    !(drift isa AbstractExplicitLayer) && (drift = FromFluxAdaptor()(drift))
-    !(diffusion isa AbstractExplicitLayer) && (diffusion = FromFluxAdaptor()(diffusion))
+    !(drift isa AbstractLuxLayer) && (drift = FromFluxAdaptor()(drift))
+    !(diffusion isa AbstractLuxLayer) && (diffusion = FromFluxAdaptor()(diffusion))
     return NeuralDSDE(drift, diffusion, tspan, args, kwargs)
 end
@@ -113,9 +113,9 @@ Constructs a neural stochastic differential equation (neural SDE).
 
 Arguments:
 
-  - `drift`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `drift`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
    drift function.
-  - `diffusion`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines
+  - `diffusion`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines
    the diffusion function. Should output a matrix that is `nbrown x size(x, 1)`.
   - `tspan`: The timespan to be solved on.
   - `nbrown`: The number of Brownian processes.
@@ -127,8 +127,8 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralSDE <: NeuralSDELayer
-    drift <: AbstractExplicitLayer
-    diffusion <: AbstractExplicitLayer
+    drift <: AbstractLuxLayer
+    diffusion <: AbstractLuxLayer
     tspan
     nbrown::Int
     args
@@ -136,8 +136,8 @@ Arguments:
 end
 
 function NeuralSDE(drift, diffusion, tspan, nbrown, args...; kwargs...)
-    !(drift isa AbstractExplicitLayer) && (drift = FromFluxAdaptor()(drift))
-    !(diffusion isa AbstractExplicitLayer) && (diffusion = FromFluxAdaptor()(diffusion))
+    !(drift isa AbstractLuxLayer) && (drift = FromFluxAdaptor()(drift))
+    !(diffusion isa AbstractLuxLayer) && (diffusion = FromFluxAdaptor()(diffusion))
     return NeuralSDE(drift, diffusion, tspan, nbrown, args, kwargs)
 end
@@ -165,7 +165,7 @@ Constructs a neural delay differential equation (neural DDE) with constant delay
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
    derivative function. Should take an input of size `[x; x(t - lag_1); ...; x(t - lag_n)]`
    and produce and output shaped like `x`.
   - `tspan`: The timespan to be solved on.
@@ -182,7 +182,7 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralCDDE <: NeuralDELayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     tspan
     hist
     lags
@@ -191,7 +191,7 @@ Arguments:
 end
 
 function NeuralCDDE(model, tspan, hist, lags, args...; kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return NeuralCDDE(model, tspan, hist, lags, args, kwargs)
 end
@@ -218,7 +218,7 @@ Constructs a neural differential-algebraic equation (neural DAE).
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
    derivative function. Should take an input of size `x` and produce the residual of
    `f(dx,x,t)` for only the differential variables.
   - `constraints_model`: A function `constraints_model(u,p,t)` for the fixed
@@ -233,7 +233,7 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralDAE <: NeuralDELayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     constraints_model
     tspan
     args
@@ -243,7 +243,7 @@ end
 
 function NeuralDAE(
         model, constraints_model, tspan, args...; differential_vars = nothing, kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return NeuralDAE(model, constraints_model, tspan, args, differential_vars, kwargs)
 end
@@ -288,7 +288,7 @@ constraint equations.
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
    ̇`f(u,p,t)`
  - `constraints_model`: A function `constraints_model(u,p,t)` for the fixed
    constraints to impose on the algebraic equations.
@@ -308,7 +308,7 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralODEMM <: NeuralDELayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     constraints_model
     tspan
     mass_matrix
@@ -317,7 +317,7 @@ Arguments:
 end
 
 function NeuralODEMM(model, constraints_model, tspan, mass_matrix, args...; kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return NeuralODEMM(model, constraints_model, tspan, mass_matrix, args, kwargs)
 end
@@ -376,10 +376,10 @@ end
 Constructs a Dimension Mover Layer.
 
 We can have Lux's conventional order `(data, channel, batch)` by using it as the last layer
-of `AbstractExplicitLayer` to swap the batch-index and the time-index of the Neural DE's
+of `AbstractLuxLayer` to swap the batch-index and the time-index of the Neural DE's
 output considering that each time point is a channel.
 """
-@concrete struct DimMover <: AbstractExplicitLayer
+@concrete struct DimMover <: AbstractLuxLayer
     from
     to
 end
diff --git a/test/hamiltonian_nn_tests.jl b/test/hamiltonian_nn_tests.jl
deleted file mode 100644
index 03aa8da32..000000000
--- a/test/hamiltonian_nn_tests.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-@testitem "Hamiltonian NN" tags=[:advancedneuralde] begin
-    using Zygote, OrdinaryDiffEq, ForwardDiff, Optimisers, Random, ComponentArrays,
-          Statistics
-
-    # Checks for Shapes and Non-Zero Gradients
-    u0 = rand(Float32, 6, 1)
-
-    for ad in (AutoForwardDiff(), AutoZygote())
-        hnn = HamiltonianNN(Chain(Dense(6 => 12, relu), Dense(12 => 1)); ad)
-        ps, st = Lux.setup(Xoshiro(0), hnn)
-        ps = ps |> ComponentArray
-
-        @test size(first(hnn(u0, ps, st))) == (6, 1)
-
-        @test !iszero(ForwardDiff.gradient(ps -> sum(first(hnn(u0, ps, st))), ps))
-        @test !iszero(only(Zygote.gradient(ps -> sum(first(hnn(u0, ps, st))), ps)))
-    end
-
-    # Test Convergence on a toy problem
-    t = range(0.0f0, 1.0f0; length = 64)
-    π_32 = Float32(π)
-    q_t = reshape(sin.(2π_32 * t), 1, :)
-    p_t = reshape(cos.(2π_32 * t), 1, :)
-    dqdt = 2π_32 .* p_t
-    dpdt = -2π_32 .* q_t
-
-    data = vcat(q_t, p_t)
-    target = vcat(dqdt, dpdt)
-
-    hnn = HamiltonianNN(Chain(Dense(2 => 16, relu), Dense(16 => 1)); ad = AutoForwardDiff())
-    ps, st = Lux.setup(Xoshiro(0), hnn)
-    ps = ps |> ComponentArray
-
-    opt = Optimisers.Adam(0.01)
-    st_opt = Optimisers.setup(opt, ps)
-    loss(data, target, ps) = mean(abs2, first(hnn(data, ps, st)) .- target)
-
-    initial_loss = loss(data, target, ps)
-
-    for epoch in 1:100
-        global ps, st_opt
-        gs = last(Zygote.gradient(loss, data, target, ps))
-        st_opt, ps = Optimisers.update!(st_opt, ps, gs)
-    end
-
-    final_loss = loss(data, target, ps)
-
-    @test initial_loss > 5 * final_loss
-
-    # Test output and gradient of NeuralHamiltonianDE Layer
-    tspan = (0.0f0, 1.0f0)
-
-    model = NeuralHamiltonianDE(
-        hnn, tspan, Tsit5(); save_everystep = false, save_start = true,
-        saveat = range(tspan[1], tspan[2]; length = 10))
-    sol = Array(first(model(data[:, 1], ps, st)))
-    @test size(sol) == (2, 10)
-
-    gs = only(Zygote.gradient(ps -> sum(Array(first(model(data[:, 1], ps, st)))), ps))
-
-    @test !iszero(gs)
-end
diff --git a/test/spline_layer_tests.jl b/test/spline_layer_tests.jl
deleted file mode 100644
index cd1b8c9f5..000000000
--- a/test/spline_layer_tests.jl
+++ /dev/null
@@ -1,63 +0,0 @@
-@testitem "SplineLayer" tags=[:basicneuralde] begin
-    using ComponentArrays, Zygote, DataInterpolations, Optimization,
-          OptimizationOptimisers, LinearAlgebra, Random
-
-    function run_test(f, layer, atol)
-        ps, st = Lux.setup(Xoshiro(0), layer)
-        ps = ComponentArray(ps)
-        model = StatefulLuxLayer{true}(layer, ps, st)
-
-        data_train_vals = rand(500)
-        data_train_fn = f.(data_train_vals)
-
-        function loss_function(θ)
-            data_pred = [model(x, θ) for x in data_train_vals]
-            loss = sum(abs.(data_pred .- data_train_fn)) / length(data_train_fn)
-            return loss
-        end
-
-        function callback(p, l)
-            @info "[SplineLayer] Loss: $l"
-            return false
-        end
-
-        optfunc = Optimization.OptimizationFunction(
-            (x, p) -> loss_function(x), Optimization.AutoZygote())
-        optprob = Optimization.OptimizationProblem(optfunc, ps)
-        res = Optimization.solve(
-            optprob, OptimizationOptimisers.Adam(0.1); callback, maxiters = 100)
-
-        optprob = Optimization.OptimizationProblem(optfunc, res.minimizer)
-        res = Optimization.solve(
-            optprob, OptimizationOptimisers.Adam(0.1); callback, maxiters = 100)
-        opt = res.minimizer
-
-        data_validate_vals = rand(100)
-        data_validate_fn = f.(data_validate_vals)
-
-        data_validate_pred = [model(x, opt) for x in data_validate_vals]
-
-        output = sum(abs.(data_validate_pred .- data_validate_fn)) /
-                 length(data_validate_fn)
-        return output < atol
-    end
-
-    ##test 01: affine function, Linear Interpolation
-    a, b = rand(2)
-    layer = SplineLayer((0.0, 1.0), 0.01, LinearInterpolation)
-    @test run_test(x -> a * x + b, layer, 0.1)
-
-    ##test 02: non-linear function, Quadratic Interpolation
-    a, b, c = rand(3)
-    layer = SplineLayer((0.0, 1.0), 0.01, QuadraticInterpolation)
-    @test run_test(x -> a * x^2 + b * x + x, layer, 0.1)
-
-    ##test 03: non-linear function, Quadratic Spline
-    a, b, c = rand(3)
-    layer = SplineLayer((0.0, 1.0), 0.1, QuadraticSpline)
-    @test run_test(x -> a * sin(b * x + c), layer, 0.1)
-
-    ##test 04: non-linear function, Cubic Spline
-    layer = SplineLayer((0.0, 1.0), 0.1, CubicSpline)
-    @test run_test(x -> exp(x) * x^2, layer, 0.1)
-end
diff --git a/test/tensor_product_tests.jl b/test/tensor_product_tests.jl
deleted file mode 100644
index d813a6be2..000000000
--- a/test/tensor_product_tests.jl
+++ /dev/null
@@ -1,56 +0,0 @@
-@testitem "TensorProductLayer" tags=[:basicneuralde] begin
-    using Zygote, Optimization, OptimizationOptimJL, OptimizationOptimisers,
-          LinearAlgebra, Random, ComponentArrays
-
-    function run_test(f, layer, atol, N)
-        ps, st = Lux.setup(Xoshiro(0), layer)
-        ps = ComponentArray(ps)
-        model = StatefulLuxLayer{true}(layer, ps, st)
-
-        data_train_vals = [rand(N) for k in 1:500]
-        data_train_fn = f.(data_train_vals)
-
-        function loss_function(p)
-            data_pred = [model(x, p) for x in data_train_vals]
-            loss = sum(norm.(data_pred .- data_train_fn)) / length(data_train_fn)
-            return loss
-        end
-
-        function cb(p, l)
-            @info "[TensorProductLayer] Loss: $l"
-            return false
-        end
-
-        optfunc = Optimization.OptimizationFunction(
-            (x, p) -> loss_function(x), Optimization.AutoZygote())
-        optprob = Optimization.OptimizationProblem(optfunc, ps)
-        res = Optimization.solve(
-            optprob, OptimizationOptimisers.Adam(0.1); callback = cb, maxiters = 100)
-        optprob = Optimization.OptimizationProblem(optfunc, res.minimizer)
-        res = Optimization.solve(
-            optprob, OptimizationOptimisers.Adam(0.01); callback = cb, maxiters = 100)
-        optprob = Optimization.OptimizationProblem(optfunc, res.minimizer)
-        res = Optimization.solve(optprob, BFGS(); callback = cb, maxiters = 200)
-        opt = res.minimizer
-
-        data_validate_vals = [rand(N) for k in 1:100]
-        data_validate_fn = f.(data_validate_vals)
-
-        data_validate_pred = [model(x, opt) for x in data_validate_vals]
-
-        return sum(norm.(data_validate_pred .- data_validate_fn)) /
-               length(data_validate_fn) < atol
-    end
-
-    ##test 01: affine function, Chebyshev and Polynomial basis
-    A = rand(2, 2)
-    b = rand(2)
-    layer = TensorLayer([ChebyshevBasis(10), PolynomialBasis(10)], 2)
-    @test run_test(x -> A * x + b, layer, 0.05, 2)
-
-    ##test 02: non-linear function, Chebyshev and Legendre basis
-    A = rand(2, 2)
-    b = rand(2)
-    layer = TensorLayer([ChebyshevBasis(7), FourierBasis(7)], 2)
-    @test run_test(x -> A * x * norm(x) + b * sin(norm(x)), layer, 0.10, 2)
-end
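The deleted tests above exercised the removed `HamiltonianNN`/`NeuralHamiltonianDE` API. A rough sketch of an equivalent smoke check against the v4 API follows; it is not part of this patch, and the hidden width, the sample input, and the expected `(2, 10)` shape are assumptions carried over from the deleted test.

```julia
using DiffEqFlux, OrdinaryDiffEq, ComponentArrays, Random, Zygote

# v4 Hamiltonian NN: Boltz layer wrapped in a plain NeuralODE (per the README note above)
hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (16, 1)); autodiff = AutoForwardDiff())
ps, st = Lux.setup(Xoshiro(0), hnn)
ps = ps |> ComponentArray

tspan = (0.0f0, 1.0f0)
model = NeuralODE(hnn, tspan, Tsit5(); save_everystep = false, save_start = true,
    saveat = range(tspan[1], tspan[2]; length = 10))

u0 = rand(Float32, 2)                  # (q, p) for a one-degree-of-freedom system
sol = Array(first(model(u0, ps, st)))  # expected to be 2 states × 10 save points
gs = only(Zygote.gradient(ps -> sum(Array(first(model(u0, ps, st)))), ps))

@assert size(sol) == (2, 10)
@assert !iszero(gs)
```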