diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml
index 837686c5a..81069a001 100644
--- a/.buildkite/pipeline.yml
+++ b/.buildkite/pipeline.yml
@@ -18,7 +18,7 @@ steps:
       RETESTITEMS_NWORKERS: 1 # These tests require quite a lot of GPU memory
       GROUP: CUDA
       DATADEPS_ALWAYS_ACCEPT: 'true'
-      JULIA_PKG_SERVER: "" # it often struggles with our large artifacts
+      JULIA_PKG_SERVER: ""
 
   - label: "Documentation"
     plugins:
@@ -42,5 +42,6 @@ steps:
       DATADEPS_ALWAYS_ACCEPT: true
       JULIA_DEBUG: "Documenter"
       SECRET_DOCUMENTER_KEY: "AdqcYtp4x3U5j1ELurHIoOwURqXcOan+qmihqVjsjhoGUzS/snTyZNQ5fxaJr8Yawm9CyyGvh+Q5O98St1LJ9S+pi9C5TFSbPWnNp/vXabMmeUEVLHVYHUeR2wgMCciSnM/oLw5sNAEj3hrWFjLslEGKQSptUCTWuU5WRizhQONDxeA3tz9biZUYvKanP8GjsHUkD3te15n1t6o78T1+EJxb1znrBSd9aK1Y4UaVjBEfVtLtTD8Z6VP1L4SVXVipxrDdzwzbzUDaTpvjo3z3e9qx2u6Xn5qa/os6JY81jRa5ZTWFkev73DYhoFmordSI85grOPwNpvrNWqOAs5kTDg==;U2FsdGVkX1+TXM0w98SRH5lY0Dw+nmRJ1xtJmffK+GVWHdMjhiIxQoYGGoP065hgl1VOf+oLzqWWFoYIfcz5i/VKD5F7O3EhfLdcmG2Y15u4sxr/hLKKMedSCueiusNd4N9EGGmJ4LLY0I1K6vA8Pa2eRwE+yDE+wfSpqpTC3yxMo40Xk7wra6ZwAybGpSHzOItV+2QGHttr/WLntRbx7GD8HDBC/LGNhtmFzFhAo/2CiQ0qgMDHBxyVqZlAdCWdu9xGmD9FC2+HDGv6QyNge1Ajmg6TNd6tuRhAP6VfDidEtaohcGl2TxbuUcd1OSrSbwmqxcw0IhVriRN8FgB8pKYmol7J71za9ljViyzgAjhQFvute/PYB2nw9MB8yCoNu6X0hqoLdmSxbzerpeYh0yRdi6SedESBJV3PgL7uahnyHbhC4dudFPavobeP9nU0okzKXG7fYfR1aiqgeeed4WFi48u+pciWv6Uo8J4lbBTUXu4xI+yWCpt01LNXyTmsIPYUvqbE8U0DkJNsNie9lw4of3UYkKhtVkLBoQg8++uc6i0w70+/sKZDp2OA5Y1jMFrRQaWUHyaRfpX3pXqvghAfVLEybiSWnpE0JiAnBsDcI3zajc4Cp+lui9G0+E8Lc+NbXOMbjiYHejjN46/03sIHNu0YPlU5p7o2xrGpa3cw6o3yHhBE7yVTcBc7A0AFPGZQxTLOEw1lYf7+B6J5AEpDhxR9+gUhmL+S+2kUw+nxsMxdD0Tunfeg4CIoeB9Tl7uIBrZDQ23uVRrcEyE6t3zf7skBcW3wlrHpAY61CxuGuMolcTl0JaeYFTJPYzOgPa+nD/vKaICsrRDkaSUUHcGufbTgqdJLjIkh+M9a7+DPKpfoT7H4gp4VrocqClFmmPoZZAIKjxXAEnEHWILBw0a9cOar1DKfJVoyN1vQIdVeux90a50Ao62m3sHoYiXY3DeutHkAmfWWDl/5zcU2h1T0XWHmRnjjmAW0fZPL+E38rKXYQECeHMDFEYYfbGyZMJx9T9pwfvxTM4Yzd4nB0qspOXMdeGvnVbzqlnaGJpxs/M5zyxILMQzq979bwSI0TPFRqLojhNezOhZHZaJdFoWq6UqW1kFDyzNIIRCQak0kGuhpCeFqqxiFFtC1M1vskpZ5UfqtCQSgILU+XbJWAxZOqrAxy2T7+h9JMS/jLPW+tyjCJx/bhqSGF9fBt6Q1R4ZL1MjaBSocnMj/5H7IZI2TdH6ulTyigZv7OEMQZRSyTrZgBNPLAiMHVoZVLkZF2NZse/4tHG/7i0Lio+m2z8WsjSQa04LPjtnlCSpYrug8EoGN4NruaRBDBIlTV9w6Rvz4YYB4iDht14ifF6XJVl/uo4jWKHAJC6Bc5IwFD80A/jYmx1vbLwvwVgYGCOW80WSFUGSnBSwVDLsLufXWt9ct8Kql3ICl2/iLO1ZZofELOgddV8yds1vrBdhn8jP1QCrTqtS0ITgLOabDEJMAma7St5R+Oa7kAj2zlVh/A0WXolGD04ReHnuiNN7S6C/ePpSTP/fMSa5bCrmQkw93fEgHdNOpQq9DwYNa9wEijtdEJsw5Kl+B73SNIhG+X7h1sN1DsCao2v1EPtxzaXw51kfJHzhtdCKnKXYap8Lk+twZ6KKH3QZaSsq2LSL7E7da3ZXwo4oRMjV5OTkWaklGKZmzJaMrUnpbJMQUfb4tyNDBJ/52arcTLOn9b72d+927qUfKNCwm+Ma5tUJZ9PkOUxObbXgguXvgVVBL5Li0kfcCyYQC8HcNu1ZkmP7FbJzYo4m/e8v+jASNorC+49BvDE8WlSw+6dJQoP29S0u2OyZ4CSLYvqVDz5WIiLYR9OH2Dk7dB6n69jgngYkEtTF+1TWwQOR0d+6xungqU86W+4JvPkBx4YwVHmnw+iFRNqJd/OTmBVVDYpEkC6N38SuCRAuZwjcVfl6ERm0C7FEJOlmXqs6UeUuPNxuCE7yKcD4J0JGhVjSxN4c3dbV0aipIt9/ob+I2rXA3TUVOU7G+svsboOo1bHlUfoL1HQcasUHwst6ScsrlzJtSLActVb8QMh2iOw5zlxHGyq/MqU+tcquLZc3ctYZwzXatMjEqNqP4nHF1HHkYavwrhFr4U6lbnPZ6ZlUCdrXKDsn7BrRnn2MQ4My7k/Cau6174Zln9RRB4LRs45P3oDUug3Xc8/erekvf8L1HRFsiHzv+8ssvO/dfOEkD1hUTacNejaWt6HXCPC3zXhnwsk/lw3TLXSuxWO3hDpxBuAy8gRmCVZq4HPLAod/lEIpXQY3Rij8mmU28tCnhXrjxTTCe1ci9F4dy2IigN/1YeA5k+6gVpNmV9NvSDlPKN5vkGymFXokBiHkBKOd/uIkNORPYbjEGq17mO4CTfbJ8uDKneibC22VOaqB5Xk3/Xp4zz4TVCCr4xznVui7OOeuNaUptrypsMRtWfYiJ52s8TYiOsQ4SyKmXdCy51k16wGT9/ZrPpEktYFsC6g4SdY04MvBaowPYsudy9uOyUnZFuxPX+SLusRfLHlgkZU2SxrPMlHbOFpFVHKgCiKXLBoDWL3Bkn/9SUfoFYWf65KEfTeKJc7FCxsH/r6ngLMlJidn/fxI0D5FyVh0FVJnpdhGgv8jXo28nVCRLTNwRl5TzUjdz4EIvSxugmFR38qbpdkYeOlCdggE8safaxy+xXpYX0diec3x9MYvV7cWMtOCmTwKkevlPUTL+UuQiWKNf7fOzmGG67wNKfFtwmMxCbQ20nT2eAT6TjZ9eowW/E7s8i3cpCycJjeshuz7Y1G5ECLjSKe2UjCEGhBqRR8T/cBObQBErrilsRgjdO6w4UvBy4FkuzVpvB2cKhMt3HocBLNMlhUHaIgQfh1C6PMRT5S10+e659+qvF0GZOCJ7Y6b5Z9X9XipJEJo8ISC5uR7Z/WmEqgVtTdfezJvnGoibun2FjFriTP1W+HqHcGTpgwfWrVwHuGXYreckkzZaP//Q5uHajxj4AYQhEmjcQ6jcRNAWxOYLKZWqy7d2sDw79wdvKT6kfKg3wiKB/Arl1Mv33b1FkCfr+MRu1nzWeuAG72su6L91T0CVymOQQCZpA2ubPYrIH3vyKmRLvFIgwIwrWRZtsY8dvznOGmHvlEgUw+C2Ln0loc6fDDQ4Jk20LSXzRtsCeJ+PUahaDT9aW4xgWXHTAjGXjaZUak+5rs3ck7ZH3vbvVtbfg3/PKiCKWKXtt/2ZIZTxgdQVlGhZcz/LVzjIPOSTfKgcyz+pVXFq+hC2fubtsgTTm6DkXzP5pFdens05f27nXdr5FYNoYSJcU7w58gHTZOIez7oiMdkG+FObg9u/cpz923hOeBOmYaoQ9JePCQzQkWyPwHVD1IFsGBKtVIwahdKnpmjxubdr2ehdYbC7SsMD772YdPKOx07ipmz56pGBVqG7nUqYDxX2Mr5hvrHKbnLmYQykBSfazMUmK5/c+dNngn7QU8kN0fxFDmDzaUeQcZwbBNVlfp51Gsc+LvYoyXMUhBu53wQgNmq+ZJCSO4V+XBIgJAKIOSlVDq0GXe1VtyfNC4XcU0ey84K0/mD3RmtNGc2YYNp6OPqwzJAIexQcSr8pehN53fqGuGrRX3EqPzxNZwM7W+VzzpT+Ky2jpLl2YrQyuROIAMV09P8HoDxBorSHAQXkijs2ByIAGbQqJhwtbcovSPIMqvHPeKKMuFfNzKnmCkUysklNxQynM2clwKwbOchghZOBNH2sQ4atfhHjdo66dXtzSmngyPujZcwinq1b1VUbG1n9BuusgdUrhpt/28MJRYLt4tJFVBqYGu98Ewa1oX+7xqCmhEe2us43fg7EYBpwLBAVDNsohVO69upLR+Yy2C1lhqJSSbO+JLKg70/7onpMI8JcCtiNYOMFYMix9ynkpBf8gN+cM/VgL4cldHYwbaAJXgnD7PxdmDIy7r8oZnGOHE//a3iDyB+Xqy0t9c41OYYn6PkB32BqRHFUvbzU+6kaDpQD/gk0EBTb51SLmy3IBBLKpKw1R0CVfS2wY5XX7vYYpgAMQzsoZpL3Ep0NpcRqtutcec0o0VXkd3B9wXJhDG+en0MaY9vc6V4g+nT8Z2jZw0A6lXnbDxlQN/CmvvrcsexHGGIj6vjpQs/oSyvOYaD1gVTWdQgcPhCYZGVH5O/llnKfxsRFVU3g6XvL4ND0oQ7S98eHRhz+8TqOx9Se47vEEC1O3bNDf6Rnnm+aB9vD0GKQ6iAETWI74yF5HrCpZY2XIDK8OgOQJoiWpDWaDxfNjK8nWMSjV8bnEdIzLP08p8fLWP/+JPeJkUB91mxmi5mhMjKFpKo/lXtvM2E8zmzkjo/VyLGYaij7EgF1XNIWRC5LWlsrrPiqVfQlmftDzjaG+jCx/47NLws="
+      JULIA_PKG_SERVER: ""
     if: build.message !~ /\[skip docs\]/ && !build.pull_request.draft
     timeout_in_minutes: 1000
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
index 839ed6a4a..935e8c8de 100644
--- a/.github/workflows/CI.yml
+++ b/.github/workflows/CI.yml
@@ -42,8 +42,6 @@ jobs:
             ${{ runner.os }}-
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
-        with:
-          coverage: false
         env:
           GROUP: ${{ matrix.group }}
       - uses: julia-actions/julia-processcoverage@v1
diff --git a/Project.toml b/Project.toml
index 72a79b18c..aee97ae05 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "DiffEqFlux"
 uuid = "aae7a2af-3d4f-5e19-a356-7da93b79d9d0"
 authors = ["Chris Rackauckas "]
-version = "3.6.0"
+version = "4.0.0"
 
 [deps]
 ADTypes = "47edcb42-4c32-4615-8424-f2b9edc5f35b"
@@ -18,6 +18,7 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
 SciMLSensitivity = "1ed8b502-d754-442c-8d5d-10ac956f44a1"
 Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46"
+Static = "aedffcd0-7271-4cad-89d0-dc628f76c6d3"
 
 [weakdeps]
 DataInterpolations = "82cc6244-b520-54b8-b5a6-8a565e85f1d0"
@@ -29,7 +30,7 @@ DiffEqFluxDataInterpolationsExt = "DataInterpolations"
 ADTypes = "1.5"
 Aqua = "0.8.7"
 BenchmarkTools = "1.5.0"
-Boltz = "0.4.2"
+Boltz = "1"
 ChainRulesCore = "1"
 ComponentArrays = "0.15.17"
 ConcreteStructs = "0.2"
@@ -46,11 +47,10 @@ ForwardDiff = "0.10"
 Hwloc = "3"
 InteractiveUtils = "<0.0.1, 1"
 LinearAlgebra = "1.10"
-Lux = "0.5.65"
+Lux = "1"
 LuxCUDA = "0.3.2"
-LuxCore = "0.1"
-LuxLib = "0.3.50"
-MLDatasets = "0.7.14"
+LuxCore = "1"
+LuxLib = "1.2"
 NNlib = "0.9.22"
 OneHotArrays = "0.2.5"
 Optimisers = "0.3"
@@ -65,6 +65,7 @@ Reexport = "0.2, 1"
 SciMLBase = "2"
 SciMLSensitivity = "7"
 Setfield = "1.1.1"
+Static = "1.1.1"
 Statistics = "1.10"
 StochasticDiffEq = "6.68.0"
 Test = "1.10"
@@ -87,7 +88,6 @@ ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
 Hwloc = "0e44f5e4-bd66-52a0-8798-143a42290a1d"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
-MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 NNlib = "872c559c-99b0-510c-b3b7-b6c96a88d5cd"
 OneHotArrays = "0b1bfda6-eb8a-41d2-88d8-f5af5cad476f"
 Optimisers = "3bd65402-5787-11e9-1adc-39752487f4e2"
@@ -105,4 +105,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Aqua", "BenchmarkTools", "ComponentArrays", "DataInterpolations", "DelayDiffEq", "DiffEqCallbacks", "Distances", "Distributed", "DistributionsAD", "ExplicitImports", "ForwardDiff", "Flux", "Hwloc", "InteractiveUtils", "LuxCUDA", "MLDatasets", "NNlib", "OneHotArrays", "Optimisers", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "OrdinaryDiffEq", "Printf", "Random", "ReTestItems", "Reexport", "Statistics", "StochasticDiffEq", "Test", "Zygote"]
+test = ["Aqua", "BenchmarkTools", "ComponentArrays", "DataInterpolations", "DelayDiffEq", "DiffEqCallbacks", "Distances", "Distributed", "DistributionsAD", "ExplicitImports", "ForwardDiff", "Flux", "Hwloc", "InteractiveUtils", "LuxCUDA", "NNlib", "OneHotArrays", "Optimisers", "Optimization", "OptimizationOptimJL", "OptimizationOptimisers", "OrdinaryDiffEq", "Printf", "Random", "ReTestItems", "Reexport", "Statistics", "StochasticDiffEq", "Test", "Zygote"]
diff --git a/README.md b/README.md
index 0824a4a52..708612e47 100644
--- a/README.md
+++ b/README.md
@@ -63,17 +63,18 @@ explore various ways to integrate the two methodologies:
 
 ## Breaking Changes
 
-### v4 (upcoming)
+### v4
 
   - `TensorLayer` has been removed, use `Boltz.Layers.TensorProductLayer` instead.
   - Basis functions in DiffEqFlux have been removed in favor of `Boltz.Basis` module.
   - `SplineLayer` has been removed, use `Boltz.Layers.SplineLayer` instead.
   - `NeuralHamiltonianDE` has been removed, use `NeuralODE` with `Layers.HamiltonianNN` instead.
   - `HamiltonianNN` has been removed in favor of `Layers.HamiltonianNN`.
+  - `Lux` and `Boltz` are updated to v1.
 
 ### v3
 
-  - Flux dependency is dropped. If a non Lux `AbstractExplicitLayer` is passed we try to automatically convert it to a Lux model with `FromFluxAdaptor()(model)`.
+  - Flux dependency is dropped. If a model that is not a Lux `AbstractLuxLayer` is passed, we try to automatically convert it to a Lux model with `FromFluxAdaptor()(model)`.
  - `Flux` is no longer re-exported from `DiffEqFlux`. Instead we reexport `Lux`.
  - `NeuralDAE` now allows an optional `du0` as input.
  - `TensorLayer` is now a Lux Neural Network.
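For readers migrating, here is a minimal before/after sketch of the renames listed above, assembled from the documentation updates further down in this patch. It is illustrative only: `Layers` and `Basis` are assumed to come from the re-exported Boltz, and the save grid `ts` is a placeholder introduced for this sketch.

```julia
using DiffEqFlux, OrdinaryDiffEq, ComponentArrays, Random

ts = 0.0f0:0.1f0:1.0f0  # hypothetical save points, only for this sketch

# v3 API (removed in v4):
# hnn   = HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote())
# model = NeuralHamiltonianDE(hnn, (0.0f0, 1.0f0), Tsit5(); saveat = ts)
# nn    = TensorLayer([LegendreBasis(10), LegendreBasis(10)], 1)

# v4 replacements, matching the doc updates in this patch:
hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (64, 1)); autodiff = AutoZygote())
model = NeuralODE(hnn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, saveat = ts)
nn = Layers.TensorProductLayer([Basis.Legendre(10), Basis.Legendre(10)], 1)

# Parameter/state setup is unchanged from v3:
ps, st = Lux.setup(Xoshiro(0), hnn)
ps = ComponentArray(ps)
```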
diff --git a/docs/Project.toml b/docs/Project.toml
index 87d3a86d0..09aa6590e 100644
--- a/docs/Project.toml
+++ b/docs/Project.toml
@@ -42,7 +42,7 @@ CUDA = "5"
 ComponentArrays = "0.15"
 DataDeps = "0.7"
 DataFrames = "1"
-DiffEqFlux = "3"
+DiffEqFlux = "4"
 Distances = "0.10.7"
 Distributions = "0.25.78"
 Documenter = "1"
@@ -50,9 +50,9 @@ Flux = "0.14"
 ForwardDiff = "0.10"
 IterTools = "1"
 LinearAlgebra = "1"
-Lux = "0.5.5"
+Lux = "1"
 LuxCUDA = "0.3"
-MLDatasets = "0.7"
+MLDatasets = "0.7.18"
 MLUtils = "0.4"
 NNlib = "0.9"
 OneHotArrays = "0.2"
diff --git a/docs/src/examples/hamiltonian_nn.md b/docs/src/examples/hamiltonian_nn.md
index dc359b0c0..9c1716bad 100644
--- a/docs/src/examples/hamiltonian_nn.md
+++ b/docs/src/examples/hamiltonian_nn.md
@@ -33,7 +33,7 @@ dataloader = ncycle(
      for i in 1:(size(data, 2) ÷ B)), NEPOCHS)
 
-hnn = HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote())
+hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (64, 1)); autodiff = AutoZygote())
 ps, st = Lux.setup(Xoshiro(0), hnn)
 ps_c = ps |> ComponentArray
 
@@ -57,7 +57,7 @@ res = Optimization.solve(opt_prob, opt, dataloader; callback)
 
 ps_trained = res.u
 
-model = NeuralHamiltonianDE(
+model = NeuralODE(
     hnn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, save_start = true, saveat = t)
 
 pred = Array(first(model(data[:, 1], ps_trained, st)))
@@ -97,10 +97,10 @@ dataloader = ncycle(
 
 ### Training the HamiltonianNN
 
-We parameterize the HamiltonianNN with a small MultiLayered Perceptron. HNNs are trained by optimizing the gradients of the Neural Network. Zygote currently doesn't support nesting itself, so we will be using ForwardDiff in the training loop to compute the gradients of the HNN Layer for Optimization.
+We parameterize the Hamiltonian NN with a small multilayer perceptron. HNNs are trained by optimizing the gradients of the neural network. Zygote currently doesn't support nesting itself, so we will be using ForwardDiff in the training loop to compute the gradients of the HNN layer for optimization.
 
 ```@example hamiltonian
-hnn = HamiltonianNN(Chain(Dense(2 => 64, relu), Dense(64 => 1)); ad = AutoZygote())
+hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (64, 1)); autodiff = AutoZygote())
 ps, st = Lux.setup(Xoshiro(0), hnn)
 ps_c = ps |> ComponentArray
 
@@ -127,10 +127,11 @@ ps_trained = res.u
 
 ### Solving the ODE using trained HNN
 
-In order to visualize the learned trajectories, we need to solve the ODE. We will use the `NeuralHamiltonianDE` layer, which is essentially a wrapper over `HamiltonianNN` layer, and solves the ODE.
+In order to visualize the learned trajectories, we need to solve the ODE. We will use the
+`NeuralODE` layer with the `HamiltonianNN` layer, which solves the ODE.
 
 ```@example hamiltonian
-model = NeuralHamiltonianDE(
+model = NeuralODE(
     hnn, (0.0f0, 1.0f0), Tsit5(); save_everystep = false, save_start = true, saveat = t)
 
 pred = Array(first(model(data[:, 1], ps_trained, st)))
diff --git a/docs/src/examples/neural_gde.md b/docs/src/examples/neural_gde.md
index 11643b7c8..e50c70245 100644
--- a/docs/src/examples/neural_gde.md
+++ b/docs/src/examples/neural_gde.md
@@ -14,7 +14,7 @@ using GraphNeuralNetworks, DifferentialEquations
 using DiffEqFlux: NeuralODE
 using GraphNeuralNetworks.GNNGraphs: normalized_adjacency
 using Lux, NNlib, Optimisers, Zygote, Random, ComponentArrays
-using Lux: AbstractExplicitLayer, glorot_normal, zeros32
+using Lux: AbstractLuxLayer, glorot_normal, zeros32
 import Lux: initialparameters, initialstates
 using SciMLSensitivity
 using Statistics: mean
@@ -46,7 +46,7 @@ nout = length(classes)
 epochs = 20
 
 # Define the graph neural network
-struct ExplicitGCNConv{F1, F2, F3, F4} <: AbstractExplicitLayer
+struct ExplicitGCNConv{F1, F2, F3, F4} <: AbstractLuxLayer
     in_chs::Int
     out_chs::Int
     activation::F1
@@ -152,7 +152,7 @@ using GraphNeuralNetworks, DifferentialEquations
 using DiffEqFlux: NeuralODE
 using GraphNeuralNetworks.GNNGraphs: normalized_adjacency
 using Lux, NNlib, Optimisers, Zygote, Random, ComponentArrays
-using Lux: AbstractExplicitLayer, glorot_normal, zeros32
+using Lux: AbstractLuxLayer, glorot_normal, zeros32
 import Lux: initialparameters, initialstates
 using SciMLSensitivity
 using Statistics: mean
@@ -207,10 +207,10 @@ epochs = 20
 
 ## Define the Graph Neural Network
 
-Here, we define a type of graph neural networks called `GCNConv`. We use the name `ExplicitGCNConv` to avoid naming conflicts with `GraphNeuralNetworks`. For more information on defining a layer with `Lux`, please consult to the [doc](http://lux.csail.mit.edu/dev/introduction/overview/#AbstractExplicitLayer-API).
+Here, we define a type of graph neural network called `GCNConv`. We use the name `ExplicitGCNConv` to avoid naming conflicts with `GraphNeuralNetworks`. For more information on defining a layer with `Lux`, please consult the [documentation](http://lux.csail.mit.edu/dev/introduction/overview/#AbstractLuxLayer-API).
 
 ```julia
-struct ExplicitGCNConv{F1, F2, F3} <: AbstractExplicitLayer
+struct ExplicitGCNConv{F1, F2, F3} <: AbstractLuxLayer
     Ã::AbstractMatrix # nomalized_adjacency matrix
     in_chs::Int
     out_chs::Int
diff --git a/docs/src/examples/tensor_layer.md b/docs/src/examples/tensor_layer.md
index ed717cb98..4bb7e3cae 100644
--- a/docs/src/examples/tensor_layer.md
+++ b/docs/src/examples/tensor_layer.md
@@ -33,8 +33,8 @@ Now, we create a TensorLayer that will be able to perform 10th order expansions
 a Legendre Basis:
 
 ```@example tensor
-A = [LegendreBasis(10), LegendreBasis(10)]
-nn = TensorLayer(A, 1)
+A = [Basis.Legendre(10), Basis.Legendre(10)]
+nn = Layers.TensorProductLayer(A, 1)
 ps, st = Lux.setup(Xoshiro(0), nn)
 ps = ComponentArray(ps)
 nn = StatefulLuxLayer{true}(nn, nothing, st)
diff --git a/src/DiffEqFlux.jl b/src/DiffEqFlux.jl
index f21e657a7..dc1b9d2a2 100644
--- a/src/DiffEqFlux.jl
+++ b/src/DiffEqFlux.jl
@@ -6,7 +6,7 @@ using ConcreteStructs: @concrete
 using Distributions: Distributions, ContinuousMultivariateDistribution, Distribution, logpdf
 using LinearAlgebra: LinearAlgebra, Diagonal, det, tr, mul!
 using Lux: Lux, Chain, Dense, StatefulLuxLayer, FromFluxAdaptor
-using LuxCore: LuxCore, AbstractExplicitLayer, AbstractExplicitContainerLayer
+using LuxCore: LuxCore, AbstractLuxLayer, AbstractLuxContainerLayer, AbstractLuxWrapperLayer
 using LuxLib: batched_matmul
 using Random: Random, AbstractRNG, randn!
 using Reexport: @reexport
@@ -20,13 +20,15 @@ using SciMLSensitivity: SciMLSensitivity, AdjointLSS, BacksolveAdjoint, EnzymeVJ
                         SteadyStateAdjoint, TrackerAdjoint, TrackerVJP, ZygoteAdjoint,
                         ZygoteVJP
 using Setfield: @set!
+using Static: True, False
 
 const CRC = ChainRulesCore
 
 @reexport using ADTypes, Lux, Boltz
 
 fixed_state_type(_) = true
-fixed_state_type(::Layers.HamiltonianNN{FST}) where {FST} = FST
+fixed_state_type(::Layers.HamiltonianNN{True}) = true
+fixed_state_type(::Layers.HamiltonianNN{False}) = false
 
 include("ffjord.jl")
 include("neural_de.jl")
@@ -34,8 +36,6 @@ include("neural_de.jl")
 include("collocation.jl")
 include("multiple_shooting.jl")
 
-include("deprecated.jl")
-
 export NeuralODE, NeuralDSDE, NeuralSDE, NeuralCDDE, NeuralDAE, AugmentedNDELayer,
        NeuralODEMM
 export FFJORD, FFJORDDistribution
diff --git a/src/deprecated.jl b/src/deprecated.jl
deleted file mode 100644
index 485cb5aad..000000000
--- a/src/deprecated.jl
+++ /dev/null
@@ -1,47 +0,0 @@
-# Tensor Layer
-Base.@deprecate TensorProductBasisFunction(f, n) Basis.GeneralBasisFunction{:none}(f, n, 1)
-
-for B in (:Chebyshev, :Sin, :Cos, :Fourier, :Legendre, :Polynomial)
-    Bold = Symbol(B, :Basis)
-    @eval Base.@deprecate $(Bold)(n) Basis.$(B)(n)
-end
-
-Base.@deprecate TensorLayer(model, out_dim::Int, init_p::F = randn) where {F <: Function} Boltz.Layers.TensorProductLayer(
-    model, out_dim; init_weight = init_p)
-
-# Spline Layer
-function SplineLayer(tspan, tstep, spline_basis; init_saved_points::F = nothing) where {F}
-    Base.depwarn(
-        "SplineLayer is deprecated and will be removed in the next major release. Refer to \
-         Boltz.jl `Layers.SplineLayer` for the newer version.",
-        :SplineLayer)
-
-    init_saved_points_corrected = if init_saved_points === nothing
-        nothing
-    else
-        let init_saved_points = init_saved_points
-            (rng, _, grid_min, grid_max, grid_step) -> begin
-                return init_saved_points(rng, (grid_min, grid_max), grid_step)
-            end
-        end
-    end
-
-    return Layers.SplineLayer((), first(tspan), last(tspan), tstep, spline_basis;
-        init_saved_points = init_saved_points_corrected)
-end
-
-export SplineLayer
-
-# Hamiltonian Neural Network
-Base.@deprecate HamiltonianNN(model; ad = AutoZygote()) Layers.HamiltonianNN{true}(
-    model; autodiff = ad)
-
-function NeuralHamiltonianDE(model, tspan, args...; ad = AutoForwardDiff(), kwargs...)
-    Base.depwarn(
-        "NeuralHamiltonianDE is deprecated, use `NeuralODE` with `Layers.HamiltonianNN` instead.",
-        :NeuralHamiltonianDE)
-    hnn = model isa Layers.HamiltonianNN ? model : HamiltonianNN(model; ad)
-    return NeuralODE(hnn, tspan, args, kwargs)
-end
-
-export NeuralHamiltonianDE
diff --git a/src/ffjord.jl b/src/ffjord.jl
index 8d89ea755..c8fe77b25 100644
--- a/src/ffjord.jl
+++ b/src/ffjord.jl
@@ -1,4 +1,4 @@
-abstract type CNFLayer <: LuxCore.AbstractExplicitContainerLayer{(:model,)} end
+abstract type CNFLayer <: AbstractLuxWrapperLayer{:model} end
 
 """
     FFJORD(model, tspan, input_dims, args...; ad = nothing, basedist = nothing, kwargs...)
@@ -21,7 +21,7 @@ for new values of x.
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
     dynamics of the model.
   - `basedist`: Distribution of the base variable. Set to the unit normal by default.
   - `input_dims`: Input Dimensions of the model.
@@ -49,7 +49,7 @@ Information Processing Systems, pp. 6572-6583. 2018.
 preprint arXiv:1810.01367 (2018).
 """
 @concrete struct FFJORD <: CNFLayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     basedist <: Union{Nothing, Distribution}
     ad
     input_dims
@@ -65,7 +65,7 @@ end
 
 function FFJORD(
         model, tspan, input_dims, args...; ad = nothing, basedist = nothing, kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return FFJORD(model, basedist, ad, input_dims, tspan, args, kwargs)
 end
diff --git a/src/neural_de.jl b/src/neural_de.jl
index fcdfdab0b..cb69e960e 100644
--- a/src/neural_de.jl
+++ b/src/neural_de.jl
@@ -1,5 +1,5 @@
-abstract type NeuralDELayer <: AbstractExplicitContainerLayer{(:model,)} end
-abstract type NeuralSDELayer <: AbstractExplicitContainerLayer{(:drift, :diffusion)} end
+abstract type NeuralDELayer <: AbstractLuxWrapperLayer{:model} end
+abstract type NeuralSDELayer <: AbstractLuxContainerLayer{(:drift, :diffusion)} end
 
 basic_tgrad(u, p, t) = zero(u)
 basic_dde_tgrad(u, h, p, t) = zero(u)
@@ -15,7 +15,7 @@ derivatives of the loss backwards in time.
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
     ẋ.
   - `tspan`: The timespan to be solved on.
   - `alg`: The algorithm used to solve the ODE. Defaults to `nothing`, i.e. the
@@ -33,14 +33,14 @@ References:
 
 [1] Pontryagin, Lev Semenovich. Mathematical theory of optimal processes. CRC press, 1987.
 """
 @concrete struct NeuralODE <: NeuralDELayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     tspan
     args
     kwargs
 end
 
 function NeuralODE(model, tspan, args...; kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return NeuralODE(model, tspan, args, kwargs)
 end
@@ -65,9 +65,9 @@ Constructs a neural stochastic differential equation (neural SDE) with diagonal
 
 Arguments:
 
-  - `drift`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `drift`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
     drift function.
-  - `diffusion`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines
+  - `diffusion`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines
     the diffusion function. Should output a vector of the same size as the input.
   - `tspan`: The timespan to be solved on.
   - `alg`: The algorithm used to solve the ODE. Defaults to `nothing`, i.e. the
@@ -78,16 +78,16 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralDSDE <: NeuralSDELayer
-    drift <: AbstractExplicitLayer
-    diffusion <: AbstractExplicitLayer
+    drift <: AbstractLuxLayer
+    diffusion <: AbstractLuxLayer
     tspan
     args
     kwargs
 end
 
 function NeuralDSDE(drift, diffusion, tspan, args...; kwargs...)
-    !(drift isa AbstractExplicitLayer) && (drift = FromFluxAdaptor()(drift))
-    !(diffusion isa AbstractExplicitLayer) && (diffusion = FromFluxAdaptor()(diffusion))
+    !(drift isa AbstractLuxLayer) && (drift = FromFluxAdaptor()(drift))
+    !(diffusion isa AbstractLuxLayer) && (diffusion = FromFluxAdaptor()(diffusion))
     return NeuralDSDE(drift, diffusion, tspan, args, kwargs)
 end
@@ -113,9 +113,9 @@ Constructs a neural stochastic differential equation (neural SDE).
 
 Arguments:
 
-  - `drift`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `drift`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
    drift function.
-  - `diffusion`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines
+  - `diffusion`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines
    the diffusion function. Should output a matrix that is `nbrown x size(x, 1)`.
   - `tspan`: The timespan to be solved on.
   - `nbrown`: The number of Brownian processes.
@@ -127,8 +127,8 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralSDE <: NeuralSDELayer
-    drift <: AbstractExplicitLayer
-    diffusion <: AbstractExplicitLayer
+    drift <: AbstractLuxLayer
+    diffusion <: AbstractLuxLayer
     tspan
     nbrown::Int
     args
@@ -136,8 +136,8 @@ Arguments:
 end
 
 function NeuralSDE(drift, diffusion, tspan, nbrown, args...; kwargs...)
-    !(drift isa AbstractExplicitLayer) && (drift = FromFluxAdaptor()(drift))
-    !(diffusion isa AbstractExplicitLayer) && (diffusion = FromFluxAdaptor()(diffusion))
+    !(drift isa AbstractLuxLayer) && (drift = FromFluxAdaptor()(drift))
+    !(diffusion isa AbstractLuxLayer) && (diffusion = FromFluxAdaptor()(diffusion))
     return NeuralSDE(drift, diffusion, tspan, nbrown, args, kwargs)
 end
@@ -165,7 +165,7 @@ Constructs a neural delay differential equation (neural DDE) with constant delay
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
    derivative function. Should take an input of size `[x; x(t - lag_1); ...; x(t - lag_n)]`
    and produce and output shaped like `x`.
   - `tspan`: The timespan to be solved on.
@@ -182,7 +182,7 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralCDDE <: NeuralDELayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     tspan
     hist
     lags
@@ -191,7 +191,7 @@ Arguments:
 end
 
 function NeuralCDDE(model, tspan, hist, lags, args...; kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return NeuralCDDE(model, tspan, hist, lags, args, kwargs)
 end
@@ -218,7 +218,7 @@ Constructs a neural differential-algebraic equation (neural DAE).
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
    derivative function. Should take an input of size `x` and produce the residual of
    `f(dx,x,t)` for only the differential variables.
   - `constraints_model`: A function `constraints_model(u,p,t)` for the fixed
@@ -233,7 +233,7 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralDAE <: NeuralDELayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     constraints_model
     tspan
     args
@@ -243,7 +243,7 @@ end
 
 function NeuralDAE(
         model, constraints_model, tspan, args...; differential_vars = nothing, kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return NeuralDAE(model, constraints_model, tspan, args, differential_vars, kwargs)
 end
@@ -288,7 +288,7 @@ constraint equations.
 
 Arguments:
 
-  - `model`: A `Flux.Chain` or `Lux.AbstractExplicitLayer` neural network that defines the
+  - `model`: A `Flux.Chain` or `Lux.AbstractLuxLayer` neural network that defines the
    ̇`f(u,p,t)`
  - `constraints_model`: A function `constraints_model(u,p,t)` for the fixed
    constraints to impose on the algebraic equations.
@@ -308,7 +308,7 @@ Arguments:
     documentation for more details.
 """
 @concrete struct NeuralODEMM <: NeuralDELayer
-    model <: AbstractExplicitLayer
+    model <: AbstractLuxLayer
     constraints_model
     tspan
     mass_matrix
@@ -317,7 +317,7 @@ Arguments:
 end
 
 function NeuralODEMM(model, constraints_model, tspan, mass_matrix, args...; kwargs...)
-    !(model isa AbstractExplicitLayer) && (model = FromFluxAdaptor()(model))
+    !(model isa AbstractLuxLayer) && (model = FromFluxAdaptor()(model))
     return NeuralODEMM(model, constraints_model, tspan, mass_matrix, args, kwargs)
 end
@@ -376,10 +376,10 @@ end
 Constructs a Dimension Mover Layer.
 
 We can have Lux's conventional order `(data, channel, batch)` by using it as the last layer
-of `AbstractExplicitLayer` to swap the batch-index and the time-index of the Neural DE's
+of `AbstractLuxLayer` to swap the batch-index and the time-index of the Neural DE's
 output considering that each time point is a channel.
 """
-@concrete struct DimMover <: AbstractExplicitLayer
+@concrete struct DimMover <: AbstractLuxLayer
     from
     to
 end
diff --git a/test/hamiltonian_nn_tests.jl b/test/hamiltonian_nn_tests.jl
deleted file mode 100644
index 03aa8da32..000000000
--- a/test/hamiltonian_nn_tests.jl
+++ /dev/null
@@ -1,62 +0,0 @@
-@testitem "Hamiltonian NN" tags=[:advancedneuralde] begin
-    using Zygote, OrdinaryDiffEq, ForwardDiff, Optimisers, Random, ComponentArrays,
-          Statistics
-
-    # Checks for Shapes and Non-Zero Gradients
-    u0 = rand(Float32, 6, 1)
-
-    for ad in (AutoForwardDiff(), AutoZygote())
-        hnn = HamiltonianNN(Chain(Dense(6 => 12, relu), Dense(12 => 1)); ad)
-        ps, st = Lux.setup(Xoshiro(0), hnn)
-        ps = ps |> ComponentArray
-
-        @test size(first(hnn(u0, ps, st))) == (6, 1)
-
-        @test !iszero(ForwardDiff.gradient(ps -> sum(first(hnn(u0, ps, st))), ps))
-        @test !iszero(only(Zygote.gradient(ps -> sum(first(hnn(u0, ps, st))), ps)))
-    end
-
-    # Test Convergence on a toy problem
-    t = range(0.0f0, 1.0f0; length = 64)
-    π_32 = Float32(π)
-    q_t = reshape(sin.(2π_32 * t), 1, :)
-    p_t = reshape(cos.(2π_32 * t), 1, :)
-    dqdt = 2π_32 .* p_t
-    dpdt = -2π_32 .* q_t
-
-    data = vcat(q_t, p_t)
-    target = vcat(dqdt, dpdt)
-
-    hnn = HamiltonianNN(Chain(Dense(2 => 16, relu), Dense(16 => 1)); ad = AutoForwardDiff())
-    ps, st = Lux.setup(Xoshiro(0), hnn)
-    ps = ps |> ComponentArray
-
-    opt = Optimisers.Adam(0.01)
-    st_opt = Optimisers.setup(opt, ps)
-    loss(data, target, ps) = mean(abs2, first(hnn(data, ps, st)) .- target)
-
-    initial_loss = loss(data, target, ps)
-
-    for epoch in 1:100
-        global ps, st_opt
-        gs = last(Zygote.gradient(loss, data, target, ps))
-        st_opt, ps = Optimisers.update!(st_opt, ps, gs)
-    end
-
-    final_loss = loss(data, target, ps)
-
-    @test initial_loss > 5 * final_loss
-
-    # Test output and gradient of NeuralHamiltonianDE Layer
-    tspan = (0.0f0, 1.0f0)
-
-    model = NeuralHamiltonianDE(
-        hnn, tspan, Tsit5(); save_everystep = false, save_start = true,
-        saveat = range(tspan[1], tspan[2]; length = 10))
-    sol = Array(first(model(data[:, 1], ps, st)))
-    @test size(sol) == (2, 10)
-
-    gs = only(Zygote.gradient(ps -> sum(Array(first(model(data[:, 1], ps, st)))), ps))
-
-    @test !iszero(gs)
-end
diff --git a/test/spline_layer_tests.jl b/test/spline_layer_tests.jl
deleted file mode 100644
index cd1b8c9f5..000000000
--- a/test/spline_layer_tests.jl
+++ /dev/null
@@ -1,63 +0,0 @@
-@testitem "SplineLayer" tags=[:basicneuralde] begin
-    using ComponentArrays, Zygote, DataInterpolations, Optimization,
-          OptimizationOptimisers, LinearAlgebra, Random
-
-    function run_test(f, layer, atol)
-        ps, st = Lux.setup(Xoshiro(0), layer)
-        ps = ComponentArray(ps)
-        model = StatefulLuxLayer{true}(layer, ps, st)
-
-        data_train_vals = rand(500)
-        data_train_fn = f.(data_train_vals)
-
-        function loss_function(θ)
-            data_pred = [model(x, θ) for x in data_train_vals]
-            loss = sum(abs.(data_pred .- data_train_fn)) / length(data_train_fn)
-            return loss
-        end
-
-        function callback(p, l)
-            @info "[SplineLayer] Loss: $l"
-            return false
-        end
-
-        optfunc = Optimization.OptimizationFunction(
-            (x, p) -> loss_function(x), Optimization.AutoZygote())
-        optprob = Optimization.OptimizationProblem(optfunc, ps)
-        res = Optimization.solve(
-            optprob, OptimizationOptimisers.Adam(0.1); callback, maxiters = 100)
-
-        optprob = Optimization.OptimizationProblem(optfunc, res.minimizer)
-        res = Optimization.solve(
-            optprob, OptimizationOptimisers.Adam(0.1); callback, maxiters = 100)
-        opt = res.minimizer
-
-        data_validate_vals = rand(100)
-        data_validate_fn = f.(data_validate_vals)
-
-        data_validate_pred = [model(x, opt) for x in data_validate_vals]
-
-        output = sum(abs.(data_validate_pred .- data_validate_fn)) /
-                 length(data_validate_fn)
-        return output < atol
-    end
-
-    ##test 01: affine function, Linear Interpolation
-    a, b = rand(2)
-    layer = SplineLayer((0.0, 1.0), 0.01, LinearInterpolation)
-    @test run_test(x -> a * x + b, layer, 0.1)
-
-    ##test 02: non-linear function, Quadratic Interpolation
-    a, b, c = rand(3)
-    layer = SplineLayer((0.0, 1.0), 0.01, QuadraticInterpolation)
-    @test run_test(x -> a * x^2 + b * x + x, layer, 0.1)
-
-    ##test 03: non-linear function, Quadratic Spline
-    a, b, c = rand(3)
-    layer = SplineLayer((0.0, 1.0), 0.1, QuadraticSpline)
-    @test run_test(x -> a * sin(b * x + c), layer, 0.1)
-
-    ##test 04: non-linear function, Cubic Spline
-    layer = SplineLayer((0.0, 1.0), 0.1, CubicSpline)
-    @test run_test(x -> exp(x) * x^2, layer, 0.1)
-end
diff --git a/test/tensor_product_tests.jl b/test/tensor_product_tests.jl
deleted file mode 100644
index d813a6be2..000000000
--- a/test/tensor_product_tests.jl
+++ /dev/null
@@ -1,56 +0,0 @@
-@testitem "TensorProductLayer" tags=[:basicneuralde] begin
-    using Zygote, Optimization, OptimizationOptimJL, OptimizationOptimisers,
-          LinearAlgebra, Random, ComponentArrays
-
-    function run_test(f, layer, atol, N)
-        ps, st = Lux.setup(Xoshiro(0), layer)
-        ps = ComponentArray(ps)
-        model = StatefulLuxLayer{true}(layer, ps, st)
-
-        data_train_vals = [rand(N) for k in 1:500]
-        data_train_fn = f.(data_train_vals)
-
-        function loss_function(p)
-            data_pred = [model(x, p) for x in data_train_vals]
-            loss = sum(norm.(data_pred .- data_train_fn)) / length(data_train_fn)
-            return loss
-        end
-
-        function cb(p, l)
-            @info "[TensorProductLayer] Loss: $l"
-            return false
-        end
-
-        optfunc = Optimization.OptimizationFunction(
-            (x, p) -> loss_function(x), Optimization.AutoZygote())
-        optprob = Optimization.OptimizationProblem(optfunc, ps)
-        res = Optimization.solve(
-            optprob, OptimizationOptimisers.Adam(0.1); callback = cb, maxiters = 100)
-        optprob = Optimization.OptimizationProblem(optfunc, res.minimizer)
-        res = Optimization.solve(
-            optprob, OptimizationOptimisers.Adam(0.01); callback = cb, maxiters = 100)
-        optprob = Optimization.OptimizationProblem(optfunc, res.minimizer)
-        res = Optimization.solve(optprob, BFGS(); callback = cb, maxiters = 200)
-        opt = res.minimizer
-
-        data_validate_vals = [rand(N) for k in 1:100]
-        data_validate_fn = f.(data_validate_vals)
-
-        data_validate_pred = [model(x, opt) for x in data_validate_vals]
-
-        return sum(norm.(data_validate_pred .- data_validate_fn)) /
-               length(data_validate_fn) < atol
-    end
-
-    ##test 01: affine function, Chebyshev and Polynomial basis
-    A = rand(2, 2)
-    b = rand(2)
-    layer = TensorLayer([ChebyshevBasis(10), PolynomialBasis(10)], 2)
-    @test run_test(x -> A * x + b, layer, 0.05, 2)
-
-    ##test 02: non-linear function, Chebyshev and Legendre basis
-    A = rand(2, 2)
-    b = rand(2)
-    layer = TensorLayer([ChebyshevBasis(7), FourierBasis(7)], 2)
-    @test run_test(x -> A * x * norm(x) + b * sin(norm(x)), layer, 0.10, 2)
-end
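The deleted tests above exercised the removed `HamiltonianNN`/`NeuralHamiltonianDE` API. A rough sketch of an equivalent smoke check against the v4 API follows; it is not part of this patch, and the hidden width, the sample input, and the expected `(2, 10)` shape are assumptions carried over from the deleted test.

```julia
using DiffEqFlux, OrdinaryDiffEq, ComponentArrays, Random, Zygote

# v4 Hamiltonian NN: Boltz layer wrapped in a plain NeuralODE (per the README note above)
hnn = Layers.HamiltonianNN{true}(Layers.MLP(2, (16, 1)); autodiff = AutoForwardDiff())
ps, st = Lux.setup(Xoshiro(0), hnn)
ps = ps |> ComponentArray

tspan = (0.0f0, 1.0f0)
model = NeuralODE(hnn, tspan, Tsit5(); save_everystep = false, save_start = true,
    saveat = range(tspan[1], tspan[2]; length = 10))

u0 = rand(Float32, 2)                  # (q, p) for a one-degree-of-freedom system
sol = Array(first(model(u0, ps, st)))  # expected to be 2 states × 10 save points
gs = only(Zygote.gradient(ps -> sum(Array(first(model(u0, ps, st)))), ps))

@assert size(sol) == (2, 10)
@assert !iszero(gs)
```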