From 53a86b21d71216bb0d43167e4eecc724ad5fc5b9 Mon Sep 17 00:00:00 2001 From: nHackel Date: Tue, 13 Aug 2024 20:06:42 +0200 Subject: [PATCH 1/3] Allow matrix measurement states to exchange scheduler between calls --- src/MultiThreading.jl | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/src/MultiThreading.jl b/src/MultiThreading.jl index c881657..399659d 100644 --- a/src/MultiThreading.jl +++ b/src/MultiThreading.jl @@ -16,7 +16,10 @@ function init!(solver::AbstractLinearSolver, state::AbstractSolverState, b::Abst states = prepareMatrixStates(solver, state, b) multiState = scheduler(states) solver.state = multiState - init!(solver, multiState, b; kwargs...) + for (i, s) in enumerate(solver.state.states) + init!(solver, s, b[:, i]; kwargs...) + end + solver.state.active .= true end function init!(solver::AbstractLinearSolver, state::AbstractMatrixSolverState, b::AbstractVector; kwargs...) singleState = first(state.states) @@ -30,12 +33,6 @@ function prepareMatrixStates(solver::AbstractLinearSolver, state::AbstractSolver end prepareMatrixStates(solver::AbstractLinearSolver, state::Union{SequentialState, MultiThreadingState}, b::AbstractMatrix) = prepareMatrixStates(solver, first(state.states), b) -function init!(solver::AbstractLinearSolver, state::AbstractMatrixSolverState, b::AbstractMatrix; kwargs...) - for (i, s) in enumerate(state.states) - init!(solver, s, b[:, i]; kwargs...) - end - state.active .= true -end function iterate(solver::S, state::AbstractMatrixSolverState) where {S <: AbstractLinearSolver} activeIdx = findall(state.active) From 9fc5452ba31e3e347a637bc137b7140ebc8892df Mon Sep 17 00:00:00 2001 From: nHackel Date: Tue, 13 Aug 2024 20:06:57 +0200 Subject: [PATCH 2/3] Init docu update based on Literate.jl --- .gitignore | 1 + docs/Project.toml | 9 ++- docs/make.jl | 32 +++++++-- docs/src/gettingStarted.md | 53 --------------- docs/src/index.md | 13 +--- .../literate/examples/compressed_sensing.jl | 67 +++++++++++++++++++ .../literate/examples/computed_tomography.jl | 64 ++++++++++++++++++ docs/src/literate/examples/getting_started.jl | 51 ++++++++++++++ docs/src/literate/howto/callbacks.jl | 47 +++++++++++++ docs/src/literate/howto/efficient_kaczmarz.jl | 0 docs/src/literate/howto/gpu_acceleration.jl | 0 docs/src/literate/howto/multi_threading.jl | 66 ++++++++++++++++++ docs/src/literate/howto/normal_operator.jl | 0 docs/src/literate/howto/weighting.jl | 33 +++++++++ docs/src/plug-and-play.md | 0 15 files changed, 365 insertions(+), 71 deletions(-) delete mode 100644 docs/src/gettingStarted.md create mode 100644 docs/src/literate/examples/compressed_sensing.jl create mode 100644 docs/src/literate/examples/computed_tomography.jl create mode 100644 docs/src/literate/examples/getting_started.jl create mode 100644 docs/src/literate/howto/callbacks.jl create mode 100644 docs/src/literate/howto/efficient_kaczmarz.jl create mode 100644 docs/src/literate/howto/gpu_acceleration.jl create mode 100644 docs/src/literate/howto/multi_threading.jl create mode 100644 docs/src/literate/howto/normal_operator.jl create mode 100644 docs/src/literate/howto/weighting.jl create mode 100644 docs/src/plug-and-play.md diff --git a/.gitignore b/.gitignore index 3c15202..4761975 100644 --- a/.gitignore +++ b/.gitignore @@ -3,5 +3,6 @@ *.jl.mem docs/build/ docs/site/ +docs/src/generated/ Manifest.toml diff --git a/docs/Project.toml b/docs/Project.toml index 755ce4d..e9837d9 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,10 +1,13 @@ [deps] +CairoMakie = 
"13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +ImageGeoms = "9ee76f2b-840d-4475-b6d6-e485c9297852" +ImagePhantoms = "71a99df6-f52c-4da1-bd2a-69d6f37f3252" LinearOperatorCollection = "a4a2c56f-fead-462a-a3ab-85921a5f2575" -Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" +Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" +RadonKA = "86de8297-835b-47df-b249-c04e8db91db5" RegularizedLeastSquares = "1e9c538a-f78c-5de5-8ffb-0b6dbe892d23" -UnicodePlots = "b8865327-cd53-5732-bb35-84acbb429228" Wavelets = "29a6e085-ba6d-5f35-a997-948ac2efa89a" [compat] -Documenter = "1.0, 1.1, 1.2" +Documenter = "1" diff --git a/docs/make.jl b/docs/make.jl index 211b283..3b25d9d 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,10 +1,24 @@ -using Documenter, RegularizedLeastSquares, LinearOperatorCollection, Wavelets +using Documenter, Literate, RegularizedLeastSquares + +# Generate examples +OUTPUT_BASE = joinpath(@__DIR__(), "src/generated") +INPUT_BASE = joinpath(@__DIR__(), "src/literate") +for (_, dirs, _) in walkdir(INPUT_BASE) + for dir in dirs + OUTPUT = joinpath(OUTPUT_BASE, dir) + INPUT = joinpath(INPUT_BASE, dir) + for file in filter(f -> endswith(f, ".jl"), readdir(INPUT)) + Literate.markdown(joinpath(INPUT, file), OUTPUT) + end + end +end makedocs( format = Documenter.HTML(; prettyurls=get(ENV, "CI", "false") == "true", canonical="https://github.com/JuliaImageRecon/RegularizedLeastSquares.jl", assets=String[], + collapselevel=1, ), repo="https://github.com/JuliaImageRecon/RegularizedLeastSquares.jl/blob/{commit}{path}#{line}", modules = [RegularizedLeastSquares], @@ -12,16 +26,26 @@ makedocs( authors = "Tobias Knopp, Mirco Grosser, Martin Möddel, Niklas Hackelberg, Andrew Mao, Jakob Assländer", pages = [ "Home" => "index.md", - "Getting Started" => "gettingStarted.md", + "Getting Started" => "generated/examples/getting_started.md", + "Examples" => Any["Compressed Sensing" => "generated/examples/compressed_sensing.md", "Computed Tomography" => "generated/examples/computed_tomography.md"], "Solvers" => "solvers.md", "Regularization" => "regularization.md", - "API" => Any["Solvers" => "API/solvers.md", + "How to" => Any[ + "Weighting" => "generated/howto/weighting.md", + "Normal Operator" => "generated/howto/normal_operator.md", + "Multi-Threading" => "generated/howto/multi_threading.md", + "GPU Acceleration" => "generated/howto/gpu_acceleration.md", + "Efficient Kaczmarz" => "generated/howto/efficient_kaczmarz.md", + "Callbacks" => "generated/howto/callbacks.md", + "Plug-and-Play Regularization" => "plug-and-play.md" + ], + "API Reference" => Any["Solvers" => "API/solvers.md", "Regularization Terms" => "API/regularization.md"], ], pagesonly = true, checkdocs = :none, - doctest = true, + doctest = false, doctestfilters = [r"(\d*)\.(\d{4})\d+"] ) diff --git a/docs/src/gettingStarted.md b/docs/src/gettingStarted.md deleted file mode 100644 index 3f962e9..0000000 --- a/docs/src/gettingStarted.md +++ /dev/null @@ -1,53 +0,0 @@ -# Getting Started - -To get familiar with the different aspects of RegularizedLeastSquares.jl, we will go through a simple example from the field of Compressed Sensing. - -In Addtion to RegularizedLeastSquares.jl, we will need the packages LinearOperatorCollection.jl, Images.jl and Random.jl, as well as PyPlot for visualization. 
- -```julia -using RegularizedLeastSquares, LinearOperatorCollection, Images, PyPlot, Random -``` - -To get started, let us generate a simple phantom -```julia -N = 256 -I = shepp_logan(N) -``` - -In this example, we consider an operator which randomly samples half of the pixels in the image. Such an operator and the corresponding measurement can be generated by calling - -```julia -# sampling operator -idx = sort( shuffle( collect(1:N^2) )[1:div(N^2,2)] ) -A = SamplingOp(eltype(I), pattern = idx , shape = (N,N)) - -# generate undersampled data -y = A*vec(I) -``` - -To recover the image, we solve the TV-regularized least squares problem -```math -\begin{equation} - \underset{\mathbf{x}}{argmin} \frac{1}{2}\vert\vert \mathbf{A}\mathbf{x}-\mathbf{y} \vert\vert_2^2 + λTV(\mathbf{x}) . -\end{equation} -``` - -For this purpose we build a TV regularizer with regularization parameter $λ=0.01$ -```julia -reg = TVRegularization(0.01; shape=(N,N)) -``` - -To solve the CS problem, the Alternating Direction Method of Multipliers can be used. Thus, we build the corresponding solver -```julia -solver = createLinearSolver(ADMM, A; reg=reg, ρ=0.1, iterations=20) -``` -and apply it to our measurement -```julia -Ireco = solve!(solver,y) -Ireco = reshape(Ireco,N,N) -``` - -The original phantom and the reconstructed image are shown below - -![Phantom](./assets/sh.png) -![Reconstruction](./assets/sh_tv.png) diff --git a/docs/src/index.md b/docs/src/index.md index 4034b65..912ed18 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -7,22 +7,13 @@ RegularizedLeastSquares.jl is a Julia package for solving large scale linear systems using different types of algorithms. Ill-conditioned problems arise in many areas of practical interest. To solve these problems, one often resorts to regularization techniques and non-linear problem formulations. This packages provides implementations for a variety of solvers, which are used in fields such as MPI and MRI. -The implemented methods range from the $l_2$-regularized CGNR method to more general optimizers such as the Alternating Direction of Multipliers Method (ADMM) or the Split-Bregman method. +The implemented methods range from the $l^2_2$-regularized CGNR method to more general optimizers such as the Alternating Direction of Multipliers Method (ADMM) or the Split-Bregman method. -For convenience, implementations of popular regularizers, such as $l_1$-regularization and TV regularization, are provided. On the other hand, hand-crafted regularizers can be used quite easily. For this purpose, a `Regularization` object needs to be build. The latter mainly contains the regularization parameter and a function to calculate the proximal map of a given input. +For convenience, implementations of popular regularizers, such as $l_1$-regularization and TV regularization, are provided. On the other hand, hand-crafted regularizers can be used quite easily. Depending on the problem, it becomes unfeasible to store the full system matrix at hand. For this purpose, RegularizedLeastSquares.jl allows for the use of matrix-free operators. Such operators can be realized using the interface provided by the package [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl). Other interfaces can be used as well, as long as the product `*(A,x)` and the adjoint `adjoint(A)` are provided. A number of common matrix-free operators are provided by the package [LinearOperatorColection.jl](https://github.com/JuliaImageRecon/LinearOperatorCollection.jl). 
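As a hedged sketch of this duck-typed interface (the operator type and its field below are purely illustrative and not part of the package; individual solvers may additionally rely on methods such as `size(A)` or `eltype(A)`), a custom matrix-free operator only needs to provide a product and an adjoint:

```julia
# Minimal matrix-free operator: element-wise scaling by a stored vector (illustrative sketch only).
struct DiagonalScalingOp{T}
    diag::Vector{T}
end

# The two methods the solvers rely on for matrix-free operators:
Base.:*(A::DiagonalScalingOp, x::AbstractVector) = A.diag .* x
Base.adjoint(A::DiagonalScalingOp) = DiagonalScalingOp(conj.(A.diag))
```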
-## Installation - -Within Julia, use the package manager: -```julia -using Pkg -Pkg.add("RegularizedLeastSquares") -``` -This adds the latest release of the package is added. To install a different version, please consult the [Pkg documentation](https://pkgdocs.julialang.org/dev/managing-packages/#Adding-packages). - ## Usage * See [Getting Started](@ref) for an introduction to using the package diff --git a/docs/src/literate/examples/compressed_sensing.jl b/docs/src/literate/examples/compressed_sensing.jl new file mode 100644 index 0000000..5146e58 --- /dev/null +++ b/docs/src/literate/examples/compressed_sensing.jl @@ -0,0 +1,67 @@ +# # Compressed Sensing Example +# In this example we will go through a simple example from the field of Compressed Sensing. +# In addtion to RegularizedLeastSquares.jl, we will need the packages [LinearOperatorCollection.jl](https://github.com/JuliaImageRecon/LinearOperatorCollection.jl), [ImagePhantoms.jl](https://github.com/JuliaImageRecon/ImagePhantoms.jl), [ImageGeoms.jl](https://github.com/JuliaImageRecon/ImageGeoms.jl) and Random.jl, as well as [CairoMakie.jl](https://docs.makie.org/stable/) for visualization. We can install them the same way we did RegularizedLeastSquares.jl. + +# ## Preparing the Inverse Problem +# To get started, let us generate a simple phantom using the ImagePhantoms and ImageGeom packages: + +using ImagePhantoms, ImageGeoms +N = 256 +image = shepp_logan(N, SheppLoganToft()) +size(image) + +# This produces a 256x256 image of a Shepp-Logan phantom. + +# In this example, we consider a problem in which we randomly sample a third of the pixels in the image. Such a problem and the corresponding measurement can be constructed with the packages LinearOperatorCollection and Random: + +# We first randomly shuffle the indices of the image and then select the first third of the indices to sample. +using Random, LinearOperatorCollection +randomIndices = shuffle(eachindex(image)) +sampledIndices = sort(randomIndices[1:div(end, 3)]) + +# Afterwards we build a sampling operator which samples the image at the selected indices +A = SamplingOp(eltype(image), pattern = sampledIndices , shape = size(image)); + +# Then we apply the sampling operator to the image to obtain the measurement vector +b = A*vec(image); + +# To visualize our image we can use CairoMakie: +using CairoMakie +function plot_image(figPos, img; title = "", width = 150, height = 150) + ax = CairoMakie.Axis(figPos; yreversed=true, title, width, height) + hidedecorations!(ax) + heatmap!(ax, img) +end +fig = Figure() +plot_image(fig[1,1], image, title = "Image") +samplingMask = fill(false, size(image)) +samplingMask[sampledIndices] .= true +plot_image(fig[1,2], image .* samplingMask, title = "Sampled Image") +resize_to_layout!(fig) +fig + +# ## Solving the Inverse Problem +# To recover the image from the measurement vector, we solve the TV-regularized least squares problem +# ```math +# \begin{equation} +# \underset{\mathbf{x}}{argmin} \frac{1}{2}\vert\vert \mathbf{A}\mathbf{x}-\mathbf{b} \vert\vert_2^2 + \vert\vert\mathbf{x}\vert\vert_{\lambda\text{TV}} . +# \end{equation} +# ``` + +# For this purpose we build a TV regularizer with regularization parameter $λ=0.01$ + +using RegularizedLeastSquares +reg = TVRegularization(0.01; shape=size(image)); + +# To solve this CS problem, the Fast Iterative Shrinkage-Thresholding Algorithm (FISTA) can be used. 
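# As a rough schematic of a single FISTA iteration (a sketch of the general algorithm, not the package's exact implementation; `γ` denotes a step size, `t` the momentum variable, and `y` the extrapolated iterate):
# ```julia
# x_new = y - γ * (A' * (A * y - b))             # gradient step on the data term
# prox!(reg, x_new, γ * λ(reg))                  # proximal map of the TV term
# t_new = (1 + sqrt(1 + 4t^2)) / 2               # momentum update
# y = x_new + (t - 1) / t_new * (x_new - x_old)  # extrapolation step
# ```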
Thus, we build the corresponding solver + +solver = createLinearSolver(FISTA, A; reg=reg, iterations=20); + +# and apply it to our measurement vector +img_approx = solve!(solver,b) + +# To visualize the reconstructed image, we need to reshape the result vector to the correct shape. Afterwards we can use CairoMakie again: +img_approx = reshape(img_approx,size(image)); +plot_image(fig[1,3], img_approx, title = "Reconstructed Image") +resize_to_layout!(fig) +fig \ No newline at end of file diff --git a/docs/src/literate/examples/computed_tomography.jl b/docs/src/literate/examples/computed_tomography.jl new file mode 100644 index 0000000..0e45da5 --- /dev/null +++ b/docs/src/literate/examples/computed_tomography.jl @@ -0,0 +1,64 @@ +# # Computed Tomography Example +# In this example we will go through a simple example from the field of Computed Tomography. +# In addtion to RegularizedLeastSquares.jl, we will need the packages [LinearOperatorCollection.jl](https://github.com/JuliaImageRecon/LinearOperatorCollection.jl), [ImagePhantoms.jl](https://github.com/JuliaImageRecon/ImagePhantoms.jl), [ImageGeoms.jl](https://github.com/JuliaImageRecon/ImageGeoms.jl) +# and [RadonKA.jl](https://github.com/roflmaostc/RadonKA.jl/tree/main), as well as [CairoMakie.jl](https://docs.makie.org/stable/) for visualization. We can install them the same way we did RegularizedLeastSquares.jl. + +# RadonKA is a package for the computation of the Radon transform and its adjoint. It is implemented with KernelAbstractions.jl and supports GPU acceleration. See the GPU acceleration [how-to](../howto/gpu_acceleration.md) for more information. + +# ## Preparing the Inverse Problem +# To get started, let us generate a simple phantom using the ImagePhantoms and ImageGeom packages: + +using ImagePhantoms, ImageGeoms +N = 256 +image = shepp_logan(N, SheppLoganToft()) +size(image) + +# This produces a 64x64 image of a Shepp-Logan phantom. + +using RadonKA, LinearOperatorCollection +angles = collect(range(0, π, 256)) +sinogram = Array(RadonKA.radon(image, angles)) + +# Afterwards we build a Radon operator implementing both the forward and adjoint Radon transform +A = RadonOp(eltype(image); angles, shape = size(image)); + +# To visualize our image we can use CairoMakie: +using CairoMakie +function plot_image(figPos, img; title = "", width = 150, height = 150) + ax = CairoMakie.Axis(figPos[1, 1]; yreversed=true, title, width, height) + hidedecorations!(ax) + hm = heatmap!(ax, img) + Colorbar(figPos[2, 1], hm, vertical = false, flipaxis = false) +end +fig = Figure() +plot_image(fig[1,1], image, title = "Image") +plot_image(fig[1,2], sinogram, title = "Sinogram") +plot_image(fig[1,3], backproject(sinogram, angles), title = "Backprojection") +resize_to_layout!(fig) +fig + +# ## Solving the Inverse Problem +# To recover the image from the measurement vector, we solve the $l^2_2$-regularized least squares problem +# ```math +# \begin{equation} +# \underset{\mathbf{x}}{argmin} \frac{1}{2}\vert\vert \mathbf{A}\mathbf{x}-\mathbf{b} \vert\vert_2^2 + \vert\vert\mathbf{x}\vert\vert^2_2 . +# \end{equation} +# ``` + +# For this purpose we build a $l^2_2$ with regularization parameter $λ=0.001$ + +using RegularizedLeastSquares +reg = L2Regularization(0.001); + +# To solve this inverse problem, the Conjugate Gradient Normal Residual (CGNR) algorithm can be used. 
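# As a brief aside (a standard identity rather than package code): with the $l^2_2$ term above, CGNR amounts to applying the conjugate gradient method to the regularized normal equations
# ```math
# \begin{equation}
# (\mathbf{A}^*\mathbf{A} + \lambda\mathbf{I})\mathbf{x} = \mathbf{A}^*\mathbf{b},
# \end{equation}
# ```
# where $\lambda$ is the regularization parameter chosen above.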
Thus, we build the corresponding solver + +solver = createLinearSolver(CGNR, A; reg=reg, iterations=20); + +# and apply it to our measurement vector +img_approx = solve!(solver, vec(sinogram)) + +# To visualize the reconstructed image, we need to reshape the result vector to the correct shape. Afterwards we can use CairoMakie again: +img_approx = reshape(img_approx,size(image)); +plot_image(fig[1,4], img_approx, title = "Reconstructed Image") +resize_to_layout!(fig) +fig \ No newline at end of file diff --git a/docs/src/literate/examples/getting_started.jl b/docs/src/literate/examples/getting_started.jl new file mode 100644 index 0000000..325b34b --- /dev/null +++ b/docs/src/literate/examples/getting_started.jl @@ -0,0 +1,51 @@ +# # Getting Started + +# In this example we will go through a simple example from the field of Compressed Sensing to get familiar with RegularizedLeastSquares.jl. + +# ## Installation + +# To install RegularizedLeastSquares.jl, you can use the Julia package manager. Open a Julia REPL and run the following command: + +# ```julia +# using Pkg +# Pkg.add("RegularizedLeastSquares") +# ``` +# This will download and install the RegularizedLeastSquares.jl package and its dependencies. To install a different version, please consult the [Pkg documentation](https://pkgdocs.julialang.org/dev/managing-packages/#Adding-packages). + +# Once the installation is complete, you can import the package in your code with the `using` keyword: + + +using RegularizedLeastSquares + +# RegularizedLeastSquares aims to solve inverse problems of the form: +# ```math +# \begin{equation} +# \underset{\mathbf{x}}{argmin} \frac{1}{2}\vert\vert \mathbf{A}\mathbf{x}-\mathbf{b} \vert\vert_2^2 + \mathbf{R(x)} . +# \end{equation} +# ``` +# where $\mathbf{A}$ is a linear operator, $\mathbf{y}$ is the measurement vector, and $\mathbf{R(x)}$ is an (optional) regularization term. +# The aim is to reconstruct the unknown vector $\mathbf{x}$. In this first exampel we will just reconstruct a simple random matrix. For more advanced examples, please refer to the examples. + +A = rand(32, 16) +x = rand(16) +b = A*x; + +# To approximate x from b, we can use the Conjugate Gradient Normal Residual (CGNR) algorithm. We first build the corresponding solver: +solver = createLinearSolver(CGNR, A; iterations=32); + +# and apply it to our measurement vector +x_approx = solve!(solver, b) +isapprox(x, x_approx, rtol = 0.001) + +# Usually the inverse problems are ill-posed and require regularization. RegularizedLeastSquares.jl provides a variety of regularization terms. +# The CGNR algorithm can solve optimzation problems of the form: +# ```math +# \begin{equation} +# \underset{\mathbf{x}}{argmin} \frac{1}{2}\vert\vert \mathbf{A}\mathbf{x}-\mathbf{b} \vert\vert_2^2 + \vert\vert\mathbf{x}\vert\vert^2_2 . +# \end{equation} +# ``` + +# The corresponding solver can be built with the L2 regularization term: +solver = createLinearSolver(CGNR, A; reg = L2Regularization(0.0001), iterations=32); +x_approx = solve!(solver, b) +isapprox(x, x_approx, rtol = 0.001) diff --git a/docs/src/literate/howto/callbacks.jl b/docs/src/literate/howto/callbacks.jl new file mode 100644 index 0000000..1a4211e --- /dev/null +++ b/docs/src/literate/howto/callbacks.jl @@ -0,0 +1,47 @@ +# # Callbacks +# For certain reconstructions it is important to monitor the internal state of the solver. RegularizedLeastSquares.jl provides a callback mechanism to allow developres to access this state after each iteration. 
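# A callback is simply something callable that receives the solver and the current iteration number. As a minimal sketch (assuming a `solver` and a measurement vector `b` as constructed further below), one could log the progress like this:
# ```julia
# my_callback = (solver, iteration) -> println("finished iteration ", iteration)
# solve!(my_callback, solver, b)
# ```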
+# The package provides a variety of default callbacks, which for example store the solution after each iteration. More information can be found in the API reference for the `solve!` function. + +# In this example we will revist the compressed sensing compressed sensing [example](../examples/compressed_sensing.md) and implement a custom callback using the do-syntax of the `solve!` function. + +# We first recreate the operator `A` and the measurement vector `b`: +using ImagePhantoms, ImageGeoms +N = 256 +image = shepp_logan(N, SheppLoganToft()) +size(image) +using Random, LinearOperatorCollection +randomIndices = shuffle(eachindex(image)) +sampledIndices = sort(randomIndices[1:div(end, 3)]) +A = SamplingOp(eltype(image), pattern = sampledIndices , shape = size(image)); +b = A*vec(image); + +# Next we prepare our visualization helper function: +using CairoMakie +function plot_image(figPos, img; title = "", width = 150, height = 150, clim = extrema(img)) + ax = CairoMakie.Axis(figPos; yreversed=true, title, width, height) + hidedecorations!(ax) + heatmap!(ax, img, colorrange = clim) +end + +# Now we construct the solver with the TV regularization term: +using RegularizedLeastSquares +reg = TVRegularization(0.01; shape=size(image)); +solver = createLinearSolver(FISTA, A; reg=reg, iterations=20); + +# We will now implement a callback that plots the solution every four iteration: +fig = Figure() +idx = 1 +solve!(solver, b) do solver, iteration + if iteration % 4 == 0 + img_approx = copy(solversolution(solver)) + img_approx = reshape(img_approx, size(image)) + plot_image(fig[div(idx -1, 3) + 1, mod1(idx, 3)], img_approx, clim = extrema(image), title = "$iteration") + global idx += 1 + end +end +resize_to_layout!(fig) +fig + +# In the callback we have to copy the solution, as the solver will update it in place. +# As is explained in the solver section, each features fields that are intended to be immutable during a `solve!` call and a state that is modified in each iteration. +# Depending on the solvers parameters and the measurement input, the state can differ in its fields and their type. Ideally, one tries to avoid accessing the state directly and uses the provided functions to access the state. diff --git a/docs/src/literate/howto/efficient_kaczmarz.jl b/docs/src/literate/howto/efficient_kaczmarz.jl new file mode 100644 index 0000000..e69de29 diff --git a/docs/src/literate/howto/gpu_acceleration.jl b/docs/src/literate/howto/gpu_acceleration.jl new file mode 100644 index 0000000..e69de29 diff --git a/docs/src/literate/howto/multi_threading.jl b/docs/src/literate/howto/multi_threading.jl new file mode 100644 index 0000000..a8f2ae3 --- /dev/null +++ b/docs/src/literate/howto/multi_threading.jl @@ -0,0 +1,66 @@ +# # Multi-Threading +# There are three different kinds of multi-threading in RegularizedLeastSquares.jl, two of which are transparent to the solvers themselves. +# In the following examples we will use the Threads.@threads macro for multi-threading, but the concepts are applicable to other multi-threading options as well. + +# ## Solver Based Multi-Threading +# This type of multi-threading is transparent to the solver and is applicable if the total solution is composed of individual solutions that can be solved in parallel. +# In particular, this approach also allows for using solvers with different parameters, such as their operator or regularization parameters. 
+ +using RegularizedLeastSquares +As = [rand(32, 16) for _ in 1:4] +xs = [rand(16) for _ in 1:4] +bs = [A*x for (A, x) in zip(As, xs)] + +xs_approx = similar(xs) +Threads.@threads for i in 1:4 + solver = createLinearSolver(CGNR, As[i]; iterations=32) + xs_approx[i] = solve!(solver, bs[i]) +end + +# ## Operator Based Multi-Threading +# This type of multi-threading involves linear operators or proximal maps that can be implemnted in parallel. +# Examples of this include the proximal map of the TV regularization term, which is based on the multi-threaded GradientOp from LinearOperatorCollection. +# GPU acceleration also falls under this approach. + +# ## Measurement Based Multi-Threading +# This level of multi-threading applies the same solver (and its parameters) to multiple measurement vectors or rather a measurement matrix B. +# This is useful in the case of multiple measurements that can be solved in parallel and can reuse the same solver. This approach is not applicable if the operator is stateful. + +# To use this approach we first build a measurement matrix B and a corresponding solver: +A = first(As) +B = mapreduce(x -> A*x, hcat, xs) +solver = createLinearSolver(CGNR, A; iterations=32) + +# We can then simply pass the measurement matrix to the solver. The result will be the same as if we passed each colument of B seperately: +x_approx = solve!(solver, B) +size(x_approx) + +# The previous `solve!` call was still executed sequentially. To execute it in parallel, we have to pass a MultiThreadingState to the `solve!` call: +x_multi = solve!(solver, B; scheduler = MultiThreadingState) +x_approx == x_multi + +# It is possible to implement custom scheduling. The following pseudo-code shows how to implement this for the FLoop.jl package: + +# Since most solver have conv. criteria, they can finish at different iteration numbers, which we track with the active flags. +mutable struct FloopState{S, ST <: AbstractSolverState{S}} <: AbstractMatrixSolverState{S} + states::Vector{ST} + active::Vector{Bool} + FloopState(states::Vector{ST}) where {S, ST <: AbstractSolverState{S}} = new{S, ST}(states, fill(true, length(states))) +end + +# To hook into the existing init! code we only have to supply a method that gets a copyable "vector" state. This will invoke our FloopState constructor with copies of the given state. +prepareMultiStates(solver::AbstractLinearSolver, state::FloopState, b::AbstractMatrix) = prepareMultiStates(solver, first(state.states), b) + +# We specialise the iterate function which is called with the idx of still active states +#function iterate(solver::AbstractLinearSolver, state::FloopState, activeIdx) +# @floop for i in activeIdx +# res = iterate(solver, state.states[i]) +# if isnothing(res) +# state.active[i] = false +# end +# end +# return state.active, state +#end + +# solver = createLinearSolver(CGNR, A, ...) +# solve!(solver, B; scheduler = FloopState) \ No newline at end of file diff --git a/docs/src/literate/howto/normal_operator.jl b/docs/src/literate/howto/normal_operator.jl new file mode 100644 index 0000000..e69de29 diff --git a/docs/src/literate/howto/weighting.jl b/docs/src/literate/howto/weighting.jl new file mode 100644 index 0000000..9476ac5 --- /dev/null +++ b/docs/src/literate/howto/weighting.jl @@ -0,0 +1,33 @@ +# # Weighting +# Often time one wants to solve a weighted least squares problem of the form: +# ```math +# \begin{equation} +# \underset{\mathbf{x}}{argmin} \frac{1}{2}\vert\vert \mathbf{A}\mathbf{x}-\mathbf{b} \vert\vert^2_{\mathbf{W}} + \mathbf{R(x)} . 
+# \end{equation} +# ``` +# where $\mathbf{W}$ is a symmetric, positive weighting matrix and $\vert\vert\mathbf{y}\vert\vert^2_\mathbf{W}$ denotes the weighted Euclidean norm. +# An example of such a weighting matrix is a noise whitening matrix. Another example could be a scaling of the matrix rows by the reciprocal of their row energy. + +# In the following, we will solve a weighted least squares problem of the form: +# ```math +# \begin{equation} +# \underset{\mathbf{x}}{argmin} \frac{1}{2}\vert\vert \mathbf{A}\mathbf{x}-\mathbf{b} \vert\vert_\mathbf{W}^2 + \vert\vert\mathbf{x}\vert\vert^2_2 . +# \end{equation} +# ``` +using RegularizedLeastSquares, LinearOperatorCollection, LinearAlgebra +A = rand(32, 16) +x = rand(16) +b = A*x +weights = map(row -> 1/rownorm²(A, row), 1:size(A, 1)) +WA = diagm(weights) * A +solver = createLinearSolver(Kaczmarz, WA; reg = L2Regularization(0.0001), iterations=10) +x_approx = solve!(solver, weights .* b); + +# The operator A is not always a dense matrix and the product between the operator and the weighting matrix is not always efficient or possible. +# The package [LinearOperatorCollection.jl](https://github.com/JuliaImageRecon/LinearOperatorCollection.jl) provides a matrix-free implementation of a diagonal weighting matrix, as well as a matrix free product between two matrices. +# This weighted operator has efficient implementations of the normal operator and also for the row-action operations of the Kaczmarz solver. +W = WeightingOp(weights) +P = ProdOp(W, A) +solver = createLinearSolver(Kaczmarz, P; reg = L2Regularization(0.0001), iterations=10) +x_approx2 = solve!(solver, W * b) +isapprox(x_approx, x_approx2) \ No newline at end of file diff --git a/docs/src/plug-and-play.md b/docs/src/plug-and-play.md new file mode 100644 index 0000000..e69de29 From 4f551dc928339b7ba4792ffac0a200e68c6da17d Mon Sep 17 00:00:00 2001 From: nHackel Date: Thu, 22 Aug 2024 17:25:53 +0200 Subject: [PATCH 3/3] Update documentation based on literate.jl --- docs/Project.toml | 2 + docs/make.jl | 4 +- docs/src/API/solvers.md | 8 +- docs/src/index.md | 22 ++++- .../literate/examples/compressed_sensing.jl | 10 +- .../literate/examples/computed_tomography.jl | 6 +- docs/src/literate/examples/getting_started.jl | 6 +- .../literate/explanations/regularization.jl | 96 +++++++++++++++++++ docs/src/literate/howto/callbacks.jl | 10 +- docs/src/literate/howto/efficient_kaczmarz.jl | 31 ++++++ docs/src/literate/howto/gpu_acceleration.jl | 77 +++++++++++++++ docs/src/literate/howto/multi_threading.jl | 50 +++++----- docs/src/literate/howto/normal_operator.jl | 70 ++++++++++++++ docs/src/literate/howto/plug-and-play.jl | 30 ++++++ docs/src/literate/howto/weighting.jl | 8 +- docs/src/plug-and-play.md | 0 docs/src/regularization.md | 91 ------------------ docs/src/solvers.md | 64 +++++++++++-- src/MultiThreading.jl | 15 +++ src/RegularizedLeastSquares.jl | 12 ++- 20 files changed, 467 insertions(+), 145 deletions(-) create mode 100644 docs/src/literate/explanations/regularization.jl create mode 100644 docs/src/literate/howto/plug-and-play.jl delete mode 100644 docs/src/plug-and-play.md delete mode 100644 docs/src/regularization.md diff --git a/docs/Project.toml b/docs/Project.toml index e9837d9..92f18a1 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,8 +1,10 @@ [deps] +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" ImageGeoms = "9ee76f2b-840d-4475-b6d6-e485c9297852" 
ImagePhantoms = "71a99df6-f52c-4da1-bd2a-69d6f37f3252" +JLArrays = "27aeb0d3-9eb9-45fb-866b-73c2ecf80fcb" LinearOperatorCollection = "a4a2c56f-fead-462a-a3ab-85921a5f2575" Literate = "98b081ad-f1c9-55d3-8b20-4c87d4299306" RadonKA = "86de8297-835b-47df-b249-c04e8db91db5" diff --git a/docs/make.jl b/docs/make.jl index 3b25d9d..c03fed4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -29,7 +29,7 @@ makedocs( "Getting Started" => "generated/examples/getting_started.md", "Examples" => Any["Compressed Sensing" => "generated/examples/compressed_sensing.md", "Computed Tomography" => "generated/examples/computed_tomography.md"], "Solvers" => "solvers.md", - "Regularization" => "regularization.md", + "Regularization" => "generated/explanations/regularization.md", "How to" => Any[ "Weighting" => "generated/howto/weighting.md", "Normal Operator" => "generated/howto/normal_operator.md", @@ -37,7 +37,7 @@ makedocs( "GPU Acceleration" => "generated/howto/gpu_acceleration.md", "Efficient Kaczmarz" => "generated/howto/efficient_kaczmarz.md", "Callbacks" => "generated/howto/callbacks.md", - "Plug-and-Play Regularization" => "plug-and-play.md" + "Plug-and-Play Regularization" => "generated/howto/plug-and-play.md" ], "API Reference" => Any["Solvers" => "API/solvers.md", "Regularization Terms" => "API/regularization.md"], diff --git a/docs/src/API/solvers.md b/docs/src/API/solvers.md index 3f0e4fb..5774048 100644 --- a/docs/src/API/solvers.md +++ b/docs/src/API/solvers.md @@ -5,6 +5,9 @@ REPL one can access this documentation by entering the help mode with `?` ## solve! ```@docs RegularizedLeastSquares.solve!(::AbstractLinearSolver, ::Any) +RegularizedLeastSquares.init!(::AbstractLinearSolver, ::Any) +RegularizedLeastSquares.init!(::AbstractLinearSolver, ::AbstractSolverState, ::AbstractMatrix) + ``` ## ADMM @@ -42,8 +45,11 @@ RegularizedLeastSquares.POGM RegularizedLeastSquares.SplitBregman ``` -## Miscellaneous Functions +## Miscellaneous ```@docs +RegularizedLeastSquares.solverstate +RegularizedLeastSquares.solversolution +RegularizedLeastSquares.solverconvergence RegularizedLeastSquares.StoreSolutionCallback RegularizedLeastSquares.StoreConvergenceCallback RegularizedLeastSquares.CompareSolutionCallback diff --git a/docs/src/index.md b/docs/src/index.md index 912ed18..fee052a 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -4,8 +4,7 @@ ## Introduction -RegularizedLeastSquares.jl is a Julia package for solving large scale linear systems -using different types of algorithms. Ill-conditioned problems arise in many areas of practical interest. To solve these problems, one often resorts to regularization techniques and non-linear problem formulations. This packages provides implementations for a variety of solvers, which are used in fields such as MPI and MRI. +RegularizedLeastSquares.jl is a Julia package for solving large linear systems using various types of algorithms. Ill-conditioned problems arise in many areas of practical interest. Regularisation techniques and nonlinear problem formulations are often used to solve these problems. This package provides implementations for a variety of solvers used in areas such as MPI and MRI. In particular, this package serves as the optimization backend of the Julia packages [MPIReco.jl](https://github.com/MagneticParticleImaging/MPIReco.jl) and [MRIReco.jl](https://github.com/MagneticResonanceImaging/MRIReco.jl). 
The implemented methods range from the $l^2_2$-regularized CGNR method to more general optimizers such as the Alternating Direction of Multipliers Method (ADMM) or the Split-Bregman method. @@ -14,6 +13,25 @@ For convenience, implementations of popular regularizers, such as $l_1$-regulari Depending on the problem, it becomes unfeasible to store the full system matrix at hand. For this purpose, RegularizedLeastSquares.jl allows for the use of matrix-free operators. Such operators can be realized using the interface provided by the package [LinearOperators.jl](https://github.com/JuliaSmoothOptimizers/LinearOperators.jl). Other interfaces can be used as well, as long as the product `*(A,x)` and the adjoint `adjoint(A)` are provided. A number of common matrix-free operators are provided by the package [LinearOperatorColection.jl](https://github.com/JuliaImageRecon/LinearOperatorCollection.jl). +## Features + +* Variety of optimization algorithms optimized for least squares problems +* Support for matrix-free operators +* GPU support + ## Usage * See [Getting Started](@ref) for an introduction to using the package + +## See also + +Packages: +* [ProximalAlgorithms.jl](https://github.com/JuliaFirstOrder/ProximalAlgorithms.jl) +* [ProximalOperators.jl](https://github.com/JuliaFirstOrder/ProximalOperators.jl) +* [Krylov.jl](https://github.com/JuliaSmoothOptimizers/Krylov.jl) +* [RegularizedOptimization.jl](https://github.com/JuliaSmoothOptimizers/RegularizedOptimization.jl) + +Organizations: +* [JuliaNLSolvers](https://github.com/JuliaNLSolvers) +* [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) +* [JuliaFirstOrder](https://github.com/JuliaFirstOrder) \ No newline at end of file diff --git a/docs/src/literate/examples/compressed_sensing.jl b/docs/src/literate/examples/compressed_sensing.jl index 5146e58..4e81d0f 100644 --- a/docs/src/literate/examples/compressed_sensing.jl +++ b/docs/src/literate/examples/compressed_sensing.jl @@ -19,10 +19,10 @@ using Random, LinearOperatorCollection randomIndices = shuffle(eachindex(image)) sampledIndices = sort(randomIndices[1:div(end, 3)]) -# Afterwards we build a sampling operator which samples the image at the selected indices +# Afterwards we build a sampling operator which samples the image at the selected indices. A = SamplingOp(eltype(image), pattern = sampledIndices , shape = size(image)); -# Then we apply the sampling operator to the image to obtain the measurement vector +# Then we apply the sampling operator to the vectorized image to obtain the sampled measurement vector b = A*vec(image); # To visualize our image we can use CairoMakie: @@ -40,8 +40,10 @@ plot_image(fig[1,2], image .* samplingMask, title = "Sampled Image") resize_to_layout!(fig) fig +# As we can see in the right image, only a third of the pixels are sampled. The goal of the inverse problem is to recover the original image from this measurement vector. + # ## Solving the Inverse Problem -# To recover the image from the measurement vector, we solve the TV-regularized least squares problem +# To recover the image from the measurement vector, we solve the TV-regularized least squares problem: # ```math # \begin{equation} # \underset{\mathbf{x}}{argmin} \frac{1}{2}\vert\vert \mathbf{A}\mathbf{x}-\mathbf{b} \vert\vert_2^2 + \vert\vert\mathbf{x}\vert\vert_{\lambda\text{TV}} . 
@@ -53,7 +55,7 @@ fig using RegularizedLeastSquares reg = TVRegularization(0.01; shape=size(image)); -# To solve this CS problem, the Fast Iterative Shrinkage-Thresholding Algorithm (FISTA) can be used. Thus, we build the corresponding solver +# We will use the Fast Iterative Shrinkage-Thresholding Algorithm (FISTA) to solve our inverse problem. Thus, we build the corresponding solver solver = createLinearSolver(FISTA, A; reg=reg, iterations=20); diff --git a/docs/src/literate/examples/computed_tomography.jl b/docs/src/literate/examples/computed_tomography.jl index 0e45da5..2d1fca2 100644 --- a/docs/src/literate/examples/computed_tomography.jl +++ b/docs/src/literate/examples/computed_tomography.jl @@ -37,6 +37,8 @@ plot_image(fig[1,3], backproject(sinogram, angles), title = "Backprojection") resize_to_layout!(fig) fig +# In the figure we can see our original image, the sinogram, and the backprojection of the sinogram. The goal of the inverse problem is to recover the original image from the sinogram. + # ## Solving the Inverse Problem # To recover the image from the measurement vector, we solve the $l^2_2$-regularized least squares problem # ```math @@ -50,8 +52,8 @@ fig using RegularizedLeastSquares reg = L2Regularization(0.001); -# To solve this inverse problem, the Conjugate Gradient Normal Residual (CGNR) algorithm can be used. Thus, we build the corresponding solver - +# To solve this inverse problem, the Conjugate Gradient Normal Residual (CGNR) algorithm can be used. This solver is based on the normal operator of the Radon operator and uses both the forward and adjoint Radon transform internally. +# We now build the corresponding solver solver = createLinearSolver(CGNR, A; reg=reg, iterations=20); # and apply it to our measurement vector diff --git a/docs/src/literate/examples/getting_started.jl b/docs/src/literate/examples/getting_started.jl index 325b34b..750d334 100644 --- a/docs/src/literate/examples/getting_started.jl +++ b/docs/src/literate/examples/getting_started.jl @@ -1,6 +1,6 @@ # # Getting Started -# In this example we will go through a simple example from the field of Compressed Sensing to get familiar with RegularizedLeastSquares.jl. +# In this example we will go through a simple random inverse problem to get familiar with RegularizedLeastSquares.jl. # ## Installation @@ -12,7 +12,7 @@ # ``` # This will download and install the RegularizedLeastSquares.jl package and its dependencies. To install a different version, please consult the [Pkg documentation](https://pkgdocs.julialang.org/dev/managing-packages/#Adding-packages). -# Once the installation is complete, you can import the package in your code with the `using` keyword: +# Once the installation is complete, you can import the package with the `using` keyword: using RegularizedLeastSquares @@ -24,7 +24,7 @@ using RegularizedLeastSquares # \end{equation} # ``` # where $\mathbf{A}$ is a linear operator, $\mathbf{y}$ is the measurement vector, and $\mathbf{R(x)}$ is an (optional) regularization term. -# The aim is to reconstruct the unknown vector $\mathbf{x}$. In this first exampel we will just reconstruct a simple random matrix. For more advanced examples, please refer to the examples. +# The goal is to retrieve an approximation of the unknown vector $\mathbf{x}$. In this first exampel we will just work with simple random arrays. For more advanced examples, please refer to the examples. 
A = rand(32, 16) x = rand(16) diff --git a/docs/src/literate/explanations/regularization.jl b/docs/src/literate/explanations/regularization.jl new file mode 100644 index 0000000..bd9b6c2 --- /dev/null +++ b/docs/src/literate/explanations/regularization.jl @@ -0,0 +1,96 @@ +# # Regularization +# When formulating inverse problems, a regulariser is formulated as an additional cost function which has to be minimised. +# Many algorithms deal with a regularizers $g$, by computing the proximal map: + +# ```math +# \begin{equation} +# prox_g (\mathbf{x}) = \underset{\mathbf{u}}{argmin} \frac{1}{2}\vert\vert \mathbf{u}-\mathbf{x} \vert {\vert}^2 + g(\mathbf{x}). +# \end{equation} +# ``` +# For many regularizers, the proximal map can be computed efficiently in a closed form. + +# In order to implement these proximal mappings, RegularizedLeastSquares.jl defines the following type hierarchy: +# ```julia +# abstract type AbstractRegularization +# prox!(reg::AbstractRegularization, x) +# norm(reg::AbstractRegularization, x) +# ``` +# Here `prox!(reg, x)` is an in-place function which computes the proximal map on the input-vector `x`. The function `norm` computes the value of the corresponding term in the inverse problem. RegularizedLeastSquares.jl provides `AbstractParameterizedRegularization` and `AbstractProjectionRegularization` as core regularization types. + +# ## Parameterized Regularization Terms +# This group of regularization terms features a regularization parameter `λ` that is used during the `prox!` and `norm `computations. Examples of this regulariztion group are `L1`, `L2` or `LLR` (locally low rank) regularization terms. + +# These terms are constructed by supplying a `λ` and optionally term specific keyword arguments: +using RegularizedLeastSquares +l2 = L2Regularization(0.3) +# Parameterized regularization terms implement: +# ```julia +# prox!(reg::AbstractParameterizedRegularization, x, λ) +# norm(reg::AbstractParameterizedRegularization, x, λ) +# ``` +# where `λ` by default is filled with the value used during construction. + +# Invoking `λ` on a parameterized term retrieves its regularization parameter. This can be used in a solver to scale and overwrite the parameter as follows: +prox!(l2, [1.0]) +# +param = λ(l2) +prox!(l2, [1.0], param*0.2) + + +# ## Projection Regularization Terms +# This group of regularization terms implement projections, such as a positivity constraint or a projection with a given convex projection function. +# These are essentially proximal maps where $g(\mathbf{x})$ is the indicator function of a convex set. + +positive = PositiveRegularization() +prox!(positive, [2.0, -0.2]) + +# ## Nested Regularization Terms +# Nested regularization terms are terms that act as decorators to the core regularization terms. +# These terms can be nested around other terms and add functionality to a regularization term, such as scaling `λ` based on the provided operator or applying a transform, such as the Wavelet, to `x`. +# As an example, we can nest a `L1` regularization term around a `Wavelet` operator. 
+ +# First we generate an image and apply a wavelet operator to it: +using Wavelets, LinearOperatorCollection, ImagePhantoms, ImageGeoms +N = 256 +image = shepp_logan(N, SheppLoganToft()) +wop = WaveletOp(Float32, shape = size(image)) + +wavelet_image = reshape(wop*vec(image), size(image)) +wavelet_image = log.(abs.(wavelet_image) .+ 0.01) + +# We will use CairoMakie for visualization: +using CairoMakie +function plot_image(figPos, img; title = "", width = 150, height = 150) + ax = CairoMakie.Axis(figPos; yreversed=true, title, width, height) + hidedecorations!(ax) + heatmap!(ax, img) +end +fig = Figure() +plot_image(fig[1,1], image, title = "Image") +plot_image(fig[1,2], wavelet_image, title = "Wavelet Image") +resize_to_layout!(fig) +fig + +# To apply soft-thresholding in the wavelet domain, we can nest a `L1` regularization term around the `Wavelet` operator: +core = L1Regularization(0.1) +reg = TransformedRegularization(core, wop); + +# We can then apply the proximal map to the image or the image in the wavelet domain: +img_prox_image = prox!(core, copy(vec(image))); +img_prox_wavelet = prox!(reg, copy(vec(image))); + +# We can visualize the result: +img_prox_image = reshape(img_prox_image, size(image)) +img_prox_wavelet = reshape(img_prox_wavelet, size(image)) +plot_image(fig[1,3], img_prox_image, title = "Reg. Image Domain") +plot_image(fig[1,4], img_prox_wavelet, title = "Reg. Wavelet Domain") +resize_to_layout!(fig) +fig + +# Generally, regularization terms can be nested arbitrarly deep, adding new functionality with each layer. Each nested regularization term can return its `inner` regularization term. +# Furthermore, all regularization terms implement the iteration interface to iterate over the nesting. The innermost regularization term of a nested term must be a core regularization term and it can be returned by the `sink` function: +RegularizedLeastSquares.innerreg(reg) == core +# +sink(reg) == core +# +foreach(r -> println(nameof(typeof(r))), reg) \ No newline at end of file diff --git a/docs/src/literate/howto/callbacks.jl b/docs/src/literate/howto/callbacks.jl index 1a4211e..ea25aa9 100644 --- a/docs/src/literate/howto/callbacks.jl +++ b/docs/src/literate/howto/callbacks.jl @@ -1,8 +1,9 @@ # # Callbacks -# For certain reconstructions it is important to monitor the internal state of the solver. RegularizedLeastSquares.jl provides a callback mechanism to allow developres to access this state after each iteration. +# For certain optimization it is important to monitor the internal state of the solver. RegularizedLeastSquares.jl provides a callback mechanism to allow developres to access this state after each iteration. # The package provides a variety of default callbacks, which for example store the solution after each iteration. More information can be found in the API reference for the `solve!` function. # In this example we will revist the compressed sensing compressed sensing [example](../examples/compressed_sensing.md) and implement a custom callback using the do-syntax of the `solve!` function. +# This allows us to implement our callback inline and access the solver state after each iteration. # We first recreate the operator `A` and the measurement vector `b`: using ImagePhantoms, ImageGeoms @@ -39,9 +40,10 @@ solve!(solver, b) do solver, iteration global idx += 1 end end -resize_to_layout!(fig) -fig - # In the callback we have to copy the solution, as the solver will update it in place. 
# As is explained in the solver section, each features fields that are intended to be immutable during a `solve!` call and a state that is modified in each iteration. # Depending on the solvers parameters and the measurement input, the state can differ in its fields and their type. Ideally, one tries to avoid accessing the state directly and uses the provided functions to access the state. + +# The resulting figure shows the reconstructed image after 0, 4, 8, 12, 16 and 20 iterations: +resize_to_layout!(fig) +fig diff --git a/docs/src/literate/howto/efficient_kaczmarz.jl b/docs/src/literate/howto/efficient_kaczmarz.jl index e69de29..51b16ac 100644 --- a/docs/src/literate/howto/efficient_kaczmarz.jl +++ b/docs/src/literate/howto/efficient_kaczmarz.jl @@ -0,0 +1,31 @@ +# # Efficient Kaczmarz +# Unlike many of the other solvers provided by RegularizedLeastSquares.jl, the Kaczmarz method does not utilize a matrix-vector product with the operator $\mathbf{A}$ nor the normal operator $\mathbf{A*A}$. Instead, it uses the rows of $\mathbf{A}$ to update the solution iteratively. +# Efficient Kaczmarz implementation therefore require very efficient dot products with the rows of $\mathbf{A}$. In RegularizedLeastSquares.jl, this is achieved with the `dot_with_matrix_row` function. +using RegularizedLeastSquares +A = randn(256, 256) +x = randn(256) +b = A*x; + +# The `dot_with_matrix_row` function calculates the dot product between a row of A and the current approximate solution of x: +row = 1 +isapprox(RegularizedLeastSquares.dot_with_matrix_row(A, x, row), sum(A[row, :] .* x)) + +# Since in Julia, dense arrays are stored in column-major order, such a row-based operation is quite inefficient. A workaround is to transpose the matrix then pass it to a Kaczmarz solver. +At = collect(transpose(A)) +A_eff = transpose(At) + +# Note that the transpose function can return a lazy transpose object, so we first collect the transpose into a dense matrix. +# Then we transpose it again to get the efficient representation of the matrix. + +# We can compare the performance using the BenchmarkTools.jl package. First for the original matrix: +using BenchmarkTools +solver = createLinearSolver(Kaczmarz, A; reg = L2Regularization(0.0001), iterations=100) +@benchmark solve!(solver, b) samples = 100 + +# And then for the efficient matrix: +solver_eff = createLinearSolver(Kaczmarz, A_eff; reg = L2Regularization(0.0001), iterations=100) +@benchmark solve!(solver_eff, b) samples = 100 + +# We can also combine the efficient matrix with a weighting matrix, as is shown in the [Weighting](weighting.md) example. + +# Custom operators need to implement the `dot_with_matrix_row` function to be used with the Kaczmarz solver. Ideally, such an implementation is allocation free. \ No newline at end of file diff --git a/docs/src/literate/howto/gpu_acceleration.jl b/docs/src/literate/howto/gpu_acceleration.jl index e69de29..b6a5fa1 100644 --- a/docs/src/literate/howto/gpu_acceleration.jl +++ b/docs/src/literate/howto/gpu_acceleration.jl @@ -0,0 +1,77 @@ +# # GPU Acceleration +# RegularizedLeastSquares.jl supports generic GPU acceleration. This means that the user can use any GPU array type that supports the GPUArrays interface. This includes [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl), [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl), and [Metal.jl](https://github.com/JuliaGPU/Metal.jl). +# In this example we will use the package JLArrays.jl which provides a reference implementation for GPUArrays, that can runs on CPUs. 
+using JLArrays +gpu = JLArray; + +# To use the following examples on an actual GPU, load the appropraite package replace `JLArray` with the respective GPU array type, for example: +# ```julia +# using CUDA +# gpu = CuArray +# ``` + +# At first we will look at an example of dense GPU arrays. +using RegularizedLeastSquares +A = gpu(rand(Float32, 32, 16)) +x = gpu(rand(Float32, 16)) +b = A*x; + +# Solvers adapt their states based on the type of the given measurement vector. This means that the solver will automatically switch to GPU acceleration if a GPU array is passed as the measurement vector. +solver = createLinearSolver(CGNR, A; reg = L2Regularization(0.0001), iterations=32); +x_approx = solve!(solver, b) + +# This adaption does not include the operator. So if we want to compare with CPU result, we need to construct a new solver with a CPU operator. +solver = createLinearSolver(CGNR, Array(A); reg = L2Regularization(0.0001), iterations=32); +x_cpu = solve!(solver, Array(b)) +isapprox(Array(x_approx), x_cpu) + +# ## Matrix-Free Operators +# A special case is the usage of matrix-free operators. Since these operators do not have a concrete matrix representation, their GPU support depends on their implementation. +# Since not all multiplications within a solver approximation are in-place, the operator also needs to support the `*` operation and construct an appropriate result vector. +# For matrix-free operators based on LinearOperators.jl, this can be achieved by implementing the `LinearOperators.storage_type` method. + +# In the following, we will take another look at the CS example and execute it on the GPU. Note that for the JLArray example we chose a small phantom, since the JLArray implementation is not optimized for performance: +using ImagePhantoms, ImageGeoms +N = 32 +image = shepp_logan(N, SheppLoganToft()) + +using Random, LinearOperatorCollection +randomIndices = shuffle(eachindex(image)) +sampledIndices = sort(randomIndices[1:div(end, 3)]); + +# To construct the operator, we need to convert the indices to a GPU array. +# We also need to specify the correct storage type. In both LinearOperators.jl and LinearOperatorCollection.jl this is done with the `S` keyword argument. 
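# For a custom operator built directly on LinearOperators.jl, a hedged sketch of such a method could look like the following (the operator type `MyOperator` and its `buffer` field are illustrative placeholders):
# ```julia
# LinearOperators.storage_type(op::MyOperator) = typeof(op.buffer)
# ```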
+gpu_indices = gpu(sampledIndices) +A = SamplingOp(eltype(image), pattern = gpu_indices, shape = size(image), S = typeof(b)); + +# Let's inspect the storage type of the operator: +using LinearOperatorCollection.LinearOperators +LinearOperators.storage_type(A) + +# Afterwards we can use the operator as usual: +b = A*vec(gpu(image)); + +# And use it in the solver: +using RegularizedLeastSquares +reg = TVRegularization(0.01; shape=size(image)) +solver = createLinearSolver(FISTA, A; reg=reg, iterations=20) +img_approx = solve!(solver,b); + +# To visualize the reconstructed image, we need to reshape the result vector to the correct shape and convert it to a CPU array: +img_approx = reshape(Array(img_approx),size(image)) + +# We will again use CairoMakie for visualization: +using CairoMakie +function plot_image(figPos, img; title = "", width = 150, height = 150) + ax = CairoMakie.Axis(figPos; yreversed=true, title, width, height) + hidedecorations!(ax) + heatmap!(ax, img) +end +fig = Figure() +plot_image(fig[1,1], image, title = "Image") +samplingMask = fill(false, size(image)) +samplingMask[sampledIndices] .= true +plot_image(fig[1,2], image .* samplingMask, title = "Sampled Image") +plot_image(fig[1,3], img_approx, title = "Reconstructed Image") +resize_to_layout!(fig) +fig \ No newline at end of file diff --git a/docs/src/literate/howto/multi_threading.jl b/docs/src/literate/howto/multi_threading.jl index a8f2ae3..84b5389 100644 --- a/docs/src/literate/howto/multi_threading.jl +++ b/docs/src/literate/howto/multi_threading.jl @@ -1,6 +1,5 @@ # # Multi-Threading -# There are three different kinds of multi-threading in RegularizedLeastSquares.jl, two of which are transparent to the solvers themselves. -# In the following examples we will use the Threads.@threads macro for multi-threading, but the concepts are applicable to other multi-threading options as well. +# There are different ways multi-threading can be used with RegularizedLeastSquares.jl. To use multi-threading in Julia, one needs to start their session with multi-threads, see the [Julia documentation](https://docs.julialang.org/en/v1/manual/multi-threading/) for more information. # ## Solver Based Multi-Threading # This type of multi-threading is transparent to the solver and is applicable if the total solution is composed of individual solutions that can be solved in parallel. @@ -20,7 +19,7 @@ end # ## Operator Based Multi-Threading # This type of multi-threading involves linear operators or proximal maps that can be implemnted in parallel. # Examples of this include the proximal map of the TV regularization term, which is based on the multi-threaded GradientOp from LinearOperatorCollection. -# GPU acceleration also falls under this approach. +# GPU acceleration also falls under this approach, see [GPU Acceleration](gpu_acceleration.md) for more information. # ## Measurement Based Multi-Threading # This level of multi-threading applies the same solver (and its parameters) to multiple measurement vectors or rather a measurement matrix B. @@ -35,32 +34,35 @@ solver = createLinearSolver(CGNR, A; iterations=32) x_approx = solve!(solver, B) size(x_approx) -# The previous `solve!` call was still executed sequentially. To execute it in parallel, we have to pass a MultiThreadingState to the `solve!` call: +# The previous `solve!` call was still executed sequentially. To execute it in parallel, we have to specify a multi-threaded `scheduler` as a keyword-argument of the `solve!` call. 
+# RegularizedLeastSquares.jl provides a `MultiThreadingState` scheduler that can be used for this purpose. This scheduler is based on the `Threads.@threads` macro:
 x_multi = solve!(solver, B; scheduler = MultiThreadingState)
 x_approx == x_multi
 
-# It is possible to implement custom scheduling. The following pseudo-code shows how to implement this for the FLoop.jl package:
+# ## Custom Scheduling
+# It is possible to implement custom scheduling. The following code shows how to implement this for the `Threads.@spawn` macro. Usually, one would do this to implement multi-threading with a package such as FLoops.jl, or with ThreadPools.jl for thread pinning:
 
-# Since most solver have conv. criteria, they can finish at different iteration numbers, which we track with the active flags.
-mutable struct FloopState{S, ST <: AbstractSolverState{S}} <: AbstractMatrixSolverState{S}
-  states::Vector{ST}
-  active::Vector{Bool}
-  FloopState(states::Vector{ST}) where {S, ST <: AbstractSolverState{S}} = new{S, ST}(states, fill(true, length(states)))
-end
+# Since most solvers have convergence criteria, they can finish at different iteration numbers. We track this information with the `active` flags.
+mutable struct SpawnState{S, ST <: AbstractSolverState{S}} <: RegularizedLeastSquares.AbstractMatrixSolverState{S}
+  states::Vector{ST}
+  active::Vector{Bool}
+  SpawnState(states::Vector{ST}) where {S, ST <: AbstractSolverState{S}} = new{S, ST}(states, fill(true, length(states)))
+end
 
-# To hook into the existing init! code we only have to supply a method that gets a copyable "vector" state. This will invoke our FloopState constructor with copies of the given state.
-prepareMultiStates(solver::AbstractLinearSolver, state::FloopState, b::AbstractMatrix) = prepareMultiStates(solver, first(state.states), b)
+# To hook into the existing init! code we only have to supply a method that gets a copyable "vector" state. This will invoke our SpawnState constructor with copies of the given state.
+RegularizedLeastSquares.prepareMatrixStates(solver::AbstractLinearSolver, state::SpawnState, b::AbstractMatrix) = RegularizedLeastSquares.prepareMatrixStates(solver, first(state.states), b);
 
 # We specialise the iterate function which is called with the idx of still active states
-#function iterate(solver::AbstractLinearSolver, state::FloopState, activeIdx)
-#  @floop for i in activeIdx
-#    res = iterate(solver, state.states[i])
-#    if isnothing(res)
-#      state.active[i] = false
-#    end
-#  end
-#  return state.active, state
-#end
+function Base.iterate(solver::AbstractLinearSolver, state::SpawnState, activeIdx)
+  @sync for i in activeIdx
+    Threads.@spawn begin
+      res = iterate(solver, state.states[i])
+      if isnothing(res)
+        state.active[i] = false
+      end
+    end
+  end
+  return state.active, state
+end
 
-# solver = createLinearSolver(CGNR, A, ...)
-# solve!(solver, B; scheduler = FloopState)
\ No newline at end of file
+# Now we can simply use the `SpawnState` scheduler in the `solve!` call:
+x_custom = solve!(solver, B; scheduler = SpawnState)
+x_approx == x_custom
\ No newline at end of file
diff --git a/docs/src/literate/howto/normal_operator.jl b/docs/src/literate/howto/normal_operator.jl
index e69de29..bded38d 100644
--- a/docs/src/literate/howto/normal_operator.jl
+++ b/docs/src/literate/howto/normal_operator.jl
@@ -0,0 +1,70 @@
+# # Normal Operator
+# Many solvers in RegularizedLeastSquares.jl are based on the normal operator $\mathbf{A}^*\mathbf{A}$ of the linear operator $\mathbf{A}$.
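+# As a brief sanity check (a small sketch added here for clarity, not part of the original example), applying the normal operator to a vector is the same as applying `A` followed by `adjoint(A)`:
+# ```julia
+# A = randn(4, 3); x = randn(3)
+# (adjoint(A) * A) * x ≈ adjoint(A) * (A * x)  # true
+# ```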
+#
+# Solvers such as ADMM, FISTA and POGM generally solve optimization problems of the form:
+# ```math
+# \begin{equation}
+#   \underset{\mathbf{x}}{argmin} \mathbf{f(x)} + \mathbf{R(x)}
+# \end{equation}
+# ```
+# and require the gradient of the function $\mathbf{f(x)}$. In this package we specialise the function $\mathbf{f(x)}$ to the least squares norm:
+# ```math
+# \begin{equation}
+#   \mathbf{f(x)} = \frac{1}{2}\vert\vert \mathbf{A}\mathbf{x}-\mathbf{b} \vert\vert^2_2
+# \end{equation}
+# ```
+# The gradient of this function is:
+# ```math
+# \begin{equation}
+#   \nabla \mathbf{f(x)} = \mathbf{A}^*(\mathbf{A}\mathbf{x}-\mathbf{b}) = \mathbf{A}^*\mathbf{Ax} - \mathbf{A}^*\mathbf{b}
+# \end{equation}
+# ```
+# Similarly, the conjugate gradient normal residual (CGNR) algorithm applies the conjugate gradient algorithm to the normal equation:
+# ```math
+# \begin{equation}
+#   \mathbf{A}^*\mathbf{A}\mathbf{x} = \mathbf{A}^*\mathbf{b}
+# \end{equation}
+# ```
+# The normal operator can be passed directly to these solvers; otherwise it is computed internally.
+using RegularizedLeastSquares
+A = randn(32, 16)
+x = randn(16)
+b = A*x
+
+solver = createLinearSolver(CGNR, A; AHA = adjoint(A) * A, reg = L2Regularization(0.0001), iterations=32);
+x_approx = solve!(solver, b)
+
+# The normal operator can also be computed using the `normalOperator` function from LinearOperatorCollection.jl. This is useful if the normal operator is not directly available or shouldn't be stored in memory.
+# This function is opinionated and attempts to optimize the resulting operator for iterative applications. Specifying a custom method for a custom operator allows one to control this optimization.
+
+# An example of such an optimization is a matrix-free weighting of $\mathbf{A}$ as shown in the [Weighting](weighting.md) example:
+using LinearOperatorCollection
+weights = rand(32)
+WA = ProdOp(WeightingOp(weights), A)
+AHA = LinearOperatorCollection.normalOperator(WA)
+
+# Without an optimization, a matrix-free product would apply the following operator in each iteration:
+# ```math
+# \begin{equation}
+#   (\mathbf{WA})^*\mathbf{WA} = \mathbf{A}^*\mathbf{W}^*\mathbf{W}\mathbf{A}
+# \end{equation}
+# ```
+# This is not efficient. Instead, the normal operator can be optimized by computing the combined weighting matrix once:
+# ```math
+# \begin{equation}
+#   \tilde{\mathbf{W}} = \mathbf{W}^*\mathbf{W}
+# \end{equation}
+# ```
+# and then applying the following in each iteration:
+# ```math
+# \begin{equation}
+#   \mathbf{A}^*\tilde{\mathbf{W}}\mathbf{A}
+# \end{equation}
+# ```
+
+# The optimized normal operator can then be passed to the solver:
+solver = createLinearSolver(CGNR, WA; AHA = AHA, reg = L2Regularization(0.0001), iterations=32);
+x_approx2 = solve!(solver, weights .* b)
+# Of course, it is also possible to optimize a normal operator by other means and pass it to the solver via the `AHA` keyword argument.
+
+# It is also possible to only supply the normal operator to these solvers; however, one then needs to supply $\mathbf{A}^*\mathbf{b}$ instead of $\mathbf{b}$.
\ No newline at end of file
diff --git a/docs/src/literate/howto/plug-and-play.jl b/docs/src/literate/howto/plug-and-play.jl
new file mode 100644
index 0000000..096707e
--- /dev/null
+++ b/docs/src/literate/howto/plug-and-play.jl
@@ -0,0 +1,30 @@
+# # Plug-and-Play Regularization
+# A group of regularization terms that cannot be directly written down as a function are learned plug-and-play (PnP) priors.
+# These are terms based on deep neural networks, which are trained to implement the proximal map corresponding to the regularization term.
+# Such a PnP prior can be used in the same way as any other regularization term.
+
+# The following example shows how to use a PnP prior in the context of the `Kaczmarz` solver.
+using RegularizedLeastSquares
+A = randn(32, 16)
+x = randn(16)
+b = A*x;
+
+# For the documentation we will just use the identity function as a placeholder for the PnP prior.
+model = identity
+
+# In practice, you would replace this with a neural network:
+# ```julia
+# using Flux
+# model = Flux.loadmodel!(model, ...)
+# ```
+
+# The model can then be used together with the `PnPRegularization` term:
+reg = PnPRegularization(1.0; model = model, shape = [16]);
+
+# Since models often expect a specific input range, we can use the `MinMaxTransform` to normalize the input:
+reg = PnPRegularization(1.0; model = model, shape = [16], input_transform = RegularizedLeastSquares.MinMaxTransform);
+# Custom input transforms can be implemented by passing something callable as the `input_transform` keyword argument. For more details see the PnPRegularization documentation.
+
+# The regularization term can then be used in the solver:
+solver = createLinearSolver(Kaczmarz, A; reg = reg, iterations = 32)
+x_approx = solve!(solver, b)
diff --git a/docs/src/literate/howto/weighting.jl b/docs/src/literate/howto/weighting.jl
index 9476ac5..54fc6eb 100644
--- a/docs/src/literate/howto/weighting.jl
+++ b/docs/src/literate/howto/weighting.jl
@@ -17,8 +17,12 @@ using RegularizedLeastSquares, LinearOperatorCollection, LinearAlgebra
 A = rand(32, 16)
 x = rand(16)
-b = A*x
-weights = map(row -> 1/rownorm²(A, row), 1:size(A, 1))
+b = A*x;
+
+# As a weighting matrix, we will use the reciprocal of the row energy of the matrix `A`.
+weights = map(row -> 1/rownorm²(A, row), 1:size(A, 1));
+
+# First, let us solve the problem with a manually weighted system matrix and measurement vector.
 WA = diagm(weights) * A
 solver = createLinearSolver(Kaczmarz, WA; reg = L2Regularization(0.0001), iterations=10)
 x_approx = solve!(solver, weights .* b);
diff --git a/docs/src/plug-and-play.md b/docs/src/plug-and-play.md
deleted file mode 100644
index e69de29..0000000
diff --git a/docs/src/regularization.md b/docs/src/regularization.md
deleted file mode 100644
index ddcb328..0000000
--- a/docs/src/regularization.md
+++ /dev/null
@@ -1,91 +0,0 @@
-```@meta
-DocTestSetup = quote
-    using RegularizedLeastSquares, Wavelets, LinearOperatorCollection
-end
-```
-# Regularization
-When formulating inverse problems, a Regularizer is formulated as an additional term in a cost function, which has to be minimized. Popular optimizers often deal with a regularizers $g$, by computing the proximal map
-
-```math
-\begin{equation}
-  prox_g (\mathbf{x}) = \underset{\mathbf{u}}{argmin} \frac{1}{2}\vert\vert \mathbf{u}-\mathbf{x} \vert {\vert}^2 + g(\mathbf{x}).
-\end{equation}
-```
-
-In order to implement those kinds of algorithms,RegularizedLeastSquares defines the following type hierarchy:
-```julia
-abstract type AbstractRegularization
-prox!(reg::AbstractRegularization, x)
-norm(reg::AbstractRegularization, x)
-```
-Here `prox!(reg, x)` is an in-place function which computes the proximal map on the input-vector `x`. The function `norm` computes the value of the corresponding term in the inverse problem. RegularizedLeastSquares.jl provides `AbstractParameterizedRegularization` and `AbstractProjectionRegularization` as core regularization types.
- -## Parameterized Regularization Terms -This group of regularization terms features a regularization parameter `λ` that is used during the `prox!` and `norm `computations. Examples of this regulariztion group are `L1`, `L2` or `LLR` (locally low rank) regularization terms. - -These terms are constructed by supplying a `λ` and optionally term specific keyword arguments: - -```jldoctest l2 -julia> l2 = L2Regularization(0.3) -L2Regularization{Float64}(0.3) -``` -Parameterized regularization terms implement: -```julia -prox!(reg::AbstractParameterizedRegularization, x, λ) -norm(reg::AbstractParameterizedRegularization, x, λ) -``` -where `λ` by default is filled with the value used during construction. - -Invoking `λ` on a parameterized term retrieves its regularization parameter. This can be used in a solver to scale and overwrite the parameter as follows: -```jldoctest l2 -julia> prox!(l2, [1.0]) -1-element Vector{Float64}: - 0.625 - -julia> param = λ(l2) -0.3 - -julia> prox!(l2, [1.0], param*0.2) -1-element Vector{Float64}: - 0.8928571428571428 - -``` - -## Projection Regularization Terms -This group of regularization terms implement projections, such as a positivity constraint or a projection with a given convex projection function. - -```jldoctest pos -julia> positive = PositiveRegularization() -PositiveRegularization() - -julia> prox!(positive, [2.0, -0.2]) -2-element Vector{Float64}: - 2.0 - 0.0 -``` - -## Nested Regularization Terms -Nested regularization terms are terms that act as decorators to the core regularization terms. These terms can be nested around other terms and add functionality to a regularization term, such as scaling `λ` based on the provided system matrix or applying a transform, such as the Wavelet, to `x`: - -```jldoctest wavelet -julia> core = L1Regularization(0.8) -L1Regularization{Float64}(0.8) - -julia> wop = WaveletOp(Float32, shape = (32,32)); - -julia> reg = TransformedRegularization(core, wop); - -julia> prox!(reg, randn(32*32)); # Apply soft-thresholding in Wavelet domain -``` -The type of regularization term a nested term can be wrapped around depends on the concrete type of the nested term. However generally, they can be nested arbitrarly deep, adding new functionality with each layer. Each nested regularization term can return its `inner` regularization. Furthermore, all regularization terms implement the iteration interface to iterate over the nesting. The innermost regularization term of a nested term must be a core regularization term and it can be returned by the `sink` function: -```jldoctest wavelet -julia> RegularizedLeastSquares.innerreg(reg) == core -true - -julia> sink(reg) == core -true - -julia> foreach(r -> println(nameof(typeof(r))), reg) -TransformedRegularization -L1Regularization -``` \ No newline at end of file diff --git a/docs/src/solvers.md b/docs/src/solvers.md index 5dcacf3..84a96de 100644 --- a/docs/src/solvers.md +++ b/docs/src/solvers.md @@ -1,7 +1,7 @@ # Solvers RegularizedLeastSquares.jl provides a variety of solvers, which are used in fields such as MPI and MRI. 
 The following is a non-exhaustive list of the implemented solvers:
-* Kaczmarz algorithm (`Kaczmarz`)
+* Kaczmarz algorithm (`Kaczmarz`, also known as the algebraic reconstruction technique)
 * Conjugate Gradients Normal Residual method (`CGNR`)
 * Fast Iterative Shrinkage Thresholding Algorithm (`FISTA`)
 * Alternating Direction of Multipliers Method (`ADMM`)
@@ -13,27 +13,73 @@ abstract type AbstractLinearSolver
 ```
 The type hierarchy is further differentiated into solver categories such as `AbstractRowAtionSolver`, `AbstractPrimalDualSolver` or `AbstractProximalGradientSolver`. A list of all available solvers can be returned by the `linearSolverList` function.
-## Creating a Solver
+
+## Solver Construction
 To create a solver, one can invoke the method `createLinearSolver` as in
 ```julia
-solver = createLinearSolver(ADMM, A; reg=reg, kwargs...)
+solver = createLinearSolver(CGNR, A; reg=reg, kwargs...)
 ```
-Here `A` denotes the system matrix and reg are the [Regularization](regularization.md) terms to be used by the solver. All further solver parameters can be passed as keyword arguments and are solver specific. To make things more compact, it can be usefull to collect all parameters
+Here `A` denotes the operator and `reg` are the [Regularization](generated/explanations/regularization.md) terms to be used by the solver. All further solver parameters can be passed as keyword arguments and are solver specific. To make things more compact, it can be useful to collect all parameters
 in a `Dict{Symbol,Any}`. In this way, the code snippet above can be written as
 ```julia
 params=Dict{Symbol,Any}()
 params[:reg] = ...
 ...
-solver = createLinearSolver(ADMM, A; params...)
+solver = createLinearSolver(CGNR, A; params...)
 ```
 This notation can be convenient when a large number of parameters are set manually.
 
-It is possible to check if a given solver is applicable to the wanted arguments, as not all solvers are applicable to all system matrix and data (element) types or regularization terms combinations. This is achieved with the `isapplicable` function:
+It is also possible to construct a solver directly with its specific keyword arguments:
+
+```julia
+solver = CGNR(A, reg = reg, ...)
+```
+
+## Solver Usage
+Once constructed, a solver can be used to approximate a solution to a given measurement vector:
+
+```julia
+x_approx = solve!(solver, b; kwargs...)
+```
+The keyword arguments can be used to supply an initial solution `x0`, one or more `callbacks` to interact with and monitor the solver's state, and more. See the How-To pages and the API reference for more information.
+
+It is also possible to explicitly invoke the solver's iterations using Julia's iterate interface:
+```julia
+init!(solver, b; kwargs...)
+for (iteration, x_approx) in enumerate(solver)
+  println("Iteration $iteration")
+end
+```
+## Solver Internals
+The fields of a solver can be divided into two groups. The first group consists of immutable fields that do not change during iterations, while the second group consists of mutable fields that do change. An example of the first group is the operator itself; examples of the second group are the current solution and the number of the current iteration.
+
+The second group is usually encapsulated in its own state struct:
 ```julia
-isapplicable(Kaczmarz, A, x, [L21Regularization(0.4f0)])
-false
+mutable struct Solver{matT, ...}
+  A::matT
+  # Other "static" fields
+  state::AbstractSolverState{<:Solver}
+end
+
+mutable struct SolverState{T, tempT} <: AbstractSolverState{Solver}
+  x::tempT
+  rho::T
+  # ...
+  iteration::Int64
+end
 ```
+States are subtypes of the parametric `AbstractSolverState{S}` type. The state fields of a solver can be exchanged with a different state belonging to the correct solver `S`. This means that states can be used to realize custom variants of an existing solver:
+```julia
+mutable struct VariantState{T, tempT} <: AbstractSolverState{Solver}
+  x::tempT
+  other::tempT
+  # ...
+  iteration::Int64
+end
+
+SolverVariant(A; kwargs...) = Solver(A, VariantState(kwargs...))
 
-For a given set of arguments the list of applicable solvers can be retrieved with `applicableSolverList`.
\ No newline at end of file
+function iterate(solver::Solver, state::VariantState)
+  # Custom iteration
+end
\ No newline at end of file
diff --git a/src/MultiThreading.jl b/src/MultiThreading.jl
index 399659d..eb141d9 100644
--- a/src/MultiThreading.jl
+++ b/src/MultiThreading.jl
@@ -1,17 +1,32 @@
 export SequentialState, MultiThreadingState, prepareMatrixStates
 abstract type AbstractMatrixSolverState{S} <: AbstractSolverState{S} end
+"""
+    SequentialState(states::Vector{ST}) where {S, ST <: AbstractSolverState{S}}
+
+SequentialState is a scheduler that runs each active state sequentially per iteration.
+"""
 mutable struct SequentialState{S, ST <: AbstractSolverState{S}} <: AbstractMatrixSolverState{S}
   states::Vector{ST}
   active::Vector{Bool}
   SequentialState(states::Vector{ST}) where {S, ST <: AbstractSolverState{S}} = new{S, ST}(states, fill(true, length(states)))
 end
+"""
+    MultiThreadingState(states::Vector{ST}) where {S, ST <: AbstractSolverState{S}}
+
+MultiThreadingState is a scheduler that runs each active state in parallel per iteration.
+"""
 mutable struct MultiThreadingState{S, ST <: AbstractSolverState{S}} <: AbstractMatrixSolverState{S}
   states::Vector{ST}
   active::Vector{Bool}
   MultiThreadingState(states::Vector{ST}) where {S, ST <: AbstractSolverState{S}} = new{S, ST}(states, fill(true, length(states)))
 end
+"""
+    init!(solver::AbstractLinearSolver, state::AbstractSolverState, b::AbstractMatrix; scheduler = SequentialState, kwargs...)
+
+Initialize the solver with each column of `b` and pass the corresponding states to the scheduler.
+"""
 function init!(solver::AbstractLinearSolver, state::AbstractSolverState, b::AbstractMatrix; scheduler = SequentialState, kwargs...)
   states = prepareMatrixStates(solver, state, b)
   multiState = scheduler(states)
diff --git a/src/RegularizedLeastSquares.jl b/src/RegularizedLeastSquares.jl
index 39431f9..bad980f 100644
--- a/src/RegularizedLeastSquares.jl
+++ b/src/RegularizedLeastSquares.jl
@@ -15,7 +15,7 @@ using StatsBase
 using LinearOperatorCollection
 using InteractiveUtils
 
-export AbstractLinearSolver, createLinearSolver, init!, deinit, solve!, linearSolverList, linearSolverListReal, applicableSolverList, power_iterations
+export AbstractLinearSolver, AbstractSolverState, createLinearSolver, init!, deinit, solve!, linearSolverList, linearSolverListReal, applicableSolverList, power_iterations
 
 abstract type AbstractLinearSolver end
 abstract type AbstractSolverState{S} end
@@ -173,9 +173,19 @@ Return a named tuple of the solvers current convergence metrics
 """
 function solverconvergence end
 
+"""
+    solverstate(solver::AbstractLinearSolver)
+
+Return the current state of the solver.
+"""
 solverstate(solver::AbstractLinearSolver) = solver.state
 solverconvergence(solver::AbstractLinearSolver) = solverconvergence(solverstate(solver))
 
+"""
+    init!(solver::AbstractLinearSolver, b; kwargs...)
+ +Prepare the solver for iteration based on the given data vector `b` and `kwargs`. +""" init!(solver::AbstractLinearSolver, b; kwargs...) = init!(solver, solverstate(solver), b; kwargs...) iterate(solver::AbstractLinearSolver) = iterate(solver, solverstate(solver))