Merge pull request #15 from KGrewal1/v072
v072
KGrewal1 authored Oct 5, 2024
2 parents f2a323b + 1748f75 commit dbbcd66
Showing 5 changed files with 98 additions and 30 deletions.
14 changes: 8 additions & 6 deletions .github/workflows/codecov.yml
@@ -13,15 +13,17 @@ jobs:
   coverage:
     name: Code coverage
     runs-on: ubuntu-latest
-    container:
-      image: xd009642/tarpaulin
-      options: --security-opt seccomp=unconfined
     steps:
       - name: Checkout repository
        uses: actions/checkout@v3
-      - name: Generate code coverage
-        run: |
-          cargo tarpaulin --verbose --exclude-files benches/* --out xml
+      - name: Setup tools
+        uses: dtolnay/rust-toolchain@stable
+      - name: Install binstall
+        run: curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash
+      - name: Install cargo-tarpaulin
+        run: cargo clean && cargo binstall cargo-tarpaulin -y
+      - name: Run tests with coverage
+        run: cargo tarpaulin --verbose --exclude-files benches/* --out xml
       - name: Upload to codecov.io
        uses: codecov/codecov-action@v3
        with:
8 changes: 4 additions & 4 deletions Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "candle-optimisers"
-version = "0.6.0"
+version = "0.7.2"
 edition = "2021"
 readme = "README.md"
 license = "MIT"
@@ -15,15 +15,15 @@ exclude = ["*.ipynb"]

 [dependencies]

-candle-core = "0.6.0"
-candle-nn = "0.6.0"
+candle-core = "0.7.2"
+candle-nn = "0.7.2"
 log = "0.4.20"


 [dev-dependencies]
 anyhow = { version = "1", features = ["backtrace"] }
 assert_approx_eq = "1.1.0"
-candle-datasets = "0.6.0"
+candle-datasets = "0.7.2"
 clap = { version = "4.4.6", features = ["derive"] }
 criterion = { version = "0.5.1", features = ["html_reports"] }

18 changes: 2 additions & 16 deletions README.md
@@ -63,20 +63,6 @@ to use the cuda backend.
 cargo add --git https://github.com/KGrewal1/optimisers.git candle-optimisers
 ```

-## To do
+## Documentation

-Currently unimplemented from pytorch:
-
-* SparseAdam (unsure how to treat sparse tensors in candle)
-
-* ASGD (no pseudocode)
-
-* Rprop (need to reformulate in terms of tensors)
-
-## Notes
-
-For development, to track state of pytorch methods, use:
-
-```python
-print(optimiser.state)
-```
+Documentation is available on the rust docs site <https://docs.rs/candle-optimisers>
6 changes: 3 additions & 3 deletions paper/paper.md
@@ -19,12 +19,12 @@ bibliography: paper.bib
 # Summary

 `candle-optimisers` is a crate for optimisers written in Rust for use with candle (@candle) a lightweight machine learning framework. The crate offers a set of
-optimisers for training neural networks. This allows network training to be done with far lower overhead than using a full python framework such as PyTorch or Tensorflow.
+optimisers for training neural networks.

 # Statement of need

 Rust provides the opportunity for the development of high performance machine learning libraries, with a leaner runtime. However, there is a lack of optimisation algorithms implemented in Rust,
-with libraries currently implementing only some combination of Adam, AdamW, SGD and RMSProp.
+with machine learning libraries currently implementing only some combination of Adam, AdamW, SGD and RMSProp.
 This crate aims to provide a set of complete set of optimisation algorithms for use with candle.
 This will allow Rust to be used for the training of models more easily.

@@ -55,4 +55,4 @@ This library implements the following optimisation algorithms:
 Furthermore, decoupled weight decay (@weightdecay) is implemented for all of the adaptive methods listed and SGD,
 allowing for use of the method beyond solely AdamW.

-# References
+# References
82 changes: 81 additions & 1 deletion src/lib.rs
@@ -1,6 +1,86 @@
 /*!
-Optimisers for use with the candle framework for lightweight machine learning.
+Optimisers for use with the [candle](https://github.com/huggingface/candle) framework for lightweight machine learning.
+Apart from LBFGS, these all implement the [`candle_nn::optim::Optimizer`] trait from candle-nn
+# Example
+Training an MNIST model using the Adam optimiser
+```
+# use candle_core::{Result, Tensor};
+# use candle_core::{DType, D};
+# use candle_nn::{loss, ops, VarBuilder, VarMap, optim::Optimizer};
+# use candle_optimisers::{
+#     adam::{Adam, ParamsAdam}
+# };
+#
+# pub trait Model: Sized {
+#     fn new(vs: VarBuilder) -> Result<Self>;
+#     fn forward(&self, xs: &Tensor) -> Result<Tensor>;
+# }
+#
+# pub fn training_loop<M: Model>(
+#     m: candle_datasets::vision::Dataset,
+#     varmap: &VarMap,
+#     model: M,
+# ) -> anyhow::Result<()> {
+#     // check to see if cuda device availabke
+#     let dev = candle_core::Device::cuda_if_available(0)?;
+#     // get the input from the dataset and put on device
+#     let train_images = m.train_images.to_device(&dev)?;
+#     // get the training labels on the device
+#     let train_labels = m.train_labels.to_dtype(DType::U32)?.to_device(&dev)?;
+#
+#
+#     // load the test images
+#     let test_images = m.test_images.to_device(&dev)?;
+#     // load the test labels
+#     let test_labels = m.test_labels.to_dtype(DType::U32)?.to_device(&dev)?;
+#
+// create the Adam optimiser
+// set the learning rate to 0.004 and use the default parameters for everything else
+let params = ParamsAdam {
+    lr: 0.004,
+    ..Default::default()
+};
+// create the optimiser by passing in the variable to be optimised and the parameters
+let mut optimiser = Adam::new(varmap.all_vars(), params)?;
+// loop for model optimisation
+for epoch in 0..100 {
+    // run the model forwards
+    // get log probabilities of results
+    let logits = model.forward(&train_images)?;
+    // softmax the log probabilities
+    let log_sm = ops::log_softmax(&logits, D::Minus1)?;
+    // get the loss
+    let loss = loss::nll(&log_sm, &train_labels)?;
+    // step the tensors by backpropagating the loss
+    optimiser.backward_step(&loss)?;
+#     // get the log probabilities of the test images
+#     let test_logits = model.forward(&test_images)?;
+#     // get the sum of the correct predictions
+#     let sum_ok = test_logits
+#         .argmax(D::Minus1)?
+#         .eq(&test_labels)?
+#         .to_dtype(DType::F32)?
+#         .sum_all()?
+#         .to_scalar::<f32>()?;
+#     // get the accuracy on the test set
+#     #[allow(clippy::cast_precision_loss)]
+#     let test_accuracy = sum_ok / test_labels.dims1()? as f32;
+#     println!(
+#         "{:4} train loss: {:8.5} test acc: {:5.2}%",
+#         epoch + 1,
+#         loss.to_scalar::<f32>()?,
+#         100. * test_accuracy
+#     );
+}
+Ok(())
+# }
+```
+*/
 */

 use std::fmt::Debug;
