diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index ad0ca1c..7a74116 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -1,3 +1,4 @@
+add_subdirectory(mlp-cartpole)  # MLP example added by this change (builds the `cartpole` executable)
 add_subdirectory(fast-depth)
 add_subdirectory(llama2)
 add_subdirectory(stereo-block-matching)
diff --git a/examples/mlp-cartpole/CMakeLists.txt b/examples/mlp-cartpole/CMakeLists.txt
new file mode 100644
index 0000000..762d376
--- /dev/null
+++ b/examples/mlp-cartpole/CMakeLists.txt
@@ -0,0 +1,23 @@
+cmake_minimum_required(VERSION 3.10)
+
+project(cartpole LANGUAGES C)
+
+add_executable(cartpole main.c)
+
+target_include_directories(cartpole PUBLIC ./)
+
+# PRIVATE, not INTERFACE: an executable has no consumers, so an INTERFACE feature would never apply C11 to main.c.
+target_compile_features(cartpole PRIVATE c_std_11)
+
+if (X86)
+    message("cartpole: building for x86")
+    target_link_libraries(cartpole PUBLIC target-x86)
+elseif (RISCV)
+    message("cartpole: building for RISC-V")
+    target_link_libraries(cartpole PUBLIC target-riscv)
+endif ()
+
+target_compile_options(cartpole PRIVATE -O2 -Wall -Wextra)
+
+target_link_libraries(cartpole PUBLIC nn)
+target_link_libraries(cartpole PUBLIC m)
diff --git a/examples/mlp-cartpole/README.md b/examples/mlp-cartpole/README.md
new file mode 100644
index 0000000..3121210
--- /dev/null
+++ b/examples/mlp-cartpole/README.md
@@ -0,0 +1,29 @@
+# MLP CartPole Example
+
+An example MLP network with four fully-connected layers (48 -> 512 -> 256 -> 128 -> 12) separated by ELU activations.
+
+## Initial setup
+
+```bash
+mkdir ./examples/mlp-cartpole/build/
+cd ./examples/mlp-cartpole/build/
+cmake ..
+```
+
+## Generating model weights
+
+```bash
+cd ./examples/mlp-cartpole/
+python ./scripts/run.py
+```
+
+The script will generate a `model.pth` file and a `model.bin` file.
+
+## Compiling and running the program
+
+```bash
+cd ./examples/mlp-cartpole/build/
+cmake --build . && ./cartpole
+```
+
+
diff --git a/examples/mlp-cartpole/main.c b/examples/mlp-cartpole/main.c
new file mode 100644
index 0000000..33b9e1b
--- /dev/null
+++ b/examples/mlp-cartpole/main.c
@@ -0,0 +1,50 @@
+/**
+ * @file main.c
+ * 
+ * Runs one forward pass of the MLP defined in model.h on an all-ones input and prints the output tensor.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "rv.h"
+#include "nn.h"
+#include "model.h"
+
+
+// static void enable_vector_operations() {
+//     unsigned long mstatus;
+//     asm volatile("csrr %0, mstatus" : "=r"(mstatus));
+//     mstatus |= 0x00000600 | 0x00006000 | 0x00018000;
+//     asm volatile("csrw mstatus, %0"::"r"(mstatus));
+// }
+
+// Runs one forward pass on an all-ones input, then prints the cycle count and the output tensor.
+int main(void) {
+  // enable_vector_operations();
+  Model *model = malloc(sizeof(Model));
+  if (model == NULL) {
+    printf("failed to allocate model\n");
+    return 1;
+  }
+  // Zero-initialised: the mcycle reads below are disabled, so this would otherwise be printed uninitialised.
+  size_t cycles = 0;
+  printf("initalizing model...\n");
+  init(model);
+
+  printf("setting input data...\n");
+  NN_fill(&model->input_1, 1.0);
+
+  // cycles = READ_CSR("mcycle");
+  forward(model);
+  // cycles = READ_CSR("mcycle") - cycles;
+  printf("cycles: %lu\n", (unsigned long)cycles);
+
+  // output tensor([[ 0.0258, -0.0050,  0.0902, -0.0022, -0.0924, -0.0574,  0.0328,  0.0386, -0.0277,  0.0788,  0.0603, -0.0085]])
+  printf("output:\n");
+  NN_printf(&model->actor_6);
+  free(model);
+  return 0;
+}
diff --git a/examples/mlp-cartpole/model.bin b/examples/mlp-cartpole/model.bin
new file mode 100644
index 0000000..955f2ef
Binary files /dev/null and b/examples/mlp-cartpole/model.bin differ
diff --git a/examples/mlp-cartpole/model.h b/examples/mlp-cartpole/model.h
new file mode 100644
index 0000000..22cdf52
--- /dev/null
+++ b/examples/mlp-cartpole/model.h
@@ -0,0 +1,102 @@
+#ifndef __MODEL_H
+#define __MODEL_H
+
+#include <stdint.h>
+#include <stddef.h>
+#include "nn.h"
+
+
+// Load the weight data block from model.bin. NOTE(review): INCLUDE_FILE is defined outside this header; presumably it emits the model_weight_* symbols declared below — confirm.
+INCLUDE_FILE(".rodata", "./model.bin", model_weight);
+extern uint8_t model_weight_data[];   // init() reinterprets this as float data and walks it layer by layer
+extern size_t model_weight_start[];   // declared but not referenced in this header
+extern size_t model_weight_end[];     // declared but not referenced in this header
+
+typedef struct {
+  Tensor input_1;         // network input; init() shapes it {1, 48}
+  Tensor actor_0_weight;  // Linear actor_0 weight, {512, 48}
+  Tensor actor_0_bias;    // Linear actor_0 bias, {512}
+  Tensor actor_0;         // NN_linear result of actor_0, {1, 512}
+  Tensor actor_1;         // NN_elu applied to actor_0, {1, 512}
+  Tensor actor_2_weight;  // Linear actor_2 weight, {256, 512}
+  Tensor actor_2_bias;    // Linear actor_2 bias, {256}
+  Tensor actor_2;         // NN_linear result of actor_2, {1, 256}
+  Tensor actor_3;         // NN_elu applied to actor_2, {1, 256}
+  Tensor actor_4_weight;  // Linear actor_4 weight, {128, 256}
+  Tensor actor_4_bias;    // Linear actor_4 bias, {128}
+  Tensor actor_4;         // NN_linear result of actor_4, {1, 128}
+  Tensor actor_5;         // NN_elu applied to actor_4, {1, 128}
+  Tensor actor_6_weight;  // Linear actor_6 weight, {12, 128}
+  Tensor actor_6_bias;    // Linear actor_6 bias, {12}
+  Tensor actor_6;         // final NN_linear result, {1, 12}
+
+} Model;
+
+
+void init(Model *model);     // sets up every tensor in *model (defined below in this header)
+
+void forward(Model *model);  // runs the layers in order; the result is written to model->actor_6
+
+/**
+ * Set up every tensor of the model. Parameter tensors are handed consecutive
+ * float slices of model_weight_data (weight, then bias, per Linear layer);
+ * the input and all layer outputs are initialised with a NULL data pointer.
+ */
+void init(Model *model) {
+  float *cursor = (float *)model_weight_data;
+
+  NN_init_tensor(&model->input_1, 2, (size_t[]){ 1, 48 }, DTYPE_F32, NULL);
+
+  // actor_0: Linear, 48 -> 512
+  NN_init_tensor(&model->actor_0_weight, 2, (size_t[]){ 512, 48 }, DTYPE_F32, cursor);
+  cursor += 512 * 48;
+  NN_init_tensor(&model->actor_0_bias, 1, (size_t[]){ 512 }, DTYPE_F32, cursor);
+  cursor += 512;
+  NN_init_tensor(&model->actor_0, 2, (size_t[]){ 1, 512 }, DTYPE_F32, NULL);
+
+  // actor_1: ELU on actor_0
+  NN_init_tensor(&model->actor_1, 2, (size_t[]){ 1, 512 }, DTYPE_F32, NULL);
+
+  // actor_2: Linear, 512 -> 256
+  NN_init_tensor(&model->actor_2_weight, 2, (size_t[]){ 256, 512 }, DTYPE_F32, cursor);
+  cursor += 256 * 512;
+  NN_init_tensor(&model->actor_2_bias, 1, (size_t[]){ 256 }, DTYPE_F32, cursor);
+  cursor += 256;
+  NN_init_tensor(&model->actor_2, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL);
+
+  // actor_3: ELU on actor_2
+  NN_init_tensor(&model->actor_3, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL);
+
+  // actor_4: Linear, 256 -> 128
+  NN_init_tensor(&model->actor_4_weight, 2, (size_t[]){ 128, 256 }, DTYPE_F32, cursor);
+  cursor += 128 * 256;
+  NN_init_tensor(&model->actor_4_bias, 1, (size_t[]){ 128 }, DTYPE_F32, cursor);
+  cursor += 128;
+  NN_init_tensor(&model->actor_4, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL);
+
+  // actor_5: ELU on actor_4
+  NN_init_tensor(&model->actor_5, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL);
+
+  // actor_6: Linear, 128 -> 12
+  NN_init_tensor(&model->actor_6_weight, 2, (size_t[]){ 12, 128 }, DTYPE_F32, cursor);
+  cursor += 12 * 128;
+  NN_init_tensor(&model->actor_6_bias, 1, (size_t[]){ 12 }, DTYPE_F32, cursor);
+  cursor += 12;
+  NN_init_tensor(&model->actor_6, 2, (size_t[]){ 1, 12 }, DTYPE_F32, NULL);
+}
+
+
+// Forward pass: three Linear + ELU (alpha 1.0) stages followed by a final Linear.
+// Reads model->input_1 and leaves the result in model->actor_6.
+void forward(Model *model) {
+  Model *const m = model;
+  NN_linear(&m->actor_0, &m->input_1, &m->actor_0_weight, &m->actor_0_bias);
+  NN_elu(&m->actor_1, &m->actor_0, 1.0);
+  NN_linear(&m->actor_2, &m->actor_1, &m->actor_2_weight, &m->actor_2_bias);
+  NN_elu(&m->actor_3, &m->actor_2, 1.0);
+  NN_linear(&m->actor_4, &m->actor_3, &m->actor_4_weight, &m->actor_4_bias);
+  NN_elu(&m->actor_5, &m->actor_4, 1.0);
+  // Output projection: no activation follows the last layer.
+  NN_linear(&m->actor_6, &m->actor_5, &m->actor_6_weight, &m->actor_6_bias);
+}
+
+#endif
\ No newline at end of file
diff --git a/examples/mlp-cartpole/scripts/run.py b/examples/mlp-cartpole/scripts/run.py
new file mode 100644
index 0000000..2e1f34a
--- /dev/null
+++ b/examples/mlp-cartpole/scripts/run.py
@@ -0,0 +1,42 @@
+import numpy as np
+import torch
+import torch.nn as nn
+
+import barstools
+
+
+torch.manual_seed(0)
+
+class Net(nn.Module):
+    """MLP actor: Linear layers 48 -> 512 -> 256 -> 128 -> 12 with ELU(alpha=1.0) in between."""
+
+    def __init__(self):
+        super().__init__()
+        widths = (48, 512, 256, 128, 12)
+        layers = []
+        for fan_in, fan_out in zip(widths, widths[1:]):
+            # Linear layers are created in the original order, so seeded initialisation is unchanged.
+            layers += [nn.Linear(fan_in, fan_out, bias=True), nn.ELU(alpha=1.0)]
+        # Drop the trailing ELU: the last Linear has no activation after it.
+        self.actor = nn.Sequential(*layers[:-1])
+
+    def forward(self, input):
+        """Return the output of the actor stack for `input`."""
+        return self.actor.forward(input)
+
+# Build the network; with the load below commented out it keeps its seeded default initialisation
+m = Net()
+
+# m.load_state_dict(torch.load("model.pth", map_location=torch.device("cpu")))
+m.eval()
+
+test_input = torch.ones((48, )).unsqueeze(0)  # all-ones input of shape (1, 48)
+
+print(test_input)
+
+with torch.no_grad():  # reference output, printed for manual inspection
+    output = m.forward(test_input)
+    print("output", output)
+
+output = barstools.TorchConverter(m).convert(test_input, output_dir=".")  # NOTE(review): barstools is external; presumably writes the generated files into the current directory — confirm
+
diff --git a/examples/ppo/CMakeLists.txt b/examples/mlp-ppo/CMakeLists.txt
similarity index 100%
rename from examples/ppo/CMakeLists.txt
rename to examples/mlp-ppo/CMakeLists.txt
diff --git a/examples/ppo/README.md b/examples/mlp-ppo/README.md
similarity index 100%
rename from examples/ppo/README.md
rename to examples/mlp-ppo/README.md
diff --git a/examples/ppo/hack_policy.bin b/examples/mlp-ppo/hack_policy.bin
similarity index 100%
rename from examples/ppo/hack_policy.bin
rename to examples/mlp-ppo/hack_policy.bin
diff --git a/examples/ppo/main.c b/examples/mlp-ppo/main.c
similarity index 100%
rename from examples/ppo/main.c
rename to examples/mlp-ppo/main.c
diff --git a/examples/ppo/model.pth b/examples/mlp-ppo/model.pth
similarity index 100%
rename from examples/ppo/model.pth
rename to examples/mlp-ppo/model.pth
diff --git a/examples/ppo/scripts/convert.py b/examples/mlp-ppo/scripts/convert.py
similarity index 100%
rename from examples/ppo/scripts/convert.py
rename to examples/mlp-ppo/scripts/convert.py
diff --git a/examples/ppo/scripts/model.py b/examples/mlp-ppo/scripts/model.py
similarity index 100%
rename from examples/ppo/scripts/model.py
rename to examples/mlp-ppo/scripts/model.py
diff --git a/examples/ppo/scripts/train.py b/examples/mlp-ppo/scripts/train.py
similarity index 100%
rename from examples/ppo/scripts/train.py
rename to examples/mlp-ppo/scripts/train.py
diff --git a/examples/ppo/simple_converter.py b/examples/mlp-ppo/simple_converter.py
similarity index 100%
rename from examples/ppo/simple_converter.py
rename to examples/mlp-ppo/simple_converter.py