Add demo for comparison with libtorch (and pytorch)
Showing 4 changed files with 316 additions and 0 deletions.
CMakeLists.txt (new file)
@@ -0,0 +1,10 @@
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(torch-basic-nn)

find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

add_executable(torch-basic-nn torch-basic-nn.cpp)
target_link_libraries(torch-basic-nn "${TORCH_LIBRARIES}")
set_property(TARGET torch-basic-nn PROPERTY CXX_STANDARD 17)
basic-nn.cpp (new file)
@@ -0,0 +1,153 @@
#include <chrono>
#include <iostream>
#include <random>
#include "clad/Differentiator/Differentiator.h"

/*
  The code below implements a simple neural network with 3 hidden nodes and 1
  output node. The neural network is trained using gradient descent to minimize
  the mean squared error loss. The code uses Clad to automatically generate the
  gradient computation code for the loss function.

  To compile the code, use the following command:
  clang++ -std=c++14 -I<path-to-clad-include-dir> -fplugin=<path-to-clad.so>
    basic-nn.cpp -DBENCHMARK_MATRIX_MULTIPLY
*/

#define M 100       // num of data points
#define N 100       // num of features
#define N_ITER 5000 // num of iterations

void relu(double* arr, unsigned int n) {
  for (unsigned int i = 0; i < n; ++i)
    arr[i] = (arr[i] > 0) ? arr[i] : 0;
}

// Create a model of a neural network with 3 hidden nodes and 1 output node.
// It takes in input data x and weight vectors for each hidden node and the
// output node.
double model_fn(double x[N],  // input data
                double w1[N], // weight vector for hidden node 1
                double w2[N], // weight vector for hidden node 2
                double w3[N], // weight vector for hidden node 3
                double w4[3]  // weight vector for output node
) {
  // Layer 1 - matrix multiplication
  double h1 = 0, h2 = 0, h3 = 0;
  for (int i = 0; i < N; ++i) {
    h1 += x[i] * w1[i];
    h2 += x[i] * w2[i];
    h3 += x[i] * w3[i];
  }
  // Computing non-linear activation function
  double h[3] = {h1, h2, h3};
  relu(h, 3);

  // Layer 2 - matrix multiplication
  double o = 0;
  for (int i = 0; i < 3; ++i)
    o += h[i] * w4[i];
  return o;
}

double loss_fn(double x[M * N], double y[M], double w1[N], double w2[N],
               double w3[N], double w4[3]) {
  double loss = 0;
  for (int i = 0; i < M; ++i) {
    double y_hat = model_fn(x + i * N, w1, w2, w3, w4);
    loss += (y_hat - y[i]) * (y_hat - y[i]);
  }
  return loss / M;
}
// Perform a single gradient descent step.
// x and y form the generated dataset, w1-w4 are the model weights, and
// clad_grad is the gradient function generated by Clad.
template <typename T>
void performStep(double x[M * N], double y[M], double w1[N], double w2[N],
                 double w3[N], double w4[3], T clad_grad,
                 double learning_rate = 1e-2) {
  // Gradient computation
  double grad_w1[N], grad_w2[N], grad_w3[N], grad_w4[3];
  clad_grad.execute(x, y, w1, w2, w3, w4, grad_w1, grad_w2, grad_w3, grad_w4);

  // Update weights for hidden nodes
  for (int i = 0; i < N; ++i) {
    w1[i] -= learning_rate * grad_w1[i];
    w2[i] -= learning_rate * grad_w2[i];
    w3[i] -= learning_rate * grad_w3[i];
  }

  // Update weights for output node
  for (int i = 0; i < 3; ++i)
    w4[i] -= learning_rate * grad_w4[i];
}

// Perform gradient descent to optimize the weights.
void optimize(double x[M * N], double y[M], double w1[N], double w2[N],
              double w3[N], double w4[3], double lr, int num_iters) {
  auto grad = clad::gradient(loss_fn, "w1,w2,w3,w4");
  for (int i = 0; i < num_iters; ++i)
    performStep(x, y, w1, w2, w3, w4, grad, lr);
}

// Benchmark the time for 100 matrix multiplications (z = x * x; M == N, so x
// is square).
void benchmark_matrix_multiply(double x[M * N], double y[M]) {
  double z[M * N];
  auto start_bench = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < 100; ++i) {
    for (int j = 0; j < M; ++j) {
      for (int k = 0; k < N; ++k) {
        z[j * N + k] = 0;
        for (int l = 0; l < N; ++l)
          z[j * N + k] += x[j * N + l] * x[l * N + k];
      }
    }
  }
  auto end_bench = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed_bench = end_bench - start_bench;
  std::cout << "Time taken for 100 matrix multiplications: "
            << elapsed_bench.count() << " seconds" << std::endl;
}

int main() {
  // Initialize random number generator.
  std::mt19937 gen(42);

  // Generate random input data and labels.
  double x[M * N], y[M];
  for (int i = 0; i < M * N; ++i)
    x[i] = std::uniform_real_distribution<double>(0, 1)(gen);
  for (int i = 0; i < M; ++i)
    y[i] = std::uniform_real_distribution<double>(0, 1)(gen);

#ifdef BENCHMARK_MATRIX_MULTIPLY
  benchmark_matrix_multiply(x, y);
#endif

  // Initialize weights from a random distribution.
  double w1[N], w2[N], w3[N], w4[3];
  for (int i = 0; i < N; ++i) {
    w1[i] = std::uniform_real_distribution<double>(0, 1)(gen);
    w2[i] = std::uniform_real_distribution<double>(0, 1)(gen);
    w3[i] = std::uniform_real_distribution<double>(0, 1)(gen);
  }
  for (int i = 0; i < 3; ++i)
    w4[i] = std::uniform_real_distribution<double>(0, 1)(gen);

  // Calculate the loss before optimization.
  double loss = loss_fn(x, y, w1, w2, w3, w4);
  std::cout << "Initial loss before optimization: " << loss << std::endl;

  // Optimize the weights and compute the time taken.
  auto start = std::chrono::high_resolution_clock::now();
  optimize(x, y, w1, w2, w3, w4, 1e-2, N_ITER);
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed = end - start;

  // Calculate the loss after optimization.
  loss = loss_fn(x, y, w1, w2, w3, w4);
  std::cout << "Final loss after optimization: " << loss << std::endl;
  std::cout << "Time taken: " << elapsed.count() << " seconds" << std::endl;
  return 0;
}
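
The demo above relies on Clad's reverse-mode interface: clad::gradient(fn, "args") returns a callable object whose execute method takes the original arguments followed by output buffers for the requested derivatives. A minimal, self-contained sketch of that interface, using a toy function chosen here only for illustration (not part of this commit):

#include "clad/Differentiator/Differentiator.h"
#include <iostream>

// Toy function: f(a, b) = a*a + 3*b.
double f(double a, double b) { return a * a + 3.0 * b; }

int main() {
  // Differentiate f with respect to all of its parameters.
  auto grad = clad::gradient(f);
  double da = 0, db = 0;
  // execute takes the evaluation point followed by the derivative outputs;
  // at (a, b) = (2, 1) we expect da = 2*a = 4 and db = 3.
  grad.execute(2.0, 1.0, &da, &db);
  std::cout << "df/da = " << da << ", df/db = " << db << std::endl;
  return 0;
}

This sketch compiles with the same clang++ -fplugin=<path-to-clad.so> invocation shown in the header comment of basic-nn.cpp.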
torch-basic-nn.cpp (new file)
@@ -0,0 +1,85 @@
// Do the same thing as in basic-nn.cpp, but using libtorch instead of Clad.

/*
  To build and run:
  1. Create a build directory: mkdir build
  2. Change to the build directory: cd build
  3. Run cmake: cmake .. -DCMAKE_PREFIX_PATH=/path/to/libtorch
  4. Run make: make
  5. Run the executable: ./torch-basic-nn
*/

#include <chrono>
#include <iostream>
#include <random>
#include <torch/torch.h>

#define M 100       // num of data points
#define N 100       // num of features
#define N_ITER 5000 // num of iterations

// C++ neural network model defined with value semantics
struct TwoLayerNet : torch::nn::Module {
  // constructor with submodules registered in the initializer list
  TwoLayerNet(int64_t D_in, int64_t D_out, int64_t H)
      : linear1(register_module("linear1", torch::nn::Linear(D_in, H))),
        linear2(register_module("linear2", torch::nn::Linear(H, D_out))) {}

  torch::Tensor forward(torch::Tensor x) {
    x = torch::relu(linear1->forward(x));
    x = linear2->forward(x);
    return x;
  }

  torch::nn::Linear linear1;
  torch::nn::Linear linear2;
};

int main() {
  // Generate random data
  // std::default_random_engine generator;
  // std::normal_distribution<double> distribution(0.0, 1.0);
  torch::Tensor x = torch::randn({M, N});
  torch::Tensor y = torch::randn({M, 1});

  // Benchmark the time for 100 matrix multiplications
  torch::Tensor z;
  auto start_bench = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < 100; ++i)
    z = torch::matmul(x, x);
  auto end_bench = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed_bench = end_bench - start_bench;
  std::cout << "Time taken for 100 matrix multiplications: "
            << elapsed_bench.count() << " seconds" << std::endl;

  // Initialize model weights
  TwoLayerNet model(N, 1, 3);

  // Create the optimizer and loss function
  torch::optim::SGD optimizer(model.parameters(), 0.01);
  torch::nn::MSELoss loss_fn;

  // Calculate the loss before optimization
  torch::Tensor output = model.forward(x);
  std::cout << "Initial loss before optimization: "
            << loss_fn(output, y).item<double>() << std::endl;

  // Optimize
  auto start = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < N_ITER; ++i) {
    optimizer.zero_grad();
    torch::Tensor output = model.forward(x);
    torch::Tensor loss = loss_fn(output, y);
    loss.backward();
    optimizer.step();
  }
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed = end - start;

  // Calculate the loss after optimization
  output = model.forward(x);
  std::cout << "Final loss after optimization: "
            << loss_fn(output, y).item<double>() << std::endl;
  std::cout << "Time taken: " << elapsed.count() << " seconds" << std::endl;
  return 0;
}
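
One difference from the Clad demo is that the data here is drawn without a fixed seed. A small, self-contained sketch (an optional addition, not part of this commit) of how the libtorch run could be made reproducible, mirroring the std::mt19937(42) seed used in basic-nn.cpp:

#include <torch/torch.h>
#include <iostream>

int main() {
  torch::manual_seed(42);                 // fix the global RNG state
  torch::Tensor x = torch::randn({2, 3}); // same values on every run
  std::cout << x << std::endl;
  return 0;
}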
torch-basic-nn.py (new file)
@@ -0,0 +1,68 @@
# Do the same as torch-basic-nn.cpp but in Python using PyTorch
# To run: python torch-basic-nn.py

import time

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

M = 100        # Number of samples
N = 100        # Number of features
N_ITER = 5000  # Number of iterations


# Create a simple neural network with 3 hidden nodes and 1 output node
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(N, 3)
        self.fc2 = nn.Linear(3, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


def main():
    # Create a simple dataset
    x = np.random.randn(M, N).astype(np.float32)
    y = np.random.randn(M, 1).astype(np.float32)

    # Convert the dataset to PyTorch tensors
    x = torch.from_numpy(x)
    y = torch.from_numpy(y)

    # Create the neural network
    net = Net()

    # Create the loss function and the optimizer
    criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01)

    # Print the initial loss
    output = net(x)
    loss = criterion(output, y)
    print("Initial loss: ", loss.item())

    # Start the timer
    start = time.time()

    # Train the neural network
    for i in range(N_ITER):
        optimizer.zero_grad()
        output = net(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

    # Stop the timer
    end = time.time()

    # Print the final loss
    output = net(x)
    loss = criterion(output, y)
    print("Final loss: ", loss.item())
    print("Time: ", end - start, " seconds")


if __name__ == "__main__":
    main()