Add demo for comparison with libtorch (and pytorch) #1002
base: master
New file: CMakeLists.txt for the libtorch demo (+10 lines)
cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
project(torch-basic-nn)

find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

add_executable(torch-basic-nn torch-basic-nn.cpp)
target_link_libraries(torch-basic-nn "${TORCH_LIBRARIES}")
set_property(TARGET torch-basic-nn PROPERTY CXX_STANDARD 17)
New file: basic-nn.cpp, the Clad version of the demo (+153 lines)
#include <chrono>
#include <iostream>
#include <random>
#include "clad/Differentiator/Differentiator.h"

/*
The code below implements a simple neural network with 3 hidden nodes and 1
output node. The neural network is trained using gradient descent to minimize
the mean squared error loss. The code uses Clad to automatically generate the
gradient computation code for the loss function.

To compile the code, use the following command:
clang++ -std=c++14 -I<path-to-clad-include-dir> -fplugin=<path-to-clad.so>
basic-nn.cpp -DBENCHMARK_MATRIX_MULTIPLY
*/

#define M 100       // num of data points
#define N 100       // num of features
#define N_ITER 5000 // num of iterations

void relu(double* arr, unsigned int n) {
  for (unsigned int i = 0; i < n; ++i)
    arr[i] = (arr[i] > 0) ? arr[i] : 0;
}

// Create a model of a neural network with 3 hidden nodes and 1 output node.
// It takes in input data x and weight vectors for each hidden node and the
// output node.
double model_fn(double x[N],  // input data
                double w1[N], // weight vector for hidden node 1
                double w2[N], // weight vector for hidden node 2
                double w3[N], // weight vector for hidden node 3
                double w4[3]  // weight vector for output node
) {
  // Layer 1 - matrix multiplication
  double h1 = 0, h2 = 0, h3 = 0;
  for (int i = 0; i < N; ++i) {
    h1 += x[i] * w1[i];
    h2 += x[i] * w2[i];
    h3 += x[i] * w3[i];
  }
  // Computing non-linear activation function
  double h[3] = {h1, h2, h3};
  relu(h, 3);

  // Layer 2 - matrix multiplication
  double o = 0;
  for (int i = 0; i < 3; ++i)
    o += h[i] * w4[i];
  return o;
}

double loss_fn(double x[M * N], double y[M], double w1[N], double w2[N],
               double w3[N], double w4[3]) {
  double loss = 0;
  for (int i = 0; i < M; ++i) {
    double y_hat = model_fn(x + i * N, w1, w2, w3, w4);
    loss += (y_hat - y[i]) * (y_hat - y[i]);
  }
  return loss / M;
}

// Perform a single gradient descent step.
// w1..w4 are the model weights, (x, y) is the generated dataset, and
// clad_grad is the gradient function generated by Clad.
template <typename T>
void performStep(double x[M * N], double y[M], double w1[N], double w2[N],
                 double w3[N], double w4[3], T clad_grad,
                 double learning_rate = 1e-2) {
  // Gradient computation (buffers zero-initialized before Clad writes into them)
  double grad_w1[N] = {0}, grad_w2[N] = {0}, grad_w3[N] = {0},
         grad_w4[3] = {0};
  clad_grad.execute(x, y, w1, w2, w3, w4, grad_w1, grad_w2, grad_w3, grad_w4);

  // Update weights for hidden nodes
  for (int i = 0; i < N; ++i) {
    w1[i] -= learning_rate * grad_w1[i];
    w2[i] -= learning_rate * grad_w2[i];
    w3[i] -= learning_rate * grad_w3[i];
  }

  // Update weights for output node
  for (int i = 0; i < 3; ++i)
    w4[i] -= learning_rate * grad_w4[i];
}

// Perform gradient descent to optimize the weights.
void optimize(double x[M * N], double y[M], double w1[N], double w2[N],
              double w3[N], double w4[3], double lr, int num_iters) {
  auto grad = clad::gradient(loss_fn, "w1,w2,w3,w4");
  for (int i = 0; i < num_iters; ++i)
    performStep(x, y, w1, w2, w3, w4, grad, lr);
}

// Benchmark the time for 100 matrix multiplications.
void benchmark_matrix_multiply(double x[M * N], double y[M]) {

[Review comment] I don't think it matters, but shouldn't …
[Review comment] Also …

  double z[M * N];
  auto start_bench = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < 100; ++i) {
    for (int j = 0; j < M; ++j) {
      for (int k = 0; k < N; ++k) {
        z[j * N + k] = 0;
        for (int l = 0; l < N; ++l)
          z[j * N + k] += x[j * N + l] * x[l * N + k]; // (X * X)[j][k]
      }
    }
  }
  auto end_bench = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed_bench = end_bench - start_bench;
  std::cout << "Time taken for 100 matrix multiplications: "
            << elapsed_bench.count() << " seconds" << std::endl;
}

int main() {
  // Initialize random number generator.
  std::mt19937 gen(42);

  // Generate random input data and labels.
  double x[M * N], y[M];

[Review comment] Both of the demos seem to benchmark the multiplication X*X, which I believe only makes sense if M=N. Maybe transpose one of them so that it's always valid?

  for (int i = 0; i < M * N; ++i)
    x[i] = std::uniform_real_distribution<double>(0, 1)(gen);
  for (int i = 0; i < M; ++i)
    y[i] = std::uniform_real_distribution<double>(0, 1)(gen);

#ifdef BENCHMARK_MATRIX_MULTIPLY
  benchmark_matrix_multiply(x, y);
#endif

  // Initialize weights from a random distribution.
  double w1[N], w2[N], w3[N], w4[3];
  for (int i = 0; i < N; ++i) {
    w1[i] = std::uniform_real_distribution<double>(0, 1)(gen);
    w2[i] = std::uniform_real_distribution<double>(0, 1)(gen);
    w3[i] = std::uniform_real_distribution<double>(0, 1)(gen);
  }
  for (int i = 0; i < 3; ++i)
    w4[i] = std::uniform_real_distribution<double>(0, 1)(gen);

  // Calculate the loss before optimization.
  double loss = loss_fn(x, y, w1, w2, w3, w4);
  std::cout << "Initial loss before optimization: " << loss << std::endl;

  // Optimize the weights and compute the time taken.
  auto start = std::chrono::high_resolution_clock::now();
  optimize(x, y, w1, w2, w3, w4, 1e-2, N_ITER);
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed = end - start;

  // Calculate the loss after optimization.
  loss = loss_fn(x, y, w1, w2, w3, w4);
  std::cout << "Final loss after optimization: " << loss << std::endl;
  std::cout << "Time taken: " << elapsed.count() << " seconds" << std::endl;
  return 0;
}
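
For readers who have not used Clad before, the two calls that do the work in the file above are clad::gradient, which asks the compiler plugin to generate a reverse-mode gradient of a function at compile time, and the execute method on the returned object, which runs the generated code. Below is a minimal, self-contained sketch of that pattern, not part of the PR; the toy function f and its input values are chosen here purely for illustration, and it is compiled with the same clang plugin invocation shown in the header comment of basic-nn.cpp.

#include <iostream>
#include "clad/Differentiator/Differentiator.h"

// f(u, v) = u*u + 3*v, a toy function to differentiate
double f(double u, double v) { return u * u + 3 * v; }

int main() {
  // Ask Clad for the reverse-mode gradient with respect to all parameters.
  auto f_grad = clad::gradient(f);
  double du = 0, dv = 0; // zero-initialized output slots for df/du and df/dv
  f_grad.execute(2.0, 5.0, &du, &dv);
  std::cout << "du = " << du << ", dv = " << dv << '\n'; // expect du = 4, dv = 3
  return 0;
}

The demo restricts differentiation to the weights with clad::gradient(loss_fn, "w1,w2,w3,w4"), so x and y are treated as constants and only the four gradient buffers are passed to execute after the original arguments.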
New file: torch-basic-nn.cpp, the libtorch version of the demo (+85 lines)
// Do the same thing as basic-nn.cpp, but using libtorch instead of Clad.

/*
To build and run:
1. Create a build directory: mkdir build
2. Change to the build directory: cd build
3. Run cmake: cmake .. -DCMAKE_PREFIX_PATH=/path/to/libtorch
4. Run make: make
5. Run the executable: ./torch-basic-nn
*/

#include <chrono>
#include <iostream>
#include <random>
#include <torch/torch.h>

#define M 100       // num of data points
#define N 100       // num of features
#define N_ITER 5000 // num of iterations

// C++ neural network model defined with value semantics
struct TwoLayerNet : torch::nn::Module {
  // constructor with submodules registered in the initializer list
  TwoLayerNet(int64_t D_in, int64_t D_out, int64_t H)
      : linear1(register_module("linear1", torch::nn::Linear(D_in, H))),
        linear2(register_module("linear2", torch::nn::Linear(H, D_out))) {}

  torch::Tensor forward(torch::Tensor x) {
    x = torch::relu(linear1->forward(x));
    x = linear2->forward(x);
    return x;
  }
  torch::nn::Linear linear1;
  torch::nn::Linear linear2;
};

int main() {
  // Generate random data
  // std::default_random_engine generator;
  // std::normal_distribution<double> distribution(0.0, 1.0);
  torch::Tensor x = torch::randn({M, N});
  torch::Tensor y = torch::randn({M, 1});

  // Benchmark the time for 100 matrix multiplications
  torch::Tensor z;
  auto start_bench = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < 100; ++i)
    z = torch::matmul(x, x);
  auto end_bench = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed_bench = end_bench - start_bench;
  std::cout << "Time taken for 100 matrix multiplications: "
            << elapsed_bench.count() << " seconds" << std::endl;

  // Initialize model weights
  TwoLayerNet model(N, 1, 3);

  // Create the optimizer and loss function
  torch::optim::SGD optimizer(model.parameters(), 0.01);
  torch::nn::MSELoss loss_fn;

  // Calculate the loss before optimization
  torch::Tensor output = model.forward(x);
  std::cout << "Initial loss before optimization: "
            << loss_fn(output, y).item<double>() << std::endl;

  // Optimize
  auto start = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < N_ITER; ++i) {
    optimizer.zero_grad();
    torch::Tensor output = model.forward(x);
    torch::Tensor loss = loss_fn(output, y);
    loss.backward();
    optimizer.step();
  }
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed = end - start;

  // Calculate the loss after optimization
  output = model.forward(x);
  std::cout << "Final loss after optimization: "
            << loss_fn(output, y).item<double>() << std::endl;
  std::cout << "Time taken: " << elapsed.count() << " seconds" << std::endl;
  return 0;
}
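
One way to act on the review comment above about the X*X benchmark (it is only shape-valid here because M and N are both 100) would be to multiply X by its transpose, which is defined for any M x N input. The following is a minimal sketch of the libtorch side of that change, not part of the PR; the loop count of 100 is kept from the demo and everything else is illustrative.

#include <chrono>
#include <iostream>
#include <torch/torch.h>

#define M 100 // num of data points
#define N 100 // num of features

int main() {
  torch::Tensor x = torch::randn({M, N});
  torch::Tensor z;
  auto start = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < 100; ++i)
    z = torch::matmul(x, x.transpose(0, 1)); // X * X^T, an M x M result, valid for any M, N
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> elapsed = end - start;
  std::cout << "Time for 100 X*X^T products: " << elapsed.count() << " seconds\n";
  return 0;
}

The hand-written loop in basic-nn.cpp would need the matching change (accumulate x[j * N + l] * x[k * N + l] into an M x M buffer) so that both demos keep measuring the same operation.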
New file: torch-basic-nn.py, the PyTorch version of the demo (+68 lines)
# Do the same as torch-basic-nn.cpp but in Python using PyTorch
# To run: python torch-basic-nn.py

import time

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

M = 100        # Number of samples
N = 100        # Number of features
N_ITER = 5000  # Number of iterations


# Create a simple neural network with 3 hidden nodes and 1 output node
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(N, 3)
        self.fc2 = nn.Linear(3, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


def main():
    # Create a simple dataset
    x = np.random.randn(M, N).astype(np.float32)
    y = np.random.randn(M, 1).astype(np.float32)

    # Convert the dataset to PyTorch tensors
    x = torch.from_numpy(x)
    y = torch.from_numpy(y)

    # Create the neural network
    net = Net()

    # Create the loss function and the optimizer
    criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.01)

    # Print the initial loss
    output = net(x)
    loss = criterion(output, y)
    print("Initial loss: ", loss.item())

    # Record the start time
    start = time.time()

    # Train the neural network
    for i in range(N_ITER):
        optimizer.zero_grad()
        output = net(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

    # Stop the timer
    end = time.time()

    # Print the final loss
    output = net(x)
    loss = criterion(output, y)
    print("Final loss: ", loss.item())
    print("Time: ", end - start, " seconds")


if __name__ == "__main__":
    main()

[Review comment] Can we add a bot to run this as part of the CI?