From ce7413e2ef0ca2a914c712f2f496ba732b7564a0 Mon Sep 17 00:00:00 2001
From: harshaneo17
Date: Mon, 11 Mar 2024 22:18:36 +0000
Subject: [PATCH 1/2] changes to file structure

---
 .vscode/settings.json     |  3 ++-
 CMakeLists.txt            |  2 +-
 include/data.hpp          | 36 +++++++++-------------------
 include/layers.hpp        | 42 +++++++----------------------------------
 include/neuralnetwork.hpp | 27 +++++-------------------
 include/train.hpp         | 44 ++-------------------------------------------
 src/data.cpp              | 19 +++++++++++++++++++
 src/layers.cpp            | 43 ++++++++++++++++++++++++++++++++++++++++
 src/neuralnetwork.cpp     | 24 +++++++++++++++++++++++
 src/train.cpp             | 43 ++++++++++++++++++++++++++++++++++++++++
 10 files changed, 149 insertions(+), 134 deletions(-)
 create mode 100644 src/data.cpp
 create mode 100644 src/layers.cpp
 create mode 100644 src/neuralnetwork.cpp
 create mode 100644 src/train.cpp

diff --git a/.vscode/settings.json b/.vscode/settings.json
index a0e5d0e..36c8c16 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -54,6 +54,7 @@
         "typeinfo": "cpp",
         "unordered_map": "cpp",
         "variant": "cpp",
-        "algorithm": "cpp"
+        "algorithm": "cpp",
+        "thread": "cpp"
     }
 }
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4d4793e..89c6cda 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ project("gekko_ml")
 set(CMAKE_CXX_STANDARD 17)
 find_package(xtl REQUIRED PATHS /Users/harshaarya17/xtl)
 find_package(xtensor REQUIRED PATHS /Users/harshaarya17/dependencies/xtensor)
-add_executable(main "src/main.cpp")
+add_executable(main "src/main.cpp" "src/data.cpp" "src/train.cpp" "src/neuralnetwork.cpp" "src/layers.cpp")
 target_link_libraries(main xtensor xtensor::optimize)
 target_include_directories(main PUBLIC "include/")
 
diff --git a/include/data.hpp b/include/data.hpp
index e84a795..60c55c1 100644
--- a/include/data.hpp
+++ b/include/data.hpp
@@ -9,38 +9,16 @@ struct Batch {
     Tensor targets;
 };
 
-class DataIterator {
-public:
-    virtual std::vector<Batch> initialize(Tensor inputs, Tensor targets) = 0;
-};
-
-class BatchIterator : public DataIterator {
-
-public:
-
-    BatchIterator(int batch_size = 32, bool shuffle = true) : batch_size(batch_size), shuffle(shuffle) {}
-
-    std::vector<Batch> initialize(Tensor inputs, Tensor targets) override {
-        std::vector<Batch> batches;
-        Tensor starts = xt::arange(0,static_cast<int>(inputs.size()),batch_size);
-
-        if (shuffle) {
-            xt::random::shuffle(starts);
-        }
-
-        Batch batch;
-        for (auto start : starts) {
-            batch.inputs = inputs;
-            batch.targets = targets;
-            batches.push_back(batch);
-        }
-        return batches;
-    }
-
-private:
-    int batch_size;
-    bool shuffle;
+class BatchIterator {
+
+public:
+
+    BatchIterator(int batch_size = 32, bool shuffle = true) : batch_size(batch_size), shuffle(shuffle) {}
+
+    std::vector<Batch> initialize_batch(Tensor inputs, Tensor targets);
+
+private:
+    int batch_size;
+    bool shuffle;
 };
diff --git a/include/layers.hpp b/include/layers.hpp
index ddc46c5..5e5298b 100644
--- a/include/layers.hpp
+++ b/include/layers.hpp
@@ -23,47 +23,11 @@ class Linear : public Layer {
     Tensor weights,bias,grad_weights,grad_bias;
     Params params;
 
-    void initialize(){
-        params.weights = xt::random::randn<double>({input_class_size,output_class_size});
-        params.bias = xt::random::randn<double>({output_class_size});
-    }
+    void initialize();
 
-    Tensor forward(Tensor inputs) override {
-        /*outputs = inputs @ w + b*/
-        /*Mathematically, a linear layer can be represented as:
-        Y = XW + b
-        where:
-        X is the input matrix of size n x m, where n is the batch size and m is the number of input features.
-        W is the weight matrix of size m x p, where p is the number of output features.
-        b is the bias vector of size p.
-        Y is the output matrix of size n x p*/
-        initialize();
-        Tensor prod = inputs * params.weights;
-        Tensor outputs = prod + params.bias;
-        // std::cout << "These are outputs from forward" << outputs << std::endl;
-        return outputs;
-    }
+    Tensor forward(Tensor inputs) override;
 
-    Tensor backward(Tensor grad, Tensor inputs) override {
-        /*
-        if y = f(x) and x = a * b + c
-        then dy/da = f'(x) * b
-        and dy/db = f'(x) * a
-        and dy/dc = f'(x)
-
-        if y = f(x) and x = a @ b + c
-        then dy/da = f'(x) @ b.T
-        and dy/db = a.T @ f'(x)
-        and dy/dc = f'(x)*/
-        Tensor copy_var = xt::sum(grad,1);
-        params.grad_biases = copy_var;
-        Tensor tr_inputs = xt::transpose(inputs);
-        params.grad_weights = tr_inputs * grad;
-        auto tr_grad_w = xt::transpose(params.grad_weights);
-        Tensor backward_outputs = grad * tr_grad_w;
-        // std::cout << "These are outputs from backward" << backward_outputs << std::endl;
-        return backward_outputs;
-    }
+    Tensor backward(Tensor grad, Tensor inputs) override;
 
 private:
     double input_class_size;
     double output_class_size;
diff --git a/include/neuralnetwork.hpp b/include/neuralnetwork.hpp
index 4474af7..db1db51 100644
--- a/include/neuralnetwork.hpp
+++ b/include/neuralnetwork.hpp
@@ -11,28 +11,11 @@ class NeuralNet{
     std::vector<Linear> layers_class;
     NeuralNet(std::vector<Linear>& layers):layers_class(layers) {}
 
-    Tensor forward(Tensor inputs){
-        for(auto layer : layers_class)
-            inputs = layer.forward(inputs);
-        return inputs;
-    }
-
-    Tensor backward(Tensor grad,Tensor inputs){
-        std::vector<Linear> rev_layers_class = layers_class;
-        std::reverse(rev_layers_class.begin(),rev_layers_class.end());
-        for(auto layer : rev_layers_class)
-            grad = layer.backward(grad,inputs);
-        return grad;
-    }
-
-    std::vector<std::tuple<Tensor,Tensor>> params_and_grads() {
-        std::vector<std::tuple<Tensor,Tensor>> result;
-        for (auto layer : layers_class) {
-            result.push_back(std::make_tuple(layer.params.weights,layer.params.bias));
-            result.push_back(std::make_tuple(layer.params.grad_weights,layer.params.grad_biases));
-        }
-        return result;
-    }
+    Tensor forward(Tensor inputs);
+
+    Tensor backward(Tensor grad,Tensor inputs);
+
+    std::vector<std::tuple<Tensor,Tensor>> params_and_grads();
 };
diff --git a/include/train.hpp b/include/train.hpp
index 5d45f01..78abca6 100644
--- a/include/train.hpp
+++ b/include/train.hpp
@@ -13,48 +13,8 @@ class Train{
 
 public:
 
-    void gui_train(float epoch,int num_epochs){
-        /*this function uses the bar width and a loop to draw a progress bar on screen*/
-        while (epoch < num_epochs) {
-            int barWidth = 70;
-
-            std::cout << "[";
-            int pos = barWidth * epoch;
-            for (int i = 0; i < barWidth; ++i) {
-                if (i < pos) std::cout << "=";
-                else if (i == pos) std::cout << ">";
-                else std::cout << " ";
-            }
-            std::cout << "] " << int((epoch + 0.1) * 10.0) << " %\r";
-            std::cout.flush();
-
-            epoch += 0.0001;
-        }
-        std::cout << std::endl;
-    }
-
-    void train(NeuralNet net,Tensor inputs,Tensor targets,int num_epochs,BatchIterator batchit,MSE mse,Optimizer optimizer){
-        std::cout << "Training Job started" << std::endl;
-        for (size_t epoch = 0; epoch < num_epochs; epoch++){
-            double epoch_loss = 0.0;
-            std::vector<Batch> batches = batchit.initialize(inputs, targets);
-            for (size_t i = 0; i < batches.size(); i++) {
-                Tensor predicted = net.forward(batches[i].inputs);
-                epoch_loss += mse.loss(predicted, batches[i].targets);
-                Tensor grad = mse.grad(predicted, batches[i].targets);
-                net.backward(grad,batches[i].inputs);
-                optimizer.step(net);
-            }
-            gui_train(epoch,num_epochs);
-            std::cout << "Epoch: " << epoch + 1 << ", Loss: " << epoch_loss << std::endl;
-            if(epoch+1 == num_epochs){
-                std::cout << "Training Job complete" << std::endl;
-            }
-        }
-    }
+    void gui_train(float epoch,int num_epochs);
+    void train(NeuralNet net,Tensor inputs,Tensor targets,int num_epochs,BatchIterator batchit,MSE mse,Optimizer optimizer);
 };
 
 #endif
\ No newline at end of file
diff --git a/src/data.cpp b/src/data.cpp
new file mode 100644
index 0000000..df2eded
--- /dev/null
+++ b/src/data.cpp
@@ -0,0 +1,19 @@
+#include "data.hpp"
+
+std::vector<Batch> BatchIterator::initialize_batch(Tensor inputs, Tensor targets) {
+    std::vector<Batch> batches;
+
+    Tensor starts = xt::arange(0,static_cast<int>(inputs.size()),batch_size);
+
+    if (shuffle) {
+        xt::random::shuffle(starts);
+    }
+
+    Batch batch;
+    for (auto start : starts) {
+        batch.inputs = inputs;
+        batch.targets = targets;
+        batches.push_back(batch);
+    }
+    return batches;
+}
\ No newline at end of file
diff --git a/src/layers.cpp b/src/layers.cpp
new file mode 100644
index 0000000..3614ae8
--- /dev/null
+++ b/src/layers.cpp
@@ -0,0 +1,43 @@
+#include "layers.hpp"
+
+void Linear::initialize(){
+    params.weights = xt::random::randn<double>({input_class_size,output_class_size});
+    params.bias = xt::random::randn<double>({output_class_size});
+}
+
+Tensor Linear::forward(Tensor inputs) {
+    /*outputs = inputs @ w + b*/
+    /*Mathematically, a linear layer can be represented as:
+    Y = XW + b
+    where:
+    X is the input matrix of size n x m, where n is the batch size and m is the number of input features.
+    W is the weight matrix of size m x p, where p is the number of output features.
+    b is the bias vector of size p.
+    Y is the output matrix of size n x p*/
+    initialize();
+    Tensor prod = inputs * params.weights;
+    Tensor outputs = prod + params.bias;
+    // std::cout << "These are outputs from forward" << outputs << std::endl;
+    return outputs;
+}
+
+Tensor Linear::backward(Tensor grad, Tensor inputs) {
+    /*
+    if y = f(x) and x = a * b + c
+    then dy/da = f'(x) * b
+    and dy/db = f'(x) * a
+    and dy/dc = f'(x)
+
+    if y = f(x) and x = a @ b + c
+    then dy/da = f'(x) @ b.T
+    and dy/db = a.T @ f'(x)
+    and dy/dc = f'(x)*/
+    Tensor copy_var = xt::sum(grad,1);
+    params.grad_biases = copy_var;
+    Tensor tr_inputs = xt::transpose(inputs);
+    params.grad_weights = tr_inputs * grad;
+    auto tr_grad_w = xt::transpose(params.grad_weights);
+    Tensor backward_outputs = grad * tr_grad_w;
+    // std::cout << "These are outputs from backward" << backward_outputs << std::endl;
+    return backward_outputs;
+}
\ No newline at end of file
diff --git a/src/neuralnetwork.cpp b/src/neuralnetwork.cpp
new file mode 100644
index 0000000..3713865
--- /dev/null
+++ b/src/neuralnetwork.cpp
@@ -0,0 +1,24 @@
+#include "neuralnetwork.hpp"
+
+Tensor NeuralNet::forward(Tensor inputs){
+    for(auto layer : layers_class)
+        inputs = layer.forward(inputs);
+    return inputs;
+}
+
+Tensor NeuralNet::backward(Tensor grad,Tensor inputs){
+    std::vector<Linear> rev_layers_class = layers_class;
+    std::reverse(rev_layers_class.begin(),rev_layers_class.end());
+    for(auto layer : rev_layers_class)
+        grad = layer.backward(grad,inputs);
+    return grad;
+}
+
+std::vector<std::tuple<Tensor,Tensor>> NeuralNet::params_and_grads() {
+    std::vector<std::tuple<Tensor,Tensor>> result;
+    for (auto layer : layers_class) {
+        result.push_back(std::make_tuple(layer.params.weights,layer.params.bias));
+        result.push_back(std::make_tuple(layer.params.grad_weights,layer.params.grad_biases));
+    }
+    return result;
+}
\ No newline at end of file
diff --git a/src/train.cpp b/src/train.cpp
new file mode 100644
index 0000000..71ac7e6
--- /dev/null
+++ b/src/train.cpp
@@ -0,0 +1,43 @@
+#include "train.hpp"
+
+void Train::gui_train(float epoch,int num_epochs){
+    /*this function uses the bar width and a loop to draw a progress bar on screen*/
+    while (epoch < num_epochs) {
+        int barWidth = 70;
+
+        std::cout << "[";
+        int pos = barWidth * epoch;
+        for (int i = 0; i < barWidth; ++i) {
+            if (i < pos) std::cout << "=";
+            else if (i == pos) std::cout << ">";
+            else std::cout << " ";
+        }
+        std::cout << "] " << int((epoch + 0.1) * 10.0) << " %\r";
+        std::cout.flush();
+
+        epoch += 0.0001;
+    }
+    std::cout << std::endl;
+}
+
+void Train::train(NeuralNet net,Tensor inputs,Tensor targets,int num_epochs,BatchIterator batchit,MSE mse,Optimizer optimizer){
+    std::cout << "Training Job started" << std::endl;
+    for (size_t epoch = 0; epoch < num_epochs; epoch++){
+        double epoch_loss = 0.0;
+        std::vector<Batch> batches = batchit.initialize(inputs, targets);
+        for (size_t i = 0; i < batches.size(); i++) {
+            Tensor predicted = net.forward(batches[i].inputs);
+            epoch_loss += mse.loss(predicted, batches[i].targets);
+            Tensor grad = mse.grad(predicted, batches[i].targets);
+            net.backward(grad,batches[i].inputs);
+            optimizer.step(net);
+        }
+        gui_train(epoch,num_epochs);
+        std::cout << "Epoch: " << epoch + 1 << ", Loss: " << epoch_loss << std::endl;
+        if(epoch+1 == num_epochs){
+            std::cout << "Training Job complete" << std::endl;
+        }
+    }
+}

From 4daa7f763e63fe4d277a3a89bebb0e0a9d83c41c Mon Sep 17 00:00:00 2001
From: harshaneo17
Date: Sun, 17 Mar 2024 12:13:27 +0000
Subject: [PATCH 2/2] latest commits

---
 CMakeLists.txt | 2 +-
 src/train.cpp  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 89c6cda..4d4793e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ project("gekko_ml")
 set(CMAKE_CXX_STANDARD 17)
 find_package(xtl REQUIRED PATHS /Users/harshaarya17/xtl)
 find_package(xtensor REQUIRED PATHS /Users/harshaarya17/dependencies/xtensor)
-add_executable(main "src/main.cpp" "src/data.cpp" "src/train.cpp" "src/neuralnetwork.cpp" "src/layers.cpp")
+add_executable(main "src/main.cpp")
 target_link_libraries(main xtensor xtensor::optimize)
 target_include_directories(main PUBLIC "include/")
 
diff --git a/src/train.cpp b/src/train.cpp
index 71ac7e6..c982ad8 100644
--- a/src/train.cpp
+++ b/src/train.cpp
@@ -24,7 +24,7 @@ void Train::train(NeuralNet net,Tensor inputs,Tensor targets,int num_epochs,Batc
     std::cout << "Training Job started" << std::endl;
     for (size_t epoch = 0; epoch < num_epochs; epoch++){
         double epoch_loss = 0.0;
-        std::vector<Batch> batches = batchit.initialize(inputs, targets);
+        std::vector<Batch> batches = batchit.initialize_batch(inputs, targets);
         for (size_t i = 0; i < batches.size(); i++) {
             Tensor predicted = net.forward(batches[i].inputs);
            epoch_loss += mse.loss(predicted, batches[i].targets);
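
A note on the code moved into src/layers.cpp: Linear::forward computes inputs * params.weights, which xtensor evaluates element-wise, while the comment directly above it specifies the matrix product Y = XW + b; backward likewise uses * where its own derivation calls for @, and it sums the bias gradient over axis 1 rather than the batch axis. forward also re-randomizes the weights on every call by invoking initialize(). The following is a minimal sketch of the matmul version, not the PR's code: it assumes xtensor-blas is added as a dependency for xt::linalg::dot (CMakeLists.txt does not link it yet), moves initialization into the constructor, and caches the layer's input for backward. LinearSketch is a hypothetical stand-in, not the PR's Linear class.

    // Sketch only: assumes xtensor-blas (xt::linalg::dot) is available;
    // LinearSketch is a stand-in for illustration, not the PR's Linear.
    #include <cstddef>
    #include <xtensor/xarray.hpp>
    #include <xtensor/xmath.hpp>
    #include <xtensor/xmanipulation.hpp>
    #include <xtensor/xrandom.hpp>
    #include <xtensor-blas/xlinalg.hpp>

    using Tensor = xt::xarray<double>;

    class LinearSketch {
    public:
        LinearSketch(std::size_t in_features, std::size_t out_features)
            // initialize once here instead of on every forward() call
            : weights(xt::random::randn<double>({in_features, out_features})),
              bias(xt::random::randn<double>({out_features})) {}

        // Y = XW + b: X is (n x m), W is (m x p), b is (p), Y is (n x p)
        Tensor forward(const Tensor& inputs) {
            last_inputs = inputs;  // cache this layer's own input for backward
            return xt::linalg::dot(inputs, weights) + bias;  // bias broadcasts over rows
        }

        // Per the derivation comment: dL/dW = X^T @ grad,
        // dL/db = sum of grad over the batch axis, dL/dX = grad @ W^T
        Tensor backward(const Tensor& grad) {
            grad_weights = xt::linalg::dot(xt::transpose(last_inputs), grad);
            grad_bias = xt::sum(grad, {0});  // axis 0 (batch), not axis 1
            return xt::linalg::dot(grad, xt::transpose(weights));
        }

        Tensor weights, bias, grad_weights, grad_bias, last_inputs;
    };

Caching last_inputs per layer would also address NeuralNet::backward handing the same network-level inputs to every layer, when each layer's gradient needs the activations that particular layer saw during forward.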
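
Similarly, BatchIterator::initialize_batch in src/data.cpp computes shuffled starts but then stores the complete inputs and targets in every batch, and it strides over inputs.size() (the total element count) rather than the number of samples. A sketch of per-batch row slicing follows, assuming samples lie along axis 0; make_batches is a hypothetical stand-in, and the shuffling of the original could be layered on top by permuting the start offsets before slicing.

    // Sketch only: each batch holds rows [start, stop) instead of the whole
    // dataset; Batch mirrors the struct in include/data.hpp.
    #include <algorithm>
    #include <cstddef>
    #include <vector>
    #include <xtensor/xarray.hpp>
    #include <xtensor/xview.hpp>

    using Tensor = xt::xarray<double>;
    struct Batch { Tensor inputs; Tensor targets; };

    std::vector<Batch> make_batches(const Tensor& inputs, const Tensor& targets,
                                    std::size_t batch_size) {
        std::vector<Batch> batches;
        const std::size_t n = inputs.shape(0);  // number of samples, not inputs.size()
        for (std::size_t start = 0; start < n; start += batch_size) {
            const std::size_t stop = std::min(start + batch_size, n);
            Batch batch;
            // xt::view slices axis 0 and leaves any remaining axes intact
            batch.inputs = xt::view(inputs, xt::range(start, stop));
            batch.targets = xt::view(targets, xt::range(start, stop));
            batches.push_back(batch);
        }
        return batches;
    }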
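
Finally, in Train::gui_train the marker position barWidth * epoch overruns the 70-column bar once epoch reaches 1, int((epoch + 0.1) * 10.0) only reads as a percentage when num_epochs happens to be 10, and the while loop animates from the current epoch all the way to num_epochs on every call rather than drawing the current state once. A sketch that scales both values by num_epochs and prints one snapshot per call, so the existing once-per-epoch call in Train::train advances the bar across the run:

    // Sketch only: one snapshot of the bar per call; both the marker position
    // and the percentage are scaled by num_epochs so they stay in range.
    #include <iostream>

    void draw_progress(int epoch, int num_epochs) {
        const int barWidth = 70;
        const float progress = static_cast<float>(epoch + 1) / static_cast<float>(num_epochs);
        const int pos = static_cast<int>(barWidth * progress);

        std::cout << "[";
        for (int i = 0; i < barWidth; ++i) {
            if (i < pos) std::cout << "=";
            else if (i == pos) std::cout << ">";
            else std::cout << " ";
        }
        std::cout << "] " << static_cast<int>(progress * 100.0f) << " %\r";
        std::cout.flush();
        if (epoch + 1 == num_epochs) std::cout << std::endl;  // keep the finished bar visible
    }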