From ce7413e2ef0ca2a914c712f2f496ba732b7564a0 Mon Sep 17 00:00:00 2001
From: harshaneo17
Date: Mon, 11 Mar 2024 22:18:36 +0000
Subject: [PATCH 1/2] changes to file structure

---
 .vscode/settings.json     |  3 ++-
 CMakeLists.txt            |  2 +-
 include/data.hpp          | 36 +++++++++-------------------
 include/layers.hpp        | 42 +++++++----------------------------------
 include/neuralnetwork.hpp | 27 +++++-------------------
 include/train.hpp         | 44 ++-------------------------------------------
 src/data.cpp              | 19 +++++++++++++++++++
 src/layers.cpp            | 43 ++++++++++++++++++++++++++++++++++++++++
 src/neuralnetwork.cpp     | 24 +++++++++++++++++++++++
 src/train.cpp             | 43 ++++++++++++++++++++++++++++++++++++++++
 10 files changed, 149 insertions(+), 134 deletions(-)
 create mode 100644 src/data.cpp
 create mode 100644 src/layers.cpp
 create mode 100644 src/neuralnetwork.cpp
 create mode 100644 src/train.cpp

diff --git a/.vscode/settings.json b/.vscode/settings.json
index a0e5d0e..36c8c16 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -54,6 +54,7 @@
         "typeinfo": "cpp",
         "unordered_map": "cpp",
         "variant": "cpp",
-        "algorithm": "cpp"
+        "algorithm": "cpp",
+        "thread": "cpp"
     }
 }
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4d4793e..89c6cda 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ project("gekko_ml")
 set(CMAKE_CXX_STANDARD 17)
 find_package(xtl REQUIRED PATHS /Users/harshaarya17/xtl)
 find_package(xtensor REQUIRED PATHS /Users/harshaarya17/dependencies/xtensor)
-add_executable(main "src/main.cpp")
+add_executable(main "src/main.cpp" "src/data.cpp" "src/train.cpp" "src/neuralnetwork.cpp" "src/layers.cpp")
 target_link_libraries(main xtensor xtensor::optimize)
 target_include_directories(main PUBLIC "include/")
 
diff --git a/include/data.hpp b/include/data.hpp
index e84a795..60c55c1 100644
--- a/include/data.hpp
+++ b/include/data.hpp
@@ -9,38 +9,16 @@ struct Batch {
     Tensor targets;
 };
 
-class DataIterator {
-public:
-    virtual std::vector<Batch> initialize(Tensor inputs, Tensor targets) = 0;
-};
-
-class BatchIterator : public DataIterator {
-
-public:
-
-    BatchIterator(int batch_size = 32, bool shuffle = true) : batch_size(batch_size), shuffle(shuffle) {}
-
-    std::vector<Batch> initialize(Tensor inputs, Tensor targets) override {
-        std::vector<Batch> batches;
-        Tensor starts = xt::arange(0,static_cast<int>(inputs.size()),batch_size);
-
-        if (shuffle) {
-            xt::random::shuffle(starts);
-        }
-
-        Batch batch;
-        for (auto start : starts) {
-            batch.inputs = inputs;
-            batch.targets = targets;
-            batches.push_back(batch);
-        }
-        return batches;
-    }
-
-private:
-    int batch_size;
-    bool shuffle;
+class BatchIterator {
+
+public:
+
+    BatchIterator(int batch_size = 32, bool shuffle = true) : batch_size(batch_size), shuffle(shuffle) {}
+
+    std::vector<Batch> initialize_batch(Tensor inputs, Tensor targets);
+
+private:
+    int batch_size;
+    bool shuffle;
 };
diff --git a/include/layers.hpp b/include/layers.hpp
index ddc46c5..5e5298b 100644
--- a/include/layers.hpp
+++ b/include/layers.hpp
@@ -23,47 +23,11 @@ class Linear : public Layer {
     Tensor weights,bias,grad_weights,grad_bias;
     Params params;
 
-    void initialize(){
-        params.weights = xt::random::randn<double>({input_class_size,output_class_size});
-        params.bias = xt::random::randn<double>({output_class_size});
-    }
+    void initialize();
 
-    Tensor forward(Tensor inputs) override {
-        /*outputs = inputs @ w + b*/
-        /*Mathematically, a linear layer can be represented as:
-        Y = XW + b
-        where:
-        X is the input matrix of size n x m, where n is the batch size and m is the number of input features.
-        W is the weight matrix of size m x p, where p is the number of output features.
-        b is the bias vector of size p.
-        Y is the output matrix of size n x p*/
-        initialize();
-        Tensor prod = inputs * params.weights;
-        Tensor outputs = prod + params.bias;
-        // std::cout << "These are outputs from forward" << outputs << std::endl;
-        return outputs;
-    }
+    Tensor forward(Tensor inputs) override;
 
-    Tensor backward(Tensor grad, Tensor inputs) override {
-        /*
-        if y = f(x) and x = a * b + c
-        then dy/da = f'(x) * b
-        and dy/db = f'(x) * a
-        and dy/dc = f'(x)
-
-        if y = f(x) and x = a @ b + c
-        then dy/da = f'(x) @ b.T
-        and dy/db = a.T @ f'(x)
-        and dy/dc = f'(x)*/
-        Tensor copy_var = xt::sum(grad,1);
-        params.grad_biases = copy_var;
-        Tensor tr_inputs = xt::transpose(inputs);
-        params.grad_weights = tr_inputs * grad;
-        auto tr_grad_w = xt::transpose(params.grad_weights);
-        Tensor backward_outputs = grad * tr_grad_w;
-        // std::cout << "These are outputs from backward" << backward_outputs << std::endl;
-        return backward_outputs;
-    }
+    Tensor backward(Tensor grad, Tensor inputs) override;
 
 private:
     double input_class_size;
     double output_class_size;
diff --git a/include/neuralnetwork.hpp b/include/neuralnetwork.hpp
index 4474af7..db1db51 100644
--- a/include/neuralnetwork.hpp
+++ b/include/neuralnetwork.hpp
@@ -11,28 +11,11 @@ class NeuralNet{
     std::vector<Linear> layers_class;
     NeuralNet(std::vector<Linear>& layers):layers_class(layers) {}
 
-    Tensor forward(Tensor inputs){
-        for(auto layer : layers_class)
-            inputs = layer.forward(inputs);
-        return inputs;
-    }
-
-    Tensor backward(Tensor grad,Tensor inputs){
-        std::vector<Linear> rev_layers_class = layers_class;
-        std::reverse(rev_layers_class.begin(),rev_layers_class.end());
-        for(auto layer : rev_layers_class)
-            grad = layer.backward(grad,inputs);
-        return grad;
-    }
-
-    std::vector<std::tuple<Tensor,Tensor>> params_and_grads() {
-        std::vector<std::tuple<Tensor,Tensor>> result;
-        for (auto layer : layers_class) {
-            result.push_back(std::make_tuple(layer.params.weights,layer.params.bias));
-            result.push_back(std::make_tuple(layer.params.grad_weights,layer.params.grad_biases));
-        }
-        return result;
-    }
+    Tensor forward(Tensor inputs);
+
+    Tensor backward(Tensor grad,Tensor inputs);
+
+    std::vector<std::tuple<Tensor,Tensor>> params_and_grads();
 };
diff --git a/include/train.hpp b/include/train.hpp
index 5d45f01..78abca6 100644
--- a/include/train.hpp
+++ b/include/train.hpp
@@ -13,48 +13,8 @@ class Train{
 
 public:
 
-    void gui_train(float epoch,int num_epochs){
-        /*this function uses the bar width and a loop to draw a progress bar on screen*/
-        while (epoch < num_epochs) {
-            int barWidth = 70;
-
-            std::cout << "[";
-            int pos = barWidth * epoch;
-            for (int i = 0; i < barWidth; ++i) {
-                if (i < pos) std::cout << "=";
-                else if (i == pos) std::cout << ">";
-                else std::cout << " ";
-            }
-            std::cout << "] " << int((epoch + 0.1) * 10.0) << " %\r";
-            std::cout.flush();
-
-            epoch += 0.0001;
-        }
-        std::cout << std::endl;
-    }
-
-    void train(NeuralNet net,Tensor inputs,Tensor targets,int num_epochs,BatchIterator batchit,MSE mse,Optimizer optimizer){
-        std::cout << "Training Job started" << std::endl;
-        for (size_t epoch = 0; epoch < num_epochs; epoch++){
-            double epoch_loss = 0.0;
-            std::vector<Batch> batches = batchit.initialize(inputs, targets);
-            for (size_t i = 0; i < batches.size(); i++) {
-                Tensor predicted = net.forward(batches[i].inputs);
-                epoch_loss += mse.loss(predicted, batches[i].targets);
-                Tensor grad = mse.grad(predicted, batches[i].targets);
-                net.backward(grad,batches[i].inputs);
-                optimizer.step(net);
-            }
-            gui_train(epoch,num_epochs);
-            std::cout << "Epoch: " << epoch + 1 << ", Loss: " << epoch_loss << std::endl;
-            if(epoch+1 == num_epochs){
-                std::cout << "Training Job complete" << std::endl;
-            }
-        }
-    }
+    void gui_train(float epoch,int num_epochs);
+    void train(NeuralNet net,Tensor inputs,Tensor targets,int num_epochs,BatchIterator batchit,MSE mse,Optimizer optimizer);
 };
 
 #endif
\ No newline at end of file
diff --git a/src/data.cpp b/src/data.cpp
new file mode 100644
index 0000000..df2eded
--- /dev/null
+++ b/src/data.cpp
@@ -0,0 +1,19 @@
+#include "data.hpp"
+
+std::vector<Batch> BatchIterator::initialize_batch(Tensor inputs, Tensor targets) {
+    std::vector<Batch> batches;
+
+    Tensor starts = xt::arange(0,static_cast<int>(inputs.size()),batch_size);
+
+    if (shuffle) {
+        xt::random::shuffle(starts);
+    }
+
+    Batch batch;
+    for (auto start : starts) {
+        batch.inputs = inputs;
+        batch.targets = targets;
+        batches.push_back(batch);
+    }
+    return batches;
+}
\ No newline at end of file
diff --git a/src/layers.cpp b/src/layers.cpp
new file mode 100644
index 0000000..3614ae8
--- /dev/null
+++ b/src/layers.cpp
@@ -0,0 +1,43 @@
+#include "layers.hpp"
+
+void Linear::initialize(){
+    params.weights = xt::random::randn<double>({input_class_size,output_class_size});
+    params.bias = xt::random::randn<double>({output_class_size});
+}
+
+Tensor Linear::forward(Tensor inputs) {
+    /*outputs = inputs @ w + b*/
+    /*Mathematically, a linear layer can be represented as:
+    Y = XW + b
+    where:
+    X is the input matrix of size n x m, where n is the batch size and m is the number of input features.
+    W is the weight matrix of size m x p, where p is the number of output features.
+    b is the bias vector of size p.
+    Y is the output matrix of size n x p*/
+    initialize();
+    Tensor prod = inputs * params.weights;
+    Tensor outputs = prod + params.bias;
+    // std::cout << "These are outputs from forward" << outputs << std::endl;
+    return outputs;
+}
+
+Tensor Linear::backward(Tensor grad, Tensor inputs) {
+    /*
+    if y = f(x) and x = a * b + c
+    then dy/da = f'(x) * b
+    and dy/db = f'(x) * a
+    and dy/dc = f'(x)
+
+    if y = f(x) and x = a @ b + c
+    then dy/da = f'(x) @ b.T
+    and dy/db = a.T @ f'(x)
+    and dy/dc = f'(x)*/
+    Tensor copy_var = xt::sum(grad,1);
+    params.grad_biases = copy_var;
+    Tensor tr_inputs = xt::transpose(inputs);
+    params.grad_weights = tr_inputs * grad;
+    auto tr_grad_w = xt::transpose(params.grad_weights);
+    Tensor backward_outputs = grad * tr_grad_w;
+    // std::cout << "These are outputs from backward" << backward_outputs << std::endl;
+    return backward_outputs;
+}
\ No newline at end of file
diff --git a/src/neuralnetwork.cpp b/src/neuralnetwork.cpp
new file mode 100644
index 0000000..3713865
--- /dev/null
+++ b/src/neuralnetwork.cpp
@@ -0,0 +1,24 @@
+#include "neuralnetwork.hpp"
+
+Tensor NeuralNet::forward(Tensor inputs){
+    for(auto layer : layers_class)
+        inputs = layer.forward(inputs);
+    return inputs;
+}
+
+Tensor NeuralNet::backward(Tensor grad,Tensor inputs){
+    std::vector<Linear> rev_layers_class = layers_class;
+    std::reverse(rev_layers_class.begin(),rev_layers_class.end());
+    for(auto layer : rev_layers_class)
+        grad = layer.backward(grad,inputs);
+    return grad;
+}
+
+std::vector<std::tuple<Tensor,Tensor>> NeuralNet::params_and_grads() {
+    std::vector<std::tuple<Tensor,Tensor>> result;
+    for (auto layer : layers_class) {
+        result.push_back(std::make_tuple(layer.params.weights,layer.params.bias));
+        result.push_back(std::make_tuple(layer.params.grad_weights,layer.params.grad_biases));
+    }
+    return result;
+}
\ No newline at end of file
diff --git a/src/train.cpp b/src/train.cpp
new file mode 100644
index 0000000..71ac7e6
--- /dev/null
+++ b/src/train.cpp
@@ -0,0 +1,43 @@
+#include "train.hpp"
+
+void Train::gui_train(float epoch,int num_epochs){
+    /*this function uses the bar width and a loop to draw a progress bar on screen*/
+    while (epoch < num_epochs) {
+        int barWidth = 70;
+
+        std::cout << "[";
+        int pos = barWidth * epoch;
+        for (int i = 0; i < barWidth; ++i) {
+            if (i < pos) std::cout << "=";
+            else if (i == pos) std::cout << ">";
+            else std::cout << " ";
+        }
+        std::cout << "] " << int((epoch + 0.1) * 10.0) << " %\r";
+        std::cout.flush();
+
+        epoch += 0.0001;
+    }
+    std::cout << std::endl;
+}
+
+void Train::train(NeuralNet net,Tensor inputs,Tensor targets,int num_epochs,BatchIterator batchit,MSE mse,Optimizer optimizer){
+    std::cout << "Training Job started" << std::endl;
+    for (size_t epoch = 0; epoch < num_epochs; epoch++){
+        double epoch_loss = 0.0;
+        std::vector<Batch> batches = batchit.initialize(inputs, targets);
+        for (size_t i = 0; i < batches.size(); i++) {
+            Tensor predicted = net.forward(batches[i].inputs);
+            epoch_loss += mse.loss(predicted, batches[i].targets);
+            Tensor grad = mse.grad(predicted, batches[i].targets);
+            net.backward(grad,batches[i].inputs);
+            optimizer.step(net);
+        }
+        gui_train(epoch,num_epochs);
+        std::cout << "Epoch: " << epoch + 1 << ", Loss: " << epoch_loss << std::endl;
+        if(epoch+1 == num_epochs){
+            std::cout << "Training Job complete" << std::endl;
+        }
+    }
+}

From 4daa7f763e63fe4d277a3a89bebb0e0a9d83c41c Mon Sep 17 00:00:00 2001
From: harshaneo17
Date: Sun, 17 Mar 2024 12:13:27 +0000
Subject: [PATCH 2/2] latest commits

---
 CMakeLists.txt | 2 +-
 src/train.cpp  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 89c6cda..4d4793e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ project("gekko_ml")
 set(CMAKE_CXX_STANDARD 17)
 find_package(xtl REQUIRED PATHS /Users/harshaarya17/xtl)
 find_package(xtensor REQUIRED PATHS /Users/harshaarya17/dependencies/xtensor)
-add_executable(main "src/main.cpp" "src/data.cpp" "src/train.cpp" "src/neuralnetwork.cpp" "src/layers.cpp")
+add_executable(main "src/main.cpp")
 target_link_libraries(main xtensor xtensor::optimize)
 target_include_directories(main PUBLIC "include/")
 
diff --git a/src/train.cpp b/src/train.cpp
index 71ac7e6..c982ad8 100644
--- a/src/train.cpp
+++ b/src/train.cpp
@@ -24,7 +24,7 @@ void Train::train(NeuralNet net,Tensor inputs,Tensor targets,int num_epochs,Batc
     std::cout << "Training Job started" << std::endl;
     for (size_t epoch = 0; epoch < num_epochs; epoch++){
         double epoch_loss = 0.0;
-        std::vector<Batch> batches = batchit.initialize(inputs, targets);
+        std::vector<Batch> batches = batchit.initialize_batch(inputs, targets);
         for (size_t i = 0; i < batches.size(); i++) {
             Tensor predicted = net.forward(batches[i].inputs);
            epoch_loss += mse.loss(predicted, batches[i].targets);
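
A note on the code moved into src/layers.cpp: Linear::forward computes inputs * params.weights, which xtensor evaluates element-wise, while the comment directly above it specifies the matrix product Y = XW + b; backward likewise uses * where its own derivation calls for @, and it sums the bias gradient over axis 1 rather than the batch axis. forward also re-randomizes the weights on every call by invoking initialize(). The following is a minimal sketch of the matmul version, not the PR's code: it assumes xtensor-blas is added as a dependency for xt::linalg::dot (CMakeLists.txt does not link it yet), moves initialization into the constructor, and caches the layer's input for backward. LinearSketch is a hypothetical stand-in, not the PR's Linear class.

    // Sketch only: assumes xtensor-blas (xt::linalg::dot) is available;
    // LinearSketch is a stand-in for illustration, not the PR's Linear.
    #include <cstddef>
    #include <xtensor/xarray.hpp>
    #include <xtensor/xmath.hpp>
    #include <xtensor/xmanipulation.hpp>
    #include <xtensor/xrandom.hpp>
    #include <xtensor-blas/xlinalg.hpp>

    using Tensor = xt::xarray<double>;

    class LinearSketch {
    public:
        LinearSketch(std::size_t in_features, std::size_t out_features)
            // initialize once here instead of on every forward() call
            : weights(xt::random::randn<double>({in_features, out_features})),
              bias(xt::random::randn<double>({out_features})) {}

        // Y = XW + b: X is (n x m), W is (m x p), b is (p), Y is (n x p)
        Tensor forward(const Tensor& inputs) {
            last_inputs = inputs;  // cache this layer's own input for backward
            return xt::linalg::dot(inputs, weights) + bias;  // bias broadcasts over rows
        }

        // Per the derivation comment: dL/dW = X^T @ grad,
        // dL/db = sum of grad over the batch axis, dL/dX = grad @ W^T
        Tensor backward(const Tensor& grad) {
            grad_weights = xt::linalg::dot(xt::transpose(last_inputs), grad);
            grad_bias = xt::sum(grad, {0});  // axis 0 (batch), not axis 1
            return xt::linalg::dot(grad, xt::transpose(weights));
        }

        Tensor weights, bias, grad_weights, grad_bias, last_inputs;
    };

Caching last_inputs per layer would also address NeuralNet::backward handing the same network-level inputs to every layer, when each layer's gradient needs the activations that particular layer saw during forward.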
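
Similarly, BatchIterator::initialize_batch in src/data.cpp computes shuffled starts but then stores the complete inputs and targets in every batch, and it strides over inputs.size() (the total element count) rather than the number of samples. A sketch of per-batch row slicing follows, assuming samples lie along axis 0; make_batches is a hypothetical stand-in, and the shuffling of the original could be layered on top by permuting the start offsets before slicing.

    // Sketch only: each batch holds rows [start, stop) instead of the whole
    // dataset; Batch mirrors the struct in include/data.hpp.
    #include <algorithm>
    #include <cstddef>
    #include <vector>
    #include <xtensor/xarray.hpp>
    #include <xtensor/xview.hpp>

    using Tensor = xt::xarray<double>;
    struct Batch { Tensor inputs; Tensor targets; };

    std::vector<Batch> make_batches(const Tensor& inputs, const Tensor& targets,
                                    std::size_t batch_size) {
        std::vector<Batch> batches;
        const std::size_t n = inputs.shape(0);  // number of samples, not inputs.size()
        for (std::size_t start = 0; start < n; start += batch_size) {
            const std::size_t stop = std::min(start + batch_size, n);
            Batch batch;
            // xt::view slices axis 0 and leaves any remaining axes intact
            batch.inputs = xt::view(inputs, xt::range(start, stop));
            batch.targets = xt::view(targets, xt::range(start, stop));
            batches.push_back(batch);
        }
        return batches;
    }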
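
Finally, in Train::gui_train the marker position barWidth * epoch overruns the 70-column bar once epoch reaches 1, int((epoch + 0.1) * 10.0) only reads as a percentage when num_epochs happens to be 10, and the while loop animates from the current epoch all the way to num_epochs on every call rather than drawing the current state once. A sketch that scales both values by num_epochs and prints one snapshot per call, so the existing once-per-epoch call in Train::train advances the bar across the run:

    // Sketch only: one snapshot of the bar per call; both the marker position
    // and the percentage are scaled by num_epochs so they stay in range.
    #include <iostream>

    void draw_progress(int epoch, int num_epochs) {
        const int barWidth = 70;
        const float progress = static_cast<float>(epoch + 1) / static_cast<float>(num_epochs);
        const int pos = static_cast<int>(barWidth * progress);

        std::cout << "[";
        for (int i = 0; i < barWidth; ++i) {
            if (i < pos) std::cout << "=";
            else if (i == pos) std::cout << ">";
            else std::cout << " ";
        }
        std::cout << "] " << static_cast<int>(progress * 100.0f) << " %\r";
        std::cout.flush();
        if (epoch + 1 == num_epochs) std::cout << std::endl;  // keep the finished bar visible
    }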