diff --git a/include/conv2d.hpp b/include/conv2d.hpp
new file mode 100644
index 0000000..27baab0
--- /dev/null
+++ b/include/conv2d.hpp
@@ -0,0 +1,133 @@
+#ifndef CONV2D_HPP
+#define CONV2D_HPP
+
+#include "tensor_load.hpp"
+#include "layers.hpp"
+#include <iostream>
+
+class Conv2D : public Layer {
+public:
+    Conv2D(size_t in_channels, size_t out_channels, std::vector<size_t> kernel_size, size_t stride = 1, size_t padding = 0)
+        : in_channels(in_channels), out_channels(out_channels), kernel_size(kernel_size),
+          stride(stride), padding(padding) {
+        initialize();
+    }
+
+    void initialize() {
+        // Initialize weights and bias with random values
+        params.weights = xt::random::randn<double>({out_channels, in_channels, kernel_size[0], kernel_size[1]});
+        params.bias = xt::random::randn<double>({out_channels});
+    }
+
+    Tensor forward(Tensor inputs) override {
+        // Input is expected in NCHW layout: batch x channels x height x width
+        size_t batch_size = inputs.shape(0);
+        size_t input_channels = inputs.shape(1);
+        size_t input_height = inputs.shape(2);
+        size_t input_width = inputs.shape(3);
+
+        std::cout << "Input shape: " << batch_size << "x" << input_channels << "x" << input_height << "x" << input_width << std::endl;
+
+        // A stride of zero would cause a division by zero below
+        if (stride == 0) {
+            throw std::runtime_error("Stride cannot be zero");
+        }
+
+        // Standard convolution output size: (H - K + 2P) / S + 1
+        int output_height = (static_cast<int>(input_height) - static_cast<int>(kernel_size[0]) + 2 * static_cast<int>(padding)) / static_cast<int>(stride) + 1;
+        int output_width = (static_cast<int>(input_width) - static_cast<int>(kernel_size[1]) + 2 * static_cast<int>(padding)) / static_cast<int>(stride) + 1;
+
+        std::cout << "Calculated output height: " << output_height << std::endl;
+        std::cout << "Calculated output width: " << output_width << std::endl;
+
+        // Output dimensions must be positive for a valid convolution
+        if (output_height <= 0 || output_width <= 0) {
+            throw std::runtime_error("Invalid output dimensions");
+        }
+
+        Tensor outputs;
+        try {
+            outputs = xt::zeros<double>({batch_size,
+                                         out_channels,
+                                         static_cast<size_t>(output_height),
+                                         static_cast<size_t>(output_width)});
+            std::cout << "Output tensor created successfully" << std::endl;
+        } catch (const std::exception& e) {
+            std::cerr << "Error creating output tensor: " << e.what() << std::endl;
+            throw;
+        }
+
+        // Direct (naive) convolution over batch, output channel, output position, input channel and kernel position
+        for (size_t b = 0; b < batch_size; ++b) {
+            for (size_t oc = 0; oc < out_channels; ++oc) {
+                for (size_t oh = 0; oh < static_cast<size_t>(output_height); ++oh) {
+                    for (size_t ow = 0; ow < static_cast<size_t>(output_width); ++ow) {
+                        double sum = 0.0;
+                        for (size_t ic = 0; ic < in_channels; ++ic) {
+                            for (size_t kh = 0; kh < kernel_size[0]; ++kh) {
+                                for (size_t kw = 0; kw < kernel_size[1]; ++kw) {
+                                    // Positions inside the padding wrap around to huge unsigned values and fail the bounds check below
+                                    size_t ih = oh * stride + kh - padding;
+                                    size_t iw = ow * stride + kw - padding;
+                                    if (ih < input_height && iw < input_width) {
+                                        sum += inputs(b, ic, ih, iw) * params.weights(oc, ic, kh, kw);
+                                    }
+                                }
+                            }
+                        }
+                        outputs(b, oc, oh, ow) = sum + params.bias(oc);
+                    }
+                }
+            }
+        }
+
+        return outputs;
+    }
+
+    Tensor backward(Tensor grad, Tensor inputs) override {
+        size_t batch_size = inputs.shape(0);
+        size_t input_height = inputs.shape(2);
+        size_t input_width = inputs.shape(3);
+
+        size_t output_height = grad.shape(2);
+        size_t output_width = grad.shape(3);
+
+        Tensor input_grad = xt::zeros<double>(inputs.shape());
+        params.grad_weights = xt::zeros<double>(params.weights.shape());
+        params.grad_biases = xt::sum(grad, {0, 2, 3});
+
+        std::cout << "debug 1st backward conv2d" << std::endl;
+        // Accumulate gradients with respect to the input and the weights
+        for (size_t b = 0; b < batch_size; ++b) {
+            for (size_t oc = 0; oc < out_channels; ++oc) {
+                for (size_t oh = 0; oh < output_height; ++oh) {
+                    for (size_t ow = 0; ow < output_width; ++ow) {
+                        for (size_t ic = 0; ic < in_channels; ++ic) {
+                            for (size_t kh = 0; kh < kernel_size[0]; ++kh) {
+                                for (size_t kw = 0; kw < kernel_size[1]; ++kw) {
+                                    // Same unsigned wrap-around bounds check as in forward()
+                                    size_t ih = oh * stride + kh - padding;
+                                    size_t iw = ow * stride + kw - padding;
+                                    if (ih < input_height && iw < input_width) {
+                                        double grad_val = grad(b, oc, oh, ow);
+                                        input_grad(b, ic, ih, iw) += grad_val * params.weights(oc, ic, kh, kw);
+                                        params.grad_weights(oc, ic, kh, kw) += grad_val * inputs(b, ic, ih, iw);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return input_grad;
+    }
+
+private:
+    size_t in_channels;
+    size_t out_channels;
+    std::vector<size_t> kernel_size;
+    size_t stride;
+    size_t padding;
+};
+
+#endif
\ No newline at end of file
diff --git a/include/initialization.hpp b/include/initialization.hpp
index 1313053..29044b9 100644
--- a/include/initialization.hpp
+++ b/include/initialization.hpp
@@ -3,7 +3,12 @@
 
 #include "tensor_load.hpp"
 
-class Glorot{
+class INIT{
+    public:
+        virtual Tensor initialize(double n_rows, double n_cols){ return Tensor(); }
+};
+
+class Glorot : public INIT{
     public:
         Tensor initialize(double n_rows,double n_cols){
             /*Golrot proposed a method to initialize
@@ -18,7 +23,7 @@ class Glorot{
         }
 };
 
-class He{
+class He : public INIT{
     Tensor initialize(double n_rows, double n_cols){
         double bound = std::sqrt(6/n_rows);
         Tensor rand_values = xt::random::rand({n_rows,n_cols});
@@ -28,7 +33,7 @@ class He{
     }
 };
 
-class LSUV{
+class LSUV : public INIT{
     public:
        double input_stddev;
       LSUV(double scale){
diff --git a/include/neuralnetwork.hpp b/include/neuralnetwork.hpp
index a34a589..4261b61 100644
--- a/include/neuralnetwork.hpp
+++ b/include/neuralnetwork.hpp
@@ -4,6 +4,7 @@
 
 #include "tensor_load.hpp"
 #include "layers.hpp"
+#include "conv2d.hpp"
 
 #include 
 
diff --git a/include/train.hpp b/include/train.hpp
index a5e1d5b..5eda344 100644
--- a/include/train.hpp
+++ b/include/train.hpp
@@ -42,6 +42,7 @@ class Train{
             std::vector batches = batchit.initialize(inputs, targets);
             for (size_t i = 0; i < batches.size(); i++) {
                 Tensor predicted = net.forward(batches[i].inputs);
+                std::cout << "train 2" << std::endl;
                 epoch_loss += mse.loss(predicted, batches[i].targets);
                 Tensor grad = mse.grad(predicted, batches[i].targets);
                 net.backward(grad,batches[i].inputs);
diff --git a/src/main.cpp b/src/main.cpp
index 65662ca..3bed1ba 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -17,11 +17,13 @@ int main(int argc, char* argv[])
     auto linr = std::make_unique<Linear>(3,3);
     auto linr2 = std::make_unique<Linear>(3,3);
     auto linr3 = std::make_unique<Linear>(3,3);
+    auto conv2d = std::make_unique<Conv2D>(3, 3, std::vector<size_t>{1, 1});
     auto tanh_obj = std::make_unique<Tanh>();
 
     std::vector<std::unique_ptr<Layer>> layers;
-    layers.push_back(std::move(linr));
-    layers.push_back(std::move(linr2));
+    // layers.push_back(std::move(linr));
+    // layers.push_back(std::move(linr2));
+    layers.push_back(std::move(conv2d));
     layers.push_back(std::move(linr3));
     layers.push_back(std::move(tanh_obj));
 
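For reference, a minimal usage sketch of the new layer (not part of the patch) could look like the snippet below. It assumes that Tensor is an xt::xarray<double>-style alias provided by tensor_load.hpp and that inputs follow the NCHW layout used by forward(); the file name example_conv2d.cpp is only illustrative.

// example_conv2d.cpp -- hypothetical usage sketch, not part of the patch
#include "conv2d.hpp"
#include <iostream>

int main() {
    // One 3-channel 8x8 image in NCHW layout: batch x channels x height x width
    Tensor input = xt::random::rand<double>({1, 3, 8, 8});

    // 3 input channels, 4 output channels, 3x3 kernel, stride 1, no padding
    Conv2D conv(3, 4, std::vector<size_t>{3, 3});

    Tensor output = conv.forward(input);
    // Expected shape: 1 x 4 x 6 x 6, since (8 - 3 + 2*0) / 1 + 1 = 6
    std::cout << output.shape(0) << "x" << output.shape(1) << "x"
              << output.shape(2) << "x" << output.shape(3) << std::endl;
    return 0;
}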