diff --git a/Model3_Retail_Random_launch.ipynb b/Model3_Retail_Random_launch.ipynb new file mode 100644 index 0000000..ec44497 --- /dev/null +++ b/Model3_Retail_Random_launch.ipynb @@ -0,0 +1,595 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "authorship_tag": "ABX9TyNo4v0uyZnOPNmmPkbdxGNr", + "include_colab_link": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "EblvaNOmtjDe" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# Example: Load your preprocessed data\n", + "train_df = pd.read_csv('train.csv', low_memory=False)\n", + "\n", + "X_train = train_df.drop(['Sales', 'Date'], axis=1)\n", + "\n", + "y_train = train_df['Sales']" + ] + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Ai4e72X2t07U" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.ensemble import RandomForestRegressor\n", + "\n", + "# Set your model parameters (customize as needed)\n", + "model = RandomForestRegressor(n_estimators=100, random_state=42)\n", + "model.fit(X_train, y_train) # Fit model to training data" + ], + "metadata": { + "id": "aWFHXXYtzbjp", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "collapsed": true, + "outputId": "d166ca37-0b9c-42ed-9441-4da1a2baafc1" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "RandomForestRegressor(random_state=42)" + ], + "text/html": [ + "
RandomForestRegressor(random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 3 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import joblib\n", + "\n", + "# Save your model\n", + "joblib.dump(model, 'sales_forecast_model.joblib')" + ], + "metadata": { + "id": "_HsrG5nbvGHj", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "c50d9be4-026d-4b78-ae34-b4e2c0bfe175" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['sales_forecast_model.joblib']" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "import joblib\n", + "model = joblib.load('sales_forecast_model.joblib')\n", + "\n" + ], + "metadata": { + "id": "A6GvWcoAvKtv" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "test_df = pd.read_csv('test.csv', low_memory=False)\n", + "X_test = test_df.drop(['Date', 'Sales'], axis=1) # Assuming 'Date' and 'Sales' columns exist in test data and you want to drop them\n", + "\n", + "# Replace 'a' in 'StateHoliday' with 1\n", + "X_test['StateHoliday'] = X_test['StateHoliday'].replace('a', 1)\n", + "\n", + "preds = model.predict(X_test)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 211 + }, + "id": "cVbDwZFM8yxL", + "outputId": "3b83af5f-346d-4ea8-eb2b-fa0b009e7c41" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'pd' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipython-input-1-3782873592.py\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtest_df\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'test.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlow_memory\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mX_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtest_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Date'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'Sales'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# Assuming 'Date' and 'Sales' columns exist in test data and you want to drop them\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# Replace 'a' in 'StateHoliday' with 0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'StateHoliday'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'StateHoliday'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'a'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'pd' is not defined" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "# RandomForestRegressor has no .plot() method and `forecast` was never defined,\n", + "# so chart the predictions computed by model.predict(X_test) instead.\n", + "fig, ax = plt.subplots(figsize=(12, 5))\n", + "ax.plot(preds)\n", + "ax.set(title='Predicted sales on the test set', xlabel='Test row', ylabel='Predicted Sales')\n", + "plt.show()" + ], + "metadata": { + "id": "GdKacxyqPuHV" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file