From 2829e9fa526eab65e909c2348fe55091b19baa6f Mon Sep 17 00:00:00 2001
From: Shristi Rawat <shristirawat68@gmail.com>
Date: Tue, 22 Oct 2024 23:05:37 +0530
Subject: [PATCH] set data index frequency and non-invertible MA features

---
 .../ARIMA_V2-checkpoint.ipynb                 | 350 ++++++++++++++++++
 ARIMA/ARIMA_V2.ipynb                          |  97 ++---
 2 files changed, 405 insertions(+), 42 deletions(-)
 create mode 100644 ARIMA/.ipynb_checkpoints/ARIMA_V2-checkpoint.ipynb

diff --git a/ARIMA/.ipynb_checkpoints/ARIMA_V2-checkpoint.ipynb b/ARIMA/.ipynb_checkpoints/ARIMA_V2-checkpoint.ipynb
new file mode 100644
index 0000000..ff47f3a
--- /dev/null
+++ b/ARIMA/.ipynb_checkpoints/ARIMA_V2-checkpoint.ipynb
@@ -0,0 +1,350 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Initial code\n",
+    "for reference purposes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import pandas as pd\n",
+    "# import numpy as np\n",
+    "# from sklearn.metrics import mean_squared_error\n",
+    "# from sklearn.preprocessing import StandardScaler\n",
+    "# from statsmodels.tsa.statespace.sarimax import SARIMAX\n",
+    "# from statsmodels.tools.sm_exceptions import ConvergenceWarning\n",
+    "# import warnings\n",
+    "\n",
+    "# # Ignore convergence warnings\n",
+    "# warnings.simplefilter(\"ignore\")\n",
+    "\n",
+    "# # Load dataset with parsed dates\n",
+    "# data = pd.read_csv('../Data/SBI Train data.csv', parse_dates=['Date'], dayfirst=True)\n",
+    "\n",
+    "# # Set the index to the Date column\n",
+    "# data.set_index('Date', inplace=True)\n",
+    "# # data = data.asfreq('D')\n",
+    "# # Feature Engineering: Add day of week and month\n",
+    "# data['day_of_week'] = data.index.dayofweek\n",
+    "# data['month'] = data.index.month\n",
+    "\n",
+    "# # Add lagged value of the Close price and moving averages\n",
+    "# data['lagged_close'] = data['Close'].shift(1)  \n",
+    "# data['moving_avg_3'] = data['Close'].rolling(window=3).mean()\n",
+    "# data['moving_avg_7'] = data['Close'].rolling(window=7).mean()  # New: 7-day moving average for long-term trend\n",
+    "\n",
+    "# # Add Volume as a feature (scaling might help)\n",
+    "# data['volume'] = data['Volume']\n",
+    "\n",
+    "# # Drop rows with NaN values\n",
+    "# data.dropna(inplace=True)\n",
+    "\n",
+    "# # Standardize the features (important for scaling)\n",
+    "# scaler = StandardScaler()\n",
+    "# exog_features = ['day_of_week', 'month', 'lagged_close', 'moving_avg_3', 'moving_avg_7', 'volume']\n",
+    "# data[exog_features] = scaler.fit_transform(data[exog_features])\n",
+    "\n",
+    "# # Split the data into training and testing sets\n",
+    "# train_size = int(len(data) * 0.8)\n",
+    "# train, test = data.iloc[:train_size], data.iloc[train_size:]\n",
+    "\n",
+    "# # Tune SARIMAX hyperparameters (ARIMA order (p, d, q))\n",
+    "# order = (2, 1, 2)  # Consider using AIC/BIC for finding optimal order\n",
+    "# seasonal_order = (1, 1, 1, 12)  # Adding seasonality with monthly frequency\n",
+    "\n",
+    "# # Fit the SARIMAX model\n",
+    "# try:\n",
+    "#     model = SARIMAX(train['Close'], \n",
+    "#                     exog=train[exog_features],\n",
+    "#                     order=order,\n",
+    "#                     seasonal_order=seasonal_order)\n",
+    "#     model_fit = model.fit(disp=False)\n",
+    "# except ConvergenceWarning as e:\n",
+    "#     print(f\"Convergence warning: {e}\")\n",
+    "# except Exception as e:\n",
+    "#     print(f\"Error: {e}\")\n",
+    "\n",
+    "# # Forecasting\n",
+    "# forecast = model_fit.forecast(steps=len(test), exog=test[exog_features])\n",
+    "\n",
+    "# # Calculate RMSE for forecast\n",
+    "# rmse_arimax = np.sqrt(mean_squared_error(test['Close'], forecast))\n",
+    "# print(f\"Improved ARIMAX Model RMSE: {rmse_arimax}\")\n",
+    "\n",
+    "# test_prices = [i for i in test['Close']]\n",
+    "# # Check residuals diagnostics (optional)\n",
+    "# residuals = test_prices - forecast\n",
+    "# print(\"Mean of residuals:\", residuals.mean())\n",
+    "# print(\"Standard deviation of residuals:\", residuals.std())\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Importing necessary libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: kaggle in d:\\anaconda\\lib\\site-packages (1.6.17)\n",
+      "Requirement already satisfied: six>=1.10 in d:\\anaconda\\lib\\site-packages (from kaggle) (1.16.0)\n",
+      "Requirement already satisfied: certifi>=2023.7.22 in d:\\anaconda\\lib\\site-packages (from kaggle) (2024.8.30)\n",
+      "Requirement already satisfied: python-dateutil in d:\\anaconda\\lib\\site-packages (from kaggle) (2.9.0.post0)\n",
+      "Requirement already satisfied: requests in d:\\anaconda\\lib\\site-packages (from kaggle) (2.32.2)\n",
+      "Requirement already satisfied: tqdm in d:\\anaconda\\lib\\site-packages (from kaggle) (4.66.4)\n",
+      "Requirement already satisfied: python-slugify in d:\\anaconda\\lib\\site-packages (from kaggle) (5.0.2)\n",
+      "Requirement already satisfied: urllib3 in d:\\anaconda\\lib\\site-packages (from kaggle) (2.2.2)\n",
+      "Requirement already satisfied: bleach in d:\\anaconda\\lib\\site-packages (from kaggle) (4.1.0)\n",
+      "Requirement already satisfied: packaging in d:\\anaconda\\lib\\site-packages (from bleach->kaggle) (23.2)\n",
+      "Requirement already satisfied: webencodings in d:\\anaconda\\lib\\site-packages (from bleach->kaggle) (0.5.1)\n",
+      "Requirement already satisfied: text-unidecode>=1.3 in d:\\anaconda\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in d:\\anaconda\\lib\\site-packages (from requests->kaggle) (2.0.4)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in d:\\anaconda\\lib\\site-packages (from requests->kaggle) (3.7)\n",
+      "Requirement already satisfied: colorama in c:\\users\\shristi\\appdata\\roaming\\python\\python312\\site-packages (from tqdm->kaggle) (0.4.6)\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install kaggle\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import pickle\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from statsmodels.tsa.statespace.sarimax import SARIMAX\n",
+    "from statsmodels.tools.sm_exceptions import ConvergenceWarning\n",
+    "import warnings"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Updating dataset features for proper time-series analysis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "# Ignore convergence warnings\n",
+    "warnings.simplefilter(\"ignore\", ConvergenceWarning)\n",
+    "\n",
+    "# Load training dataset with parsed dates\n",
+    "train_data = pd.read_csv('../Data/SBI Train data.csv', parse_dates=['Date'], dayfirst=True)\n",
+    "\n",
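+    "# Convert the index to monthly periods (NOTE: 'Date' is still a regular column here, so this converts the default integer index, not the dates -- verify)\n",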
+    "train_data.index = pd.DatetimeIndex(train_data.index).to_period('M')\n",
+    "\n",
+    "# Feature Engineering: Add day of week and month\n",
+    "train_data['day_of_week'] = train_data.index.dayofweek\n",
+    "train_data['month'] = train_data.index.month\n",
+    "\n",
+    "# Add lagged value of the Close price and moving averages\n",
+    "train_data['lagged_close'] = train_data['Close'].shift(1)\n",
+    "train_data['moving_avg_3'] = train_data['Close'].rolling(window=3).mean()\n",
+    "train_data['moving_avg_7'] = train_data['Close'].rolling(window=7).mean()\n",
+    "\n",
+    "# Add Volume as a feature (scaling might help)\n",
+    "train_data['volume'] = train_data['Volume']\n",
+    "\n",
+    "# Drop rows with NaN values after applying the rolling window and lagging\n",
+    "train_data.dropna(inplace=True)\n",
+    "\n",
+    "# Standardize the features\n",
+    "scaler = StandardScaler()\n",
+    "exog_features = ['day_of_week', 'month', 'lagged_close', 'moving_avg_3', 'moving_avg_7', 'volume']\n",
+    "train_data[exog_features] = scaler.fit_transform(train_data[exog_features])\n",
+    "\n",
+    "# Split the data into training and testing sets\n",
+    "train_size = int(len(train_data) * 0.8)\n",
+    "train, validation = train_data.iloc[:train_size], train_data.iloc[train_size:]\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Training and saving model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model and scaler saved successfully.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Train the SARIMAX model\n",
+    "order = (2, 1, 2)\n",
+    "seasonal_order = (1, 1, 1, 12)\n",
+    "\n",
+    "model = SARIMAX(train['Close'], exog=train[exog_features], order=order, seasonal_order=seasonal_order,enforce_invertibility=False)\n",
+    "model_fit = model.fit(disp=False)\n",
+    "\n",
+    "#insert your folder name where you want the dataset to be downloaded instead of .kaggle\n",
+    "os.system('kaggle datasets download -d shristirwt/sarimax-model -p/.kaggle')\n",
+    "os.system('kaggle datasets download -d shristirwt/scaler-model -p/.kaggle')\n",
+    "\n",
+    "# Save the model to a file using pickle\n",
+    "with open(r'C:\\Users\\SHRISTI\\.kaggle\\sarimax_model.pkl', 'wb') as f:\n",
+    "    pickle.dump(model_fit, f)\n",
+    "\n",
+    "# Optionally save the scaler as well\n",
+    "with open(r'C:\\Users\\SHRISTI\\.kaggle\\scaler.pkl', 'wb') as f:\n",
+    "    pickle.dump(scaler, f)\n",
+    "\n",
+    "print(\"Model and scaler saved successfully.\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Loading saved model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the model and scaler from the files\n",
+    "with open(r'C:\\Users\\SHRISTI\\.kaggle\\sarimax_model.pkl', 'rb') as f:\n",
+    "    loaded_model = pickle.load(f)\n",
+    "\n",
+    "with open(r'C:\\Users\\SHRISTI\\.kaggle\\scaler.pkl', 'rb') as f:\n",
+    "    loaded_scaler = pickle.load(f)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Loading and processing Test data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the test dataset\n",
+    "test_data = pd.read_csv('../Data/SBI Test data.csv', parse_dates=['Date'], dayfirst=True)\n",
+    "\n",
+    "# Set the index to the Date column\n",
+    "test_data.set_index('Date', inplace=True)\n",
+    "\n",
+    "# Apply the same feature engineering on the test data\n",
+    "test_data['day_of_week'] = test_data.index.dayofweek\n",
+    "test_data['month'] = test_data.index.month\n",
+    "test_data['lagged_close'] = test_data['Close'].shift(1)\n",
+    "test_data['moving_avg_3'] = test_data['Close'].rolling(window=3).mean()\n",
+    "test_data['moving_avg_7'] = test_data['Close'].rolling(window=7).mean()\n",
+    "\n",
+    "# Add Volume as a feature\n",
+    "test_data['volume'] = test_data['Volume']\n",
+    "\n",
+    "# Drop rows with NaN values\n",
+    "test_data.dropna(inplace=True)\n",
+    "\n",
+    "# Standardize the features in the test dataset using the loaded scaler\n",
+    "test_data[exog_features] = loaded_scaler.transform(test_data[exog_features])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Predicting share prices using model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Test Data RMSE: 4.883649507349637\n",
+      "Mean of residuals: 0.06489726947015648\n",
+      "Standard deviation of residuals: 4.8849520783077764\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Forecasting on the test data using the loaded model\n",
+    "forecast_test = loaded_model.forecast(steps=len(test_data), exog=test_data[exog_features])\n",
+    "\n",
+    "# Calculate RMSE for forecast\n",
+    "rmse_test = np.sqrt(mean_squared_error(test_data['Close'], forecast_test))\n",
+    "print(f\"Test Data RMSE: {rmse_test}\")\n",
+    "\n",
+    "# Check residuals diagnostics (optional)\n",
+    "test_prices = test_data['Close'].values\n",
+    "residuals_test = test_prices - forecast_test\n",
+    "print(\"Mean of residuals:\", residuals_test.mean())\n",
+    "print(\"Standard deviation of residuals:\", residuals_test.std())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/ARIMA/ARIMA_V2.ipynb b/ARIMA/ARIMA_V2.ipynb
index bd5fe49..ff47f3a 100644
--- a/ARIMA/ARIMA_V2.ipynb
+++ b/ARIMA/ARIMA_V2.ipynb
@@ -10,7 +10,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -95,10 +95,34 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: kaggle in d:\\anaconda\\lib\\site-packages (1.6.17)\n",
+      "Requirement already satisfied: six>=1.10 in d:\\anaconda\\lib\\site-packages (from kaggle) (1.16.0)\n",
+      "Requirement already satisfied: certifi>=2023.7.22 in d:\\anaconda\\lib\\site-packages (from kaggle) (2024.8.30)\n",
+      "Requirement already satisfied: python-dateutil in d:\\anaconda\\lib\\site-packages (from kaggle) (2.9.0.post0)\n",
+      "Requirement already satisfied: requests in d:\\anaconda\\lib\\site-packages (from kaggle) (2.32.2)\n",
+      "Requirement already satisfied: tqdm in d:\\anaconda\\lib\\site-packages (from kaggle) (4.66.4)\n",
+      "Requirement already satisfied: python-slugify in d:\\anaconda\\lib\\site-packages (from kaggle) (5.0.2)\n",
+      "Requirement already satisfied: urllib3 in d:\\anaconda\\lib\\site-packages (from kaggle) (2.2.2)\n",
+      "Requirement already satisfied: bleach in d:\\anaconda\\lib\\site-packages (from kaggle) (4.1.0)\n",
+      "Requirement already satisfied: packaging in d:\\anaconda\\lib\\site-packages (from bleach->kaggle) (23.2)\n",
+      "Requirement already satisfied: webencodings in d:\\anaconda\\lib\\site-packages (from bleach->kaggle) (0.5.1)\n",
+      "Requirement already satisfied: text-unidecode>=1.3 in d:\\anaconda\\lib\\site-packages (from python-slugify->kaggle) (1.3)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in d:\\anaconda\\lib\\site-packages (from requests->kaggle) (2.0.4)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in d:\\anaconda\\lib\\site-packages (from requests->kaggle) (3.7)\n",
+      "Requirement already satisfied: colorama in c:\\users\\shristi\\appdata\\roaming\\python\\python312\\site-packages (from tqdm->kaggle) (0.4.6)\n"
+     ]
+    }
+   ],
    "source": [
+    "!pip install kaggle\n",
+    "import os\n",
     "import pandas as pd\n",
     "import numpy as np\n",
     "import pickle\n",
@@ -118,7 +142,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -130,7 +154,7 @@
     "train_data = pd.read_csv('../Data/SBI Train data.csv', parse_dates=['Date'], dayfirst=True)\n",
     "\n",
     "# Set the index to the Date column\n",
-    "train_data.set_index('Date', inplace=True)\n",
+    "train_data.index = pd.DatetimeIndex(train_data.index).to_period('M')\n",
     "\n",
     "# Feature Engineering: Add day of week and month\n",
     "train_data['day_of_week'] = train_data.index.dayofweek\n",
@@ -166,21 +190,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\agraw\\AppData\\Roaming\\Python\\Python311\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.\n",
-      "  self._init_dates(dates, freq)\n",
-      "C:\\Users\\agraw\\AppData\\Roaming\\Python\\Python311\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:473: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.\n",
-      "  self._init_dates(dates, freq)\n",
-      "C:\\Users\\agraw\\AppData\\Roaming\\Python\\Python311\\site-packages\\statsmodels\\tsa\\statespace\\sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.\n",
-      "  warn('Non-invertible starting MA parameters found.'\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
@@ -194,15 +206,19 @@
     "order = (2, 1, 2)\n",
     "seasonal_order = (1, 1, 1, 12)\n",
     "\n",
-    "model = SARIMAX(train['Close'], exog=train[exog_features], order=order, seasonal_order=seasonal_order)\n",
+    "model = SARIMAX(train['Close'], exog=train[exog_features], order=order, seasonal_order=seasonal_order,enforce_invertibility=False)\n",
     "model_fit = model.fit(disp=False)\n",
     "\n",
+    "#insert your folder name where you want the dataset to be downloaded instead of .kaggle\n",
+    "os.system('kaggle datasets download -d shristirwt/sarimax-model -p/.kaggle')\n",
+    "os.system('kaggle datasets download -d shristirwt/scaler-model -p/.kaggle')\n",
+    "\n",
     "# Save the model to a file using pickle\n",
-    "with open('sarimax_model.pkl', 'wb') as f:\n",
+    "with open(r'C:\\Users\\SHRISTI\\.kaggle\\sarimax_model.pkl', 'wb') as f:\n",
     "    pickle.dump(model_fit, f)\n",
     "\n",
     "# Optionally save the scaler as well\n",
-    "with open('scaler.pkl', 'wb') as f:\n",
+    "with open(r'C:\\Users\\SHRISTI\\.kaggle\\scaler.pkl', 'wb') as f:\n",
     "    pickle.dump(scaler, f)\n",
     "\n",
     "print(\"Model and scaler saved successfully.\")\n"
@@ -217,15 +233,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
     "# Load the model and scaler from the files\n",
-    "with open('sarimax_model.pkl', 'rb') as f:\n",
+    "with open(r'C:\\Users\\SHRISTI\\.kaggle\\sarimax_model.pkl', 'rb') as f:\n",
     "    loaded_model = pickle.load(f)\n",
     "\n",
-    "with open('scaler.pkl', 'rb') as f:\n",
+    "with open(r'C:\\Users\\SHRISTI\\.kaggle\\scaler.pkl', 'rb') as f:\n",
     "    loaded_scaler = pickle.load(f)\n"
    ]
   },
@@ -238,7 +254,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -274,26 +290,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\agraw\\AppData\\Roaming\\Python\\Python311\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.\n",
-      "  return get_prediction_index(\n",
-      "C:\\Users\\agraw\\AppData\\Roaming\\Python\\Python311\\site-packages\\statsmodels\\tsa\\base\\tsa_model.py:837: FutureWarning: No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.\n",
-      "  return get_prediction_index(\n"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Test Data RMSE: 4.673693537736142\n",
-      "Mean of residuals: 0.311316834051805\n",
-      "Standard deviation of residuals: 4.664969245987366\n"
+      "Test Data RMSE: 4.883649507349637\n",
+      "Mean of residuals: 0.06489726947015648\n",
+      "Standard deviation of residuals: 4.8849520783077764\n"
      ]
     }
    ],
@@ -311,11 +317,18 @@
     "print(\"Mean of residuals:\", residuals_test.mean())\n",
     "print(\"Standard deviation of residuals:\", residuals_test.std())"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -329,9 +342,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.4"
+   "version": "3.12.4"
   }
  },
  "nbformat": 4,
- "nbformat_minor": 2
+ "nbformat_minor": 4
 }