diff --git a/18ALGOStock_Price_Prediction.ipynb b/18ALGOStock_Price_Prediction.ipynb new file mode 100644 index 0000000..ab521b1 --- /dev/null +++ b/18ALGOStock_Price_Prediction.ipynb @@ -0,0 +1,5722 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "8tzEK_mSvRoh" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.svm import SVR\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error\n", + "from sklearn.neighbors import KNeighborsRegressor\n", + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Dense,LSTM" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "NbBSc2jLvZWx", + "outputId": "3d158d54-f370-4e7b-fdb8-eb7f7fac2928" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Date Open High Low Close Adj Close \\\n", + "0 01-01-1996 18.691147 18.978922 18.540184 18.823240 12.409931 \n", + "1 02-01-1996 18.894005 18.964767 17.738192 18.224106 12.014931 \n", + "2 03-01-1996 18.327892 18.568489 17.643839 17.738192 11.694577 \n", + "3 04-01-1996 17.502312 17.832542 17.223972 17.676863 11.654142 \n", + "4 05-01-1996 17.738192 17.785366 17.459852 17.577793 11.588827 \n", + "\n", + " Volume \n", + "0 43733533.0 \n", + "1 56167280.0 \n", + "2 68296318.0 \n", + "3 86073880.0 \n", + "4 76613039.0 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateOpenHighLowCloseAdj CloseVolume
001-01-199618.69114718.97892218.54018418.82324012.40993143733533.0
102-01-199618.89400518.96476717.73819218.22410612.01493156167280.0
203-01-199618.32789218.56848917.64383917.73819211.69457768296318.0
304-01-199617.50231217.83254217.22397217.67686311.65414286073880.0
405-01-199617.73819217.78536617.45985217.57779311.58882776613039.0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 7074,\n \"fields\": [\n {\n \"column\": \"Date\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7074,\n \"samples\": [\n \"11-08-2016\",\n \"30-10-2007\",\n \"17-01-2017\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Open\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 154.77322944510627,\n \"min\": 13.478195,\n \"max\": 703.650024,\n \"num_unique_values\": 4758,\n \"samples\": [\n 174.399994,\n 31.0324,\n 187.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"High\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.34507839355808,\n \"min\": 13.935802,\n \"max\": 728.349976,\n \"num_unique_values\": 5403,\n \"samples\": [\n 473.0,\n 495.450012,\n 78.321663\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 152.98051601861636,\n \"min\": 13.214009,\n \"max\": 694.200012,\n \"num_unique_values\": 5488,\n \"samples\": [\n 60.2957,\n 22.677523,\n 16.983376\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 154.63054905628158,\n \"min\": 13.346102,\n \"max\": 725.25,\n \"num_unique_values\": 5975,\n \"samples\": [\n 633.599976,\n 241.100006,\n 107.834999\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Adj Close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 152.90324918554697,\n \"min\": 9.53141,\n \"max\": 725.25,\n \"num_unique_values\": 6575,\n \"samples\": [\n 12.345289,\n 223.836212,\n 16.758821\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Volume\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 
34627439.39963042,\n \"min\": 0.0,\n \"max\": 446948261.0,\n \"num_unique_values\": 6948,\n \"samples\": [\n 29959130.0,\n 1648453.0,\n 14077470.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 10 + } + ], + "source": [ + "# Load the dataset\n", + "df = pd.read_csv('/content/SBIN.csv')\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "2QdTvORzwEJw" + }, + "outputs": [], + "source": [ + "# Drop the 'Date' and 'Adj Close' columns\n", + "df.drop(['Date', 'Adj Close'], axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "xMfr71b2w3eX", + "outputId": "845c7806-d080-4d94-eb04-52fb96197af9" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Open High Low Close Volume\n", + "0 18.691147 18.978922 18.540184 18.823240 43733533.0\n", + "1 18.894005 18.964767 17.738192 18.224106 56167280.0\n", + "2 18.327892 18.568489 17.643839 17.738192 68296318.0\n", + "3 17.502312 17.832542 17.223972 17.676863 86073880.0\n", + "4 17.738192 17.785366 17.459852 17.577793 76613039.0" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
OpenHighLowCloseVolume
018.69114718.97892218.54018418.82324043733533.0
118.89400518.96476717.73819218.22410656167280.0
218.32789218.56848917.64383917.73819268296318.0
317.50231217.83254217.22397217.67686386073880.0
417.73819217.78536617.45985217.57779376613039.0
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "df", + "summary": "{\n \"name\": \"df\",\n \"rows\": 7074,\n \"fields\": [\n {\n \"column\": \"Open\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 154.77322944510627,\n \"min\": 13.478195,\n \"max\": 703.650024,\n \"num_unique_values\": 4758,\n \"samples\": [\n 174.399994,\n 31.0324,\n 187.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"High\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156.34507839355808,\n \"min\": 13.935802,\n \"max\": 728.349976,\n \"num_unique_values\": 5403,\n \"samples\": [\n 473.0,\n 495.450012,\n 78.321663\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Low\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 152.98051601861636,\n \"min\": 13.214009,\n \"max\": 694.200012,\n \"num_unique_values\": 5488,\n \"samples\": [\n 60.2957,\n 22.677523,\n 16.983376\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Close\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 154.63054905628158,\n \"min\": 13.346102,\n \"max\": 725.25,\n \"num_unique_values\": 5975,\n \"samples\": [\n 633.599976,\n 241.100006,\n 107.834999\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Volume\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 34627439.39963042,\n \"min\": 0.0,\n \"max\": 446948261.0,\n \"num_unique_values\": 6948,\n \"samples\": [\n 29959130.0,\n 1648453.0,\n 14077470.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 12 + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "id": "4yuVijfLCnnR" + }, + "outputs": [], + "source": [ + "# Handle missing values\n", + "imputer = SimpleImputer(strategy='mean')\n", + "df = 
pd.DataFrame(imputer.fit_transform(df), columns=df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "dUw_auE7w8JA" + }, + "outputs": [], + "source": [ + "# Select features and target variable\n", + "X = df[['Open', 'High', 'Low', 'Volume']]\n", + "y = df['Close']" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "FD2542-uxMhN" + }, + "outputs": [], + "source": [ + "# Split the data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "IfBxpcjIw-h7" + }, + "outputs": [], + "source": [ + "# Scale the features using Min-Max scaling\n", + "scaler = MinMaxScaler()\n", + "X_train_scaled = scaler.fit_transform(X_train)\n", + "X_test_scaled = scaler.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "BUsngQNexIjX", + "outputId": "49795996-e79e-4bf4-90e2-5ed3df9ce2ab" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(5659, 4)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "X_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "GAb1kDEZxQD6", + "outputId": "bd8e09be-4ceb-495e-cbb9-2b1413322d49" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(1415, 4)" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ], + "source": [ + "X_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "id": "SrzPIdvxxSWL" + }, + "outputs": [], + "source": [ + "# Function to evaluate and print RMSE, MAE, and MAPE\n", + "def evaluate_model(model, X_test, y_test):\n", + " predictions = 
model.predict(X_test)\n", + " rmse = np.sqrt(mean_squared_error(y_test, predictions))\n", + " mae = mean_absolute_error(y_test, predictions)\n", + " mape = mean_absolute_percentage_error(y_test, predictions)\n", + "\n", + " print(f\"RMSE: {rmse}\")\n", + " print(f\"MAE: {mae}\")\n", + " print(f\"MAPE: {mape}\\n\")\n", + "\n", + " return rmse, mae, mape\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "id": "1V0eOGD_xVCq" + }, + "outputs": [], + "source": [ + "\n", + "metrics = {\n", + " \"Model\": [],\n", + " \"RMSE\": [],\n", + " \"MAE\": [],\n", + " \"MAPE\": []\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4gNvcwrH55rP" + }, + "source": [ + "# **1. Linear Regression**" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "id": "uTbRFCB4xXHU" + }, + "outputs": [], + "source": [ + "# Create a linear regression model\n", + "model1 = LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 241 + }, + "id": "UKaUaJ6sxaYG", + "outputId": "f0d54db6-c8ae-41a1-a75b-f74b253ac50d" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "5286 257.350006\n", + "3408 129.464996\n", + "5477 279.350006\n", + "6906 588.500000\n", + "530 21.644367\n", + "Name: Close, dtype: float64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Close
5286257.350006
3408129.464996
5477279.350006
6906588.500000
53021.644367
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ], + "source": [ + "y_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "id": "6iJA5FrBxdEs", + "outputId": "78cfad0c-d5f9-4d8f-e0e5-7da423825bd6" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "LinearRegression()" + ], + "text/html": [ + "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "# Train the model\n", + "model1.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "k-T73PFExiZD", + "outputId": "6f0182e2-7527-4b2c-bd30-957b14b1db53" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 1.6881364642878485\n", + "MAE: 0.9433353484789417\n", + "MAPE: 0.006085435991202993\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model1, X_test, y_test)\n", + "metrics[\"Model\"].append(\"Linear Regressor\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qEVWWYIS592D" + }, + "source": [ + "# 2. Support Vector Regression" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "id": "LeUTf8Vhxj_k" + }, + "outputs": [], + "source": [ + "# Create an SVR model\n", + "model2 = SVR()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "id": "ud3Yhe5Vzvyh", + "outputId": "6f3cac1e-1fb4-4299-ee0b-ce02047eab4e" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "SVR()" + ], + "text/html": [ + "
SVR()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 26 + } + ], + "source": [ + "# Train the model\n", + "model2.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eiqL4fTuzxWH", + "outputId": "929b8a3f-a1b3-4947-cf05-32f945f79ec7" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 147.71103599153602\n", + "MAE: 110.99419106508152\n", + "MAPE: 1.9715076513294716\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model2, X_test, y_test)\n", + "metrics[\"Model\"].append(\"SVR\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PlDcozy-6OGR" + }, + "source": [ + "# 3. Random Forest Regressor" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "iaN8nOOO6cBg" + }, + "outputs": [], + "source": [ + "model3 = RandomForestRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "id": "wZ7x_Yp06fI_", + "outputId": "79b6b212-5b7e-4c58-c615-69aa210be892" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "RandomForestRegressor()" + ], + "text/html": [ + "
RandomForestRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 29 + } + ], + "source": [ + "# Train the model\n", + "model3.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "IwK7IZ3E6g_n", + "outputId": "250858e9-5a8b-4b79-81da-1f134e409a9b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 224.93130980701244\n", + "MAE: 162.98909493804314\n", + "MAPE: 0.7508266646157591\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but RandomForestRegressor was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model3, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Random Forest\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ijTIDEEa6izO" + }, + "source": [ + "# 4. Gradient Boosting Models" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "EO6OFflr6nJo" + }, + "outputs": [], + "source": [ + "model4 = GradientBoostingRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "id": "vrwnbrEi6o1X", + "outputId": "f4160be8-b2ea-45de-a589-99eef9b71724" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "GradientBoostingRegressor()" + ], + "text/html": [ + "
GradientBoostingRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ], + "source": [ + "# Train the model\n", + "model4.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-pTBa0fD6qqx", + "outputId": "e382e867-10dc-4529-bc2f-ca6f71625096" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 224.41069433522418\n", + "MAE: 162.2712281619757\n", + "MAPE: 0.7378541693598376\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but GradientBoostingRegressor was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model4, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"GBM\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eGcU-e6C6sJI" + }, + "source": [ + "# 5. 
Extreme Gradient Boosting" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "id": "0GQmPNFd6uxx", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "fdb35ef5-26f8-44a5-8abb-ed202d3d1137" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting xgboost\n", + " Downloading xgboost-2.1.1-py3-none-manylinux_2_28_x86_64.whl.metadata (2.1 kB)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from xgboost) (1.26.4)\n", + "Collecting nvidia-nccl-cu12 (from xgboost)\n", + " Downloading nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_x86_64.whl.metadata (1.8 kB)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from xgboost) (1.13.1)\n", + "Downloading xgboost-2.1.1-py3-none-manylinux_2_28_x86_64.whl (153.9 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m153.9/153.9 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading nvidia_nccl_cu12-2.23.4-py3-none-manylinux2014_x86_64.whl (199.0 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m199.0/199.0 MB\u001b[0m \u001b[31m5.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: nvidia-nccl-cu12, xgboost\n", + "Successfully installed nvidia-nccl-cu12-2.23.4 xgboost-2.1.1\n" + ] + } + ], + "source": [ + "!pip install xgboost\n", + "import xgboost as xgb\n", + "# Create an XGBoost model\n", + "model5 = xgb.XGBRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 253 + }, + "id": "kfo1ZNft6xTp", + "outputId": "7f85c2f3-864c-47cc-b943-91265be33b98" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "XGBRegressor(base_score=None, booster=None, callbacks=None,\n", + 
" colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=None, ...)" + ], + "text/html": [ + "
XGBRegressor(base_score=None, booster=None, callbacks=None,\n",
+              "             colsample_bylevel=None, colsample_bynode=None,\n",
+              "             colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+              "             enable_categorical=False, eval_metric=None, feature_types=None,\n",
+              "             gamma=None, grow_policy=None, importance_type=None,\n",
+              "             interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+              "             max_cat_threshold=None, max_cat_to_onehot=None,\n",
+              "             max_delta_step=None, max_depth=None, max_leaves=None,\n",
+              "             min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+              "             multi_strategy=None, n_estimators=None, n_jobs=None,\n",
+              "             num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 36 + } + ], + "source": [ + "# Train the model\n", + "model5.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "7QwLt9iS6zSj", + "outputId": "f4e99e28-380e-4bf9-839a-a068ad52ed93" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 224.66436370022384\n", + "MAE: 162.62070643817412\n", + "MAPE: 0.7441437311249671\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model5, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"XGBoost\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sUD1VQBF605K" + }, + "source": [ + "# 6. AdaBoost Regressor" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "id": "0foTLiQp63Y9" + }, + "outputs": [], + "source": [ + "model6 = AdaBoostRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "id": "bkzSWYA365MO", + "outputId": "119f4e85-55ac-4a8b-c18d-768a7ecf85de" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "AdaBoostRegressor()" + ], + "text/html": [ + "
AdaBoostRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 39 + } + ], + "source": [ + "# Train the model\n", + "model6.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZKxqdmp166pF", + "outputId": "df8e8b41-a077-4aa7-a71f-df3010428289" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 211.84016190199145\n", + "MAE: 150.27932429061372\n", + "MAPE: 0.7057669522844586\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but AdaBoostRegressor was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model6, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"AdaBoost Regressor\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mtfkPIRi67xo" + }, + "source": [ + "# 7. Decision Tree" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "id": "E6EyzrH36_Fq" + }, + "outputs": [], + "source": [ + "model7 = DecisionTreeRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "id": "DTp5VIYx7AWt", + "outputId": "74192862-7b4e-409d-e614-5ff84e130ce4" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "DecisionTreeRegressor()" + ], + "text/html": [ + "
DecisionTreeRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ], + "source": [ + "# Train the model\n", + "model7.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3YC-pSgv7Dh4", + "outputId": "3e0ecf0d-77bb-4e52-cffb-14a0c78cb3fa" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 224.85857555038172\n", + "MAE: 162.88870413804315\n", + "MAPE: 0.7490024715971244\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but DecisionTreeRegressor was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model7, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Decision Tree\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WfJAZHnP7E_2" + }, + "source": [ + "# 8. KNeighbors Regressor" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "id": "smujnWTRzzDL" + }, + "outputs": [], + "source": [ + "# Create a KNN model\n", + "model8 = KNeighborsRegressor()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 80 + }, + "id": "zeokqhKd0Aj8", + "outputId": "b42fe301-c5ad-42c8-d147-314e13995429" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "KNeighborsRegressor()" + ], + "text/html": [ + "
KNeighborsRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 45 + } + ], + "source": [ + "# Train the model\n", + "model8.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "X2uNfESC0CA8", + "outputId": "f56c942d-5f58-4c8e-c86f-ce5a98a57cdf" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 224.35603706259303\n", + "MAE: 162.1962430618594\n", + "MAPE: 0.7365233640314862\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:493: UserWarning: X does not have valid feature names, but KNeighborsRegressor was fitted with feature names\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model8, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"KNN\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X3yNCskZ7KMV" + }, + "source": [ + "# 9. 
Artificial Neural Networks" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "id": "syd9MRhf0Df1" + }, + "outputs": [], + "source": [ + "# Create an ANN model\n", + "model9 = Sequential()\n", + "model9.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))\n", + "model9.add(Dense(16, activation='relu'))\n", + "model9.add(Dense(1, activation='linear'))" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "id": "pdlxN-Dp0IZr" + }, + "outputs": [], + "source": [ + "# Compile the model\n", + "model9.compile(loss='mean_squared_error', optimizer='adam')" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qcryLURL0KIH", + "outputId": "1a31f08a-6f56-4f8b-dd4b-21c42080a75a" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 49 + } + ], + "source": [ + "# Train the model\n", + "model9.fit(X_train_scaled, y_train, epochs=100, batch_size=32, verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Xu6Cwjey0MaP", + "outputId": "77fd4697-9175-4f35-f814-f3b463dc180b" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "45/45 [==============================] - 0s 1ms/step\n", + "RMSE: 2.9928054434827525\n", + "MAE: 1.963869507604453\n", + "MAPE: 0.014526009797749508\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model9, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"ANN\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yet4TgKq7OZl" + }, + "source": [ + "# 10. 
Long Short Term Memory" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "id": "keiZDN4w7UH0" + }, + "outputs": [], + "source": [ + "n_features = X_train_scaled.shape[1]\n", + "n_steps = 10\n", + "n_samples_train = X_train_scaled.shape[0] - n_steps + 1\n", + "n_samples_test = X_test_scaled.shape[0] - n_steps + 1\n", + "\n", + "# Reshape the input data\n", + "X_train_reshaped = np.array([X_train_scaled[i:i+n_steps, :] for i in range(n_samples_train)])\n", + "X_test_reshaped = np.array([X_test_scaled[i:i+n_steps, :] for i in range(n_samples_test)])" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "id": "nRRTkQTD7Vjd" + }, + "outputs": [], + "source": [ + "model10 = Sequential()\n", + "model10.add(LSTM(64, activation='relu', input_shape=(n_steps, n_features)))\n", + "model10.add(Dense(1))" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": { + "id": "3UJtO3wC7WWe" + }, + "outputs": [], + "source": [ + "# Compile the model\n", + "model10.compile(loss='mean_squared_error', optimizer='adam')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ld9dofMD7YNO", + "outputId": "f3ebf621-6bd2-4998-e147-7c13bc6aac88" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ], + "source": [ + "model10.fit(X_train_reshaped, y_train[n_steps-1:], epochs=100, batch_size=32, verbose=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lOTdB8Bj7aXM", + "outputId": "c82c1ead-25f5-44b2-c382-dd5318cb9d7f" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "44/44 [==============================] - 0s 3ms/step\n", + "RMSE: 15.359295807149854\n", + "MAE: 12.278894525379425\n", + "MAPE: 
0.27883509462425415\n", + "\n" + ] + } + ], + "source": [ + "rmse, mae, mape = evaluate_model(model10, X_test_reshaped, y_test[n_steps-1:])\n", + "\n", + "# Store metrics\n", + "metrics[\"Model\"].append(\"LSTM\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **11. Lasso Regression**" + ], + "metadata": { + "id": "Rdjh94-UEoWG" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.linear_model import Lasso\n", + "\n", + "model11 = Lasso(alpha=0.001, max_iter=10000) # Decreased alpha for less regularization; raised max_iter because the default (1000) stopped before converging\n", + "model11.fit(X_train_scaled, y_train)\n", + "rmse, mae, mape = evaluate_model(model11, X_test_scaled, y_test)\n", + "\n", + "metrics[\"Model\"].append(\"Lasso Regression (Updated)\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OimOLwiuEwea", + "outputId": "000d4ef9-6843-4517-da09-a4a35d83016f" + }, + "execution_count": 69, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 3.5005772524019725\n", + "MAE: 2.1759919360445914\n", + "MAPE: 0.014216792315179878\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_coordinate_descent.py:697: ConvergenceWarning: Objective did not converge. You might want to increase the number of iterations, check the scale of the features or consider increasing regularisation. 
Duality gap: 3.470e+04, tolerance: 1.349e+04\n", + " model = cd_fast.enet_coordinate_descent(\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **12. Ridge Regression**" + ], + "metadata": { + "id": "XGqWkXGUFezu" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.linear_model import Ridge\n", + "\n", + "model12 = Ridge(alpha=0.5)\n", + "model12.fit(X_train_scaled, y_train)\n", + "rmse, mae, mape = evaluate_model(model12, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Ridge Regression\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yOeNMBAfFkih", + "outputId": "1893a251-e3f2-461f-dc6a-da90fe96129e" + }, + "execution_count": 60, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 2.7385522000826734\n", + "MAE: 1.6950420985832253\n", + "MAPE: 0.010816737779364599\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **13. ElasticNet Regression**" + ], + "metadata": { + "id": "ymQ6VQEXFuBs" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.linear_model import ElasticNet\n", + "\n", + "model13 = ElasticNet(alpha=0.05, l1_ratio=0.7)\n", + "model13.fit(X_train_scaled, y_train)\n", + "rmse, mae, mape = evaluate_model(model13, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"ElasticNet Regression\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NwwymnjiFzM5", + "outputId": "5d2fd12d-e3af-4b92-e24f-085f7b08e512" + }, + "execution_count": 61, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 14.804090936708079\n", + "MAE: 11.800446698217334\n", + "MAPE: 0.24325036314896364\n", + "\n" + ] + } + ] + }, + { + 
"cell_type": "markdown", + "source": [ + "# **14. SVR with RBF Kernel**" + ], + "metadata": { + "id": "gPPw3I6fF2yd" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.svm import SVR\n", + "\n", + "model14 = SVR(kernel='rbf', C=10, epsilon=0.05)\n", + "model14.fit(X_train_scaled, y_train)\n", + "rmse, mae, mape = evaluate_model(model14, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"SVR (RBF)\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j_NV17B4F6ZM", + "outputId": "91c7c74b-173d-47a2-af42-8ef37b4cd0bb" + }, + "execution_count": 62, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 4.1606646051672405\n", + "MAE: 1.7711785377611675\n", + "MAPE: 0.015270499337866636\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **15. Bayesian Ridge Regression**" + ], + "metadata": { + "id": "OKM9CqwhF9-3" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.linear_model import BayesianRidge\n", + "\n", + "# Use 'max_iter' (the current name of the former 'n_iter' argument)\n", + "model15 = BayesianRidge(max_iter=300, tol=1e-4)\n", + "model15.fit(X_train_scaled, y_train)\n", + "rmse, mae, mape = evaluate_model(model15, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Bayesian Ridge\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "wYNH0UAmGDlb", + "outputId": "61bcaad0-011c-494c-ac3b-5461d5ca2b89" + }, + "execution_count": 65, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 1.6881351101001205\n", + "MAE: 0.9434406049061286\n", + "MAPE: 0.006085921149239934\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **16. Huber Regressor**" + 
], + "metadata": { + "id": "qvZL1xbwGjGu" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.linear_model import HuberRegressor\n", + "\n", + "model16 = HuberRegressor(epsilon=1.5)\n", + "model16.fit(X_train_scaled, y_train)\n", + "rmse, mae, mape = evaluate_model(model16, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Huber Regressor\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yNHH3AdxGiAk", + "outputId": "ac73afbf-8cfa-4e24-9c4e-72dc3d5299db" + }, + "execution_count": 66, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 1.7227752104860061\n", + "MAE: 0.9183987164012762\n", + "MAPE: 0.005933218462550846\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **17. Passive Aggressive Regressor**" + ], + "metadata": { + "id": "6dK0ArL6G2xM" + } + }, + { + "cell_type": "code", + "source": [ + "from sklearn.linear_model import PassiveAggressiveRegressor\n", + "\n", + "model17 = PassiveAggressiveRegressor(max_iter=1500, tol=1e-4)\n", + "model17.fit(X_train_scaled, y_train)\n", + "rmse, mae, mape = evaluate_model(model17, X_test_scaled, y_test)\n", + "metrics[\"Model\"].append(\"Passive Aggressive Regressor\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XaFuxTwlG8v0", + "outputId": "058987b1-9be4-4f4e-d1c2-b5125356699b" + }, + "execution_count": 67, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "RMSE: 2.064959947140465\n", + "MAE: 1.3041435991086505\n", + "MAPE: 0.011270470851679414\n", + "\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **18. ARIMA**" + ], + "metadata": { + "id": "v8qPX8UlIzse" + } + }, + { + 
"cell_type": "code", + "source": [ + "!pip install statsmodels\n", + "!pip install pmdarima\n", + "import pandas as pd\n", + "import numpy as np\n", + "from statsmodels.tsa.arima.model import ARIMA\n", + "from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error\n", + "from pmdarima import auto_arima\n", + "\n", + "\n", + "# Assuming you have your data loaded and preprocessed as before (df, X_train, X_test, y_train, y_test)\n", + "\n", + "# Automatic ARIMA Order Selection using auto_arima\n", + "# Find the best (p, d, q) values\n", + "best_model = auto_arima(y_train, seasonal=False, trace=True, error_action='ignore', suppress_warnings=True)\n", + "p, d, q = best_model.order\n", + "\n", + "# Fit the ARIMA model\n", + "model18 = ARIMA(y_train, order=(p, d, q))\n", + "model18_fit = model18.fit()\n", + "\n", + "# Make predictions\n", + "# Use len(y_train) as the starting point for predictions\n", + "# Forecast for the length of y_test\n", + "predictions = model18_fit.predict(start=len(y_train), end=len(y_train) + len(y_test) - 1)\n", + "\n", + "# Evaluate the model\n", + "rmse = np.sqrt(mean_squared_error(y_test, predictions))\n", + "mae = mean_absolute_error(y_test, predictions)\n", + "mape = mean_absolute_percentage_error(y_test, predictions)\n", + "\n", + "print(f\"ARIMA RMSE: {rmse}\")\n", + "print(f\"ARIMA MAE: {mae}\")\n", + "print(f\"ARIMA MAPE: {mape}\\n\")\n", + "\n", + "# Store metrics\n", + "metrics[\"Model\"].append(\"ARIMA\")\n", + "metrics[\"RMSE\"].append(rmse)\n", + "metrics[\"MAE\"].append(mae)\n", + "metrics[\"MAPE\"].append(mape)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "eWJ9s78bI4kj", + "outputId": "93cd67e4-f1e8-4c70-a174-efe9e039663b" + }, + "execution_count": 75, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: statsmodels in /usr/local/lib/python3.10/dist-packages (0.14.4)\n", + "Requirement 
already satisfied: numpy<3,>=1.22.3 in /usr/local/lib/python3.10/dist-packages (from statsmodels) (1.26.4)\n", + "Requirement already satisfied: scipy!=1.9.2,>=1.8 in /usr/local/lib/python3.10/dist-packages (from statsmodels) (1.13.1)\n", + "Requirement already satisfied: pandas!=2.1.0,>=1.4 in /usr/local/lib/python3.10/dist-packages (from statsmodels) (2.2.2)\n", + "Requirement already satisfied: patsy>=0.5.6 in /usr/local/lib/python3.10/dist-packages (from statsmodels) (0.5.6)\n", + "Requirement already satisfied: packaging>=21.3 in /usr/local/lib/python3.10/dist-packages (from statsmodels) (24.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas!=2.1.0,>=1.4->statsmodels) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas!=2.1.0,>=1.4->statsmodels) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas!=2.1.0,>=1.4->statsmodels) (2024.2)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from patsy>=0.5.6->statsmodels) (1.16.0)\n", + "Collecting pmdarima\n", + " Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl.metadata (7.8 kB)\n", + "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (1.4.2)\n", + "Collecting Cython!=0.29.18,!=0.29.31,>=0.29 (from pmdarima)\n", + " Downloading Cython-3.0.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.2 kB)\n", + "Requirement already satisfied: numpy>=1.21.2 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (1.26.4)\n", + "Requirement already satisfied: pandas>=0.19 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (2.2.2)\n", + "Requirement already satisfied: scikit-learn>=0.22 in /usr/local/lib/python3.10/dist-packages (from pmdarima) 
(1.5.2)\n", + "Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (1.13.1)\n", + "Requirement already satisfied: statsmodels>=0.13.2 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (0.14.4)\n", + "Requirement already satisfied: urllib3 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (2.2.3)\n", + "Requirement already satisfied: setuptools!=50.0.0,>=38.6.0 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (75.1.0)\n", + "Requirement already satisfied: packaging>=17.1 in /usr/local/lib/python3.10/dist-packages (from pmdarima) (24.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.19->pmdarima) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.19->pmdarima) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas>=0.19->pmdarima) (2024.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.22->pmdarima) (3.5.0)\n", + "Requirement already satisfied: patsy>=0.5.6 in /usr/local/lib/python3.10/dist-packages (from statsmodels>=0.13.2->pmdarima) (0.5.6)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from patsy>=0.5.6->statsmodels>=0.13.2->pmdarima) (1.16.0)\n", + "Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m25.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading Cython-3.0.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.6/3.6 MB\u001b[0m \u001b[31m87.8 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: Cython, pmdarima\n", + "Successfully installed Cython-3.0.11 pmdarima-2.0.4\n", + "Performing stepwise search to minimize aic\n", + " ARIMA(2,0,2)(0,0,0)[0] : AIC=inf, Time=1.02 sec\n", + " ARIMA(0,0,0)(0,0,0)[0] : AIC=78005.789, Time=0.04 sec\n", + " ARIMA(1,0,0)(0,0,0)[0] : AIC=75551.578, Time=0.08 sec\n", + " ARIMA(0,0,1)(0,0,0)[0] : AIC=76619.836, Time=0.35 sec\n", + " ARIMA(2,0,0)(0,0,0)[0] : AIC=74813.034, Time=0.18 sec\n", + " ARIMA(3,0,0)(0,0,0)[0] : AIC=74364.052, Time=0.21 sec\n", + " ARIMA(4,0,0)(0,0,0)[0] : AIC=74074.892, Time=0.36 sec\n", + " ARIMA(5,0,0)(0,0,0)[0] : AIC=73959.511, Time=0.34 sec\n", + " ARIMA(5,0,1)(0,0,0)[0] : AIC=inf, Time=2.61 sec\n", + " ARIMA(4,0,1)(0,0,0)[0] : AIC=inf, Time=1.82 sec\n", + " ARIMA(5,0,0)(0,0,0)[0] intercept : AIC=73094.800, Time=0.63 sec\n", + " ARIMA(4,0,0)(0,0,0)[0] intercept : AIC=73094.769, Time=0.44 sec\n", + " ARIMA(3,0,0)(0,0,0)[0] intercept : AIC=73097.614, Time=0.31 sec\n", + " ARIMA(4,0,1)(0,0,0)[0] intercept : AIC=73096.761, Time=0.83 sec\n", + " ARIMA(3,0,1)(0,0,0)[0] intercept : AIC=73099.606, Time=0.55 sec\n", + " ARIMA(5,0,1)(0,0,0)[0] intercept : AIC=73096.799, Time=1.13 sec\n", + "\n", + "Best model: ARIMA(4,0,0)(0,0,0)[0] intercept\n", + "Total fit time: 10.937 seconds\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forecasting, use one of the supported classes of index.\n", + " self._init_dates(dates, freq)\n", + "/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: An unsupported index was provided. As a result, forecasts cannot be generated. 
To use the model for forecasting, use one of the supported classes of index.\n", + " self._init_dates(dates, freq)\n", + "/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:473: ValueWarning: An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forecasting, use one of the supported classes of index.\n", + " self._init_dates(dates, freq)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ARIMA RMSE: 155.08416914567002\n", + "ARIMA MAE: 124.61616145375949\n", + "ARIMA MAPE: 2.6139414956995837\n", + "\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:837: ValueWarning: No supported index is available. Prediction results will be given with an integer index beginning at `start`.\n", + " return get_prediction_index(\n", + "/usr/local/lib/python3.10/dist-packages/statsmodels/tsa/base/tsa_model.py:837: FutureWarning: No supported index is available. 
In the next version, calling this method in a model without a supported index will result in an exception.\n", + " return get_prediction_index(\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **RMSE Plot**" + ], + "metadata": { + "id": "4eV6XvILHH2R" + } + }, + { + "cell_type": "code", + "source": [ + "metrics_df = pd.DataFrame(metrics)\n", + "\n", + "# RMSE Plot\n", + "plt.figure(figsize=(12, 6))\n", + "plt.bar(metrics_df['Model'], metrics_df['RMSE'], color='skyblue')\n", + "plt.title('RMSE for Different Models')\n", + "plt.xlabel('Model')\n", + "plt.ylabel('RMSE')\n", + "plt.xticks(rotation=45, ha='right')\n", + "plt.tight_layout()\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 349 + }, + "id": "VKYiQHEoHKTW", + "outputId": "422ee235-af66-4364-c369-989aa88e6177" + }, + "execution_count": 76, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **MAE Plot**" + ], + "metadata": { + "id": "a9OLdEnhIBpg" + } + }, + { + "cell_type": "code", + "source": [ + "# MAE Plot\n", + "plt.figure(figsize=(12, 6))\n", + "plt.bar(metrics_df['Model'], metrics_df['MAE'], color='lightgreen')\n", + "plt.title('MAE for Different Models')\n", + "plt.xlabel('Model')\n", + "plt.ylabel('MAE')\n", + "plt.xticks(rotation=45, ha='right')\n", + "plt.tight_layout()\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 349 + }, + "id": "mx69_xSJIMbV", + "outputId": "bb6d2f65-57ab-4790-db94-1e6f57cb844f" + }, + "execution_count": 77, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# **MAPE Plot**" + ], + "metadata": { + "id": "vnoQkSzCIQ9C" + } + }, + { + "cell_type": "code", + "source": [ + "# MAPE Plot\n", + "plt.figure(figsize=(12, 6))\n", + "plt.bar(metrics_df['Model'], metrics_df['MAPE'], color='salmon')\n", + "plt.title('MAPE for Different Models')\n", + "plt.xlabel('Model')\n", + "plt.ylabel('MAPE')\n", + "plt.xticks(rotation=45, ha='right')\n", + "plt.tight_layout()\n", + "plt.show()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 349 + }, + "id": "Xeh4ofl-IXHp", + "outputId": "ec96fd12-3a26-4371-ae6b-b6369b88ff33" + }, + "execution_count": 78, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + } + ], + "metadata": { + "colab": { + "provenance": [], + "gpuType": "V28" + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "TPU" + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file