From 0732c9951e7dd3cfd3373712ac1195fb756e0ec3 Mon Sep 17 00:00:00 2001 From: spandana2004 <110383668+spandana2004@users.noreply.github.com> Date: Wed, 16 Oct 2024 22:20:54 +0530 Subject: [PATCH] streamlit web app --- stock_price_prediction_app.py | 117 ++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 stock_price_prediction_app.py diff --git a/stock_price_prediction_app.py b/stock_price_prediction_app.py new file mode 100644 index 0000000..86c6e1e --- /dev/null +++ b/stock_price_prediction_app.py @@ -0,0 +1,117 @@ +import pandas as pd +import numpy as np +from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score +from sklearn.linear_model import LinearRegression +from sklearn.svm import SVR +from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor +from sklearn.tree import DecisionTreeRegressor +from sklearn.neighbors import KNeighborsRegressor +from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error +from sklearn.preprocessing import MinMaxScaler +import xgboost as xgb +import matplotlib.pyplot as plt +import streamlit as st +from sklearn.impute import SimpleImputer + +# Load and preprocess the dataset +@st.cache_data +def load_data(): + df = pd.read_csv('SBIN.csv') + df.drop(['Date', 'Adj Close'], axis=1, inplace=True) # Drop irrelevant columns + + # Handle missing values + imputer = SimpleImputer(strategy='mean') + df = pd.DataFrame(imputer.fit_transform(df), columns=df.columns) + + # Calculate VWAP + df['VWAP'] = (df['Close'] * df['Volume']).cumsum() / df['Volume'].cumsum() + + return df + +# Train the models +def train_models(df): + # Select features and target variable + X = df[['Open', 'High', 'Low', 'Volume', 'VWAP']] + y = df['Close'] + + # Split the data into training and testing sets + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Scale the features + scaler = MinMaxScaler() + X_train_scaled = scaler.fit_transform(X_train) + X_test_scaled = scaler.transform(X_test) + + models = { + 'Linear Regression': LinearRegression(), + 'SVR': SVR(), + 'Random Forest': RandomForestRegressor(), + 'Gradient Boosting': GradientBoostingRegressor(), + 'XGBoost': xgb.XGBRegressor(), + 'AdaBoost': AdaBoostRegressor(), + 'Decision Tree': DecisionTreeRegressor(), + 'KNeighbors': KNeighborsRegressor() + } + + trained_models = {} + + for name, model in models.items(): + model.fit(X_train_scaled, y_train) + trained_models[name] = model + + return trained_models, scaler, X_test_scaled, y_test + +# Load data and train models +df = load_data() +trained_models, scaler, X_test_scaled, y_test = train_models(df) + +# Streamlit app layout +st.title("Stock Price Prediction App") + +# User input for stock features +open_price = st.number_input("Open Price:", min_value=0.0, format="%.2f") +high_price = st.number_input("High Price:", min_value=0.0, format="%.2f") +low_price = st.number_input("Low Price:", min_value=0.0, format="%.2f") +volume = st.number_input("Volume:", min_value=0, format="%d") + +# Calculate VWAP for user input +vwap = (open_price + high_price + low_price) / 3 # Simplified VWAP calculation + +# Dropdown for model selection +model_selection = st.selectbox("Select Model for Prediction:", options=list(trained_models.keys())) + +# Button to predict +if st.button("Predict Closing Price"): + input_data = np.array([[open_price, high_price, low_price, volume, vwap]]) + input_scaled = scaler.transform(input_data) + + # Make prediction using the selected model + model = trained_models[model_selection] + prediction = model.predict(input_scaled) + + # Display the prediction + st.success(f"The predicted closing price is: ${prediction[0]:.2f}") + +# Optional: Visualize KNN Hyperparameter Tuning +if st.checkbox("Show KNN Hyperparameter Tuning Plot"): + k_values = range(1, 31) # Example range for k + mse_values = [] + + X = df[['Open', 'High', 'Low', 'Volume', 'VWAP']] + y = df['Close'] + + for k in k_values: + knn_model = KNeighborsRegressor(n_neighbors=k) + mse = -cross_val_score(knn_model, scaler.transform(X), y, cv=5, scoring='neg_mean_squared_error').mean() + mse_values.append(mse) + + plt.figure(figsize=(10, 6)) + plt.plot(k_values, mse_values, marker='o', linestyle='-') + plt.title('KNN Hyperparameter Tuning: MSE vs. Number of Neighbors') + plt.xlabel('Number of Neighbors (k)') + plt.ylabel('Mean Squared Error (MSE)') + plt.xticks(k_values) # Show all k values on x-axis + plt.grid() + + # Show the plot in Streamlit + st.pyplot(plt)