Skip to content

Commit

Permalink
Merge pull request #36 from rissangs/main
Browse files Browse the repository at this point in the history
Update script for test model
  • Loading branch information
wang-rui authored Nov 29, 2020
2 parents ab651ca + 7aa5374 commit 6fbc3d8
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 2 deletions.
Binary file added results/best_Model.pkl
Binary file not shown.
Binary file added results/final_model_quality.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions src/fit_wine_quality_predict_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ def store_cross_val_results(model_name, scores, results_dict):
def main(in_file_1, out_dir):
# read data and combine two data set vertically
train_df = pd.read_csv(in_file_1)
X_train = train_df.drop(columns = ['quality'])
y_train = train_df['quality']
X_train = train_df.drop(columns = ['quality','quality_rank' ])
y_train = train_df['quality_rank']

#-----------------------------------------------------------------------------------------------------------------------------
#PreProcessor
Expand Down
90 changes: 90 additions & 0 deletions src/wine_quality_test_results.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# author: UBC Master of Data Science - Group 33
# date: 2020-11-26


"""Pre-processing wine quality data for red wine(https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv) and
wine quality data for white wine(https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv).
Usage: src/wine_quality_test_results_test.py --in_file_1=<in_file_1> --in_file_2=<in_file_2> --out_dir=<out_dir>
Options:
--in_file_1=<in_file_1> Path (including file name) to the processed train data
--in_file_2=<in_file_2> Path (including file name) to the processed test data
--out_dir=<out_dir> Path (excluding file name) to save the confusion matrix
"""
import os
from docopt import docopt
import string
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# data
from sklearn import datasets
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.feature_extraction.text import CountVectorizer

# Feature selection
from sklearn.feature_selection import RFE, RFECV
from sklearn.impute import SimpleImputer

# classifiers / models
from sklearn.linear_model import RidgeClassifier
from sklearn.linear_model import LogisticRegression

# other
from sklearn.metrics import accuracy_score, log_loss, make_scorer, mean_squared_error, confusion_matrix
from sklearn.model_selection import (
GridSearchCV,
RandomizedSearchCV,
ShuffleSplit,
cross_val_score,
cross_validate,
train_test_split,
)
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import (
OneHotEncoder,
OrdinalEncoder,
PolynomialFeatures,
StandardScaler,
)
from sklearn.neural_network import MLPClassifier
import joblib
from sklearn.metrics import (plot_confusion_matrix)



# Parse the command-line arguments against the usage text in the module docstring.
# NOTE(review): this runs at import time, not only when the file is executed as a script.
opt = docopt(__doc__)

def main(in_file_1, in_file_2, out_dir):
    """Refit the saved best model on the train data and evaluate it on the test data.

    Reads both CSV files, refits the pipeline stored in
    ``results/best_Model.pkl`` on the train split, prints its score on the
    test split and saves a confusion-matrix plot as
    ``final_model_quality.png`` inside ``out_dir``.

    Parameters
    ----------
    in_file_1 : str
        Path (including file name) to the processed train data.
    in_file_2 : str
        Path (including file name) to the processed test data.
    out_dir : str
        Directory in which the confusion-matrix image is saved. A trailing
        path separator is optional; the directory is created when missing.
    """
    # read data and split it into features and target
    train_df = pd.read_csv(in_file_1)
    test_df = pd.read_csv(in_file_2)

    # 'quality' and 'quality_rank' are excluded from the features;
    # 'quality_rank' is the prediction target.
    non_feature_cols = ["quality", "quality_rank"]
    X_train = train_df.drop(columns=non_feature_cols)
    y_train = train_df["quality_rank"]
    X_test = test_df.drop(columns=non_feature_cols)
    y_test = test_df["quality_rank"]

    # ---------------------------------------------------------------------
    # Testing out model

    # NOTE: the model path is relative to the current working directory.
    # joblib.load unpickles the file, so only load artifacts produced by
    # this project.
    best_model_pipe = joblib.load("results/best_Model.pkl")
    best_model_pipe.fit(X_train, y_train)

    # Report the score instead of silently discarding it.
    test_score = best_model_pipe.score(X_test, y_test)
    print("Test score:", test_score)

    # NOTE(review): plot_confusion_matrix was removed in newer scikit-learn
    # releases (replaced by ConfusionMatrixDisplay.from_estimator); confirm
    # the pinned scikit-learn version before upgrading.
    plot_confusion_matrix(
        best_model_pipe, X_test, y_test, values_format="d", cmap=plt.cm.Blues
    )

    # Join with os.path so the file lands inside out_dir whether or not the
    # caller passed a trailing separator, and make sure the directory exists.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    plt.savefig(os.path.join(out_dir, "final_model_quality.png"))


if __name__ == "__main__":
    # Script entry point: forward the parsed command-line options to main().
    main(
        in_file_1=opt["--in_file_1"],
        in_file_2=opt["--in_file_2"],
        out_dir=opt["--out_dir"],
    )

0 comments on commit 6fbc3d8

Please sign in to comment.