Skip to content

Commit

Permalink
Merge pull request #43 from shyan0903/main
Browse files Browse the repository at this point in the history
Reorganize structure and change makefile
  • Loading branch information
shyan0903 authored Dec 4, 2021
2 parents 38cdd2c + 2f94e77 commit 221c3a2
Show file tree
Hide file tree
Showing 32 changed files with 4,004 additions and 2,657 deletions.
53 changes: 28 additions & 25 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# Makefile
# Allyson Stoll, Dec 2021
# Allyson Stoll, Irene Yan
# Dec 4, 2021

# This Makefile runs the full ramen rating analysis: data download,
# train/test split, country-code lookup, EDA figures, preprocessing,
# model training, prediction, and rendering of the final report,
# based on ramen ratings from The Ramen Rater.

# Example usage:
# make data/raw/ramen_ratings.csv
# make all

# build the final report and everything it depends on
all : doc/Report.html
Expand All @@ -17,57 +18,59 @@ data/raw/ramen_ratings.csv : src/download_data.py
--out_file="data/raw/ramen_ratings.csv"

# split the dataset
data/process/train_df.csv data/process/test_df.csv : data/raw/ramen_ratings.csv src/split.py
python3.9 src/split.py --path="data/raw/ramen_ratings.csv" --out_file_train="data/process/train_df.csv" \
--out_file_test="data/process/test_df.csv"
data/processed/train_df.csv data/processed/test_df.csv : data/raw/ramen_ratings.csv src/split.py
python3.9 src/split.py --path="data/raw/ramen_ratings.csv" --out_file_train="data/processed/train_df.csv" \
--out_file_test="data/processed/test_df.csv"

# add country codes to the dataset
data/process/train_codes_df.csv : data/process/train_df.csv src/get_countrycode_data.py
python3.9 src/get_countrycode_data.py --path="data/process/train_df.csv" \
--out_file="data/eda/train_codes_df.csv"
data/processed/train_codes_df.csv : data/processed/train_df.csv src/get_countrycode_data.py
python3.9 src/get_countrycode_data.py --path="data/processed/train_df.csv" \
--out_file="data/processed/train_codes_df.csv"

# complete EDA figures
results/figures/stars_histogram.png results/figures/type_histogram.png \
results/figures/variety_wordcloud.png results/figures/ramen_map.png : data/eda/train_codes_df.csv src/generate_EDA_figures.py
python3.9 src/generate_EDA_figures.py --path="data/eda/train_codes_df.csv" --out_path="results/figures/"
results/figures/variety_wordcloud.png results/figures/ramen_map.png : data/processed/train_codes_df.csv src/generate_EDA_figures.py
python3.9 src/generate_EDA_figures.py --path="data/processed/train_codes_df.csv" --out_path="results/figures/"

# preprocess data for modeling
data/process/train_process.csv data/process/test_process.csv : \
data/process/train_df.csv data/process/test_df.csv src/preprocess.py
python3.9 src/preprocess.py --train_path="data/process/train_df.csv" --test_path="data/process/test_df.csv" \
--out_file_train="data/process/train_process.csv" --out_file_test="data/process/test_process.csv"
data/processed/train_processed.csv data/processed/test_processed.csv : \
data/processed/train_df.csv data/processed/test_df.csv src/preprocess.py
python3.9 src/preprocess.py --train_path="data/processed/train_df.csv" --test_path="data/processed/test_df.csv" \
--out_file_train="data/processed/train_processed.csv" --out_file_test="data/processed/test_processed.csv"

# create model and supporting figures
results/best_model.pkl results/train_metrics.jpg results/Top_20_Good_features.csv results/Top_20_Bad_features.csv : \
data/process/train_process.csv src/train_model.py
python3.9 src/train_model.py --train_file="data/process/train_process.csv" --out_file_train="results/best_model.pkl" \
data/processed/train_processed.csv src/train_model.py
python3.9 src/train_model.py --train_file="data/processed/train_processed.csv" --out_file_train="results/best_model.pkl" \
--out_file_result="results/"

# generate predictions and test metrics from the trained model
results/prediction/prediction.csv results/test_metrics.jpg : data/process/test_process.csv results/best_model.pkl src/predict.py
python3.9 src/predict.py --test_file="data/process/test_process.csv" --model_file="results/best_model.pkl" \
results/prediction/prediction.csv results/prediction/test_metrics.jpg : data/processed/test_processed.csv results/best_model.pkl src/predict.py
python3.9 src/predict.py --test_file="data/processed/test_processed.csv" --model_file="results/best_model.pkl" \
--out_file_result="results/prediction/"

# write the report
doc/Report.html : results/prediction/prediction.csv results/test_metrics.jpg \
doc/Report.html : results/prediction/prediction.csv results/prediction/test_metrics.jpg \
results/figures/stars_histogram.png results/figures/type_histogram.png \
results/figures/variety_wordcloud.png results/figures/ramen_map.png
Rscript -e "rmarkdown::render('doc/Report.Rmd')"

# remove the entire analysis
clean :
rm -rf data/raw/ramen_ratings.csv
rm -rf data/process/train_df.csv
rm -rf data/process/test_df.csv
rm -rf data/process/train_codes_df.csv
rm -rf data/processed/train_df.csv
rm -rf data/processed/test_df.csv
rm -rf data/processed/train_codes_df.csv
rm -rf results/figures/stars_histogram.png
rm -rf results/figures/type_histogram.png
rm -rf results/figures/variety_wordcloud.png
rm -rf results/figures/ramen_map.png
rm -rf data/process/train_process.csv
rm -rf data/process/test_process.csv
rm -rf data/processed/train_processed.csv
rm -rf data/processed/test_processed.csv
rm -rf results/best_model.pkl
rm -rf results/train_metrics.jpg
rm -rf results/Top_20_Good_features.csv
rm -rf results/Top_20_Bad_features.csv
rm -rf results/prediction/prediction.csv
rm -rf results/test_metrics.jpg
rm -rf results/prediction/test_metrics.jpg
rm -rf doc/Report.html
Loading

0 comments on commit 221c3a2

Please sign in to comment.