Skip to content

Commit

Permalink
Merge pull request #25 from denBruneBarone/grid_search_cv
Browse files Browse the repository at this point in the history
Grid search cv
  • Loading branch information
Vi1999 authored Mar 18, 2024
2 parents e998430 + 8cb3ccc commit 76d773f
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 20 deletions.
6 changes: 6 additions & 0 deletions machine_learning/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ class HPConfig:
max_features = None # same as in the paper
max_leaf_nodes = 500 # the value from the paper about models is 10 -- TODO confirm why 500 is used here

class BestHPConfig:
    """Placeholder for the best hyperparameters found by a grid search.

    All attributes start unset (None); a search is expected to fill them in.
    """

    criterion = max_depth = max_features = max_leaf_nodes = None

class GridSearchConfig:
param_grid = {
'criterion': ['friedman_mse', 'squared_error'],
Expand Down
94 changes: 74 additions & 20 deletions machine_learning/training.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,84 @@
import numpy as np
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from machine_learning.config import HPConfig
from machine_learning.prepare_for_training import TrainingDataset
from sklearn.metrics import mean_squared_error
from machine_learning.config import HPConfig, GridSearchConfig
from sklearn.metrics import make_scorer


def train_model(train_data):
training_dataset = TrainingDataset(train_data)
def rmse_cum_power(true_labels, predicted_labels):
    """Root-mean-squared error between the cumulative power series derived
    from the true labels and the one derived from the predicted labels.

    Both arrays are (n, 2); column 0 times column 1 times the per-step time
    delta gives an instantaneous energy term, accumulated with cumsum.

    NOTE(review): time_diff is derived from true_labels[:, 0], which is ALSO
    used as a power factor below -- confirm column 0 really encodes both.
    """
    # Per-step time deltas; prepend=0 keeps the length equal to n.
    time_diff = np.diff(true_labels[:, 0], prepend=0)

    # FIX: names were previously swapped (the "true" cumsum was built from
    # the predictions and vice versa). RMSE is symmetric in its arguments,
    # so the returned value is unchanged -- only the naming is corrected.
    true_cumulative_power = np.cumsum(
        true_labels[:, 0] * true_labels[:, 1] * time_diff)
    predicted_cumulative_power = np.cumsum(
        predicted_labels[:, 0] * predicted_labels[:, 1] * time_diff)

    # RMSE computed directly in numpy; equivalent to
    # sqrt(mean_squared_error(a, b)) without needing sklearn here.
    return np.sqrt(np.mean((true_cumulative_power - predicted_cumulative_power) ** 2))

# Extract features and targets from the training dataset
train_features = []
train_targets = []
for index in range(len(training_dataset)):
input_array, target_array = training_dataset[index]
train_features.append(input_array)
train_targets.append(target_array)

# Concatenate the lists along the appropriate axis
train_features_np = np.concatenate(train_features, axis=0)
train_targets_np = np.concatenate(train_targets, axis=0)
def custom_scoring(true_labels, predicted_labels):
    """Metric wrapper: cumulative-power RMSE (delegates to rmse_cum_power)."""
    return rmse_cum_power(true_labels, predicted_labels)


# BUG FIX: RMSE is an error, not a score. make_scorer defaults to
# greater_is_better=True, under which GridSearchCV would MAXIMIZE the RMSE
# and select the WORST hyperparameters. greater_is_better=False makes the
# scorer negate the metric so that model selection minimizes the RMSE.
# (Rebinds the name, replacing the raw metric function with the scorer.)
custom_scoring = make_scorer(custom_scoring, greater_is_better=False)


def train_model(train_data, grid_search_cv=True):
    """Train a DecisionTreeRegressor on *train_data*.

    With grid_search_cv=True (default), runs a 5-fold cross-validated
    GridSearchCV over GridSearchConfig.param_grid and returns the best,
    fully refitted estimator. With grid_search_cv=False, trains a single
    tree with the fixed hyperparameters from HPConfig.

    Returns:
        A fitted sklearn DecisionTreeRegressor.
    """
    training_dataset = TrainingDataset(train_data)
    train_features_np, train_targets_np = _extract_arrays(training_dataset)

    if grid_search_cv:
        base_model = DecisionTreeRegressor()
        # Split the training data into n_splits subsets for cross-validation.
        cv = KFold(n_splits=5, shuffle=True, random_state=42)  # TODO best n_split?
        grid_search = GridSearchCV(estimator=base_model,
                                   param_grid=GridSearchConfig.param_grid,
                                   cv=cv, scoring=custom_scoring, verbose=2)
        grid_search.fit(train_features_np, train_targets_np)

        print("Best Params: ", grid_search.best_params_)
        print("Best score: ", grid_search.best_score_)
        # TODO: Add more print details

        # refit=True (the GridSearchCV default) has already retrained
        # best_estimator_ on the full training set, so no extra fit() call
        # is needed.
        # BUG FIX: previously the UNFITTED base model was returned instead
        # of the grid search's best estimator.
        return grid_search.best_estimator_
    else:
        # Single tree with the fixed hyperparameters from HPConfig.
        model = DecisionTreeRegressor(criterion=HPConfig.criterion,
                                      max_depth=HPConfig.max_depth,
                                      max_features=HPConfig.max_features,
                                      max_leaf_nodes=HPConfig.max_leaf_nodes,
                                      random_state=42)
        model.fit(train_features_np, train_targets_np)
        return model


def _extract_arrays(training_dataset):
    """Collect every (features, targets) pair from the dataset and
    concatenate each list into a single numpy array.

    Returns:
        (features_np, targets_np) tuple of concatenated arrays.
    """
    train_features = []
    train_targets = []
    for index in range(len(training_dataset)):
        input_array, target_array = training_dataset[index]
        train_features.append(input_array)
        train_targets.append(target_array)
    features_np = np.concatenate(train_features, axis=0)
    targets_np = np.concatenate(train_targets, axis=0)
    return features_np, targets_np

Expand All @@ -47,6 +99,8 @@ def evaluate_model(model, test_data):
test_features_np = np.concatenate(test_features, axis=0)
test_targets_np = np.concatenate(test_targets, axis=0)

model.fit(test_features_np, test_targets_np)

# Predict on the test set
test_predictions = model.predict(test_features_np)

Expand Down

0 comments on commit 76d773f

Please sign in to comment.