Skip to content

Commit

Permalink
Merge pull request #18 from denBruneBarone/decision_tree
Browse files Browse the repository at this point in the history
Decision tree
  • Loading branch information
denBruneBarone authored Mar 15, 2024
2 parents 162542a + ff0391a commit 0dae3a4
Show file tree
Hide file tree
Showing 13 changed files with 338 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import matplotlib.pyplot as plt

from data_processing.energy_consumption import datapoints_summation, trapeziod_integration
from data_processing.energy_consumption.trapeziod_integration import trapezoidal_integration


def read_csv():
Expand Down Expand Up @@ -36,7 +37,9 @@ def plot_stacked_chart(data_summing, data_integrated):


# data_summing = datapoints_summation.calculate_energy_by_summing(read_csv())
data_integrated = trapeziod_integration.integrate_flight_data(read_csv())
# data_integrated = trapeziod_integration.integrate_flight_data(read_csv())
data_integrated = trapeziod_integration.integrate_each_row_specific_flight_data(read_csv())
data_integrated = trapeziod_integration.add_cumulative_column(data_integrated)

print(f"CALCULATION ENERGY CONSUMPTION FOR ALL FLIGHTS")
print("")
Expand Down
79 changes: 69 additions & 10 deletions data_processing/energy_consumption/trapeziod_integration.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pandas as pd
from scipy.interpolate import interp1d


Expand All @@ -12,7 +13,7 @@ def calculate_power(data):
def interpolate_power(time_points, power_values):
    """Linearly interpolate the power samples onto an evenly spaced time grid.

    Parameters
    ----------
    time_points : array of sample timestamps (must support .min()/.max()).
    power_values : array of power samples, same length as time_points.

    Returns
    -------
    tuple (interpolated_power, new_time_points), both of length
    len(time_points).
    """
    interpolator = interp1d(time_points, power_values, kind='linear', fill_value="extrapolate")
    # Evenly spaced grid with as many points as the input samples; a stale
    # duplicate assignment with num=1000 (leftover from an earlier revision)
    # has been removed — it was immediately overwritten and only wasted work.
    new_time_points = np.linspace(time_points.min(), time_points.max(), num=len(time_points))
    return interpolator(new_time_points), new_time_points


Expand All @@ -27,16 +28,74 @@ def trapezoidal_integration(time_points, power_values):
return total_energy


def integrate_flight_data(df):
    """Integrate power consumption over one flight DataFrame.

    The power samples are interpolated onto an even time grid and integrated
    with the trapezoid rule.

    Returns the total energy (divide by 3600 to convert to watt-hours;
    NOTE(review): assumes calculate_power yields watts and 'time' is in
    seconds — confirm).
    """
    time_points = df['time'].values
    power_values = calculate_power(df)
    interpolated_power, new_time_points = interpolate_power(time_points, power_values)
    total_energy = trapezoidal_integration(new_time_points, interpolated_power)
    # total_energy_wh = total_energy / 3600  # Convert joules to watt-hours
    return total_energy




# TODO(Casper): fix this. Right now the integral is not found via
# interpolation; this uses the calculation you used at the beginning.
def add_power_to_df(df):
    """Attach power and energy bookkeeping columns to a flight DataFrame.

    Adds 'power_consumption' (voltage * current) in place on *df*, then
    returns a time-sorted frame with 'time_difference' (dt since previous
    row), 'power_in_row' (power * dt, i.e. per-row energy) and
    'cumulative_power' (running sum of per-row energy).
    """
    df['power_consumption'] = df['battery_voltage'] * df['battery_current']
    ordered = df.sort_values(by='time')
    ordered['time_difference'] = ordered['time'].diff().fillna(0)
    ordered['power_in_row'] = ordered['power_consumption'] * ordered['time_difference']
    ordered['cumulative_power'] = ordered['power_in_row'].cumsum()
    return ordered


def integrate_specific_flight_data(data):
    """Integrate power consumption per flight.

    Returns a dict mapping flight id -> total integrated energy (divide by
    3600 to convert to watt-hours).
    """
    flight_energy = {}
    # Data is grouped per flight; integrate each group independently.
    for flight, flight_data in data.groupby('flight'):
        time_points = flight_data['time'].values
        power_values = calculate_power(flight_data)
        interpolated_power, new_time_points = interpolate_power(time_points, power_values)
        total_energy = trapezoidal_integration(new_time_points, interpolated_power)
        # total_energy_wh = total_energy / 3600  # Convert joules to watt-hours
        flight_energy[flight] = total_energy
    # The old trailing block that re-filtered flight 1 and recomputed its
    # energy was removed: it overwrote flight_energy[1] with the identical
    # value and crashed when flight 1 was absent from the data.
    return flight_energy


def integrate_each_row_specific_flight_data(data):
    """Integrate power for flight 1 between each consecutive pair of samples.

    Returns an independent copy of the flight-1 rows with an
    'integrated_power' column holding the trapezoidal energy of each
    interval; the final row gets a rectangle approximation so every row has
    a value.
    """
    # .copy() so the column assignment below writes to an independent frame,
    # not a view of `data` (avoids pandas SettingWithCopyWarning and a lost
    # write under copy-on-write).
    flight_data = data[data['flight'] == 1].copy()
    time_points = flight_data['time'].values
    power_values = calculate_power(flight_data)
    interpolated_power, new_time_points = interpolate_power(time_points, power_values)

    # Initialize list to store integrated power values
    integrated_power = []

    # Trapezoid rule between each consecutive pair of interpolated samples.
    for i in range(len(interpolated_power) - 1):
        time_diff = new_time_points[i + 1] - new_time_points[i]
        energy_interval = (interpolated_power[i] + interpolated_power[i + 1]) / 2.0 * time_diff
        integrated_power.append(energy_interval)

    # Last row has no successor: approximate with last power * last dt.
    time_diff_last_row = new_time_points[-1] - new_time_points[-2]
    energy_interval_last_row = interpolated_power[-1] * time_diff_last_row
    integrated_power.append(energy_interval_last_row)

    # Pad with None if interpolation returned fewer points than rows
    # (a no-op now that interpolate_power preserves the sample count).
    integrated_power.extend([None] * (len(flight_data) - len(integrated_power)))

    # Add integrated power as a new column to the DataFrame
    flight_data['integrated_power'] = integrated_power

    return flight_data


def add_cumulative_column(data):
    """Append a 'cumulative' running total of 'integrated_power'.

    Mutates *data* in place and returns the same DataFrame.
    """
    running_total = data['integrated_power'].cumsum()
    data['cumulative'] = running_total
    return data

8 changes: 4 additions & 4 deletions data_processing/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
notebook
pandas
pyarrow
matplotlib
notebook==7.1.1
pandas==2.2.1
pyarrow==15.0.1
matplotlib==3.8.3
10 changes: 0 additions & 10 deletions machine_learning/Model.py

This file was deleted.

21 changes: 12 additions & 9 deletions machine_learning/config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
class ModelConfig:
input_size = 1000
embedding_dim = 100
hidden_size = 128
num_classes = 3
class HPConfig:
    """Fixed decision-tree hyper-parameters, chosen relative to the reference paper."""
    criterion = 'friedman_mse'  # same as the paper
    max_depth = 10  # the value from the models paper is 7
    max_features = None  # same as the paper
    max_leaf_nodes = 500  # the value from the models paper is 10

class GridSearchConfig:
    """Hyper-parameter grid searched over a DecisionTreeRegressor."""
    # 'mse' and 'mae' were deprecated in scikit-learn 1.0 and removed in 1.2
    # (requirements pin scikit-learn==1.4.1.post1, where they raise); the
    # current names are 'squared_error' and 'absolute_error'.
    param_grid = {
        'criterion': ['squared_error', 'friedman_mse', 'absolute_error'],
        'max_depth': [2, 3, 4, 5, 6, 7, 8],
        'max_features': [None, 'sqrt', 'log2'],
        'max_leaf_nodes': [2, 3, 4, 5, 6, 7, 8, 9, 10]
    }

class TrainingConfig:
    """Training loop settings.

    NOTE(review): epochs/batch-size/learning-rate look like leftovers from a
    neural-network setup and are not obviously used by the sklearn decision
    tree — confirm against the training code.
    """
    num_epochs = 10
    batch_size = 32
    learning_rate = 0.05
38 changes: 38 additions & 0 deletions machine_learning/grid_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pandas as pd
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.tree import DecisionTreeRegressor
from machine_learning.config import GridSearchConfig
from machine_learning.prepare_for_training import organize_data

def grid_search(array_of_df):
    """Cross-validated hyper-parameter search for a DecisionTreeRegressor.

    Parameters
    ----------
    array_of_df : list of per-flight DataFrames, forwarded to organize_data.

    Returns
    -------
    (best_params, best_score, best_regressor) — best_score is the mean CV
    score under 'neg_mean_squared_error' (closer to 0 is better) and
    best_regressor is already refitted on the full training data.
    """
    # n_splits: number of subsets; splits the train-val data into n_splits
    # subsets for cross validation.
    tree = DecisionTreeRegressor()
    cv = KFold(n_splits=5, shuffle=True, random_state=42)

    # 'friedman_mse' is a tree *split criterion*, not a GridSearchCV scorer
    # name — passing it as `scoring` raises ValueError. Use the sklearn
    # scoring string for mean squared error instead.
    search = GridSearchCV(estimator=tree, param_grid=GridSearchConfig.param_grid,
                          cv=cv, scoring='neg_mean_squared_error')

    flight_dict_list = organize_data(array_of_df)

    # Extract features and target variable from flight_dict_list, then
    # collapse the per-flight frames into single train inputs.
    X_train = pd.concat([flight['data'] for flight in flight_dict_list], ignore_index=True)
    y_train = pd.concat([flight['power'] for flight in flight_dict_list], ignore_index=True)

    # Perform grid search
    search.fit(X_train, y_train)

    # refit=True (the GridSearchCV default) already retrains best_estimator_
    # on the full data, so no manual refit is needed.
    return search.best_params_, search.best_score_, search.best_estimator_



29 changes: 29 additions & 0 deletions machine_learning/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import os
from machine_learning.pre_processing import pre_process_and_split_data
from machine_learning.prepare_for_training import format_data
from machine_learning.training import training_and_evaluating


# Repository root: one level above this machine_learning/ package.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../"))
# Pre-processed Rodrigues flights dataset used for training.
flights_processed = os.path.join(PROJECT_ROOT, "data/datasets/rodrigues/flights_processed.csv")


def train():
    """Pre-process, split, format and train on the flights dataset."""
    # pre_processing
    print("Pre-processing data...")
    # Reuse the module-level constant instead of rebuilding the same path
    # string a second time (the two copies could silently drift apart).
    input_file = flights_processed

    # organizing
    print("Splitting data...")
    train_data, test_data = pre_process_and_split_data(input_file)
    print("Formatting data...")
    train_data = format_data(train_data)
    test_data = format_data(test_data)

    # training
    print("Training...")
    training_and_evaluating(train_data, test_data)


# Entry point when executed directly (python machine_learning/main.py).
if __name__ == "__main__":
    train()
27 changes: 21 additions & 6 deletions machine_learning/pre_processing.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
def load_data () :
pass
import pandas as pd
from sklearn.model_selection import train_test_split

def extract_flights () :
pass

def load_data(file_path):
    """Read the flights CSV at *file_path* into a DataFrame."""
    return pd.read_csv(file_path, sep=',', low_memory=False)

def split_data () :
pass

def extract_flights(df):
    """Return one DataFrame per distinct value of the 'flight' column."""
    flights = []
    for _, flight_df in df.groupby('flight'):
        flights.append(flight_df)
    return flights


def split_data(df, train_size=0.8, random_state=42):
    """Split the flights in *df* into train and test lists of DataFrames.

    The split is per flight (whole flights end up on one side), reproducible
    via *random_state*.
    """
    flights = extract_flights(df)
    test_fraction = 1 - train_size
    return train_test_split(flights, test_size=test_fraction, random_state=random_state)


def pre_process_and_split_data(file_path):
    """Load the CSV at *file_path* and return (train_data, test_data)."""
    return split_data(load_data(file_path))

74 changes: 66 additions & 8 deletions machine_learning/prepare_for_training.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,69 @@
def target_variable_processing () :
pass
import numpy as np
from torch.utils.data import Dataset
from data_processing.energy_consumption.trapeziod_integration import add_power_to_df
from sklearn.preprocessing import StandardScaler
import pandas as pd

def pre_process_flights () :
pass

def organize_data () :
pass

class TrainingDataset:
    """Collection of per-flight DataFrames with standardized input features.

    NOTE(review): despite the `Dataset` import at the top of the file, this
    class does not subclass torch.utils.data.Dataset — confirm whether it
    should.
    """

    # Input feature columns. Shared by fit_scaler and __getitem__ so the two
    # lists can never drift apart (they were previously duplicated inline).
    FEATURE_COLUMNS = [
        'time', 'wind_speed', 'wind_angle',
        'position_x', 'position_y', 'position_z',
        'orientation_x', 'orientation_y', 'orientation_z', 'orientation_w',
        'velocity_x', 'velocity_y', 'velocity_z',
        'angular_x', 'angular_y', 'angular_z',
        'linear_acceleration_x', 'linear_acceleration_y', 'linear_acceleration_z',
        'payload']

    def __init__(self, data):
        # data: list of per-flight DataFrames.
        self.data = data
        self.scaler = StandardScaler()
        self.fit_scaler()  # Build and fit the scaler at initialisation.

    # Looks at all the given data, not just a single DataFrame, and fits a
    # suitable scaler.
    def fit_scaler(self):
        # Concatenate all DataFrames in self.data into a single DataFrame
        df = pd.concat(self.data, ignore_index=True)

        features = df[self.FEATURE_COLUMNS].values

        self.scaler.fit(features)  # Fit the scaler on the entire training dataset

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        sample = self.data[index]

        # Input features
        input_array = sample[self.FEATURE_COLUMNS].values

        # Normalize input with the scaler fitted at initialization
        normalized_input = self.scaler.transform(input_array)

        # Output/target features
        target_array = sample[
            ['battery_current', 'battery_voltage']
        ].values

        return normalized_input, target_array


def format_data(array_of_df):
    """Zero-base positions, drop metadata columns and add power columns.

    Shifts each flight's position columns in place so it starts at the
    origin, then returns a new list of frames with the metadata columns
    removed and the power bookkeeping columns attached.
    """
    formatted = []
    for df in array_of_df:
        # Shift the flight so its first sample sits at the origin.
        for axis in ('position_x', 'position_y', 'position_z'):
            df[axis] = df[axis] - df[axis].iloc[0]

        trimmed = df.drop(columns=['flight', 'speed', 'altitude', 'date', 'time_day', 'route'])
        formatted.append(add_power_to_df(trimmed))
    return formatted
4 changes: 4 additions & 0 deletions machine_learning/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
torch==2.2.1
pandas==2.2.1
scikit-learn==1.4.1.post1
numpy
Loading

0 comments on commit 0dae3a4

Please sign in to comment.