diff --git a/.gitignore b/.gitignore
index 062e76e8..07e7bf65 100644
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,4 @@ tmp/
 .gitignore
 pipeline_debug.json
 inference-time-data*
+optuna_studies.db
diff --git a/hordelib/train.py b/hordelib/train.py
deleted file mode 100644
index fb141cf0..00000000
--- a/hordelib/train.py
+++ /dev/null
@@ -1,384 +0,0 @@
-# type: ignore
-#
-# Train a predictive model from horde payload inputs to predict inference time.
-#
-# Supports multi-processing, just run this multiple times and the processes will
-# automatically work together on the training. We are training with torch and searching
-# through network hyper parameters using Optuna.
-#
-# Requires two input files (both exactly the same format) which can be created by enabling
-# the SAVE_KUDOS_TRAINING_DATA constant in the worker.
-#   - inference-time-data.json
-#   - inference-time-data-validation.json
-#
-# The output is a series of model checkpoints, "kudos_models/kudos-X-n.ckpt" Where n is the
-# number of the trial and X is the study version. Once the best trial number is identified
-# simply select the appropriate file.
-#
-# The stand-alone class in examples/kudos.py is the code to actually use the model.
-#
-# Requires also a local mysql database named "optuna" and assumes it can connect
-# with user "root" password "root". Change to your needs.
-#
-# For visualisation with optuna dashboard:
-#   optuna-dashboard mysql://root:root@localhost/optuna
-#
-# This is a quick hack to assist with kudos calculation.
-import math
-import os
-import random
-import sys
-import time
-
-import torch
-import torch.nn as nn
-from torch import optim
-from torch.utils.data import DataLoader, Dataset
-
-import hordelib
-
-hordelib.initialise()
-import json
-import pickle
-
-from hordelib.horde import HordeLib
-
-ENABLE_TRAINING = False
-
-if ENABLE_TRAINING:
-    import optuna
-
-
-random.seed()
-
-# Database connection string for Optuna - don't use root :)
-DB_CONNECTION_STRING = "mysql://root:root@localhost/optuna"
-
-# Where is our training data?
-TRAINING_DATA_FILENAME = "f:/ai/dev/AI-Horde-Worker/inference-time-data.json"
-VALIDATION_DATA_FILENAME = "f:/ai/dev/AI-Horde-Worker/inference-time-data-validation.json"
-
-# Number of trials to run.
-# Each trial generates a new neural network topology with new hyper parameters and trains it.
-NUMBER_OF_STUDY_TRIALS = 2000
-
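The multi-worker behaviour promised in the header comment is not implemented by this script itself; it falls out of Optuna's storage layer. Every process creates the study with the same name, the same storage URL, and `load_if_exists=True`, and the shared database hands out trials. A minimal sketch of that pattern, assuming the MySQL backend described in the comments above (the toy objective is a placeholder):

```python
import optuna

def objective(trial):
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2

# Run this same snippet in several processes; the shared storage
# coordinates which trial each worker picks up next.
study = optuna.create_study(
    direction="minimize",
    study_name="kudos_model_v21",
    storage="mysql://root:root@localhost/optuna",
    load_if_exists=True,
)
study.optimize(objective, n_trials=100)
```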
-STUDY_VERSION = "v21" - -# Hyper parameter search bounds -MIN_NUMBER_OF_EPOCHS = 50 -MAX_NUMBER_OF_EPOCHS = 2000 -MAX_HIDDEN_LAYERS = 6 -MIN_NODES_IN_LAYER = 4 -MAX_NODES_IN_LAYER = 128 -MIN_LEARNING_RATE = 1e-5 -MAX_LEARNING_RATE = 1e-1 -MIN_WEIGHT_DECAY = 1e-6 -MAX_WEIGHT_DECAY = 1e-1 -MIN_DATA_BATCH_SIZE = 32 -MAX_DATA_BATCH_SIZE = 512 - -# The study sampler to use -if ENABLE_TRAINING: - OPTUNA_SAMPLER = optuna.samplers.TPESampler(n_startup_trials=30, n_ei_candidates=30) - # OPTUNA_SAMPLER = optuna.samplers.NSGAIISampler() # genetic algorithm - -# We have the following 14 inputs to our kudos calculation, for example: -PAYLOAD_EXAMPLE = { - "height": 576, - "width": 1024, - "ddim_steps": 35, - "cfg_scale": 9.0, - "denoising_strength": 0.75, - "control_strength": 1.0, - "karras": True, - "hires_fix": False, - "source_image": False, - "source_mask": False, - "source_processing": "txt2img", - "sampler_name": "k_dpm_2_a", - "control_type": "canny", - "post_processing": ["RealESRGAN_x4plus", "CodeFormers"], -} -# And one output -# "time": 13.2032 - - -KNOWN_POST_PROCESSORS = [ - "RealESRGAN_x4plus", - "RealESRGAN_x2plus", - "RealESRGAN_x4plus_anime_6B", - "NMKD_Siax", - "4x_AnimeSharp", - "strip_background", - "GFPGAN", - "CodeFormers", -] -KNOWN_SAMPLERS = sorted(set(HordeLib.SAMPLERS_MAP.keys())) -KNOWN_CONTROL_TYPES = list(set(HordeLib.CONTROLNET_IMAGE_PREPROCESSOR_MAP.keys())) -KNOWN_CONTROL_TYPES.append("None") -KNOWN_CONTROL_TYPES.sort() -KNOWN_SOURCE_PROCESSING = HordeLib.SOURCE_IMAGE_PROCESSING_OPTIONS[:] -KNOWN_SOURCE_PROCESSING.append("txt2img") -KNOWN_SOURCE_PROCESSING.sort() - - -# This is an example of how to use the final model, pass in a horde payload, get back a predicted time in seconds -def payload_to_time(model, payload): - inputs = KudosDataset.payload_to_tensor(payload).squeeze() - with torch.no_grad(): - output = model(inputs) - return round(float(output.item()), 2) - - -# This is how to load the model required above -def load_model(model_filename): - with open(model_filename, "rb") as infile: - return pickle.load(infile) - - -# This is just an helper for walking through the validation dataset one line at a time -# and using the methods above to calculate an overall average percentage accuracy -def test_one_by_one(model_filename): - dataset = [] - with open(VALIDATION_DATA_FILENAME) as infile: - while line := infile.readline(): - dataset.append(json.loads(line)) - - model = load_model(model_filename) - - perc = [] - total_job_time = 0 - total_time = 0 - for data in dataset: - model_time = time.perf_counter() - predicted = payload_to_time(model, data) - total_time += time.perf_counter() - model_time - actual = round(data["time"], 2) - total_job_time += data["time"] - - diff = abs(actual - predicted) - max_val = max(actual, predicted) - percentage_accuracy = (1 - diff / max_val) * 100 - - perc.append(percentage_accuracy) - # Print the data if very inaccurate prediction - if percentage_accuracy < 60: - print(data) - print(f"{predicted} predicted, {actual} actual ({round(percentage_accuracy, 1)}%)") - - avg_perc = round(sum(perc) / len(perc), 1) - print(f"Average kudos calculation time {round((total_time*1000000)/len(perc))} micro-seconds") - print(f"Average actual job time in the dataset {round(total_job_time/len(perc), 2)} seconds") - print(f"Average accuracy = {avg_perc}%") - - -class KudosDataset(Dataset): - def __init__(self, filename): - self.data = [] - self.labels = [] - with open(filename) as infile: - while line := infile.readline().strip(): - line = 
-class KudosDataset(Dataset):
-    def __init__(self, filename):
-        self.data = []
-        self.labels = []
-        with open(filename) as infile:
-            while line := infile.readline().strip():
-                line = json.loads(line)
-                self.data.append(KudosDataset.payload_to_tensor(line)[0])
-                self.labels.append(line["time"])
-
-        self.labels = torch.tensor(self.labels).float()
-        self.mixed_data = torch.stack(self.data)
-
-    @classmethod
-    def payload_to_tensor(cls, payload):
-        data = []
-        data_samplers = []
-        data_control_types = []
-        data_source_processing_types = []
-        data_post_processors = []
-        data.append(
-            [
-                payload["height"] / 1024,
-                payload["width"] / 1024,
-                payload["ddim_steps"] / 100,
-                payload["cfg_scale"] / 30,
-                payload.get("denoising_strength", 1.0),
-                payload.get("control_strength", payload.get("denoising_strength", 1.0)),
-                1.0 if payload["karras"] else 0.0,
-                1.0 if payload.get("hires_fix", False) else 0.0,
-                1.0 if payload.get("source_image", False) else 0.0,
-                1.0 if payload.get("source_mask", False) else 0.0,
-            ],
-        )
-        data_samplers.append(payload["sampler_name"] if payload["sampler_name"] in KNOWN_SAMPLERS else "k_euler")
-        data_control_types.append(payload.get("control_type", "None"))
-        data_source_processing_types.append(payload.get("source_processing", "txt2img"))
-        data_post_processors = payload.get("post_processing", [])[:]
-
-        _data_floats = torch.tensor(data).float()
-        _data_samplers = cls.one_hot_encode(data_samplers, KNOWN_SAMPLERS)
-        _data_control_types = cls.one_hot_encode(data_control_types, KNOWN_CONTROL_TYPES)
-        _data_source_processing_types = cls.one_hot_encode(data_source_processing_types, KNOWN_SOURCE_PROCESSING)
-        _data_post_processors = cls.one_hot_encode_combined(data_post_processors, KNOWN_POST_PROCESSORS)
-        return torch.cat(
-            (_data_floats, _data_samplers, _data_control_types, _data_source_processing_types, _data_post_processors),
-            dim=1,
-        )
-
-    @classmethod
-    def one_hot_encode(cls, strings, unique_strings):
-        one_hot = torch.zeros(len(strings), len(unique_strings))
-        for i, string in enumerate(strings):
-            one_hot[i, unique_strings.index(string)] = 1
-        return one_hot
-
-    @classmethod
-    def one_hot_encode_combined(cls, strings, unique_strings):
-        one_hot = torch.zeros(len(strings), len(unique_strings))
-        for i, string in enumerate(strings):
-            one_hot[i, unique_strings.index(string)] = 1
-
-        return torch.sum(one_hot, dim=0, keepdim=True)
-
-    def __len__(self):
-        return len(self.data)
-
-    def __getitem__(self, idx):
-        return self.mixed_data[idx], self.labels[idx]
-
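`one_hot_encode` turns one categorical value per row into an indicator vector over a fixed, sorted vocabulary, while `one_hot_encode_combined` collapses a multi-valued field (the post-processor list) into a single multi-hot row by summing. A self-contained illustration of the same idea:

```python
import torch

def one_hot_encode(strings, unique_strings):
    one_hot = torch.zeros(len(strings), len(unique_strings))
    for i, string in enumerate(strings):
        one_hot[i, unique_strings.index(string)] = 1
    return one_hot

vocab = ["4x_AnimeSharp", "CodeFormers", "GFPGAN", "RealESRGAN_x4plus"]
# One row per value, then summed so a single job can enable several post-processors.
row = one_hot_encode(["CodeFormers", "RealESRGAN_x4plus"], vocab).sum(dim=0, keepdim=True)
print(row)  # tensor([[0., 1., 0., 1.]])
```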
-if ENABLE_TRAINING:
-
-    def create_sequential_model(trial, layer_sizes, input_size, output_size=1):
-        # Define the layer sizes
-        layer_sizes = [input_size] + layer_sizes + [output_size]
-
-        # Create the layers and activation functions
-        layers = []
-        for i in range(len(layer_sizes) - 1):
-            layers.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
-            if i < len(layer_sizes) - 2:
-                layers.append(nn.ReLU())  # Use ReLU activation for all layers except the last one
-                # Add a dropout layer
-                if i > 0:
-                    drop = trial.suggest_float(f"dropout_l{i}", 0.05, 0.2, log=True)
-                    layers.append(nn.Dropout(drop))
-
-        # Create the nn.Sequential model
-        return nn.Sequential(*layers)
-
-    def objective(trial):
-        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        trial.set_user_attr("name", "predict_kudos")
-
-        # Network topology
-        input_size = len(KudosDataset.payload_to_tensor(PAYLOAD_EXAMPLE)[0])
-        num_hidden_layers = trial.suggest_int("hidden_layers", 1, MAX_HIDDEN_LAYERS, log=True)
-        layers = []
-        for i in range(num_hidden_layers):
-            layers.append(
-                trial.suggest_int(f"hidden_layer_{i}_size", MIN_NODES_IN_LAYER, MAX_NODES_IN_LAYER, log=True),
-            )
-        output_size = 1  # we want just the predicted time in seconds
-
-        # Create the network
-        model = create_sequential_model(trial, layers, input_size, output_size).to(device)
-
-        # Optimiser
-        optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
-        lr = trial.suggest_float("lr", MIN_LEARNING_RATE, MAX_LEARNING_RATE, log=True)
-        weight_decay = trial.suggest_float("weight_decay", MIN_WEIGHT_DECAY, MAX_WEIGHT_DECAY, log=True)
-
-        optimizer = None
-
-        if optimizer_name == "Adam":
-            optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
-        elif optimizer_name == "RMSprop":
-            optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
-        elif optimizer_name == "SGD":
-            optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
-
-        if optimizer is None:
-            raise Exception("Unknown optimizer")
-
-        # Load training dataset
-        train_dataset = KudosDataset(TRAINING_DATA_FILENAME)
-        batch_start = int(math.ceil(math.log2(MIN_DATA_BATCH_SIZE)))
-        batch_end = int(math.floor(math.log2(MAX_DATA_BATCH_SIZE)))
-        batch_sizes = [2**i for i in range(batch_start, batch_end + 1)]
-        batch = trial.suggest_categorical("batch_size", batch_sizes)
-        train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True)
-
-        # Load the validation dataset
-        validate_dataset = KudosDataset(VALIDATION_DATA_FILENAME)
-        validate_loader = DataLoader(validate_dataset, batch_size=64, shuffle=True)
-
-        # Loss function
-        criterion = nn.MSELoss()
-
-        num_epochs = trial.suggest_int("num_epochs", MIN_NUMBER_OF_EPOCHS, MAX_NUMBER_OF_EPOCHS)
-        total_loss = None
-        for _ in range(num_epochs):
-            # Train the model
-            model.train()
-            for data, labels in train_loader:
-                data = data.to(device)
-                labels = labels.to(device)
-                optimizer.zero_grad()
-                labels = labels.unsqueeze(1)
-                outputs = model(data)
-                loss = criterion(outputs, labels)
-                loss.backward()
-                optimizer.step()
-
-            model.eval()
-            total_loss = 0
-            with torch.no_grad():
-                for data, labels in validate_loader:
-                    data = data.to(device)
-                    labels = labels.to(device)
-                    outputs = model(data)
-                    labels = labels.unsqueeze(1)
-                    loss = criterion(outputs, labels)
-                    total_loss += loss
-
-            total_loss /= len(validate_loader)
-            total_loss = round(float(total_loss), 2)
-
-        # Pickle it as we'll forget the model architecture
-        filename = f"kudos_models/kudos-{STUDY_VERSION}-{trial.number}.ckpt"
-        with open(filename, "wb") as outfile:
-            pickle.dump(model.to("cpu"), outfile)
-
-        return total_loss
-
-
-if __name__ == "__main__":
-    if len(sys.argv) > 1:
-        test_one_by_one(sys.argv[1])
-        exit(0)
-
-    if not ENABLE_TRAINING:
-        exit(0)
-
-    # Make our model output dir
-    os.makedirs("kudos_models", exist_ok=True)
-
-    study = optuna.create_study(
-        direction="minimize",
-        study_name=f"kudos_model_{STUDY_VERSION}",
-        storage=DB_CONNECTION_STRING,
-        load_if_exists=True,
-        sampler=OPTUNA_SAMPLER,
-    )
-    study.optimize(objective, n_trials=NUMBER_OF_STUDY_TRIALS)
-
-    # Print the best hyperparameters
-    print("Best trial:")
-    trial = study.best_trial
-    print("Value: ", trial.value)
-    print("Params: ")
-    for key, value in trial.params.items():
-        print(f"{key}: {value}")
-
-    # Calculate the accuracy of the best model
-    best_filename = f"kudos_models/kudos-{STUDY_VERSION}-{trial.number}.ckpt"
-    model = test_one_by_one(best_filename)
-    print(f"Best model file is: {best_filename}")
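Consuming a checkpoint produced by either version of the script is just unpickling plus one forward pass, which is what `load_model` and `payload_to_time` wrap. A sketch, where the trial number in the path is a placeholder:

```python
model = load_model("kudos_models/kudos-v21-42.ckpt")  # hypothetical best trial
model.eval()
print(payload_to_time(model, PAYLOAD_EXAMPLE), "seconds predicted")
```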
diff --git a/train.py b/train.py
new file mode 100644
index 00000000..c6e22827
--- /dev/null
+++ b/train.py
@@ -0,0 +1,528 @@
+# type: ignore
+#
+# Train a predictive model from horde payload inputs to predict inference time.
+#
+# Supports multi-processing, just run this multiple times and the processes will
+# automatically work together on the training. We are training with torch and searching
+# through network hyper parameters using Optuna.
+#
+# Requires two input files (both exactly the same format) which can be created by enabling
+# the SAVE_KUDOS_TRAINING_DATA constant in the worker.
+#   - inference-time-data.json
+#   - inference-time-data-validation.json
+#
+# The output is a series of model checkpoints, "kudos_models/kudos-X-n.ckpt" where n is the
+# number of the trial and X is the study version. Once the best trial number is identified
+# simply select the appropriate file.
+#
+# The stand-alone class in examples/kudos.py is the code to actually use the model.
+#
+# Trial results are stored in a local SQLite database (optuna_studies.db by default,
+# configurable with --db-path).
+#
+# For visualisation with optuna dashboard:
+#   optuna-dashboard sqlite:///optuna_studies.db
+#
+# This is a quick hack to assist with kudos calculation.
+import argparse
+import json
+import math
+import os
+import random
+import time
+
+import torch
+import torch.nn as nn
+from torch import optim
+from torch.utils.data import DataLoader, Dataset
+
+import hordelib
+
+hordelib.initialise()
+import pickle
+
+import optuna
+
+from hordelib.horde import HordeLib
+
+random.seed()
+
+# Database connection string for Optuna; overridden in __main__ from --db-path
+DB_CONNECTION_STRING = "sqlite:///optuna_studies.db"
+
+# # Where is our training data?
+# TRAINING_DATA_FILENAME = "f:/ai/dev/AI-Horde-Worker/inference-time-data.json"
+# VALIDATION_DATA_FILENAME = "f:/ai/dev/AI-Horde-Worker/inference-time-data-validation.json"
+
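Because the storage is now a plain SQLite file, any other process can open the same study to check progress, which is all the dashboard command above does. A read-only sketch, assuming the default `optuna_studies.db` and study version `v21` already exist:

```python
import optuna

study = optuna.load_study(
    study_name="kudos_model_v21",
    storage="sqlite:///optuna_studies.db",
)
print(len(study.trials), "trials so far, best validation loss:", study.best_value)
```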
+STUDY_VERSION = "v21" + +# Hyper parameter search bounds +MIN_NUMBER_OF_EPOCHS = 50 +MAX_NUMBER_OF_EPOCHS = 2000 +MAX_HIDDEN_LAYERS = 6 +MIN_NODES_IN_LAYER = 4 +MAX_NODES_IN_LAYER = 128 +MIN_LEARNING_RATE = 1e-5 +MAX_LEARNING_RATE = 1e-1 +MIN_WEIGHT_DECAY = 1e-6 +MAX_WEIGHT_DECAY = 1e-1 +MIN_DATA_BATCH_SIZE = 32 +MAX_DATA_BATCH_SIZE = 512 + +# The study sampler to use +OPTUNA_SAMPLER = optuna.samplers.TPESampler(n_startup_trials=30, n_ei_candidates=30) +# OPTUNA_SAMPLER = optuna.samplers.NSGAIISampler() # genetic algorithm + +# We have the following inputs to our kudos calculation, for example: +PAYLOAD_EXAMPLE = { + "sdk_api_job_info": { + "id_": "7ba3b75b-6926-4e78-ad42-6763fa15c262", + "ids": ["7ba3b75b-6926-4e78-ad42-6763fa15c262"], + "payload": { + "sampler_name": "k_euler", + "cfg_scale": 24.0, + "denoising_strength": None, + "seed": "2066405361", + "height": 1024, + "width": 768, + "seed_variation": None, + "post_processing": [], + "post_processing_order": "facefixers_first", + "tiling": False, + "hires_fix": False, + "hires_fix_denoising_strength": None, + "clip_skip": 1, + "control_type": None, + "image_is_control": False, + "return_control_map": False, + "facefixer_strength": None, + "loras": None, + "tis": None, + "extra_texts": None, + "workflow": None, + "transparent": False, + "use_nsfw_censor": False, + "ddim_steps": 40, + "n_iter": 1, + "scheduler": "karras", + "lora_count": 0, + "ti_count": 0, + }, + "model": "Dreamshaper", + "source_processing": "img2img", + "model_baseline": "stable_diffusion_1", + "extra_source_images_count": 0, + "extra_source_images_combined_size": 0, + "source_image_size": 0, + "source_mask_size": 0, + }, + "state": "ok", + "censored": False, + "time_popped": 1729837827.8703332, + "time_submitted": 1729837835.3562803, + "time_to_generate": 4.450331687927246, + "time_to_download_aux_models": None, +} +# And one output +# "time": 13.2032 + + +KNOWN_POST_PROCESSORS = [ + "RealESRGAN_x4plus", + "RealESRGAN_x2plus", + "RealESRGAN_x4plus_anime_6B", + "NMKD_Siax", + "4x_AnimeSharp", + "strip_background", + "GFPGAN", + "CodeFormers", +] +KNOWN_SCHEDULERS = [ + "simple", + "karras", +] +KNOWN_SCHEDULERS.sort() +KNOWN_SAMPLERS = sorted(set(HordeLib.SAMPLERS_MAP.keys())) +KNOWN_CONTROL_TYPES = list(set(HordeLib.CONTROLNET_IMAGE_PREPROCESSOR_MAP.keys())) +KNOWN_CONTROL_TYPES.append("None") +KNOWN_CONTROL_TYPES.sort() +KNOWN_SOURCE_PROCESSING = HordeLib.SOURCE_IMAGE_PROCESSING_OPTIONS[:] +KNOWN_SOURCE_PROCESSING.append("txt2img") +KNOWN_SOURCE_PROCESSING.sort() +KNOWN_MODEL_BASELINES = [ + "stable_diffusion_1", + "stable_diffusion_2", + "stable_diffusion_xl", + "stable_cascade", + "flux_1", +] +KNOWN_MODEL_BASELINES.sort() + + +def parse_args(): + parser = argparse.ArgumentParser(description="ML Training Script with configurable parameters") + + # Training control + parser.add_argument("--enable-training", action="store_true", default=False, help="Enable training mode") + + # Test mode + parser.add_argument("--test-model", type=str, help="Path to model file for testing one by one") + + # Database configuration + parser.add_argument( + "--db-path", + type=str, + default="optuna_studies.db", + help="Path to SQLite database file for Optuna", + ) + + # Data paths + parser.add_argument( + "--training-data", + type=str, + default="./inference-time-data.json", + help="Path to training data file", + ) + + parser.add_argument( + "--validation-data", + type=str, + default="./inference-time-data-validation.json", + help="Path to validation data file", + ) + + # Study 
+# This is an example of how to use the final model, pass in a horde payload, get back a predicted time in seconds
+def payload_to_time(model, payload):
+    inputs = KudosDataset.payload_to_tensor(payload).squeeze()
+    with torch.no_grad():
+        output = model(inputs)
+    return round(float(output.item()), 2)
+
+
+# This is how to load the model required above
+def load_model(model_filename):
+    with open(model_filename, "rb") as infile:
+        return pickle.load(infile)
+
+
+# This is just a helper for walking through the validation dataset one record at a time
+# and using the methods above to calculate an overall average percentage accuracy
+def test_one_by_one(model_filename):
+    # The data files hold a single JSON array of job records labelled with
+    # "time_to_generate", matching KudosDataset below; unlabelled records are skipped.
+    with open(VALIDATION_DATA_FILENAME) as infile:
+        dataset = [job for job in json.load(infile) if job["time_to_generate"] is not None]
+
+    model = load_model(model_filename)
+
+    perc = []
+    total_job_time = 0
+    total_time = 0
+    for data in dataset:
+        model_time = time.perf_counter()
+        predicted = payload_to_time(model, data)
+        total_time += time.perf_counter() - model_time
+        actual = round(data["time_to_generate"], 2)
+        total_job_time += data["time_to_generate"]
+
+        diff = abs(actual - predicted)
+        max_val = max(actual, predicted)
+        percentage_accuracy = (1 - diff / max_val) * 100
+
+        perc.append(percentage_accuracy)
+        # Print the data if very inaccurate prediction
+        if percentage_accuracy < 60:
+            print(data)
+            print(f"{predicted} predicted, {actual} actual ({round(percentage_accuracy, 1)}%)")
+
+    avg_perc = round(sum(perc) / len(perc), 1)
+    print(f"Average kudos calculation time {round((total_time*1000000)/len(perc))} micro-seconds")
+    print(f"Average actual job time in the dataset {round(total_job_time/len(perc), 2)} seconds")
+    print(f"Average accuracy = {avg_perc}%")
+
+
+class KudosDataset(Dataset):
+    def __init__(self, filename):
+        self.data = []
+        self.labels = []
+
+        with open(filename) as infile:
+            payload_list = json.load(infile)
+
+        for payload in payload_list:
+            if payload["time_to_generate"] is None:
+                continue
+            self.data.append(KudosDataset.payload_to_tensor(payload)[0])
+            self.labels.append(payload["time_to_generate"])
+
+        self.labels = torch.tensor(self.labels).float()
+        self.mixed_data = torch.stack(self.data)
+
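`KudosDataset` (and the test helper above) expect each data file to be a single JSON array of job records in the shape of `PAYLOAD_EXAMPLE`, with `time_to_generate` as the regression label. A heavily trimmed, illustrative record; a real one carries every field shown in `PAYLOAD_EXAMPLE`:

```python
example_records = [
    {
        "sdk_api_job_info": {
            "payload": {"sampler_name": "k_euler", "cfg_scale": 24.0, "ddim_steps": 40},
            "model_baseline": "stable_diffusion_1",
            "source_processing": "img2img",
        },
        "time_to_generate": 4.45,  # label: seconds the job actually took
    },
]
```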
p.get("image_is_control", True) else 0.0, + 1.0 if p.get("return_control_map", True) else 0.0, + 1.0 if p.get("transparent", True) else 0.0, + 1.0 if p.get("source_image", True) else 0.0, + 1.0 if p.get("source_mask", True) else 0.0, + 1.0 if p.get("tiling", True) else 0.0, + 1.0 if p.get("post_processing_order", "facefixers_first") == "facefixers_first" else 0.0, + ], + ) + data_model_baseline.append( + payload["model_baseline"] if payload["model_baseline"] in KNOWN_MODEL_BASELINES else "stable_diffusion_xl", + ) + data_schedulers.append(p["scheduler"]) + data_samplers.append(p["sampler_name"] if p["sampler_name"] in KNOWN_SAMPLERS else "k_euler") + data_control_types.append( + p.get("control_type", "None") if p.get("control_type", "None") is not None else "None", + ) + data_source_processing_types.append(payload.get("source_processing", "txt2img")) + data_post_processors = p.get("post_processing", [])[:] + _data_floats = torch.tensor(data).float() + _data_model_baselines = cls.one_hot_encode(data_model_baseline, KNOWN_MODEL_BASELINES) + _data_samplers = cls.one_hot_encode(data_samplers, KNOWN_SAMPLERS) + _data_schedulers = cls.one_hot_encode(data_schedulers, KNOWN_SCHEDULERS) + _data_control_types = cls.one_hot_encode(data_control_types, KNOWN_CONTROL_TYPES) + _data_source_processing_types = cls.one_hot_encode(data_source_processing_types, KNOWN_SOURCE_PROCESSING) + _data_post_processors = cls.one_hot_encode_combined(data_post_processors, KNOWN_POST_PROCESSORS) + return torch.cat( + ( + _data_floats, + _data_model_baselines, + _data_samplers, + _data_schedulers, + _data_control_types, + _data_source_processing_types, + _data_post_processors, + ), + dim=1, + ) + + @classmethod + def one_hot_encode(cls, strings, unique_strings): + one_hot = torch.zeros(len(strings), len(unique_strings)) + for i, string in enumerate(strings): + one_hot[i, unique_strings.index(string)] = 1 + return one_hot + + @classmethod + def one_hot_encode_combined(cls, strings, unique_strings): + one_hot = torch.zeros(len(strings), len(unique_strings)) + for i, string in enumerate(strings): + one_hot[i, unique_strings.index(string)] = 1 + + return torch.sum(one_hot, dim=0, keepdim=True) + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + return self.mixed_data[idx], self.labels[idx] + + +def create_sequential_model(trial, layer_sizes, input_size, output_size=1): + # Define the layer sizes + layer_sizes = [input_size] + layer_sizes + [output_size] + + # Create the layers and activation functions + layers = [] + for i in range(len(layer_sizes) - 1): + layers.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1])) + if i < len(layer_sizes) - 2: + layers.append(nn.ReLU()) # Use ReLU activation for all layers except the last one + # Add a dropout layer + if i > 0: + drop = trial.suggest_float(f"dropout_l{i}", 0.05, 0.2, log=True) + layers.append(nn.Dropout(drop)) + + # Create the nn.Sequential model + return nn.Sequential(*layers) + + +def objective(trial): + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + trial.set_user_attr("name", "predict_kudos") + + # Network topology + input_size = len(KudosDataset.payload_to_tensor(PAYLOAD_EXAMPLE)[0]) + num_hidden_layers = trial.suggest_int("hidden_layers", 1, MAX_HIDDEN_LAYERS, log=True) + layers = [] + for i in range(num_hidden_layers): + layers.append( + trial.suggest_int(f"hidden_layer_{i}_size", MIN_NODES_IN_LAYER, MAX_NODES_IN_LAYER, log=True), + ) + output_size = 1 # we want just the predicted time in seconds + + # Create the 
+def objective(trial):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    trial.set_user_attr("name", "predict_kudos")
+
+    # Network topology
+    input_size = len(KudosDataset.payload_to_tensor(PAYLOAD_EXAMPLE)[0])
+    num_hidden_layers = trial.suggest_int("hidden_layers", 1, MAX_HIDDEN_LAYERS, log=True)
+    layers = []
+    for i in range(num_hidden_layers):
+        layers.append(
+            trial.suggest_int(f"hidden_layer_{i}_size", MIN_NODES_IN_LAYER, MAX_NODES_IN_LAYER, log=True),
+        )
+    output_size = 1  # we want just the predicted time in seconds
+
+    # Create the network
+    model = create_sequential_model(trial, layers, input_size, output_size).to(device)
+
+    # Optimiser
+    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
+    lr = trial.suggest_float("lr", MIN_LEARNING_RATE, MAX_LEARNING_RATE, log=True)
+    weight_decay = trial.suggest_float("weight_decay", MIN_WEIGHT_DECAY, MAX_WEIGHT_DECAY, log=True)
+
+    optimizer = None
+
+    if optimizer_name == "Adam":
+        optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
+    elif optimizer_name == "RMSprop":
+        optimizer = optim.RMSprop(model.parameters(), lr=lr, weight_decay=weight_decay)
+    elif optimizer_name == "SGD":
+        optimizer = optim.SGD(model.parameters(), lr=lr, weight_decay=weight_decay)
+
+    if optimizer is None:
+        raise Exception("Unknown optimizer")
+
+    # Load training dataset
+    train_dataset = KudosDataset(TRAINING_DATA_FILENAME)
+    batch_start = int(math.ceil(math.log2(MIN_DATA_BATCH_SIZE)))
+    batch_end = int(math.floor(math.log2(MAX_DATA_BATCH_SIZE)))
+    batch_sizes = [2**i for i in range(batch_start, batch_end + 1)]
+    batch = trial.suggest_categorical("batch_size", batch_sizes)
+    train_loader = DataLoader(train_dataset, batch_size=batch, shuffle=True)
+
+    # Load the validation dataset
+    validate_dataset = KudosDataset(VALIDATION_DATA_FILENAME)
+    validate_loader = DataLoader(validate_dataset, batch_size=64, shuffle=True)
+
+    # Loss function
+    criterion = nn.MSELoss()
+
+    num_epochs = trial.suggest_int("num_epochs", MIN_NUMBER_OF_EPOCHS, MAX_NUMBER_OF_EPOCHS)
+    total_loss = None
+    for _ in range(num_epochs):
+        # Train the model
+        model.train()
+        for data, labels in train_loader:
+            data = data.to(device)
+            labels = labels.to(device)
+            optimizer.zero_grad()
+            labels = labels.unsqueeze(1)
+            outputs = model(data)
+            loss = criterion(outputs, labels)
+            loss.backward()
+            optimizer.step()
+
+        model.eval()
+        total_loss = 0
+        with torch.no_grad():
+            for data, labels in validate_loader:
+                data = data.to(device)
+                labels = labels.to(device)
+                outputs = model(data)
+                labels = labels.unsqueeze(1)
+                loss = criterion(outputs, labels)
+                total_loss += loss
+
+        total_loss /= len(validate_loader)
+        total_loss = round(float(total_loss), 2)
+
+    # Pickle it as we'll forget the model architecture
+    filename = f"kudos_models/kudos-{STUDY_VERSION}-{trial.number}.ckpt"
+    with open(filename, "wb") as outfile:
+        pickle.dump(model.to("cpu"), outfile)
+
+    return total_loss
+
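One easily missed detail in the loops above is `labels.unsqueeze(1)`: the network outputs shape `(batch, 1)` while the dataset yields `(batch,)`, and without the reshape `nn.MSELoss` would broadcast the pair to `(batch, batch)` and quietly compute the wrong loss. A minimal shape check:

```python
import torch
import torch.nn as nn

outputs = torch.zeros(4, 1)                     # what the model returns
labels = torch.tensor([1.0, 2.0, 3.0, 4.0])     # what the DataLoader yields
criterion = nn.MSELoss()
print(criterion(outputs, labels.unsqueeze(1)))  # tensor(7.5000), mean over 4 pairs
```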
+def main():
+
+    if args.test_model:
+        test_one_by_one(args.test_model)
+        return
+
+    if not ENABLE_TRAINING:
+        return
+
+    # Make our model output dir
+    os.makedirs("kudos_models", exist_ok=True)
+
+    # Create the database directory if it doesn't exist
+    db_dir = os.path.dirname(os.path.abspath(args.db_path))
+    if db_dir:
+        os.makedirs(db_dir, exist_ok=True)
+
+    study = optuna.create_study(
+        direction="minimize",
+        study_name=f"kudos_model_{STUDY_VERSION}",
+        storage=DB_CONNECTION_STRING,
+        load_if_exists=True,
+        sampler=OPTUNA_SAMPLER,
+    )
+    study.optimize(objective, n_trials=NUMBER_OF_STUDY_TRIALS)
+
+    # Print the best hyperparameters
+    print("Best trial:")
+    trial = study.best_trial
+    print("Value: ", trial.value)
+    print("Params: ")
+    for key, value in trial.params.items():
+        print(f"{key}: {value}")
+
+    # Calculate the accuracy of the best model
+    best_filename = f"kudos_models/kudos-{STUDY_VERSION}-{trial.number}.ckpt"
+    test_one_by_one(best_filename)
+    print(f"Best model file is: {best_filename}")
+
+
+if __name__ == "__main__":
+    # Parse command line arguments
+    args = parse_args()
+
+    # Set random seed
+    random.seed()
+
+    # Global constants now derived from args
+    ENABLE_TRAINING = args.enable_training
+    TRAINING_DATA_FILENAME = args.training_data
+    VALIDATION_DATA_FILENAME = args.validation_data
+    NUMBER_OF_STUDY_TRIALS = args.study_trials
+    STUDY_VERSION = args.study_version
+
+    # Create SQLite connection string
+    DB_CONNECTION_STRING = f"sqlite:///{args.db_path}"
+    main()
diff --git a/hordelib/merge_train_files.py b/train_merge_files.py
similarity index 100%
rename from hordelib/merge_train_files.py
rename to train_merge_files.py