diff --git a/carte/__init__.py b/carte/__init__.py new file mode 100644 index 0000000..c70c045 --- /dev/null +++ b/carte/__init__.py @@ -0,0 +1,4 @@ +from carte.src import * +from carte.configs import * +#from carte.data import * +from carte.scripts import * \ No newline at end of file diff --git a/carte/configs/__init__.py b/carte/configs/__init__.py new file mode 100644 index 0000000..69d54b2 --- /dev/null +++ b/carte/configs/__init__.py @@ -0,0 +1,4 @@ +from carte.configs.carte_configs import * +from carte.configs.directory import * +from carte.configs.model_parameters import * +from carte.configs.visuailization import * diff --git a/carte/configs/carte_configs.py b/carte/configs/carte_configs.py new file mode 100644 index 0000000..9d46cd5 --- /dev/null +++ b/carte/configs/carte_configs.py @@ -0,0 +1,166 @@ +"""Specific configurations for the CARTE paper.""" + +## Dataset names +carte_datalist = [ + "anime_planet", + "babies_r_us", + "beer_ratings", + "bikedekho", + "bikewale", + "buy_buy_baby", + "cardekho", + "chocolate_bar_ratings", + "clear_corpus", + "coffee_ratings", + "company_employees", + "employee_remuneration", + "employee_salaries", + "fifa22_players", + "filmtv_movies", + "journal_jcr", + "journal_sjr", + "jp_anime", + "k_drama", + "michelin", + "mlds_salaries", + "movies", + "museums", + "mydramalist", + "nba_draft", + "prescription_drugs", + "ramen_ratings", + "roger_ebert", + "rotten_tomatoes", + "spotify", + "us_accidents_counts", + "us_accidents_severity", + "us_presidential", + "used_cars_24", + "used_cars_benz_italy", + "used_cars_dot_com", + "used_cars_pakistan", + "used_cars_saudi_arabia", + "videogame_sales", + "whisky", + "wikiliq_beer", + "wikiliq_spirit", + "wina_pl", + "wine_dot_com_prices", + "wine_dot_com_ratings", + "wine_enthusiasts_prices", + "wine_enthusiasts_ratings", + "wine_vivino_price", + "wine_vivino_rating", + "yelp", + "zomato", +] + +## Dictionary of baseline methods +carte_singletable_baselines = dict() +carte_singletable_baselines["full"] = [ + "carte-gnn", + "catboost", + "sentence-llm-concat-num_histgb", + "sentence-llm-concat-num_xgb", + "sentence-llm-embed-num_histgb", + "sentence-llm-embed-num_xgb", + "tablevectorizer-fasttext_histgb", + "tablevectorizer-fasttext_xgb", + "tablevectorizer-llm_histgb", + "tablevectorizer-llm_xgb", + "tablevectorizer_histgb", + "tablevectorizer_logistic", + "tablevectorizer_mlp", + "tablevectorizer_randomforest", + "tablevectorizer_resnet", + "tablevectorizer_ridge", + "tablevectorizer_xgb", + "tablevectorizer_tabpfn", + "target-encoder_histgb", + "target-encoder_logistic", + "target-encoder_mlp", + "target-encoder_randomforest", + "target-encoder_resnet", + "target-encoder_ridge", + "target-encoder_xgb", + "target-encoder_tabpfn", +] + +carte_singletable_baselines["reduced"] = [ + "carte-gnn", + "catboost", + "sentence-llm-concat-num_xgb", + "sentence-llm-embed-num_xgb", + "tablevectorizer_logistic", + "tablevectorizer_mlp", + "tablevectorizer_randomforest", + "tablevectorizer_resnet", + "tablevectorizer_ridge", + "tablevectorizer_xgb", + "target-encoder_tabpfn", +] + +carte_multitable_baselines = [ + "original_carte-multitable", + "matched_carte-multitable", + "original_catboost-multitable", + "matched_catboost-multitable", + "original-sentence-llm_histgb-multitable", + "matched-sentence-llm_histgb-multitable", +] + + +## Dictionary of method mapping +carte_singletable_baseline_mapping = dict() +carte_singletable_baseline_mapping["carte-gnn"] = "CARTE" + +# Preprocessings 
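+# These prefix and estimator fragments are presumably composed by substring
+# replacement to build the display names used in tables and figures, e.g.
+# "tablevectorizer_xgb" -> "TabVec-XGB" and "sentence-llm-concat-num_histgb" -> "S-LLM-CN-HGB".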
+carte_singletable_baseline_mapping["tablevectorizer_"] = "TabVec-" +carte_singletable_baseline_mapping["tablevectorizer-"] = "TabVec-" +carte_singletable_baseline_mapping["target-encoder_"] = "TarEnc-" +carte_singletable_baseline_mapping["fasttext_"] = "FT-" +carte_singletable_baseline_mapping["llm_"] = "LLM-" +carte_singletable_baseline_mapping["sentence-llm-concat-num_"] = "S-LLM-CN-" +carte_singletable_baseline_mapping["sentence-llm-embed-num_"] = "S-LLM-EN-" + +# Estimators +carte_singletable_baseline_mapping["catboost"] = "CatBoost" +carte_singletable_baseline_mapping["xgb"] = "XGB" +carte_singletable_baseline_mapping["histgb"] = "HGB" +carte_singletable_baseline_mapping["randomforest"] = "RF" +carte_singletable_baseline_mapping["ridge"] = "Ridge" +carte_singletable_baseline_mapping["logistic"] = "Logistic" +carte_singletable_baseline_mapping["mlp"] = "MLP" +carte_singletable_baseline_mapping["resnet"] = "ResNet" +carte_singletable_baseline_mapping["tabpfn"] = "TabPFN" + +# Bagging +carte_singletable_baseline_mapping["bagging"] = "Bagging" + +## Colors for visualization +carte_singletable_color_palette = dict() +carte_singletable_color_palette["CARTE"] = "C3" +carte_singletable_color_palette["CatBoost"] = "C0" +carte_singletable_color_palette["TabVec-XGB"] = "C1" +carte_singletable_color_palette["TabVec-RF"] = "C2" +carte_singletable_color_palette["TabVec-Ridge"] = "C4" +carte_singletable_color_palette["TabVec-Logistic"] = "C5" +carte_singletable_color_palette["S-LLM-CN-XGB"] = "C6" +carte_singletable_color_palette["S-LLM-EN-XGB"] = "C7" +carte_singletable_color_palette["TabVec-ResNet"] = "C8" +carte_singletable_color_palette["TabVec-MLP"] = "C9" +carte_singletable_color_palette["TarEnc-TabPFN"] = "#A9561E" + +## Markers for visualization +carte_singletable_markers = dict() +carte_singletable_markers["CARTE"] = "o" +carte_singletable_markers["TabVec-XGB"] = (4, 0, 45) +carte_singletable_markers["TabVec-RF"] = "P" +carte_singletable_markers["CatBoost"] = "X" +carte_singletable_markers["S-LLM-CN-XGB"] = (4, 0, 0) +carte_singletable_markers["S-LLM-EN-XGB"] = "d" +carte_singletable_markers["TabVec-Ridge"] = "v" +carte_singletable_markers["TabVec-Logistic"] = "v" +carte_singletable_markers["TabVec-ResNet"] = "^" +carte_singletable_markers["TabVec-MLP"] = "p" +carte_singletable_markers["TarEnc-TabPFN"] = (5, 1, 0) diff --git a/carte/configs/directory.py b/carte/configs/directory.py new file mode 100644 index 0000000..0130146 --- /dev/null +++ b/carte/configs/directory.py @@ -0,0 +1,24 @@ +""" +Configurations for directory +""" + +from pathlib import Path + +base_path = Path().cwd() +config_directory = dict() +config_directory["base_path"] = base_path + +config_directory["data"] = str(base_path / "data/") +config_directory["pretrained_model"] = str(base_path / "data/etc/kg_pretrained.pt") +config_directory["data_raw"] = str(base_path / "data/data_raw/") +config_directory["data_singletable"] = str(base_path / "data/data_singletable/") +config_directory["data_yago"] = str(base_path / "data/data_yago/") +config_directory["etc"] = str(base_path / "data/etc/") + +config_directory["results"] = str(base_path / "results/") +config_directory["compiled_results"] = str(base_path / "results/compiled_results/") +config_directory["visualization"] = str(base_path / "visualization/") + +# Specify the directory in which you have downloaded each +config_directory["fasttext"] = str(base_path / "data/etc/cc.en.300.bin") +config_directory["ken_embedding"] = str(base_path / "data/etc/ken_embedding.parquet") diff 
--git a/carte/configs/model_parameters.py b/carte/configs/model_parameters.py new file mode 100644 index 0000000..70265f4 --- /dev/null +++ b/carte/configs/model_parameters.py @@ -0,0 +1,148 @@ +""" +Parameter distributions for hyperparameter optimization +""" + +import numpy as np +from scipy.stats import loguniform, randint, uniform, norm +import copy + + +class loguniform_int: + """Integer valued version of the log-uniform distribution""" + + def __init__(self, a, b): + self._distribution = loguniform(a, b) + + def rvs(self, *args, **kwargs): + """Random variable sample""" + return self._distribution.rvs(*args, **kwargs).astype(int) + + +class norm_int: + """Integer valued version of the normal distribution""" + + def __init__(self, a, b): + self._distribution = norm(a, b) + + def rvs(self, *args, **kwargs): + """Random variable sample""" + if self._distribution.rvs(*args, **kwargs).astype(int) < 1: + return 1 + else: + return self._distribution.rvs(*args, **kwargs).astype(int) + + +param_distributions_total = dict() + +# carte-gnn +param_distributions = dict() +lr_grid = [1e-4, 2.5e-4, 5e-4, 7.5e-4, 1e-3] +param_distributions["learning_rate"] = lr_grid +param_distributions_total["carte-gnn"] = param_distributions + +# histgb +param_distributions = dict() +param_distributions["learning_rate"] = loguniform(1e-2, 10) +param_distributions["max_depth"] = [None, 2, 3, 4] +param_distributions["max_leaf_nodes"] = norm_int(31, 5) +param_distributions["min_samples_leaf"] = norm_int(20, 2) +param_distributions["l2_regularization"] = loguniform(1e-6, 1e3) +param_distributions_total["histgb"] = param_distributions + +# catboost +param_distributions = dict() +param_distributions["max_depth"] = randint(2, 11) +param_distributions["learning_rate"] = loguniform(1e-5, 1) +param_distributions["bagging_temperature"] = uniform(0, 1) +param_distributions["l2_leaf_reg"] = loguniform(1, 10) +param_distributions["iterations"] = randint(400, 1001) +param_distributions["one_hot_max_size"] = randint(2, 26) +param_distributions_total["catboost"] = param_distributions + +# xgb +param_distributions = dict() +param_distributions["n_estimators"] = randint(50, 1001) +param_distributions["max_depth"] = randint(2, 11) +param_distributions["min_child_weight"] = loguniform(1, 100) +param_distributions["subsample"] = uniform(0.5, 1 - 0.5) +param_distributions["learning_rate"] = loguniform(1e-5, 1) +param_distributions["colsample_bylevel"] = uniform(0.5, 1 - 0.5) +param_distributions["colsample_bytree"] = uniform(0.5, 1 - 0.5) +param_distributions["gamma"] = loguniform(1e-8, 7) +param_distributions["lambda"] = loguniform(1, 4) +param_distributions["alpha"] = loguniform(1e-8, 100) +param_distributions_total["xgb"] = param_distributions + +# RandomForest +param_distributions = dict() +param_distributions["n_estimators"] = randint(50, 250) +param_distributions["max_depth"] = [None, 2, 3, 4] +param_distributions["max_features"] = [ + "sqrt", + "log2", + None, + 0.1, + 0.2, + 0.3, + 0.4, + 0.5, + 0.6, + 0.7, + 0.8, + 0.9, +] +param_distributions["min_samples_leaf"] = loguniform_int(0.5, 50.5) +param_distributions["bootstrap"] = [True, False] +param_distributions["min_impurity_decrease"] = [0.0, 0.01, 0.02, 0.05] +param_distributions_total["randomforest"] = param_distributions + + +# resnet +param_distributions = dict() +param_distributions["normalization"] = ["batchnorm", "layernorm"] +param_distributions["num_layers"] = randint(1, 9) +param_distributions["hidden_dim"] = randint(32, 513) +param_distributions["hidden_factor"] = 
randint(1, 3) +param_distributions["hidden_dropout_prob"] = uniform(0.0, 0.5) +param_distributions["residual_dropout_prob"] = uniform(0.0, 0.5) +param_distributions["learning_rate"] = loguniform(1e-5, 1e-2) +param_distributions["weight_decay"] = loguniform(1e-8, 1e-2) +param_distributions["batch_size"] = [16, 32] +param_distributions_total["resnet"] = param_distributions + +# mlp +param_distributions = dict() +param_distributions["hidden_dim"] = [2**x for x in range(4, 11)] +param_distributions["num_layers"] = randint(1, 5) +param_distributions["dropout_prob"] = uniform(0.0, 0.5) +param_distributions["learning_rate"] = loguniform(1e-5, 1e-2) +param_distributions["weight_decay"] = loguniform(1e-8, 1e-2) +param_distributions["batch_size"] = [16, 32] +param_distributions_total["mlp"] = param_distributions + +# ridge regression +param_distributions = dict() +param_distributions["solver"] = ["svd", "cholesky", "lsqr", "sag"] +param_distributions["alpha"] = loguniform(1e-5, 100) +param_distributions_total["ridge"] = param_distributions + +# logistic regression +param_distributions = dict() +param_distributions["solver"] = ["newton-cg", "lbfgs", "liblinear"] +param_distributions["penalty"] = ["none", "l1", "l2", "elasticnet"] +param_distributions["C"] = loguniform(1e-5, 100) +param_distributions_total["logistic"] = param_distributions + +# tabpfn +param_distributions = dict() +param_distributions_total["tabpfn"] = param_distributions + +# catboost-multitable +param_distributions = copy.deepcopy(param_distributions_total["catboost"]) +param_distributions["source_fraction"] = uniform(0, 1) +param_distributions_total["catboost-multitable"] = param_distributions + +# histgb-multitable +param_distributions = copy.deepcopy(param_distributions_total["histgb"]) +param_distributions["source_fraction"] = uniform(0, 1) +param_distributions_total["histgb-multitable"] = param_distributions diff --git a/carte/configs/visuailization.py b/carte/configs/visuailization.py new file mode 100644 index 0000000..4babbbc --- /dev/null +++ b/carte/configs/visuailization.py @@ -0,0 +1,43 @@ +""" +Visualization configurations +""" + +# Main models +model_color_palette = dict() +model_color_palette["CARTE"] = "C3" +model_color_palette["CatBoost"] = "C0" +model_color_palette["TabVec-XGB"] = "C1" +model_color_palette["TabVec-RF"] = "C2" +model_color_palette["TabVec-Ridge"] = "C4" +model_color_palette["TabVec-Logistic"] = "C5" +model_color_palette["S-LLM-CN-XGB"] = "C6" # "" +model_color_palette["S-LLM-EN-XGB"] = "C7" # "C7" "#C875C4" mediumorchid +model_color_palette["ResNet"] = "C8" +model_color_palette["MLP"] = "C9" +model_color_palette["TabPFN"] = "#A9561E" + +model_color_palette["TabVec-RandomForest"] = "C2" +model_color_palette["TabVec-ResNet"] = "C8" +model_color_palette["TabVec-MLP"] = "C9" +model_color_palette["TarEnc-TabPFN"] = "#A9561E" + + +# model_color_palette["CARTE-B"] = "C3" +# model_color_palette["CatBoost-B"] = "C0" +# model_color_palette["TabVec-XGB-B"] = "C1" +# model_color_palette["TabVec-RF-B"] = "C2" +# model_color_palette["TabVec-Ridge-B"] = "C4" +# model_color_palette["TabVec-Logistic-B"] = "C5" +# model_color_palette["S-LLM-CN-XGB-B"] = "C6" +# model_color_palette["S-LLM-EN-XGB-B"] = "C7" +# model_color_palette["ResNet-B"] = "C8" +# model_color_palette["MLP-B"] = "C9" +# model_color_palette["TabPFN-B"] = "#A9561E" + + +# model_color_palette["TabVec-HGB"] = "#650021" +# model_color_palette["TabVec-TabPFN"] = "#650021" +# model_color_palette["TabVec-FT-XGB"] = "#650021" +# 
model_color_palette["TabVec-FT-HGB"] = "#650021" + +# model_color_palette["TabLLM"] = "#653700" diff --git a/carte/data/__init__.py b/carte/data/__init__.py new file mode 100644 index 0000000..d153ea5 --- /dev/null +++ b/carte/data/__init__.py @@ -0,0 +1 @@ +from carte.data.data_singletable import * \ No newline at end of file diff --git a/carte/scripts/__init__.py b/carte/scripts/__init__.py new file mode 100644 index 0000000..ac3c58f --- /dev/null +++ b/carte/scripts/__init__.py @@ -0,0 +1,5 @@ +from carte.scripts.compile_results_singletable import * +from carte.scripts.download_data import * +from carte.scripts.evaluate_singletable import * +from carte.scripts.preprocess_lm import * +from carte.scripts.preprocess_raw import * \ No newline at end of file diff --git a/carte/scripts/compile_results_singletable.py b/carte/scripts/compile_results_singletable.py new file mode 100644 index 0000000..c50dc48 --- /dev/null +++ b/carte/scripts/compile_results_singletable.py @@ -0,0 +1,67 @@ +"""Script for compling results""" + +# >>> +if __name__ == "__main__": + import os + import sys + + _project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + os.environ["PROJECT_DIR"] = _project_dir + sys.path.append(_project_dir) + del _project_dir +# <<< + +import json +from glob import glob +from carte.configs.directory import config_directory +import numpy as np +import pandas as pd + + +def _load_config(data_name): + config_data_dir = ( + f"{config_directory['data_singletable']}/{data_name}/config_data.json" + ) + filename = open(config_data_dir) + config_data = json.load(filename) + filename.close() + return config_data + + +if __name__ == "__main__": + + result_dir_base = f"{config_directory['results']}/singletable" + result_filenames = glob(f"{result_dir_base}/*/*.csv*") + + df_score = pd.DataFrame() + for path in result_filenames: + data_name = path.split("/")[-2] + file_name = path.split("/")[-1] + method_name = file_name.split(f"{data_name}_")[1].split("_num_train")[0] + num_train = file_name.split("num_train-")[1].split("_")[0] + random_state = file_name.split("rs-")[1].split(".csv")[0] + + config_data = _load_config(data_name) + task = config_data["task"] + score_measure = "r2" if task == "regression" else "roc_auc" + + score_ = pd.read_csv(path) + score_col = [col for col in score_.columns if score_measure in col][0] + score_[score_col].iloc[0] + + df_score_ = dict() + df_score_["model"] = method_name + df_score_["score"] = score_[score_col].iloc[0] + df_score_["data_name"] = data_name + df_score_["num_train"] = num_train + df_score_["random_state"] = random_state + df_score_["task"] = task + df_score_ = pd.DataFrame([df_score_]) + + df_score = pd.concat([df_score, df_score_], axis=0) + + df_score.reset_index(drop=True, inplace=True) + save_dir = ( + f"{config_directory['compiled_results']}/results_carte_baseline_singletable.csv" + ) + df_score.to_csv(save_dir, index=False) diff --git a/carte/scripts/download_data.py b/carte/scripts/download_data.py new file mode 100644 index 0000000..af08cd5 --- /dev/null +++ b/carte/scripts/download_data.py @@ -0,0 +1,150 @@ +"""Script for downloading required data.""" + +# >>> +if __name__ == '__main__': + import os + import sys + + _project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + os.environ['PROJECT_DIR'] = _project_dir + sys.path.append(_project_dir) + del _project_dir +# <<< + +import shutil +import os +import requests +from zipfile import ZipFile +from carte.configs.directory import config_directory + + +def 
_download_with_request(url, download_path): + req = requests.get(url, stream=True) + with open(download_path,'wb') as f: + for chunk in req.iter_content(chunk_size=8192): + f.write(chunk) + + +def _download_fasttext(): + import fasttext.util + fasttext.util.download_model('en', if_exists='ignore') + ft_path = str(config_directory["base_path"] / "cc.en.300.bin") + shutil.move(ft_path, config_directory["fasttext"]) + os.remove(str(config_directory["base_path"] / "cc.en.300.bin.gz")) + + +def _download_ken(): + ken_url = "https://figshare.com/ndownloader/files/39142985" + ken_path = config_directory["ken_embedding"] + _download_with_request(ken_url, ken_path) + + +def _download_raw(option="carte"): + url = "https://huggingface.co/datasets/inria-soda/carte-benchmark/resolve/main/data_raw.zip" + download_path = str(config_directory["base_path"] / "data_raw.zip") + _download_with_request(url, download_path) + if option == "carte": + carte_example_data = ["wina_pl", "spotify", "wine_dot_com_prices", "wine_vivino_price"] + with ZipFile(download_path, 'r') as zObject: + for name in carte_example_data: + raw_data_path = f"data_raw/{name}.csv" + zObject.extract(raw_data_path, path=f"{config_directory['data']}") + elif option == "full": + with ZipFile(download_path, 'r') as zObject: + zObject.extractall(path=config_directory["data"]) + zObject.close() + os.remove(download_path) + + +def _download_preprocessed(option="carte", include_llm=False): + if include_llm: + url = "https://huggingface.co/datasets/inria-soda/carte-benchmark/resolve/main/data_singletable.zip" + else: + url = "https://huggingface.co/datasets/inria-soda/carte-benchmark/resolve/main/data_singletable_light.zip" + download_path = str(config_directory["base_path"] / "data_singletable.zip") + _download_with_request(url, download_path) + if option == "carte": + carte_example_data = ["wina_pl", "spotify", "wine_dot_com_prices", "wine_vivino_price"] + with ZipFile(download_path, 'r') as zObject: + for name in carte_example_data: + raw_data_path = f"data_singletable/{name}/raw.parquet" + config_path = f"data_singletable/{name}/config_data.json" + zObject.extract(raw_data_path, path=f"{config_directory['data']}") + zObject.extract(config_path, path=f"{config_directory['data']}") + if include_llm: + external_path = f"data_singletable/{name}/external.pickle" + zObject.extract(external_path, path=f"{config_directory['data']}") + elif option == "full": + with ZipFile(download_path, 'r') as zObject: + zObject.extractall(path=config_directory["data"]) + zObject.close() + os.remove(download_path) + + +# Main +def main(option = "carte", include_raw = False, include_ken = False): + + if os.path.exists(config_directory["fasttext"]): + pass + else: + _download_fasttext() + + if option == "carte": + option_ = "full" + else: + if option == "basic_examples": + option_, include_llm = "carte", False + elif option == "full_examples": + option_, include_llm = "full", False + elif option == "full_benchmark": + option_, include_llm = "full", True + _download_preprocessed(option_, include_llm) + + if include_raw: + _download_raw(option=option_) + + if include_ken: + _download_ken() + + return None + +if __name__ == "__main__": + + # Set parser + import argparse + + parser = argparse.ArgumentParser(description="Download data.") + parser.add_argument( + "-op", + "--option", + type=str, + help="option for downloading", + ) + parser.add_argument( + "-ir", + "--include_raw", + type=str, + help="include raw data for downloading", + ) + parser.add_argument( + "-ik", + 
"--include_ken", + type=str, + help="include ken data for downloading", + ) + args = parser.parse_args() + + if args.include_raw == "True": + include_raw = True + else: + include_raw = False + + if args.include_ken == "True": + include_ken = True + else: + include_ken = False + + main(args.option, include_raw, include_ken) + + + diff --git a/carte/scripts/download_data.sh b/carte/scripts/download_data.sh new file mode 100644 index 0000000..401fffe --- /dev/null +++ b/carte/scripts/download_data.sh @@ -0,0 +1,9 @@ +# Download data. See README for information on the variables. + +ENV_NAME="myenv" # Change the environment name accordingly +OPTIONS="basic_examples" +INCLUDE_RAW="False" +INCLUDE_KEN="False" + +conda run -n $ENV_NAME python -W ignore scripts/download_data.py -op $OPTIONS -ir $INCLUDE_RAW -ik $INCLUDE_KEN + diff --git a/carte/scripts/evaluate_singletable.py b/carte/scripts/evaluate_singletable.py new file mode 100644 index 0000000..0c25114 --- /dev/null +++ b/carte/scripts/evaluate_singletable.py @@ -0,0 +1,703 @@ +"""Script for evalutating a model of choice for singletables.""" + +# >>> +if __name__ == "__main__": + import os + import sys + + _project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + os.environ["PROJECT_DIR"] = _project_dir + sys.path.append(_project_dir) + del _project_dir +# <<< + +import os +import pickle +import json +import pandas as pd +import numpy as np +import copy + +from sklearn.pipeline import Pipeline +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import MinMaxScaler +from category_encoders import TargetEncoder +from sklearn.impute import SimpleImputer +from sklearn.decomposition import PCA +from sklearn.model_selection import ParameterGrid +from carte.configs.directory import config_directory +from carte.configs.carte_configs import carte_datalist, carte_singletable_baselines +from carte.src.evaluate_utils import * +from carte.src.carte_estimator_new import CARTERegressor, CARTEClassifier +from catboost import CatBoostRegressor, CatBoostClassifier +from xgboost import XGBRegressor, XGBClassifier +from tabpfn import TabPFNClassifier +from sklearn.ensemble import ( + HistGradientBoostingRegressor, + HistGradientBoostingClassifier, + RandomForestRegressor, + RandomForestClassifier, + BaggingRegressor, + BaggingClassifier, +) +from sklearn.linear_model import Ridge, LogisticRegression +from carte.src.baseline_singletable_nn import ( + MLPRegressor, + MLPClassifier, + RESNETRegressor, + RESNETClassifier, +) + + +def _load_data(data_name): + """Load data, external data, and configs.""" + + data_dir = f"{config_directory['data_singletable']}/{data_name}/raw.parquet" + data_additional_dir = ( + f"{config_directory['data_singletable']}/{data_name}/external.pickle" + ) + data = pd.read_parquet(data_dir) + data.fillna(value=np.nan, inplace=True) + with open(data_additional_dir, "rb") as pickle_file: + data_additional = pickle.load(pickle_file) + config_data_dir = ( + f"{config_directory['data_singletable']}/{data_name}/config_data.json" + ) + filename = open(config_data_dir) + config_data = json.load(filename) + filename.close() + return data, data_additional, config_data + + +def _prepare_carte_gnn( + data, + data_config, + num_train, + random_state, +): + """Preprocess for CARTE (graph construction).""" + from carte_table_to_graph_old import Table2GraphTransformer + + data_ = data.copy() + X_train, X_test, y_train, y_test = set_split( + data_, + data_config, + num_train, + random_state=random_state, + ) + 
preprocessor = Table2GraphTransformer() + X_train = preprocessor.fit_transform(X_train, y=y_train) + X_test = preprocessor.transform(X_test) + return X_train, X_test, y_train, y_test + + +def _prepare_catboost( + data, + data_config, + num_train, + random_state, +): + """Preprocess for CatBoost.""" + data_ = data.copy() + _, cat_col_names = col_names_per_type(data, data_config["target_name"]) + data_cat = data_[cat_col_names] + data_cat = data_cat.replace(np.nan, "nan", regex=True) + data_[cat_col_names] = data_cat + for col in cat_col_names: + data_[col] = data_[col].astype("category") + X_train, X_test, y_train, y_test = set_split( + data_, + data_config, + num_train, + random_state=random_state, + ) + # index of categorical columns + cat_features = [X_train.columns.get_loc(col) for col in cat_col_names] + return ( + np.array(X_train), + np.array(X_test), + np.array(y_train), + np.array(y_test), + cat_features, + ) + + +def _prepare_tablevectorizer( + data, + data_config, + num_train, + random_state, + estim_method, +): + """Preprocess with Tablevectorizer.""" + + from skrub import TableVectorizer + + data_ = data.copy() + X_train, X_test, y_train, y_test = set_split( + data_, + data_config, + num_train, + random_state=random_state, + ) + num_col_names, cat_col_names = col_names_per_type(data, data_config["target_name"]) + + # Set preprocessors for categorical and numerical + categorical_preprocessor = TableVectorizer(auto_cast=False, sparse_threshold=0) + numerical_preprocessor = SimpleImputer(strategy="mean") + + # Set final pipeline for preprocessing depending on the method + tree_based_methods = ["xgb", "histgb", "randomforest"] + if estim_method in tree_based_methods: + preprocessor_final = ColumnTransformer( + [ + ("numerical", "passthrough", num_col_names), + ("categorical", categorical_preprocessor, cat_col_names), + ] + ) + elif estim_method in ["tabpfn"]: + preprocessor = ColumnTransformer( + [ + ("numerical", numerical_preprocessor, num_col_names), + ("categorical", categorical_preprocessor, cat_col_names), + ] + ) + preprocessor_final = Pipeline( + [ + ("preprocess", preprocessor), + ("missing", SimpleImputer(strategy="mean")), + ] + ) + else: + preprocessor = ColumnTransformer( + [ + ("numerical", numerical_preprocessor, num_col_names), + ("categorical", categorical_preprocessor, cat_col_names), + ] + ) + preprocessor_final = Pipeline( + [ + ("preprocess", preprocessor), + ("minmax", MinMaxScaler()), + ("missing", SimpleImputer(strategy="mean")), + ] + ) + X_train = preprocessor_final.fit_transform(X_train, y=y_train) + X_test = preprocessor_final.transform(X_test) + + if estim_method in ["tabpfn"]: + if X_train.shape[1] > 100: + n_components = np.min([X_train.shape[0], 100]) + pca_ = PCA(n_components=n_components, svd_solver="full") + X_train = pca_.fit_transform(X_train) + X_test = pca_.transform(X_test) + + return X_train, X_test, y_train, y_test + + +def _prepare_target_encoder( + data, + data_config, + num_train, + random_state, + estim_method, +): + """Preprocess with Target Encoder.""" + data_ = data.copy() + X_train, X_test, y_train, y_test = set_split( + data_, + data_config, + num_train, + random_state=random_state, + ) + num_col_names, cat_col_names = col_names_per_type(data, data_config["target_name"]) + if data_config["task"] == "regression": + target_type = "continuous" + else: + target_type = "binary" + + # Set preprocessors for categorical and numerical + categorical_preprocessor = TargetEncoder( + categories="auto", + target_type=target_type, + 
random_state=random_state, + ) + numerical_preprocessor = SimpleImputer(strategy="mean") + + # Set final pipeline for preprocessing depending on the method + tree_based_methods = ["xgb", "histgb", "randomforest"] + if estim_method in tree_based_methods: + preprocessor_final = ColumnTransformer( + [ + ("numerical", "passthrough", num_col_names), + ("categorical", categorical_preprocessor, cat_col_names), + ] + ) + elif estim_method in ["tabpfn"]: + preprocessor_final = ColumnTransformer( + [ + ("numerical", numerical_preprocessor, num_col_names), + ("categorical", categorical_preprocessor, cat_col_names), + ] + ) + else: + preprocessor = ColumnTransformer( + [ + ("numerical", numerical_preprocessor, num_col_names), + ("categorical", categorical_preprocessor, cat_col_names), + ] + ) + preprocessor_final = Pipeline( + [ + ("preprocess", preprocessor), + ("minmax", MinMaxScaler()), + ] + ) + X_train = preprocessor_final.fit_transform(X_train, y=y_train) + X_test = preprocessor_final.transform(X_test) + + if estim_method in ["tabpfn"]: + if X_train.shape[1] > 100: + n_components = np.min([X_train.shape[0], 100]) + pca_ = PCA(n_components=n_components, svd_solver="full") + X_train = pca_.fit_transform(X_train) + X_test = pca_.transform(X_test) + + return X_train, X_test, y_train, y_test + + +def _prepare_llm( + data, + data_config, + num_train, + random_state, +): + """Prepare the llm data. It loads the preprocessed data.""" + data_ = data.copy() + data_.drop(columns=data_config["entity_name"], inplace=True) + X_train, X_test, y_train, y_test = set_split( + data_, + data_config, + num_train, + random_state, + ) + + col_llm, col_not_llm = X_train.columns[:1024], X_train.columns[1024:] + + X_train_llm, X_train_ = X_train[col_llm], X_train[col_not_llm] + X_test_llm, X_test_ = X_test[col_llm], X_test[col_not_llm] + + if num_train > 1024: + pca = PCA().set_output(transform="pandas") + reduced_data_train = pca.fit_transform(X_train_llm) + dim_reduce_ = np.where(np.cumsum(pca.explained_variance_ratio_) > 0.9)[0][0] + dim_reduce = min(dim_reduce_, 300) + reduced_data_train = reduced_data_train.iloc[:, : dim_reduce + 1] + reduced_data_test = pca.transform(X_test_llm).iloc[:, : dim_reduce + 1] + X_train = pd.concat([reduced_data_train, X_train_], axis=1) + X_train = X_train.to_numpy().astype(np.float32) + X_test = pd.concat([reduced_data_test, X_test_], axis=1) + X_test = X_test.to_numpy().astype(np.float32) + + return X_train, X_test, y_train, y_test + + +def _assign_estimator( + estim_method, + task, + device, + cat_features, + bagging, +): + """Assign the specific estimator to train model.""" + + # Set number of models for NN-based methods + if bagging: + num_model = 1 + else: + num_model = 15 + + if estim_method == "carte-gnn": + fixed_params = dict() + fixed_params["batch_size"] = 16 + fixed_params["num_model"] = num_model + fixed_params["device"] = device + fixed_params["n_jobs"] = num_model + fixed_params["random_state"] = 0 + if task == "regression": + estimator = CARTERegressor(**fixed_params) + else: + estimator = CARTEClassifier(**fixed_params) + elif estim_method == "catboost": + fixed_params = dict() + fixed_params["cat_features"] = cat_features + fixed_params["verbose"] = False + fixed_params["allow_writing_files"] = False + fixed_params["thread_count"] = 1 + fixed_params["leaf_estimation_iterations"] = 1 + fixed_params["max_ctr_complexity"] = 1 + if task == "regression": + estimator = CatBoostRegressor(**fixed_params) + else: + estimator = CatBoostClassifier(**fixed_params) + elif 
estim_method == "xgb": + fixed_params = dict() + fixed_params["booster"] = "gbtree" + fixed_params["tree_method"] = "exact" # exact approx hist + if task == "regression": + estimator = XGBRegressor(**fixed_params) + else: + estimator = XGBClassifier(**fixed_params) + elif estim_method == "histgb": + fixed_params = dict() + if task == "regression": + estimator = HistGradientBoostingRegressor(**fixed_params) + else: + estimator = HistGradientBoostingClassifier(**fixed_params) + elif estim_method == "randomforest": + fixed_params = dict() + if task == "regression": + estimator = RandomForestRegressor(**fixed_params) + else: + estimator = RandomForestClassifier(**fixed_params) + elif estim_method == "ridge": + fixed_params = dict() + estimator = Ridge(**fixed_params) + elif estim_method == "logistic": + fixed_params = dict() + estimator = LogisticRegression(**fixed_params) + elif estim_method == "mlp": + fixed_params = dict() + fixed_params["num_model"] = num_model + fixed_params["device"] = device + fixed_params["n_jobs"] = num_model + fixed_params["random_state"] = 0 + if task == "regression": + estimator = MLPRegressor(**fixed_params) + else: + estimator = MLPClassifier(**fixed_params) + elif estim_method == "resnet": + fixed_params = dict() + fixed_params["num_model"] = num_model + fixed_params["device"] = device + fixed_params["n_jobs"] = num_model + fixed_params["random_state"] = 0 + if task == "regression": + estimator = RESNETRegressor(**fixed_params) + else: + estimator = RESNETClassifier(**fixed_params) + elif estim_method == "tabpfn": + estimator = TabPFNClassifier() + return estimator + + +def _assign_bagging_estimator(estimator_base, estim_method, task): + """Assign the bagging estimator if bagging set to true.""" + bagging_estimator = copy.deepcopy(estimator_base) + if estim_method in ["carte-gnn", "mlp", "resnet"]: + fixed_params = dict() + fixed_params["num_model"] = 15 + fixed_params["n_jobs"] = 15 + bagging_estimator.__dict__.update(fixed_params) + else: + bagging_params = dict() + bagging_params["estimator"] = estimator_base + bagging_params["n_estimators"] = 15 + bagging_params["max_samples"] = 0.8 + bagging_params["n_jobs"] = 15 + bagging_params["random_state"] = 0 + if task == "regression": + bagging_estimator = BaggingRegressor(**bagging_params) + else: + bagging_estimator = BaggingClassifier(**bagging_params) + + return bagging_estimator + + +# Run evaluation +def run_model( + data_name, + num_train, + method, + random_state, + bagging, + device, +): + """Run model for specific experiment setting.""" + # Load data + data, data_additional, data_config = _load_data(data_name) + + # Basic settings + target_name = data_config["target_name"] + entity_name = data_config["entity_name"] + task = data_config["task"] + _, result_criterion = set_score_criterion(task) + cat_features = None # overriden by prepare_... 
functions if needed + + # Set methods + method_parse = method.split("_") + estim_method = method_parse[-1] + preprocess_method = method_parse[0] + + # Stop for exceptions - Regression/Classification only methods, tabpfn > 1024 + reg_only_methods = ["tablevectorizer_ridge", "target-encoder_ridge"] + cls_only_methods = [ + method for method in carte_singletable_baselines["full"] if "tabpfn" in method + ] + cls_only_methods += [ + method for method in carte_singletable_baselines["full"] if "logistic" in method + ] + if (data_config["task"] == "regression") and (method in cls_only_methods): + return None + elif (data_config["task"] == "classification") and (method in reg_only_methods): + return None + elif (num_train > 1024) and (estim_method == "tabpfn"): + return None + + # Prepare data + if "fasttext" in preprocess_method: + data_fasttext = data_additional["fasttext"].copy() + data_fasttext.drop_duplicates(subset=entity_name, inplace=True) + data = data.merge(right=data_fasttext, how="left", on=entity_name) + elif "llm" in preprocess_method: + if preprocess_method.split("-")[0] == "sentence": + data_ = data_additional[preprocess_method].copy() + data = pd.concat([data_, data[[target_name, entity_name]]], axis=1) + data.dropna(subset=target_name, inplace=True) + else: + data_llm = data_additional["llm"].copy() + data_llm.drop_duplicates(subset=entity_name, inplace=True) + data = data.merge(right=data_llm, how="left", on=entity_name) + else: + pass + + # Preprocess data + if "carte-gnn" in preprocess_method: + X_train, X_test, y_train, y_test = _prepare_carte_gnn( + data, + data_config, + num_train, + random_state, + ) + elif "catboost" in preprocess_method: + X_train, X_test, y_train, y_test, cat_features = _prepare_catboost( + data, + data_config, + num_train, + random_state, + ) + elif "tablevectorizer" in preprocess_method: + X_train, X_test, y_train, y_test = _prepare_tablevectorizer( + data, + data_config, + num_train, + random_state, + estim_method, + ) + elif "target-encoder" in preprocess_method: + X_train, X_test, y_train, y_test = _prepare_target_encoder( + data, + data_config, + num_train, + random_state, + estim_method, + ) + elif "llm" in preprocess_method: + X_train, X_test, y_train, y_test = _prepare_llm( + data, + data_config, + num_train, + random_state, + ) + + # Assign estimators + best_params = extract_best_params(data_name, method, num_train, random_state) + estimator = _assign_estimator( + estim_method, + task, + device, + cat_features, + bagging, + ) + estimator.__dict__.update(best_params) + estimator_bagging = _assign_bagging_estimator(estimator, estim_method, task) + + # Create directory for saving results + result_save_dir_base = f"{config_directory['results']}/singletable/{data_name}" + if not os.path.exists(result_save_dir_base): + os.makedirs(result_save_dir_base, exist_ok=True) + + # Run without bagging strategy + marker = f"{data_name}_{method}_num_train-{num_train}_rs-{random_state}" + results_model_dir = result_save_dir_base + f"/{marker}.csv" + + # Do not run if result already exists + if os.path.exists(results_model_dir): + pass + else: + estimator.fit(X_train, y_train) + if task == "regression": + y_pred = estimator.predict(X_test) + else: + y_pred = estimator.predict_proba(X_test) + y_pred = reshape_pred_output(y_pred) + y_pred = check_pred_output(y_train, y_pred) + score = return_score(y_test, y_pred, task) + + results_ = dict() + results_[result_criterion[0]] = score[0] + results_[result_criterion[1]] = score[1] + results_model = 
pd.DataFrame([results_], columns=result_criterion[:2]) + results_model.columns = f"{method}_" + results_model.columns + results_model.to_csv(results_model_dir, index=False) + + if bagging: + # Run with bagging strategy + estimator_bagging.fit(X_train, y_train) + if task == "regression": + y_pred = estimator_bagging.predict(X_test) + else: + y_pred = estimator_bagging.predict_proba(X_test) + y_pred = reshape_pred_output(y_pred) + y_pred = check_pred_output(y_train, y_pred) + score = return_score(y_test, y_pred, task) + + results_ = dict() + results_[result_criterion[0]] = score[0] + results_[result_criterion[1]] = score[1] + results_model = pd.DataFrame([results_], columns=result_criterion[:2]) + results_model.columns = f"{method}_" + results_model.columns + + marker = f"{data_name}_{method}-bagging_num_train-{num_train}_rs-{random_state}" + results_model_dir = result_save_dir_base + f"/{marker}.csv" + results_model.to_csv(results_model_dir, index=False) + + return None + + +# Main +def main(data_name, num_train, method, random_state, bagging, device): + + # Setting for train size + if "all" in data_name: + data_name_list = carte_datalist + else: + data_name_list = data_name + if isinstance(data_name_list, list) == False: + data_name_list = [data_name_list] + + # Setting for train size + if "all" in num_train: + num_train = [32, 64, 128, 256, 512, 1024, 2048] + else: + if isinstance(num_train, list) == False: + num_train = [num_train] + num_train = list(map(int, num_train)) + else: + num_train = list(map(int, num_train)) + + # Setting for bagging + if bagging == "True": + bagging = True + else: + bagging = False + + # Setting for methods + if "full" in method: + method_list = carte_singletable_baselines["full"] + elif "reduced" in method: + assert bagging == False + method_list = carte_singletable_baselines["reduced"] + elif "f-r" in method: + method_list = set(carte_singletable_baselines["full"]) + method_list -= set(carte_singletable_baselines["reduced"]) + method_list = list(method_list) + method_list.sort() + else: + method_list = method + if isinstance(method_list, list) == False: + method_list = [method_list] + + # Setting for random state + if "all" in random_state: + random_state = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + else: + if isinstance(random_state, list) == False: + random_state = [random_state] + random_state = list(map(int, random_state)) + else: + random_state = list(map(int, random_state)) + + # List out all the cases and run + args_dict = dict() + args_dict["data_name"] = data_name_list + args_dict["num_train"] = num_train + args_dict["method"] = method_list + args_dict["random_state"] = random_state + args_dict["device"] = [device] + args_dict["bagging"] = [bagging] + args_list = list(ParameterGrid(args_dict)) + + # Depending on the specific machine or computing environment, you may want to parallelize the evaluation. 
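+    # A possible (untested) parallel variant of the loop below, assuming joblib is installed:
+    #   from joblib import Parallel, delayed
+    #   Parallel(n_jobs=-1)(delayed(run_model)(**args) for args in args_list)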
+ for args in args_list: + run_model(**args) + marker = f"{args['data_name']}_{args['method']}_num_train-{args['num_train']}_rs-{args['random_state']}" + print(marker + " is complete") + + +if __name__ == "__main__": + + # Set parser + import argparse + + parser = argparse.ArgumentParser(description="Evaluate model for singletables.") + parser.add_argument( + "-dn", + "--data_name", + nargs="+", + type=str, + help="dataset to evaluate", + ) + parser.add_argument( + "-nt", + "--num_train", + nargs="+", + type=str, + help="Number of train", + ) + parser.add_argument( + "-m", + "--method", + nargs="+", + type=str, + help="Method to evaluate", + ) + parser.add_argument( + "-rs", + "--random_state", + nargs="+", + type=str, + help="Random_state", + ) + parser.add_argument( + "-b", + "--bagging", + type=str, + help="include bagging strategy for evaluation", + ) + parser.add_argument( + "-dv", + "--device", + type=str, + help="Device, cpu or cuda", + ) + args = parser.parse_args() + + main( + args.data_name, + args.num_train, + args.method, + args.random_state, + args.bagging, + args.device, + ) diff --git a/carte/scripts/preprocess_lm.py b/carte/scripts/preprocess_lm.py new file mode 100644 index 0000000..3916347 --- /dev/null +++ b/carte/scripts/preprocess_lm.py @@ -0,0 +1,121 @@ +""" Python script for preparing datasets for evaluation +""" + +# >>> +if __name__ == '__main__': + import os + import sys + + _project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + os.environ['PROJECT_DIR'] = _project_dir + sys.path.append(_project_dir) + del _project_dir +# <<< + +import pandas as pd +import numpy as np +import pickle +import json +import os +from carte.configs.directory import config_directory +from carte.configs.carte_configs import carte_datalist +from carte.src.preprocess_utils import ( + extract_fasttext_features, + extract_llm_features, + table2llmfeatures, +) + + +def data_preprocess(data_name: str, device: str = "cuda:0"): + + # Load data + data_pd_dir = f"{config_directory['data_singletable']}/{data_name}/raw.parquet" + data_pd = pd.read_parquet(data_pd_dir) + data_pd.fillna(value=np.nan, inplace=True) + + # Basic settings for the data + config_data_dir = f"{config_directory['data_singletable']}/{data_name}/config_data.json" + filename = open(config_data_dir) + config_data = json.load(filename) + + # Set the data without the target + data_X = data_pd.drop(columns=config_data["target_name"]) + + data = dict() + data_fasttext = None + data_llm = None + data_sentence_llm_embed_num = None + data_sentence_llm_concat_num = None + + if config_data["entity_name"] is not None: + data_fasttext = extract_fasttext_features( + data=data_X, + extract_col_name=config_data["entity_name"], + ) + data_llm = extract_llm_features( + data=data_X, + extract_col_name=config_data["entity_name"], + device=device, + ) + else: + pass + + data_sentence_llm_embed_num = table2llmfeatures( + data=data_X, + embed_numeric=True, + device=device, + ) + data_sentence_llm_concat_num = table2llmfeatures( + data=data_X, + embed_numeric=False, + device=device, + ) + + data["fasttext"] = data_fasttext + data["llm"] = data_llm + data["sentence-llm-embed-num"] = data_sentence_llm_embed_num + data["sentence-llm-concat-num"] = data_sentence_llm_concat_num + + save_dir = f"{config_directory['data_singletable']}/{data_name}/external.pickle" + + with open(save_dir, "wb") as pickle_file: + pickle.dump(data, pickle_file) + + +def main(datalist, device: str = "cuda:0"): + + datalist_total = carte_datalist + + # Setting 
methods + if "all" in datalist: + data_list = datalist_total + else: + data_list = datalist + if isinstance(data_list, list) == False: + data_list = list(data_list) + + for data_name in data_list: + data_preprocess(data_name=data_name, device=device) + print(f"{data_name} complete!") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Data Preparation") + parser.add_argument( + "-dt", + "--datalist", + nargs="+", + type=str, + help="List of data", + ) + parser.add_argument( + "-de", + "--device", + type=str, + help="Device", + ) + args = parser.parse_args() + + main(args.datalist, args.device) diff --git a/carte/scripts/preprocess_raw.py b/carte/scripts/preprocess_raw.py new file mode 100644 index 0000000..26e83a6 --- /dev/null +++ b/carte/scripts/preprocess_raw.py @@ -0,0 +1,1444 @@ +"""Script for preprocessing raw data.""" + +# >>> +if __name__ == "__main__": + import os + import sys + + _project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + os.environ["PROJECT_DIR"] = _project_dir + sys.path.append(_project_dir) + del _project_dir +# <<< + +import os +import json +import ast +import pandas as pd +import numpy as np +from carte.configs.directory import config_directory +from carte.configs.carte_configs import carte_datalist + + +def _drop_high_null(data, proportion=0.5): + """Drop columns with high fraction of missing values""" + null_num = np.array([data[col].isnull().sum() for col in data.columns]) + null_crit = int(len(data) * proportion) + null_col = list(data.columns[null_num > null_crit]) + return data.drop(columns=null_col) + + +def _drop_single_unique(data): + """Drop columns with single unique values.""" + num_unique_cols = [col for col in data.columns if data[col].nunique() == 1] + return data.drop(columns=num_unique_cols) + + +def _load_raw_data(data_name, file_type="csv", sep=","): + """Load the raw data for preprocessing.""" + data_dir = f"{config_directory['data_raw']}/{data_name}.{file_type}" + if file_type == "csv": + data = pd.read_csv(data_dir, sep=sep) + elif file_type == "json": + data_file = open(data_dir) + data = [] + for line in data_file: + data.append(json.loads(line)) + data = pd.DataFrame(data) + data_file.close() + data.columns = data.columns.str.replace(" ", "_") + data.columns = data.columns.str.replace("\n", "_") + data.columns = data.columns.str.replace("%", "Percentage") + data.replace("\n", " ", regex=True, inplace=True) + return data + + +def _save_processed_data(data_name, data, target_name, entity_name, task, repeated): + """Save the preprocessed data and configs.""" + # save the data + save_dir = f"{config_directory['data_singletable']}/{data_name}/" + if not os.path.exists(save_dir): + os.makedirs(save_dir, exist_ok=True) + data.to_parquet(save_dir + "raw.parquet") + # save the config file + config = dict() + config["entity_name"] = entity_name + config["target_name"] = target_name + config["task"] = task + config["repeated"] = repeated + with open(save_dir + "config_data.json", "w") as outfile: + json.dump(config, outfile) + return None + + +def preprocess_data(data_name): + """Preprocess the raw data with the given name of the dataset.""" + + # Load data + data = _load_raw_data(data_name) + + # Preoprocess depending on each data + if data_name == "anime_planet": + # basic info + target_name = "Rating_Score" + entity_name = "Name" + task = "regression" + repeated = False + # preprocess + data.replace("Unknown", np.nan, inplace=True) + target_name = "Rating_Score" + 
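+        # Drop rows without a rating, cast the target to float, then prune
+        # mostly-missing and single-valued columns.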
data.dropna(subset=target_name, inplace=True) + data[target_name] = data[target_name].astype("float") + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("Anime-PlanetID") + drop_col.append("Number_Votes") + drop_col.append("Url") + data.drop(columns=drop_col, inplace=True) + data["Finished"] = data["Finished"].astype("str") + data["Episodes"] = data["Episodes"].astype("float") + data["Duration"] = data["Duration"].astype("float") + elif data_name == "babies_r_us": + # basic info + target_name = "price" + entity_name = "title" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("int_id") + drop_col.append("ext_id") + drop_col.append("SKU") + data.drop(columns=drop_col, inplace=True) + temp = data["is_discounted"].copy() + temp = temp.astype("str") + data["is_discounted"] = temp + elif data_name == "beer_ratings": + # basic info + target_name = "review_overall" + entity_name = "Beer_Name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [col for col in data.columns if "review" in col] + drop_col.remove(target_name) + data.drop(columns=drop_col, inplace=True) + numeric_cols = data.select_dtypes(exclude="object").columns.to_list() + data[numeric_cols] = data[numeric_cols].astype("float") + data.rename(columns={"Beer_Name_(Full)": "Beer_Name"}, inplace=True) + elif data_name == "bikedekho": + # basic info + target_name = "price" + entity_name = "bike_name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(10, data[target_name]) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("id") + data.drop(columns=drop_col, inplace=True) + data["model_year"] = data["model_year"].astype("str") + data["km_driven"] = data["km_driven"].astype("float") + elif data_name == "bikewale": + # basic info + target_name = "price" + entity_name = "bike_name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + mask = data[target_name] >= 500 + data = data[mask] + data[target_name] = np.emath.logn(10, data[target_name]) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("id") + data.drop(columns=drop_col, inplace=True) + data["model_year"] = data["model_year"].astype("str") + data["km_driven"] = data["km_driven"].astype("float") + elif data_name == "buy_buy_baby": + # basic info + target_name = "price" + entity_name = "title" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name] + 1) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("int_id") + drop_col.append("ext_id") + drop_col.append("SKU") + 
drop_col.append("company_free") + data.drop(columns=drop_col, inplace=True) + temp = data["is_discounted"].copy() + temp = temp.astype("str") + temp[temp == "True"] = "1" + temp[temp == "False"] = "0" + data["is_discounted"] = temp + elif data_name == "cardekho": + # basic info + target_name = "price" + entity_name = "model" + task = "regression" + repeated = False + # preprocess + data.rename(columns={"km": "mileage"}, inplace=True) + data["model_year"] = data["model_year"].astype(str) + target_name = "price" + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(100, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "chocolate_bar_ratings": + # basic info + target_name = "Rating" + entity_name = "Specific_Bean_Origin_or_Bar_Name" + task = "classification" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + temp = data["Rating"].copy() + temp[temp < 3.25] = 0 + temp[temp != 0] = 1 + data["Rating"] = temp + data = _drop_high_null(data) + data = _drop_single_unique(data) + data.drop(columns="REF", inplace=True) + data.columns = data.columns.str.replace(" ", "_") + data["Review_Date"] = data["Review_Date"].astype("str") + data["Cocoa_Percent"] = data["Cocoa_Percent"].str.replace("%", "") + data["Cocoa_Percent"] = data["Cocoa_Percent"].astype("float") + elif data_name == "clear_corpus": + # basic info + target_name = "BT_Easiness" + entity_name = "Title" + task = "regression" + repeated = False + # preprocess + data = data.replace("?", np.nan) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("ID") + drop_col.append("BT_s.e.") + drop_col.append("MPAA_#Avg") + drop_col.append("MPAA__#Max") + data.drop(columns=drop_col, inplace=True) + data.reset_index(drop=True, inplace=True) + data["Pub_Year"] = data["Pub_Year"].astype("str") + data["Pub_Year"] = data["Pub_Year"].str.split(".").str[0] + numeric_cols = data.select_dtypes(exclude="object").columns.to_list() + data[numeric_cols] = data[numeric_cols].astype("float") + elif data_name == "coffee_ratings": + # basic info + target_name = "rating" + entity_name = "name" + task = "classification" + repeated = False + # preprocess + temp = data[target_name].copy() + temp[temp <= 93] = 0 + temp[temp != 0] = 1 + data[target_name] = temp + data.reset_index(drop=True, inplace=True) + data[target_name] = data[target_name].astype("float") + data.dropna(subset=target_name, inplace=True) + data.drop_duplicates(subset=["name"], inplace=True) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("slug") + drop_col.append("all_text") + drop_col.append("review_date") + drop_col.append("est_price") + drop_col.append("aroma") + drop_col.append("acid") + drop_col.append("body") + drop_col.append("flavor") + drop_col.append("aftertaste") + drop_col.append("agtron") + data.drop(columns=drop_col, inplace=True) + elif data_name == "company_employees": + # basic info + target_name = "current_employee_estimate" + entity_name = "name" + task = "regression" + repeated = False + # preprocess + data.drop(columns=["Unnamed:_0"], inplace=True) + data.dropna(subset=target_name, inplace=True) + data.drop_duplicates(subset="name", keep=False, 
inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = data[target_name].astype("float") + data[target_name] = np.emath.logn(10, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_cols = [] + drop_cols.append("country") + drop_cols.append("total_employee_estimate") + data.drop(columns=drop_cols, inplace=True) + data["year_founded"] = data["year_founded"].astype("str") + data["year_founded"] = data["year_founded"].str.split(".").str[0] + temp = data["year_founded"].copy() + temp[temp == "nan"] = np.nan + data["year_founded"] = temp + num_cols = data.select_dtypes(exclude="object").columns + data[num_cols] = data[num_cols].astype("float") + elif data_name == "employee_remuneration": + # Exception with different sep + data = _load_raw_data(data_name, sep=";") + # basic info + target_name = "Remuneration" + entity_name = "Title" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(10, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + data.drop(columns=["Name"], inplace=True) + data["Year"] = data["Year"].astype("str") + elif data_name == "employee_salaries": + # basic info + target_name = "current_annual_salary" + entity_name = "employee_position_title" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(10, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + data["year_first_hired"] = data["year_first_hired"].astype("str") + elif data_name == "fifa22_players": + # basic info + target_name = "wage_eur" + entity_name = "name" + task = "regression" + repeated = False + # preprocess + drop_col_url = [col for col in data.columns if "_url" in col] + drop_col_id = [col for col in data.columns if "_id" in col] + data.drop(columns=drop_col_url + drop_col_id, inplace=True) + data = data[data.columns[:-68]] + data.rename(columns={"short_name": "name"}, inplace=True) + data.drop_duplicates(subset=["name"], inplace=True) + data.reset_index(drop=True, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(10, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("long_name") + drop_col.append("overall") + drop_col.append("potential") + drop_col.append("league_name") + drop_col.append("league_level") + drop_col.append("weak_foot") + drop_col.append("skill_moves") + drop_col.append("real_face") + data.drop(columns=drop_col, inplace=True) + data["club_jersey_number"] = data["club_jersey_number"].astype("str") + data["club_jersey_number"] = data["club_jersey_number"].str.split(".").str[0] + data["club_contract_valid_until"] = data["club_contract_valid_until"].astype( + "str" + ) + data["club_contract_valid_until"] = ( + data["club_contract_valid_until"].str.split(".").str[0] + ) + num_cols = data.select_dtypes(exclude="object").columns + data[num_cols] = data[num_cols].astype("float") + elif data_name == "filmtv_movies": + # basic info + target_name = "public_vote" + entity_name = "title" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + 
data = _drop_single_unique(data) + data["year"] = data["year"].astype(str) + data["duration"] = data["duration"].astype(float) + drop_col = [] + drop_col.append("filmtv_id") + drop_col.append("avg_vote") + drop_col.append("total_votes") + drop_col.append("humor") + drop_col.append("rhythm") + drop_col.append("effort") + drop_col.append("tension") + drop_col.append("erotism") + data.drop(columns=drop_col, inplace=True) + elif data_name == "journal_jcr": + # basic info + target_name = "2021_JIF" + entity_name = "Journal_name" + task = "regression" + repeated = False + # preprocess + data.replace("N/A", np.nan, regex=True, inplace=True) + num_cols = data.columns[4:8] + num_cols = num_cols.append(data.columns[10:]) + for col in num_cols: + data[col] = data[col].str.replace(",", "") + data[col] = data[col].astype("float") + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name] + 1) + data = _drop_high_null(data) + data = _drop_single_unique(data) + remove_cols = [] + remove_cols.append("Total_Citations") + remove_cols.append("2021_JCI") + remove_cols.append("JIF_Without_Self_Cites") + remove_cols.append("5_Year_JIF") + remove_cols.append("Immediacy_Index") + remove_cols.append("Normalized_Eigenfactor") + remove_cols.append("Eigenfactor") + remove_cols.append("Article_Influence_Score") + remove_cols.append("Total_Articles") + data.drop(columns=remove_cols, inplace=True) + elif data_name == "journal_sjr": + # Exception with different sep + data = _load_raw_data(data_name, sep=";") + # basic info + target_name = "H_index" + entity_name = "Title" + task = "regression" + repeated = False + # preprocess + col_keep = list(data.columns[[2, 3, 4, 7]]) + list(data.columns)[-6:] + data = data[col_keep] + data.columns = data.columns.str.replace(" ", "_") + temp1 = data["Issn"].str.split(",").str[0] + temp1 = temp1.rename("ISSN") + data["Issn"] = temp1 + temp2 = data["Issn"].str.split(",").str[1] + temp2 = temp2.rename("e-ISSN") + data["e-ISSN"] = temp2 + data.drop_duplicates(subset="Title", inplace=True) + target_name = "H_index" + data.dropna(subset=target_name, inplace=True) + data[target_name] = np.log10(data[target_name] + 1) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "jp_anime": + # basic info + target_name = "Score" + entity_name = "Name" + task = "regression" + repeated = False + # preprocess + data.replace("UNKNOWN", np.nan, inplace=True) + data.replace("Unknown", np.nan, inplace=True) + mask = data["English_name"].isnull() + temp = data["English_name"].copy() + temp[mask] = data["Name"][mask] + data["English_name"] = temp + data.reset_index(drop=True, inplace=True) + data.dropna(subset=target_name, inplace=True) + data[target_name] = data[target_name].astype("float") + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + temp = data["Aired"].copy() + data["Start_Date"] = temp.str.split(" to ").str[0] + data["End_Date"] = temp.str.split(" to ").str[1] + keep_col = list(data.columns) + keep_col.remove("anime_id") + keep_col.remove("Name") + keep_col.remove("Other_name") + keep_col.remove("Scored_By") + keep_col.remove("Image_URL") + keep_col.remove("Rank") + keep_col.remove("Aired") + data = data[keep_col] + data.rename(columns={"English_name": "Name"}, inplace=True) + num_cols = data.select_dtypes(exclude="object").columns + data[num_cols] = data[num_cols].astype("float") + data["Rating"] = 
data["Rating"].str.split(" - ").str[0] + data.drop_duplicates(subset="Name", inplace=True) + data.reset_index(drop=True, inplace=True) + data["Episodes"] = data["Episodes"].astype(float) + temp = data["Duration"].copy() + temp = temp.astype(str) + temp = temp.str.replace(" per ep", "", regex=False) + temp1 = temp.str.split(" hr").str[0] + temp1[~temp1.str.isnumeric()] = "0" + temp1 = temp1.astype(float) * 60 + temp2 = temp.str.split(" hr").str[1] + temp2 = temp2.astype(str) + temp2 = temp2.str.replace(" min", "", regex=False) + temp2 = temp2.str.replace(" ", "", regex=False) + temp2[~temp2.str.isnumeric()] = "0" + temp2 = temp2.astype(float) + temp3 = temp.copy() + temp3[temp.str.contains("hr")] = "nan" + temp3 = temp3.str.replace(" min", "", regex=False) + temp3 = temp3.str.replace(" ", "", regex=False) + temp3[~temp3.str.isnumeric()] = "0" + temp3 = temp3.astype(float) + temp = temp1 + temp2 + temp3 + temp[temp == 0] = np.nan + data["Duration"] = temp + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "k_drama": + # basic info + target_name = "score" + entity_name = "Kdrama_name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data[target_name] = data[target_name].astype("float") + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_cols = [] + drop_cols.append("scored_by") + drop_cols.append("Ranked") + data.drop(columns=drop_cols, inplace=True) + data["Content_Rating"] = data["Content_Rating"].str.split(" - ").str[0] + elif data_name == "michelin": + # basic info + target_name = "Award" + entity_name = "Kdrama_name" + task = "classification" + repeated = False + # preprocess + temp = data["Award"].copy() + temp[temp.str.contains("MICHELIN")] = "1" + temp[temp.str.contains("Bib Gourmand")] = "0" + temp = temp.astype("float") + data["Award"] = temp + data.rename(columns={"WebsiteUrl": "Website_Url"}, inplace=True) + data.rename( + columns={"FacilitiesAndServices": "Facilities_And_Services"}, inplace=True + ) + data.rename(columns={"PhoneNumber": "Phone_Number"}, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + data["Facilities_And_Services"] = data["Facilities_And_Services"].str.replace( + ",", ", " + ) + drop_col = [] + drop_col.append("Phone_Number") + drop_col.append("Url") + drop_col.append("Price") + drop_col.append("Facilities_And_Services") + data.drop(columns=drop_col, inplace=True) + elif data_name == "mlds_salaries": + # basic info + target_name = "salary_in_usd" + entity_name = "job_title" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data[target_name] = data[target_name].astype("float") + data[target_name] = np.log10(data[target_name]) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + data["work_year"] = data["work_year"].astype("str") + data["remote_ratio"] = data["remote_ratio"].astype("str") + mapping = dict() + mapping["experience_level"] = dict() + mapping["experience_level"]["SE"] = "Senior-level / Expert" + mapping["experience_level"]["EN"] = "Entry-level / Junior" + mapping["experience_level"]["MI"] = "Mid-level / Intermediate" + mapping["experience_level"]["EX"] = "Executive-level / Director" + mapping["employment_type"] = dict() + mapping["employment_type"]["FT"] = "Full-time" + mapping["employment_type"]["PT"] = 
"Part-time" + mapping["employment_type"]["CT"] = "Contract" + mapping["employment_type"]["FL"] = "Freelance" + mapping["remote_ratio"] = dict() + mapping["remote_ratio"]["0"] = "No remote work" + mapping["remote_ratio"]["50"] = "Partially remote" + mapping["remote_ratio"]["100"] = "Fully remote" + mapping["company_size"] = dict() + mapping["company_size"]["M"] = "Medium" + mapping["company_size"]["L"] = "Large" + mapping["company_size"]["S"] = "Small" + for name in mapping.keys(): + temp = data[name].copy() + temp = temp.map(mapping[name]) + data[name] = temp + drop_col = [] + drop_col.append("salary") + drop_col.append("salary_currency") + data.drop(columns=drop_col, inplace=True) + elif data_name == "movies": + # basic info + target_name = "revenue" + entity_name = "title" + task = "regression" + repeated = False + # preprocess + mask = data["revenue"] >= 1000 # >= 10000000 + data = data[mask] + data.dropna(subset="revenue", inplace=True) + data.reset_index(drop=True, inplace=True) + temp = data["budget"].copy() + mask = temp.str.contains(".jpg") + data = data[~mask] + data.reset_index(drop=True, inplace=True) + temp = data["runtime"].copy() + mask = temp == 0 + data = data[~mask] + data.reset_index(drop=True, inplace=True) + data["budget"] = data["budget"].astype("float") + temp = data["budget"].copy() + temp[temp == 0] = np.nan + data["budget"] = temp + data["popularity"] = data["popularity"].astype("float") + data.fillna(value=np.nan, inplace=True) + data.columns = data.columns.str.replace(" ", "_") + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(10, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + adjust_cols = [ + "belongs_to_collection", + "genres", + "production_companies", + "production_countries", + "spoken_languages", + ] + extract_name = ["name", "name", "name", "name", "iso_639_1"] + for i in range(len(adjust_cols)): + if adjust_cols[i] in data.columns: + col = [] + for idx in range(len(data)): + temp = data[adjust_cols[i]][idx] + if str(temp) == "nan": + col.append(np.nan) + else: + temp = ast.literal_eval(temp) + if isinstance(temp, list) is False: + temp = [temp] + if len(temp) == 0: + col.append(np.nan) + else: + temp = pd.DataFrame(temp, index=None) + temp[extract_name[i]] = temp[extract_name[i]] + ", " + col.append(temp[extract_name[i]].sum()[:-2]) + col = pd.Series(col) + col = col.rename(adjust_cols[i]) + data[adjust_cols[i]] = col + else: + pass + drop_col = [] + drop_col.append("id") + drop_col.append("imdb_id") + drop_col.append("overview") + drop_col.append("poster_path") + drop_col.append("original_title") + drop_col.append("original_language") + data.drop(columns=drop_col, inplace=True) + data.drop_duplicates(subset=["title", target_name], inplace=True) + data.reset_index(drop=True, inplace=True) + elif data_name == "museums": + # basic info + target_name = "Revenue" + entity_name = "Museum_Name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + mask = data[target_name] > 0 + data = data[mask].copy() + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(100, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("Museum_ID") + drop_col.append("Income") + data.drop(columns=drop_col, inplace=True) + num_cols = 
data.select_dtypes(exclude="object").columns.tolist() + num_cols.remove(target_name) + data[num_cols] = data[num_cols].astype("str") + for col in num_cols: + data[col] = data[col].str.strip(".0") + data.reset_index(drop=True, inplace=True) + elif data_name == "mydramalist": + # basic info + target_name = "rating" + entity_name = "Name" + task = "regression" + repeated = False + # preprocess + temp = data["category"].copy() + mask = temp == "Drama" + data = data[mask] + data.reset_index(drop=True, inplace=True) + temp = data["country"].copy() + mask = temp == "South Korea" + data = data[~mask] + data.reset_index(drop=True, inplace=True) + for col in data.select_dtypes(include="object").columns: + temp = data[col].copy() + temp = temp.astype(str) + temp[temp.str.isspace()] = np.nan + temp[temp == "nan"] = np.nan + data[col] = temp + data.replace("“", "", regex=True, inplace=True) + data.replace("”", "", regex=True, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("url") + data.drop(columns=drop_col, inplace=True) + temp = data["duration"].copy() + temp = temp.astype(str) + mask = temp.str.contains("hr") + temp[~mask] = "0" + temp = temp.str.split("hr").str[0] + temp1 = temp.astype(float) * 60 + temp = data["duration"].copy() + temp = temp.astype(str) + temp[mask] = "0" + temp = temp.str.split("min").str[0] + temp2 = temp.astype(float) + data["duration"] = temp1 + temp2 + elif data_name == "nba_draft": + # basic info + target_name = "value_over_replacement" + entity_name = "player" + task = "classification" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + temp = data[target_name].copy() + temp[temp <= 0] = 0 + temp[temp != 0] = 1 + data[target_name] = temp + keep_col = [] + keep_col.append(target_name) + keep_col.append("year") + keep_col.append("overall_pick") + keep_col.append("team") + keep_col.append("player") + keep_col.append("college") + keep_col.append("years_active") + data = data[keep_col] + data["year"] = data["year"].astype("str") + data["overall_pick"] = data["overall_pick"].astype("str") + data.reset_index(drop=True, inplace=True) + elif data_name == "prescription_drugs": + # basic info + target_name = "WAC_at_Introduction" + entity_name = "Drug_Product_Description" + task = "regression" + repeated = False + # preprocess + unnamed_col = [col for col in data.columns if "Unnamed:" in col] + data.drop(columns=unnamed_col, inplace=True) + temp = data["Estimated_Number_of_Patients"].copy() + temp[temp == 0] = np.nan + data["Estimated_Number_of_Patients"] = temp + temp = data["Date_Introduced_to_Market"].copy() + temp = temp.str.split("-").str[0] + data["Date_Introduced_to_Market"] = temp + data.dropna( + subset=["Drug_Product_Description", "WAC_at_Introduction"], inplace=True + ) + data.reset_index(drop=True, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(10, data[target_name]) + data = _drop_high_null(data, 0.9) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("NDC_Number") + data.drop(columns=drop_col, inplace=True) + elif data_name == "ramen_ratings": + # basic info + target_name = "Stars" + entity_name = "Variety" + task = "classification" + repeated = False + # 
preprocess + data["Stars"] = data["Stars"].str.replace("NS", "-1") + data["Stars"] = data["Stars"].str.replace("NR", "-1") + data["Stars"] = data["Stars"].str.replace("Unrated", "-1") + data["Stars"] = data["Stars"].str.split("/").str[0] + data["Stars"] = data["Stars"].astype("float") + temp = data["Stars"].copy() + temp[temp == -1] = np.nan + data["Stars"] = temp + data.dropna(subset="Stars", inplace=True) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("Review_#") + data.drop(columns=drop_col, inplace=True) + temp = data["Stars"].copy() + temp[temp < 4] = 0 + temp[temp != 0] = 1 + data["Stars"] = temp + data.drop_duplicates(inplace=True) + data.reset_index(drop=True, inplace=True) + elif data_name == "roger_ebert": + # basic info + target_name = "critic_rating" + entity_name = "movie_name" + task = "classification" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + mask = data[target_name] > 2 + data = data[mask] + data.reset_index(drop=True, inplace=True) + temp = data[target_name].copy() + temp[temp < 3.5] = 0 + temp[temp != 0] = 1 + data[target_name] = temp + data = _drop_high_null(data) + data = _drop_single_unique(data) + data.drop(columns="id", inplace=True) + data["year"] = data["year"].astype("str") + data["year"] = data["year"].str[:4] + temp = data["duration"].str.extract(r"([0-9]+)")[0] + temp = temp.astype("float") + data["duration"] = temp + elif data_name == "rotten_tomatoes": + # basic info + target_name = "Rating_Value" + entity_name = "Name" + task = "regression" + repeated = False + # preprocess + data.drop(columns="Id", inplace=True) + data.drop(columns="ReviewCount", inplace=True) + data.drop(columns="Actors", inplace=True) + data.rename(columns={"RatingValue": "Rating_Value"}, inplace=True) + data.rename(columns={"RatingCount": "Rating_Count"}, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + data["Year"] = data["Year"].astype("str") + data["Creator"] = data["Creator"].str.replace(",", ", ", regex=False) + data["Cast"] = data["Cast"].str.replace(",", ", ", regex=False) + data["Genre"] = data["Genre"].str.replace(",", ", ", regex=False) + data["Country"] = data["Country"].str.replace(",", ", ", regex=False) + data["Language"] = data["Language"].str.replace(",", ", ", regex=False) + data["Release_Date"] = data["Release_Date"].str.split("(").str[0] + data["Duration"] = data["Duration"].str.replace("min", "") + data["Duration"] = data["Duration"].astype("float") + data["Rating_Count"] = data["Rating_Count"].str.replace(",", "").astype("float") + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "spotify": + # basic info + target_name = "popularity" + entity_name = "track" + task = "classification" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_cols = [] + drop_cols.append("uri") + data.drop(columns=drop_cols, inplace=True) + data["time_signature"] = data["time_signature"].astype("str") + data["sections"] = data["sections"].astype("str") + data["key"] = data["key"].astype("str") + data["duration_ms"] = data["duration_ms"].astype("float") + temp = data["mode"].copy() + mapping = {1: "Major", 0: "Minor"} + temp = 
temp.map(mapping) + data["mode"] = temp + elif data_name == "us_accidents_counts": + # basic info + target_name = "Counts" + entity_name = "City" + task = "regression" + repeated = False + # preprocess + elif data_name == "us_accidents_severity": + # basic info + target_name = "Severity" + entity_name = "Location" + task = "classification" + repeated = False + # preprocess + elif data_name == "us_presidential": + # basic info + target_name = "target" + entity_name = "region" + task = "regression" + repeated = False + # preprocess + elif data_name == "used_cars_24": + # basic info + target_name = "Price" + entity_name = "Model" + task = "regression" + repeated = False + # preprocess + drop_col = [] + drop_col.append("Unnamed:_0") + drop_col.append("EMI_(monthly)") + data.drop(columns=drop_col, inplace=True) + data.rename(columns={"Driven_(Kms)": "Mileage"}, inplace=True) + data["Model"] = data["Car_Brand"] + " " + data["Model"] + temp = data["Ownership"].copy() + temp = temp.astype(str) + temp[temp == "1"] = "First" + temp[temp == "2"] = "Second" + temp[temp == "3"] = "Third" + temp[temp == "4"] = "Fourth" + data["Ownership"] = temp + data["Model_Year"] = data["Model_Year"].astype(str) + data["Mileage"] = data["Mileage"].astype(float) + data["Price"] = data["Price"].astype(float) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(100, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + for col in data.select_dtypes(include="object").columns: + temp = data[col].copy() + temp = temp.astype(str) + temp[temp == "nan"] = np.nan + data[col] = temp + elif data_name == "used_cars_benz_italy": + # Exception with different sep + data = _load_raw_data(data_name, sep=";") + # basic info + target_name = "price" + entity_name = "model" + task = "regression" + repeated = False + # preprocess + data.replace("unknown", np.nan, inplace=True) + drop_col = [] + drop_col.append("Unnamed:_0") + data.drop(columns=drop_col, inplace=True) + data["model"] = data["brand"] + " " + data["model"] + mapping = dict() + mapping["fuel"] = dict() + mapping["fuel"]["d"] = "diesel" + mapping["fuel"]["g"] = "petrol" + mapping["fuel"]["e"] = "electric" + mapping["fuel"]["l"] = "lpg" + mapping["seller_type"] = dict() + mapping["seller_type"]["d"] = "dealer" + mapping["seller_type"]["p"] = "private" + for col in mapping.keys(): + temp = data[col].copy() + temp = temp.map(mapping[col]) + data[col] = temp + rename_map = dict() + rename_map["first_reg"] = "first_registration_date" + rename_map["mileage_km"] = "mileage" + rename_map["power_hp"] = "power" + data.rename(columns=rename_map, inplace=True) + data["mileage"] = data["mileage"].astype(float) + data["price"] = data["price"].astype(float) + temp = data["power"].copy() + temp = temp.astype(str) + temp[~temp.str.isnumeric()] = np.nan + temp = temp.astype(float) + data["power"] = temp + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + mask = data[target_name] > 100 + data = data[mask] + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(10, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "used_cars_dot_com": + # basic info + target_name = "price" + entity_name = "model" + task = "regression" + repeated = False + # preprocess + data.rename(columns={"milage": "mileage"}, inplace=True) + data["model_year"] = data["model_year"].astype(str) + temp = 
data["mileage"].copy() + temp = temp.str.replace(" mi.", "", regex=False).str.replace( + ",", "", regex=False + ) + temp = temp.astype(float) + data["mileage"] = temp + temp = data["price"].copy() + temp = temp.str.replace("$", "", regex=False).str.replace(",", "", regex=False) + temp = temp.astype(float) + data["price"] = temp + data["model"] = data["brand"] + " " + data["model"] + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(100, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "used_cars_pakistan": + # basic info + target_name = "Price" + entity_name = "Model" + task = "regression" + repeated = False + # preprocess + data.rename(columns={"Make": "Brand"}, inplace=True) + data.rename(columns={"Make_Year": "Year"}, inplace=True) + data.rename(columns={"CC": "Engine_Capacity"}, inplace=True) + data["Year"] = data["Year"].astype(str) + data["Engine_Capacity"] = data["Engine_Capacity"].astype(float) + data["Mileage"] = data["Mileage"].astype(float) + data["Model"] = data["Brand"] + " " + data["Model"] + ", " + data["Version"] + drop_col = [] + drop_col.append("Brand") + drop_col.append("Version") + data.drop(columns=drop_col, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(100, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "used_cars_saudi_arabia": + # basic info + target_name = "Price" + entity_name = "Model" + task = "regression" + repeated = False + # preprocess + data["Year"] = data["Year"].astype(str) + data["Mileage"] = data["Mileage"].astype(float) + data["Negotiable"] = data["Negotiable"].astype(str) + data["Model"] = data["Make"] + " " + data["Type"] + drop_col = [] + drop_col.append("Make") + drop_col.append("Type") + data.drop(columns=drop_col, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + mask = data[target_name] < 10 + data = data[~mask] + data.reset_index(drop=True, inplace=True) + data[target_name] = np.emath.logn(100, data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "videogame_sales": + # basic info + target_name = "Global_Sales" + entity_name = "Name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log10(data[target_name] * 1e6) + drop_col = [col for col in data.columns if "Sales" in col] + drop_col.remove(target_name) + drop_col.append("Rank") + data.drop(columns=drop_col, inplace=True) + data["Year"] = data["Year"].astype("str") + data["Year"] = data["Year"].str.split(".").str[0] + temp = data["Year"].copy() + temp[temp == "nan"] = np.nan + data["Year"] = temp + data = _drop_high_null(data) + data = _drop_single_unique(data) + data.drop_duplicates(subset=["Name", "Year", "Global_Sales"], inplace=True) + data.reset_index(drop=True, inplace=True) + elif data_name == "whisky": + # basic info + target_name = "Meta_Critic" + entity_name = "Whisky" + task = "classification" + repeated = False + # preprocess + temp = data["Cost"] + map = dict() + map["$$$$$+"] = "over 300 CAD" + map["$$$$$"] = "between 125 and 300 CAD" + map["$$$$"] = "between 70 and 125 CAD" + map["$$$"] = "between 50 and 70 CAD" + map["$$"] = 
"between 30 and 50 CAD" + map["$"] = "less than 30 CAD" + data["Cost"] = temp.map(map) + temp = data["Cluster"] + map = dict() + map["A"] = "Full-bodied, sweet, pronounced sherry, fruity, honey, spicy" + map["B"] = "Full-bodied, sweet, pronounced sherry, fruity, floral, malty" + map["C"] = "Full-bodied, sweet, pronounced sherry, fruity, floral, nutty, spicy" + map["E"] = "Medium-bodied, medium-sweet, fruity, honey, malty, winey" + map["F"] = "Full-bodied, sweet, malty, fruity, spicy, smoky" + map["G"] = "Light-bodied, sweet, apéritif-style, honey, floral, fruity, spicy" + map["H"] = "Very light-bodied, sweet, apéritif-style, malty, fruity, floral" + map["I"] = "Medium-bodied, medium-sweet, smoky, medicinal, spicy, fruity, nutty" + map["J"] = "Full-bodied, dry, very smoky, pungent" + map["R0"] = "No Rye whisky" + map["R1"] = "Low Rye whisky" + map["R2"] = "Standard Rye whisky" + map["R3"] = "High Rye whisky" + map["R4"] = "Strong Rye whisky" + data["Cluster"] = temp.map(map) + data.drop(columns=["STDEV", "#", "Super_Cluster"], inplace=True) + data.fillna(value=np.nan, inplace=True) + data.reset_index(drop=True, inplace=True) + temp = data["Meta_Critic"].copy() + temp[temp <= 8.6] = 0 + temp[temp != 0] = 1 + data["Meta_Critic"] = temp + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "wikiliq_beer": + # basic info + target_name = "Price" + entity_name = "Name" + task = "regression" + repeated = False + # preprocess + data.replace("None", np.nan, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = data[target_name].str.replace("$", "", regex=False) + data[target_name] = data[target_name].astype(float) + mask = data[target_name].copy() == 0 + data = data[~mask] + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("Unnamed:_0") + drop_col.append("Rating") + data.drop(columns=drop_col, inplace=True) + data["ABV"] = data["ABV"].str[:-1] + data["ABV"] = data["ABV"].astype(float) + data["Rate_Count"] = data["Rate_Count"].astype(float) + elif data_name == "wikiliq_spirit": + # basic info + target_name = "Price" + entity_name = "Name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = data[target_name].str.replace("$", "", regex=False) + data[target_name] = data[target_name].astype(float) + mask = data[target_name].copy() == 0 + data = data[~mask] + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + drop_col = [] + drop_col.append("Unnamed:_0") + drop_col.append("Rating") + data.drop(columns=drop_col, inplace=True) + data["ABV"] = data["ABV"].str[:-1] + data["ABV"] = data["ABV"].astype(float) + data["Rate_Count"] = data["Rate_Count"].astype(float) + data.replace("®", "", regex=True, inplace=True) + data.replace("™", "", regex=True, inplace=True) + elif data_name == "wina_pl": + # basic info + target_name = "price" + entity_name = "name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log10(data[target_name]) + data["vegan"] = data["vegan"].astype(str) + data["natural"] = data["natural"].astype(str) + 
data["vintage"] = data["vintage"].astype(str) + data["vintage"] = data["vintage"].str[:4] + temp = data["vintage"].copy() + temp[temp == "nan"] = np.nan + data["vintage"] = temp + data["volume"] = data["volume"] * 1000 + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "wine_dot_com_prices": + # basic info + target_name = "Prices" + entity_name = "Names" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + mask = data[target_name] == 0 + data = data[~mask] + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + temp = data["Names"].copy() + data["Year"] = temp.str[-4:] + temp = data["Countrys"].copy() + data["Grapes"] = temp.str.split("from").str[0] + data["Region"] = temp.str.split("from").str[-1] + temp = data["Capacity"].copy() + temp = temp.str.replace("ml", "", regex=False) + temp = temp.astype("float") + data["Capacity"] = temp + drop_col = [] + drop_col.append("Countrys") + data.drop(columns=drop_col, inplace=True) + elif data_name == "wine_dot_com_ratings": + # basic info + target_name = "Ratings" + entity_name = "Names" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + mask = data[target_name] == 0 + data = data[~mask] + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + temp = data["Names"].copy() + data["Year"] = temp.str[-4:] + temp = data["Countrys"].copy() + data["Grapes"] = temp.str.split("from").str[0] + data["Region"] = temp.str.split("from").str[-1] + temp = data["Capacity"].copy() + temp = temp.str.replace("ml", "", regex=False) + temp = temp.astype("float") + data["Capacity"] = temp + drop_col = [] + drop_col.append("Countrys") + data.drop(columns=drop_col, inplace=True) + elif data_name == "wine_enthusiasts_prices": + # basic info + target_name = "price" + entity_name = "title" + task = "regression" + repeated = False + # preprocess + drop_col = [] + drop_col.append("Unnamed:_0") + drop_col.append("region_1") + drop_col.append("region_2") + drop_col.append("taster_twitter_handle") + data.drop(columns=drop_col, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "wine_enthusiasts_ratings": + # basic info + target_name = "points" + entity_name = "title" + task = "regression" + repeated = False + # preprocess + drop_col = [] + drop_col.append("Unnamed:_0") + drop_col.append("region_1") + drop_col.append("region_2") + drop_col.append("taster_twitter_handle") + data.drop(columns=drop_col, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "wine_vivino_price": + # basic info + target_name = "Price" + entity_name = "Name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + mask = data[target_name] == 0 + data = data[~mask] + data.reset_index(drop=True, inplace=True) + data[target_name] = np.log(data[target_name]) + data = 
_drop_high_null(data) + data = _drop_single_unique(data) + data["Number_Of_Ratings"] = data["Number_Of_Ratings"].astype(float) + data["Region"] = data["Region"] + ", " + data["Country"] + drop_col = [] + drop_col.append("Country") + data.drop(columns=drop_col, inplace=True) + elif data_name == "wine_vivino_rating": + # basic info + target_name = "Rating" + entity_name = "Name" + task = "regression" + repeated = False + # preprocess + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + data["Number_Of_Ratings"] = data["Number_Of_Ratings"].astype(float) + elif data_name == "yelp": + # Exception with different file_type + data = _load_raw_data(data_name, file_type="json") + # basic info + target_name = "stars" + entity_name = "name" + task = "classification" + repeated = False + # preprocess + temp = data["categories"].copy() + mask = temp.str.contains("Restaurants") | temp.str.contains("Food") + data = data[mask].copy() + data.reset_index(drop=True, inplace=True) + data.dropna(subset=[target_name], inplace=True) + data.reset_index(drop=True, inplace=True) + temp = data["stars"].copy() + temp[temp <= 3.5] = 0 + temp[temp != 0] = 1 + data["stars"] = temp + temp = data["attributes"].copy() + temp = temp.to_list() + temp1 = [{} if x is None else x for x in temp] + attributes_df = pd.DataFrame(temp1) + attribute_extract_cols = [] + attribute_extract_cols.append(("RestaurantsPriceRange2", "price_range")) + for col in attribute_extract_cols: + data[col[1]] = attributes_df[col[0]] + temp = data[col[1]].copy() + temp[temp.isnull()] = np.nan + temp[temp == "None"] = np.nan + data[col[1]] = temp + temp = data["hours"].copy() + temp = temp.astype("str") + temp = temp.str.extractall(r"([A-Z]+)") + temp = temp.groupby(level=0).sum()[0] + temp = temp.str.replace("N", "") + temp = temp.str.len() + temp = temp.astype("float") + temp[temp == 0] = np.nan + data["number_of_days_open"] = temp + temp = data["is_open"].copy() + temp[temp == 1] = "open" + temp[temp == 0] = "closed" + temp = temp.astype("str") + data["is_open"] = temp + data["review_count"] = data["review_count"].astype("float") + data.drop(columns="hours", inplace=True) + data.drop(columns="attributes", inplace=True) + data.drop(columns="business_id", inplace=True) + data = _drop_high_null(data) + data = _drop_single_unique(data) + elif data_name == "zomato": + # basic info + target_name = "rating" + entity_name = "name" + task = "classification" + repeated = False + # preprocess + data[target_name].replace("--", np.nan, inplace=True) + data.dropna(subset=target_name, inplace=True) + data.reset_index(drop=True, inplace=True) + data[target_name] = data[target_name].astype("float") + temp = data[target_name].copy() + temp[temp < 4] = 0 + temp[temp != 0] = 1 + data[target_name] = temp + data = _drop_high_null(data) + data = _drop_single_unique(data) + data["cost"] = data["cost"].str[1:] + data["cost"] = data["cost"].astype("float") + drop_col = [] + drop_col.append("Unnamed:_0") + drop_col.append("id") + drop_col.append("menu") + data.drop(columns=drop_col, inplace=True) + + # Save data + _save_processed_data(data_name, data, target_name, entity_name, task, repeated) + + return None + + +# Main +def main(data_name_list): + + if "all" in data_name_list: + data_name_list = carte_datalist + else: + if isinstance(data_name_list, list) == False: + data_name_list = [data_name_list] + + for data_name in data_name_list: + preprocess_data(data_name) + 
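+ # preprocess_data writes the processed table and its metadata (target_name, entity_name, task, repeated) to disk via _save_processed_data and returns None; no exceptions are caught here, so a failure on one dataset aborts the whole run.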
print(f"{data_name} complete!") + + return None + + +if __name__ == "__main__": + + # Set parser + import argparse + + parser = argparse.ArgumentParser(description="Preprocess raw data.") + parser.add_argument( + "-dt", + "--data_name_list", + nargs="+", + type=str, + help="data_name to preprocess", + ) + args = parser.parse_args() + + main(args.data_name_list) diff --git a/carte/src/__init__.py b/carte/src/__init__.py new file mode 100644 index 0000000..fcda92d --- /dev/null +++ b/carte/src/__init__.py @@ -0,0 +1,9 @@ +from carte.src.baseline_multitable import * +from carte.src.baseline_singletable_nn import * +from carte.src.carte_estimator import * +from carte.src.carte_model import * +from carte.src.carte_gridsearch import * +from carte.src.carte_table_to_graph import * +from carte.src.evaluate_utils import * +from carte.src.visualization_utils import * +from carte.src.preprocess_utils import * diff --git a/carte/src/baseline_multitable.py b/carte/src/baseline_multitable.py new file mode 100644 index 0000000..c03271b --- /dev/null +++ b/carte/src/baseline_multitable.py @@ -0,0 +1,634 @@ +"""Baselines for multitable problem.""" + +import pandas as pd +import numpy as np + +from typing import Union +from sklearn.model_selection import train_test_split +from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin +from sklearn.utils.validation import check_is_fitted, check_random_state +from sklearn.metrics import r2_score, roc_auc_score +from joblib import Parallel, delayed + +from catboost import CatBoostRegressor, CatBoostClassifier +from xgboost import XGBRegressor, XGBClassifier +from sklearn.ensemble import ( + HistGradientBoostingRegressor, + HistGradientBoostingClassifier, +) + +class GradientBoostingMultitableBase(BaseEstimator): + """Base class for Gradient Boosting Multitable Estimator.""" + + def __init__( + self, + *, + source_data, + source_fraction, + num_model, + val_size, + random_state, + n_jobs, + ): + self.source_data = source_data + self.source_fraction = source_fraction + self.num_model = num_model + self.val_size = val_size + self.random_state = random_state + self.n_jobs = n_jobs + + def fit(self, X, y): + """Fit the model. + + Parameters + ---------- + X : Pandas dataframe of the target dataset (n_samples) + The input samples. + + y : array-like of shape (n_samples,) + Target values. + + Returns + ------- + self : object + Fitted estimator. + """ + + # Preliminary settings + self.is_fitted_ = False + self.X_ = X + self.y_ = y + self._set_gb_method() + + # Set random_state + random_state = check_random_state(self.random_state) + random_state_list = [random_state.randint(10000) for _ in range(self.num_model)] + + # Run parallel for different train/validation split + result_fit = Parallel(n_jobs=self.n_jobs)( + delayed(self._run_fit_with_source_split)(X, y, rs) + for rs in random_state_list + ) + + # Store the required results that may be used later + self.estimator_list_ = [model for (model, _) in result_fit] + self.valid_loss_ = [valid_loss for (_, valid_loss) in result_fit] + + self.is_fitted_ = True + + return self + + + def _run_fit_with_source_split(self, X, y, random_state): + """Train each model corresponding to the random_state with the split on Source and train/validtion on Target. + + Returns the trained estimator, and the validation loss of the train model. 
+ """ + + # Set validation by val_size + stratify = None + if self._estimator_type == "classifier": + stratify = self.y_ + dx_train, dx_valid, dy_train, dy_valid = train_test_split( + X, + y, + test_size=self.val_size, + shuffle=True, + stratify=stratify, + random_state=random_state, + ) + + # Set source data + X_train_source, y_train_source = self._load_source_data(random_state) + + # Total dataset + X_train = pd.concat([dx_train, X_train_source], axis=0) + y_train = pd.concat([dy_train, y_train_source], axis=0) + + # Set estimator, run fit/predict to obtain validation loss + estimator = self._set_estimator() + estimator.fit(X_train, y_train) + y_pred = self._generate_output(estimator, dx_valid) + valid_loss = self._return_score(dy_valid, y_pred) + + return estimator, valid_loss + + def _load_source_data(self, random_state): + """Loads the Source data and extract based on the defined fraction of Source. + + Applies stratification on the Source data based on their sizes. + The max. size of the source data is set at 10,000 to prevent overfitting on the Source. + """ + # Set train_size (max = 10000) + if len(self.source_data["X"]) > 10000: + train_size = 10000 / len(self.source_data["X"]) * self.source_fraction + else: + train_size = self.source_fraction + # Set split for source data + if self._estimator_type == "regressor": + stratify = self.source_data["domain_indicator"] + if self._estimator_type == "classifier": + y_source_temp = self.source_data["y"].copy() + y_source_temp = y_source_temp.astype(str) + stratify = self.source_data["domain_indicator"] + "_" + y_source_temp + X_train_source, _, y_train_source, _ = train_test_split( + self.source_data["X"], + self.source_data["y"], + train_size=train_size, + random_state=random_state, + shuffle=True, + stratify=stratify, + ) + return X_train_source, y_train_source + + def _generate_output(self, estimator, X): + """Generate output on the given estimator and X.""" + + # Predict + if self._estimator_type == "regressor": + y_pred = estimator.predict(X) + else: + y_pred = estimator.predict_proba(X) + # Reshape prediction + if self._estimator_type == "classifier": + num_pred = len(y_pred) + if y_pred.shape == (num_pred, 2): + y_pred = y_pred[:, 1] + elif y_pred.shape == (num_pred, 1): + y_pred = y_pred.ravel() + else: + pass + # Control for nan in prediction + if np.isnan(y_pred).sum() > 0: + mean_pred = np.mean(self.y_) + y_pred[np.isnan(y_pred)] = mean_pred + return y_pred + + def _return_score(self, y, y_pred): + """Return the score based on the task.""" + if self._estimator_type == "regressor": + score = r2_score(y, y_pred) + else: + score = roc_auc_score(y, y_pred) + return score + + def _set_estimator(self): + """Set the estimator according to the model of Gradient-Boosted Trees.""" + + fixed_params = dict() + if self.gb_method_ == "catboost": + fixed_params["cat_features"] = self.cat_features_ + fixed_params["verbose"] = False + fixed_params["allow_writing_files"] = False + fixed_params["thread_count"] = self.thread_count + fixed_params["max_ctr_complexity"] = 2 + catboost_params = dict() + catboost_params["max_depth"] = self.max_depth + catboost_params["learning_rate"] = self.learning_rate + catboost_params["bagging_temperature"] = self.bagging_temperature + catboost_params["l2_leaf_reg"] = self.l2_leaf_reg + catboost_params["one_hot_max_size"] = self.one_hot_max_size + catboost_params["iterations"] = self.iterations + if self._estimator_type == "regressor": + estimator_ = CatBoostRegressor(**fixed_params, **catboost_params) + else: + 
estimator_ = CatBoostClassifier(**fixed_params, **catboost_params) + elif self.gb_method_ == "xgboost": + fixed_params["booster"] = "gbtree" + fixed_params["tree_method"] = "exact" # exact approx hist + xgb_params = dict() + xgb_params["n_estimators"] = self.n_estimators + xgb_params["max_depth"] = self.max_depth + xgb_params["min_child_weight"] = self.min_child_weight + xgb_params["subsample"] = self.subsample + xgb_params["learning_rate"] = self.learning_rate + xgb_params["colsample_bylevel"] = self.colsample_bylevel + xgb_params["colsample_bytree"] = self.colsample_bytree + xgb_params["gamma"] = self.reg_gamma + xgb_params["lambda"] = self.reg_lambda + xgb_params["alpha"] = self.reg_alpha + if self._estimator_type == "regressor": + estimator_ = XGBRegressor(**fixed_params, **xgb_params) + else: + estimator_ = XGBClassifier(**fixed_params, **xgb_params) + elif self.gb_method_ == "histgb": + histgb_params = dict() + histgb_params["learning_rate"] = self.learning_rate + histgb_params["max_depth"] = self.max_depth + histgb_params["max_leaf_nodes"] = self.max_leaf_nodes + histgb_params["min_samples_leaf"] = self.min_samples_leaf + histgb_params["l2_regularization"] = self.l2_regularization + if self._estimator_type == "regressor": + estimator_ = HistGradientBoostingRegressor(**fixed_params, **histgb_params) + else: + estimator_ = HistGradientBoostingClassifier(**fixed_params, **histgb_params) + return estimator_ + + def _set_gb_method(self,): + self.gb_method_ = None + return None + +class GradientBoostingRegressorBase(RegressorMixin, GradientBoostingMultitableBase): + """Base class for Gradient Boosting Multitable Regressor.""" + + def __init__( + self, + *, + source_data, + source_fraction, + num_model, + val_size, + random_state, + n_jobs, + ): + super(GradientBoostingRegressorBase, self).__init__( + source_data=source_data, + source_fraction = source_fraction, + num_model = num_model, + val_size = val_size, + random_state = random_state, + n_jobs = n_jobs, + ) + + def predict(self, X): + """Predict values for X. Returns the average of predicted values over all the models. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + y : ndarray, shape (n_samples,) + The predicted values. + """ + check_is_fitted(self, "is_fitted_") + # Obtain output + X_test = X.copy() + out = [estimator.predict(X_test) for estimator in self.estimator_list_] + if self.num_model == 1: + out = np.array(out).squeeze().transpose() + else: + out = np.array(out).squeeze().transpose() + out = np.mean(out, axis=1) + # Control for nan in prediction + if np.isnan(out).sum() > 0: + mean_pred = np.mean(self.y_) + out[np.isnan(out)] = mean_pred + return out + + +class GradientBoostingClassifierBase(ClassifierMixin, GradientBoostingMultitableBase): + """Base class for Gradient Boosting Multitable Classifier.""" + + def __init__( + self, + *, + source_data, + source_fraction, + num_model, + val_size, + random_state, + n_jobs, + ): + super(GradientBoostingClassifierBase, self).__init__( + source_data=source_data, + source_fraction = source_fraction, + num_model = num_model, + val_size = val_size, + random_state = random_state, + n_jobs = n_jobs, + ) + + def predict(self, X): + """Predict classes for X. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + y : ndarray, shape (n_samples,) + The predicted classes. 
+ """ + check_is_fitted(self, "is_fitted_") + return np.round(self.predict_proba(X)) + + def predict_proba(self, X): + """Predict class probabilities for X. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + p : ndarray, shape (n_samples,) for binary classification or (n_samples, n_classes) + The class probabilities of the input samples. + """ + + check_is_fitted(self, "is_fitted_") + # Obtain output + out = [estimator.predict_proba(X)[:, 1] for estimator in self.estimator_list_] + if self.num_model == 1: + out = np.array(out).transpose() + else: + out = np.array(out).squeeze().transpose() + out = np.mean(out, axis=1) + # Control for nan in prediction + if np.isnan(out).sum() > 0: + mean_pred = np.mean(self.y_) + out[np.isnan(out)] = mean_pred + return out + + def decision_function(self, X): + """Compute the decision function of X.""" + decision = self.predict_proba(X) + return decision + + +class CatBoostMultitableRegressor(GradientBoostingRegressorBase): + """Base class for CatBoost Multitable Regressor.""" + + def __init__( + self, + *, + source_data: dict = {}, + max_depth: int = 6, + learning_rate: float = 0.03, + bagging_temperature: float = 1, + l2_leaf_reg: float = 3.0, + one_hot_max_size: int = 2, + iterations: int = 1000, + thread_count: int = 1, + source_fraction: float = 0.5, + num_model: int = 1, + val_size: float = 0.1, + random_state: int = 0, + n_jobs: int = 1, + ): + super(CatBoostMultitableRegressor, self).__init__( + source_data=source_data, + source_fraction = source_fraction, + num_model = num_model, + val_size = val_size, + random_state = random_state, + n_jobs = n_jobs, + ) + + self.max_depth = max_depth + self.learning_rate = learning_rate + self.bagging_temperature = bagging_temperature + self.l2_leaf_reg = l2_leaf_reg + self.one_hot_max_size = one_hot_max_size + self.iterations = iterations + self.thread_count = thread_count + + def _set_gb_method(self,): + """Set the Gradient-Boosting method. + + For CatBoost, it sets the required indicators of categorical columns. + """ + self.gb_method_ = "catboost" + # Set column names + X_total_train = pd.concat([self.X_, self.source_data["X"]], axis=0) + self.cat_col_names_ = X_total_train.select_dtypes( + include="object" + ).columns.tolist() + self.cat_features_ = [ + X_total_train.columns.get_loc(col) for col in self.cat_col_names_ + ] + return None + + +class CatBoostMultitableClassifier(GradientBoostingClassifierBase): + """Base class for CatBoost Multitable Classifier.""" + + def __init__( + self, + *, + source_data: dict = {}, + max_depth: int = 6, + learning_rate: float = 0.03, + bagging_temperature: float = 1, + l2_leaf_reg: float = 3.0, + one_hot_max_size: int = 2, + iterations: int = 1000, + thread_count: int = 1, + source_fraction: float = 0.5, + num_model: int = 1, + val_size: float = 0.1, + random_state: int = 0, + n_jobs: int = 1, + ): + super(CatBoostMultitableClassifier, self).__init__( + source_data=source_data, + source_fraction = source_fraction, + num_model = num_model, + val_size = val_size, + random_state = random_state, + n_jobs = n_jobs, + ) + + self.max_depth = max_depth + self.learning_rate = learning_rate + self.bagging_temperature = bagging_temperature + self.l2_leaf_reg = l2_leaf_reg + self.one_hot_max_size = one_hot_max_size + self.iterations = iterations + self.thread_count = thread_count + + def _set_gb_method(self,): + """Set the Gradient-Boosting method. 
+ + For CatBoost, it sets the required indicators of categorical columns. + """ + self.gb_method_ = "catboost" + # Set column names + X_total_train = pd.concat([self.X_, self.source_data["X"]], axis=0) + self.cat_col_names_ = X_total_train.select_dtypes( + include="object" + ).columns.tolist() + self.cat_features_ = [ + X_total_train.columns.get_loc(col) for col in self.cat_col_names_ + ] + return None + +class HistGBMultitableRegressor(GradientBoostingRegressorBase): + """Base class for Historgram Gradient Boosting Multitable Regressor.""" + + def __init__( + self, + *, + source_data: dict = {}, + learning_rate: float = 0.1, + max_depth: Union[None, int] = None, + max_leaf_nodes: int = 31, + min_samples_leaf: int = 20, + l2_regularization: float = 0, + source_fraction: float = 0.5, + num_model: int = 1, + val_size: float = 0.1, + random_state: int = 0, + n_jobs: int = 1, + ): + super(HistGBMultitableRegressor, self).__init__( + source_data=source_data, + source_fraction = source_fraction, + num_model = num_model, + val_size = val_size, + random_state = random_state, + n_jobs = n_jobs, + ) + + self.learning_rate = learning_rate + self.max_depth = max_depth + self.max_leaf_nodes = max_leaf_nodes + self.min_samples_leaf = min_samples_leaf + self.l2_regularization = l2_regularization + + def _set_gb_method(self,): + """Set the Gradient-Boosting method.""" + self.gb_method_ = "histgb" + return None + + +class HistGBMultitableClassifier(GradientBoostingClassifierBase): + """Base class for Historgram Gradient Boosting Multitable Classifier.""" + + def __init__( + self, + *, + source_data: dict = {}, + learning_rate: float = 0.1, + max_depth: Union[None, int] = None, + max_leaf_nodes: int = 31, + min_samples_leaf: int = 20, + l2_regularization: float = 0, + source_fraction: float = 0.5, + num_model: int = 1, + val_size: float = 0.1, + random_state: int = 0, + n_jobs: int = 1, + ): + super(HistGBMultitableClassifier, self).__init__( + source_data=source_data, + source_fraction = source_fraction, + num_model = num_model, + val_size = val_size, + random_state = random_state, + n_jobs = n_jobs, + ) + + self.learning_rate = learning_rate + self.max_depth = max_depth + self.max_leaf_nodes = max_leaf_nodes + self.min_samples_leaf = min_samples_leaf + self.l2_regularization = l2_regularization + + def _set_gb_method(self,): + """Set the Gradient-Boosting method.""" + self.gb_method_ = "histgb" + return None + + +class XGBoostMultitableRegressor(GradientBoostingRegressorBase): + """Base class for XGBoost Multitable Regressor.""" + + def __init__( + self, + *, + source_data: dict = {}, + n_estimators: int = 100, + max_depth: int = 6, + min_child_weight: float = 1, + subsample: float = 1, + learning_rate: float = 0.3, + colsample_bylevel: float = 1, + colsample_bytree: float = 1, + reg_gamma: float = 0, + reg_lambda: float = 1, + reg_alpha: float = 0, + source_fraction: float = 0.5, + num_model: int = 1, + val_size: float = 0.1, + random_state: int = 0, + n_jobs: int = 1, + ): + super(XGBoostMultitableRegressor, self).__init__( + source_data=source_data, + source_fraction = source_fraction, + num_model = num_model, + val_size = val_size, + random_state = random_state, + n_jobs = n_jobs, + ) + + self.n_estimators = n_estimators + self.max_depth = max_depth + self.min_child_weight = min_child_weight + self.subsample = subsample + self.learning_rate = learning_rate + self.colsample_bylevel = colsample_bylevel + self.colsample_bytree = colsample_bytree + self.reg_gamma = reg_gamma + self.reg_lambda = reg_lambda 
+ self.reg_alpha = reg_alpha + + def _set_gb_method(self,): + """Set the Gradient-Boosting method.""" + self.gb_method_ = "xgboost" + return None + + +class XGBoostMultitableClassifier(GradientBoostingClassifierBase): + """Base class for XGBoost Multitable Classifier.""" + + def __init__( + self, + *, + source_data: dict = {}, + n_estimators: int = 100, + max_depth: int = 6, + min_child_weight: float = 1, + subsample: float = 1, + learning_rate: float = 0.3, + colsample_bylevel: float = 1, + colsample_bytree: float = 1, + reg_gamma: float = 0, + reg_lambda: float = 1, + reg_alpha: float = 0, + source_fraction: float = 0.5, + num_model: int = 1, + val_size: float = 0.1, + random_state: int = 0, + n_jobs: int = 1, + ): + super(XGBoostMultitableClassifier, self).__init__( + source_data=source_data, + source_fraction = source_fraction, + num_model = num_model, + val_size = val_size, + random_state = random_state, + n_jobs = n_jobs, + ) + + self.n_estimators = n_estimators + self.max_depth = max_depth + self.min_child_weight = min_child_weight + self.subsample = subsample + self.learning_rate = learning_rate + self.colsample_bylevel = colsample_bylevel + self.colsample_bytree = colsample_bytree + self.reg_gamma = reg_gamma + self.reg_lambda = reg_lambda + self.reg_alpha = reg_alpha + + def _set_gb_method(self,): + """Set the Gradient-Boosting method.""" + self.gb_method_ = "xgboost" + return None \ No newline at end of file diff --git a/carte/src/baseline_singletable_nn.py b/carte/src/baseline_singletable_nn.py new file mode 100644 index 0000000..3e7e920 --- /dev/null +++ b/carte/src/baseline_singletable_nn.py @@ -0,0 +1,775 @@ +"""Neural network baseline for comparison.""" + +import torch +import torch.nn as nn +import numpy as np +import copy +from typing import Union +from torch import Tensor +from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin +from sklearn.model_selection import train_test_split +from sklearn.utils.validation import check_is_fitted, check_random_state +from torch.utils.data import Dataset, DataLoader +from tqdm import tqdm +from joblib import Parallel, delayed + + +## Simple MLP model +class MLP_Model(nn.Module): + def __init__( + self, + input_dim: int, + hidden_dim: int, + output_dim: int, + dropout_prob: float, + num_layers: int, + ): + super().__init__() + + self.initial = nn.Linear(input_dim, hidden_dim) + + self.mlp_block = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim), + nn.LayerNorm(hidden_dim), + nn.ReLU(), + nn.Dropout(dropout_prob), + ) + self.layers = nn.Sequential(*[self.mlp_block for _ in range(num_layers)]) + + self.classifier = nn.Linear(hidden_dim, output_dim) + + def forward(self, X): + X = self.initial(X) + X = self.layers(X) + X = self.classifier(X) + return X + + +## Residual Block +class Residual_Block(nn.Module): + def __init__( + self, + input_dim: int, + output_dim: int, + hidden_factor: int, + normalization: Union[str, None] = "layernorm", + hidden_dropout_prob: float = 0.2, + residual_dropout_prob: float = 0.2, + ): + super().__init__() + + self.lin1 = nn.Linear(input_dim, output_dim * hidden_factor) + self.lin2 = nn.Linear(output_dim * hidden_factor, output_dim) + self.relu = nn.ReLU() + self.dropout_hidden = nn.Dropout(hidden_dropout_prob) + self.dropout_residual = nn.Dropout(residual_dropout_prob) + + self.norm1: Union[nn.BatchNorm1d, nn.LayerNorm, None] + self.norm2: Union[nn.BatchNorm1d, nn.LayerNorm, None] + if normalization == "batchnorm": + self.norm1 = nn.BatchNorm1d(output_dim * hidden_factor) + self.norm2 = 
nn.BatchNorm1d(output_dim) + elif normalization == "layernorm": + self.norm1 = nn.LayerNorm(output_dim * hidden_factor) + self.norm2 = nn.LayerNorm(output_dim) + else: + self.norm1 = self.norm2 = None + + def reset_parameters(self) -> None: + self.lin1.reset_parameters() + self.lin2.reset_parameters() + if self.norm1 is not None: + self.norm1.reset_parameters() + if self.norm2 is not None: + self.norm2.reset_parameters() + + def forward(self, x: Tensor): + out = self.lin1(x) + out = self.norm1(out) if self.norm1 else out + out = self.relu(out) + out = self.dropout_hidden(out) + + out = self.lin2(out) + out = self.norm2(out) if self.norm2 else out + out = self.relu(out) + out = self.dropout_residual(out) + + out = out + x + out = self.relu(out) + + return out + + +## Resnet model +class RESNET_Model(nn.Module): + def __init__( + self, + input_dim: int, + hidden_dim: int, + output_dim: int, + num_layers: int, + **block_args + ): + super(RESNET_Model, self).__init__() + + self.initial = nn.Linear(input_dim, hidden_dim) + + self.layers = nn.ModuleList( + [ + Residual_Block( + input_dim=hidden_dim, output_dim=hidden_dim, **block_args + ) + for _ in range(num_layers) + ] + ) + + self.classifer = nn.Linear(hidden_dim, output_dim) + + def forward(self, X): + X = self.initial(X) + + for l in self.layers: + X = l(X) + + X = self.classifer(X) + return X + + +class TabularDataset(Dataset): + def __init__(self, X, y): + self.X = X + self.y = y + + def __len__(self): + return self.X.size(0) + + def __getitem__(self, idx): + return self.X[idx], self.y[idx] + + +class MLPBase(BaseEstimator): + """Base class for MLP.""" + + def __init__( + self, + *, + hidden_dim, + learning_rate, + weight_decay, + batch_size, + val_size, + num_model, + max_epoch, + early_stopping_patience, + n_jobs, + device, + random_state, + disable_pbar, + ): + self.hidden_dim = hidden_dim + self.learning_rate = learning_rate + self.weight_decay = weight_decay + self.batch_size = batch_size + self.val_size = val_size + self.num_model = num_model + self.max_epoch = max_epoch + self.early_stopping_patience = early_stopping_patience + self.n_jobs = n_jobs + self.device = device + self.random_state = random_state + self.disable_pbar = disable_pbar + + def fit(self, X, y): + # Preliminary settings + self.is_fitted_ = False + self.device_ = torch.device(self.device) + self.X_ = X + self.y_ = y + self._set_task_specific_settings() + + if isinstance(X, Tensor) == False: + X = torch.tensor(X, dtype=torch.float32) + if isinstance(y, Tensor) == False: + y = torch.tensor(y, dtype=torch.float32) + + # Set random_state + random_state = check_random_state(self.random_state) + random_state_list = [random_state.randint(1000) for _ in range(self.num_model)] + + # Fit model + result_fit = Parallel(n_jobs=self.n_jobs)( + delayed(self._run_train_with_early_stopping)(X, y, rs) + for rs in random_state_list + ) + + # Store the required results that may be used later + self.model_list_ = [model for (model, _, _) in result_fit] + self.valid_loss_ = [valid_loss for (_, valid_loss, _) in result_fit] + self.random_state_list_ = [rs for (_, _, rs) in result_fit] + self.is_fitted_ = True + + return self + + def _run_train_with_early_stopping(self, X, y, random_state): + """Train each model corresponding to the random_state with the early_stopping patience. + + This mode of training sets train/valid set for the early stopping criterion. + Returns the trained model, train and validation loss at the best epoch, and the random_state. 
+ """ + # Set validation by val_size + stratify = None + if self.model_task_ == "classification": + stratify = self.y_ + X_train, X_valid, y_train, y_valid = train_test_split( + X, + y, + test_size=self.val_size, + shuffle=True, + random_state=random_state, + stratify=stratify, + ) + + ds_train = TabularDataset(X_train, y_train) + + # Load model and optimizer + input_dim = X.size(1) + model_run_train = self._load_model(input_dim) + model_run_train.to(self.device_) + optimizer = torch.optim.AdamW( + model_run_train.parameters(), + lr=self.learning_rate, + weight_decay=self.weight_decay, + ) + + # Train model + train_loader = DataLoader(ds_train, batch_size=self.batch_size, shuffle=False) + valid_loss_best = 9e15 + + es_counter = 0 + model_best_ = copy.deepcopy(model_run_train) + for _ in tqdm( + range(1, self.max_epoch + 1), + desc=f"Model No. {random_state}", + disable=self.disable_pbar, + ): + self._run_epoch(model_run_train, optimizer, train_loader) + valid_loss = self._eval(model_run_train, X_valid, y_valid) + if valid_loss < valid_loss_best: + valid_loss_best = valid_loss + model_best_ = copy.deepcopy(model_run_train) + es_counter = 0 + else: + es_counter += 1 + if es_counter > self.early_stopping_patience: + break + model_best_.eval() + return model_best_, valid_loss_best, random_state + + def _run_epoch(self, model, optimizer, train_loader): + """Run an epoch of the input model. + + With each epoch, it updates the model and the optimizer. + """ + model.train() + for data_X, data_y in train_loader: + optimizer.zero_grad() # Clear gradients. + data_X = data_X.to(self.device_) + data_y = data_y.to(self.device_) + out = model(data_X) # Perform a single forward pass. + target = data_y + out = out.view(-1).to(torch.float64) + target = target.to(torch.float64) + loss = self.criterion_(out, target) # Compute the loss. + loss.backward() # Derive gradients. + optimizer.step() # Update parameters based on gradients. + + def _eval(self, model, X, y): + """Run an evaluation of the input data on the input model. + + Returns the selected loss of the input data from the input model. 
+ """ + X = X.to(self.device_) + y = y.to(self.device_) + with torch.no_grad(): + model.eval() + out = model(X) + target = y + out = out.view(-1).to(torch.float64) + target = target.to(torch.float64) + loss_eval = self.criterion_(out, target) + loss_eval = round(loss_eval.detach().item(), 4) + return loss_eval + + def _set_task_specific_settings(self): + self.criterion_ = None + self.output_dim_ = None + self.model_task_ = None + + def _load_model(self, input_dim): + return None + + +class BaseMLPEstimator(MLPBase): + """Base class for MLP Estimator.""" + + def __init__( + self, + *, + hidden_dim: int = 256, + num_layers: int = 2, + dropout_prob: float = 0.2, + learning_rate: float = 1e-3, + weight_decay: float = 1e-2, + batch_size: int = 128, + val_size: float = 0.1, + num_model: int = 1, + max_epoch: int = 200, + early_stopping_patience: Union[None, int] = 10, + n_jobs: int = 1, + device: str = "cpu", + random_state: int = 0, + disable_pbar: bool = True, + ): + super(BaseMLPEstimator, self).__init__( + hidden_dim=hidden_dim, + learning_rate=learning_rate, + weight_decay=weight_decay, + batch_size=batch_size, + val_size=val_size, + num_model=num_model, + max_epoch=max_epoch, + early_stopping_patience=early_stopping_patience, + n_jobs=n_jobs, + device=device, + random_state=random_state, + disable_pbar=disable_pbar, + ) + + self.num_layers = num_layers + self.dropout_prob = dropout_prob + + def _load_model(self, input_dim): + """Load the MLP model for training. + + Returns the model that can be used for training. + """ + + # Set seed for torch - for reproducibility + random_state = check_random_state(self.random_state) + model_seed = random_state.randint(10000) + torch.manual_seed(model_seed) + + model_config = dict() + model_config["input_dim"] = input_dim + model_config["hidden_dim"] = self.hidden_dim + model_config["output_dim"] = self.output_dim_ + model_config["dropout_prob"] = self.dropout_prob + model_config["num_layers"] = self.num_layers + model = MLP_Model(**model_config) + return model + + +class MLPRegressor(RegressorMixin, BaseMLPEstimator): + """ """ + + def __init__( + self, + *, + loss: str = "squared_error", + hidden_dim: int = 256, + num_layers: int = 2, + dropout_prob: float = 0.2, + learning_rate: float = 1e-3, + weight_decay: float = 1e-2, + batch_size: int = 128, + val_size: float = 0.1, + num_model: int = 1, + max_epoch: int = 200, + early_stopping_patience: Union[None, int] = 10, + n_jobs: int = 1, + device: str = "cpu", + random_state: int = 0, + disable_pbar: bool = True, + ): + super(MLPRegressor, self).__init__( + hidden_dim=hidden_dim, + num_layers=num_layers, + dropout_prob=dropout_prob, + learning_rate=learning_rate, + weight_decay=weight_decay, + batch_size=batch_size, + val_size=val_size, + num_model=num_model, + max_epoch=max_epoch, + early_stopping_patience=early_stopping_patience, + n_jobs=n_jobs, + device=device, + random_state=random_state, + disable_pbar=disable_pbar, + ) + + self.loss = loss + + def predict(self, X): + check_is_fitted(self, "is_fitted_") + if isinstance(X, Tensor) == False: + X = torch.tensor(X, dtype=torch.float32) + X = X.to(self.device_) + + # Obtain the predicitve output + with torch.no_grad(): + out = [model(X).cpu().detach().numpy() for model in self.model_list_] + + if self.num_model == 1: + out = np.array(out).squeeze().transpose() + else: + out = np.array(out).squeeze().transpose() + out = np.mean(out, axis=1) + + if np.isnan(out).sum() > 0: + mean_pred = np.mean(self.y_) + out[np.isnan(out)] = mean_pred + + return out + + 
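+    # Illustrative usage sketch (comments only, so nothing runs at import time).
+    # `MLPRegressor` takes dense numeric arrays and averages the predictions of
+    # `num_model` independently seeded networks; names below are placeholders:
+    #
+    #     import numpy as np
+    #     from sklearn.datasets import make_regression
+    #
+    #     X, y = make_regression(n_samples=200, n_features=10, random_state=0)
+    #     reg = MLPRegressor(num_model=3, max_epoch=50, random_state=0)
+    #     reg.fit(X.astype(np.float32), y.astype(np.float32))
+    #     y_pred = reg.predict(X.astype(np.float32))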
def _set_task_specific_settings(self): + if self.loss == "squared_error": + self.criterion_ = torch.nn.MSELoss() + elif self.loss == "absolute_error": + self.criterion_ = torch.nn.L1Loss() + + self.output_dim_ = 1 + self.model_task_ = "regression" + + +class MLPClassifier(ClassifierMixin, BaseMLPEstimator): + """ """ + + def __init__( + self, + *, + loss: str = "binary_crossentropy", + hidden_dim: int = 256, + num_layers: int = 2, + dropout_prob: float = 0.2, + learning_rate: float = 1e-3, + weight_decay: float = 1e-2, + batch_size: int = 128, + val_size: float = 0.1, + num_model: int = 1, + max_epoch: int = 200, + early_stopping_patience: Union[None, int] = 10, + n_jobs: int = 1, + device: str = "cpu", + random_state: int = 0, + disable_pbar: bool = True, + ): + super(MLPClassifier, self).__init__( + hidden_dim=hidden_dim, + num_layers=num_layers, + dropout_prob=dropout_prob, + learning_rate=learning_rate, + weight_decay=weight_decay, + batch_size=batch_size, + val_size=val_size, + num_model=num_model, + max_epoch=max_epoch, + early_stopping_patience=early_stopping_patience, + n_jobs=n_jobs, + device=device, + random_state=random_state, + disable_pbar=disable_pbar, + ) + + self.loss = loss + + def predict(self, X): + check_is_fitted(self, "is_fitted_") + if isinstance(X, Tensor) == False: + X = torch.tensor(X, dtype=torch.float32) + + if self.loss == "binary_crossentropy": + return np.round(self.predict_proba(X)) + elif self.loss == "categorical_crossentropy": + return np.argmax(self.predict_proba(X), axis=1) + + def predict_proba(self, X): + check_is_fitted(self, "is_fitted_") + if isinstance(X, Tensor) == False: + X = torch.tensor(X, dtype=torch.float32) + X = X.to(self.device_) + return self._get_predict_prob(X) + + def decision_function(self, X): + decision = self.predict_proba(X) + if decision.shape[1] == 1: + decision = decision.ravel() + return decision + + def _get_predict_prob(self, X): + # Obtain the predicitve output + with torch.no_grad(): + out = [model(X).cpu().detach().numpy() for model in self.model_list_] + out = np.mean(out, axis=0) + if self.loss == "binary_crossentropy": + out = 1 / (1 + np.exp(-out)) + elif self.loss == "categorical_crossentropy": + out = np.exp(out) / sum(np.exp(out)) + return out + + def _set_task_specific_settings(self): + if self.loss == "binary_crossentropy": + self.criterion_ = torch.nn.BCEWithLogitsLoss() + elif self.loss == "categorical_crossentropy": + self.criterion_ = torch.nn.CrossEntropyLoss() + + self.output_dim_ = len(np.unique(self.y_)) + if self.output_dim_ == 2: + self.output_dim_ -= 1 + self.criterion_ = torch.nn.BCEWithLogitsLoss() + + self.model_task_ = "classification" + + +class BaseRESNETEstimator(MLPBase): + """Base class for RESNET Estimator.""" + + def __init__( + self, + *, + normalization: Union[str, None] = "layernorm", + num_layers: int = 4, + hidden_dim: int = 256, + hidden_factor: int = 2, + hidden_dropout_prob: float = 0.2, + residual_dropout_prob: float = 0.2, + learning_rate: float = 1e-3, + weight_decay: float = 1e-2, + batch_size: int = 128, + val_size: float = 0.1, + num_model: int = 1, + max_epoch: int = 200, + early_stopping_patience: Union[None, int] = 10, + n_jobs: int = 1, + device: str = "cpu", + random_state: int = 0, + disable_pbar: bool = True, + ): + super(BaseRESNETEstimator, self).__init__( + hidden_dim=hidden_dim, + learning_rate=learning_rate, + weight_decay=weight_decay, + batch_size=batch_size, + val_size=val_size, + num_model=num_model, + max_epoch=max_epoch, + 
early_stopping_patience=early_stopping_patience, + n_jobs=n_jobs, + device=device, + random_state=random_state, + disable_pbar=disable_pbar, + ) + + self.normalization = normalization + self.num_layers = num_layers + self.hidden_factor = hidden_factor + self.hidden_dropout_prob = hidden_dropout_prob + self.residual_dropout_prob = residual_dropout_prob + + def _load_model(self, input_dim): + """Load the RESNET model for training. + + Returns the model that can be used for training. + """ + + # Set seed for torch - for reproducibility + random_state = check_random_state(self.random_state) + model_seed = random_state.randint(10000) + torch.manual_seed(model_seed) + + model_config = dict() + model_config["input_dim"] = input_dim + model_config["hidden_dim"] = self.hidden_dim + model_config["output_dim"] = self.output_dim_ + model_config["hidden_factor"] = self.hidden_factor + model_config["hidden_dropout_prob"] = self.hidden_dropout_prob + model_config["residual_dropout_prob"] = self.residual_dropout_prob + model_config["normalization"] = self.normalization + model_config["num_layers"] = self.num_layers + + model = RESNET_Model(**model_config) + return model + + +class RESNETRegressor(RegressorMixin, BaseRESNETEstimator): + """ """ + + def __init__( + self, + *, + loss: str = "squared_error", + normalization: Union[str, None] = "layernorm", + num_layers: int = 4, + hidden_dim: int = 256, + hidden_factor: int = 2, + hidden_dropout_prob: float = 0.2, + residual_dropout_prob: float = 0.2, + learning_rate: float = 1e-3, + weight_decay: float = 1e-2, + batch_size: int = 128, + val_size: float = 0.1, + num_model: int = 1, + max_epoch: int = 200, + early_stopping_patience: Union[None, int] = 10, + n_jobs: int = 1, + device: str = "cpu", + random_state: int = 0, + disable_pbar: bool = True, + ): + super(RESNETRegressor, self).__init__( + normalization=normalization, + num_layers=num_layers, + hidden_dim=hidden_dim, + hidden_factor=hidden_factor, + hidden_dropout_prob=hidden_dropout_prob, + residual_dropout_prob=residual_dropout_prob, + learning_rate=learning_rate, + weight_decay=weight_decay, + batch_size=batch_size, + val_size=val_size, + num_model=num_model, + max_epoch=max_epoch, + early_stopping_patience=early_stopping_patience, + n_jobs=n_jobs, + device=device, + random_state=random_state, + disable_pbar=disable_pbar, + ) + + self.loss = loss + + def predict(self, X): + check_is_fitted(self, "is_fitted_") + if isinstance(X, Tensor) == False: + X = torch.tensor(X, dtype=torch.float32) + X = X.to(self.device_) + + # Obtain the predicitve output + with torch.no_grad(): + out = [model(X).cpu().detach().numpy() for model in self.model_list_] + + if self.num_model == 1: + out = np.array(out).squeeze().transpose() + else: + out = np.array(out).squeeze().transpose() + out = np.mean(out, axis=1) + + if np.isnan(out).sum() > 0: + mean_pred = np.mean(self.y_) + out[np.isnan(out)] = mean_pred + + return out + + def _set_task_specific_settings(self): + if self.loss == "squared_error": + self.criterion_ = torch.nn.MSELoss() + elif self.loss == "absolute_error": + self.criterion_ = torch.nn.L1Loss() + + self.output_dim_ = 1 + self.model_task_ = "regression" + + +class RESNETClassifier(ClassifierMixin, BaseRESNETEstimator): + """ """ + + def __init__( + self, + *, + loss: str = "binary_crossentropy", + normalization: Union[str, None] = "layernorm", + num_layers: int = 4, + hidden_dim: int = 256, + hidden_factor: int = 2, + hidden_dropout_prob: float = 0.2, + residual_dropout_prob: float = 0.2, + learning_rate: 
float = 1e-3, + weight_decay: float = 1e-2, + batch_size: int = 128, + val_size: float = 0.1, + num_model: int = 1, + max_epoch: int = 200, + early_stopping_patience: Union[None, int] = 10, + n_jobs: int = 1, + device: str = "cpu", + random_state: int = 0, + disable_pbar: bool = True, + ): + super(RESNETClassifier, self).__init__( + normalization=normalization, + num_layers=num_layers, + hidden_dim=hidden_dim, + hidden_factor=hidden_factor, + hidden_dropout_prob=hidden_dropout_prob, + residual_dropout_prob=residual_dropout_prob, + learning_rate=learning_rate, + weight_decay=weight_decay, + batch_size=batch_size, + val_size=val_size, + num_model=num_model, + max_epoch=max_epoch, + early_stopping_patience=early_stopping_patience, + n_jobs=n_jobs, + device=device, + random_state=random_state, + disable_pbar=disable_pbar, + ) + + self.loss = loss + + def predict(self, X): + check_is_fitted(self, "is_fitted_") + if isinstance(X, Tensor) == False: + X = torch.tensor(X, dtype=torch.float32) + + if self.loss == "binary_crossentropy": + return np.round(self.predict_proba(X)) + elif self.loss == "categorical_crossentropy": + return np.argmax(self.predict_proba(X), axis=1) + + def predict_proba(self, X): + check_is_fitted(self, "is_fitted_") + if isinstance(X, Tensor) == False: + X = torch.tensor(X, dtype=torch.float32) + X = X.to(self.device_) + return self._get_predict_prob(X) + + def decision_function(self, X): + decision = self.predict_proba(X) + if decision.shape[1] == 1: + decision = decision.ravel() + return decision + + def _get_predict_prob(self, X): + # Obtain the predicitve output + with torch.no_grad(): + out = [model(X).cpu().detach().numpy() for model in self.model_list_] + out = np.mean(out, axis=0) + if self.loss == "binary_crossentropy": + out = 1 / (1 + np.exp(-out)) + elif self.loss == "categorical_crossentropy": + out = np.exp(out) / sum(np.exp(out)) + return out + + def _set_task_specific_settings(self): + if self.loss == "binary_crossentropy": + self.criterion_ = torch.nn.BCEWithLogitsLoss() + elif self.loss == "categorical_crossentropy": + self.criterion_ = torch.nn.CrossEntropyLoss() + + self.output_dim_ = len(np.unique(self.y_)) + if self.output_dim_ == 2: + self.output_dim_ -= 1 + self.criterion_ = torch.nn.BCEWithLogitsLoss() + + self.model_task_ = "classification" diff --git a/carte/src/carte_estimator.py b/carte/src/carte_estimator.py new file mode 100644 index 0000000..ac67ad4 --- /dev/null +++ b/carte/src/carte_estimator.py @@ -0,0 +1,1541 @@ +"""CARTE estimators for regression and classification.""" + +import torch +import numpy as np +import pandas as pd +import copy +import math +from typing import Union +from torcheval.metrics import ( + MeanSquaredError, + R2Score, + BinaryAUROC, + BinaryNormalizedEntropy, + BinaryAUPRC, + MulticlassAUROC, +) +from torch import Tensor +from torch_geometric.loader import DataLoader +from torch_geometric.data import Batch +from sklearn.model_selection import RepeatedKFold, RepeatedStratifiedKFold, ShuffleSplit, StratifiedShuffleSplit, ParameterGrid, train_test_split +from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin +from sklearn.utils.validation import check_is_fitted, check_random_state +from joblib import Parallel, delayed +from tqdm import tqdm +from scipy.special import softmax +from carte.src.carte_model import CARTE_NN_Model, CARTE_NN_Model_Ablation +from carte.configs.directory import config_directory + + +class BaseCARTEEstimator(BaseEstimator): + """Base class for CARTE Estimator.""" + + def __init__( 
+ self, + *, + num_layers, + load_pretrain, + freeze_pretrain, + learning_rate, + batch_size, + max_epoch, + dropout, + val_size, + cross_validate, + early_stopping_patience, + num_model, + random_state, + n_jobs, + device, + disable_pbar, + ): + self.num_layers = num_layers + self.load_pretrain = load_pretrain + self.freeze_pretrain = freeze_pretrain + self.learning_rate = learning_rate + self.batch_size = batch_size + self.max_epoch = max_epoch + self.dropout = dropout + self.val_size = val_size + self.cross_validate = cross_validate + self.early_stopping_patience = early_stopping_patience + self.num_model = num_model + self.random_state = random_state + self.n_jobs = n_jobs + self.device = device + self.disable_pbar = disable_pbar + + def fit(self, X, y): + """Fit the CARTE model. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + y : array-like of shape (n_samples,) + Target values. + + Returns + ------- + self : object + Fitted estimator. + """ + # Preliminary settings + self.is_fitted_ = False + self.device_ = torch.device(self.device) + self.X_ = X + self.y_ = y + self._set_task_specific_settings() + + # Set the cv-splits + splits = self._set_train_valid_split() + + # Fit model + result_fit = Parallel(n_jobs=self.n_jobs)( + delayed(self._run_train_with_early_stopping)(X, split_index) + for split_index in splits + ) + + # Store the required results that may be used later + self.model_list_ = [model for (model, _) in result_fit] + self.valid_loss_ = [valid_loss for (_, valid_loss) in result_fit] + self.weights_ = np.array([1/self.num_model]*self.num_model) + self.is_fitted_ = True + + return self + + def _run_train_with_early_stopping(self, X, split_index): + """Train each model corresponding to the random_state with the early_stopping patience. + + This mode of training sets train/valid set for the early stopping criterion. + Returns the trained model, and the validation loss at the best epoch. + """ + + # Set datasets + ds_train = [X[i] for i in split_index[0]] + ds_valid = [X[i] for i in split_index[1]] + + # Set validation batch for evaluation + ds_valid_eval = self._set_data_eval(data=ds_valid) + + # Load model and optimizer + model_run_train = self._load_model() + model_run_train.to(self.device_) + optimizer = torch.optim.AdamW( + model_run_train.parameters(), lr=self.learning_rate + ) + + # Train model + train_loader = DataLoader(ds_train, batch_size=self.batch_size, shuffle=False) + valid_loss_best = 9e15 + es_counter = 0 + model_best_ = copy.deepcopy(model_run_train) + for _ in tqdm( + range(1, self.max_epoch + 1), + desc=f"Model No. xx", + disable=self.disable_pbar, + ): + self._run_epoch(model_run_train, optimizer, train_loader) + valid_loss = self._eval(model_run_train, ds_valid_eval) + if valid_loss < valid_loss_best: + valid_loss_best = valid_loss + model_best_ = copy.deepcopy(model_run_train) + es_counter = 0 + else: + es_counter += 1 + if es_counter > self.early_stopping_patience: + break + model_best_.eval() + return model_best_, valid_loss_best + + def _run_epoch(self, model, optimizer, train_loader): + """Run an epoch of the input model. + + Each epoch consists of steps that update the model and the optimizer. + """ + model.train() + for data in train_loader: # Iterate in batches over the training dataset. + self._run_step(model, data, optimizer) + + def _run_step(self, model, data, optimizer): + """Run a step of the training. + + With each step, it updates the model and the optimizer. 
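+
+        Notes
+        -----
+        For single-output tasks (``output_dim_ == 1``) the logits are flattened so
+        that their shape matches the targets expected by ``MSELoss`` and
+        ``BCEWithLogitsLoss``. A minimal, self-contained illustration::
+
+            import torch
+
+            out = torch.randn(8, 1)                      # raw model output
+            target = torch.randint(0, 2, (8,)).float()   # targets of shape (8,)
+            loss = torch.nn.BCEWithLogitsLoss()(out.view(-1), target)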
+ """ + optimizer.zero_grad() # Clear gradients. + data.to(self.device_) # Send to device + out = model(data) # Perform a single forward pass. + target = data.y # Set target + if self.output_dim_ == 1: + out = out.view(-1).to(torch.float32) # Reshape outputSet head index + target = target.to(torch.float32) # Reshape target + loss = self.criterion_(out, target) # Compute the loss. + loss.backward() # Derive gradients. + optimizer.step() # Update parameters based on gradients. + + def _eval(self, model, ds_eval): + """Run an evaluation of the input data on the input model. + + Returns the selected loss of the input data from the input model. + """ + with torch.no_grad(): + model.eval() + out = model(ds_eval) + target = ds_eval.y + if self.output_dim_ == 1: + out = out.view(-1).to(torch.float32) + target = target.to(torch.float32) + self.valid_loss_metric_.update(out, target) + loss_eval = self.valid_loss_metric_.compute() + loss_eval = loss_eval.detach().item() + if self.valid_loss_flag_ == "neg": + loss_eval = -1 * loss_eval + self.valid_loss_metric_.reset() + return loss_eval + + def _set_train_valid_split(self): + """Train/validation split for the bagging strategy. + + The style of split depends on the cross_validate parameter. + Reuturns the train/validation split with KFold cross-validation. + """ + + if self._estimator_type == "regressor": + if self.cross_validate: + n_splits = int(1 / self.val_size) + n_repeats = int(self.num_model / n_splits) + splitter = RepeatedKFold( + n_splits=n_splits, n_repeats=n_repeats, random_state=self.random_state, + ) + else: + splitter = ShuffleSplit(n_splits = self.num_model, test_size=self.val_size, random_state=self.random_state) + splits = [ + (train_index, test_index) + for train_index, test_index in splitter.split(np.arange(0, len(self.X_))) + ] + else: + if self.cross_validate: + n_splits = int(1 / self.val_size) + n_repeats = int(self.num_model / n_splits) + splitter = RepeatedStratifiedKFold( + n_splits=n_splits, n_repeats=n_repeats, random_state=self.random_state, + ) + else: + splitter = StratifiedShuffleSplit(n_splits = self.num_model, test_size=self.val_size, random_state=self.random_state) + splits = [ + (train_index, test_index) + for train_index, test_index in splitter.split( + np.arange(0, len(self.X_)), self.y_ + ) + ] + + return splits + + def _set_data_eval(self, data): + """Constructs the aggregated graph object from the list of data. + + This is consistent with the graph object from torch_geometric. + Returns the aggregated graph object. + """ + make_batch = Batch() + with torch.no_grad(): + ds_eval = make_batch.from_data_list(data, follow_batch=["edge_index"]) + ds_eval.to(self.device_) + return ds_eval + + def _generate_output(self, X, model_list, weights): + """Generate the output from the trained model. + + Returns the output (prediction) of input X. 
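+
+        Notes
+        -----
+        Self-contained sketch of the aggregation performed here (illustrative
+        numbers): per-model outputs are averaged with the bagging weights and, for
+        classifiers, passed through the link function::
+
+            import numpy as np
+
+            outputs = np.array([[0.2, 1.3, -0.4],        # model 1, three samples
+                                [0.6, 0.9, -0.2]])       # model 2, three samples
+            avg = np.average(outputs, weights=[0.5, 0.5], axis=0)
+            proba = 1 / (1 + np.exp(-avg))               # 'binary_crossentropy' case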
+ """ + + # Obtain the batch to feed into the network + ds_predict_eval = self._set_data_eval(data=X) + with torch.no_grad(): + out = [ + model(ds_predict_eval).cpu().detach().numpy() for model in model_list + ] + out = np.array(out).squeeze().transpose() + if len(model_list) != 1: + out = np.average(out, weights=weights, axis=1) + + # Change if the task is classification + if self.loss == "binary_crossentropy": + out = 1 / (1 + np.exp(-out)) + elif self.loss == "categorical_crossentropy": + out = softmax(out, axis=1) + + # Control for nulls in prediction + if np.isnan(out).sum() > 0: + mean_pred = np.mean(self.y_) + out[np.isnan(out)] = mean_pred + return out + + def _set_task_specific_settings(self): + """Set task specific settings for regression and classfication. + """ + + if self._estimator_type == "regressor": + if self.loss == "squared_error": + self.criterion_ = torch.nn.MSELoss() + elif self.loss == "absolute_error": + self.criterion_ = torch.nn.L1Loss() + if self.scoring == "squared_error": + self.valid_loss_metric_ = MeanSquaredError() + self.valid_loss_flag_ = "pos" + elif self.scoring == "r2_score": + self.valid_loss_metric_ = R2Score() + self.valid_loss_flag_ = "neg" + self.output_dim_ = 1 + elif self._estimator_type == "classifier": + if self.loss == "binary_crossentropy": + self.criterion_ = torch.nn.BCEWithLogitsLoss() + elif self.loss == "categorical_crossentropy": + self.criterion_ = torch.nn.CrossEntropyLoss() + self.output_dim_ = len(np.unique(self.y_)) + if self.output_dim_ == 2: + self.output_dim_ -= 1 + self.criterion_ = torch.nn.BCEWithLogitsLoss() + if self.scoring == "auroc": + self.valid_loss_metric_ = BinaryAUROC() + self.valid_loss_flag_ = "neg" + elif self.scoring == "binary_entropy": + self.valid_loss_metric_ = BinaryNormalizedEntropy(from_logits = True) + self.valid_loss_flag_ = "neg" + elif self.scoring == "auprc": + self.valid_loss_metric_ = BinaryAUPRC() + self.valid_loss_flag_ = "neg" + if self.loss == "categorical_crossentropy": + self.valid_loss_metric_ = MulticlassAUROC(num_classes=self.output_dim_) + self.valid_loss_flag_ = "neg" + self.classes_ = np.unique(self.y_) + self.valid_loss_metric_.to(self.device_) + + def _load_model(self): + """Load the CARTE model for training. + + This loads the pretrained weights if the parameter load_pretrain is set to True. + The freeze of the pretrained weights are controlled by the freeze_pretrain parameter. + + Returns the model that can be used for training. 
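+
+        Notes
+        -----
+        ``strict=False`` lets the freshly initialized task head keep its weights while
+        only the matching pretrained keys are loaded. Generic illustration with toy
+        modules (unrelated to the actual CARTE architecture)::
+
+            import torch
+
+            src = torch.nn.Linear(4, 4)                     # "pretrained" block
+            dst = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.Linear(4, 1))
+            keys = {"0." + k: v for k, v in src.state_dict().items()}
+            result = dst.load_state_dict(keys, strict=False)
+            # result.missing_keys -> ['1.weight', '1.bias']; the head stays untouched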
+ """ + # Model configuration + model_config = dict() + model_config["input_dim_x"] = self.X_[0].x.size(1) + model_config["input_dim_e"] = self.X_[0].x.size(1) + model_config["hidden_dim"] = self.X_[0].x.size(1) + model_config["ff_dim"] = self.X_[0].x.size(1) + model_config["num_heads"] = 12 + model_config["num_layers"] = self.num_layers-1 + model_config["output_dim"] = self.output_dim_ + model_config["dropout"] = self.dropout + + # Set seed for torch - for reproducibility + random_state = check_random_state(self.random_state) + model_seed = random_state.randint(10000) + torch.manual_seed(model_seed) + + # Set model architecture + model = CARTE_NN_Model(**model_config) + + # Load the pretrained weights if specified + if self.load_pretrain: + dir_model = config_directory["pretrained_model"] + pretrain_model_dict = torch.load(dir_model, map_location=self.device_) + initial_x_keys = [ + key for key in pretrain_model_dict.keys() if "initial_x" in key + ] + for key in initial_x_keys: + pretrain_model_dict[key + "_pretrain"] = pretrain_model_dict.pop(key) + model.load_state_dict(pretrain_model_dict, strict=False) + + # Freeze the pretrained weights if specified + if self.freeze_pretrain: + for param in model.ft_base.read_out_block.parameters(): + param.requires_grad = False + for param in model.ft_base.layers.parameters(): + param.requires_grad = False + + return model + + +class CARTERegressor(RegressorMixin, BaseCARTEEstimator): + """CARTE Regressor for Regression tasks. + + This estimator is GNN-based model compatible with the CARTE pretrained model. + + Parameters + ---------- + loss : {'squared_error', 'absolute_error'}, default='squared_error' + The loss function used for backpropagation. + scoring : {'r2_score', 'squared_error'}, default='r2_score' + The scoring function used for validation. + num_layers : int, default=1 + The number of layers for the NN model + load_pretrain : bool, default=True + Indicates whether to load pretrained weights or not + freeze_pretrain : bool, default=True + Indicates whether to freeze the pretrained weights in the training or not + learning_rate : float, default=1e-3 + The learning rate of the model. The model uses AdamW as the optimizer + batch_size : int, default=16 + The batch size used for training + max_epoch : int or None, default=500 + The maximum number of epoch for training + dropout : float, default=0 + The dropout rate for training + val_size : float, default=0.1 + The size of the validation set used for early stopping + cross_validate : bool, default=False + Indicates whether to use cross-validation strategy for train/validation split + early_stopping_patience : int or None, default=40 + The early stopping patience when early stopping is used. + If set to None, no early stopping is employed + num_model : int, default=1 + The total number of models used for Bagging strategy + random_state : int or None, default=0 + Pseudo-random number generator to control the train/validation data split + if early stoppingis enabled, the weight initialization, and the dropout. + Pass an int for reproducible output across multiple function calls. + n_jobs : int, default=1 + Number of jobs to run in parallel. Training the estimator the score are parallelized + over the number of models. + device : {"cpu", "gpu"}, default="cpu", + The device used for the estimator. + disable_pbar : bool, default=True + Indicates whether to show progress bars for the training process. 
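+
+    Examples
+    --------
+    Minimal illustrative sketch, not a tested snippet. It assumes the table has
+    already been converted into a list of graph objects with the package's
+    table-to-graph transformer (assumed here to be importable as
+    ``Table2GraphTransformer``; adapt the import and preprocessing to your setup)::
+
+        from carte.src.carte_table_to_graph import Table2GraphTransformer  # assumed path
+        from carte.src.carte_estimator import CARTERegressor
+
+        preprocessor = Table2GraphTransformer()
+        graphs_train = preprocessor.fit_transform(X_train_df, y=y_train)
+        graphs_test = preprocessor.transform(X_test_df)
+
+        estimator = CARTERegressor(num_model=5, n_jobs=5, disable_pbar=False)
+        estimator.fit(X=graphs_train, y=y_train)
+        y_pred = estimator.predict(graphs_test)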
+ """ + + def __init__( + self, + *, + loss: str = "squared_error", + scoring: str = "r2_score", + num_layers: int = 1, + load_pretrain: bool = True, + freeze_pretrain: bool = True, + learning_rate: float = 1e-3, + batch_size: int = 16, + max_epoch: int = 500, + dropout: float = 0, + val_size: float = 0.2, + cross_validate: bool = False, + early_stopping_patience: Union[None, int] = 40, + num_model: int = 1, + random_state: int = 0, + n_jobs: int = 1, + device: str = "cpu", + disable_pbar: bool = True, + ): + super(CARTERegressor, self).__init__( + num_layers=num_layers, + load_pretrain=load_pretrain, + freeze_pretrain=freeze_pretrain, + learning_rate=learning_rate, + batch_size=batch_size, + max_epoch=max_epoch, + dropout=dropout, + val_size=val_size, + cross_validate=cross_validate, + early_stopping_patience=early_stopping_patience, + num_model=num_model, + random_state=random_state, + n_jobs=n_jobs, + device=device, + disable_pbar=disable_pbar, + ) + + self.loss = loss + self.scoring = scoring + + def predict(self, X): + """Predict values for X. Returns the average of predicted values over all the models. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + y : ndarray, shape (n_samples,) + The predicted values. + """ + + check_is_fitted(self, "is_fitted_") + + out = self._generate_output(X=X, model_list = self.model_list_, weights=None) + + return out + + +class CARTEClassifier(ClassifierMixin, BaseCARTEEstimator): + """CARTE Classifier for Classification tasks. + + This estimator is GNN-based model compatible with the CARTE pretrained model. + + Parameters + ---------- + loss : {'binary_crossentropy', 'categorical_crossentropy'}, default='binary_crossentropy' + The loss function used for backpropagation. + scoring : {'auroc', 'auprc', 'binary_entropy'}, default='auroc' + The scoring function used for validation. + num_layers : int, default=1 + The number of layers for the NN model + load_pretrain : bool, default=True + Indicates whether to load pretrained weights or not + freeze_pretrain : bool, default=True + Indicates whether to freeze the pretrained weights in the training or not + learning_rate : float, default=1e-3 + The learning rate of the model. The model uses AdamW as the optimizer + batch_size : int, default=16 + The batch size used for training + max_epoch : int or None, default=500 + The maximum number of epoch for training + dropout : float, default=0 + The dropout rate for training + val_size : float, default=0.1 + The size of the validation set used for early stopping + cross_validate : bool, default=False + Indicates whether to use cross-validation strategy for train/validation split + early_stopping_patience : int or None, default=40 + The early stopping patience when early stopping is used. + If set to None, no early stopping is employed + num_model : int, default=1 + The total number of models used for Bagging strategy + random_state : int or None, default=0 + Pseudo-random number generator to control the train/validation data split + if early stoppingis enabled, the weight initialization, and the dropout. + Pass an int for reproducible output across multiple function calls. + n_jobs : int, default=1 + Number of jobs to run in parallel. Training the estimator the score are parallelized + over the number of models. + device : {"cpu", "gpu"}, default="cpu", + The device used for the estimator. + disable_pbar : bool, default=True + Indicates whether to show progress bars for the training process. 
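+
+    Examples
+    --------
+    Minimal illustrative sketch, mirroring ``CARTERegressor`` above and assuming the
+    inputs were already converted into graph objects (placeholder variable names).
+    For a multiclass target, switch to the categorical loss::
+
+        from carte.src.carte_estimator import CARTEClassifier
+
+        clf = CARTEClassifier(loss="categorical_crossentropy", num_model=5, n_jobs=5)
+        clf.fit(X=graphs_train, y=y_train)
+        proba = clf.predict_proba(graphs_test)
+        labels = clf.predict(graphs_test)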
+ """ + + def __init__( + self, + *, + loss: str = "binary_crossentropy", + scoring: str = "auroc", + num_layers: int = 1, + load_pretrain: bool = True, + freeze_pretrain: bool = True, + learning_rate: float = 1e-3, + batch_size: int = 16, + max_epoch: int = 500, + dropout: float = 0, + val_size: float = 0.2, + cross_validate: bool = False, + early_stopping_patience: Union[None, int] = 40, + num_model: int = 1, + random_state: int = 0, + n_jobs: int = 1, + device: str = "cpu", + disable_pbar: bool = True, + ): + super(CARTEClassifier, self).__init__( + num_layers=num_layers, + load_pretrain=load_pretrain, + freeze_pretrain=freeze_pretrain, + learning_rate=learning_rate, + batch_size=batch_size, + max_epoch=max_epoch, + dropout=dropout, + val_size=val_size, + cross_validate=cross_validate, + early_stopping_patience=early_stopping_patience, + num_model=num_model, + random_state=random_state, + n_jobs=n_jobs, + device=device, + disable_pbar=disable_pbar, + ) + + self.loss = loss + self.scoring = scoring + + def predict(self, X): + """Predict classes for X. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + y : ndarray, shape (n_samples,) + The predicted classes. + """ + check_is_fitted(self, "is_fitted_") + + if self.loss == "binary_crossentropy": + return np.round(self.predict_proba(X)) + elif self.loss == "categorical_crossentropy": + return np.argmax(self.predict_proba(X), axis=1) + + def predict_proba(self, X): + """Predict class probabilities for X. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + p : ndarray, shape (n_samples,) for binary classification or (n_samples, n_classes) + The class probabilities of the input samples. + """ + check_is_fitted(self, "is_fitted_") + return self._get_predict_prob(X) + + def decision_function(self, X): + """Compute the decision function of X. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + decision : ndarray, shape (n_samples,) + """ + decision = self.predict_proba(X) + if decision.shape[1] == 1: + decision = decision.ravel() + return decision + + def _get_predict_prob(self, X): + """Return the average of the outputs over all the models. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + raw_predictions : array, shape (n_samples,) + The raw predicted values. 
+ """ + + out = self._generate_output(X=X, model_list = self.model_list_, weights=None) + + return out + + +class IdxIterator: + """Class for iterating indices to set up the batch for CARTE Multitables""" + + def __init__( + self, + n_batch: int, + domain_indicator: Tensor, + target_fraction: float, + ): + self.n_batch = n_batch + self.target_fraction = target_fraction + self.domain_indicator = domain_indicator + + # Number of samples for target and source + self.num_t = (domain_indicator == 0).sum().item() + self.count_t = torch.ones(self.num_t) + + self.num_source_domain = domain_indicator.unique().size(0) - 1 + + domain_list = domain_indicator.unique() + source_domain_list = domain_list[domain_list != 0] + + self.num_s = [(domain_indicator == x).sum().item() for x in source_domain_list] + + count_s_ = [torch.ones(x) for x in self.num_s] + self.count_s = count_s_[0] + for x in range(1, self.num_source_domain): + self.count_s = torch.block_diag(self.count_s, count_s_[x]) + if self.num_source_domain == 1: + self.count_s = self.count_s.reshape(1, -1) + self.count_s_fixed = copy.deepcopy(self.count_s) + + self.train_flag = None + + self.set_num_samples() + + def set_num_samples(self): + self.num_samples_t = math.ceil(self.n_batch * self.target_fraction) + n_batch_source_total = int((self.n_batch - self.num_samples_t)) + num_samples_s = [ + int(n_batch_source_total / self.num_source_domain) + for _ in range(self.num_source_domain) + ] + if sum(num_samples_s) != n_batch_source_total: + num_samples_s[ + torch.randint(0, self.num_source_domain, (1,)) + ] += n_batch_source_total - sum(num_samples_s) + self.num_samples_s = num_samples_s + + def sample(self): + idx_batch_t = torch.multinomial( + self.count_t, num_samples=self.num_samples_t, replacement=False + ) + self.count_t[idx_batch_t] -= 1 + + idx_batch_s = torch.tensor([]).to(dtype=torch.long) + for x in range(self.num_source_domain): + idx_batch_s_ = torch.multinomial( + self.count_s[x], num_samples=self.num_samples_s[x], replacement=False + ) + self.count_s[x, idx_batch_s_] -= 1 + idx_batch_s = torch.hstack([idx_batch_s, idx_batch_s_]) + if torch.sum(self.count_s[x, :]) < self.num_samples_s[x]: + self.count_s[x] = self.count_s_fixed[x, :] + + if torch.sum(self.count_t) < self.num_samples_t: + self.count_t = torch.ones(self.num_t) + self.train_flag = False + + return idx_batch_t, idx_batch_s + + +class BaseCARTEMultitableEstimator(BaseCARTEEstimator): + """Base class for CARTE Multitable Estimator.""" + + def __init__( + self, + *, + source_data, + num_layers, + load_pretrain, + freeze_pretrain, + learning_rate, + batch_size, + max_epoch, + dropout, + val_size, + target_fraction, + early_stopping_patience, + num_model, + random_state, + n_jobs, + device, + disable_pbar, + ): + + super(BaseCARTEMultitableEstimator, self).__init__( + num_layers=num_layers, + load_pretrain=load_pretrain, + freeze_pretrain=freeze_pretrain, + learning_rate=learning_rate, + batch_size=batch_size, + max_epoch=max_epoch, + dropout=dropout, + val_size=val_size, + early_stopping_patience=early_stopping_patience, + num_model=num_model, + random_state=random_state, + n_jobs=n_jobs, + device=device, + disable_pbar=disable_pbar, + cross_validate=False, # overridden + ) + + self.source_data = source_data + self.target_fraction = target_fraction + + def fit(self, X, y): + """Fit the CARTE Multitable model. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples of the target data. 
+ + y : array-like of shape (n_samples,) + Target values. + + Returns + ------- + self : object + Fitted estimator. + """ + + # Preliminary settings + self.is_fitted_ = False + self.device_ = torch.device(self.device) + self.X_ = X + self.y_ = y + self._set_task_specific_settings() + + # Set random_state, source list, and grid for parallelism + random_state = check_random_state(self.random_state) + random_state_list = [random_state.randint(1000) for _ in range(self.num_model)] + self.source_list_total_ = list(self.source_data.keys()) + ["target"] + grid = {"source": self.source_list_total_, "random_state": random_state_list} + model_space_total = list(ParameterGrid(grid)) + + # Fit model + result_fit = Parallel(n_jobs=self.n_jobs)( + delayed(self._run_train_with_early_stopping)(model_space) + for model_space in model_space_total + ) + + self.result_fit_ = result_fit + + # Store the required results that may be used later + self.model_list_ = [model for (model, _, _, _) in result_fit] + self.valid_loss_ = [valid_loss for (_, valid_loss, _, _) in result_fit] + self.source_list_ = [sl for (_, _, sl, _) in result_fit] + self.random_state_list_ = [rs for (_, _, _, rs) in result_fit] + self.is_fitted_ = True + + val_loss_mean_ = [] + val_loss_total_ = [] + for source_name in self.source_list_total_: + idx_ = np.where(np.array(self.source_list_) == source_name)[0] + val_loss_total_ += [self.valid_loss_[idx] for idx in idx_] + val_loss_mean_ += [np.array(val_loss_total_).mean()] + val_loss_mean_ = -1 * np.array(val_loss_mean_) + val_loss_total_ = -1 * np.array(val_loss_total_) + weights = val_loss_mean_ / val_loss_total_.std() + self.weights_ = np.exp(weights) / sum(np.exp(weights)) + + return self + + def _run_train_with_early_stopping(self, model_space): + """Train each model corresponding to the random_state with the early_stopping patience. + + This mode of training sets train/valid set for the early stopping criterion. + Returns the trained model, train and validation loss at the best epoch, and the random_state. 
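+
+        Notes
+        -----
+        ``fit`` trains one model per (source, random_state) pair produced by a
+        parameter grid, with the target-only configuration included under the
+        source name ``"target"``. Self-contained illustration of that expansion
+        (placeholder source name)::
+
+            from sklearn.model_selection import ParameterGrid
+
+            grid = {"source": ["source_A", "target"], "random_state": [3, 7]}
+            list(ParameterGrid(grid))   # 4 combinations, one call to this method each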
+ """ + + # Set random_state and source data + random_state = model_space["random_state"] + if model_space["source"] == "target": + target_only_flag = True + source_data = None + else: + source_data = self.source_data[model_space["source"]] + target_only_flag = False + + # Target dataset + y_target = [data.y.cpu().detach().numpy() for data in self.X_] + stratify = None + if self._estimator_type == "classifier": + stratify = y_target + ds_train_target, ds_valid_target = train_test_split( + self.X_, + test_size=self.val_size, + shuffle=True, + stratify=stratify, + random_state=random_state, + ) + + # Source dataset + ds_train_source, ds_valid_source = self._set_source_data( + source_data, + ds_valid_target, + random_state, + ) + + # Set validation batch for evaluation + ds_valid = ds_valid_target + ds_valid_source + ds_train = ds_train_target + ds_train_source + ds_valid_eval = self._set_data_eval(data=ds_valid) + + # Load model and optimizer + model_run_train = self._load_model() + model_run_train.to(self.device_) + optimizer = torch.optim.AdamW( + model_run_train.parameters(), lr=self.learning_rate + ) + + # Train model + valid_loss_best = 9e15 + es_counter = 0 + model_best_ = copy.deepcopy(model_run_train) + + if target_only_flag: + train_loader = DataLoader( + ds_train, batch_size=self.batch_size, shuffle=False + ) + else: + domain_indicator = torch.tensor([data.domain for data in ds_train]) + idx_iterator = IdxIterator( + n_batch=self.batch_size, + domain_indicator=domain_indicator, + target_fraction=self.target_fraction, + ) + + for _ in tqdm( + range(1, self.max_epoch + 1), + desc=f"Model No. xx", + disable=self.disable_pbar, + ): + + # Run epoch + if target_only_flag: + self._run_epoch(model_run_train, optimizer, train_loader) + else: + self._run_epoch_multitable( + ds_train_source, + ds_train_target, + model_run_train, + optimizer, + idx_iterator, + ) + + # Obtain validation losses + valid_loss = self._eval(model_run_train, ds_valid_eval) + + # Update model + if valid_loss < valid_loss_best: + valid_loss_best = valid_loss + model_best_ = copy.deepcopy(model_run_train) + es_counter = 0 + else: + es_counter += 1 + if es_counter > self.early_stopping_patience: + break + model_best_.eval() + return model_best_, valid_loss_best, model_space["source"], random_state + + def _run_epoch_multitable( + self, + ds_source, + ds_target, + model, + optimizer, + idx_iterator, + ): + """Run an epoch for multitable of the input model.""" + model.train() + idx_iterator.train_flag = True + while idx_iterator.train_flag: + idx_batch_target, idx_batch_source = idx_iterator.sample() + ds_source_batch = [ds_source[idx] for idx in idx_batch_source] + ds_target_batch = [ds_target[idx] for idx in idx_batch_target] + ds_batch = ds_source_batch + ds_target_batch + ds_train = self._set_data_eval(data=ds_batch) + self._run_step(data=ds_train, model=model, optimizer=optimizer) + + def _set_source_data(self, source_data, ds_valid_target, random_state): + """Prepare the source data for training.""" + if source_data is None: + return [], [] + else: + y_source = [data.y.cpu().detach().numpy() for data in source_data] + stratify = [data.domain for data in source_data] + stratify = np.array(stratify) + if self._estimator_type == "classifier": + y_source = [data.y.cpu().detach().numpy() for data in source_data] + y_source = pd.Series(y_source) + y_source = y_source.astype(str) + stratify = pd.Series(stratify) + stratify = stratify.astype(str) + stratify = stratify + "_" + y_source + ds_train_source, ds_valid_source = 
train_test_split( + source_data, + test_size=len(ds_valid_target), + shuffle=True, + stratify=stratify, + random_state=random_state, + ) + return ds_train_source, ds_valid_source + + +class CARTEMultitableRegressor(RegressorMixin, BaseCARTEMultitableEstimator): + """CARTE Multitable Regressor for Regression tasks. + + This estimator is GNN-based model compatible with the CARTE pretrained model. + + Parameters + ---------- + loss : {'squared_error', 'absolute_error'}, default='squared_error' + The loss function used for backpropagation. + scoring : {'r2_score', 'squared_error'}, default='r2_score' + The scoring function used for validation. + source_date : dict, default={} + The source data used in multitable estimator. + num_layers : int, default=1 + The number of layers for the NN model + load_pretrain : bool, default=True + Indicates whether to load pretrained weights or not + freeze_pretrain : bool, default=True + Indicates whether to freeze the pretrained weights in the training or not + learning_rate : float, default=1e-3 + The learning rate of the model. The model uses AdamW as the optimizer + batch_size : int, default=16 + The batch size used for training + max_epoch : int or None, default=500 + The maximum number of epoch for training + dropout : float, default=0 + The dropout rate for training + val_size : float, default=0.1 + The size of the validation set used for early stopping + target_fraction : float, default=0.125 + The fraction of target data inside of a batch when training + early_stopping_patience : int or None, default=40 + The early stopping patience when early stopping is used. + If set to None, no early stopping is employed + num_model : int, default=1 + The total number of models used for Bagging strategy + random_state : int or None, default=0 + Pseudo-random number generator to control the train/validation data split + if early stoppingis enabled, the weight initialization, and the dropout. + Pass an int for reproducible output across multiple function calls. + n_jobs : int, default=1 + Number of jobs to run in parallel. Training the estimator the score are parallelized + over the number of models. + device : {"cpu", "gpu"}, default="cpu", + The device used for the estimator. + disable_pbar : bool, default=True + Indicates whether to show progress bars for the training process. + """ + + def __init__( + self, + *, + loss: str = "squared_error", + scoring: str = "r2_score", + source_data: dict = {}, + num_layers: int = 1, + load_pretrain: bool = True, + freeze_pretrain: bool = True, + learning_rate: float = 1e-3, + batch_size: int = 16, + max_epoch: int = 500, + dropout: float = 0, + val_size: float = 0.2, + target_fraction: float = 0.125, + early_stopping_patience: Union[None, int] = 40, + num_model: int = 1, + random_state: int = 0, + n_jobs: int = 1, + device: str = "cpu", + disable_pbar: bool = True, + ): + super(CARTEMultitableRegressor, self).__init__( + num_layers=num_layers, + load_pretrain=load_pretrain, + freeze_pretrain=freeze_pretrain, + learning_rate=learning_rate, + batch_size=batch_size, + max_epoch=max_epoch, + dropout=dropout, + val_size=val_size, + early_stopping_patience=early_stopping_patience, + num_model=num_model, + random_state=random_state, + n_jobs=n_jobs, + device=device, + disable_pbar=disable_pbar, + source_data=source_data, + target_fraction=target_fraction, + ) + + self.loss = loss + self.scoring = scoring + + def predict(self, X): + """Predict values for X. 
+ + Returns the weighted average of the singletable model and all pairwise model with 1-source. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + y : ndarray, shape (n_samples,) + The predicted values. + """ + + out = [] + for source_name in self.source_list_total_: + idx_ = np.where(np.array(self.source_list_) == source_name)[0] + model_list = [self.model_list_[idx] for idx in idx_] + out += [self._generate_output(X, model_list=model_list, weights=None)] + out = np.array(out).squeeze().transpose() + out = np.average(out, weights=self.weights_, axis=1) + if np.isnan(out).sum() > 0: + mean_pred = np.mean(self.y_) + out[np.isnan(out)] = mean_pred + return out + + +class CARTEMultitableClassifer(ClassifierMixin, BaseCARTEMultitableEstimator): + """CARTE Multitable Classifier for Classification tasks. + + This estimator is GNN-based model compatible with the CARTE pretrained model. + + Parameters + ---------- + loss : {'binary_crossentropy', 'categorical_crossentropy'}, default='binary_crossentropy' + The loss function used for backpropagation. + scoring : {'auroc', 'auprc', 'binary_entropy'}, default='auroc' + The scoring function used for validation. + source_date : dict, default={} + The source data used in multitable estimator. + num_layers : int, default=1 + The number of layers for the NN model + load_pretrain : bool, default=True + Indicates whether to load pretrained weights or not + freeze_pretrain : bool, default=True + Indicates whether to freeze the pretrained weights in the training or not + learning_rate : float, default=1e-3 + The learning rate of the model. The model uses AdamW as the optimizer + batch_size : int, default=16 + The batch size used for training + max_epoch : int or None, default=500 + The maximum number of epoch for training + dropout : float, default=0 + The dropout rate for training + val_size : float, default=0.1 + The size of the validation set used for early stopping + target_fraction : float, default=0.125 + The fraction of target data inside of a batch when training + early_stopping_patience : int or None, default=40 + The early stopping patience when early stopping is used. + If set to None, no early stopping is employed + num_model : int, default=1 + The total number of models used for Bagging strategy + random_state : int or None, default=0 + Pseudo-random number generator to control the train/validation data split + if early stoppingis enabled, the weight initialization, and the dropout. + Pass an int for reproducible output across multiple function calls. + n_jobs : int, default=1 + Number of jobs to run in parallel. Training the estimator the score are parallelized + over the number of models. + device : {"cpu", "gpu"}, default="cpu", + The device used for the estimator. + disable_pbar : bool, default=True + Indicates whether to show progress bars for the training process. 
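+
+    Examples
+    --------
+    Minimal illustrative sketch with placeholder names. Both the target and the
+    source tables are assumed to have been converted to lists of graph objects
+    beforehand, with ``data.domain`` set to ``0`` for target graphs and to a
+    distinct non-zero value per source::
+
+        from carte.src.carte_estimator import CARTEMultitableClassifer
+
+        source_data = {"source_A": source_graphs}   # placeholder source table
+        clf = CARTEMultitableClassifer(source_data=source_data, num_model=2, n_jobs=2)
+        clf.fit(X=target_graphs_train, y=y_train)
+        proba = clf.predict_proba(target_graphs_test)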
+ """ + + def __init__( + self, + *, + loss: str = "binary_crossentropy", + scoring: str = "auroc", + source_data: dict = {}, + num_layers: int = 1, + load_pretrain: bool = True, + freeze_pretrain: bool = True, + learning_rate: float = 1e-3, + batch_size: int = 16, + max_epoch: int = 500, + dropout: float = 0, + val_size: float = 0.2, + target_fraction: float = 0.125, + early_stopping_patience: Union[None, int] = 40, + num_model: int = 1, + random_state: int = 0, + n_jobs: int = 1, + device: str = "cpu", + disable_pbar: bool = True, + ): + super(CARTEMultitableClassifer, self).__init__( + num_layers=num_layers, + load_pretrain=load_pretrain, + freeze_pretrain=freeze_pretrain, + learning_rate=learning_rate, + batch_size=batch_size, + max_epoch=max_epoch, + dropout=dropout, + val_size=val_size, + early_stopping_patience=early_stopping_patience, + num_model=num_model, + random_state=random_state, + n_jobs=n_jobs, + device=device, + disable_pbar=disable_pbar, + source_data=source_data, + target_fraction=target_fraction, + ) + + self.loss = loss + self.scoring = scoring + + def predict(self, X): + """Predict classes for X. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + y : ndarray, shape (n_samples,) + The predicted classes. + """ + check_is_fitted(self, "is_fitted_") + + if self.loss == "binary_crossentropy": + return np.round(self.predict_proba(X)) + elif self.loss == "categorical_crossentropy": + return np.argmax(self.predict_proba(X), axis=1) + + def predict_proba(self, X): + """Predict class probabilities for X. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + p : ndarray, shape (n_samples,) for binary classification or (n_samples, n_classes) + The class probabilities of the input samples. + """ + check_is_fitted(self, "is_fitted_") + return self._get_predict_prob(X) + + def decision_function(self, X): + """Compute the decision function of ``X``. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + decision : ndarray, shape (n_samples,) + """ + decision = self.predict_proba(X) + if decision.shape[1] == 1: + decision = decision.ravel() + return decision + + def _get_predict_prob(self, X): + """Returns the weighted average of the singletable model and all pairwise model with 1-source. + + Parameters + ---------- + X : list of graph objects with size (n_samples) + The input samples. + + Returns + ------- + raw_predictions : array, shape (n_samples,) + The raw predicted values. + """ + + out = [] + for source_name in self.source_list_total_: + idx_ = np.where(np.array(self.source_list_) == source_name)[0] + model_list = [self.model_list_[idx] for idx in idx_] + out += [self._generate_output(X, model_list=model_list, weights=None)] + out = np.array(out).squeeze().transpose() + out = np.average(out, weights=self.weights_, axis=1) + # Transform according to loss + if self.loss == "binary_crossentropy": + out = 1 / (1 + np.exp(-out)) + elif self.loss == "categorical_crossentropy": + out = softmax(out, axis=1) + # Control for nulls in prediction + if np.isnan(out).sum() > 0: + mean_pred = np.mean(self.y_) + out[np.isnan(out)] = mean_pred + return out + + +class CARTE_AblationRegressor(CARTERegressor): + """CARTE Ablation Regressor for Regression tasks. + + This estimator is GNN-based model compatible with the CARTE pretrained model. 
+ Note that this is an implementation for the ablation study of CARTE + + Parameters + ---------- + ablation_method : {'exclude-edge', 'exclude-attention', 'exclude-attention-edge'}, default='exclude-edge' + The ablation method for CARTE Estimators. + loss : {'squared_error', 'absolute_error'}, default='squared_error' + The loss function used for backpropagation. + scoring : {'r2_score', 'squared_error'}, default='r2_score' + The scoring function used for validation. + num_layers : int, default=1 + The number of layers for the NN model + load_pretrain : bool, default=True + Indicates whether to load pretrained weights or not + freeze_pretrain : bool, default=True + Indicates whether to freeze the pretrained weights in the training or not + learning_rate : float, default=1e-3 + The learning rate of the model. The model uses AdamW as the optimizer + batch_size : int, default=16 + The batch size used for training + max_epoch : int or None, default=500 + The maximum number of epoch for training + dropout : float, default=0 + The dropout rate for training + val_size : float, default=0.1 + The size of the validation set used for early stopping + cross_validate : bool, default=False + Indicates whether to use cross-validation strategy for train/validation split + early_stopping_patience : int or None, default=40 + The early stopping patience when early stopping is used. + If set to None, no early stopping is employed + num_model : int, default=1 + The total number of models used for Bagging strategy + random_state : int or None, default=0 + Pseudo-random number generator to control the train/validation data split + if early stoppingis enabled, the weight initialization, and the dropout. + Pass an int for reproducible output across multiple function calls. + n_jobs : int, default=1 + Number of jobs to run in parallel. Training the estimator the score are parallelized + over the number of models. + device : {"cpu", "gpu"}, default="cpu", + The device used for the estimator. + disable_pbar : bool, default=True + Indicates whether to show progress bars for the training process. + """ + def __init__( + self, + *, + ablation_method: str = "exclude-edge", + loss: str = "squared_error", + scoring: str = "r2_score", + num_layers: int = 1, + load_pretrain: bool = True, + freeze_pretrain: bool = True, + learning_rate: float = 1e-3, + batch_size: int = 16, + max_epoch: int = 500, + dropout: float = 0, + val_size: float = 0.2, + cross_validate: bool = False, + early_stopping_patience: Union[None, int] = 40, + num_model: int = 1, + random_state: int = 0, + n_jobs: int = 1, + device: str = "cpu", + disable_pbar: bool = True, + ): + + super(CARTE_AblationRegressor, self).__init__( + loss=loss, + scoring=scoring, + num_layers=num_layers, + load_pretrain=load_pretrain, + freeze_pretrain=freeze_pretrain, + learning_rate=learning_rate, + batch_size=batch_size, + max_epoch=max_epoch, + dropout=dropout, + val_size=val_size, + cross_validate=cross_validate, + early_stopping_patience=early_stopping_patience, + num_model=num_model, + random_state=random_state, + n_jobs=n_jobs, + device=device, + disable_pbar=disable_pbar, + ) + + self.ablation_method = ablation_method + + def _load_model(self): + """Load the CARTE Ablation model for training. + + This loads the pretrained weights if the parameter load_pretrain is set to True. + The freeze of the pretrained weights are controlled by the freeze_pretrain parameter. + + Returns the model depending on the ablation method that can be used for training. 
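+
+        Notes
+        -----
+        Only ``'exclude-edge'`` changes the architecture in this method, by replacing
+        the initial edge encoder with an identity mapping; the attention-related
+        ablations are assumed to be handled inside ``CARTE_NN_Model_Ablation`` through
+        the ``ablation_method`` argument. Self-contained sketch of what the identity
+        swap means for edge features::
+
+            import torch
+
+            edge_encoder = torch.nn.Identity()           # stands in for ft_base.initial_e
+            feats = torch.randn(5, 300)
+            assert torch.equal(edge_encoder(feats), feats)   # features pass through unchanged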
+ """ + + # Model configuration + model_config = dict() + model_config["ablation_method"] = self.ablation_method + model_config["input_dim_x"] = self.X_[0].x.size(1) + model_config["input_dim_e"] = self.X_[0].x.size(1) + model_config["hidden_dim"] = self.X_[0].x.size(1) + model_config["ff_dim"] = self.X_[0].x.size(1) + model_config["num_heads"] = 12 + model_config["num_layers"] = self.num_layers-1 + model_config["output_dim"] = self.output_dim_ + model_config["dropout"] = self.dropout + + # Set seed for torch - for reproducibility + random_state = check_random_state(self.random_state) + model_seed = random_state.randint(10000) + torch.manual_seed(model_seed) + + # Set model architecture + model = CARTE_NN_Model_Ablation(**model_config) + + # Load the pretrained weights if specified + if self.load_pretrain: + dir_model = config_directory["pretrained_model"] + model.load_state_dict( + torch.load(dir_model, map_location=self.device_), strict=False + ) + # Freeze the pretrained weights if specified + if self.freeze_pretrain: + for param in model.ft_base.read_out_block.parameters(): + param.requires_grad = False + for param in model.ft_base.layers.parameters(): + param.requires_grad = False + + # Set architecture for ablation + if self.ablation_method == "exclude-edge": + model.ft_base.initial_e = torch.nn.Identity() + + return model + + +class CARTE_AblationClassifier(CARTEClassifier): + """CARTE Ablation Classifier for Classification tasks. + + This estimator is GNN-based model compatible with the CARTE pretrained model. + Note that this is an implementation for the ablation study of CARTE + + Parameters + ---------- + ablation_method : {'exclude-edge', 'exclude-attention', 'exclude-attention-edge'}, default='exclude-edge' + The ablation method for CARTE Estimators. + loss : {'binary_crossentropy', 'categorical_crossentropy'}, default='binary_crossentropy' + The loss function used for backpropagation. + scoring : {'auroc', 'auprc', 'binary_entropy'}, default='auroc' + The scoring function used for validation. + num_layers : int, default=1 + The number of layers for the NN model + load_pretrain : bool, default=True + Indicates whether to load pretrained weights or not + freeze_pretrain : bool, default=True + Indicates whether to freeze the pretrained weights in the training or not + learning_rate : float, default=1e-3 + The learning rate of the model. The model uses AdamW as the optimizer + batch_size : int, default=16 + The batch size used for training + max_epoch : int or None, default=500 + The maximum number of epoch for training + dropout : float, default=0 + The dropout rate for training + val_size : float, default=0.1 + The size of the validation set used for early stopping + cross_validate : bool, default=False + Indicates whether to use cross-validation strategy for train/validation split + early_stopping_patience : int or None, default=40 + The early stopping patience when early stopping is used. + If set to None, no early stopping is employed + num_model : int, default=1 + The total number of models used for Bagging strategy + random_state : int or None, default=0 + Pseudo-random number generator to control the train/validation data split + if early stoppingis enabled, the weight initialization, and the dropout. + Pass an int for reproducible output across multiple function calls. + n_jobs : int, default=1 + Number of jobs to run in parallel. Training the estimator the score are parallelized + over the number of models. 
+ device : {"cpu", "gpu"}, default="cpu", + The device used for the estimator. + disable_pbar : bool, default=True + Indicates whether to show progress bars for the training process. + """ + def __init__( + self, + *, + ablation_method: str = "exclude-edge", + loss: str = "binary_crossentropy", + scoring: str = "auroc", + num_layers: int = 1, + load_pretrain: bool = False, + freeze_pretrain: bool = False, + learning_rate: float = 1e-3, + batch_size: int = 16, + max_epoch: int = 500, + dropout: float = 0, + val_size: float = 0.2, + cross_validate: bool = False, + early_stopping_patience: Union[None, int] = 40, + num_model: int = 1, + random_state: int = 0, + n_jobs: int = 1, + device: str = "cpu", + disable_pbar: bool = True, + ): + + super(CARTE_AblationClassifier, self).__init__( + loss=loss, + scoring=scoring, + num_layers=num_layers, + load_pretrain=load_pretrain, + freeze_pretrain=freeze_pretrain, + learning_rate=learning_rate, + batch_size=batch_size, + max_epoch=max_epoch, + dropout=dropout, + val_size=val_size, + cross_validate=cross_validate, + early_stopping_patience=early_stopping_patience, + num_model=num_model, + random_state=random_state, + n_jobs=n_jobs, + device=device, + disable_pbar=disable_pbar, + ) + + self.ablation_method = ablation_method + + def _load_model(self): + """Load the CARTE Ablation model for training. + + This loads the pretrained weights if the parameter load_pretrain is set to True. + The freeze of the pretrained weights are controlled by the freeze_pretrain parameter. + + Returns the model depending on the ablation method that can be used for training. + """ + + # Model configuration + model_config = dict() + model_config["ablation_method"] = self.ablation_method + model_config["input_dim_x"] = self.X_[0].x.size(1) + model_config["input_dim_e"] = self.X_[0].x.size(1) + model_config["hidden_dim"] = self.X_[0].x.size(1) + model_config["ff_dim"] = self.X_[0].x.size(1) + model_config["num_heads"] = 12 + model_config["num_layers"] = self.num_layers-1 + model_config["output_dim"] = self.output_dim_ + model_config["dropout"] = self.dropout + + # Set seed for torch - for reproducibility + random_state = check_random_state(self.random_state) + model_seed = random_state.randint(10000) + torch.manual_seed(model_seed) + + # Set model architecture + model = CARTE_NN_Model_Ablation(**model_config) + + # Load the pretrained weights if specified + if self.load_pretrain: + dir_model = config_directory["pretrained_model"] + pretrain_model_dict = torch.load(dir_model, map_location=self.device_) + initial_x_keys = [ + key for key in pretrain_model_dict.keys() if "initial_x" in key + ] + for key in initial_x_keys: + pretrain_model_dict[key + "_pretrain"] = pretrain_model_dict.pop(key) + model.load_state_dict(pretrain_model_dict, strict=False) + + # Freeze the pretrained weights if specified + if self.freeze_pretrain: + for param in model.ft_base.read_out_block.parameters(): + param.requires_grad = False + for param in model.ft_base.layers.parameters(): + param.requires_grad = False + + # Set architecture for ablation + if self.ablation_method == "exclude-edge": + model.ft_base.initial_e = torch.nn.Identity() + + return model diff --git a/carte/src/carte_gridsearch.py b/carte/src/carte_gridsearch.py new file mode 100644 index 0000000..e5dad54 --- /dev/null +++ b/carte/src/carte_gridsearch.py @@ -0,0 +1,110 @@ +"""Custom grid search used for CARTE-GNN model""" + +import ast +import copy +import pandas as pd +import numpy as np +from joblib import Parallel, delayed +from time 
import perf_counter
+from sklearn.model_selection import ParameterGrid
+
+
+def carte_gridsearch(
+    estimator,
+    X_train: list,
+    y_train: np.array,
+    param_distributions: dict,
+    refit: bool = True,
+    n_jobs: int = 1,
+):
+    """CARTE grid search.
+
+    This function runs grid search for CARTE GNN models.
+
+    Parameters
+    ----------
+    estimator : CARTE estimator
+        The CARTE estimator used for grid search
+    X_train : list
+        The list of graph objects for the train data transformed using Table2GraphTransformer
+    y_train : numpy array of shape (n_samples,)
+        The target variable of the train data.
+    param_distributions: dict
+        The dictionary of parameter grids to search for the optimal parameter.
+    refit: bool, default=True
+        Indicates whether to return a refitted estimator with the best parameter.
+    n_jobs: int, default=1
+        Number of jobs to run in parallel. Training the estimator in the grid search is parallelized
+        over the parameter grid.
+
+    Returns
+    -------
+    Result : Pandas DataFrame
+        The results for each parameter setting in the grid.
+    best_params : dict
+        The dictionary of best parameters obtained through grid search.
+    best_estimator : CARTE estimator
+        The CARTE estimator trained using the best_params if refit is set to True.
+    """
+    # Set parameter list
+    param_distributions_ = param_distributions.copy()
+    param_list = list(ParameterGrid(param_distributions_))
+
+    # Run Gridsearch
+    gridsearch_result = Parallel(n_jobs=n_jobs)(
+        delayed(_run_search_carte)(estimator, X_train, y_train, params)
+        for params in param_list
+    )
+    gridsearch_result = pd.concat(gridsearch_result, axis=0)
+
+    # Add rank
+    rank = gridsearch_result["score"].rank(method="min").astype(int).copy()
+    rank = pd.DataFrame(rank)
+    rank.rename(columns={"score": "rank"}, inplace=True)
+    gridsearch_result = pd.concat([gridsearch_result, rank], axis=1)
+
+    # Best params
+    params_ = gridsearch_result["params"]
+    best_params_ = params_[gridsearch_result["rank"] == 1].iloc[0]
+    best_params = ast.literal_eval(best_params_)
+
+    # Refit
+    best_estimator = None
+    if refit:
+        best_estimator = copy.deepcopy(estimator)
+        best_estimator.__dict__.update(best_params)
+        best_estimator.fit(X=X_train, y=y_train)
+
+    return gridsearch_result, best_params, best_estimator
+
+
+def _run_search_carte(estimator, X_train, y_train, params):
+    """Run fit/predict for one parameter setting in the parameter grid."""
+    # Measure time
+    start_time = perf_counter()
+
+    # Run estimator
+    estimator_ = copy.deepcopy(estimator)
+    estimator_.__dict__.update(params)
+    estimator_.fit(X=X_train, y=y_train)
+
+    # Measure time
+    end_time = perf_counter()
+    duration = round(end_time - start_time, 4)
+
+    # Statistics
+    vl = np.array(estimator_.valid_loss_)
+
+    # Obtain results
+    result_run = {
+        f"cv-run_{i}_valid_loss": estimator_.valid_loss_[i]
+        for i in range(estimator_.num_model)
+    }
+    result_run["params"] = str(params)
+    result_run["score"] = np.mean(vl)
+    result_run["fit_time"] = duration
+
+    result_df = pd.DataFrame([result_run])
+    result_df = result_df.reindex(sorted(result_df.columns), axis=1)
+
+    return result_df
diff --git a/carte/src/carte_model.py b/carte/src/carte_model.py
new file mode 100644
index 0000000..cebcaf3
--- /dev/null
+++ b/carte/src/carte_model.py
@@ -0,0 +1,420 @@
+"""
+CARTE neural network model used for pretraining and downstream tasks.
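+
+The main building blocks are ``CARTE_Attention`` (edge-conditioned graph
+attention), ``CARTE_Block`` (attention plus feed-forward encoder block),
+``CARTE_Base`` (a stack of blocks followed by a read-out block), and the heads
+``CARTE_Pretrain``, ``CARTE_NN_Model`` and ``CARTE_NN_Model_Ablation``.
+
+Illustrative instantiation (the dimensions below are made up for the example;
+the estimators derive them from the input graphs)::
+
+    model = CARTE_NN_Model(
+        input_dim_x=300, input_dim_e=300, hidden_dim=300, output_dim=1,
+        num_layers=1, ff_dim=300, num_heads=12,
+    )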
+ +""" + +import math +import torch +import torch.nn as nn +from typing import Tuple +from torch import Tensor +from torch_geometric.utils import softmax +from torch_scatter import scatter + + +## CARTE - Attention and output calculation +def _carte_calculate_attention( + edge_index: Tensor, query: Tensor, key: Tensor, value: Tensor +): + # Calculate the scaled-dot product attention + attention = torch.sum(torch.mul(query[edge_index[0], :], key), dim=1) + attention = attention / math.sqrt(query.size(1)) + attention = softmax(attention, edge_index[0]) + # Generate the output + src = torch.mul(attention, value.t()).t() + output = scatter(src, edge_index[0], dim=0, reduce="sum") + return output, attention + + +## CARTE - output calculation with multi-head (message passing) +def _carte_calculate_multihead_output( + edge_index: Tensor, + query: Tensor, + key: Tensor, + value: Tensor, + num_heads: int = 1, + concat: bool = True, +): + if concat: + H, C = num_heads, query.size(1) // num_heads + for i in range(H): + O, A = _carte_calculate_attention( + edge_index, + query[:, i * C : (i + 1) * C], + key[:, i * C : (i + 1) * C], + value[:, i * C : (i + 1) * C], + ) + if i == 0: + output, attention = O, A + else: + output = torch.cat((output, O), dim=1) + attention = torch.cat((attention, A), dim=0) + else: + H, C = num_heads, query.size(1) + for i in range(H): + O, A = _carte_calculate_attention( + edge_index, + query[:, i * C : (i + 1) * C], + key[:, i * C : (i + 1) * C], + value[:, i * C : (i + 1) * C], + ) + if i == 0: + output, attention = O, A + else: + output = torch.cat((output, O), dim=0) + attention = torch.cat((attention, A), dim=0) + output = output / H + attention = attention / H + return output, attention + + +## CARTE - Attention Layer +class CARTE_Attention(nn.Module): + def __init__( + self, + input_dim: int, + output_dim: int, + num_heads: int = 1, + concat: bool = True, + read_out: bool = False, + ): + super(CARTE_Attention, self).__init__() + + if concat: + assert output_dim % num_heads == 0 + self.lin_query = nn.Linear(input_dim, output_dim, bias=False) + self.lin_key = nn.Linear(input_dim, output_dim, bias=False) + self.lin_value = nn.Linear(input_dim, output_dim, bias=False) + else: + self.lin_query = nn.Linear(input_dim, num_heads * output_dim, bias=False) + self.lin_key = nn.Linear(input_dim, num_heads * output_dim, bias=False) + self.lin_value = nn.Linear(input_dim, num_heads * output_dim, bias=False) + + if read_out == False: + self.lin_edge = nn.Linear(input_dim, output_dim) + + self.input_dim = input_dim + self.output_dim = output_dim + self.num_heads = num_heads + self.concat = concat + self.readout = read_out + + self.reset_parameters() + + def reset_parameters(self): + self.lin_query.reset_parameters() + self.lin_key.reset_parameters() + self.lin_value.reset_parameters() + if self.readout == False: + self.lin_edge.reset_parameters() + + def forward( + self, + x: Tensor, + edge_index: Tensor, + edge_attr: Tensor, + return_attention: bool = False, + ): + Z = torch.mul(edge_attr, x[edge_index[1]]) + + query = self.lin_query(x) + key = self.lin_key(Z) + value = self.lin_value(Z) + + output, attention = _carte_calculate_multihead_output( + edge_index=edge_index, + query=query, + key=key, + value=value, + num_heads=self.num_heads, + concat=self.concat, + ) + + if self.readout == False: + edge_attr = self.lin_edge(edge_attr) + + if return_attention: + return output, edge_attr, attention + else: + return output, edge_attr + + +## CARTE - single encoding block +class 
CARTE_Block(nn.Module): + def __init__( + self, + input_dim: int, + ff_dim: int, + num_heads: int = 1, + concat: bool = True, + dropout: float = 0.1, + read_out: bool = False, + ): + super().__init__() + + # Graph Attention Layer + self.g_attn = CARTE_Attention( + input_dim, input_dim, num_heads, concat, read_out=read_out + ) + + # Two-layer MLP + Layers to apply in between the main layers for x and edges + self.linear_net_x = nn.Sequential( + nn.Linear(input_dim, ff_dim), + nn.Dropout(dropout), + nn.GELU(), + nn.Linear(ff_dim, input_dim), + ) + self.norm1_x = nn.LayerNorm(input_dim) + self.norm2_x = nn.LayerNorm(input_dim) + + self.read_out = read_out + if self.read_out == False: + self.linear_net_e = nn.Sequential( + nn.Linear(input_dim, ff_dim), + nn.Dropout(dropout), + nn.GELU(), + nn.Linear(ff_dim, input_dim), + ) + self.norm1_e = nn.LayerNorm(input_dim) + + self.dropout = nn.Dropout(dropout) + self.gelu = nn.GELU() + + def forward( + self, + x: Tensor, + edge_index: Tensor, + edge_attr: Tensor, + ): + # Attention part + attn_out_x, attn_out_e = self.g_attn(x, edge_index, edge_attr) + x = self.dropout(attn_out_x) + # x = self.gelu(x) + # x = x + self.dropout(attn_out_x) + x = self.norm1_x(x) + + # MLP part - Node + linear_out_x = self.linear_net_x(x) + x = self.dropout(linear_out_x) + # x = self.gelu(x) + # x = x + self.dropout(linear_out_x) + x = self.norm2_x(x) + + # MLP part - Edge + if self.read_out == False: + edge_attr = self.linear_net_e(attn_out_e) + edge_attr = edge_attr + self.dropout(edge_attr) + edge_attr = self.norm1_e(edge_attr) + return x, edge_attr + else: + return x + + +## CARTE - contrast block +class CARTE_Contrast(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, x: Tensor): + x = nn.functional.normalize(x, dim=1) + + # Cosine similarity + x = 1 - (torch.cdist(x, x) / 2) + + # RBF kernel (Gaussian similarity) + # sig = torch.median(torch.cdist(x, x)) + # x = torch.exp(-(torch.cdist(x, x) / (2 * sig))) + + return x + + +## CARTE - finetune base block +class CARTE_Base(nn.Module): + def __init__( + self, + input_dim_x: int, + input_dim_e: int, + hidden_dim: int, + num_layers: int, + **block_args + ): + super(CARTE_Base, self).__init__() + + self.initial_x = nn.Sequential( + nn.Linear(input_dim_x, hidden_dim), + nn.GELU(), + nn.LayerNorm(hidden_dim), + ) + + self.initial_e = nn.Sequential( + nn.Linear(input_dim_e, hidden_dim), + nn.GELU(), + nn.LayerNorm(hidden_dim), + ) + + self.layers = nn.ModuleList( + [CARTE_Block(input_dim=hidden_dim, **block_args) for _ in range(num_layers)] + ) + + self.read_out_block = CARTE_Block( + input_dim=hidden_dim, read_out=True, **block_args + ) + + def forward(self, x, edge_index, edge_attr, return_attention=False): + # Initial layer for the node/edge features + x = self.initial_x(x) + edge_attr = self.initial_e(edge_attr) + + for l in self.layers: + x, edge_attr = l(x, edge_index, edge_attr) + + x = self.read_out_block(x, edge_index, edge_attr) + + if return_attention: + attention_maps = [] + for l in self.layers: + _, _, attention = l.g_attn(x, edge_index, edge_attr, return_attention) + attention_maps.append(attention) + return x, attention_maps + elif return_attention == False: + return x + + +## CARTE - Pretrain Model +class CARTE_Pretrain(nn.Module): + def __init__( + self, + input_dim_x: int, + input_dim_e: int, + hidden_dim: int, + num_layers: int, + **block_args + ): + super(CARTE_Pretrain, self).__init__() + + self.ft_base = CARTE_Base( + input_dim_x=input_dim_x, + input_dim_e=input_dim_e, + 
hidden_dim=hidden_dim, + num_layers=num_layers, + **block_args + ) + + self.pretrain_classifier = nn.Sequential( + nn.Linear(hidden_dim, 4 * hidden_dim), + nn.GELU(), + nn.Linear(4 * hidden_dim, hidden_dim), + nn.GELU(), + nn.LayerNorm(hidden_dim, elementwise_affine=False), + CARTE_Contrast(), + ) + + def forward(self, input): + x, edge_index, edge_attr, head_idx = ( + input.x.clone(), + input.edge_index, + input.edge_attr.clone(), + input.head_idx, + ) + + x = self.ft_base(x, edge_index, edge_attr) + x = x[head_idx, :] + x = self.pretrain_classifier(x) + + return x + + +## CARTE - Downstream Model +class CARTE_NN_Model(nn.Module): + def __init__( + self, + input_dim_x: int, + input_dim_e: int, + hidden_dim: int, + output_dim: int, + num_layers: int, + **block_args + ): + super(CARTE_NN_Model, self).__init__() + + self.ft_base = CARTE_Base( + input_dim_x=input_dim_x, + input_dim_e=input_dim_e, + hidden_dim=hidden_dim, + num_layers=num_layers, + **block_args + ) + + self.ft_classifier = nn.Sequential( + nn.Linear(hidden_dim, int(hidden_dim / 2)), + nn.ReLU(), + nn.LayerNorm(int(hidden_dim / 2)), + nn.Linear(int(hidden_dim / 2), int(hidden_dim / 4)), + nn.ReLU(), + nn.LayerNorm(int(hidden_dim / 4)), + nn.Linear(int(hidden_dim / 4), output_dim), + ) + + def forward(self, input): + x, edge_index, edge_attr, head_idx = ( + input.x.clone(), + input.edge_index.clone(), + input.edge_attr.clone(), + input.ptr[:-1], + ) + + x = self.ft_base(x, edge_index, edge_attr) + x = x[head_idx, :] + x = self.ft_classifier(x) + + return x + + +## CARTE - Downstream Ablation model +class CARTE_NN_Model_Ablation(nn.Module): + def __init__( + self, + ablation_method: str, + input_dim_x: int, + input_dim_e: int, + hidden_dim: int, + output_dim: int, + num_layers: int, + **block_args, + ): + super(CARTE_NN_Model_Ablation, self).__init__() + + self.ablation_method = ablation_method + + self.ft_base = CARTE_Base( + input_dim_x=input_dim_x, + input_dim_e=input_dim_e, + hidden_dim=hidden_dim, + num_layers=num_layers, + **block_args, + ) + + self.ft_classifier = nn.Sequential( + nn.Linear(hidden_dim, int(hidden_dim / 2)), + nn.ReLU(), + nn.LayerNorm(int(hidden_dim / 2)), + nn.Linear(int(hidden_dim / 2), int(hidden_dim / 4)), + nn.ReLU(), + nn.LayerNorm(int(hidden_dim / 4)), + nn.Linear(int(hidden_dim / 4), output_dim), + ) + + def forward(self, input): + x, edge_index, edge_attr, head_idx = ( + input.x.clone(), + input.edge_index.clone(), + input.edge_attr.clone(), + input.ptr[:-1], + ) + + if "exclude-attention" not in self.ablation_method: + x = self.ft_base(x, edge_index, edge_attr) + x = x[head_idx, :] + x = self.ft_classifier(x) + + return x diff --git a/carte/src/carte_table_to_graph.py b/carte/src/carte_table_to_graph.py new file mode 100644 index 0000000..87e1fce --- /dev/null +++ b/carte/src/carte_table_to_graph.py @@ -0,0 +1,277 @@ +import torch +import numpy as np +import pandas as pd +import fasttext +import fasttext.util +import gc # Import the garbage collector module +from typing import Union +from torch_geometric.data import Data +from sklearn.base import BaseEstimator, TransformerMixin +from sklearn.preprocessing import PowerTransformer +from sklearn.pipeline import make_pipeline +from carte.configs.directory import config_directory +from skrub import MinHashEncoder # change to skrub + + +def _create_edge_index(num_nodes: int, edge_attr: torch.Tensor, undirected: bool = True, self_loop: bool = True): + """ + Sets the edge_index and edge_attr for graphs. 
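+
+    For example, with ``num_nodes=3`` the base directed edges are ``(0, 1)``
+    and ``(0, 2)``; ``undirected=True`` adds the reversed pairs, and
+    ``self_loop=True`` adds ``(1, 1)`` and ``(2, 2)`` with all-ones edge
+    attributes (the center node 0 gets no self-loop).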
+ + Parameters + ---------- + num_nodes : int + Number of nodes in the graph. + edge_attr : torch.Tensor + Edge attributes tensor. + undirected : bool, optional + Whether the graph is undirected, by default True. + self_loop : bool, optional + Whether to add self-loops, by default True. + + Returns + ------- + edge_index : torch.Tensor + Edge indices tensor. + edge_attr : torch.Tensor + Edge attributes tensor. + """ + edge_index_ = torch.triu_indices(num_nodes, num_nodes, offset=1) + edge_index_ = edge_index_[:, (edge_index_[0] == 0)] + edge_index = edge_index_.clone() + edge_attr_ = edge_attr.clone() + + if undirected: + edge_index = torch.cat((edge_index, torch.flip(edge_index, [0]))) + edge_attr_ = torch.cat((edge_attr_, edge_attr_)) + + if self_loop: + unique_nodes = edge_index_[1].unique() + edge_index_self_loop = torch.stack((unique_nodes, unique_nodes)) + edge_index = torch.cat((edge_index, edge_index_self_loop), dim=1) + edge_attr_ = torch.cat((edge_attr_, torch.ones(unique_nodes.size(0), edge_attr_.size(1), dtype=edge_attr_.dtype))) + + return edge_index, edge_attr_ + + +class Table2GraphTransformer(TransformerMixin, BaseEstimator): + """ + Transformer from tables to a list of graphs. + + Parameters + ---------- + include_edge_attr : bool, optional + Whether to include edge attributes, by default True. + lm_model : str, optional + Language model to use, by default "fasttext". + n_components : int, optional + Number of components for MinHash encoder, by default 300. + n_jobs : int, optional + Number of jobs for parallel processing, by default 1. + """ + + def __init__(self, *, include_edge_attr: bool = True, lm_model: str = "fasttext", n_components: int = 300, n_jobs: int = 1): + super().__init__() + self.include_edge_attr = include_edge_attr + self.lm_model = lm_model + self.n_components = n_components + self.n_jobs = n_jobs + self.is_fitted_ = False + + def fit(self, X, y=None): + """ + Fit function used for the Table2GraphTransformer. + + Parameters + ---------- + X : pandas.DataFrame + Input data to fit. + y : array-like, optional + Target values, by default None. + + Returns + ------- + self : Table2GraphTransformer + Fitted transformer. + """ + self.y_ = y + + if not hasattr(self, "lm_model_"): + self._load_lm_model() + + cat_col_names = X.select_dtypes(include="object").columns.str.replace("\n", " ", regex=True).str.lower() + self.cat_col_names = list(cat_col_names) + num_col_names = X.select_dtypes(exclude="object").columns.str.replace("\n", " ", regex=True).str.lower() + self.num_col_names = list(num_col_names) + self.col_names = self.cat_col_names + self.num_col_names + + self.num_transformer_ = PowerTransformer().set_output(transform="pandas") + if self.lm_model == "minhash": + self.name_transformer = make_pipeline( + MinHashEncoder(n_components=self.n_components, n_jobs=self.n_jobs), + PowerTransformer(), + ) + + # Ensure numerical columns exist before fitting the transformer + if self.num_col_names: + num_cols_exist = [col for col in self.num_col_names if col in X.columns] + if num_cols_exist: + self.num_transformer_.fit(X[num_cols_exist]) + + self.is_fitted_ = True + return self + + def transform(self, X, y=None): + """ + Apply Table2GraphTransformer to each row of the data. + + Parameters + ---------- + X : pandas.DataFrame + Input data to transform. + y : array-like, optional + Target values, by default None. + + Returns + ------- + data_graph : list + List of transformed graph objects. 
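+
+        A minimal usage sketch (``df`` and ``y`` are placeholder names for a
+        pandas DataFrame and its target array)::
+
+            >>> t2g = Table2GraphTransformer(lm_model="fasttext")
+            >>> graphs = t2g.fit(df, y=y).transform(df)   # doctest: +SKIP
+            >>> len(graphs) == len(df)                    # doctest: +SKIP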
+ """ + X_ = X.replace("\n", " ", regex=True) + num_data = X_.shape[0] + + y_ = torch.tensor(self.y_, dtype=torch.float32).reshape((num_data, 1)) if self.y_ is not None else None + + X_categorical = X_.select_dtypes(include="object").copy() + X_categorical.columns = self.cat_col_names + X_numerical = X_.select_dtypes(exclude="object").copy() + X_numerical.columns = self.num_col_names + + cat_names = pd.melt(X_categorical)["value"].dropna().astype(str).str.lower().unique() + names_total = np.unique(np.hstack([self.col_names, cat_names])) + name_dict = {name: idx for idx, name in enumerate(names_total)} + + name_attr_total = self._transform_names(names_total) + if self.num_col_names: + num_cols_exist = [col for col in self.num_col_names if col in X.columns] + if num_cols_exist: + X_numerical = self._transform_numerical(X_numerical[num_cols_exist]) + + data_graph = [ + self._graph_construct(X_categorical.iloc[idx], X_numerical.iloc[idx], name_attr_total, name_dict, y_, idx) + for idx in range(num_data) + ] + + self.y_ = None + + # Manually trigger garbage collection after transforming data + gc.collect() + + return data_graph + + def _load_lm_model(self): + """ + Load the language model for features of nodes and edges. + """ + if self.lm_model == "fasttext": + self.lm_model_ = fasttext.load_model(config_directory["fasttext"]) + if self.n_components != 300: + fasttext.util.reduce_model(self.lm_model_, self.n_components) + elif self.lm_model == "minhash": + self.lm_model_ = MinHashEncoder(n_components=self.n_components, n_jobs=self.n_jobs) + + def _transform_numerical(self, X): + """ + Transform numerical columns using power transformer. + + Parameters + ---------- + X : pandas.DataFrame + Input numerical data. + + Returns + ------- + transformed_X : pandas.DataFrame + Transformed numerical data. + """ + return self.num_transformer_.transform(X) + + def _transform_names(self, names_total): + """ + Obtain the feature for a given list of string values. + + Parameters + ---------- + names_total : array-like + List of string values. + + Returns + ------- + name_features : np.ndarray + Transformed features for names. + """ + if self.lm_model == "fasttext": + return np.array([self.lm_model_.get_sentence_vector(name) for name in names_total], dtype=np.float32) + elif self.lm_model == "minhash": + return self.name_transformer.fit_transform(names_total.reshape(-1, 1)).astype(np.float32) + + def _graph_construct(self, data_cat, data_num, name_attr_total, name_dict, y, idx): + """ + Transform to graph objects. + + Parameters + ---------- + data_cat : pandas.Series + Categorical data for a single instance. + data_num : pandas.Series + Numerical data for a single instance. + name_attr_total : np.ndarray + Transformed features for names. + name_dict : dict + Dictionary mapping names to indices. + y : torch.Tensor or None + Target values. + idx : int + Index of the instance. + + Returns + ------- + data : torch_geometric.data.Data + Graph data object. 
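+
+        Notes
+        -----
+        Node 0 is a virtual center node: its feature vector is set to the mean
+        of its incoming messages (each message being the element-wise product
+        of a cell node's features with its column/edge embedding). Categorical
+        cells contribute their value embedding as node features, while
+        numerical cells contribute the column embedding scaled by the
+        power-transformed value.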
+ """ + data_cat = data_cat.dropna().str.lower() + data_num = data_num.dropna() + num_cat = len(data_cat) + num_num = len(data_num) + + edge_attr_cat = np.array([name_attr_total[name_dict[col]] for col in data_cat.index], dtype=np.float32) + edge_attr_num = np.array([name_attr_total[name_dict[col]] for col in data_num.index], dtype=np.float32) + + x_cat = torch.tensor(np.array([name_attr_total[name_dict[val]] for val in data_cat]), dtype=torch.float32) + x_num = torch.tensor(data_num.values[:, None] * edge_attr_num, dtype=torch.float32) + + if x_cat.size(0) == 0: + x_cat = torch.empty((0, self.n_components), dtype=torch.float32) + edge_attr_cat = torch.empty((0, self.n_components), dtype=torch.float32) + if x_num.size(0) == 0: + x_num = torch.empty((0, self.n_components), dtype=torch.float32) + edge_attr_num = torch.empty((0, self.n_components), dtype=torch.float32) + + x = torch.cat((x_cat, x_num)) + x = torch.cat((torch.ones((1, x.size(1))), x)) + edge_attr = torch.tensor(np.vstack((edge_attr_cat, edge_attr_num)), dtype=torch.float32) + + num_nodes = num_cat + num_num + 1 + edge_index, edge_attr = _create_edge_index(num_nodes, edge_attr, False, True) + + Z = torch.mul(edge_attr, x[edge_index[1]]) + x[0, :] = Z[edge_index[0] == 0].mean(dim=0) + + y_ = y[idx].clone() if y is not None else torch.tensor([]) + + return Data( + x=x, + edge_index=edge_index, + edge_attr=edge_attr, + y=y_, + g_idx=idx, + ) diff --git a/carte/src/evaluate_utils.py b/carte/src/evaluate_utils.py new file mode 100644 index 0000000..99f6743 --- /dev/null +++ b/carte/src/evaluate_utils.py @@ -0,0 +1,145 @@ + +import numpy as np +import pandas as pd + +from ast import literal_eval +from sklearn.metrics import ( + r2_score, + root_mean_squared_error, + roc_auc_score, + average_precision_score, +) +from sklearn.model_selection import GroupShuffleSplit +from carte.configs.directory import config_directory + + +def set_split(data, data_config, num_train, random_state): + """Set train/test split given the random state.""" + + target_name = data_config["target_name"] + X = data.drop(columns=target_name) + y = data[target_name] + y = np.array(y) + + if data_config["repeated"]: + entity_name = data_config["entity_name"] + else: + entity_name = np.arange(len(y)) + + groups = np.array(data.groupby(entity_name).ngroup()) + num_groups = len(np.unique(groups)) + gss = GroupShuffleSplit( + n_splits=1, + test_size=int(num_groups - num_train), + random_state=random_state, + ) + idx_train, idx_test = next(iter(gss.split(X=y, groups=groups))) + + X_train, X_test = X.iloc[idx_train], X.iloc[idx_test] + y_train, y_test = y[idx_train], y[idx_test] + return X_train, X_test, y_train, y_test + + +def extract_best_params(data_name, method, num_train, random_state): + """Extract the best parameters in the CARTE paper.""" + + if "tabpfn" in method: + return dict() + else: + # Load compiled log + df_log_dir = f"{config_directory['results']}/compiled_results/results_carte_baseline_bestparams.csv" + df_log = pd.read_csv(df_log_dir) + + # Obtain the mask + mask = df_log["data_name"] != data_name + mask += df_log["model"] != method + mask += df_log["num_train"] != num_train + mask += df_log["random_state"] != random_state + + # Extract the best paramameters + best_params_ = df_log["best_param"].copy() + best_params = literal_eval(best_params_[~mask].iloc[0]) + return best_params + + +def set_score_criterion(task): + """Set scoring method for CV and score criterion in final result.""" + + if task == "regression": + scoring = "r2" + score_criterion = 
["r2", "rmse"] + else: + scoring = "roc_auc" + score_criterion = ["roc_auc", "avg_precision"] + score_criterion += ["preprocess_time"] + score_criterion += ["inference_time"] + score_criterion += ["run_time"] + return scoring, score_criterion + +def shorten_param(param_name): + """Shorten the param_names for column names in search results.""" + + if "__" in param_name: + return param_name.rsplit("__", 1)[1] + return param_name + + +def check_pred_output(y_train, y_pred): + """Set the output as the mean of train data if it is nan.""" + + if np.isnan(y_pred).sum() > 0: + mean_pred = np.mean(y_train) + y_pred[np.isnan(y_pred)] = mean_pred + return y_pred + + +def reshape_pred_output(y_pred): + """Reshape the predictive output accordingly.""" + + num_pred = len(y_pred) + if y_pred.shape == (num_pred, 2): + y_pred = y_pred[:, 1] + elif y_pred.shape == (num_pred, 1): + y_pred = y_pred.ravel() + else: + pass + return y_pred + + +def set_score_criterion(task): + """Set scoring method for CV and score criterion in final result.""" + + if task == "regression": + scoring = "r2" + score_criterion = ["r2", "rmse"] + else: + scoring = "roc_auc" + score_criterion = ["roc_auc", "avg_precision"] + score_criterion += ["preprocess_time"] + score_criterion += ["inference_time"] + score_criterion += ["run_time"] + return scoring, score_criterion + + +def return_score(y_target, y_pred, task): + """Return score results for given task.""" + + if task == "regression": + score_r2 = r2_score(y_target, y_pred) + score_rmse = root_mean_squared_error(y_target, y_pred) + return score_r2, score_rmse + else: + score_auc = roc_auc_score(y_target, y_pred) + score_avg_precision = average_precision_score(y_target, y_pred) + return score_auc, score_avg_precision + + +def col_names_per_type(data, target_name): + """Extract column names per type.""" + num_col_names = data.select_dtypes(exclude="object").columns.tolist() + if target_name in num_col_names: + num_col_names.remove(target_name) + cat_col_names = data.select_dtypes(include="object").columns.tolist() + if target_name in cat_col_names: + cat_col_names.remove(target_name) + return num_col_names, cat_col_names \ No newline at end of file diff --git a/carte/src/preprocess_utils.py b/carte/src/preprocess_utils.py new file mode 100644 index 0000000..f22d823 --- /dev/null +++ b/carte/src/preprocess_utils.py @@ -0,0 +1,162 @@ +""" Functions used for preprocessing the data. """ + +import numpy as np +import pandas as pd +from carte.configs.directory import config_directory + + +def _clean_entity_names(data_entity_name): + data_entity_name = ( + data_entity_name.str.replace("<", "") + .str.replace(">", "") + .str.replace("\n", "") + .str.replace("_", " ") + .str.lower() + ) + return data_entity_name + + +def _serialize_instance(data): + data_temp = data.copy() + data_temp = data_temp.dropna() # Exclude cells with Null values + data_temp = _clean_entity_names(data_temp) + serialization = np.array(data_temp.index) + " " + np.array(data_temp) + ". 
" + sentence = "" + for i in range(len(data_temp)): + sentence += serialization[i] + sentence = sentence[:-1] + return sentence + + +def extract_fasttext_features(data: pd.DataFrame, extract_col_name: str): + import fasttext + + # Preliminary Settings + lm_model = fasttext.load_model(config_directory["fasttext"]) + + # Original data + data_ = data.copy() + data_.replace("\n", " ", regex=True, inplace=True) + data_ = data.copy() + + # Entity Names + ent_names = _clean_entity_names(data[extract_col_name]) + ent_names = list(ent_names) + + # Data Fasttext for entity names + data_fasttext = [lm_model.get_sentence_vector(str(x)) for x in ent_names] + data_fasttext = np.array(data_fasttext) + data_fasttext = pd.DataFrame(data_fasttext) + col_names = [f"X{i}" for i in range(data_fasttext.shape[1])] + data_fasttext = data_fasttext.set_axis(col_names, axis="columns") + data_fasttext = pd.concat([data_fasttext, data[extract_col_name]], axis=1) + # data_fasttext.drop_duplicates(inplace=True) + data_fasttext = data_fasttext.reset_index(drop=True) + + return data_fasttext + + +def extract_llm_features( + data: pd.DataFrame, + extract_col_name: str, + device: str = "cuda:0", +): + # Load LLM Model + from sentence_transformers import SentenceTransformer + + lm_model = SentenceTransformer("intfloat/e5-large-v2", device=device) + + # Original data + data_ = data.copy() + data_.replace("\n", " ", regex=True, inplace=True) + + # Entity Names + ent_names = _clean_entity_names(data_[extract_col_name].copy()) + ent_names = ent_names.astype(str) + ent_names = ( + "query: " + ent_names + ) # following the outlined procedure using "query: " + ent_names = list(ent_names) + + # Data for entity names + embedding = lm_model.encode(ent_names, convert_to_numpy=True) + embedding = pd.DataFrame(embedding) + col_names = [f"X{i}" for i in range(embedding.shape[1])] + embedding = embedding.set_axis(col_names, axis="columns") + embedding = pd.concat([embedding, data[extract_col_name]], axis=1) + # data_fasttext.drop_duplicates(inplace=True) + embedding = embedding.reset_index(drop=True) + + return embedding + + +def extract_ken_features( + data: pd.DataFrame, + extract_col_name: str, +): + # KEN embeddings + ken_emb = pd.read_parquet(config_directory["ken_embed"]) + ken_ent = ken_emb["Entity"].str.lower() + ken_embed_ent2idx = {ken_ent[i]: i for i in range(len(ken_emb))} + + # Original data + data_ = data.copy() + data_.replace("\n", " ", regex=True, inplace=True) + data_ = data.copy() + data_[extract_col_name] = data_[extract_col_name].str.lower() + + # Mapping + mapping = data_[extract_col_name].map(ken_embed_ent2idx) + mapping = mapping.dropna() + mapping = mapping.astype(np.int64) + mapping = np.array(mapping) + + # KEN data + data_ken = ken_emb.iloc[mapping] + data_ken.rename(columns={"Entity": "name"}, inplace=True) + data_ken.drop_duplicates(inplace=True) + data_ken = data_ken.reset_index(drop=True) + + return data_ken + + +def table2llmfeatures( + data: pd.DataFrame, + embed_numeric: bool, + device: str = "cuda:0", +): + # Load LLM Model + from sentence_transformers import SentenceTransformer + + lm_model = SentenceTransformer("intfloat/e5-large-v2", device=device) + + # Preprocessing for the strings (subject to specifics of the data) + data = data.replace("\n", " ", regex=True) + num_data = len(data) + data_x = data.copy() + + if embed_numeric: + num_cols = data_x.select_dtypes(exclude="object").columns.tolist() + data_x[num_cols] = data_x[num_cols].astype("str") + + data_x_cat = 
data_x.select_dtypes(include="object") + data_x_num = data_x.select_dtypes(exclude="object") + + sentences = [] + for idx in range(num_data): + data_ = data_x_cat.iloc[idx] + sentence = _serialize_instance(data_) + sentence = ( + "query: " + sentence + ) # following the outlined procedure using "query: " + sentences.append(sentence) + + X_categorical = lm_model.encode(sentences, convert_to_numpy=True) + X_categorical = pd.DataFrame(X_categorical) + + col_names = [f"X{i}" for i in range(X_categorical.shape[1])] + X_categorical = X_categorical.set_axis(col_names, axis="columns") + + data_total = pd.concat([X_categorical, data_x_num], axis=1) + + return data_total diff --git a/carte/src/visualization_utils.py b/carte/src/visualization_utils.py new file mode 100644 index 0000000..9dcf71a --- /dev/null +++ b/carte/src/visualization_utils.py @@ -0,0 +1,488 @@ +""" +Functions that can be utilized for visualization. +For Critical difference diagram, it modifies some of the codes from scikit-posthocs. +""" + +import pandas as pd +import numpy as np +from typing import Union, List, Tuple, Dict, Set +from matplotlib import colors +from matplotlib.axes import SubplotBase +from matplotlib.colorbar import ColorbarBase, Colorbar +from matplotlib.colors import ListedColormap +from matplotlib import pyplot +from pandas import DataFrame, Series +from seaborn import heatmap +from carte.configs.carte_configs import carte_singletable_baseline_mapping +from carte.configs.directory import config_directory + + +# Normalization function of the results +def _normalize(group): + min_score = group["score"].min() + max_score = group["score"].max() + group["normalized_score"] = (group["score"] - min_score) / (max_score - min_score) + return group + + +# Prepare dataframe suitable for the learning curves +def prepare_result(task, models="all", rank_at=2048): + + # load result + result_dir = f"{config_directory['results']}/compiled_results/results_carte_baseline_singletable.csv" + df_score = pd.read_csv(result_dir) + + # control for not important values + mask_cls = df_score["task"] == "classification" + temp = df_score["score"].copy() + temp[np.logical_and(mask_cls, temp < 0.5)] = 0.5 + temp[np.logical_and(~mask_cls, temp < 0)] = 0 + df_score["score"] = temp + + # select results based on task + mask = df_score["task"] == task + df_score = df_score[mask].reset_index(drop=True) + + # select result with model of interest + if models == "all": + pass + else: + mask = df_score["model"].isin(models) + df_score = df_score[mask] + df_score.reset_index(drop=True, inplace=True) + + # Change the names of models for clarity + temp = df_score["model"].copy() + for key in carte_singletable_baseline_mapping: + temp = temp.str.replace(key, carte_singletable_baseline_mapping[key]) + df_score["model"] = temp.copy() + + # Apply normalization on scores + df_normalized = df_score.groupby(["data_name"], group_keys=True).apply(_normalize) + df_normalized.reset_index(drop=True, inplace=True) + + # Ranking + if rank_at == "all": + temp = df_normalized["num_train"].astype(float) + mask = temp <= max(temp) + df_normalized_ = df_normalized[mask].copy() + avg_rank = ( + df_normalized_.groupby("model") + .normalized_score.mean() + .rank(ascending=False) + ) + avg_rank = avg_rank.sort_values() + rank_order = avg_rank.index.tolist() + else: + mask = df_normalized["num_train"] == rank_at + df_normalized_ = df_normalized[mask].copy() + avg_rank = ( + df_normalized_.groupby("model") + .normalized_score.mean() + .rank(ascending=False) + ) + avg_rank = 
avg_rank.sort_values() + rank_order = avg_rank.index.tolist() + + df_normalized = df_normalized.sort_values(by="num_train", ascending=True) + df_normalized["num_train"] = df_normalized["num_train"].astype(str) + df_normalized.reset_index(drop=True, inplace=True) + + return df_normalized, rank_order + + +# Generate dataframe suitable for creating critical difference diagram +def generate_df_cdd(df_normalized, train_size="all"): + + # Set the base df + df_cdd = df_normalized.copy() + df_cdd["case"] = ( + df_normalized["data_name"] + + "_" + + df_normalized["num_train"].astype(str) + + "_" + + df_normalized["random_state"].astype(str) + ) + + # select the train_size for comparison + if train_size == "all": + return df_cdd + else: + mask = df_cdd["num_train"].str.contains(f"{train_size}") + df_cdd = df_cdd[mask].copy() + df_cdd.reset_index(drop=True, inplace=True) + return df_cdd + + +# Sign array for scikit-posthoc +def sign_array(p_values: Union[List, np.ndarray], alpha: float = 0.05) -> np.ndarray: + + p_values = np.array(p_values) + p_values[p_values > alpha] = 0 + p_values[(p_values < alpha) & (p_values > 0)] = 1 + np.fill_diagonal(p_values, 1) + + return p_values + + +# Sign table for scikit-posthoc +def sign_table( + p_values: Union[List, np.ndarray, DataFrame], lower: bool = True, upper: bool = True +) -> Union[DataFrame, np.ndarray]: + + if not any([lower, upper]): + raise ValueError("Either lower or upper triangle must be returned") + + pv = ( + DataFrame(p_values, copy=True) + if not isinstance(p_values, DataFrame) + else p_values.copy() + ) + + ns = pv > 0.05 + three = (pv < 0.001) & (pv >= 0) + two = (pv < 0.01) & (pv >= 0.001) + one = (pv < 0.05) & (pv >= 0.01) + + pv = pv.astype(str) + pv[ns] = "NS" + pv[three] = "***" + pv[two] = "**" + pv[one] = "*" + + np.fill_diagonal(pv.values, "-") + if not lower: + pv.values[np.tril_indices(pv.shape[0], -1)] = "" + elif not upper: + pv.values[np.triu_indices(pv.shape[0], 1)] = "" + + return pv + + +# Sign plot for scikit-posthoc +def sign_plot( + x: Union[List, np.ndarray, DataFrame], + g: Union[List, np.ndarray] = None, + flat: bool = False, + labels: bool = True, + cmap: List = None, + cbar_ax_bbox: List = None, + ax: SubplotBase = None, + **kwargs, +) -> Union[SubplotBase, Tuple[SubplotBase, Colorbar]]: + + for key in ["cbar", "vmin", "vmax", "center"]: + if key in kwargs: + del kwargs[key] + + if isinstance(x, DataFrame): + df = x.copy() + else: + x = np.array(x) + g = g or np.arange(x.shape[0]) + df = DataFrame(np.copy(x), index=g, columns=g) + + dtype = df.values.dtype + + if not np.issubdtype(dtype, np.integer) and flat: + raise ValueError("X should be a sign_array or DataFrame of integers") + elif not np.issubdtype(dtype, np.floating) and not flat: + raise ValueError("X should be an array or DataFrame of float p values") + + if not cmap and flat: + # format: diagonal, non-significant, significant + cmap = ["1", "#fbd7d4", "#1a9641"] + elif not cmap and not flat: + # format: diagonal, non-significant, p<0.001, p<0.01, p<0.05 + cmap = ["1", "#fbd7d4", "#005a32", "#238b45", "#a1d99b"] + + if flat: + np.fill_diagonal(df.values, -1) + hax = heatmap( + df, vmin=-1, vmax=1, cmap=ListedColormap(cmap), cbar=False, ax=ax, **kwargs + ) + if not labels: + hax.set_xlabel("") + hax.set_ylabel("") + return hax + + else: + df[(x < 0.001) & (x >= 0)] = 1 + df[(x < 0.01) & (x >= 0.001)] = 2 + df[(x < 0.05) & (x >= 0.01)] = 3 + df[(x >= 0.05)] = 0 + + np.fill_diagonal(df.values, -1) + + if len(cmap) != 5: + raise ValueError("Cmap list must contain 5 
items")
+
+        hax = heatmap(
+            df,
+            vmin=-1,
+            vmax=3,
+            cmap=ListedColormap(cmap),
+            center=1,
+            cbar=False,
+            ax=ax,
+            **kwargs,
+        )
+        if not labels:
+            hax.set_xlabel("")
+            hax.set_ylabel("")
+
+        cbar_ax = hax.figure.add_axes(cbar_ax_bbox or [0.95, 0.35, 0.04, 0.3])
+        cbar = ColorbarBase(
+            cbar_ax,
+            cmap=(ListedColormap(cmap[2:] + [cmap[1]])),
+            norm=colors.NoNorm(),
+            boundaries=[0, 1, 2, 3, 4],
+        )
+        cbar.set_ticks(
+            list(np.linspace(0, 3, 4)),
+            labels=["p < 0.001", "p < 0.01", "p < 0.05", "NS"],
+        )
+
+        cbar.outline.set_linewidth(1)
+        cbar.outline.set_edgecolor("0.5")
+        cbar.ax.tick_params(size=0)
+
+        return hax, cbar
+
+
+def _find_maximal_cliques(adj_matrix: DataFrame) -> List[Set]:
+
+    if (adj_matrix.index != adj_matrix.columns).any():
+        raise ValueError("adj_matrix must be symmetric, indices do not match")
+    if not adj_matrix.isin((0, 1)).values.all():
+        raise ValueError("Input matrix must be binary")
+    if adj_matrix.empty or not (adj_matrix.T == adj_matrix).values.all():
+        raise ValueError("Input matrix must be non-empty and symmetric")
+
+    result = []
+    _bron_kerbosch(
+        current_clique=set(),
+        candidates=set(adj_matrix.index),
+        visited=set(),
+        adj_matrix=adj_matrix,
+        result=result,
+    )
+    return result
+
+
+def _bron_kerbosch(
+    current_clique: Set,
+    candidates: Set,
+    visited: Set,
+    adj_matrix: DataFrame,
+    result: List[Set],
+) -> None:
+
+    while candidates:
+        v = candidates.pop()
+        _bron_kerbosch(
+            current_clique | {v},
+            # Restrict candidate vertices to the neighbors of v
+            {n for n in candidates if adj_matrix.loc[v, n]},
+            # Restrict visited vertices to the neighbors of v
+            {n for n in visited if adj_matrix.loc[v, n]},
+            adj_matrix,
+            result,
+        )
+        visited.add(v)
+
+    # We do not need to report a clique if a child call already did it.
+    if not visited:
+        # Report ``current_clique`` only when it could not be extended further.
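+        # ``visited`` is empty only when this call started with no candidate
+        # and no previously visited neighbors, so ``current_clique`` cannot be
+        # extended and is maximal.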
+ result.append(current_clique) + + +def critical_difference_diagram( + ranks: Union[dict, Series], + sig_matrix: DataFrame, + *, + ax: SubplotBase = None, + label_fmt_left: str = "{label} ({rank:.2g})", + label_fmt_right: str = "({rank:.2g}) {label}", + label_props: dict = None, + marker_props: dict = None, + elbow_props: dict = None, + crossbar_props: dict = None, + color_palette: Union[Dict[str, str], List] = {}, + line_style: Union[Dict[str, str], List] = {}, + text_h_margin: float = 0.01, +) -> Dict[str, list]: + + ## check color_palette consistency + if len(color_palette) == 0: + pass + elif isinstance(color_palette, Dict) and ( + (len(set(ranks.keys()) & set(color_palette.keys()))) == len(ranks) + ): + pass + elif isinstance(color_palette, List) and (len(ranks) <= len(color_palette)): + pass + else: + raise ValueError( + "color_palette keys are not consistent, or list size too small" + ) + + elbow_props = elbow_props or {} + marker_props = {"zorder": 3, **(marker_props or {})} + label_props = {"va": "center", **(label_props or {})} + crossbar_props = { + "color": "k", + "zorder": 3, + "linewidth": 2, + **(crossbar_props or {}), + } + + ax = ax or pyplot.gca() + ax.yaxis.set_visible(False) + ax.spines["right"].set_visible(False) + ax.spines["left"].set_visible(False) + ax.spines["bottom"].set_visible(False) + ax.xaxis.set_ticks_position("top") + ax.spines["top"].set_position("zero") + + # lists of artists to be returned + markers = [] + elbows = [] + labels = [] + crossbars = [] + + # True if pairwise comparison is NOT significant + adj_matrix = DataFrame( + 1 - sign_array(sig_matrix), + index=sig_matrix.index, + columns=sig_matrix.columns, + dtype=bool, + ) + + ranks = Series(ranks) # Standardize if ranks is dict + points_left, points_right = np.array_split(ranks.sort_values(), 2) + + # Sets of points under the same crossbar + crossbar_sets = _find_maximal_cliques(adj_matrix) + + # Sort by lowest rank and filter single-valued sets + crossbar_sets = sorted( + (x for x in crossbar_sets if len(x) > 1), key=lambda x: ranks[list(x)].min() + ) + + # Create stacking of crossbars: for each level, try to fit the crossbar, + # so that it does not intersect with any other in the level. If it does not + # fit in any level, create a new level for it. + crossbar_levels: list[list[set]] = [] + for bar in crossbar_sets: + for level, bars_in_level in enumerate(crossbar_levels): + if not any(bool(bar & bar_in_lvl) for bar_in_lvl in bars_in_level): + ypos = -level - 1 + bars_in_level.append(bar) + break + else: + ypos = -len(crossbar_levels) - 1 + crossbar_levels.append([bar]) + + crossbars.append( + ax.plot( + # Adding a separate line between each pair enables showing a + # marker over each elbow with crossbar_props={'marker': 'o'}. 
+ [ranks[i] for i in bar], + [ypos] * len(bar), + **crossbar_props, + ) + ) + + lowest_crossbar_ypos = -len(crossbar_levels) + + # def _change_label(label): + # label_ = label.split("-") + # label_ = [rf"$\bf{x}$" for x in label_] + # label_ = ("-").join(label_) + # return label_ + + def _change_label(label): + label_temp = label.split("-") + label_ = [] + for x in label_temp: + if len(x.split(" ")) != 1: + temp = x.split(" ") + temp = (" ").join([r"$\bf\{" + f"{x}" + r"}$" for x in temp]) + label_.append(temp) + else: + label_.append(r"$\bf\{" + f"{x}" + r"}$") + label_ = ("-").join(label_) + label_ = label_.replace("\\{", "{") + return label_ + + def plot_items(points, xpos, label_fmt, color_palette, line_style, label_props): + """Plot each marker + elbow + label.""" + ypos = lowest_crossbar_ypos - 1 + for idx, (label, rank) in enumerate(points.items()): + if len(color_palette) == 0: + elbow, *_ = ax.plot( + [xpos, rank, rank], + [ypos, ypos, 0], + **elbow_props, + ) + label_ = label + else: + elbow, *_ = ax.plot( + [xpos, rank, rank], + [ypos, ypos, 0], + c=( + color_palette[label] + if isinstance(color_palette, Dict) + else color_palette[idx] + ), + ls=( + line_style[label] + if isinstance(line_style, Dict) + else line_style[idx] + ), + **elbow_props, + ) + if color_palette[label] != "black": # darkgrey black + label_ = _change_label(label) + else: + label_ = label + elbows.append(elbow) + curr_color = elbow.get_color() + markers.append(ax.scatter(rank, 0, **{"color": curr_color, **marker_props})) + labels.append( + ax.text( + xpos, + ypos, + label_fmt.format(label=label_, rank=-1 * rank), + **{"color": curr_color, **label_props}, + ) + ) + ypos -= 1.5 + + plot_items( + points_left, + xpos=points_left.iloc[0] - text_h_margin, + label_fmt=label_fmt_left, + color_palette=color_palette, + line_style=line_style, + label_props={ + "ha": "right", + **label_props, + }, + ) + plot_items( + points_right[::-1], + xpos=points_right.iloc[-1] + text_h_margin, + label_fmt=label_fmt_right, + color_palette=color_palette, + line_style=line_style, + label_props={"ha": "left", **label_props}, + ) + + return { + "markers": markers, + "elbows": elbows, + "labels": labels, + "crossbars": crossbars, + } diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..58193a9 --- /dev/null +++ b/setup.py @@ -0,0 +1,53 @@ +"""The setup script.""" + +from setuptools import setup, find_packages + +with open('History.rst') as history_file: + history = history_file.read() + +requirements = [] +test_requirements = [] + +setup( + author="""Myung Jun Kim, Léo Grinsztajn, Gaël Varoquaux""", + author_email='test@gmail.com', + python_requires='>=3.10.12', + classifiers=[ + 'Development Status :: 2 - Pre-Alpha', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: MIT License', + 'Environment :: Console', + 'Operating System :: OS Independent', + 'Operating System :: POSIX :: Linux', + 'Operating System :: MacOS', + 'Operating System :: POSIX', + 'Operating System :: Microsoft :: Windows', + 'Natural Language :: English', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + ], + description="""Pretrained deep-learning models are the go-to solution for images or text. However, for tabular data the standard is still to train tree-based models. 
+ Indeed, transfer learning on tables hits the challenge of data integration: finding correspondences, + correspondences in the entries (entity matching) where different words may denote the same entity, correspondences across columns (schema matching), + which may come in different orders, names... We propose a neural architecture that does not need such correspondences. + As a result, we can pretrain it on background data that has not been matched. + The architecture -- CARTE for Context Aware Representation of Table Entries -- uses a graph representation of tabular (or relational) data to process tables with different columns, + string embedding of entries and columns names to model an open vocabulary, and a graph-attentional network to contextualize entries with column names and neighboring entries. + An extensive benchmark shows that CARTE facilitates learning, outperforming a solid set of baselines including the best tree-based models. + CARTE also enables joint learning across tables with unmatched columns, enhancing a small table with bigger ones. CARTE opens the door to large pretrained models for tabular data.""", + install_requires=["numpy", "pandas", "scipy", "scikit-learn", "skrub","torch","torch-geometric","torcheval","torch_scatter"], + license="MIT license", + keywords='carte', + name='carte', + packages=find_packages(include=['carte', 'carte.*']), + include_package_data=True, + #package_data = { + # '': ['*.csv'], + # 'carte': ['data/data_singletable/',"data/etc"], + #}, + test_suite='tests', + tests_require=test_requirements, + url='https://github.com/soda-inria/carte', + version='0.0.9', + zip_safe=False, +) \ No newline at end of file diff --git a/src/carte_table_to_graph.py b/src/carte_table_to_graph.py index 3fbcacd..c3d4a88 100644 --- a/src/carte_table_to_graph.py +++ b/src/carte_table_to_graph.py @@ -1,9 +1,9 @@ -"""The Table2GraphTransformer Class""" - import torch import numpy as np import pandas as pd import fasttext +import fasttext.util +import gc # Import the garbage collector module from typing import Union from torch_geometric.data import Data from sklearn.base import BaseEstimator, TransformerMixin @@ -13,102 +13,97 @@ from skrub import MinHashEncoder # change to skrub -def _create_edge_index( - num_nodes: int, - edge_attr: torch.tensor, - undirected: bool = True, - self_loop: bool = True, -): - """Sets the edge_index and edge_attr for graphs.""" +def _create_edge_index(num_nodes: int, edge_attr: torch.Tensor, undirected: bool = True, self_loop: bool = True): + """ + Sets the edge_index and edge_attr for graphs. - # the list of possible edge_index (directed with the numbering) - edge_index_ = torch.combinations(torch.arange(num_nodes), 2).transpose(0, 1) + Parameters + ---------- + num_nodes : int + Number of nodes in the graph. + edge_attr : torch.Tensor + Edge attributes tensor. + undirected : bool, optional + Whether the graph is undirected, by default True. + self_loop : bool, optional + Whether to add self-loops, by default True. + + Returns + ------- + edge_index : torch.Tensor + Edge indices tensor. + edge_attr : torch.Tensor + Edge attributes tensor. 
+ """ + edge_index_ = torch.triu_indices(num_nodes, num_nodes, offset=1) edge_index_ = edge_index_[:, (edge_index_[0] == 0)] edge_index = edge_index_.clone() edge_attr_ = edge_attr.clone() - # undirected + if undirected: - edge_index = torch.hstack((edge_index, torch.flipud(edge_index))) - edge_attr_ = torch.vstack((edge_attr_, edge_attr_)) - # self-loop + edge_index = torch.cat((edge_index, torch.flip(edge_index, [0]))) + edge_attr_ = torch.cat((edge_attr_, edge_attr_)) + if self_loop: - edge_index_self_loop = torch.vstack( - (edge_index_[1].unique(), edge_index_[1].unique()) - ) - edge_index = torch.hstack((edge_index, edge_index_self_loop)) - edge_attr_ = torch.vstack( - (edge_attr_, torch.ones(num_nodes - 1, edge_attr_.size(1))) - ) + unique_nodes = edge_index_[1].unique() + edge_index_self_loop = torch.stack((unique_nodes, unique_nodes)) + edge_index = torch.cat((edge_index, edge_index_self_loop), dim=1) + edge_attr_ = torch.cat((edge_attr_, torch.ones(unique_nodes.size(0), edge_attr_.size(1), dtype=edge_attr_.dtype))) + return edge_index, edge_attr_ class Table2GraphTransformer(TransformerMixin, BaseEstimator): - """Transformer from tables to a list of graphs. - - The list of graphs are generated in a row-wise fashion. + """ + Transformer from tables to a list of graphs. Parameters ---------- - include_edge_attr : bool, default = True - Indicates whether to include the edge features or not. - lm_model : {'fasttext', 'minhash'}, default = 'fasttext' - The lm_model used to initialize the features of nodes and edges. - n_components : int, default = 300 - The number of components for the minhash encoder. Ignored for lm_model='fasttext' - n_jobs : : int, default=1 - Number of jobs to run in parallel for minhash encoder. + include_edge_attr : bool, optional + Whether to include edge attributes, by default True. + lm_model : str, optional + Language model to use, by default "fasttext". + n_components : int, optional + Number of components for MinHash encoder, by default 300. + n_jobs : int, optional + Number of jobs for parallel processing, by default 1. """ - def __init__( - self, - *, - include_edge_attr: bool = True, - lm_model: str = "fasttext", - n_components: float = 300, - n_jobs: int = 1, - ): - super(Table2GraphTransformer, self).__init__() - + def __init__(self, *, include_edge_attr: bool = True, lm_model: str = "fasttext", n_components: int = 300, n_jobs: int = 1): + super().__init__() self.include_edge_attr = include_edge_attr self.lm_model = lm_model self.n_components = n_components self.n_jobs = n_jobs + self.is_fitted_ = False def fit(self, X, y=None): - """Fit function used for the Table2GraphTransformer + """ + Fit function used for the Table2GraphTransformer. Parameters ---------- - X : pandas DataFrame (n_samples, n_features) - The input data used to transform to graphs. - - y : None - Ignored. + X : pandas.DataFrame + Input data to fit. + y : array-like, optional + Target values, by default None. Returns ------- - self : object + self : Table2GraphTransformer Fitted transformer. 
""" - self.y_ = y - self.is_fitted_ = False - - # Load language_model - if hasattr(self, "lm_model_") == False: + if not hasattr(self, "lm_model_"): self._load_lm_model() - # Relations - cat_col_names = X.select_dtypes(include="object").columns - cat_col_names = cat_col_names.str.replace("\n", " ", regex=True).str.lower() + cat_col_names = X.select_dtypes(include="object").columns.str.replace("\n", " ", regex=True).str.lower() self.cat_col_names = list(cat_col_names) - num_col_names = X.select_dtypes(exclude="object").columns - num_col_names = num_col_names.str.replace("\n", " ", regex=True).str.lower() + num_col_names = X.select_dtypes(exclude="object").columns.str.replace("\n", " ", regex=True).str.lower() self.num_col_names = list(num_col_names) self.col_names = self.cat_col_names + self.num_col_names - # Numerical transformer - Powertransformer self.num_transformer_ = PowerTransformer().set_output(transform="pandas") if self.lm_model == "minhash": self.name_transformer = make_pipeline( @@ -116,212 +111,167 @@ def fit(self, X, y=None): PowerTransformer(), ) + # Ensure numerical columns exist before fitting the transformer + if self.num_col_names: + num_cols_exist = [col for col in self.num_col_names if col in X.columns] + if num_cols_exist: + self.num_transformer_.fit(X[num_cols_exist]) + + self.is_fitted_ = True return self def transform(self, X, y=None): - """Apply Table2GraphTransformer to each row of the data + """ + Apply Table2GraphTransformer to each row of the data. Parameters ---------- - X : Pandas DataFrame. (n_samples, n_features) - The input data used to transform to graphs. - - y : None - Ignored. + X : pandas.DataFrame + Input data to transform. + y : array-like, optional + Target values, by default None. Returns ------- - Graph Data : list of size (n_samples). - The list of transformed graph data. + data_graph : list + List of transformed graph objects. 
""" - - # Preprocess the features - X_ = X.copy() - X_ = X_.replace("\n", " ", regex=True) + X_ = X.replace("\n", " ", regex=True) num_data = X_.shape[0] - # Preprocess the target - y_ = None - if self.y_ is not None: - y_ = np.array(self.y_) - y_ = torch.tensor(y_).reshape((num_data, 1)) + y_ = torch.tensor(self.y_, dtype=torch.float32).reshape((num_data, 1)) if self.y_ is not None else None - # Separate categorical and numerical columns X_categorical = X_.select_dtypes(include="object").copy() X_categorical.columns = self.cat_col_names X_numerical = X_.select_dtypes(exclude="object").copy() X_numerical.columns = self.num_col_names - # Features for names - cat_names = pd.melt(X_categorical)["value"] - cat_names = cat_names.dropna() - cat_names = cat_names.astype(str) - cat_names = cat_names.str.replace("\n", " ", regex=True).str.lower() - cat_names = cat_names.unique() - names_total = np.hstack([self.col_names, cat_names]) - names_total = np.unique(names_total) - name_dict = {names_total[i]: i for i in range(names_total.shape[0])} - - # preprocess values + cat_names = pd.melt(X_categorical)["value"].dropna().astype(str).str.lower().unique() + names_total = np.unique(np.hstack([self.col_names, cat_names])) + name_dict = {name: idx for idx, name in enumerate(names_total)} + name_attr_total = self._transform_names(names_total) - if len(self.num_col_names) != 0: - X_numerical = self._transform_numerical(X_numerical) - if self.is_fitted_ == False: - self.is_fitted_ = True + if self.num_col_names: + num_cols_exist = [col for col in self.num_col_names if col in X.columns] + if num_cols_exist: + X_numerical = self._transform_numerical(X_numerical[num_cols_exist]) data_graph = [ - self._graph_construct( - X_categorical, - X_numerical, - name_attr_total, - name_dict, - y_, - idx=i, - ) - for i in range(num_data) + self._graph_construct(X_categorical.iloc[idx], X_numerical.iloc[idx], name_attr_total, name_dict, y_, idx) + for idx in range(num_data) ] - if self.y_ is not None: - self.y_ = None + self.y_ = None + + # Manually trigger garbage collection after transforming data + gc.collect() return data_graph def _load_lm_model(self): - """Load the language model for features of nodes and edges.""" - + """ + Load the language model for features of nodes and edges. + """ if self.lm_model == "fasttext": - # Loading fasttext self.lm_model_ = fasttext.load_model(config_directory["fasttext"]) if self.n_components != 300: fasttext.util.reduce_model(self.lm_model_, self.n_components) elif self.lm_model == "minhash": - self.lm_model_ = MinHashEncoder( - n_components=self.n_components, - n_jobs=self.n_jobs, - ) + self.lm_model_ = MinHashEncoder(n_components=self.n_components, n_jobs=self.n_jobs) def _transform_numerical(self, X): - """Transform numerical columns using powertransformer""" + """ + Transform numerical columns using power transformer. - X_num = X.copy() - if self.is_fitted_ == False: - X_num = self.num_transformer_.fit_transform(X_num) - else: - X_num = self.num_transformer_.transform(X_num) - return X_num + Parameters + ---------- + X : pandas.DataFrame + Input numerical data. + + Returns + ------- + transformed_X : pandas.DataFrame + Transformed numerical data. + """ + return self.num_transformer_.transform(X) def _transform_names(self, names_total): - """Obtain the feature for a given list of string values""" + """ + Obtain the feature for a given list of string values. + Parameters + ---------- + names_total : array-like + List of string values. 
+
+        Returns
+        -------
+        name_features : np.ndarray
+            Transformed features for names.
+        """
         if self.lm_model == "fasttext":
-            name_attr_total = [
-                self.lm_model_.get_sentence_vector(i) for i in names_total
-            ]
-            name_attr_total = np.array(name_attr_total).astype(np.float32)
-            pass
+            return np.array([self.lm_model_.get_sentence_vector(name) for name in names_total], dtype=np.float32)
         elif self.lm_model == "minhash":
-            name_attr_total = self.name_transformer.fit_transform(
-                names_total.reshape(-1, 1)
-            )
-            name_attr_total = name_attr_total.astype(np.float32)
-        return name_attr_total
-
-    def _graph_construct(
-        self,
-        X_categorical,
-        X_numerical,
-        name_attr_total,
-        name_dict,
-        y,
-        idx,
-    ):
-        """Transform to graph objects.
+            return self.name_transformer.fit_transform(names_total.reshape(-1, 1)).astype(np.float32)
+
+    def _graph_construct(self, data_cat, data_num, name_attr_total, name_dict, y, idx):
+        """
+        Transform to graph objects.

         Parameters
         ----------
-        X_categorical : Pandas DataFrame of shape (n_samples, n_categorical_features)
-            The input pandas DataFrame containing only the categorical features.
-        X_numerical : Pandas DataFrame of shape (n_samples, n_numerical_features)
-            The input pandas DataFrame containing only the numerical features.
-        name_attr_total : Numpy array of shape (n_words, n_dim_fasttext)
-            The features of each word (or sentence) in the name_dict.
-        name_dict : List of shape (n_words,)
-            Total list of words (or sentences) that the data contains.
-        y : array-like of shape (n_samples,)
-            The target variable to try to predict.
-        idx: int
-            The index of a particular data point used to transform into graphs
+        data_cat : pandas.Series
+            Categorical data for a single instance.
+        data_num : pandas.Series
+            Numerical data for a single instance.
+        name_attr_total : np.ndarray
+            Transformed features for names.
+        name_dict : dict
+            Dictionary mapping names to indices.
+        y : torch.Tensor or None
+            Target values.
+        idx : int
+            Index of the instance.

         Returns
         -------
-        Graph : Graph object
-            The graph object from torch_geometric
+        data : torch_geometric.data.Data
+            Graph data object.
""" - - # Obtain the data for a 'idx'-th row - data_cat = X_categorical.iloc[idx] - data_cat = data_cat.dropna() - num_cat = len(data_cat) - if num_cat != 0: - data_cat = data_cat.str.replace("\n", " ", regex=True).str.lower() - data_num = X_numerical.iloc[idx] + data_cat = data_cat.dropna().str.lower() data_num = data_num.dropna() + num_cat = len(data_cat) num_num = len(data_num) - # edge_attributes - if self.include_edge_attr: - edge_attr_cat = [name_attr_total[name_dict[x]] for x in data_cat.index] - edge_attr_cat = np.array(edge_attr_cat).astype(np.float32) - edge_attr_num = [name_attr_total[name_dict[x]] for x in data_num.index] - edge_attr_num = np.array(edge_attr_num).astype(np.float32) - else: - edge_attr_cat = np.ones((num_cat, self.n_components)).astype(np.float32) - edge_attr_num = np.ones((num_num, self.n_components)).astype(np.float32) - - # node_attributes - x_cat = [name_attr_total[name_dict[x]] for x in data_cat] - x_cat = np.array(x_cat).astype(np.float32) - x_cat = torch.tensor(x_cat) - if x_cat.size(0) == 0: - x_cat = x_cat.reshape(0, self.n_components) - edge_attr_cat = edge_attr_cat.reshape(0, self.n_components) + edge_attr_cat = np.array([name_attr_total[name_dict[col]] for col in data_cat.index], dtype=np.float32) + edge_attr_num = np.array([name_attr_total[name_dict[col]] for col in data_num.index], dtype=np.float32) + + x_cat = torch.tensor(np.array([name_attr_total[name_dict[val]] for val in data_cat]), dtype=torch.float32) + x_num = torch.tensor(data_num.values[:, None] * edge_attr_num, dtype=torch.float32) - x_num_ = np.array(data_num).astype("float32") - x_num = x_num_.reshape(-1, 1) * edge_attr_num - x_num = torch.tensor(x_num) + if x_cat.size(0) == 0: + x_cat = torch.empty((0, self.n_components), dtype=torch.float32) + edge_attr_cat = torch.empty((0, self.n_components), dtype=torch.float32) if x_num.size(0) == 0: - x_num = x_num.reshape(0, self.n_components) - edge_attr_num = edge_attr_num.reshape(0, self.n_components) + x_num = torch.empty((0, self.n_components), dtype=torch.float32) + edge_attr_num = torch.empty((0, self.n_components), dtype=torch.float32) - # combined node/edge attributes - x = torch.vstack((x_cat, x_num)) - x = torch.vstack((torch.ones((1, x.size(1))), x)) - edge_attr = np.vstack((edge_attr_cat, edge_attr_num)) - edge_attr = torch.tensor(edge_attr) + x = torch.cat((x_cat, x_num)) + x = torch.cat((torch.ones((1, x.size(1))), x)) + edge_attr = torch.tensor(np.vstack((edge_attr_cat, edge_attr_num)), dtype=torch.float32) - # edge_index num_nodes = num_cat + num_num + 1 edge_index, edge_attr = _create_edge_index(num_nodes, edge_attr, False, True) - # Set the center node Z = torch.mul(edge_attr, x[edge_index[1]]) - x[0, :] = Z[(edge_index[0] == 0), :].mean(dim=0) - - # Target - if y is not None: - y_ = y[idx].clone() - else: - y_ = torch.tensor([]) + x[0, :] = Z[edge_index[0] == 0].mean(dim=0) - # graph index (g_idx) - g_idx = idx + y_ = y[idx].clone() if y is not None else torch.tensor([]) - data = Data( + return Data( x=x, edge_index=edge_index, edge_attr=edge_attr, y=y_, - g_idx=g_idx, + g_idx=idx, ) - - return data