
Fixed docstrings | Added workflow for unit testing automation | Added more test cases | replaced pytest with unittest | Fixed dependencies bugs | #15

Merged
22 commits merged on Jul 17, 2024
Commits (22)
683211b
Docstrings update on some functions/classes
spirosmaggioros Jul 8, 2024
8a36d3c
Minor changes on dependencies, scikit-learn version creating bugs wit…
spirosmaggioros Jul 8, 2024
5c4288f
Updated pytest dependencies
spirosmaggioros Jul 8, 2024
d8eb5fa
Addition of unit tests with unittest library | fixed some bugs with n…
spirosmaggioros Jul 8, 2024
8a66417
updated workflow with correct path to tests
spirosmaggioros Jul 8, 2024
fbf4a04
pytest bug on dependencies
spirosmaggioros Jul 8, 2024
6f826d7
Forgot i have to break the system packages
spirosmaggioros Jul 8, 2024
6ce1433
torch version bug
spirosmaggioros Jul 8, 2024
a331787
Removed pytest checks from unit tests
spirosmaggioros Jul 8, 2024
070412e
Update test_cases.yml
spirosmaggioros Jul 8, 2024
5d5e3be
For some reason, metadata is None after running spare_train
spirosmaggioros Jul 8, 2024
db1eef6
Added contribution guide
spirosmaggioros Jul 9, 2024
6655624
Added unit testing guide
spirosmaggioros Jul 9, 2024
5bce2c0
Revert "Added unit testing guide"
spirosmaggioros Jul 9, 2024
2733d6a
Revert "Added contribution guide"
spirosmaggioros Jul 9, 2024
a571ceb
Fixed bugs on test cases | Added support for macos metal accelerator …
spirosmaggioros Jul 9, 2024
d53e327
Added setuptools on dependencies
spirosmaggioros Jul 9, 2024
c2bb6f9
Final ubuntu & macos workflows for building and test cases
spirosmaggioros Jul 9, 2024
e070582
Added setuptools to requirements
spirosmaggioros Jul 9, 2024
8a1ed47
Updated test cases | Minor change to convert_to_number_if_possible
spirosmaggioros Jul 11, 2024
1869887
Revert "Updated test cases | Minor change to convert_to_number_if_pos…
spirosmaggioros Jul 13, 2024
6047310
Bug fix with wrong version of python at both gh workflows
spirosmaggioros Jul 13, 2024
33 changes: 33 additions & 0 deletions .github/workflows/macos_test_cases.yml
@@ -0,0 +1,33 @@
name: spare_scores test cases on macos

# workflow dispatch has been added for testing purposes
on: [push, pull_request, workflow_dispatch]

jobs:
  build:
    runs-on: ["macos-latest"]

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      - name: Set-up miniconda for macos and ubuntu
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          python-version: 3.8
          miniconda-version: "latest"
      - name: Create conda env
        run: conda create -n spare python=3.8
      - name: Install pip
        run: conda run -n spare conda install pip
      - name: Install spare scores
        run: conda run -n spare pip install spare_scores
      - name: Download dependencies
        run: pip install setuptools && pip install .
      - name: Run unit tests
        run: |
          cd tests/unit && python -m unittest discover -s . -p "*.py"
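The workflow's last step relies on `unittest` discovery rather than pytest. A minimal sketch of what that step exercises, with a hypothetical `TestExample` case standing in for the real test modules under `tests/unit`:

```python
import unittest

# "python -m unittest discover -s . -p '*.py'" collects every TestCase
# defined in modules matching the pattern. TestExample is illustrative only.
class TestExample(unittest.TestCase):
    def test_addition(self):
        self.assertEqual(1 + 1, 2)

# Programmatic equivalent of the discover-and-run step in the workflow:
suite = unittest.TestLoader().loadTestsFromTestCase(TestExample)
result = unittest.TextTestRunner(verbosity=0).run(suite)
```

Discovery only picks up classes deriving from `unittest.TestCase`, which is why the PR replaces pytest-style checks with `TestCase` methods.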


32 changes: 32 additions & 0 deletions .github/workflows/ubuntu_test_cases.yml
@@ -0,0 +1,32 @@
name: spare_scores test cases on ubuntu

# workflow dispatch has been added for testing purposes
on: [push, pull_request, workflow_dispatch]

jobs:
  build:
    runs-on: ["ubuntu-latest"]

    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.8'
      - name: Set-up miniconda for macos and ubuntu
        uses: conda-incubator/setup-miniconda@v2
        with:
          auto-update-conda: true
          python-version: 3.8
          miniconda-version: "latest"
      - name: Create conda env
        run: conda create -n spare python=3.8
      - name: Install pip
        run: conda run -n spare conda install pip
      - name: Install spare scores
        run: conda run -n spare pip install spare_scores
      - name: Download dependencies
        run: pip install setuptools && pip install .
      - name: Run unit tests
        run: |
          cd tests/unit && python -m unittest discover -s . -p "*.py"

13 changes: 7 additions & 6 deletions dev-dependencies.txt
@@ -18,29 +18,30 @@ jsonschema==4.17.3
kiwisolver==1.4.4
matplotlib==3.7.1
msgpack==1.0.5
numpy==1.24.4
numpy==1.23.5
packaging==23.1
pandas==2.0.3
Pillow==9.5.0
pkgutil_resolve_name==1.3.10
pluggy==1.2.0
pluggy==1.5.0
protobuf==4.23.3
pyparsing==3.1.0
pyrsistent==0.19.3
pytest==7.4.0
pytest==8.2.2
python-dateutil==2.8.2
pytz==2023.3
PyYAML==6.0
ray==2.5.1
requests==2.31.0
scikit-learn==1.2.2
scipy==1.10.1
scikit-learn==0.24.2
scipy==1.8.0
six==1.16.0
-e git+https://github.com/georgeaidinis/spare_score@3055a393e7aad704dd00dd378e45d695d99deebd#egg=spare_scores
threadpoolctl==3.1.0
tomli==2.0.1
torch==1.11.0
torch==2.3.1
typing_extensions==4.7.0
tzdata==2023.3
urllib3==2.0.3
zipp==3.15.0
setuptools==70.3.0
5 changes: 3 additions & 2 deletions setup.py
@@ -18,13 +18,14 @@
include_package_data=True,
install_requires=['numpy',
'pandas',
'setuptools',
'scikit-learn',
'torch<2.1',
'torch<2.3.1',
'matplotlib',
'optuna'],
entry_points={
'console_scripts': ["spare_score = spare_scores.cli:main",
"spare_scores = spare_scores.cli:main",
"SPARE = spare_scores.cli:main"]
},
)
)
2 changes: 1 addition & 1 deletion spare_scores/cli.py
@@ -307,4 +307,4 @@ def main():
arguments.logs)
return

return
return
75 changes: 39 additions & 36 deletions spare_scores/data_prep.py
@@ -11,21 +11,21 @@


def check_train(df: pd.DataFrame,
predictors: list,
to_predict: str,
key_var: str,
pos_group: str = '',
verbose: int = 1) -> Tuple[pd.DataFrame, list, str]:
predictors: list,
to_predict: str,
verbose: int = 1, # unused and slated for removal; removing it now breaks
# test cases (check_train() got an unexpected keyword argument 'verbose')
pos_group: str = '') -> Tuple[pd.DataFrame, list, str]:
"""Checks training dataframe for errors.

Args:
df: a pandas dataframe containing training data.
predictors: a list of predictors for SPARE model training.
to_predict: variable to predict.
pos_group: group to assign a positive SPARE score (only for classification).
df(pandas.DataFrame): a pandas dataframe containing training data.
predictors(list): a list of predictors for SPARE model training.
to_predict(str): variable to predict.
pos_group(str): group to assign a positive SPARE score (only for classification).

Returns:
a tuple containing 1) filtered dataframe, 2) filtered predictors, 3) SPARE model type.
Tuple[pandas.DataFrame, list, str]: a tuple containing 1) the filtered dataframe, 2) the filtered predictors, 3) the SPARE model type.
"""
# GAI 26/04/2023: Removed check for existence of these columns
# if not {'ID','Age','Sex'}.issubset(set(df.columns)):
@@ -77,13 +77,12 @@ def check_train(df: pd.DataFrame,
return df, predictors, mdl_task
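The returned model type depends on the target column. A hypothetical stdlib sketch of the rule implied by the docstring (a binary target implies classification, anything else regression); `infer_task` is an illustrative helper, not part of `spare_scores`:

```python
# Illustrative sketch only: check_train's actual logic also filters the
# dataframe and predictors; here we show just the task-inference rule.
def infer_task(values):
    # Exactly two distinct target values -> classification, else regression.
    return 'Classification' if len(set(values)) == 2 else 'Regression'
```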

def check_test(df: pd.DataFrame,
meta_data: dict,
verbose: int = 1):
meta_data: dict):
"""Checks testing dataframe for errors.

Args:
df: a pandas dataframe containing testing data.
meta_data: a dictionary containing training information on its paired SPARE model.
df(pandas.DataFrame): a pandas dataframe containing testing data.
meta_data(dict): a dictionary containing training information on its paired SPARE model.
"""
############# Removing the hardcoded check for the below cols #############
# if not {'ID','Age','Sex'}.issubset(set(df.columns)):
@@ -106,31 +105,27 @@ def check_test(df: pd.DataFrame,
if np.sum(np.sum(pd.isna(df[meta_data['predictors']]))) > 0:
logging.warn('Some participants have invalid (missing or NaN values) predictor variables.')

############# Removing the hardcoded ID checks #############
if 'ID' not in df.columns:
# logging.info('"ID" column not found in the input dataframe. Treating all participants as independent from training.')
pass
else:
if 'ID' in df.columns:
if np.any(df['ID'].isin(meta_data['cv_results']['ID'])):
logging.info('Some participants seem to have been in the model training.')

return 'OK', None
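The NaN check above uses numpy/pandas; a stdlib-only sketch of the same idea, with rows represented as a hypothetical list of dicts rather than a DataFrame:

```python
import math

# Returns True if any predictor value is missing or NaN in any row.
# This mirrors the pd.isna() scan in check_test, but on plain dicts.
def has_invalid_predictors(rows, predictors):
    for row in rows:
        for p in predictors:
            v = row.get(p)
            if v is None or (isinstance(v, float) and math.isnan(v)):
                return True
    return False
```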

def smart_unique(df1: pd.DataFrame,
df2: pd.DataFrame=None,
to_predict: str=None,
verbose: int=1) -> Union[pd.DataFrame, tuple]:
df2: pd.DataFrame=None,
to_predict: str=None) -> Union[pd.DataFrame, tuple]:
"""Select unique data points in a way that optimizes SPARE training.
For SPARE regression, preserve data points with extreme values.
For SPARE classification, preserve data points that help age match.

Args:
df1: a pandas dataframe.
df2: a pandas dataframe (optional) if df1 and df2 are two groups to classify.
to_predict: variable to predict. Binary for classification and continuous for regression.
df1(pandas.DataFrame): a pandas dataframe.
df2(pandas.DataFrame): optional, if df1 and df2 are two groups to classify.
to_predict(str): variable to predict. Binary for classification and continuous for regression.
Must be one of the columns in df. Ignored if df2 is given.

Returns:
a trimmed pandas dataframe or a tuple of two dataframes with only one time point per ID.
pandas.DataFrame: a trimmed pandas dataframe or a tuple of two dataframes with only one time point per ID.
"""
assert (isinstance(df2, pd.DataFrame) or (df2 is None)), (
'Either provide a 2nd pandas dataframe for the 2nd argument or specify it with "to_predict"')
@@ -191,20 +186,20 @@ def age_sex_match(df1: pd.DataFrame,
"""Match two groups for age and sex.

Args:
df1: a pandas dataframe.
df2: a pandas dataframe (optional) if df1 and df2 are two groups to classify.
to_match: a binary variable of two groups. Must be one of the columns in df.
df1(pandas.DataFrame): a pandas dataframe.
df2(pandas.DataFrame): optional, if df1 and df2 are two groups to classify.
to_match(str): a binary variable of two groups. Must be one of the columns in df.
Ignored if df2 is given.
If to_match is 'Sex', then only perform age matching.
p_threshold: minimum p-value for matching.
verbose: whether to output messages.
age_out_percentage: percentage of the larger group to randomly select a participant to
p_threshold(float): minimum p-value for matching. Default value = 0.15
verbose: whether to output messages (will be deprecated later).
age_out_percentage(float): percentage of the larger group to randomly select a participant to
take out from during the age matching. For example, if age_out_percentage = 20 and the
larger group is significantly older, then exclude one random participant from the fifth
quintile based on age.
quintile based on age. Default value = 20

Returns:
a trimmed pandas dataframe or a tuple of two dataframes with age/sex matched groups.
pandas.DataFrame: a trimmed pandas dataframe or a tuple of two dataframes with age/sex matched groups.
"""
assert (isinstance(df2, pd.DataFrame) or (df2 is None)), (
'Either provide a 2nd pandas dataframe for the 2nd argument or specify the two groups with "to_match"')
@@ -286,7 +281,15 @@ def age_sex_match(df1: pd.DataFrame,
else:
return (df1, df2)

def logging_basic_config(verbose=1, content_only=False, filename=''):
def logging_basic_config(verbose: int = 1, content_only: bool = False, filename: str = ''):
"""
Basic logging configuration for error handling and reporting.

Args:
verbose(int): verbosity level. Default value = 1
content_only(bool): if True, output only the message content. Default value = False
filename(str): optional log filename. Default value = ''
"""
"""
logging_level = {0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG, 3:logging.ERROR, 4:logging.CRITICAL}
fmt = ' %(message)s' if content_only else '%(levelname)s (%(funcName)s): %(message)s'
if filename != '' and filename is not None:
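The verbose-to-level mapping above can be exercised on its own. A standalone stdlib sketch; the INFO fallback for out-of-range values is an assumption made for this sketch, not taken from the source:

```python
import logging

# Same mapping as in logging_basic_config.
LEVELS = {0: logging.WARNING, 1: logging.INFO, 2: logging.DEBUG,
          3: logging.ERROR, 4: logging.CRITICAL}

def pick_level(verbose: int = 1) -> int:
    # Fall back to INFO for values outside 0-4 (assumed behavior).
    return LEVELS.get(verbose, logging.INFO)
```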
@@ -313,4 +316,4 @@ def convert_cat_variables(df, predictors, meta_data):
elif len(df[var].unique()) > 2:
raise ValueError('Categorical variables with more than 2 '
+ 'categories are currently not supported.')
return df, meta_data
return df, meta_data
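The two-category rule that `convert_cat_variables` enforces can be sketched without pandas. A hypothetical stdlib re-implementation on plain lists (the real function mutates the dataframe and records the mapping in `meta_data`):

```python
# Two categories map to 0/1 in sorted order; more than two raise ValueError,
# matching the error text shown in the diff above.
def encode_binary(values):
    cats = sorted(set(values))
    if len(cats) > 2:
        raise ValueError('Categorical variables with more than 2 '
                         'categories are currently not supported.')
    mapping = {c: i for i, c in enumerate(cats)}
    return [mapping[v] for v in values], mapping
```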
29 changes: 17 additions & 12 deletions spare_scores/mlp.py
@@ -28,18 +28,23 @@ class MLPModel:
arguments. These will be added as attributes to the class.

Methods:
train_model(df, **kwargs):
Trains the model using the provided dataframe.
fit(df, verbose):
Trains the model using the provided dataframe and default parameters.
Args:
df(pandas.DataFrame): the provided dataframe.
verbose(int): verbosity level.
Returns:
dict: A dictionary with the results from training.

apply_model(df):
Applies the trained model on the provided dataframe and returns
the predictions.

set_parameters(**parameters):
Updates the model's parameters with the provided values. This also
changes the model's attributes, while retaining the original ones.
predict(df):
Predicts the result of the provided dataframe using the trained model.
Args:
df(pandas.DataFrame): the provided dataframe.
Returns:
list: The predictions from the trained model regarding the provided dataframe.

"""
def __init__(self, predictors, to_predict, key_var, verbose=1,**kwargs):
def __init__(self, predictors, to_predict, key_var, verbose=1, **kwargs):
logger = logging_basic_config(verbose, content_only=True)

self.predictors = predictors
@@ -130,7 +135,7 @@ def _fit(self, df):
self.get_stats(y, self.y_hat)

@ignore_warnings(category= (ConvergenceWarning,UserWarning))
def fit(self, df, verbose=1, **kwargs):
def fit(self, df, verbose=1) -> dict:
logger = logging_basic_config(verbose, content_only=True)


@@ -168,7 +173,7 @@ def fit(self, df, verbose=1, **kwargs):

return result

def predict(self, df, verbose=1):
def predict(self, df):

X = df[self.predictors]
X_transformed = self.scaler.transform(X)
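`predict` standardizes the predictors with a fitted scaler before inference (sklearn's `StandardScaler` in the real code). A minimal stdlib stand-in showing the column-wise fit/transform behavior assumed here; `SimpleScaler` is illustrative only:

```python
import statistics

class SimpleScaler:
    # Column-wise standardization: subtract the mean, divide by the
    # population std; a zero std falls back to 1.0 to avoid division by zero.
    def fit(self, columns):
        self.means = [statistics.fmean(c) for c in columns]
        self.stds = [statistics.pstdev(c) or 1.0 for c in columns]
        return self

    def transform(self, columns):
        return [[(v - m) / s for v in c]
                for c, m, s in zip(columns, self.means, self.stds)]
```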