diff --git a/atomsci/ddm/pipeline/compare_models.py b/atomsci/ddm/pipeline/compare_models.py
index 3e95c88e..a3cac1d1 100644
--- a/atomsci/ddm/pipeline/compare_models.py
+++ b/atomsci/ddm/pipeline/compare_models.py
@@ -1871,7 +1871,7 @@ def get_multitask_perf_from_files_new(result_dir, pred_type='regression', datase
     pred=models[['model_uuid','response_cols']].join(pred)
 
     # check for > 1 dataset
-    if len(set(models.response_cols.astype(str)))>1:
+    if len(set(models.dataset_key.astype(str)))>1:
         raise Exception (f"Warning: you cannot export multitask model performances for more than one dataset at a time. Please provide the dataset_key as an additional parameter. Your {pred_type} options are: {list(set(models.dataset_key))}.")
 
     num_model_tasks=models.num_model_tasks.iloc[0]
diff --git a/atomsci/ddm/pipeline/model_datasets.py b/atomsci/ddm/pipeline/model_datasets.py
index 87bbffd9..3e59d70d 100644
--- a/atomsci/ddm/pipeline/model_datasets.py
+++ b/atomsci/ddm/pipeline/model_datasets.py
@@ -821,6 +821,8 @@ def get_featurized_data(self, dset_df, is_featurized=False):
             self.vals = np.zeros((nrows,ncols))
             self.attr = pd.DataFrame({params.smiles_col: dset_df[params.smiles_col].values},
                            index=dset_df[params.id_col])
+            if params.model_type != "hybrid":
+                self.vals, weights = feat.make_weights(self.vals, is_class=params.prediction_type=='classification')
             self.log.warning("Done")
         else:
             self.log.warning("Featurizing data...")
@@ -828,7 +830,7 @@
                                                                              params, self.contains_responses)
             self.log.warning("Done")
         self.n_features = self.featurization.get_feature_count()
-        self.dataset = NumpyDataset(features, self.vals, ids=ids)
+        self.dataset = NumpyDataset(features, self.vals, ids=ids, w=weights)
 
     # ****************************************************************************************
     def save_featurized_data(self, featurized_dset_df):