-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
72 lines (56 loc) · 3.14 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import argparse
from data_analysis import cal_metrics
from data_synthesis import synthesis
class BaseOptions():
    """Command-line options for the synthesis / analysis entry point.

    Wraps an ``argparse.ArgumentParser``. Arguments are registered by
    ``initialize()`` and parsed (and echoed to stdout) by ``parse()``.
    """

    def __init__(self):
        """Create an empty parser; arguments are registered lazily."""
        self.parser = argparse.ArgumentParser()
        # Flipped to True once initialize() has registered the arguments.
        self.initialized = False

    def initialize(self):
        """Register every supported argument on ``self.parser``.

        Calling this more than once is a no-op: argparse raises a conflict
        error if the same option string is registered twice.
        """
        if self.initialized:
            return

        # experiment target
        self.parser.add_argument("--target", type=str, default='analysis',
                                 help="Target of the task. Please choose between synthesis and analysis.")
        self.parser.add_argument('-d', "--data_dir", type=str,
                                 default='/workspace/data.csv',
                                 help="File path for the reference dataset. CSV format is required. ")

        # synthesis parameters
        self.parser.add_argument("--select_models", type=str,
                                 default='gaussian_copula,ctgan,copula_gan,tvae',
                                 help="Models used for the synthesis. Available methods include gaussian_copula, ctgan, copula_gan, and tvae. ")
        self.parser.add_argument("--num_samples", type=int, default=1000,
                                 help="Number of samples to be synthesized. ")
        self.parser.add_argument("--output_dir", default='/home/chaimeleon/datasets',
                                 help="The folder path to save synthetic outputs.")

        # analysis parameters
        self.parser.add_argument('-s', "--syn_dir", type=str,
                                 default='/workspace/tvae_data.csv',
                                 help="File path for the synthetic dataset. CSV format is required.")
        self.parser.add_argument("--cross_validation", type=int,
                                 default=5,
                                 help="The time for cross validations")
        self.parser.add_argument("--label_col", type=str,
                                 default='class',
                                 help="The column name for label. ")
        self.parser.add_argument("--id_col", type=str,
                                 default='id',
                                 help="The column name for patient IDs. Patient IDs should be removed for ml efficiency evaluation. ")

        self.initialized = True

    def parse(self, args=None):
        """Parse the command line, print the resulting options and return them.

        Args:
            args: Optional list of argument strings. When ``None`` (the
                default) argparse reads ``sys.argv[1:]``, which is the
                original behavior.

        Returns:
            The ``argparse.Namespace`` of parsed options, also stored on
            ``self.opt``.
        """
        if not self.initialized:
            self.initialize()
        self.opt = self.parser.parse_args(args)

        # Echo the effective configuration, sorted by option name.
        print('------------ Options -------------')
        for key, value in sorted(vars(self.opt).items()):
            print('%s: %s' % (key, value))
        print('-------------- End ----------------')
        return self.opt
if __name__ == '__main__':
    # Script entry point: parse the command line, then run the requested task.
    opt = BaseOptions().parse()

    # Reject anything other than the two supported targets up front.
    if opt.target not in ('synthesis', 'analysis'):
        raise ValueError('This function is not implemented. Try synthesis or analysis. ')

    if opt.target == 'analysis':
        # Evaluate the synthetic dataset against the reference dataset.
        cal_metrics(
            opt.data_dir,
            opt.syn_dir,
            opt.label_col,
            opt.id_col,
            opt.cross_validation,
        )
    else:
        # Generate synthetic samples from the reference dataset.
        synthesis(
            opt.data_dir,
            opt.select_models,
            opt.num_samples,
            opt.output_dir,
        )