-
Notifications
You must be signed in to change notification settings - Fork 1
/
tao_iva_classification_pipeline.py
131 lines (104 loc) · 6.11 KB
/
tao_iva_classification_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import tao_iva_classification_ops as tao_iva_ops
import kfp.dsl as dsl
from kubernetes import client as k8s_client
@dsl.pipeline(
    name='taoPipeline',
    description='TAO Pipeline Kubeflow Demo'
)
def taoPipeline(
    # Passed to the "download-model" step.
    pretrained_model_name: str = "nvidia/tao/pretrained_classification:resnet18",
    pretrained_model_dir: str = "models",
    # NOTE(review): train_classification_data is accepted but never passed to
    # any op in this function; kept so existing callers/run configs still work.
    train_classification_data: str = "tao-experiments/data",
    # Passed to the "train-model" and "evaluate-model" steps.
    train_classification_spec: str = "specs/classification_spec.cfg",
    train_classification_output: str = "tao-experiments/output",
    # Shared by the "train-model" and "retrain-model" steps.
    train_num_gpus: str = "1",
    # Passed to the "prune-model" step.
    # NOTE(review): these four parameters are unannotated, unlike the rest.
    # KFP reads annotations when it builds parameter metadata, so confirm the
    # effect on type checking before adding `str` here.
    prune_model_input_file="tao-experiments/output/weights/resnet_020.tlt",
    prune_model_output_file="tao-experiments/output-pruned/resnet18_nopool_bn_pruned.tlt",
    prune_equalization_criterion="union",
    prune_threshold="0.6",
    # retrain_classification_spec is passed to the "retrain-model",
    # "reevaluate-model" and "test-infer" steps.
    retrain_classification_spec: str = "specs/classification_retrain_spec.cfg",
    retrain_classification_output: str = "tao-experiments/output-retrain",
    # Passed to the "test-infer" step.
    test_infer_model_input_file: str = "tao-experiments/output-retrain/weights/resnet_020.tlt",
    test_infer_image_path: str = "tao-experiments/data/split/test/person",
    test_infer_class_map: str = "tao-experiments/output-retrain/classmap.json",
    test_infer_batch_size: str = "16",
    # export_model_input_file is passed to both "export-model" and
    # "export-int8-model"; export_model_output_file only to "export-model".
    export_model_input_file: str = "tao-experiments/output-retrain/weights/resnet_020.tlt",
    export_model_output_file: str = "tao-experiments/export/final_model.etlt",
    # Passed to the "calibrate-model" step; calibrate_output_file is also
    # passed to "export-int8-model".
    calibrate_classification_spec: str = "specs/classification_retrain_spec.cfg",
    calibrate_max_batches: str = "10",
    calibrate_output_file: str = "tao-experiments/export/calibration.tensor",
    # Passed to the "export-int8-model" step; the cache file and the int8 model
    # output file are also passed to "convert-trt".
    export_int8_cache_file: str = "tao-experiments/export/final_model_int8_cache.bin",
    export_int8_batches: str = "10",
    export_int8_model_output_file: str = "tao-experiments/export/final_int8_model.etlt",
    # Passed to the "convert-trt" step; convert_trt_precision is also passed
    # to "export-int8-model".
    convert_trt_output_file: str = "tao-experiments/export/final_model.trt",
    convert_trt_output_layer: str = "predictions/Softmax",
    convert_trt_dims: str = "3,224,224",
    convert_trt_input_type: str = "nchw",
    convert_trt_max_batch_size: str = "64",
    convert_trt_precision: str = "int8",
    convert_trt_batch_size: str = "64",
    tao_mount_dir: str = "/mnt/workspace",  # directory where model/data volume mounted
    api_key: str = "nvidia_tlt",
):
    """Assemble the TAO classification pipeline from ``tao_iva_ops`` steps.

    Steps are created in this order and chained with ``.after()``:

        download-model -> train-model -> {evaluate-model, prune-model}
        -> retrain-model -> reevaluate-model -> test-infer -> export-model
        -> calibrate-model -> export-int8-model -> convert-trt

    ``evaluate-model`` and ``prune-model`` both follow ``train-model``;
    ``retrain-model`` is ordered after both of them. Every step then gets the
    same host-path volume, taken from and mounted at ``tao_mount_dir``.
    """
    persistent_volume_name = 'nvidia-workspace'
    # Keyed by step so the volume can be attached to every op at the end.
    op_dict = {}

    # Download the pretrained model.
    op_dict['tao_pull'] = tao_iva_ops.TAOPullOp(
        "download-model", tao_mount_dir, pretrained_model_dir,
        pretrained_model_name)

    # Train the model.
    op_dict['tao_train'] = tao_iva_ops.TAOTrainClassificationOp(
        "train-model", tao_mount_dir, api_key, train_classification_output,
        train_classification_spec, train_num_gpus)
    op_dict['tao_train'].after(op_dict['tao_pull'])

    # Evaluate the trained model.
    op_dict['tao_evaluate'] = tao_iva_ops.TAOEvaluateClassificationOp(
        "evaluate-model", tao_mount_dir, api_key, train_classification_spec)
    op_dict['tao_evaluate'].after(op_dict['tao_train'])

    # Prune the trained model (ordered after training, not after evaluation).
    op_dict['tao_prune'] = tao_iva_ops.TAOPruneClassificationOp(
        "prune-model", tao_mount_dir, api_key, prune_model_input_file,
        prune_model_output_file, prune_threshold,
        prune_equalization_criterion)
    op_dict['tao_prune'].after(op_dict['tao_train'])

    # Retrain once both evaluation and pruning have been scheduled before it.
    op_dict['tao_retrain'] = tao_iva_ops.TAOTrainClassificationOp(
        "retrain-model", tao_mount_dir, api_key, retrain_classification_output,
        retrain_classification_spec, train_num_gpus)
    op_dict['tao_retrain'].after(op_dict['tao_prune'])
    op_dict['tao_retrain'].after(op_dict['tao_evaluate'])

    # Evaluate again, this time with the retrain spec.
    op_dict['tao_reevaluate'] = tao_iva_ops.TAOEvaluateClassificationOp(
        "reevaluate-model", tao_mount_dir, api_key,
        retrain_classification_spec)
    op_dict['tao_reevaluate'].after(op_dict['tao_retrain'])

    # Test inference.
    op_dict['tao_test_infer'] = tao_iva_ops.TAOInferenceClassificationOp(
        "test-infer", tao_mount_dir, api_key, test_infer_model_input_file,
        test_infer_image_path, test_infer_batch_size, test_infer_class_map,
        retrain_classification_spec)
    op_dict['tao_test_infer'].after(op_dict['tao_reevaluate'])

    # Export the model.
    op_dict['tao_export_model'] = tao_iva_ops.TAOExportClassificationOp(
        "export-model", tao_mount_dir, api_key, export_model_input_file,
        export_model_output_file)
    op_dict['tao_export_model'].after(op_dict['tao_test_infer'])

    # Calibrate the model. Unlike the other TAO steps, this op is not given
    # api_key.
    op_dict['tao_calibrate_model'] = tao_iva_ops.TAOCalibrateClassificationOp(
        "calibrate-model", tao_mount_dir, calibrate_classification_spec,
        calibrate_max_batches, calibrate_output_file)
    op_dict['tao_calibrate_model'].after(op_dict['tao_export_model'])

    # Export the int8 model, reusing the calibration output and the TRT
    # precision setting.
    op_dict['tao_export_int8_model'] = tao_iva_ops.TAOExportClassificationAdvancedOp(
        "export-int8-model", tao_mount_dir, api_key, export_model_input_file,
        export_int8_model_output_file, calibrate_output_file,
        convert_trt_precision, export_int8_batches, export_int8_cache_file)
    op_dict['tao_export_int8_model'].after(op_dict['tao_calibrate_model'])

    # Convert to a TRT engine.
    op_dict['tao_create_trt'] = tao_iva_ops.TAOConvertTRTCalibrateOp(
        "convert-trt", tao_mount_dir, api_key, export_int8_model_output_file,
        export_int8_cache_file, convert_trt_output_file,
        convert_trt_output_layer, convert_trt_dims, convert_trt_input_type,
        convert_trt_max_batch_size, convert_trt_precision,
        convert_trt_batch_size)
    op_dict['tao_create_trt'].after(op_dict['tao_export_int8_model'])

    # Attach the host-path volume to every step. The same string is used as
    # the path on the host and as the mount path inside the container.
    mount_path = str(tao_mount_dir)
    for container_op in op_dict.values():
        container_op.add_volume(k8s_client.V1Volume(
            host_path=k8s_client.V1HostPathVolumeSource(path=mount_path),
            name=persistent_volume_name))
        container_op.add_volume_mount(k8s_client.V1VolumeMount(
            mount_path=mount_path,
            name=persistent_volume_name))
if __name__ == '__main__':
    # When run as a script, compile the pipeline into an archive whose path is
    # this file's path with '.tar.gz' appended.
    from kfp import compiler

    archive_path = __file__ + '.tar.gz'
    pipeline_compiler = compiler.Compiler()
    pipeline_compiler.compile(taoPipeline, archive_path)