-
Notifications
You must be signed in to change notification settings - Fork 0
/
explore_clinc150.py
23 lines (19 loc) · 1.29 KB
/
explore_clinc150.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# Created by xunannancy at 2024/03/04
from explore_banking77 import save_labels, retrieve_labels_sbert_train_examples, prepare_data_sparse_retrieval, \
construct_dataset
from dataset_utilities import label_check
if __name__ == '__main__':
    # Driver script for the CLINC150 dataset. Every step is delegated to
    # helpers imported from explore_banking77 and dataset_utilities.
    # NOTE(review): the loop nesting below is reconstructed from data flow
    # (the sparse-retrieval call reads `setting`, so it lives inside the
    # setting loop; the construct_dataset sweep uses the nicknames of both
    # settings, so it runs after that loop) -- confirm against the intended
    # layout.
    parent_dataset = 'CLINC150'
    model_name = 'gtr-t5-xl'

    # Step 1: label bookkeeping for the parent dataset.
    save_labels(dataset=parent_dataset)
    label_check(dataset=parent_dataset)

    # Step 2: per training setting, run the SBERT-based label retrieval and
    # then the sparse-retrieval data preparation for each word budget.
    for setting in ['train_10', 'train_5']:
        retrieve_labels_sbert_train_examples(
            dataset=parent_dataset,
            setting=setting,
            batch_examples=5,
            model_name=model_name,
        )

        source_name = f'{setting}_{model_name}_-1examples'
        combined_name = f'combined_{setting}_{model_name}_-1examples'
        for num_words in [100]:
            prepare_data_sparse_retrieval(
                num_words=num_words,
                with_text=True,
                source_list=[source_name],
                nickname=combined_name,
                dataset=parent_dataset,
            )

    # Step 3: build a dataset for every (split, label set, question flag)
    # combination, in the same order the nested loops would visit them.
    construction_jobs = [
        (split, label_nickname, with_question_flag)
        for split in ['train', 'train_5', 'valid', 'test', 'train_10']
        for label_nickname in [
            f'combined_train_5_{model_name}_-1examples',
            f'combined_train_10_{model_name}_-1examples',
        ]
        for with_question_flag in [False]
    ]
    for split, label_nickname, with_question_flag in construction_jobs:
        construct_dataset(
            dataset=split,
            label_nickname=label_nickname,
            parent_dataset=parent_dataset,
            with_question_flag=with_question_flag,
        )