"""Count linked concepts in a text file via the service at ``config.url``.

The input file is read, sent to the service in batches of lines, and the
``label`` of every entry in each response's ``ResultList`` is tallied.  The
tallies are then written, most frequent first, as one JSON object per line.

Settings read from the project ``config`` module: ``url``, ``lang``,
``folder_path``, ``file_name`` and ``save_folder``.

Reconstructed from git patch e3c22d144b00e83864b86c790f41558a39dc5f0d
("Add files via upload", chenky9106, Sat, 4 Jul 2020); the commit message
notes: "Still need testing".
"""

import json
from collections import Counter

import requests
from tqdm import tqdm

import config


def _batch_lines(lines, batch_size):
    """Join consecutive runs of up to ``batch_size`` lines into single strings."""
    # Stepping by batch_size never yields an empty trailing batch, so no
    # empty-text request is sent when len(lines) is a multiple of batch_size
    # (or when the file is empty).
    return [
        "".join(lines[start:start + batch_size])
        for start in range(0, len(lines), batch_size)
    ]


def _link_labels(url, text):
    """POST ``text`` to ``url`` and return the labels found in the response."""
    payload = {"text": text, "lang": config.lang}
    response = requests.post(url, data=payload)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError when
    # the service answers with an error page.
    response.raise_for_status()
    link_result = json.loads(response.text)
    return [link["label"] for link in link_result["ResultList"]]


def xlink_extract(folder_path, file_name, batch_size=100):
    """Return how often each concept label is linked in a text file.

    Args:
        folder_path: Directory containing the input file.
        file_name: Name of the input file inside ``folder_path``.
        batch_size: Number of lines sent to the service per request.

    Returns:
        A dict mapping each concept label to its number of occurrences, in
        order of first appearance.  Empty when the file has no lines.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        requests.HTTPError: If the service responds with an error status.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size!r}")

    url = config.url
    file_path = f"{folder_path}/{file_name}"
    # Explicit UTF-8 keeps reading independent of the platform's default
    # locale encoding (the output is written as UTF-8 as well).
    with open(file_path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    concept_counts = Counter()
    for text in tqdm(_batch_lines(lines, batch_size)):
        concept_counts.update(_link_labels(url, text))
    # Hand back a plain dict, as callers of the original function received.
    return dict(concept_counts)


def json_dumper(file_path, js):
    """Write the items of ``js`` to ``file_path`` as one JSON document per line.

    Args:
        file_path: Destination path; an existing file is overwritten.
        js: Iterable of JSON-serialisable objects.

    No trailing newline is written after the last record.
    """
    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
    # must be able to represent them: write UTF-8 explicitly.
    with open(file_path, "w", encoding="utf-8") as f:
        f.write("\n".join(json.dumps(record, ensure_ascii=False) for record in js))


def xlink_main():
    """Extract concept frequencies for the configured file and save them.

    Reads ``config.folder_path``/``config.file_name``, and writes the records
    ``{"name": ..., "freq": ...}`` sorted by descending frequency to
    ``config.save_folder``/``config.file_name``.
    """
    concept_dict = xlink_extract(config.folder_path, config.file_name)
    js = sorted(
        ({"name": concept, "freq": freq} for concept, freq in concept_dict.items()),
        key=lambda record: record["freq"],
        reverse=True,
    )
    json_dumper(f"{config.save_folder}/{config.file_name}", js)


if __name__ == "__main__":
    xlink_main()