-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathextract_file.py
65 lines (54 loc) · 2.43 KB
/
extract_file.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import shutil
# Parse the txt file into a list of bibliographic entries.
def parse_txt_file(txt_path):
    """Parse a bibliography text export into a list of entry dicts.

    The file is read as UTF-8 and treated as consecutive 7-line records.
    Only the first four lines of each record are used:

    * line 0 -- the title, taken from the text after ``标题:``
    * line 1 -- the authors, taken from the text after ``作者:`` and
      split on ``;``
    * line 2 -- the journal name, taken from the text after ``期刊名称:``
    * line 3 -- the publication date after ``出版时间:``; the year is the
      last whitespace-separated token before the first ``;``

    A trailing group with fewer than four lines (for example a stray blank
    line at the end of the file) is ignored instead of raising.

    Args:
        txt_path: Path of the text file to parse.

    Returns:
        A list of dicts with the keys ``"title"`` (str), ``"authors"``
        (list of str, exactly as split, not stripped), ``"journal"`` (str)
        and ``"year"`` (str).

    Raises:
        IndexError: If a complete record is missing one of the expected
            markers, or its date field is empty.
    """
    record_size = 7  # each record occupies 7 lines in the file
    used_lines = 4  # only the first 4 lines of a record carry parsed fields

    def value_after(line, marker):
        # maxsplit=1 keeps the value intact even if the marker text
        # happens to occur again inside the value itself.
        return line.split(marker, 1)[1]

    with open(txt_path, "r", encoding="utf-8") as f:
        lines = f.read().splitlines()

    data_entries = []
    for start in range(0, len(lines), record_size):
        record = lines[start:start + used_lines]
        if len(record) < used_lines:
            # Incomplete trailing record: nothing more to parse.
            break
        title_line, authors_line, journal_line, date_line = record
        date_text = value_after(date_line, "出版时间:")
        data_entries.append(
            {
                "title": value_after(title_line, "标题:"),
                "authors": value_after(authors_line, "作者:").split(";"),
                "journal": value_after(journal_line, "期刊名称:"),
                "year": date_text.split(";")[0].split()[-1],
            }
        )
    return data_entries
# 根据解析到的数据构建期望的PDF文件名
# def generate_expected_pdf_name(entry):
# if len(entry["authors"]) == 1:
# author_str = entry["authors"][0].split(",")[0]
# elif len(entry["authors"]) == 2:
# author_str = (
# entry["authors"][0].split(",")[0] + "_" + entry["authors"][1].split(",")[0]
# )
# else:
# author_str = entry["authors"][0].split(",")[0] + " et al"
# return f"{entry['year']}_{author_str}_{entry['journal']}.pdf"
# Build the expected PDF file name from a parsed entry.
def generate_expected_pdf_name(entry):
    """Return the PDF file name expected for a parsed entry.

    The name has the form ``{year}_{authors}_{journal}.pdf``. The author
    part is built from surnames, where a surname is the text before the
    first comma of an author string, stripped of surrounding whitespace:

    * one author   -> ``Surname``
    * two authors  -> ``Surname1_Surname2``
    * three or more -> ``Surname1 et al``
    * no authors   -> empty string

    Blank author strings (such as the empty piece left by a trailing ``;``
    in the author list) are ignored so that they do not inflate the author
    count.

    Args:
        entry: Mapping with the keys ``"authors"`` (list of str),
            ``"year"`` and ``"journal"``.

    Returns:
        The expected file name as a string.
    """
    surnames = [
        author.split(",")[0].strip()
        for author in entry["authors"]
        if author.strip()  # skip empty / whitespace-only entries
    ]
    if not surnames:
        author_str = ""
    elif len(surnames) == 1:
        author_str = surnames[0]
    elif len(surnames) == 2:
        author_str = "_".join(surnames)
    else:
        author_str = surnames[0] + " et al"
    return f"{entry['year']}_{author_str}_{entry['journal']}.pdf"
# Main entry point.
def main(
    txt_path="files/numbering_15.txt",
    source_folder="sciAward",
    target_folder="output/rep_paper_15",
):
    """Copy the PDFs listed in a txt file into a numbered output folder.

    Each entry parsed from ``txt_path`` is turned into an expected PDF file
    name. If a file with that name exists in ``source_folder`` it is copied
    to ``target_folder`` with the entry's 1-based position prepended
    (``{idx}_{name}``); otherwise a "PDF not found" message is printed.

    Args:
        txt_path: Path of the txt file describing the entries.
        source_folder: Folder that holds the source PDF files.
        target_folder: Folder that receives the copies; created if missing.
    """
    # exist_ok=True replaces the separate existence check before creation.
    os.makedirs(target_folder, exist_ok=True)

    for idx, entry in enumerate(parse_txt_file(txt_path), 1):
        expected_pdf_name = generate_expected_pdf_name(entry)
        source_pdf_path = os.path.join(source_folder, expected_pdf_name)
        if not os.path.exists(source_pdf_path):
            print(f"PDF not found for: {expected_pdf_name}")
            continue
        target_pdf_path = os.path.join(target_folder, f"{idx}_{expected_pdf_name}")
        shutil.copy(source_pdf_path, target_pdf_path)
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()