forked from olovholm/NIME
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathresultcleaner.py
57 lines (40 loc) · 1.34 KB
/
resultcleaner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/python
#-*- coding: utf-8
import os, re, sys
import xml.etree.ElementTree as ET
def clean_abstract(abst):
    """Flatten an extracted abstract into a single line of text.

    Leading spaces are stripped, words hyphenated across a line break are
    rejoined, all remaining line breaks are removed and runs of spaces are
    collapsed to a single space.

    Args:
        abst: Raw abstract text, or None when the XML element has no text.

    Returns:
        The cleaned text. None and the empty string are returned unchanged.
    """
    if not abst:
        # Element.text is None for an empty element; there is nothing to clean.
        return abst

    abst = abst.lstrip(' ')
    # Merge paired line breaks so a hyphen followed by a blank line is still
    # matched by the de-hyphenation step below.
    abst = abst.replace('\n\n', '\n')
    # Collapse space runs first so 'word-   \n' is reduced to the 'word- \n'
    # form that the de-hyphenation step recognises.
    abst = re.sub(' {2,}', ' ', abst)
    # Rejoin words that were hyphenated at the end of a line.
    abst = abst.replace('-\n', '')
    abst = abst.replace('- \n', '')
    # NOTE(review): line breaks are dropped without inserting a space, so two
    # lines are fused unless one of them carries a space at the break --
    # confirm this matches the extractor's output.
    abst = abst.replace('\n', '')
    # Removing line breaks can leave adjacent spaces behind; collapse again.
    abst = re.sub(' {2,}', ' ', abst)
    return abst
def clean_keywords(keywrd):
    """Flatten an extracted keyword list into a single line of text.

    Leading spaces are stripped, line breaks are removed and runs of spaces
    are collapsed to a single space.

    Args:
        keywrd: Raw keyword text, or None when the XML element has no text.

    Returns:
        The cleaned text. None and the empty string are returned unchanged.
    """
    if not keywrd:
        # Element.text is None for an empty element; there is nothing to clean.
        return keywrd

    keywrd = keywrd.lstrip(' ')
    keywrd = keywrd.replace('\n', '')
    # Collapse after removing line breaks so spaces on both sides of a break
    # end up as a single separator.
    keywrd = re.sub(' {2,}', ' ', keywrd)
    return keywrd
#
# MAIN PROGRAM STARTS RUNNING
#
# Input file with the extracted <document> entries.
result_path = 'result.xml'
tree = ET.parse(result_path)
root = tree.getroot()

# First pass: clean the abstract and keywords of every document in place.
for document in root.iter('document'):
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(document.find('name').text)
    abstract = document.find('abstract')
    # An element that exists but is empty has text None; leave it untouched.
    if abstract is not None and abstract.text is not None:
        abstract.text = clean_abstract(abstract.text)
    keywords = document.find('keywords')
    if keywords is not None and keywords.text is not None:
        keywords.text = clean_keywords(keywords.text)

# Second pass: echo the cleaned values so the result can be inspected.
for document in root.iter('document'):
    print(document.find('name').text)
    abstract = document.find('abstract')
    if abstract is not None:
        print(abstract.text)
    keywords = document.find('keywords')
    if keywords is not None:
        print(keywords.text)

# Write the cleaned tree to a separate file; the input is left unmodified.
tree.write('result_cleaned.xml')
sys.exit(0)