-
Notifications
You must be signed in to change notification settings - Fork 2
/
processor.py
115 lines (95 loc) · 5.23 KB
/
processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from lxml import etree
import json
import sys
from datetime import datetime
def check_num_segments(inputstring):
    """Count the path segments from the ``req``/``conf`` marker to the end.

    The first ``/req/`` occurrence is looked for first, then the first
    ``/conf/`` occurrence. Counting starts at the marker word itself, so
    ``.../conf/movingfeatures/tproperty-delete-success`` counts as 3.

    Returns the count as a string, or ``"0"`` when neither marker is present.
    """
    for marker in ("/req/", "/conf/"):
        position = inputstring.find(marker)
        if position != -1:
            # Skip the marker's leading slash; N remaining slashes delimit
            # N + 1 segments.
            return str(inputstring.count("/", position + 1) + 1)
    return str(0)
def check_segments(elem_type, inputstring):
    """Check that an identifier's segment depth matches its element type.

    Segments are counted from the ``req``/``conf`` marker word to the end of
    ``inputstring``. A ``/req/`` identifier passes with 2 segments for a
    ``requirements_class`` or 3 for a ``requirement``; a ``/conf/`` identifier
    passes with 2 segments for a ``conformance_class`` or 3 for an
    ``abstract_test``. When ``/req/`` is present, ``/conf/`` is not considered.

    Returns ``"PASS"`` or ``"FAIL"``.
    """
    rules = (
        ("/req/", {2: "requirements_class", 3: "requirement"}),
        ("/conf/", {2: "conformance_class", 3: "abstract_test"}),
    )
    for marker, type_for_depth in rules:
        position = inputstring.find(marker)
        if position == -1:
            continue
        # Skip the marker's leading slash; N remaining slashes delimit
        # N + 1 segments.
        depth = inputstring.count("/", position + 1) + 1
        if depth in type_for_depth and type_for_depth[depth] == elem_type:
            return "PASS"
        return "FAIL"
    return "FAIL"
def _classify_title(title_text):
    """Map the text of a ModSpec title paragraph to an element type name.

    Returns one of ``requirements_class``, ``conformance_class``,
    ``requirement`` or ``abstract_test``. For an unrecognised title the text
    itself is echoed to stdout and returned as the type.
    """
    title = str(title_text)
    # First match wins.
    known_titles = (
        ("Requirements class ", "requirements_class"),
        ("Conformance class ", "conformance_class"),
        ("Requirement ", "requirement"),
        ("Abstract test ", "abstract_test"),
    )
    for marker, element_type in known_titles:
        if marker in title:
            return element_type
    print(title_text)
    return title


def _identifier_columns(table, base_uri):
    """Collect identifier URIs from the ``Identifier`` rows of ``table``.

    For every ``<tr>`` with a ``<th>`` whose text is exactly ``Identifier``,
    each ``<tt>`` inside that row's ``<td>`` cells yields one value. Values
    that do not contain ``www.opengis.net`` are prefixed with ``base_uri``.
    """
    columns = []
    for tr_element in table.findall(".//tr"):
        for th_element in tr_element.findall(".//th"):
            if str(th_element.text) != "Identifier":
                continue
            for td_element in tr_element.findall(".//td"):
                for tt_element in td_element.findall(".//tt"):
                    identifier = str(tt_element.text)
                    if "www.opengis.net" in identifier:
                        columns.append(identifier)
                    else:
                        columns.append(base_uri + identifier)
    return columns


def main(argv):
    """Extract ModSpec identifiers from an HTML document into a CSV file.

    Example invocations:
        python3 processor.py 22-047r1.html https://docs.ogc.org/is/22-047r1/22-047r1.html http://www.opengis.net/spec/geosparql/1.1
        python3 processor.py 22-003.html https://docs.ogc.org/is/22-003/22-003.html http://www.opengis.net/spec/ogcapi-movingfeatures-1/1.0

    Args:
        argv: Command-line arguments without the program name:
            ``argv[0]`` is the HTML file name; it is lower-cased and stripped
            of ``.html`` to form the document number, and
            ``./<document number>.html`` is read while
            ``./<document number>.csv`` is written.
            ``argv[1]`` is the URL of the source web page, used to build
            ``<url>#<table id>`` anchors.
            ``argv[2]`` is the base URI prepended to identifiers that do not
            already contain ``www.opengis.net``.

    Raises:
        IndexError: If fewer than three arguments are supplied.
    """
    base_uri = argv[2]
    source_webpage = argv[1]
    document_number = str(argv[0]).lower().replace(".html", "")
    wd = "./"
    title_classes = ("RecommendationTitle", "RecommendationTestTitle")

    # The context manager guarantees the CSV is flushed and closed even when
    # reading or parsing the HTML fails part-way through.
    with open(wd + document_number + '.csv', 'w') as fout:
        current_time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        fout.write("timestamp,target,source,type,num_segments,segment_result\n")

        with open(wd + document_number + ".html", 'r') as file:
            data = file.read()

        tables = iter(etree.HTML(str(data)).findall(".//table"))
        # NOTE(review): the first table of the document has always been
        # skipped; that is preserved here, but confirm it is intentional.
        # The default keeps a document without any table from raising
        # StopIteration.
        next(tables, None)

        for table in tables:
            if table.get('class') != 'modspec':
                continue

            outputstring = ''
            element_type = None
            for paragraph in table.findall(".//p"):
                if paragraph.get('class') not in title_classes:
                    continue
                # Only tables carrying an id can be linked back to the page.
                if 'id' in table.attrib:
                    outputstring += (
                        str(current_time) + "," + source_webpage + "#"
                        + str(table.attrib['id'])
                    )
                    element_type = _classify_title(paragraph.text)

            # Non-empty only when a title paragraph was found on a table
            # that has an id.
            if not outputstring:
                continue

            for identifier in _identifier_columns(table, base_uri):
                outputstring += "," + identifier

            fout.write(",".join((
                outputstring,
                str(element_type),
                check_num_segments(outputstring),
                check_segments(str(element_type), outputstring),
            )) + "\n")
            print(outputstring + "\n")
# Script entry point: pass the command-line arguments, without the program
# name, to main().
if __name__ == "__main__":
    main(sys.argv[1:])