-
Notifications
You must be signed in to change notification settings - Fork 0
/
ParseJsonQuery.py
115 lines (85 loc) · 3.04 KB
/
ParseJsonQuery.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import json,sys,csv
''' Query Parser: The stored query is in self.parsedquery.
The helperquery function has details on how to access the stored query object.'''
class ParseJsonQuery:
    """Parse a JSON query string into a flat dictionary.

    Nested JSON objects are collapsed into a single-level dictionary whose
    keys are the nested key names joined with ".". The result is stored in
    ``self.parsedquery``; ``helperquery`` shows how to read it back.
    """

    def __init__(self, jsonstring):
        """Store the raw query text and parse it immediately.

        Args:
            jsonstring: JSON text whose top-level value is an object.

        Raises:
            ValueError: If the text is not valid JSON or its top-level value
                is not an object (see ``ParseInpString``).
        """
        # Input JSON query (from the front-end, per the original note) as a string.
        self.inpjson = jsonstring
        # Dictionary holding the flattened query; filled by ParseInpString().
        self.parsedquery = None
        self.ParseInpString()

    def ParseInpString(self):
        """Decode ``self.inpjson`` and store the flattened result.

        Raises:
            ValueError: If the text is not valid JSON (``json.loads`` raises
                a ``ValueError`` subclass) or if the decoded top-level value
                is not a JSON object.
        """
        json_data = json.loads(self.inpjson)
        # Only an object can be flattened; reject arrays, scalars and null
        # up front instead of failing later with an obscure AttributeError.
        if not isinstance(json_data, dict):
            raise ValueError(
                "query must be a JSON object at the top level, got "
                + type(json_data).__name__
            )
        # Flatten nested JSON with "." as separator.
        self.parsedquery = self.FlattenJson(json_data, ".")

    def FlattenJson(self, b, delim):
        """Return a one-level copy of the nested dictionary ``b``.

        Args:
            b: Dictionary to flatten. Only values that are themselves
                dictionaries are descended into; lists and scalars are
                copied over unchanged.
            delim: String placed between a parent key and a child key.

        Returns:
            A new dictionary. A nested dictionary with no entries
            contributes no keys to the result.
        """
        val = {}
        for key, value in b.items():
            if isinstance(value, dict):
                # Prefix every flattened child key with its parent key.
                for sub_key, sub_value in self.FlattenJson(value, delim).items():
                    val[key + delim + sub_key] = sub_value
            else:
                val[key] = value
        return val

    def helperquery(self):
        """Print every match parameter of every field in the parsed query.

        Serves as an example of how to read ``self.parsedquery``:
        ``'extract.fields'`` holds the list of fields to extract (its length
        is the number of things to extract, e.g. 2 for title and price) and
        ``'from.url'`` holds the URL to extract from.

        Raises:
            KeyError: If ``'extract.fields'`` or ``'from.url'`` is missing
                from the parsed query, or a field lacks ``'Field_id'`` or
                ``'match'``.
        """
        fieldlist = self.parsedquery['extract.fields']
        # Not used below; kept to illustrate access and so that a query
        # without a source URL still fails here with KeyError.
        url = self.parsedquery['from.url']
        for each_field in fieldlist:
            # Unique identifier of this field (illustrative; also not used below).
            identifier = each_field['Field_id']
            # Match params is a dictionary: the key is a primitive name and
            # the value is the user-specified setting for it.
            matchparams = each_field['match']
            for key, val in matchparams.items():
                print(key)
                print(val)
if __name__ == "__main__":
    # Sample query: declares four text fields (AA-DD), each matched by tag
    # name and text-length bounds, to be extracted from one search URL.
    query = """ {
"extract" : {
"fields": [
{
"Field_id": "AA",
"match" : {
"type" : "text",
"TextLength" : {"lt":100,"gt":80},
"tagName" : "H2"
}
},
{
"Field_id": "BB",
"match" : {
"type" : "text",
"TextLength" : {"lt":20, "gt":2},
"tagName" : "SPAN"
}
},
{
"Field_id": "CC",
"match" : {
"type" : "text",
"TextLength" : {"lt":250,"gt":50},
"tagName" : "H2"
}
},
{
"Field_id": "DD",
"match" : {
"type" : "text",
"TextLength" : {"lt":250,"gt":50},
"tagName" : "H3"
}
}
]
},
"from" : {
"url" : "https://www.amazon.com/s/ref=nb_sb_noss/138-7753184-2542555?url=search-alias%3Delectronics&field-keywords=computer&rh=n%3A172282%2Ck%3Acomputer"
}
}
"""
    # The constructor already parses the query, so a second explicit
    # ParseInpString() call would only repeat the same work.
    pq = ParseJsonQuery(query)
    # Uncomment to print every match parameter of every field:
    # pq.helperquery()