Skip to content

Commit

Permalink
add key based search and list extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
Bharat23 committed May 21, 2020
1 parent bf26fc4 commit 397eb1f
Show file tree
Hide file tree
Showing 11 changed files with 136 additions and 8 deletions.
3 changes: 3 additions & 0 deletions WPTParser/Constants/RegexConstants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
class RegexConstants:
    """Regular-expression patterns for bracketed key segments."""

    # A bracketed non-negative integer such as "[0]"; group 1 captures the digits.
    INDEXED_ARRAY = r"\[(\d+)\]"

    # A bracketed, brace-wrapped "name=value" expression such as "[{id=42}]";
    # group 1 captures the text between the braces.
    DICT_ARRAY_SEARCH = r"\[\{(.+\=.+)\}\]"
1 change: 1 addition & 0 deletions WPTParser/Constants/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from WPTParser.Constants.RegexConstants import RegexConstants as RegexConstants
8 changes: 8 additions & 0 deletions WPTParser/Fetch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ def __init__(self, headers: dict = {}):
self.headers.update(headers)

def json(self, test_id: str='200518_Y2_c736f1cb25d54ac8cd93ebdfdcf6375b') -> dict:
"""Fetches the JSON format result for a WPT test
Keyword Arguments:
test_id {str} -- unique WPT test ID (default: {'200518_Y2_c736f1cb25d54ac8cd93ebdfdcf6375b'})
Returns:
dict -- json response of the WPT test
"""
# api rejects the request with unauthorized if user-agent header not set
url = '{0}/jsonResult.php?test={1}'.format(WPTParser().WPT_URI(), test_id)
json_data = requests.get(url, headers = self.headers)
Expand Down
6 changes: 6 additions & 0 deletions WPTParser/JSONParser/DataExtracter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
class DataExtracter:
    """Common base type for extracter classes."""

    def extract(self):
        """Placeholder meant to be overridden with a concrete implementation.

        Returns:
        None -- the base implementation performs no work
        """
        return None
9 changes: 9 additions & 0 deletions WPTParser/JSONParser/KeyDataExtracter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from WPTParser.JSONParser.DataExtracter import DataExtracter

class KeyDataExtracter(DataExtracter):
    """Extracter that looks a key up in a mapping."""

    def __init__(self):
        super().__init__()

    def extract(self, obj: dict, key: str):
        """Look up ``key`` in ``obj``.

        Arguments:
        obj {dict} -- mapping to read from
        key {str} -- key to look up
        Returns:
        the value stored under ``key``, or None when the key is absent
        """
        value = obj.get(key)
        return value
12 changes: 12 additions & 0 deletions WPTParser/JSONParser/ListDataExtracter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from WPTParser.JSONParser.DataExtracter import DataExtracter

class ListDataExtracter(DataExtracter):
    """Extracter that reads one element of a sequence by position."""

    def __init__(self):
        super().__init__()

    # NOTE: the first parameter shadows the builtin ``list``; the name is kept
    # so existing keyword callers keep working.
    def extract(self, list: list, index: int):
        """Return the element at ``index``, or None when it does not exist.

        Arguments:
        list {list} -- sequence to read from
        index {int} -- position of the wanted element
        Returns:
        the element at ``index``, or None when the lookup fails
        """
        try:
            return list[index]
        except (IndexError, KeyError):
            # Lists signal a missing position with IndexError; KeyError is
            # still handled for mappings that were passed in before this fix.
            return None
14 changes: 14 additions & 0 deletions WPTParser/JSONParser/ObjectListDataExtracter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from WPTParser.JSONParser.DataExtracter import DataExtracter

class ObjectListDataExtracter(DataExtracter):
    """Extracter that finds the first dict in a list matching ``name=value``."""

    def __init__(self):
        super().__init__()

    def extract(self, obj_list: list, key: str):
        """Return the first object whose field equals the requested value.

        Arguments:
        obj_list {list} -- list of dict-like objects to search
        key {str} -- search expression of the form ``name=value``
        Returns:
        the first matching object, or None when nothing matches
        Raises:
        ValueError -- when ``key`` contains no ``=``
        """
        # All spaces are stripped from the expression, including any inside the value.
        key = key.replace(' ', '')
        # Split on the first '=' only, so values that themselves contain '='
        # (for example URLs with a query string) no longer break the unpacking.
        dict_key, dict_value = key.split('=', 1)
        for obj in obj_list:
            # dict_value is always a str, so a missing field (None) or a
            # non-string field value can never compare equal.
            if obj.get(dict_key) == dict_value:
                return obj
        return None
72 changes: 65 additions & 7 deletions WPTParser/JSONParser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,28 @@
# TODO: Add support for array-type fields while parsing, e.g. data.Median.[0].value
import re

from WPTParser.Constants import RegexConstants
from WPTParser.JSONParser.KeyDataExtracter import KeyDataExtracter
from WPTParser.JSONParser.ListDataExtracter import ListDataExtracter
from WPTParser.JSONParser.ObjectListDataExtracter import ObjectListDataExtracter

class JSONParser():

def __init__(self, json: dict = {}):
self.json = json
self.picked_json = {}

def pick(self, key: str = None, keys: list = [], key_delimiter: str = '.'):
"""Extract key from provided JSON object and returns new object of specified keys
Keyword Arguments:
key {str} -- A single key that needs to be extracted (default: {None})
keys {list} -- A list of keys that needs to be extracted (default: {[]})
key_delimiter {str} -- JSON object level separator used in the keys provided (default: {'.'})
Returns:
JSONParser -- object of JSONParser to allow functional chaining
"""
if key is not None:
final_obj = self._recursive_find(self.json, key.split(key_delimiter), 0)
self.picked_json[key] = final_obj
Expand All @@ -15,17 +33,57 @@ def pick(self, key: str = None, keys: list = [], key_delimiter: str = '.'):
return self

def _recursive_find(self, obj: dict = None, level_list: list = None, index: int = 0):
    """Recursively find and extract the value addressed by ``level_list``.

    Keyword Arguments:
    obj {dict} -- object on which extraction needs to be done (default: empty dict)
    level_list {list} -- key segments, one per nesting level (default: empty list)
    index {int} -- position in ``level_list`` handled by this call (default: {0})
    Returns:
    the value/dict/list found at the last level, or None when an intermediate
    level yields None or any error occurs while extracting
    """
    obj = {} if obj is None else obj
    level_list = [] if level_list is None else level_list
    try:
        # Choose the extraction strategy for this segment, then apply it.
        key, extracter = self._process_key(level_list[index])
        current_level_obj = extracter.extract(obj, key)
        is_last_level = index == (len(level_list) - 1)
        # At the last level, or once a level yields None, this is the answer.
        if is_last_level or current_level_obj is None:
            return current_level_obj
        return self._recursive_find(current_level_obj, level_list, index + 1)
    except Exception as ex:
        # Best-effort lookup: any failure is reported and treated as "not found".
        print('error', ex)
        return None

def _process_key(self, key: str):
    """Normalise a key segment and pick the extracter that can resolve it.

    Arguments:
    key {str} -- one segment of the requested key
    Returns:
    str or int, DataExtracter -- the value to hand to the extracter and the
    extracter object matching the segment's form
    """
    indexed = re.match(RegexConstants.INDEXED_ARRAY, key)
    if indexed:
        # Bracketed-integer segment: pass the captured digits on as an int.
        return int(indexed.group(1)), ListDataExtracter()

    searched = re.match(RegexConstants.DICT_ARRAY_SEARCH, key)
    if searched:
        # Brace-wrapped segment: pass on the captured text between the braces.
        return searched.group(1), ObjectListDataExtracter()

    # Anything else is used unchanged as a plain key.
    return key, KeyDataExtracter()


def remove(self, key: str = None, keys: list = []):
    """Placeholder for key removal; currently does nothing.

    Keyword Arguments:
    key {str} -- unused for now (default: {None})
    keys {list} -- unused for now (default: {[]})
    Returns:
    None
    """
    # TODO: Add remove key functionality
    return None

def exec(self):
"""fetches the final object with extracted keys and their values
Returns:
dict -- the dictionary with final extracted values
"""
return self.picked_json
4 changes: 3 additions & 1 deletion data.json

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions tests/test_fetch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import unittest
import os

from WPTParser import WPTParser
from WPTParser.Fetch import Fetch

class FetchJSONTest(unittest.TestCase):
    """Test case covering Fetch.json."""

    def test_fetch_json(self):
        """Fetch.json() called with its default arguments must not return None."""
        response = Fetch().json()
        self.assertNotEqual(response, None)

# Run the tests with verbose output when this module is executed as a script.
if __name__ == "__main__":
    unittest.main(verbosity=2)
Empty file added tests/test_jsonparser.py
Empty file.

0 comments on commit 397eb1f

Please sign in to comment.