import boto3
import botocore
import json
import pandas as pd
import utils.load_data_util
# Pandas Display Settings to allow the dataframe to display in one view
pd.set_option('display.max_columns', 500)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.max_rows', 50000)
s3 = boto3.resource('s3')
# Helper function that turns raw file contents into a JSON array string by replacing the first and last characters with square brackets
def process_string(data: str) -> str:
    """Replace the first and last characters of *data* with square brackets.

    Args:
        data: Raw text whose outermost characters should be swapped for
            ``[`` and ``]``.

    Returns:
        ``data`` without its first and last character, enclosed in ``[``
        and ``]``. Inputs shorter than three characters yield ``"[]"``
        because the inner slice is empty.
    """
    # data[1:-1] drops exactly one character from each end.
    return "[" + data[1:-1] + "]"
# Helper function to count the occurrences of each value of a given key
def count_key(data, key, key_value_count):
    """Tally how often each value stored under *key* occurs in *data*.

    Args:
        data: Iterable of items that can be indexed with ``key``
            (e.g. dicts).
        key: The key looked up in every item of ``data``.
        key_value_count: Dict mapping a value to the number of times it has
            been seen. It is updated in place, so counts accumulate across
            calls that share the same dict.

    Returns:
        None. The result is delivered through ``key_value_count``.

    Raises:
        KeyError: If a dict item in ``data`` has no entry for ``key``.
    """
    for site in data:
        key_value = site[key]
        # Start from 0 the first time a value is encountered.
        key_value_count[key_value] = key_value_count.get(key_value, 0) + 1
# Load the data set through the project loader.
# NOTE(review): the meaning of the argument 50 is defined by
# utils.load_data_util.load_random_data and is not visible here -- confirm.
result = utils.load_data_util.load_random_data(50)

# Distinct values found in the `arguments` column.
unique_args = result.arguments.unique()

# Number of distinct values written to uniqueArgs.txt.
count = len(unique_args)

# Write one value per line, UTF-8 encoded. Binary mode keeps the "\n" line
# endings untouched on every platform.
with open("uniqueArgs.txt", "wb") as f:
    f.writelines((str(arg) + "\n").encode("utf-8") for arg in unique_args)

# Per-symbol count of non-null entries in every other column.
grouped_by_symbol = result.groupby(['symbol']).count()
# Bare expression: displays the table when run as a notebook cell.
grouped_by_symbol
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
symbol | arguments | call_stack | crawl_id | file_number | func_name | in_iframe | location | operation | script_col | script_line | script_loc_eval | script_url | time_stamp | value |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
CanvasRenderingContext2D.fillRect | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
CanvasRenderingContext2D.fillStyle | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
CanvasRenderingContext2D.textBaseline | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
HTMLCanvasElement.getContext | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
HTMLCanvasElement.height | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
HTMLCanvasElement.style | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
HTMLCanvasElement.width | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
RTCPeerConnection.iceGatheringState | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
RTCPeerConnection.idpLoginUrl | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
RTCPeerConnection.localDescription | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
RTCPeerConnection.onicecandidate | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
RTCPeerConnection.onremovestream | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
RTCPeerConnection.peerIdentity | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
RTCPeerConnection.remoteDescription | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
RTCPeerConnection.signalingState | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
window.Storage.getItem | 182 | 182 | 182 | 182 | 182 | 182 | 182 | 182 | 182 | 182 | 182 | 182 | 182 | 182 |
window.Storage.key | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
window.Storage.length | 0 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 |
window.Storage.removeItem | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 | 35 |
window.Storage.setItem | 49 | 49 | 49 | 49 | 49 | 49 | 49 | 49 | 49 | 49 | 49 | 49 | 49 | 49 |
window.document.cookie | 0 | 479 | 479 | 479 | 479 | 479 | 479 | 479 | 479 | 479 | 479 | 479 | 479 | 479 |
window.localStorage | 0 | 94 | 94 | 94 | 94 | 94 | 94 | 94 | 94 | 94 | 94 | 94 | 94 | 94 |
window.name | 0 | 31 | 31 | 31 | 31 | 31 | 31 | 31 | 31 | 31 | 31 | 31 | 31 | 31 |
window.navigator.appCodeName | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
window.navigator.appName | 0 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 | 20 |
window.navigator.appVersion | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
window.navigator.cookieEnabled | 0 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 | 14 |
window.navigator.language | 0 | 21 | 21 | 21 | 21 | 21 | 21 | 21 | 21 | 21 | 21 | 21 | 21 | 21 |
window.navigator.mimeTypes[application/futuresplash].type | 0 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 | 4 |
window.navigator.mimeTypes[application/x-shockwave-flash].type | 0 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
window.navigator.onLine | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
window.navigator.platform | 0 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 | 23 |
window.navigator.plugins[Shockwave Flash].description | 0 | 39 | 39 | 39 | 39 | 39 | 39 | 39 | 39 | 39 | 39 | 39 | 39 | 39 |
window.navigator.plugins[Shockwave Flash].filename | 0 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 |
window.navigator.plugins[Shockwave Flash].length | 0 | 9 | 9 | 9 | 9 | 9 | 9 | 9 | 9 | 9 | 9 | 9 | 9 | 9 |
window.navigator.plugins[Shockwave Flash].name | 0 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 | 10 |
window.navigator.plugins[Shockwave Flash].version | 0 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 |
window.navigator.product | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
window.navigator.productSub | 0 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 | 2 |
window.navigator.userAgent | 0 | 258 | 258 | 258 | 258 | 258 | 258 | 258 | 258 | 258 | 258 | 258 | 258 | 258 |
window.navigator.vendor | 0 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 | 7 |
window.navigator.vendorSub | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 |
window.screen.colorDepth | 0 | 22 | 22 | 22 | 22 | 22 | 22 | 22 | 22 | 22 | 22 | 22 | 22 | 22 |
window.screen.pixelDepth | 0 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 | 5 |
window.sessionStorage | 0 | 65 | 65 | 65 | 65 | 65 | 65 | 65 | 65 | 65 | 65 | 65 | 65 | 65 |
# Pairwise correlation of the numeric/boolean columns only. Selecting them
# explicitly keeps this working on pandas >= 2.0, where DataFrame.corr() no
# longer drops non-numeric columns silently and raises on string columns.
result.select_dtypes(include=["number", "bool"]).corr()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
crawl_id | file_number | in_iframe | |
---|---|---|---|
crawl_id | NaN | NaN | NaN |
file_number | NaN | 1.000000 | 0.137485 |
in_iframe | NaN | 0.137485 | 1.000000 |