forked from stopipv/isdi
-
Notifications
You must be signed in to change notification settings - Fork 0
/
blacklist.py
158 lines (131 loc) · 5.87 KB
/
blacklist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
"""
This file has different ways to detect spyware apps and add flags to them.
The CSV file has (at least) three columns: appId, store, and flag.
store: {'playstore', 'appstore', 'offstore'}
flag: {'dual-use', 'spyware', 'safe'}
Flags added to them are from the following five classes:
1. "onstore-dual-use": onstore dual-use apps
2. "onstore-spyware": onstore apps which are clearly spyware based on our analysis
3. "offstore-spyware": offstore spyware apps
4. "regex-spy": Regex based spyware detection
5. "odds-ratio": Spyware based on high co-occurrence with other offstore-spyware
"""
import re
import config
import pandas as pd
def _read_app_flags(path):
    """Read the app-flags CSV at *path*, indexed by ``appId``.

    Malformed rows are skipped with a warning. The keyword that requests
    this was renamed in pandas: ``error_bad_lines=False`` (deprecated in
    1.3, removed in 2.0) became ``on_bad_lines='warn'``. The new spelling is
    tried first, and the old one is used only when this pandas rejects the
    new keyword.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    common = dict(index_col='appId', encoding='latin1')
    try:
        return pd.read_csv(path, on_bad_lines='warn', **common)
    except TypeError:
        # pandas < 1.3 does not know `on_bad_lines`.
        return pd.read_csv(path, error_bad_lines=False, **common)


try:
    # Missing cells get a neutral per-column default so later string and
    # numeric operations do not have to deal with NaN.
    APP_FLAGS = _read_app_flags(config.APP_FLAGS_FILE).fillna({
        'title': '',
        'store': '',
        'flag': '',
        'human': 0,
        'ml_score': 0.0,
        'source': ''
    })
except FileNotFoundError:
    print("You do not currently have the blacklist ISDi uses.\n Please contact the repository authors (https://github.com/stopipv/isdi) with a legitimate request for it.")
    # NOTE(review): exits with status 0 even though loading failed; kept
    # as-is so existing launch scripts see the same exit status.
    exit(0)

# Keep only the rows whose flag is one of the classes this module reports,
# and blank out the '<Unknown>' placeholder wherever it appears.
APP_FLAGS = APP_FLAGS[APP_FLAGS.flag.isin({
    'dual-use', 'spyware', 'co-occurrence'
})].replace('<Unknown>', '')
# Case-insensitive patterns used by the name-based spyware heuristic:
#   "pos" - words that suggest a surveillance app;
#   "neg" - phrases that suggest an *anti*-surveillance / removal tool, which
#           would otherwise trip the "pos" pattern.
SPY_REGEX = dict(
    pos=re.compile(r'(?i)(spy|track|keylog|cheating)'),
    neg=re.compile(
        r'(?i)(anti.*(spy|track|keylog)|(spy|track|keylog).*remov[ea])'
    ),
)
def dedup_app_flags(df):
    """Collapse *df* to one row per ``appId``.

    Missing values are first replaced by ``''``. For each ``appId`` the
    distinct titles are joined with a single space and every ``flag`` value
    is collected into a list (duplicates kept).

    Titles are de-duplicated in first-seen order. A plain ``set`` would make
    the joined title depend on string hash randomisation and so differ from
    one interpreter run to the next.

    Args:
        df: DataFrame with at least ``appId``, ``title`` and ``flag`` columns.

    Returns:
        DataFrame with columns ``appId``, ``title`` (str) and ``flag`` (list).
    """
    def _join_unique(titles):
        # dict keys keep insertion order, giving a stable de-duplication.
        return ' '.join(dict.fromkeys(titles))

    return df.fillna('').groupby('appId').agg({
        'title': _join_unique,
        'flag': list,
    }).reset_index()
def _regex_blacklist(app):
    """Return True when *app* looks like spyware by name alone.

    *app* is a string (an app id or a title). It matches when it contains one
    of the ``SPY_REGEX['pos']`` words and does not match the
    ``SPY_REGEX['neg']`` anti-spyware / removal-tool pattern.
    """
    if SPY_REGEX['pos'].search(app) is None:
        return False
    # A positive hit only counts if the exclusion pattern does not match too.
    return SPY_REGEX['neg'].search(app) is None
def score(flags):
    """Return the summed weight of *flags*.

    The weights are completely arbitrary. Flags that are not in the table
    contribute 0.0, and an empty iterable scores 0.

    The raw CSV flags ``'spyware'`` and ``'co-occurrence'`` are listed
    alongside the derived class names. They are the values the loaded
    ``APP_FLAGS`` table keeps, so without them a known spyware app would
    score 0.

    Args:
        flags: iterable of flag strings.

    Returns:
        Sum of the per-flag weights.
    """
    weight = {
        'onstore-dual-use': 0.8,
        'dual-use': 0.8,
        'onstore-spyware': 1.0,
        'offstore-spyware': 1.0,
        'spyware': 1.0,
        'offstore-app': 0.8,
        'regex-spy': 0.3,
        'odds-ratio': 0.2,
        # Same signal as 'odds-ratio', under the name used in the CSV.
        'co-occurrence': 0.2,
        'system-app': -0.1,
    }
    return sum(weight.get(flag, 0.0) for flag in flags)
def assign_class(flags):
    """Map the score of *flags* to a bootstrap alert class name.

    Score bands: ``<= 0`` -> ``''``, ``<= 0.3`` -> ``'alert-info'``,
    ``<= 0.8`` -> ``'alert-warning'``, anything higher -> ``'alert-primary'``.
    """
    # TODO: This is a view function, should not be here
    total = score(flags)
    if total <= 0:
        return ''
    if total <= 0.3:
        return 'alert-info'
    if total <= 0.8:
        return 'alert-warning'
    return 'alert-primary'
def flag_str(flags):
    """Render *flags* as a comma-separated string of HTML snippets.

    Each non-empty flag (after stripping surrounding whitespace) becomes
    ``<span class="text-X"><abbr title="DESCRIPTION">FLAG</abbr></span>``.
    ``X`` is a bootstrap colour suffix chosen from the flag text, and
    ``DESCRIPTION`` is a human-readable explanation, or the flag itself when
    no explanation is known.
    """
    # Explanations are looked up by the lower-cased flag.
    descriptions = {
        "regex-spy": "This app's name or its app-id contain words like 'spy', 'track', etc.",
        "offstore-spyware": (
            "This app is a spyware app, distributed outside official applicate stores, e.g., "
            "Play Store or iTunes App Store"
        ),
        "co-occurrence": "This app appears very frequently with other offstore-spyware apps.",
        "onstore-dual-use": "This app has a legitimate usecase, but can be harmful in certain situations.",
        "offstore-app": "This app is installed outside Play Store. It might be a preinstalled app too.",
        "dual-use": "This app has a legitimate usecase, but can be harmful in certain situations.",
        "system-app": "This app came preinstalled with the device.",
    }

    def _colour(flag):
        # Order matters: 'spyware' must be tested before the shorter 'spy'.
        if 'spyware' in flag:
            return 'primary'
        if 'dual-use' in flag:
            return 'warning'
        if 'spy' in flag:
            return 'info'
        return ''

    template = "<span class=\"text-{0}\"><abbr title=\"{1}\">{2}</abbr></span>"
    rendered = []
    for raw in flags:
        flag = raw.strip()
        if not flag:
            continue
        rendered.append(
            template.format(_colour(flag), descriptions.get(flag.lower(), flag), flag)
        )
    return ', '.join(rendered)
def store_str(st):
    """Return ``'onstore'`` for the official stores, ``'offstore'`` otherwise."""
    official_stores = ('playstore', 'appstore')
    return 'onstore' if st in official_stores else 'offstore'
def app_title_and_flag(apps, offstore_apps=None, system_apps=None):
    """Get app titles and flags from the app-flags CSV data.

    *apps* is left-joined with ``APP_FLAGS`` on ``appId`` and collapsed to
    one row per app. Each app's flag list is then extended, in this order,
    with ``'offstore-app'``, ``'system-app'`` and ``'regex-spy'`` where they
    apply.

    Args:
        apps: DataFrame with an ``appId`` column.
        offstore_apps: optional iterable of app ids to tag ``'offstore-app'``.
        system_apps: optional iterable of app ids to tag ``'system-app'``.

    Returns:
        DataFrame with columns ``appId``, ``title`` and ``flags`` (a list of
        flag strings per app).
    """
    # None instead of a shared mutable default; copy so any iterable works.
    offstore_apps = [] if offstore_apps is None else list(offstore_apps)
    system_apps = [] if system_apps is None else list(system_apps)

    print("Size of app-flags: {}".format(len(APP_FLAGS)))
    _td = dedup_app_flags(
        apps.merge(APP_FLAGS, on='appId', how="left")
    ).set_index('appId')
    # 'flags' holds the very same list objects as 'flag'; the loops below
    # extend those lists in place.
    _td['flags'] = _td['flag'].fillna('')

    for app_flags in _td.loc[offstore_apps, 'flags']:
        app_flags.append('offstore-app')
    for app_flags in _td.loc[system_apps, 'flags']:
        app_flags.append('system-app')

    # Name-based heuristic: a hit on either the app id or the title counts.
    titles = _td['title'].fillna('')
    for app_id, title, app_flags in zip(_td.index, titles, _td['flags']):
        if _regex_blacklist(app_id) or _regex_blacklist(title):
            app_flags.append('regex-spy')

    # 'co-occurrence' flags need no special handling here: they arrive
    # through the CSV 'flag' column like any other flag.
    return _td[['title', 'flags']].reset_index()
# def flag_apps(apps, device=''):
# """Flag a list of apps based on the APP_FLAGS obtained from the csv file, or spy regex flags"""
# _td = APP_FLAGS.loc[set(apps) & set(APP_FLAGS.index)]
# flagged_apps = (_td['store'].apply(store_str) + '-' + _td['flag']).fillna('').apply(lambda x: [x] if x else [])
# # print(apps, flagged_apps)
# a = flagged_apps + flagged_apps.index.map(_regex_blacklist)
# return a
# def flag_app(app, device=''):
# return flag_apps([app], device=device).iloc[0]
if __name__ == "__main__":
    # Manual smoke test: one app id with a tracker-like name and one that is
    # passed in as a system app.
    sample_ids = ['com.TrackView', 'com.apple.mobileme.fmf1']
    apps = pd.DataFrame({'appId': sample_ids})
    result = app_title_and_flag(apps, system_apps=['com.apple.mobileme.fmf1'])
    print(result)