-
Notifications
You must be signed in to change notification settings - Fork 0
/
Background.js
234 lines (201 loc) · 7.35 KB
/
Background.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
"use strict";
/*
* --------------------------------------------------
* Keep list of tabs outside of request callback
* Idea and code from
* https://stackoverflow.com/questions/15498358/synchronous-call-in-google-chrome-extension
* --------------------------------------------------
*/
// Cache of tab objects keyed by tab id. It lives outside the request callback
// so that synchronous code (see shouldBlock) can look a tab up directly.
const tabs = {};
// Seed the cache with every tab that is already open.
chrome.tabs.query({}, (openTabs) => {
  for (const openTab of openTabs) {
    tabs[openTab.id] = openTab;
  }
});
// Create tab event listeners
// chrome.tabs.onUpdated handler: stores the latest tab object in the `tabs`
// cache under the tab's own id. `tabId` and `changeInfo` are not used.
function onUpdatedListener(tabId, changeInfo, tab) {
  const { id: updatedId } = tab;
  tabs[updatedId] = tab;
}
// chrome.tabs.onRemoved handler: drops the closed tab's entry from the `tabs`
// cache. Removing an id that is not cached is a no-op.
function onRemovedListener(tabId) {
  Reflect.deleteProperty(tabs, tabId);
}
// Subscribe to tab events so the `tabs` cache is refreshed when a tab is
// updated and pruned when a tab is removed.
chrome.tabs.onUpdated.addListener(onUpdatedListener);
chrome.tabs.onRemoved.addListener(onRemovedListener);
// Route requests for all URLs through onBeforeSendHeaders. "blocking" is
// requested because the handler returns { cancel: ... } to decide whether the
// request proceeds; "requestHeaders" asks for the request headers to be
// included in the details object handed to the handler.
chrome.webRequest.onBeforeSendHeaders.addListener(onBeforeSendHeaders,
{ urls: ["<all_urls>"] },
["blocking", "requestHeaders"]);
// Startup marker for the background page's console.
console.log('Background page of Rewriting History Defense is loaded.');
// Tally of requests per classification. The property names are exactly the
// strings returned by classifyRequest, so a result can be used as a key.
class Counts {
  constructor() {
    // Every category starts at zero.
    Object.assign(this, {
      waybackMachineMetaRequest: 0,
      archiveRequest: 0,
      archiveEscape: 0,
      notOnArchive: 0,
      unclassifiedRequest: 0,
    });
  }
}
// Record of one visit: the tab id and URL it was created for, a fresh Counts
// tally, and the list of request URLs recorded for it.
class Visit {
  constructor(tabId, url) {
    Object.assign(this, {
      tabId,
      url,
      counts: new Counts(),
      requestURLs: [],
    });
  }
}
// Totals across all tabs; incremented alongside the per-visit counts in
// onBeforeSendHeaders and returned for the 'getGlobalCounts' message.
const globalCounts = new Counts();
// Per-visit records, keyed on visitID, which can be obtained using
// visitIDFromTab(tab), which combines tab.id and tab.url into a unique key.
// Entries are created on first use by getVisitForTab.
const visits = {};
// When true, shouldBlock reports requests classified as 'archiveEscape' as
// blockable. Changed by the 'setEscapeBlockingEnabled' message.
let _escapeBlockingEnabled = true;
// Both values are stored from the 'setAnachronismBlockingEnabled' message.
// NOTE(review): nothing visible in this file reads them yet.
let _anachronismBlockingEnabled = false;
let _anachronismRange = null;
// Register the message handler. The function is declared further down, but
// function declarations are hoisted, so it is already callable here.
initializeMessageListeners();
// webRequest handler: records the request in the per-visit and global counts
// and decides whether the request should be cancelled.
//
// Input: requestDetails, an object containing information about the request
//        (this function reads its `tabId` and `url`).
// Output: An object blocking/not blocking the request, i.e. { cancel: ... }.
//
// The counting happens asynchronously inside the chrome.tabs.get callback,
// while the blocking decision is made synchronously through shouldBlock.
function onBeforeSendHeaders(requestDetails) {
  const { tabId, url } = requestDetails;
  if (typeof tabId !== 'number' || tabId < 0) {
    // The request has no usable tab id, so there is no visit to count it under.
    console.log('tabid is', tabId);
    console.log(url);
  } else {
    chrome.tabs.get(tabId, function(tab) {
      // If the tab was closed before this callback ran, Chrome reports the
      // failure through chrome.runtime.lastError and passes no tab. Reading
      // lastError also marks the error as handled.
      if (chrome.runtime.lastError || !tab) {
        return;
      }
      try {
        const requestType = classifyRequest(requestDetails, tab);
        const visit = getVisitForTab(tab);
        visit.counts[requestType] += 1;
        visit.requestURLs.push(url);
        globalCounts[requestType] += 1;
      } catch (err) {
        // Classification can throw (e.g. a tab URL that cannot be parsed).
        // Statistics are best-effort, so log and carry on.
        console.error(err);
      }
    });
  }
  const willBlock = shouldBlock(requestDetails);
  if (willBlock) {
    console.log(`blocking ${url}`);
  }
  return {
    cancel: willBlock
  };
}
// Decides synchronously whether a request should be cancelled.
//
// Looks the request's tab up in the `tabs` cache and classifies the request.
// Returns false when the tab is not cached, when classification throws, or
// when the request is not an 'archiveEscape'; otherwise returns the current
// value of _escapeBlockingEnabled.
function shouldBlock(requestDetails) {
  const cachedTab = tabs[requestDetails.tabId];
  if (!cachedTab) {
    return false;
  }
  let classification;
  try {
    classification = classifyRequest(requestDetails, cachedTab);
  } catch (err) {
    console.error(err);
    // Be careful -- if error, don't block.
    return false;
  }
  if (classification !== 'archiveEscape') {
    return false;
  }
  return _escapeBlockingEnabled;
}
// Checks whether the request goes to archive.org or one of its subdomains.
//
// Returns true for WBM meta requests (e.g., WBM analytics, time-browsing bar, etc.)
// and for archive requests (requests for actual archived URLs).
//
// Returns false for all requests to domains other than archive.org.
//
// The hostname must be exactly "archive.org" or end with ".archive.org". A
// bare endsWith('archive.org') would also accept unrelated domains such as
// "notarchive.org", letting a look-alike host pass as the Wayback Machine.
function isToArchiveDotOrg(requestDetails) {
  const hostname = getHostnameFromUrl(requestDetails.url);
  return hostname === 'archive.org' || hostname.endsWith('.archive.org');
}
// Checks whether the URL of the request is (roughly) of the form:
//   web.archive.org/web/<timestamp>/<url>
// optionally prefixed with http:// or https://, matched case-insensitively.
//
// The dots of the hostname are escaped in the pattern. Left unescaped they
// match any character, so hosts such as "webxarchive.org" or
// "web-archive.org" would be mistaken for the Wayback Machine.
//
// Input: requestDetails, an object whose `url` is tested.
// Output: true if the URL looks like a request for an archived capture.
function isArchiveRequest(requestDetails) {
  const archiveRequestURLRegex = /^(https?:\/\/)?web\.archive\.org\/web\/\d+/i;
  return archiveRequestURLRegex.test(requestDetails.url);
}
// Returns the hostname portion of a URL: everything that is not the scheme,
// port or path. Subdomains are included in the result.
//
// A URL without a scheme would be treated as relative, so 'http://' is
// prepended first; which scheme is used does not matter for the hostname.
// Throws whatever the URL constructor throws when the input cannot be parsed.
function getHostnameFromUrl(fullUrl) {
  const absoluteUrl = hasScheme(fullUrl) ? fullUrl : 'http://' + fullUrl;
  const { hostname } = new URL(absoluteUrl);
  return hostname;
}
// Check whether url starts with a scheme followed by "://".
//
// A scheme is a letter followed by letters, digits, "+", "-" or "." (the
// RFC 3986 grammar). The earlier \w+ pattern rejected schemes containing a
// hyphen, such as chrome-extension:// or moz-extension://, which made
// callers prepend "http://" to URLs that were already absolute.
function hasScheme(url) {
  const hasSchemeRE = /^[a-z][a-z0-9+.-]*:\/\//i;
  return hasSchemeRE.test(url);
}
// Returns true if the request is to a site outside of the Wayback Machine,
// i.e. whenever isToArchiveDotOrg says the request does not go to archive.org.
//
// Example:
//   http://www.google.com (TRUE)
// but not
//   https://web.archive.org/web/20150324001529/http://www.google.com/ (FALSE)
function isArchiveEscape(requestDetails) {
  const staysOnArchive = isToArchiveDotOrg(requestDetails);
  return !staysOnArchive;
}
// Get the Visit (page stats) object for the given tab, creating and storing
// a new one in `visits` the first time a tab id/URL combination is seen.
function getVisitForTab(tab) {
  const key = visitIDFromTab(tab);
  const existing = visits[key];
  if (existing !== undefined) {
    return existing;
  }
  const created = new Visit(tab.id, tab.url);
  visits[key] = created;
  return created;
}
// Builds the key under which a tab's visit is stored: "<tab id>><tab url>".
//
// The format is kept human readable on purpose. The id comes first because
// URLs can't start with numbers, and `>` is the delimiter because it can't
// appear in URLs -- so, as long as tab ids are unique, every id/URL pair
// should produce a distinct visitID.
function visitIDFromTab(tab) {
  const { id, url } = tab;
  return `${id}>${url}`;
}
// Classifies a request into the categories measured by the extension:
// * "notOnArchive" -- requests where the URL bar isn't on archive.org
// * "waybackMachineMetaRequest" -- archive.org requests that are not
//   archived captures (e.g. *.archive.org where * != web)
// * "archiveRequest" -- requests for archived captures of resources/pages
// * "archiveEscape" -- escapes
// * "unclassifiedRequest" -- something went wrong if this is used
//
// We have to pass in the tab (even though the requestDetails includes the
// tabId) because getting the tab is asynchronous and I wanted this function
// to be synchronous. It could be rewritten to use promises...
//
// Input: requestDetails (its `url` is inspected) and tab (its `url` is the
//        page the request belongs to).
// Output: one of the category strings above.
// Throws if either URL cannot be parsed by getHostnameFromUrl.
function classifyRequest(requestDetails, tab) {
  // The tab counts as "on the archive" only for archive.org itself or a real
  // subdomain; a plain suffix test would also accept e.g. "evilarchive.org".
  const tabHostname = getHostnameFromUrl(tab.url);
  const tabIsOnArchive =
      tabHostname === 'archive.org' || tabHostname.endsWith('.archive.org');
  if (!tabIsOnArchive) {
    return 'notOnArchive';
  }
  if (isArchiveRequest(requestDetails)) {
    return 'archiveRequest';
  }
  if (isToArchiveDotOrg(requestDetails)) {
    return 'waybackMachineMetaRequest';
  }
  if (isArchiveEscape(requestDetails)) {
    return 'archiveEscape';
  }
  return 'unclassifiedRequest';
}
// Allows us to communicate with the popup: registers one message listener
// that dispatches on request.type.
//
// Handled types:
// * 'getGlobalCounts' -- responds with globalCounts
// * 'getVisitForTab' -- responds with getVisitForTab(request.tab)
// * 'getEscapeBlockingEnabled' -- responds with the escape-blocking flag
// * 'setEscapeBlockingEnabled' -- stores request.escapeBlockingEnabled
// * 'setAnachronismBlockingEnabled' -- stores
//   request.anachronismBlockingEnabled and request.anachronismRange
// The two "set" types send no response; unknown types are ignored.
function initializeMessageListeners() {
  // chrome.extension.onMessage is deprecated in favour of
  // chrome.runtime.onMessage. Prefer the supported event and only fall back
  // to the old alias where the new one is unavailable.
  const onMessage =
      (chrome.runtime && chrome.runtime.onMessage) || chrome.extension.onMessage;
  onMessage.addListener(function(request, sender, sendResponse) {
    switch (request.type) {
      case 'getGlobalCounts':
        sendResponse(globalCounts);
        break;
      case 'getVisitForTab':
        sendResponse(getVisitForTab(request.tab));
        break;
      case 'getEscapeBlockingEnabled':
        sendResponse(_escapeBlockingEnabled);
        break;
      case 'setEscapeBlockingEnabled':
        _escapeBlockingEnabled = request.escapeBlockingEnabled;
        console.log(`escape blocking enabled? ${_escapeBlockingEnabled}`);
        break;
      case 'setAnachronismBlockingEnabled':
        _anachronismBlockingEnabled = request.anachronismBlockingEnabled;
        _anachronismRange = request.anachronismRange;
        break;
      default:
        // Not a message for this listener.
        break;
    }
  });
}