Skip to content

Commit

Permalink
added saving all evidence for comp #9
Browse files Browse the repository at this point in the history
  • Loading branch information
danielgoldelman committed Dec 20, 2023
1 parent ad7f641 commit 9ce1155
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 6 deletions.
41 changes: 39 additions & 2 deletions rest-api/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ if (args.length > 2 && args[2] == "debug") {

async function rest(table) {
// create application/json parser
var jsonParser = bodyParser.json();
var jsonParser = bodyParser.json({limit:'10mb'});

// set table name
const table_name = "entries";

app.get("/", (req, res) => res.send("Try: /" + table));
app.get("/", (req, res) => res.send("Try: /" + table_name));

app.get("/status", (req, res) => res.send("Success."));

Expand Down Expand Up @@ -94,6 +94,43 @@ async function rest(table) {
res.json({ res: "completed" });
}
});

// CREATE TABLE allEv (id INTEGER PRIMARY KEY AUTO_INCREMENT, rootUrl varchar(255), request text(100000));

/**
 * POST /allEv — store one evidence record in the `allEv` table.
 *
 * Reads `host` and `request` from the JSON body and inserts them as the
 * `rootUrl` and `request` columns.
 *
 * Responses:
 *   { res: "empty body" }  when the parsed body is missing or has no keys
 *   { res: "completed" }   once the INSERT has succeeded
 *   500 { res: "error" }   when the INSERT fails
 */
app.post("/allEv", jsonParser, (req, res) => {
  const reqBody = req.body;

  // `reqBody == {}` compares against a brand-new object and is therefore
  // always false; count the keys instead so empty bodies are really rejected.
  if (reqBody == null || Object.keys(reqBody).length === 0) {
    res.json({ res: "empty body" });
    return;
  }

  const { host, request } = reqBody;
  connection.query(
    "INSERT INTO ??.?? (rootUrl, request) VALUES (?,?)",
    [process.env.DB_DATABASE, "allEv", host, request],
    (error) => {
      if (error) {
        // Throwing here would happen outside Express's request handling and
        // could take the whole server down, so report the failure instead.
        console.error("POST /allEv insert failed:", error);
        res.status(500).json({ res: "error" });
        return;
      }
      // Reply only after the row has actually been written.
      res.json({ res: "completed" });
    }
  );
});

/**
 * GET /allEv — return every row of the `allEv` table as a JSON array.
 *
 * Responds with 500 { res: "error" } when the query fails.
 */
app.get("/allEv", (req, res) => {
  connection.query(
    "SELECT * FROM ??.??",
    [process.env.DB_DATABASE, "allEv"],
    (error, results) => {
      if (error) {
        // Throwing from this asynchronous callback would leave the request
        // without a response and could crash the process; answer with a 500.
        console.error("GET /allEv query failed:", error);
        res.status(500).json({ res: "error" });
        return;
      }
      res.json(results);
    }
  );
});
}

rest("analysis");
Expand Down
11 changes: 11 additions & 0 deletions selenium-crawler/1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Site URL
https://tencent.com
https://verizon.com
https://indiamart.com
https://garmin.com
https://watchguard.com
https://trustarc.com
https://wisc.edu
https://dw.com
https://quantcount.com
https://pewresearch.org
100 changes: 100 additions & 0 deletions selenium-crawler/100_site_test_list.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
Site URL
https://tencent.com
https://verizon.com
https://indiamart.com
https://garmin.com
https://watchguard.com
https://trustarc.com
https://wisc.edu
https://dw.com
https://quantcount.com
https://pewresearch.org
https://craigslist.org
https://americanexpress.com
https://coursera.org
https://coolmathgames.com
https://hltv.org
https://sun.com
https://fitbit.com
https://nintendo.com
https://jusbrasil.com.br
https://scientificamerican.com
https://cardgames.io
https://science.org
https://typing.com
https://mathplayground.com
https://python.org
https://playrix.com
https://here.com
https://acronis.com
https://zearn.org
https://sportybet.com
https://nba.com
https://digg.com
https://cardmarket.com
https://pluto.tv
https://deezer.com
https://dhl.com
https://cmu.edu
https://tesla.com
https://battle.net
https://wellsfargo.com
https://aljazeera.com
https://wiktionary.org
https://bbb.org
https://jhu.edu
https://fedoraproject.org
https://dictionary.com
https://wunderground.com
https://entrepreneur.com
https://umbrella.com
https://census.gov
https://robinhood.com
https://usgs.gov
https://jetbrains.com
https://peacocktv.com
https://adblockplus.org
https://amd.com
https://squareup.com
https://usc.edu
https://citrix.com
https://nyu.edu
https://aboutcookies.org
https://hhs.gov
https://drift.com
https://pcmag.com
https://livescience.com
https://samsclub.com
https://usps.com
https://purdue.edu
https://rollingstone.com
https://disney.com
https://xbox.com
https://khanacademy.org
https://pbskids.org
https://ucdavis.edu
https://senate.gov
https://kernel.org
https://sportradar.com
https://logitech.com
https://blizzard.com
https://nordvpn.com
https://register.com
https://unicef.org
https://iheart.com
https://ny.gov
https://dbs.com
https://sec.gov
https://glassdoor.com
https://today.com
https://coinbase.com
https://unc.edu
https://lego.com
https://howstuffworks.com
https://ssa.gov
https://waze.com
https://aarp.org
https://arizona.edu
https://insider.com
https://duke.edu
https://weather.gov
2 changes: 1 addition & 1 deletion selenium-crawler/error-logging.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"TimeoutError":["https://www.newarena.com","https://www.mercurynews.com","https://www.androidcentral.com","https://www.ebaumsworld.com","https://www.epicurious.com"],"WebDriverError: Reached Error Page":["https://www.science101.com","https://www.finance101.com"]}
{"TimeoutError":["https://quantcount.com"]}
Binary file modified selenium-crawler/ext.xpi
Binary file not shown.
Binary file added selenium-crawler/extEntries.xpi
Binary file not shown.
7 changes: 4 additions & 3 deletions selenium-crawler/local-crawler.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ const firefox = require("selenium-webdriver/firefox");

const fs = require("fs");
const { parse } = require("csv-parse");
const axios = require("axios");

var total_begin = Date.now(); //start logging time
var err_obj = new Object();
// Loads sites to crawl
const sites = [];
fs.createReadStream("val_set_sites1.csv")
// fs.createReadStream("val_set_sites1.csv")
// fs.createReadStream("100_site_test_list.csv")
fs.createReadStream("1.csv")
.pipe(parse({ delimiter: ",", from_line: 2 }))
.on("data", function (row) {
sites.push(row[0]);
Expand Down Expand Up @@ -85,7 +86,7 @@ async function visit_site(sites, site_id) {
try {
await driver.get(sites[site_id]);
// console.log(Date.now()); to compare to site loading time in debug table
await new Promise((resolve) => setTimeout(resolve, 22000));
await new Promise((resolve) => setTimeout(resolve, 30000));
// check if access is denied
// if so, throw an error so it gets tagged as a human check site
var title = await driver.getTitle();
Expand Down

0 comments on commit 9ce1155

Please sign in to comment.