Commit e518a90

chore(api): add norobo crawl handling
j-mendez committed Aug 30, 2022
1 parent 16c663e commit e518a90
Showing 8 changed files with 33 additions and 18 deletions.
18 changes: 9 additions & 9 deletions package-lock.json

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions package.json
```diff
@@ -1,6 +1,6 @@
 {
   "name": "@a11ywatch/core",
-  "version": "0.5.46",
+  "version": "0.5.47",
   "description": "a11ywatch central api",
   "main": "./server.js",
   "scripts": {
@@ -12,7 +12,7 @@
     "pub": "tsc && cp package.json dist && cp package-lock.json dist && cp README.md dist && cp LICENSE dist && cd dist && npm publish"
   },
   "dependencies": {
-    "@a11ywatch/protos": "^0.3.1",
+    "@a11ywatch/protos": "^0.3.3",
     "@a11ywatch/website-source-builder": "^0.0.32",
     "@fastify/cookie": "^6.0.0",
     "@fastify/cors": "^7.0.0",
```
4 changes: 2 additions & 2 deletions src/core/actions/accessibility/watcher_crawl.ts
```diff
@@ -6,7 +6,7 @@ interface CrawlParams {
   url?: string;
   scan?: boolean; // determine scan or crawl method
   userId?: number;
-  robots?: boolean; // respect robots txt file
+  robots?: boolean; // respect robots txt file, defaults to true
   subdomains?: boolean;
   tld?: boolean;
 }
@@ -31,7 +31,7 @@ export const watcherCrawl = async ({
   const crawlParams = {
     url: String(initUrl(url, true)),
     id: userId,
-    norobots: !robots,
+    robots,
     subdomains: subdomains,
     tld: tld,
   };
```
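The crawl parameters now carry `robots` straight through instead of inverting it into a `norobots` field, so the crawler receives the flag with its natural polarity. A minimal sketch of a call site under that contract — the import path and argument values are illustrative, not from this commit:

```ts
// Illustrative only: trigger a full crawl that ignores robots.txt.
// robots defaults to true, so it must be disabled explicitly.
import { watcherCrawl } from "../core/actions/accessibility/watcher_crawl";

const run = async () => {
  await watcherCrawl({
    url: "https://example.com",
    userId: 42,
    scan: false,   // full crawl rather than a single-page scan
    robots: false, // forwarded unchanged as crawlParams.robots
    subdomains: true,
    tld: false,
  });
};

run().catch(console.error);
```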
10 changes: 9 additions & 1 deletion src/core/streams/crawl-config.ts
```diff
@@ -2,7 +2,14 @@ import { SUPER_MODE } from "../../config/config";
 import { getWebsite } from "../controllers/websites";
 
 // get the website crawl configuration
-export const getCrawlConfig = async ({ id, role, url, tld, subdomains }) => {
+export const getCrawlConfig = async ({
+  id,
+  role,
+  url,
+  tld,
+  subdomains,
+  robots = true,
+}) => {
   let subdomainsEnabled = subdomains;
   let tldEnabled = tld;
 
@@ -29,5 +36,6 @@ export const getCrawlConfig = async ({ id, role, url, tld, subdomains }) => {
     userId: id,
     subdomains: subdomainsEnabled,
     tld: tldEnabled,
+    robots,
   };
 };
```
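Because the default lives in the destructuring pattern, `robots` stays `true` whenever a caller omits the flag or passes `undefined`; only an explicit `false` opts out. A standalone sketch of that semantics (the helper name is made up for illustration):

```ts
// Destructuring defaults apply when the property is missing OR undefined.
const getConfig = ({ robots = true }: { robots?: boolean }) => ({ robots });

console.log(getConfig({}));                    // { robots: true }
console.log(getConfig({ robots: undefined })); // { robots: true }
console.log(getConfig({ robots: false }));     // { robots: false }
```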
1 change: 1 addition & 0 deletions src/core/streams/crawl.ts
```diff
@@ -44,6 +44,7 @@ export const crawlStream = async (
     role: userNext.role,
     subdomains: body.subdomains,
     tld: body.tld,
+    robots: body.robots,
   });
 
   res.raw.write("[");
```
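`crawlStream` forwards `body.robots` into `getCrawlConfig`, which means HTTP clients can disable robots.txt handling per request. A hypothetical client call — the endpoint path and port are assumptions, not taken from this diff:

```ts
// Hypothetical request against the streaming crawl endpoint.
// Assumes Node 18+ (global fetch) and an ES module (top-level await).
const res = await fetch("http://localhost:3280/api/crawl", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    url: "https://example.com",
    subdomains: true,
    tld: false,
    robots: false, // body.robots -> getCrawlConfig -> watcherCrawl
  }),
});
```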
3 changes: 2 additions & 1 deletion src/core/utils/crawl-stream-slim.ts
```diff
@@ -14,7 +14,7 @@ export const crawlHttpStreamSlim = (
   client?: string,
   onlyData?: boolean // remove issues and other data from stream
 ): Promise<boolean> => {
-  const { url, userId, subdomains, tld } = props;
+  const { url, userId, subdomains, tld, robots } = props;
 
   setImmediate(async () => {
     await watcherCrawl({
@@ -23,6 +23,7 @@ export const crawlHttpStreamSlim = (
       subdomains: !!subdomains,
       tld: !!tld,
       scan: true,
+      robots,
     });
   });
```
4 changes: 3 additions & 1 deletion src/core/utils/crawl-stream.ts
```diff
@@ -10,6 +10,7 @@ export type CrawlProps = {
   userId?: number;
   subdomains?: boolean;
   tld?: boolean;
+  robots?: boolean; // respect robots txt file, defaults to true
 };
 
 // crawl website and wait for finished emit event to continue @return Website[] use for testing.
@@ -18,7 +19,7 @@ export const crawlHttpStream = (
   res: FastifyContext["reply"],
   client?: string
 ): Promise<boolean> => {
-  const { url, userId, subdomains, tld } = props;
+  const { url, userId, subdomains, tld, robots = true } = props;
 
   setImmediate(async () => {
     await watcherCrawl({
@@ -27,6 +28,7 @@ export const crawlHttpStream = (
       subdomains: !!subdomains,
       tld: !!tld,
       scan: true,
+      robots,
     });
   });
```
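Worth noting: `crawlHttpStream` defaults `robots` to `true` at the destructuring site, while `crawlHttpStreamSlim` above forwards whatever it receives, including `undefined`, and relies on downstream defaults. A compressed sketch of the asymmetry:

```ts
// crawl-stream-slim.ts style: no local default, undefined passes through.
const slim = ({ robots }: { robots?: boolean }) => robots;
// crawl-stream.ts style: default applied at the boundary.
const full = ({ robots = true }: { robots?: boolean }) => robots;

slim({}); // undefined — the crawler must treat a missing flag as true
full({}); // true      — resolved before the crawl is dispatched
```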
7 changes: 5 additions & 2 deletions src/proto/calls/core-crawl.ts
```diff
@@ -33,10 +33,12 @@ export const coreCrawl = async (call: ServerCallStreaming) => {
 
 // crawl website slim and wait for finished emit event to continue @return Website[].
 export const crawlStreaming = (
-  props: CrawlProps,
+  props: CrawlProps & {
+    norobo?: boolean;
+  },
   call: ServerCallStreaming
 ): Promise<boolean> => {
-  const { url, userId, subdomains, tld } = props;
+  const { url, userId, subdomains, tld, norobo } = props;
@@ -45,6 +47,7 @@ export const crawlStreaming = (
       subdomains: !!subdomains,
       tld: !!tld,
       scan: true,
+      robots: !norobo,
     });
   });
```
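At the gRPC boundary the flag travels with inverted polarity: the wire field is `norobo` and the server flips it back before dispatching the crawl. Since double negation is easy to get backwards, a tiny truth-table sketch:

```ts
// norobo (wire field) -> robots (internal flag)
const toRobots = (norobo?: boolean): boolean => !norobo;

toRobots(true);      // false — ignore robots.txt
toRobots(false);     // true  — respect robots.txt
toRobots(undefined); // true  — flag omitted, respecting robots.txt is the default
```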
