Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: custom bot detection (legacy) #1381

Merged
merged 4 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions packages/prerender-proxy/lib/handlers/prerender-check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ import "source-map-support/register";
import { CloudFrontRequest, CloudFrontRequestEvent } from "aws-lambda";

/**
 * Built-in user-agent patterns that identify bots and crawlers.
 * Matching is case-insensitive and unanchored (substring match).
 *
 * The alternation must never end with a trailing `|`: an empty alternative
 * matches the empty string, which makes the pattern succeed for every
 * user-agent and would route all traffic through the bot branch.
 */
const IS_BOT =
  /googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo! Slurp|duckduckbot|applebot|gptbot/i;

/**
 * Matches request URIs that end in a static-asset file extension.
 * Such requests are served as-is and are not flagged for prerendering.
 *
 * The `i` flag is kept so upper- or mixed-case extensions (e.g. `.PNG`,
 * `.Pdf`) are still recognised as files.
 */
const IS_FILE =
  /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|woff|ttf|svg|webmanifest|eot|torrent)$/i;

/**
 * Compiles the optional custom bot-detection pattern.
 *
 * An unset or empty pattern yields a regex built from `[]` (an empty
 * character class), which never matches. An invalid pattern is logged and
 * also replaced by the never-matching regex, so a bad configuration value
 * cannot throw while the module is loading.
 *
 * @param pattern - Regex source string, or `undefined` when not configured.
 * @returns The compiled regex, or a regex that matches nothing.
 */
const compileCustomBotCheck = (pattern: string | undefined): RegExp => {
  const neverMatches = new RegExp("[]");
  if (!pattern) {
    return neverMatches;
  }
  try {
    return new RegExp(pattern);
  } catch (error) {
    console.error(
      `Ignoring invalid CUSTOM_BOT_CHECK pattern ${JSON.stringify(pattern)}:`,
      error
    );
    return neverMatches;
  }
};

// Allow passing a custom bot detection regex string
const IS_BOT_CUSTOM = compileCustomBotCheck(process.env.CUSTOM_BOT_CHECK);

export const handler = async (
event: CloudFrontRequestEvent
Expand All @@ -14,7 +18,8 @@ export const handler = async (
// If the request is from a bot, is not a file and is not from prerender
// then set the x-request-prerender header so the origin-request lambda function
// alters the origin to prerender.io
if (IS_BOT.test(request.headers["user-agent"][0].value)) {
const userAgent = request.headers["user-agent"][0].value;
if (IS_BOT.test(userAgent) || IS_BOT_CUSTOM.test(userAgent)) {
if (!IS_FILE.test(request.uri) && !request.headers["x-prerender"]) {
request.headers["x-request-prerender"] = [
{
Expand Down
17 changes: 16 additions & 1 deletion packages/prerender-proxy/lib/prerender-check-construct.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,20 @@ import { Runtime } from "@aws-cdk/aws-lambda";
import { experimental } from "@aws-cdk/aws-cloudfront";
import { EdgeFunction } from "@aws-cdk/aws-cloudfront/lib/experimental";

/**
 * Options for customising the prerender check function.
 */
export interface PrerenderCheckOptions {
  /**
   * Source string of an additional regular expression for bot detection.
   *
   * It supplements the built-in bot pattern rather than replacing it: a
   * user-agent counts as a bot when either pattern matches.
   */
  customBotCheckRegex: string;
}

export class PrerenderCheckFunction extends Construct {
readonly edgeFunction: EdgeFunction;

constructor(scope: Construct, id: string) {
constructor(scope: Construct, id: string, options?: PrerenderCheckOptions) {
super(scope, id);
this.edgeFunction = new experimental.EdgeFunction(
this,
Expand All @@ -19,6 +29,11 @@ export class PrerenderCheckFunction extends Construct {
sourceMap: true,
projectRoot: `${__dirname}/handlers/`,
depsLockFilePath: `${__dirname}/handlers/package-lock.json`,
define: {
"process.env.CUSTOM_BOT_CHECK": JSON.stringify(
options?.customBotCheckRegex ?? "[]"
),
},
} as any),
runtime: Runtime.NODEJS_16_X,
handler: "index.handler",
Expand Down
9 changes: 7 additions & 2 deletions packages/prerender-proxy/lib/prerender-lambda-construct.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import { Construct, CfnOutput } from "@aws-cdk/core";
import { PrerenderFunction } from "./prerender-construct";
import { PrerenderCheckFunction } from "./prerender-check-construct";
import {
PrerenderCheckFunction,
PrerenderCheckOptions,
} from "./prerender-check-construct";
import { ErrorResponseFunction } from "./error-response-construct";
import {
CloudFrontCacheControl,
Expand All @@ -11,6 +14,7 @@ export interface PrerenderLambdaProps {
prerenderToken: string;
exclusionExpression?: string;
cacheControlProps?: CloudFrontCacheControlOptions;
prerenderCheckOptions?: PrerenderCheckOptions;
}

export class PrerenderLambda extends Construct {
Expand All @@ -24,7 +28,8 @@ export class PrerenderLambda extends Construct {

this.prerenderCheckFunction = new PrerenderCheckFunction(
this,
"PrerenderViewerRequest"
"PrerenderViewerRequest",
props.prerenderCheckOptions
);

this.prerenderFunction = new PrerenderFunction(
Expand Down
Loading