Skip to content

Commit

Permalink
Merge pull request #1382 from aligent/feature/custom-bot-detection
Browse files Browse the repository at this point in the history
feat: support custom bot detection string
  • Loading branch information
TheOrangePuff authored May 31, 2024
2 parents 26995d4 + 7c08f79 commit 9010cae
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 6 deletions.
11 changes: 8 additions & 3 deletions packages/prerender-proxy/lib/handlers/prerender-check.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ import "source-map-support/register";
import { CloudFrontRequest, CloudFrontRequestEvent } from "aws-lambda";

// Case-insensitive pattern of user-agent substrings that identify a bot.
// The handler tests the request's user-agent header against it.
// NOTE(review): two regex literals follow the declaration back to back; this
// looks like the removed and added lines of a diff rendered without +/-
// markers. As written only the first literal is assigned and the second is a
// no-op expression statement — confirm which literal is the current one.
const IS_BOT =
/googlebot|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|lighthouse|adsbot-google|Feedfetcher-Google|bingbot|yandex|baiduspider|Facebot|facebookexternalhit|twitterbot|rogerbot|linkedinbot|embedly|quora link preview|showyoubot|outbrain|pinterest|slackbot|vkShare|W3C_Validator|AhrefsBot|SiteAuditBot|SemrushBot|Screaming Frog SEO Spider/i;
/googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo! Slurp|duckduckbot|applebot|gptbot/i;

// Pattern for URIs that end in a static-asset file extension. The handler
// tests request.uri against it and does not flag matching requests for
// prerendering.
// NOTE(review): two regex literals follow the declaration back to back; this
// looks like the removed and added lines of a diff rendered without +/-
// markers. The second literal has no `i` flag, so it would not match
// upper-case extensions such as `.PNG` — confirm that is intended.
const IS_FILE =
/\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|doc|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|torrent|ttf|woff|svg|eot)$/i;
/\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|woff|ttf|svg|webmanifest|eot|torrent)$/;

// Regex source that can never match: an empty character class has no members.
const NEVER_MATCH_PATTERN = "[]";

/**
 * Compiles the optional custom bot detection pattern.
 *
 * An unset or empty pattern yields a regex that never matches, so only the
 * built-in bot check applies. An invalid pattern is logged and treated the
 * same way rather than throwing while the module loads, which would fail
 * every request handled by this function.
 *
 * @param pattern - Regex source string; matched case-insensitively.
 * @returns A case-insensitive RegExp that never matches when no usable
 *   pattern was supplied.
 */
function compileCustomBotCheck(pattern: string | undefined): RegExp {
  try {
    // `||` rather than `??`: an empty string would compile to a regex that
    // matches every user agent.
    return new RegExp(pattern || NEVER_MATCH_PATTERN, "i");
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    console.error(`Ignoring invalid CUSTOM_BOT_CHECK pattern: ${reason}`);
    return new RegExp(NEVER_MATCH_PATTERN, "i");
  }
}

// Allow passing a custom bot detection regex string
const IS_BOT_CUSTOM = compileCustomBotCheck(process.env.CUSTOM_BOT_CHECK);

export const handler = async (
event: CloudFrontRequestEvent
Expand All @@ -14,7 +18,8 @@ export const handler = async (
// If the request is from a bot, is not a file and is not from prerender
// then set the x-request-prerender header so the origin-request lambda function
// alters the origin to prerender.io
if (IS_BOT.test(request.headers["user-agent"][0].value)) {
const userAgent = request.headers["user-agent"][0].value;
if (IS_BOT.test(userAgent) || IS_BOT_CUSTOM.test(userAgent)) {
if (!IS_FILE.test(request.uri) && !request.headers["x-prerender"]) {
request.headers["x-request-prerender"] = [
{
Expand Down
17 changes: 16 additions & 1 deletion packages/prerender-proxy/lib/prerender-check-construct.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,20 @@ import { Construct } from "constructs";
import { join } from "path";
import { Esbuild } from "@aligent/cdk-esbuild";

/**
* Options for the prerender check function.
*/
export interface PrerenderCheckOptions {
/**
* A custom regex string to detect bots. Will be used in addition
* to the existing bot check regex to determine if a user-agent is a bot.
*
* The string is injected into the handler bundle as
* `process.env.CUSTOM_BOT_CHECK` and compiled there with the
* case-insensitive (`i`) flag, so it must be a valid regular expression
* source, e.g. `"mybot|othercrawler"`.
*/
customBotCheckRegex: string;
}

export class PrerenderCheckFunction extends Construct {
readonly edgeFunction: experimental.EdgeFunction;

constructor(scope: Construct, id: string) {
constructor(scope: Construct, id: string, options?: PrerenderCheckOptions) {
super(scope, id);

const command = [
Expand All @@ -28,6 +38,11 @@ export class PrerenderCheckFunction extends Construct {
image: DockerImage.fromRegistry("busybox"),
local: new Esbuild({
entryPoints: [join(__dirname, "handlers/prerender-check.ts")],
define: {
"process.env.CUSTOM_BOT_CHECK": JSON.stringify(
options?.customBotCheckRegex ?? "[]"
),
},
}),
},
}),
Expand Down
9 changes: 7 additions & 2 deletions packages/prerender-proxy/lib/prerender-lambda-construct.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ import {
CloudFrontCacheControl,
CloudFrontCacheControlOptions,
} from "./prerender-cf-cache-control-construct";
import { PrerenderCheckFunction } from "./prerender-check-construct";
import {
PrerenderCheckFunction,
PrerenderCheckOptions,
} from "./prerender-check-construct";
import {
PrerenderFunction,
PrerenderFunctionOptions,
Expand All @@ -16,6 +19,7 @@ import {
/**
* Configuration for the prerender Lambda functions.
* NOTE(review): presumably the props type of the PrerenderLambda construct —
* its constructor signature is not visible here; confirm.
*/
export interface PrerenderLambdaProps {
prerenderProps: PrerenderFunctionOptions;
errorResponseProps: ErrorResponseFunctionOptions;
/**
* Optional settings passed to the PrerenderCheckFunction
* ("PrerenderViewerRequest") when it is constructed, e.g. a custom
* bot detection regex.
*/
prerenderCheckOptions?: PrerenderCheckOptions;
cacheControlProps?: CloudFrontCacheControlOptions;
}

Expand All @@ -30,7 +34,8 @@ export class PrerenderLambda extends Construct {

this.prerenderCheckFunction = new PrerenderCheckFunction(
this,
"PrerenderViewerRequest"
"PrerenderViewerRequest",
props.prerenderCheckOptions
);

this.prerenderFunction = new PrerenderFunction(
Expand Down

0 comments on commit 9010cae

Please sign in to comment.