diff --git a/packages/prerender-proxy/lib/handlers/prerender-check.ts b/packages/prerender-proxy/lib/handlers/prerender-check.ts
index a50abac0..440a86bf 100644
--- a/packages/prerender-proxy/lib/handlers/prerender-check.ts
+++ b/packages/prerender-proxy/lib/handlers/prerender-check.ts
@@ -2,9 +2,15 @@ import "source-map-support/register";
 import { CloudFrontRequest, CloudFrontRequestEvent } from "aws-lambda";
 
 const IS_BOT =
-  /googlebot|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|lighthouse|adsbot-google|Feedfetcher-Google|bingbot|yandex|baiduspider|Facebot|facebookexternalhit|twitterbot|rogerbot|linkedinbot|embedly|quora link preview|showyoubot|outbrain|pinterest|slackbot|vkShare|W3C_Validator|AhrefsBot|SiteAuditBot|SemrushBot|Screaming Frog SEO Spider/i;
+  /googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo! Slurp|duckduckbot|applebot|gptbot/i;
+
 const IS_FILE =
-  /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|doc|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|torrent|ttf|woff|svg|eot)$/i;
+  /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|woff|ttf|svg|webmanifest|eot|torrent)$/i;
+
+// Optional extra bot pattern. CUSTOM_BOT_CHECK is substituted at bundle time by
+// the esbuild `define` in prerender-check-construct.ts; the "[]" fallback is an
+// empty character class, which never matches any user-agent.
+const IS_BOT_CUSTOM = new RegExp(process.env.CUSTOM_BOT_CHECK || "[]", "i");
 
 export const handler = async (
   event: CloudFrontRequestEvent
@@ -14,7 +20,10 @@ export const handler = async (
   // If the request is from a bot, is not a file and is not from prerender
   // then set the x-request-prerender header so the origin-request lambda function
   // alters the origin to prerender.io
-  if (IS_BOT.test(request.headers["user-agent"][0].value)) {
+  // A viewer request may arrive without a User-Agent header; fall back to an
+  // empty string instead of throwing on the missing header array.
+  const userAgent = request.headers["user-agent"]?.[0]?.value ?? "";
+  if (IS_BOT.test(userAgent) || IS_BOT_CUSTOM.test(userAgent)) {
     if (!IS_FILE.test(request.uri) && !request.headers["x-prerender"]) {
       request.headers["x-request-prerender"] = [
         {
diff --git a/packages/prerender-proxy/lib/prerender-check-construct.ts b/packages/prerender-proxy/lib/prerender-check-construct.ts
index 57105f66..89ee2d5c 100644
--- a/packages/prerender-proxy/lib/prerender-check-construct.ts
+++ b/packages/prerender-proxy/lib/prerender-check-construct.ts
@@ -5,10 +5,20 @@ import { Construct } from "constructs";
 import { join } from "path";
 import { Esbuild } from "@aligent/cdk-esbuild";
 
+export interface PrerenderCheckOptions {
+  /**
+   * A custom regex string to detect bots. Will be used in addition
+   * to the existing bot check regex to determine if a user-agent is a bot.
+   * The handler compiles it with the case-insensitive ("i") flag, so it
+   * must be valid JavaScript regular-expression source.
+   */
+  customBotCheckRegex: string;
+}
+
 export class PrerenderCheckFunction extends Construct {
   readonly edgeFunction: experimental.EdgeFunction;
 
-  constructor(scope: Construct, id: string) {
+  constructor(scope: Construct, id: string, options?: PrerenderCheckOptions) {
     super(scope, id);
 
     const command = [
@@ -28,6 +38,11 @@ export class PrerenderCheckFunction extends Construct {
             image: DockerImage.fromRegistry("busybox"),
             local: new Esbuild({
               entryPoints: [join(__dirname, "handlers/prerender-check.ts")],
+              define: {
+                "process.env.CUSTOM_BOT_CHECK": JSON.stringify(
+                  options?.customBotCheckRegex ?? "[]"
+                ),
+              },
             }),
           },
         }),
diff --git a/packages/prerender-proxy/lib/prerender-lambda-construct.ts b/packages/prerender-proxy/lib/prerender-lambda-construct.ts
index 6740b0ad..198a7cf9 100644
--- a/packages/prerender-proxy/lib/prerender-lambda-construct.ts
+++ b/packages/prerender-proxy/lib/prerender-lambda-construct.ts
@@ -3,7 +3,10 @@ import {
   CloudFrontCacheControl,
   CloudFrontCacheControlOptions,
 } from "./prerender-cf-cache-control-construct";
-import { PrerenderCheckFunction } from "./prerender-check-construct";
+import {
+  PrerenderCheckFunction,
+  PrerenderCheckOptions,
+} from "./prerender-check-construct";
 import {
   PrerenderFunction,
   PrerenderFunctionOptions,
@@ -16,6 +19,7 @@ import {
 export interface PrerenderLambdaProps {
   prerenderProps: PrerenderFunctionOptions;
   errorResponseProps: ErrorResponseFunctionOptions;
+  prerenderCheckOptions?: PrerenderCheckOptions;
   cacheControlProps?: CloudFrontCacheControlOptions;
 }
 
@@ -30,7 +34,8 @@ export class PrerenderLambda extends Construct {
 
     this.prerenderCheckFunction = new PrerenderCheckFunction(
       this,
-      "PrerenderViewerRequest"
+      "PrerenderViewerRequest",
+      props.prerenderCheckOptions
     );
 
     this.prerenderFunction = new PrerenderFunction(