From 929386b9082289c68ffb124fbaf2ce3d649525ba Mon Sep 17 00:00:00 2001 From: Daniel Van Der Ploeg Date: Thu, 30 May 2024 13:07:20 +0930 Subject: [PATCH 1/6] feat: support custom bot detection string --- .../lib/handlers/prerender-check.ts | 11 ++++++++--- .../lib/prerender-check-construct.ts | 17 ++++++++++++++++- .../prerender-proxy/lib/prerender-construct.ts | 2 +- .../lib/prerender-lambda-construct.ts | 11 ++++++++--- 4 files changed, 33 insertions(+), 8 deletions(-) diff --git a/packages/prerender-proxy/lib/handlers/prerender-check.ts b/packages/prerender-proxy/lib/handlers/prerender-check.ts index a50abac0..1a28ceb0 100644 --- a/packages/prerender-proxy/lib/handlers/prerender-check.ts +++ b/packages/prerender-proxy/lib/handlers/prerender-check.ts @@ -2,9 +2,13 @@ import "source-map-support/register"; import { CloudFrontRequest, CloudFrontRequestEvent } from "aws-lambda"; const IS_BOT = - /googlebot|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|lighthouse|adsbot-google|Feedfetcher-Google|bingbot|yandex|baiduspider|Facebot|facebookexternalhit|twitterbot|rogerbot|linkedinbot|embedly|quora link preview|showyoubot|outbrain|pinterest|slackbot|vkShare|W3C_Validator|AhrefsBot|SiteAuditBot|SemrushBot|Screaming Frog SEO Spider/i; + /googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo\! Slurp|duckduckbot|applebot|gptbot|/i + const IS_FILE = - /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|doc|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|torrent|ttf|woff|svg|eot)$/i; + /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|woff|ttf|svg|webmanifest|eot|torrent)$/ + +// Allow passing a custom bot detection regex string +const IS_BOT_CUSTOM = new RegExp(process.env.CUSTOM_BOT_CHECK || '[]') export const handler = async ( event: CloudFrontRequestEvent @@ -14,7 +18,8 @@ export const handler = async ( // If the request is from a bot, is not a file and is not from prerender // then set the x-request-prerender header so the origin-request lambda function // alters the origin to prerender.io - if (IS_BOT.test(request.headers["user-agent"][0].value)) { + const userAgent = request.headers["user-agent"][0].value + if (IS_BOT.test(userAgent) || IS_BOT_CUSTOM.test(userAgent)) { if (!IS_FILE.test(request.uri) && !request.headers["x-prerender"]) { request.headers["x-request-prerender"] = [ { diff --git a/packages/prerender-proxy/lib/prerender-check-construct.ts b/packages/prerender-proxy/lib/prerender-check-construct.ts index 57105f66..90f70951 100644 --- a/packages/prerender-proxy/lib/prerender-check-construct.ts +++ b/packages/prerender-proxy/lib/prerender-check-construct.ts @@ -5,10 +5,20 @@ import { Construct } from "constructs"; import { join } from "path"; import { Esbuild } from "@aligent/cdk-esbuild"; +export interface PrerenderCheckOptions { + /** + * A custom regex string to detect bots. Will be used in addition + * to the existing bot check regex to determine if a user-agent is a bot. + * + * @type string + */ + customBotCheckRegex: string +} + export class PrerenderCheckFunction extends Construct { readonly edgeFunction: experimental.EdgeFunction; - constructor(scope: Construct, id: string) { + constructor(scope: Construct, id: string, options?: PrerenderCheckOptions) { super(scope, id); const command = [ @@ -28,6 +38,11 @@ export class PrerenderCheckFunction extends Construct { image: DockerImage.fromRegistry("busybox"), local: new Esbuild({ entryPoints: [join(__dirname, "handlers/prerender-check.ts")], + define: { + "process.env.CUSTOM_BOT_CHECK": JSON.stringify( + options?.customBotCheckRegex ?? "[]" + ), + }, }), }, }), diff --git a/packages/prerender-proxy/lib/prerender-construct.ts b/packages/prerender-proxy/lib/prerender-construct.ts index 8e1649dc..9dd48008 100644 --- a/packages/prerender-proxy/lib/prerender-construct.ts +++ b/packages/prerender-proxy/lib/prerender-construct.ts @@ -14,7 +14,7 @@ export interface PrerenderFunctionOptions { export class PrerenderFunction extends Construct { readonly edgeFunction: experimental.EdgeFunction; - constructor(scope: Construct, id: string, options: PrerenderFunctionOptions) { + constructor(scope: Construct, id: string, options?: PrerenderFunctionOptions) { super(scope, id); const command = [ diff --git a/packages/prerender-proxy/lib/prerender-lambda-construct.ts b/packages/prerender-proxy/lib/prerender-lambda-construct.ts index 6740b0ad..b73e6c74 100644 --- a/packages/prerender-proxy/lib/prerender-lambda-construct.ts +++ b/packages/prerender-proxy/lib/prerender-lambda-construct.ts @@ -3,7 +3,10 @@ import { CloudFrontCacheControl, CloudFrontCacheControlOptions, } from "./prerender-cf-cache-control-construct"; -import { PrerenderCheckFunction } from "./prerender-check-construct"; +import { + PrerenderCheckFunction, + PrerenderCheckOptions, +} from "./prerender-check-construct"; import { PrerenderFunction, PrerenderFunctionOptions, @@ -14,8 +17,9 @@ import { } from "./error-response-construct"; export interface PrerenderLambdaProps { - prerenderProps: PrerenderFunctionOptions; + prerenderProps?: PrerenderFunctionOptions; errorResponseProps: ErrorResponseFunctionOptions; + prerenderCheckOptions?: PrerenderCheckOptions; cacheControlProps?: CloudFrontCacheControlOptions; } @@ -30,7 +34,8 @@ export class PrerenderLambda extends Construct { this.prerenderCheckFunction = new PrerenderCheckFunction( this, - "PrerenderViewerRequest" + "PrerenderViewerRequest", + props.prerenderCheckOptions ); this.prerenderFunction = new PrerenderFunction( From 25d2f99d7b1fe3d47c21e6f7cd07b280697b966f Mon Sep 17 00:00:00 2001 From: Daniel Van Der Ploeg Date: Thu, 30 May 2024 13:34:01 +0930 Subject: [PATCH 2/6] chore: run prettier --- packages/prerender-proxy/lib/handlers/prerender-check.ts | 8 ++++---- packages/prerender-proxy/lib/prerender-check-construct.ts | 6 +++--- packages/prerender-proxy/lib/prerender-construct.ts | 6 +++++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/packages/prerender-proxy/lib/handlers/prerender-check.ts b/packages/prerender-proxy/lib/handlers/prerender-check.ts index 1a28ceb0..499fd4c8 100644 --- a/packages/prerender-proxy/lib/handlers/prerender-check.ts +++ b/packages/prerender-proxy/lib/handlers/prerender-check.ts @@ -2,13 +2,13 @@ import "source-map-support/register"; import { CloudFrontRequest, CloudFrontRequestEvent } from "aws-lambda"; const IS_BOT = - /googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo\! Slurp|duckduckbot|applebot|gptbot|/i + /googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo\! Slurp|duckduckbot|applebot|gptbot|/i; const IS_FILE = - /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|woff|ttf|svg|webmanifest|eot|torrent)$/ + /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|woff|ttf|svg|webmanifest|eot|torrent)$/; // Allow passing a custom bot detection regex string -const IS_BOT_CUSTOM = new RegExp(process.env.CUSTOM_BOT_CHECK || '[]') +const IS_BOT_CUSTOM = new RegExp(process.env.CUSTOM_BOT_CHECK || "[]"); export const handler = async ( event: CloudFrontRequestEvent @@ -18,7 +18,7 @@ export const handler = async ( // If the request is from a bot, is not a file and is not from prerender // then set the x-request-prerender header so the origin-request lambda function // alters the origin to prerender.io - const userAgent = request.headers["user-agent"][0].value + const userAgent = request.headers["user-agent"][0].value; if (IS_BOT.test(userAgent) || IS_BOT_CUSTOM.test(userAgent)) { if (!IS_FILE.test(request.uri) && !request.headers["x-prerender"]) { request.headers["x-request-prerender"] = [ diff --git a/packages/prerender-proxy/lib/prerender-check-construct.ts b/packages/prerender-proxy/lib/prerender-check-construct.ts index 90f70951..89ee2d5c 100644 --- a/packages/prerender-proxy/lib/prerender-check-construct.ts +++ b/packages/prerender-proxy/lib/prerender-check-construct.ts @@ -7,12 +7,12 @@ import { Esbuild } from "@aligent/cdk-esbuild"; export interface PrerenderCheckOptions { /** - * A custom regex string to detect bots. Will be used in addition + * A custom regex string to detect bots. Will be used in addition * to the existing bot check regex to determine if a user-agent is a bot. - * + * * @type string */ - customBotCheckRegex: string + customBotCheckRegex: string; } export class PrerenderCheckFunction extends Construct { diff --git a/packages/prerender-proxy/lib/prerender-construct.ts b/packages/prerender-proxy/lib/prerender-construct.ts index 9dd48008..84df9642 100644 --- a/packages/prerender-proxy/lib/prerender-construct.ts +++ b/packages/prerender-proxy/lib/prerender-construct.ts @@ -14,7 +14,11 @@ export interface PrerenderFunctionOptions { export class PrerenderFunction extends Construct { readonly edgeFunction: experimental.EdgeFunction; - constructor(scope: Construct, id: string, options?: PrerenderFunctionOptions) { + constructor( + scope: Construct, + id: string, + options?: PrerenderFunctionOptions + ) { super(scope, id); const command = [ From 0d6743c43715784f12d091db73abce04160c8d1b Mon Sep 17 00:00:00 2001 From: Daniel Van Der Ploeg Date: Thu, 30 May 2024 13:38:15 +0930 Subject: [PATCH 3/6] fix: linting error --- packages/prerender-proxy/lib/handlers/prerender-check.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/prerender-proxy/lib/handlers/prerender-check.ts b/packages/prerender-proxy/lib/handlers/prerender-check.ts index 499fd4c8..23296b5a 100644 --- a/packages/prerender-proxy/lib/handlers/prerender-check.ts +++ b/packages/prerender-proxy/lib/handlers/prerender-check.ts @@ -2,7 +2,7 @@ import "source-map-support/register"; import { CloudFrontRequest, CloudFrontRequestEvent } from "aws-lambda"; const IS_BOT = - /googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo\! Slurp|duckduckbot|applebot|gptbot|/i; + /googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo! Slurp|duckduckbot|applebot|gptbot|/i; const IS_FILE = /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|woff|ttf|svg|webmanifest|eot|torrent)$/; From f4bc14fc32b661a9d8b9c27ff926a62e71699c6f Mon Sep 17 00:00:00 2001 From: Daniel Van Der Ploeg Date: Thu, 30 May 2024 13:40:54 +0930 Subject: [PATCH 4/6] fix: remove optional --- packages/prerender-proxy/lib/prerender-construct.ts | 6 +----- packages/prerender-proxy/lib/prerender-lambda-construct.ts | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/packages/prerender-proxy/lib/prerender-construct.ts b/packages/prerender-proxy/lib/prerender-construct.ts index 84df9642..8e1649dc 100644 --- a/packages/prerender-proxy/lib/prerender-construct.ts +++ b/packages/prerender-proxy/lib/prerender-construct.ts @@ -14,11 +14,7 @@ export interface PrerenderFunctionOptions { export class PrerenderFunction extends Construct { readonly edgeFunction: experimental.EdgeFunction; - constructor( - scope: Construct, - id: string, - options?: PrerenderFunctionOptions - ) { + constructor(scope: Construct, id: string, options: PrerenderFunctionOptions) { super(scope, id); const command = [ diff --git a/packages/prerender-proxy/lib/prerender-lambda-construct.ts b/packages/prerender-proxy/lib/prerender-lambda-construct.ts index b73e6c74..198a7cf9 100644 --- a/packages/prerender-proxy/lib/prerender-lambda-construct.ts +++ b/packages/prerender-proxy/lib/prerender-lambda-construct.ts @@ -17,7 +17,7 @@ import { } from "./error-response-construct"; export interface PrerenderLambdaProps { - prerenderProps?: PrerenderFunctionOptions; + prerenderProps: PrerenderFunctionOptions; errorResponseProps: ErrorResponseFunctionOptions; prerenderCheckOptions?: PrerenderCheckOptions; cacheControlProps?: CloudFrontCacheControlOptions; From 01de3da59aed24fd57dd5730c4a6f90c73bd1ca2 Mon Sep 17 00:00:00 2001 From: Daniel Van Der Ploeg Date: Thu, 30 May 2024 15:21:07 +0930 Subject: [PATCH 5/6] fix: remove extra | --- packages/prerender-proxy/lib/handlers/prerender-check.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/prerender-proxy/lib/handlers/prerender-check.ts b/packages/prerender-proxy/lib/handlers/prerender-check.ts index 23296b5a..b9032b8d 100644 --- a/packages/prerender-proxy/lib/handlers/prerender-check.ts +++ b/packages/prerender-proxy/lib/handlers/prerender-check.ts @@ -2,7 +2,7 @@ import "source-map-support/register"; import { CloudFrontRequest, CloudFrontRequestEvent } from "aws-lambda"; const IS_BOT = - /googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo! Slurp|duckduckbot|applebot|gptbot|/i; + /googlebot|bingbot|yandex|baiduspider|facebookexternalhit|facebookbot|twitterbot|linkedinbot|embedly|showyoubot|outbrain|pinterestbot|slackbot|vkShare|W3C_Validator|whatsapp|ImgProxy|flipboard|tumblr|bitlybot|skype|nuzzel|discordbot|google|qwantify|pinterest|lighthouse|telegrambo|Google-InspectionTool|Schema-Markup-Validator|SchemaBot|chrome-lighthouse|adsbot-google|Feedfetcher-Google|Facebot|rogerbot|quora link preview|SiteAuditBot|Storebot|Mediapartners-Google|AdIdxBot|BingPreview|Yahoo! Slurp|duckduckbot|applebot|gptbot/i; const IS_FILE = /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|woff|ttf|svg|webmanifest|eot|torrent)$/; From 7c08f79eaf329763a42e3eb2142b1e0ce5ed65a7 Mon Sep 17 00:00:00 2001 From: Daniel Van Der Ploeg Date: Thu, 30 May 2024 15:33:51 +0930 Subject: [PATCH 6/6] feat: pass case insensitive flag --- packages/prerender-proxy/lib/handlers/prerender-check.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/prerender-proxy/lib/handlers/prerender-check.ts b/packages/prerender-proxy/lib/handlers/prerender-check.ts index b9032b8d..440a86bf 100644 --- a/packages/prerender-proxy/lib/handlers/prerender-check.ts +++ b/packages/prerender-proxy/lib/handlers/prerender-check.ts @@ -8,7 +8,7 @@ const IS_FILE = /\.(js|css|xml|less|png|jpg|jpeg|gif|pdf|doc|txt|ico|rss|zip|mp3|rar|exe|wmv|avi|ppt|mpg|mpeg|tif|wav|mov|psd|ai|xls|mp4|m4a|swf|dat|dmg|iso|flv|m4v|woff|ttf|svg|webmanifest|eot|torrent)$/; // Allow passing a custom bot detection regex string -const IS_BOT_CUSTOM = new RegExp(process.env.CUSTOM_BOT_CHECK || "[]"); +const IS_BOT_CUSTOM = new RegExp(process.env.CUSTOM_BOT_CHECK || "[]", "i"); export const handler = async ( event: CloudFrontRequestEvent