From 7aaae0c52e3d1eadce17e935eb47687bfe3d2b55 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 13 Dec 2024 09:16:26 +1300 Subject: [PATCH 01/13] wip --- package-lock.json | 303 +++++++++++++++++++++++++++++++++-- packages/shared/src/const.ts | 2 + 2 files changed, 295 insertions(+), 10 deletions(-) diff --git a/package-lock.json b/package-lock.json index 3818e9e7e..2fd3018d3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1427,6 +1427,10 @@ "resolved": "packages/_infra", "link": true }, + "node_modules/@basemaps/lambda-analytic-cloudfront": { + "resolved": "packages/lambda-analytic-cloudfront", + "link": true + }, "node_modules/@basemaps/lambda-analytics": { "resolved": "packages/lambda-analytics", "link": true @@ -1614,6 +1618,44 @@ "streamx": "^2.15.0" } }, + "node_modules/@elastic/elasticsearch": { + "version": "8.16.2", + "resolved": "https://registry.npmjs.org/@elastic/elasticsearch/-/elasticsearch-8.16.2.tgz", + "integrity": "sha512-2ivc6uS97fbEeW4tNtg5mvh/Jy82ZLfcwQ1HhNhdYxyapNnQxIgZ83Zd8Ir+5jCPMDWKSYgwDb8t4GAINDDv2w==", + "dev": true, + "dependencies": { + "@elastic/transport": "^8.9.1", + "apache-arrow": "^18.0.0", + "tslib": "^2.4.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@elastic/transport": { + "version": "8.9.1", + "resolved": "https://registry.npmjs.org/@elastic/transport/-/transport-8.9.1.tgz", + "integrity": "sha512-jasKNQeOb1vNf9aEYg+8zXmetaFjApDTSCC4QTl6aTixvyiRiSLcCiB8P6Q0lY9JIII/BhqNl8WbpFnsKitntw==", + "dev": true, + "dependencies": { + "@opentelemetry/api": "1.x", + "debug": "^4.3.4", + "hpagent": "^1.0.0", + "ms": "^2.1.3", + "secure-json-parse": "^2.4.0", + "tslib": "^2.4.0", + "undici": "^6.12.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@elastic/transport/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, "node_modules/@emnapi/runtime": { "version": "0.44.0", "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-0.44.0.tgz", @@ -3953,6 +3995,15 @@ "@octokit/openapi-types": "^18.0.0" } }, + "node_modules/@opentelemetry/api": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", + "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", + "dev": true, + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/@pkgjs/parseargs": { "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", @@ -5185,12 +5236,12 @@ } }, "node_modules/@swc/helpers": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.3.tgz", - "integrity": "sha512-FaruWX6KdudYloq1AHD/4nU+UsMTdNE8CKyrseXWEcgjDAbvkwJg2QGPAnfIJLIWsjZOSPLOAykK6fuYp4vp4A==", + "version": "0.5.15", + "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.15.tgz", + "integrity": "sha512-JQ5TuMi45Owi4/BIMAJBoSQoOJu12oOk/gADqlcUL9JEdHB8vyjUSsxqeNXnmXHjYKMi2WcYtezGEEhqUI/E2g==", "dev": true, "dependencies": { - "tslib": "^2.4.0" + "tslib": "^2.8.0" } }, "node_modules/@szhsin/react-menu": { @@ -5281,6 +5332,18 @@ "integrity": "sha512-IWmFpqnVDvskYWnNSiu/qlRn80XlIOU0Gy5rKCl/NjhnI95pV8qIHs6L5b+bpHhyzuOSzjLgBcwgFSXrC1nZWA==", "dev": true }, + "node_modules/@types/command-line-args": { + "version": "5.2.3", + "resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.3.tgz", + "integrity": "sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw==", + "dev": true + }, + "node_modules/@types/command-line-usage": { + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/@types/command-line-usage/-/command-line-usage-5.0.4.tgz", + "integrity": "sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg==", + "dev": true + }, "node_modules/@types/geojson": { "version": "7946.0.14", "resolved": "https://registry.npmjs.org/@types/geojson/-/geojson-7946.0.14.tgz", @@ -5893,6 +5956,26 @@ "integrity": "sha512-rr+OQyAjxze7GgWrSaJwydHStIhHq2lvY3BOC2Mj7KnzI7XK0Uw1TOOdI9lDoajEbSWLiYgoo4f1R51erQfhPQ==", "license": "MIT" }, + "node_modules/apache-arrow": { + "version": "18.1.0", + "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-18.1.0.tgz", + "integrity": "sha512-v/ShMp57iBnBp4lDgV8Jx3d3Q5/Hac25FWmQ98eMahUiHPXcvwIMKJD0hBIgclm/FCG+LwPkAKtkRO1O/W0YGg==", + "dev": true, + "dependencies": { + "@swc/helpers": "^0.5.11", + "@types/command-line-args": "^5.2.3", + "@types/command-line-usage": "^5.0.4", + "@types/node": "^20.13.0", + "command-line-args": "^5.2.1", + "command-line-usage": "^7.0.1", + "flatbuffers": "^24.3.25", + "json-bignum": "^0.0.3", + "tslib": "^2.6.2" + }, + "bin": { + "arrow2csv": "bin/arrow2csv.js" + } + }, "node_modules/aproba": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/aproba/-/aproba-2.0.0.tgz", @@ -5927,6 +6010,15 @@ "sprintf-js": "~1.0.2" } }, + "node_modules/array-back": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-3.1.0.tgz", + "integrity": "sha512-TkuxA4UCOvxuDK6NZYXCalszEzj+TLszyASooky+i742l9TqsOdYCMJJupxRic61hwquNtppB3hgcuq9SVSH1Q==", + "dev": true, + "engines": { + "node": ">=6" + } + }, "node_modules/array-buffer-byte-length": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/array-buffer-byte-length/-/array-buffer-byte-length-1.0.1.tgz", @@ -7003,6 +7095,37 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/chalk-template": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-0.4.0.tgz", + "integrity": "sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg==", + "dev": true, + "dependencies": { + "chalk": "^4.1.2" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/chalk-template?sponsor=1" + } + }, + "node_modules/chalk-template/node_modules/chalk": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", + "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", + "dev": true, + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/chalk?sponsor=1" + } + }, "node_modules/chardet": { "version": "0.7.0", "resolved": "https://registry.npmjs.org/chardet/-/chardet-0.7.0.tgz", @@ -7282,6 +7405,54 @@ "node": ">= 0.8" } }, + "node_modules/command-line-args": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/command-line-args/-/command-line-args-5.2.1.tgz", + "integrity": "sha512-H4UfQhZyakIjC74I9d34fGYDwk3XpSr17QhEd0Q3I9Xq1CETHo4Hcuo87WyWHpAF1aSLjLRf5lD9ZGX2qStUvg==", + "dev": true, + "dependencies": { + "array-back": "^3.1.0", + "find-replace": "^3.0.0", + "lodash.camelcase": "^4.3.0", + "typical": "^4.0.0" + }, + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/command-line-usage": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-7.0.3.tgz", + "integrity": "sha512-PqMLy5+YGwhMh1wS04mVG44oqDsgyLRSKJBdOo1bnYhMKBW65gZF1dRp2OZRhiTjgUHljy99qkO7bsctLaw35Q==", + "dev": true, + "dependencies": { + "array-back": "^6.2.2", + "chalk-template": "^0.4.0", + "table-layout": "^4.1.0", + "typical": "^7.1.1" + }, + "engines": { + "node": ">=12.20.0" + } + }, + "node_modules/command-line-usage/node_modules/array-back": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", + "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", + "dev": true, + "engines": { + "node": ">=12.17" + } + }, + "node_modules/command-line-usage/node_modules/typical": { + "version": "7.3.0", + "resolved": "https://registry.npmjs.org/typical/-/typical-7.3.0.tgz", + "integrity": "sha512-ya4mg/30vm+DOWfBg4YK3j2WD6TWtRkCbasOJr40CseYENzCUby/7rIvXA99JGsQHeNxLbnXdyLLxKSv3tauFw==", + "dev": true, + "engines": { + "node": ">=12.17" + } + }, "node_modules/commander": { "version": "2.20.3", "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", @@ -9751,6 +9922,18 @@ "node": ">=14" } }, + "node_modules/find-replace": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/find-replace/-/find-replace-3.0.0.tgz", + "integrity": "sha512-6Tb2myMioCAgv5kfvP5/PkZZ/ntTpVK39fHY7WkWBgvbeE+VHd/tZuZ4mrC+bxh4cfOZeYKVPaJIZtZXV7GNCQ==", + "dev": true, + "dependencies": { + "array-back": "^3.0.1" + }, + "engines": { + "node": ">=4.0.0" + } + }, "node_modules/find-root": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/find-root/-/find-root-1.1.0.tgz", @@ -9825,6 +10008,12 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/flatbuffers": { + "version": "24.3.25", + "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-24.3.25.tgz", + "integrity": "sha512-3HDgPbgiwWMI9zVB7VYBHaMrbOO7Gm0v+yD2FV/sCKj+9NDeVL7BOBYUuhWAQGKWOzBo8S9WdMvV0eixO233XQ==", + "dev": true + }, "node_modules/flatted": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz", @@ -11073,6 +11262,15 @@ "node": ">=10" } }, + "node_modules/hpagent": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz", + "integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==", + "dev": true, + "engines": { + "node": ">=14" + } + }, "node_modules/http-cache-semantics": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/http-cache-semantics/-/http-cache-semantics-4.1.1.tgz", @@ -12142,6 +12340,15 @@ "dev": true, "license": "Python-2.0" }, + "node_modules/json-bignum": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/json-bignum/-/json-bignum-0.0.3.tgz", + "integrity": "sha512-2WHyXj3OfHSgNyuzDbSxI1w2jgw5gkWSWhS7Qg4bWXx1nLk3jnbwfUeS0PSba3IzpTUWdHxBieELUzXRjQB2zg==", + "dev": true, + "engines": { + "node": ">=0.8" + } + }, "node_modules/json-buffer": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.0.tgz", @@ -12995,6 +13202,12 @@ "dev": true, "license": "MIT" }, + "node_modules/lodash.camelcase": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", + "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==", + "dev": true + }, "node_modules/lodash.get": { "version": "4.4.2", "resolved": "https://registry.npmjs.org/lodash.get/-/lodash.get-4.4.2.tgz", @@ -17861,6 +18074,28 @@ "url": "https://opencollective.com/unts" } }, + "node_modules/table-layout": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/table-layout/-/table-layout-4.1.1.tgz", + "integrity": "sha512-iK5/YhZxq5GO5z8wb0bY1317uDF3Zjpha0QFFLA8/trAoiLbQD0HUbMesEaxyzUgDxi2QlcbM8IvqOlEjgoXBA==", + "dev": true, + "dependencies": { + "array-back": "^6.2.2", + "wordwrapjs": "^5.1.0" + }, + "engines": { + "node": ">=12.17" + } + }, + "node_modules/table-layout/node_modules/array-back": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", + "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", + "dev": true, + "engines": { + "node": ">=12.17" + } + }, "node_modules/tar": { "version": "6.1.11", "resolved": "https://registry.npmjs.org/tar/-/tar-6.1.11.tgz", @@ -18135,9 +18370,9 @@ } }, "node_modules/tslib": { - "version": "2.6.2", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", - "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" + "version": "2.8.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", + "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==" }, "node_modules/tuf-js": { "version": "1.1.7", @@ -18504,10 +18739,19 @@ "node": ">=14.17" } }, + "node_modules/typical": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/typical/-/typical-4.0.0.tgz", + "integrity": "sha512-VAH4IvQ7BDFYglMd7BPRDfLgxZZX4O4TFcRDA6EN5X7erNJJq+McIEp8np9aVtxrCJ6qx4GTYVfOWNjcqwZgRw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/ua-parser-js": { - "version": "1.0.37", - "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-1.0.37.tgz", - "integrity": "sha512-bhTyI94tZofjo+Dn8SN6Zv8nBDvyXTymAdM3LDI/0IboIUwTu1rEhW7v2TfiVsoYWgkQ4kOVqnI8APUFbIQIFQ==", + "version": "1.0.39", + "resolved": "https://registry.npmjs.org/ua-parser-js/-/ua-parser-js-1.0.39.tgz", + "integrity": "sha512-k24RCVWlEcjkdOxYmVJgeD/0a1TiSpqLg+ZalVGV9lsnr4yqu0w7tX/x2xX6G4zpkgQnRf89lxuZ1wsbjXM8lw==", "funding": [ { "type": "opencollective", @@ -18522,6 +18766,9 @@ "url": "https://github.com/sponsors/faisalman" } ], + "bin": { + "ua-parser-js": "script/cli.js" + }, "engines": { "node": "*" } @@ -18567,6 +18814,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/undici": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.0.tgz", + "integrity": "sha512-BUgJXc752Kou3oOIuU1i+yZZypyZRqNPW0vqoMPl8VaoalSfeR0D8/t4iAS3yirs79SSMTxTag+ZC86uswv+Cw==", + "dev": true, + "engines": { + "node": ">=18.17" + } + }, "node_modules/undici-types": { "version": "5.26.5", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", @@ -18893,6 +19149,15 @@ "dev": true, "license": "MIT" }, + "node_modules/wordwrapjs": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-5.1.0.tgz", + "integrity": "sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg==", + "dev": true, + "engines": { + "node": ">=12.17" + } + }, "node_modules/wrap-ansi": { "version": "6.2.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-6.2.0.tgz", @@ -19330,6 +19595,24 @@ "dev": true, "license": "MIT" }, + "packages/lambda-analytic-cloudfront": { + "name": "@basemaps/lambda-analytic-cloudfront", + "version": "7.11.0", + "license": "MIT", + "dependencies": { + "@basemaps/config": "^7.11.0", + "@basemaps/geo": "^7.11.0", + "@basemaps/shared": "^7.11.0", + "ua-parser-js": "^1.0.39" + }, + "devDependencies": { + "@elastic/elasticsearch": "^8.16.2", + "@types/ua-parser-js": "^0.7.36" + }, + "engines": { + "node": ">=16.0.0" + } + }, "packages/lambda-analytics": { "name": "@basemaps/lambda-analytics", "version": "7.12.0", diff --git a/packages/shared/src/const.ts b/packages/shared/src/const.ts index 052bb2046..390b664f8 100644 --- a/packages/shared/src/const.ts +++ b/packages/shared/src/const.ts @@ -54,6 +54,8 @@ export const Env = { CloudFrontId: 'ANALYTICS_CLOUD_FRONT_ID', CloudFrontSourceBucket: 'ANALYTICS_CLOUD_FRONT_SOURCE_BUCKET', CacheBucket: 'ANALYTICS_CACHE_BUCKET', + /** Max number of records to process in the analytics process */ + MaxRecords: 'ANALYTICS_MAX_RECORDS', }, /** Load a environment var defaulting to defaultOutput if it does not exist */ From 102fc425a5fa612c6ddce3452f733c1312ccc29b Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 13 Dec 2024 14:15:48 +1300 Subject: [PATCH 02/13] wip: add infra --- .../_infra/src/analytics/edge.analytics.ts | 32 +++++++++++++++++-- packages/shared/src/const.ts | 10 +++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/packages/_infra/src/analytics/edge.analytics.ts b/packages/_infra/src/analytics/edge.analytics.ts index 61558f54c..31f049be5 100644 --- a/packages/_infra/src/analytics/edge.analytics.ts +++ b/packages/_infra/src/analytics/edge.analytics.ts @@ -7,7 +7,8 @@ import { RetentionDays } from 'aws-cdk-lib/aws-logs'; import { BlockPublicAccess, Bucket } from 'aws-cdk-lib/aws-s3'; import { Construct } from 'constructs'; -const CODE_PATH = '../lambda-analytics/dist'; +const CodePath = '../lambda-analytics/dist'; +const CodePathV2 = '../lambda-analytics-cloudfront/dist'; export interface EdgeAnalyticsProps extends StackProps { distributionId: string; @@ -36,7 +37,7 @@ export class EdgeAnalytics extends Stack { memorySize: 2048, timeout: Duration.minutes(10), handler: 'index.handler', - code: lambda.Code.fromAsset(CODE_PATH), + code: lambda.Code.fromAsset(CodePath), environment: { [Env.Analytics.CloudFrontId]: distributionId, [Env.Analytics.CacheBucket]: `s3://${cacheBucket.bucketName}`, @@ -53,5 +54,32 @@ export class EdgeAnalytics extends Stack { // Run this lambda function every hour const rule = new Rule(this, 'AnalyticRule', { schedule: Schedule.rate(Duration.hours(1)) }); rule.addTarget(new LambdaFunction(this.lambda)); + + const v2Lambda = new lambda.Function(this, 'AnalyticV2Lambda', { + runtime: lambda.Runtime.NODEJS_LATEST, + memorySize: 2048, + timeout: Duration.minutes(10), + handler: 'index.handler', + code: lambda.Code.fromAsset(CodePathV2), + environment: { + [Env.Analytics.CloudFrontId]: distributionId, + [Env.Analytics.CacheBucket]: `s3://${cacheBucket.bucketName}`, + [Env.Analytics.CloudFrontSourceBucket]: `s3://${logBucket.bucketName}`, + [Env.Analytics.MaxRecords]: String(24 * 7 * 4), + [Env.Analytics.ElasticId]: Env.get(Env.Analytics.ElasticId), + [Env.Analytics.ElasticApiKey]: Env.get(Env.Analytics.ElasticApiKey), + AWS_NODEJS_CONNECTION_REUSE_ENABLED: '1', + }, + logRetention: RetentionDays.ONE_MONTH, + loggingFormat: lambda.LoggingFormat.JSON, + }); + + cacheBucket.grantReadWrite(v2Lambda); + logBucket.grantRead(v2Lambda); + + // Run this lambda function every hour + new Rule(this, 'AnalyticRule', { schedule: Schedule.rate(Duration.hours(1)) }).addTarget( + new LambdaFunction(v2Lambda), + ); } } diff --git a/packages/shared/src/const.ts b/packages/shared/src/const.ts index 390b664f8..0ad4d4942 100644 --- a/packages/shared/src/const.ts +++ b/packages/shared/src/const.ts @@ -53,10 +53,18 @@ export const Env = { Analytics: { CloudFrontId: 'ANALYTICS_CLOUD_FRONT_ID', CloudFrontSourceBucket: 'ANALYTICS_CLOUD_FRONT_SOURCE_BUCKET', + + /** Where to store the analytic cache data */ CacheBucket: 'ANALYTICS_CACHE_BUCKET', + /** Max number of records to process in the analytics process */ MaxRecords: 'ANALYTICS_MAX_RECORDS', - }, + + /** Elastic server Id */ + ElasticId: 'ELASTIC_ID', + /** ElasticSearch's API key */ + ElasticApiKey: 'ELASTIC_API_KEY', + } as const, /** Load a environment var defaulting to defaultOutput if it does not exist */ get(envName: string): string | undefined { From 94385ad96fc1f50b50a2b56a69d734c97ecfa10d Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 13 Dec 2024 14:43:02 +1300 Subject: [PATCH 03/13] wip: import source --- .../lambda-analytic-cloudfront/CHANGELOG.md | 0 packages/lambda-analytic-cloudfront/README.md | 5 + .../lambda-analytic-cloudfront/package.json | 41 ++++ .../src/__test__/analytics.test.ts | 119 +++++++++++ .../src/__test__/log.data.ts | 15 ++ .../lambda-analytic-cloudfront/src/date.ts | 33 +++ .../lambda-analytic-cloudfront/src/elastic.ts | 74 +++++++ .../lambda-analytic-cloudfront/src/handler.ts | 152 ++++++++++++++ .../src/log.reader.ts | 167 +++++++++++++++ .../src/log.stats.ts | 76 +++++++ .../src/log/__test__/tile.url.test.ts | 53 +++++ .../src/log/query.ts | 31 +++ .../src/log/referer.ts | 29 +++ .../src/log/tile.url.ts | 87 ++++++++ .../src/useragent/__test__/parser.test.ts | 195 ++++++++++++++++++ .../src/useragent/agent.ts | 9 + .../src/useragent/agents/gis.ts | 85 ++++++++ .../src/useragent/agents/programming.ts | 25 +++ .../src/useragent/parser.ts | 106 ++++++++++ .../src/useragent/parser.types.ts | 14 ++ .../lambda-analytic-cloudfront/tsconfig.json | 10 + .../lambda-analytic-cloudfront/typedoc.json | 4 + 22 files changed, 1330 insertions(+) create mode 100644 packages/lambda-analytic-cloudfront/CHANGELOG.md create mode 100644 packages/lambda-analytic-cloudfront/README.md create mode 100644 packages/lambda-analytic-cloudfront/package.json create mode 100644 packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts create mode 100644 packages/lambda-analytic-cloudfront/src/__test__/log.data.ts create mode 100644 packages/lambda-analytic-cloudfront/src/date.ts create mode 100644 packages/lambda-analytic-cloudfront/src/elastic.ts create mode 100644 packages/lambda-analytic-cloudfront/src/handler.ts create mode 100644 packages/lambda-analytic-cloudfront/src/log.reader.ts create mode 100644 packages/lambda-analytic-cloudfront/src/log.stats.ts create mode 100644 packages/lambda-analytic-cloudfront/src/log/__test__/tile.url.test.ts create mode 100644 packages/lambda-analytic-cloudfront/src/log/query.ts create mode 100644 packages/lambda-analytic-cloudfront/src/log/referer.ts create mode 100644 packages/lambda-analytic-cloudfront/src/log/tile.url.ts create mode 100644 packages/lambda-analytic-cloudfront/src/useragent/__test__/parser.test.ts create mode 100644 packages/lambda-analytic-cloudfront/src/useragent/agent.ts create mode 100644 packages/lambda-analytic-cloudfront/src/useragent/agents/gis.ts create mode 100644 packages/lambda-analytic-cloudfront/src/useragent/agents/programming.ts create mode 100644 packages/lambda-analytic-cloudfront/src/useragent/parser.ts create mode 100644 packages/lambda-analytic-cloudfront/src/useragent/parser.types.ts create mode 100644 packages/lambda-analytic-cloudfront/tsconfig.json create mode 100644 packages/lambda-analytic-cloudfront/typedoc.json diff --git a/packages/lambda-analytic-cloudfront/CHANGELOG.md b/packages/lambda-analytic-cloudfront/CHANGELOG.md new file mode 100644 index 000000000..e69de29bb diff --git a/packages/lambda-analytic-cloudfront/README.md b/packages/lambda-analytic-cloudfront/README.md new file mode 100644 index 000000000..7626825f0 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/README.md @@ -0,0 +1,5 @@ +# @basemaps/lambda-analytic-cloudfront + +Generate analytics from CloudFront distribution statistics + +Every hour this lambda function runs and generates a rolled up summary of usage by API Key diff --git a/packages/lambda-analytic-cloudfront/package.json b/packages/lambda-analytic-cloudfront/package.json new file mode 100644 index 000000000..fdecef43f --- /dev/null +++ b/packages/lambda-analytic-cloudfront/package.json @@ -0,0 +1,41 @@ +{ + "name": "@basemaps/lambda-analytic-cloudfront", + "version": "7.11.0", + "private": true, + "repository": { + "type": "git", + "url": "https://github.com/linz/basemaps.git", + "directory": "packages/lambda-analytic-cloudfront" + }, + "author": { + "name": "Land Information New Zealand", + "url": "https://linz.govt.nz", + "organization": true + }, + "type": "module", + "engines": { + "node": ">=16.0.0" + }, + "license": "MIT", + "dependencies": { + "@basemaps/config": "^7.11.0", + "@basemaps/geo": "^7.11.0", + "@basemaps/shared": "^7.11.0", + "ua-parser-js": "^1.0.39" + }, + "scripts": { + "test": "node --test", + "bundle": "../../scripts/bundle.mjs package.json" + }, + "devDependencies": { + "@elastic/elasticsearch": "^8.16.2", + "@types/ua-parser-js": "^0.7.36" + }, + "bundle": { + "entry": "src/index.ts", + "outdir": "dist/", + "external": [ + "pino-pretty" + ] + } +} diff --git a/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts b/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts new file mode 100644 index 000000000..2ca487cc6 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts @@ -0,0 +1,119 @@ +import assert from 'node:assert'; +import { beforeEach, describe, it, TestContext } from 'node:test'; +import { gzipSync } from 'node:zlib'; + +import { Env, fsa, FsMemory } from '@basemaps/shared'; +import { Client } from '@elastic/elasticsearch'; + +import { getYesterday } from '../date.js'; +import { Elastic } from '../elastic.js'; +import { handler } from '../handler.js'; +import { LogStats } from '../log.stats.js'; +import { LogData } from './log.data.js'; + +interface IndexOperation { + index: { _index: string }; +} +type BulkOperation = (IndexOperation | LogStats)[]; + +describe('analytic lambda', () => { + const memory = new FsMemory(); + beforeEach(() => { + fsa.register('mem://', memory); + memory.files.clear(); + + Elastic.indexDelay = 1; + Elastic._client = undefined; + }); + + function setupEnv(t: TestContext): void { + t.mock.method(Env, 'get', (key: string): string => { + switch (key) { + case Env.Analytics.CacheBucket: + return 'mem://cache/'; + case Env.Analytics.CloudFrontSourceBucket: + return 'mem://source/'; + case Env.Analytics.CloudFrontId: + return 'cfid'; + case Env.Analytics.MaxRecords: + return '30'; + } + throw new Error(`Invalid test process.env access ${key}`); + }); + } + + it('should process some log data', async (t) => { + setupEnv(t); + + const operations: BulkOperation[] = []; + Elastic._client = { + bulk(op: { operations: BulkOperation }) { + operations.push(op.operations); + return Promise.resolve({}); + }, + } as unknown as Client; + + const YesterDay = getYesterday(); + const shortDate = YesterDay.toISOString().slice(0, 13).replace('T', '-'); + + await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData)); + + await handler(); + + // One call to insert + assert.equal(operations.length, 1); + + const op = operations[0]; + + const indexOpt = op[0] as IndexOperation; + const logOpt = op[1] as LogStats; + + // First Log line: /v1/tiles/aerial/EPSG:3857/19/516588/320039.webp + assert.equal(indexOpt.index._index, 'basemaps-history-2020'); + assert.equal(logOpt.apiType, 'd'); + assert.equal(logOpt.tileMatrix, 'EPSG:3857'); + assert.equal(logOpt.tileMatrixId, 'WebMercatorQuad'); + assert.equal(logOpt.tileSet, 'aerial'); + assert.equal(logOpt.z, 19); + assert.equal(logOpt.cacheHit, 1); + assert.equal(logOpt.cacheMiss, 0); + assert.equal(logOpt.total, 1); + + assert.deepEqual(logOpt.ua, { os: 'linux', name: 'chrome', version: '85', variant: 'unknown' }); + + const files = [...memory.files.keys()]; + assert.equal(files.length, 2); // two files one input one output + + assert.equal( + files[1], + `mem://cache/RollUpV3/${shortDate.slice(0, 4)}/${shortDate.slice(5, 7)}/${shortDate}.ndjson.gz`, + ); + }); + + it('should write errors to storage', async (t) => { + setupEnv(t); + + Elastic._client = { + bulk() { + return Promise.resolve({ errors: ['Hello'] }); + }, + } as unknown as Client; + + const YesterDay = getYesterday(); + const shortDate = YesterDay.toISOString().slice(0, 13).replace('T', '-'); + + await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData)); + + const ret = await handler().catch((e: Error) => e); + + assert.equal(String(ret), 'Error: Failed to index'); + + const files = [...memory.files.keys()]; + assert.equal(files.length, 2); // two files one input one output + + assert.ok( + files[1].startsWith(`mem://cache/errors-${new Date().toISOString().slice(0, 12)}`), + // ${shortDate.slice(0, 4)}/${shortDate.slice(5, 7)}/${shortDate}.ndjson.gz`, + ); + }); +}); diff --git a/packages/lambda-analytic-cloudfront/src/__test__/log.data.ts b/packages/lambda-analytic-cloudfront/src/__test__/log.data.ts new file mode 100644 index 000000000..db0a049da --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/__test__/log.data.ts @@ -0,0 +1,15 @@ +import { ulid } from 'ulid'; + +export const DevApiKey = 'd' + ulid().toLowerCase(); +export const ClientApiKey = 'c' + ulid().toLowerCase(); + +export const LogData = `#Version: 1.0 +#Fields: date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken x-forwarded-for ssl-protocol ssl-cipher x-edge-response-result-type cs-protocol-version fle-status fle-encrypted-fields c-port time-to-first-byte x-edge-detailed-result-type sc-content-type sc-content-len sc-range-start sc-range-end +2020-07-28 01:11:25 AKL50-C1 20753 255.255.255.141 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/aerial/EPSG:3857/19/516588/320039.webp 200 https://bar.com/ Mozilla/5.0%20(X11;%20Linux%20x86_64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/85.0.4183.101%20Safari/537.36 api=${DevApiKey} - Hit sBUoz03SwR_hVZkdj0LVC1s_bKakd9ONcKTYRrQLuIR3VPBQUx5xog== basemaps.linz.govt.nz https 82 0.049 - TLSv1.3 TLS_AES_128_GCM_SHA256 Hit HTTP/2.0 -- 21780 0.049 Hit image/webp 20320 - - +2020-07-28 01:16:13 SYD1-C2 156474 255.255.255.4 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/aerial/NZTM2000Quad/19/516542/319785.png 200 https://www.bar.com/ Mozilla/5.0%20(Macintosh;%20Intel%20Mac%20OS%20X%2010_15_4)%20AppleWebKit/605.1.15%20(KHTML,%20like%20Gecko)%20Version/13.1.2%20Safari/605.1.15 api=${DevApiKey}&foo=bar - Hit 9KNnEESjZA-yVs62ffwtRYNaa0gpYKLeEEHH490dmO7AAu3ZxnPc8Q== basemaps.linz.govt.nz https 77 1.791 - TLSv1.3 TLS_AES_128_GCM_SHA256 Hit HTTP/2.0 - - 19468 0.028 Hit image/png 155886 - - +2020-07-28 01:16:21 SYD1-C2 21223 255.255.255.73 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/3857/18/257866/162011.jpeg 200 https://bar.com/map/ Mozilla/5.0%20(Windows%20NT%2010.0;%20Win64;%20x64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/85.0.4183.102%20Safari/537.36 api=${DevApiKey} - Miss a5nrTCsdsP5EDQ9EXkUQQJMCJTlbRUz5JIxowZ-1kRriRDUmLPxvVQ== basemaps.linz.govt.nz https 76 0.222 - TLSv1.3 TLS_AES_128_GCM_SHA256 Miss HTTP/2.0 - - 57799 0.222 Miss image/jpeg 20797 - - +2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/EPSG:3857/WMTSCapabilities.xml 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml - +2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/EPSG:2193/18/257866/162011.pbf 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml - +2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/antipodes-islands-satellite-2019-2020-0.5m/NZTM2000Quad/18/257866/162011.webp 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml - +2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/elevation/WebMercatorQuad/18/257866/162011.png 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey}&pipeline=terrain-rgb - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml - +`.trim(); diff --git a/packages/lambda-analytic-cloudfront/src/date.ts b/packages/lambda-analytic-cloudfront/src/date.ts new file mode 100644 index 000000000..a262e8ad9 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/date.ts @@ -0,0 +1,33 @@ +export function getYesterday(): Date { + // Process up to about a day ago + const maxDate = new Date(); + maxDate.setUTCMinutes(0); + maxDate.setUTCSeconds(0); + maxDate.setUTCMilliseconds(0); + maxDate.setUTCDate(maxDate.getUTCDate() - 1); + return maxDate; +} + +export function* byDay(startDate: Date, endDate: Date): Generator { + const currentDate = new Date(startDate); + currentDate.setUTCMinutes(0); + currentDate.setUTCSeconds(0); + currentDate.setUTCMilliseconds(0); + while (true) { + yield currentDate.toISOString().slice(0, 10); + currentDate.setUTCDate(currentDate.getUTCDate() - 1); + if (currentDate.getTime() < endDate.getTime()) break; + } +} + +export function* byMonth(startDate: Date, endDate: Date): Generator { + const currentDate = new Date(startDate); + currentDate.setUTCMinutes(0); + currentDate.setUTCSeconds(0); + currentDate.setUTCMilliseconds(0); + while (true) { + yield currentDate.toISOString().slice(0, 7); + currentDate.setUTCMonth(currentDate.getUTCMonth() - 1); + if (currentDate.getTime() < endDate.getTime()) break; + } +} diff --git a/packages/lambda-analytic-cloudfront/src/elastic.ts b/packages/lambda-analytic-cloudfront/src/elastic.ts new file mode 100644 index 000000000..a3f287c8b --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/elastic.ts @@ -0,0 +1,74 @@ +import { Env, LogType } from '@basemaps/shared'; +import { Client } from '@elastic/elasticsearch'; + +import { LogStats } from './log.stats.js'; + +export class ElasticClient { + _client: Client | undefined; + /** Between index requests delay this amount */ + indexDelay: number = 200; + + get client(): Client { + if (this._client != null) return this._client; + + const id = Env.get(Env.Analytics.ElasticId); + const apiKey = Env.get(Env.Analytics.ElasticApiKey); + if (id == null) throw new Error(`$${Env.Analytics.ElasticId} is unset`); + if (apiKey == null) throw new Error(`$${Env.Analytics.ElasticApiKey} is unset`); + this._client = new Client({ cloud: { id }, auth: { apiKey } }); + return this._client; + } + + errors: unknown[] = []; + insertQueue: Promise = Promise.resolve(); + + async insert(prefix: string, combined: Iterable, log: LogType): Promise { + this.insertQueue = this.insertQueue.then(() => this._doInsert(prefix, combined, log)); + return this.insertQueue; + } + + async _doInsert(prefix: string, combined: Iterable, log: LogType): Promise { + const client = this.client; + let inserts = 0; + let skipHits = 0; + let operations: unknown[] = []; + + const startTime = performance.now(); + + const errors = this.errors; + const indexDelay = this.indexDelay; + + async function doInsert(): Promise { + inserts += operations.length / 2; + log.trace({ prefix, records: operations.length / 2, skipHits, total: inserts }, 'log:ingest'); + const ret = await client.bulk({ operations }); + + if (ret.errors) { + errors.push(ret); + throw new Error('Failed to index: ' + prefix); + } + // Give it a little bit of time to index + await new Promise((r) => setTimeout(r, indexDelay)); + operations = []; + } + + for (const rec of combined) { + if (rec.total < 1) { + skipHits++; + continue; + } + operations.push({ index: { _index: 'basemaps-history-' + rec['@timestamp'].slice(0, 4), _id: rec.id } }, rec); + if (operations.length > 50_000) await doInsert(); + } + + if (operations.length > 0) await doInsert(); + + if (inserts > 0) { + log.info({ prefix, skipHits, total: inserts, duration: performance.now() - startTime }, 'log:ingest'); + } else { + log.trace({ prefix }, 'log:ingest:skip'); + } + } +} + +export const Elastic = new ElasticClient(); diff --git a/packages/lambda-analytic-cloudfront/src/handler.ts b/packages/lambda-analytic-cloudfront/src/handler.ts new file mode 100644 index 000000000..aec59e716 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/handler.ts @@ -0,0 +1,152 @@ +import { promisify } from 'node:util'; +import { gzip } from 'node:zlib'; + +import { Env, fsa, LogConfig } from '@basemaps/shared'; +import pLimit from 'p-limit'; +import { basename } from 'path'; + +import { byDay, getYesterday } from './date.js'; +import { Elastic } from './elastic.js'; +import { FileProcess } from './log.reader.js'; +import { LogStats } from './log.stats.js'; + +const gzipPromise = promisify(gzip); + +const OldestDate = new Date('2020-01-01T00:00:00.000Z'); + +/** + * extract a environment variable and parse it as a URL + * + * @throws if the env var is unset or not a URL + * @param env ENV var to lookup + * @returns parsed url from the environment + */ +function getEnvUrl(env: string): URL { + const val = Env.get(env); + if (val == null) throw new Error(`$${env} is unset`); + try { + return fsa.toUrl(val); + } catch (e) { + throw new Error(`$${env} is not a url`); + } +} + +export async function handler(): Promise { + const SourceLocation = getEnvUrl(Env.Analytics.CloudFrontSourceBucket); + const CacheLocation = getEnvUrl(Env.Analytics.CacheBucket); + const CloudFrontId = Env.get(Env.Analytics.CloudFrontId); + + const MaxToProcess = Env.getNumber(Env.Analytics.MaxRecords, 24 * 7 * 4); // Process 4 weeks of logs by default + const logger = LogConfig.get(); + + logger.info( + { source: SourceLocation.href, cacheLocation: CacheLocation.href, cloudFrontId: CloudFrontId }, + 'log:index:start', + ); + if (CloudFrontId == null) throw new Error(`Missing $${Env.Analytics.CloudFrontId}`); + + // Limit hours to be processed 5 at a time and log files to 5 at a time, which gives upto 25 logs files concurrency + // as often hours are skipped + const hourQ = pLimit(5); + const fileQ = pLimit(5); + + let processedCount = 0; + for (const prefixByDay of byDay(getYesterday(), OldestDate)) { + if (processedCount > MaxToProcess) break; + const todo = []; + + for (let hour = 0; hour < 24; hour++) { + processedCount++; + if (processedCount > MaxToProcess) break; + + const hourOfDay = String(hour).padStart(2, '0'); + const prefix = `${prefixByDay}-${hourOfDay}`; + + // Create a folder structure of /YYYY/MM/ + const cacheFolderParts = prefix.slice(0, 7).replace('-', '/'); + + const cacheUrl = new URL(`./RollUpV3/${cacheFolderParts}/${prefix}.ndjson.gz`, CacheLocation); + + const promise = hourQ(async () => { + // Cache file exists skip processing + if (await fsa.exists(cacheUrl)) { + logger.debug({ prefix }, 'log:prefix:skip'); + return; + } + + const startTime = performance.now(); + logger.trace({ prefix }, 'log:prefix:start'); + const logPrefix = new URL(`${CloudFrontId}.${prefix}`, SourceLocation); + + const stats = new Map(); + + const logFiles = await fsa.toArray(fsa.list(logPrefix)); + if (logFiles.length === 0) { + logger.info({ prefix }, 'log:prefix:no-files'); + return; + } + + let lines = 0; + let fileCount = 0; + const filePromises = logFiles.map((lf) => { + return fileQ(async () => { + const fileStartTime = performance.now(); + + const fileLines = await FileProcess.process(lf, stats); + logger.trace( + { + prefix: prefix, + file: basename(lf.pathname), + lines: fileLines, + remaining: logFiles.length - fileCount, + duration: performance.now() - fileStartTime, + }, + 'log:file:done', + ); + lines += fileLines; + fileCount++; + }); + }); + + // Process all the log files + await Promise.all(filePromises); + + // Extrac thte values + const allStats = [...stats.values()]; + await Elastic.insert(prefix, allStats, logger); + // Ensure everything is indexed into elasticsearch before writing the cache to disk + await fsa.write(cacheUrl, await gzipPromise(JSON.stringify(allStats))); + + logger.info( + { + prefix: prefix, + files: logFiles.length, + lines, + records: stats.size, + duration: performance.now() - startTime, + }, + 'log:prefix:done', + ); + }); + + todo.push(promise); + } + + const rets = await Promise.allSettled(todo); + + // If anythign fails to index write the errors out to a log file at the cache location + if (Elastic.errors.length > 0) { + const errorLocation = new URL(`./errors-${new Date().toISOString()}.json`, CacheLocation); + logger.fatal({ errorLocation: errorLocation.href }, 'log:index:failed'); + await fsa.write(errorLocation, JSON.stringify(rets)); + } + + let failed = false; + for (const ret of rets) { + if (ret.status !== 'rejected') continue; + logger.fatal({ err: ret.reason }, 'log:index:failed'); + failed = true; + } + if (failed) throw new Error('Failed to index'); + } +} diff --git a/packages/lambda-analytic-cloudfront/src/log.reader.ts b/packages/lambda-analytic-cloudfront/src/log.reader.ts new file mode 100644 index 000000000..c3a9a8dc6 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/log.reader.ts @@ -0,0 +1,167 @@ +import { createInterface } from 'node:readline/promises'; +import { createGunzip } from 'node:zlib'; + +import { sha256base58 } from '@basemaps/config'; +import { fsa } from '@chunkd/fs'; + +import { LogStats } from './log.stats.js'; +import { parseQueryString } from './log/query.js'; +import { getUrlHost } from './log/referer.js'; +import { parseTileUrl } from './log/tile.url.js'; +import { UaParser } from './useragent/agent.js'; + +/** +00 'date': '2017-02-09', +01 'time': '17:50:17', +02 'x-edge-location': 'MUC51', +03 'sc-bytes': '2797', // Number of bytes to viewer +04 'c-ip': '192.168.0.123', +05 'cs-method': 'GET', +06 'cs-host': 'yourdistribution.cloudfront.net', +07 'cs-uri-stem': '/', +08 'sc-status': '200', +09 'cs-referer': '-', +10 'cs-user-agent': 'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)', +11 'cs-uri-query': '-', +12 'cs-cookie': '-', +13 'x-edge-result-type': 'Hit', +14 'x-edge-request-id': 'sjXpb8nMq_1ewovZ6nrojpvxIETPbo7EhF2RNtPZ_zfd0MtOW6pjlg==', +15 'x-host-header': 'example.com', +16 'cs-protocol': 'https', +17 'cs-bytes': '148', +18 'time-taken': '0.002', +19 'x-forwarded-for': '-', +20 'ssl-protocol': 'TLSv1.2', +21 'ssl-cipher': 'ECDHE-RSA-AES128-GCM-SHA256', +22 'x-edge-response-result-type': 'Hit', +23 'cs-protocol-version': 'HTTP/1.1' +*/ + +// tiles with full alpha or single solid color are approx these size +const EmptyTileSizes: Record = { + webp: 214, + png: 355, + jpeg: 650, +}; + +const IsoDateMonth = 7; // 2023-06 +const IsoDateDay = 10; // 2023-06-12 +const IsoDateHour = 13; // 2023-06-12:T01 + +/** + * Hide the full API key from the log analytics + */ +function hideApiKey(str: string): string { + if (str.startsWith('d')) return 'd..' + str.slice(str.length - 6); + if (str.startsWith('c')) return 'c..' + str.slice(str.length - 6); + return str; +} + +const empty: Record = { webp: 0, png: 0, jpeg: 0 }; +function toFullDate(x: string): string { + if (x.length === IsoDateMonth) return `${x}-01T00:00:00.000Z`; + if (x.length === IsoDateDay) return `${x}T00:00:00.000Z`; + if (x.length === IsoDateHour) return `${x}:00:00.000Z`; + throw new Error('Unknown date:' + x); +} + +export const FileProcess = { + process(fileName: URL, stats: Map): Promise { + let count = 0; + const lineReader = createInterface({ input: fsa.readStream(fileName).pipe(createGunzip()), terminal: false }); + + function processLine(line: string): void { + if (line.startsWith('#')) return; + const lineData = line.split('\t'); + const status = Number(lineData[8]); + + // Ignore requests that were not actually served + if (status > 399) return; + if (status < 200) return; + // No data was served ignore! + if (status === 204) return; + + // Ignore files where no bytes were served + const bytes = Number(lineData[3]); + if (isNaN(bytes)) return; + + // Ignore anything that is not /v1/tiles + const url = lineData[7]; + if (!url.startsWith('/v1/tiles')) return; + + const date = lineData[0]; + const time = lineData[1]; + const dateTime = `${date}T${time}Z`; + + const contentLength = Number(lineData[30]); + + const { api, pipeline } = parseQueryString(lineData[11]); + + // Slice the request to the hour 2023-06-12T01 + const dateAggregate = dateTime.slice(0, IsoDateHour); + const hit = lineData[13] === 'Hit' || lineData[13] === 'RefreshHit'; + const referer = getUrlHost(lineData[9]); + + const userAgent = UaParser.parse(lineData[10]); + + const ret = parseTileUrl(status, url); + if (ret == null) return; // Couldn't parse tileInformation out!? + + // Aggregation date, api and referer + const trackId = [dateAggregate, api, referer]; + // Aggregate on useragent + if (userAgent) trackId.push(...Object.values(userAgent).map((m) => String(m))); + + let isEmpty = false; + trackId.push(ret.tileMatrix, ret.extension, String(ret.webMercatorZoom)); + if (pipeline) trackId.push(pipeline); + + // If the bytes served back to the user is low, it could be a empty tile + // compare it to known empty tile sizes + const emptyBytes = EmptyTileSizes[ret.extension]; + if (emptyBytes && contentLength === emptyBytes) { + empty[ret.extension]++; + isEmpty = true; + } + + const trackingId = trackId.join('_'); + let existing = stats.get(trackingId) as LogStats; + if (existing == null) { + existing = { + '@timestamp': toFullDate(dateAggregate), + api: hideApiKey(api), + apiType: api?.slice(0, 1), + tileMatrix: ret.tileMatrix, + tileMatrixId: ret.tileMatrixId, + tileSet: ret.tileSet, + z: ret.webMercatorZoom, + referer, + extension: ret?.extension, + ua: userAgent, + pipeline, + cacheHit: 0, + cacheMiss: 0, + total: 0, + bytes: 0, + empty: 0, + id: sha256base58(trackingId), + }; + stats.set(trackingId, existing); + } + + existing.bytes += bytes; + existing.total++; + if (isEmpty) existing.empty++; + if (hit) existing.cacheHit++; + else existing.cacheMiss++; + + count++; + } + + return new Promise((resolve, reject) => { + lineReader.on('error', (err) => reject(err)); + lineReader.on('close', () => resolve(count)); + lineReader.on('line', processLine); + }); + }, +}; diff --git a/packages/lambda-analytic-cloudfront/src/log.stats.ts b/packages/lambda-analytic-cloudfront/src/log.stats.ts new file mode 100644 index 000000000..17cb5c062 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/log.stats.ts @@ -0,0 +1,76 @@ +import { UserAgentInfo } from './useragent/parser.types.js'; + +export interface LogStats { + '@timestamp': string; + + /** + * Cut down API key consisting of the first character and the last 6 characters + */ + api: string; + + /** + * API Key type "c" | "d" + */ + apiType: string; + + /** + * Raw tile matrix name from the URL + */ + tileMatrix: string; + + /** + * Actual tile matrix used + */ + tileMatrixId: 'NZTM2000Quad' | 'WebMercatorQuad' | 'NZTM2000'; + + /** + * Name of the tile set + * + * @example 'aerial' or 'topographic' + */ + tileSet: string; + + /** + * zoom level of the request served + */ + z: number; + + /** Host that referred the request */ + referer: string; + + /** Extension that was served */ + extension: string; + + /** User agent information */ + ua?: UserAgentInfo; + /** + * Rendering pipeline if used + * + * @example "rgba" or "color-ramp" + */ + pipeline?: string; + /** + * Number of hits that were cache hits + */ + cacheHit: number; + /** + * Number of hits that were cache misses + */ + cacheMiss: number; + /** + * Total number of requests + */ + total: number; + /** + * Total bytes served + */ + bytes: number; + /** + * Total number of tiles that were empty + */ + empty: number; + /** + * Unique ID for the tracking information + */ + id: string; +} diff --git a/packages/lambda-analytic-cloudfront/src/log/__test__/tile.url.test.ts b/packages/lambda-analytic-cloudfront/src/log/__test__/tile.url.test.ts new file mode 100644 index 000000000..86b2d2a56 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/log/__test__/tile.url.test.ts @@ -0,0 +1,53 @@ +import assert from 'node:assert'; +import { describe, it } from 'node:test'; + +import { parseTileUrl } from '../tile.url.js'; + +describe('tile.url', () => { + it('should parse tile requests', () => { + assert.deepEqual(parseTileUrl(200, '/v1/tiles/aerial/NZTM2000Quad/16/32237/31326.jpeg'), { + extension: 'jpeg', + tileMatrix: 'NZTM2000Quad', + tileMatrixId: 'NZTM2000Quad', + tileSet: 'aerial', + webMercatorZoom: 18, + z: 16, + }); + + assert.deepEqual(parseTileUrl(200, '/v1/tiles/aerial/WebMercatorQuad/16/32237/31326.webp'), { + extension: 'webp', + tileMatrix: 'WebMercatorQuad', + tileMatrixId: 'WebMercatorQuad', + tileSet: 'aerial', + webMercatorZoom: 16, + z: 16, + }); + }); + + it('should parse tile matrix sets', () => { + assert.deepEqual(parseTileUrl(200, '/v1/tiles/aerial/3857/1/1/1.webp'), { + extension: 'webp', + tileMatrix: '3857', + tileMatrixId: 'WebMercatorQuad', + tileSet: 'aerial', + webMercatorZoom: 1, + z: 1, + }); + assert.deepEqual(parseTileUrl(200, '/v1/tiles/aerial/EPSG:3857/1/1/1.webp'), { + extension: 'webp', + tileMatrix: 'EPSG:3857', + tileMatrixId: 'WebMercatorQuad', + tileSet: 'aerial', + webMercatorZoom: 1, + z: 1, + }); + assert.deepEqual(parseTileUrl(200, '/v1/tiles/topographic/2193/1/1/1.pbf'), { + extension: 'pbf', + tileMatrix: '2193', + tileMatrixId: 'NZTM2000', + tileSet: 'topographic', + webMercatorZoom: 5, + z: 1, + }); + }); +}); diff --git a/packages/lambda-analytic-cloudfront/src/log/query.ts b/packages/lambda-analytic-cloudfront/src/log/query.ts new file mode 100644 index 000000000..9b1663f58 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/log/query.ts @@ -0,0 +1,31 @@ +export interface QueryStringInfo { + api: string; + pipeline?: string; +} +function getQuery(str: string): QueryStringInfo { + const urlSearch = new URLSearchParams(str); + const api = _getApi(urlSearch); + const pipeline = urlSearch.get('pipeline') ?? undefined; + return { api, pipeline }; +} + +function _getApi(url: URLSearchParams): string { + const api = url.get('api') ?? ''; + // api keys are 27 chars starting with d or c + if (api.length !== 27) return 'invalid'; + if (api.startsWith('d')) return api; + if (api.startsWith('c')) return api; + return 'invalid'; +} +const QueryMap = new Map(); + +export function parseQueryString(str: string): QueryStringInfo { + let existing = QueryMap.get(str); + if (existing == null) { + existing = getQuery(str); + QueryMap.set(str, existing); + } + // This can get very very large so periodically clear it + if (QueryMap.size > 5_000_000) QueryMap.clear(); + return existing; +} diff --git a/packages/lambda-analytic-cloudfront/src/log/referer.ts b/packages/lambda-analytic-cloudfront/src/log/referer.ts new file mode 100644 index 000000000..fcf1cd6e9 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/log/referer.ts @@ -0,0 +1,29 @@ +const hostCache = new Map(); + +export function getUrlHost(ref: string): string { + let existing = hostCache.get(ref); + if (existing == null) { + existing = _getUrlHost(ref); + hostCache.set(ref, existing); + } + return existing; +} +/** Extract the hostname from a url */ +export function _getUrlHost(ref: string): string { + if (ref == null) return 'unknown'; + if (ref === '-') return 'unknown'; + // console.log(ref) + + try { + const { hostname } = new URL(ref); + if (hostname == null) return ref; + if (hostname.startsWith('www.')) return hostname.slice(4); + return hostname; + } catch (e) { + if (!ref.startsWith('http')) return _getUrlHost('https://' + ref); + // Ignore invalid referer hostname + // eslint-disable-next-line no-console + console.log(ref); + } + return 'unknown'; +} diff --git a/packages/lambda-analytic-cloudfront/src/log/tile.url.ts b/packages/lambda-analytic-cloudfront/src/log/tile.url.ts new file mode 100644 index 000000000..04f871d79 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/log/tile.url.ts @@ -0,0 +1,87 @@ +import { GoogleTms, TileMatrixSet, TileMatrixSets } from '@basemaps/geo'; + +function isValidExt(ext: string): boolean { + switch (ext) { + case 'webp': + case 'jpeg': + case 'png': + case 'avif': + // Vector + case 'pbf': + return true; + } + return false; +} + +const tileMatrixLookup = new Map(); + +// Validate +// - /v1/tiles/aerial/EPSG:2193/12.67876636397893/11737/18011.jpeg + +export interface TileUrlInfo { + extension: string; + + tileSet: string; + + /** + * Raw tile matrix used + * + * @example "3857" "EPSG:3857" + */ + tileMatrix: string; + /** + * Tile Matrix Used + * @example "NZTM2000Quad" + */ + tileMatrixId: 'NZTM2000Quad' | 'WebMercatorQuad' | 'NZTM2000'; + + /** Zoom used from in the tile matrix */ + z: number; + + /** closes zoom level in web mercator quad */ + webMercatorZoom: number; +} +export function parseTileUrl(status: number, url: string): TileUrlInfo | undefined { + if (!url.startsWith('/v1/tiles')) return; + if (status > 399) return; + + // /v1/tiles/topographic/EPSG:3857/tile.json + // /v1/tiles/topographic/EPSG:3857/style/topolite.json + const lastDot = url.lastIndexOf('.'); + if (lastDot === -1) return; // no extension ignore + + let ext = url.slice(lastDot + 1); + if (ext === 'jpg') ext = 'jpeg'; // standardise "jpg" into "jpeg" + const tileSetType = isValidExt(ext); + if (tileSetType == null) return; + + // /v1/tiles/:tileSet/:tileMatrixId/:z/:x/:y.:ext + const urlPart = url.split('/'); + const tileSet = urlPart[3]; + const tileMatrixPart = urlPart[4]; + + let tileMatrix = tileMatrixLookup.get(tileMatrixPart); + if (tileMatrix === undefined) { + tileMatrix = TileMatrixSets.find(tileMatrixPart); + tileMatrixLookup.set(tileMatrixPart, tileMatrix); + } + if (tileMatrix == null) return; // TileMatrix not found + + const z = Number.parseInt(urlPart[5]); + + // Check tile is in valid ranges + if (isNaN(z)) return; + if (z < 0) return; + + // Convert the zoom to webmercator zoom scales + const webMercatorZoom = TileMatrixSet.convertZoomLevel(z, tileMatrix, GoogleTms); + + return { + extension: ext, + tileSet, + tileMatrix: tileMatrixPart, + tileMatrixId: tileMatrix.identifier as TileUrlInfo['tileMatrixId'], + z, + webMercatorZoom, + }; +} diff --git a/packages/lambda-analytic-cloudfront/src/useragent/__test__/parser.test.ts b/packages/lambda-analytic-cloudfront/src/useragent/__test__/parser.test.ts new file mode 100644 index 000000000..61d631e0c --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/useragent/__test__/parser.test.ts @@ -0,0 +1,195 @@ +import assert from 'node:assert'; +import { describe, it } from 'node:test'; + +import { UaParser } from '../agent.js'; + +describe('UserAgents', () => { + it('should parse common browsers', () => { + assert.deepEqual( + UaParser.parse( + 'Mozilla/5.0%20(X11;%20Linux%20x86_64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/85.0.4183.101%20Safari/537.36', + ), + { name: 'chrome', os: 'linux', variant: 'unknown', version: '85' }, + ); + assert.deepEqual( + UaParser.parse( + `Mozilla/5.0 (Linux; U; Android 2.2.1; de-de; LG-P350 Build/FRG83) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1 MMS/LG-Android-MMS-V1.0/1.2`, + ), + { name: 'androidbrowser', os: 'android', variant: 'unknown', version: '4' }, + ); + assert.deepEqual( + UaParser.parse('Mozilla/5.0%20(Windows%20NT%2010.0;%20WOW64;%20rv:48.0)%20Gecko/20100101%20Firefox/48.0'), + { name: 'firefox', os: 'unknown', variant: 'unknown', version: '48' }, + ); + assert.deepEqual( + UaParser.parse( + 'Mozilla/5.0%20(Linux;%20Android%2011;%20SM-A025F)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/96.0.4664.104%20Mobile%20Safari/537.36', + ), + { name: 'chrome', os: 'android', variant: 'unknown', version: '96' }, + ); + + assert.deepEqual( + UaParser.parse( + 'Mozilla/5.0%20(Windows%20NT%2010.0;%20Win64;%20x64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/98.0.4758.80%20Safari/537.36%20Edg/98.0.1108.43', + ), + { name: 'edge', os: 'unknown', variant: 'unknown', version: '98' }, + ); + }); + + it('should parse qgis', () => { + assert.deepEqual(UaParser.parse('QGIS/31613'), { + name: 'qgis', + version: '3.16', + os: 'unknown', + variant: 'unknown', + }); + assert.deepEqual(UaParser.parse('Mozilla/5.0%20QGIS/31006'), { + name: 'qgis', + version: '3.10', + os: 'unknown', + variant: 'unknown', + }); + assert.deepEqual(UaParser.parse('Mozilla/5.0%20QGIS/31607'), { + name: 'qgis', + version: '3.16', + os: 'unknown', + variant: 'unknown', + }); + assert.deepEqual(UaParser.parse('Mozilla/5.0%20QGIS/32402/macOS%2012.4'), { + name: 'qgis', + version: '3.24', + os: 'macos', + variant: 'unknown', + }); + assert.deepEqual(UaParser.parse('Mozilla/5.0%20QGIS/2.14.9-Essen'), { + name: 'qgis', + version: '2.14', + os: 'unknown', + variant: 'unknown', + }); + assert.deepEqual(UaParser.parse('Mozilla/5.0%20QGIS/32601/Windows%2010%20Version%202009'), { + name: 'qgis', + version: '3.26', + os: 'windows', + variant: 'unknown', + }); + assert.deepEqual(UaParser.parse('Mozilla/5.0%20QGIS/3.10.1-A%20Coru%C3%B1a'), { + name: 'qgis', + version: '3.10', + os: 'unknown', + variant: 'unknown', + }); + }); + + it('should parse ArcGIS', () => { + assert.deepEqual(UaParser.parse('ArcGIS%20Pro%202.7.3%20(00000000000)%20-%20ArcGISPro'), { + name: 'arcgis', + os: 'windows', + variant: 'pro', + version: '2.7', + }); + assert.deepEqual(UaParser.parse('ArcGIS%20Pro%203.0.0%20(00000000000)%20-%20ArcGISPro'), { + name: 'arcgis', + os: 'windows', + variant: 'pro', + version: '3.0', + }); + assert.deepEqual(UaParser.parse('ArcGIS%20Pro%202.9.3%20(00000000000)%20-%20ArcGISPro'), { + name: 'arcgis', + os: 'windows', + variant: 'pro', + version: '2.9', + }); + assert.deepEqual(UaParser.parse('ArcGIS%20Pro%202.9.2%20(00000000000)%20-%20ArcGISPro'), { + name: 'arcgis', + os: 'windows', + variant: 'pro', + version: '2.9', + }); + assert.deepEqual(UaParser.parse('ArcGIS%20Pro%202.8.0%20(00000000000)%20-%20ArcGISPro'), { + name: 'arcgis', + os: 'windows', + variant: 'pro', + version: '2.8', + }); + }); + + it('should handle software', () => { + assert.deepEqual(UaParser.parse('python-requests/2.23.0'), { + name: 'python', + os: 'unknown', + variant: 'requests', + version: '2.23.0', + }); + assert.deepEqual(UaParser.parse('MapProxy-1.12.0'), { + name: 'map-proxy', + os: 'unknown', + variant: 'unknown', + version: '1.12', + }); + assert.deepEqual(UaParser.parse('MapProxy-1.13.2'), { + name: 'map-proxy', + os: 'unknown', + variant: 'unknown', + version: '1.13', + }); + assert.deepEqual(UaParser.parse('okhttp/3.12.3'), { + name: 'okhttp', + os: 'unknown', + variant: 'unknown', + version: '3.12', + }); + assert.deepEqual(UaParser.parse('axios/0.21.1'), { + name: 'axios', + os: 'unknown', + variant: 'unknown', + version: '0.21', + }); + assert.deepEqual(UaParser.parse('Dart/2.16 (dart:io) '), { + name: 'dart', + os: 'unknown', + variant: 'unknown', + version: '2.16', + }); + assert.deepEqual(UaParser.parse('Apache-HttpClient/4.5.13'), { + name: 'apache', + os: 'unknown', + variant: 'http', + version: '4.5', + }); + }); + + it('should parse gis software', () => { + // assert.deepEqual( + // UaParser.parse('MapFishPrint/3.29.2%20Apache-HttpClient/4.5.13%20(Java/1.8.0_312)'), + // 'mapfishprint_3.29', + // ); + assert.deepEqual( + UaParser.parse( + 'FME/2022.7.43.22343%20%20libcurl/7.79.1%20(OpenSSL/1.1.1n)%20Schannel%20zlib/1.2.11%20WinIDN%20libssh2/1.10.0%20nghttp2/1.44.0', + ), + { name: 'fme', os: 'unknown', variant: 'unknown', version: '2022.7' }, + ); + + assert.deepEqual(UaParser.parse('JOSM/1.5 (18513 en) Windows 10 64-Bit Java/11.0.15'), { + name: 'josm', + os: 'unknown', + variant: 'unknown', + version: '1.5', + }); + + assert.deepEqual(UaParser.parse('GDAL WMS driver (http://www.gdal.org/frmt_wms.html)'), { + name: 'gdal', + os: 'unknown', + variant: 'wms', + version: 'unknown', + }); + + assert.deepEqual(UaParser.parse('MapInfoPro/21.0.0.0172 (MapInfoPro.exe) '), { + name: 'unknown', + os: 'unknown', + variant: 'unknown', + version: 'unknown', + }); + }); +}); diff --git a/packages/lambda-analytic-cloudfront/src/useragent/agent.ts b/packages/lambda-analytic-cloudfront/src/useragent/agent.ts new file mode 100644 index 000000000..4e67400b1 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/useragent/agent.ts @@ -0,0 +1,9 @@ +import { Gis } from './agents/gis.js'; +import { Bot, Programming } from './agents/programming.js'; +import { UserAgentParsers } from './parser.js'; + +export const UaParser = new UserAgentParsers(); + +Object.entries(Programming).forEach(([key, create]) => UaParser.addParser(key, create)); +Object.entries(Gis).forEach(([key, create]) => UaParser.addParser(key, create)); +Object.entries(Bot).forEach(([key, create]) => UaParser.addParser(key, create)); diff --git a/packages/lambda-analytic-cloudfront/src/useragent/agents/gis.ts b/packages/lambda-analytic-cloudfront/src/useragent/agents/gis.ts new file mode 100644 index 000000000..243bbe128 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/useragent/agents/gis.ts @@ -0,0 +1,85 @@ +import { isValidOs, UserAgentOs, UserAgentParser } from '../parser.types.js'; + +// Mozilla/5.0 QGIS/32400/Windows 10 Version 2009 +function guessQgisOs(ua: string): UserAgentOs | undefined { + if (ua.includes('/windows')) return 'windows'; + if (ua.includes('/mac')) return 'macos'; +} + +const ArcGis: Record = { + // ArcGISRuntime-NET/100.11.2 (Windows..... + // ArcGISRuntime-Qt/100.10 ... + 'ArcGISRuntime-': (ua: string) => { + const chunks = ua.split('/'); + const variant = chunks[0].slice('ArcGISRuntime-'.length).toLowerCase(); + const version = chunks[1].slice(0, chunks[1].indexOf('.')); + const os = chunks[1].split(' ')[1].slice(1); + if (isValidOs(os)) return { name: 'arcgis', variant, version, os }; + return { name: 'arcgis', variant, version }; + }, + + // ArcGIS Pro 2.7.3 (00000000000) - ArcGISPro + // ArcGIS Pro 3.0.0 (00000000000) - ArcGISPro + 'ArcGIS Pro': (ua: string) => { + return { + name: 'arcgis', + variant: 'pro', + version: ua.slice('ArcGIS Pro'.length, ua.lastIndexOf('.')).trim(), + os: 'windows', + }; // assume arcgis is windows + }, + + 'ArcGIS Client': () => { + return { name: 'arcgis', variant: 'client' }; + }, +}; + +function guessJosmOs(ua: string): { os: 'linux' } | undefined { + if (ua.includes('linux')) return { os: 'linux' }; + return; +} + +export const Gis: Record = { + ...ArcGis, + // QGIS/31613 + 'QGIS/': (ua) => { + const qgisVersion = Number(ua.split('/')[1]); + if (isNaN(qgisVersion)) return { name: 'qgis', os: guessQgisOs(ua) }; + return { name: 'qgis', version: (qgisVersion / 10000).toFixed(2), os: guessQgisOs(ua) }; + }, + 'Mozilla/5.0 QGIS/': (ua) => { + const chunk = ua.slice('Mozilla/5.0 QGIS/'.length).split('/')[0]; + if (chunk == null) return { name: 'qgis', os: guessQgisOs(ua) }; + // Mozilla/5.0 QGIS/2.18.22 + // Mozilla/5.0 QGIS/2.14.9-Essen + if (chunk.includes('.')) + return { name: 'qgis', version: chunk.slice(0, chunk.lastIndexOf('.')), os: guessQgisOs(ua) }; + + // Mozilla/5.0 QGIS/31400 + // Mozilla/5.0 QGIS/32400/Windows 10 Version 2009 + const qgisVersion = Number(chunk); + if (isNaN(qgisVersion)) return { name: 'qgis', os: guessQgisOs(ua) }; + return { name: 'qgis', version: `${(qgisVersion / 10000).toFixed(2)}`, os: guessQgisOs(ua) }; + }, + + // FME/2022.7.43.22343 libcurl/7.79.1 (OpenSSL/1.1.1n) Schannel zlib/1.2.11 WinIDN libssh2/1.10.0 nghttp2/1.44.0 + 'FME/': (ua) => { + return { name: 'fme', version: ua.slice('FME/'.length, ua.indexOf('.', 9)) }; + }, + + // GDAL WMS driver (http://www.gdal.org/frmt_wms.html) + 'GDAL WMS': () => { + return { name: 'gdal', variant: 'wms' }; + }, + + // MapProxy-1.12.0 + 'MapProxy-': (ua) => { + return { name: 'map-proxy', version: ua.slice('MapProxy-'.length, ua.lastIndexOf('.')) }; + }, + + // JOSM/1.5 (18700 en) Linux Freedesktop.org SDK 22.08 (Flatpak runtime) Java/17.0.6 29147 + // JOSM/1.5 (18700 en) Linux Mint 20.3 Java/17.0.5 + 'JOSM/': (ua) => { + return { name: 'josm', version: ua.slice('JOSM/'.length, ua.indexOf(' ')), ...guessJosmOs(ua) }; + }, +}; diff --git a/packages/lambda-analytic-cloudfront/src/useragent/agents/programming.ts b/packages/lambda-analytic-cloudfront/src/useragent/agents/programming.ts new file mode 100644 index 000000000..d85ce05a8 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/useragent/agents/programming.ts @@ -0,0 +1,25 @@ +import { UserAgentParser } from '../parser.types.js'; + +// Programming languages internal http clients +export const Programming: Record = { + 'python-requests/': (ua) => ({ name: 'python', variant: 'requests', version: ua.slice('python-requests/'.length) }), + 'python-urllib/': (ua) => ({ name: 'python', variant: 'urllib', version: ua.slice('python-urllib/'.length) }), + 'python/': (ua) => ({ name: 'python', version: ua.slice('python/'.length, ua.lastIndexOf('.')) }), + 'java/': (ua) => ({ name: 'java', version: ua.slice('java/'.length, ua.lastIndexOf('.')) }), + 'axios/': (ua) => ({ name: 'axios', version: ua.slice('axios/'.length, ua.lastIndexOf('.')).replace('/', '_') }), + 'okhttp/': (ua) => ({ name: 'okhttp', version: ua.slice('okhttp/'.length, ua.lastIndexOf('.')).replace('/', '_') }), + 'Go-http-client/': (ua) => ({ name: 'go', variant: 'http', version: ua.slice('Go-http-client/'.length) }), + 'Dart/': (ua) => ({ name: 'dart', version: ua.split(' ')[0].slice('Dart/'.length) }), + 'Apache-HttpClient/': (ua) => ({ + name: 'apache', + variant: 'http', + version: ua.slice('Apache-HttpClient/'.length, ua.lastIndexOf('.')), + }), + flutter_: () => ({ name: 'flutter' }), +}; + +// Bots +export const Bot: Record = { + 'Googlebot-Image/': (ua) => ({ name: 'bot', variant: 'google', version: ua.split('/').at(1) }), + 'AdsBot-Google': () => ({ name: 'bot', variant: 'google' }), +}; diff --git a/packages/lambda-analytic-cloudfront/src/useragent/parser.ts b/packages/lambda-analytic-cloudfront/src/useragent/parser.ts new file mode 100644 index 000000000..f1d537812 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/useragent/parser.ts @@ -0,0 +1,106 @@ +import UA from 'ua-parser-js'; + +import { UserAgentInfo, UserAgentOs, UserAgentParser } from './parser.types.js'; + +const OsMap: Record = { ubuntu: 'linux' }; +const UaParser: ParserConfig = { name: 'ua-parser-js', hit: 0 }; +const Skipped: ParserConfig = { name: 'skipped', hit: 0 }; +interface ParserConfig { + name: string; + hit: number; + create?: UserAgentParser; +} +interface ParserCache { + hit: number; + parser: ParserConfig; + value?: UserAgentInfo; +} + +export class UserAgentParsers { + parsers = new Map(); + cache = new Map(); + + addParser(value: string, create: UserAgentParser): void { + const char = value[0].toLowerCase(); + const parser = this.parsers.get(char) ?? []; + parser.push({ name: value.toLowerCase(), create, hit: 0 }); + this.parsers.set(char, parser); + } + + parse(userAgent: string): UserAgentInfo | undefined { + const existing = this.cache.get(userAgent); + if (existing) { + existing.hit++; + existing.parser.hit++; + return existing.value; + } + const ret = this._parse(userAgent); + if (ret.value) { + if (ret.value.version == null) ret.value.version = 'unknown'; + if (ret.value.variant == null) ret.value.variant = 'unknown'; + if (ret.value.os == null) ret.value.os = 'unknown'; + } + ret.hit++; + ret.parser.hit++; + this.cache.set(userAgent, ret); + return ret.value; + } + + _parse(userAgent: string | undefined): ParserCache { + if (userAgent == null || userAgent === '' || userAgent === '-' || userAgent === 'Mozilla/5.0') { + return { value: { name: 'empty' }, parser: Skipped, hit: 0 }; + } + const parsedName = decodeURI(userAgent); + const lowered = parsedName.toLowerCase(); + + // Is there a customer parser for the user agent string + const parsers = this.parsers.get(lowered[0]) ?? []; + for (const parser of parsers) { + if (lowered.startsWith(parser.name)) return { value: parser.create?.(lowered), parser, hit: 0 }; + } + + // No custom parser attempt to pull the information from a generic user agent parser + const ua = UA(userAgent); + const output: Partial = {}; + + if (ua.os.name) { + output.os = ua.os.name.toLowerCase().replace(/ /g, '') as UserAgentOs; + if (OsMap[output.os]) output.os = OsMap[output.os]; + } + + if (ua.browser.name) { + output.name = ua.browser.name.replace(/ /g, '').toLowerCase(); + if (ua.browser.version) output.version = ua.browser.version.slice(0, ua.browser.version.indexOf('.')); + return { value: output as UserAgentInfo, parser: UaParser, hit: 0 }; + } + + if (ua.os.name === 'Android') { + const slashIndex = parsedName.indexOf('/'); + if (slashIndex > -1) { + output.name = 'android'; + output.variant = lowered.slice(0, slashIndex); + return { value: output as UserAgentInfo, parser: UaParser, hit: 0 }; + } + } + // IOS apps + // Tracks%2520NZ/1 CFNetwork/1335.0.3 Darwin/21.6.0' + // com.spatialnetworks.fulcrum/4.0.1%20iPhone/16.4.1%20hw/iPhone13_2 + if ( + ua.os.name === 'iOS' || + lowered.includes('iphone/') || + lowered.includes('ios/') || + lowered.includes('ipad/') || + lowered.includes('ios simulator/') + ) { + const slashIndex = parsedName.indexOf('/'); + if (slashIndex > -1) { + output.os = output.os ?? 'ios'; + output.name = 'ios'; + output.variant = decodeURIComponent(lowered.slice(0, slashIndex)).replace(/ /g, ''); + return { value: output as UserAgentInfo, parser: UaParser, hit: 0 }; + } + } + + return { value: { name: 'unknown' }, parser: Skipped, hit: 0 }; + } +} diff --git a/packages/lambda-analytic-cloudfront/src/useragent/parser.types.ts b/packages/lambda-analytic-cloudfront/src/useragent/parser.types.ts new file mode 100644 index 000000000..262ac81bc --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/useragent/parser.types.ts @@ -0,0 +1,14 @@ +export type UserAgentParser = (ua: string) => UserAgentInfo | undefined; +export interface UserAgentInfo { + name: string; + variant?: string; + version?: string; + os?: UserAgentOs; +} + +export type UserAgentOs = 'windows' | 'macos' | 'ios' | 'android' | 'linux' | 'unknown'; +export const ValidOs = new Set(['windows', 'macos', 'ios', 'android', 'linux']); + +export function isValidOs(os: string): os is UserAgentOs { + return ValidOs.has(os); +} diff --git a/packages/lambda-analytic-cloudfront/tsconfig.json b/packages/lambda-analytic-cloudfront/tsconfig.json new file mode 100644 index 000000000..cdf874685 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/tsconfig.json @@ -0,0 +1,10 @@ +{ + "extends": "../../tsconfig.base.json", + + "compilerOptions": { + "outDir": "./build", + "rootDir": "./src" + }, + "include": ["src"], + "references": [{ "path": "../config" }, { "path": "../geo" }, { "path": "../shared" }] +} diff --git a/packages/lambda-analytic-cloudfront/typedoc.json b/packages/lambda-analytic-cloudfront/typedoc.json new file mode 100644 index 000000000..6bf46780c --- /dev/null +++ b/packages/lambda-analytic-cloudfront/typedoc.json @@ -0,0 +1,4 @@ +{ + "entryPoints": ["./src/**/*.ts"], + "exclude": ["./**/__tests__/*.ts"] +} From 517dd7bc361d4c6fb80578afc263a1d5f8aca314 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 13 Dec 2024 14:49:37 +1300 Subject: [PATCH 04/13] refactor: force strings for api keys --- packages/_infra/src/analytics/edge.analytics.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/_infra/src/analytics/edge.analytics.ts b/packages/_infra/src/analytics/edge.analytics.ts index 31f049be5..e5e338c94 100644 --- a/packages/_infra/src/analytics/edge.analytics.ts +++ b/packages/_infra/src/analytics/edge.analytics.ts @@ -66,8 +66,8 @@ export class EdgeAnalytics extends Stack { [Env.Analytics.CacheBucket]: `s3://${cacheBucket.bucketName}`, [Env.Analytics.CloudFrontSourceBucket]: `s3://${logBucket.bucketName}`, [Env.Analytics.MaxRecords]: String(24 * 7 * 4), - [Env.Analytics.ElasticId]: Env.get(Env.Analytics.ElasticId), - [Env.Analytics.ElasticApiKey]: Env.get(Env.Analytics.ElasticApiKey), + [Env.Analytics.ElasticId]: Env.get(Env.Analytics.ElasticId) ?? '', + [Env.Analytics.ElasticApiKey]: Env.get(Env.Analytics.ElasticApiKey) ?? '', AWS_NODEJS_CONNECTION_REUSE_ENABLED: '1', }, logRetention: RetentionDays.ONE_MONTH, @@ -78,7 +78,7 @@ export class EdgeAnalytics extends Stack { logBucket.grantRead(v2Lambda); // Run this lambda function every hour - new Rule(this, 'AnalyticRule', { schedule: Schedule.rate(Duration.hours(1)) }).addTarget( + new Rule(this, 'AnalyticV2Rule', { schedule: Schedule.rate(Duration.hours(1)) }).addTarget( new LambdaFunction(v2Lambda), ); } From f163ff0fc0c9b1edc726fec3489baa5cc383c287 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Fri, 13 Dec 2024 14:59:14 +1300 Subject: [PATCH 05/13] refactor: validate errors are written into s3 --- .../src/__test__/analytics.test.ts | 11 ++++++----- packages/lambda-analytic-cloudfront/src/elastic.ts | 14 ++++++++++++-- packages/lambda-analytic-cloudfront/src/handler.ts | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts b/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts index 2ca487cc6..29e960974 100644 --- a/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts +++ b/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts @@ -22,7 +22,8 @@ describe('analytic lambda', () => { fsa.register('mem://', memory); memory.files.clear(); - Elastic.indexDelay = 1; + Elastic.indexDelay = 1; // do not wait between requests + Elastic.minRequestCount = 1; // index everything Elastic._client = undefined; }); @@ -111,9 +112,9 @@ describe('analytic lambda', () => { const files = [...memory.files.keys()]; assert.equal(files.length, 2); // two files one input one output - assert.ok( - files[1].startsWith(`mem://cache/errors-${new Date().toISOString().slice(0, 12)}`), - // ${shortDate.slice(0, 4)}/${shortDate.slice(5, 7)}/${shortDate}.ndjson.gz`, - ); + assert.ok(files[1].startsWith(`mem://cache/errors-${new Date().toISOString().slice(0, 12)}`)); + + const data = await fsa.read(new URL(files[1])); + assert.ok(data.toString().includes(JSON.stringify('Hello'))); }); }); diff --git a/packages/lambda-analytic-cloudfront/src/elastic.ts b/packages/lambda-analytic-cloudfront/src/elastic.ts index a3f287c8b..5da2877e4 100644 --- a/packages/lambda-analytic-cloudfront/src/elastic.ts +++ b/packages/lambda-analytic-cloudfront/src/elastic.ts @@ -8,6 +8,15 @@ export class ElasticClient { /** Between index requests delay this amount */ indexDelay: number = 200; + /** + * Do not index analytics for buckets that contain less than this number of total requests + * + * @example + * `1` - drop all requests where total requests <= 1 + * + */ + minRequestCount: number = 1; + get client(): Client { if (this._client != null) return this._client; @@ -44,7 +53,7 @@ export class ElasticClient { const ret = await client.bulk({ operations }); if (ret.errors) { - errors.push(ret); + errors.push({ prefix, errors: ret.errors }); throw new Error('Failed to index: ' + prefix); } // Give it a little bit of time to index @@ -53,7 +62,8 @@ export class ElasticClient { } for (const rec of combined) { - if (rec.total < 1) { + // skip over roll ups that are less than + if (rec.total <= this.minRequestCount) { skipHits++; continue; } diff --git a/packages/lambda-analytic-cloudfront/src/handler.ts b/packages/lambda-analytic-cloudfront/src/handler.ts index aec59e716..e67edcfa7 100644 --- a/packages/lambda-analytic-cloudfront/src/handler.ts +++ b/packages/lambda-analytic-cloudfront/src/handler.ts @@ -138,7 +138,7 @@ export async function handler(): Promise { if (Elastic.errors.length > 0) { const errorLocation = new URL(`./errors-${new Date().toISOString()}.json`, CacheLocation); logger.fatal({ errorLocation: errorLocation.href }, 'log:index:failed'); - await fsa.write(errorLocation, JSON.stringify(rets)); + await fsa.write(errorLocation, JSON.stringify(Elastic.errors)); } let failed = false; From 8ba3e75f57e57b3fda6e376cbbac900fb282c379 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 16 Dec 2024 13:13:30 +1300 Subject: [PATCH 06/13] refactor: add in index.ts for default handler --- .../src/__test__/analytics.test.ts | 6 ++--- .../lambda-analytic-cloudfront/src/handler.ts | 24 +++++++++---------- .../lambda-analytic-cloudfront/src/index.ts | 6 +++++ 3 files changed, 21 insertions(+), 15 deletions(-) create mode 100644 packages/lambda-analytic-cloudfront/src/index.ts diff --git a/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts b/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts index 29e960974..5c2e62a04 100644 --- a/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts +++ b/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts @@ -7,7 +7,7 @@ import { Client } from '@elastic/elasticsearch'; import { getYesterday } from '../date.js'; import { Elastic } from '../elastic.js'; -import { handler } from '../handler.js'; +import { main } from '../handler.js'; import { LogStats } from '../log.stats.js'; import { LogData } from './log.data.js'; @@ -59,7 +59,7 @@ describe('analytic lambda', () => { await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData)); - await handler(); + await main(); // One call to insert assert.equal(operations.length, 1); @@ -105,7 +105,7 @@ describe('analytic lambda', () => { await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData)); - const ret = await handler().catch((e: Error) => e); + const ret = await main().catch((e: Error) => e); assert.equal(String(ret), 'Error: Failed to index'); diff --git a/packages/lambda-analytic-cloudfront/src/handler.ts b/packages/lambda-analytic-cloudfront/src/handler.ts index e67edcfa7..299aabc01 100644 --- a/packages/lambda-analytic-cloudfront/src/handler.ts +++ b/packages/lambda-analytic-cloudfront/src/handler.ts @@ -2,6 +2,7 @@ import { promisify } from 'node:util'; import { gzip } from 'node:zlib'; import { Env, fsa, LogConfig } from '@basemaps/shared'; +import { LambdaRequest } from '@linzjs/lambda'; import pLimit from 'p-limit'; import { basename } from 'path'; @@ -31,15 +32,14 @@ function getEnvUrl(env: string): URL { } } -export async function handler(): Promise { +export async function main(req: LambdaRequest): Promise { const SourceLocation = getEnvUrl(Env.Analytics.CloudFrontSourceBucket); const CacheLocation = getEnvUrl(Env.Analytics.CacheBucket); const CloudFrontId = Env.get(Env.Analytics.CloudFrontId); const MaxToProcess = Env.getNumber(Env.Analytics.MaxRecords, 24 * 7 * 4); // Process 4 weeks of logs by default - const logger = LogConfig.get(); - logger.info( + req.log.info( { source: SourceLocation.href, cacheLocation: CacheLocation.href, cloudFrontId: CloudFrontId }, 'log:index:start', ); @@ -70,19 +70,19 @@ export async function handler(): Promise { const promise = hourQ(async () => { // Cache file exists skip processing if (await fsa.exists(cacheUrl)) { - logger.debug({ prefix }, 'log:prefix:skip'); + req.log.debug({ prefix }, 'log:prefix:skip'); return; } const startTime = performance.now(); - logger.trace({ prefix }, 'log:prefix:start'); + req.log.trace({ prefix }, 'log:prefix:start'); const logPrefix = new URL(`${CloudFrontId}.${prefix}`, SourceLocation); const stats = new Map(); const logFiles = await fsa.toArray(fsa.list(logPrefix)); if (logFiles.length === 0) { - logger.info({ prefix }, 'log:prefix:no-files'); + req.log.info({ prefix }, 'log:prefix:no-files'); return; } @@ -93,7 +93,7 @@ export async function handler(): Promise { const fileStartTime = performance.now(); const fileLines = await FileProcess.process(lf, stats); - logger.trace( + req.log.trace( { prefix: prefix, file: basename(lf.pathname), @@ -113,11 +113,11 @@ export async function handler(): Promise { // Extrac thte values const allStats = [...stats.values()]; - await Elastic.insert(prefix, allStats, logger); + await Elastic.insert(prefix, allStats, req.log); // Ensure everything is indexed into elasticsearch before writing the cache to disk await fsa.write(cacheUrl, await gzipPromise(JSON.stringify(allStats))); - logger.info( + req.log.info( { prefix: prefix, files: logFiles.length, @@ -134,17 +134,17 @@ export async function handler(): Promise { const rets = await Promise.allSettled(todo); - // If anythign fails to index write the errors out to a log file at the cache location + // If anything fails to index write the errors out to a log file at the cache location if (Elastic.errors.length > 0) { const errorLocation = new URL(`./errors-${new Date().toISOString()}.json`, CacheLocation); - logger.fatal({ errorLocation: errorLocation.href }, 'log:index:failed'); + req.log.fatal({ errorLocation: errorLocation.href }, 'log:index:failed'); await fsa.write(errorLocation, JSON.stringify(Elastic.errors)); } let failed = false; for (const ret of rets) { if (ret.status !== 'rejected') continue; - logger.fatal({ err: ret.reason }, 'log:index:failed'); + req.log.fatal({ err: ret.reason }, 'log:index:failed'); failed = true; } if (failed) throw new Error('Failed to index'); diff --git a/packages/lambda-analytic-cloudfront/src/index.ts b/packages/lambda-analytic-cloudfront/src/index.ts new file mode 100644 index 000000000..8fe889cc7 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/index.ts @@ -0,0 +1,6 @@ +import { LogConfig } from '@basemaps/shared'; +import { lf } from '@linzjs/lambda'; + +import { main } from './handler.js'; + +export const handler = lf.handler(main, { tracePercent: 0, rejectOnError: true }, LogConfig.get()); From bce84b86c1d8adee4911b03b2115167e0ee45ba9 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 16 Dec 2024 13:23:34 +1300 Subject: [PATCH 07/13] refactor: fixup tests --- .../src/__test__/analytics.test.ts | 25 ++++++++++++++----- .../lambda-analytic-cloudfront/src/handler.ts | 2 +- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts b/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts index 5c2e62a04..f31da1ba8 100644 --- a/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts +++ b/packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts @@ -1,9 +1,11 @@ import assert from 'node:assert'; -import { beforeEach, describe, it, TestContext } from 'node:test'; +import { afterEach, beforeEach, describe, it, TestContext } from 'node:test'; import { gzipSync } from 'node:zlib'; -import { Env, fsa, FsMemory } from '@basemaps/shared'; +import { Env, fsa, FsMemory, LogConfig } from '@basemaps/shared'; import { Client } from '@elastic/elasticsearch'; +import { LambdaRequest } from '@linzjs/lambda'; +import { Context } from 'aws-lambda'; import { getYesterday } from '../date.js'; import { Elastic } from '../elastic.js'; @@ -16,6 +18,12 @@ interface IndexOperation { } type BulkOperation = (IndexOperation | LogStats)[]; +export class FakeLambdaRequest extends LambdaRequest { + constructor() { + super({}, {} as Context, LogConfig.get()); + } +} + describe('analytic lambda', () => { const memory = new FsMemory(); beforeEach(() => { @@ -23,8 +31,13 @@ describe('analytic lambda', () => { memory.files.clear(); Elastic.indexDelay = 1; // do not wait between requests - Elastic.minRequestCount = 1; // index everything + Elastic.minRequestCount = 0; // index everything Elastic._client = undefined; + LogConfig.get().level = 'silent'; + }); + + afterEach(() => { + LogConfig.get().level = 'info'; }); function setupEnv(t: TestContext): void { @@ -37,7 +50,7 @@ describe('analytic lambda', () => { case Env.Analytics.CloudFrontId: return 'cfid'; case Env.Analytics.MaxRecords: - return '30'; + return '1'; } throw new Error(`Invalid test process.env access ${key}`); }); @@ -59,7 +72,7 @@ describe('analytic lambda', () => { await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData)); - await main(); + await main(new FakeLambdaRequest()); // One call to insert assert.equal(operations.length, 1); @@ -105,7 +118,7 @@ describe('analytic lambda', () => { await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData)); - const ret = await main().catch((e: Error) => e); + const ret = await main(new FakeLambdaRequest()).catch((e: Error) => e); assert.equal(String(ret), 'Error: Failed to index'); diff --git a/packages/lambda-analytic-cloudfront/src/handler.ts b/packages/lambda-analytic-cloudfront/src/handler.ts index 299aabc01..b50c72f62 100644 --- a/packages/lambda-analytic-cloudfront/src/handler.ts +++ b/packages/lambda-analytic-cloudfront/src/handler.ts @@ -1,7 +1,7 @@ import { promisify } from 'node:util'; import { gzip } from 'node:zlib'; -import { Env, fsa, LogConfig } from '@basemaps/shared'; +import { Env, fsa } from '@basemaps/shared'; import { LambdaRequest } from '@linzjs/lambda'; import pLimit from 'p-limit'; import { basename } from 'path'; From 3d914a8c1a7aae6003e61e3c7e93c1faee433bd9 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 16 Dec 2024 13:25:07 +1300 Subject: [PATCH 08/13] refactor: fixup deps --- package-lock.json | 45 ++++--------------- .../lambda-analytic-cloudfront/package.json | 3 +- 2 files changed, 11 insertions(+), 37 deletions(-) diff --git a/package-lock.json b/package-lock.json index 2fd3018d3..e56c3b1dd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1622,7 +1622,6 @@ "version": "8.16.2", "resolved": "https://registry.npmjs.org/@elastic/elasticsearch/-/elasticsearch-8.16.2.tgz", "integrity": "sha512-2ivc6uS97fbEeW4tNtg5mvh/Jy82ZLfcwQ1HhNhdYxyapNnQxIgZ83Zd8Ir+5jCPMDWKSYgwDb8t4GAINDDv2w==", - "dev": true, "dependencies": { "@elastic/transport": "^8.9.1", "apache-arrow": "^18.0.0", @@ -1636,7 +1635,6 @@ "version": "8.9.1", "resolved": "https://registry.npmjs.org/@elastic/transport/-/transport-8.9.1.tgz", "integrity": "sha512-jasKNQeOb1vNf9aEYg+8zXmetaFjApDTSCC4QTl6aTixvyiRiSLcCiB8P6Q0lY9JIII/BhqNl8WbpFnsKitntw==", - "dev": true, "dependencies": { "@opentelemetry/api": "1.x", "debug": "^4.3.4", @@ -1653,8 +1651,7 @@ "node_modules/@elastic/transport/node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "dev": true + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, "node_modules/@emnapi/runtime": { "version": "0.44.0", @@ -3999,7 +3996,6 @@ "version": "1.9.0", "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", - "dev": true, "engines": { "node": ">=8.0.0" } @@ -5239,7 +5235,6 @@ "version": "0.5.15", "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.15.tgz", "integrity": "sha512-JQ5TuMi45Owi4/BIMAJBoSQoOJu12oOk/gADqlcUL9JEdHB8vyjUSsxqeNXnmXHjYKMi2WcYtezGEEhqUI/E2g==", - "dev": true, "dependencies": { "tslib": "^2.8.0" } @@ -5335,14 +5330,12 @@ "node_modules/@types/command-line-args": { "version": "5.2.3", "resolved": "https://registry.npmjs.org/@types/command-line-args/-/command-line-args-5.2.3.tgz", - "integrity": "sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw==", - "dev": true + "integrity": "sha512-uv0aG6R0Y8WHZLTamZwtfsDLVRnOa+n+n5rEvFWL5Na5gZ8V2Teab/duDPFzIIIhs9qizDpcavCusCLJZu62Kw==" }, "node_modules/@types/command-line-usage": { "version": "5.0.4", "resolved": "https://registry.npmjs.org/@types/command-line-usage/-/command-line-usage-5.0.4.tgz", - "integrity": "sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg==", - "dev": true + "integrity": "sha512-BwR5KP3Es/CSht0xqBcUXS3qCAUVXwpRKsV2+arxeb65atasuXG9LykC9Ab10Cw3s2raH92ZqOeILaQbsB2ACg==" }, "node_modules/@types/geojson": { "version": "7946.0.14", @@ -5385,7 +5378,6 @@ "version": "20.14.8", "resolved": "https://registry.npmjs.org/@types/node/-/node-20.14.8.tgz", "integrity": "sha512-DO+2/jZinXfROG7j7WKFn/3C6nFwxy2lLpgLjEXJz+0XKphZlTLJ14mo8Vfg8X5BWN6XjyESXq+LcYdT7tR3bA==", - "dev": true, "dependencies": { "undici-types": "~5.26.4" } @@ -5960,7 +5952,6 @@ "version": "18.1.0", "resolved": "https://registry.npmjs.org/apache-arrow/-/apache-arrow-18.1.0.tgz", "integrity": "sha512-v/ShMp57iBnBp4lDgV8Jx3d3Q5/Hac25FWmQ98eMahUiHPXcvwIMKJD0hBIgclm/FCG+LwPkAKtkRO1O/W0YGg==", - "dev": true, "dependencies": { "@swc/helpers": "^0.5.11", "@types/command-line-args": "^5.2.3", @@ -6014,7 +6005,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/array-back/-/array-back-3.1.0.tgz", "integrity": "sha512-TkuxA4UCOvxuDK6NZYXCalszEzj+TLszyASooky+i742l9TqsOdYCMJJupxRic61hwquNtppB3hgcuq9SVSH1Q==", - "dev": true, "engines": { "node": ">=6" } @@ -7099,7 +7089,6 @@ "version": "0.4.0", "resolved": "https://registry.npmjs.org/chalk-template/-/chalk-template-0.4.0.tgz", "integrity": "sha512-/ghrgmhfY8RaSdeo43hNXxpoHAtxdbskUHjPpfqUWGttFgycUhYPGx3YZBCnUCvOa7Doivn1IZec3DEGFoMgLg==", - "dev": true, "dependencies": { "chalk": "^4.1.2" }, @@ -7114,7 +7103,6 @@ "version": "4.1.2", "resolved": "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz", "integrity": "sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==", - "dev": true, "dependencies": { "ansi-styles": "^4.1.0", "supports-color": "^7.1.0" @@ -7409,7 +7397,6 @@ "version": "5.2.1", "resolved": "https://registry.npmjs.org/command-line-args/-/command-line-args-5.2.1.tgz", "integrity": "sha512-H4UfQhZyakIjC74I9d34fGYDwk3XpSr17QhEd0Q3I9Xq1CETHo4Hcuo87WyWHpAF1aSLjLRf5lD9ZGX2qStUvg==", - "dev": true, "dependencies": { "array-back": "^3.1.0", "find-replace": "^3.0.0", @@ -7424,7 +7411,6 @@ "version": "7.0.3", "resolved": "https://registry.npmjs.org/command-line-usage/-/command-line-usage-7.0.3.tgz", "integrity": "sha512-PqMLy5+YGwhMh1wS04mVG44oqDsgyLRSKJBdOo1bnYhMKBW65gZF1dRp2OZRhiTjgUHljy99qkO7bsctLaw35Q==", - "dev": true, "dependencies": { "array-back": "^6.2.2", "chalk-template": "^0.4.0", @@ -7439,7 +7425,6 @@ "version": "6.2.2", "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", - "dev": true, "engines": { "node": ">=12.17" } @@ -7448,7 +7433,6 @@ "version": "7.3.0", "resolved": "https://registry.npmjs.org/typical/-/typical-7.3.0.tgz", "integrity": "sha512-ya4mg/30vm+DOWfBg4YK3j2WD6TWtRkCbasOJr40CseYENzCUby/7rIvXA99JGsQHeNxLbnXdyLLxKSv3tauFw==", - "dev": true, "engines": { "node": ">=12.17" } @@ -9926,7 +9910,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/find-replace/-/find-replace-3.0.0.tgz", "integrity": "sha512-6Tb2myMioCAgv5kfvP5/PkZZ/ntTpVK39fHY7WkWBgvbeE+VHd/tZuZ4mrC+bxh4cfOZeYKVPaJIZtZXV7GNCQ==", - "dev": true, "dependencies": { "array-back": "^3.0.1" }, @@ -10011,8 +9994,7 @@ "node_modules/flatbuffers": { "version": "24.3.25", "resolved": "https://registry.npmjs.org/flatbuffers/-/flatbuffers-24.3.25.tgz", - "integrity": "sha512-3HDgPbgiwWMI9zVB7VYBHaMrbOO7Gm0v+yD2FV/sCKj+9NDeVL7BOBYUuhWAQGKWOzBo8S9WdMvV0eixO233XQ==", - "dev": true + "integrity": "sha512-3HDgPbgiwWMI9zVB7VYBHaMrbOO7Gm0v+yD2FV/sCKj+9NDeVL7BOBYUuhWAQGKWOzBo8S9WdMvV0eixO233XQ==" }, "node_modules/flatted": { "version": "3.3.1", @@ -11266,7 +11248,6 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/hpagent/-/hpagent-1.2.0.tgz", "integrity": "sha512-A91dYTeIB6NoXG+PxTQpCCDDnfHsW9kc06Lvpu1TEe9gnd6ZFeiBoRO9JvzEv6xK7EX97/dUE8g/vBMTqTS3CA==", - "dev": true, "engines": { "node": ">=14" } @@ -12344,7 +12325,6 @@ "version": "0.0.3", "resolved": "https://registry.npmjs.org/json-bignum/-/json-bignum-0.0.3.tgz", "integrity": "sha512-2WHyXj3OfHSgNyuzDbSxI1w2jgw5gkWSWhS7Qg4bWXx1nLk3jnbwfUeS0PSba3IzpTUWdHxBieELUzXRjQB2zg==", - "dev": true, "engines": { "node": ">=0.8" } @@ -13205,8 +13185,7 @@ "node_modules/lodash.camelcase": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/lodash.camelcase/-/lodash.camelcase-4.3.0.tgz", - "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==", - "dev": true + "integrity": "sha512-TwuEnCnxbc3rAvhf/LbG7tJUDzhqXyFnv3dtzLOPgCG/hODL7WFnsbwktkD7yUV0RrreP/l1PALq/YSg6VvjlA==" }, "node_modules/lodash.get": { "version": "4.4.2", @@ -17036,8 +17015,7 @@ "node_modules/secure-json-parse": { "version": "2.7.0", "resolved": "https://registry.npmjs.org/secure-json-parse/-/secure-json-parse-2.7.0.tgz", - "integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==", - "dev": true + "integrity": "sha512-6aU+Rwsezw7VR8/nyvKTx8QpWH9FrcYiXXlqC4z5d5XQBDRqtbfsRjnwGyqbi3gddNtWHuEk9OANUotL26qKUw==" }, "node_modules/semver": { "version": "7.5.3", @@ -18078,7 +18056,6 @@ "version": "4.1.1", "resolved": "https://registry.npmjs.org/table-layout/-/table-layout-4.1.1.tgz", "integrity": "sha512-iK5/YhZxq5GO5z8wb0bY1317uDF3Zjpha0QFFLA8/trAoiLbQD0HUbMesEaxyzUgDxi2QlcbM8IvqOlEjgoXBA==", - "dev": true, "dependencies": { "array-back": "^6.2.2", "wordwrapjs": "^5.1.0" @@ -18091,7 +18068,6 @@ "version": "6.2.2", "resolved": "https://registry.npmjs.org/array-back/-/array-back-6.2.2.tgz", "integrity": "sha512-gUAZ7HPyb4SJczXAMUXMGAvI976JoK3qEx9v1FTmeYuJj0IBiaKttG1ydtGKdkfqWkIkouke7nG8ufGy77+Cvw==", - "dev": true, "engines": { "node": ">=12.17" } @@ -18743,7 +18719,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/typical/-/typical-4.0.0.tgz", "integrity": "sha512-VAH4IvQ7BDFYglMd7BPRDfLgxZZX4O4TFcRDA6EN5X7erNJJq+McIEp8np9aVtxrCJ6qx4GTYVfOWNjcqwZgRw==", - "dev": true, "engines": { "node": ">=8" } @@ -18818,7 +18793,6 @@ "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici/-/undici-6.21.0.tgz", "integrity": "sha512-BUgJXc752Kou3oOIuU1i+yZZypyZRqNPW0vqoMPl8VaoalSfeR0D8/t4iAS3yirs79SSMTxTag+ZC86uswv+Cw==", - "dev": true, "engines": { "node": ">=18.17" } @@ -18826,8 +18800,7 @@ "node_modules/undici-types": { "version": "5.26.5", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "dev": true + "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" }, "node_modules/unique-filename": { "version": "2.0.1", @@ -19153,7 +19126,6 @@ "version": "5.1.0", "resolved": "https://registry.npmjs.org/wordwrapjs/-/wordwrapjs-5.1.0.tgz", "integrity": "sha512-JNjcULU2e4KJwUNv6CHgI46UvDGitb6dGryHajXTDiLgg1/RiGoPSDw4kZfYnwGtEXf2ZMeIewDQgFGzkCB2Sg==", - "dev": true, "engines": { "node": ">=12.17" } @@ -19603,10 +19575,11 @@ "@basemaps/config": "^7.11.0", "@basemaps/geo": "^7.11.0", "@basemaps/shared": "^7.11.0", + "@elastic/elasticsearch": "^8.16.2", + "@linzjs/lambda": "^4.0.0", "ua-parser-js": "^1.0.39" }, "devDependencies": { - "@elastic/elasticsearch": "^8.16.2", "@types/ua-parser-js": "^0.7.36" }, "engines": { diff --git a/packages/lambda-analytic-cloudfront/package.json b/packages/lambda-analytic-cloudfront/package.json index fdecef43f..6d9998240 100644 --- a/packages/lambda-analytic-cloudfront/package.json +++ b/packages/lambda-analytic-cloudfront/package.json @@ -21,6 +21,8 @@ "@basemaps/config": "^7.11.0", "@basemaps/geo": "^7.11.0", "@basemaps/shared": "^7.11.0", + "@elastic/elasticsearch": "^8.16.2", + "@linzjs/lambda": "^4.0.0", "ua-parser-js": "^1.0.39" }, "scripts": { @@ -28,7 +30,6 @@ "bundle": "../../scripts/bundle.mjs package.json" }, "devDependencies": { - "@elastic/elasticsearch": "^8.16.2", "@types/ua-parser-js": "^0.7.36" }, "bundle": { From 754021f12cbd40901544c9dc33669f6c6b0ccc9a Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 16 Dec 2024 13:45:55 +1300 Subject: [PATCH 09/13] refactor: fixup key hiding to be consistent --- packages/lambda-analytic-cloudfront/src/log.reader.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/lambda-analytic-cloudfront/src/log.reader.ts b/packages/lambda-analytic-cloudfront/src/log.reader.ts index c3a9a8dc6..521804875 100644 --- a/packages/lambda-analytic-cloudfront/src/log.reader.ts +++ b/packages/lambda-analytic-cloudfront/src/log.reader.ts @@ -52,8 +52,8 @@ const IsoDateHour = 13; // 2023-06-12:T01 * Hide the full API key from the log analytics */ function hideApiKey(str: string): string { - if (str.startsWith('d')) return 'd..' + str.slice(str.length - 6); - if (str.startsWith('c')) return 'c..' + str.slice(str.length - 6); + if (str.startsWith('d')) return 'd' + str.slice(str.length - 6); + if (str.startsWith('c')) return 'c' + str.slice(str.length - 6); return str; } From 1f315ca835d6e94662eba3a6cbbc78f63066aa27 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 16 Dec 2024 13:51:17 +1300 Subject: [PATCH 10/13] refactor: loop backwards through hours --- packages/lambda-analytic-cloudfront/src/handler.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/lambda-analytic-cloudfront/src/handler.ts b/packages/lambda-analytic-cloudfront/src/handler.ts index b50c72f62..59191aa41 100644 --- a/packages/lambda-analytic-cloudfront/src/handler.ts +++ b/packages/lambda-analytic-cloudfront/src/handler.ts @@ -55,7 +55,7 @@ export async function main(req: LambdaRequest): Promise { if (processedCount > MaxToProcess) break; const todo = []; - for (let hour = 0; hour < 24; hour++) { + for (let hour = 23; hour >= 0; hour--) { processedCount++; if (processedCount > MaxToProcess) break; From a0ac5a3705459505c608b8d2b4b0334e89b02348 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 16 Dec 2024 13:54:34 +1300 Subject: [PATCH 11/13] refactor: add util to run lambda locally --- packages/lambda-analytic-cloudfront/src/bin.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 packages/lambda-analytic-cloudfront/src/bin.ts diff --git a/packages/lambda-analytic-cloudfront/src/bin.ts b/packages/lambda-analytic-cloudfront/src/bin.ts new file mode 100644 index 000000000..88d358e35 --- /dev/null +++ b/packages/lambda-analytic-cloudfront/src/bin.ts @@ -0,0 +1,10 @@ +import { LogConfig } from '@basemaps/shared'; +import { LambdaRequest } from '@linzjs/lambda'; +import { Context } from 'aws-lambda'; + +import { main } from './handler.js'; + +/** + * Manually run the lambda function, this can be helpful for debugging the analytic roll up process + */ +main(new LambdaRequest(null, {} as Context, LogConfig.get())).catch((e) => console.error(e)); From 9059a86969ec520337af7e492a1f93cc239d776f Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 16 Dec 2024 13:59:09 +1300 Subject: [PATCH 12/13] feat: allow configuring the elastic index name --- packages/_infra/src/analytics/edge.analytics.ts | 3 +++ packages/_infra/src/config.ts | 5 +++++ packages/lambda-analytic-cloudfront/src/elastic.ts | 9 ++++++++- packages/shared/src/const.ts | 2 ++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/packages/_infra/src/analytics/edge.analytics.ts b/packages/_infra/src/analytics/edge.analytics.ts index e5e338c94..977ba7182 100644 --- a/packages/_infra/src/analytics/edge.analytics.ts +++ b/packages/_infra/src/analytics/edge.analytics.ts @@ -7,6 +7,8 @@ import { RetentionDays } from 'aws-cdk-lib/aws-logs'; import { BlockPublicAccess, Bucket } from 'aws-cdk-lib/aws-s3'; import { Construct } from 'constructs'; +import { getConfig } from '../config.js'; + const CodePath = '../lambda-analytics/dist'; const CodePathV2 = '../lambda-analytics-cloudfront/dist'; @@ -68,6 +70,7 @@ export class EdgeAnalytics extends Stack { [Env.Analytics.MaxRecords]: String(24 * 7 * 4), [Env.Analytics.ElasticId]: Env.get(Env.Analytics.ElasticId) ?? '', [Env.Analytics.ElasticApiKey]: Env.get(Env.Analytics.ElasticApiKey) ?? '', + [Env.Analytics.ElasticIndexName]: getConfig().ElasticHistoryIndexName, AWS_NODEJS_CONNECTION_REUSE_ENABLED: '1', }, logRetention: RetentionDays.ONE_MONTH, diff --git a/packages/_infra/src/config.ts b/packages/_infra/src/config.ts index c82ab41a6..91662fb55 100644 --- a/packages/_infra/src/config.ts +++ b/packages/_infra/src/config.ts @@ -12,6 +12,9 @@ export interface BaseMapsConfig { /** AWS role config bucket */ AwsRoleConfigBucket: string; + + /** Elastic Index to use for basemaps history data */ + ElasticHistoryIndexName: string; } export const BaseMapsProdConfig: BaseMapsConfig = { @@ -26,6 +29,7 @@ export const BaseMapsProdConfig: BaseMapsConfig = { CloudFrontDns: ['basemaps.linz.govt.nz', 'tiles.basemaps.linz.govt.nz'], PublicUrlBase: 'https://basemaps.linz.govt.nz', AwsRoleConfigBucket: 'linz-bucket-config', + ElasticHistoryIndexName: 'basemaps-history', }; export const BaseMapsDevConfig: BaseMapsConfig = { @@ -33,6 +37,7 @@ export const BaseMapsDevConfig: BaseMapsConfig = { CloudFrontDns: ['dev.basemaps.linz.govt.nz', 'tiles.dev.basemaps.linz.govt.nz'], PublicUrlBase: 'https://dev.basemaps.linz.govt.nz', AwsRoleConfigBucket: 'linz-bucket-config', + ElasticHistoryIndexName: 'nonprod-basemaps-history', }; /** Is this deployment intended for production */ export const IsProduction = process.env['NODE_ENV'] === 'production'; diff --git a/packages/lambda-analytic-cloudfront/src/elastic.ts b/packages/lambda-analytic-cloudfront/src/elastic.ts index 5da2877e4..5f877519d 100644 --- a/packages/lambda-analytic-cloudfront/src/elastic.ts +++ b/packages/lambda-analytic-cloudfront/src/elastic.ts @@ -17,6 +17,12 @@ export class ElasticClient { */ minRequestCount: number = 1; + get indexName(): string { + const indexName = Env.get(Env.Analytics.ElasticIndexName); + if (indexName == null) throw new Error(`$${Env.Analytics.ElasticIndexName} is unset`); + return indexName; + } + get client(): Client { if (this._client != null) return this._client; @@ -46,6 +52,7 @@ export class ElasticClient { const errors = this.errors; const indexDelay = this.indexDelay; + const indexName = this.indexName; async function doInsert(): Promise { inserts += operations.length / 2; @@ -67,7 +74,7 @@ export class ElasticClient { skipHits++; continue; } - operations.push({ index: { _index: 'basemaps-history-' + rec['@timestamp'].slice(0, 4), _id: rec.id } }, rec); + operations.push({ index: { _index: indexName + '-' + rec['@timestamp'].slice(0, 4), _id: rec.id } }, rec); if (operations.length > 50_000) await doInsert(); } diff --git a/packages/shared/src/const.ts b/packages/shared/src/const.ts index 0ad4d4942..1f3f35a9d 100644 --- a/packages/shared/src/const.ts +++ b/packages/shared/src/const.ts @@ -64,6 +64,8 @@ export const Env = { ElasticId: 'ELASTIC_ID', /** ElasticSearch's API key */ ElasticApiKey: 'ELASTIC_API_KEY', + /** Index to use for storing analytic data */ + ElasticIndexName: 'ELASTIC_INDEX_NAME', } as const, /** Load a environment var defaulting to defaultOutput if it does not exist */ From 746b4ee13f38ea60de4c863d360ff21f5fffa740 Mon Sep 17 00:00:00 2001 From: Blayne Chard Date: Mon, 16 Dec 2024 14:13:14 +1300 Subject: [PATCH 13/13] fix: prevent processing the current hour --- packages/lambda-analytic-cloudfront/src/handler.ts | 8 ++++++-- packages/lambda-analytic-cloudfront/src/log.reader.ts | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/lambda-analytic-cloudfront/src/handler.ts b/packages/lambda-analytic-cloudfront/src/handler.ts index 59191aa41..4935f2221 100644 --- a/packages/lambda-analytic-cloudfront/src/handler.ts +++ b/packages/lambda-analytic-cloudfront/src/handler.ts @@ -8,7 +8,7 @@ import { basename } from 'path'; import { byDay, getYesterday } from './date.js'; import { Elastic } from './elastic.js'; -import { FileProcess } from './log.reader.js'; +import { FileProcess, toFullDate } from './log.reader.js'; import { LogStats } from './log.stats.js'; const gzipPromise = promisify(gzip); @@ -58,10 +58,14 @@ export async function main(req: LambdaRequest): Promise { for (let hour = 23; hour >= 0; hour--) { processedCount++; if (processedCount > MaxToProcess) break; - const hourOfDay = String(hour).padStart(2, '0'); const prefix = `${prefixByDay}-${hourOfDay}`; + const targetDate = new Date(toFullDate(prefixByDay + 'T' + hourOfDay)); + const dateDiff = Date.now() - targetDate.getTime(); + // Do not process anything within a hour of the current time as some logs take a while to propagate into the bucket + if (dateDiff < 60 * 60 * 1000) continue; + // Create a folder structure of /YYYY/MM/ const cacheFolderParts = prefix.slice(0, 7).replace('-', '/'); diff --git a/packages/lambda-analytic-cloudfront/src/log.reader.ts b/packages/lambda-analytic-cloudfront/src/log.reader.ts index 521804875..bc0e82c69 100644 --- a/packages/lambda-analytic-cloudfront/src/log.reader.ts +++ b/packages/lambda-analytic-cloudfront/src/log.reader.ts @@ -58,7 +58,7 @@ function hideApiKey(str: string): string { } const empty: Record = { webp: 0, png: 0, jpeg: 0 }; -function toFullDate(x: string): string { +export function toFullDate(x: string): string { if (x.length === IsoDateMonth) return `${x}-01T00:00:00.000Z`; if (x.length === IsoDateDay) return `${x}T00:00:00.000Z`; if (x.length === IsoDateHour) return `${x}:00:00.000Z`;