Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: improved cloudwatch analytics rollup BM-1092 #3381

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
288 changes: 272 additions & 16 deletions package-lock.json

Large diffs are not rendered by default.

35 changes: 33 additions & 2 deletions packages/_infra/src/analytics/edge.analytics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ import { RetentionDays } from 'aws-cdk-lib/aws-logs';
import { BlockPublicAccess, Bucket } from 'aws-cdk-lib/aws-s3';
import { Construct } from 'constructs';

const CODE_PATH = '../lambda-analytics/dist';
import { getConfig } from '../config.js';

const CodePath = '../lambda-analytics/dist';
const CodePathV2 = '../lambda-analytics-cloudfront/dist';

export interface EdgeAnalyticsProps extends StackProps {
distributionId: string;
Expand Down Expand Up @@ -36,7 +39,7 @@ export class EdgeAnalytics extends Stack {
memorySize: 2048,
timeout: Duration.minutes(10),
handler: 'index.handler',
code: lambda.Code.fromAsset(CODE_PATH),
code: lambda.Code.fromAsset(CodePath),
environment: {
[Env.Analytics.CloudFrontId]: distributionId,
[Env.Analytics.CacheBucket]: `s3://${cacheBucket.bucketName}`,
Expand All @@ -53,5 +56,33 @@ export class EdgeAnalytics extends Stack {
// Run this lambda function every hour
const rule = new Rule(this, 'AnalyticRule', { schedule: Schedule.rate(Duration.hours(1)) });
rule.addTarget(new LambdaFunction(this.lambda));

const v2Lambda = new lambda.Function(this, 'AnalyticV2Lambda', {
runtime: lambda.Runtime.NODEJS_LATEST,
memorySize: 2048,
timeout: Duration.minutes(10),
handler: 'index.handler',
code: lambda.Code.fromAsset(CodePathV2),
environment: {
[Env.Analytics.CloudFrontId]: distributionId,
[Env.Analytics.CacheBucket]: `s3://${cacheBucket.bucketName}`,
[Env.Analytics.CloudFrontSourceBucket]: `s3://${logBucket.bucketName}`,
[Env.Analytics.MaxRecords]: String(24 * 7 * 4),
[Env.Analytics.ElasticId]: Env.get(Env.Analytics.ElasticId) ?? '',
[Env.Analytics.ElasticApiKey]: Env.get(Env.Analytics.ElasticApiKey) ?? '',
[Env.Analytics.ElasticIndexName]: getConfig().ElasticHistoryIndexName,
AWS_NODEJS_CONNECTION_REUSE_ENABLED: '1',
},
logRetention: RetentionDays.ONE_MONTH,
loggingFormat: lambda.LoggingFormat.JSON,
});

cacheBucket.grantReadWrite(v2Lambda);
logBucket.grantRead(v2Lambda);

// Run this lambda function every hour
new Rule(this, 'AnalyticV2Rule', { schedule: Schedule.rate(Duration.hours(1)) }).addTarget(
new LambdaFunction(v2Lambda),
);
}
}
5 changes: 5 additions & 0 deletions packages/_infra/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ export interface BaseMapsConfig {

/** AWS role config bucket */
AwsRoleConfigBucket: string;

/** Elastic Index to use for basemaps history data */
ElasticHistoryIndexName: string;
}

export const BaseMapsProdConfig: BaseMapsConfig = {
Expand All @@ -26,13 +29,15 @@ export const BaseMapsProdConfig: BaseMapsConfig = {
CloudFrontDns: ['basemaps.linz.govt.nz', 'tiles.basemaps.linz.govt.nz'],
PublicUrlBase: 'https://basemaps.linz.govt.nz',
AwsRoleConfigBucket: 'linz-bucket-config',
ElasticHistoryIndexName: 'basemaps-history',
};

export const BaseMapsDevConfig: BaseMapsConfig = {
CogBucket: ['basemaps-cog-test', ...BaseMapsProdConfig.CogBucket],
CloudFrontDns: ['dev.basemaps.linz.govt.nz', 'tiles.dev.basemaps.linz.govt.nz'],
PublicUrlBase: 'https://dev.basemaps.linz.govt.nz',
AwsRoleConfigBucket: 'linz-bucket-config',
ElasticHistoryIndexName: 'nonprod-basemaps-history',
};
/** Is this deployment intended for production */
export const IsProduction = process.env['NODE_ENV'] === 'production';
Expand Down
Empty file.
5 changes: 5 additions & 0 deletions packages/lambda-analytic-cloudfront/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# @basemaps/lambda-analytic-cloudfront

Generate analytics from CloudFront distribution statistics

Every hour this lambda function runs and generates a rolled up summary of usage by API Key
42 changes: 42 additions & 0 deletions packages/lambda-analytic-cloudfront/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"name": "@basemaps/lambda-analytic-cloudfront",
"version": "7.11.0",
"private": true,
"repository": {
"type": "git",
"url": "https://github.com/linz/basemaps.git",
"directory": "packages/lambda-analytic-cloudfront"
},
"author": {
"name": "Land Information New Zealand",
"url": "https://linz.govt.nz",
"organization": true
},
"type": "module",
"engines": {
"node": ">=16.0.0"
},
"license": "MIT",
"dependencies": {
"@basemaps/config": "^7.11.0",
"@basemaps/geo": "^7.11.0",
"@basemaps/shared": "^7.11.0",
"@elastic/elasticsearch": "^8.16.2",
"@linzjs/lambda": "^4.0.0",
"ua-parser-js": "^1.0.39"
},
"scripts": {
"test": "node --test",
"bundle": "../../scripts/bundle.mjs package.json"
},
"devDependencies": {
"@types/ua-parser-js": "^0.7.36"
},
"bundle": {
"entry": "src/index.ts",
"outdir": "dist/",
"external": [
"pino-pretty"
]
}
}
133 changes: 133 additions & 0 deletions packages/lambda-analytic-cloudfront/src/__test__/analytics.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import assert from 'node:assert';
import { afterEach, beforeEach, describe, it, TestContext } from 'node:test';
import { gzipSync } from 'node:zlib';

import { Env, fsa, FsMemory, LogConfig } from '@basemaps/shared';
import { Client } from '@elastic/elasticsearch';
import { LambdaRequest } from '@linzjs/lambda';
import { Context } from 'aws-lambda';

import { getYesterday } from '../date.js';
import { Elastic } from '../elastic.js';
import { main } from '../handler.js';
import { LogStats } from '../log.stats.js';
import { LogData } from './log.data.js';

interface IndexOperation {
index: { _index: string };
}
type BulkOperation = (IndexOperation | LogStats)[];

export class FakeLambdaRequest extends LambdaRequest {
constructor() {
super({}, {} as Context, LogConfig.get());
}
}

describe('analytic lambda', () => {
const memory = new FsMemory();
beforeEach(() => {
fsa.register('mem://', memory);
memory.files.clear();

Elastic.indexDelay = 1; // do not wait between requests
Elastic.minRequestCount = 0; // index everything
Elastic._client = undefined;
LogConfig.get().level = 'silent';
});

afterEach(() => {
LogConfig.get().level = 'info';
});

function setupEnv(t: TestContext): void {
t.mock.method(Env, 'get', (key: string): string => {
switch (key) {
case Env.Analytics.CacheBucket:
return 'mem://cache/';
case Env.Analytics.CloudFrontSourceBucket:
return 'mem://source/';
case Env.Analytics.CloudFrontId:
return 'cfid';
case Env.Analytics.MaxRecords:
return '1';
}
throw new Error(`Invalid test process.env access ${key}`);
});
}

it('should process some log data', async (t) => {
setupEnv(t);

const operations: BulkOperation[] = [];
Elastic._client = {
bulk(op: { operations: BulkOperation }) {
operations.push(op.operations);
return Promise.resolve({});
},
} as unknown as Client;

const YesterDay = getYesterday();
const shortDate = YesterDay.toISOString().slice(0, 13).replace('T', '-');

await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData));

await main(new FakeLambdaRequest());

// One call to insert
assert.equal(operations.length, 1);

const op = operations[0];

const indexOpt = op[0] as IndexOperation;
const logOpt = op[1] as LogStats;

// First Log line: /v1/tiles/aerial/EPSG:3857/19/516588/320039.webp
assert.equal(indexOpt.index._index, 'basemaps-history-2020');
assert.equal(logOpt.apiType, 'd');
assert.equal(logOpt.tileMatrix, 'EPSG:3857');
assert.equal(logOpt.tileMatrixId, 'WebMercatorQuad');
assert.equal(logOpt.tileSet, 'aerial');
assert.equal(logOpt.z, 19);
assert.equal(logOpt.cacheHit, 1);
assert.equal(logOpt.cacheMiss, 0);
assert.equal(logOpt.total, 1);

assert.deepEqual(logOpt.ua, { os: 'linux', name: 'chrome', version: '85', variant: 'unknown' });

const files = [...memory.files.keys()];
assert.equal(files.length, 2); // two files one input one output

assert.equal(
files[1],
`mem://cache/RollUpV3/${shortDate.slice(0, 4)}/${shortDate.slice(5, 7)}/${shortDate}.ndjson.gz`,
);
});

it('should write errors to storage', async (t) => {
setupEnv(t);

Elastic._client = {
bulk() {
return Promise.resolve({ errors: ['Hello'] });
},
} as unknown as Client;

const YesterDay = getYesterday();
const shortDate = YesterDay.toISOString().slice(0, 13).replace('T', '-');

await fsa.write(new URL(`mem://source/cfid.${shortDate}/data.txt.gz`), gzipSync(LogData));

const ret = await main(new FakeLambdaRequest()).catch((e: Error) => e);

assert.equal(String(ret), 'Error: Failed to index');

const files = [...memory.files.keys()];
assert.equal(files.length, 2); // two files one input one output

assert.ok(files[1].startsWith(`mem://cache/errors-${new Date().toISOString().slice(0, 12)}`));

const data = await fsa.read(new URL(files[1]));
assert.ok(data.toString().includes(JSON.stringify('Hello')));
});
});
15 changes: 15 additions & 0 deletions packages/lambda-analytic-cloudfront/src/__test__/log.data.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { ulid } from 'ulid';

export const DevApiKey = 'd' + ulid().toLowerCase();
export const ClientApiKey = 'c' + ulid().toLowerCase();

export const LogData = `#Version: 1.0
#Fields: date time x-edge-location sc-bytes c-ip cs-method cs(Host) cs-uri-stem sc-status cs(Referer) cs(User-Agent) cs-uri-query cs(Cookie) x-edge-result-type x-edge-request-id x-host-header cs-protocol cs-bytes time-taken x-forwarded-for ssl-protocol ssl-cipher x-edge-response-result-type cs-protocol-version fle-status fle-encrypted-fields c-port time-to-first-byte x-edge-detailed-result-type sc-content-type sc-content-len sc-range-start sc-range-end
2020-07-28 01:11:25 AKL50-C1 20753 255.255.255.141 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/aerial/EPSG:3857/19/516588/320039.webp 200 https://bar.com/ Mozilla/5.0%20(X11;%20Linux%20x86_64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/85.0.4183.101%20Safari/537.36 api=${DevApiKey} - Hit sBUoz03SwR_hVZkdj0LVC1s_bKakd9ONcKTYRrQLuIR3VPBQUx5xog== basemaps.linz.govt.nz https 82 0.049 - TLSv1.3 TLS_AES_128_GCM_SHA256 Hit HTTP/2.0 -- 21780 0.049 Hit image/webp 20320 - -
2020-07-28 01:16:13 SYD1-C2 156474 255.255.255.4 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/aerial/NZTM2000Quad/19/516542/319785.png 200 https://www.bar.com/ Mozilla/5.0%20(Macintosh;%20Intel%20Mac%20OS%20X%2010_15_4)%20AppleWebKit/605.1.15%20(KHTML,%20like%20Gecko)%20Version/13.1.2%20Safari/605.1.15 api=${DevApiKey}&foo=bar - Hit 9KNnEESjZA-yVs62ffwtRYNaa0gpYKLeEEHH490dmO7AAu3ZxnPc8Q== basemaps.linz.govt.nz https 77 1.791 - TLSv1.3 TLS_AES_128_GCM_SHA256 Hit HTTP/2.0 - - 19468 0.028 Hit image/png 155886 - -
2020-07-28 01:16:21 SYD1-C2 21223 255.255.255.73 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/3857/18/257866/162011.jpeg 200 https://bar.com/map/ Mozilla/5.0%20(Windows%20NT%2010.0;%20Win64;%20x64)%20AppleWebKit/537.36%20(KHTML,%20like%20Gecko)%20Chrome/85.0.4183.102%20Safari/537.36 api=${DevApiKey} - Miss a5nrTCsdsP5EDQ9EXkUQQJMCJTlbRUz5JIxowZ-1kRriRDUmLPxvVQ== basemaps.linz.govt.nz https 76 0.222 - TLSv1.3 TLS_AES_128_GCM_SHA256 Miss HTTP/2.0 - - 57799 0.222 Miss image/jpeg 20797 - -
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/EPSG:3857/WMTSCapabilities.xml 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/topo50/EPSG:2193/18/257866/162011.pbf 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/antipodes-islands-satellite-2019-2020-0.5m/NZTM2000Quad/18/257866/162011.webp 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey} - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
2020-07-28 01:13:33 SYD4-C2 2588 255.255.255.128 GET d1mez8rta20vo0.cloudfront.net /v1/tiles/elevation/WebMercatorQuad/18/257866/162011.png 200 - Mozilla/5.0%20QGIS/31006 api=${ClientApiKey}&pipeline=terrain-rgb - RefreshHit oflBr-vO5caoVpi2S23hGh9YWMUca-McU_Fl5oN9fqW_H9ea_iS-Kg== basemaps.linz.govt.nz https 243 0.051 - TLSv1.2 ECDHE-RSA-AES128-GCM-SHA256 RefreshHit HTTP/1.1 - - 55515 0.050 RefreshHit text/xml -
`.trim();
10 changes: 10 additions & 0 deletions packages/lambda-analytic-cloudfront/src/bin.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import { LogConfig } from '@basemaps/shared';
import { LambdaRequest } from '@linzjs/lambda';
import { Context } from 'aws-lambda';

import { main } from './handler.js';

/**
* Manually run the lambda function, this can be helpful for debugging the analytic roll up process
*/
main(new LambdaRequest(null, {} as Context, LogConfig.get())).catch((e) => console.error(e));
33 changes: 33 additions & 0 deletions packages/lambda-analytic-cloudfront/src/date.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
export function getYesterday(): Date {
// Process up to about a day ago
const maxDate = new Date();
maxDate.setUTCMinutes(0);
maxDate.setUTCSeconds(0);
maxDate.setUTCMilliseconds(0);
maxDate.setUTCDate(maxDate.getUTCDate() - 1);
return maxDate;
}

export function* byDay(startDate: Date, endDate: Date): Generator<string> {
const currentDate = new Date(startDate);
currentDate.setUTCMinutes(0);
currentDate.setUTCSeconds(0);
currentDate.setUTCMilliseconds(0);
while (true) {
yield currentDate.toISOString().slice(0, 10);
currentDate.setUTCDate(currentDate.getUTCDate() - 1);
if (currentDate.getTime() < endDate.getTime()) break;
}
}

export function* byMonth(startDate: Date, endDate: Date): Generator<string> {
const currentDate = new Date(startDate);
currentDate.setUTCMinutes(0);
currentDate.setUTCSeconds(0);
currentDate.setUTCMilliseconds(0);
while (true) {
yield currentDate.toISOString().slice(0, 7);
currentDate.setUTCMonth(currentDate.getUTCMonth() - 1);
if (currentDate.getTime() < endDate.getTime()) break;
}
}
Loading
Loading