Skip to content

Commit

Permalink
Switch OSD Optimizer to rely on file hashes instead of mtimes (open…
Browse files Browse the repository at this point in the history
…search-project#8472)

* Hotswap getMtimes to compute sha1 hashes

Signed-off-by: Simeon Widdis <[email protected]>

* Rename all the things

Signed-off-by: Simeon Widdis <[email protected]>

* Changeset file for PR opensearch-project#8472 created/updated

---------

Signed-off-by: Simeon Widdis <[email protected]>
Co-authored-by: opensearch-changeset-bot[bot] <154024398+opensearch-changeset-bot[bot]@users.noreply.github.com>
  • Loading branch information
Swiddis and opensearch-changeset-bot[bot] authored Oct 9, 2024
1 parent 98df4dd commit 0609ecf
Show file tree
Hide file tree
Showing 12 changed files with 87 additions and 78 deletions.
2 changes: 2 additions & 0 deletions changelogs/fragments/8472.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
infra:
- Switch OSD build cache to rely on file hashes instead of `mtime`s ([#8472](https://github.com/opensearch-project/OpenSearch-Dashboards/pull/8472))
2 changes: 1 addition & 1 deletion packages/osd-optimizer/src/common/bundle.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ it('creates cache keys', () => {
)
).toMatchInlineSnapshot(`
Object {
"mtimes": Object {
"hashes": Object {
"/foo/bar/a": 123,
"/foo/bar/c": 789,
},
Expand Down
8 changes: 4 additions & 4 deletions packages/osd-optimizer/src/common/bundle.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,13 @@ export class Bundle {

/**
* Calculate the cache key for this bundle based from current
* mtime values.
* hash values.
*/
createCacheKey(files: string[], mtimes: Map<string, number | undefined>): unknown {
createCacheKey(files: string[], hashes: Map<string, string | undefined>): unknown {
return {
spec: this.toSpec(),
mtimes: entriesToObject(
files.map((p) => [p, mtimes.get(p)] as const).sort(ascending((e) => e[0]))
hashes: entriesToObject(
files.map((p) => [p, hashes.get(p)] as const).sort(ascending((e) => e[0]))
),
};
}
Expand Down
22 changes: 11 additions & 11 deletions packages/osd-optimizer/src/integration_tests/bundle_cache.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import cpy from 'cpy';
import del from 'del';
import { createAbsolutePathSerializer } from '@osd/dev-utils';

import { getMtimes } from '../optimizer/get_mtimes';
import { getHashes } from '../optimizer/get_hashes';
import { OptimizerConfig } from '../optimizer/optimizer_config';
import { allValuesFrom, Bundle } from '../common';
import { getBundleCacheEvent$ } from '../optimizer/bundle_cache';
Expand Down Expand Up @@ -77,8 +77,8 @@ it('emits "bundle cached" event when everything is updated', async () => {
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'),
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'),
];
const mtimes = await getMtimes(files);
const cacheKey = bundle.createCacheKey(files, mtimes);
const hashes = await getHashes(files);
const cacheKey = bundle.createCacheKey(files, hashes);

bundle.cache.set({
cacheKey,
Expand Down Expand Up @@ -116,8 +116,8 @@ it('emits "bundle not cached" event when cacheKey is up to date but caching is d
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'),
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'),
];
const mtimes = await getMtimes(files);
const cacheKey = bundle.createCacheKey(files, mtimes);
const hashes = await getHashes(files);
const cacheKey = bundle.createCacheKey(files, hashes);

bundle.cache.set({
cacheKey,
Expand Down Expand Up @@ -155,8 +155,8 @@ it('emits "bundle not cached" event when optimizerCacheKey is missing', async ()
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'),
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'),
];
const mtimes = await getMtimes(files);
const cacheKey = bundle.createCacheKey(files, mtimes);
const hashes = await getHashes(files);
const cacheKey = bundle.createCacheKey(files, hashes);

bundle.cache.set({
cacheKey,
Expand Down Expand Up @@ -194,8 +194,8 @@ it('emits "bundle not cached" event when optimizerCacheKey is outdated, includes
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'),
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'),
];
const mtimes = await getMtimes(files);
const cacheKey = bundle.createCacheKey(files, mtimes);
const hashes = await getHashes(files);
const cacheKey = bundle.createCacheKey(files, hashes);

bundle.cache.set({
cacheKey,
Expand Down Expand Up @@ -238,8 +238,8 @@ it('emits "bundle not cached" event when bundleRefExportIds is outdated, include
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/index.ts'),
Path.resolve(MOCK_REPO_DIR, 'plugins/foo/public/lib.ts'),
];
const mtimes = await getMtimes(files);
const cacheKey = bundle.createCacheKey(files, mtimes);
const hashes = await getHashes(files);
const cacheKey = bundle.createCacheKey(files, hashes);

bundle.cache.set({
cacheKey,
Expand Down
16 changes: 8 additions & 8 deletions packages/osd-optimizer/src/node/cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ const dbName = (db: LMDB.Database) =>
export class Cache {
private readonly codes: LMDB.RootDatabase<string, string>;
private readonly atimes: LMDB.Database<string, string>;
private readonly mtimes: LMDB.Database<string, string>;
private readonly hashes: LMDB.Database<string, string>;
private readonly sourceMaps: LMDB.Database<string, string>;
private readonly pathRoots: string[];
private readonly prefix: string;
Expand Down Expand Up @@ -82,8 +82,8 @@ export class Cache {
encoding: 'string',
});

this.mtimes = this.codes.openDB('mtimes', {
name: 'mtimes',
this.hashes = this.codes.openDB('hashes', {
name: 'hashes',
encoding: 'string',
});

Expand All @@ -106,8 +106,8 @@ export class Cache {
}
}

getMtime(path: string) {
return this.safeGet(this.mtimes, this.getKey(path));
getFileHash(path: string) {
return this.safeGet(this.hashes, this.getKey(path));
}

getCode(path: string) {
Expand All @@ -131,12 +131,12 @@ export class Cache {
}
}

async update(path: string, file: { mtime: string; code: string; map: any }) {
async update(path: string, file: { filehash: string; code: string; map: any }) {
const key = this.getKey(path);

await Promise.all([
this.safePut(this.atimes, key, GLOBAL_ATIME),
this.safePut(this.mtimes, key, file.mtime),
this.safePut(this.hashes, key, file.filehash),
this.safePut(this.codes, key, file.code),
this.safePut(this.sourceMaps, key, JSON.stringify(file.map)),
]);
Expand Down Expand Up @@ -223,7 +223,7 @@ export class Cache {
// if a future version starts returning independent promises so
// this is just for some future-proofing
promises.add(this.atimes.remove(k));
promises.add(this.mtimes.remove(k));
promises.add(this.hashes.remove(k));
promises.add(this.codes.remove(k));
promises.add(this.sourceMaps.remove(k));
}
Expand Down
6 changes: 3 additions & 3 deletions packages/osd-optimizer/src/node/node_auto_tranpilation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ function determineCachePrefix() {

function compile(cache: Cache, source: string, path: string) {
try {
const mtime = `${Fs.statSync(path).mtimeMs}`;
if (cache.getMtime(path) === mtime) {
const filehash = Crypto.createHash('sha1').update(Fs.readFileSync(path)).digest('base64');
if (cache.getFileHash(path) === filehash) {
const code = cache.getCode(path);
if (code) {
// code *should* always be defined, but if it isn't for some reason rebuild it
Expand All @@ -122,7 +122,7 @@ function compile(cache: Cache, source: string, path: string) {
}

cache.update(path, {
mtime,
filehash,
map: result.map,
code: result.code,
});
Expand Down
6 changes: 3 additions & 3 deletions packages/osd-optimizer/src/optimizer/bundle_cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ import { mergeAll } from 'rxjs/operators';
import { Bundle, BundleRefs } from '../common';

import { OptimizerConfig } from './optimizer_config';
import { getMtimes } from './get_mtimes';
import { getHashes } from './get_hashes';
import { diffCacheKey } from './cache_keys';

export type BundleCacheEvent = BundleNotCachedEvent | BundleCachedEvent;
Expand Down Expand Up @@ -136,7 +136,7 @@ export function getBundleCacheEvent$(
eligibleBundles.push(bundle);
}

const mtimes = await getMtimes(
const hashes = await getHashes(
new Set<string>(
eligibleBundles.reduce(
(acc: string[], bundle) => [...acc, ...(bundle.cache.getReferencedFiles() || [])],
Expand All @@ -148,7 +148,7 @@ export function getBundleCacheEvent$(
for (const bundle of eligibleBundles) {
const diff = diffCacheKey(
bundle.cache.getCacheKey(),
bundle.createCacheKey(bundle.cache.getReferencedFiles() || [], mtimes)
bundle.createCacheKey(bundle.cache.getReferencedFiles() || [], hashes)
);

if (diff) {
Expand Down
12 changes: 6 additions & 6 deletions packages/osd-optimizer/src/optimizer/cache_keys.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ jest.mock('./get_changes.ts', () => ({
]),
}));

jest.mock('./get_mtimes.ts', () => ({
getMtimes: async (paths: string[]) => new Map(paths.map((path) => [path, 12345])),
jest.mock('./get_hashes.ts', () => ({
getHashes: async (paths: string[]) => new Map(paths.map((path) => [path, '<content hash>'])),
}));

jest.mock('execa');
Expand Down Expand Up @@ -88,11 +88,11 @@ describe('getOptimizerCacheKey()', () => {
"deletedPaths": Array [
"/foo/bar/c",
],
"lastCommit": "<last commit sha>",
"modifiedTimes": Object {
"/foo/bar/a": 12345,
"/foo/bar/b": 12345,
"fileHashes": Object {
"/foo/bar/a": "<content hash>",
"/foo/bar/b": "<content hash>",
},
"lastCommit": "<last commit sha>",
"workerConfig": Object {
"browserslistEnv": "dev",
"dist": false,
Expand Down
14 changes: 7 additions & 7 deletions packages/osd-optimizer/src/optimizer/cache_keys.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ import { diff } from 'jest-diff';
import jsonStable from 'json-stable-stringify';
import { ascending, CacheableWorkerConfig } from '../common';

import { getMtimes } from './get_mtimes';
import { getHashes } from './get_hashes';
import { getChanges } from './get_changes';
import { OptimizerConfig } from './optimizer_config';

Expand Down Expand Up @@ -138,7 +138,7 @@ export interface OptimizerCacheKey {
readonly bootstrap: string | undefined;
readonly workerConfig: CacheableWorkerConfig;
readonly deletedPaths: string[];
readonly modifiedTimes: Record<string, number>;
readonly fileHashes: Record<string, string>;
}

async function getLastCommit() {
Expand Down Expand Up @@ -181,14 +181,14 @@ export async function getOptimizerCacheKey(config: OptimizerConfig) {
lastCommit,
bootstrap,
deletedPaths,
modifiedTimes: {} as Record<string, number>,
fileHashes: {} as Record<string, string>,
workerConfig: config.getCacheableWorkerConfig(),
};

const mtimes = await getMtimes(modifiedPaths);
for (const [path, mtime] of Array.from(mtimes.entries()).sort(ascending((e) => e[0]))) {
if (typeof mtime === 'number') {
cacheKeys.modifiedTimes[path] = mtime;
const hashes = await getHashes(modifiedPaths);
for (const [path, filehash] of Array.from(hashes.entries()).sort(ascending((e) => e[0]))) {
if (typeof filehash === 'string') {
cacheKeys.fileHashes[path] = filehash;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,28 +30,36 @@

jest.mock('fs');

import { getMtimes } from './get_mtimes';
import { getHashes } from './get_hashes';

const { stat }: { stat: jest.Mock } = jest.requireMock('fs');
const { stat, readFile }: { stat: jest.Mock; readFile: jest.Mock } = jest.requireMock('fs');

it('returns mtimes Map', async () => {
it('returns hashes Map', async () => {
stat.mockImplementation((path, cb) => {
if (path.includes('missing')) {
const error = new Error('file not found');
(error as any).code = 'ENOENT';
cb(error);
} else {
cb(null, {
mtimeMs: 1234,
});
cb(null, {});
}
});

await expect(getMtimes(['/foo/bar', '/foo/missing', '/foo/baz', '/foo/bar'])).resolves
readFile.mockImplementation((path, cb) => {
if (path.includes('missing')) {
const error = new Error('file not found');
(error as any).code = 'ENOENT';
cb(error);
} else {
cb(null, `Content of ${path}`);
}
});

await expect(getHashes(['/foo/bar', '/foo/missing', '/foo/baz', '/foo/bar'])).resolves
.toMatchInlineSnapshot(`
Map {
"/foo/bar" => 1234,
"/foo/baz" => 1234,
"/foo/bar" => "OwCtruddjWkB6ROdbLRM0NnWOhs=",
"/foo/baz" => "mb6SFQi4VuH8jbwW3h6YoolklXc=",
}
`);
});
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,28 @@ import Fs from 'fs';

import * as Rx from 'rxjs';
import { mergeMap, map, catchError } from 'rxjs/operators';
import Crypto from 'crypto';
import { allValuesFrom } from '../common';

const stat$ = Rx.bindNodeCallback<Fs.PathLike, Fs.Stats>(Fs.stat);
// const stat$ = Rx.bindNodeCallback<Fs.PathLike, Fs.Stats>(Fs.stat);
const readFile$ = Rx.bindNodeCallback<Fs.PathLike, Buffer>(Fs.readFile);

/**
* get mtimes of referenced paths concurrently, limit concurrency to 100
* Get content hashes of referenced paths concurrently, with at most 100 concurrent files
*/
export async function getMtimes(paths: Iterable<string>) {
export async function getHashes(paths: Iterable<string>): Promise<Map<string, string>> {
return new Map(
await allValuesFrom(
Rx.from(paths).pipe(
// map paths to [path, mtimeMs] entries with concurrency of
// map paths to [path, sha1Hash] entries with concurrency of
// 100 at a time, ignoring missing paths
mergeMap(
(path) =>
stat$(path).pipe(
map((stat) => [path, stat.mtimeMs] as const),
readFile$(path).pipe(
map(
(buffer) =>
[path, Crypto.createHash('sha1').update(buffer).digest('base64')] as const
),
catchError((error: any) =>
error?.code === 'ENOENT' ? Rx.EMPTY : Rx.throwError(error)
)
Expand Down
34 changes: 14 additions & 20 deletions packages/osd-optimizer/src/worker/run_compilers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ import {
isConcatenatedModule,
getModulePath,
} from './webpack_helpers';
import { getHashes } from '../optimizer/get_hashes';

const PLUGIN_NAME = '@osd/optimizer';

Expand Down Expand Up @@ -178,28 +179,21 @@ const observeCompiler = (
}

const files = Array.from(referencedFiles).sort(ascending((p) => p));
const mtimes = new Map(
files.map((path): [string, number | undefined] => {
try {
return [path, compiler.inputFileSystem.statSync(path)?.mtimeMs];
} catch (error) {
if (error?.code === 'ENOENT') {
return [path, undefined];
}

throw error;
}
getHashes(files)
.then((hashes) => {
bundle.cache.set({
bundleRefExportIds,
optimizerCacheKey: workerConfig.optimizerCacheKey,
cacheKey: bundle.createCacheKey(files, hashes),
moduleCount,
workUnits,
files,
});
})
);

bundle.cache.set({
bundleRefExportIds,
optimizerCacheKey: workerConfig.optimizerCacheKey,
cacheKey: bundle.createCacheKey(files, mtimes),
moduleCount,
workUnits,
files,
});
.catch((_err) => {
// If cache fails to write, it's alright to ignore and reattempt next build
});

return compilerMsgs.compilerSuccess({
moduleCount,
Expand Down

0 comments on commit 0609ecf

Please sign in to comment.