Skip to content

Commit

Permalink
Merge pull request #11 from fourTheorem/vitest
Browse files Browse the repository at this point in the history
Use vitest for unit tests
  • Loading branch information
eoinsha authored Oct 26, 2023
2 parents 59da7b8 + e190653 commit 35785d9
Show file tree
Hide file tree
Showing 12 changed files with 5,519 additions and 9,768 deletions.
3 changes: 1 addition & 2 deletions transcript-orchestration/functions/audio-transcoder/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,7 @@ export const handleEvent = middify(async (event: TranscodeEvent, context: Contex
try {
unlink(tempInputFilePath)
unlink(tempOutputFilePath)
} catch (err) {
/* istanbul ignore next */
} /* c8 ignore next 3 */ catch (err) {
logger.warn('Failed to delete temporary files', { err })
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,31 +5,27 @@ import { resolve } from 'node:path'
import { mockClient } from 'aws-sdk-client-mock'
import { sdkStreamMixin } from '@aws-sdk/util-stream-node'
import { S3Client, PutObjectCommand, GetObjectCommand } from '@aws-sdk/client-s3'
import tap from 'tap'
import { AudioTranscodeOptions } from '../../transcode'
import { mockedContext } from '../../../mock-utils/lambda-context'

const mockS3 = mockClient(S3Client)

import { test, expect, vi } from 'vitest'
process.env.BUCKET_NAME = 'test-bucket'
import * as app from '../../app'
import * as transcode from '../../transcode'

const testState: { options?: AudioTranscodeOptions } = {}

const { handleEvent } = tap.mock('../../app', {
'../../transcode': {
transcodeAudio: async (options: AudioTranscodeOptions) => {
testState.options = options
await writeFile(options.outputFilePath, 'mp3 contents')
}
}
})
const mockS3 = mockClient(S3Client)
// Stub out the '../../transcode' module so the test does not run a real
// transcode. The stub records its call arguments (via vi.fn) and writes a small
// placeholder file to the requested output path.
vi.mock('../../transcode', () => ({
  transcodeAudio: vi.fn(async (options) => {
    // The write must be awaited: without it the stub resolves before the file
    // exists, so any code that reads options.outputFilePath right after
    // transcodeAudio() returns would race against the pending write, and a
    // failed write would surface as an unhandled rejection instead of failing
    // the test. The previous tap-based stub awaited this call as well.
    await writeFile(options.outputFilePath, 'mp3 contents')
  })
}))

tap.test('converts audio using FFmpeg', async (t) => {
test('converts audio using FFmpeg', async () => {
const stream = createReadStream(resolve(__dirname, '../../../../../sample-audio/sample1.m4a'))
const sdkStream = sdkStreamMixin(stream)
mockS3.on(GetObjectCommand).resolves({ Body: sdkStream })
mockS3.on(PutObjectCommand).resolves({})
await handleEvent({ audioInputKey: 'sample1.m4a', audioOutputKey: 'sample1.mp3' }, mockedContext)
t.ok(testState.options?.inputFilePath)
t.ok(testState.options?.outputFilePath)
await app.handleEvent({ audioInputKey: 'sample1.m4a', audioOutputKey: 'sample1.mp3' }, mockedContext)
expect(transcode.transcodeAudio).toHaveBeenCalledWith(expect.objectContaining({
inputFilePath: expect.any(String),
outputFilePath: expect.any(String)
}))
})
10 changes: 5 additions & 5 deletions transcript-orchestration/functions/lib/tests/envs.test.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
process.env.EXAMPLE_KEY_1 = 'test'

import envs from '../envs'
import { test } from 'tap'
import { test, assert } from 'vitest'

test('envs retrieves environment variables if available', async (t) => {
t.equal(envs.EXAMPLE_KEY_1, 'test')
// EXAMPLE_KEY_1 is assigned 'test' on process.env at the top of this test file;
// reading it through the envs helper should return that same value.
test('envs retrieves environment variables if available', async () => {
assert.equal(envs.EXAMPLE_KEY_1, 'test')
})

test('envs throws an error if an environment variable is missing', async (t) => {
t.throws(() => envs.EXAMPLE_KEY_2)
// EXAMPLE_KEY_2 is not assigned anywhere in this test file, so the property
// access is wrapped in a function and expected to throw.
test('envs throws an error if an environment variable is missing', async () => {
assert.throws(() => envs.EXAMPLE_KEY_2)
})
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Context } from 'aws-lambda';

/* istanbul ignore next */
export const mockedContext: Context = {
callbackWaitsForEmptyEventLoop: false,
functionName: 'mockFunction',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import { sdkStreamMixin } from '@aws-sdk/util-stream-node'
import tap from 'tap'
import { mockedContext } from '../../../mock-utils/lambda-context'
import { test, expect } from 'vitest'
import { handleEvent, S3KeysEvent } from '../../app'

process.env.BUCKET_NAME = 'test-bucket'

const testState: { options?: S3KeysEvent } = {}

tap.test('constructs keys from audio input key', async (t) => {
test('constructs keys from audio input key', async (t) => {
const result = await handleEvent({ audioInputKey: 'audio/50.m4a' })
t.same(result, {
expect(result).toEqual({
mp3Key: 'audio/50.mp3',
whisperPrefix: 'whisper-batch-output',
whisperOutputKey: 'whisper-batch-output/50.json',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { resolve } from 'node:path'
import { mockClient } from 'aws-sdk-client-mock'
import { sdkStreamMixin } from '@aws-sdk/util-stream-node'
import { S3Client, PutObjectCommand, GetObjectCommand } from '@aws-sdk/client-s3'
import tap from 'tap'
import { test, assert } from 'vitest'

import { VocabularySubstitutions } from '../../vocabulary'
import { MergedTranscript } from '../../types'
Expand All @@ -27,7 +27,7 @@ const substitutions: VocabularySubstitutions = [
]

for (const vocabularySubstitutions of [undefined, substitutions]) {
tap.test(`transcript processor generates a merged transcript ${vocabularySubstitutions ? 'with': 'without'} substitutions`, async (t) => {
test(`transcript processor generates a merged transcript ${vocabularySubstitutions ? 'with': 'without'} substitutions`, async (t) => {
const transcribeOutputStream = createReadStream(resolve(__dirname, './resources/1.transcribe'))
const whisperOutputStream = createReadStream(resolve(__dirname, './resources/1.whisper.out.json'))

Expand All @@ -48,17 +48,17 @@ for (const vocabularySubstitutions of [undefined, substitutions]) {
})

mockS3.on(PutObjectCommand).callsFake((input) => {
t.equal(input.Bucket, process.env.BUCKET_NAME)
t.equal(input.Key, processedTranscriptKey)
assert.equal(input.Bucket, process.env.BUCKET_NAME)
assert.equal(input.Key, processedTranscriptKey)
const transcript = JSON.parse(input.Body) as any as MergedTranscript
t.equal(transcript.segments.length, 295)
assert.equal(transcript.segments.length, 295)
let prevEnd = 0
for (const segment of transcript.segments) {
const distance = Number(segment.start.toFixed(2)) - Number(prevEnd.toFixed(2))
t.ok(distance >= 0, `${JSON.stringify(segment)} >= ${prevEnd} (${distance})`)
t.ok(segment.end > segment.start, JSON.stringify(segment))
t.ok(segment.text.length > 0, JSON.stringify(segment))
t.ok(segment.speakerLabel.startsWith('spk_'), JSON.stringify(segment))
assert.ok(distance >= 0, `${JSON.stringify(segment)} >= ${prevEnd} (${distance})`)
assert.ok(segment.end > segment.start, JSON.stringify(segment))
assert.ok(segment.text.length > 0, JSON.stringify(segment))
assert.ok(segment.speakerLabel.startsWith('spk_'), JSON.stringify(segment))
prevEnd = segment.end
}
})
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
import { closestSpeakerChange, merge } from '../../process-transcripts'
import { test, assert } from 'vitest'

import tap from 'tap'
import { closestSpeakerChange, merge } from '../../process-transcripts'
import { TranscribeSpeakerSegment } from '../../types'

tap.test('it finds the closes speaker change', async (t) => {
test('it finds the closes speaker change', () => {
const speakerChangeIndex = [
{ speakerLabel: 'spk_0', start: 0 },
{ speakerLabel: 'spk_1', start: 3 }
]

t.same(closestSpeakerChange(speakerChangeIndex, 0)?.speakerLabel, 'spk_0')
t.same(closestSpeakerChange(speakerChangeIndex, 1)?.speakerLabel, 'spk_0')
t.same(closestSpeakerChange(speakerChangeIndex, 2)?.speakerLabel, 'spk_0')
t.same(closestSpeakerChange(speakerChangeIndex, 3)?.speakerLabel, 'spk_1')
t.same(closestSpeakerChange(speakerChangeIndex, 4)?.speakerLabel, 'spk_1')
t.same(closestSpeakerChange(speakerChangeIndex, 100)?.speakerLabel, 'spk_1')
assert.equal(closestSpeakerChange(speakerChangeIndex, 0)?.speakerLabel, 'spk_0')
assert.equal(closestSpeakerChange(speakerChangeIndex, 1)?.speakerLabel, 'spk_0')
assert.equal(closestSpeakerChange(speakerChangeIndex, 2)?.speakerLabel, 'spk_0')
assert.equal(closestSpeakerChange(speakerChangeIndex, 3)?.speakerLabel, 'spk_1')
assert.equal(closestSpeakerChange(speakerChangeIndex, 4)?.speakerLabel, 'spk_1')
assert.equal(closestSpeakerChange(speakerChangeIndex, 100)?.speakerLabel, 'spk_1')
})

tap.test('it merges a simple set of files', async (t) => {
test('it merges a simple set of files', () => {
const whisperSegments = [{
start: 0,
end: 2,
Expand Down Expand Up @@ -74,10 +74,10 @@ tap.test('it merges a simple set of files', async (t) => {
]
}

t.same(result, expectedResult)
assert.deepEqual(result, expectedResult)
})

tap.test('it merges segments where the first transcribe segment starts after the intiial audio silence', async (t) => {
test('it merges segments where the first transcribe segment starts after the intiial audio silence', async () => {
const whisperSegments = [
{ start: 0, end: 7.04, text: ' Node.js is considered by many a game changer, possibly the biggest innovation of the decade' },
{ start: 7.04, end: 36.64, text: ' in web development.' }
Expand Down Expand Up @@ -105,23 +105,23 @@ tap.test('it merges segments where the first transcribe segment starts after the
}
]
}
t.same(result, expectedResult)
assert.deepEqual(result, expectedResult)
})

tap.test('it identifies speaker as unknown if there is no speaker data', async (t) => {
test('it identifies speaker as unknown if there is no speaker data', async () => {
const whisperSegments = [
{ start: 0, end: 1, text: 'Hello' },
{ start: 1, end: 2, text: 'Goodbye' }
]
const transcribeSegments: TranscribeSpeakerSegment[] = []
const result = merge(whisperSegments, transcribeSegments)
t.same(result.segments.length, 2)
assert.equal(result.segments.length, 2)
for (const segment of result.segments) {
t.equal(segment.speakerLabel, 'unknown')
assert.equal(segment.speakerLabel, 'unknown')
}
})

tap.test('it splits a segment if the speaker changes mid-sentence', async (t) => {
test('it splits a segment if the speaker changes mid-sentence', () => {
const whisperSegments = [
{ start: 0, end: 1, text: 'Hello. My name is Bob and I am here with' },
{ start: 1, end: 2, text: ' Alice. How are you today, Alice? I am good actually' },
Expand Down Expand Up @@ -159,5 +159,5 @@ tap.test('it splits a segment if the speaker changes mid-sentence', async (t) =>
]
}
const result = merge(whisperSegments, transcribeSegments)
t.same(result, expectedResult)
assert.deepEqual(result, expectedResult)
})
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { substituteVocabulary, VocabularySubstitutions } from '../../vocabulary'

import tap from 'tap'
import { test, assert } from 'vitest'
import { MergedTranscript } from '../../types'

tap.test('it substitutes with literal and regex searches', async (t) => {
test('it substitutes with literal and regex searches', async () => {
const transcript: MergedTranscript = {
speakers: {
'spk_0': 'a',
Expand Down Expand Up @@ -47,6 +47,6 @@ tap.test('it substitutes with literal and regex searches', async (t) => {
}
]
substituteVocabulary(transcript, vocab)
t.equal(transcript.segments[0].text, 'Hello my name is Eoin and this is AWS Bites.')
t.equal(transcript.segments[1].text, ' Hi, my name is Luciano and we are going to talk about Lambda!')
assert.equal(transcript.segments[0].text, 'Hello my name is Eoin and this is AWS Bites.')
assert.equal(transcript.segments[1].text, ' Hi, my name is Luciano and we are going to talk about Lambda!')
})
Loading

0 comments on commit 35785d9

Please sign in to comment.