From ece3e1ab705f0d6454bc7507d01f7c7a1e3af611 Mon Sep 17 00:00:00 2001 From: Daniel Naab Date: Thu, 31 Oct 2024 16:17:26 -0500 Subject: [PATCH] Add form_documents table and an initializeForm service, responsible for creating a form that's initialized with data from a PDF. initializeForm is not currently wired to the UI. --- .../20241031103354_form_documents_table.mjs | 21 +++++ packages/database/src/clients/kysely/types.ts | 16 +++- packages/forms/src/builder/index.ts | 13 ++- packages/forms/src/context/index.ts | 9 +- .../__tests__/doj-pardon-marijuana.test.ts | 3 +- .../src/documents/__tests__/extract.test.ts | 2 +- .../src/documents/__tests__/fill-pdf.test.ts | 3 +- packages/forms/src/documents/document.ts | 35 +++++++- packages/forms/src/documents/pdf/index.ts | 20 ++++- packages/forms/src/documents/types.ts | 9 ++ .../forms/src/repository/add-document.test.ts | 29 +++++++ packages/forms/src/repository/add-document.ts | 39 +++++++++ packages/forms/src/repository/index.ts | 3 + packages/forms/src/services/index.ts | 5 +- .../src/services/initialize-form.test.ts | 66 ++++++++++++++ .../forms/src/services/initialize-form.ts | 86 +++++++++++++++++++ packages/forms/src/testing.ts | 11 ++- packages/forms/src/types.ts | 1 + 18 files changed, 356 insertions(+), 15 deletions(-) create mode 100644 packages/database/migrations/20241031103354_form_documents_table.mjs create mode 100644 packages/forms/src/repository/add-document.test.ts create mode 100644 packages/forms/src/repository/add-document.ts create mode 100644 packages/forms/src/services/initialize-form.test.ts create mode 100644 packages/forms/src/services/initialize-form.ts diff --git a/packages/database/migrations/20241031103354_form_documents_table.mjs b/packages/database/migrations/20241031103354_form_documents_table.mjs new file mode 100644 index 00000000..85ddc3f2 --- /dev/null +++ b/packages/database/migrations/20241031103354_form_documents_table.mjs @@ -0,0 +1,21 @@ +/** + * @param { import("knex").Knex } 
knex + * @returns { Promise } + */ +export async function up(knex) { + await knex.schema.createTable('form_documents', table => { + table.uuid('id').primary(); + table.string('type').notNullable(); + table.string('file_name').notNullable(); + table.binary('data').notNullable(); + table.string('extract').notNullable(); + }); +} + +/** + * @param { import("knex").Knex } knex + * @returns { Promise } + */ +export async function down(knex) { + await knex.schema.dropTableIfExists('form_documents'); +} diff --git a/packages/database/src/clients/kysely/types.ts b/packages/database/src/clients/kysely/types.ts index 80c04b27..0c54b490 100644 --- a/packages/database/src/clients/kysely/types.ts +++ b/packages/database/src/clients/kysely/types.ts @@ -1,4 +1,5 @@ import type { + ColumnType, Generated, Insertable, Kysely, @@ -14,7 +15,9 @@ export interface Database { sessions: SessionsTable; forms: FormsTable; form_sessions: FormSessionsTable; + form_documents: FormDocumentsTable; } +export type DatabaseClient = Kysely; interface UsersTable { id: string; @@ -48,8 +51,6 @@ export type FormsTableSelectable = Selectable; export type FormsTableInsertable = Insertable; export type FormsTableUpdateable = Updateable; -export type DatabaseClient = Kysely; - interface FormSessionsTable { id: string; form_id: string; @@ -60,3 +61,14 @@ interface FormSessionsTable { export type FormSessionsTableSelectable = Selectable; export type FormSessionsTableInsertable = Insertable; export type FormSessionsTableUpdateable = Updateable; + +interface FormDocumentsTable { + id: string; + type: string; + data: ColumnType; + file_name: string; + extract: string; +} +export type FormDocumentsTableSelectable = Selectable; +export type FormDocumentsTableInsertable = Insertable; +export type FormDocumentsTableUpdateable = Updateable; diff --git a/packages/forms/src/builder/index.ts b/packages/forms/src/builder/index.ts index 5f109702..1e61abb5 100644 --- a/packages/forms/src/builder/index.ts +++ 
b/packages/forms/src/builder/index.ts @@ -9,7 +9,7 @@ import { removePatternFromBlueprint, updateFormSummary, } from '../blueprint.js'; -import { addDocument } from '../documents/document.js'; +import { addDocument, addParsedPdfToForm } from '../documents/document.js'; import type { FormErrors } from '../error.js'; import { createDefaultPattern, @@ -23,6 +23,8 @@ import { import { type FieldsetPattern } from '../patterns/fieldset/config.js'; import { type PageSetPattern } from '../patterns/page-set/config.js'; import type { Blueprint, FormSummary } from '../types.js'; +import type { ParsedPdf } from '../documents/pdf/parsing-api.js'; +import type { DocumentFieldMap } from '../documents/types.js'; export class BlueprintBuilder { bp: Blueprint; @@ -47,6 +49,15 @@ export class BlueprintBuilder { this.bp = updatedForm; } + async addDocumentRef(opts: { id: string; extract: ParsedPdf }) { + const { updatedForm } = await addParsedPdfToForm(this.form, { + id: opts.id, + label: opts.extract.title, + extract: opts.extract, + }); + this.bp = updatedForm; + } + addPage() { const newPage = createDefaultPattern(this.config, 'page'); this.bp = addPageToPageSet(this.form, newPage); diff --git a/packages/forms/src/context/index.ts b/packages/forms/src/context/index.ts index 910f3f6e..d73929c7 100644 --- a/packages/forms/src/context/index.ts +++ b/packages/forms/src/context/index.ts @@ -1,10 +1,13 @@ -import { type FormConfig } from '../pattern.js'; -import { type FormRepository } from '../repository/index.js'; +import type { ParsePdf } from '../documents/index.js'; +import type { FormConfig } from '../pattern.js'; +import type { FormRepository } from '../repository/index.js'; -export { createTestBrowserFormService } from './test/index.js'; export { BrowserFormRepository } from './browser/form-repo.js'; +export { createTestBrowserFormService } from './test/index.js'; + export type FormServiceContext = { repository: FormRepository; config: FormConfig; isUserLoggedIn: () => boolean; + 
parsePdf: ParsePdf; }; diff --git a/packages/forms/src/documents/__tests__/doj-pardon-marijuana.test.ts b/packages/forms/src/documents/__tests__/doj-pardon-marijuana.test.ts index 1b828d4a..48ce61aa 100644 --- a/packages/forms/src/documents/__tests__/doj-pardon-marijuana.test.ts +++ b/packages/forms/src/documents/__tests__/doj-pardon-marijuana.test.ts @@ -3,7 +3,8 @@ import { describe, expect, test } from 'vitest'; import { Success } from '@atj/common'; import { type DocumentFieldMap } from '../index.js'; -import { fillPDF, getDocumentFieldData } from '../pdf/index.js'; +import { fillPDF } from '../pdf/index.js'; +import { getDocumentFieldData } from '../pdf/extract.js'; import { loadSamplePDF } from './sample-data.js'; diff --git a/packages/forms/src/documents/__tests__/extract.test.ts b/packages/forms/src/documents/__tests__/extract.test.ts index ccdaef1f..a2ec418b 100644 --- a/packages/forms/src/documents/__tests__/extract.test.ts +++ b/packages/forms/src/documents/__tests__/extract.test.ts @@ -1,7 +1,7 @@ import { describe, expect, it } from 'vitest'; -import { getDocumentFieldData } from '../index.js'; import { loadSamplePDF } from './sample-data.js'; +import { getDocumentFieldData } from '../pdf/extract.js'; describe('PDF form field extraction', () => { it('extracts data from California UD-105 form', async () => { diff --git a/packages/forms/src/documents/__tests__/fill-pdf.test.ts b/packages/forms/src/documents/__tests__/fill-pdf.test.ts index 8fbc5068..4ec57684 100644 --- a/packages/forms/src/documents/__tests__/fill-pdf.test.ts +++ b/packages/forms/src/documents/__tests__/fill-pdf.test.ts @@ -2,8 +2,9 @@ import { beforeAll, describe, expect, it } from 'vitest'; import { type Failure, type Success } from '@atj/common'; -import { getDocumentFieldData, fillPDF } from '../index.js'; +import { fillPDF } from '../index.js'; import { loadSamplePDF } from './sample-data.js'; +import { getDocumentFieldData } from '../pdf/extract.js'; describe('PDF form filler', () 
=> { let pdfBytes: Uint8Array; diff --git a/packages/forms/src/documents/document.ts b/packages/forms/src/documents/document.ts index 2e455702..ee5bb18c 100644 --- a/packages/forms/src/documents/document.ts +++ b/packages/forms/src/documents/document.ts @@ -8,18 +8,47 @@ import { type Pattern } from '../pattern.js'; import { type InputPattern } from '../patterns/input/config.js'; import { type SequencePattern } from '../patterns/sequence.js'; import { type Blueprint } from '../types.js'; +import { getDocumentFieldData } from './pdf/extract.js'; -import { type PDFDocument, getDocumentFieldData } from './pdf/index.js'; +import { type PDFDocument } from './pdf/index.js'; import { type FetchPdfApiResponse, - processApiResponse, + type ParsedPdf, fetchPdfApiResponse, + processApiResponse, } from './pdf/parsing-api.js'; import { type DocumentFieldMap } from './types.js'; export type DocumentTemplate = PDFDocument; +export const addParsedPdfToForm = async ( + form: Blueprint, + document: { + id: string; + label: string; + extract: ParsedPdf; + } +) => { + form = addPatternMap(form, document.extract.patterns, document.extract.root); + const updatedForm = addFormOutput(form, { + id: document.id, + data: new Uint8Array(), // TODO: remove this no-longer-used field + path: document.label, + fields: document.extract.outputs, + formFields: Object.fromEntries( + Object.keys(document.extract.outputs).map(output => { + return [output, document.extract.outputs[output].name]; + }) + ), + }); + return { + newFields: document.extract.outputs, + updatedForm, + errors: document.extract.errors, + }; +}; + export const addDocument = async ( form: Blueprint, fileDetails: { @@ -41,6 +70,7 @@ export const addDocument = async ( }); form = addPatternMap(form, parsedPdf.patterns, parsedPdf.root); const updatedForm = addFormOutput(form, { + id: 'document-1', // TODO: generate a unique ID data: fileDetails.data, path: fileDetails.name, fields: parsedPdf.outputs, @@ -58,6 +88,7 @@ export const 
addDocument = async ( } else { const formWithFields = addDocumentFieldsToForm(form, fields); const updatedForm = addFormOutput(formWithFields, { + id: 'document-1', // TODO: generate a unique ID data: fileDetails.data, path: fileDetails.name, fields, diff --git a/packages/forms/src/documents/pdf/index.ts b/packages/forms/src/documents/pdf/index.ts index 6cdd0aba..9c065ec7 100644 --- a/packages/forms/src/documents/pdf/index.ts +++ b/packages/forms/src/documents/pdf/index.ts @@ -1,4 +1,11 @@ -export { getDocumentFieldData } from './extract.js'; +import { getDocumentFieldData } from './extract.js'; +import { + type ParsedPdf, + fetchPdfApiResponse, + processApiResponse, +} from './parsing-api.js'; +import type { DocumentFieldMap } from '../types.js'; + export * from './generate.js'; export { generateDummyPDF } from './generate-dummy.js'; @@ -20,3 +27,14 @@ export type PDFFieldType = | 'RadioGroup' | 'Paragraph' | 'RichText'; + +export type ParsePdf = ( + pdf: Uint8Array +) => Promise<{ parsedPdf: ParsedPdf; fields: DocumentFieldMap }>; + +export const parsePdf: ParsePdf = async (pdfBytes: Uint8Array) => { + const fields = await getDocumentFieldData(pdfBytes); + const apiResponse = await fetchPdfApiResponse(pdfBytes); + const parsedPdf = await processApiResponse(apiResponse); + return { parsedPdf, fields }; +}; diff --git a/packages/forms/src/documents/types.ts b/packages/forms/src/documents/types.ts index a94d4d2a..e2bde5a3 100644 --- a/packages/forms/src/documents/types.ts +++ b/packages/forms/src/documents/types.ts @@ -1,3 +1,12 @@ +import type { Result } from '@atj/common'; + +import { getDocumentFieldData } from './pdf/extract'; +import { + type ParsedPdf, + fetchPdfApiResponse, + processApiResponse, +} from './pdf/parsing-api'; + export type DocumentFieldValue = | { type: 'TextField'; diff --git a/packages/forms/src/repository/add-document.test.ts b/packages/forms/src/repository/add-document.test.ts new file mode 100644 index 00000000..2b171131 --- /dev/null +++ 
b/packages/forms/src/repository/add-document.test.ts @@ -0,0 +1,29 @@ +import { beforeAll, expect, it, vi } from 'vitest'; + +import { type DbTestContext, describeDatabase } from '@atj/database/testing'; +import { addDocument } from './add-document.js'; +import type { ParsedPdf } from '../documents/pdf/parsing-api.js'; +import type { DocumentFieldMap } from '../documents/types.js'; + +describeDatabase('add document', () => { + const today = new Date(2000, 1, 1); + + beforeAll(async () => { + vi.setSystemTime(today); + }); + + it<DbTestContext>('works', async ({ db }) => { + const result = await addDocument(db.ctx, { + fileName: 'file.pdf', + data: new Uint8Array([1, 2, 3]), + extract: { + parsedPdf: {} as ParsedPdf, + fields: {} as DocumentFieldMap, + }, + }); + if (result.success === false) { + expect.fail(`addDocument failed: ${result.error}`); + } + expect(result.data.id).toBeTypeOf('string'); + }); +}); diff --git a/packages/forms/src/repository/add-document.ts b/packages/forms/src/repository/add-document.ts new file mode 100644 index 00000000..bac7bff5 --- /dev/null +++ b/packages/forms/src/repository/add-document.ts @@ -0,0 +1,39 @@ +import { type Result, failure, success } from '@atj/common'; +import { type DatabaseContext } from '@atj/database'; + +import type { ParsedPdf } from '../documents/pdf/parsing-api'; +import type { DocumentFieldMap } from '../documents/types'; + +export type AddDocument = ( + ctx: DatabaseContext, + document: { + fileName: string; + data: Uint8Array; + extract: { + parsedPdf: ParsedPdf; + fields: DocumentFieldMap; + }; + } +) => Promise<Result<{ id: string }>>; + +export const addDocument: AddDocument = async (ctx, document) => { + const uuid = crypto.randomUUID(); + const db = await ctx.getKysely(); + + return await db + .insertInto('form_documents') + .values({ + id: uuid, + type: 'pdf', + file_name: document.fileName, + data: Buffer.from(document.data), + extract: JSON.stringify(document.extract), + }) + .execute() + .then(() => + success({ + id: uuid, + }) + ) + 
.catch(err => failure(err.message)); +}; diff --git a/packages/forms/src/repository/index.ts b/packages/forms/src/repository/index.ts index dd469d4e..ee1b6e88 100644 --- a/packages/forms/src/repository/index.ts +++ b/packages/forms/src/repository/index.ts @@ -1,6 +1,7 @@ import { type ServiceMethod, createService } from '@atj/common'; import { type DatabaseContext } from '@atj/database'; +import { type AddDocument, addDocument } from './add-document.js'; import { type AddForm, addForm } from './add-form.js'; import { type DeleteForm, deleteForm } from './delete-form.js'; import { type GetForm, getForm } from './get-form.js'; @@ -13,6 +14,7 @@ import { } from './upsert-form-session.js'; export interface FormRepository { + addDocument: ServiceMethod<AddDocument>; addForm: ServiceMethod<AddForm>; deleteForm: ServiceMethod<DeleteForm>; getForm: ServiceMethod<GetForm>; @@ -24,6 +26,7 @@ export interface FormRepository { export const createFormsRepository = (ctx: DatabaseContext): FormRepository => createService(ctx, { + addDocument, addForm, deleteForm, getFormList, diff --git a/packages/forms/src/services/index.ts b/packages/forms/src/services/index.ts index 96cc598a..d426dc29 100644 --- a/packages/forms/src/services/index.ts +++ b/packages/forms/src/services/index.ts @@ -1,4 +1,4 @@ -import { createService, ServiceMethod } from '@atj/common'; +import { type ServiceMethod, createService } from '@atj/common'; import { type FormServiceContext } from '../context/index.js'; @@ -7,6 +7,7 @@ import { type DeleteForm, deleteForm } from './delete-form.js'; import { type GetForm, getForm } from './get-form.js'; import { type GetFormList, getFormList } from './get-form-list.js'; import { type GetFormSession, getFormSession } from './get-form-session.js'; +import { type InitializeForm, initializeForm } from './initialize-form.js'; import { type SaveForm, saveForm } from './save-form.js'; import { type SubmitForm, submitForm } from './submit-form.js'; @@ -17,6 +18,7 @@ export const createFormService = (ctx: 
FormServiceContext) => getForm, getFormList, getFormSession, + initializeForm, saveForm, submitForm, }); @@ -27,6 +29,7 @@ export type FormService = { getForm: ServiceMethod<GetForm>; getFormList: ServiceMethod<GetFormList>; getFormSession: ServiceMethod<GetFormSession>; + initializeForm: ServiceMethod<InitializeForm>; saveForm: ServiceMethod<SaveForm>; submitForm: ServiceMethod<SubmitForm>; }; diff --git a/packages/forms/src/services/initialize-form.test.ts b/packages/forms/src/services/initialize-form.test.ts new file mode 100644 index 00000000..e2fcf823 --- /dev/null +++ b/packages/forms/src/services/initialize-form.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, it } from 'vitest'; + +import { createTestFormServiceContext } from '../testing.js'; + +import { initializeForm } from './initialize-form.js'; + +const summary = { title: 'Form Title', description: '' }; + +describe('initializeForm', () => { + it('returns access denied (401) if user is not logged in', async () => { + const ctx = await createTestFormServiceContext({ + isUserLoggedIn: () => false, + }); + const result = await initializeForm(ctx, { summary }); + expect(result).toEqual({ + success: false, + error: { + status: 401, + message: 'You must be logged in to initialize a new form', + }, + }); + }); + + it('initializes with summary when user is logged in', async () => { + const ctx = await createTestFormServiceContext({ + isUserLoggedIn: () => true, + }); + const result = await initializeForm(ctx, { summary }); + expect(result).toEqual({ + success: true, + data: { + timestamp: expect.any(String), + id: expect.any(String), + }, + }); + }); + + it('initializes successfully with document when user is logged in', async () => { + const ctx = await createTestFormServiceContext({ + isUserLoggedIn: () => true, + parsedPdf: async () => ({ + parsedPdf: { + text: 'test', + title: '', + root: 'root', + description: '', + patterns: {}, + errors: [], + outputs: {}, + }, + fields: {}, + }), + }); + const result = await initializeForm(ctx, { + summary, + document: { fileName: 
'test.pdf', data: new Uint8Array([1, 2, 3]) }, + }); + expect(result).toEqual({ + success: true, + data: { + timestamp: expect.any(String), + id: expect.any(String), + }, + }); + }); +}); diff --git a/packages/forms/src/services/initialize-form.ts b/packages/forms/src/services/initialize-form.ts new file mode 100644 index 00000000..2128f11b --- /dev/null +++ b/packages/forms/src/services/initialize-form.ts @@ -0,0 +1,86 @@ +import { type Result, failure, success } from '@atj/common'; +import { type FormSummary, BlueprintBuilder } from '../index.js'; + +import { type FormServiceContext } from '../context/index.js'; + +type InitializeFormError = { + status: number; + message: string; +}; +type InitializeFormResult = { + timestamp: string; + id: string; +}; + +export type InitializeForm = ( + ctx: FormServiceContext, + opts: { + summary?: FormSummary; + document?: { fileName: string; data: Uint8Array }; + } +) => Promise<Result<InitializeFormResult, InitializeFormError>>; + +export const initializeForm: InitializeForm = async ( + ctx, + { summary, document } +) => { + if (!ctx.isUserLoggedIn()) { + return failure({ + status: 401, + message: 'You must be logged in to initialize a new form', + }); + } + + const builder = new BlueprintBuilder(ctx.config); + + if (document !== undefined) { + const parsePdfResult = await ctx + .parsePdf(document.data) + .then(result => success(result)) + .catch(err => + failure({ + status: 400, + message: `Failed to parse PDF: ${err.message}`, + }) + ); + if (!parsePdfResult.success) { + return parsePdfResult; + } + const { parsedPdf } = parsePdfResult.data; + + builder.setFormSummary({ + title: parsedPdf.title || document.fileName, + description: parsedPdf.description, + }); + + const addDocumentResult = await ctx.repository.addDocument({ + fileName: document.fileName, + data: document.data, + extract: parsePdfResult.data, + }); + if (!addDocumentResult.success) { + return failure({ + status: 500, + message: `Failed to add document: ${addDocumentResult.error}`, + }); + } + await 
builder.addDocumentRef({ + id: addDocumentResult.data.id, + extract: parsedPdf, + }); + } + + if (summary) { + builder.setFormSummary(summary); + } + + const result = await ctx.repository.addForm(builder.form); + if (!result.success) { + console.error('Failed to add form:', result.error); + return failure({ + status: 500, + message: result.error, + }); + } + return result; +}; diff --git a/packages/forms/src/testing.ts b/packages/forms/src/testing.ts index cdad9203..1fbb4f8b 100644 --- a/packages/forms/src/testing.ts +++ b/packages/forms/src/testing.ts @@ -1,13 +1,19 @@ import { type DatabaseContext } from '@atj/database'; import { createInMemoryDatabaseContext } from '@atj/database/context'; -import { createFormsRepository } from './repository'; + +import type { FormServiceContext } from './context'; +import { type ParsePdf, parsePdf } from './documents'; import { defaultFormConfig } from './patterns'; +import { createFormsRepository } from './repository'; type Options = { isUserLoggedIn: () => boolean; + parsedPdf: ParsePdf; }; -export const createTestFormServiceContext = async (opts?: Partial<Options>) => { +export const createTestFormServiceContext = async ( + opts?: Partial<Options> +): Promise<FormServiceContext> => { const db: DatabaseContext = await createInMemoryDatabaseContext(); const repository = createFormsRepository(db); return { @@ -15,6 +21,7 @@ export const createTestFormServiceContext = async (opts?: Partial<Options>) => { repository, config: defaultFormConfig, isUserLoggedIn: opts?.isUserLoggedIn || (() => true), + parsePdf: opts?.parsedPdf || parsePdf, }; }; diff --git a/packages/forms/src/types.ts b/packages/forms/src/types.ts index e5147eb4..33e7efa4 100644 --- a/packages/forms/src/types.ts +++ b/packages/forms/src/types.ts @@ -14,6 +14,7 @@ export type FormSummary = { }; export type FormOutput = { + id: string; data: Uint8Array; path: string; fields: DocumentFieldMap;