From e10b8224c341a64067f153ad29db572d614be008 Mon Sep 17 00:00:00 2001 From: Martijn <73393707+martijn-dev@users.noreply.github.com> Date: Tue, 25 Jun 2024 14:31:18 +0200 Subject: [PATCH] feat: Add support for normalizing hashes (#110) * feat(sanitize-hash): add support for sanitizing hash * feat(sanitize-hash): Add note about unique in README * feat(normalize-hash): Rename sanitize to normalize * feat(normalize-hash): Add utf8 check --------- Co-authored-by: Martijn de Voogd --- README.md | 26 +++++++++++++++++ prisma/schema.prisma | 2 +- src/dmmf.test.ts | 6 ++-- src/dmmf.ts | 42 +++++++++++++++++++++++---- src/errors.ts | 20 ++++++++++++- src/hash.ts | 29 +++++++++++++++++-- src/tests/integration.test.ts | 53 ++++++++++++++++++++++++++++------- src/types.ts | 9 ++++++ 8 files changed, 166 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 1f6f037..ff358ce 100644 --- a/README.md +++ b/README.md @@ -235,6 +235,32 @@ rainbow table attacks. There are multiple ways to do so, listed by order of prec The salt should be of the same encoding as the associated data to hash. +### Normalize hash + +> _Support: introduced in version 1.6.0_ + +You can normalize a hash before creation and querying. This might be useful in case you would like to find a User with the name of `François ` with a query input of `francois`. 
+ +There are several normalize options: + +``` +/// @encryption:hash(email)?normalize=lowercase <- lowercase hash +/// @encryption:hash(email)?normalize=uppercase <- uppercase hash +/// @encryption:hash(email)?normalize=trim <- trim start and end of hash +/// @encryption:hash(email)?normalize=spaces <- remove spaces in hash +/// @encryption:hash(email)?normalize=diacritics <- remove diacritics like ç or é in hash +``` + +You can also combine the normalize options: + +``` +/// @encryption:hash(email)?normalize=lowercase&normalize=trim&normalize=diacritics +``` + +> Be aware: You should only use the normalize hash feature in combination with a `utf8` input encoding. It would not make sense to normalize a `hex` or `base64` string. + +> Be aware: Using the normalize hash feature in combination with `unique` could cause conflicts. Example: Users with the name `François` and `francois` result in the same hash which could result in a database conflict. + ## Migrations Adding encryption to an existing field is a transparent operation: Prisma will diff --git a/prisma/schema.prisma b/prisma/schema.prisma index f295d31..2a49923 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -18,7 +18,7 @@ model User { id Int @id @default(autoincrement()) email String @unique name String? @unique /// @encrypted - nameHash String? @unique /// @encryption:hash(name) + nameHash String? @unique /// @encryption:hash(name)?normalize=lowercase&normalize=diacritics&normalize=trim posts Post[] pinnedPost Post? @relation(fields: [pinnedPostId], references: [id], name: "pinnedPost") pinnedPostId Int? 
diff --git a/src/dmmf.test.ts b/src/dmmf.test.ts index 6da709c..9945ed1 100644 --- a/src/dmmf.test.ts +++ b/src/dmmf.test.ts @@ -5,6 +5,7 @@ import { parseEncryptedAnnotation, parseHashAnnotation } from './dmmf' +import { HashFieldNormalizeOptions } from './types' describe('dmmf', () => { describe('parseEncryptedAnnotation', () => { @@ -120,7 +121,7 @@ describe('dmmf', () => { id Int @id @default(autoincrement()) email String @unique name String? /// @encrypted - nameHash String? /// @encryption:hash(name) + nameHash String? /// @encryption:hash(name)?normalize=lowercase posts Post[] pinnedPost Post? @relation(fields: [pinnedPostId], references: [id], name: "pinnedPost") pinnedPostId Int? @@ -162,7 +163,8 @@ describe('dmmf', () => { targetField: 'nameHash', algorithm: 'sha256', inputEncoding: 'utf8', - outputEncoding: 'hex' + outputEncoding: 'hex', + normalize: [HashFieldNormalizeOptions.lowercase] } } }, diff --git a/src/dmmf.ts b/src/dmmf.ts index d310342..7bab291 100644 --- a/src/dmmf.ts +++ b/src/dmmf.ts @@ -2,9 +2,10 @@ import type { Encoding } from '@47ng/codec' import { errors, warnings } from './errors' import { DMMFDocument, - dmmfDocumentParser, FieldConfiguration, - HashFieldConfiguration + HashFieldConfiguration, + HashFieldNormalizeOptions, + dmmfDocumentParser } from './types' export interface ConnectionDescriptor { @@ -40,8 +41,8 @@ export function analyseDMMF(input: DMMFDocument): DMMFModels { field => field.isUnique && supportedCursorTypes.includes(String(field.type)) ) - const cursorField = model.fields.find( - field => field.documentation?.includes('@encryption:cursor') + const cursorField = model.fields.find(field => + field.documentation?.includes('@encryption:cursor') ) if (cursorField) { // Make sure custom cursor field is valid @@ -208,16 +209,47 @@ export function parseHashAnnotation( ? process.env[saltEnv] : process.env.PRISMA_FIELD_ENCRYPTION_HASH_SALT) + const normalize = + (query.getAll('normalize') as HashFieldNormalizeOptions[]) ?? 
[] + + if ( + !isValidNormalizeOptions(normalize) && + process.env.NODE_ENV === 'development' && + model && + field + ) { + console.warn(warnings.unsupportedNormalize(model, field, normalize)) + } + + if ( + normalize.length > 0 && + inputEncoding !== 'utf8' && + process.env.NODE_ENV === 'development' && + model && + field + ) { + console.warn( + warnings.unsupportedNormalizeEncoding(model, field, inputEncoding) + ) + } + return { sourceField: match.groups.fieldName, targetField: field ?? match.groups.fieldName + 'Hash', algorithm: query.get('algorithm') ?? 'sha256', salt, inputEncoding, - outputEncoding + outputEncoding, + normalize } } function isValidEncoding(encoding: string): encoding is Encoding { return ['hex', 'base64', 'utf8'].includes(encoding) } + +function isValidNormalizeOptions( + options: string[] +): options is HashFieldNormalizeOptions[] { + return options.every(option => option in HashFieldNormalizeOptions) +} diff --git a/src/errors.ts b/src/errors.ts index 37a1584..2cc04d2 100644 --- a/src/errors.ts +++ b/src/errors.ts @@ -1,5 +1,9 @@ import { namespace } from './debugger' -import type { DMMFField, DMMFModel } from './types' +import { + HashFieldNormalizeOptions, + type DMMFField, + type DMMFModel +} from './types' const error = `[${namespace}] Error` const warning = `[${namespace}] Warning` @@ -105,5 +109,19 @@ export const warnings = { io: string ) => `${warning}: unsupported ${io} encoding \`${encoding}\` for hash field ${model}.${field} -> Valid values are utf8, base64, hex +`, + unsupportedNormalize: ( + model: string, + field: string, + normalize: string + ) => `${warning}: unsupported normalize \`${normalize}\` for hash field ${model}.${field} + -> Valid values are ${Object.values(HashFieldNormalizeOptions)} +`, + unsupportedNormalizeEncoding: ( + model: string, + field: string, + inputEncoding: string + ) => `${warning}: unsupported normalize flag on field with encoding \`${inputEncoding}\` for hash field ${model}.${field} +-> Valid 
inputEncoding values for normalize are [utf8] ` } diff --git a/src/hash.ts b/src/hash.ts index 00b66f4..96251a1 100644 --- a/src/hash.ts +++ b/src/hash.ts @@ -1,6 +1,6 @@ import { decoders, encoders } from '@47ng/codec' import crypto from 'node:crypto' -import { HashFieldConfiguration } from 'types' +import { HashFieldConfiguration, HashFieldNormalizeOptions } from './types' export function hashString( input: string, @@ -8,7 +8,9 @@ export function hashString( ) { const decode = decoders[config.inputEncoding] const encode = encoders[config.outputEncoding] - const data = decode(input) + const normalized = normalizeHashString(input, config.normalize) + + const data = decode(normalized) const hash = crypto.createHash(config.algorithm) hash.update(data) if (config.salt) { @@ -16,3 +18,26 @@ export function hashString( } return encode(hash.digest()) } + +export function normalizeHashString( + input: string, + options: HashFieldNormalizeOptions[] = [] +) { + let output = input + if (options.includes(HashFieldNormalizeOptions.lowercase)) { + output = output.toLowerCase() + } + if (options.includes(HashFieldNormalizeOptions.uppercase)) { + output = output.toUpperCase() + } + if (options.includes(HashFieldNormalizeOptions.trim)) { + output = output.trim() + } + if (options.includes(HashFieldNormalizeOptions.spaces)) { + output = output.replace(/\s/g, '') + } + if (options.includes(HashFieldNormalizeOptions.diacritics)) { + output = output.normalize('NFD').replace(/[\u0300-\u036f]/g, '') + } + return output +} diff --git a/src/tests/integration.test.ts b/src/tests/integration.test.ts index 672a582..92e89f2 100644 --- a/src/tests/integration.test.ts +++ b/src/tests/integration.test.ts @@ -404,20 +404,53 @@ describe.each(clients)('integration ($type)', ({ client }) => { } }) - test("query entries with non-empty name", async () => { + test('query entries with non-empty name', async () => { const fakeName = 'f@keU$er' await client.user.create({ - data: { - name: '', - email: 
'test_mail@example.com' + data: { + name: '', + email: 'test_mail@example.com' } - }); - const users = await client.user.findMany(); + }) + const users = await client.user.findMany() // assume active user with nonempty name - const activeUserCount = await client.user.count({ where: { name: { not: '' } } }) + const activeUserCount = await client.user.count({ + where: { name: { not: '' } } + }) // use fakeName to pretend unique name - const existingUsers = await client.user.findMany({ where: { name: { not: fakeName } } }) - expect(activeUserCount).toBe(users.length - 1); - expect(existingUsers).toEqual(users); + const existingUsers = await client.user.findMany({ + where: { name: { not: fakeName } } + }) + expect(activeUserCount).toBe(users.length - 1) + expect(existingUsers).toEqual(users) + }) + + const normalizeTestEmail = 'normalize@example.com' + + test('create user with normalizeable name', async () => { + const received = await client.user.create({ + data: { + email: normalizeTestEmail, + name: ' François' + } + }) + const dbValue = await sqlite.get({ + table: 'User', + where: { email: normalizeTestEmail } + }) + expect(received.name).toEqual(' François') // clear text in returned value + expect(dbValue.name).toMatch(cloakedStringRegex) // encrypted in database + }) + + test('query user by encrypted and hashed name field with a normalized input (with equals)', async () => { + const received = await client.user.findFirst({ + where: { + name: { + equals: 'Francois' //check for lowercase, trim and diacritics + } + } + }) + expect(received!.name).toEqual(' François') // clear text in returned value + expect(received!.email).toEqual(normalizeTestEmail) }) }) diff --git a/src/types.ts b/src/types.ts index 61c05b5..87a6a4a 100644 --- a/src/types.ts +++ b/src/types.ts @@ -79,6 +79,15 @@ export type HashFieldConfiguration = { salt?: string inputEncoding: Encoding outputEncoding: Encoding + normalize?: HashFieldNormalizeOptions[] +} + +export enum 
HashFieldNormalizeOptions { + lowercase = 'lowercase', + uppercase = 'uppercase', + trim = 'trim', + spaces = 'spaces', + diacritics = 'diacritics' } export interface FieldConfiguration {