Skip to content

Commit

Permalink
fix(backend): no longer treat files attached to messages as project files
Browse files Browse the repository at this point in the history
  • Loading branch information
Mati365 committed Dec 26, 2024
1 parent 716a0d5 commit e5a7d73
Show file tree
Hide file tree
Showing 11 changed files with 88 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ export function createIdObjectMapping(properties?: object, idType = 'integer') {
};
}

export function createNullableIdObjectMapping(properties?: object) {
export function createNullableIdObjectMapping(properties?: object, idType = 'integer') {
return {
properties: {
id: { type: 'integer', null_value: createMagicNullIdEsValue() },
id: { type: idType, null_value: createMagicNullIdEsValue() },
...properties,
},
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
createBaseDatedRecordMappings,
createElasticsearchIndexRepo,
createIdObjectMapping,
createNullableIdObjectMapping,
ElasticsearchRepo,
type EsDocument,
} from '~/modules/elasticsearch';
Expand All @@ -28,6 +29,7 @@ const ProjectsEmbeddingsAbstractEsIndexRepo = createElasticsearchIndexRepo({
...createBaseDatedRecordMappings(),
project: createIdObjectMapping(),
project_file: createIdObjectMapping({
chat: createNullableIdObjectMapping({}, 'keyword'),
resource: createIdObjectMapping(),
}),
summary: { type: 'boolean' },
Expand Down Expand Up @@ -99,12 +101,16 @@ export class ProjectsEmbeddingsEsIndexRepo extends ProjectsEmbeddingsAbstractEsI
return pipe(
this.embeddingsRepo.findWithRelationsByIds({ ids }),
TE.map(
A.map(({ vector, ...entity }) => {
A.map(({ vector, projectFile, ...entity }) => {
const parsedVector: number[] = JSON.parse(vector);

return {
...snakecaseKeys(entity, { deep: true }),
[`vector_${parsedVector.length}`]: parsedVector,
project_file: {
...snakecaseKeys(projectFile, { deep: true }),
chat: projectFile.chat ?? ({ id: null } as any),
},
_id: String(entity.id),
};
}),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ import { pipe } from 'fp-ts/lib/function';
import { inject, injectable } from 'tsyringe';

import type { SdkSearchProjectEmbeddingItemT, SdkSearchProjectEmbeddingsInputT } from '@llm/sdk';
import type { TableId } from '~/modules/database';
import type { TableId, TableUuid } from '~/modules/database';

import { pluck, rejectFalsyItems } from '@llm/commons';
import { createPaginationOffsetSearchQuery, createScoredSortFieldQuery } from '~/modules/elasticsearch';
import { createMagicNullIdEsValue, createPaginationOffsetSearchQuery, createScoredSortFieldQuery } from '~/modules/elasticsearch';

import { ProjectEmbeddingsTableRowWithRelations } from '../projects-embeddings.tables';
import { type ProjectsEmbeddingsEsDocument, ProjectsEmbeddingsEsIndexRepo } from './projects-embeddings-es-index.repo';
Expand Down Expand Up @@ -49,47 +49,58 @@ export class ProjectsEmbeddingsEsSearchRepo {
);

matchByEmbedding = (
{ embedding, projectId }: {
{ embedding, projectId, chatId }: {
embedding: number[];
projectId: TableId;
chatId: TableUuid;
},
) => pipe(
this.indexRepo.search(
esb
.requestBodySearch()
.source(['id', 'text', 'project_file'])
.size(100)
.kNN([
esb
.kNN(`vector_${embedding.length}`, 30, 200)
.queryVector(embedding)
.boost(1)
.filter([
esb.termQuery('summary', false),
esb.termQuery('project.id', projectId),
]),
) => {
const sharedFilters = [
esb.termQuery('project.id', projectId),
esb.boolQuery().should([
esb.termQuery('project_file.chat.id', chatId),
esb.termQuery('project_file.chat.id', createMagicNullIdEsValue()),
]),
];

esb
.kNN(`vector_${embedding.length}`, 20, 200)
.queryVector(embedding)
.boost(3)
.filter([
esb.termQuery('summary', true),
esb.termQuery('project.id', projectId),
]),
])
.toJSON(),
),
TE.map(({ hits: { hits } }) => pipe(
hits,
pluck('_source'),
A.map((item): EsMatchingProjectEmbedding => ({
id: item.id!,
text: item.text!,
projectFile: camelcaseKeys(item.project_file!, { deep: true }),
})),
)),
);
return pipe(
this.indexRepo.search(
esb
.requestBodySearch()
.source(['id', 'text', 'project_file'])
.size(100)
.kNN([
esb
.kNN(`vector_${embedding.length}`, 30, 200)
.queryVector(embedding)
.boost(1)
.filter([
esb.termQuery('summary', false),
...sharedFilters,
]),

esb
.kNN(`vector_${embedding.length}`, 20, 200)
.queryVector(embedding)
.boost(3)
.filter([
esb.termQuery('summary', true),
...sharedFilters,
]),
])
.toJSON(),
),
TE.map(({ hits: { hits } }) => pipe(
hits,
pluck('_source'),
A.map((item): EsMatchingProjectEmbedding => ({
id: item.id!,
text: item.text!,
projectFile: camelcaseKeys(item.project_file!, { deep: true }),
})),
)),
);
};

private static mapOutputHit = (source: ProjectsEmbeddingsEsDocument): SdkSearchProjectEmbeddingItemT =>
({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ export function createRelevantEmbeddingsPrompt(
'9. MANDATORY: Always analyze attached files first before other context',
'10. MANDATORY: For attached files, provide more detailed analysis unless user specifies otherwise',
'11. Prefer to analyze latest attached files first but consider all attached files in the context',
'12. When user asks about "attached files", "uploaded files", or similar:',
' - Focus ONLY on files that were actually attached in the current chat',
' - Use other embeddings only for additional context if relevant',
' - Always prioritize responses about attached files',
' - Make it clear which information comes from attached files vs other context',
'',
'Text Format Rules:',
'- ❌ WRONG: "In config.ts we see..."',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export class ProjectsEmbeddingsRepo extends createDatabaseRepo('projects_embeddi
.selectFrom(this.table)
.where('projects_embeddings.id', 'in', ids)
.innerJoin('projects_files', 'projects_files.id', 'project_file_id')
.leftJoin('messages', 'messages.id', 'projects_files.message_id')
.innerJoin('s3_resources', 's3_resources.id', 'projects_files.s3_resource_id')
.innerJoin('s3_resources_buckets', 's3_resources_buckets.id', 's3_resources.bucket_id')
.selectAll(this.table)
Expand All @@ -40,6 +41,8 @@ export class ProjectsEmbeddingsRepo extends createDatabaseRepo('projects_embeddi
's3_resources.id as project_file_s3_resource_id',
's3_resources.s3_key as project_file_s3_resource_s3_key',
's3_resources_buckets.public_base_url as bucket_public_base_url',

'messages.chat_id as message_chat_id',
])
.limit(ids.length)
.execute(),
Expand All @@ -56,6 +59,8 @@ export class ProjectsEmbeddingsRepo extends createDatabaseRepo('projects_embeddi
project_file_s3_resource_s3_key: projectFileS3ResourceS3Key,
bucket_public_base_url: bucketBaseUrl,

message_chat_id: messageChatId,

...item
}): ProjectEmbeddingsTableRowWithRelations => ({
...camelcaseKeys(item),
Expand All @@ -65,6 +70,7 @@ export class ProjectsEmbeddingsRepo extends createDatabaseRepo('projects_embeddi
projectFile: {
id: projectFileId,
name: projectFileName,
chat: messageChatId ? { id: messageChatId } : null,
resource: {
id: projectFileS3ResourceId,
publicUrl: `${bucketBaseUrl}/${projectFileS3ResourceS3Key}`,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ export class ProjectsEmbeddingsService implements WithAuthFirewall<ProjectsEmbed
TE.chainW(embedding => this.esSearchRepo.matchByEmbedding({
embedding,
projectId,
chatId: chat.id,
})),
TE.map(searchResults => createRelevantEmbeddingsPrompt(message, searchResults)),
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import type {
TableId,
TableRowWithId,
TableRowWithIdName,
TableRowWithUuid,
TableWithDefaultColumns,
} from '../database';
import type { S3ResourcesTableRowWithRelations } from '../s3';
Expand All @@ -30,6 +31,7 @@ export type ProjectEmbeddingsTableRowWithRelations =
& {
project: TableRowWithId;
projectFile: TableRowWithIdName & {
chat: TableRowWithUuid | null;
resource: Pick<S3ResourcesTableRowWithRelations, 'id' | 'publicUrl'>;
};
};
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
createBaseDatedRecordMappings,
createElasticsearchIndexRepo,
createIdNameObjectMapping,
createNullableIdObjectMapping,
ElasticsearchRepo,
type EsDocument,
} from '~/modules/elasticsearch';
Expand All @@ -26,6 +27,7 @@ const ProjectsFilesAbstractEsIndexRepo = createElasticsearchIndexRepo({
dynamic: false,
properties: {
...createBaseDatedRecordMappings(),
message: createNullableIdObjectMapping({}, 'keyword'),
project: createIdNameObjectMapping(),
resource: createIdNameObjectMapping(),
},
Expand Down Expand Up @@ -64,6 +66,7 @@ export class ProjectsFilesEsIndexRepo extends ProjectsFilesAbstractEsIndexRepo<P
TE.map(
A.map((entity): ProjectFileEsDocument => ({
...snakecaseKeys(entity, { deep: true }),
message: entity.message ?? ({ id: null } as any),
_id: String(entity.id),
})),
),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import type {

import { pluck, rejectFalsyItems } from '@llm/commons';
import {
createMagicNullIdEsValue,
createPaginationOffsetSearchQuery,
createPhraseFieldQuery,
createScoredSortFieldQuery,
Expand All @@ -22,7 +23,10 @@ import {
ProjectsFilesEsIndexRepo,
} from './projects-files-es-index.repo';

type InternalSearchProjectFilesInput = SdkSearchProjectFilesInputT & { projectId: SdkTableRowIdT; };
type InternalSearchProjectFilesInput = SdkSearchProjectFilesInputT & {
projectId: SdkTableRowIdT;
ignoreAttachedToMessages?: boolean;
};

@injectable()
export class ProjectsFilesEsSearchRepo {
Expand Down Expand Up @@ -60,13 +64,15 @@ export class ProjectsFilesEsSearchRepo {
projectId,
phrase,
ids,
ignoreAttachedToMessages,
}: InternalSearchProjectFilesInput,
): esb.Query =>
esb.boolQuery().must(
rejectFalsyItems([
esb.termQuery('project.id', projectId),
!!ids?.length && esb.termsQuery('id', ids),
!!phrase && createPhraseFieldQuery()(phrase).boost(3),
ignoreAttachedToMessages && esb.termQuery('message.id', createMagicNullIdEsValue()),
]),
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ export function ProjectFilesListContainer({ projectId }: Props) {
},
fetchResultsTask: filters => sdks.dashboard.projectsFiles.search({
...filters,
ignoreAttachedToMessages: true,
projectId,
}),
});
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { z } from 'zod';

import { StrictBooleanV } from '@llm/commons';
import {
SdkDefaultSortInputV,
SdkFilteredPhraseInputV,
Expand All @@ -15,6 +16,9 @@ export const SdkSearchProjectFileItemV = SdkProjectFileV;
export type SdkSearchProjectFileItemT = z.infer<typeof SdkSearchProjectFileItemV>;

/**
 * Validator for the project-files search input.
 *
 * Built from `SdkOffsetPaginationInputV`, extended with the
 * `ignoreAttachedToMessages` flag and merged with the shared sort, ids and
 * phrase filter validators.
 *
 * NOTE(review): `ignoreAttachedToMessages` is declared as
 * `.default(true).optional()`. Depending on the zod version, the outer
 * `.optional()` may return `undefined` for a missing value before the inner
 * default is consulted, in which case `default(true)` never takes effect and
 * callers that omit the flag get `undefined` (falsy) — confirm which behavior
 * is intended before relying on the default.
 */
export const SdkSearchProjectFilesInputV = SdkOffsetPaginationInputV
.extend({
// Presumably asks the search to skip files that were attached to chat messages — verify against the backend query builder.
ignoreAttachedToMessages: StrictBooleanV.default(true).optional(),
})
.merge(SdkDefaultSortInputV)
.merge(SdkIdsFiltersInputV)
.merge(SdkFilteredPhraseInputV);
Expand Down

0 comments on commit e5a7d73

Please sign in to comment.