From 3051448a09441079e2183436dab3076f74a0cbef Mon Sep 17 00:00:00 2001 From: Shankar Ambady Date: Thu, 12 Dec 2024 11:38:01 -0500 Subject: [PATCH] Filter params for vector search (#1889) * adding serializer fields * adding filters * update spec * updating tests and accounting for wrapped boolean arrays * adding test for qdrant conditions * looking up resources by readable id * typo * removing id field from request serializer and replacing with readable_id * clarifying docstring * changing order of recreating collection * adding query counts to results * some consolidation * fixing test * removing unused filters --- frontends/api/src/generated/v0/api.ts | 413 ++++++++++++++++++ learning_resources_search/api.py | 14 +- openapi/specs/v0.yaml | 325 ++++++++++++++ vector_search/conftest.py | 2 + .../commands/generate_embeddings.py | 5 +- vector_search/serializers.py | 127 ++++++ vector_search/utils.py | 125 ++++-- vector_search/utils_test.py | 46 ++ vector_search/views.py | 4 +- vector_search/views_test.py | 99 ++++- 10 files changed, 1097 insertions(+), 63 deletions(-) diff --git a/frontends/api/src/generated/v0/api.ts b/frontends/api/src/generated/v0/api.ts index 975af7a9ac..11743b483d 100644 --- a/frontends/api/src/generated/v0/api.ts +++ b/frontends/api/src/generated/v0/api.ts @@ -7414,16 +7414,46 @@ export const LearningResourcesVectorSearchApiAxiosParamCreator = function ( /** * Vector Search for learning resources * @summary Vector Search + * @param {boolean | null} [certification] True if the learning resource offers a certificate + * @param {Array} [certification_type] The type of certificate * `micromasters` - MicroMasters Credential * `professional` - Professional Certificate * `completion` - Certificate of Completion * `none` - No Certificate + * @param {Array} [course_feature] The course feature. 
Possible options are at api/v1/course_features/ + * @param {Array} [delivery] The delivery options in which the learning resource is offered * `online` - Online * `hybrid` - Hybrid * `in_person` - In person * `offline` - Offline + * @param {Array} [department] The department that offers the learning resource * `1` - Civil and Environmental Engineering * `2` - Mechanical Engineering * `3` - Materials Science and Engineering * `4` - Architecture * `5` - Chemistry * `6` - Electrical Engineering and Computer Science * `7` - Biology * `8` - Physics * `9` - Brain and Cognitive Sciences * `10` - Chemical Engineering * `11` - Urban Studies and Planning * `12` - Earth, Atmospheric, and Planetary Sciences * `14` - Economics * `15` - Management * `16` - Aeronautics and Astronautics * `17` - Political Science * `18` - Mathematics * `20` - Biological Engineering * `21A` - Anthropology * `21G` - Global Languages * `21H` - History * `21L` - Literature * `21M` - Music and Theater Arts * `22` - Nuclear Science and Engineering * `24` - Linguistics and Philosophy * `CC` - Concourse * `CMS-W` - Comparative Media Studies/Writing * `EC` - Edgerton Center * `ES` - Experimental Study Group * `ESD` - Engineering Systems Division * `HST` - Medical Engineering and Science * `IDS` - Data, Systems, and Society * `MAS` - Media Arts and Sciences * `PE` - Athletics, Physical Education and Recreation * `SP` - Special Programs * `STS` - Science, Technology, and Society * `WGS` - Women\'s and Gender Studies + * @param {boolean | null} [free] + * @param {Array} [level] * @param {number} [limit] Number of results to return per page + * @param {Array} [ocw_topic] The ocw topic name. 
+ * @param {Array} [offered_by] The organization that offers the learning resource * `mitx` - MITx * `ocw` - MIT OpenCourseWare * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * @param {number} [offset] The initial index from which to return the results + * @param {Array} [platform] The platform on which the learning resource is offered * `edx` - edX * `ocw` - MIT OpenCourseWare * `oll` - Open Learning Library * `mitxonline` - MITx Online * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `csail` - CSAIL * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * `scc` - Schwarzman College of Computing * `ctl` - Center for Transportation & Logistics * `whu` - WHU * `susskind` - Susskind * `globalalumni` - Global Alumni * `simplilearn` - Simplilearn * `emeritus` - Emeritus * `podcast` - Podcast * `youtube` - YouTube + * @param {boolean | null} [professional] * @param {string} [q] The search text + * @param {string} [readable_id] The readable id of the resource + * @param {Array} [resource_category] The category of learning resource * `course` - Course * `program` - Program * `learning_material` - Learning Material + * @param {Array} [resource_type] The type of learning resource * `course` - course * `program` - program * `learning_path` - learning path * `podcast` - podcast * `podcast_episode` - podcast episode * `video` - video * `video_playlist` - video playlist + * @param {Array} [topic] The topic name. To see a list of options go to api/v1/topics/ * @param {*} [options] Override http request option. 
* @throws {RequiredError} */ learningResourcesVectorSearchRetrieve: async ( + certification?: boolean | null, + certification_type?: Array, + course_feature?: Array, + delivery?: Array, + department?: Array, + free?: boolean | null, + level?: Array, limit?: number, + ocw_topic?: Array, + offered_by?: Array, offset?: number, + platform?: Array, + professional?: boolean | null, q?: string, + readable_id?: string, + resource_category?: Array, + resource_type?: Array, + topic?: Array, options: RawAxiosRequestConfig = {}, ): Promise => { const localVarPath = `/api/v0/learning_resources_vector_search/` @@ -7442,18 +7472,78 @@ export const LearningResourcesVectorSearchApiAxiosParamCreator = function ( const localVarHeaderParameter = {} as any const localVarQueryParameter = {} as any + if (certification !== undefined) { + localVarQueryParameter["certification"] = certification + } + + if (certification_type) { + localVarQueryParameter["certification_type"] = certification_type + } + + if (course_feature) { + localVarQueryParameter["course_feature"] = course_feature + } + + if (delivery) { + localVarQueryParameter["delivery"] = delivery + } + + if (department) { + localVarQueryParameter["department"] = department + } + + if (free !== undefined) { + localVarQueryParameter["free"] = free + } + + if (level) { + localVarQueryParameter["level"] = level + } + if (limit !== undefined) { localVarQueryParameter["limit"] = limit } + if (ocw_topic) { + localVarQueryParameter["ocw_topic"] = ocw_topic + } + + if (offered_by) { + localVarQueryParameter["offered_by"] = offered_by + } + if (offset !== undefined) { localVarQueryParameter["offset"] = offset } + if (platform) { + localVarQueryParameter["platform"] = platform + } + + if (professional !== undefined) { + localVarQueryParameter["professional"] = professional + } + if (q !== undefined) { localVarQueryParameter["q"] = q } + if (readable_id !== undefined) { + localVarQueryParameter["readable_id"] = readable_id + } + + if 
(resource_category) { + localVarQueryParameter["resource_category"] = resource_category + } + + if (resource_type) { + localVarQueryParameter["resource_type"] = resource_type + } + + if (topic) { + localVarQueryParameter["topic"] = topic + } + setSearchParams(localVarUrlObj, localVarQueryParameter) let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {} @@ -7484,16 +7574,46 @@ export const LearningResourcesVectorSearchApiFp = function ( /** * Vector Search for learning resources * @summary Vector Search + * @param {boolean | null} [certification] True if the learning resource offers a certificate + * @param {Array} [certification_type] The type of certificate * `micromasters` - MicroMasters Credential * `professional` - Professional Certificate * `completion` - Certificate of Completion * `none` - No Certificate + * @param {Array} [course_feature] The course feature. Possible options are at api/v1/course_features/ + * @param {Array} [delivery] The delivery options in which the learning resource is offered * `online` - Online * `hybrid` - Hybrid * `in_person` - In person * `offline` - Offline + * @param {Array} [department] The department that offers the learning resource * `1` - Civil and Environmental Engineering * `2` - Mechanical Engineering * `3` - Materials Science and Engineering * `4` - Architecture * `5` - Chemistry * `6` - Electrical Engineering and Computer Science * `7` - Biology * `8` - Physics * `9` - Brain and Cognitive Sciences * `10` - Chemical Engineering * `11` - Urban Studies and Planning * `12` - Earth, Atmospheric, and Planetary Sciences * `14` - Economics * `15` - Management * `16` - Aeronautics and Astronautics * `17` - Political Science * `18` - Mathematics * `20` - Biological Engineering * `21A` - Anthropology * `21G` - Global Languages * `21H` - History * `21L` - Literature * `21M` - Music and Theater Arts * `22` - Nuclear Science and Engineering * `24` - Linguistics and Philosophy * `CC` - Concourse * 
`CMS-W` - Comparative Media Studies/Writing * `EC` - Edgerton Center * `ES` - Experimental Study Group * `ESD` - Engineering Systems Division * `HST` - Medical Engineering and Science * `IDS` - Data, Systems, and Society * `MAS` - Media Arts and Sciences * `PE` - Athletics, Physical Education and Recreation * `SP` - Special Programs * `STS` - Science, Technology, and Society * `WGS` - Women\'s and Gender Studies + * @param {boolean | null} [free] + * @param {Array} [level] * @param {number} [limit] Number of results to return per page + * @param {Array} [ocw_topic] The ocw topic name. + * @param {Array} [offered_by] The organization that offers the learning resource * `mitx` - MITx * `ocw` - MIT OpenCourseWare * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * @param {number} [offset] The initial index from which to return the results + * @param {Array} [platform] The platform on which the learning resource is offered * `edx` - edX * `ocw` - MIT OpenCourseWare * `oll` - Open Learning Library * `mitxonline` - MITx Online * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `csail` - CSAIL * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * `scc` - Schwarzman College of Computing * `ctl` - Center for Transportation & Logistics * `whu` - WHU * `susskind` - Susskind * `globalalumni` - Global Alumni * `simplilearn` - Simplilearn * `emeritus` - Emeritus * `podcast` - Podcast * `youtube` - YouTube + * @param {boolean | null} [professional] * @param {string} [q] The search text + * @param {string} [readable_id] The readable id of the resource + * @param {Array} [resource_category] The category of learning resource * `course` - Course * `program` - Program * `learning_material` - Learning Material + * @param {Array} [resource_type] The type of learning resource * `course` - course * `program` - program * `learning_path` - learning path * `podcast` - podcast * 
`podcast_episode` - podcast episode * `video` - video * `video_playlist` - video playlist + * @param {Array} [topic] The topic name. To see a list of options go to api/v1/topics/ * @param {*} [options] Override http request option. * @throws {RequiredError} */ async learningResourcesVectorSearchRetrieve( + certification?: boolean | null, + certification_type?: Array, + course_feature?: Array, + delivery?: Array, + department?: Array, + free?: boolean | null, + level?: Array, limit?: number, + ocw_topic?: Array, + offered_by?: Array, offset?: number, + platform?: Array, + professional?: boolean | null, q?: string, + readable_id?: string, + resource_category?: Array, + resource_type?: Array, + topic?: Array, options?: RawAxiosRequestConfig, ): Promise< ( @@ -7503,9 +7623,24 @@ export const LearningResourcesVectorSearchApiFp = function ( > { const localVarAxiosArgs = await localVarAxiosParamCreator.learningResourcesVectorSearchRetrieve( + certification, + certification_type, + course_feature, + delivery, + department, + free, + level, limit, + ocw_topic, + offered_by, offset, + platform, + professional, q, + readable_id, + resource_category, + resource_type, + topic, options, ) const index = configuration?.serverIndex ?? 
0 @@ -7548,9 +7683,24 @@ export const LearningResourcesVectorSearchApiFactory = function ( ): AxiosPromise { return localVarFp .learningResourcesVectorSearchRetrieve( + requestParameters.certification, + requestParameters.certification_type, + requestParameters.course_feature, + requestParameters.delivery, + requestParameters.department, + requestParameters.free, + requestParameters.level, requestParameters.limit, + requestParameters.ocw_topic, + requestParameters.offered_by, requestParameters.offset, + requestParameters.platform, + requestParameters.professional, requestParameters.q, + requestParameters.readable_id, + requestParameters.resource_category, + requestParameters.resource_type, + requestParameters.topic, options, ) .then((request) => request(axios, basePath)) @@ -7564,6 +7714,55 @@ export const LearningResourcesVectorSearchApiFactory = function ( * @interface LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieveRequest */ export interface LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieveRequest { + /** + * True if the learning resource offers a certificate + * @type {boolean} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly certification?: boolean | null + + /** + * The type of certificate * `micromasters` - MicroMasters Credential * `professional` - Professional Certificate * `completion` - Certificate of Completion * `none` - No Certificate + * @type {Array<'micromasters' | 'professional' | 'completion' | 'none'>} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly certification_type?: Array + + /** + * The course feature. 
Possible options are at api/v1/course_features/ + * @type {Array} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly course_feature?: Array + + /** + * The delivery options in which the learning resource is offered * `online` - Online * `hybrid` - Hybrid * `in_person` - In person * `offline` - Offline + * @type {Array<'online' | 'hybrid' | 'in_person' | 'offline'>} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly delivery?: Array + + /** + * The department that offers the learning resource * `1` - Civil and Environmental Engineering * `2` - Mechanical Engineering * `3` - Materials Science and Engineering * `4` - Architecture * `5` - Chemistry * `6` - Electrical Engineering and Computer Science * `7` - Biology * `8` - Physics * `9` - Brain and Cognitive Sciences * `10` - Chemical Engineering * `11` - Urban Studies and Planning * `12` - Earth, Atmospheric, and Planetary Sciences * `14` - Economics * `15` - Management * `16` - Aeronautics and Astronautics * `17` - Political Science * `18` - Mathematics * `20` - Biological Engineering * `21A` - Anthropology * `21G` - Global Languages * `21H` - History * `21L` - Literature * `21M` - Music and Theater Arts * `22` - Nuclear Science and Engineering * `24` - Linguistics and Philosophy * `CC` - Concourse * `CMS-W` - Comparative Media Studies/Writing * `EC` - Edgerton Center * `ES` - Experimental Study Group * `ESD` - Engineering Systems Division * `HST` - Medical Engineering and Science * `IDS` - Data, Systems, and Society * `MAS` - Media Arts and Sciences * `PE` - Athletics, Physical Education and Recreation * `SP` - Special Programs * `STS` - Science, Technology, and Society * `WGS` - Women\'s and Gender Studies + * @type {Array<'1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' | '10' | '11' | '12' | '14' | '15' | '16' | '17' | '18' | '20' | '21A' | '21G' | '21H' | '21L' | '21M' | '22' | '24' | 'CC' | 'CMS-W' | 'EC' | 'ES' 
| 'ESD' | 'HST' | 'IDS' | 'MAS' | 'PE' | 'SP' | 'STS' | 'WGS'>} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly department?: Array + + /** + * + * @type {boolean} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly free?: boolean | null + + /** + * + * @type {Array<'undergraduate' | 'graduate' | 'high_school' | 'noncredit' | 'advanced' | 'intermediate' | 'introductory'>} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly level?: Array + /** * Number of results to return per page * @type {number} @@ -7571,6 +7770,20 @@ export interface LearningResourcesVectorSearchApiLearningResourcesVectorSearchRe */ readonly limit?: number + /** + * The ocw topic name. + * @type {Array} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly ocw_topic?: Array + + /** + * The organization that offers the learning resource * `mitx` - MITx * `ocw` - MIT OpenCourseWare * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education + * @type {Array<'mitx' | 'ocw' | 'bootcamps' | 'xpro' | 'mitpe' | 'see'>} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly offered_by?: Array + /** * The initial index from which to return the results * @type {number} @@ -7578,12 +7791,54 @@ export interface LearningResourcesVectorSearchApiLearningResourcesVectorSearchRe */ readonly offset?: number + /** + * The platform on which the learning resource is offered * `edx` - edX * `ocw` - MIT OpenCourseWare * `oll` - Open Learning Library * `mitxonline` - MITx Online * `bootcamps` - Bootcamps * `xpro` - MIT xPRO * `csail` - CSAIL * `mitpe` - MIT Professional Education * `see` - MIT Sloan Executive Education * `scc` - Schwarzman College of Computing * `ctl` - Center for Transportation & Logistics * `whu` - WHU 
* `susskind` - Susskind * `globalalumni` - Global Alumni * `simplilearn` - Simplilearn * `emeritus` - Emeritus * `podcast` - Podcast * `youtube` - YouTube + * @type {Array<'edx' | 'ocw' | 'oll' | 'mitxonline' | 'bootcamps' | 'xpro' | 'csail' | 'mitpe' | 'see' | 'scc' | 'ctl' | 'whu' | 'susskind' | 'globalalumni' | 'simplilearn' | 'emeritus' | 'podcast' | 'youtube'>} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly platform?: Array + + /** + * + * @type {boolean} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly professional?: boolean | null + /** * The search text * @type {string} * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve */ readonly q?: string + + /** + * The readable id of the resource + * @type {string} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly readable_id?: string + + /** + * The category of learning resource * `course` - Course * `program` - Program * `learning_material` - Learning Material + * @type {Array<'course' | 'program' | 'learning_material'>} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly resource_category?: Array + + /** + * The type of learning resource * `course` - course * `program` - program * `learning_path` - learning path * `podcast` - podcast * `podcast_episode` - podcast episode * `video` - video * `video_playlist` - video playlist + * @type {Array<'course' | 'program' | 'learning_path' | 'podcast' | 'podcast_episode' | 'video' | 'video_playlist'>} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly resource_type?: Array + + /** + * The topic name. 
To see a list of options go to api/v1/topics/ + * @type {Array} + * @memberof LearningResourcesVectorSearchApiLearningResourcesVectorSearchRetrieve + */ + readonly topic?: Array } /** @@ -7607,15 +7862,173 @@ export class LearningResourcesVectorSearchApi extends BaseAPI { ) { return LearningResourcesVectorSearchApiFp(this.configuration) .learningResourcesVectorSearchRetrieve( + requestParameters.certification, + requestParameters.certification_type, + requestParameters.course_feature, + requestParameters.delivery, + requestParameters.department, + requestParameters.free, + requestParameters.level, requestParameters.limit, + requestParameters.ocw_topic, + requestParameters.offered_by, requestParameters.offset, + requestParameters.platform, + requestParameters.professional, requestParameters.q, + requestParameters.readable_id, + requestParameters.resource_category, + requestParameters.resource_type, + requestParameters.topic, options, ) .then((request) => request(this.axios, this.basePath)) } } +/** + * @export + */ +export const LearningResourcesVectorSearchRetrieveCertificationTypeEnum = { + Micromasters: "micromasters", + Professional: "professional", + Completion: "completion", + None: "none", +} as const +export type LearningResourcesVectorSearchRetrieveCertificationTypeEnum = + (typeof LearningResourcesVectorSearchRetrieveCertificationTypeEnum)[keyof typeof LearningResourcesVectorSearchRetrieveCertificationTypeEnum] +/** + * @export + */ +export const LearningResourcesVectorSearchRetrieveDeliveryEnum = { + Online: "online", + Hybrid: "hybrid", + InPerson: "in_person", + Offline: "offline", +} as const +export type LearningResourcesVectorSearchRetrieveDeliveryEnum = + (typeof LearningResourcesVectorSearchRetrieveDeliveryEnum)[keyof typeof LearningResourcesVectorSearchRetrieveDeliveryEnum] +/** + * @export + */ +export const LearningResourcesVectorSearchRetrieveDepartmentEnum = { + _1: "1", + _2: "2", + _3: "3", + _4: "4", + _5: "5", + _6: "6", + _7: "7", + _8: 
"8", + _9: "9", + _10: "10", + _11: "11", + _12: "12", + _14: "14", + _15: "15", + _16: "16", + _17: "17", + _18: "18", + _20: "20", + _21A: "21A", + _21G: "21G", + _21H: "21H", + _21L: "21L", + _21M: "21M", + _22: "22", + _24: "24", + Cc: "CC", + CmsW: "CMS-W", + Ec: "EC", + Es: "ES", + Esd: "ESD", + Hst: "HST", + Ids: "IDS", + Mas: "MAS", + Pe: "PE", + Sp: "SP", + Sts: "STS", + Wgs: "WGS", +} as const +export type LearningResourcesVectorSearchRetrieveDepartmentEnum = + (typeof LearningResourcesVectorSearchRetrieveDepartmentEnum)[keyof typeof LearningResourcesVectorSearchRetrieveDepartmentEnum] +/** + * @export + */ +export const LearningResourcesVectorSearchRetrieveLevelEnum = { + Undergraduate: "undergraduate", + Graduate: "graduate", + HighSchool: "high_school", + Noncredit: "noncredit", + Advanced: "advanced", + Intermediate: "intermediate", + Introductory: "introductory", +} as const +export type LearningResourcesVectorSearchRetrieveLevelEnum = + (typeof LearningResourcesVectorSearchRetrieveLevelEnum)[keyof typeof LearningResourcesVectorSearchRetrieveLevelEnum] +/** + * @export + */ +export const LearningResourcesVectorSearchRetrieveOfferedByEnum = { + Mitx: "mitx", + Ocw: "ocw", + Bootcamps: "bootcamps", + Xpro: "xpro", + Mitpe: "mitpe", + See: "see", +} as const +export type LearningResourcesVectorSearchRetrieveOfferedByEnum = + (typeof LearningResourcesVectorSearchRetrieveOfferedByEnum)[keyof typeof LearningResourcesVectorSearchRetrieveOfferedByEnum] +/** + * @export + */ +export const LearningResourcesVectorSearchRetrievePlatformEnum = { + Edx: "edx", + Ocw: "ocw", + Oll: "oll", + Mitxonline: "mitxonline", + Bootcamps: "bootcamps", + Xpro: "xpro", + Csail: "csail", + Mitpe: "mitpe", + See: "see", + Scc: "scc", + Ctl: "ctl", + Whu: "whu", + Susskind: "susskind", + Globalalumni: "globalalumni", + Simplilearn: "simplilearn", + Emeritus: "emeritus", + Podcast: "podcast", + Youtube: "youtube", +} as const +export type 
LearningResourcesVectorSearchRetrievePlatformEnum = + (typeof LearningResourcesVectorSearchRetrievePlatformEnum)[keyof typeof LearningResourcesVectorSearchRetrievePlatformEnum] +/** + * @export + */ +export const LearningResourcesVectorSearchRetrieveResourceCategoryEnum = { + Course: "course", + Program: "program", + LearningMaterial: "learning_material", +} as const +export type LearningResourcesVectorSearchRetrieveResourceCategoryEnum = + (typeof LearningResourcesVectorSearchRetrieveResourceCategoryEnum)[keyof typeof LearningResourcesVectorSearchRetrieveResourceCategoryEnum] +/** + * @export + */ +export const LearningResourcesVectorSearchRetrieveResourceTypeEnum = { + Course: "course", + Program: "program", + LearningPath: "learning_path", + Podcast: "podcast", + PodcastEpisode: "podcast_episode", + Video: "video", + VideoPlaylist: "video_playlist", +} as const +export type LearningResourcesVectorSearchRetrieveResourceTypeEnum = + (typeof LearningResourcesVectorSearchRetrieveResourceTypeEnum)[keyof typeof LearningResourcesVectorSearchRetrieveResourceTypeEnum] + /** * NewsEventsApi - axios parameter creator * @export diff --git a/learning_resources_search/api.py b/learning_resources_search/api.py index abbbc32d9d..4772292177 100644 --- a/learning_resources_search/api.py +++ b/learning_resources_search/api.py @@ -952,12 +952,14 @@ def get_similar_resources_qdrant(value_doc: dict, num_resources: int): list of learning resources """ hits = _qdrant_similar_results(value_doc, num_resources) - return LearningResource.objects.for_search_serialization().filter( - id__in=[ - resource["id"] - for resource in hits - if resource["id"] != value_doc["id"] and resource["published"] - ] + return ( + LearningResource.objects.for_search_serialization() + .filter( + readable_id__in=[ + resource["readable_id"] for resource in hits if resource["published"] + ] + ) + .exclude(id=value_doc["id"]) ) diff --git a/openapi/specs/v0.yaml b/openapi/specs/v0.yaml index 70a1397cd5..9c02c56253 
100644 --- a/openapi/specs/v0.yaml +++ b/openapi/specs/v0.yaml @@ -303,22 +303,347 @@ paths: description: Vector Search for learning resources summary: Vector Search parameters: + - in: query + name: certification + schema: + type: boolean + nullable: true + description: True if the learning resource offers a certificate + - in: query + name: certification_type + schema: + type: array + items: + enum: + - micromasters + - professional + - completion + - none + type: string + description: |- + * `micromasters` - MicroMasters Credential + * `professional` - Professional Certificate + * `completion` - Certificate of Completion + * `none` - No Certificate + description: "The type of certificate \n\n* `micromasters` - MicroMasters\ + \ Credential\n* `professional` - Professional Certificate\n* `completion`\ + \ - Certificate of Completion\n* `none` - No Certificate" + - in: query + name: course_feature + schema: + type: array + items: + type: string + minLength: 1 + description: The course feature. 
Possible options are at api/v1/course_features/ + - in: query + name: delivery + schema: + type: array + items: + enum: + - online + - hybrid + - in_person + - offline + type: string + description: |- + * `online` - Online + * `hybrid` - Hybrid + * `in_person` - In person + * `offline` - Offline + description: "The delivery options in which the learning resource is offered\ + \ \n\n* `online` - Online\n* `hybrid` - Hybrid\n* `in_person`\ + \ - In person\n* `offline` - Offline" + - in: query + name: department + schema: + type: array + items: + enum: + - '1' + - '2' + - '3' + - '4' + - '5' + - '6' + - '7' + - '8' + - '9' + - '10' + - '11' + - '12' + - '14' + - '15' + - '16' + - '17' + - '18' + - '20' + - 21A + - 21G + - 21H + - 21L + - 21M + - '22' + - '24' + - CC + - CMS-W + - EC + - ES + - ESD + - HST + - IDS + - MAS + - PE + - SP + - STS + - WGS + type: string + description: |- + * `1` - Civil and Environmental Engineering + * `2` - Mechanical Engineering + * `3` - Materials Science and Engineering + * `4` - Architecture + * `5` - Chemistry + * `6` - Electrical Engineering and Computer Science + * `7` - Biology + * `8` - Physics + * `9` - Brain and Cognitive Sciences + * `10` - Chemical Engineering + * `11` - Urban Studies and Planning + * `12` - Earth, Atmospheric, and Planetary Sciences + * `14` - Economics + * `15` - Management + * `16` - Aeronautics and Astronautics + * `17` - Political Science + * `18` - Mathematics + * `20` - Biological Engineering + * `21A` - Anthropology + * `21G` - Global Languages + * `21H` - History + * `21L` - Literature + * `21M` - Music and Theater Arts + * `22` - Nuclear Science and Engineering + * `24` - Linguistics and Philosophy + * `CC` - Concourse + * `CMS-W` - Comparative Media Studies/Writing + * `EC` - Edgerton Center + * `ES` - Experimental Study Group + * `ESD` - Engineering Systems Division + * `HST` - Medical Engineering and Science + * `IDS` - Data, Systems, and Society + * `MAS` - Media Arts and Sciences + * `PE` - 
Athletics, Physical Education and Recreation + * `SP` - Special Programs + * `STS` - Science, Technology, and Society + * `WGS` - Women's and Gender Studies + description: "The department that offers the learning resource \ + \ \n\n* `1` - Civil and Environmental Engineering\n* `2` - Mechanical Engineering\n\ + * `3` - Materials Science and Engineering\n* `4` - Architecture\n* `5` -\ + \ Chemistry\n* `6` - Electrical Engineering and Computer Science\n* `7`\ + \ - Biology\n* `8` - Physics\n* `9` - Brain and Cognitive Sciences\n* `10`\ + \ - Chemical Engineering\n* `11` - Urban Studies and Planning\n* `12` -\ + \ Earth, Atmospheric, and Planetary Sciences\n* `14` - Economics\n* `15`\ + \ - Management\n* `16` - Aeronautics and Astronautics\n* `17` - Political\ + \ Science\n* `18` - Mathematics\n* `20` - Biological Engineering\n* `21A`\ + \ - Anthropology\n* `21G` - Global Languages\n* `21H` - History\n* `21L`\ + \ - Literature\n* `21M` - Music and Theater Arts\n* `22` - Nuclear Science\ + \ and Engineering\n* `24` - Linguistics and Philosophy\n* `CC` - Concourse\n\ + * `CMS-W` - Comparative Media Studies/Writing\n* `EC` - Edgerton Center\n\ + * `ES` - Experimental Study Group\n* `ESD` - Engineering Systems Division\n\ + * `HST` - Medical Engineering and Science\n* `IDS` - Data, Systems, and\ + \ Society\n* `MAS` - Media Arts and Sciences\n* `PE` - Athletics, Physical\ + \ Education and Recreation\n* `SP` - Special Programs\n* `STS` - Science,\ + \ Technology, and Society\n* `WGS` - Women's and Gender Studies" + - in: query + name: free + schema: + type: boolean + nullable: true + - in: query + name: level + schema: + type: array + items: + enum: + - undergraduate + - graduate + - high_school + - noncredit + - advanced + - intermediate + - introductory + type: string + description: |- + * `undergraduate` - Undergraduate + * `graduate` - Graduate + * `high_school` - High School + * `noncredit` - Non-Credit + * `advanced` - Advanced + * `intermediate` - Intermediate + * 
`introductory` - Introductory - in: query name: limit schema: type: integer description: Number of results to return per page + - in: query + name: ocw_topic + schema: + type: array + items: + type: string + minLength: 1 + description: The ocw topic name. + - in: query + name: offered_by + schema: + type: array + items: + enum: + - mitx + - ocw + - bootcamps + - xpro + - mitpe + - see + type: string + description: |- + * `mitx` - MITx + * `ocw` - MIT OpenCourseWare + * `bootcamps` - Bootcamps + * `xpro` - MIT xPRO + * `mitpe` - MIT Professional Education + * `see` - MIT Sloan Executive Education + description: "The organization that offers the learning resource \ + \ \n\n* `mitx` - MITx\n* `ocw` - MIT OpenCourseWare\n* `bootcamps` -\ + \ Bootcamps\n* `xpro` - MIT xPRO\n* `mitpe` - MIT Professional Education\n\ + * `see` - MIT Sloan Executive Education" - in: query name: offset schema: type: integer description: The initial index from which to return the results + - in: query + name: platform + schema: + type: array + items: + enum: + - edx + - ocw + - oll + - mitxonline + - bootcamps + - xpro + - csail + - mitpe + - see + - scc + - ctl + - whu + - susskind + - globalalumni + - simplilearn + - emeritus + - podcast + - youtube + type: string + description: |- + * `edx` - edX + * `ocw` - MIT OpenCourseWare + * `oll` - Open Learning Library + * `mitxonline` - MITx Online + * `bootcamps` - Bootcamps + * `xpro` - MIT xPRO + * `csail` - CSAIL + * `mitpe` - MIT Professional Education + * `see` - MIT Sloan Executive Education + * `scc` - Schwarzman College of Computing + * `ctl` - Center for Transportation & Logistics + * `whu` - WHU + * `susskind` - Susskind + * `globalalumni` - Global Alumni + * `simplilearn` - Simplilearn + * `emeritus` - Emeritus + * `podcast` - Podcast + * `youtube` - YouTube + description: "The platform on which the learning resource is offered \ + \ \n\n* `edx` - edX\n* `ocw` - MIT OpenCourseWare\n* `oll` - Open\ + \ Learning Library\n* `mitxonline` 
- MITx Online\n* `bootcamps` - Bootcamps\n\ + * `xpro` - MIT xPRO\n* `csail` - CSAIL\n* `mitpe` - MIT Professional Education\n\ + * `see` - MIT Sloan Executive Education\n* `scc` - Schwarzman College of\ + \ Computing\n* `ctl` - Center for Transportation & Logistics\n* `whu` -\ + \ WHU\n* `susskind` - Susskind\n* `globalalumni` - Global Alumni\n* `simplilearn`\ + \ - Simplilearn\n* `emeritus` - Emeritus\n* `podcast` - Podcast\n* `youtube`\ + \ - YouTube" + - in: query + name: professional + schema: + type: boolean + nullable: true - in: query name: q schema: type: string minLength: 1 description: The search text + - in: query + name: readable_id + schema: + type: string + minLength: 1 + description: The readable id of the resource + - in: query + name: resource_category + schema: + type: array + items: + enum: + - course + - program + - learning_material + type: string + description: |- + * `course` - Course + * `program` - Program + * `learning_material` - Learning Material + description: "The category of learning resource \n\n* `course`\ + \ - Course\n* `program` - Program\n* `learning_material` - Learning Material" + - in: query + name: resource_type + schema: + type: array + items: + enum: + - course + - program + - learning_path + - podcast + - podcast_episode + - video + - video_playlist + type: string + description: |- + * `course` - course + * `program` - program + * `learning_path` - learning path + * `podcast` - podcast + * `podcast_episode` - podcast episode + * `video` - video + * `video_playlist` - video playlist + description: "The type of learning resource \n\n* `course` - course\n\ + * `program` - program\n* `learning_path` - learning path\n* `podcast` -\ + \ podcast\n* `podcast_episode` - podcast episode\n* `video` - video\n* `video_playlist`\ + \ - video playlist" + - in: query + name: topic + schema: + type: array + items: + type: string + minLength: 1 + description: The topic name. 
To see a list of options go to api/v1/topics/ tags: - learning_resources_vector_search responses: diff --git a/vector_search/conftest.py b/vector_search/conftest.py index 9853b2419c..76ecdce709 100644 --- a/vector_search/conftest.py +++ b/vector_search/conftest.py @@ -1,5 +1,6 @@ import numpy as np import pytest +from qdrant_client.http.models.models import CountResult from vector_search.encoders.base import BaseEncoder @@ -33,6 +34,7 @@ def _use_test_qdrant_settings(settings, mocker): [], None, ] + mock_qdrant.count.return_value = CountResult(count=10) mocker.patch( "vector_search.utils.qdrant_client", return_value=mock_qdrant, diff --git a/vector_search/management/commands/generate_embeddings.py b/vector_search/management/commands/generate_embeddings.py index fcde4c176d..60aa04500b 100644 --- a/vector_search/management/commands/generate_embeddings.py +++ b/vector_search/management/commands/generate_embeddings.py @@ -64,12 +64,11 @@ def handle(self, *args, **options): # noqa: ARG002 for object_type in sorted(LEARNING_RESOURCE_TYPES): self.stdout.write(f" --{object_type}s") return - + if options["recreate_collections"]: + create_qdrand_collections(force_recreate=True) task = start_embed_resources.delay( indexes_to_update, skip_content_files=options["skip_content_files"] ) - if options["recreate_collections"]: - create_qdrand_collections(force_recreate=True) self.stdout.write( f"Started celery task {task} to index content for the following" f" Types to embed: {indexes_to_update}" diff --git a/vector_search/serializers.py b/vector_search/serializers.py index e079d2d17a..0cba0a5a00 100644 --- a/vector_search/serializers.py +++ b/vector_search/serializers.py @@ -1,8 +1,20 @@ +from drf_spectacular.plumbing import build_choice_description_list from drf_spectacular.utils import extend_schema_field from rest_framework import serializers +from learning_resources.constants import ( + DEPARTMENTS, + RESOURCE_CATEGORY_VALUES, + CertificationType, + LearningResourceDelivery, + 
LearningResourceType, + LevelType, + OfferedBy, + PlatformType, +) from learning_resources.serializers import LearningResourceSerializer from learning_resources_search.serializers import ( + ArrayWrappedBoolean, SearchResponseMetadata, SearchResponseSerializer, ) @@ -11,6 +23,7 @@ class LearningResourcesVectorSearchRequestSerializer(serializers.Serializer): """ Request serializer for vector based search + instead of id we use readable_id in case we upload qdrant snapshots """ q = serializers.CharField(required=False, help_text="The search text") @@ -20,6 +33,120 @@ class LearningResourcesVectorSearchRequestSerializer(serializers.Serializer): limit = serializers.IntegerField( required=False, help_text="Number of results to return per page" ) + readable_id = serializers.CharField( + required=False, help_text="The readable id of the resource" + ) + offered_by_choices = [(e.name.lower(), e.value) for e in OfferedBy] + offered_by = serializers.ListField( + required=False, + child=serializers.ChoiceField(choices=offered_by_choices), + help_text=( + f"The organization that offers the learning resource \ + \n\n{build_choice_description_list(offered_by_choices)}" + ), + ) + platform_choices = [(e.name.lower(), e.value) for e in PlatformType] + platform = serializers.ListField( + required=False, + child=serializers.ChoiceField(choices=platform_choices), + help_text=( + f"The platform on which the learning resource is offered \ + \n\n{build_choice_description_list(platform_choices)}" + ), + ) + topic = serializers.ListField( + required=False, + child=serializers.CharField(), + help_text="The topic name. 
To see a list of options go to api/v1/topics/", + ) + ocw_topic = serializers.ListField( + required=False, + child=serializers.CharField(), + help_text="The ocw topic name.", + ) + + resource_choices = [(e.name, e.value.lower()) for e in LearningResourceType] + resource_type = serializers.ListField( + required=False, + child=serializers.ChoiceField( + choices=resource_choices, + ), + help_text=( + f"The type of learning resource \ + \n\n{build_choice_description_list(resource_choices)}" + ), + ) + free = ArrayWrappedBoolean( + required=False, + allow_null=True, + default=None, + ) + professional = ArrayWrappedBoolean( + required=False, + allow_null=True, + default=None, + ) + + certification = ArrayWrappedBoolean( + required=False, + allow_null=True, + default=None, + help_text="True if the learning resource offers a certificate", + ) + certification_choices = CertificationType.as_tuple() + certification_type = serializers.ListField( + required=False, + child=serializers.ChoiceField( + choices=certification_choices, + ), + help_text=( + f"The type of certificate \ + \n\n{build_choice_description_list(certification_choices)}" + ), + ) + department_choices = list(DEPARTMENTS.items()) + department = serializers.ListField( + required=False, + child=serializers.ChoiceField(choices=department_choices), + help_text=( + f"The department that offers the learning resource \ + \n\n{build_choice_description_list(department_choices)}" + ), + ) + + level = serializers.ListField( + required=False, child=serializers.ChoiceField(choices=LevelType.as_list()) + ) + + course_feature = serializers.ListField( + required=False, + child=serializers.CharField(), + help_text="The course feature. 
" + "Possible options are at api/v1/course_features/", + ) + + delivery_choices = LearningResourceDelivery.as_list() + delivery = serializers.ListField( + required=False, + child=serializers.ChoiceField(choices=delivery_choices), + help_text=( + f"The delivery options in which the learning resource is offered \ + \n\n{build_choice_description_list(delivery_choices)}" + ), + ) + resource_category_choices = [ + (value, value.replace("_", " ").title()) for value in RESOURCE_CATEGORY_VALUES + ] + resource_category = serializers.ListField( + required=False, + child=serializers.ChoiceField( + choices=resource_category_choices, + ), + help_text=( + f"The category of learning resource \ + \n\n{build_choice_description_list(resource_category_choices)}" + ), + ) class LearningResourcesVectorSearchResponseSerializer(SearchResponseSerializer): diff --git a/vector_search/utils.py b/vector_search/utils.py index 696141b410..20698eafa6 100644 --- a/vector_search/utils.py +++ b/vector_search/utils.py @@ -4,6 +4,7 @@ from qdrant_client import QdrantClient, models from learning_resources.models import LearningResource +from learning_resources.serializers import LearningResourceSerializer from learning_resources_search.constants import CONTENT_FILE_TYPE from learning_resources_search.serializers import ( serialize_bulk_content_files, @@ -11,6 +12,25 @@ ) from vector_search.encoders.utils import dense_encoder +QDRANT_PARAM_MAP = { + "readable_id": "readable_id", + "resource_type": "resource_type", + "certification": "certification", + "certification_type": "certification_type.code", + "professional": "professional", + "free": "free", + "course_feature": "course_feature", + "content_feature_type": "content_feature_type", + "topic": "topics[].name", + "ocw_topic": "ocw_topics", + "level": "runs[].level.code", + "department": "departments.department_id", + "platform": "platform.code", + "offered_by": "offered_by.code", + "delivery": "delivery[].code", + "resource_category": 
"resource_category", +} + def qdrant_client(): return QdrantClient( @@ -170,6 +190,7 @@ def embed_learning_resources(ids, resource_type): def vector_search( query_string: str, + params: dict, limit: int = 10, offset: int = 10, ): @@ -178,6 +199,7 @@ def vector_search( Args: query_string (str): Query string to search + params (dict): Additional search filters limit (int): Max number of results to return offset (int): Offset to start from Returns: @@ -185,53 +207,82 @@ def vector_search( Response dict containing "hits" with search results and "total" with total count """ + client = qdrant_client() + qdrant_conditions = qdrant_query_conditions(params) + search_filter = models.Filter( + must=[ + *qdrant_conditions, + models.FieldCondition(key="published", match=models.MatchValue(value=True)), + ], + ) if query_string: - client = qdrant_client() encoder = dense_encoder() search_result = client.query_points( collection_name=f"{settings.QDRANT_BASE_COLLECTION_NAME}.resources", using=encoder.model_short_name(), query=encoder.encode(query_string), - query_filter=models.Filter( - must=[ - models.FieldCondition( - key="published", match=models.MatchValue(value=True) - ) - ] - ), + query_filter=search_filter, limit=limit, offset=offset, - ) - hits = [ - { - "id": hit.payload["id"], - "readable_id": hit.payload["readable_id"], - "resource_type": hit.payload["resource_type"], - "title": hit.payload["title"], - "description": hit.payload["description"], - "platform": hit.payload["platform"], - } - for hit in search_result.points - ] + ).points else: - results = serialize_bulk_learning_resources( - LearningResource.objects.all()[offset : offset + limit].values_list( - "id", flat=True - ) - ) + search_result = client.scroll( + collection_name=f"{settings.QDRANT_BASE_COLLECTION_NAME}.resources", + scroll_filter=search_filter, + limit=limit, + offset=offset, + )[0] + hits = [hit.payload["readable_id"] for hit in search_result] + count_result = client.count( + 
collection_name=f"{settings.QDRANT_BASE_COLLECTION_NAME}.resources", + count_filter=search_filter, + exact=True, + ) - hits = [ - { - "id": resource["id"], - "readable_id": resource["readable_id"], - "resource_type": resource["resource_type"], - "title": resource["title"], - "description": resource["description"], - "platform": resource["platform"], - } - for resource in results - ] - return {"hits": hits, "total": {"value": 10000}} + """ + Always lookup learning resources by readable_id for portability + in case we load points from external systems + """ + return { + "hits": LearningResourceSerializer( + LearningResource.objects.for_serialization().filter(readable_id__in=hits), + many=True, + ).data, + "total": {"value": count_result.count}, + } + + +def qdrant_query_conditions(params): + """ + Generate Qdrant query conditions from query params + Args: + params (dict): Query params + Returns: + FieldCondition[]: + List of Qdrant FieldCondition objects + """ + conditions = [] + if not params: + return conditions + for param in params: + if param in QDRANT_PARAM_MAP and params[param] is not None: + if type(params[param]) is list: + """ + Account for array wrapped booleans which should only match value + We can also use MatchValue for arrays with a single item + """ + if len(params[param]) == 1 and type(params[param][0]) is bool: + match_condition = models.MatchValue(value=params[param][0]) + else: + match_condition = models.MatchAny(any=params[param]) + else: + match_condition = models.MatchValue(value=params[param]) + conditions.append( + models.FieldCondition( + key=QDRANT_PARAM_MAP[param], match=match_condition + ) + ) + return conditions def filter_existing_qdrant_points(learning_resources): diff --git a/vector_search/utils_test.py b/vector_search/utils_test.py index 13ec1bb411..c85a17e94e 100644 --- a/vector_search/utils_test.py +++ b/vector_search/utils_test.py @@ -1,4 +1,5 @@ import pytest +from qdrant_client import models from qdrant_client.models import 
PointStruct from learning_resources.factories import ContentFileFactory, LearningResourceFactory @@ -7,6 +8,7 @@ create_qdrand_collections, embed_learning_resources, filter_existing_qdrant_points, + qdrant_query_conditions, vector_point_id, ) @@ -169,3 +171,47 @@ def test_skip_creating_qdrand_collections(mocker): "dummy-embedding" in mock_qdrant.recreate_collection.mock_calls[1].kwargs["vectors_config"] ) + + +def test_qdrant_query_conditions(mocker): + """ + Test query filter mapping to qdrant conditions + """ + params = { + "q": "test", + "topic": ["test topic 1", "test topic 2"], + "offered_by": ["ocw", "edx"], + "platform": ["edx"], + "resource_type": ["course", "podcast"], + "free": True, + } + query_conditions = qdrant_query_conditions(params) + + assert ( + models.FieldCondition( + key="offered_by.code", match=models.MatchAny(any=["ocw", "edx"]) + ) + in query_conditions + ) + assert ( + models.FieldCondition(key="platform.code", match=models.MatchAny(any=["edx"])) + in query_conditions + ) + assert ( + models.FieldCondition( + key="resource_type", match=models.MatchAny(any=["course", "podcast"]) + ) + in query_conditions + ) + assert ( + models.FieldCondition( + key="topics[].name", + match=models.MatchAny(any=["test topic 1", "test topic 2"]), + ) + in query_conditions + ) + # test that items not in the filter map are ignored + assert ( + models.FieldCondition(key="q", match=models.MatchValue(value="test")) + not in query_conditions + ) diff --git a/vector_search/views.py b/vector_search/views.py index 4eaae7beee..c7a7fe5e7a 100644 --- a/vector_search/views.py +++ b/vector_search/views.py @@ -62,7 +62,9 @@ def get(self, request): query_text = request_data.data.get("q", "") limit = request_data.data.get("limit", 10) offset = request_data.data.get("offset", 0) - response = vector_search(query_text, limit=limit, offset=offset) + response = vector_search( + query_text, limit=limit, offset=offset, params=request_data.data + ) if 
request_data.data.get("dev_mode"): return Response(response) else: diff --git a/vector_search/views_test.py b/vector_search/views_test.py index 351dc854b8..38080e93ec 100644 --- a/vector_search/views_test.py +++ b/vector_search/views_test.py @@ -1,29 +1,96 @@ from django.urls import reverse +from qdrant_client import models +from qdrant_client.http.models.models import CountResult -from learning_resources.factories import ( - LearningResourceFactory, -) -from learning_resources.models import LearningResource +def test_vector_search_filters(mocker, client): + """Test vector search with query uses query filters""" -def test_vector_search_returns_all_resources_for_empty_query(mocker, client): - """Test vector search endpoint returns all resources when 'q' is empty""" - LearningResourceFactory.create_batch(5) mock_qdrant = mocker.patch("qdrant_client.QdrantClient") - mock_qdrant.query.return_value = [] + mock_qdrant.scroll.return_value = [[]] mocker.patch( "vector_search.utils.qdrant_client", return_value=mock_qdrant, ) - params = {"q": "", "limit": LearningResource.objects.count() + 10} - resp = client.get( + mock_qdrant.count.return_value = CountResult(count=10) + params = { + "q": "test", + "topic": ["test"], + "offered_by": ["ocw"], + "platform": ["edx"], + "resource_type": ["course"], + "free": True, + "department": ["6", "7"], + } + + client.get( reverse("vector_search:v0:learning_resources_vector_search"), data=params ) - results = resp.json()["results"] - assert len(results) == LearningResource.objects.count() - params = {"q": "test"} - resp = client.get( + + assert all( + condition in mock_qdrant.query_points.mock_calls[0].kwargs["query_filter"].must + for condition in [ + models.FieldCondition( + key="offered_by.code", match=models.MatchAny(any=["ocw"]) + ), + models.FieldCondition( + key="platform.code", match=models.MatchAny(any=["edx"]) + ), + models.FieldCondition( + key="resource_type", match=models.MatchAny(any=["course"]) + ), + 
models.FieldCondition(key="free", match=models.MatchValue(value=True)), + models.FieldCondition( + key="departments.department_id", + match=models.MatchAny(any=["6", "7"]), + ), + models.FieldCondition(key="published", match=models.MatchValue(value=True)), + ] + ) + + +def test_vector_search_filters_empty_query(mocker, client): + """Test vector search filters with empty query uses scroll filters""" + + mock_qdrant = mocker.patch("qdrant_client.QdrantClient") + mock_qdrant.scroll.return_value = [[]] + mock_qdrant.count.return_value = CountResult(count=10) + mocker.patch( + "vector_search.utils.qdrant_client", + return_value=mock_qdrant, + ) + + params = { + "q": "", + "topic": ["test"], + "offered_by": ["ocw"], + "platform": ["edx"], + "resource_type": ["course"], + "free": True, + "department": ["6", "7"], + } + + client.get( reverse("vector_search:v0:learning_resources_vector_search"), data=params ) - results = resp.json()["results"] - assert len(results) == 0 + + assert all( + condition in mock_qdrant.scroll.mock_calls[0].kwargs["scroll_filter"].must + for condition in [ + models.FieldCondition( + key="offered_by.code", match=models.MatchAny(any=["ocw"]) + ), + models.FieldCondition( + key="platform.code", match=models.MatchAny(any=["edx"]) + ), + models.FieldCondition( + key="resource_type", match=models.MatchAny(any=["course"]) + ), + models.FieldCondition(key="free", match=models.MatchValue(value=True)), + models.FieldCondition( + key="departments.department_id", + match=models.MatchAny(any=["6", "7"]), + ), + models.FieldCondition(key="published", match=models.MatchValue(value=True)), + ] + )