diff --git a/clients/ts-sdk/openapi.json b/clients/ts-sdk/openapi.json index 4b4bf5a4f9..7ce4059cb3 100644 --- a/clients/ts-sdk/openapi.json +++ b/clients/ts-sdk/openapi.json @@ -7597,9 +7597,11 @@ "CrawlShopifyOptions": { "type": "object", "title": "CrawlShopifyOptions", + "description": "Options for Crawling Shopify", "properties": { - "boost_item_names": { + "group_variants": { "type": "boolean", + "description": "This option will ingest all variants as individual chunks and place them in groups by product id. Turning this off will only scrape 1 variant per product. default: true", "nullable": true } } diff --git a/clients/ts-sdk/src/types.gen.ts b/clients/ts-sdk/src/types.gen.ts index f5af8a719a..c825461441 100644 --- a/clients/ts-sdk/src/types.gen.ts +++ b/clients/ts-sdk/src/types.gen.ts @@ -469,8 +469,14 @@ export type CrawlOptions = { site_url?: (string) | null; }; +/** + * Options for Crawling Shopify + */ export type CrawlShopifyOptions = { - boost_item_names?: (boolean) | null; + /** + * This option will ingest all variants as individual chunks and place them in groups by product id. Turning this off will only scrape 1 variant per product. 
default: true + */ + group_variants?: (boolean) | null; }; export type CreateBatchChunkGroupReqPayload = Array; diff --git a/frontends/dashboard/src/components/NewDatasetModal.tsx b/frontends/dashboard/src/components/NewDatasetModal.tsx index ddb1828558..2f9a98081c 100644 --- a/frontends/dashboard/src/components/NewDatasetModal.tsx +++ b/frontends/dashboard/src/components/NewDatasetModal.tsx @@ -1,7 +1,6 @@ import { Accessor, createSignal, - createEffect, useContext, For, Switch, @@ -26,14 +25,21 @@ import { createToast } from "./ShowToasts"; import { createNewDataset } from "../api/createDataset"; import { uploadSampleData } from "../api/uploadSampleData"; import { defaultServerEnvsConfiguration } from "../utils/serverEnvs"; -import { CrawlInterval, CrawlOptions, DistanceMetric } from "trieve-ts-sdk"; +import { CrawlInterval, DistanceMetric } from "trieve-ts-sdk"; import { FaRegularCircleQuestion } from "solid-icons/fa"; import { Tooltip } from "shared/ui"; import { FiChevronDown, FiChevronUp } from "solid-icons/fi"; import { createStore, SetStoreFunction, unwrap } from "solid-js/store"; import { DatasetConfig } from "./dataset-settings/LegacySettingsWrapper"; import { cn } from "shared/utils"; -import { ValidateFn, ErrorMsg } from "../utils/validation"; +import { ValidateFn, ErrorMsg, ValidateErrors } from "../utils/validation"; +import { + defaultCrawlOptions, + FlatCrawlOptions, + flattenCrawlOptions, + unflattenCrawlOptions, + validateFlatCrawlOptions, +} from "../pages/dataset/CrawlingSettings"; export interface NewDatasetModalProps { isOpen: Accessor; @@ -41,7 +47,7 @@ export interface NewDatasetModalProps { } const validate: ValidateFn = (value) => { - const errors: Record = {}; + const errors: ValidateErrors = {}; if (value.BM25_ENABLED) { if (!value.BM25_B) { @@ -74,7 +80,9 @@ export const NewDatasetModal = (props: NewDatasetModalProps) => { const [serverConfig, setServerConfig] = createStore( defaultServerEnvsConfiguration, ); - const [crawlOptions, 
setCrawlOptions] = createSignal(); + const [crawlOptions, setCrawlOptions] = createStore( + flattenCrawlOptions(defaultCrawlOptions), + ); const [name, setName] = createSignal(""); const [showAdvanced, setShowAdvanced] = createSignal(false); const [showScraping, setShowScraping] = createSignal(false); @@ -84,9 +92,13 @@ export const NewDatasetModal = (props: NewDatasetModalProps) => { const [errors, setErrors] = createStore< ReturnType>["errors"] >({}); + const [crawlErrors, setCrawlErrors] = createStore< + ReturnType>["errors"] + >({}); const createDataset = async () => { const curServerConfig = unwrap(serverConfig); + const unwrappedFlatCrawlOptions = unwrap(crawlOptions); const validateResult = validate(curServerConfig); if (validateResult.valid) { setErrors({}); @@ -95,13 +107,27 @@ export const NewDatasetModal = (props: NewDatasetModalProps) => { return; } + if (showScraping()) { + const crawlValidateResult = validateFlatCrawlOptions( + unwrappedFlatCrawlOptions, + ); + if (crawlValidateResult.valid) { + setCrawlErrors({}); + } else { + setCrawlErrors(crawlValidateResult.errors); + return; + } + } + try { setIsLoading(true); const dataset = await createNewDataset({ name: name(), organizationId: userContext.selectedOrg().id, serverConfig: curServerConfig, - crawlOptions: crawlOptions(), + crawlOptions: showScraping() + ? unflattenCrawlOptions(unwrappedFlatCrawlOptions) + : undefined, }); if (fillWithExampleData()) { @@ -133,10 +159,6 @@ export const NewDatasetModal = (props: NewDatasetModalProps) => { } }; - createEffect(() => { - console.log(crawlOptions()); - }); - return ( { > - + - + Scraping @@ -485,625 +515,11 @@ export const NewDatasetModal = (props: NewDatasetModalProps) => { /> -
-
- - - setCrawlOptions((prev) => { - if (!prev) { - return { - site_url: e.currentTarget.value, - }; - } - - return { - ...prev, - site_url: e.currentTarget.value, - }; - }) - } - /> -
-
- - - setCrawlOptions((prev) => { - if (!prev) { - return { - exclude_paths: - e.currentTarget.value.split(","), - }; - } - - return { - ...prev, - exclude_paths: - e.currentTarget.value.split(","), - }; - }) - } - /> -
- -
- - - setCrawlOptions((prev) => { - if (!prev) { - return { - include_paths: - e.currentTarget.value.split(","), - }; - } - - return { - ...prev, - include_paths: - e.currentTarget.value.split(","), - }; - }) - } - /> -
- -
- - - setCrawlOptions((prev) => { - if (!prev) { - return { - exclude_tags: - e.currentTarget.value.split(","), - }; - } - - return { - ...prev, - exclude_tags: - e.currentTarget.value.split(","), - }; - }) - } - /> -
- -
- - - setCrawlOptions((prev) => { - if (!prev) { - return { - include_tags: - e.currentTarget.value.split(","), - }; - } - - return { - ...prev, - include_tags: - e.currentTarget.value.split(","), - }; - }) - } - /> -
- -
- - - setCrawlOptions((prev) => { - if (!prev) { - return { - max_depth: parseInt( - e.currentTarget.value, - ), - }; - } - - return { - ...prev, - max_depth: parseInt(e.currentTarget.value), - }; - }) - } - /> -
- -
- - - setCrawlOptions((prev) => { - if (!prev) { - return { - limit: parseInt(e.currentTarget.value), - }; - } - - return { - ...prev, - limit: parseInt(e.currentTarget.value), - }; - }) - } - /> -
- -
- - -
- -
- - - setCrawlOptions((prev) => { - if (!prev) { - return { - boost_titles: e.currentTarget.checked, - }; - } - - return { - ...prev, - boost_titles: e.currentTarget.checked, - }; - }) - } - /> -
- -
-
- - setCrawlOptions((prev) => { - if (!prev) { - return { - scrape_options: { - type: "openapi", - }, - }; - } - if (!e.currentTarget.checked) { - return { - ...prev, - scrape_options: null, - }; - } else { - return { - ...prev, - scrape_options: { - type: "openapi", - }, - }; - } - }) - } - /> - - -
-
- - setCrawlOptions((prev) => { - if (!prev) { - return { - scrape_options: { - type: "shopify", - }, - }; - } - - if (!e.currentTarget.checked) { - return { - ...prev, - scrape_options: null, - }; - } else { - return { - ...prev, - scrape_options: { - type: "shopify", - }, - }; - } - }) - } - /> - - -
-
- - - -
- - component - crawlOptions()?.scrape_options - ?.openapi_schema_url ?? "" - } - onInput={(e) => - setCrawlOptions((prev) => { - if (!prev) { - return { - scrape_options: { - type: "openapi", - openapi_schema_url: - e.currentTarget.value, - openapi_tag: "", - }, - }; - } - - return { - ...prev, - scrape_options: { - type: "openapi", - openapi_schema_url: - e.currentTarget.value, - openapi_tag: - prev.scrape_options?.openapi_tag ?? - "", - }, - }; - }) - } - /> -
- -
- - - setCrawlOptions((prev) => { - if (!prev) { - return { - scrape_options: { - type: "openapi", - openapi_schema_url: "", - openapi_tag: e.currentTarget.value, - }, - }; - } - - return { - ...prev, - scrape_options: { - type: "openapi", - openapi_schema_url: - prev.scrape_options - ?.openapi_schema_url ?? "", - openapi_tag: e.currentTarget.value, - }, - }; - }) - } - /> -
-
- -
- - - setCrawlOptions((prev) => { - if (!prev) { - console.log(e.currentTarget.value); - return { - scrape_options: { - type: "shopify", - group_variants: e.currentTarget.checked, - }, - }; - } - - return { - ...prev, - scrape_options: { - type: "shopify", - group_variants: e.currentTarget.checked, - }, - }; - }) - } - /> -
-
-
-
+
@@ -1135,6 +551,515 @@ export const NewDatasetModal = (props: NewDatasetModalProps) => { }; export default NewDatasetModal; +const ScrapingSettings = (props: { + crawlOptions: FlatCrawlOptions; + setCrawlOptions: SetStoreFunction; + errors: ReturnType>["errors"]; +}) => { + return ( +
+
+
+ + + props.setCrawlOptions((prev) => { + if (!prev) { + return { + site_url: e.currentTarget.value, + }; + } + + return { + ...prev, + site_url: e.currentTarget.value, + }; + }) + } + /> +
+
+ + + props.setCrawlOptions((prev) => { + if (!prev) { + return { + exclude_paths: e.currentTarget.value.split(","), + }; + } + + return { + ...prev, + exclude_paths: e.currentTarget.value.split(","), + }; + }) + } + /> +
+ +
+ + + props.setCrawlOptions((prev) => { + if (!prev) { + return { + include_paths: e.currentTarget.value.split(","), + }; + } + + return { + ...prev, + include_paths: e.currentTarget.value.split(","), + }; + }) + } + /> +
+ +
+ + + props.setCrawlOptions((prev) => { + if (!prev) { + return { + exclude_tags: e.currentTarget.value.split(","), + }; + } + return { + ...prev, + exclude_tags: e.currentTarget.value.split(","), + }; + }) + } + /> +
+ +
+ + + props.setCrawlOptions((prev) => { + if (!prev) { + return { + include_tags: e.currentTarget.value.split(","), + }; + } + + return { + ...prev, + include_tags: e.currentTarget.value.split(","), + }; + }) + } + /> +
+ +
+ + + props.setCrawlOptions((prev) => { + if (!prev) { + return { + max_depth: parseInt(e.currentTarget.value), + }; + } + + return { + ...prev, + max_depth: parseInt(e.currentTarget.value), + }; + }) + } + /> +
+ +
+ + + props.setCrawlOptions((prev) => { + if (!prev) { + return { + limit: parseInt(e.currentTarget.value), + }; + } + + return { + ...prev, + limit: parseInt(e.currentTarget.value), + }; + }) + } + /> +
+ +
+ + +
+ +
+ + + props.setCrawlOptions((prev) => { + if (!prev) { + return { + boost_titles: e.currentTarget.checked, + }; + } + + return { + ...prev, + boost_titles: e.currentTarget.checked, + }; + }) + } + /> +
+ +
+
+ + props.setCrawlOptions((prev) => { + if (!e.currentTarget.checked) { + if (prev.type === "openapi") { + return { + ...prev, + type: undefined, + }; + } + return { + ...prev, + }; + } else { + return { + ...prev, + type: "openapi", + }; + } + }) + } + /> + + +
+
+ + props.setCrawlOptions((prev) => { + if (!e.currentTarget.checked) { + if (prev.type === "shopify") { + return { + ...prev, + type: undefined, + }; + } + return { + ...prev, + }; + } else { + return { + type: "shopify" as const, + }; + } + }) + } + /> + + +
+
+ + + +
+ + + props.setCrawlOptions((prev) => { + return { + ...prev, + type: "openapi", + openapi_schema_url: e.currentTarget.value, + }; + }) + } + /> +
+ +
+ + + props.setCrawlOptions((prev) => { + return { + ...prev, + type: "openapi", + openapi_tag: e.currentTarget.value, + }; + }) + } + /> +
+
+ +
+ + + props.setCrawlOptions((prev) => { + return { + ...prev, + scrape_options: { + type: "shopify", + group_variants: e.currentTarget.checked, + }, + }; + }) + } + /> +
+
+
+
+
/**
 * Converts the flat form state used by the crawl-settings UI back into the
 * nested `CrawlOptions` shape (with `scrape_options`) that is sent to the API.
 *
 * The flat-only keys (`type`, `openapi_schema_url`, `openapi_tag`,
 * `group_variants`) are always removed from the top level of the result, so a
 * half-filled or since-unchecked scrape section never leaks into the payload.
 *
 * @param options Flat crawl options as held in the form store.
 * @returns Crawl options whose `scrape_options` is:
 *   - the OpenAPI variant when `type` is "openapi" and both the schema URL
 *     and the tag are non-empty,
 *   - the Shopify variant when `type` is "shopify",
 *   - `null` otherwise.
 */
export const unflattenCrawlOptions = (
  options: FlatCrawlOptions,
): CrawlOptions => {
  // Tolerate a missing store value the same way the spread-based version did.
  if (!options) {
    return { scrape_options: null };
  }

  // Peel the flat-only keys off once; `shared` is everything that belongs on
  // the top level of CrawlOptions.
  const { type, openapi_schema_url, openapi_tag, group_variants, ...shared } =
    options;

  // An OpenAPI scrape is only emitted when it is fully specified; an
  // incomplete one is downgraded to "no scrape options".
  if (type === "openapi" && openapi_schema_url && openapi_tag) {
    return {
      ...shared,
      scrape_options: {
        type: "openapi",
        openapi_schema_url,
        openapi_tag,
      },
    };
  }

  if (type === "shopify") {
    return {
      ...shared,
      scrape_options: {
        type: "shopify",
        group_variants,
      },
    };
  }

  return {
    ...shared,
    scrape_options: null,
  };
};

/**
 * Flattens nested `CrawlOptions` into the form-friendly shape: the
 * discriminator and the variant-specific fields of `scrape_options` are
 * copied to the top level next to the regular crawl settings.
 *
 * All original keys of `options` are carried over unchanged; when there is no
 * recognised scrape variant, `type` is explicitly set to `undefined`.
 *
 * @param options Crawl options in the nested shape.
 * @returns The equivalent flat options.
 */
export const flattenCrawlOptions = (
  options: CrawlOptions,
): FlatCrawlOptions => {
  const scrape = options.scrape_options;

  if (scrape?.type === "openapi") {
    return {
      ...options,
      type: "openapi",
      openapi_schema_url: scrape.openapi_schema_url,
      openapi_tag: scrape.openapi_tag,
    };
  }

  if (scrape?.type === "shopify") {
    return {
      ...options,
      type: "shopify",
      group_variants: scrape.group_variants,
    };
  }

  return {
    ...options,
    type: undefined,
  };
};
/**
 * Validates the flat crawl-settings form state before it is saved.
 *
 * Rules:
 * - `site_url` is required and must start with `http://` or `https://`.
 * - Unless the scrape type is "shopify", `limit` and `max_depth` must both be
 *   positive numbers (a NaN from an empty number input is rejected too).
 * - When the scrape type is "openapi", `openapi_schema_url` is required; the
 *   message mentions the tag when a tag was entered without a schema URL.
 *
 * @param value Flat crawl options as held in the form store.
 * @returns `errors` keyed by field name and `valid`, which is true only when
 *   no field has an error message.
 */
export const validateFlatCrawlOptions: ValidateFn<FlatCrawlOptions> = (
  value,
) => {
  const errors: ValidateErrors<FlatCrawlOptions> = {};

  if (!value.site_url) {
    errors.site_url = "Site URL is required";
  } else if (!/^https?:\/\//.test(value.site_url)) {
    // Require the full scheme separator so the check matches the message.
    errors.site_url = "Invalid Site URL - http(s):// required";
  }

  // Page limit and depth are not validated for Shopify crawls.
  if (value.type !== "shopify") {
    if (!value.limit || value.limit <= 0) {
      errors.limit = "Limit must be greater than 0";
    }
    // Reject negative depths as well as 0/NaN/undefined, mirroring `limit`.
    if (!value.max_depth || value.max_depth <= 0) {
      errors.max_depth = "Max depth must be greater than 0";
    }
    if (value.type === "openapi" && !value.openapi_schema_url) {
      errors.openapi_schema_url = value.openapi_tag
        ? "OpenAPI Schema URL is required for tag"
        : "OpenAPI Schema URL is required";
    }
  }

  return {
    errors,
    valid: Object.values(errors).every((message) => !message),
  };
};
+ }; + } + return { + ...prev, + }; + } else { + return { + type: "shopify" as const, + }; + } + }); }} checked={isShopify()} class="h-4 w-4 rounded border border-neutral-300 bg-neutral-100 p-1 accent-magenta-400 dark:border-neutral-900 dark:bg-neutral-800" @@ -253,14 +315,26 @@ const RealCrawlingSettings = (props: RealCrawlingSettingsProps) => { /> { - if (e.currentTarget.checked) { - setOptions("scrape_options", "type", "openapi"); - } else { - setOptions("scrape_options", "type", {}); - setOptions("scrape_options", {}); - } - }} + onChange={(e) => + setOptions((prev) => { + if (!e.currentTarget.checked) { + if (prev.type === "openapi") { + return { + ...prev, + type: undefined, + }; + } + return { + ...prev, + }; + } else { + return { + ...prev, + type: "openapi", + }; + } + }) + } checked={isOpenAPI()} class="h-4 w-4 rounded border border-neutral-300 bg-neutral-100 p-1 accent-magenta-400 dark:border-neutral-900 dark:bg-neutral-800" type="checkbox" @@ -298,48 +372,36 @@ const RealCrawlingSettings = (props: RealCrawlingSettingsProps) => { /> -
- - { - if (!options.scrape_options) { - setOptions("scrape_options", {}); - } - setOptions( - "scrape_options", - "openapi_schema_url", - e.currentTarget.value, - ); - }} - class="block w-full rounded border border-neutral-300 px-3 py-1.5 shadow-sm placeholder:text-neutral-400 focus:outline-magenta-500 sm:text-sm sm:leading-6" - /> - -
-
- - { - if (!options.scrape_options) { - setOptions("scrape_options", { type: "openapi" }); - } - setOptions( - "scrape_options", - "openapi_tag", - e.currentTarget.value, - ); - }} - class="block w-full rounded border border-neutral-300 px-3 py-1.5 shadow-sm placeholder:text-neutral-400 focus:outline-magenta-500 sm:text-sm sm:leading-6" - /> -
+ +
+ + { + setOptions("openapi_schema_url", e.currentTarget.value); + }} + class="block w-full rounded border border-neutral-300 px-3 py-1.5 shadow-sm placeholder:text-neutral-400 focus:outline-magenta-500 sm:text-sm sm:leading-6" + /> + +
+
+ + { + setOptions("openapi_tag", e.currentTarget.value); + }} + class="block w-full rounded border border-neutral-300 px-3 py-1.5 shadow-sm placeholder:text-neutral-400 focus:outline-magenta-500 sm:text-sm sm:leading-6" + /> +
+
> = (value: T) => { - errors: { - [key in keyof T]: string | undefined; - }; +export type ValidateFn> = (value: T) => { + errors: ValidateErrors; valid: boolean; }; // eslint-disable-next-line @typescript-eslint/no-explicit-any -export type ValidateErrors> = ReturnType["errors"]; +export type ValidateErrors> = { + [key in keyof T]: NonNullable extends Record + ? ReturnType>>["errors"] + : string | undefined; +}; export const ErrorMsg = (props: { error: string | null | undefined }) => { return (