Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: dashboard crawling options #2483

Merged
merged 13 commits into from
Oct 3, 2024
107 changes: 107 additions & 0 deletions clients/ts-sdk/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -3208,6 +3208,76 @@
]
}
},
"/api/dataset/crawl_options/{dataset_id}": {
"get": {
"tags": [
"Dataset"
],
"summary": "Get Dataset Crawl Options",
"description": "Auth'ed user or api key must have an admin or owner role for the specified dataset's organization.",
"operationId": "get_dataset_crawl_options",
"parameters": [
{
"name": "TR-Dataset",
"in": "header",
"description": "The dataset id or tracking_id to use for the request. We assume you intend to use an id if the value is a valid uuid.",
"required": true,
"schema": {
"type": "string"
}
},
{
"name": "dataset_id",
"in": "path",
"description": "The id of the dataset whose crawl options you want to retrieve.",
"required": true,
"schema": {
"type": "string",
"format": "uuid"
}
}
],
"responses": {
"200": {
"description": "Crawl options retrieved successfully",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/GetCrawlOptionsResponse"
}
}
}
},
"400": {
"description": "Service error relating to retrieving the crawl options",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponseBody"
}
}
}
},
"404": {
"description": "Dataset not found",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponseBody"
}
}
}
}
},
"security": [
{
"ApiKey": [
"admin"
]
}
]
}
},
"/api/dataset/files/{dataset_id}/{page}": {
"get": {
"tags": [
Expand Down Expand Up @@ -9119,6 +9189,43 @@
}
}
},
"GetCrawlOptionsResponse": {
"type": "object",
"properties": {
"crawl_options": {
"allOf": [
{
"$ref": "#/components/schemas/CrawlOptions"
}
],
"nullable": true
}
},
"example": {
"crawl_options": {
"exclude_paths": [
"https://example.com/exclude"
],
"exclude_tags": [
"#ad",
"#footer"
],
"include_paths": [
"https://example.com/include"
],
"include_tags": [
"h1",
"p",
"a",
".main-content"
],
"interval": "daily",
"limit": 1000,
"max_depth": 10,
"site_url": "https://example.com"
}
}
},
"GetDatasetsPagination": {
"type": "object",
"properties": {
Expand Down
36 changes: 36 additions & 0 deletions clients/ts-sdk/src/types.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,10 @@ export type GetChunksInGroupsResponseBody = {
total_pages: number;
};

/**
 * Success (200) response body of `GET /api/dataset/crawl_options/{dataset_id}`.
 *
 * `crawl_options` is optional and nullable: it may be omitted or `null`.
 */
export type GetCrawlOptionsResponse = {
  crawl_options?: CrawlOptions | null;
};

export type GetDatasetsPagination = {
limit?: (number) | null;
offset?: (number) | null;
Expand Down Expand Up @@ -3285,6 +3289,19 @@ export type ClearDatasetData = {

export type ClearDatasetResponse = (void);

/**
 * Request parameters for `GET /api/dataset/crawl_options/{dataset_id}`:
 * the `dataset_id` path parameter and the required `TR-Dataset` header.
 */
export type GetDatasetCrawlOptionsData = {
/**
* The id of the dataset whose crawl options you want to retrieve.
*/
datasetId: string;
/**
* The dataset id or tracking_id to use for the request. We assume you intend to use an id if the value is a valid uuid.
*/
trDataset: string;
};

/** Alias of {@link GetCrawlOptionsResponse}, named after the `get_dataset_crawl_options` operation. */
export type GetDatasetCrawlOptionsResponse = GetCrawlOptionsResponse;

export type GetDatasetFilesHandlerData = {
/**
* The id of the dataset to fetch files for.
Expand Down Expand Up @@ -4560,6 +4577,25 @@ export type $OpenApiTs = {
};
};
};
'/api/dataset/crawl_options/{dataset_id}': {
get: {
req: GetDatasetCrawlOptionsData;
res: {
/**
* Crawl options retrieved successfully
*/
200: GetCrawlOptionsResponse;
/**
* Service error relating to retrieving the crawl options
*/
400: ErrorResponseBody;
/**
* Dataset not found
*/
404: ErrorResponseBody;
};
};
};
'/api/dataset/files/{dataset_id}/{page}': {
get: {
req: GetDatasetFilesHandlerData;
Expand Down
7 changes: 7 additions & 0 deletions frontends/dashboard/src/components/Sidebar.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { DatasetContext } from "../contexts/DatasetContext";
import { A, useLocation } from "@solidjs/router";
import {
AiOutlineBarChart,
AiOutlineCamera,
AiOutlineHistory,
AiOutlineInfoCircle,
AiOutlineKey,
Expand Down Expand Up @@ -189,6 +190,12 @@ export const DashboardSidebar = () => {
href={`/dataset/${datasetId()}/llm-settings`}
label="LLM Options"
/>
<Link
isExternal={false}
icon={AiOutlineCamera}
href={`/dataset/${datasetId()}/crawling`}
label="Crawling Options"
/>
<Link
isExternal={false}
icon={FiTrash}
Expand Down
15 changes: 11 additions & 4 deletions frontends/dashboard/src/components/Spacer.tsx
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
export const Spacer = (props: { h: number; withBorder?: boolean }) => {
import { cn } from "shared/utils";

export const Spacer = (props: {
h: number;
withBorder?: boolean;
class?: string;
}) => {
return (
<div
classList={{
"border-b border-b-neutral-300": props.withBorder,
}}
class={cn(
props.withBorder && "border-b border-b-neutral-300",
props.class,
)}
style={{ height: `${props.h}px` }}
/>
);
Expand Down
5 changes: 5 additions & 0 deletions frontends/dashboard/src/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import { RAGTablePage } from "./analytics/pages/tablePages/RAGTablePage.tsx";
import { ApiContext, trieve } from "./api/trieve.ts";
import { SingleRAGQueryPage } from "./analytics/pages/SingleRAGQueryPage.tsx";
import { DataExplorerTabs } from "./analytics/layouts/DataExplorerTabs.tsx";
import { CrawlingSettings } from "./pages/dataset/CrawlingSettings.tsx";

if (!DEV) {
Sentry.init({
Expand Down Expand Up @@ -141,6 +142,10 @@ const routes: RouteDefinition[] = [
path: "/manage",
component: DangerZoneForm,
},
{
path: "/crawling",
component: CrawlingSettings,
},

{
path: "/analytics",
Expand Down
Loading
Loading