Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature: typo tolerance #2144

Merged
merged 10 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions .github/workflows/push-server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,147 @@ jobs:
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

# Job: builds the image from server/Dockerfile.word-id-cronjob and pushes it
# as trieve/word-id-cronjob.
word-id-cronjob:
name: Push Word ID cronjob
runs-on: ${{ matrix.runner }}
strategy:
matrix:
runner: [blacksmith-8vcpu-ubuntu-2204]
platform: [linux/amd64]
# NOTE(review): the matrix above yields only the pair
# (blacksmith-8vcpu-ubuntu-2204, linux/amd64); neither exclude entry below
# names that pair, so both look like no-ops — confirm whether an arm
# runner/platform was meant to be listed in the matrix.
exclude:
- runner: blacksmith-8vcpu-ubuntu-2204
platform: linux/arm64
- runner: blacksmith-8vcpu-ubuntu-2204-arm
platform: linux/amd64
steps:
- name: Checkout the repo
uses: actions/checkout@v4

- name: Setup buildx
uses: docker/setup-buildx-action@v3

# Credentials come from the DOCKER_USERNAME / DOCKER_PASSWORD repository secrets.
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

# Produces the tags/labels consumed by the build step via steps.meta.outputs.
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: |
trieve/word-id-cronjob
# Two tag rules: a raw "latest" entry and a type=sha entry.
tags: |
type=raw,latest
type=sha

- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
platforms: ${{ matrix.platform }}
# Registry build cache, keyed by job name plus the matrix runner.
cache-from: type=registry,ref=trieve/buildcache:word-id-cronjob-${{matrix.runner}}
cache-to: type=registry,ref=trieve/buildcache:word-id-cronjob-${{matrix.runner}},mode=max
context: server/
file: ./server/Dockerfile.word-id-cronjob
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

# Job: builds the image from server/Dockerfile.word-worker and pushes it
# as trieve/word-worker.
word-worker:
name: Push Word Worker
runs-on: ${{ matrix.runner }}
strategy:
matrix:
runner: [blacksmith-8vcpu-ubuntu-2204]
platform: [linux/amd64]
# NOTE(review): the matrix above yields only the pair
# (blacksmith-8vcpu-ubuntu-2204, linux/amd64); neither exclude entry below
# names that pair, so both look like no-ops — confirm whether an arm
# runner/platform was meant to be listed in the matrix.
exclude:
- runner: blacksmith-8vcpu-ubuntu-2204
platform: linux/arm64
- runner: blacksmith-8vcpu-ubuntu-2204-arm
platform: linux/amd64
steps:
- name: Checkout the repo
uses: actions/checkout@v4

- name: Setup buildx
uses: docker/setup-buildx-action@v3

# Credentials come from the DOCKER_USERNAME / DOCKER_PASSWORD repository secrets.
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

# Produces the tags/labels consumed by the build step via steps.meta.outputs.
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: |
trieve/word-worker
# Two tag rules: a raw "latest" entry and a type=sha entry.
tags: |
type=raw,latest
type=sha

- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
platforms: ${{ matrix.platform }}
# Registry build cache, keyed by job name plus the matrix runner.
cache-from: type=registry,ref=trieve/buildcache:word-worker-${{matrix.runner}}
cache-to: type=registry,ref=trieve/buildcache:word-worker-${{matrix.runner}},mode=max
context: server/
file: ./server/Dockerfile.word-worker
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

# Job: builds the image from server/Dockerfile.bktree-worker and pushes it
# as trieve/bktree-worker.
bktree-worker:
name: Push BK-Tree Worker
runs-on: ${{ matrix.runner }}
strategy:
matrix:
runner: [blacksmith-8vcpu-ubuntu-2204]
platform: [linux/amd64]
# NOTE(review): the matrix above yields only the pair
# (blacksmith-8vcpu-ubuntu-2204, linux/amd64); neither exclude entry below
# names that pair, so both look like no-ops — confirm whether an arm
# runner/platform was meant to be listed in the matrix.
exclude:
- runner: blacksmith-8vcpu-ubuntu-2204
platform: linux/arm64
- runner: blacksmith-8vcpu-ubuntu-2204-arm
platform: linux/amd64
steps:
- name: Checkout the repo
uses: actions/checkout@v4

- name: Setup buildx
uses: docker/setup-buildx-action@v3

# Credentials come from the DOCKER_USERNAME / DOCKER_PASSWORD repository secrets.
- name: Login to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}

# Produces the tags/labels consumed by the build step via steps.meta.outputs.
- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
# list of Docker images to use as base name for tags
images: |
trieve/bktree-worker
# Two tag rules: a raw "latest" entry and a type=sha entry.
tags: |
type=raw,latest
type=sha

- name: Build and push Docker image
uses: docker/build-push-action@v5
with:
platforms: ${{ matrix.platform }}
# Registry build cache, keyed by job name plus the matrix runner.
cache-from: type=registry,ref=trieve/buildcache:bktree-worker-${{matrix.runner}}
cache-to: type=registry,ref=trieve/buildcache:bktree-worker-${{matrix.runner}},mode=max
context: server/
file: ./server/Dockerfile.bktree-worker
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
12 changes: 12 additions & 0 deletions frontends/search/src/components/GroupPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,18 @@ export const GroupPage = (props: GroupPageProps) => {
slim_chunks: search.debounced.slimChunks,
page_size: search.debounced.pageSize,
get_total_pages: search.debounced.getTotalPages,
// Typo-tolerance settings, copied field by field from the debounced search
// state into the request body.
typo_options: {
  correct_typos: search.debounced.correctTypos,
  one_typo_word_range: {
    min: search.debounced.oneTypoWordRangeMin,
    max: search.debounced.oneTypoWordRangeMax,
  },
  two_typo_word_range: {
    min: search.debounced.twoTypoWordRangeMin,
    max: search.debounced.twoTypoWordRangeMax,
  },
  // Singular key: this must be the same key the results page sends for this
  // option (`disable_on_word`); the previous plural spelling was a mismatch.
  // NOTE(review): confirm the exact field name against the API's TypoOptions schema.
  disable_on_word: search.debounced.disableOnWords,
},
highlight_options: {
highlight_results: search.debounced.highlightResults,
highlight_strategy: search.debounced.highlightStrategy,
Expand Down
12 changes: 12 additions & 0 deletions frontends/search/src/components/ResultsPage.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,18 @@ const ResultsPage = (props: ResultsPageProps) => {
slim_chunks: props.search.debounced.slimChunks ?? false,
page_size: props.search.debounced.pageSize ?? 10,
get_total_pages: props.search.debounced.getTotalPages ?? false,
// Typo-correction parameters, read straight from the debounced search state.
typo_options: {
  correct_typos: props.search.debounced.correctTypos,
  // Each range is a { min, max } pair taken from the matching state fields.
  one_typo_word_range: { min: props.search.debounced.oneTypoWordRangeMin, max: props.search.debounced.oneTypoWordRangeMax },
  two_typo_word_range: { min: props.search.debounced.twoTypoWordRangeMin, max: props.search.debounced.twoTypoWordRangeMax },
  // Words for which typo tolerance is switched off.
  disable_on_word: props.search.debounced.disableOnWords,
},
highlight_options: {
highlight_results: props.search.debounced.highlightResults ?? true,
highlight_strategy:
Expand Down
135 changes: 134 additions & 1 deletion frontends/search/src/components/SearchForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1051,6 +1051,13 @@ const SearchForm = (props: {
pageSize: 10,
getTotalPages: false,
highlightStrategy: "exactmatch",
// Typo-tolerance defaults: correction starts disabled.
correctTypos: false,
// One-typo word range defaults to min 5 / max 8.
oneTypoWordRangeMin: 5,
oneTypoWordRangeMax: 8,
// Two-typo word range defaults to min 8; the null max presumably means
// "no upper bound" — TODO confirm against the API.
twoTypoWordRangeMin: 8,
twoTypoWordRangeMax: null,
// No words are exempt from typo tolerance by default.
disableOnWords: [],
// NOTE(review): the typo checkbox in this form binds `correctTypos`, and no
// visible code reads `typoTolerance` — confirm this key is still needed.
typoTolerance: false,
highlightResults: true,
highlightDelimiters: ["?", ".", "!"],
highlightMaxLength: 8,
Expand Down Expand Up @@ -1195,7 +1202,7 @@ const SearchForm = (props: {
/>
</div>
<div class="flex items-center justify-between space-x-2 p-1">
<label>Remove Stop Words</label>
<label>Remove Stop Words:</label>
<input
class="h-4 w-4"
type="checkbox"
Expand All @@ -1210,6 +1217,132 @@ const SearchForm = (props: {
}}
/>
</div>
<div class="flex items-center justify-between space-x-2 p-1">
  {/* Checkbox bound to `correctTypos` in the pending (temp) search values. */}
  <label>Typo Tolerance (Latency Penalty):</label>
  <input
    class="h-4 w-4"
    type="checkbox"
    checked={tempSearchValues().correctTypos}
    onChange={(e) => {
      // Read the checkbox state once, then merge it into the previous values.
      const enabled = e.target.checked;
      setTempSearchValues((prev) => ({ ...prev, correctTypos: enabled }));
    }}
  />
</div>
<div class="items flex justify-between space-x-2 p-1">
  {/* Numeric field bound to `oneTypoWordRangeMin` in the pending search values. */}
  <label>One typo min word length:</label>
  <input
    class="w-16 rounded border border-neutral-400 p-0.5 text-black"
    type="number"
    step="any"
    value={tempSearchValues().oneTypoWordRangeMin}
    onChange={(e) => {
      // The field's text is converted with parseInt before being stored.
      const parsedMin = parseInt(e.currentTarget.value);
      setTempSearchValues((prev) => ({
        ...prev,
        oneTypoWordRangeMin: parsedMin,
      }));
    }}
  />
</div>
<div class="items flex justify-between space-x-2 p-1">
  {/* Numeric field bound to `oneTypoWordRangeMax`; a null value renders as an empty field. */}
  <label>One typo max word length:</label>
  <input
    class="w-16 rounded border border-neutral-400 p-0.5 text-black"
    type="number"
    step="any"
    value={tempSearchValues().oneTypoWordRangeMax?.toString() ?? ""}
    onChange={(e) => {
      const raw = e.currentTarget.value;
      // An empty field is stored as null; anything else goes through parseInt.
      const parsedMax = raw === "" ? null : parseInt(raw);
      setTempSearchValues((prev) => ({
        ...prev,
        oneTypoWordRangeMax: parsedMax,
      }));
    }}
  />
</div>
<div class="items flex justify-between space-x-2 p-1">
  {/* Numeric field bound to `twoTypoWordRangeMin` in the pending search values. */}
  <label>Two typo min word length:</label>
  <input
    class="w-16 rounded border border-neutral-400 p-0.5 text-black"
    type="number"
    step="any"
    value={tempSearchValues().twoTypoWordRangeMin}
    onChange={(e) => {
      // The field's text is converted with parseInt before being stored.
      const parsedMin = parseInt(e.currentTarget.value);
      setTempSearchValues((prev) => ({
        ...prev,
        twoTypoWordRangeMin: parsedMin,
      }));
    }}
  />
</div>
<div class="items flex justify-between space-x-2 p-1">
  {/* Numeric field bound to `twoTypoWordRangeMax`; a null value renders as an empty field. */}
  <label>Two typo max word length:</label>
  <input
    class="w-16 rounded border border-neutral-400 p-0.5 text-black"
    type="number"
    step="any"
    value={
      tempSearchValues().twoTypoWordRangeMax?.toString() ??
      ""
    }
    onChange={(e) => {
      setTempSearchValues((prev) => {
        return {
          ...prev,
          // Write the two-typo max — the field this input displays. The
          // handler previously wrote `oneTypoWordRangeMax`, clobbering the
          // one-typo setting and never updating this one.
          twoTypoWordRangeMax:
            e.currentTarget.value === ""
              ? null
              : parseInt(e.currentTarget.value),
        };
      });
    }}
  />
</div>
<div class="items flex justify-between space-x-2 p-1">
  {/* Comma-separated text field bound to the `disableOnWords` array. */}
  <label>Disable typo tolerance for words:</label>
  <input
    class="w-16 rounded border border-neutral-400 p-0.5 text-black"
    type="text"
    value={tempSearchValues().disableOnWords.join(",")}
    onInput={(e) => {
      const raw = e.currentTarget.value;
      // A cleared field means "no words": "".split(",") would store [""]
      // rather than the empty list used as the default.
      // Other empty segments are kept on purpose so a trailing comma survives
      // the round-trip through join(",") while the user is still typing.
      // (The former special case for a single space was dead code: the
      // unconditional update that followed it stored the same [" "].)
      const words = raw === "" ? [] : raw.split(",");
      setTempSearchValues((prev) => {
        return {
          ...prev,
          disableOnWords: words,
        };
      });
    }}
  />
</div>
<div class="flex items-center justify-between space-x-2 p-1">
<label>Highlight Results (Latency Penalty):</label>
<input
Expand Down
Loading
Loading