-
Notifications
You must be signed in to change notification settings - Fork 0
/
cli.py
executable file
·57 lines (41 loc) · 1.08 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python3
import logging
import os
import sys
import click
from dotenv import load_dotenv
from dragqueen.loader import Loader
from dragqueen.scrapper import Scrapper
from dragqueen.vectorstore import Vectorstore
from langchain.embeddings import HuggingFaceEmbeddings
# Read variables from a .env file into the process environment before any
# environment lookup below.
load_dotenv()

# Send log records to stdout at INFO level with a timestamp and a
# fixed-width level name in front of each message.
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=logging.INFO,
    stream=sys.stdout,
)

# Scraper for the archive site, plus the iterable that the `load` command
# walks through.
scrapper = Scrapper("https://dls.staatsarchiv.bs.ch")
paginator = scrapper.paginate()

# Vector store handle. Host and port come from the environment and fall back
# to "chroma" and "8000"; the port is handed over as a string.
vectorstore = Vectorstore(
    os.environ.get("CHROMA_HOST", "chroma"),
    os.environ.get("CHROMA_PORT", "8000"),
    "dls_rag_collection",
    HuggingFaceEmbeddings(model_name="all-MiniLM-L12-v2"),
)
vectorstore.init()

loader = Loader()
@click.group()
def main():
    """Manage the vector store: load documents into it or reset it."""
@main.command()
def load():
    """Load the documents of every scraped file into the vector store."""
    # NOTE(review): init() is also called once at import time; the repeat
    # call is kept unchanged — presumably it is idempotent, confirm against
    # the Vectorstore implementation.
    vectorstore.init()
    for entry in paginator:
        # Each paginator item is indexed by "s3_path" and "identifier".
        documents = loader.load(entry["s3_path"], entry["identifier"])
        vectorstore.add_documents(documents)
@main.command()
def reset():
    """Reset the vector store."""
    vectorstore.reset()
# Dispatch to the click command group only when executed as a script.
if __name__ == "__main__":
    main()