-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
652a0db
commit a467be0
Showing
28 changed files
with
847 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
.github/ | ||
|
||
.coverage/ | ||
.coverage | ||
coverage | ||
|
||
venv/ | ||
.env |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
name: Production CI/CD Pipeline | ||
|
||
on: | ||
push: | ||
branches: | ||
- main | ||
|
||
jobs: | ||
ci: | ||
uses: TogetherCrew/operations/.github/workflows/ci.yml@main | ||
secrets: | ||
CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: Staging CI/CD Pipeline | ||
|
||
on: pull_request | ||
|
||
jobs: | ||
ci: | ||
uses: TogetherCrew/operations/.github/workflows/ci.yml@main | ||
secrets: | ||
CC_TEST_REPORTER_ID: ${{ secrets.CC_TEST_REPORTER_ID }} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# It's recommended that we use `bullseye` for Python (alpine isn't suitable as it conflicts with numpy)
# Shared base stage: project sources plus the Python dependencies.
FROM python:3.11-bullseye AS base
WORKDIR /project
COPY . .
RUN pip3 install -r requirements.txt

# Test stage: runs the entrypoint script.
FROM base AS test
RUN chmod +x docker-entrypoint.sh
CMD ["./docker-entrypoint.sh"]

# Production stage: starts the Celery worker for `celery_app.server`.
# `-m` is required: without it the interpreter looks for a script file
# literally named `celery` in the working directory and exits with an error.
FROM base AS prod
CMD ["python3", "-m", "celery", "-A", "celery_app.server", "worker", "-l", "INFO"]
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
from tc_messageBroker import RabbitMQ | ||
from tc_messageBroker.rabbit_mq.event import Event | ||
from tc_messageBroker.rabbit_mq.queue import Queue | ||
|
||
|
||
def job_send(broker_url, port, username, password, res):
    """
    publish a sample message on the `Queue.DISCORD_ANALYZER` queue
    with the `Event.DISCORD_BOT.FETCH` event

    Parameters
    ------------
    broker_url, port, username, password
        forwarded unchanged to the `RabbitMQ` constructor
    res
        the value interpolated into the `data` field of the message
    """
    broker = RabbitMQ(
        broker_url=broker_url,
        port=port,
        username=username,
        password=password,
    )

    # the uuid is a fixed placeholder; only `data` depends on the input
    payload = {
        "uuid": "d99a1490-fba6-11ed-b9a9-0d29e7612dp8",
        "data": f"some results {res}",
    }

    target_queue = Queue.DISCORD_ANALYZER
    broker.connect(target_queue)
    broker.publish(
        queue_name=target_queue,
        event=Event.DISCORD_BOT.FETCH,
        content=payload,
    )
|
||
|
||
if __name__ == "__main__":
    # manual smoke run against a local broker
    # TODO: read from .env
    job_send(
        broker_url="localhost",
        port=5672,
        username="root",
        password="pass",
        res="CALLED FROM __main__",
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
from celery import Celery | ||
|
||
# Celery application shared by the task modules.
# TODO: read from .env (the broker URL and credentials are hard-coded for now)
app = Celery(
    main="celery_app/tasks",
    broker="pyamqp://root:pass@localhost//",
)
# look for task modules inside the `celery_app` package
app.autodiscover_tasks(packages=["celery_app"])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
from celery_app.server import app | ||
from celery_app.job_send import job_send | ||
|
||
# TODO: Write tasks that match our requirements | ||
|
||
|
||
@app.task
def add(x, y):
    """
    add `x` and `y`, publish the sum through `job_send`
    and return it

    the broker connection values are hard-coded here
    """
    total = x + y
    job_send(
        broker_url="localhost",
        port=5672,
        username="root",
        password="pass",
        res=total,
    )
    return total
|
||
|
||
@app.task
def mul(x, y):
    """return the product of `x` and `y`"""
    product = x * y
    return product
|
||
|
||
@app.task
def xsum(numbers):
    """return the sum of all items in `numbers`"""
    total = sum(numbers)
    return total
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
from retrievers.forum_summary_retriever import ( | ||
ForumBasedSummaryRetriever, | ||
) | ||
from retrievers.process_dates import process_dates | ||
from retrievers.utils.load_hyperparams import load_hyperparams | ||
from tc_hivemind_backend.embeddings.cohere import CohereEmbedding | ||
from tc_hivemind_backend.pg_vector_access import PGVectorAccess | ||
from llama_index import QueryBundle | ||
from llama_index.vector_stores import ExactMatchFilter, FilterCondition, MetadataFilters | ||
|
||
|
||
def query_discord(
    community_id: str,
    query: str,
    thread_names: list[str],
    channel_names: list[str],
    days: list[str],
    similarity_top_k: int | None = None,
) -> str:
    """
    query the `discord` table of a community database using
    the given metadata filters and return the query engine answer

    Parameters
    ------------
    community_id : str
        the community to query; the database used is `community_{community_id}`
    query : str
        the query (question) of the user
    thread_names : list[str]
        each name becomes an exact-match filter on the `thread` key
    channel_names : list[str]
        each name becomes an exact-match filter on the `channel` key
    days : list[str]
        each value becomes an exact-match filter on the `date` key
    similarity_top_k : int | None
        forwarded to the query engine as `similarity_top_k`
        if `None`, the second value returned by `load_hyperparams()` is used

    Returns
    ---------
    response : str
        the `response` attribute of the query engine result
    """
    if similarity_top_k is None:
        _first, similarity_top_k, _third = load_hyperparams()

    vector_access = PGVectorAccess(
        table_name="discord", dbname=f"community_{community_id}"
    )
    index = vector_access.load_index()

    # single quotes in channel / thread names are doubled before filtering
    channel_filters: list[ExactMatchFilter] = [
        ExactMatchFilter(key="channel", value=name.replace("'", "''"))
        for name in channel_names
    ]
    thread_filters: list[ExactMatchFilter] = [
        ExactMatchFilter(key="thread", value=name.replace("'", "''"))
        for name in thread_names
    ]
    day_filters: list[ExactMatchFilter] = [
        ExactMatchFilter(key="date", value=day) for day in days
    ]

    # filters are combined as threads, then channels, then days,
    # joined with an OR condition
    metadata_filters = MetadataFilters(
        filters=[*thread_filters, *channel_filters, *day_filters],
        condition=FilterCondition.OR,
    )

    engine = index.as_query_engine(
        filters=metadata_filters, similarity_top_k=similarity_top_k
    )

    # the query embedding is computed explicitly and sent along with the text
    query_embedding = CohereEmbedding().get_text_embedding(text=query)
    answer = engine.query(QueryBundle(query_str=query, embedding=query_embedding))

    return answer.response
|
||
|
||
def query_discord_auto_filter(
    community_id: str,
    query: str,
    similarity_top_k: int | None = None,
    d: int | None = None,
) -> str:
    """
    get the query results and do the filtering automatically.
    By automatically we mean, it would first query the summaries
    (the `discord_summary` table) to get the metadata filters
    and then run `query_discord` with those filters

    Parameters
    -----------
    community_id : str
        the community to query; the database used is `community_{community_id}`
    query : str
        the query (question) of the user
    similarity_top_k : int | None
        the number of similar nodes for the initial summary search
        if `None`, the first value returned by `load_hyperparams()` is used
    d : int | None
        this would make the secondary search (`query_discord`)
        to be done on the `metadata.date - d` to `metadata.date + d`
        if `None`, the third value returned by `load_hyperparams()` is used

    Returns
    ---------
    response : str
        the LLM response given the query
    """
    table_name = "discord_summary"
    dbname = f"community_{community_id}"

    # load the hyperparameters at most once, and only if a default is needed
    if similarity_top_k is None or d is None:
        default_top_k, _, default_d = load_hyperparams()
        if similarity_top_k is None:
            similarity_top_k = default_top_k
        if d is None:
            d = default_d

    discord_retriever = ForumBasedSummaryRetriever(table_name=table_name, dbname=dbname)

    channels, threads, dates = discord_retriever.retreive_metadata(
        query=query,
        metadata_group1_key="channel",
        metadata_group2_key="thread",
        metadata_date_key="date",
        similarity_top_k=similarity_top_k,
    )

    dates_modified = process_dates(dates, d)

    # `similarity_top_k` is not forwarded here, so `query_discord`
    # resolves its own default for the secondary search
    response = query_discord(
        community_id=community_id,
        query=query,
        thread_names=threads,
        channel_names=channels,
        days=dates_modified,
    )
    return response
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
version: "3.9" | ||
|
||
services: | ||
server: | ||
build: | ||
context: . | ||
target: prod | ||
dockerfile: Dockerfile | ||
worker: | ||
build: | ||
context: . | ||
target: prod | ||
dockerfile: Dockerfile | ||
command: python3 worker.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
version: "3.9" | ||
|
||
services: | ||
app: | ||
build: | ||
context: . | ||
target: test | ||
dockerfile: Dockerfile | ||
environment: | ||
- PORT=3000 | ||
- MONGODB_HOST=mongo | ||
- MONGODB_PORT=27017 | ||
- MONGODB_USER=root | ||
- MONGODB_PASS=pass | ||
- NEO4J_PROTOCOL=bolt | ||
- NEO4J_HOST=neo4j | ||
- NEO4J_PORT=7687 | ||
- NEO4J_USER=neo4j | ||
- NEO4J_PASSWORD=password | ||
- NEO4J_DB=neo4j | ||
- POSTGRES_HOST=postgres | ||
- POSTGRES_USER=root | ||
- POSTGRES_PASS=pass | ||
- POSTGRES_PORT=5432 | ||
- CHUNK_SIZE=512 | ||
- EMBEDDING_DIM=1024 | ||
- K1_RETRIEVER_SEARCH=20 | ||
- K2_RETRIEVER_SEARCH=5 | ||
- D_RETRIEVER_SEARCH=7 | ||
volumes: | ||
- ./coverage:/project/coverage | ||
depends_on: | ||
neo4j: | ||
condition: service_healthy | ||
mongo: | ||
condition: service_healthy | ||
postgres: | ||
condition: service_healthy | ||
neo4j: | ||
image: "neo4j:5.9.0" | ||
environment: | ||
- NEO4J_AUTH=neo4j/password | ||
- NEO4J_PLUGINS=["apoc", "graph-data-science"] | ||
- NEO4J_dbms_security_procedures_unrestricted=apoc.*,gds.* | ||
healthcheck: | ||
test: ["CMD" ,"wget", "http://localhost:7474"] | ||
interval: 1m30s | ||
timeout: 10s | ||
retries: 2 | ||
start_period: 40s | ||
mongo: | ||
image: "mongo:6.0.8" | ||
environment: | ||
- MONGO_INITDB_ROOT_USERNAME=root | ||
- MONGO_INITDB_ROOT_PASSWORD=pass | ||
healthcheck: | ||
test: echo 'db.stats().ok' | mongosh localhost:27017/test --quiet | ||
interval: 60s | ||
timeout: 10s | ||
retries: 2 | ||
start_period: 40s | ||
postgres: | ||
image: "ankane/pgvector" | ||
environment: | ||
- POSTGRES_USER=root | ||
- POSTGRES_PASSWORD=pass | ||
healthcheck: | ||
test: ["CMD-SHELL", "pg_isready"] | ||
interval: 10s | ||
timeout: 5s | ||
retries: 5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/usr/bin/env bash
# Run the test suite under coverage, export an lcov report, and exit with
# the test result so a failing suite is not hidden by a successful export.
python3 -m coverage run --omit=tests/* -m pytest .
test_status=$?

# Always write the report, even when tests failed.
python3 -m coverage lcov -o coverage/lcov.info
lcov_status=$?

if [ "$test_status" -ne 0 ]; then
    exit "$test_status"
fi
exit "$lcov_status"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
numpy
llama-index>=0.9.21, <1.0.0
pymongo
pgvector
asyncpg
psycopg2-binary
sqlalchemy[asyncio]
async-sqlalchemy
python-pptx
tc-neo4j-lib
google-api-python-client
unstructured
cohere
neo4j>=5.14.1, <6.0.0
coverage>=7.3.3, <8.0.0
pytest>=7.4.3, <8.0.0
python-dotenv==1.0.0
tc_hivemind_backend==1.0.0
celery>=5.3.6, <6.0.0
Empty file.
Oops, something went wrong.