Skip to content

Commit

Permalink
add TODO and run black and isort
Browse files (browse the repository at this point in the history)
  • Loading branch information
jojortz committed Dec 23, 2023
1 parent 1fd8e63 commit 8dece7b
Show file tree
Hide file tree
Showing 45 changed files with 265 additions and 328 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,5 @@
""",
"class": "",
},
]
],
}
4 changes: 1 addition & 3 deletions example/chatbot/demo_launch_app_gpu_huggingface.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,9 @@
```
"""
from pykoi import Application
from pykoi.chat import ModelFactory
from pykoi.chat import QuestionAnswerDatabase
from pykoi.chat import ModelFactory, QuestionAnswerDatabase
from pykoi.component import Chatbot, Dashboard


###################################################################################
# Creating a Huggingface model tiiuae/falcon-7b (EC2 g5.4xlarge with 100GB space) #
###################################################################################
Expand Down
4 changes: 1 addition & 3 deletions example/chatbot/demo_launch_app_gpu_huggingface_peft.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,9 @@
"""

from pykoi import Application
from pykoi.chat import ModelFactory
from pykoi.chat import QuestionAnswerDatabase
from pykoi.chat import ModelFactory, QuestionAnswerDatabase
from pykoi.component import Chatbot, Dashboard


###################################################################################
# Creating a Huggingface model tiiuae/falcon-7b (EC2 g5.4xlarge with 100GB space) #
###################################################################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
from pykoi.chat import ModelFactory
from pykoi.component import Compare


###################################################################################
# Creating a Huggingface model tiiuae/falcon-rw-1b (EC2 g4.2xlarge with 100GB space) #
###################################################################################
Expand Down
5 changes: 1 addition & 4 deletions example/mlu/demo_comparator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from pykoi.chat.llm.huggingface import HuggingfaceModel
from pykoi.component import Compare


######################################################################################
# Creating a Huggingface model tiiuae/falcon-rw-1b (EC2 g4.2xlarge with 100GB space) #
######################################################################################
Expand Down Expand Up @@ -73,9 +72,7 @@
tokenizers = [hf_tokenizer_1, hf_tokenizer_2, hf_tokenizer_3]

models_list = [
HuggingfaceModel.create(
model=model, tokenizer=tokenizer, name=name, max_length=100
)
HuggingfaceModel.create(model=model, tokenizer=tokenizer, name=name, max_length=100)
for model, tokenizer, name in zip(models, tokenizers, model_name)
]

Expand Down
24 changes: 14 additions & 10 deletions example/retrieval_qa/retrieval_qa_huggingface_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,29 @@
python -m example.retrieval_qa.retrieval_qa_huggingface_demo
"""

import os
import argparse
import os

from dotenv import load_dotenv

from pykoi import Application
from pykoi.chat import RAGDatabase
from pykoi.retrieval import RetrievalFactory
from pykoi.retrieval import VectorDbFactory
from pykoi.component import Chatbot, Dashboard, RetrievalQA
from dotenv import load_dotenv
from pykoi.retrieval import RetrievalFactory, VectorDbFactory

# NOTE: Configure your retrieval model as RETRIEVAL_MODEL in .env file.
# Load environment variables from .env file
load_dotenv()

## Set the RETRIEVAL_MODEL, pykoi supports most of the open-source LLMs, e.g.
# "HuggingFaceH4/zephyr-7b-beta"
# "meta-llama/Llama-2-7b-chat-hf"
# "mistralai/Mistral-7B-v0.1"
# "databricks/dolly-v2-3b"
# "HuggingFaceH4/zephyr-7b-beta"
# "meta-llama/Llama-2-7b-chat-hf"
# "mistralai/Mistral-7B-v0.1"
# "databricks/dolly-v2-3b"

RETRIEVAL_MODEL = os.getenv("RETRIEVAL_MODEL", default="mistralai/Mistral-7B-v0.1")


def main(**kwargs):
os.environ["DOC_PATH"] = os.path.join(os.getcwd(), "temp/docs")
os.environ["VECTORDB_PATH"] = os.path.join(os.getcwd(), "temp/vectordb")
Expand All @@ -48,11 +50,13 @@ def main(**kwargs):
vector_db=vector_db,
model_name=RETRIEVAL_MODEL,
trust_remote_code=True,
max_length=1000
max_length=1000,
)

# retrieval, chatbot, and dashboard pykoi components
retriever = RetrievalQA(retrieval_model=retrieval_model, vector_db=vector_db, feedback="rag")
retriever = RetrievalQA(
retrieval_model=retrieval_model, vector_db=vector_db, feedback="rag"
)
chatbot = Chatbot(None, feedback="rag", is_retrieval=True)
# dashboard = Dashboard(RAGDatabase(), feedback="rag")

Expand Down
10 changes: 4 additions & 6 deletions example/rlhf/demo_rl.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,15 @@
"""
# accelerate launch --num_machines 1 --num_processes 1 --mixed_precision fp16 example/rlhf/demo_rl.py

from pykoi.rlhf import RLHFConfig
from pykoi.rlhf import RLFinetuning

from pykoi.rlhf import RLFinetuning, RLHFConfig

# use huggingface sft and reward model
config = RLHFConfig(
base_model_path="models/rlhf_step1_sft", #"elinas/llama-7b-hf-transformers-4.29",
dataset_type="huggingface",
base_model_path="models/rlhf_step1_sft", # "elinas/llama-7b-hf-transformers-4.29",
dataset_type="huggingface",
dataset_name="cambioml/stack_exchange_rank_10k_dataset",
dataset_subset_rl="data",
reward_model_path="models/rlhf_step2_rw/", #"cambioml/rlhf_reward_model",
reward_model_path="models/rlhf_step2_rw/", # "cambioml/rlhf_reward_model",
save_freq=1,
ppo_batch_size=32,
ppo_epochs=4,
Expand Down
25 changes: 13 additions & 12 deletions example/rlhf/demo_rw_finetuning.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,24 @@
python -m example.rlhf.demo_rw_finetuning
"""

from pykoi.rlhf import RLHFConfig
from pykoi.rlhf import RewardFinetuning
from pykoi.chat import RankingDatabase
from pykoi.chat.db.constants import (
RANKING_CSV_HEADER_ID,
RANKING_CSV_HEADER_QUESTION,
RANKING_CSV_HEADER_UP_RANKING_ANSWER,
RANKING_CSV_HEADER_LOW_RANKING_ANSWER)
from pykoi.chat.db.constants import (RANKING_CSV_HEADER_ID,
RANKING_CSV_HEADER_LOW_RANKING_ANSWER,
RANKING_CSV_HEADER_QUESTION,
RANKING_CSV_HEADER_UP_RANKING_ANSWER)
from pykoi.rlhf import RewardFinetuning, RLHFConfig

# get data from local database
ranking_database = RankingDatabase()
my_data_pd = ranking_database.retrieve_all_question_answers_as_pandas()
my_data_pd = my_data_pd[[
RANKING_CSV_HEADER_ID,
RANKING_CSV_HEADER_QUESTION,
RANKING_CSV_HEADER_UP_RANKING_ANSWER,
RANKING_CSV_HEADER_LOW_RANKING_ANSWER]]
my_data_pd = my_data_pd[
[
RANKING_CSV_HEADER_ID,
RANKING_CSV_HEADER_QUESTION,
RANKING_CSV_HEADER_UP_RANKING_ANSWER,
RANKING_CSV_HEADER_LOW_RANKING_ANSWER,
]
]

# analyze the data
print(my_data_pd)
Expand Down
15 changes: 7 additions & 8 deletions example/rlhf/demo_supervised_finetuning_nike.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
python -m example.rlhf.demo_supervised_finetuning_nike
"""

from pykoi.rlhf import RLHFConfig
from pykoi.rlhf import SupervisedFinetuning
from peft import LoraConfig, TaskType

from pykoi.rlhf import RLHFConfig, SupervisedFinetuning

base_model_path = "meta-llama/Llama-2-7b-chat-hf"
dataset_name = "./output_self_instructed_data_nike_10k_2023_FULL.csv"
Expand All @@ -22,7 +21,7 @@
save_freq = 200
train_test_split_ratio = 0.0001
dataset_subset_sft_train = 999999999
size_valid_set = 0
size_valid_set = 0

r = 8
lora_alpha = 16
Expand All @@ -36,13 +35,13 @@
lora_dropout=lora_dropout,
bias=bias,
task_type=task_type,
)
)


# run supervised finetuning
config = RLHFConfig(
base_model_path=base_model_path,
dataset_type=dataset_type,
base_model_path=base_model_path,
dataset_type=dataset_type,
dataset_name=dataset_name,
learning_rate=learning_rate,
weight_decay=weight_decay,
Expand All @@ -55,7 +54,7 @@
train_test_split_ratio=train_test_split_ratio,
dataset_subset_sft_train=dataset_subset_sft_train,
size_valid_set=size_valid_set,
lora_config_rl=lora_config
)
lora_config_rl=lora_config,
)
rlhf_step1_sft = SupervisedFinetuning(config)
rlhf_step1_sft.train_and_save(peft_model_path)
25 changes: 12 additions & 13 deletions example/rlhf/supervised_finetuning_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,22 @@
"""

from pykoi.chat import QuestionAnswerDatabase
from pykoi.rlhf import RLHFConfig
from pykoi.rlhf import SupervisedFinetuning

from pykoi.chat.db.constants import (
QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_ANSWER,
QA_CSV_HEADER_VOTE_STATUS)
from pykoi.chat.db.constants import (QA_CSV_HEADER_ANSWER, QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_VOTE_STATUS)
from pykoi.rlhf import RLHFConfig, SupervisedFinetuning

# get data from local database
qa_database = QuestionAnswerDatabase()
my_data_pd = qa_database.retrieve_all_question_answers_as_pandas()
my_data_pd = my_data_pd[[
QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_ANSWER,
QA_CSV_HEADER_VOTE_STATUS]]
my_data_pd = my_data_pd[
[
QA_CSV_HEADER_ID,
QA_CSV_HEADER_QUESTION,
QA_CSV_HEADER_ANSWER,
QA_CSV_HEADER_VOTE_STATUS,
]
]

# analyze the data
print(my_data_pd)
Expand Down
37 changes: 24 additions & 13 deletions pykoi/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,20 @@
import re
import subprocess
import time

from datetime import datetime
from typing import List, Optional, Any, Dict, Union
from fastapi import FastAPI, Depends, HTTPException, UploadFile, status
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from passlib.context import CryptContext
from typing import Any, Dict, List, Optional, Union

from fastapi import Depends, FastAPI, HTTPException, UploadFile, status
from fastapi.responses import JSONResponse
from fastapi.security import HTTPBasic, HTTPBasicCredentials
from fastapi.staticfiles import StaticFiles
from passlib.context import CryptContext
from pydantic import BaseModel
from starlette.middleware.cors import CORSMiddleware
from pykoi.telemetry.telemetry import Telemetry
from pykoi.telemetry.events import AppStartEvent, AppStopEvent
from pykoi.chat.db.constants import RAG_LIST_SEPARATOR

from pykoi.chat.db.constants import RAG_LIST_SEPARATOR
from pykoi.telemetry.events import AppStartEvent, AppStopEvent
from pykoi.telemetry.telemetry import Telemetry

oauth_scheme = HTTPBasic()

Expand Down Expand Up @@ -644,10 +644,14 @@ async def inference(
try:
print("[/retrieval]: model inference.....", request_body.prompt)
component["component"].retrieval_model.re_init(request_body.file_names)
output = component["component"].retrieval_model.run_with_return_source_documents({"query": request_body.prompt})
print('output', output, output["result"])
output = component[
"component"
].retrieval_model.run_with_return_source_documents(
{"query": request_body.prompt}
)
print("output", output, output["result"])
if "source_documents" not in output:
print('no source documents', output)
print("no source documents", output)
source = ["N/A"]
source_content = ["N/A"]
elif output["source_documents"] == []:
Expand Down Expand Up @@ -791,9 +795,16 @@ async def check_file_exists(
try:
file_path = f"{os.getcwd()}/{file_name}"
file_exists = os.path.exists(file_path)
return {"log": f"Check if {file_name} exists succeeded.", "file_exists": file_exists, "status": "200"}
return {
"log": f"Check if {file_name} exists succeeded.",
"file_exists": file_exists,
"status": "200",
}
except Exception as ex:
return {"log": f"Check if {file_name} exists failed: {ex}", "status": "500"}
return {
"log": f"Check if {file_name} exists failed: {ex}",
"status": "500",
}

def create_data_route(id: str, data_source: Any):
"""
Expand Down
5 changes: 2 additions & 3 deletions pykoi/chat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import pykoi.chat.llm as llm

from pykoi.chat.llm.model_factory import ModelFactory
from pykoi.chat.db.qa_database import QuestionAnswerDatabase
from pykoi.chat.db.rag_database import RAGDatabase
from pykoi.chat.db.ranking_database import RankingDatabase
from pykoi.chat.db.rag_database import RAGDatabase
from pykoi.chat.llm.model_factory import ModelFactory
17 changes: 4 additions & 13 deletions pykoi/chat/db/abs_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import abc
import sqlite3
import threading

from typing import List, Tuple


Expand Down Expand Up @@ -71,9 +70,7 @@ def insert(self, **kwargs) -> None:
Args:
kwargs (dict): The key-value pairs to insert into the database.
"""
raise NotImplementedError(
"Insert method must be implemented by subclasses."
)
raise NotImplementedError("Insert method must be implemented by subclasses.")

@abc.abstractmethod
def update(self, **kwargs) -> None:
Expand All @@ -83,17 +80,13 @@ def update(self, **kwargs) -> None:
Args:
kwargs (dict): The key-value pairs to update in the database.
"""
raise NotImplementedError(
"Update method must be implemented by subclasses."
)
raise NotImplementedError("Update method must be implemented by subclasses.")

def retrieve_all(self) -> List[Tuple]:
"""
Retrieves all pairs from the database.
"""
raise NotImplementedError(
"Retrieve method must be implemented by subclasses."
)
raise NotImplementedError("Retrieve method must be implemented by subclasses.")

@abc.abstractmethod
def print_table(self, rows: str) -> None:
Expand All @@ -103,6 +96,4 @@ def print_table(self, rows: str) -> None:
Args:
rows (str): The rows to print.
"""
raise NotImplementedError(
"Print method must be implemented by subclasses."
)
raise NotImplementedError("Print method must be implemented by subclasses.")
Loading

0 comments on commit 8dece7b

Please sign in to comment.