Skip to content

Commit

Permalink
Merge pull request #97 from historysciencelab/seperate-llms
Browse files Browse the repository at this point in the history
Break out LLMs into their own dependencies and tests
  • Loading branch information
audreyfeldroy authored Oct 30, 2023
2 parents 11f69f0 + 4a66e98 commit 30321c5
Show file tree
Hide file tree
Showing 9 changed files with 113 additions and 31 deletions.
44 changes: 44 additions & 0 deletions .github/workflows/ci-llms.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# CI workflow for the LLM-dependent test suite.
# Kept separate from the fast ci.yml run because these tests pull in heavy
# optional dependencies (clarifai_grpc, transformers) and run much slower.
name: Project Tests with LLMs

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'

      # Hosted runners can exhaust disk space once the large ML wheels are
      # installed; drop toolchains this job never uses.
      - name: Free up disk space
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /opt/ghc
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"

      # Extras names must match [project.optional-dependencies] in pyproject.toml.
      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e '.[test]'
          pip install -e '.[clarifai_grpc]'
          pip install -e '.[transformers]'

      # ffmpeg is required by openai-whisper for audio decoding.
      - name: Install ffmpeg
        run: |
          sudo apt-get update
          sudo apt-get install -y ffmpeg

      - name: Run code and llm tests
        run: |
          make test_llms
7 changes: 2 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,6 @@ jobs:
sudo apt-get update
sudo apt-get install -y ffmpeg
- name: Find and run pytest tests
- name: Run code and non-llm tests
run: |
test_files=$(find ./tests/ -name "*test*.py")
# Run pytest on all found test files
python -m pytest $test_files
make test
14 changes: 14 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,20 @@ ruff check . --fix
ruff format .
```

#### How to run tests

To run the fast, non-LLM tests, run this from the project root:

```sh
pytest tests/code/*.py
```

To run the LLM-dependent tests (these require the optional extras), run this from the project root:

```sh
pytest tests/llms/*.py
```

#### How Do I Submit a Good Enhancement Suggestion?

Enhancement suggestions are tracked as [GitHub issues](https://github.com/historysciencelab/HistoryAIToolkit/issues).
Expand Down
5 changes: 5 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,8 @@ format:
lint:
ruff check . --fix

# Fast test suite: code-only tests, no LLM dependencies required.
test:
	pytest tests/code/*.py

# Slow test suite: exercises the optional LLM integrations
# (requires the clarifai_grpc / transformers extras to be installed).
test_llms:
	pytest tests/llms/*.py
10 changes: 8 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,11 @@ classifiers = [
]
license = {text = "GNU General Public License v3"}
dependencies = [
"clarifai_grpc==9.9.1",
"openai-whisper==20230918",
"pydantic==2.4.2",
"pydantic-settings==2.0.3",
"pydub==0.25.1",
"rich==13.6.0",
"transformers==4.34.0",
"typer==0.9.0",
"yarl==1.9.2"
]
Expand All @@ -49,6 +47,14 @@ docs = [
"mkdocs-include-markdown-plugin"
]

clarifai_grpc = [
"clarifai_grpc==9.9.1"
]

transformers = [
"transformers==4.34.0"
]

[project.urls]

bugs = "https://github.com/audreyfeldroy/historyaitoolkit/issues"
Expand Down
36 changes: 26 additions & 10 deletions src/interviewkit/questions.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2
from rich.console import Console


console = Console()

try:
import clarifai_grpc
except ImportError:
clarifai_grpc = None

# # Securely get your credentials
# TODO: Pass in arguments or use env vars
CLARIFAI_PAT = ""
Expand All @@ -16,29 +21,40 @@


def generate_questions_from_transcript(transcript: str):
channel = ClarifaiChannel.get_grpc_channel()
stub = service_pb2_grpc.V2Stub(channel)
if clarifai_grpc is None:
console.print(
"Please install clarifai-grpc: pip install 'historyaitoolkit\[clarifai]'",
style="bold red",
)
exit(1)
channel = clarifai_grpc.channel.clarifai_channel.ClarifaiChannel.get_grpc_channel()
stub = clarifai_grpc.grpc.api.service_pb2_grpc.V2Stub(channel)

metadata = (("authorization", "Key " + CLARIFAI_PAT),)
userDataObject = resources_pb2.UserAppIDSet(
userDataObject = clarifai_grpc.grpc.api.resources_pb2.UserAppIDSet(
user_id=CLARIFAI_USER_ID, app_id=CLARIFAI_APP_ID
)

post_model_outputs_response = stub.PostModelOutputs(
service_pb2.PostModelOutputsRequest(
clarifai_grpc.grpc.api.service_pb2.PostModelOutputsRequest(
user_app_id=userDataObject,
model_id=CLARIFAI_MODEL_ID,
version_id=CLARIFAI_MODEL_VERSION_ID,
inputs=[
resources_pb2.Input(
data=resources_pb2.Data(text=resources_pb2.Text(raw=transcript))
clarifai_grpc.grpc.api.resources_pb2.Input(
data=clarifai_grpc.grpc.api.resources_pb2.Data(
text=clarifai_grpc.grpc.api.resources_pb2.Text(raw=transcript)
)
)
],
),
metadata=metadata,
)

if post_model_outputs_response.status.code != status_code_pb2.SUCCESS:
if (
post_model_outputs_response.status.code
!= clarifai_grpc.grpc.api.status.status_code_pb2.SUCCESS
):
print(post_model_outputs_response.status)
status = post_model_outputs_response.status.description
raise Exception(f"Post model outputs failed, status: {status}")
Expand Down
28 changes: 14 additions & 14 deletions src/interviewkit/transcript_using_m5.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,34 @@
import sys
from pathlib import Path

import whisper
from pydantic import BaseModel
from rich.console import Console
from transformers import T5ForConditionalGeneration, T5Tokenizer
from whisper.utils import get_writer


console = Console()

try:
import whisper
import transformers

# Load T5 model and tokenizer
tokenizer = transformers.T5Tokenizer.from_pretrained("t5-base")
model = transformers.T5ForConditionalGeneration.from_pretrained("t5-base")
except ImportError:
print("Please install Whisper: pip install openai-whisper")
console.print(
"Please install transformers: pip install 'historyaitoolkit\[transformers]'",
style="bold red",
)
exit(1)

from pydantic import BaseModel
from whisper.utils import get_writer


console = Console()


class Transcript(BaseModel):
"""The Transcript entity represents the transcript of an interview."""

content: str


# Load T5 model and tokenizer
tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")


def chunk_text(text, max_length):
"""Split the text into chunks of max_length."""
words = text.split()
Expand Down
File renamed without changes.
File renamed without changes.

0 comments on commit 30321c5

Please sign in to comment.