Skip to content

Commit

Permalink
Fixed transcripts and added "ask a question about an image" mode
Browse files Browse the repository at this point in the history
  • Loading branch information
Dicklesworthstone committed May 25, 2024
1 parent ca60f43 commit 4269a2a
Show file tree
Hide file tree
Showing 11 changed files with 374 additions and 76 deletions.
2 changes: 2 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ USE_SECURITY_TOKEN=1
USE_PARALLEL_INFERENCE_QUEUE=1
MAX_CONCURRENT_PARALLEL_INFERENCE_TASKS=50
DEFAULT_MODEL_NAME=Meta-Llama-3-8B-Instruct.Q3_K_S
DEFAULT_MULTI_MODAL_MODEL_NAME=llava-llama-3-8b-v1_1-int4
USE_FLASH_ATTENTION=1
LLM_CONTEXT_SIZE_IN_TOKENS=2048
TEXT_COMPLETION_CONTEXT_SIZE_IN_TOKENS=32000
DEFAULT_MAX_COMPLETION_TOKENS=1000
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ magika
mutagen
nvgpu
pandas
pillow
psutil
pydantic
PyPDF2
Expand Down
16 changes: 16 additions & 0 deletions embeddings_data_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from decouple import config
from sqlalchemy import event
from datetime import datetime
from fastapi import UploadFile

Base = declarative_base()
DEFAULT_MODEL_NAME = config("DEFAULT_MODEL_NAME", default="Meta-Llama-3-8B-Instruct.Q3_K_S", cast=str)
Expand Down Expand Up @@ -181,8 +182,22 @@ class TextCompletionResponse(BaseModel):
number_of_completions_to_generate: int
time_taken_in_seconds: float
generated_text: str
finish_reason: str
llm_model_usage_json: str

class ImageQuestionResponse(BaseModel):
    """Response payload for the "ask a question about an image" endpoint.

    NOTE(review): the original model declared ``time_taken_in_seconds``
    twice (a copy-paste slip); the duplicate declaration has been removed.
    The second declaration merely shadowed the first, so the effective
    schema — and therefore the wire format — is unchanged.
    """
    question: str                           # the user's question about the image
    llm_model_name: str                     # multi-modal model used to answer
    image_hash: str                         # hash identifying the submitted image
    time_taken_in_seconds: float            # wall-clock processing time
    grammar_file_string: str                # grammar constraint applied, if any
    number_of_tokens_to_generate: int
    number_of_completions_to_generate: int
    generated_text: str                     # the model's answer
    finish_reason: str                      # why generation stopped (e.g. "stop", "length")
    llm_model_usage_json: str               # token-usage stats serialized as JSON

class AudioTranscript(Base):
__tablename__ = "audio_transcripts"
audio_file_hash = Column(String, primary_key=True, index=True)
Expand All @@ -196,6 +211,7 @@ class AudioTranscript(Base):
request_time = Column(DateTime)
response_time = Column(DateTime)
total_time = Column(Float)
corpus_identifier_string = Column(String, index=True)

class AudioTranscriptResponse(BaseModel):
audio_file_hash: str
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ dependencies:
- mutagen
- nvgpu
- pandas
- pillow
- psutil
- pydantic
- PyPDF2
Expand Down
40 changes: 40 additions & 0 deletions misc_utility_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,26 @@
import re
import json
import io
import glob
import redis
import sys
import threading
import numpy as np
import faiss
import base64
from typing import Optional
from pathlib import Path
from typing import Any
from database_functions import AsyncSessionLocal
from sqlalchemy import select
from collections import defaultdict
from PIL import Image
from decouple import config

logger = setup_logger()
USE_RAMDISK = config("USE_RAMDISK", default=False, cast=bool)
RAMDISK_PATH = config("RAMDISK_PATH", default="/mnt/ramdisk", cast=str)
BASE_DIRECTORY = os.path.dirname(os.path.abspath(__file__))

class suppress_stdout_stderr(object):
def __enter__(self):
Expand Down Expand Up @@ -292,3 +302,33 @@ def seek(self, offset: int, whence: int = 0) -> int:
return self.file.seek(offset, whence)
def tell(self) -> int:
    # Report the current stream position of the wrapped file object.
    return self.file.tell()

def process_image(image_path, max_dimension=1024):
    """Downscale an image so neither side exceeds *max_dimension* pixels.

    The result is written next to the original with ``_processed`` appended
    to the file stem (same suffix), and the new path is returned.
    ``Image.thumbnail`` preserves aspect ratio and never upscales.
    """
    src = Path(image_path)
    dst = src.with_name(f"{src.stem}_processed{src.suffix}")
    with Image.open(image_path) as img:
        img.thumbnail((max_dimension, max_dimension), Image.LANCZOS)
        img.save(dst)
    return dst

def alpha_remover_func(img):
    """Flatten an RGBA image onto a solid white background and return RGB.

    Images that are not in RGBA mode are returned untouched. Using the
    image itself as the paste mask composites its alpha channel over the
    white canvas before the final RGB conversion.
    """
    if img.mode == 'RGBA':
        white_bg = Image.new('RGBA', img.size, (255, 255, 255, 255))
        white_bg.paste(img, mask=img)
        img = white_bg.convert('RGB')
    return img

def image_to_base64_data_uri(file_path):
    """Encode an image file as a base64 ``data:`` URI.

    The original implementation hard-coded ``image/png`` for every file,
    mislabeling JPEG/WebP/etc. inputs. The MIME type is now guessed from
    the file name; ``image/png`` remains the fallback (preserving the old
    behavior) when the guess fails or is not an image type.

    :param file_path: path to the image file to encode
    :return: ``data:<mime>;base64,<payload>`` string
    """
    import mimetypes  # stdlib; local import keeps module scope unchanged
    mime_type, _ = mimetypes.guess_type(str(file_path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"  # previous hard-coded default
    with open(file_path, "rb") as img_file:
        base64_data = base64.b64encode(img_file.read()).decode('utf-8')
    return f"data:{mime_type};base64,{base64_data}"

def find_clip_model_path(llm_model_name: str) -> Optional[str]:
    """Locate the mmproj (CLIP projector) companion file for a model.

    Derives the expected ``*-mmproj-f16.gguf`` file name from the base
    model name (both ``-f16`` and ``-int4`` quantized names map onto the
    f16 projector) and searches the models directory — the ramdisk copy
    when USE_RAMDISK is set, otherwise the local ``models`` folder.
    Returns the first match, or None (with an error logged) if absent.
    """
    if USE_RAMDISK:
        models_dir = os.path.join(RAMDISK_PATH, 'models')
    else:
        models_dir = os.path.join(BASE_DIRECTORY, 'models')
    base_name = os.path.splitext(os.path.basename(llm_model_name))[0]
    mmproj_model_name = base_name.replace("-f16", "-mmproj-f16").replace("-int4", "-mmproj-f16")
    matches = glob.glob(os.path.join(models_dir, f"{mmproj_model_name}.gguf"))
    if matches:
        return matches[0]
    logger.error(f"No mmproj file found matching: {mmproj_model_name}")
    return None
5 changes: 4 additions & 1 deletion model_urls.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,8 @@
"https://huggingface.co/NousResearch/Hermes-2-Theta-Llama-3-8B-GGUF/resolve/main/Hermes-2-Pro-Llama-3-Instruct-Merged-DPO-Q4_K_M.gguf",
"https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q3_K_S.gguf",
"https://huggingface.co/Orenguteng/Llama-3-8B-Lexi-Uncensored-GGUF/resolve/main/Lexi-Llama-3-8B-Uncensored_Q5_K_M.gguf",
"https://huggingface.co/vonjack/bge-m3-gguf/resolve/main/bge-m3-q8_0.gguf"
"https://huggingface.co/bartowski/Phi-3-medium-128k-instruct-GGUF/resolve/main/Phi-3-medium-128k-instruct-IQ4_NL.gguf",
"https://huggingface.co/vonjack/bge-m3-gguf/resolve/main/bge-m3-q8_0.gguf",
"https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-gguf/resolve/main/llava-llama-3-8b-v1_1-mmproj-f16.gguf",
"https://huggingface.co/xtuner/llava-llama-3-8b-v1_1-gguf/resolve/main/llava-llama-3-8b-v1_1-int4.gguf"
]
Empty file added models/download.lock
Empty file.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ magika
mutagen
nvgpu
pandas
pillow
psutil
pydantic
PyPDF2
Expand Down
Loading

0 comments on commit 4269a2a

Please sign in to comment.