Merge pull request #5 from AutoResearch/carlosg/prompts
refactor: Refactor prompts and add tests
carlosgjs authored Dec 6, 2023
2 parents 600b53f + 58efb38 commit 1d80dfe
Showing 11 changed files with 180 additions and 82 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -3,7 +3,9 @@
[![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/)

[![PyPI](https://img.shields.io/pypi/v/autora-doc?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/autora-doc/)
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/AutoResearch/autodoc/smoke-test.yml)](https://github.com/AutoResearch/autodoc/actions/workflows/smoke-test.yml)


[![GitHub Workflow Status](https://github.com/autoresearch/autodoc/actions/workflows/smoke-test.yml/badge.svg)](https://github.com/AutoResearch/autodoc/actions/workflows/smoke-test.yml)
[![codecov](https://codecov.io/gh/AutoResearch/autodoc/branch/main/graph/badge.svg)](https://codecov.io/gh/AutoResearch/autodoc)
[![Read the Docs](https://img.shields.io/readthedocs/autora-doc)](https://autora-doc.readthedocs.io/)

@@ -76,7 +78,7 @@ az storage blob upload --account-name <account> --container <container> --file

Prediction
```sh
az ml job create -f azureml/predict.yml --set display_name="Test prediction job" --web
az ml job create -f azureml/eval.yml --set display_name="Test prediction job" --web
```

Notes:
7 changes: 6 additions & 1 deletion azureml/predict.yml → azureml/eval.yml
@@ -1,5 +1,10 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
command: python -m autora.doc.pipelines.main predict ${{inputs.data_dir}}/data.jsonl ${{inputs.model_dir}}/llama-2-7b-chat-hf
command: >
python -m autora.doc.pipelines.main eval
${{inputs.data_dir}}/data.jsonl
${{inputs.model_dir}}/llama-2-7b-chat-hf
SYS_1
INSTR_SWEETP_1
code: ../src
inputs:
data_dir:
18 changes: 18 additions & 0 deletions azureml/generate.yml
@@ -0,0 +1,18 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
command: >
python -m autora.doc.pipelines.main generate
--model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf
--output ./outputs/output.txt
autora/doc/pipelines/main.py
code: ../src
inputs:
model_dir:
type: uri_folder
path: azureml://datastores/workspaceblobstore/paths/base_models
environment:
image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21
conda_file: conda.yml
display_name: autodoc_prediction
compute: azureml:v100cluster
experiment_name: autodoc_prediction
description: |
15 changes: 9 additions & 6 deletions pyproject.toml
@@ -16,11 +16,11 @@ classifiers = [
]
dynamic = ["version"]
dependencies = [
"transformers>=4.35.2",
"typer",
"scipy",
# This works, while installing from pytorch and cuda from conda does not",
"torch==2.0.1",
"transformers>=4.35.2",
]

# On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes)
@@ -42,17 +42,17 @@ dev = [
"nbsphinx", # Used to integrate Python notebooks into Sphinx documentation
"ipython", # Also used in building notebooks into Sphinx
"matplotlib", # Used in sample notebook intro_notebook.ipynb
"numpy", # Used in sample notebook intro_notebook.ipynb
"ipykernel",
]
train = [
"jsonlines",
"mlflow",
"azureml-mlflow",
]
azure = [
"azureml-core",
"jsonlines",
"azureml-mlflow",
]

train_cuda = [
cuda = [
"bitsandbytes>=0.41.2.post2",
"accelerate>=0.24.1",
"xformers",
@@ -98,3 +98,6 @@ include = ["src/autora"]

[tool.hatch.build.targets.wheel]
packages = ["src/autora"]

[project.scripts]
autodoc = "autora.doc.pipelines.main:app"
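The new `[project.scripts]` entry means that, once the package is installed, the Typer app in `autora.doc.pipelines.main` is also exposed as an `autodoc` console command. As a minimal sketch (not part of this diff), the same commands can be exercised in-process with Typer's test runner; the `--help` invocation below is purely illustrative:

```python
from typer.testing import CliRunner

from autora.doc.pipelines.main import app

runner = CliRunner()
# Equivalent to running `autodoc generate --help` from a shell after `pip install .`
result = runner.invoke(app, ["generate", "--help"])
assert result.exit_code == 0
print(result.stdout)
```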
23 changes: 0 additions & 23 deletions src/autora/doc/example_module.py

This file was deleted.

46 changes: 31 additions & 15 deletions src/autora/doc/pipelines/main.py
@@ -1,12 +1,12 @@
import logging
from timeit import default_timer as timer
from typing import List

import jsonlines
import mlflow
import torch
import typer

from autora.doc.runtime.predict_hf import Predictor
from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts

app = typer.Typer()
logging.basicConfig(
@@ -15,37 +15,33 @@
)
logger = logging.getLogger(__name__)

# TODO: organize the system and instruction prompts into a separate module
SYS = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for
scientific experiments. Your documentation focuses on the experiment's purpose, procedure, and results. Therefore,
details about specific python functions, packages, or libraries are not necessary. Your readers are experimental
scientists.
"""

instr = """Please generate high-level two paragraph documentation for the following experiment. The first paragraph
should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'"""
@app.command()
def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> List[str]:
import jsonlines
import mlflow

mlflow.autolog()

@app.command()
def predict(data_file: str, model_path: str) -> None:
run = mlflow.active_run()

sys_prompt = SYS[sys_id]
instr_prompt = INSTR[instruc_id]
if run is None:
run = mlflow.start_run()
with run:
logger.info(f"Active run_id: {run.info.run_id}")
logger.info(f"running predict with {data_file}")
logger.info(f"model path: {model_path}")

# predictions = []
with jsonlines.open(data_file) as reader:
items = [item for item in reader]
inputs = [item["instruction"] for item in items]
labels = [item["output"] for item in items]

pred = Predictor(model_path)
timer_start = timer()
predictions = pred.predict(SYS, instr, inputs)
predictions = pred.predict(sys_prompt, instr_prompt, inputs)
timer_end = timer()
pred_time = timer_end - timer_start
mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs)))
@@ -58,6 +54,27 @@ def predict(data_file: str, model_path: str) -> None:
total_tokens = sum([len(token) for token in tokens])
mlflow.log_metric("total_tokens", total_tokens)
mlflow.log_metric("tokens/sec", total_tokens / pred_time)
return predictions


@app.command()
def generate(
python_file: str,
model_path: str = "meta-llama/llama-2-7b-chat-hf",
output: str = "output.txt",
sys_id: SystemPrompts = SystemPrompts.SYS_1,
instruc_id: InstructionPrompts = InstructionPrompts.INSTR_SWEETP_1,
) -> None:
with open(python_file, "r") as f:
inputs = [f.read()]
sys_prompt = SYS[sys_id]
instr_prompt = INSTR[instruc_id]
pred = Predictor(model_path)
predictions = pred.predict(sys_prompt, instr_prompt, inputs)
assert len(predictions) == 1, f"Expected only one output, got {len(predictions)}"
logger.info(f"Writing output to {output}")
with open(output, "w") as f:
f.write(predictions[0])


@app.command()
@@ -68,5 +85,4 @@ def import_model(model_name: str) -> None:
if __name__ == "__main__":
logger.info(f"Torch version: {torch.__version__} , Cuda available: {torch.cuda.is_available()}")

mlflow.autolog()
app()
57 changes: 35 additions & 22 deletions src/autora/doc/runtime/predict_hf.py
Expand Up @@ -3,24 +3,22 @@

import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

from autora.doc.runtime.prompts import LLAMA2_INST_CLOSE, TEMP_LLAMA2

logger = logging.getLogger(__name__)


class Predictor:
def __init__(self, model_path: str):
# Load the model in 4bit quantization for faster inference on smaller GPUs
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
)
config = self.get_config()

logger.info(f"Loading model from {model_path}")
self.tokenizer = AutoTokenizer.from_pretrained(model_path)
self.model = AutoModelForCausalLM.from_pretrained(
model_path, quantization_config=bnb_config, device_map="auto"
model_path,
**config,
)
logger.info("Model loaded")
self.pipeline = transformers.pipeline(
@@ -30,18 +28,8 @@ def __init__(self, model_path: str):
)

def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]:
# Standard Llama2 template
template = f"""
[INST]<<SYS>>
{sys}
{instr}
[INPUT]
[/INST]
"""
logger.info(f"Generating {len(inputs)} predictions")
prompts = [template.replace("[INPUT]", input) for input in inputs]
prompts = [TEMP_LLAMA2.format(sys=sys, instr=instr, input=input) for input in inputs]
# TODO: Make these parameters configurable
sequences = self.pipeline(
prompts,
@@ -51,13 +39,38 @@ def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]:
top_k=40,
num_return_sequences=1,
eos_token_id=self.tokenizer.eos_token_id,
max_length=1000,
max_length=2048,
)

results = [sequence[0]["generated_text"] for sequence in sequences]
results = [Predictor.trim_prompt(sequence[0]["generated_text"]) for sequence in sequences]
logger.info(f"Generated {len(results)} results")
return results

@staticmethod
def trim_prompt(output: str) -> str:
marker = output.find(LLAMA2_INST_CLOSE)
if marker == -1:
logger.warning(f"Could not find end of prompt marker '{LLAMA2_INST_CLOSE}' in '{output}'")
return output
return output[marker + len(LLAMA2_INST_CLOSE) :]

def tokenize(self, input: List[str]) -> Dict[str, List[List[int]]]:
tokens: Dict[str, List[List[int]]] = self.tokenizer(input)
return tokens

def get_config(self) -> Dict[str, str]:
if torch.cuda.is_available():
from transformers import BitsAndBytesConfig

# Load the model in 4bit quantization for faster inference on smaller GPUs
return {
"quantization_config": BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.bfloat16,
),
"device_map": "auto",
}
else:
return {}
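For orientation, a minimal sketch of how the refactored `Predictor` is driven (it mirrors the calls in `eval`/`generate` above; the model id is a placeholder, and `get_config` only enables 4-bit quantization when CUDA is available, so the same code also runs on CPU):

```python
from autora.doc.runtime.predict_hf import Predictor
from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts

# Placeholder model id: any local path or Hugging Face Hub model works here
pred = Predictor("meta-llama/llama-2-7b-chat-hf")
predictions = pred.predict(
    SYS[SystemPrompts.SYS_1],
    INSTR[InstructionPrompts.INSTR_SWEETP_1],
    ["print('hello world')"],  # one code snippet to document per list element
)
print(predictions[0])
```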
36 changes: 36 additions & 0 deletions src/autora/doc/runtime/prompts.py
@@ -0,0 +1,36 @@
from enum import Enum

LLAMA2_INST_CLOSE = "[/INST]\n"

# Standard Llama2 template
TEMP_LLAMA2 = """
[INST]<<SYS>>
{sys}
{instr}
{input}
[/INST]
"""


SYS_1 = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for
scientific experiments. Your documentation focuses on the experiment's purpose, procedure, and results. Therefore,
details about specific python functions, packages, or libraries are not necessary. Your readers are experimental
scientists.
"""

INSTR_SWEETP_1 = """Please generate high-level two paragraph documentation for the following experiment. The first
paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'"""


class SystemPrompts(str, Enum):
SYS_1 = "SYS_1"


class InstructionPrompts(str, Enum):
INSTR_SWEETP_1 = "INSTR_SWEETP_1"


SYS = {SystemPrompts.SYS_1: SYS_1}
INSTR = {InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1}
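A minimal sketch of the intended lookup pattern (this mirrors how `eval` and `generate` resolve prompts from the enum IDs passed on the command line; the input string is a placeholder):

```python
from autora.doc.runtime.prompts import INSTR, SYS, TEMP_LLAMA2, InstructionPrompts, SystemPrompts

sys_prompt = SYS[SystemPrompts.SYS_1]
instr_prompt = INSTR[InstructionPrompts.INSTR_SWEETP_1]

# Render the full Llama 2 prompt for a single (placeholder) experiment snippet
prompt = TEMP_LLAMA2.format(sys=sys_prompt, instr=instr_prompt, input="experiment_code = ...")
print(prompt)
```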
13 changes: 0 additions & 13 deletions tests/test.py

This file was deleted.

25 changes: 25 additions & 0 deletions tests/test_main.py
@@ -0,0 +1,25 @@
from pathlib import Path

from autora.doc.pipelines.main import eval, generate
from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts

# dummy HF model for testing
TEST_HF_MODEL = "hf-internal-testing/tiny-random-FalconForCausalLM"


def test_predict() -> None:
data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve()
outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1)
assert len(outputs) == 3, "Expected 3 outputs"
for output in outputs:
assert len(output) > 0, "Expected non-empty output"


def test_generate() -> None:
python_file = __file__
output = Path("output.txt")
output.unlink(missing_ok=True)
generate(python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1)
assert output.exists(), f"Expected output file {output} to exist"
with open(str(output), "r") as f:
assert len(f.read()) > 0, f"Expected non-empty output file {output}"
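The new tests target a tiny random Hugging Face model, so they can run without a GPU. A minimal sketch for running them locally, assuming `pytest` is installed in the environment:

```python
# Run from the repository root; the tiny test model is downloaded on first use
import pytest

pytest.main(["-q", "tests/test_main.py"])
```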