From fcc744da37435270324590bf59bb280e24747a9d Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 5 Dec 2023 11:01:36 -0800 Subject: [PATCH 1/5] refactor: Refactor prompts --- README.md | 4 +- azureml/predict.yml | 7 +++- pyproject.toml | 3 ++ src/autora/doc/example_module.py | 23 ------------ src/autora/doc/pipelines/main.py | 17 +++------ src/autora/doc/runtime/predict_hf.py | 55 +++++++++++++++++----------- src/autora/doc/runtime/prompts.py | 37 +++++++++++++++++++ tests/test.py | 23 +++++++----- 8 files changed, 101 insertions(+), 68 deletions(-) delete mode 100644 src/autora/doc/example_module.py create mode 100644 src/autora/doc/runtime/prompts.py diff --git a/README.md b/README.md index 6db068f..2ff8a62 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/) [![PyPI](https://img.shields.io/pypi/v/autora-doc?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/autora-doc/) -[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/AutoResearch/autodoc/smoke-test.yml)](https://github.com/AutoResearch/autodoc/actions/workflows/smoke-test.yml) + + +[![GitHub Workflow Status](https://github.com/autoresearch/autodoc/actions/workflows/smoke-test.yml/badge.svg)](https://github.com/AutoResearch/autodoc/actions/workflows/smoke-test.yml) [![codecov](https://codecov.io/gh/AutoResearch/autodoc/branch/main/graph/badge.svg)](https://codecov.io/gh/AutoResearch/autodoc) [![Read the Docs](https://img.shields.io/readthedocs/autora-doc)](https://autora-doc.readthedocs.io/) diff --git a/azureml/predict.yml b/azureml/predict.yml index 7f888b4..d5410a2 100644 --- a/azureml/predict.yml +++ b/azureml/predict.yml @@ -1,5 +1,10 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json -command: python -m autora.doc.pipelines.main predict ${{inputs.data_dir}}/data.jsonl ${{inputs.model_dir}}/llama-2-7b-chat-hf +command: > + python -m autora.doc.pipelines.main predict + ${{inputs.data_dir}}/data.jsonl + ${{inputs.model_dir}}/llama-2-7b-chat-hf + SYS_1 + INSTR_SWEETP_1 code: ../src inputs: data_dir: diff --git a/pyproject.toml b/pyproject.toml index e6db6a3..97c9c31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,3 +98,6 @@ include = ["src/autora"] [tool.hatch.build.targets.wheel] packages = ["src/autora"] + +[project.scripts] +autodoc = "autora.doc.pipelines.main:app" \ No newline at end of file diff --git a/src/autora/doc/example_module.py b/src/autora/doc/example_module.py deleted file mode 100644 index f76e837..0000000 --- a/src/autora/doc/example_module.py +++ /dev/null @@ -1,23 +0,0 @@ -"""An example module containing simplistic functions.""" - - -def greetings() -> str: - """A friendly greeting for a future friend. - - Returns - ------- - str - A typical greeting from a software engineer. - """ - return "Hello from LINCC-Frameworks!" - - -def meaning() -> int: - """The meaning of life, the universe, and everything. - - Returns - ------- - int - The meaning of life. 
- """ - return 42 diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index 292c8ff..b74bf4b 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -7,6 +7,7 @@ import typer from autora.doc.runtime.predict_hf import Predictor +from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts app = typer.Typer() logging.basicConfig( @@ -15,21 +16,13 @@ ) logger = logging.getLogger(__name__) -# TODO: organize the system and instruction prompts into a separate module -SYS = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for - scientific experiments. Your documentation focuses on the experiment's purpose, procedure, and results. Therefore, - details about specific python functions, packages, or libraries are not necessary. Your readers are experimental - scientists. -""" - -instr = """Please generate high-level two paragraph documentation for the following experiment. The first paragraph - should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" - @app.command() -def predict(data_file: str, model_path: str) -> None: +def predict(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> None: run = mlflow.active_run() + sys_prompt = SYS[sys_id] + instr_prompt = INSTR[instruc_id] if run is None: run = mlflow.start_run() with run: @@ -45,7 +38,7 @@ def predict(data_file: str, model_path: str) -> None: pred = Predictor(model_path) timer_start = timer() - predictions = pred.predict(SYS, instr, inputs) + predictions = pred.predict(sys_prompt, instr_prompt, inputs) timer_end = timer() pred_time = timer_end - timer_start mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs))) diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py index ba3e59d..cbde760 100644 --- a/src/autora/doc/runtime/predict_hf.py +++ b/src/autora/doc/runtime/predict_hf.py @@ -3,24 +3,22 @@ import torch import transformers -from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig +from transformers import AutoModelForCausalLM, AutoTokenizer + +from autora.doc.runtime.prompts import LLAMA2_INST_CLOSE, TEMP_LLAMA2 logger = logging.getLogger(__name__) class Predictor: def __init__(self, model_path: str): - # Load the model in 4bit quantization for faster inference on smaller GPUs - bnb_config = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.bfloat16, - ) + config = self.get_config() + logger.info(f"Loading model from {model_path}") self.tokenizer = AutoTokenizer.from_pretrained(model_path) self.model = AutoModelForCausalLM.from_pretrained( - model_path, quantization_config=bnb_config, device_map="auto" + model_path, + **config, ) logger.info("Model loaded") self.pipeline = transformers.pipeline( @@ -30,18 +28,8 @@ def __init__(self, model_path: str): ) def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]: - # Standard Llama2 template - template = f""" -[INST]<> -{sys} - -{instr} - -[INPUT] -[/INST] -""" logger.info(f"Generating {len(inputs)} predictions") - prompts = [template.replace("[INPUT]", input) for input in inputs] + prompts = [TEMP_LLAMA2.format(sys=sys, instr=instr, input=input) for input in inputs] # TODO: Make these parameters configurable sequences = self.pipeline( prompts, @@ -54,10 +42,35 @@ def predict(self, sys: str, instr: str, 
inputs: List[str]) -> List[str]: max_length=1000, ) - results = [sequence[0]["generated_text"] for sequence in sequences] + results = [Predictor.trim_prompt(sequence[0]["generated_text"]) for sequence in sequences] logger.info(f"Generated {len(results)} results") return results + @staticmethod + def trim_prompt(output: str) -> str: + marker = output.find(LLAMA2_INST_CLOSE) + if marker == -1: + logger.warning(f"Could not find end of prompt marker '{LLAMA2_INST_CLOSE}' in '{output}'") + return output + return output[marker + len(LLAMA2_INST_CLOSE) :] + def tokenize(self, input: List[str]) -> Dict[str, List[List[int]]]: tokens: Dict[str, List[List[int]]] = self.tokenizer(input) return tokens + + def get_config(self) -> Dict[str, str]: + if torch.cuda.is_available(): + from transformers import BitsAndBytesConfig + + # Load the model in 4bit quantization for faster inference on smaller GPUs + return { + "quantization_config": BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + ), + "device_map": "auto", + } + else: + return {} diff --git a/src/autora/doc/runtime/prompts.py b/src/autora/doc/runtime/prompts.py new file mode 100644 index 0000000..5875127 --- /dev/null +++ b/src/autora/doc/runtime/prompts.py @@ -0,0 +1,37 @@ +from enum import Enum + +LLAMA2_INST_CLOSE = "[/INST]\n" + +# Standard Llama2 template +TEMP_LLAMA2 = """ +[INST]<> +{sys} + +{instr} + +{input} +[/INST] +""" + + +SYS_1 = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for +scientific experiments. Your documentation focuses on the experiment's purpose, procedure, and results. Therefore, +details about specific python functions, packages, or libraries are not necessary. Your readers are experimental +scientists. +""" + +INSTR_SWEETP_1 = """Please generate high-level two paragraph documentation for the following experiment. The first +paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" + + +class SystemPrompts(Enum): + SYS_1 = "SYS_1" + + +class InstructionPrompts(Enum): + SYS_1 = "SYS_1" + INSTR_SWEETP_1 = "INSTR_SWEETP_1" + + +SYS = {SystemPrompts.SYS_1: SYS_1} +INSTR = {InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1} diff --git a/tests/test.py b/tests/test.py index a578227..9ad2c47 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,13 +1,16 @@ -from autora.doc import example_module +from autora.doc.runtime.predict_hf import Predictor -def test_greetings() -> None: - """Verify the output of the `greetings` function""" - output = example_module.greetings() - assert output == "Hello from LINCC-Frameworks!" 
+def test_trim_prompt() -> None: + """Verify the output of the `trim_prompt` function""" + no_marker = "Generated text with no marker" + output = Predictor.trim_prompt(no_marker) + assert output == no_marker - -def test_meaning() -> None: - """Verify the output of the `meaning` function""" - output = example_module.meaning() - assert output == 42 + with_marker = """ +The prompt is here +[/INST] +output +""" + output = Predictor.trim_prompt(with_marker) + assert output == "output\n" From 4f8d9005e47caca493ee839ad6d93f87e7d7959a Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 5 Dec 2023 11:19:51 -0800 Subject: [PATCH 2/5] add tests --- src/autora/doc/pipelines/main.py | 6 +++++- src/autora/doc/runtime/prompts.py | 1 - tests/test_main.py | 15 +++++++++++++++ tests/{test.py => test_predict_hf.py} | 0 4 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/test_main.py rename tests/{test.py => test_predict_hf.py} (100%) diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index b74bf4b..aacb809 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -1,5 +1,6 @@ import logging from timeit import default_timer as timer +from typing import List import jsonlines import mlflow @@ -18,7 +19,9 @@ @app.command() -def predict(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> None: +def predict( + data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts +) -> List[str]: run = mlflow.active_run() sys_prompt = SYS[sys_id] @@ -51,6 +54,7 @@ def predict(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: total_tokens = sum([len(token) for token in tokens]) mlflow.log_metric("total_tokens", total_tokens) mlflow.log_metric("tokens/sec", total_tokens / pred_time) + return predictions @app.command() diff --git a/src/autora/doc/runtime/prompts.py b/src/autora/doc/runtime/prompts.py index 5875127..19f905a 100644 --- a/src/autora/doc/runtime/prompts.py +++ b/src/autora/doc/runtime/prompts.py @@ -29,7 +29,6 @@ class SystemPrompts(Enum): class InstructionPrompts(Enum): - SYS_1 = "SYS_1" INSTR_SWEETP_1 = "INSTR_SWEETP_1" diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..e2d12d3 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,15 @@ +from pathlib import Path + +from autora.doc.pipelines.main import predict +from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts + +# dummy HF model for testing +TEST_HF_MODEL = "hf-internal-testing/tiny-random-FalconForCausalLM" + + +def test_predict() -> None: + data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve() + outputs = predict(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + assert len(outputs) == 3, "Expected 3 outputs" + for output in outputs: + assert len(output) > 0, "Expected non-empty output" diff --git a/tests/test.py b/tests/test_predict_hf.py similarity index 100% rename from tests/test.py rename to tests/test_predict_hf.py From 424f8b6cd41cf48b40a44eb34e6b1454c3c817df Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 5 Dec 2023 16:58:23 -0800 Subject: [PATCH 3/5] feat: Generate command --- README.md | 2 +- azureml/{predict.yml => eval.yml} | 2 +- azureml/generate.yml | 18 ++++++++++++++++++ src/autora/doc/pipelines/main.py | 25 +++++++++++++++++++++---- src/autora/doc/runtime/predict_hf.py | 2 +- src/autora/doc/runtime/prompts.py | 4 ++-- tests/test_main.py 
| 14 ++++++++++++-- 7 files changed, 56 insertions(+), 11 deletions(-) rename azureml/{predict.yml => eval.yml} (96%) create mode 100644 azureml/generate.yml diff --git a/README.md b/README.md index 2ff8a62..127c2f4 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ az storage blob upload --account-name --container > --file Prediction ```sh -az ml job create -f azureml/predict.yml --set display_name="Test prediction job" --web +az ml job create -f azureml/eval.yml --set display_name="Test prediction job" --web ``` Notes: diff --git a/azureml/predict.yml b/azureml/eval.yml similarity index 96% rename from azureml/predict.yml rename to azureml/eval.yml index d5410a2..a2f72b6 100644 --- a/azureml/predict.yml +++ b/azureml/eval.yml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: > - python -m autora.doc.pipelines.main predict + python -m autora.doc.pipelines.main eval ${{inputs.data_dir}}/data.jsonl ${{inputs.model_dir}}/llama-2-7b-chat-hf SYS_1 diff --git a/azureml/generate.yml b/azureml/generate.yml new file mode 100644 index 0000000..c7df113 --- /dev/null +++ b/azureml/generate.yml @@ -0,0 +1,18 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json +command: > + python -m autora.doc.pipelines.main generate + --model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf + --output ./outputs/output.txt + autora/doc/pipelines/main.py +code: ../src +inputs: + model_dir: + type: uri_folder + path: azureml://datastores/workspaceblobstore/paths/base_models +environment: + image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21 + conda_file: conda.yml +display_name: autodoc_prediction +compute: azureml:v100cluster +experiment_name: autodoc_prediction +description: | \ No newline at end of file diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index aacb809..9c9aa4e 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -19,9 +19,7 @@ @app.command() -def predict( - data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts -) -> List[str]: +def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> List[str]: run = mlflow.active_run() sys_prompt = SYS[sys_id] @@ -33,7 +31,6 @@ def predict( logger.info(f"running predict with {data_file}") logger.info(f"model path: {model_path}") - # predictions = [] with jsonlines.open(data_file) as reader: items = [item for item in reader] inputs = [item["instruction"] for item in items] @@ -57,6 +54,26 @@ def predict( return predictions +@app.command() +def generate( + python_file: str, + model_path: str = "meta-llama/llama-2-7b-chat-hf", + output: str = "output.txt", + sys_id: SystemPrompts = SystemPrompts.SYS_1, + instruc_id: InstructionPrompts = InstructionPrompts.INSTR_SWEETP_1, +) -> None: + with open(python_file, "r") as f: + inputs = [f.read()] + sys_prompt = SYS[sys_id] + instr_prompt = INSTR[instruc_id] + pred = Predictor(model_path) + predictions = pred.predict(sys_prompt, instr_prompt, inputs) + assert len(predictions) == 1, f"Expected only one output, got {len(predictions)}" + logger.info(f"Writing output to {output}") + with open(output, "w") as f: + f.write(predictions[0]) + + @app.command() def import_model(model_name: str) -> None: pass diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py index cbde760..23c484e 100644 --- a/src/autora/doc/runtime/predict_hf.py +++ 
b/src/autora/doc/runtime/predict_hf.py @@ -39,7 +39,7 @@ def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]: top_k=40, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id, - max_length=1000, + max_length=2048, ) results = [Predictor.trim_prompt(sequence[0]["generated_text"]) for sequence in sequences] diff --git a/src/autora/doc/runtime/prompts.py b/src/autora/doc/runtime/prompts.py index 19f905a..75019fc 100644 --- a/src/autora/doc/runtime/prompts.py +++ b/src/autora/doc/runtime/prompts.py @@ -24,11 +24,11 @@ paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" -class SystemPrompts(Enum): +class SystemPrompts(str, Enum): SYS_1 = "SYS_1" -class InstructionPrompts(Enum): +class InstructionPrompts(str, Enum): INSTR_SWEETP_1 = "INSTR_SWEETP_1" diff --git a/tests/test_main.py b/tests/test_main.py index e2d12d3..3e67bab 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,6 +1,6 @@ from pathlib import Path -from autora.doc.pipelines.main import predict +from autora.doc.pipelines.main import eval, generate from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts # dummy HF model for testing @@ -9,7 +9,17 @@ def test_predict() -> None: data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve() - outputs = predict(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) assert len(outputs) == 3, "Expected 3 outputs" for output in outputs: assert len(output) > 0, "Expected non-empty output" + + +def test_generate() -> None: + python_file = __file__ + output = Path("output.txt") + output.unlink(missing_ok=True) + generate(python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + assert output.exists(), f"Expected output file {output} to exist" + with open(str(output), "r") as f: + assert len(f.read()) > 0, f"Expected non-empty output file {output}" From 4b40f3476d68597808281e3cb072968fcbc37718 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Wed, 6 Dec 2023 13:06:27 -0800 Subject: [PATCH 4/5] fix dependencies --- pyproject.toml | 12 ++++++------ src/autora/doc/pipelines/main.py | 8 +++++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 97c9c31..235a81a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ classifiers = [ ] dynamic = ["version"] dependencies = [ - "transformers>=4.35.2", "typer", "scipy", # This works, while installing from pytorch and cuda from conda does not", @@ -42,17 +41,18 @@ dev = [ "nbsphinx", # Used to integrate Python notebooks into Sphinx documentation "ipython", # Also used in building notebooks into Sphinx "matplotlib", # Used in sample notebook intro_notebook.ipynb - "numpy", # Used in sample notebook intro_notebook.ipynb "ipykernel", ] train = [ + "jsonlines", "mlflow", - "azureml-mlflow", +] +azure = [ "azureml-core", - "jsonlines", + "azureml-mlflow", ] - -train_cuda = [ +cuda = [ + "transformers>=4.35.2", "bitsandbytes>=0.41.2.post2", "accelerate>=0.24.1", "xformers", diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index 9c9aa4e..de7e906 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -2,8 +2,6 @@ from timeit import default_timer as timer from typing import List -import jsonlines -import mlflow import torch import typer @@ 
-20,6 +18,11 @@ @app.command() def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> List[str]: + import jsonlines + import mlflow + + mlflow.autolog() + run = mlflow.active_run() sys_prompt = SYS[sys_id] @@ -82,5 +85,4 @@ def import_model(model_name: str) -> None: if __name__ == "__main__": logger.info(f"Torch version: {torch.__version__} , Cuda available: {torch.cuda.is_available()}") - mlflow.autolog() app() From 58efb38a45a8960a2232b59ca4687b325b3e63cc Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Wed, 6 Dec 2023 15:05:09 -0800 Subject: [PATCH 5/5] fix transformer dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 235a81a..422c8ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "scipy", # This works, while installing from pytorch and cuda from conda does not", "torch==2.0.1", + "transformers>=4.35.2", ] # On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes) @@ -52,7 +53,6 @@ azure = [ "azureml-mlflow", ] cuda = [ - "transformers>=4.35.2", "bitsandbytes>=0.41.2.post2", "accelerate>=0.24.1", "xformers",
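
---

Illustrative usage note (not part of the patch series above): after these commits, the `autodoc` console script defined in `pyproject.toml` exposes the Typer app, and the new `generate` command accepts the same flags used in `azureml/generate.yml`. A minimal local invocation might look like the sketch below; the input path `path/to/experiment.py` is a placeholder, and running the full `meta-llama/llama-2-7b-chat-hf` model locally assumes the appropriate optional dependencies (`cuda` extras on a GPU machine) are installed.

```sh
# Sketch only: generate two-paragraph documentation for one experiment file
# using the default prompts (SYS_1 / INSTR_SWEETP_1) added in this patch series.
autodoc generate \
  --model-path meta-llama/llama-2-7b-chat-hf \
  --output output.txt \
  path/to/experiment.py
```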