-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from AutoResearch/carlosg/prompts
refactor: Refactor prompts and add tests
- Loading branch information
Showing
11 changed files
with
180 additions
and
82 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json | ||
command: > | ||
python -m autora.doc.pipelines.main generate | ||
--model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf | ||
--output ./outputs/output.txt | ||
autora/doc/pipelines/main.py | ||
code: ../src | ||
inputs: | ||
model_dir: | ||
type: uri_folder | ||
path: azureml://datastores/workspaceblobstore/paths/base_models | ||
environment: | ||
image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21 | ||
conda_file: conda.yml | ||
display_name: autodoc_prediction | ||
compute: azureml:v100cluster | ||
experiment_name: autodoc_prediction | ||
description: | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
from enum import Enum | ||
|
||
# Marker that ends the instruction section of a Llama2 prompt; the template
# below finishes with exactly this text.
LLAMA2_INST_CLOSE = "[/INST]\n"

# Standard Llama2 chat template.
# The system prompt must be wrapped in a matching <<SYS>> ... <</SYS>> pair;
# without the closing tag the system prompt is not delimited from the
# instruction and input that follow it.
# Placeholders: {sys} = system prompt, {instr} = instruction, {input} = input text.
TEMP_LLAMA2 = """
[INST]<<SYS>>
{sys}
<</SYS>>

{instr}
{input}
[/INST]
"""
|
||
|
||
# System prompt: frames the model as a documentation writer for scientific
# experiments whose readers are experimental scientists.
SYS_1 = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for
scientific experiments. Your documentation focuses on the experiment's purpose, procedure, and results. Therefore,
details about specific python functions, packages, or libraries are not necessary. Your readers are experimental
scientists.
"""

# Instruction prompt: asks for two paragraphs (purpose first, then procedure).
INSTR_SWEETP_1 = """Please generate high-level two paragraph documentation for the following experiment. The first
paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'"""
|
||
|
||
class SystemPrompts(str, Enum):
    """Identifiers of the available system prompts.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    SYS_1 = "SYS_1"
|
||
|
||
class InstructionPrompts(str, Enum):
    """Identifiers of the available instruction prompts.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    INSTR_SWEETP_1 = "INSTR_SWEETP_1"
|
||
|
||
# Lookup tables mapping each prompt identifier to its prompt text.
SYS = {
    SystemPrompts.SYS_1: SYS_1,
}
INSTR = {
    InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1,
}
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from pathlib import Path | ||
|
||
from autora.doc.pipelines.main import eval, generate | ||
from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts | ||
|
||
# dummy HF model for testing | ||
TEST_HF_MODEL = "hf-internal-testing/tiny-random-FalconForCausalLM" | ||
|
||
|
||
def test_predict() -> None:
    """Run ``eval`` on the bundled ``data.jsonl`` with the dummy model.

    Checks that exactly three outputs come back and that none of them is empty.
    """
    data_file = (Path(__file__).parent / "../data/data.jsonl").resolve()
    results = eval(
        str(data_file),
        TEST_HF_MODEL,
        SystemPrompts.SYS_1,
        InstructionPrompts.INSTR_SWEETP_1,
    )
    assert len(results) == 3, "Expected 3 outputs"
    for result in results:
        assert len(result) > 0, "Expected non-empty output"
|
||
|
||
def test_generate() -> None:
    """Run ``generate`` on this test file and check the output file it writes.

    Any stale ``output.txt`` is removed first so the existence check reflects
    this run only.
    """
    output = Path("output.txt")
    output.unlink(missing_ok=True)

    generate(
        __file__,  # this test module doubles as the Python file to document
        TEST_HF_MODEL,
        str(output),
        SystemPrompts.SYS_1,
        InstructionPrompts.INSTR_SWEETP_1,
    )

    assert output.exists(), f"Expected output file {output} to exist"
    with open(str(output), "r") as handle:
        contents = handle.read()
        assert len(contents) > 0, f"Expected non-empty output file {output}"
Oops, something went wrong.