From aa1b494d33358bc311e02c00f43d3b549c2daf36 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Fri, 8 Dec 2023 21:49:54 +0000 Subject: [PATCH 1/5] Add arguments for model parameters --- notebooks/generate.ipynb | 165 +++++++++++++++++++++++++++ src/autora/doc/runtime/predict_hf.py | 15 ++- 2 files changed, 172 insertions(+), 8 deletions(-) create mode 100644 notebooks/generate.ipynb diff --git a/notebooks/generate.ipynb b/notebooks/generate.ipynb new file mode 100644 index 0000000..5260a53 --- /dev/null +++ b/notebooks/generate.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "from autora.doc.runtime.predict_hf import Predictor\n", + "from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# model = \"../../models\" # if model has been previously downloaded via huggingface-cli\n", + "model = \"meta-llama/Llama-2-7b-chat-hf\"\n", + "pred = Predictor(model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The following prompt uses an example (code, doc) to specify the desired behavior\n", + "EX_CODE=\"\"\"\n", + "from sweetpea import *\n", + "\n", + "color = Factor('color', ['red', 'green', 'blue', 'yellow'])\n", + "word = Factor('word', ['red', 'green', 'blue', 'yellow'])\n", + "\n", + "def is_congruent(word, color):\n", + " return (word == color)\n", + "\n", + "def is_not_congruent(word, color):\n", + " return not is_congruent(word, color)\n", + "\n", + "congruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color]))\n", + "incongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color]))\n", + "\n", + "congruency = Factor('congruency', [congruent, incongruent])\n", + "\n", + "constraints = [MinimumTrials(48)]\n", + "design = [word, color, congruency]\n", + "crossing = [word, congruency]\n", + "\n", + "block = CrossBlock(design, crossing, constraints)\n", + "\n", + "experiment = synthesize_trials(block, 1)\n", + "\n", + "save_experiments_csv(block, experiment, 'code_1_sequences/seq')\n", + "\"\"\"\n", + "\n", + "EX_DOC=\"\"\"There are two regular factors: color and word. The color factor consists of four levels: \"red\", \"green\", \"blue\", and \"yellow\". \n", + "The word factor also consists of the four levels: \"red\", \"green\", \"blue\", and \"yellow\". There is another derived factor referred to as congruency. \n", + "The congruency factor depends on the regular factors word and color and has two levels: \"congruent\" and \"incongruent\".\n", + "A trial is considered \"congruent\" if the word matches the color, otherwise, it is considered \"incongruent\". We counterbalanced the word factor with the congruency factor. \n", + "All experiment sequences contained at least 48 trials.\"\"\"\n", + "\n", + "TEST_CODE=\"\"\"\n", + "from sweetpea import *\n", + "from sweetpea.primitives import *\n", + "\n", + "number_list = [125, 132, 139, 146, 160, 167, 174, 181]\n", + "letter_list = ['b', 'd', 'f', 'h', 's', 'u', 'w', 'y']\n", + "\n", + "number = Factor(\"number\", number_list)\n", + "letter = Factor(\"letter\", letter_list)\n", + "task = Factor(\"task\", [\"number task\", \"letter task\", \"free choice task\"])\n", + "\n", + "\n", + "def is_forced_trial_switch(task):\n", + " return (task[-1] == \"number task\" and task[0] == \"letter task\") or \\\n", + " (task[-1] == \"letter task\" and task[0] == \"number task\")\n", + "\n", + "\n", + "def is_forced_trial_repeat(task):\n", + " return (task[-1] == \"number task\" and task[0] == \"number task\") or \\\n", + " (task[-1] == \"letter task\" and task[0] == \"letter task\")\n", + "\n", + "\n", + "def is_free_trial_transition(task):\n", + " return task[-1] != \"free choice task\" and task[0] == \"free choice task\"\n", + "\n", + "\n", + "def is_free_trial_repeat(task):\n", + " return task[-1] == \"free choice task\" and task[0] == \"free choice task\"\n", + "\n", + "\n", + "def is_not_relevant_transition(task):\n", + " return not (is_forced_trial_repeat(task) or is_forced_trial_switch(task) or is_free_trial_repeat(\n", + " task) or is_free_trial_transition(task))\n", + "\n", + "\n", + "transit = Factor(\"task transition\", [\n", + " DerivedLevel(\"forced switch\", transition(is_forced_trial_switch, [task]), 3),\n", + " DerivedLevel(\"forced repeat\", transition(is_forced_trial_repeat, [task])),\n", + " DerivedLevel(\"free transition\", transition(is_free_trial_transition, [task]), 4),\n", + " DerivedLevel(\"free repeat\", transition(is_free_trial_repeat, [task]), 4),\n", + " DerivedLevel(\"forced first\", transition(is_not_relevant_transition, [task]), 4)\n", + "])\n", + "design = [letter, number, task, transit]\n", + "crossing = [[letter], [number], [transit]]\n", + "constraints = [MinimumTrials(256)]\n", + "\n", + "block = MultiCrossBlock(design, crossing, constraints)\n", + "\n", + "experiment = synthesize_trials(block, 1)\n", + "\n", + "save_experiments_csv(block, experiment, 'code_1_sequences/seq')\n", + "\"\"\"\n", + "\n", + "PROMPT=f\"\"\"Consider the following experiment code:\n", + "---\n", + "{EX_CODE}\n", + "---\n", + "Here's a a good English description:\n", + "---\n", + "{EX_DOC}\n", + "---\n", + "Using the same style, please generate a high-level one paragraph description for the following experiment code:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output = pred.predict(SYS[SystemPrompts.SYS_1], PROMPT, [TEST_CODE], temperature=0.05, top_k=10, num_ret_seq=3)[0]\n", + "for i,o in enumerate(output):\n", + " print(f\"******** Output {i} ********\\n{o}*************\\n\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autodoc", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py index 23c484e..49059cd 100644 --- a/src/autora/doc/runtime/predict_hf.py +++ b/src/autora/doc/runtime/predict_hf.py @@ -27,22 +27,21 @@ def __init__(self, model_path: str): tokenizer=self.tokenizer, ) - def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]: + def predict(self, sys: str, instr: str, inputs: List[str], temperature=0.6, top_p=0.95, top_k=40, max_length=2048, num_ret_seq=1) -> List[List[str]]: logger.info(f"Generating {len(inputs)} predictions") prompts = [TEMP_LLAMA2.format(sys=sys, instr=instr, input=input) for input in inputs] - # TODO: Make these parameters configurable sequences = self.pipeline( prompts, do_sample=True, - temperature=0.6, - top_p=0.95, - top_k=40, - num_return_sequences=1, + temperature=temperature, + top_p=top_p, + top_k=top_k, + num_return_sequences=num_ret_seq, eos_token_id=self.tokenizer.eos_token_id, - max_length=2048, + max_length=max_length, ) - results = [Predictor.trim_prompt(sequence[0]["generated_text"]) for sequence in sequences] + results = [[Predictor.trim_prompt(seq["generated_text"]) for seq in sequence] for sequence in sequences] logger.info(f"Generated {len(results)} results") return results From e3c004a540ae25e8c65436343acfd5bf167f6e5d Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Fri, 8 Dec 2023 15:24:17 -0800 Subject: [PATCH 2/5] Surface inference parameters to the CLI and jobs --- README.md | 2 + azureml/conda.yml | 2 +- azureml/eval.yml | 16 ++++++-- azureml/generate.yml | 12 +++++- notebooks/generate.ipynb | 59 +++++----------------------- pyproject.toml | 2 +- src/autora/doc/pipelines/main.py | 49 +++++++++++++++++------ src/autora/doc/runtime/predict_hf.py | 27 ++++++++++--- src/autora/doc/runtime/prompts.py | 53 ++++++++++++++++++++++++- tests/test_main.py | 8 ++-- 10 files changed, 152 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 127c2f4..6eed825 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # AutoDoc +[![ssec](https://img.shields.io/badge/SSEC-Project-purple?logo=&style=plastic)](https://escience.washington.edu/software-engineering/ssec/) + [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/) [![PyPI](https://img.shields.io/pypi/v/autora-doc?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/autora-doc/) diff --git a/azureml/conda.yml b/azureml/conda.yml index f772397..ce84fc2 100644 --- a/azureml/conda.yml +++ b/azureml/conda.yml @@ -15,4 +15,4 @@ dependencies: - xformers - scipy # This works, while installing from pytorch and cuda from conda does not - - torch==2.0.1 \ No newline at end of file + - torch==2.1.0 \ No newline at end of file diff --git a/azureml/eval.yml b/azureml/eval.yml index a2f72b6..e64cda2 100644 --- a/azureml/eval.yml +++ b/azureml/eval.yml @@ -2,9 +2,12 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: > python -m autora.doc.pipelines.main eval ${{inputs.data_dir}}/data.jsonl - ${{inputs.model_dir}}/llama-2-7b-chat-hf - SYS_1 - INSTR_SWEETP_1 + --model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf + --sys-id ${{inputs.sys_id}} + --instruc-id ${{inputs.instruc_id}} + --param temperature=${{inputs.temperature}} + --param top_k=${{inputs.top_k}} + --param top_p=${{inputs.top_p}} code: ../src inputs: data_dir: @@ -13,6 +16,11 @@ inputs: model_dir: type: uri_folder path: azureml://datastores/workspaceblobstore/paths/base_models + temperature: 0.7 + top_p: 0.95 + top_k: 40 + sys_id: SYS_1 + instruc_id: INSTR_SWEETP_1 # using a curated environment doesn't work because we need additional packages environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11.7/versions/21 image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21 @@ -26,5 +34,5 @@ environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11 conda_file: conda.yml display_name: autodoc_prediction compute: azureml:v100cluster -experiment_name: autodoc_prediction +experiment_name: evaluation description: | \ No newline at end of file diff --git a/azureml/generate.yml b/azureml/generate.yml index c7df113..28d3208 100644 --- a/azureml/generate.yml +++ b/azureml/generate.yml @@ -3,16 +3,26 @@ command: > python -m autora.doc.pipelines.main generate --model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf --output ./outputs/output.txt + --sys-id ${{inputs.sys_id}} + --instruc-id ${{inputs.instruc_id}} + --param temperature=${{inputs.temperature}} + --param top_k=${{inputs.top_k}} + --param top_p=${{inputs.top_p}} autora/doc/pipelines/main.py code: ../src inputs: model_dir: type: uri_folder path: azureml://datastores/workspaceblobstore/paths/base_models + temperature: 0.7 + top_p: 0.95 + top_k: 40 + sys_id: SYS_1 + instruc_id: INSTR_SWEETP_1 environment: image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21 conda_file: conda.yml display_name: autodoc_prediction compute: azureml:v100cluster -experiment_name: autodoc_prediction +experiment_name: prediction description: | \ No newline at end of file diff --git a/notebooks/generate.ipynb b/notebooks/generate.ipynb index 5260a53..06b3683 100644 --- a/notebooks/generate.ipynb +++ b/notebooks/generate.ipynb @@ -29,42 +29,7 @@ "metadata": {}, "outputs": [], "source": [ - "# The following prompt uses an example (code, doc) to specify the desired behavior\n", - "EX_CODE=\"\"\"\n", - "from sweetpea import *\n", - "\n", - "color = Factor('color', ['red', 'green', 'blue', 'yellow'])\n", - "word = Factor('word', ['red', 'green', 'blue', 'yellow'])\n", - "\n", - "def is_congruent(word, color):\n", - " return (word == color)\n", - "\n", - "def is_not_congruent(word, color):\n", - " return not is_congruent(word, color)\n", - "\n", - "congruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color]))\n", - "incongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color]))\n", - "\n", - "congruency = Factor('congruency', [congruent, incongruent])\n", - "\n", - "constraints = [MinimumTrials(48)]\n", - "design = [word, color, congruency]\n", - "crossing = [word, congruency]\n", - "\n", - "block = CrossBlock(design, crossing, constraints)\n", - "\n", - "experiment = synthesize_trials(block, 1)\n", - "\n", - "save_experiments_csv(block, experiment, 'code_1_sequences/seq')\n", - "\"\"\"\n", - "\n", - "EX_DOC=\"\"\"There are two regular factors: color and word. The color factor consists of four levels: \"red\", \"green\", \"blue\", and \"yellow\". \n", - "The word factor also consists of the four levels: \"red\", \"green\", \"blue\", and \"yellow\". There is another derived factor referred to as congruency. \n", - "The congruency factor depends on the regular factors word and color and has two levels: \"congruent\" and \"incongruent\".\n", - "A trial is considered \"congruent\" if the word matches the color, otherwise, it is considered \"incongruent\". We counterbalanced the word factor with the congruency factor. \n", - "All experiment sequences contained at least 48 trials.\"\"\"\n", - "\n", - "TEST_CODE=\"\"\"\n", + "TEST_CODE = \"\"\"\n", "from sweetpea import *\n", "from sweetpea.primitives import *\n", "\n", @@ -115,17 +80,6 @@ "experiment = synthesize_trials(block, 1)\n", "\n", "save_experiments_csv(block, experiment, 'code_1_sequences/seq')\n", - "\"\"\"\n", - "\n", - "PROMPT=f\"\"\"Consider the following experiment code:\n", - "---\n", - "{EX_CODE}\n", - "---\n", - "Here's a a good English description:\n", - "---\n", - "{EX_DOC}\n", - "---\n", - "Using the same style, please generate a high-level one paragraph description for the following experiment code:\n", "\"\"\"" ] }, @@ -135,8 +89,15 @@ "metadata": {}, "outputs": [], "source": [ - "output = pred.predict(SYS[SystemPrompts.SYS_1], PROMPT, [TEST_CODE], temperature=0.05, top_k=10, num_ret_seq=3)[0]\n", - "for i,o in enumerate(output):\n", + "output = pred.predict(\n", + " SYS[SystemPrompts.SYS_1],\n", + " INSTR[InstructionPrompts.INSTR_SWEETP_EXAMPLE],\n", + " [TEST_CODE],\n", + " temperature=0.05,\n", + " top_k=10,\n", + " num_ret_seq=3,\n", + ")[0]\n", + "for i, o in enumerate(output):\n", " print(f\"******** Output {i} ********\\n{o}*************\\n\")" ] } diff --git a/pyproject.toml b/pyproject.toml index 422c8ff..afb5a23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "typer", "scipy", # This works, while installing from pytorch and cuda from conda does not", - "torch==2.0.1", + "torch==2.1.0", "transformers>=4.35.2", ] diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index de7e906..e797ce1 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -16,13 +16,24 @@ logger = logging.getLogger(__name__) -@app.command() -def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> List[str]: +@app.command(help="Evaluate model on a data file") +def eval( + data_file: str = typer.Argument(..., help="JSONL Data file to evaluate on"), + model_path: str = typer.Option("meta-llama/Llama-2-7b-chat-hf", help="Path to HF model"), + sys_id: SystemPrompts = typer.Option(SystemPrompts.SYS_1, help="System prompt ID"), + instruc_id: InstructionPrompts = typer.Option( + InstructionPrompts.INSTR_SWEETP_1, help="Instruction prompt ID" + ), + param: List[str] = typer.Option( + [], help="Additional float parameters to pass to the model as name=float pairs" + ), +) -> List[List[str]]: import jsonlines import mlflow mlflow.autolog() + param_dict = {pair[0]: float(pair[1]) for pair in [pair.split("=") for pair in param]} run = mlflow.active_run() sys_prompt = SYS[sys_id] @@ -33,6 +44,7 @@ def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: Ins logger.info(f"Active run_id: {run.info.run_id}") logger.info(f"running predict with {data_file}") logger.info(f"model path: {model_path}") + mlflow.log_params(param_dict) with jsonlines.open(data_file) as reader: items = [item for item in reader] @@ -41,16 +53,19 @@ def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: Ins pred = Predictor(model_path) timer_start = timer() - predictions = pred.predict(sys_prompt, instr_prompt, inputs) + predictions = pred.predict(sys_prompt, instr_prompt, inputs, **param_dict) timer_end = timer() pred_time = timer_end - timer_start mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs))) for i in range(len(inputs)): mlflow.log_text(labels[i], f"label_{i}.txt") mlflow.log_text(inputs[i], f"input_{i}.py") - mlflow.log_text(predictions[i], f"prediction_{i}.txt") + for j in range(len(predictions[i])): + mlflow.log_text(predictions[i][j], f"prediction_{i}_{j}.txt") - tokens = pred.tokenize(predictions)["input_ids"] + # flatten predictions for counting tokens + predictions_flat = [pred for pred_list in predictions for pred in pred_list] + tokens = pred.tokenize(predictions_flat)["input_ids"] total_tokens = sum([len(token) for token in tokens]) mlflow.log_metric("total_tokens", total_tokens) mlflow.log_metric("tokens/sec", total_tokens / pred_time) @@ -59,18 +74,28 @@ def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: Ins @app.command() def generate( - python_file: str, - model_path: str = "meta-llama/llama-2-7b-chat-hf", - output: str = "output.txt", - sys_id: SystemPrompts = SystemPrompts.SYS_1, - instruc_id: InstructionPrompts = InstructionPrompts.INSTR_SWEETP_1, + python_file: str = typer.Argument(..., help="Python file to generate documentation for"), + model_path: str = typer.Option("meta-llama/Llama-2-7b-chat-hf", help="Path to HF model"), + output: str = typer.Option("output.txt", help="Output file"), + sys_id: SystemPrompts = typer.Option(SystemPrompts.SYS_1, help="System prompt ID"), + instruc_id: InstructionPrompts = typer.Option( + InstructionPrompts.INSTR_SWEETP_1, help="Instruction prompt ID" + ), + param: List[str] = typer.Option( + [], help="Additional float parameters to pass to the model as name=float pairs" + ), ) -> None: + param_dict = {pair[0]: float(pair[1]) for pair in [pair.split("=") for pair in param]} + """ + Generate documentation from python file + """ with open(python_file, "r") as f: - inputs = [f.read()] + input = f.read() sys_prompt = SYS[sys_id] instr_prompt = INSTR[instruc_id] pred = Predictor(model_path) - predictions = pred.predict(sys_prompt, instr_prompt, inputs) + # grab first result since we only passed one input + predictions = pred.predict(sys_prompt, instr_prompt, [input], **param_dict)[0] assert len(predictions) == 1, f"Expected only one output, got {len(predictions)}" logger.info(f"Writing output to {output}") with open(output, "w") as f: diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py index 49059cd..307c99e 100644 --- a/src/autora/doc/runtime/predict_hf.py +++ b/src/autora/doc/runtime/predict_hf.py @@ -27,21 +27,36 @@ def __init__(self, model_path: str): tokenizer=self.tokenizer, ) - def predict(self, sys: str, instr: str, inputs: List[str], temperature=0.6, top_p=0.95, top_k=40, max_length=2048, num_ret_seq=1) -> List[List[str]]: - logger.info(f"Generating {len(inputs)} predictions") + def predict( + self, + sys: str, + instr: str, + inputs: List[str], + temperature: float = 0.6, + top_p: float = 0.95, + top_k: float = 40, + max_length: float = 2048, + num_ret_seq: float = 1, + ) -> List[List[str]]: + logger.info( + f"Generating {len(inputs)} predictions. Temperature: {temperature}, top_p: {top_p}, top_k: {top_k}, " + f"max_length: {max_length}" + ) prompts = [TEMP_LLAMA2.format(sys=sys, instr=instr, input=input) for input in inputs] sequences = self.pipeline( prompts, do_sample=True, temperature=temperature, top_p=top_p, - top_k=top_k, - num_return_sequences=num_ret_seq, + top_k=int(top_k), + num_return_sequences=int(num_ret_seq), eos_token_id=self.tokenizer.eos_token_id, - max_length=max_length, + max_length=int(max_length), ) - results = [[Predictor.trim_prompt(seq["generated_text"]) for seq in sequence] for sequence in sequences] + results = [ + [Predictor.trim_prompt(seq["generated_text"]) for seq in sequence] for sequence in sequences + ] logger.info(f"Generated {len(results)} results") return results diff --git a/src/autora/doc/runtime/prompts.py b/src/autora/doc/runtime/prompts.py index 75019fc..4480fb8 100644 --- a/src/autora/doc/runtime/prompts.py +++ b/src/autora/doc/runtime/prompts.py @@ -23,6 +23,53 @@ INSTR_SWEETP_1 = """Please generate high-level two paragraph documentation for the following experiment. The first paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" +# The following prompt uses an example (code, doc) to specify the desired behavior +EX_CODE = """ +from sweetpea import * + +color = Factor('color', ['red', 'green', 'blue', 'yellow']) +word = Factor('word', ['red', 'green', 'blue', 'yellow']) + +def is_congruent(word, color): + return (word == color) + +def is_not_congruent(word, color): + return not is_congruent(word, color) + +congruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color])) +incongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color])) + +congruency = Factor('congruency', [congruent, incongruent]) + +constraints = [MinimumTrials(48)] +design = [word, color, congruency] +crossing = [word, congruency] + +block = CrossBlock(design, crossing, constraints) + +experiment = synthesize_trials(block, 1) + +save_experiments_csv(block, experiment, 'code_1_sequences/seq') +""" + +EX_DOC = """There are two regular factors: color and word. The color factor consists of four levels: "red", "green", +"blue", and "yellow". The word factor also consists of the four levels: "red", "green", "blue", and "yellow". +There is another derived factor referred to as congruency. The congruency factor depends on the regular factors word +and color and has two levels: "congruent" and "incongruent". A trial is considered "congruent" if the word matches +the color, otherwise, it is considered "incongruent". We counterbalanced the word factor with the congruency factor. +All experiment sequences contained at least 48 trials.""" + +INSTR_SWEETP_EXAMPLE = f"""Consider the following experiment code: +--- +{EX_CODE} +--- +Here's a a good English description: +--- +{EX_DOC} +--- +Using the same style, please generate a high-level one paragraph description for the following experiment code: +""" + class SystemPrompts(str, Enum): SYS_1 = "SYS_1" @@ -30,7 +77,11 @@ class SystemPrompts(str, Enum): class InstructionPrompts(str, Enum): INSTR_SWEETP_1 = "INSTR_SWEETP_1" + INSTR_SWEETP_EXAMPLE = "INSTR_SWEETP_EXAMPLE" SYS = {SystemPrompts.SYS_1: SYS_1} -INSTR = {InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1} +INSTR = { + InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1, + InstructionPrompts.INSTR_SWEETP_EXAMPLE: INSTR_SWEETP_EXAMPLE, +} diff --git a/tests/test_main.py b/tests/test_main.py index 3e67bab..097e8c7 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -9,17 +9,19 @@ def test_predict() -> None: data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve() - outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1, []) assert len(outputs) == 3, "Expected 3 outputs" for output in outputs: - assert len(output) > 0, "Expected non-empty output" + assert len(output[0]) > 0, "Expected non-empty output" def test_generate() -> None: python_file = __file__ output = Path("output.txt") output.unlink(missing_ok=True) - generate(python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + generate( + python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1, [] + ) assert output.exists(), f"Expected output file {output} to exist" with open(str(output), "r") as f: assert len(f.read()) > 0, f"Expected non-empty output file {output}" From c5cd8d6dc0a0a2ecc69e349cd7a1d07e2a4972a5 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Mon, 11 Dec 2023 09:06:14 -0800 Subject: [PATCH 3/5] Back to torch 2.0.1 --- azureml/conda.yml | 2 +- azureml/eval.yml | 2 +- azureml/generate.yml | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/azureml/conda.yml b/azureml/conda.yml index ce84fc2..f772397 100644 --- a/azureml/conda.yml +++ b/azureml/conda.yml @@ -15,4 +15,4 @@ dependencies: - xformers - scipy # This works, while installing from pytorch and cuda from conda does not - - torch==2.1.0 \ No newline at end of file + - torch==2.0.1 \ No newline at end of file diff --git a/azureml/eval.yml b/azureml/eval.yml index e64cda2..ea6953b 100644 --- a/azureml/eval.yml +++ b/azureml/eval.yml @@ -33,6 +33,6 @@ environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11 # image: nvcr.io/nvidia/pytorch:23.10-py3 conda_file: conda.yml display_name: autodoc_prediction -compute: azureml:v100cluster +compute: azureml:t4cluster experiment_name: evaluation description: | \ No newline at end of file diff --git a/azureml/generate.yml b/azureml/generate.yml index 28d3208..d849fcd 100644 --- a/azureml/generate.yml +++ b/azureml/generate.yml @@ -23,6 +23,6 @@ environment: image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21 conda_file: conda.yml display_name: autodoc_prediction -compute: azureml:v100cluster +compute: azureml:t4cluster experiment_name: prediction description: | \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index afb5a23..422c8ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "typer", "scipy", # This works, while installing from pytorch and cuda from conda does not", - "torch==2.1.0", + "torch==2.0.1", "transformers>=4.35.2", ] From 5e4db23a7c9421a21fe96ea9f6209c83d4cc350c Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 12 Dec 2023 11:28:57 -0800 Subject: [PATCH 4/5] comment out unused badges --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6eed825..e2e66ad 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,12 @@ [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/) -[![PyPI](https://img.shields.io/pypi/v/autora-doc?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/autora-doc/) + [![GitHub Workflow Status](https://github.com/autoresearch/autodoc/actions/workflows/smoke-test.yml/badge.svg)](https://github.com/AutoResearch/autodoc/actions/workflows/smoke-test.yml) [![codecov](https://codecov.io/gh/AutoResearch/autodoc/branch/main/graph/badge.svg)](https://codecov.io/gh/AutoResearch/autodoc) -[![Read the Docs](https://img.shields.io/readthedocs/autora-doc)](https://autora-doc.readthedocs.io/) + This project was automatically generated using the LINCC-Frameworks [python-project-template](https://github.com/lincc-frameworks/python-project-template). For more information about the project template see the From 5231456a3ef834e3585ceec55d51218c71a531d3 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 12 Dec 2023 11:39:08 -0800 Subject: [PATCH 5/5] cr feedback to use itertools to flatten --- src/autora/doc/pipelines/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index e797ce1..5afc6bf 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -1,3 +1,4 @@ +import itertools import logging from timeit import default_timer as timer from typing import List @@ -64,7 +65,7 @@ def eval( mlflow.log_text(predictions[i][j], f"prediction_{i}_{j}.txt") # flatten predictions for counting tokens - predictions_flat = [pred for pred_list in predictions for pred in pred_list] + predictions_flat = list(itertools.chain.from_iterable(predictions)) tokens = pred.tokenize(predictions_flat)["input_ids"] total_tokens = sum([len(token) for token in tokens]) mlflow.log_metric("total_tokens", total_tokens)