From aa1b494d33358bc311e02c00f43d3b549c2daf36 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Fri, 8 Dec 2023 21:49:54 +0000 Subject: [PATCH 1/5] Add arguments for model parameters --- notebooks/generate.ipynb | 165 +++++++++++++++++++++++++++ src/autora/doc/runtime/predict_hf.py | 15 ++- 2 files changed, 172 insertions(+), 8 deletions(-) create mode 100644 notebooks/generate.ipynb diff --git a/notebooks/generate.ipynb b/notebooks/generate.ipynb new file mode 100644 index 0000000..5260a53 --- /dev/null +++ b/notebooks/generate.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2\n", + "from autora.doc.runtime.predict_hf import Predictor\n", + "from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# model = \"../../models\" # if model has been previously downloaded via huggingface-cli\n", + "model = \"meta-llama/Llama-2-7b-chat-hf\"\n", + "pred = Predictor(model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# The following prompt uses an example (code, doc) to specify the desired behavior\n", + "EX_CODE=\"\"\"\n", + "from sweetpea import *\n", + "\n", + "color = Factor('color', ['red', 'green', 'blue', 'yellow'])\n", + "word = Factor('word', ['red', 'green', 'blue', 'yellow'])\n", + "\n", + "def is_congruent(word, color):\n", + " return (word == color)\n", + "\n", + "def is_not_congruent(word, color):\n", + " return not is_congruent(word, color)\n", + "\n", + "congruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color]))\n", + "incongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color]))\n", + "\n", + "congruency = Factor('congruency', [congruent, incongruent])\n", + "\n", + "constraints = [MinimumTrials(48)]\n", + "design = [word, color, congruency]\n", + "crossing = [word, congruency]\n", + "\n", + "block = CrossBlock(design, crossing, constraints)\n", + "\n", + "experiment = synthesize_trials(block, 1)\n", + "\n", + "save_experiments_csv(block, experiment, 'code_1_sequences/seq')\n", + "\"\"\"\n", + "\n", + "EX_DOC=\"\"\"There are two regular factors: color and word. The color factor consists of four levels: \"red\", \"green\", \"blue\", and \"yellow\". \n", + "The word factor also consists of the four levels: \"red\", \"green\", \"blue\", and \"yellow\". There is another derived factor referred to as congruency. \n", + "The congruency factor depends on the regular factors word and color and has two levels: \"congruent\" and \"incongruent\".\n", + "A trial is considered \"congruent\" if the word matches the color, otherwise, it is considered \"incongruent\". We counterbalanced the word factor with the congruency factor. 
\n", + "All experiment sequences contained at least 48 trials.\"\"\"\n", + "\n", + "TEST_CODE=\"\"\"\n", + "from sweetpea import *\n", + "from sweetpea.primitives import *\n", + "\n", + "number_list = [125, 132, 139, 146, 160, 167, 174, 181]\n", + "letter_list = ['b', 'd', 'f', 'h', 's', 'u', 'w', 'y']\n", + "\n", + "number = Factor(\"number\", number_list)\n", + "letter = Factor(\"letter\", letter_list)\n", + "task = Factor(\"task\", [\"number task\", \"letter task\", \"free choice task\"])\n", + "\n", + "\n", + "def is_forced_trial_switch(task):\n", + " return (task[-1] == \"number task\" and task[0] == \"letter task\") or \\\n", + " (task[-1] == \"letter task\" and task[0] == \"number task\")\n", + "\n", + "\n", + "def is_forced_trial_repeat(task):\n", + " return (task[-1] == \"number task\" and task[0] == \"number task\") or \\\n", + " (task[-1] == \"letter task\" and task[0] == \"letter task\")\n", + "\n", + "\n", + "def is_free_trial_transition(task):\n", + " return task[-1] != \"free choice task\" and task[0] == \"free choice task\"\n", + "\n", + "\n", + "def is_free_trial_repeat(task):\n", + " return task[-1] == \"free choice task\" and task[0] == \"free choice task\"\n", + "\n", + "\n", + "def is_not_relevant_transition(task):\n", + " return not (is_forced_trial_repeat(task) or is_forced_trial_switch(task) or is_free_trial_repeat(\n", + " task) or is_free_trial_transition(task))\n", + "\n", + "\n", + "transit = Factor(\"task transition\", [\n", + " DerivedLevel(\"forced switch\", transition(is_forced_trial_switch, [task]), 3),\n", + " DerivedLevel(\"forced repeat\", transition(is_forced_trial_repeat, [task])),\n", + " DerivedLevel(\"free transition\", transition(is_free_trial_transition, [task]), 4),\n", + " DerivedLevel(\"free repeat\", transition(is_free_trial_repeat, [task]), 4),\n", + " DerivedLevel(\"forced first\", transition(is_not_relevant_transition, [task]), 4)\n", + "])\n", + "design = [letter, number, task, transit]\n", + "crossing = [[letter], [number], [transit]]\n", + "constraints = [MinimumTrials(256)]\n", + "\n", + "block = MultiCrossBlock(design, crossing, constraints)\n", + "\n", + "experiment = synthesize_trials(block, 1)\n", + "\n", + "save_experiments_csv(block, experiment, 'code_1_sequences/seq')\n", + "\"\"\"\n", + "\n", + "PROMPT=f\"\"\"Consider the following experiment code:\n", + "---\n", + "{EX_CODE}\n", + "---\n", + "Here's a a good English description:\n", + "---\n", + "{EX_DOC}\n", + "---\n", + "Using the same style, please generate a high-level one paragraph description for the following experiment code:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "output = pred.predict(SYS[SystemPrompts.SYS_1], PROMPT, [TEST_CODE], temperature=0.05, top_k=10, num_ret_seq=3)[0]\n", + "for i,o in enumerate(output):\n", + " print(f\"******** Output {i} ********\\n{o}*************\\n\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "autodoc", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py index 23c484e..49059cd 100644 --- a/src/autora/doc/runtime/predict_hf.py +++ 
b/src/autora/doc/runtime/predict_hf.py @@ -27,22 +27,21 @@ def __init__(self, model_path: str): tokenizer=self.tokenizer, ) - def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]: + def predict(self, sys: str, instr: str, inputs: List[str], temperature=0.6, top_p=0.95, top_k=40, max_length=2048, num_ret_seq=1) -> List[List[str]]: logger.info(f"Generating {len(inputs)} predictions") prompts = [TEMP_LLAMA2.format(sys=sys, instr=instr, input=input) for input in inputs] - # TODO: Make these parameters configurable sequences = self.pipeline( prompts, do_sample=True, - temperature=0.6, - top_p=0.95, - top_k=40, - num_return_sequences=1, + temperature=temperature, + top_p=top_p, + top_k=top_k, + num_return_sequences=num_ret_seq, eos_token_id=self.tokenizer.eos_token_id, - max_length=2048, + max_length=max_length, ) - results = [Predictor.trim_prompt(sequence[0]["generated_text"]) for sequence in sequences] + results = [[Predictor.trim_prompt(seq["generated_text"]) for seq in sequence] for sequence in sequences] logger.info(f"Generated {len(results)} results") return results From e3c004a540ae25e8c65436343acfd5bf167f6e5d Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Fri, 8 Dec 2023 15:24:17 -0800 Subject: [PATCH 2/5] Surface inference parameters to the CLI and jobs --- README.md | 2 + azureml/conda.yml | 2 +- azureml/eval.yml | 16 ++++++-- azureml/generate.yml | 12 +++++- notebooks/generate.ipynb | 59 +++++----------------------- pyproject.toml | 2 +- src/autora/doc/pipelines/main.py | 49 +++++++++++++++++------ src/autora/doc/runtime/predict_hf.py | 27 ++++++++++--- src/autora/doc/runtime/prompts.py | 53 ++++++++++++++++++++++++- tests/test_main.py | 8 ++-- 10 files changed, 152 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 127c2f4..6eed825 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # AutoDoc +[![ssec](https://img.shields.io/badge/SSEC-Project-purple?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA0AAAAOCAQAAABedl5ZAAAACXBIWXMAAAHKAAABygHMtnUxAAAAGXRFWHRTb2Z0d2FyZQB3d3cuaW5rc2NhcGUub3Jnm+48GgAAAMNJREFUGBltwcEqwwEcAOAfc1F2sNsOTqSlNUopSv5jW1YzHHYY/6YtLa1Jy4mbl3Bz8QIeyKM4fMaUxr4vZnEpjWnmLMSYCysxTcddhF25+EvJia5hhCudULAePyRalvUteXIfBgYxJufRuaKuprKsbDjVUrUj40FNQ11PTzEmrCmrevPhRcVQai8m1PRVvOPZgX2JttWYsGhD3atbHWcyUqX4oqDtJkJiJHUYv+R1JbaNHJmP/+Q1HLu2GbNoSm3Ft0+Y1YMdPSTSwQAAAABJRU5ErkJggg==&style=plastic)](https://escience.washington.edu/software-engineering/ssec/) + [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/) [![PyPI](https://img.shields.io/pypi/v/autora-doc?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/autora-doc/) diff --git a/azureml/conda.yml b/azureml/conda.yml index f772397..ce84fc2 100644 --- a/azureml/conda.yml +++ b/azureml/conda.yml @@ -15,4 +15,4 @@ dependencies: - xformers - scipy # This works, while installing from pytorch and cuda from conda does not - - torch==2.0.1 \ No newline at end of file + - torch==2.1.0 \ No newline at end of file diff --git a/azureml/eval.yml b/azureml/eval.yml index a2f72b6..e64cda2 100644 --- a/azureml/eval.yml +++ b/azureml/eval.yml @@ -2,9 +2,12 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: > python -m autora.doc.pipelines.main eval ${{inputs.data_dir}}/data.jsonl - ${{inputs.model_dir}}/llama-2-7b-chat-hf - SYS_1 - INSTR_SWEETP_1 + --model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf + --sys-id 
${{inputs.sys_id}} + --instruc-id ${{inputs.instruc_id}} + --param temperature=${{inputs.temperature}} + --param top_k=${{inputs.top_k}} + --param top_p=${{inputs.top_p}} code: ../src inputs: data_dir: @@ -13,6 +16,11 @@ inputs: model_dir: type: uri_folder path: azureml://datastores/workspaceblobstore/paths/base_models + temperature: 0.7 + top_p: 0.95 + top_k: 40 + sys_id: SYS_1 + instruc_id: INSTR_SWEETP_1 # using a curated environment doesn't work because we need additional packages environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11.7/versions/21 image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21 @@ -26,5 +34,5 @@ environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11 conda_file: conda.yml display_name: autodoc_prediction compute: azureml:v100cluster -experiment_name: autodoc_prediction +experiment_name: evaluation description: | \ No newline at end of file diff --git a/azureml/generate.yml b/azureml/generate.yml index c7df113..28d3208 100644 --- a/azureml/generate.yml +++ b/azureml/generate.yml @@ -3,16 +3,26 @@ command: > python -m autora.doc.pipelines.main generate --model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf --output ./outputs/output.txt + --sys-id ${{inputs.sys_id}} + --instruc-id ${{inputs.instruc_id}} + --param temperature=${{inputs.temperature}} + --param top_k=${{inputs.top_k}} + --param top_p=${{inputs.top_p}} autora/doc/pipelines/main.py code: ../src inputs: model_dir: type: uri_folder path: azureml://datastores/workspaceblobstore/paths/base_models + temperature: 0.7 + top_p: 0.95 + top_k: 40 + sys_id: SYS_1 + instruc_id: INSTR_SWEETP_1 environment: image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21 conda_file: conda.yml display_name: autodoc_prediction compute: azureml:v100cluster -experiment_name: autodoc_prediction +experiment_name: prediction description: | \ No newline at end of file diff --git a/notebooks/generate.ipynb b/notebooks/generate.ipynb index 5260a53..06b3683 100644 --- a/notebooks/generate.ipynb +++ b/notebooks/generate.ipynb @@ -29,42 +29,7 @@ "metadata": {}, "outputs": [], "source": [ - "# The following prompt uses an example (code, doc) to specify the desired behavior\n", - "EX_CODE=\"\"\"\n", - "from sweetpea import *\n", - "\n", - "color = Factor('color', ['red', 'green', 'blue', 'yellow'])\n", - "word = Factor('word', ['red', 'green', 'blue', 'yellow'])\n", - "\n", - "def is_congruent(word, color):\n", - " return (word == color)\n", - "\n", - "def is_not_congruent(word, color):\n", - " return not is_congruent(word, color)\n", - "\n", - "congruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color]))\n", - "incongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color]))\n", - "\n", - "congruency = Factor('congruency', [congruent, incongruent])\n", - "\n", - "constraints = [MinimumTrials(48)]\n", - "design = [word, color, congruency]\n", - "crossing = [word, congruency]\n", - "\n", - "block = CrossBlock(design, crossing, constraints)\n", - "\n", - "experiment = synthesize_trials(block, 1)\n", - "\n", - "save_experiments_csv(block, experiment, 'code_1_sequences/seq')\n", - "\"\"\"\n", - "\n", - "EX_DOC=\"\"\"There are two regular factors: color and word. The color factor consists of four levels: \"red\", \"green\", \"blue\", and \"yellow\". \n", - "The word factor also consists of the four levels: \"red\", \"green\", \"blue\", and \"yellow\". There is another derived factor referred to as congruency. 
\n", - "The congruency factor depends on the regular factors word and color and has two levels: \"congruent\" and \"incongruent\".\n", - "A trial is considered \"congruent\" if the word matches the color, otherwise, it is considered \"incongruent\". We counterbalanced the word factor with the congruency factor. \n", - "All experiment sequences contained at least 48 trials.\"\"\"\n", - "\n", - "TEST_CODE=\"\"\"\n", + "TEST_CODE = \"\"\"\n", "from sweetpea import *\n", "from sweetpea.primitives import *\n", "\n", @@ -115,17 +80,6 @@ "experiment = synthesize_trials(block, 1)\n", "\n", "save_experiments_csv(block, experiment, 'code_1_sequences/seq')\n", - "\"\"\"\n", - "\n", - "PROMPT=f\"\"\"Consider the following experiment code:\n", - "---\n", - "{EX_CODE}\n", - "---\n", - "Here's a a good English description:\n", - "---\n", - "{EX_DOC}\n", - "---\n", - "Using the same style, please generate a high-level one paragraph description for the following experiment code:\n", "\"\"\"" ] }, @@ -135,8 +89,15 @@ "metadata": {}, "outputs": [], "source": [ - "output = pred.predict(SYS[SystemPrompts.SYS_1], PROMPT, [TEST_CODE], temperature=0.05, top_k=10, num_ret_seq=3)[0]\n", - "for i,o in enumerate(output):\n", + "output = pred.predict(\n", + " SYS[SystemPrompts.SYS_1],\n", + " INSTR[InstructionPrompts.INSTR_SWEETP_EXAMPLE],\n", + " [TEST_CODE],\n", + " temperature=0.05,\n", + " top_k=10,\n", + " num_ret_seq=3,\n", + ")[0]\n", + "for i, o in enumerate(output):\n", " print(f\"******** Output {i} ********\\n{o}*************\\n\")" ] } diff --git a/pyproject.toml b/pyproject.toml index 422c8ff..afb5a23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "typer", "scipy", # This works, while installing from pytorch and cuda from conda does not", - "torch==2.0.1", + "torch==2.1.0", "transformers>=4.35.2", ] diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index de7e906..e797ce1 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -16,13 +16,24 @@ logger = logging.getLogger(__name__) -@app.command() -def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> List[str]: +@app.command(help="Evaluate model on a data file") +def eval( + data_file: str = typer.Argument(..., help="JSONL Data file to evaluate on"), + model_path: str = typer.Option("meta-llama/Llama-2-7b-chat-hf", help="Path to HF model"), + sys_id: SystemPrompts = typer.Option(SystemPrompts.SYS_1, help="System prompt ID"), + instruc_id: InstructionPrompts = typer.Option( + InstructionPrompts.INSTR_SWEETP_1, help="Instruction prompt ID" + ), + param: List[str] = typer.Option( + [], help="Additional float parameters to pass to the model as name=float pairs" + ), +) -> List[List[str]]: import jsonlines import mlflow mlflow.autolog() + param_dict = {pair[0]: float(pair[1]) for pair in [pair.split("=") for pair in param]} run = mlflow.active_run() sys_prompt = SYS[sys_id] @@ -33,6 +44,7 @@ def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: Ins logger.info(f"Active run_id: {run.info.run_id}") logger.info(f"running predict with {data_file}") logger.info(f"model path: {model_path}") + mlflow.log_params(param_dict) with jsonlines.open(data_file) as reader: items = [item for item in reader] @@ -41,16 +53,19 @@ def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: Ins pred = Predictor(model_path) timer_start = timer() - predictions = pred.predict(sys_prompt, instr_prompt, 
inputs) + predictions = pred.predict(sys_prompt, instr_prompt, inputs, **param_dict) timer_end = timer() pred_time = timer_end - timer_start mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs))) for i in range(len(inputs)): mlflow.log_text(labels[i], f"label_{i}.txt") mlflow.log_text(inputs[i], f"input_{i}.py") - mlflow.log_text(predictions[i], f"prediction_{i}.txt") + for j in range(len(predictions[i])): + mlflow.log_text(predictions[i][j], f"prediction_{i}_{j}.txt") - tokens = pred.tokenize(predictions)["input_ids"] + # flatten predictions for counting tokens + predictions_flat = [pred for pred_list in predictions for pred in pred_list] + tokens = pred.tokenize(predictions_flat)["input_ids"] total_tokens = sum([len(token) for token in tokens]) mlflow.log_metric("total_tokens", total_tokens) mlflow.log_metric("tokens/sec", total_tokens / pred_time) @@ -59,18 +74,28 @@ def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: Ins @app.command() def generate( - python_file: str, - model_path: str = "meta-llama/llama-2-7b-chat-hf", - output: str = "output.txt", - sys_id: SystemPrompts = SystemPrompts.SYS_1, - instruc_id: InstructionPrompts = InstructionPrompts.INSTR_SWEETP_1, + python_file: str = typer.Argument(..., help="Python file to generate documentation for"), + model_path: str = typer.Option("meta-llama/Llama-2-7b-chat-hf", help="Path to HF model"), + output: str = typer.Option("output.txt", help="Output file"), + sys_id: SystemPrompts = typer.Option(SystemPrompts.SYS_1, help="System prompt ID"), + instruc_id: InstructionPrompts = typer.Option( + InstructionPrompts.INSTR_SWEETP_1, help="Instruction prompt ID" + ), + param: List[str] = typer.Option( + [], help="Additional float parameters to pass to the model as name=float pairs" + ), ) -> None: + param_dict = {pair[0]: float(pair[1]) for pair in [pair.split("=") for pair in param]} + """ + Generate documentation from python file + """ with open(python_file, "r") as f: - inputs = [f.read()] + input = f.read() sys_prompt = SYS[sys_id] instr_prompt = INSTR[instruc_id] pred = Predictor(model_path) - predictions = pred.predict(sys_prompt, instr_prompt, inputs) + # grab first result since we only passed one input + predictions = pred.predict(sys_prompt, instr_prompt, [input], **param_dict)[0] assert len(predictions) == 1, f"Expected only one output, got {len(predictions)}" logger.info(f"Writing output to {output}") with open(output, "w") as f: diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py index 49059cd..307c99e 100644 --- a/src/autora/doc/runtime/predict_hf.py +++ b/src/autora/doc/runtime/predict_hf.py @@ -27,21 +27,36 @@ def __init__(self, model_path: str): tokenizer=self.tokenizer, ) - def predict(self, sys: str, instr: str, inputs: List[str], temperature=0.6, top_p=0.95, top_k=40, max_length=2048, num_ret_seq=1) -> List[List[str]]: - logger.info(f"Generating {len(inputs)} predictions") + def predict( + self, + sys: str, + instr: str, + inputs: List[str], + temperature: float = 0.6, + top_p: float = 0.95, + top_k: float = 40, + max_length: float = 2048, + num_ret_seq: float = 1, + ) -> List[List[str]]: + logger.info( + f"Generating {len(inputs)} predictions. 
Temperature: {temperature}, top_p: {top_p}, top_k: {top_k}, " + f"max_length: {max_length}" + ) prompts = [TEMP_LLAMA2.format(sys=sys, instr=instr, input=input) for input in inputs] sequences = self.pipeline( prompts, do_sample=True, temperature=temperature, top_p=top_p, - top_k=top_k, - num_return_sequences=num_ret_seq, + top_k=int(top_k), + num_return_sequences=int(num_ret_seq), eos_token_id=self.tokenizer.eos_token_id, - max_length=max_length, + max_length=int(max_length), ) - results = [[Predictor.trim_prompt(seq["generated_text"]) for seq in sequence] for sequence in sequences] + results = [ + [Predictor.trim_prompt(seq["generated_text"]) for seq in sequence] for sequence in sequences + ] logger.info(f"Generated {len(results)} results") return results diff --git a/src/autora/doc/runtime/prompts.py b/src/autora/doc/runtime/prompts.py index 75019fc..4480fb8 100644 --- a/src/autora/doc/runtime/prompts.py +++ b/src/autora/doc/runtime/prompts.py @@ -23,6 +23,53 @@ INSTR_SWEETP_1 = """Please generate high-level two paragraph documentation for the following experiment. The first paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" +# The following prompt uses an example (code, doc) to specify the desired behavior +EX_CODE = """ +from sweetpea import * + +color = Factor('color', ['red', 'green', 'blue', 'yellow']) +word = Factor('word', ['red', 'green', 'blue', 'yellow']) + +def is_congruent(word, color): + return (word == color) + +def is_not_congruent(word, color): + return not is_congruent(word, color) + +congruent = DerivedLevel('congruent', WithinTrial(is_congruent, [word, color])) +incongruent = DerivedLevel('incongruent', WithinTrial(is_not_congruent, [word, color])) + +congruency = Factor('congruency', [congruent, incongruent]) + +constraints = [MinimumTrials(48)] +design = [word, color, congruency] +crossing = [word, congruency] + +block = CrossBlock(design, crossing, constraints) + +experiment = synthesize_trials(block, 1) + +save_experiments_csv(block, experiment, 'code_1_sequences/seq') +""" + +EX_DOC = """There are two regular factors: color and word. The color factor consists of four levels: "red", "green", +"blue", and "yellow". The word factor also consists of the four levels: "red", "green", "blue", and "yellow". +There is another derived factor referred to as congruency. The congruency factor depends on the regular factors word +and color and has two levels: "congruent" and "incongruent". A trial is considered "congruent" if the word matches +the color, otherwise, it is considered "incongruent". We counterbalanced the word factor with the congruency factor. 
+All experiment sequences contained at least 48 trials.""" + +INSTR_SWEETP_EXAMPLE = f"""Consider the following experiment code: +--- +{EX_CODE} +--- +Here's a a good English description: +--- +{EX_DOC} +--- +Using the same style, please generate a high-level one paragraph description for the following experiment code: +""" + class SystemPrompts(str, Enum): SYS_1 = "SYS_1" @@ -30,7 +77,11 @@ class SystemPrompts(str, Enum): class InstructionPrompts(str, Enum): INSTR_SWEETP_1 = "INSTR_SWEETP_1" + INSTR_SWEETP_EXAMPLE = "INSTR_SWEETP_EXAMPLE" SYS = {SystemPrompts.SYS_1: SYS_1} -INSTR = {InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1} +INSTR = { + InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1, + InstructionPrompts.INSTR_SWEETP_EXAMPLE: INSTR_SWEETP_EXAMPLE, +} diff --git a/tests/test_main.py b/tests/test_main.py index 3e67bab..097e8c7 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -9,17 +9,19 @@ def test_predict() -> None: data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve() - outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1, []) assert len(outputs) == 3, "Expected 3 outputs" for output in outputs: - assert len(output) > 0, "Expected non-empty output" + assert len(output[0]) > 0, "Expected non-empty output" def test_generate() -> None: python_file = __file__ output = Path("output.txt") output.unlink(missing_ok=True) - generate(python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + generate( + python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1, [] + ) assert output.exists(), f"Expected output file {output} to exist" with open(str(output), "r") as f: assert len(f.read()) > 0, f"Expected non-empty output file {output}" From c5cd8d6dc0a0a2ecc69e349cd7a1d07e2a4972a5 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Mon, 11 Dec 2023 09:06:14 -0800 Subject: [PATCH 3/5] Back to torch 2.0.1 --- azureml/conda.yml | 2 +- azureml/eval.yml | 2 +- azureml/generate.yml | 2 +- pyproject.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/azureml/conda.yml b/azureml/conda.yml index ce84fc2..f772397 100644 --- a/azureml/conda.yml +++ b/azureml/conda.yml @@ -15,4 +15,4 @@ dependencies: - xformers - scipy # This works, while installing from pytorch and cuda from conda does not - - torch==2.1.0 \ No newline at end of file + - torch==2.0.1 \ No newline at end of file diff --git a/azureml/eval.yml b/azureml/eval.yml index e64cda2..ea6953b 100644 --- a/azureml/eval.yml +++ b/azureml/eval.yml @@ -33,6 +33,6 @@ environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11 # image: nvcr.io/nvidia/pytorch:23.10-py3 conda_file: conda.yml display_name: autodoc_prediction -compute: azureml:v100cluster +compute: azureml:t4cluster experiment_name: evaluation description: | \ No newline at end of file diff --git a/azureml/generate.yml b/azureml/generate.yml index 28d3208..d849fcd 100644 --- a/azureml/generate.yml +++ b/azureml/generate.yml @@ -23,6 +23,6 @@ environment: image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21 conda_file: conda.yml display_name: autodoc_prediction -compute: azureml:v100cluster +compute: azureml:t4cluster experiment_name: prediction description: | \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index afb5a23..422c8ff 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "typer", "scipy", # This works, while installing from pytorch and cuda from conda does not", - "torch==2.1.0", + "torch==2.0.1", "transformers>=4.35.2", ] From 5e4db23a7c9421a21fe96ea9f6209c83d4cc350c Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 12 Dec 2023 11:28:57 -0800 Subject: [PATCH 4/5] comment out unused badges --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6eed825..e2e66ad 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,12 @@ [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/) -[![PyPI](https://img.shields.io/pypi/v/autora-doc?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/autora-doc/) + [![GitHub Workflow Status](https://github.com/autoresearch/autodoc/actions/workflows/smoke-test.yml/badge.svg)](https://github.com/AutoResearch/autodoc/actions/workflows/smoke-test.yml) [![codecov](https://codecov.io/gh/AutoResearch/autodoc/branch/main/graph/badge.svg)](https://codecov.io/gh/AutoResearch/autodoc) -[![Read the Docs](https://img.shields.io/readthedocs/autora-doc)](https://autora-doc.readthedocs.io/) + This project was automatically generated using the LINCC-Frameworks [python-project-template](https://github.com/lincc-frameworks/python-project-template). For more information about the project template see the From 5231456a3ef834e3585ceec55d51218c71a531d3 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 12 Dec 2023 11:39:08 -0800 Subject: [PATCH 5/5] cr feedback to use itertools to flatten --- src/autora/doc/pipelines/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index e797ce1..5afc6bf 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -1,3 +1,4 @@ +import itertools import logging from timeit import default_timer as timer from typing import List @@ -64,7 +65,7 @@ def eval( mlflow.log_text(predictions[i][j], f"prediction_{i}_{j}.txt") # flatten predictions for counting tokens - predictions_flat = [pred for pred_list in predictions for pred in pred_list] + predictions_flat = list(itertools.chain.from_iterable(predictions)) tokens = pred.tokenize(predictions_flat)["input_ids"] total_tokens = sum([len(token) for token in tokens]) mlflow.log_metric("total_tokens", total_tokens)
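
For reference, a hypothetical local invocation of the `generate` entry point after this series is applied. The option names (`--model-path`, `--output`, `--sys-id`, `--instruc-id`, and the repeated `--param name=float` pairs) come from the diffs above; the target file name and sampling values below are illustrative only, not taken from the repository:

    # Sketch only: document a local experiment file with the updated CLI.
    # "path/to/experiment.py" is a placeholder; temperature/top_k/top_p values are examples.
    python -m autora.doc.pipelines.main generate path/to/experiment.py \
        --model-path meta-llama/Llama-2-7b-chat-hf \
        --output output.txt \
        --sys-id SYS_1 \
        --instruc-id INSTR_SWEETP_EXAMPLE \
        --param temperature=0.7 --param top_k=40 --param top_p=0.95

Because `--param` values are parsed as name=float pairs and forwarded to `Predictor.predict`, integer-valued options such as `top_k`, `max_length`, and `num_ret_seq` are cast back to `int` inside `predict_hf.py`, as shown in the patch.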