From 364aa183b0c083bc302b5cbfd1c6382704a13bf4 Mon Sep 17 00:00:00 2001
From: Anuj Sinha
Date: Sat, 3 Feb 2024 18:12:56 -0800
Subject: [PATCH 1/5] feat: create an azure-ml pipeline for eval_prompts()

---
 azureml/eval_prompts.yml | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 azureml/eval_prompts.yml

diff --git a/azureml/eval_prompts.yml b/azureml/eval_prompts.yml
new file mode 100644
index 0000000..96287fa
--- /dev/null
+++ b/azureml/eval_prompts.yml
@@ -0,0 +1,32 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
+command: >
+  python -m autora.doc.pipelines.main eval-prompts
+  ${{inputs.data_dir}}/data.jsonl
+  ${{inputs.data_dir}}/all_prompt.json
+  --model-path ${{inputs.model_path}}
+  --param do_sample=${{inputs.do_sample}}
+  --param temperature=${{inputs.temperature}}
+  --param top_k=${{inputs.top_k}}
+  --param top_p=${{inputs.top_p}}
+code: ../src
+inputs:
+  data_dir:
+    type: uri_folder
+    path: azureml://datastores/workspaceblobstore/paths/data/sweetpea/
+  # Currently models are loading faster directly from HuggingFace vs Azure Blob Storage
+  # model_dir:
+  #   type: uri_folder
+  #   path: azureml://datastores/workspaceblobstore/paths/base_models
+  model_path: meta-llama/Llama-2-7b-chat-hf
+  temperature: 0.01
+  do_sample: 0
+  top_p: 0.95
+  top_k: 1
+# using a curated environment doesn't work because we need additional packages
+environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11.7/versions/21
+  image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21
+  conda_file: conda.yml
+display_name: autodoc_multi_prompts_prediction
+compute: azureml:v100cluster
+experiment_name: evaluation_multi_prompts
+description: Run code-to-documentation generation on data_file for each prompt in prompts_file

From 1eb7333de13266ac6695771e90ce32bfa1566e97 Mon Sep 17 00:00:00 2001
From: Anuj Sinha
Date: Mon, 5 Feb 2024 17:37:44 -0800
Subject: [PATCH 2/5] feat: parameterize data file and prompt file input

---
 azureml/eval_prompts.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/azureml/eval_prompts.yml b/azureml/eval_prompts.yml
index 96287fa..29c9033 100644
--- a/azureml/eval_prompts.yml
+++ b/azureml/eval_prompts.yml
@@ -1,8 +1,8 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
 command: >
   python -m autora.doc.pipelines.main eval-prompts
-  ${{inputs.data_dir}}/data.jsonl
-  ${{inputs.data_dir}}/all_prompt.json
+  ${{inputs.data_dir}}/{{data_file}}
+  ${{inputs.data_dir}}/{{prompts_file}}
   --model-path ${{inputs.model_path}}
   --param do_sample=${{inputs.do_sample}}
   --param temperature=${{inputs.temperature}}
@@ -22,6 +22,8 @@ inputs:
   do_sample: 0
   top_p: 0.95
   top_k: 1
+  data_file: data.jsonl
+  prompts_file: prompt_list.json
 # using a curated environment doesn't work because we need additional packages
 environment: # azureml://registries/azureml/environments/acpt-pytorch-2.0-cuda11.7/versions/21
   image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21
   conda_file: conda.yml
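
Note (illustrative, not part of the patch series): PATCH 2/5 turns the hard-coded data.jsonl and all_prompt.json paths into data_file and prompts_file job inputs with defaults, and PATCH 5/5 below switches the command to the ${{inputs.data_file}} / ${{inputs.prompts_file}} form so Azure ML substitutes the values at runtime. Once parameterized, the file names can be overridden per submission without editing the YAML. A minimal sketch using the azure-ai-ml v2 Python SDK, assuming a workspace config.json is available locally; the override values are placeholders:

from azure.ai.ml import MLClient, load_job
from azure.identity import DefaultAzureCredential

# Load the command job defined in azureml/eval_prompts.yml.
job = load_job("azureml/eval_prompts.yml")

# Override the parameterized file names for this submission only.
# (Placeholder names; any files under the mounted data_dir / prompts_dir folders work.)
job.inputs["data_file"] = "data.jsonl"
job.inputs["prompts_file"] = "prompt_list.json"

# Connect to the workspace described by config.json and submit the job.
ml_client = MLClient.from_config(credential=DefaultAzureCredential())
ml_client.jobs.create_or_update(job)
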
From b6895922b103453bfa1dae1973b69df726898ec4 Mon Sep 17 00:00:00 2001
From: Anuj Sinha
Date: Mon, 5 Feb 2024 19:45:05 -0800
Subject: [PATCH 3/5] refactor: update prompts directory path

---
 azureml/eval_prompts.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/azureml/eval_prompts.yml b/azureml/eval_prompts.yml
index 29c9033..c0c87be 100644
--- a/azureml/eval_prompts.yml
+++ b/azureml/eval_prompts.yml
@@ -2,7 +2,7 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
 command: >
   python -m autora.doc.pipelines.main eval-prompts
   ${{inputs.data_dir}}/{{data_file}}
-  ${{inputs.data_dir}}/{{prompts_file}}
+  ${{inputs.prompts_dir}}/{{prompts_file}}
   --model-path ${{inputs.model_path}}
   --param do_sample=${{inputs.do_sample}}
   --param temperature=${{inputs.temperature}}
@@ -13,6 +13,9 @@ inputs:
   data_dir:
     type: uri_folder
     path: azureml://datastores/workspaceblobstore/paths/data/sweetpea/
+  prompts_dir:
+    type: uri_folder
+    path: azureml://datastores/workspaceblobstore/paths/data/autora/prompts/
   # Currently models are loading faster directly from HuggingFace vs Azure Blob Storage
   # model_dir:
   #   type: uri_folder
   #   path: azureml://datastores/workspaceblobstore/paths/base_models

From aaded9ce288ea8e59628067b7b0bd7d12986ff3f Mon Sep 17 00:00:00 2001
From: Anuj Sinha
Date: Tue, 6 Feb 2024 14:39:03 -0800
Subject: [PATCH 4/5] refactor: update artifact labeling (TODO: fix metrics)

---
 src/autora/doc/pipelines/main.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
index d01c407..dedf6e2 100644
--- a/src/autora/doc/pipelines/main.py
+++ b/src/autora/doc/pipelines/main.py
@@ -145,14 +145,12 @@ def eval_prompt(
     timer_end = timer()
     bleu, meteor = evaluate_documentation(predictions, labels)
     pred_time = timer_end - timer_start
+    prompt_hash = hash(prompt)
     mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs)))
     for i in range(len(inputs)):
-        mlflow.log_text(labels[i], f"label_{i}.txt")
-        mlflow.log_text(inputs[i], f"input_{i}.py")
-        for j in range(len(predictions[i])):
-            mlflow.log_text(predictions[i][j], f"prediction_{i}_{j}.txt")
-    mlflow.log_text("bleu_score is ", str(bleu))
-    mlflow.log_text("meteor_score is ", str(meteor))
+        mlflow.log_text(labels[i], f"{prompt_hash}_label_{i}.txt")
+        mlflow.log_text(inputs[i], f"{prompt_hash}_input_{i}.py")
+        mlflow.log_text(predictions[i], f"{prompt_hash}_prediction_{i}.txt")
 
     # flatten predictions for counting tokens
     predictions_flat = list(itertools.chain.from_iterable(predictions))

From 6faff60841d46cd3cc8fb2c94e0e1e978dab572d Mon Sep 17 00:00:00 2001
From: Anuj Sinha
Date: Tue, 6 Feb 2024 14:51:31 -0800
Subject: [PATCH 5/5] refactor: update file path directory

---
 azureml/eval_prompts.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/azureml/eval_prompts.yml b/azureml/eval_prompts.yml
index c0c87be..5613f29 100644
--- a/azureml/eval_prompts.yml
+++ b/azureml/eval_prompts.yml
@@ -1,8 +1,8 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
 command: >
   python -m autora.doc.pipelines.main eval-prompts
-  ${{inputs.data_dir}}/{{data_file}}
-  ${{inputs.prompts_dir}}/{{prompts_file}}
+  ${{inputs.data_dir}}/${{inputs.data_file}}
+  ${{inputs.prompts_dir}}/${{inputs.prompts_file}}
   --model-path ${{inputs.model_path}}
   --param do_sample=${{inputs.do_sample}}
   --param temperature=${{inputs.temperature}}
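
Note (illustrative, not part of the patch series): PATCH 4/5 prefixes every artifact logged by eval_prompt with a per-prompt hash so that labels, inputs, and predictions from different prompts do not overwrite one another within a single MLflow run. A minimal sketch of that naming scheme, using placeholder data and a hypothetical helper name:

import mlflow

def log_prompt_artifacts(prompt: str, inputs: list, labels: list, predictions: list) -> None:
    # Same naming scheme as PATCH 4/5: one shared prefix per prompt keeps the
    # artifacts for each prompt grouped and collision-free within the run.
    # Python's built-in hash() is not stable across interpreter runs; a hashlib
    # digest would give a reproducible prefix if that matters.
    prompt_hash = hash(prompt)
    for i, (code, label, pred) in enumerate(zip(inputs, labels, predictions)):
        mlflow.log_text(label, f"{prompt_hash}_label_{i}.txt")
        mlflow.log_text(code, f"{prompt_hash}_input_{i}.py")
        mlflow.log_text(pred, f"{prompt_hash}_prediction_{i}.txt")

with mlflow.start_run():
    # Placeholder prompt and data, purely for illustration.
    log_prompt_artifacts("Generate a docstring for the code below.",
                         inputs=["x = 1"], labels=["Assign 1 to x."], predictions=["Sets x to 1."])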