From fcc744da37435270324590bf59bb280e24747a9d Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 5 Dec 2023 11:01:36 -0800 Subject: [PATCH 1/5] refactor: Refactor prompts --- README.md | 4 +- azureml/predict.yml | 7 +++- pyproject.toml | 3 ++ src/autora/doc/example_module.py | 23 ------------ src/autora/doc/pipelines/main.py | 17 +++------ src/autora/doc/runtime/predict_hf.py | 55 +++++++++++++++++----------- src/autora/doc/runtime/prompts.py | 37 +++++++++++++++++++ tests/test.py | 23 +++++++----- 8 files changed, 101 insertions(+), 68 deletions(-) delete mode 100644 src/autora/doc/example_module.py create mode 100644 src/autora/doc/runtime/prompts.py diff --git a/README.md b/README.md index 6db068f..2ff8a62 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/) [![PyPI](https://img.shields.io/pypi/v/autora-doc?color=blue&logo=pypi&logoColor=white)](https://pypi.org/project/autora-doc/) -[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/AutoResearch/autodoc/smoke-test.yml)](https://github.com/AutoResearch/autodoc/actions/workflows/smoke-test.yml) + + +[![GitHub Workflow Status](https://github.com/autoresearch/autodoc/actions/workflows/smoke-test.yml/badge.svg)](https://github.com/AutoResearch/autodoc/actions/workflows/smoke-test.yml) [![codecov](https://codecov.io/gh/AutoResearch/autodoc/branch/main/graph/badge.svg)](https://codecov.io/gh/AutoResearch/autodoc) [![Read the Docs](https://img.shields.io/readthedocs/autora-doc)](https://autora-doc.readthedocs.io/) diff --git a/azureml/predict.yml b/azureml/predict.yml index 7f888b4..d5410a2 100644 --- a/azureml/predict.yml +++ b/azureml/predict.yml @@ -1,5 +1,10 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json -command: python -m autora.doc.pipelines.main predict ${{inputs.data_dir}}/data.jsonl ${{inputs.model_dir}}/llama-2-7b-chat-hf +command: > + python -m autora.doc.pipelines.main predict + ${{inputs.data_dir}}/data.jsonl + ${{inputs.model_dir}}/llama-2-7b-chat-hf + SYS_1 + INSTR_SWEETP_1 code: ../src inputs: data_dir: diff --git a/pyproject.toml b/pyproject.toml index e6db6a3..97c9c31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,3 +98,6 @@ include = ["src/autora"] [tool.hatch.build.targets.wheel] packages = ["src/autora"] + +[project.scripts] +autodoc = "autora.doc.pipelines.main:app" \ No newline at end of file diff --git a/src/autora/doc/example_module.py b/src/autora/doc/example_module.py deleted file mode 100644 index f76e837..0000000 --- a/src/autora/doc/example_module.py +++ /dev/null @@ -1,23 +0,0 @@ -"""An example module containing simplistic functions.""" - - -def greetings() -> str: - """A friendly greeting for a future friend. - - Returns - ------- - str - A typical greeting from a software engineer. - """ - return "Hello from LINCC-Frameworks!" - - -def meaning() -> int: - """The meaning of life, the universe, and everything. - - Returns - ------- - int - The meaning of life. 
- """ - return 42 diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index 292c8ff..b74bf4b 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -7,6 +7,7 @@ import typer from autora.doc.runtime.predict_hf import Predictor +from autora.doc.runtime.prompts import INSTR, SYS, InstructionPrompts, SystemPrompts app = typer.Typer() logging.basicConfig( @@ -15,21 +16,13 @@ ) logger = logging.getLogger(__name__) -# TODO: organize the system and instruction prompts into a separate module -SYS = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for - scientific experiments. Your documentation focuses on the experiment's purpose, procedure, and results. Therefore, - details about specific python functions, packages, or libraries are not necessary. Your readers are experimental - scientists. -""" - -instr = """Please generate high-level two paragraph documentation for the following experiment. The first paragraph - should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" - @app.command() -def predict(data_file: str, model_path: str) -> None: +def predict(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> None: run = mlflow.active_run() + sys_prompt = SYS[sys_id] + instr_prompt = INSTR[instruc_id] if run is None: run = mlflow.start_run() with run: @@ -45,7 +38,7 @@ def predict(data_file: str, model_path: str) -> None: pred = Predictor(model_path) timer_start = timer() - predictions = pred.predict(SYS, instr, inputs) + predictions = pred.predict(sys_prompt, instr_prompt, inputs) timer_end = timer() pred_time = timer_end - timer_start mlflow.log_metric("prediction_time/doc", pred_time / (len(inputs))) diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py index ba3e59d..cbde760 100644 --- a/src/autora/doc/runtime/predict_hf.py +++ b/src/autora/doc/runtime/predict_hf.py @@ -3,24 +3,22 @@ import torch import transformers -from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig +from transformers import AutoModelForCausalLM, AutoTokenizer + +from autora.doc.runtime.prompts import LLAMA2_INST_CLOSE, TEMP_LLAMA2 logger = logging.getLogger(__name__) class Predictor: def __init__(self, model_path: str): - # Load the model in 4bit quantization for faster inference on smaller GPUs - bnb_config = BitsAndBytesConfig( - load_in_4bit=True, - bnb_4bit_use_double_quant=True, - bnb_4bit_quant_type="nf4", - bnb_4bit_compute_dtype=torch.bfloat16, - ) + config = self.get_config() + logger.info(f"Loading model from {model_path}") self.tokenizer = AutoTokenizer.from_pretrained(model_path) self.model = AutoModelForCausalLM.from_pretrained( - model_path, quantization_config=bnb_config, device_map="auto" + model_path, + **config, ) logger.info("Model loaded") self.pipeline = transformers.pipeline( @@ -30,18 +28,8 @@ def __init__(self, model_path: str): ) def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]: - # Standard Llama2 template - template = f""" -[INST]<> -{sys} - -{instr} - -[INPUT] -[/INST] -""" logger.info(f"Generating {len(inputs)} predictions") - prompts = [template.replace("[INPUT]", input) for input in inputs] + prompts = [TEMP_LLAMA2.format(sys=sys, instr=instr, input=input) for input in inputs] # TODO: Make these parameters configurable sequences = self.pipeline( prompts, @@ -54,10 +42,35 @@ def predict(self, sys: str, instr: str, 
inputs: List[str]) -> List[str]: max_length=1000, ) - results = [sequence[0]["generated_text"] for sequence in sequences] + results = [Predictor.trim_prompt(sequence[0]["generated_text"]) for sequence in sequences] logger.info(f"Generated {len(results)} results") return results + @staticmethod + def trim_prompt(output: str) -> str: + marker = output.find(LLAMA2_INST_CLOSE) + if marker == -1: + logger.warning(f"Could not find end of prompt marker '{LLAMA2_INST_CLOSE}' in '{output}'") + return output + return output[marker + len(LLAMA2_INST_CLOSE) :] + def tokenize(self, input: List[str]) -> Dict[str, List[List[int]]]: tokens: Dict[str, List[List[int]]] = self.tokenizer(input) return tokens + + def get_config(self) -> Dict[str, str]: + if torch.cuda.is_available(): + from transformers import BitsAndBytesConfig + + # Load the model in 4bit quantization for faster inference on smaller GPUs + return { + "quantization_config": BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_use_double_quant=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + ), + "device_map": "auto", + } + else: + return {} diff --git a/src/autora/doc/runtime/prompts.py b/src/autora/doc/runtime/prompts.py new file mode 100644 index 0000000..5875127 --- /dev/null +++ b/src/autora/doc/runtime/prompts.py @@ -0,0 +1,37 @@ +from enum import Enum + +LLAMA2_INST_CLOSE = "[/INST]\n" + +# Standard Llama2 template +TEMP_LLAMA2 = """ +[INST]<> +{sys} + +{instr} + +{input} +[/INST] +""" + + +SYS_1 = """You are a technical documentation writer. You always write clear, concise, and accurate documentation for +scientific experiments. Your documentation focuses on the experiment's purpose, procedure, and results. Therefore, +details about specific python functions, packages, or libraries are not necessary. Your readers are experimental +scientists. +""" + +INSTR_SWEETP_1 = """Please generate high-level two paragraph documentation for the following experiment. The first +paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" + + +class SystemPrompts(Enum): + SYS_1 = "SYS_1" + + +class InstructionPrompts(Enum): + SYS_1 = "SYS_1" + INSTR_SWEETP_1 = "INSTR_SWEETP_1" + + +SYS = {SystemPrompts.SYS_1: SYS_1} +INSTR = {InstructionPrompts.INSTR_SWEETP_1: INSTR_SWEETP_1} diff --git a/tests/test.py b/tests/test.py index a578227..9ad2c47 100644 --- a/tests/test.py +++ b/tests/test.py @@ -1,13 +1,16 @@ -from autora.doc import example_module +from autora.doc.runtime.predict_hf import Predictor -def test_greetings() -> None: - """Verify the output of the `greetings` function""" - output = example_module.greetings() - assert output == "Hello from LINCC-Frameworks!" 
+def test_trim_prompt() -> None: + """Verify the output of the `trim_prompt` function""" + no_marker = "Generated text with no marker" + output = Predictor.trim_prompt(no_marker) + assert output == no_marker - -def test_meaning() -> None: - """Verify the output of the `meaning` function""" - output = example_module.meaning() - assert output == 42 + with_marker = """ +The prompt is here +[/INST] +output +""" + output = Predictor.trim_prompt(with_marker) + assert output == "output\n" From 4f8d9005e47caca493ee839ad6d93f87e7d7959a Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 5 Dec 2023 11:19:51 -0800 Subject: [PATCH 2/5] add tests --- src/autora/doc/pipelines/main.py | 6 +++++- src/autora/doc/runtime/prompts.py | 1 - tests/test_main.py | 15 +++++++++++++++ tests/{test.py => test_predict_hf.py} | 0 4 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 tests/test_main.py rename tests/{test.py => test_predict_hf.py} (100%) diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index b74bf4b..aacb809 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -1,5 +1,6 @@ import logging from timeit import default_timer as timer +from typing import List import jsonlines import mlflow @@ -18,7 +19,9 @@ @app.command() -def predict(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> None: +def predict( + data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts +) -> List[str]: run = mlflow.active_run() sys_prompt = SYS[sys_id] @@ -51,6 +54,7 @@ def predict(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: total_tokens = sum([len(token) for token in tokens]) mlflow.log_metric("total_tokens", total_tokens) mlflow.log_metric("tokens/sec", total_tokens / pred_time) + return predictions @app.command() diff --git a/src/autora/doc/runtime/prompts.py b/src/autora/doc/runtime/prompts.py index 5875127..19f905a 100644 --- a/src/autora/doc/runtime/prompts.py +++ b/src/autora/doc/runtime/prompts.py @@ -29,7 +29,6 @@ class SystemPrompts(Enum): class InstructionPrompts(Enum): - SYS_1 = "SYS_1" INSTR_SWEETP_1 = "INSTR_SWEETP_1" diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..e2d12d3 --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,15 @@ +from pathlib import Path + +from autora.doc.pipelines.main import predict +from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts + +# dummy HF model for testing +TEST_HF_MODEL = "hf-internal-testing/tiny-random-FalconForCausalLM" + + +def test_predict() -> None: + data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve() + outputs = predict(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + assert len(outputs) == 3, "Expected 3 outputs" + for output in outputs: + assert len(output) > 0, "Expected non-empty output" diff --git a/tests/test.py b/tests/test_predict_hf.py similarity index 100% rename from tests/test.py rename to tests/test_predict_hf.py From 424f8b6cd41cf48b40a44eb34e6b1454c3c817df Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Tue, 5 Dec 2023 16:58:23 -0800 Subject: [PATCH 3/5] feat: Generate command --- README.md | 2 +- azureml/{predict.yml => eval.yml} | 2 +- azureml/generate.yml | 18 ++++++++++++++++++ src/autora/doc/pipelines/main.py | 25 +++++++++++++++++++++---- src/autora/doc/runtime/predict_hf.py | 2 +- src/autora/doc/runtime/prompts.py | 4 ++-- tests/test_main.py 
| 14 ++++++++++++-- 7 files changed, 56 insertions(+), 11 deletions(-) rename azureml/{predict.yml => eval.yml} (96%) create mode 100644 azureml/generate.yml diff --git a/README.md b/README.md index 2ff8a62..127c2f4 100644 --- a/README.md +++ b/README.md @@ -78,7 +78,7 @@ az storage blob upload --account-name --container > --file Prediction ```sh -az ml job create -f azureml/predict.yml --set display_name="Test prediction job" --web +az ml job create -f azureml/eval.yml --set display_name="Test prediction job" --web ``` Notes: diff --git a/azureml/predict.yml b/azureml/eval.yml similarity index 96% rename from azureml/predict.yml rename to azureml/eval.yml index d5410a2..a2f72b6 100644 --- a/azureml/predict.yml +++ b/azureml/eval.yml @@ -1,6 +1,6 @@ $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json command: > - python -m autora.doc.pipelines.main predict + python -m autora.doc.pipelines.main eval ${{inputs.data_dir}}/data.jsonl ${{inputs.model_dir}}/llama-2-7b-chat-hf SYS_1 diff --git a/azureml/generate.yml b/azureml/generate.yml new file mode 100644 index 0000000..c7df113 --- /dev/null +++ b/azureml/generate.yml @@ -0,0 +1,18 @@ +$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json +command: > + python -m autora.doc.pipelines.main generate + --model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf + --output ./outputs/output.txt + autora/doc/pipelines/main.py +code: ../src +inputs: + model_dir: + type: uri_folder + path: azureml://datastores/workspaceblobstore/paths/base_models +environment: + image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21 + conda_file: conda.yml +display_name: autodoc_prediction +compute: azureml:v100cluster +experiment_name: autodoc_prediction +description: | \ No newline at end of file diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index aacb809..9c9aa4e 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -19,9 +19,7 @@ @app.command() -def predict( - data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts -) -> List[str]: +def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> List[str]: run = mlflow.active_run() sys_prompt = SYS[sys_id] @@ -33,7 +31,6 @@ def predict( logger.info(f"running predict with {data_file}") logger.info(f"model path: {model_path}") - # predictions = [] with jsonlines.open(data_file) as reader: items = [item for item in reader] inputs = [item["instruction"] for item in items] @@ -57,6 +54,26 @@ def predict( return predictions +@app.command() +def generate( + python_file: str, + model_path: str = "meta-llama/llama-2-7b-chat-hf", + output: str = "output.txt", + sys_id: SystemPrompts = SystemPrompts.SYS_1, + instruc_id: InstructionPrompts = InstructionPrompts.INSTR_SWEETP_1, +) -> None: + with open(python_file, "r") as f: + inputs = [f.read()] + sys_prompt = SYS[sys_id] + instr_prompt = INSTR[instruc_id] + pred = Predictor(model_path) + predictions = pred.predict(sys_prompt, instr_prompt, inputs) + assert len(predictions) == 1, f"Expected only one output, got {len(predictions)}" + logger.info(f"Writing output to {output}") + with open(output, "w") as f: + f.write(predictions[0]) + + @app.command() def import_model(model_name: str) -> None: pass diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py index cbde760..23c484e 100644 --- a/src/autora/doc/runtime/predict_hf.py +++ 
b/src/autora/doc/runtime/predict_hf.py @@ -39,7 +39,7 @@ def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]: top_k=40, num_return_sequences=1, eos_token_id=self.tokenizer.eos_token_id, - max_length=1000, + max_length=2048, ) results = [Predictor.trim_prompt(sequence[0]["generated_text"]) for sequence in sequences] diff --git a/src/autora/doc/runtime/prompts.py b/src/autora/doc/runtime/prompts.py index 19f905a..75019fc 100644 --- a/src/autora/doc/runtime/prompts.py +++ b/src/autora/doc/runtime/prompts.py @@ -24,11 +24,11 @@ paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'""" -class SystemPrompts(Enum): +class SystemPrompts(str, Enum): SYS_1 = "SYS_1" -class InstructionPrompts(Enum): +class InstructionPrompts(str, Enum): INSTR_SWEETP_1 = "INSTR_SWEETP_1" diff --git a/tests/test_main.py b/tests/test_main.py index e2d12d3..3e67bab 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -1,6 +1,6 @@ from pathlib import Path -from autora.doc.pipelines.main import predict +from autora.doc.pipelines.main import eval, generate from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts # dummy HF model for testing @@ -9,7 +9,17 @@ def test_predict() -> None: data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve() - outputs = predict(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) assert len(outputs) == 3, "Expected 3 outputs" for output in outputs: assert len(output) > 0, "Expected non-empty output" + + +def test_generate() -> None: + python_file = __file__ + output = Path("output.txt") + output.unlink(missing_ok=True) + generate(python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1) + assert output.exists(), f"Expected output file {output} to exist" + with open(str(output), "r") as f: + assert len(f.read()) > 0, f"Expected non-empty output file {output}" From 4b40f3476d68597808281e3cb072968fcbc37718 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Wed, 6 Dec 2023 13:06:27 -0800 Subject: [PATCH 4/5] fix dependencies --- pyproject.toml | 12 ++++++------ src/autora/doc/pipelines/main.py | 8 +++++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 97c9c31..235a81a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ classifiers = [ ] dynamic = ["version"] dependencies = [ - "transformers>=4.35.2", "typer", "scipy", # This works, while installing from pytorch and cuda from conda does not", @@ -42,17 +41,18 @@ dev = [ "nbsphinx", # Used to integrate Python notebooks into Sphinx documentation "ipython", # Also used in building notebooks into Sphinx "matplotlib", # Used in sample notebook intro_notebook.ipynb - "numpy", # Used in sample notebook intro_notebook.ipynb "ipykernel", ] train = [ + "jsonlines", "mlflow", - "azureml-mlflow", +] +azure = [ "azureml-core", - "jsonlines", + "azureml-mlflow", ] - -train_cuda = [ +cuda = [ + "transformers>=4.35.2", "bitsandbytes>=0.41.2.post2", "accelerate>=0.24.1", "xformers", diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py index 9c9aa4e..de7e906 100644 --- a/src/autora/doc/pipelines/main.py +++ b/src/autora/doc/pipelines/main.py @@ -2,8 +2,6 @@ from timeit import default_timer as timer from typing import List -import jsonlines -import mlflow import torch import typer @@ 
-20,6 +18,11 @@ @app.command() def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> List[str]: + import jsonlines + import mlflow + + mlflow.autolog() + run = mlflow.active_run() sys_prompt = SYS[sys_id] @@ -82,5 +85,4 @@ def import_model(model_name: str) -> None: if __name__ == "__main__": logger.info(f"Torch version: {torch.__version__} , Cuda available: {torch.cuda.is_available()}") - mlflow.autolog() app() From 58efb38a45a8960a2232b59ca4687b325b3e63cc Mon Sep 17 00:00:00 2001 From: Carlos Garcia Jurado Suarez Date: Wed, 6 Dec 2023 15:05:09 -0800 Subject: [PATCH 5/5] fix transformer dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 235a81a..422c8ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,6 +20,7 @@ dependencies = [ "scipy", # This works, while installing from pytorch and cuda from conda does not", "torch==2.0.1", + "transformers>=4.35.2", ] # On a mac, install optional dependencies with `pip install '.[dev]'` (include the single quotes) @@ -52,7 +53,6 @@ azure = [ "azureml-mlflow", ] cuda = [ - "transformers>=4.35.2", "bitsandbytes>=0.41.2.post2", "accelerate>=0.24.1", "xformers",
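
---

Illustrative usage note (not part of the patch series above): after these commits, the `autodoc` console script defined in `pyproject.toml` exposes the Typer app, and the new `generate` command accepts the same flags used in `azureml/generate.yml`. A minimal local invocation might look like the sketch below; the input path `path/to/experiment.py` is a placeholder, and running the full `meta-llama/llama-2-7b-chat-hf` model locally assumes the appropriate optional dependencies (`cuda` extras on a GPU machine) are installed.

```sh
# Sketch only: generate two-paragraph documentation for one experiment file
# using the default prompts (SYS_1 / INSTR_SWEETP_1) added in this patch series.
autodoc generate \
  --model-path meta-llama/llama-2-7b-chat-hf \
  --output output.txt \
  path/to/experiment.py
```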