Add action template (#32)

* Add action template
* Add permissions
* Add variable
* Add markdown fn and script
* Fix gha
* Formatting
* Try require
* Formatting
* Try with secret
* Debugging
* Fix prompting
* Cleanup
* Fix markdown issue
* Fix markdown
* Switch back to DEBUG
* Compress GIF
hegelai · Jul 26, 2023 · 1c4ba74 · 1c4ba74
1 parent 6750693
commit 1c4ba74
Show file tree

Hide file tree

Showing 9 changed files with 81 additions and 25 deletions.
diff --git a/.github/workflows/comment.yml b/.github/workflows/comment.yml
@@ -0,0 +1,43 @@
+name: PromptTools
+
+on:
+ # Trigger the workflow on pull requests targeting main
+ pull_request:
+ branches:
+ - main
+
+jobs:
+ comment:
+ permissions: write-all
+ runs-on: ubuntu-latest
+ steps:
+ - name: Check out source repository
+ uses: actions/checkout@v3
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v3
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install flake8 pytest
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+ - name: Build prompttools
+ run: |
+ pip3 install .
+ - name: Create markdown
+ run: |
+ DEBUG=1 python scripts/create_comment.py
+ - name: Write comment
+ uses: actions/github-script@v6
+ with:
+ script: |
+ const fs = require('fs')
+ fs.readFile('./markdown.md', 'utf8', (err, data) => {
+ github.rest.issues.createComment({
+ issue_number: context.issue.number,
+ owner: context.repo.owner,
+ repo: context.repo.repo,
+ body: data
+ })
+ });
diff --git a/examples/notebooks/GPT4RegressionTesting.ipynb b/examples/notebooks/GPT4RegressionTesting.ipynb
@@ -123,7 +123,7 @@
  " \"\"\"\n",
  "You are a math tutor. When given a math question, talk through each step \n",
  "of your thought process before arriving at the answer. \n",
- "\"\"\"\n",
+ "\"\"\",\n",
  "]\n",
  "inputs = [\n",
  " \"\"\"\n",

diff --git a/examples/notebooks/OpenAIChatExperiment.ipynb b/examples/notebooks/OpenAIChatExperiment.ipynb
diff --git a/examples/prompttests/test_huggingface_hub.py b/examples/prompttests/test_huggingface_hub.py
@@ -38,7 +38,7 @@ def create_prompt():
  prompts=[create_prompt()],
  expected=["George Washington"],
  threshold=1.0,
- threshold_type=ThresholdType.MAXIMUM
+ threshold_type=ThresholdType.MAXIMUM,
 )
 def completion_fn(prompt: str):
  response = None

diff --git a/img/demo.gif b/img/demo.gif
diff --git a/prompttools/experiment/experiments/experiment.py b/prompttools/experiment/experiments/experiment.py
@@ -446,6 +446,14 @@ def to_lora_json(
  else:
  return extracted_data.to_json(**kwargs)
 
+ def to_markdown(self):
+ if not self.results:
+ logging.info("Running first...")
+ self.run()
+ markdown = self.to_pandas_df().to_markdown()
+ print(markdown)
+ return markdown
+
  def _get_model_names(self):
  pass
 

diff --git a/prompttools/experiment/experiments/openai_chat_experiment.py b/prompttools/experiment/experiments/openai_chat_experiment.py
@@ -117,7 +117,7 @@ def __init__(
 
  @staticmethod
  def _extract_responses(output: Dict[str, object]) -> list[str]:
- return [choice["message"]["content"] for choice in output["choices"]][0]
+ return [choice["message"]["content"].replace('\n', '<br />') for choice in output["choices"]][0]
 
  def _is_chat(self):
  return True

diff --git a/prompttools/utils/autoeval_from_expected.py b/prompttools/utils/autoeval_from_expected.py
@@ -50,7 +50,7 @@ def compute(prompt: str, expected: str, response: str, model: str = "gpt-4") ->
  raise PromptToolsUtilityError
  evaluation = openai.ChatCompletion.create(model=model, messages=_get_messages(prompt, expected, response))
  return 1.0 if "RIGHT" in evaluation["choices"][0]["message"]["content"] else 0.0
- 
+
 
 def evaluate(prompt: str, response: str, metadata: Dict, expected: str) -> float:
  r"""

diff --git a/scripts/create_comment.py b/scripts/create_comment.py
@@ -0,0 +1,15 @@
+from prompttools.experiment import OpenAIChatExperiment
+from prompttools.selector.prompt_selector import PromptSelector
+
+PROMPTTOOLS_MD_TMP = "markdown.md"
+
+selectors = [PromptSelector("You are a helpful assistant.", "Is 17077 a prime number?"),
+ PromptSelector("You are a math tutor.", "Is 17077 a prime number?")]
+models = ["gpt-3.5-turbo", "gpt-4"]
+temperatures = [0.0]
+openai_experiment = OpenAIChatExperiment(models, selectors, temperature=temperatures)
+openai_experiment.run()
+
+markdown = openai_experiment.to_markdown()
+with open(PROMPTTOOLS_MD_TMP, "w") as f:
+ f.write(markdown)