Merge branch 'main' into musicgen_experiment
HashemAlsaket authored Mar 9, 2024
2 parents 412e36b + 5a80732 commit 2136c64
Showing 32 changed files with 2,202 additions and 246 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -8,6 +8,9 @@ prompttools/version.py
# C extensions
*.so

# macOS
*/.DS_Store

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
10 changes: 5 additions & 5 deletions README.md
@@ -14,16 +14,14 @@
<a href="http://prompttools.readthedocs.io/"><img src="https://img.shields.io/badge/View%20Documentation-Docs-yellow"></a>
<a href="https://discord.gg/7KeRPNHGdJ"><img src="https://img.shields.io/badge/Join%20our%20community-Discord-blue"></a>
<a href="https://pepy.tech/project/prompttools" target="_blank"><img src="https://static.pepy.tech/badge/prompttools" alt="Total Downloads"/></a>
<a href="https://github.com/hegelai/prompttools">
<img src="https://img.shields.io/github/stars/hegelai/prompttools" />
</a>
<a href="https://github.com/hegelai/prompttools"><img src="https://img.shields.io/github/stars/hegelai/prompttools" /></a>
<a href="https://twitter.com/hegel_ai"><img src="https://img.shields.io/twitter/follow/Hegel_AI?style=social"></a>
</p>


Welcome to `prompttools` created by [Hegel AI](https://hegel-ai.com/)! This repo offers a set of open-source, self-hostable tools for experimenting with, testing, and evaluating LLMs, vector databases, and prompts. The core idea is to enable developers to evaluate using familiar interfaces like _code_, _notebooks_, and a local _playground_.

In just a few lines of codes, you can test your prompts and parameters across different models (whether you are using
In just a few lines of code, you can test your prompts and parameters across different models (whether you are using
OpenAI, Anthropic, or LLaMA models). You can even evaluate the retrieval accuracy of vector databases.

```python
@@ -102,7 +100,9 @@ LLMs
- LLaMA.Cpp (LLaMA 1, LLaMA 2) - **Supported**
- HuggingFace (Hub API, Inference Endpoints) - **Supported**
- Anthropic - **Supported**
- Google PaLM - **Supported**
- Mistral AI - **Supported**
- Google Gemini - **Supported**
- Google PaLM (legacy) - **Supported**
- Google Vertex AI - **Supported**
- Azure OpenAI Service - **Supported**
- Replicate - **Supported**
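The workflow the README paragraph above describes, testing prompts and parameters across models, boils down to running every combination of model, prompt, and parameter and collecting the results. A minimal plain-Python sketch of that idea (hypothetical names throughout; `fake_completion` is a placeholder for a real API call, and this is not the actual `prompttools` API):

```python
from itertools import product

# Placeholder inputs; in a real experiment these would be actual model
# names, prompts, and parameter values you want to compare.
models = ["model-a", "model-b"]
prompts = ["Who was the first president?", "Write a tagline for an ice cream shop."]
temperatures = [0.0, 1.0]

def fake_completion(model: str, prompt: str, temperature: float) -> str:
    # Stand-in for a real LLM API call; returns a dummy response string.
    return f"{model} @ T={temperature}: response to {prompt!r}"

# Run the full cartesian product of inputs and collect one row per combination.
results = [
    {"model": m, "prompt": p, "temperature": t, "response": fake_completion(m, p, t)}
    for m, p, t in product(models, prompts, temperatures)
]
print(len(results))  # 2 models x 2 prompts x 2 temperatures = 8 rows
```

A real experiment class would additionally record latency and render the rows as a table, as the notebooks in this commit do.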
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -11,7 +11,7 @@
project = "prompttools"
copyright = "2023, Hegel AI"
author = "Hegel AI"
release = "0.0.43"
release = "0.0.45"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
4 changes: 4 additions & 0 deletions docs/source/experiment.rst
@@ -37,10 +37,14 @@ LLMs

.. autoclass:: HuggingFaceHubExperiment

.. autoclass:: GoogleGeminiChatCompletionExperiment

.. autoclass:: GooglePaLMCompletionExperiment

.. autoclass:: GoogleVertexChatCompletionExperiment

.. autoclass:: MistralChatCompletionExperiment

.. autoclass:: LlamaCppExperiment

.. autoclass:: ReplicateExperiment
6 changes: 6 additions & 0 deletions docs/source/harness.rst
@@ -18,8 +18,14 @@ a corresponding experiment, and keeps track of the templates and inputs used for

.. autoclass:: ChatModelComparisonHarness

.. autoclass:: ChatPromptTemplateExperimentationHarness

.. autoclass:: ModelComparisonHarness

.. autoclass:: MultiExperimentHarness

.. autoclass:: PromptTemplateExperimentationHarness

.. autoclass:: RetrievalAugmentedGenerationExperimentationHarness

.. autoclass:: SystemPromptExperimentationHarness
2 changes: 2 additions & 0 deletions docs/source/utils.rst
@@ -16,6 +16,8 @@ They can also be used with ``prompttest`` to be part of your CI/CD system.

.. autofunction:: prompttools.utils.compute_similarity_against_model

.. autofunction:: prompttools.utils.apply_moderation

.. autofunction:: prompttools.utils.ranking_correlation

.. autofunction:: prompttools.utils.validate_json_response
100 changes: 4 additions & 96 deletions examples/notebooks/AzureOpenAIServiceExperiment.ipynb
@@ -130,7 +130,6 @@
"]\n",
"\n",
"azure_openai_service_configs = {\"AZURE_OPENAI_ENDPOINT\": \"https://YOURENDPOINTNAME.openai.azure.com/\",\n",
" \"API_TYPE\": \"azure\",\n",
" \"API_VERSION\": \"2023-05-15\"} # Specify which API version to use\n",
"temperatures = [0.0, 1.0]\n",
"# You can add more parameters that you'd like to test here.\n",
@@ -252,7 +251,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 7,
"id": "4cf5897b",
"metadata": {},
"outputs": [],
@@ -271,7 +270,6 @@
"]\n",
"\n",
"azure_openai_service_configs = {\"AZURE_OPENAI_ENDPOINT\": \"https://YOURENDPOINTNAME.openai.azure.com/\",\n",
" \"API_TYPE\": \"azure\",\n",
" \"API_VERSION\": \"2023-05-15\"} # Specify which API version to use\n",
"temperatures = [0.0, 1.0]\n",
"# You can add more parameters that you'd like to test here.\n",
@@ -282,100 +280,10 @@
},
{
"cell_type": "code",
"execution_count": 6,
"id": "6eab3877",
"execution_count": null,
"id": "2d261524",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>temperature</th>\n",
" <th>messages</th>\n",
" <th>response</th>\n",
" <th>latency</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.0</td>\n",
" <td>[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the first president?'}]</td>\n",
" <td>The first president of the United States was George Washington.</td>\n",
" <td>0.903520</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>[{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the first president?'}]</td>\n",
" <td>The first president of the United States was George Washington. He served as president from 1789 to 1797.</td>\n",
" <td>0.815370</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.0</td>\n",
" <td>[{'role': 'system', 'content': 'You are a creative copywriter.'}, {'role': 'user', 'content': 'Write a tagline for an ice cream shop.'}]</td>\n",
" <td>\"Scoops of happiness in every cone!\"</td>\n",
" <td>0.517402</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>[{'role': 'system', 'content': 'You are a creative copywriter.'}, {'role': 'user', 'content': 'Write a tagline for an ice cream shop.'}]</td>\n",
" <td>\"Scoops of happiness in every cone.\"</td>\n",
" <td>0.508131</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" temperature \\\n",
"0 0.0 \n",
"1 1.0 \n",
"2 0.0 \n",
"3 1.0 \n",
"\n",
" messages \\\n",
"0 [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the first president?'}] \n",
"1 [{'role': 'system', 'content': 'You are a helpful assistant.'}, {'role': 'user', 'content': 'Who was the first president?'}] \n",
"2 [{'role': 'system', 'content': 'You are a creative copywriter.'}, {'role': 'user', 'content': 'Write a tagline for an ice cream shop.'}] \n",
"3 [{'role': 'system', 'content': 'You are a creative copywriter.'}, {'role': 'user', 'content': 'Write a tagline for an ice cream shop.'}] \n",
"\n",
" response \\\n",
"0 The first president of the United States was George Washington. \n",
"1 The first president of the United States was George Washington. He served as president from 1789 to 1797. \n",
"2 \"Scoops of happiness in every cone!\" \n",
"3 \"Scoops of happiness in every cone.\" \n",
"\n",
" latency \n",
"0 0.903520 \n",
"1 0.815370 \n",
"2 0.517402 \n",
"3 0.508131 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"outputs": [],
"source": [
"experiment.run()\n",
"experiment.visualize()"
Expand Down
173 changes: 173 additions & 0 deletions examples/notebooks/GoogleGeminiChatExperiment.ipynb
@@ -0,0 +1,173 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Google Gemini Chat Experiment Example"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Installations"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# !pip install --quiet --force-reinstall prompttools"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup imports and API keys"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In order for the Google GenAI API to work, you must set up your Google AI Studio credentials (one example in the following cell) or execute this experiment on Google Colab.\n",
"\n",
"Executing on Google Colab may require the least amount of set-up."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"import google.generativeai as genai\n",
"\n",
    "# from google.colab import userdata  # Uncomment if running on Google Colab\n",
"\n",
"GOOGLE_API_KEY = \"\" # You can manually set your key\n",
"# GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY') # Or, you can read it from your account\n",
"\n",
"genai.configure(api_key=GOOGLE_API_KEY)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "Once your credentials are set up, you should be able to execute the following cell without error and see the list of models you have access to."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for m in genai.list_models():\n",
" if 'generateContent' in m.supported_generation_methods:\n",
" print(m.name)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "Then we'll import the relevant `prompttools` modules to set up our experiment."
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2023-07-28T21:15:15.360723Z",
"start_time": "2023-07-28T21:15:15.230441Z"
},
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"source": [
"## Run an experiment"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "Next, we create our test inputs. We can iterate over models (`\"gemini-pro\"` here; you can also use the Ultra model if you have access to it) and contents (the equivalent of prompts). You can also experiment with configurations like temperature using `generation_config` or `safety_settings`.\n"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
    "from prompttools.experiment import GoogleGeminiChatCompletionExperiment\n",
    "\n",
    "model = ['gemini-pro']\n",
    "contents = [\"What is the meaning of life?\", \"Who was the first president?\"]\n",
    "\n",
    "experiment = GoogleGeminiChatCompletionExperiment(model=model, contents=contents)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"experiment.run()\n",
"experiment.visualize()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Evaluate the model response"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Please reference other notebooks (such as Google PaLM 2, Anthropic) for detailed evaluation of the model's response."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
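The evaluation step that the notebook above defers to other examples can be sketched generically: a metric function maps each model response to a score, for instance by checking whether an expected answer appears in it. This is a simplified illustration, not the `prompttools.utils` API:

```python
def contains_expected(response: str, expected: str) -> float:
    # Score 1.0 if the expected substring appears in the response
    # (case-insensitive), else 0.0.
    return 1.0 if expected.lower() in response.lower() else 0.0

# Example responses such as an experiment might collect.
responses = [
    "The first president of the United States was George Washington.",
    "Abraham Lincoln was the 16th president.",
]
scores = [contains_expected(r, "George Washington") for r in responses]
print(scores)  # [1.0, 0.0]
```

An experiment framework would typically append such scores as a new column next to each response, so runs can be compared at a glance.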