From 8001d368ecaafec2ab1397bb4b1f717b34a6a19e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9sar=20Miguel=C3=A1=C3=B1ez?= Date: Thu, 12 Sep 2024 17:38:38 +0200 Subject: [PATCH] Updating docs (#166) * Updated README * Missed one replacement * New docs for evaluations * Added a few more articles to the docs * Updated readme * Quick updates --- README.md | 37 +++++++++++-- docs/guides/datasets/creating-datasets.mdx | 0 docs/guides/datasets/overview.mdx | 20 +++++++ .../datasets/use-in-batch-evaluations.mdx | 0 docs/guides/evaluations/evaluators.mdx | 0 docs/guides/evaluations/overview.mdx | 53 +++++++++++++++++++ docs/guides/evaluations/run-in-batch.mdx | 0 .../evaluations/running-evaluations.mdx | 45 ++++++++++++++++ docs/guides/getting-started/concepts.mdx | 2 +- docs/guides/getting-started/quick-start.mdx | 46 ++++++++++++++++ docs/guides/logs/overview.mdx | 29 ++++++++++ docs/mint.json | 7 ++- 12 files changed, 231 insertions(+), 8 deletions(-) delete mode 100644 docs/guides/datasets/creating-datasets.mdx create mode 100644 docs/guides/datasets/overview.mdx delete mode 100644 docs/guides/datasets/use-in-batch-evaluations.mdx delete mode 100644 docs/guides/evaluations/evaluators.mdx create mode 100644 docs/guides/evaluations/overview.mdx delete mode 100644 docs/guides/evaluations/run-in-batch.mdx create mode 100644 docs/guides/evaluations/running-evaluations.mdx diff --git a/README.md b/README.md index 7614c7dee..2026c0683 100644 --- a/README.md +++ b/README.md @@ -65,11 +65,42 @@ Latitude puts all these helpful tools in one place. This means you can add AI to ## ⚡ Quick start -Here’s a quick getting started guide to get the app up and running: +Latitude offers two deployment options: -### 1. Install Latitude +1. **Latitude Cloud**: A fully managed solution that allows you to get started quickly without worrying about infrastructure. +2. **Latitude Self-Hosted**: An open-source version that you can deploy and manage on your own infrastructure for complete control and customization. -[TODO] +Choose the option that best fits your needs and follow the corresponding instructions below. + +### Latitude Cloud + +To get started with Latitude, follow these steps: + +1. **Sign up for Latitude**: Visit our [website](https://latitude.so) and follow the instructions to create your account. + +2. **Create a new project**: Once logged in, create a new project to organize your prompts and evaluations. + +3. **Write your first prompt**: Navigate to the Editor and create a new prompt. Start with a simple task, like generating a short story or answering a question. + +4. **Test your prompt**: Use the playground to test your prompt with different inputs and see the model's responses. + +5. **Evaluate in batch**: Before deploying, you can upload a dataset and run a batch evaluation to assess your prompt's performance across various scenarios. + +6. **Deploy your prompt**: Once you're satisfied with your prompt's performance in batch evaluation, deploy it as an endpoint for easy integration with your applications. + +7. **Monitor and evaluate**: Use the Logs section to review your prompt's performance over time. Set up ongoing evaluations to systematically assess and improve your prompt's output. + +8. **Iterate and improve**: Based on the evaluation results, refine your prompt or create new versions to enhance its performance. + +9. **Collaborate with your team**: Invite team members to your Latitude workspace to collaborate on prompt engineering and evaluations. 
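+
+Once a prompt is deployed as an endpoint (step 6), integrating it is just an HTTP call from your application. The sketch below is illustrative only: the endpoint URL, environment variables, and payload shape are assumptions rather than Latitude's documented API, so check the API reference for the exact contract.
+
+```typescript
+// Hypothetical sketch: calling a deployed Latitude prompt over HTTP.
+// The endpoint URL and request body below are placeholders, not
+// Latitude's documented API; see the API reference for the real contract.
+const PROMPT_ENDPOINT = process.env.LATITUDE_PROMPT_URL ?? "";
+const API_KEY = process.env.LATITUDE_API_KEY ?? "";
+
+async function runPrompt(parameters: Record<string, string>): Promise<unknown> {
+  const response = await fetch(PROMPT_ENDPOINT, {
+    method: "POST",
+    headers: {
+      Authorization: `Bearer ${API_KEY}`,
+      "Content-Type": "application/json",
+    },
+    // Assumed request body: the parameters your prompt template expects.
+    body: JSON.stringify({ parameters }),
+  });
+  if (!response.ok) {
+    throw new Error(`Prompt request failed with status ${response.status}`);
+  }
+  return response.json();
+}
+
+// Example usage with a made-up parameter name.
+runPrompt({ topic: "a short story about the sea" }).then(console.log);
+```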
+
+For more detailed information on each step, explore our documentation or join our [community](https://join.slack.com/t/trylatitude/shared_invite/zt-17dyj4elt-rwM~h2OorAA3NtgmibhnLA) for support and discussions.
+
+### Latitude Self-Hosted
+
+Follow the instructions in the [self-hosted guide](https://docs.latitude.so/self-hosted/quick-start) to get started with Latitude Self-Hosted.
+
+After setting up Latitude Self-Hosted, you can follow the same steps as in the Latitude Cloud guide to create, test, evaluate, and deploy your prompts.
 
 ## 👥 Community
diff --git a/docs/guides/datasets/creating-datasets.mdx b/docs/guides/datasets/creating-datasets.mdx
deleted file mode 100644
index e69de29bb..000000000
diff --git a/docs/guides/datasets/overview.mdx b/docs/guides/datasets/overview.mdx
new file mode 100644
index 000000000..89f4d1182
--- /dev/null
+++ b/docs/guides/datasets/overview.mdx
@@ -0,0 +1,20 @@
+---
+title: Datasets
+description: Learn how to create and manage your datasets.
+---
+
+## Overview
+
+Datasets contain values to use as parameters for running evaluations in batches. For example, you can upload a dataset of customer support tickets and use it to evaluate the performance of a chatbot.
+
+## How it works
+
+To create a dataset, navigate to the "Datasets" page and click on the "Upload dataset" button. You'll see a form with the following fields:
+
+- Name: The name of the dataset
+- Delimiter: The delimiter used in the first row of the CSV file
+- File: The file to upload
+
+Click the "Create dataset" button to upload the dataset.
+
+Once the dataset is uploaded, you can use it to run evaluations in batches. Learn how to do it in the [running evaluations](/guides/evaluations/running-evaluations#running-evaluations-in-batch-mode) guide.
\ No newline at end of file
diff --git a/docs/guides/datasets/use-in-batch-evaluations.mdx b/docs/guides/datasets/use-in-batch-evaluations.mdx
deleted file mode 100644
index e69de29bb..000000000
diff --git a/docs/guides/evaluations/evaluators.mdx b/docs/guides/evaluations/evaluators.mdx
deleted file mode 100644
index e69de29bb..000000000
diff --git a/docs/guides/evaluations/overview.mdx b/docs/guides/evaluations/overview.mdx
new file mode 100644
index 000000000..41c20fb89
--- /dev/null
+++ b/docs/guides/evaluations/overview.mdx
@@ -0,0 +1,53 @@
+---
+title: Overview
+description: 'Learn how to create and connect evaluations to your prompts.'
+---
+
+## What is an evaluation?
+
+Evaluations help you assess the quality of your LLM outputs. Latitude supports two types of evaluations:
+
+- **LLM evaluations**: You can use LLM evaluators to score your LLM outputs.
+- **Human evaluations (HITL) [Coming soon]**: You can manually review the logs and score them based on your criteria.
+
+## How do they work?
+
+A Latitude project can have any number of evaluations that will be available to connect to prompts. You can create evaluations in the **Evaluations** tab of your workspace. Latitude also comes with a set of built-in evaluations to get you started; simply import them into your project.
+
+Once you've created an evaluation, you can connect it to a prompt by navigating to the prompt and clicking on the **Evaluations** tab. Then you can select the evaluation you want to connect to the prompt.
+
+After connecting an evaluation to a prompt, you can:
+
+- Activate a live evaluation: This will start evaluating the prompt in real-time.
For every new log, the evaluation will run and the result will be displayed on the evaluation's page.
+- Run in batch: You can choose whether to run the evaluation on existing logs or automatically generate a batch of logs to run the evaluation on.
+
+To learn more about how to connect and run evaluations, check out the [Running evaluations](/guides/evaluations/running-evaluations) guide.
+
+## How do I create an evaluation?
+
+You can create an evaluation from scratch or import an existing one and edit it.
+
+### Creating an evaluation from scratch
+
+Go to the **Evaluations** tab of your project and click on the **Create evaluation** button. You'll have to provide a name for the evaluation and select the type of evaluation you want to create. We support three types of evaluations, depending on the output you expect:
+
+- **Number**: This is helpful when you want to score outputs on a range, for example a score between 0 and 10. You'll have to provide a minimum and maximum value for the evaluation.
+- **Boolean**: Useful for true/false questions. For example, you can use this to evaluate if the output contains harmful content.
+- **Text**: A free-form text evaluation. For example, you can use this to generate feedback on the output of a prompt.
+
+Number and Boolean evaluations expect a specific format for the evaluation result. You have to make sure your evaluation prompt returns either a score or a boolean value (true/false) and that the output is a JSON object with the following format:
+
+```json
+{
+  "result": <number or true/false>,
+  "reason": "<a short explanation of the result>"
+}
+```
+
+We use this format to parse the evaluation result and display aggregated metrics on the evaluations page. Make sure to include this format in your evaluation prompt. If you're not sure how to do this, all of our templates include this format, so you can use them as a reference.
+
+### Importing an evaluation
+
+Importing an evaluation is simple: navigate to the **Evaluations** tab of your project and you'll see a few templates to get you started. Click on the template you want to import and the evaluation will be created for you.
+
+You can edit an imported evaluation just like you would edit an evaluation created from scratch, so feel free to customize it to your needs.
\ No newline at end of file
diff --git a/docs/guides/evaluations/run-in-batch.mdx b/docs/guides/evaluations/run-in-batch.mdx
deleted file mode 100644
index e69de29bb..000000000
diff --git a/docs/guides/evaluations/running-evaluations.mdx b/docs/guides/evaluations/running-evaluations.mdx
new file mode 100644
index 000000000..b3e4ee507
--- /dev/null
+++ b/docs/guides/evaluations/running-evaluations.mdx
@@ -0,0 +1,45 @@
+---
+title: Running Evaluations
+description: 'Learn how to run evaluations on your prompts.'
+---
+
+Once you've created evaluations and connected them to any of your prompts, you can run them on live logs or in batch mode. This guide will walk you through the process of running evaluations.
+
+## Prerequisites
+
+- You have already connected one or more evaluations to your prompt.
+- To run evaluations in batch mode, you need to have a dataset created in your project. Learn more about [creating datasets](/guides/datasets/overview).
+
+## Steps to run evaluations
+
+1. **Navigate to the document**
+   Go to the specific document where you've connected the evaluations.
+
+2. **Access the evaluations tab**
+   Look for the "Evaluations" tab or section within the document view. This is where you'll find all the connected evaluations.
+
+3. 
**Select evaluations to run** + You should see a list of connected evaluations. Click on the one you want to run. + +4. **Run the evaluation in batch mode** + Click on the "Run in batch" button to start the evaluation process. Learn more about [running evaluations in batch mode](/guides/evaluations/running-evaluations#running-evaluations-in-batch-mode). + +5. **Run the evaluation in live mode** + Activate the "Evaluate production logs" toggle in the top right corner to turn on live evaluation. Learn more about [running evaluations in live mode](/guides/evaluations/running-evaluations#running-evaluations-in-live-mode). + +By following these steps, you should be able to successfully run your connected evaluations and gain valuable insights into the performance of your prompts. + +## Running evaluations in batch mode + +When you run evaluations in batch mode, you can either create new logs from a dataset or use existing logs. + +- **Create new logs from a dataset**: Select the option "Generate from dataset" as the source for the logs. Choose the dataset you want to use, the number of logs to generate, and how the prompt parameters map to the dataset columns. +- **Use existing logs [Coming soon]**: Select the option "Use existing logs" as the source for the logs. Choose how many logs you want to use, and the evaluation will run on the logs you selected. + +Click the "Run evaluation" button to start the evaluation process. You'll see the status of the batch evaluation just above the logs table. Once it's finished, the charts will update with the results of the evaluation, and you can check the evaluation logs to drill down into the results. + +## Running evaluations in live mode + +Evaluations running in live mode will run on all new logs generated in your project. This is useful if you want to monitor the performance of your prompts in real-time. + +We recommend keeping a few key evaluations running in live mode to spot degradations in response quality as soon as they happen. Sometimes new model releases or changes in parameters can lead to a drop in response quality, so this is a good way to catch those issues early. \ No newline at end of file diff --git a/docs/guides/getting-started/concepts.mdx b/docs/guides/getting-started/concepts.mdx index 2d8f3ca84..238c9a2e3 100644 --- a/docs/guides/getting-started/concepts.mdx +++ b/docs/guides/getting-started/concepts.mdx @@ -22,7 +22,7 @@ In Latitude, logs are automatically captured and stored whenever you run a promp Evaluations are the process of assessing your model's performance using logs. You can evaluate your model's output for accuracy, fluency, or any other metric that you choose. There are a few evaluation techniques that you can use: - **LLM evaluations**: You can use large language models to evaluate the output of other models. This is useful when you have a large number of logs and need to evaluate them quickly. -- **Human evaluations (HITL)**: You—or your team—can manually review the logs and score them based on your criteria. +- **Human evaluations (HITL) [Coming soon]**: You—or your team—can manually review the logs and score them based on your criteria. Evaluations also generate logs that you can eventually use to fine-tune future models or improve your prompts. To learn more about evaluations, check out the [Evaluations](/guides/evaluations/) overview. 
diff --git a/docs/guides/getting-started/quick-start.mdx b/docs/guides/getting-started/quick-start.mdx index e69de29bb..2b6fadd19 100644 --- a/docs/guides/getting-started/quick-start.mdx +++ b/docs/guides/getting-started/quick-start.mdx @@ -0,0 +1,46 @@ +--- +title: Quick start +description: Learn how to get started with Latitude +--- + +## Overview + +This quick start guide will walk you through the process of setting up and using Latitude, whether you choose to use Latitude Cloud or self-host the platform. By the end of this guide, you'll have created your first prompt, tested it, and learned how to evaluate and deploy it. + +Latitude offers two deployment options: + +1. **Latitude Cloud**: A fully managed solution that allows you to get started quickly without worrying about infrastructure. +2. **Latitude Self-Hosted**: An open-source version that you can deploy and manage on your own infrastructure for complete control and customization. + +Choose the option that best fits your needs and follow the corresponding instructions below. + + +## Latitude Cloud + +To get started with Latitude, follow these steps: + +1. **Sign up for Latitude**: Visit our [website](https://latitude.so) and follow the instructions to create your account. + +2. **Create a new project**: Once logged in, create a new project to organize your prompts and evaluations. + +3. **Write your first prompt**: Navigate to the Editor and create a new prompt. Start with a simple task, like generating a short story or answering a question. + +4. **Test your prompt**: Use the playground to test your prompt with different inputs and see the model's responses. + +5. **Evaluate in batch**: Before deploying, you can upload a dataset and run a batch evaluation to assess your prompt's performance across various scenarios. + +6. **Deploy your prompt**: Once you're satisfied with your prompt's performance in batch evaluation, deploy it as an endpoint for easy integration with your applications. + +7. **Monitor and evaluate**: Use the Logs section to review your prompt's performance over time. Set up ongoing evaluations to systematically assess and improve your prompt's output. + +8. **Iterate and improve**: Based on the evaluation results, refine your prompt or create new versions to enhance its performance. + +9. **Collaborate with your team**: Invite team members to your Latitude workspace to collaborate on prompt engineering and evaluations. + +For more detailed information on each step, explore our documentation or join our [community](https://join.slack.com/t/trylatitude/shared_invite/zt-17dyj4elt-rwM~h2OorAA3NtgmibhnLA) for support and discussions. + +## Latitude Self-Hosted + +Follow the instructions in the [self-hosted guide](https://docs.latitude.so/self-hosted/quick-start) to get started with Latitude Self-Hosted. + +After setting up Latitude Self-Hosted, you can follow the same steps as in the Latitude Cloud guide to create, test, evaluate, and deploy your prompts. \ No newline at end of file diff --git a/docs/guides/logs/overview.mdx b/docs/guides/logs/overview.mdx index e69de29bb..2d7054396 100644 --- a/docs/guides/logs/overview.mdx +++ b/docs/guides/logs/overview.mdx @@ -0,0 +1,29 @@ +--- +title: Logs +description: Learn how to use the logs page to monitor your prompts and evaluate their performance. +--- + +## Overview + +Latitude stores all the logs generated by your prompts in a database. You can use the logs page to monitor your prompts and evaluate their performance. 
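+
+As a rough mental model, each log bundles the exchanged messages with run metadata. The field names in the sketch below are illustrative assumptions, not Latitude's actual schema; the "How it works" section below describes where this information appears in the UI.
+
+```typescript
+// Illustrative sketch only: these field names are assumptions,
+// not Latitude's actual log schema.
+interface PromptLog {
+  timestamp: string;     // when the prompt was run
+  promptVersion: string; // version of the prompt that produced the log
+  latencyMs: number;     // time taken to produce the response
+  tokens: number;        // tokens consumed by the run
+  cost: number;          // estimated cost of the run
+  messages: Array<{ role: "system" | "user" | "assistant"; content: string }>;
+}
+```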
+
+## How it works
+
+Every time you run a prompt, whether from the API or from the UI, a new log is created.
+
+To access the logs page, navigate to a prompt and click on the "Logs" tab. You'll see a table with all the logs generated by the prompt, along with metadata such as the timestamp, the prompt version used, latency, tokens used, and cost.
+
+Clicking on a log will display a side panel with the full details of the log, including the list of messages.
+
+## Creating logs for evaluations
+
+You can also create logs for evaluation purposes without actually running the prompt. This is useful when you want to run evaluations on a large number of inputs.
+
+For a detailed guide on running evaluations in batches, refer to the [Running Evaluations](/guides/evaluations/running-evaluations#running-evaluations-in-batch-mode) guide.
+
+## Coming soon
+
+- Filtering and sorting
+- Exporting logs to a CSV file
+- Deleting logs
+- Visualizations for certain metrics like latency, tokens used, and cost
\ No newline at end of file
diff --git a/docs/mint.json b/docs/mint.json
index 6a50297cc..970a2c391 100644
--- a/docs/mint.json
+++ b/docs/mint.json
@@ -52,8 +52,8 @@
     {
       "group": "Evaluations",
       "pages": [
-        "guides/evaluations/evaluators",
-        "guides/evaluations/run-in-batch"
+        "guides/evaluations/overview",
+        "guides/evaluations/running-evaluations"
       ]
     },
     {
@@ -65,8 +65,7 @@
     {
       "group": "Datasets",
       "pages": [
-        "guides/datasets/creating-datasets",
-        "guides/datasets/use-in-batch-evaluations"
+        "guides/datasets/overview"
       ]
     },
     {