diff --git a/src/examples/test.ipynb b/src/examples/test.ipynb new file mode 100644 index 000000000..fd6c7d650 --- /dev/null +++ b/src/examples/test.ipynb @@ -0,0 +1,299 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from intelligence_layer.evaluation.dataset.domain import Example\n", + "from intelligence_layer.evaluation.run.domain import ExampleOutput\n", + "from intelligence_layer.evaluation.infrastructure.repository_navigator import RunLineage, EvaluationLineage, RepositoryNavigator" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'tuple' object has no attribute '_rich_render'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/intelligence-layer-d3iSWYpm-py3.10/lib/python3.10/site-packages/IPython/core/formatters.py:925\u001b[0m, in \u001b[0;36mIPythonDisplayFormatter.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 923\u001b[0m method \u001b[38;5;241m=\u001b[39m get_real_method(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprint_method)\n\u001b[1;32m 924\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m method \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 925\u001b[0m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 926\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/aleph/intelligence-layer/src/intelligence_layer/evaluation/infrastructure/repository_navigator.py:43\u001b[0m, in \u001b[0;36mEvaluationLineage._ipython_display_\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 41\u001b[0m output_tree \u001b[38;5;241m=\u001b[39m Tree(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mOutputs:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 42\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m output \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutputs:\n\u001b[0;32m---> 43\u001b[0m output_tree\u001b[38;5;241m.\u001b[39madd(\u001b[43moutput\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_rich_render\u001b[49m(skip_example_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m))\n\u001b[1;32m 44\u001b[0m tree\u001b[38;5;241m.\u001b[39madd(output_tree)\n\u001b[1;32m 45\u001b[0m tree\u001b[38;5;241m.\u001b[39madd(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mevaluation\u001b[38;5;241m.\u001b[39m_rich_render(skip_example_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m))\n", + "\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute '_rich_render'" + ] + }, + { + "data": { + "text/plain": [ + "EvaluationLineage(example=Example ID = example-id\n", + "Input = input0\n", + "Expected output = \"expected_output0\"\n", + ", outputs=Example ID=example-id\n", + "Related Run ID=run-id\n", + "Output=\"output\"\n", + ", evaluation=Evaluation ID = eval-id\n", + "Example ID = example-id\n", + "Result = eval='Result'\n", + ")" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pydantic import BaseModel\n", + "from intelligence_layer.evaluation.evaluation.domain import ExampleEvaluation\n", + "\n", + "outputs = [ExampleOutput(run_id=\"run-id\", example_id=\"example-id\", output=\"output\"), ExampleOutput(run_id=\"run-id\", example_id=\"example-id\", output=\"output\")]\n", + "lineage = EvaluationLineage(example=Example(input=\"input0\", expected_output=\"expected_output0\", data=\"data0\", id=\"example-id\"), \n", + " outputs=ExampleOutput(run_id=\"run-id\", example_id=\"example-id\", output=\"output\"),\n", + " evaluation=ExampleEvaluation(evaluation_id=\"eval-id\", example_id=\"example-id\", result=DummyEval(eval=\"Result\")))\n", + "lineage" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "ename": "ValidationError", + "evalue": "1 validation error for RunLineage\noutput\n Input should be a valid dictionary or instance of ExampleOutput [type=model_type, input_value=[Example ID=example-id\nRe...run-id\nOutput=\"output\"\n], input_type=list]\n For further information visit https://errors.pydantic.dev/2.6/v/model_type", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m lineage \u001b[38;5;241m=\u001b[39m \u001b[43mRunLineage\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mExample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minput0\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_output\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexpected_output0\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdata0\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mid\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexample-id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mExampleOutput\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrun-id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexample_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexample-id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43moutput\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m lineage\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/intelligence-layer-d3iSWYpm-py3.10/lib/python3.10/site-packages/pydantic/main.py:171\u001b[0m, in \u001b[0;36mBaseModel.__init__\u001b[0;34m(self, **data)\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[38;5;66;03m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[1;32m 170\u001b[0m __tracebackhide__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 171\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mself_instance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mValidationError\u001b[0m: 1 validation error for RunLineage\noutput\n Input should be a valid dictionary or instance of ExampleOutput [type=model_type, input_value=[Example ID=example-id\nRe...run-id\nOutput=\"output\"\n], input_type=list]\n For further information visit https://errors.pydantic.dev/2.6/v/model_type" + ] + } + ], + "source": [ + "lineage = RunLineage(example=Example(input=\"input0\", expected_output=\"expected_output0\", data=\"data0\", id=\"example-id\"), \n", + " output=[ExampleOutput(run_id=\"run-id\", example_id=\"example-id\", output=\"output\")])\n", + "lineage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "ename": "ValidationError", + "evalue": "1 validation error for EvaluationLineage\noutputs\n Input should be a valid dictionary or instance of ExampleOutput [type=model_type, input_value=[Example ID=example-id\nRe...run-id\nOutput=\"output\"\n], input_type=list]\n For further information visit https://errors.pydantic.dev/2.6/v/model_type", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValidationError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[4], line 7\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28meval\u001b[39m: \u001b[38;5;28mstr\u001b[39m\n\u001b[1;32m 6\u001b[0m outputs \u001b[38;5;241m=\u001b[39m [ExampleOutput(run_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrun-id\u001b[39m\u001b[38;5;124m\"\u001b[39m, example_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexample-id\u001b[39m\u001b[38;5;124m\"\u001b[39m, output\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moutput\u001b[39m\u001b[38;5;124m\"\u001b[39m), ExampleOutput(run_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrun-id\u001b[39m\u001b[38;5;124m\"\u001b[39m, example_id\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mexample-id\u001b[39m\u001b[38;5;124m\"\u001b[39m, output\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moutput\u001b[39m\u001b[38;5;124m\"\u001b[39m)]\n\u001b[0;32m----> 7\u001b[0m lineage \u001b[38;5;241m=\u001b[39m \u001b[43mEvaluationLineage\u001b[49m\u001b[43m(\u001b[49m\u001b[43mexample\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mExample\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43minput0\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexpected_output\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexpected_output0\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdata0\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mid\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexample-id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43moutputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mExampleOutput\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrun-id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexample_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexample-id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moutput\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43moutput\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mevaluation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mExampleEvaluation\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevaluation_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43meval-id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexample_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexample-id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mresult\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mDummyEval\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43meval\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mResult\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 10\u001b[0m lineage\n", + "File \u001b[0;32m~/.cache/pypoetry/virtualenvs/intelligence-layer-d3iSWYpm-py3.10/lib/python3.10/site-packages/pydantic/main.py:171\u001b[0m, in \u001b[0;36mBaseModel.__init__\u001b[0;34m(self, **data)\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[38;5;66;03m# `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks\u001b[39;00m\n\u001b[1;32m 170\u001b[0m __tracebackhide__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 171\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__pydantic_validator__\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidate_python\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mself_instance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mValidationError\u001b[0m: 1 validation error for EvaluationLineage\noutputs\n Input should be a valid dictionary or instance of ExampleOutput [type=model_type, input_value=[Example ID=example-id\nRe...run-id\nOutput=\"output\"\n], input_type=list]\n For further information visit https://errors.pydantic.dev/2.6/v/model_type" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | example | \n", + "output | \n", + "
---|---|---|
0 | \n", + "Example ID = example-id\\nInput = input0\\nExpec... | \n", + "Example ID=example-id\\nRelated Run ID=run-id\\n... | \n", + "