From 164349bf2695caddb826ed0c8091ef5328d6661e Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Mon, 14 Oct 2024 16:24:06 -0500 Subject: [PATCH 01/13] Fixes output validation --- python/mall/llm.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/mall/llm.py b/python/mall/llm.py index 4ed8929..690d3a5 100644 --- a/python/mall/llm.py +++ b/python/mall/llm.py @@ -74,10 +74,11 @@ def llm_call(x, msg, use, preview=False, valid_resps="", convert=None, data_type if out == label: out = convert.get(label) - # out = data_type(out) + if data_type == int: + out = data_type(out) - # if out not in valid_resps: - # out = None + if out not in valid_resps and len(valid_resps) > 0: + out = None return out From 87ca00d32a7082ae902ad9bdb49cc601114f1d6b Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Mon, 14 Oct 2024 16:29:09 -0500 Subject: [PATCH 02/13] Updates example for sentiment --- _freeze/reference/MallFrame/execute-results/html.json | 4 ++-- python/mall/polars.py | 2 +- reference/MallFrame.qmd | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/_freeze/reference/MallFrame/execute-results/html.json b/_freeze/reference/MallFrame/execute-results/html.json index f7bb025..8f027ec 100644 --- a/_freeze/reference/MallFrame/execute-results/html.json +++ b/_freeze/reference/MallFrame/execute-results/html.json @@ -1,8 +1,8 @@ { - "hash": "ab2b83a620205221658b2e724e51e73e", + "hash": "b719238e79aa68d0ccd5c863f83a82ef", "result": { "engine": "jupyter", - "markdown": "---\ntitle: MallFrame\n---\n\n\n\n`MallFrame(self, df)`\n\nExtension to Polars that add ability to use\nan LLM to run batch predictions over a data frame\n\nWe will start by loading the needed libraries, and\nset up the data frame that will be used in the\nexamples:\n\n\n::: {#e0baad23 .cell execution_count=1}\n``` {.python .cell-code}\nimport mall\nimport polars as pl\npl.Config(fmt_str_lengths=100)\npl.Config.set_tbl_hide_dataframe_shape(True)\npl.Config.set_tbl_hide_column_data_types(True)\ndata = mall.MallData\nreviews = data.reviews\nreviews.llm.use(options = dict(seed = 100))\n```\n:::\n\n\n## Methods\n\n| Name | Description |\n| --- | --- |\n| [classify](#mall.MallFrame.classify) | Classify text into specific categories. |\n| [custom](#mall.MallFrame.custom) | Provide the full prompt that the LLM will process. |\n| [extract](#mall.MallFrame.extract) | Pull a specific label from the text. |\n| [sentiment](#mall.MallFrame.sentiment) | Use an LLM to run a sentiment analysis |\n| [summarize](#mall.MallFrame.summarize) | Summarize the text down to a specific number of words. |\n| [translate](#mall.MallFrame.translate) | Translate text into another language. |\n| [use](#mall.MallFrame.use) | Define the model, backend, and other options to use to |\n| [verify](#mall.MallFrame.verify) | Check to see if something is true about the text. |\n\n### classify { #mall.MallFrame.classify }\n\n`MallFrame.classify(col, labels='', additional='', pred_name='classify')`\n\nClassify text into specific categories.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|-------------------------------------------------------------------------------------------------------------------------|--------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `labels` | list | A list or a DICT object that defines the categories to classify the text as. It will return one of the provided labels. 
| `''` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'classify'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#c433ce08 .cell execution_count=2}\n``` {.python .cell-code}\nreviews.llm.classify(\"review\", [\"appliance\", \"computer\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
| review | classify |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "computer" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "computer" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "appliance" |
\n```\n:::\n:::\n\n\n::: {#cda91b85 .cell execution_count=3}\n``` {.python .cell-code}\n# Use 'pred_name' to customize the new column's name\nreviews.llm.classify(\"review\", [\"appliance\", \"computer\"], pred_name=\"prod_type\")\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
| review | prod_type |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "computer" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "computer" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "appliance" |
\n```\n:::\n:::\n\n\n::: {#f6d7e2c6 .cell execution_count=4}\n``` {.python .cell-code}\n#Pass a DICT to set custom values for each classification\nreviews.llm.classify(\"review\", {\"appliance\" : \"1\", \"computer\" : \"2\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
| review | classify |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "1" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "2" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "1" |
\n```\n:::\n:::\n\n\n### custom { #mall.MallFrame.custom }\n\n`MallFrame.custom(col, prompt='', valid_resps='', pred_name='custom')`\n\nProvide the full prompt that the LLM will process.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|-------------|--------|----------------------------------------------------------------------------------------|------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `prompt` | str | The prompt to send to the LLM along with the `col` | `''` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'custom'` |\n\n#### Examples\n\n::: {#2c633a89 .cell execution_count=5}\n``` {.python .cell-code}\nmy_prompt = (\n \"Answer a question.\"\n \"Return only the answer, no explanation\"\n \"Acceptable answers are 'yes', 'no'\"\n \"Answer this about the following text, is this a happy customer?:\"\n)\n\nreviews.llm.custom(\"review\", prompt = my_prompt)\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
| review | custom |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "Yes" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "No" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "No" |
\n```\n:::\n:::\n\n\n### extract { #mall.MallFrame.extract }\n\n`MallFrame.extract(col, labels='', expand_cols=False, additional='', pred_name='extract')`\n\nPull a specific label from the text.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|----------------------------------------------------------------------------------------|-------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `labels` | list | A list or a DICT object that tells the LLM what to look for and return | `''` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'extract'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#11a96b13 .cell execution_count=6}\n``` {.python .cell-code}\n# Use 'labels' to let the function know what to extract\nreviews.llm.extract(\"review\", labels = \"product\")\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```{=html}\n
| review | extract |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine" |
\n```\n:::\n:::\n\n\n::: {#33a564f6 .cell execution_count=7}\n``` {.python .cell-code}\n# Use 'pred_name' to customize the new column's name\nreviews.llm.extract(\"review\", \"product\", pred_name = \"prod\")\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```{=html}\n
| review | prod |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine" |
\n```\n:::\n:::\n\n\n::: {#29bc70bf .cell execution_count=8}\n``` {.python .cell-code}\n# Pass a vector to request multiple things, the results will be pipe delimited\n# in a single column\nreviews.llm.extract(\"review\", [\"product\", \"feelings\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n
| review | extract |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv | great" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop|frustration" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine | confusion" |
\n```\n:::\n:::\n\n\n::: {#35587a7e .cell execution_count=9}\n``` {.python .cell-code}\n# Set 'expand_cols' to True to split multiple labels\n# into individual columns\nreviews.llm.extract(\n col=\"review\",\n labels=[\"product\", \"feelings\"],\n expand_cols=True\n )\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```{=html}\n
| review | product | feelings |
|---|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv " | " great" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop" | "frustration" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine " | " confusion" |
\n```\n:::\n:::\n\n\n::: {#bc1572b9 .cell execution_count=10}\n``` {.python .cell-code}\n# Set custom names to the resulting columns\nreviews.llm.extract(\n col=\"review\",\n labels={\"prod\": \"product\", \"feels\": \"feelings\"},\n expand_cols=True\n )\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n```{=html}\n
| review | prod | feels |
|---|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv " | " great" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop" | "frustration" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine " | " confusion" |
\n```\n:::\n:::\n\n\n### sentiment { #mall.MallFrame.sentiment }\n\n`MallFrame.sentiment(col, options=['positive', 'negative', 'neutral'], additional='', pred_name='sentiment')`\n\nUse an LLM to run a sentiment analysis\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------------|----------------------------------------------------------------------------------------|---------------------------------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `options` | list or dict | A list of the sentiment options to use, or a named DICT object | `['positive', 'negative', 'neutral']` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'sentiment'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#16b56226 .cell execution_count=11}\n``` {.python .cell-code}\nreviews.llm.sentiment(\"review\")\n```\n\n::: {.cell-output .cell-output-display execution_count=11}\n```{=html}\n
| review | sentiment |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "positive" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "negative" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "neutral" |
\n```\n:::\n:::\n\n\n::: {#082d1ef7 .cell execution_count=12}\n``` {.python .cell-code}\n# Use 'pred_name' to customize the new column's name\nreviews.llm.sentiment(\"review\", pred_name=\"review_sentiment\")\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n```{=html}\n
| review | review_sentiment |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "positive" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "negative" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "neutral" |
\n```\n:::\n:::\n\n\n::: {#0f2f7a13 .cell execution_count=13}\n``` {.python .cell-code}\n# Pass custom sentiment options\nreviews.llm.sentiment(\"review\", [\"positive\", \"negative\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n
| review | sentiment |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "positive" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "negative" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "negative" |
\n```\n:::\n:::\n\n\n::: {#7bb697be .cell execution_count=14}\n``` {.python .cell-code}\n# Use a DICT object to specify values to return per sentiment\nreviews.llm.sentiment(\"review\", {\"positive\" : \"1\", \"negative\" : \"0\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n
| review | sentiment |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "1" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "0" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "0" |
\n```\n:::\n:::\n\n\n### summarize { #mall.MallFrame.summarize }\n\n`MallFrame.summarize(col, max_words=10, additional='', pred_name='summary')`\n\nSummarize the text down to a specific number of words.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|----------------------------------------------------------------------------------------|-------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `max_words` | int | Maximum number of words to use for the summary | `10` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'summary'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#2690ac20 .cell execution_count=15}\n``` {.python .cell-code}\n# Use max_words to set the maximum number of words to use for the summary\nreviews.llm.summarize(\"review\", max_words = 5)\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```{=html}\n
| review | summary |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "great tv with good features" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop purchase was a mistake" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "feeling uncertain about new purchase" |
\n```\n:::\n:::\n\n\n::: {#62f13bf2 .cell execution_count=16}\n``` {.python .cell-code}\n# Use 'pred_name' to customize the new column's name\nreviews.llm.summarize(\"review\", 5, pred_name = \"review_summary\")\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
| review | review_summary |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "great tv with good features" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop purchase was a mistake" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "feeling uncertain about new purchase" |
\n```\n:::\n:::\n\n\n### translate { #mall.MallFrame.translate }\n\n`MallFrame.translate(col, language='', additional='', pred_name='translation')`\n\nTranslate text into another language.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|----------------------------------------------------------------------------------------|-----------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `language` | str | The target language to translate to. For example 'French'. | `''` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'translation'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#a4d7ae95 .cell execution_count=17}\n``` {.python .cell-code}\nreviews.llm.translate(\"review\", \"spanish\")\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
| review | translation |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "Esta ha sido la mejor televisión que he utilizado hasta ahora. Gran pantalla y sonido." |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "Me arrepiento de comprar este portátil. Es demasiado lento y la tecla es demasiado ruidosa." |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "No estoy seguro de cómo sentirme con mi nueva lavadora. Un color maravilloso, pero muy difícil de en… |
\n```\n:::\n:::\n\n\n::: {#df4fb9ee .cell execution_count=18}\n``` {.python .cell-code}\nreviews.llm.translate(\"review\", \"french\")\n```\n\n::: {.cell-output .cell-output-display execution_count=18}\n```{=html}\n
| review | translation |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "Ceci était la meilleure télévision que j'ai jamais utilisée. Écran et son excellent." |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "Je me regrette d'avoir acheté ce portable. Il est trop lent et le clavier fait trop de bruit." |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "Je ne sais pas comment réagir à mon nouveau lave-linge. Couleur superbe, mais difficile à comprendre… |
\n```\n:::\n:::\n\n\n### use { #mall.MallFrame.use }\n\n`MallFrame.use(backend='', model='', _cache='_mall_cache', **kwargs)`\n\nDefine the model, backend, and other options to use to\ninteract with the LLM.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|------------|--------|--------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|\n| `backend` | str | The name of the backend to use. At the beginning of the session it defaults to \"ollama\". If passing `\"\"`, it will remain unchanged | `''` |\n| `model` | str | The name of the model tha the backend should use. At the beginning of the session it defaults to \"llama3.2\". If passing `\"\"`, it will remain unchanged | `''` |\n| `_cache` | str | The path of where to save the cached results. Passing `\"\"` disables the cache | `'_mall_cache'` |\n| `**kwargs` | | Arguments to pass to the downstream Python call. In this case, the `chat` function in `ollama` | `{}` |\n\n#### Examples\n\n::: {#8ac89991 .cell execution_count=19}\n``` {.python .cell-code}\n# Additional arguments will be passed 'as-is' to the\n# downstream R function in this example, to ollama::chat()\nreviews.llm.use(\"ollama\", \"llama3.2\", seed = 100, temp = 0.1)\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n```\n{'backend': 'ollama',\n 'model': 'llama3.2',\n '_cache': '_mall_cache',\n 'options': {'seed': 100},\n 'seed': 100,\n 'temp': 0.1}\n```\n:::\n:::\n\n\n::: {#ee435769 .cell execution_count=20}\n``` {.python .cell-code}\n# During the Python session, you can change any argument\n# individually and it will retain all of previous\n# arguments used\nreviews.llm.use(temp = 0.3)\n```\n\n::: {.cell-output .cell-output-display execution_count=20}\n```\n{'backend': 'ollama',\n 'model': 'llama3.2',\n '_cache': '_mall_cache',\n 'options': {'seed': 100},\n 'seed': 100,\n 'temp': 0.3}\n```\n:::\n:::\n\n\n::: {#266e2cb3 .cell execution_count=21}\n``` {.python .cell-code}\n# Use _cache to modify the target folder for caching\nreviews.llm.use(_cache = \"_my_cache\")\n```\n\n::: {.cell-output .cell-output-display execution_count=21}\n```\n{'backend': 'ollama',\n 'model': 'llama3.2',\n '_cache': '_my_cache',\n 'options': {'seed': 100},\n 'seed': 100,\n 'temp': 0.3}\n```\n:::\n:::\n\n\n::: {#ab07df94 .cell execution_count=22}\n``` {.python .cell-code}\n# Leave _cache empty to turn off this functionality\nreviews.llm.use(_cache = \"\")\n```\n\n::: {.cell-output .cell-output-display execution_count=22}\n```\n{'backend': 'ollama',\n 'model': 'llama3.2',\n '_cache': '',\n 'options': {'seed': 100},\n 'seed': 100,\n 'temp': 0.3}\n```\n:::\n:::\n\n\n### verify { #mall.MallFrame.verify }\n\n`MallFrame.verify(col, what='', yes_no=[1, 0], additional='', pred_name='verify')`\n\nCheck to see if something is true about the text.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `what` | str | The statement or question that needs to be verified against the provided text | `''` |\n| `yes_no` | list | A positional list of size 2, which contains the values to return if true and false. 
The first position will be used as the 'true' value, and the second as the 'false' value | `[1, 0]` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'verify'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#3f2cbfdf .cell execution_count=23}\n``` {.python .cell-code}\nreviews.llm.verify(\"review\", \"is the customer happy\")\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n
| review | verify |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | 1 |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | 0 |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | 0 |
\n```\n:::\n:::\n\n\n::: {#4899b7b6 .cell execution_count=24}\n``` {.python .cell-code}\n# Use 'yes_no' to modify the 'true' and 'false' values to return\nreviews.llm.verify(\"review\", \"is the customer happy\", [\"y\", \"n\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```{=html}\n
| review | verify |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "y" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "n" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "n" |
\n```\n:::\n:::\n\n\n", + "markdown": "---\ntitle: MallFrame\n---\n\n\n\n`MallFrame(self, df)`\n\nExtension to Polars that add ability to use\nan LLM to run batch predictions over a data frame\n\nWe will start by loading the needed libraries, and\nset up the data frame that will be used in the\nexamples:\n\n\n::: {#e255dd1e .cell execution_count=1}\n``` {.python .cell-code}\nimport mall\nimport polars as pl\npl.Config(fmt_str_lengths=100)\npl.Config.set_tbl_hide_dataframe_shape(True)\npl.Config.set_tbl_hide_column_data_types(True)\ndata = mall.MallData\nreviews = data.reviews\nreviews.llm.use(options = dict(seed = 100))\n```\n:::\n\n\n## Methods\n\n| Name | Description |\n| --- | --- |\n| [classify](#mall.MallFrame.classify) | Classify text into specific categories. |\n| [custom](#mall.MallFrame.custom) | Provide the full prompt that the LLM will process. |\n| [extract](#mall.MallFrame.extract) | Pull a specific label from the text. |\n| [sentiment](#mall.MallFrame.sentiment) | Use an LLM to run a sentiment analysis |\n| [summarize](#mall.MallFrame.summarize) | Summarize the text down to a specific number of words. |\n| [translate](#mall.MallFrame.translate) | Translate text into another language. |\n| [use](#mall.MallFrame.use) | Define the model, backend, and other options to use to |\n| [verify](#mall.MallFrame.verify) | Check to see if something is true about the text. |\n\n### classify { #mall.MallFrame.classify }\n\n`MallFrame.classify(col, labels='', additional='', pred_name='classify')`\n\nClassify text into specific categories.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|-------------------------------------------------------------------------------------------------------------------------|--------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `labels` | list | A list or a DICT object that defines the categories to classify the text as. It will return one of the provided labels. | `''` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'classify'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#7d8996a1 .cell execution_count=2}\n``` {.python .cell-code}\nreviews.llm.classify(\"review\", [\"appliance\", \"computer\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=2}\n```{=html}\n
| review | classify |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "computer" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "computer" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "appliance" |
\n```\n:::\n:::\n\n\n::: {#b192fa51 .cell execution_count=3}\n``` {.python .cell-code}\n# Use 'pred_name' to customize the new column's name\nreviews.llm.classify(\"review\", [\"appliance\", \"computer\"], pred_name=\"prod_type\")\n```\n\n::: {.cell-output .cell-output-display execution_count=3}\n```{=html}\n
| review | prod_type |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "computer" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "computer" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "appliance" |
\n```\n:::\n:::\n\n\n::: {#fd7ef1d2 .cell execution_count=4}\n``` {.python .cell-code}\n#Pass a DICT to set custom values for each classification\nreviews.llm.classify(\"review\", {\"appliance\" : \"1\", \"computer\" : \"2\"})\n```\n\n::: {.cell-output .cell-output-display execution_count=4}\n```{=html}\n
| review | classify |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "1" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "2" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "1" |
\n```\n:::\n:::\n\n\n### custom { #mall.MallFrame.custom }\n\n`MallFrame.custom(col, prompt='', valid_resps='', pred_name='custom')`\n\nProvide the full prompt that the LLM will process.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|-------------|--------|----------------------------------------------------------------------------------------|------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `prompt` | str | The prompt to send to the LLM along with the `col` | `''` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'custom'` |\n\n#### Examples\n\n::: {#dd97345e .cell execution_count=5}\n``` {.python .cell-code}\nmy_prompt = (\n \"Answer a question.\"\n \"Return only the answer, no explanation\"\n \"Acceptable answers are 'yes', 'no'\"\n \"Answer this about the following text, is this a happy customer?:\"\n)\n\nreviews.llm.custom(\"review\", prompt = my_prompt)\n```\n\n::: {.cell-output .cell-output-display execution_count=5}\n```{=html}\n
| review | custom |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "Yes" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "No" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "No" |
\n```\n:::\n:::\n\n\n### extract { #mall.MallFrame.extract }\n\n`MallFrame.extract(col, labels='', expand_cols=False, additional='', pred_name='extract')`\n\nPull a specific label from the text.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|----------------------------------------------------------------------------------------|-------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `labels` | list | A list or a DICT object that tells the LLM what to look for and return | `''` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'extract'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#d0507daf .cell execution_count=6}\n``` {.python .cell-code}\n# Use 'labels' to let the function know what to extract\nreviews.llm.extract(\"review\", labels = \"product\")\n```\n\n::: {.cell-output .cell-output-display execution_count=6}\n```{=html}\n
| review | extract |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine" |
\n```\n:::\n:::\n\n\n::: {#2d24b4f1 .cell execution_count=7}\n``` {.python .cell-code}\n# Use 'pred_name' to customize the new column's name\nreviews.llm.extract(\"review\", \"product\", pred_name = \"prod\")\n```\n\n::: {.cell-output .cell-output-display execution_count=7}\n```{=html}\n
| review | prod |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine" |
\n```\n:::\n:::\n\n\n::: {#45fd5ff8 .cell execution_count=8}\n``` {.python .cell-code}\n# Pass a vector to request multiple things, the results will be pipe delimited\n# in a single column\nreviews.llm.extract(\"review\", [\"product\", \"feelings\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=8}\n```{=html}\n
| review | extract |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv | great" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop|frustration" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine | confusion" |
\n```\n:::\n:::\n\n\n::: {#ad729125 .cell execution_count=9}\n``` {.python .cell-code}\n# Set 'expand_cols' to True to split multiple labels\n# into individual columns\nreviews.llm.extract(\n col=\"review\",\n labels=[\"product\", \"feelings\"],\n expand_cols=True\n )\n```\n\n::: {.cell-output .cell-output-display execution_count=9}\n```{=html}\n
| review | product | feelings |
|---|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv " | " great" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop" | "frustration" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine " | " confusion" |
\n```\n:::\n:::\n\n\n::: {#f510b410 .cell execution_count=10}\n``` {.python .cell-code}\n# Set custom names to the resulting columns\nreviews.llm.extract(\n col=\"review\",\n labels={\"prod\": \"product\", \"feels\": \"feelings\"},\n expand_cols=True\n )\n```\n\n::: {.cell-output .cell-output-display execution_count=10}\n```{=html}\n
| review | prod | feels |
|---|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "tv " | " great" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop" | "frustration" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "washing machine " | " confusion" |
\n```\n:::\n:::\n\n\n### sentiment { #mall.MallFrame.sentiment }\n\n`MallFrame.sentiment(col, options=['positive', 'negative', 'neutral'], additional='', pred_name='sentiment')`\n\nUse an LLM to run a sentiment analysis\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------------|----------------------------------------------------------------------------------------|---------------------------------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `options` | list or dict | A list of the sentiment options to use, or a named DICT object | `['positive', 'negative', 'neutral']` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'sentiment'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#e1a8bc00 .cell execution_count=11}\n``` {.python .cell-code}\nreviews.llm.sentiment(\"review\")\n```\n\n::: {.cell-output .cell-output-display execution_count=11}\n```{=html}\n
| review | sentiment |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "positive" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "negative" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "neutral" |
\n```\n:::\n:::\n\n\n::: {#bf76c32c .cell execution_count=12}\n``` {.python .cell-code}\n# Use 'pred_name' to customize the new column's name\nreviews.llm.sentiment(\"review\", pred_name=\"review_sentiment\")\n```\n\n::: {.cell-output .cell-output-display execution_count=12}\n```{=html}\n
| review | review_sentiment |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "positive" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "negative" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "neutral" |
\n```\n:::\n:::\n\n\n::: {#817144f6 .cell execution_count=13}\n``` {.python .cell-code}\n# Pass custom sentiment options\nreviews.llm.sentiment(\"review\", [\"positive\", \"negative\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=13}\n```{=html}\n
| review | sentiment |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "positive" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "negative" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "negative" |
\n```\n:::\n:::\n\n\n::: {#139c8892 .cell execution_count=14}\n``` {.python .cell-code}\n# Use a DICT object to specify values to return per sentiment\nreviews.llm.sentiment(\"review\", {\"positive\" : 1, \"negative\" : 0})\n```\n\n::: {.cell-output .cell-output-display execution_count=14}\n```{=html}\n
| review | sentiment |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | 1 |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | 0 |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | 0 |
\n```\n:::\n:::\n\n\n### summarize { #mall.MallFrame.summarize }\n\n`MallFrame.summarize(col, max_words=10, additional='', pred_name='summary')`\n\nSummarize the text down to a specific number of words.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|----------------------------------------------------------------------------------------|-------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `max_words` | int | Maximum number of words to use for the summary | `10` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'summary'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#96f10751 .cell execution_count=15}\n``` {.python .cell-code}\n# Use max_words to set the maximum number of words to use for the summary\nreviews.llm.summarize(\"review\", max_words = 5)\n```\n\n::: {.cell-output .cell-output-display execution_count=15}\n```{=html}\n
| review | summary |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "great tv with good features" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop purchase was a mistake" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "feeling uncertain about new purchase" |
\n```\n:::\n:::\n\n\n::: {#1e180aa2 .cell execution_count=16}\n``` {.python .cell-code}\n# Use 'pred_name' to customize the new column's name\nreviews.llm.summarize(\"review\", 5, pred_name = \"review_summary\")\n```\n\n::: {.cell-output .cell-output-display execution_count=16}\n```{=html}\n
| review | review_summary |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "great tv with good features" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "laptop purchase was a mistake" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "feeling uncertain about new purchase" |
\n```\n:::\n:::\n\n\n### translate { #mall.MallFrame.translate }\n\n`MallFrame.translate(col, language='', additional='', pred_name='translation')`\n\nTranslate text into another language.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|----------------------------------------------------------------------------------------|-----------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `language` | str | The target language to translate to. For example 'French'. | `''` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'translation'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#67b462bb .cell execution_count=17}\n``` {.python .cell-code}\nreviews.llm.translate(\"review\", \"spanish\")\n```\n\n::: {.cell-output .cell-output-display execution_count=17}\n```{=html}\n
| review | translation |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "Esta ha sido la mejor televisión que he utilizado hasta ahora. Gran pantalla y sonido." |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "Me arrepiento de comprar este portátil. Es demasiado lento y la tecla es demasiado ruidosa." |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "No estoy seguro de cómo sentirme con mi nueva lavadora. Un color maravilloso, pero muy difícil de en… |
\n```\n:::\n:::\n\n\n::: {#f0529322 .cell execution_count=18}\n``` {.python .cell-code}\nreviews.llm.translate(\"review\", \"french\")\n```\n\n::: {.cell-output .cell-output-display execution_count=18}\n```{=html}\n
| review | translation |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "Ceci était la meilleure télévision que j'ai jamais utilisée. Écran et son excellent." |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "Je me regrette d'avoir acheté ce portable. Il est trop lent et le clavier fait trop de bruit." |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "Je ne sais pas comment réagir à mon nouveau lave-linge. Couleur superbe, mais difficile à comprendre… |
\n```\n:::\n:::\n\n\n### use { #mall.MallFrame.use }\n\n`MallFrame.use(backend='', model='', _cache='_mall_cache', **kwargs)`\n\nDefine the model, backend, and other options to use to\ninteract with the LLM.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|------------|--------|--------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------|\n| `backend` | str | The name of the backend to use. At the beginning of the session it defaults to \"ollama\". If passing `\"\"`, it will remain unchanged | `''` |\n| `model` | str | The name of the model tha the backend should use. At the beginning of the session it defaults to \"llama3.2\". If passing `\"\"`, it will remain unchanged | `''` |\n| `_cache` | str | The path of where to save the cached results. Passing `\"\"` disables the cache | `'_mall_cache'` |\n| `**kwargs` | | Arguments to pass to the downstream Python call. In this case, the `chat` function in `ollama` | `{}` |\n\n#### Examples\n\n::: {#f669b934 .cell execution_count=19}\n``` {.python .cell-code}\n# Additional arguments will be passed 'as-is' to the\n# downstream R function in this example, to ollama::chat()\nreviews.llm.use(\"ollama\", \"llama3.2\", seed = 100, temp = 0.1)\n```\n\n::: {.cell-output .cell-output-display execution_count=19}\n```\n{'backend': 'ollama',\n 'model': 'llama3.2',\n '_cache': '_mall_cache',\n 'options': {'seed': 100},\n 'seed': 100,\n 'temp': 0.1}\n```\n:::\n:::\n\n\n::: {#6a6296ad .cell execution_count=20}\n``` {.python .cell-code}\n# During the Python session, you can change any argument\n# individually and it will retain all of previous\n# arguments used\nreviews.llm.use(temp = 0.3)\n```\n\n::: {.cell-output .cell-output-display execution_count=20}\n```\n{'backend': 'ollama',\n 'model': 'llama3.2',\n '_cache': '_mall_cache',\n 'options': {'seed': 100},\n 'seed': 100,\n 'temp': 0.3}\n```\n:::\n:::\n\n\n::: {#725f81c4 .cell execution_count=21}\n``` {.python .cell-code}\n# Use _cache to modify the target folder for caching\nreviews.llm.use(_cache = \"_my_cache\")\n```\n\n::: {.cell-output .cell-output-display execution_count=21}\n```\n{'backend': 'ollama',\n 'model': 'llama3.2',\n '_cache': '_my_cache',\n 'options': {'seed': 100},\n 'seed': 100,\n 'temp': 0.3}\n```\n:::\n:::\n\n\n::: {#c14035f2 .cell execution_count=22}\n``` {.python .cell-code}\n# Leave _cache empty to turn off this functionality\nreviews.llm.use(_cache = \"\")\n```\n\n::: {.cell-output .cell-output-display execution_count=22}\n```\n{'backend': 'ollama',\n 'model': 'llama3.2',\n '_cache': '',\n 'options': {'seed': 100},\n 'seed': 100,\n 'temp': 0.3}\n```\n:::\n:::\n\n\n### verify { #mall.MallFrame.verify }\n\n`MallFrame.verify(col, what='', yes_no=[1, 0], additional='', pred_name='verify')`\n\nCheck to see if something is true about the text.\n\n#### Parameters\n\n| Name | Type | Description | Default |\n|--------------|--------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------|\n| `col` | str | The name of the text field to process | _required_ |\n| `what` | str | The statement or question that needs to be verified against the provided text | `''` |\n| `yes_no` | list | A positional list of size 2, which contains the values to return if true and false. 
The first position will be used as the 'true' value, and the second as the 'false' value | `[1, 0]` |\n| `pred_name` | str | A character vector with the name of the new column where the prediction will be placed | `'verify'` |\n| `additional` | str | Inserts this text into the prompt sent to the LLM | `''` |\n\n#### Examples\n\n::: {#51fbc07d .cell execution_count=23}\n``` {.python .cell-code}\nreviews.llm.verify(\"review\", \"is the customer happy\")\n```\n\n::: {.cell-output .cell-output-display execution_count=23}\n```{=html}\n
| review | verify |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | 1 |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | 0 |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | 0 |
\n```\n:::\n:::\n\n\n::: {#dbe707bf .cell execution_count=24}\n``` {.python .cell-code}\n# Use 'yes_no' to modify the 'true' and 'false' values to return\nreviews.llm.verify(\"review\", \"is the customer happy\", [\"y\", \"n\"])\n```\n\n::: {.cell-output .cell-output-display execution_count=24}\n```{=html}\n
| review | verify |
|---|---|
| "This has been the best TV I've ever used. Great screen, and sound." | "y" |
| "I regret buying this laptop. It is too slow and the keyboard is too noisy" | "n" |
| "Not sure how to feel about my new washing machine. Great color, but hard to figure" | "n" |
\n```\n:::\n:::\n\n\n", "supporting": [ "MallFrame_files" ], diff --git a/python/mall/polars.py b/python/mall/polars.py index 4a7cfd9..d31c849 100644 --- a/python/mall/polars.py +++ b/python/mall/polars.py @@ -137,7 +137,7 @@ def sentiment( ```{python} # Use a DICT object to specify values to return per sentiment - reviews.llm.sentiment("review", {"positive" : "1", "negative" : "0"}) + reviews.llm.sentiment("review", {"positive" : 1, "negative" : 0}) ``` """ diff --git a/reference/MallFrame.qmd b/reference/MallFrame.qmd index e11b8b3..2da1411 100644 --- a/reference/MallFrame.qmd +++ b/reference/MallFrame.qmd @@ -177,7 +177,7 @@ reviews.llm.sentiment("review", ["positive", "negative"]) ```{python} # Use a DICT object to specify values to return per sentiment -reviews.llm.sentiment("review", {"positive" : "1", "negative" : "0"}) +reviews.llm.sentiment("review", {"positive" : 1, "negative" : 0}) ``` ### summarize { #mall.MallFrame.summarize } From 05dd90d3d41f0011a219e5f7d2ef5edab0c61399 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Mon, 14 Oct 2024 17:04:54 -0500 Subject: [PATCH 03/13] Adds support for a "test" backend, adds backend to hashed call --- python/mall/llm.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/python/mall/llm.py b/python/mall/llm.py index 690d3a5..1dd7dda 100644 --- a/python/mall/llm.py +++ b/python/mall/llm.py @@ -41,7 +41,9 @@ def map_call(df, col, msg, pred_name, use, valid_resps="", convert=None): def llm_call(x, msg, use, preview=False, valid_resps="", convert=None, data_type=None): + backend = use.get("backend") call = dict( + backend=backend, model=use.get("model"), messages=build_msg(x, msg), options=use.get("options"), @@ -56,12 +58,15 @@ def llm_call(x, msg, use, preview=False, valid_resps="", convert=None, data_type cache = cache_check(hash_call, use) if cache == "": - resp = ollama.chat( - model=use.get("model"), - messages=build_msg(x, msg), - options=use.get("options"), - ) - out = resp["message"]["content"] + if backend == "ollama": + resp = ollama.chat( + model=use.get("model"), + messages=build_msg(x, msg), + options=use.get("options"), + ) + out = resp["message"]["content"] + if backend == "test": + out = x else: out = cache From f228201581b7cae8925404207f39dc9cfe178589 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Tue, 15 Oct 2024 10:33:12 -0500 Subject: [PATCH 04/13] First set of tests --- python/tests/__init__.py | 1 + python/tests/test_use.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 python/tests/__init__.py create mode 100644 python/tests/test_use.py diff --git a/python/tests/__init__.py b/python/tests/__init__.py new file mode 100644 index 0000000..570e4df --- /dev/null +++ b/python/tests/__init__.py @@ -0,0 +1 @@ +"Unit tests for mall" \ No newline at end of file diff --git a/python/tests/test_use.py b/python/tests/test_use.py new file mode 100644 index 0000000..bec400d --- /dev/null +++ b/python/tests/test_use.py @@ -0,0 +1,32 @@ +import pytest +import mall +import polars + +def test_use_init(): + data = mall.MallData + reviews = data.reviews + x = reviews.llm.use() + x == dict(backend = "ollama", model = "llama3.2", _cache = "_mall_cache") + +def test_use_mod1(): + data = mall.MallData + reviews = data.reviews + x = reviews.llm.use(options = dict(seed = 100)) + x == dict( + backend = "ollama", + model = "llama3.2", + _cache = "_mall_cache", + options = dict(seed = 100) + ) + +def test_use_mod2(): + data = mall.MallData + reviews = data.reviews + x = 
reviews.llm.use(options = dict(seed = 99)) + x == dict( + backend = "ollama", + model = "llama3.2", + _cache = "_mall_cache", + options = dict(seed = 99) + ) + From be939ef56c0e7036778a044d7798922a169068d5 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Tue, 15 Oct 2024 11:17:36 -0500 Subject: [PATCH 05/13] Starts tests for sentiment --- python/tests/test_sentiment.py | 15 +++++++++++++++ python/tests/test_use.py | 26 +++++++++++--------------- 2 files changed, 26 insertions(+), 15 deletions(-) create mode 100644 python/tests/test_sentiment.py diff --git a/python/tests/test_sentiment.py b/python/tests/test_sentiment.py new file mode 100644 index 0000000..28f7cd4 --- /dev/null +++ b/python/tests/test_sentiment.py @@ -0,0 +1,15 @@ +import pytest +import mall +import polars +import pyarrow + + +def test_use_init(): + data = mall.MallData + reviews = data.reviews + reviews.llm.use("test") + x = reviews.llm.sentiment("review") + assert ( + x.select("sentiment").to_pandas().to_string() + == " sentiment\n0 None\n1 None\n2 None" + ) diff --git a/python/tests/test_use.py b/python/tests/test_use.py index bec400d..90795c1 100644 --- a/python/tests/test_use.py +++ b/python/tests/test_use.py @@ -2,31 +2,27 @@ import mall import polars + def test_use_init(): data = mall.MallData reviews = data.reviews x = reviews.llm.use() - x == dict(backend = "ollama", model = "llama3.2", _cache = "_mall_cache") - + x == dict(backend="ollama", model="llama3.2", _cache="_mall_cache") + + def test_use_mod1(): data = mall.MallData reviews = data.reviews - x = reviews.llm.use(options = dict(seed = 100)) + x = reviews.llm.use(options=dict(seed=100)) x == dict( - backend = "ollama", - model = "llama3.2", - _cache = "_mall_cache", - options = dict(seed = 100) - ) + backend="ollama", model="llama3.2", _cache="_mall_cache", options=dict(seed=100) + ) + def test_use_mod2(): data = mall.MallData reviews = data.reviews - x = reviews.llm.use(options = dict(seed = 99)) + x = reviews.llm.use(options=dict(seed=99)) x == dict( - backend = "ollama", - model = "llama3.2", - _cache = "_mall_cache", - options = dict(seed = 99) - ) - + backend="ollama", model="llama3.2", _cache="_mall_cache", options=dict(seed=99) + ) From a3e66cf693c2b19aa87e8f66960faa9d376b7e16 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Tue, 15 Oct 2024 12:25:06 -0500 Subject: [PATCH 06/13] finishes sentiment tests --- python/tests/test_sentiment.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/python/tests/test_sentiment.py b/python/tests/test_sentiment.py index 28f7cd4..7d15ead 100644 --- a/python/tests/test_sentiment.py +++ b/python/tests/test_sentiment.py @@ -1,10 +1,10 @@ import pytest import mall -import polars +import polars as pl import pyarrow -def test_use_init(): +def test_sentiment_simple(): data = mall.MallData reviews = data.reviews reviews.llm.use("test") @@ -13,3 +13,27 @@ def test_use_init(): x.select("sentiment").to_pandas().to_string() == " sentiment\n0 None\n1 None\n2 None" ) + + +def sim_sentiment(): + df = pl.DataFrame(dict(x=["positive", "negative", "neutral", "not-real"])) + df.llm.use("test") + return df + + +def test_sentiment_valid(): + x = sim_sentiment() + x = x.llm.sentiment("x") + assert ( + x.select("sentiment").to_pandas().to_string() + == " sentiment\n0 positive\n1 negative\n2 neutral\n3 None" + ) + + +def test_sentiment_valid2(): + x = sim_sentiment() + x = x.llm.sentiment("x", ["positive", "negative"]) + assert ( + x.select("sentiment").to_pandas().to_string() + == " sentiment\n0 
positive\n1 negative\n2 None\n3 None" + ) From 40abc3ba0f0a83b2aad44ca0c27f4667bcce95a1 Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Tue, 15 Oct 2024 13:32:44 -0500 Subject: [PATCH 07/13] Adds more complex test backend, and translate test --- python/.coverage | Bin 0 -> 53248 bytes python/mall/llm.py | 10 ++++++++-- python/tests/test_translate.py | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 python/.coverage create mode 100644 python/tests/test_translate.py diff --git a/python/.coverage b/python/.coverage new file mode 100644 index 0000000000000000000000000000000000000000..ef29685db8273743762ed31090dbf93f35c3b81a GIT binary patch literal 53248 zcmeI)&2QUe90zbaPMRid_Y9^flxpj9fv#F#mW_dU8!OPm#LzYvAcREbBu`V1*iLO{ zYbBs`L#hNq;s7^5;*7)}zyUZ8Zk#zV4zL|SfHaWCD#7pPmnPYoDV30>mamoAj{Q9L z^L(DSI4^$w)Nw0Na?bT^GmwY0otmy|N2Sy>El1BGdL~nrR?^7}TIy}<-Bxqjl=H=q zcu^bddruRu4xJE_gC7muKk(h4oqK2CldPGm&<;!xfB*y_@c$NAIW?Fq3=iuU-U!Tc zT?L+5QC>73dg{5^nbWiK^vsjTXJxca?#oM>CMV^L^xX5Zp*%Tf)s?iIs#P%q%c;p= zUL~QMzN+#-N84zqM4GTc8$$XagNYSv7}4 z$bE72ap|c!u2<#M zmp#*|%qxFFn(H~NxRlPtAj+-h+S7q6>y{H{)A6mqaviA_RizoI>V_GdV64kZnw_RyCi1gQyTOG+F`^R(Znv1- z;s$P<*`CX0gR*Spx1%@{axT#*R@1Q-nrgl9CuA$;M3@c7osgUfssZ;6^c5c7rE63x z)uO*pr}tYWvl+PI>k<`ysYtWIx9{rD7Dh+))rGL4Fi)v$1^)Uy#Vl5i`k*QGlbHW`jBqr#!u+}>uU6AaVih9=!OAf@z<);3Nor9qbKZn?9rg*C=k z)Ig{&NP^8ZoeDPs6psTwl$v4EJRV zBP05STv(0b(ux<^jw+OtBt0cX*GeH27=y`-!OxDd28BvGbMa1(^q(OcejLd z5=up$_g#D?6()IgIMlbP)90trk!0S|O7b&mY4V&|r{1JtS{^l7>Vu;ZKytvUNrBC> z+YG|`%P+&~$zx9|z0m<$uM>L;H68}1=6%xjeT#-w9=7~PlC5%V-t@T_^2cP^b?eG> zqOpNu9P{a%mM_CJ+&U^f(K?2|#K)+e8w=Bv3@+TwpQc8av@cq*ZPTg7;}s9X(NU@I z=f=tBi8>LAD95N#`qV^kI%aavcrW)@$?>NeWWq@d`}wWh%BY?#?BB1iG~>n?^)n@( z26fv^k{!QHbySj@cDCxv3F>g^-A;QwVY@aCei@yCJGZC=%aKA7Dt@n%ALk2vUEI*< zfe8W-fB*y_009U<00Izz00bZaf$b+?=qWwR*Z(Q;qb7c%872ro00Izz00bZa0SG_< z0uX=z1h%HYV9MAdlAm}S5V|o^h<^m|(A0xdhw_xGl(?#itK!eCnL(r)0uX=z1Rwwb z2tWV=5P$##AOL|xV9?m3C!YeO^2SIm{t$q_|Ih6BTNB@i4@5~!(kdnhKmY;|fB*y_ z009U<00Izzz%~*nW(>`me!-`k3{0zP&Ge>EdhS_O3H)i>tkU zTEL)R3=Hx2|GHSy=z$3W5P$##AOHafKmY;|fB*y_0D)~NU>HNW@c;kfO-)=EUx-h| zDy?FI00bZa0SG_<0uX=z1Rwwb2teRJ7071NdTwXN>%S*WD+kj@mf!v7vuk7DfBkvx z>WF2tWV=5P$## zAOHafKmY;|*jfU8^cjJk$%Kn^I$Rh=xJaea^Z`Kl{lB(zYh? 
Date: Tue, 15 Oct 2024 13:56:15 -0500 Subject: [PATCH 08/13] Adds summarize tests --- .gitignore | 1 + python/.coverage | Bin 53248 -> 53248 bytes python/tests/test_sentiment.py | 14 ++++++++++++-- python/tests/test_summarize.py | 23 +++++++++++++++++++++++ python/tests/test_translate.py | 2 +- 5 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 python/tests/test_summarize.py diff --git a/.gitignore b/.gitignore index c825b24..d22d76c 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,4 @@ rsconnect/ docs/ python/mall/src/ +python/assets/style.css diff --git a/python/.coverage b/python/.coverage index ef29685db8273743762ed31090dbf93f35c3b81a..0dd860d1d6a5c4195982a9db3610ceec90f82520 100644 GIT binary patch delta 740 zcmZuuO=uHA6rSDLpPA8}7e#HfB?*dDP#Y6!^w3s8&_l5~D1xWSLUUMXqTSsdluB~Y zixsNu*(3)&34%n?LlMP;KL=@UqK8~WQS?%BDGk)wZG=7a^4{Zp-^}~I_v$TNZ{hpH z3VSeePGQeVkIp2SwTj>a+=WY!;=lMyew&}?#9D3dN<6pp0>j7)HfIj*^UI#^Wy2^I zyzQst2&d}!LWFbGZRUFAs_#@}kc5IV(0qJrZtN{X2r;XJ;4;6% zUqPDxhL5nsFK}vgq~RA9K83v?Cl2k6N#3%{CUGr9hGR(FE!_r>nvHVZR#n;LLmyJQ6eNt RrWqx}Xm^{3Gi|Nahkw5B-~j*t delta 417 zcmX|7JxD@P6h7~rd(mfjkGYf;`3Rz+k~G`Og64d45a=d?l1fVuKDUMjjaOr2&=!Q{ z)=(`C1&vJ&wv+`01rqoJbzh;&Irsa%bNJ5v3S}&mu{5k`yCchrb{0Qg6kN1`ZfTEV zG%Z@ye$T7mL5`5LipH4bF}PKUIvGPqbO~oFl@4S!XVKD)Nmp(?o!PLVDq)x@2-)tS zWg2~67XWb42D%e%3eY~qrN^6KAxtX0!AjJ@c@Psl#_{jLEPrfD>sp{hVXn?$R_kp_ zi}N_s1*bevlP>6pwkaT@=jWs`@pyfztwx%i&I*hUpj2k? z()7aT0Rvqgy99d2p)ROW*iv03geO(9P`H}68<@|E-K?9U-^UfEX=3Q_i&O(@Q-eO} pg=*6Oo~pIOar>~kEqv1#y;EHZ59C9cuN!5iZbZANc9F@se*t)IY~ug` diff --git a/python/tests/test_sentiment.py b/python/tests/test_sentiment.py index 7d15ead..04ab2c5 100644 --- a/python/tests/test_sentiment.py +++ b/python/tests/test_sentiment.py @@ -7,7 +7,7 @@ def test_sentiment_simple(): data = mall.MallData reviews = data.reviews - reviews.llm.use("test") + reviews.llm.use("test", "echo") x = reviews.llm.sentiment("review") assert ( x.select("sentiment").to_pandas().to_string() @@ -17,7 +17,7 @@ def test_sentiment_simple(): def sim_sentiment(): df = pl.DataFrame(dict(x=["positive", "negative", "neutral", "not-real"])) - df.llm.use("test") + df.llm.use("test", "echo") return df @@ -37,3 +37,13 @@ def test_sentiment_valid2(): x.select("sentiment").to_pandas().to_string() == " sentiment\n0 positive\n1 negative\n2 None\n3 None" ) + + +def test_sentiment_prompt(): + df = pl.DataFrame(dict(x="x")) + df.llm.use("test", "content") + x = df.llm.sentiment("x") + assert ( + x["sentiment"][0] + == "You are a helpful sentiment engine. Return only one of the following answers: positive, negative, neutral . No capitalization. No explanations. The answer is based on the following text:\n{}" + ) diff --git a/python/tests/test_summarize.py b/python/tests/test_summarize.py new file mode 100644 index 0000000..36bdb37 --- /dev/null +++ b/python/tests/test_summarize.py @@ -0,0 +1,23 @@ +import pytest +import mall +import polars as pl +import pyarrow + + +def test_summarize_prompt(): + df = pl.DataFrame(dict(x="x")) + df.llm.use("test", "content") + x = df.llm.summarize("x") + assert ( + x["summary"][0] + == 'You are a helpful summarization engine. Your answer will contain no no capitalization and no explanations. Return no more than 10 words. The answer is the summary of the following text:\n{}' + ) + +def test_summarize_max(): + df = pl.DataFrame(dict(x="x")) + df.llm.use("test", "content") + x = df.llm.summarize("x", max_words=5) + assert ( + x["summary"][0] + == 'You are a helpful summarization engine. 
Your answer will contain no no capitalization and no explanations. Return no more than 5 words. The answer is the summary of the following text:\n{}' + ) diff --git a/python/tests/test_translate.py b/python/tests/test_translate.py index 768fab2..f7bc592 100644 --- a/python/tests/test_translate.py +++ b/python/tests/test_translate.py @@ -4,7 +4,7 @@ import pyarrow -def test_translate(): +def test_translate_prompt(): df = pl.DataFrame(dict(x="x")) df.llm.use("test", "content") x = df.llm.translate("x", language="spanish") From 9abafd50b068e1ce84b015c71c9655f92d38e63e Mon Sep 17 00:00:00 2001 From: Edgar Ruiz Date: Tue, 15 Oct 2024 14:14:28 -0500 Subject: [PATCH 09/13] Adds summarize tests --- python/.coverage | Bin 53248 -> 53248 bytes python/tests/test_extract.py | 34 +++++++++++++++++++++++++++++++++ python/tests/test_summarize.py | 5 +++-- 3 files changed, 37 insertions(+), 2 deletions(-) create mode 100644 python/tests/test_extract.py diff --git a/python/.coverage b/python/.coverage index 0dd860d1d6a5c4195982a9db3610ceec90f82520..58530c92a58ccf6c779976014d5f9c83255b8683 100644 GIT binary patch delta 413 zcmYk0JuE{}6vy9tx37KoUGM&pCZvlpH4%cLf|4c-8cDlgLqs}=MYKMeN*II%9iGP4 zcC$)eI*^(SN|4xWObosj1~Jqr26wr?lXL#(ch(DRy}%A#VV|(}6qK=zG5F<^Jd&Go zSen+Qwdd5;E?Ig|FePDUVr|A^BHliq3YqqHti$e&S}G^%v{O-`g$%-wvk`TxdauTm zbTxqZN^)U(CP|^7`nx1xf5bq_tb%d5C6Cc8pK*>I8Ii)eva+OHw(OgUZe8@VHMP2h zOf|asSG8AdI{&MBoQ+A3>gr?ZpUHndEyxm_(gEjV3VxqJSivV=@qh#DB8fRnViY3? zlduJ?T)0&371xhv@R)?PW)21?sOLE}34^H7$mK(^+^RjB>C*4tPAU<*km(t`uQUXp z&?|e~jK(DMagBbjM!^AHb#X2=lllTh1z&i_3mtKXTioCRrzoLFC+x%HrjROxQ_2r@ Fd;=tJaW4P> delta 372 zcmX|)F-SsD7=`b9_q}}g*1N}|K^jz=Tr6`igeM3^4WbT3TRAmIOC$|NkV7pMI9ys) zg7(Ow2DgT+L3~@(P(u_&Z8=0sSCf|WAO3Ux|0`B$u}V*jq>tMzf(G_52fug|7h*@u z2+MBO2EAvt8RL``q?-;~>xJd9)eVQqW;&r+N+Z(A444xhO7f~ODxWf@Jk8wej@}J7 zRT$I$u0yh&g(A5G^I}(AA|T#zgMBe8nC&Xkxh)SDCrzH9+nRb7H(AexyAUf%^PPPU zvh-m5#~q9wqXnkoP_3Oq(8rM?_{ImGaf}13A&(pukW!p+_!&1eHq4u^*YGf9y{rt& zQfA01BOq5wQ8P#gNy?*Ad`g)hl#qs}kE)KKBmKIXKD}IVvgIvPGZ+&(R}GFj!4JOh Vi5EQL4lZtSg}S0vQL9&`Bfn?rXdeIo diff --git a/python/tests/test_extract.py b/python/tests/test_extract.py new file mode 100644 index 0000000..80316d0 --- /dev/null +++ b/python/tests/test_extract.py @@ -0,0 +1,34 @@ +import pytest +import mall +import polars as pl +import pyarrow + + +def test_extract_list(): + df = pl.DataFrame(dict(x="x")) + df.llm.use("test", "content") + x = df.llm.extract("x", ["a", "b"]) + assert ( + x["extract"][0] + == "You are a helpful text extraction engine. Extract the a, b being referred to on the text. I expect 2 items exactly. No capitalization. No explanations. Return the response exclusively in a pipe separated list, and no headers. The answer is based on the following text:\n{}" + ) + + +def test_extract_dict(): + df = pl.DataFrame(dict(x="x")) + df.llm.use("test", "content") + x = df.llm.extract("x", dict(a="one", b="two")) + assert ( + x["extract"][0] + == "You are a helpful text extraction engine. Extract the one, two being referred to on the text. I expect 2 items exactly. No capitalization. No explanations. Return the response exclusively in a pipe separated list, and no headers. The answer is based on the following text:\n{}" + ) + + +def test_extract_one(): + df = pl.DataFrame(dict(x="x")) + df.llm.use("test", "content") + x = df.llm.extract("x", labels="a") + assert ( + x["extract"][0] + == "You are a helpful text extraction engine. Extract the a being referred to on the text. I expect 1 item exactly. No capitalization. No explanations. 
+    )
diff --git a/python/tests/test_summarize.py b/python/tests/test_summarize.py
index 36bdb37..ed15d05 100644
--- a/python/tests/test_summarize.py
+++ b/python/tests/test_summarize.py
@@ -10,14 +10,15 @@ def test_summarize_prompt():
     x = df.llm.summarize("x")
     assert (
         x["summary"][0]
-        == 'You are a helpful summarization engine. Your answer will contain no no capitalization and no explanations. Return no more than 10 words. The answer is the summary of the following text:\n{}'
+        == "You are a helpful summarization engine. Your answer will contain no no capitalization and no explanations. Return no more than 10 words. The answer is the summary of the following text:\n{}"
     )
 
+
 def test_summarize_max():
     df = pl.DataFrame(dict(x="x"))
     df.llm.use("test", "content")
     x = df.llm.summarize("x", max_words=5)
     assert (
         x["summary"][0]
-        == 'You are a helpful summarization engine. Your answer will contain no no capitalization and no explanations. Return no more than 5 words. The answer is the summary of the following text:\n{}'
+        == "You are a helpful summarization engine. Your answer will contain no no capitalization and no explanations. Return no more than 5 words. The answer is the summary of the following text:\n{}"
     )

From 0b216ae4b2a429a2642a581479e3e35a345e0664 Mon Sep 17 00:00:00 2001
From: Edgar Ruiz
Date: Tue, 15 Oct 2024 14:41:10 -0500
Subject: [PATCH 10/13] Adds cache reset to all test scripts

---
 python/.coverage               | Bin 53248 -> 53248 bytes
 python/tests/test_extract.py   | 10 +++++++---
 python/tests/test_sentiment.py | 12 +++++++++---
 python/tests/test_summarize.py | 10 ++++++++--
 python/tests/test_translate.py |  8 +++++++-
 5 files changed, 31 insertions(+), 9 deletions(-)

diff --git a/python/.coverage b/python/.coverage
index 58530c92a58ccf6c779976014d5f9c83255b8683..0f46580d4f0355558ec6039d5403a0ab2e2e0154 100644
GIT binary patch
delta 95
zcmV-l0HFVXpaX!Q1F!~w7(M_G`48<6`O6gaZTrPyToO&-j<~_wkqTXY

From: Edgar Ruiz
Date: Tue, 15 Oct 2024 14:41:52 -0500
Subject: [PATCH 11/13] Fixes extract

---
 python/tests/test_extract.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/python/tests/test_extract.py b/python/tests/test_extract.py
index dbd3a40..a320896 100644
--- a/python/tests/test_extract.py
+++ b/python/tests/test_extract.py
@@ -4,13 +4,13 @@
 import pyarrow
 
 import shutil
-
-shutil.rmtree("_test_cache", ignore_errors=True)
-
+import os
+if os.path.exists("_test_cache"):
+    shutil.rmtree("_test_cache", ignore_errors=True)
 
 def test_extract_list():
     df = pl.DataFrame(dict(x="x"))
-    df.llm.use("test", "content", _cache = "_test_cache")
+    df.llm.use("test", "content", _cache="_test_cache")
     x = df.llm.extract("x", ["a", "b"])
     assert (
         x["extract"][0]
@@ -20,7 +20,7 @@
 
 def test_extract_dict():
     df = pl.DataFrame(dict(x="x"))
-    df.llm.use("test", "content", _cache = "_test_cache")
+    df.llm.use("test", "content", _cache="_test_cache")
     x = df.llm.extract("x", dict(a="one", b="two"))
     assert (
         x["extract"][0]
@@ -30,7 +30,7 @@
 
 def test_extract_one():
     df = pl.DataFrame(dict(x="x"))
-    df.llm.use("test", "content", _cache = "_test_cache")
+    df.llm.use("test", "content", _cache="_test_cache")
     x = df.llm.extract("x", labels="a")

From c9be4ab921ac0f808ba43cc81d4a66ccf55fa7b1 Mon Sep 17 00:00:00 2001
From: Edgar Ruiz
Date: Tue, 15 Oct 2024 14:59:03 -0500
Subject: [PATCH 12/13] Adds verify tests

---
 python/tests/test_verify.py | 35 +++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 python/tests/test_verify.py

diff --git a/python/tests/test_verify.py b/python/tests/test_verify.py
new file mode 100644
index 0000000..e4c29c1
--- /dev/null
+++ b/python/tests/test_verify.py
@@ -0,0 +1,35 @@
+import pytest
+import mall
+import polars as pl
+import pyarrow
+
+import shutil
+import os
+
+if os.path.exists("_test_cache"):
+    shutil.rmtree("_test_cache", ignore_errors=True)
+
+
+def sim_verify():
+    df = pl.DataFrame(dict(x=[1,1,0,2]))
+    df.llm.use("test", "echo", _cache="_test_cache")
+    return df
+
+
+def test_verify():
+    x = sim_verify()
+    x = x.llm.verify("x", "this is my question")
+    assert (
+        x.select("verify").to_pandas().to_string()
+        == ' verify\n0 1.0\n1 1.0\n2 0.0\n3 NaN'
+    )
+
+def test_verify_yn():
+    df = pl.DataFrame(dict(x=["y", "n", "y", "x"]))
+    df.llm.use("test", "echo",  _cache="_test_cache")
+    x = df.llm.verify("x", "this is my question", ["y", "n"])
+    assert (
+        x.select("verify").to_pandas().to_string()
+        == ' verify\n0 y\n1 n\n2 y\n3 None'
+    )
+

From 7b12d14d7874222a3bf1351ce44991fc211ed8a4 Mon Sep 17 00:00:00 2001
From: Edgar Ruiz
Date: Tue, 15 Oct 2024 16:01:16 -0500
Subject: [PATCH 13/13] Adds classify tests

---
 python/.coverage               | Bin 53248 -> 53248 bytes
 python/tests/test_classify.py  | 29 +++++++++++++++++++++++++++++
 python/tests/test_summarize.py |  1 -
 python/tests/test_verify.py    | 20 +++++++-------------
 4 files changed, 36 insertions(+), 14 deletions(-)
 create mode 100644 python/tests/test_classify.py

diff --git a/python/.coverage b/python/.coverage
index 0f46580d4f0355558ec6039d5403a0ab2e2e0154..5d98eab8e2a9ae2581806f5d24be716372e4cef2 100644
GIT binary patch
delta 625
zcmY+9PiPZC6vlUVcCtI!nR!u*mC~j`lmw)b_K?yfm4YWNNiG%@p&}9?AR=89q9&v
zWuCE<(nl?^5E~T5Sg(l2nM$M!igR(Qg-vli5yrg+Qc`k%yC}!8coct+BN|dNPJ-w7
z$z!(Nid8pa17a-!lC4XD5q_CJ0U!Sc1-QodbIKOj4R(liNw#&Whf!MnAC2`RyVa-pKDLvnSJxOkc=f_J+&8Q^{|1tkB%*|8w3eL=BZ03@u3l0>|Uxv
zipD6NGEZH)1j`Cczk+gy>hk7rAgLZsq8=7f9tU3|K(PJrB8*OcWCg`k+YmNZAPd1WsuS=
z&2$!7L&PtPHCtU7#Ly54PJPW&(x&^8y&WDkxobJTuWd;Mg*w9t4rsC8*uhV1Vh!K$
O8FgCg4Qlmwx3g#2QG^}<

diff --git a/python/tests/test_classify.py b/python/tests/test_classify.py
new file mode 100644
index 0000000..01a41be
--- /dev/null
+++ b/python/tests/test_classify.py
@@ -0,0 +1,29 @@
+import pytest
+import mall
+import polars as pl
+import pyarrow
+import shutil
+import os
+
+if os.path.exists("_test_cache"):
+    shutil.rmtree("_test_cache", ignore_errors=True)
+
+
+def test_classify():
+    df = pl.DataFrame(dict(x=["one", "two", "three"]))
+    df.llm.use("test", "echo", _cache="_test_cache")
+    x = df.llm.classify("x", ["one", "two"])
+    assert (
+        x.select("classify").to_pandas().to_string()
+        == " classify\n0 one\n1 two\n2 None"
+    )
+
+
+def test_classify_dict():
+    df = pl.DataFrame(dict(x=[1, 2, 3]))
+    df.llm.use("test", "echo", _cache="_test_cache")
+    x = df.llm.classify("x", {"one": 1, "two": 2})
+    assert (
+        x.select("classify").to_pandas().to_string()
+        == " classify\n0 1.0\n1 2.0\n2 NaN"
+    )
diff --git a/python/tests/test_summarize.py b/python/tests/test_summarize.py
index e6b6677..e2182d4 100644
--- a/python/tests/test_summarize.py
+++ b/python/tests/test_summarize.py
@@ -2,7 +2,6 @@
 import mall
 import polars as pl
 import pyarrow
-
 import shutil
 import os
 
diff --git a/python/tests/test_verify.py b/python/tests/test_verify.py
index e4c29c1..58421e7 100644
--- a/python/tests/test_verify.py
+++ b/python/tests/test_verify.py
@@ -2,7 +2,6 @@
 import mall
 import polars as pl
 import pyarrow
-
 import shutil
 import os
 
@@ -10,26 +9,21 @@
shutil.rmtree("_test_cache", ignore_errors=True) -def sim_verify(): - df = pl.DataFrame(dict(x=[1,1,0,2])) - df.llm.use("test", "echo", _cache="_test_cache") - return df - - def test_verify(): - x = sim_verify() - x = x.llm.verify("x", "this is my question") + df = pl.DataFrame(dict(x=[1, 1, 0, 2])) + df.llm.use("test", "echo", _cache="_test_cache") + x = df.llm.verify("x", "this is my question") assert ( x.select("verify").to_pandas().to_string() - == ' verify\n0 1.0\n1 1.0\n2 0.0\n3 NaN' + == " verify\n0 1.0\n1 1.0\n2 0.0\n3 NaN" ) + def test_verify_yn(): df = pl.DataFrame(dict(x=["y", "n", "y", "x"])) - df.llm.use("test", "echo", _cache="_test_cache") + df.llm.use("test", "echo", _cache="_test_cache") x = df.llm.verify("x", "this is my question", ["y", "n"]) assert ( x.select("verify").to_pandas().to_string() - == ' verify\n0 y\n1 n\n2 y\n3 None' + == " verify\n0 y\n1 n\n2 y\n3 None" ) -