Skip to content

Commit

Permalink
Merge pull request #27 from mlverse/updates
Browse files Browse the repository at this point in the history
Adds tests
  • Loading branch information
edgararuiz authored Oct 15, 2024
2 parents 94105c4 + 7b12d14 commit ea0abc0
Show file tree
Hide file tree
Showing 14 changed files with 256 additions and 14 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,4 @@ rsconnect/
docs/

python/mall/src/
python/assets/style.css
4 changes: 2 additions & 2 deletions _freeze/reference/MallFrame/execute-results/html.json

Large diffs are not rendered by default.

Binary file added python/.coverage
Binary file not shown.
32 changes: 22 additions & 10 deletions python/mall/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@ def map_call(df, col, msg, pred_name, use, valid_resps="", convert=None):

def llm_call(x, msg, use, preview=False, valid_resps="", convert=None, data_type=None):

backend = use.get("backend")
model=use.get("model")
call = dict(
model=use.get("model"),
backend=backend,
model=model,
messages=build_msg(x, msg),
options=use.get("options"),
)
Expand All @@ -52,16 +55,24 @@ def llm_call(x, msg, use, preview=False, valid_resps="", convert=None, data_type

cache = ""
if use.get("_cache") != "":

hash_call = build_hash(call)
cache = cache_check(hash_call, use)

if cache == "":
resp = ollama.chat(
model=use.get("model"),
messages=build_msg(x, msg),
options=use.get("options"),
)
out = resp["message"]["content"]
if backend == "ollama":
resp = ollama.chat(
model=use.get("model"),
messages=build_msg(x, msg),
options=use.get("options"),
)
out = resp["message"]["content"]
if backend == "test":
if model=="echo":
out = x
if model=="content":
out = msg[0]["content"]
return(out)
else:
out = cache

Expand All @@ -74,10 +85,11 @@ def llm_call(x, msg, use, preview=False, valid_resps="", convert=None, data_type
if out == label:
out = convert.get(label)

# out = data_type(out)
if data_type == int:
out = data_type(out)

# if out not in valid_resps:
# out = None
if out not in valid_resps and len(valid_resps) > 0:
out = None

return out

Expand Down
2 changes: 1 addition & 1 deletion python/mall/polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def sentiment(
```{python}
# Use a DICT object to specify values to return per sentiment
reviews.llm.sentiment("review", {"positive" : "1", "negative" : "0"})
reviews.llm.sentiment("review", {"positive" : 1, "negative" : 0})
```
"""
Expand Down
1 change: 1 addition & 0 deletions python/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"Unit tests for mall"
29 changes: 29 additions & 0 deletions python/tests/test_classify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pytest
import mall
import polars as pl
import pyarrow
import shutil
import os

if os._exists("_test_cache"):
shutil.rmtree("_test_cache", ignore_errors=True)


def test_classify():
    """classify() with a list of labels: echoed values outside the labels become None."""
    frame = pl.DataFrame({"x": ["one", "two", "three"]})
    frame.llm.use("test", "echo", _cache="_test_cache")
    result = frame.llm.classify("x", ["one", "two"])
    rendered = result.select("classify").to_pandas().to_string()
    assert rendered == " classify\n0 one\n1 two\n2 None"


def test_classify_dict():
    """classify() with a dict of labels: unmatched echoed values come back as NaN."""
    frame = pl.DataFrame({"x": [1, 2, 3]})
    frame.llm.use("test", "echo", _cache="_test_cache")
    result = frame.llm.classify("x", {"one": 1, "two": 2})
    rendered = result.select("classify").to_pandas().to_string()
    assert rendered == " classify\n0 1.0\n1 2.0\n2 NaN"
38 changes: 38 additions & 0 deletions python/tests/test_extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest
import mall
import polars as pl
import pyarrow

import shutil
import os
if os._exists("_test_cache"):
shutil.rmtree("_test_cache", ignore_errors=True)

def test_extract_list():
    """extract() with a list of labels builds the multi-item prompt (echoed by 'content')."""
    frame = pl.DataFrame({"x": "x"})
    frame.llm.use("test", "content", _cache="_test_cache")
    result = frame.llm.extract("x", ["a", "b"])
    prompt = result["extract"][0]
    assert prompt == "You are a helpful text extraction engine. Extract the a, b being referred to on the text. I expect 2 items exactly. No capitalization. No explanations. Return the response exclusively in a pipe separated list, and no headers. The answer is based on the following text:\n{}"


def test_extract_dict():
    """extract() with a dict uses the dict values as the labels in the prompt."""
    frame = pl.DataFrame({"x": "x"})
    frame.llm.use("test", "content", _cache="_test_cache")
    result = frame.llm.extract("x", dict(a="one", b="two"))
    prompt = result["extract"][0]
    assert prompt == "You are a helpful text extraction engine. Extract the one, two being referred to on the text. I expect 2 items exactly. No capitalization. No explanations. Return the response exclusively in a pipe separated list, and no headers. The answer is based on the following text:\n{}"


def test_extract_one():
    """extract() with a single label builds the single-item prompt (no list format clause)."""
    frame = pl.DataFrame({"x": "x"})
    frame.llm.use("test", "content", _cache="_test_cache")
    result = frame.llm.extract("x", labels="a")
    prompt = result["extract"][0]
    assert prompt == "You are a helpful text extraction engine. Extract the a being referred to on the text. I expect 1 item exactly. No capitalization. No explanations. The answer is based on the following text:\n{}"
55 changes: 55 additions & 0 deletions python/tests/test_sentiment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pytest
import mall
import polars as pl
import pyarrow

import shutil
import os

if os._exists("_test_cache"):
shutil.rmtree("_test_cache", ignore_errors=True)


def test_sentiment_simple():
    """sentiment() on the bundled reviews: echoed text matches no valid label, so all None."""
    reviews = mall.MallData.reviews
    reviews.llm.use("test", "echo", _cache="_test_cache")
    out = reviews.llm.sentiment("review")
    rendered = out.select("sentiment").to_pandas().to_string()
    assert rendered == " sentiment\n0 None\n1 None\n2 None"


def sim_sentiment():
    """Build a small frame of sentiment-like values wired to the echo test backend."""
    frame = pl.DataFrame({"x": ["positive", "negative", "neutral", "not-real"]})
    frame.llm.use("test", "echo", _cache="_test_cache")
    return frame


def test_sentiment_valid():
    """Default labels: positive/negative/neutral pass through, anything else -> None."""
    frame = sim_sentiment()
    out = frame.llm.sentiment("x")
    rendered = out.select("sentiment").to_pandas().to_string()
    assert rendered == " sentiment\n0 positive\n1 negative\n2 neutral\n3 None"


def test_sentiment_valid2():
    """A custom label list narrows the valid responses; 'neutral' is now rejected too."""
    frame = sim_sentiment()
    out = frame.llm.sentiment("x", ["positive", "negative"])
    rendered = out.select("sentiment").to_pandas().to_string()
    assert rendered == " sentiment\n0 positive\n1 negative\n2 None\n3 None"


def test_sentiment_prompt():
    """The 'content' test model echoes back the prompt that sentiment() builds."""
    frame = pl.DataFrame({"x": "x"})
    frame.llm.use("test", "content", _cache="_test_cache")
    out = frame.llm.sentiment("x")
    prompt = out["sentiment"][0]
    assert prompt == "You are a helpful sentiment engine. Return only one of the following answers: positive, negative, neutral . No capitalization. No explanations. The answer is based on the following text:\n{}"
29 changes: 29 additions & 0 deletions python/tests/test_summarize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pytest
import mall
import polars as pl
import pyarrow
import shutil
import os

if os._exists("_test_cache"):
shutil.rmtree("_test_cache", ignore_errors=True)


def test_summarize_prompt():
    """Default summarize() prompt caps the answer at 10 words (echoed by 'content')."""
    frame = pl.DataFrame({"x": "x"})
    frame.llm.use("test", "content", _cache="_test_cache")
    out = frame.llm.summarize("x")
    prompt = out["summary"][0]
    assert prompt == "You are a helpful summarization engine. Your answer will contain no no capitalization and no explanations. Return no more than 10 words. The answer is the summary of the following text:\n{}"


def test_summarize_max():
    """max_words= is interpolated into the summarize() prompt."""
    frame = pl.DataFrame({"x": "x"})
    frame.llm.use("test", "content", _cache="_test_cache")
    out = frame.llm.summarize("x", max_words=5)
    prompt = out["summary"][0]
    assert prompt == "You are a helpful summarization engine. Your answer will contain no no capitalization and no explanations. Return no more than 5 words. The answer is the summary of the following text:\n{}"
20 changes: 20 additions & 0 deletions python/tests/test_translate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
import mall
import polars as pl
import pyarrow

import shutil
import os

if os._exists("_test_cache"):
shutil.rmtree("_test_cache", ignore_errors=True)


def test_translate_prompt():
    """language= is interpolated into the translate() prompt (echoed by 'content')."""
    frame = pl.DataFrame({"x": "x"})
    frame.llm.use("test", "content", _cache="_test_cache")
    out = frame.llm.translate("x", language="spanish")
    prompt = out["translation"][0]
    assert prompt == "You are a helpful translation engine. You will return only the translation text, no explanations. The target language to translate to is: spanish. The answer is the translation of the following text:\n{}"
28 changes: 28 additions & 0 deletions python/tests/test_use.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import pytest
import mall
import polars


def test_use_init():
    """llm.use() with no arguments returns the current settings dict."""
    data = mall.MallData
    reviews = data.reviews
    x = reviews.llm.use()
    # BUG FIX: the original built `x == dict(...)` and discarded the result —
    # no assert, so this test could never fail. Exact equality against the
    # defaults is not safe to assert either: MallData.reviews is shared module
    # state, and test files that run earlier (e.g. test_sentiment) switch the
    # backend. Assert the stable structure of the returned settings instead.
    assert isinstance(x, dict)
    assert "backend" in x
    assert "model" in x


def test_use_mod1():
    """Options passed to llm.use() appear in the settings it returns."""
    data = mall.MallData
    reviews = data.reviews
    x = reviews.llm.use(options=dict(seed=100))
    # BUG FIX: the original built a comparison and threw it away (no assert),
    # so the test always passed. Assert the part this call controls — the
    # options just supplied — rather than the full dict, whose other keys
    # depend on state left by earlier test modules.
    assert x["options"] == dict(seed=100)


def test_use_mod2():
    """A second llm.use() call replaces the previously supplied options."""
    data = mall.MallData
    reviews = data.reviews
    x = reviews.llm.use(options=dict(seed=99))
    # BUG FIX: the original built a comparison and threw it away (no assert),
    # so the test always passed. Assert the part this call controls — the
    # options just supplied — rather than the full dict, whose other keys
    # depend on state left by earlier test modules.
    assert x["options"] == dict(seed=99)
29 changes: 29 additions & 0 deletions python/tests/test_verify.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pytest
import mall
import polars as pl
import pyarrow
import shutil
import os

if os._exists("_test_cache"):
shutil.rmtree("_test_cache", ignore_errors=True)


def test_verify():
    """verify() keeps echoed 1/0 answers and turns anything else into NaN."""
    frame = pl.DataFrame({"x": [1, 1, 0, 2]})
    frame.llm.use("test", "echo", _cache="_test_cache")
    out = frame.llm.verify("x", "this is my question")
    rendered = out.select("verify").to_pandas().to_string()
    assert rendered == " verify\n0 1.0\n1 1.0\n2 0.0\n3 NaN"


def test_verify_yn():
    """verify() with custom y/n labels: values outside the labels become None."""
    frame = pl.DataFrame({"x": ["y", "n", "y", "x"]})
    frame.llm.use("test", "echo", _cache="_test_cache")
    out = frame.llm.verify("x", "this is my question", ["y", "n"])
    rendered = out.select("verify").to_pandas().to_string()
    assert rendered == " verify\n0 y\n1 n\n2 y\n3 None"
2 changes: 1 addition & 1 deletion reference/MallFrame.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ reviews.llm.sentiment("review", ["positive", "negative"])

```{python}
# Use a DICT object to specify values to return per sentiment
reviews.llm.sentiment("review", {"positive" : "1", "negative" : "0"})
reviews.llm.sentiment("review", {"positive" : 1, "negative" : 0})
```

### summarize { #mall.MallFrame.summarize }
Expand Down

0 comments on commit ea0abc0

Please sign in to comment.