Skip to content

Commit

Permalink
Merge branch 'main' into jojo-branch
Browse files Browse the repository at this point in the history
  • Loading branch information
jojortz authored Oct 3, 2023
2 parents c940692 + 4f11a4f commit c98fe79
Show file tree
Hide file tree
Showing 14 changed files with 214 additions and 170 deletions.
35 changes: 32 additions & 3 deletions pykoi/application.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
"""Application module."""
import asyncio
import os
import re
import socket
import subprocess
import threading
import time

Expand Down Expand Up @@ -787,13 +790,38 @@ async def read_item(
self._telemetry.capture(start_event)

if self._share:
public_url = ngrok.connect(self._host + ":" + str(self._port))
print("Public URL:", public_url)
import nest_asyncio

nest_asyncio.apply()
command = f"ssh -o StrictHostKeyChecking=no -R 80:{self._host}:{self._port} [email protected]"
process = subprocess.Popen(
command,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
shell=True,
text=True,
)
# Get the public URL without waiting for the process to complete
while True:
line = process.stdout.readline()

if not line:
break
# return url
match = re.search(r"(\bhttp[s]?://[^\s]+)", line)
if match:
public_url = match.group(1)
print("Public URL:", public_url)
break

# The process will continue to run in the background here
import uvicorn

uvicorn.run(app, host=self._host, port=self._port)
print("Stopping server...")
ngrok.disconnect(public_url)

# Once done, you may choose to terminate the ssh process
process.terminate()
else:
import uvicorn

Expand All @@ -810,6 +838,7 @@ def display(self):
"""
Run the application.
"""
print("hey2")
import nest_asyncio

nest_asyncio.apply()
Expand Down
45 changes: 39 additions & 6 deletions pykoi/chat/db/comparator_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

from typing import List, Tuple

import pandas as pd


from pykoi.chat.db.abs_database import AbsDatabase
from pykoi.chat.db.constants import COMPARATOR_CSV_HEADER

Expand Down Expand Up @@ -66,9 +69,7 @@ def update(self, **kwargs) -> None:
"""
Updates the database.
"""
raise NotImplementedError(
"ComparatorQuestionDatabase does not support update."
)
raise NotImplementedError("ComparatorQuestionDatabase does not support update.")

def retrieve_all(self) -> List[Tuple]:
"""
Expand Down Expand Up @@ -177,9 +178,7 @@ def update(self, **kwargs) -> None:
"""
with self._lock:
cursor = self.get_cursor()
cursor.execute(
query, (kwargs["rank"], kwargs["qid"], kwargs["model"])
)
cursor.execute(query, (kwargs["rank"], kwargs["qid"], kwargs["model"]))
self.get_connection().commit()
if self._debug:
rows = self.retrieve_all()
Expand Down Expand Up @@ -239,6 +238,7 @@ def print_table(self, rows: List[Tuple]) -> None:
f"Timestamp: {row[5]}"
)


def save_to_csv(self, csv_file_name="comparator_table"):
"""
This method saves the contents of the RAG table into a CSV file.
Expand All @@ -260,3 +260,36 @@ def save_to_csv(self, csv_file_name="comparator_table"):
writer = csv.writer(file)
writer.writerow(COMPARATOR_CSV_HEADER)
writer.writerows(my_sql_data)

def retrieve_all_question_answers_as_pandas(self) -> pd.DataFrame:
    """Return the joined comparator / comparator_question data as a DataFrame.

    Performs an INNER JOIN of the ``comparator`` table against the
    ``comparator_question`` table on the question id, so every answer row is
    paired with its question text.

    Returns:
        pd.DataFrame: one row per comparator entry, with columns
        ["ID", "Model", "QID", "Question", "Rank", "Answer", "Timestamp"].
    """
    sql = """
        SELECT
            comparator.id,
            comparator.model,
            comparator.qid,
            comparator_question.question,
            comparator.rank,
            comparator.answer,
            comparator.timestamp
        FROM comparator
        INNER JOIN comparator_question
        ON comparator.qid = comparator_question.id;
    """

    # Hold the database lock only for the query itself.
    with self._lock:
        cur = self.get_cursor()
        cur.execute(sql)
        records = cur.fetchall()

    column_names = ["ID", "Model", "QID", "Question", "Rank", "Answer", "Timestamp"]
    return pd.DataFrame(records, columns=column_names)

1 change: 1 addition & 0 deletions pykoi/chat/llm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ class ModelSource(Enum):
OPENAI = "openai"
HUGGINGFACE = "huggingface"
PEFT_HUGGINGFACE = "peft_huggingface"
MLU = "mlu"
49 changes: 49 additions & 0 deletions pykoi/chat/llm/mlu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""MLU HF model."""
from transformers import GenerationConfig
from pykoi.chat.llm.abs_llm import AbsLlm

from transformers import GenerationConfig


class MLUWrapper(AbsLlm):
    """Chat-LLM adapter that serves a model produced by an MLU trainer.

    Wraps a trained HuggingFace-style model (taken from ``trainer.model``)
    behind the ``AbsLlm`` interface so it can be used by the chat components.
    """

    # Identifier used to build the default display name for this wrapper.
    model_source = "mlu_trainer"

    def __init__(self, trainer, tokenizer, name=None):
        """Initialize the wrapper from a finished trainer.

        Args:
            trainer: trainer object exposing the trained model as ``.model``.
            tokenizer: tokenizer matching the trained model.
            name (str, optional): display name; a default is derived from
                ``model_source`` when omitted.
        """
        self._trainer = trainer
        self._model = trainer.model
        self._tokenizer = tokenizer
        self._name = name
        # NOTE(review): hard-coded single-GPU placement; assumes cuda:0 exists.
        self._model.to("cuda:0")
        self._model.eval()  # inference mode: disable dropout etc.
        super().__init__()

    @property
    def name(self):
        """Return the user-supplied name, or a default built from model_source."""
        if self._name:
            return self._name
        return "_".join([str(MLUWrapper.model_source), "trainer_model"])

    def predict(self, message: str, num_of_response: int = 1):
        """Generate completions for ``message``.

        Args:
            message (str): the sentence to complete.
            num_of_response (int): number of sequences to return.

        Returns:
            list[str]: one decoded completion per generated sequence that
            contains a newline (see filter note below).
        """
        MAX_RESPONSE = 100  # cap on newly generated tokens per sequence
        prompt_template = """Below is a sentence that you need to complete. Write a response that appropriately completes the request. Sentence: {instruction}\n Response:"""

        generation_output = self._model.generate(
            input_ids=self._tokenizer(
                prompt_template.format(instruction=message), return_tensors="pt"
            )["input_ids"].cuda(),
            generation_config=GenerationConfig(
                do_sample=False, num_beams=2
            ),  # Match the standalone function
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=MAX_RESPONSE,
            num_return_sequences=num_of_response,
        )

        response = [
            self._tokenizer.decode(seq, skip_special_tokens=True)
            for seq in generation_output.sequences
        ]
        # NOTE(review): keeps only the text after the first newline and silently
        # drops sequences without one — fragile if the prompt format changes.
        response = [resp.split("\n")[1] for resp in response if "\n" in resp]
        return response
8 changes: 5 additions & 3 deletions pykoi/chat/llm/model_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,11 @@ def create_model(model_source: Union[str, ModelSource], **kwargs) -> AbsLlm:
from pykoi.chat.llm.peft_huggingface import PeftHuggingfacemodel

return PeftHuggingfacemodel(**kwargs)
elif model_source == ModelSource.MLU:
from pykoi.chat.llm.mlu import MLUWrapper

return MLUWrapper(**kwargs)
else:
raise ValueError(
f"[llm_factory]: Unknown model source {model_source}"
)
raise ValueError(f"[llm_factory]: Unknown model source {model_source}")
except ValueError as ex:
raise ValueError("[llm_factory]: initialize model failure") from ex
15 changes: 8 additions & 7 deletions pykoi/component/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from pykoi.component.chatbot_database_factory import ChatbotDatabaseFactory
from pykoi.component.constants import FeedbackType
from pykoi.chat.db.comparator_database import ComparatorDatabase
from pykoi.chat.db.qa_database import QuestionAnswerDatabase
from pykoi.chat.db.rag_database import RAGDatabase
from pykoi.chat.db.ranking_database import RankingDatabase
Expand Down Expand Up @@ -42,9 +43,7 @@ class Component:
props (Dict[str, Any]): Additional properties for the component.
"""

def __init__(
self, fetch_func: Optional[Callable], svelte_component: str, **kwargs
):
def __init__(self, fetch_func: Optional[Callable], svelte_component: str, **kwargs):
"""
Initialize a new instance of Component.
Expand All @@ -54,9 +53,7 @@ def __init__(
kwargs: Additional properties for the component.
"""
self.id = str(uuid.uuid4()) # Generate a unique ID
self.data_source = (
DataSource(self.id, fetch_func) if fetch_func else None
)
self.data_source = DataSource(self.id, fetch_func) if fetch_func else None
self.svelte_component = svelte_component
self.props = kwargs

Expand All @@ -81,6 +78,7 @@ def __init__(self, fetch_func: Callable, value_column: List[str], **kwargs):
super().__init__(fetch_func, "Dropdown", **kwargs)
self.value_column = value_column


class RAG(Component):
"""
RAG class represents a RAG component.
Expand Down Expand Up @@ -138,5 +136,8 @@ def __init__(self, database: any, **kwargs):
database (QuestionAnswerDatabase): The database to use for the dashboard.
kwargs: Additional properties for the dashboard.
"""
super().__init__(None, "Feedback", **kwargs)
if isinstance(database, ComparatorDatabase):
super().__init__(None, "CompareDashboard", **kwargs)
else:
super().__init__(None, "Feedback", **kwargs)
self.database = database
70 changes: 70 additions & 0 deletions pykoi/frontend/dist/assets/index-42540648.js

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pykoi/frontend/dist/assets/index-c2d4a9bc.css

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pykoi/frontend/src/App.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import QuestionRating from "./lib/Annotations/QuestionRating.svelte";
import RankedChatbot from "./lib/Chatbots/RankedChatbot.svelte";
import RAG from "./lib/RAG/RAG.svelte";
import FeedbackWrapper from "./lib/Dashboards/FeedbackWrapper.svelte";
import FeedbackWrapper from "./lib/Dashboards/FeedbackWrapper.svelte";
const components = writable([]);
const selectedPage = writable(null);
Expand All @@ -18,6 +18,7 @@
Dropdown: Dropdown,
Feedback: FeedbackWrapper,
Compare: ComparisonChat,
CompareDashboard: ComparisonChart,
RetrievalQA: RAG,
};
Expand Down
29 changes: 0 additions & 29 deletions pykoi/frontend/src/lib/Comparison/BumpChart.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,7 @@
import { line } from "d3-shape";
import { comparisonData } from "./store";
$: firstData = $comparisonData
.filter((d) => d.qid === 3)
.map((d) => ({ model: d.model, rank: d.rank }));
$: console.log("firstData", $comparisonData);
$: models = Array.from(new Set($comparisonData.map((d) => d.model)));
$: console.log("models", models);
let outerHeight;
let outerWidth;
Expand Down Expand Up @@ -46,18 +39,11 @@
$: pathLine = line()
.x((d) => xScale(d.qid))
.y((d) => yScale(d.rank));
// .curve(curveBasis)
$: modelData = models.map((model) =>
$comparisonData.filter((d) => d.model === model)
);
$: console.log("md", modelData);
$: console.log(
"ranks",
$comparisonData.map((d) => d.rank)
);
$: xTickArray =
xScale.domain().length > 10
? xScale.domain().filter((_, index) => index % 2 === 0)
Expand Down Expand Up @@ -122,21 +108,6 @@
</g>
{/if}
{/each}
<!-- y-ticks -->
{#each yScale.domain() as tick}
<g transform={`translate(${margin.left} ${yScale(tick) + 0})`}>
<!-- <text
class="axis-text"
x="-5"
y="0"
text-anchor="end"
dominant-baseline="middle"
>{firstData
.filter((d) => d.rank == tick)
.map((d) => d.model)[0]}</text
> -->
</g>
{/each}

<!-- axis labels -->
<text
Expand Down
Loading

0 comments on commit c98fe79

Please sign in to comment.