Merge pull request #195 from SylphAI-Inc/main

[update] docs
SylphAI-Inc · Sep 1, 2024 · b8cae34 · b8cae34
2 parents ffbba03 + 406a7ee
commit b8cae34
Show file tree

Hide file tree

Showing 59 changed files with 2,783 additions and 789 deletions.
diff --git a/README.md b/README.md
@@ -16,6 +16,7 @@
 </h2>
 
 
+
 <p align="center">
     <a href="https://colab.research.google.com/drive/1TKw_JHE42Z_AWo8UuRYZCO2iuMgyslTZ?usp=sharing">
         <img alt="Try Quickstart in Colab" src="https://colab.research.google.com/assets/colab-badge.svg">
@@ -122,8 +123,7 @@ Because of this, no library can provide out-of-the-box solutions. Users must bui
 <!-- This is what AdalFlow is: light, modular, and robust, with a 100% readable codebase. -->
 
 
-Further reading: [How We Started](https://www.linkedin.com/posts/li-yin-ai_both-ai-research-and-engineering-use-pytorch-activity-7189366364694892544-Uk1U?utm_source=share&utm_medium=member_desktop),
-[Introduction](https://adalflow.sylph.ai/), [Design Philosophy](https://adalflow.sylph.ai/tutorials/lightrag_design_philosophy.html) and [Class hierarchy](https://adalflow.sylph.ai/tutorials/class_hierarchy.html).
+Further reading: [How We Started](https://www.linkedin.com/posts/li-yin-ai_both-ai-research-and-engineering-use-pytorch-activity-7189366364694892544-Uk1U?utm_source=share&utm_medium=member_desktop), <!-- [Introduction](https://adalflow.sylph.ai/),  -->[Design Philosophy](https://adalflow.sylph.ai/tutorials/lightrag_design_philosophy.html) and [Class hierarchy](https://adalflow.sylph.ai/tutorials/class_hierarchy.html).
 
 
 <!--
@@ -171,7 +171,7 @@ Just define it as a ``Parameter`` and pass it to our ``Generator``.
 
 ### **AdalComponent & Trainer**
 
-``AdalComponent`` acts as the `interpreter`  between task pipeline and the trainer, defining training and validation steps, optimizers, evaluators, loss functions, backward engine for textual gradients or tracing the demonstrations, the teacher generator.
+``AdalComponent`` acts as the 'interpreter'  between task pipeline and the trainer, defining training and validation steps, optimizers, evaluators, loss functions, backward engine for textual gradients or tracing the demonstrations, the teacher generator.
 
 <p align="center">
   <img src="https://raw.githubusercontent.com/SylphAI-Inc/LightRAG/main/docs/source/_static/images/trainer.png" alt="AdalFlow AdalComponent & Trainer">
@@ -214,13 +214,14 @@ AdalFlow is named in honor of [Ada Lovelace](https://en.wikipedia.org/wiki/Ada_L
 
 # Acknowledgements
 
-Many existing works greatly inspired this project! Here is a non-exhaustive list:
+Many existing works greatly inspired the AdalFlow library! Here is a non-exhaustive list:
 
 - 📚 [PyTorch](https://github.com/pytorch/pytorch/) for design philosophy and design pattern of ``Component``, ``Parameter``, ``Sequential``.
 - 📚 [Micrograd](https://github.com/karpathy/micrograd): A tiny autograd engine for our auto-differentiative architecture.
 - 📚 [Text-Grad](https://github.com/zou-group/textgrad) for the ``Textual Gradient Descent`` text optimizer.
 - 📚 [DSPy](https://github.com/stanfordnlp/dspy) for inspiring the ``__{input/output}__fields`` in our ``DataClass`` and the bootstrap few-shot optimizer.
 - 📚 [OPRO](https://github.com/google-deepmind/opro) for adding past text instruction along with its accuracy in the text optimizer.
+- 📚 [PyTorch Lightning](https://github.com/Lightning-AI/pytorch-lightning) for the ``AdalComponent`` and ``Trainer``.
 
 # Citation
 

diff --git a/adalflow/CHANGELOG.md b/adalflow/CHANGELOG.md
@@ -1,3 +1,10 @@
+## [0.2.1] - 2024-09-01
+### Added
+- `get_cache_path`: instead of printing the cache path all the time, we add ``get_cache_path`` to get the cache path.
+- Make `huggingface datasets` an optional dependency.
+### Modified
+- Add `template` to let users pass their own template; it must take the same arguments as the default template.
+- Added `checkpoint resume` in `Trainer.diagnose` to show the newest performance and diagnostics on the checkpoint.
 ## [0.2.0] - 2024-08-20
 ### Added
 - Qdrant retriever.

diff --git a/adalflow/adalflow/__init__.py b/adalflow/adalflow/__init__.py
@@ -2,7 +2,7 @@
 
 from adalflow.core.component import Component, fun_to_component
 from adalflow.core.container import Sequential
-from adalflow.core.base_data_class import DataClass
+from adalflow.core.base_data_class import DataClass, DataClassFormatType, required_field
 
 from adalflow.optim.grad_component import GradComponent
 from adalflow.core.generator import Generator
@@ -55,7 +55,11 @@
 __all__ = [
     "Component",
     "fun_to_component",
+    # dataclass
     "DataClass",
+    "DataClassFormatType",
+    "required_field",
+    # Container
     "Sequential",
     "GradComponent",
     "ModelClient",

diff --git a/adalflow/adalflow/components/output_parsers/dataclass_parser.py b/adalflow/adalflow/components/output_parsers/dataclass_parser.py
@@ -90,7 +90,6 @@ def get_output_format_str(self) -> str:
         else:
             schema = self._data_class.to_json_signature(include=self._output_fields)
             output_format_str = Prompt(template=JSON_OUTPUT_FORMAT)(schema=schema)
-        print(f"output_format_str: {output_format_str}")
         return output_format_str
 
     def get_input_str(self, input: DataClass) -> str:

diff --git a/adalflow/adalflow/core/functional.py b/adalflow/adalflow/core/functional.py
@@ -1264,6 +1264,8 @@ def random_sample(
     Randomly sample num_shots from the dataset. If replace is True, sample with replacement.
     """
     dataset_size = len(dataset)
+    if dataset_size == 0:
+        return []
 
     if not replace and num_shots > dataset_size:
         log.debug(

diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
@@ -125,8 +125,6 @@ def __init__(
         )
         self.cache_path = os.path.join(_cache_path, f"cache_{model_str}.db")
 
-        print(f"cache_path: {self.cache_path}")
-
         CachedEngine.__init__(self, cache_path=self.cache_path)
         Component.__init__(self)
         GradComponent.__init__(self)
@@ -167,6 +165,10 @@ def __init__(
         }
         self._teacher: Optional["Generator"] = None
 
+    def get_cache_path(self) -> str:
+        r"""Get the cache path for the generator."""
+        return self.cache_path
+
     @staticmethod
     def _get_default_mapping(
         output: "GeneratorOutput" = None,
@@ -269,11 +271,9 @@ def _compose_model_kwargs(self, **model_kwargs) -> Dict:
         return combined_model_kwargs
 
     def print_prompt(self, **kwargs) -> str:
-        # prompt_kwargs_str = _convert_prompt_kwargs_to_str(kwargs)
         return self.prompt.print_prompt(**kwargs)
 
     def get_prompt(self, **kwargs) -> str:
-        # prompt_kwargs_str = _convert_prompt_kwargs_to_str(kwargs)
         return self.prompt.call(**kwargs)
 
     def _extra_repr(self) -> str:
@@ -420,8 +420,12 @@ def forward(
         if self.mock_output:
             output = GeneratorOutput(data=self.mock_output_data)
         else:
-            if self.teacher_mode:
+            if self.teacher_mode and not isinstance(self, BackwardEngine):
                 if not self._teacher:
+                    print(
+                        f"prompt_kwargs: {prompt_kwargs}, model_kwargs: {model_kwargs}"
+                    )
+                    print(f"names: {self.name}")
                     raise ValueError("Teacher generator is not set.")
                 log.info(f"Using teacher: {self._teacher}")
                 input_args = {
@@ -706,7 +710,6 @@ def _run_callbacks(
             model_kwargs=model_kwargs,
         )
         if output.error:
-            print(f"call back on failure: {output}")
             self.trigger_callbacks(
                 "on_failure",
                 output=output,
@@ -833,6 +836,10 @@ def _extra_repr(self) -> str:
         s = f"model_kwargs={self.model_kwargs}, "
         return s
 
+    def to_dict(self) -> Dict[str, Any]:
+        r"""Convert the generator to a dictionary."""
+        # exclude default functions
+
     @staticmethod
     def failure_message_to_backward_engine(
         gradient_response: GeneratorOutput,
@@ -854,6 +861,8 @@ def __init__(self, **kwargs):
             kwargs = {}
         kwargs["template"] = FEEDBACK_ENGINE_TEMPLATE
         super().__init__(**kwargs)
+        self.name = "BackwardEngine"
+        self.teacher_mode = False
 
     @staticmethod
     def failure_message_to_optimizer(
@@ -954,7 +963,6 @@ def create_teacher_generator(
     call_logger = GeneratorCallLogger(save_dir="traces")
 
     def on_complete(output, input, prompt_kwargs, model_kwargs, logger_call: Callable):
-        print(f"on_complet  output: {output}")
         logger_call(
             output=output,
             input=input,
@@ -963,13 +971,9 @@ def on_complete(output, input, prompt_kwargs, model_kwargs, logger_call: Callabl
         )
 
     for model in [llama3_model, gpt_3_model, gemini_model, claude_model]:
-        print(f"""model: {model["model_kwargs"]["model"]}""")
         generator = Generator(**model)
 
-        print("_kwargs: ", generator._kwargs)
-
         teacher = create_teacher_generator(generator, **claude_model)
-        print(f"teacher: {teacher}")
 
         call_logger.register_generator("generator", "generator_call")
         # setup the callback
@@ -983,7 +987,6 @@ def on_complete(output, input, prompt_kwargs, model_kwargs, logger_call: Callabl
                 "input_str": "Hello, world!",
             }
         )
-        print(f"output: {output}")
         break
 
     # test the backward engine

diff --git a/adalflow/adalflow/datasets/big_bench_hard.py b/adalflow/adalflow/datasets/big_bench_hard.py
@@ -15,13 +15,22 @@
 class BigBenchHard(Dataset):
     __doc__ = """Big Bench Hard dataset for object counting task.
 
+    You can find the task name from the following link:
+    https://github.com/suzgunmirac/BIG-Bench-Hard/tree/main/bbh
+
+
     Data will be saved to ~/.adalflow/cache_datasets/BBH_object_counting/{split}.csv
     if root is not specified.
 
     Size for each split:
     - train: 50 examples
     - val: 50 examples
     - test: 100 examples
+
+    Args:
+        task_name (str): The name of the task. "BBH_{task_name}" is the task name in the dataset.
+        root (str, optional): Root directory of the dataset to save the data. Defaults to ~/.adalflow/cache_datasets/task_name.
+        split (str, optional): The dataset split, supports ``"train"`` (default), ``"val"`` and ``"test"``.
     """
 
     def __init__(
@@ -54,7 +63,6 @@ def __init__(
                 self.data.append(
                     Example(question=row["x"], answer=row["y"], id=row["id"])
                 )  # dont use a tuple, use a dict {"x": ..., "y": ...}
-        self._task_description = "You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value."
 
     def _check_or_download_dataset(self, data_path: str = None, split: str = "train"):
 
@@ -81,6 +89,9 @@ def _check_or_download_dataset(self, data_path: str = None, split: str = "train"
 
         examples = data["examples"]
 
+        # NOTE: better to shuffle the examples before splitting.
+        # We do this splitting in order to be consistent with text-grad paper.
+
         train_examples = [
             {"x": ex["input"], "y": ex["target"], "id": str(uuid.uuid4())}
             for ex in examples[:50]
@@ -91,8 +102,9 @@ def _check_or_download_dataset(self, data_path: str = None, split: str = "train"
         ]
         test_examples = [
             {"x": ex["input"], "y": ex["target"], "id": str(uuid.uuid4())}
-            for ex in examples[100:200]
+            for ex in examples[150:250]
         ]
+        # ensure the
 
         for split, examples in zip(
             ["train", "val", "test"], [train_examples, val_examples, test_examples]
@@ -107,14 +119,16 @@ def __getitem__(self, index) -> Example:
     def __len__(self):
         return len(self.data)
 
-    def get_default_task_instruction(self):
-        return self._task_description
+    @staticmethod
+    def get_default_task_instruction():
+        _task_description = "You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value."
+        return _task_description
 
 
 if __name__ == "__main__":
     from adalflow.datasets.big_bench_hard import BigBenchHard
 
-    dataset = BigBenchHard("BBH_object_counting", split="train")
-    print(dataset[0])
+    dataset = BigBenchHard("BBH_word_sorting", split="train")
+    print(dataset[0:10])
     print(len(dataset))
     print(dataset.get_default_task_instruction())
diff --git a/adalflow/adalflow/eval/llm_as_judge.py b/adalflow/adalflow/eval/llm_as_judge.py
@@ -1,6 +1,6 @@
 """This is the metric to use an LLM as a judge for evaluating the performance of predicted answers."""
 
-from typing import List, Dict, Any, Optional, TYPE_CHECKING
+from typing import List, Dict, Any, Optional, TYPE_CHECKING, Union, Literal
 import logging
 
 if TYPE_CHECKING:
@@ -11,24 +11,26 @@
 
 log = logging.getLogger(__name__)
 
-DEFAULT_LLM_EVALUATOR_PROMPT = r"""
-<<SYS>>{# task desc #}
-You are a helpful assistant.
-Given the question, ground truth answer, and predicted answer, you need to answer the judgement query.
-Output True or False according to the judgement query.<</SYS>>
+DEFAULT_LLM_EVALUATOR_PROMPT = r"""<START_OF_SYSTEM_PROMPT>
+{# task desc #}
+You are an evaluator. Given the question, ground truth answer, and predicted answer,
+{# judgement question #}
+{{judgement_str}}
+<END_OF_SYSTEM_PROMPT>
 ---------------------
+<START_OF_USER>
 {# question #}
 Question: {{question_str}}
 {# ground truth answer #}
 Ground truth answer: {{gt_answer_str}}
 {# predicted answer #}
 Predicted answer: {{pred_answer_str}}
-{# judgement question #}
-Judgement question: {{judgement_str}}
 {# assistant response #}
-You:
+<END_OF_USER>
 """
 
+DEFAULT_JUDGEMENT_QUERY = "Does the predicted answer contain the ground truth answer? Say True if yes, False if no."
+
 
 # print(f"globals: {globals()}")
 
@@ -55,6 +57,10 @@ def __init__(
         self,
         model_client: Optional[ModelClient] = None,
         model_kwargs: Optional[Dict[str, Any]] = None,
+        template: Optional[str] = None,
+        jugement_query: Optional[str] = None,
+        output_type: Literal["bool", "float"] = "bool",
+        use_cache: bool = True,
     ):
         from adalflow.core.generator import Generator
 
@@ -70,15 +76,23 @@ def __init__(
                 )
             self.model_client = OpenAIClient()
         self.model_kwargs = model_kwargs or DEFAULT_LLM_EVALUATOR_MODEL_KWARGS
+        self.template = template or DEFAULT_LLM_EVALUATOR_PROMPT
         self.llm_evaluator = Generator(
             model_client=self.model_client,
             model_kwargs=self.model_kwargs,
-            template=DEFAULT_LLM_EVALUATOR_PROMPT,
+            template=self.template,
+            use_cache=use_cache,
         )
+        self._jugement_query = jugement_query or DEFAULT_JUDGEMENT_QUERY
+        self.output_type = output_type
 
     def call(
-        self, question: str, gt_answer: str, pred_answer: str, judgement_query: str
-    ) -> bool:
+        self,
+        question: str,
+        gt_answer: str,
+        pred_answer: str,
+        judgement_query: Optional[str] = None,
+    ) -> Union[bool, float]:
         r"""
         Get the judgement of the predicted answer for a single question.
 
@@ -91,6 +105,7 @@ def call(
         Returns:
             Union[bool, float]: Judgement result, a bool or a float (1.0/0.0) depending on ``output_type``.
         """
+        judgement_query = judgement_query or self._jugement_query
         output = self.llm_evaluator(
             prompt_kwargs={
                 "question_str": question,
@@ -102,12 +117,15 @@ def call(
 
         judgement = output.raw_response
         judgement = judgement.strip().lower()
+        output = False if self.output_type == "bool" else 0.0
         if "true" in judgement:
-            return True
+            output = True if self.output_type == "bool" else 1.0
         elif "false" in judgement:
-            return False
+            output = False if self.output_type == "bool" else 0.0
         else:
-            raise ValueError(f"Invalid judgement: {judgement}")
+            print(f"Invalid judgement: {judgement}, use False or 0.0 instead.")
+            # raise ValueError(f"Invalid judgement: {judgement}")
+        return output
 
 
 class LLMasJudge: