Skip to content

Commit

Permalink
Merge pull request #195 from SylphAI-Inc/main
Browse files Browse the repository at this point in the history
[update] docs
  • Loading branch information
Sylph-AI authored Sep 1, 2024
2 parents ffbba03 + 406a7ee commit b8cae34
Show file tree
Hide file tree
Showing 59 changed files with 2,783 additions and 789 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
</h2>



<p align="center">
<a href="https://colab.research.google.com/drive/1TKw_JHE42Z_AWo8UuRYZCO2iuMgyslTZ?usp=sharing">
<img alt="Try Quickstart in Colab" src="https://colab.research.google.com/assets/colab-badge.svg">
Expand Down Expand Up @@ -122,8 +123,7 @@ Because of this, no library can provide out-of-the-box solutions. Users must bui
<!-- This is what AdalFlow is: light, modular, and robust, with a 100% readable codebase. -->


Further reading: [How We Started](https://www.linkedin.com/posts/li-yin-ai_both-ai-research-and-engineering-use-pytorch-activity-7189366364694892544-Uk1U?utm_source=share&utm_medium=member_desktop),
[Introduction](https://adalflow.sylph.ai/), [Design Philosophy](https://adalflow.sylph.ai/tutorials/lightrag_design_philosophy.html) and [Class hierarchy](https://adalflow.sylph.ai/tutorials/class_hierarchy.html).
Further reading: [How We Started](https://www.linkedin.com/posts/li-yin-ai_both-ai-research-and-engineering-use-pytorch-activity-7189366364694892544-Uk1U?utm_source=share&utm_medium=member_desktop), <!-- [Introduction](https://adalflow.sylph.ai/), -->[Design Philosophy](https://adalflow.sylph.ai/tutorials/lightrag_design_philosophy.html) and [Class hierarchy](https://adalflow.sylph.ai/tutorials/class_hierarchy.html).


<!--
Expand Down Expand Up @@ -171,7 +171,7 @@ Just define it as a ``Parameter`` and pass it to our ``Generator``.

### **AdalComponent & Trainer**

``AdalComponent`` acts as the `interpreter` between task pipeline and the trainer, defining training and validation steps, optimizers, evaluators, loss functions, backward engine for textual gradients or tracing the demonstrations, the teacher generator.
``AdalComponent`` acts as the 'interpreter' between the task pipeline and the trainer, defining training and validation steps, optimizers, evaluators, loss functions, the backward engine for textual gradients or tracing the demonstrations, and the teacher generator.

<p align="center">
<img src="https://raw.githubusercontent.com/SylphAI-Inc/LightRAG/main/docs/source/_static/images/trainer.png" alt="AdalFlow AdalComponent & Trainer">
Expand Down Expand Up @@ -214,13 +214,14 @@ AdalFlow is named in honor of [Ada Lovelace](https://en.wikipedia.org/wiki/Ada_L

# Acknowledgements

Many existing works greatly inspired this project! Here is a non-exhaustive list:
Many existing works greatly inspired the AdalFlow library! Here is a non-exhaustive list:

- 📚 [PyTorch](https://github.com/pytorch/pytorch/) for design philosophy and design pattern of ``Component``, ``Parameter``, ``Sequential``.
- 📚 [Micrograd](https://github.com/karpathy/micrograd): A tiny autograd engine for our auto-differentiative architecture.
- 📚 [Text-Grad](https://github.com/zou-group/textgrad) for the ``Textual Gradient Descent`` text optimizer.
- 📚 [DSPy](https://github.com/stanfordnlp/dspy) for inspiring the ``__{input/output}__fields`` in our ``DataClass`` and the bootstrap few-shot optimizer.
- 📚 [OPRO](https://github.com/google-deepmind/opro) for adding past text instruction along with its accuracy in the text optimizer.
- 📚 [PyTorch Lightning](https://github.com/Lightning-AI/pytorch-lightning) for the ``AdalComponent`` and ``Trainer``.

# Citation

Expand Down
7 changes: 7 additions & 0 deletions adalflow/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
## [0.2.1] - 2024-09-01
### Added
- `get_cache_path`: instead of printing the cache path all the time, we add a ``get_cache_path`` method to get the cache path.
- Make `huggingface datasets` an optional dependency.
### Modified
- Add `template` to let users pass their own template; it needs to have the same arguments as the default template.
- Add `checkpoint resume` in the `Trainer.diagnose` to show the newest performance and diagnostics on the checkpoint.
## [0.2.0] - 2024-08-20
### Added
- Qdrant retriever.
Expand Down
6 changes: 5 additions & 1 deletion adalflow/adalflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from adalflow.core.component import Component, fun_to_component
from adalflow.core.container import Sequential
from adalflow.core.base_data_class import DataClass
from adalflow.core.base_data_class import DataClass, DataClassFormatType, required_field

from adalflow.optim.grad_component import GradComponent
from adalflow.core.generator import Generator
Expand Down Expand Up @@ -55,7 +55,11 @@
__all__ = [
"Component",
"fun_to_component",
# dataclass
"DataClass",
"DataClassFormatType",
"required_field",
# Container
"Sequential",
"GradComponent",
"ModelClient",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ def get_output_format_str(self) -> str:
else:
schema = self._data_class.to_json_signature(include=self._output_fields)
output_format_str = Prompt(template=JSON_OUTPUT_FORMAT)(schema=schema)
print(f"output_format_str: {output_format_str}")
return output_format_str

def get_input_str(self, input: DataClass) -> str:
Expand Down
2 changes: 2 additions & 0 deletions adalflow/adalflow/core/functional.py
Original file line number Diff line number Diff line change
Expand Up @@ -1264,6 +1264,8 @@ def random_sample(
Randomly sample num_shots from the dataset. If replace is True, sample with replacement.
"""
dataset_size = len(dataset)
if dataset_size == 0:
return []

if not replace and num_shots > dataset_size:
log.debug(
Expand Down
27 changes: 15 additions & 12 deletions adalflow/adalflow/core/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,6 @@ def __init__(
)
self.cache_path = os.path.join(_cache_path, f"cache_{model_str}.db")

print(f"cache_path: {self.cache_path}")

CachedEngine.__init__(self, cache_path=self.cache_path)
Component.__init__(self)
GradComponent.__init__(self)
Expand Down Expand Up @@ -167,6 +165,10 @@ def __init__(
}
self._teacher: Optional["Generator"] = None

def get_cache_path(self) -> str:
r"""Get the cache path for the generator."""
return self.cache_path

@staticmethod
def _get_default_mapping(
output: "GeneratorOutput" = None,
Expand Down Expand Up @@ -269,11 +271,9 @@ def _compose_model_kwargs(self, **model_kwargs) -> Dict:
return combined_model_kwargs

def print_prompt(self, **kwargs) -> str:
# prompt_kwargs_str = _convert_prompt_kwargs_to_str(kwargs)
return self.prompt.print_prompt(**kwargs)

def get_prompt(self, **kwargs) -> str:
# prompt_kwargs_str = _convert_prompt_kwargs_to_str(kwargs)
return self.prompt.call(**kwargs)

def _extra_repr(self) -> str:
Expand Down Expand Up @@ -420,8 +420,12 @@ def forward(
if self.mock_output:
output = GeneratorOutput(data=self.mock_output_data)
else:
if self.teacher_mode:
if self.teacher_mode and not isinstance(self, BackwardEngine):
if not self._teacher:
print(
f"prompt_kwargs: {prompt_kwargs}, model_kwargs: {model_kwargs}"
)
print(f"names: {self.name}")
raise ValueError("Teacher generator is not set.")
log.info(f"Using teacher: {self._teacher}")
input_args = {
Expand Down Expand Up @@ -706,7 +710,6 @@ def _run_callbacks(
model_kwargs=model_kwargs,
)
if output.error:
print(f"call back on failure: {output}")
self.trigger_callbacks(
"on_failure",
output=output,
Expand Down Expand Up @@ -833,6 +836,10 @@ def _extra_repr(self) -> str:
s = f"model_kwargs={self.model_kwargs}, "
return s

def to_dict(self) -> Dict[str, Any]:
r"""Convert the generator to a dictionary."""
# exclude default functions

@staticmethod
def failure_message_to_backward_engine(
gradient_response: GeneratorOutput,
Expand All @@ -854,6 +861,8 @@ def __init__(self, **kwargs):
kwargs = {}
kwargs["template"] = FEEDBACK_ENGINE_TEMPLATE
super().__init__(**kwargs)
self.name = "BackwardEngine"
self.teacher_mode = False

@staticmethod
def failure_message_to_optimizer(
Expand Down Expand Up @@ -954,7 +963,6 @@ def create_teacher_generator(
call_logger = GeneratorCallLogger(save_dir="traces")

def on_complete(output, input, prompt_kwargs, model_kwargs, logger_call: Callable):
print(f"on_complet output: {output}")
logger_call(
output=output,
input=input,
Expand All @@ -963,13 +971,9 @@ def on_complete(output, input, prompt_kwargs, model_kwargs, logger_call: Callabl
)

for model in [llama3_model, gpt_3_model, gemini_model, claude_model]:
print(f"""model: {model["model_kwargs"]["model"]}""")
generator = Generator(**model)

print("_kwargs: ", generator._kwargs)

teacher = create_teacher_generator(generator, **claude_model)
print(f"teacher: {teacher}")

call_logger.register_generator("generator", "generator_call")
# setup the callback
Expand All @@ -983,7 +987,6 @@ def on_complete(output, input, prompt_kwargs, model_kwargs, logger_call: Callabl
"input_str": "Hello, world!",
}
)
print(f"output: {output}")
break

# test the backward engine
Expand Down
26 changes: 20 additions & 6 deletions adalflow/adalflow/datasets/big_bench_hard.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,22 @@
class BigBenchHard(Dataset):
__doc__ = """Big Bench Hard dataset for object counting task.
You can find the task name from the following link:
https://github.com/suzgunmirac/BIG-Bench-Hard/tree/main/bbh
Data will be saved to ~/.adalflow/cache_datasets/BBH_object_counting/{split}.csv
if root is not specified.
Size for each split:
- train: 50 examples
- val: 50 examples
- test: 100 examples
Args:
task_name (str): The name of the task. "BBH_{task_name}" is the task name in the dataset.
root (str, optional): Root directory of the dataset to save the data. Defaults to ~/.adalflow/cache_datasets/task_name.
split (str, optional): The dataset split, supports ``"train"`` (default), ``"val"`` and ``"test"``.
"""

def __init__(
Expand Down Expand Up @@ -54,7 +63,6 @@ def __init__(
self.data.append(
Example(question=row["x"], answer=row["y"], id=row["id"])
) # dont use a tuple, use a dict {"x": ..., "y": ...}
self._task_description = "You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value."

def _check_or_download_dataset(self, data_path: str = None, split: str = "train"):

Expand All @@ -81,6 +89,9 @@ def _check_or_download_dataset(self, data_path: str = None, split: str = "train"

examples = data["examples"]

# NOTE: better to shuffle the examples before splitting.
# We do this splitting in order to be consistent with text-grad paper.

train_examples = [
{"x": ex["input"], "y": ex["target"], "id": str(uuid.uuid4())}
for ex in examples[:50]
Expand All @@ -91,8 +102,9 @@ def _check_or_download_dataset(self, data_path: str = None, split: str = "train"
]
test_examples = [
{"x": ex["input"], "y": ex["target"], "id": str(uuid.uuid4())}
for ex in examples[100:200]
for ex in examples[150:250]
]
# ensure the

for split, examples in zip(
["train", "val", "test"], [train_examples, val_examples, test_examples]
Expand All @@ -107,14 +119,16 @@ def __getitem__(self, index) -> Example:
def __len__(self):
return len(self.data)

def get_default_task_instruction(self):
return self._task_description
@staticmethod
def get_default_task_instruction():
_task_description = "You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value."
return _task_description


if __name__ == "__main__":
from adalflow.datasets.big_bench_hard import BigBenchHard

dataset = BigBenchHard("BBH_object_counting", split="train")
print(dataset[0])
dataset = BigBenchHard("BBH_word_sorting", split="train")
print(dataset[0:10])
print(len(dataset))
print(dataset.get_default_task_instruction())
48 changes: 33 additions & 15 deletions adalflow/adalflow/eval/llm_as_judge.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""This is the metric to use an LLM as a judge for evaluating the performance of predicted answers."""

from typing import List, Dict, Any, Optional, TYPE_CHECKING
from typing import List, Dict, Any, Optional, TYPE_CHECKING, Union, Literal
import logging

if TYPE_CHECKING:
Expand All @@ -11,24 +11,26 @@

log = logging.getLogger(__name__)

DEFAULT_LLM_EVALUATOR_PROMPT = r"""
<<SYS>>{# task desc #}
You are a helpful assistant.
Given the question, ground truth answer, and predicted answer, you need to answer the judgement query.
Output True or False according to the judgement query.<</SYS>>
DEFAULT_LLM_EVALUATOR_PROMPT = r"""<START_OF_SYSTEM_PROMPT>
{# task desc #}
You are an evaluator. Given the question, ground truth answer, and predicted answer,
{# judgement question #}
{{judgement_str}}
<END_OF_SYSTEM_PROMPT>
---------------------
<START_OF_USER>
{# question #}
Question: {{question_str}}
{# ground truth answer #}
Ground truth answer: {{gt_answer_str}}
{# predicted answer #}
Predicted answer: {{pred_answer_str}}
{# judgement question #}
Judgement question: {{judgement_str}}
{# assistant response #}
You:
<END_OF_USER>
"""

DEFAULT_JUDGEMENT_QUERY = "Does the predicted answer contain the ground truth answer? Say True if yes, False if no."


# print(f"globals: {globals()}")

Expand All @@ -55,6 +57,10 @@ def __init__(
self,
model_client: Optional[ModelClient] = None,
model_kwargs: Optional[Dict[str, Any]] = None,
template: Optional[str] = None,
jugement_query: Optional[str] = None,
output_type: Literal["bool", "float"] = "bool",
use_cache: bool = True,
):
from adalflow.core.generator import Generator

Expand All @@ -70,15 +76,23 @@ def __init__(
)
self.model_client = OpenAIClient()
self.model_kwargs = model_kwargs or DEFAULT_LLM_EVALUATOR_MODEL_KWARGS
self.template = template or DEFAULT_LLM_EVALUATOR_PROMPT
self.llm_evaluator = Generator(
model_client=self.model_client,
model_kwargs=self.model_kwargs,
template=DEFAULT_LLM_EVALUATOR_PROMPT,
template=self.template,
use_cache=use_cache,
)
self._jugement_query = jugement_query or DEFAULT_JUDGEMENT_QUERY
self.output_type = output_type

def call(
self, question: str, gt_answer: str, pred_answer: str, judgement_query: str
) -> bool:
self,
question: str,
gt_answer: str,
pred_answer: str,
judgement_query: Optional[str] = None,
) -> Union[bool, float]:
r"""
Get the judgement of the predicted answer for a single question.
Expand All @@ -91,6 +105,7 @@ def call(
Returns:
bool: Judgement result.
"""
judgement_query = judgement_query or self._jugement_query
output = self.llm_evaluator(
prompt_kwargs={
"question_str": question,
Expand All @@ -102,12 +117,15 @@ def call(

judgement = output.raw_response
judgement = judgement.strip().lower()
output = False if self.output_type == "bool" else 0.0
if "true" in judgement:
return True
output = True if self.output_type == "bool" else 1.0
elif "false" in judgement:
return False
output = False if self.output_type == "bool" else 0.0
else:
raise ValueError(f"Invalid judgement: {judgement}")
print(f"Invalid judgement: {judgement}, use False or 0.0 instead.")
# raise ValueError(f"Invalid judgement: {judgement}")
return output


class LLMasJudge:
Expand Down
Loading

0 comments on commit b8cae34

Please sign in to comment.