
Multimodal prototyping #2243

Merged (69 commits, Sep 13, 2024)

Changes from 25 commits

Commits
5b62a52  add WIP hf vlm class (haileyschoelkopf, Jul 2, 2024)
34a079e  add doc_to_image (lintangsutawika, Jul 2, 2024)
8bce8cf  add mmmu tasks (lintangsutawika, Jul 2, 2024)
6cc6e9c  Merge branch 'hailey-multimodal-prototyping' into multimodal-prototyping (haileyschoelkopf, Jul 9, 2024)
e4db76c  Merge branch 'main' into multimodal-prototyping (haileyschoelkopf, Jul 9, 2024)
1c94a54  fix merge conflicts (haileyschoelkopf, Jul 9, 2024)
9692aa0  add lintang's changes to hf_vlms.py (haileyschoelkopf, Jul 9, 2024)
90ba03a  fix doc_to_image (lintangsutawika, Jul 10, 2024)
aa6c50e  added yaml_path for config-loading (lintangsutawika, Jul 10, 2024)
7c76574  revert (lintangsutawika, Jul 10, 2024)
1b9deaa  add line to process str type v (lintangsutawika, Jul 10, 2024)
8db0a47  update (lintangsutawika, Jul 11, 2024)
9b9ca7b  modeling cleanup (haileyschoelkopf, Jul 11, 2024)
df7fee6  merge with lintang-multimodal-prototyping (haileyschoelkopf, Aug 2, 2024)
8d92a68  add aggregation for mmmu (haileyschoelkopf, Aug 2, 2024)
f410d35  rewrite MMMU processing code based on only MMMU authors' repo (doc_to… (haileyschoelkopf, Aug 7, 2024)
ebf54d8  implemented doc_to_image (lintangsutawika, Aug 8, 2024)
941b502  update doc_to_image to accept list of features (lintangsutawika, Aug 8, 2024)
8e4c1d6  update functions (lintangsutawika, Aug 8, 2024)
63bcbc5  readd image processed (lintangsutawika, Aug 10, 2024)
15dda35  update args process (lintangsutawika, Aug 11, 2024)
d811a3a  bugfix for repeated images fed to model (haileyschoelkopf, Aug 23, 2024)
2242ed3  push WIP loglikelihood code (haileyschoelkopf, Sep 3, 2024)
be14ac1  commit most recent code (generative ; qwen2-vl testing) (haileyschoelkopf, Sep 9, 2024)
7516b88  preliminary image_token_id handling (haileyschoelkopf, Sep 9, 2024)
5a65d10  small mmmu update: some qs have >4 mcqa options (haileyschoelkopf, Sep 9, 2024)
54d317d  push updated modeling code (haileyschoelkopf, Sep 10, 2024)
9789f83  merge with most recent main (haileyschoelkopf, Sep 10, 2024)
40a48c2  use processor.apply_chat_template (baberabb, Sep 11, 2024)
8148fe4  add mathvista draft (baberabb, Sep 12, 2024)
38b6fe3  nit (baberabb, Sep 12, 2024)
d348106  nit (baberabb, Sep 12, 2024)
295a825  ensure no footguns in text<>multimodal LM<>task incompatibility (haileyschoelkopf, Sep 12, 2024)
2c9fd79  add notification to readme regarding launch of prototype! (haileyschoelkopf, Sep 12, 2024)
80e1711  fix compatibility check (haileyschoelkopf, Sep 12, 2024)
d84b9fc  reorganize mmmu configs (haileyschoelkopf, Sep 12, 2024)
207778f  chat_template=None (baberabb, Sep 12, 2024)
45d0f3c  add interleave chat_template (baberabb, Sep 12, 2024)
5d55e6c  add condition (baberabb, Sep 12, 2024)
af8382b  add max_images; interleave=true (baberabb, Sep 13, 2024)
f1185a2  nit (baberabb, Sep 13, 2024)
c67d810  testmini_mcq (baberabb, Sep 13, 2024)
5848698  nit (baberabb, Sep 13, 2024)
294dc01  pass image string; convert img (baberabb, Sep 13, 2024)
b45d295  add vllm (baberabb, Sep 13, 2024)
db554e0  add init (baberabb, Sep 13, 2024)
90b4601  vlm add multi attr (baberabb, Sep 13, 2024)
94731a7  fixup (baberabb, Sep 13, 2024)
d5cbc48  pass max images to vllm model init (baberabb, Sep 13, 2024)
3b4ce83  nit (baberabb, Sep 13, 2024)
357cf64  encoding to device (baberabb, Sep 13, 2024)
f9cf90e  fix HFMultimodalLM.chat_template ? (haileyschoelkopf, Sep 13, 2024)
f04be6c  Merge branch 'multimodal-prototyping' of https://github.com/EleutherA… (haileyschoelkopf, Sep 13, 2024)
8349e12  add mmmu readme (baberabb, Sep 13, 2024)
805a115  Merge branch 'main' into multimodal-prototyping (baberabb, Sep 13, 2024)
05f0dd6  remove erroneous prints (haileyschoelkopf, Sep 13, 2024)
4623768  Merge branch 'multimodal-prototyping' of https://github.com/EleutherA… (haileyschoelkopf, Sep 13, 2024)
9ddb2ec  use HFMultimodalLM.chat_template ; restore tasks/__init__.py (haileyschoelkopf, Sep 13, 2024)
d1aadbc  add docstring for replace_placeholders in utils (haileyschoelkopf, Sep 13, 2024)
19d6874  fix `replace_placeholders`; set image_string=None (baberabb, Sep 13, 2024)
5665bc6  fix typo (haileyschoelkopf, Sep 13, 2024)
31e5ab0  cleanup + fix merge conflicts (haileyschoelkopf, Sep 13, 2024)
2d0c1b0  Merge branch 'multimodal-prototyping' of https://github.com/EleutherA… (haileyschoelkopf, Sep 13, 2024)
c0b585d  update MMMU readme (haileyschoelkopf, Sep 13, 2024)
5c0bd54  del mathvista (baberabb, Sep 13, 2024)
a3bb2f1  add some sample scores (haileyschoelkopf, Sep 13, 2024)
5f76efd  Update README.md (haileyschoelkopf, Sep 13, 2024)
b3e87ae  add log msg for image_string value (haileyschoelkopf, Sep 13, 2024)
d85c3b6  Merge branch 'multimodal-prototyping' of https://github.com/EleutherA… (haileyschoelkopf, Sep 13, 2024)
95 changes: 69 additions & 26 deletions lm_eval/api/task.py
@@ -75,6 +75,7 @@ class TaskConfig(dict):
process_docs: Optional[Callable] = None
doc_to_text: Optional[Union[Callable, str]] = None
doc_to_target: Optional[Union[Callable, str]] = None
doc_to_image: Union[Callable, str] = None
doc_to_choice: Optional[Union[Callable, str, dict, list]] = None
process_results: Optional[Union[Callable, str]] = None
use_prompt: Optional[str] = None
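The new `doc_to_image` field is set from a task YAML like the other `doc_to_*` keys. A hypothetical minimal config sketching how an MMMU-style task might use it (task name, dataset path, and column names are illustrative, not taken from this PR):

```yaml
task: mmmu_example_subject        # hypothetical task name
dataset_path: MMMU/MMMU           # assumed HF dataset id
output_type: generate_until
doc_to_text: "{{question}}"
doc_to_target: "answer"
doc_to_image: ["image_1", "image_2"]   # dataset columns holding images
```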
@@ -377,6 +378,10 @@ def doc_to_text(self, doc):
def doc_to_target(self, doc):
pass

# not an abstractmethod because not every language-only task has to implement this
def doc_to_image(self, doc):
raise NotImplementedError

def build_all_requests(
self,
*,
@@ -1042,8 +1047,8 @@ def fewshot_context(
Whether to apply the chat template to the fewshot context.
:param fewshot_as_multiturn: bool
Whether to provide the fewshot examples as a multiturn conversation or a single user turn.
:param chat_template: Callable
Chat template to be applied to the fewshot context.
:param chat_template:
callable (from lm.apply_chat_template) that takes in a list[Dict] chat transcript and renders it into a string.
:returns: str
The fewshot context.
"""
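The revised docstring above describes `chat_template` as a callable that takes a `list[dict]` chat transcript and renders it into a string. A minimal sketch of such a callable (the tag format is illustrative; real templates come from the model's tokenizer or processor):

```python
def simple_chat_template(messages: list[dict]) -> str:
    """Render a list[dict] chat transcript into a single prompt string.

    Mimics the shape of lm.apply_chat_template; the <|role|> markup here is
    a made-up convention, not what any particular tokenizer produces.
    """
    rendered = [f"<|{turn['role']}|>\n{turn['content']}" for turn in messages]
    # leave a trailing assistant tag so the model continues the conversation
    return "\n".join(rendered) + "\n<|assistant|>\n"
```

For example, `simple_chat_template([{"role": "user", "content": "Hi"}])` yields a prompt ending in the assistant tag, matching how `fewshot_context` consumes the callable.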
@@ -1273,9 +1278,34 @@ def doc_to_choice(self, doc: Any) -> List[str]:
else:
raise TypeError

def doc_to_image(self, doc: Any, doc_to_image=None) -> Union[int, str, list]:
if doc_to_image is not None:
doc_to_image = doc_to_image
elif self.config.doc_to_image is not None:
doc_to_image = self.config.doc_to_image
else:
return None

if isinstance(doc_to_image, list):
image_feature = [
self.doc_to_image(doc, feature) for feature in doc_to_image
]
return [feature for feature in image_feature if feature is not None]
elif isinstance(doc_to_image, str):
if doc_to_image in self.features:
return doc[doc_to_image]
else:
return ast.literal_eval(utils.apply_template(doc_to_image, doc))
elif callable(doc_to_image):
return doc_to_image(doc)
else:
return None
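The `doc_to_image` method above dispatches on the spec's type: a list resolves each entry and keeps non-None results, a string is tried as a dataset column and otherwise evaluated as a template, and a callable is applied to the doc directly. A simplified standalone sketch of that dispatch (it checks the doc's keys rather than `self.features`, and skips the Jinja rendering step that `utils.apply_template` performs in the real code):

```python
import ast
from typing import Any, Callable, Optional, Union

def resolve_image(doc: dict, spec: Union[Callable, str, list, None]) -> Optional[Any]:
    """Simplified sketch of ConfigurableTask.doc_to_image's type dispatch."""
    if spec is None:
        return None
    if isinstance(spec, list):
        # resolve each feature, dropping entries that produced no image
        resolved = [resolve_image(doc, s) for s in spec]
        return [r for r in resolved if r is not None]
    if isinstance(spec, str):
        if spec in doc:  # the real code checks self.features, not the doc itself
            return doc[spec]
        # the real code renders the string as a template before literal_eval
        return ast.literal_eval(spec)
    if callable(spec):
        return spec(doc)
    return None
```

This mirrors the precedence in the method: an explicit spec wins, then the config's `doc_to_image`, and a task with neither simply returns None so text-only tasks are unaffected.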

def construct_requests(
self, doc: dict, ctx: str, **kwargs
) -> Union[List[Instance], Instance]:
aux_arguments = None

if self.OUTPUT_TYPE == "loglikelihood":
arguments = (ctx, self.doc_to_target(doc))
elif self.OUTPUT_TYPE == "loglikelihood_rolling":
@@ -1293,6 +1323,37 @@ def construct_requests(
# Otherwise they are placed in the continuation
arguments = [(ctx, f"{target_delimiter}{cont}") for cont in choices]

# TODO: we should raise a warning telling users this will at most ~2x runtime.
if "acc_mutual_info" in self._metric_fn_list.keys():
# if we are calculating multiple choice accuracy
# using mutual information instead of raw loglikelihood as metric, need unconditional lls.

# here mutual info refers to calculating
# log(P(choice|ctx) / P(choice)) = log(P(choice|ctx)) - log(P(choice))
# in other words normalizing by subtracting the unconditional logprob of each choice.
aux_arguments = [("", f"{choice}") for choice in choices]

arguments.extend(aux_arguments)
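The comment above defines the metric as log P(choice|ctx) − log P(choice): the unconditional requests built here supply the second term. A toy illustration of how the two sets of loglikelihoods combine at scoring time (this is not the harness's actual metric code, just the arithmetic it describes):

```python
def acc_mutual_info(cond_lls: list, uncond_lls: list, gold_idx: int) -> float:
    """Score 1.0 if the argmax of log P(choice|ctx) - log P(choice)
    picks the gold choice, else 0.0."""
    scores = [c - u for c, u in zip(cond_lls, uncond_lls)]
    pred = max(range(len(scores)), key=scores.__getitem__)
    return 1.0 if pred == gold_idx else 0.0
```

Subtracting the unconditional term penalizes choices that are likely regardless of the context, which is why the TODO warns that enabling it roughly doubles the number of loglikelihood calls.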

elif self.OUTPUT_TYPE == "generate_until":
arguments = (ctx, deepcopy(self.config.generation_kwargs))

multimodal_arg = {}
if (
self.config.doc_to_image
): # TODO: ensure that non-multimodal tasks aren't getting visual args
multimodal_arg = {
**multimodal_arg,
**{"visual": self.doc_to_image(doc)},
}

if bool(multimodal_arg):
if isinstance(arguments, list):
arguments = [arg + (multimodal_arg,) for arg in arguments]
else:
arguments = arguments + (multimodal_arg,)
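The lines above append a `{"visual": ...}` dict to each request's argument tuple when the task config defines `doc_to_image`. A minimal sketch of that packing step as a standalone helper (the function name is hypothetical; in the real code this logic is inline in `construct_requests`):

```python
def attach_visuals(arguments, visual):
    """Append a multimodal kwargs dict to one argument tuple or a list of
    tuples, mirroring how construct_requests extends `arguments`."""
    if visual is None:
        return arguments  # text-only task: leave requests untouched
    extra = {"visual": visual}
    if isinstance(arguments, list):
        # multiple_choice tasks carry one tuple per choice
        return [arg + (extra,) for arg in arguments]
    return arguments + (extra,)
```

Keeping the visual payload as a trailing dict means downstream model classes can detect its presence without changing the text-only argument layout.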

if self.OUTPUT_TYPE == "multiple_choice":
request_list = [
Instance(
request_type="loglikelihood",
@@ -1303,33 +1364,15 @@ def construct_requests(
)
for i, arg in enumerate(arguments)
]
# TODO: we should raise a warning telling users this will at most ~2x runtime.
if "acc_mutual_info" in self._metric_fn_list.keys():
# if we are calculating multiple choice accuracy
# using mutual information instead of raw loglikelihood as metric, need unconditional lls.

# here mutual info refers to calculating
# log(P(choice|ctx) / P(choice)) = log(P(choice|ctx)) - log(P(choice))
# in other words normalizing by subtracting the unconditional logprob of each choice.
request_list.extend(
[
Instance(
request_type="loglikelihood",
doc=doc,
arguments=("", "{}".format(choice)),
idx=i,
**kwargs,
)
for i, choice in enumerate(choices)
]
)
return request_list

elif self.OUTPUT_TYPE == "generate_until":
arguments = (ctx, deepcopy(self.config.generation_kwargs))

return Instance(
request_type=self.OUTPUT_TYPE, doc=doc, arguments=arguments, idx=0, **kwargs
request_type=self.OUTPUT_TYPE,
doc=doc,
arguments=arguments,
idx=0,
**kwargs,
)

def process_results(self, doc, results):
@@ -1541,7 +1584,7 @@ def __repr__(self):
f"ConfigurableTask(task_name={getattr(self.config, 'task', None)},"
f"output_type={self.OUTPUT_TYPE},"
f"num_fewshot={getattr(self.config, 'num_fewshot', None)},"
f"num_samples={len(self.eval_docs)})"
f"num_samples={len(self.eval_docs)})",
)


1 change: 1 addition & 0 deletions lm_eval/models/__init__.py
@@ -2,6 +2,7 @@
anthropic_llms,
dummy,
gguf,
hf_vlms,
huggingface,
mamba_lm,
nemo_lm,