Skip to content

Commit

Permalink
code format
Browse files (browse the repository at this point in the history)
Signed-off-by: HuiyingLi <[email protected]>
  • Loading branch information
HuiyingLi committed Jun 3, 2024
1 parent 62a287e commit c6f0364
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 13 deletions.
12 changes: 7 additions & 5 deletions nemo/collections/multimodal/data/neva/neva_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,7 @@ def preprocess_multimodal(sources: dict, multimodal_cfg: dict, cur_token_len: in

return sources


def process_image(processor, image, image_aspect_ratio="square"):
if isinstance(processor, CLIPImageProcessor) or isinstance(processor, SiglipImageProcessor):
# image processor from HF
Expand Down Expand Up @@ -322,12 +323,11 @@ def expand2square(pil_img, background_color):
else:
image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
else:
assert (
image_aspect_ratio == 'square'
), 'NeMo image transform with setting `image_aspect_ratio` to `square`.'
assert image_aspect_ratio == 'square', 'NeMo image transform with setting `image_aspect_ratio` to `square`.'
image = processor(image)
return image


def preprocess_llama_3(
sources: dict,
tokenizer,
Expand Down Expand Up @@ -803,9 +803,11 @@ def preprocess_nv_dpo(
if len(parts) != 2:
break

#handle label if exists
# handle label if exists
labels_match = re.search(rf"{re.escape(DEFAULT_LABELS_TOKEN)}.*?\n", parts[1])
instruction_len = len(tokenizer.text_to_ids(parts[0] + sep + (parts[1][:labels_match.end()] if labels_match else "")))
instruction_len = len(
tokenizer.text_to_ids(parts[0] + sep + (parts[1][: labels_match.end()] if labels_match else ""))
)
round_len = len(tokenizer.text_to_ids(rou + conv.sep))
target[cur_len : cur_len + instruction_len] = IGNORE_INDEX

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,8 @@ def create_vision_encoder_and_processor(self, mm_cfg):
if mm_cfg.vision_encoder.get("from_hf", False):
if "clip" in mm_cfg.vision_encoder.from_pretrained:
vision_encoder = CLIPVisionModel.from_pretrained(
mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16,
mm_cfg.vision_encoder.from_pretrained,
torch_dtype=torch.bfloat16,
).cuda()
vision_encoder = vision_encoder.to(torch.bfloat16)
if mm_cfg.vision_encoder.freeze:
Expand All @@ -312,7 +313,8 @@ def create_vision_encoder_and_processor(self, mm_cfg):
)
elif "siglip" in mm_cfg.vision_encoder.from_pretrained:
vision_encoder = SiglipVisionModel.from_pretrained(
mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16,
mm_cfg.vision_encoder.from_pretrained,
torch_dtype=torch.bfloat16,
).cuda()
vision_encoder = vision_encoder.to(torch.bfloat16)
if mm_cfg.vision_encoder.freeze:
Expand All @@ -323,7 +325,7 @@ def create_vision_encoder_and_processor(self, mm_cfg):
mm_cfg.vision_encoder.from_pretrained, torch_dtype=torch.bfloat16
)
else:
raise(ValueError("Currently only support CLIPVisionModel and SigLipVisionModel from Huggingface"))
raise (ValueError("Currently only support CLIPVisionModel and SigLipVisionModel from Huggingface"))
else:
vision_cfg = MegatronCLIPModel.restore_from(
mm_cfg.vision_encoder.from_pretrained, return_config=True
Expand All @@ -333,7 +335,12 @@ def create_vision_encoder_and_processor(self, mm_cfg):
if mm_cfg.vision_encoder.freeze:
vision_encoder.freeze()
crop_size = mm_cfg.get("crop_size", (224, 224))
image_processor = image_transform(crop_size, is_train=False, mean=None, std=None, )
image_processor = image_transform(
crop_size,
is_train=False,
mean=None,
std=None,
)

return vision_encoder, image_processor

Expand Down Expand Up @@ -1058,8 +1065,9 @@ def build_train_valid_test_datasets(self):
else:
ds_dict = make_supervised_data_module(
tokenizer=self.tokenizer,
image_processor=self.model.module.image_processor if hasattr(self.model,
"module") else self.model.image_processor,
image_processor=(
self.model.module.image_processor if hasattr(self.model, "module") else self.model.image_processor
),
model_cfg=self.cfg,
)
self._train_ds = ds_dict["train_dataset"]
Expand Down
5 changes: 3 additions & 2 deletions nemo/collections/multimodal/parts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,8 +430,9 @@ def image_processor(maybe_image_path):
else:
image = maybe_image_path

processor = model.model.module.image_processor \
if hasattr(model.model, "module") else model.model.image_processor
processor = (
model.model.module.image_processor if hasattr(model.model, "module") else model.model.image_processor
)
image = process_image(processor, image, neva_cfg.data.image_aspect_ratio)
if neva_cfg.precision in [16, '16', '16-mixed']:
media = image.type(torch.float16)
Expand Down

0 comments on commit c6f0364

Please sign in to comment.