From e43f12688432e317a0b273f272223b3f894b83de Mon Sep 17 00:00:00 2001 From: Slyne Deng Date: Fri, 5 Jul 2024 10:44:54 -0700 Subject: [PATCH] update Signed-off-by: Slyne Deng --- .../conf/fine_tuning/video_neva/llama3_8b_vita.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/launcher_scripts/conf/fine_tuning/video_neva/llama3_8b_vita.yaml b/launcher_scripts/conf/fine_tuning/video_neva/llama3_8b_vita.yaml index fd5d95289..d1d70e6c7 100644 --- a/launcher_scripts/conf/fine_tuning/video_neva/llama3_8b_vita.yaml +++ b/launcher_scripts/conf/fine_tuning/video_neva/llama3_8b_vita.yaml @@ -91,7 +91,7 @@ model: sample_frames: 4 # for lita 1.5 sample_frames are used for spatial tokens, and spatial tokens will no longer do pooling and instead, it will use full tokens use_lita: True pretrain_mm_mlp_adapter: null # path to pretrained mm adapter - mm_mlp_adapter_type: mlp2x_gelu + mm_mlp_adapter_type: mlp_downsample use_im_start_end: False