diff --git a/code/model/ImageBind/data.py b/code/model/ImageBind/data.py index f0af53d..55e60f7 100644 --- a/code/model/ImageBind/data.py +++ b/code/model/ImageBind/data.py @@ -23,7 +23,7 @@ DEFAULT_AUDIO_FRAME_SHIFT_MS = 10 # in milliseconds -BPE_PATH = "/data/guzhaopeng/PandaGPT/code/model/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz" +BPE_PATH = "./model/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz" def waveform2melspec(waveform, sample_rate, num_mel_bins, target_length): diff --git a/code/scripts/train_all_supervised_cn.sh b/code/scripts/train_all_supervised_cn.sh index ea06e1c..899e448 100644 --- a/code/scripts/train_all_supervised_cn.sh +++ b/code/scripts/train_all_supervised_cn.sh @@ -3,7 +3,7 @@ deepspeed --include localhost:0,1 --master_port 28412 train_all_supervised_cn.py \ --model openllama_peft \ --stage 1\ - --pointbind_ckpt_path ../pretrained_ckpt/pointbind_ckpt/pointbind_i2pmae.pt\ + --imagebind_ckpt_path ../pretrained_ckpt/imagebind_ckpt/imagebind_huge.pth\ --vicuna_ckpt_path ../pretrained_ckpt/vicuna_ckpt/7b_v0/\ --delta_ckpt_path ../pretrained_ckpt/pandagpt_ckpt/7b/pytorch_model.pt\ --max_tgt_len 1024\ diff --git a/code/scripts/train_visa.sh b/code/scripts/train_visa.sh index 0ebf3ee..9713760 100644 --- a/code/scripts/train_visa.sh +++ b/code/scripts/train_visa.sh @@ -3,7 +3,7 @@ deepspeed --include localhost:0,1 --master_port 28412 train_visa.py \ --model openllama_peft \ --stage 1\ - --pointbind_ckpt_path ../pretrained_ckpt/pointbind_ckpt/pointbind_i2pmae.pt\ + --imagebind_ckpt_path ../pretrained_ckpt/imagebind_ckpt/imagebind_huge.pth\ --vicuna_ckpt_path ../pretrained_ckpt/vicuna_ckpt/7b_v0/\ --delta_ckpt_path ../pretrained_ckpt/pandagpt_ckpt/7b/pytorch_model.pt\ --max_tgt_len 1024\ diff --git a/code/train_all_supervised_cn.py b/code/train_all_supervised_cn.py index 54f051a..ed6c0ac 100644 --- a/code/train_all_supervised_cn.py +++ b/code/train_all_supervised_cn.py @@ -13,7 +13,7 @@ def parser_args(): parser.add_argument('--save_path', type=str) parser.add_argument('--log_path', type=str) # model configurations - parser.add_argument('--pointbind_ckpt_path', type=str) # the path that stores the imagebind checkpoint + parser.add_argument('--imagebind_ckpt_path', type=str) # the path that stores the imagebind checkpoint parser.add_argument('--vicuna_ckpt_path', type=str) # the path that stores the vicuna checkpoint parser.add_argument('--delta_ckpt_path', type=str) # the delta parameters trained in stage 1 parser.add_argument('--max_tgt_len', type=int) # the maximum sequence length