Skip to content

Commit

Permalink
update to fix bbox error
Browse files Browse the repository at this point in the history
  • Loading branch information
Ubuntu committed Nov 7, 2024
1 parent dbe5606 commit 6b1e7a0
Show file tree
Hide file tree
Showing 43 changed files with 2,726 additions and 5,780 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
datasets/*
refer/data/*
pretrained_weights/*
data/aihub_csv_error_csv
run_scripts/finetune/polyformer_b_aihub_indoor_checkpoints
run_scripts/finetune/polyformer_b_aihub_indoor_logs
run_scripts/finetune/polyformer_b_aihub_manufact_checkpoints
Expand Down
Binary file modified __pycache__/trainer.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/activations.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/configuration_bert.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/configuration_utils.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/file_utils.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/generation_utils.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/modeling_bert.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/modeling_utils.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/tokenization_bert.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/tokenization_utils.cpython-37.pyc
Binary file not shown.
Binary file modified bert/__pycache__/tokenization_utils_base.cpython-37.pyc
Binary file not shown.
Binary file modified criterions/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file modified data/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file modified data/__pycache__/base_dataset.cpython-37.pyc
Binary file not shown.
Binary file modified data/__pycache__/data_utils.cpython-37.pyc
Binary file not shown.
Binary file modified data/__pycache__/file_dataset.cpython-37.pyc
Binary file not shown.
Binary file modified data/__pycache__/poly_utils.cpython-37.pyc
Binary file not shown.
Binary file modified data/__pycache__/refcoco_dataset.cpython-37.pyc
Binary file not shown.
Binary file modified data/__pycache__/refcoco_pretrain_dataset.cpython-37.pyc
Binary file not shown.
65 changes: 59 additions & 6 deletions data/create_aihub_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from PIL import Image
import random
import os
import pandas as pd
import glob
from tqdm import tqdm

import pickle
Expand All @@ -16,8 +18,8 @@

data_root = './refer/data'
# datasets = ['refcoco', 'refcoco+', 'refcocog']
# datasets = ['aihub_indoor']
datasets = ['aihub_manufact']
datasets = ['aihub_indoor']
# datasets = ['aihub_manufact']

if datasets[0] == 'aihub_indoor':
image_dir = './refer/data/aihub_refcoco_format/indoor_80/images'
Expand All @@ -28,6 +30,28 @@
val_test_files = pickle.load(open("data/val_test_files.p", "rb"))


# Select the directory that holds the corrected-bbox CSV files for the chosen dataset.
if datasets[0] == 'aihub_indoor':
    csv_dir = 'data/aihub_csv_error_csv/indoor'
elif datasets[0] == 'aihub_manufact':
    csv_dir = 'data/aihub_csv_error_csv/manufact'
else:
    # Fail with an explicit message instead of a NameError on the glob call below.
    raise ValueError(f'No bbox-correction CSV directory configured for dataset {datasets[0]!r}')
csv_files = glob.glob(f'{csv_dir}/*.csv')

# Maps '<prefix><파일명 value>' (prefix is 'real_' or 'syn_') to the value of the
# 'bbox' column, merged across every CSV file found in csv_dir.
bbox_dict = {}

for csv_file in csv_files:
    bbox_data = pd.read_csv(csv_file)

    # Decide the prefix from the CSV's own file name only, so that a directory
    # component containing "real_" cannot misclassify a synthetic-data CSV.
    prefix = "real_" if "real_" in os.path.basename(csv_file) else "syn_"

    # Build the prefixed keys without mutating the DataFrame; entries from later
    # CSV files overwrite earlier ones that share the same key.
    prefixed_names = [f'{prefix}{name}' for name in bbox_data['파일명']]
    bbox_dict.update(zip(prefixed_names, bbox_data['bbox']))


combined_train_data = []

Expand All @@ -50,7 +74,7 @@
splits = ['train', 'val', 'test']
splitBy = None

save_dir = f'datasets/finetune/{dataset}'
save_dir = f'datasets/finetune/{dataset}_bbox_fix'
os.makedirs(save_dir, exist_ok=True)
for split in splits:
num_pts = []
Expand Down Expand Up @@ -78,6 +102,10 @@
fn = this_img['file_name']
img_id = fn.split(".")[0].split("_")[-1]

# Determine the appropriate prefix for file_name_key
prefix = fn.split(".")[0].split("_")[0] + "_"
file_name_key = f"{prefix}{img_id}"

# load image
img = Image.open(os.path.join(image_dir, this_img['file_name'])).convert("RGB")

Expand Down Expand Up @@ -114,9 +142,34 @@
pts_string_interpolated = polygons_to_string(polygons_interpolated)

# load box
box = refer.getRefBox(this_ref_id) # x,y,w,h
x, y, w, h = box
box_string = f'{x},{y},{x + w},{y + h}'
if file_name_key in bbox_dict:
print('bbox dict')
# Update bbox value based on CSV data
x1, y1, x2, y2 = map(int, bbox_dict[file_name_key].split(','))
box_string = f'{x1},{y1},{x2},{y2}'
else:
box = refer.getRefBox(this_ref_id) # x,y,w,h
# Fallback to the default logic if not in combined CSV data
if prefix == "real_":
x, y, w, h = box
box_string = f'{x},{y},{x + w},{y + h}'
elif prefix == "syn_":
x1, y1, x2, y2 = box
box_string = f'{x1},{y1},{x2},{y2}'
else:
print("Image must be either real or syn")
exit()
# box = refer.getRefBox(this_ref_id) # x,y,w,h
# print(fn.split(".")[0].split("_")[0])
# if fn.split(".")[0].split("_")[0] == "real":
# x, y, w, h = box
# box_string = f'{x},{y},{x + w},{y + h}'
# elif fn.split(".")[0].split("_")[0] == "syn":
# x1, y1, x2, y2 = box
# box_string = f'{x1},{y1},{x2},{y2}'
# else:
# print("Image must be either real or syn")
# exit()

max_num_pts = max(max_num_pts, check_length(polygons))

Expand Down
9 changes: 6 additions & 3 deletions demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# Load pretrained ckpt & config
overrides={"bpe_dir":"utils/BPE"}
models, cfg, task = load_model_ensemble_and_task(
utils.split_paths('weights/polyformer_l_refcocog.pt'),
utils.split_paths('run_scripts/finetune/polyformer_b_aihub_indoor_80_checkpoints/100_5e-5_512/checkpoint_epoch_21.pt'),
arg_overrides=overrides
)

Expand All @@ -33,8 +33,11 @@
cfg.generation.no_repeat_ngram_size = 3
cfg.task.patch_image_size = 512

from bert.tokenization_bert import BertTokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# from bert.tokenization_bert import BertTokenizer
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
from transformers import AutoTokenizer, AutoModelForMaskedLM
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')


# Fix seed for stochastic decoding
if cfg.common.seed is not None and not cfg.generation.no_seed_provided:
Expand Down
Binary file modified models/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file modified models/polyformer/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file modified models/polyformer/__pycache__/polyformer.cpython-37.pyc
Binary file not shown.
Binary file modified models/polyformer/__pycache__/swin.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file modified models/polyformer/__pycache__/unify_transformer.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file modified polyformer_module/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
3 changes: 2 additions & 1 deletion run_scripts/evaluation/evaluate_polyformer_b_aihub_indoor.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ num_bins=64
batch_size=16

dataset='aihub_indoor'
ckpt_path=../finetune/polyformer_b_aihub_indoor_80_checkpoints/100_5e-5_512/checkpoint_epoch_61.pt
ckpt_path=../finetune/polyformer_b_aihub_indoor_80_checkpoints/100_5e-5_512/checkpoint_epoch_21.pt
# dataset='refcocog'
# ckpt_path=../../weights/polyformer_b_refcocog.pt

Expand All @@ -44,6 +44,7 @@ python3 -m torch.distributed.launch --nproc_per_node=${GPUS_PER_NODE} --master_p
--fp16 \
--num-workers=0 \
--num-bins=${num_bins} \
--vis \
--vis_dir=${vis_dir} \
--result_dir=${result_dir} \
--model-overrides="{\"data\":\"${data}\",\"bpe_dir\":\"${bpe_dir}\",\"selected_cols\":\"${selected_cols}\"}"
Expand Down
8,424 changes: 2,656 additions & 5,768 deletions run_scripts/finetune/polyformer_b_aihub_indoor_80_logs/100_5e-5_512.log

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions run_scripts/finetune/train_polyformer_b_aihub.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ data=${data_dir}/aihub_indoor/aihub_indoor_train.tsv,${data_dir}/aihub_indoor/ai
selected_cols=0,5,6,2,4,3,7
# restore_file=../../weights/polyformer_b_pretrain.pt
# restore_file=../pretrain/polyformer_b_pretrain_aihub_indoor_checkpoints/20_5e-5_512/checkpoint_20_1000.pt
restore_file=../pretrain/polyformer_b_pretrain_aihub_indoor_80_checkpoints_resume_2/20_5e-5_512/checkpoint.best_score_0.5220.pt

# restore_file=../pretrain/polyformer_b_pretrain_aihub_indoor_80_checkpoints_resume_2/20_5e-5_512/checkpoint.best_score_0.5220.pt
restore_file=../finetune/polyformer_b_aihub_indoor_80_checkpoints/100_5e-5_512/checkpoint_epoch_73.pt


task=refcoco
Expand Down
Binary file modified tasks/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file modified tasks/__pycache__/base_task.cpython-37.pyc
Binary file not shown.
Binary file modified tasks/__pycache__/refcoco.cpython-37.pyc
Binary file not shown.
Binary file modified tasks/__pycache__/refcoco_pretrain.cpython-37.pyc
Binary file not shown.
Binary file modified utils/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file modified utils/__pycache__/checkpoint_utils.cpython-37.pyc
Binary file not shown.
Binary file modified utils/__pycache__/eval_utils.cpython-37.pyc
Binary file not shown.
Binary file modified utils/__pycache__/transforms.cpython-37.pyc
Binary file not shown.
Binary file modified utils/__pycache__/vis_utils.cpython-37.pyc
Binary file not shown.

0 comments on commit 6b1e7a0

Please sign in to comment.