Skip to content

Commit

Permalink
Feat: make aihub working
Browse files Browse the repository at this point in the history
  • Loading branch information
mhko1998 committed Oct 27, 2024
1 parent 69fc728 commit 682f6b5
Show file tree
Hide file tree
Showing 52 changed files with 609 additions and 14 deletions.
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Local data, pretrained weights and training outputs
datasets/*
refer/data/*
pretrained_weights/*
run_scripts/finetune/polyformer_b_aihub_indoor_checkpoints
run_scripts/finetune/polyformer_b_aihub_indoor_logs
run_scripts/finetune/polyformer_b_aihub_manufact_checkpoints
run_scripts/finetune/polyformer_b_aihub_manufact_logs
run_scripts/finetune/polyformer_l_checkpoints
run_scripts/finetune/polyformer_l_logs
results_polyformer_b
weights

*.pt
*.png

# Python bytecode caches are build artifacts and must not be committed.
# NOTE: files that are already tracked stay tracked until they are removed
# from the index (e.g. `git rm -r --cached <dir>/__pycache__`).
__pycache__/
*.py[cod]
Binary file added __pycache__/demo.cpython-37.pyc
Binary file not shown.
Binary file added __pycache__/trainer.cpython-37.pyc
Binary file not shown.
Binary file added bert/__pycache__/activations.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added bert/__pycache__/file_utils.cpython-37.pyc
Binary file not shown.
Binary file added bert/__pycache__/generation_utils.cpython-37.pyc
Binary file not shown.
Binary file added bert/__pycache__/modeling_bert.cpython-37.pyc
Binary file not shown.
Binary file added bert/__pycache__/modeling_utils.cpython-37.pyc
Binary file not shown.
Binary file added bert/__pycache__/tokenization_bert.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added criterions/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file added data/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/base_dataset.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/data_utils.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/file_dataset.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/poly_utils.cpython-37.pyc
Binary file not shown.
Binary file added data/__pycache__/refcoco_dataset.cpython-37.pyc
Binary file not shown.
Binary file not shown.
140 changes: 140 additions & 0 deletions data/create_aihub_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import sys
sys.path.append("./")
from refer.refer import REFER
import numpy as np
from PIL import Image
import random
import os
from tqdm import tqdm

import pickle
from poly_utils import is_clockwise, revert_direction, check_length, reorder_points, \
approximate_polygons, interpolate_polygons, image_to_base64, polygons_to_string


# Export every configured referring-expression dataset to one TSV file per
# split. Each row holds: unique id, image id, sentence, box string, polygon
# string, base64 image, base64 mask and interpolated-polygon string.

max_length = 400  # forwarded as the last argument of approximate_polygons()

data_root = './refer/data'
# datasets = ['refcoco', 'refcoco+', 'refcocog']
datasets = ['aihub_indoor']
# datasets = ['aihub_manufact']

# The image folder is selected from the FIRST configured dataset only, so all
# entries of `datasets` are expected to share the same image directory.
if datasets[0] == 'aihub_indoor':
    image_dir = './refer/data/aihub_refcoco_format/indoor/images'
elif datasets[0] == 'aihub_manufact':
    image_dir = './refer/data/aihub_refcoco_format/manufact/images'
else:
    image_dir = './datasets/images/mscoco/train2014'

# Image ids excluded from the combined training data (see the filter below).
# The pickle is a file shipped with the project; never point this at
# untrusted input, since unpickling can execute arbitrary code.
with open("data/val_test_files.p", "rb") as val_test_fp:
    val_test_files = pickle.load(val_test_fp)

# Dataset name -> (splits to export, `splitBy` argument handed to REFER).
dataset_configs = {
    'refcoco': (['train', 'val', 'testA', 'testB'], 'unc'),
    'refcoco+': (['train', 'val', 'testA', 'testB'], 'unc'),
    'refcocog': (['train', 'val'], 'umd'),
    'aihub_indoor': (['train', 'val', 'test'], None),
    'aihub_manufact': (['train', 'val', 'test'], None),
}

# NOTE(review): this list is only consumed by the commented-out
# "shuffled train file" code that follows this block; every row carries
# base64-encoded images, so it can grow large in memory.
combined_train_data = []

for dataset in datasets:
    # Fail loudly on an unknown name instead of hitting a NameError or
    # silently reusing the splits of the previously processed dataset.
    if dataset not in dataset_configs:
        raise ValueError(f"unknown dataset: {dataset!r}")
    splits, splitBy = dataset_configs[dataset]

    save_dir = f'datasets/finetune/{dataset}'
    os.makedirs(save_dir, exist_ok=True)

    # The REFER index does not depend on the split, so build it once per
    # dataset rather than once per split.
    refer = REFER(data_root, dataset, splitBy)

    for split in splits:
        # Per-split polygon length statistics (collected, not reported).
        num_pts = []
        max_num_pts = 0
        file_name = os.path.join(save_dir, f"{dataset}_{split}.tsv")
        print("creating ", file_name)

        ref_ids = refer.getRefIds(split=split)

        # The context manager closes the TSV file even if a sample fails.
        with open(file_name, 'w') as writer:
            for this_ref_id in tqdm(ref_ids):
                this_img_id = refer.getImgIds(this_ref_id)
                this_img = refer.Imgs[this_img_id[0]]
                fn = this_img['file_name']
                # Text after the last "_" of the file stem, e.g. "a_b_0012.jpg" -> "0012".
                img_id = fn.split(".")[0].split("_")[-1]

                # load image (the source file handle is released right after decoding)
                with Image.open(os.path.join(image_dir, fn)) as raw_img:
                    img = raw_img.convert("RGB")

                # convert image to string
                img_base64 = image_to_base64(img, format='jpeg')

                # load mask: binary 0/1 array, 1 where the reference mask equals 1
                ref = refer.loadRefs(this_ref_id)
                ref_mask = np.array(refer.getMask(ref[0])['mask'])
                annot = (ref_mask == 1).astype(np.uint8)
                annot_img = Image.fromarray(annot, mode="P")
                annot_base64 = image_to_base64(annot_img, format='png')

                polygons_processed = []
                for polygon in refer.getPolygon(ref[0])['polygon']:
                    # make the polygon clockwise
                    if not is_clockwise(polygon):
                        polygon = revert_direction(polygon)

                    # reorder the polygon so that the first vertex is the one closest to image origin
                    polygons_processed.append(reorder_points(polygon))

                # Order polygons by squared distance of their first vertex to
                # the origin, ties broken by x and then y.
                polygons = sorted(polygons_processed, key=lambda x: (x[0] ** 2 + x[1] ** 2, x[0], x[1]))
                # Interpolation uses the sorted polygons BEFORE approximation.
                polygons_interpolated = interpolate_polygons(polygons)

                polygons = approximate_polygons(polygons, 5, max_length)

                pts_string = polygons_to_string(polygons)
                pts_string_interpolated = polygons_to_string(polygons_interpolated)

                # load box
                x, y, w, h = refer.getRefBox(this_ref_id)  # x,y,w,h
                box_string = f'{x},{y},{x + w},{y + h}'

                # Compute the length once and reuse it for both statistics.
                n_pts = check_length(polygons)
                max_num_pts = max(max_num_pts, n_pts)
                num_pts.append(n_pts)

                # load text: one TSV row per sentence attached to this reference
                ref_sent = refer.Refs[this_ref_id]
                for i, (sent, _sent_id) in enumerate(zip(ref_sent['sentences'], ref_sent['sent_ids'])):
                    uniq_id = f"{this_ref_id}_{i}"
                    instance = '\t'.join(
                        [uniq_id, str(this_img_id[0]), sent['sent'], box_string, pts_string, img_base64,
                         annot_base64, pts_string_interpolated]) + '\n'
                    writer.write(instance)

                    if img_id not in val_test_files and split == 'train':  # filtered out val/test files
                        combined_train_data.append(instance)

# random.shuffle(combined_train_data)
# file_name = os.path.join("datasets/finetune/refcoco+g_train_shuffled.tsv")
# print("creating ", file_name)
# writer = open(file_name, 'w')
# writer.writelines(combined_train_data)
# writer.close()




140 changes: 140 additions & 0 deletions data/create_aihub_manufact_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import sys
sys.path.append("./")
from refer.refer import REFER
import numpy as np
from PIL import Image
import random
import os
from tqdm import tqdm

import pickle
from poly_utils import is_clockwise, revert_direction, check_length, reorder_points, \
approximate_polygons, interpolate_polygons, image_to_base64, polygons_to_string


# Export every configured referring-expression dataset to one TSV file per
# split. Each row holds: unique id, image id, sentence, box string, polygon
# string, base64 image, base64 mask and interpolated-polygon string.
# NOTE(review): apart from the active `datasets` line this script duplicates
# data/create_aihub_data.py; consider merging them behind a CLI argument.

max_length = 400  # forwarded as the last argument of approximate_polygons()

data_root = './refer/data'
# datasets = ['refcoco', 'refcoco+', 'refcocog']
# datasets = ['aihub_indoor']
datasets = ['aihub_manufact']

# The image folder is selected from the FIRST configured dataset only, so all
# entries of `datasets` are expected to share the same image directory.
if datasets[0] == 'aihub_indoor':
    image_dir = './refer/data/aihub_refcoco_format/indoor/images'
elif datasets[0] == 'aihub_manufact':
    image_dir = './refer/data/aihub_refcoco_format/manufact/images'
else:
    image_dir = './datasets/images/mscoco/train2014'

# Image ids excluded from the combined training data (see the filter below).
# The pickle is a file shipped with the project; never point this at
# untrusted input, since unpickling can execute arbitrary code.
with open("data/val_test_files.p", "rb") as val_test_fp:
    val_test_files = pickle.load(val_test_fp)

# Dataset name -> (splits to export, `splitBy` argument handed to REFER).
dataset_configs = {
    'refcoco': (['train', 'val', 'testA', 'testB'], 'unc'),
    'refcoco+': (['train', 'val', 'testA', 'testB'], 'unc'),
    'refcocog': (['train', 'val'], 'umd'),
    'aihub_indoor': (['train', 'val', 'test'], None),
    'aihub_manufact': (['train', 'val', 'test'], None),
}

# NOTE(review): this list is only consumed by the commented-out
# "shuffled train file" code that follows this block; every row carries
# base64-encoded images, so it can grow large in memory.
combined_train_data = []

for dataset in datasets:
    # Fail loudly on an unknown name instead of hitting a NameError or
    # silently reusing the splits of the previously processed dataset.
    if dataset not in dataset_configs:
        raise ValueError(f"unknown dataset: {dataset!r}")
    splits, splitBy = dataset_configs[dataset]

    save_dir = f'datasets/finetune/{dataset}'
    os.makedirs(save_dir, exist_ok=True)

    # The REFER index does not depend on the split, so build it once per
    # dataset rather than once per split.
    refer = REFER(data_root, dataset, splitBy)

    for split in splits:
        # Per-split polygon length statistics (collected, not reported).
        num_pts = []
        max_num_pts = 0
        file_name = os.path.join(save_dir, f"{dataset}_{split}.tsv")
        print("creating ", file_name)

        ref_ids = refer.getRefIds(split=split)

        # The context manager closes the TSV file even if a sample fails.
        with open(file_name, 'w') as writer:
            for this_ref_id in tqdm(ref_ids):
                this_img_id = refer.getImgIds(this_ref_id)
                this_img = refer.Imgs[this_img_id[0]]
                fn = this_img['file_name']
                # Text after the last "_" of the file stem, e.g. "a_b_0012.jpg" -> "0012".
                img_id = fn.split(".")[0].split("_")[-1]

                # load image (the source file handle is released right after decoding)
                with Image.open(os.path.join(image_dir, fn)) as raw_img:
                    img = raw_img.convert("RGB")

                # convert image to string
                img_base64 = image_to_base64(img, format='jpeg')

                # load mask: binary 0/1 array, 1 where the reference mask equals 1
                ref = refer.loadRefs(this_ref_id)
                ref_mask = np.array(refer.getMask(ref[0])['mask'])
                annot = (ref_mask == 1).astype(np.uint8)
                annot_img = Image.fromarray(annot, mode="P")
                annot_base64 = image_to_base64(annot_img, format='png')

                polygons_processed = []
                for polygon in refer.getPolygon(ref[0])['polygon']:
                    # make the polygon clockwise
                    if not is_clockwise(polygon):
                        polygon = revert_direction(polygon)

                    # reorder the polygon so that the first vertex is the one closest to image origin
                    polygons_processed.append(reorder_points(polygon))

                # Order polygons by squared distance of their first vertex to
                # the origin, ties broken by x and then y.
                polygons = sorted(polygons_processed, key=lambda x: (x[0] ** 2 + x[1] ** 2, x[0], x[1]))
                # Interpolation uses the sorted polygons BEFORE approximation.
                polygons_interpolated = interpolate_polygons(polygons)

                polygons = approximate_polygons(polygons, 5, max_length)

                pts_string = polygons_to_string(polygons)
                pts_string_interpolated = polygons_to_string(polygons_interpolated)

                # load box
                x, y, w, h = refer.getRefBox(this_ref_id)  # x,y,w,h
                box_string = f'{x},{y},{x + w},{y + h}'

                # Compute the length once and reuse it for both statistics.
                n_pts = check_length(polygons)
                max_num_pts = max(max_num_pts, n_pts)
                num_pts.append(n_pts)

                # load text: one TSV row per sentence attached to this reference
                ref_sent = refer.Refs[this_ref_id]
                for i, (sent, _sent_id) in enumerate(zip(ref_sent['sentences'], ref_sent['sent_ids'])):
                    uniq_id = f"{this_ref_id}_{i}"
                    instance = '\t'.join(
                        [uniq_id, str(this_img_id[0]), sent['sent'], box_string, pts_string, img_base64,
                         annot_base64, pts_string_interpolated]) + '\n'
                    writer.write(instance)

                    if img_id not in val_test_files and split == 'train':  # filtered out val/test files
                        combined_train_data.append(instance)

# random.shuffle(combined_train_data)
# file_name = os.path.join("datasets/finetune/refcoco+g_train_shuffled.tsv")
# print("creating ", file_name)
# writer = open(file_name, 'w')
# writer.writelines(combined_train_data)
# writer.close()




2 changes: 2 additions & 0 deletions data/create_finetuning_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import sys
sys.path.append("./")
from refer.refer import REFER
import numpy as np
from PIL import Image
Expand Down
5 changes: 4 additions & 1 deletion data/refcoco_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from bert.tokenization_bert import BertTokenizer
from data.poly_utils import string_to_polygons, downsample_polygons, polygons_to_string, points_to_token_string
import cv2
from transformers import AutoTokenizer, AutoModelForMaskedLM


ImageFile.LOAD_TRUNCATED_IMAGES = True
ImageFile.MAX_IMAGE_PIXELS = None
Expand Down Expand Up @@ -71,7 +73,8 @@ def __init__(
T.ToTensor(),
T.Normalize(mean=mean, std=std, max_image_size=max_image_size)
])
self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
self.tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')

def __getitem__(self, index):
data = self.dataset[index]
Expand Down
Binary file added models/__pycache__/__init__.cpython-37.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
7 changes: 5 additions & 2 deletions models/polyformer/unify_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from .unify_transformer_layer import TransformerEncoderLayer, TransformerDecoderLayer
from .swin import SwinTransformer
from bert.modeling_bert import BertModel
from transformers import AutoTokenizer, AutoModelForMaskedLM



Expand Down Expand Up @@ -529,7 +530,8 @@ def __init__(self, args, dictionary, embed_tokens):
self.register_buffer("token_rp_bucket", token_rp_bucket)
self.register_buffer("image_rp_bucket", image_rp_bucket)
self.entangle_position_embedding = args.entangle_position_embedding
self.bert = BertModel.from_pretrained("bert-base-uncased")
# self.bert = BertModel.from_pretrained("bert-base-uncased")
self.bert = AutoModelForMaskedLM.from_pretrained("xlm-roberta-base")

def train(self, mode=True):
super(TransformerEncoder, self).train(mode)
Expand Down Expand Up @@ -613,7 +615,8 @@ def forward_embedding(
):
# embed tokens and positions
if token_embedding is None:
token_embedding = self.bert(src_tokens, attention_mask=att_masks)[0]
# token_embedding = self.bert(src_tokens, attention_mask=att_masks)[0]
token_embedding = self.bert(src_tokens, attention_mask=att_masks, output_hidden_states=True,).hidden_states[0]

x = embed = token_embedding
if self.entangle_position_embedding and pos_embed is not None:
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion refer/external/_mask.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def frPoly( poly, siz h, siz w ):
Rs = RLEs(n)
for i, p in enumerate(poly):
np_poly = np.array(p, dtype=np.double, order='F')
rleFrPoly( <RLE*>&Rs._R[i], <const double*> np_poly.data, len(np_poly)/2, h, w )
rleFrPoly( <RLE*>&Rs._R[i], <const double*> np_poly.data, len(np_poly)//2, h, w )
objs = _toString(Rs)
return objs

Expand Down
Loading

0 comments on commit 682f6b5

Please sign in to comment.