Skip to content

Commit

Permalink
Feat: add pretrain
Browse files Browse the repository at this point in the history
  • Loading branch information
mhko1998 committed Oct 31, 2024
1 parent 682f6b5 commit 3abd65f
Show file tree
Hide file tree
Showing 16 changed files with 7,928 additions and 9 deletions.
Binary file modified data/__pycache__/refcoco_pretrain_dataset.cpython-37.pyc
Binary file not shown.
148 changes: 148 additions & 0 deletions data/create_pretraining_aihub_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import json
import os
from tqdm import tqdm
import random
import pickle


# Folder containing the AI Hub images referenced by the annotation file.
img_path = 'refer/data/aihub_refcoco_format/indoor_80/images'
# img_path = 'refer/data/aihub_refcoco_format/indoor/images'

# Load the COCO-style annotation file.
# Alternative inputs used during development:
#   datasets/annotations/instances.json
#   refer/data/aihub_refcoco_format/manufact/instances.json
# The context manager guarantees the handle is closed even if parsing fails;
# the encoding is pinned so non-ASCII text does not depend on the locale.
with open("refer/data/aihub_refcoco_format/indoor_80/instances.json",
          encoding="utf-8") as f:
    print("Loading annotation file")
    data = json.load(f)

# load the validation and test image list of refcoco, refcoco+, and refcocog
# val_test_files = pickle.load(open("data/val_test_files.p", "rb"))

# create result folder
os.makedirs("datasets/pretrain", exist_ok=True)

# Debug output: dump a handful of annotation records and the collection
# sizes so the input can be checked by eye before any TSV is generated.
for sample_idx in (10, 1, 2, 3, 4, 5, 6):
    print(data['annotations'][sample_idx])

# print(data['images'][0])
print(len(data['images']))
print(len(data['annotations']))

ref_file = 'refer/data/aihub_refcoco_format/indoor_80/refs.p'
# ref_file = 'refer/data/aihub_refcoco_format/manufact/refs.p'
# NOTE: unpickling can execute arbitrary code; only load refs.p files that
# come from a trusted source.
with open(ref_file, 'rb') as ref_fp:
    ref_ann = pickle.load(ref_fp)

# Debug output: the same sample positions from the referring-expression list.
for sample_idx in (10, 1, 2, 3, 4, 5, 6):
    print(ref_ann[sample_idx])

print(len(ref_ann))

# exit()


def _build_tsv_lines(refs, annotations, images_by_id, split, image_root):
    """Build the TSV rows for every referring expression in ``split``.

    Each row is ``index<TAB>expression<TAB>x1,y1,x2,y2<TAB>image path``.
    The index counts rows of this split only, starting at 0.

    Args:
        refs: Iterable of ref dicts; reads 'split', 'ref_id' and
            'sentences' (only the first sentence's 'raw' text is used).
        annotations: Sequence of annotation dicts; reads 'image_id' and
            'bbox' (given as x, y, width, height).
        images_by_id: Mapping from image id to its image dict
            (reads 'file_name').
        split: Name of the split to keep, e.g. 'train' or 'val'.
        image_root: Folder that is joined with each image's file name.

    Returns:
        List of newline-terminated TSV rows, in ref order.

    Raises:
        KeyError: If an annotation points at an image id that is not
            present in ``images_by_id``.
    """
    rows = []
    for ref in tqdm(refs):
        if ref['split'] != split:
            continue

        # NOTE(review): the ref_id is used as a *position* in the annotation
        # list - this assumes annotations are stored in ref_id order; confirm
        # against the dataset export.
        ann = annotations[int(ref["ref_id"])]
        image = images_by_id[ann['image_id']]

        # Convert the (x, y, w, h) box into corner format x1,y1,x2,y2.
        x, y, w, h = ann['bbox']
        box_string = f'{x},{y},{x + w},{y + h}'

        # Newlines are dropped as before; carriage returns and tabs are also
        # neutralised because either one would break the row/column layout.
        expression = ref['sentences'][0]['raw']
        expression = expression.replace('\n', '').replace('\r', '').replace('\t', ' ')

        filepath = os.path.join(image_root, image['file_name'])
        rows.append('\t'.join([str(len(rows)), expression, box_string, filepath]) + '\n')
    return rows


def _write_tsv(path, rows):
    """Write ``rows`` to ``path`` as UTF-8, replacing any existing file."""
    with open(path, 'w', encoding='utf-8') as writer:
        writer.writelines(rows)


# Index the images once instead of scanning the whole list for every ref.
# setdefault keeps the first entry when an id occurs more than once, which
# matches a first-match linear search.
images_by_id = {}
for image_entry in data['images']:
    images_by_id.setdefault(image_entry['id'], image_entry)

# generate training tsv file
tsv_filename = "datasets/pretrain/train_aihub_indoor_80.tsv"
print("generating ", tsv_filename)
lines = _build_tsv_lines(ref_ann, data['annotations'], images_by_id, 'train', img_path)
train_idx = len(lines)

# shuffle the training set (row indices are assigned before shuffling)
random.shuffle(lines)
_write_tsv(tsv_filename, lines)

#####################################
# generate validation tsv file (kept in ref order, not shuffled)
tsv_filename = "datasets/pretrain/val_aihub_indoor_80.tsv"
print("generating ", tsv_filename)
lines = _build_tsv_lines(ref_ann, data['annotations'], images_by_id, 'val', img_path)
val_idx = len(lines)
_write_tsv(tsv_filename, lines)

print("train_idx", train_idx)
print('val_idx', val_idx)
15 changes: 12 additions & 3 deletions data/create_pretraining_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
# set up image paths
imgsfile = dict(
coco='mscoco/train2014',
vg='visual-genome',
saiaprtc12='saiaprtc12',
flickr='flickr30k'
# vg='visual-genome',
# saiaprtc12='saiaprtc12',
# flickr='flickr30k'
)

# load annotation files
Expand All @@ -24,6 +24,12 @@
# create result folder
os.makedirs("datasets/pretrain", exist_ok=True)

print(data.keys())
print(data['train'][0])

print(data['train'][0].keys())


# generate training tsv file
train_instances = data['train']
tsv_filename = "datasets/pretrain/train_shuffled.tsv"
Expand All @@ -33,9 +39,12 @@
lines = []
for i, data_i in enumerate(tqdm(train_instances)):
    data_source = data_i['data_source']
    # Keep only COCO instances. Strings must be compared by value: an
    # identity test (`is not`) against a literal is true for any string
    # object that is not that exact literal, even when the text is equal,
    # which would skip every instance.
    if data_source != "coco":
        continue
    image_id = data_i['image_id']
    bbox = data_i['bbox']
    expressions = data_i['expressions']
    # Debug output: prints the expressions of every kept instance.
    print(expressions)
    height, width = data_i['height'], data_i['width']
    # Convert the (x, y, w, h) box into corner format x1,y1,x2,y2.
    x, y, w, h = bbox
    box_string = f'{x},{y},{x + w},{y + h}'
Expand Down
9 changes: 7 additions & 2 deletions data/refcoco_pretrain_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from data import data_utils
from data.base_dataset import BaseDataset
from bert.tokenization_bert import BertTokenizer
from transformers import AutoTokenizer, AutoModelForMaskedLM


ImageFile.LOAD_TRUNCATED_IMAGES = True
ImageFile.MAX_IMAGE_PIXELS = None
Expand All @@ -50,7 +52,8 @@ def __init__(
imagenet_default_mean_and_std=False,
num_bins=1000,
max_image_size=512,
image_path="../../datasets/images"
image_path="../../"
# image_path="../../datasets/images"
):
super().__init__(split, dataset, bpe, src_dict, tgt_dict)
self.max_src_length = max_src_length
Expand All @@ -72,7 +75,9 @@ def __init__(
T.ToTensor(),
T.Normalize(mean=mean, std=std, max_image_size=max_image_size)
])
self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
self.tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')


def __getitem__(self, index):
uniq_id, img_file, text, region_coord = self.dataset[index]
Expand Down
39 changes: 36 additions & 3 deletions refer/refer.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,20 +251,49 @@ def getRefBox(self, ref_id):
ann = self.refToAnn[ref_id]
return ann['bbox'] # [x, y, w, h]

def polygonFromMask(self, maskedArr, epsilon_factor=0.01):
    """Extract simplified polygons from a mask.

    Contours are found with OpenCV and each one is simplified with
    ``cv2.approxPolyDP`` to reduce the number of points.

    Args:
        maskedArr: Mask image passed straight to ``cv2.findContours``.
        epsilon_factor: Approximation tolerance expressed as a fraction of
            each contour's perimeter.

    Returns:
        List of polygons, each a flat list of floats
        ``[x0, y0, x1, y1, ...]`` holding at least 3 points.

    Raises:
        ValueError: If the mask yields no contour with at least 3 points.
    """
    # adapted from https://github.com/hazirbas/coco-json-converter/blob/master/generate_coco_json.py
    contours, _ = cv2.findContours(maskedArr, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    segmentation = []
    for contour in contours:
        # Valid polygons have >= 6 coordinates (3 points)
        if contour.size < 6:
            continue
        # Calculate epsilon based on the contour's perimeter
        epsilon = epsilon_factor * cv2.arcLength(contour, True)
        # Approximate the contour to reduce the number of points
        approx = cv2.approxPolyDP(contour, epsilon, True)
        # The simplification itself can collapse a small contour below
        # 3 points; fall back to the raw contour so that every emitted
        # polygon is valid.
        polygon = approx if approx.size >= 6 else contour
        segmentation.append(polygon.astype(float).flatten().tolist())
    if not segmentation:
        raise ValueError("mask contains no contour with at least 3 points")
    return segmentation

def visualizePolygon(self, maskedArr, polygon):
    """Draw ``polygon`` as a green outline on a copy of ``maskedArr``.

    Args:
        maskedArr: Mask image. A 2-D array is converted from grayscale to
            BGR; any other shape is copied as is.
        polygon: Flat coordinate list ``[x0, y0, x1, y1, ...]``.

    Returns:
        The image with the closed polygon drawn on it. Nothing is shown or
        written to disk here; the caller decides what to do with it (for
        example ``cv2.imwrite('Polygons.png', img)``). Existing callers
        that ignore the return value are unaffected.
    """
    # Create a color image to draw the polygon on
    if len(maskedArr.shape) == 2:
        # Convert grayscale mask to BGR color image
        display_img = cv2.cvtColor(maskedArr, cv2.COLOR_GRAY2BGR)
    else:
        display_img = maskedArr.copy()

    # Convert the flattened list back to Nx1x2 array of integer points
    points = np.array(polygon, np.int32).reshape((-1, 1, 2))
    # Draw the polygon on the image
    cv2.polylines(display_img, [points], isClosed=True, color=(0, 255, 0), thickness=2)

    # Hand the drawing back instead of discarding it.
    return display_img

def getPolygon(self, ref):
# return mask, area and mask-center
ann = self.refToAnn[ref['ref_id']]
Expand All @@ -286,14 +315,18 @@ def getPolygon(self, ref):

if self.dataset in ['aihub_indoor', 'aihub_manufact']:
# seg = ann['segmentation']

seg = self.polygonFromMask(m)[0]
self.visualizePolygon(m, seg)
# print(np.array(seg).shape)
# print(seg)
# print(len(seg))
# poly = np.array(seg).reshape((len(seg)//2, 2))
# print(poly)
# print("----------------------")
# polygons.append(Polygon(poly, True, alpha=0.4))
# print(np.array(seg).shape)

polygons.append(seg)
else:
for seg in ann['segmentation']:
Expand Down
Loading

0 comments on commit 3abd65f

Please sign in to comment.