diff --git a/dataloader.py b/dataloader.py
index fd76ec77..79f7fbac 100644
--- a/dataloader.py
+++ b/dataloader.py
@@ -164,6 +164,9 @@ def get_batch(self, split, batch_size=None, seq_per_img=None):
         data['att_masks'] = np.zeros(data['att_feats'].shape[:2], dtype='float32')
         for i in range(len(att_batch)):
             data['att_masks'][i*seq_per_img:(i+1)*seq_per_img, :att_batch[i].shape[0]] = 1
+        # set att_masks to None if attention features have same length
+        if data['att_masks'].sum() == data['att_masks'].size:
+            data['att_masks'] = None
 
         data['labels'] = np.vstack(label_batch)
         # generate mask
@@ -187,6 +190,8 @@ def __getitem__(self, index):
         ix = index #self.split_ix[index]
         if self.use_att:
             att_feat = np.load(os.path.join(self.input_att_dir, str(self.info['images'][ix]['id']) + '.npz'))['feat']
+            # Reshape to K x C
+            att_feat = att_feat.reshape(-1, att_feat.shape[-1])
             if self.norm_att_feat:
                 att_feat = att_feat / np.linalg.norm(att_feat, 2, 1, keepdims=True)
         if self.use_box:
diff --git a/train.py b/train.py
index c3dc2e5d..8b0335b8 100644
--- a/train.py
+++ b/train.py
@@ -117,7 +117,7 @@ def train(opt):
         start = time.time()
 
         tmp = [data['fc_feats'], data['att_feats'], data['labels'], data['masks'], data['att_masks']]
-        tmp = [torch.from_numpy(_).cuda() for _ in tmp]
+        tmp = [_ if _ is None else torch.from_numpy(_).cuda() for _ in tmp]
         fc_feats, att_feats, labels, masks, att_masks = tmp
 
         optimizer.zero_grad()
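
The `att_masks = None` shortcut only pays off if downstream consumers branch on it. Below is a minimal sketch of such a consumer, assuming a softmax-based attention step over K image regions; the function name, shapes, and example values are illustrative, not this repository's actual model code:

```python
import torch
import torch.nn.functional as F

def masked_attention_weights(logits, att_masks=None):
    """Softmax over attention logits, optionally ignoring padded regions.

    logits:    (batch, K) raw attention scores over K image regions.
    att_masks: (batch, K) float mask of ones/zeros, or None when every
               image contributed the same number of regions (no padding),
               as produced by the get_batch change above.
    """
    weights = F.softmax(logits, dim=1)
    if att_masks is not None:
        # Zero out padded regions, then renormalize so each row sums to 1.
        weights = weights * att_masks
        weights = weights / weights.sum(1, keepdim=True)
    return weights

# Illustrative usage: the second image has only 3 valid regions out of 5.
logits = torch.randn(2, 5)
masks = torch.tensor([[1., 1., 1., 1., 1.],
                      [1., 1., 1., 0., 0.]])
print(masked_attention_weights(logits, masks))
print(masked_attention_weights(logits, None))  # fast path enabled by this diff
```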