diff --git a/model.py b/model.py
index 52566ef..7a3a5ec 100644
--- a/model.py
+++ b/model.py
@@ -40,7 +40,7 @@ def forward(self, x, length=None, writer=None):
             _, _, look_ahead_mask = utils.get_masked_with_pad_tensor(self.max_seq, x, x, config.pad_token)
             decoder, w = self.Decoder(x, mask=look_ahead_mask)
             fc = self.fc(decoder)
-            return fc.contiguous() if self.training else fc.contiguous(), [weight.contiguous() for weight in w]
+            return fc.contiguous() if self.training else (fc.contiguous(), [weight.contiguous() for weight in w])
         else:
             return self.generate(x, length, None).contiguous().tolist()
diff --git a/train.py b/train.py
index 6d7b618..1c97d37 100644
--- a/train.py
+++ b/train.py
@@ -135,7 +135,8 @@
         # switch output device to: gpu-1 ~ gpu-n
         sw_start = time.time()
-        mt.output_device = idx % (torch.cuda.device_count() -1) + 1
+        if torch.cuda.device_count() > 1:
+            mt.output_device = idx % (torch.cuda.device_count() -1) + 1
         sw_end = time.time()
         if config.debug:
             print('output switch time: {}'.format(sw_end - sw_start) )
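
Note (illustrative, not part of the patch): the model.py change fixes an operator-precedence bug. In Python the trailing comma binds looser than the conditional expression, so the old return statement always built a 2-tuple, even in training mode. A minimal sketch with placeholder values in place of fc and w:

    # old form: parsed as ((fc if training else fc), weights) -- a tuple either way
    fc, weights = 1, [2]
    training = True
    old = fc if training else fc, weights          # -> (1, [2]) regardless of `training`
    new = fc if training else (fc, weights)        # -> 1 when training, (1, [2]) otherwise
    assert old == (1, [2]) and new == 1

The train.py guard prevents `idx % (torch.cuda.device_count() - 1)` from dividing by zero on single-GPU machines, where `device_count()` is 1.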