Commit fe29667

Merge pull request #11 from adamoudad/master
Fix single GPU training and model output
jason9693 authored Dec 3, 2020
2 parents 5f18337 + 68911c7 commit fe29667
Showing 2 changed files with 3 additions and 2 deletions.
model.py (1 addition, 1 deletion)
@@ -40,7 +40,7 @@ def forward(self, x, length=None, writer=None):
         _, _, look_ahead_mask = utils.get_masked_with_pad_tensor(self.max_seq, x, x, config.pad_token)
         decoder, w = self.Decoder(x, mask=look_ahead_mask)
         fc = self.fc(decoder)
-        return fc.contiguous() if self.training else fc.contiguous(), [weight.contiguous() for weight in w]
+        return fc.contiguous() if self.training else (fc.contiguous(), [weight.contiguous() for weight in w])
     else:
         return self.generate(x, length, None).contiguous().tolist()

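Why the model.py change matters: in Python, a conditional expression binds more tightly than the comma in a return statement, so the old line parsed as "return (fc.contiguous() if self.training else fc.contiguous()), [...]" and the model handed back a tuple even during training. With the added parentheses, training returns only the output tensor while evaluation returns the (output, attention weights) pair. A minimal, standalone sketch of that precedence behavior (hypothetical names, not the repository's code):

# Without parentheses, the trailing ", attn" belongs to the whole return
# statement, so a tuple comes back even when training is True.
def before(training):
    logits, attn = "logits", ["attn"]
    return logits if training else logits, attn   # always a 2-tuple

# Parentheses keep the tuple inside the else branch only.
def after(training):
    logits, attn = "logits", ["attn"]
    return logits if training else (logits, attn)

print(before(True))   # ('logits', ['attn'])  -- tuple, even in training
print(after(True))    # 'logits'
print(after(False))   # ('logits', ['attn'])
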
train.py (2 additions, 1 deletion)
@@ -135,7 +135,8 @@

         # switch output device to: gpu-1 ~ gpu-n
         sw_start = time.time()
-        mt.output_device = idx % (torch.cuda.device_count() -1) + 1
+        if torch.cuda.device_count() > 1:
+            mt.output_device = idx % (torch.cuda.device_count() -1) + 1
         sw_end = time.time()
         if config.debug:
             print('output switch time: {}'.format(sw_end - sw_start) )
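
Why the train.py change matters: with a single GPU, torch.cuda.device_count() returns 1, so the old line computed idx % 0 and raised ZeroDivisionError; the guard now switches the output device only when more than one GPU is present. A small sketch of that guarded logic under the same assumption (pick_output_device is a hypothetical helper, not part of the repository):

import torch

def pick_output_device(idx):
    # With several GPUs, round-robin the output over the non-zero
    # device indices 1 .. n-1, as the guarded assignment above does.
    # With one GPU (or CPU only), stay on device 0 so the modulo
    # never divides by zero.
    n = torch.cuda.device_count()
    if n > 1:
        return idx % (n - 1) + 1
    return 0

# On a 4-GPU machine this prints [1, 2, 3, 1, 2];
# on a single-GPU or CPU-only machine it prints [0, 0, 0, 0, 0].
print([pick_output_device(i) for i in range(5)])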
