allow yolo3 to detect arbitrary resolution (#362)

dmlc · Oct 5, 2018 · 3b55554 · 3b55554
1 parent 4380e5e
commit 3b55554
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 6 deletions.
diff --git a/gluoncv/data/transforms/presets/yolo.py b/gluoncv/data/transforms/presets/yolo.py
@@ -10,7 +10,7 @@
 
 __all__ = ['transform_test', 'load_test', 'YOLO3DefaultTrainTransform', 'YOLO3DefaultValTransform']
 
-def transform_test(imgs, short=416, max_size=1024, stride=32, mean=(0.485, 0.456, 0.406),
+def transform_test(imgs, short=416, max_size=1024, stride=1, mean=(0.485, 0.456, 0.406),
                    std=(0.229, 0.224, 0.225)):
     """A util function to transform all images to tensors as network input by applying
     normalizations. This function supports 1 NDArray or iterable of NDArrays.
@@ -25,7 +25,7 @@ def transform_test(imgs, short=416, max_size=1024, stride=32, mean=(0.485, 0.456
         Maximum longer side length to fit image.
         This is to limit the input image shape. Aspect ratio is intact because we
         support arbitrary input size in our YOLO implementation.
-    stride : int, optinal, default is 32
+    stride : int, optional, default is 1
         The stride constraint due to the precise alignment of the bounding box prediction module.
         Image's width and height must be multiples of `stride`. Use `stride = 1` to
         relax this constraint.
@@ -61,7 +61,7 @@ def transform_test(imgs, short=416, max_size=1024, stride=32, mean=(0.485, 0.456
         return tensors[0], origs[0]
     return tensors, origs
 
-def load_test(filenames, short=416, max_size=1024, stride=32, mean=(0.485, 0.456, 0.406),
+def load_test(filenames, short=416, max_size=1024, stride=1, mean=(0.485, 0.456, 0.406),
               std=(0.229, 0.224, 0.225)):
     """A util function to load all images, transform them to tensor by applying
     normalizations. This function supports 1 filename or list of filenames.
@@ -76,7 +76,7 @@ def load_test(filenames, short=416, max_size=1024, stride=32, mean=(0.485, 0.456
         Maximum longer side length to fit image.
         This is to limit the input image shape. Aspect ratio is intact because we
         support arbitrary input size in our YOLO implementation.
-    stride : int, optinal, default is 32
+    stride : int, optional, default is 1
         The stride constraint due to the precise alignment of the bounding box prediction module.
         Image's width and height must be multiples of `stride`. Use `stride = 1` to
         relax this constraint.

diff --git a/gluoncv/model_zoo/yolo/yolo3.py b/gluoncv/model_zoo/yolo/yolo3.py
@@ -349,7 +349,8 @@ def hybrid_forward(self, F, x, *args):
             x = self.transitions[i](x)
             # upsample feature map reverse to shallow layers
             upsample = _upsample(x, stride=2)
-            x = F.concat(upsample, routes[::-1][i + 1], dim=1)
+            route_now = routes[::-1][i + 1]
+            x = F.concat(F.slice_like(upsample, route_now, axes=(2, 3)), route_now, dim=1)
 
         if autograd.is_training():
             # during training, the network behaves differently since we don't need detection results
@@ -430,7 +431,7 @@ def get_yolov3(name, stages, filters, anchors, strides, classes,
     ----------
     name : str or None
         Model name, if `None` is used, you must specify `features` to be a `HybridBlock`.
-    features : iterable of str or `HybridBlock`
+    stages : iterable of str or `HybridBlock`
         List of network internal output names, in order to specify which layers are
         used for predicting bbox values.
         If `name` is `None`, `features` must be a `HybridBlock` which generates multiple