+cmake_minimum_required(VERSION 2.8.3)
+## Compile as C++11, supported in ROS Kinetic and newer
+# add_compile_options(-std=c++11)
+## Find catkin macros and libraries
+## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
+## is used, also find other catkin packages
+find_package(catkin REQUIRED COMPONENTS
+ pcl_ros
+ roscpp
+ sensor_msgs
+ std_msgs
+## System dependencies are found with CMake's conventions
+# find_package(Boost REQUIRED COMPONENTS system)
+## Uncomment this if the package has a setup.py. This macro ensures
+## modules and global scripts declared therein get installed
+## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
+# catkin_python_setup()
+## Declare ROS messages, services and actions ##
+## To declare and build messages, services or actions from within this
+## package, follow these steps:
+## * Let MSG_DEP_SET be the set of packages whose message types you use in
+## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
+## * In the file package.xml:
+## * add a build_depend tag for "message_generation"
+## * add a build_depend and a run_depend tag for each package in MSG_DEP_SET
+## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
+## but can be declared for certainty nonetheless:
+## * add a run_depend tag for "message_runtime"
+## * In this file (CMakeLists.txt):
+## * add "message_generation" and every package in MSG_DEP_SET to
+## find_package(catkin REQUIRED COMPONENTS ...)
+## * add "message_runtime" and every package in MSG_DEP_SET to
+## catkin_package(CATKIN_DEPENDS ...)
+## * uncomment the add_*_files sections below as needed
+## and list every .msg/.srv/.action file to be processed
+## * uncomment the generate_messages entry below
+## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
+## Generate messages in the 'msg' folder
+# add_message_files(
+# Message1.msg
+# Message2.msg
+# )
+## Generate services in the 'srv' folder
+# add_service_files(
+# Service1.srv
+# Service2.srv
+# )
+## Generate actions in the 'action' folder
+# add_action_files(
+# Action1.action
+# Action2.action
+# )
+## Generate added messages and services with any dependencies listed here
+# generate_messages(
+# DEPENDENCIES
+# sensor_msgs
+# std_msgs
+# )
+## Declare ROS dynamic reconfigure parameters ##
+## To declare and build dynamic reconfigure parameters within this
+## package, follow these steps:
+## * In the file package.xml:
+## * add a build_depend and a run_depend tag for "dynamic_reconfigure"
+## * In this file (CMakeLists.txt):
+## * add "dynamic_reconfigure" to
+## find_package(catkin REQUIRED COMPONENTS ...)
+## * uncomment the "generate_dynamic_reconfigure_options" section below
+## and list every .cfg file to be processed
+## Generate dynamic reconfigure parameters in the 'cfg' folder
+# generate_dynamic_reconfigure_options(
+# cfg/DynReconf1.cfg
+# cfg/DynReconf2.cfg
+# )
+## catkin specific configuration ##
+## The catkin_package macro generates cmake config files for your package
+## Declare things to be passed to dependent projects
+## INCLUDE_DIRS: uncomment this if your package contains header files
+## LIBRARIES: libraries you create in this project that dependent projects also need
+## CATKIN_DEPENDS: catkin_packages dependent projects also need
+## DEPENDS: system dependencies of this project that dependent projects also need
+# INCLUDE_DIRS include
+# LIBRARIES kitti_velodyne_to_ros
+# CATKIN_DEPENDS pcl_ros roscpp sensor_msgs std_msgs
+# DEPENDS system_lib
+## Build ##
+## Specify additional locations of header files
+## Your package locations should be listed before other locations
+# include
+ ${catkin_INCLUDE_DIRS}
+## Declare a C++ library
+# add_library(${PROJECT_NAME}
+# src/${PROJECT_NAME}/kitti_velodyne_to_ros.cpp
+# )
+## Add cmake target dependencies of the library
+## as an example, code may need to be generated before libraries
+## either from message generation or dynamic reconfigure
+## Declare a C++ executable
+## With catkin_make all packages are built within a single CMake context
+## The recommended prefix ensures that target names across packages don't collide
+add_executable(${PROJECT_NAME}_node src/kitti_velodyne_to_ros_node.cpp)
+## Rename C++ executable without prefix
+## The above recommended prefix causes long target names, the following renames the
+## target back to the shorter version for ease of user use
+## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
+# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
+## Add cmake target dependencies of the executable
+## same as for the library above
+## Specify libraries to link a library or executable target against
+ ${catkin_LIBRARIES}
+## Install ##
+# all install targets should use catkin DESTINATION variables
+# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
+## Mark executable scripts (Python etc.) for installation
+## in contrast to setup.py, you can choose the destination
+# install(PROGRAMS
+# scripts/my_python_script
+# )
+## Mark executables and/or libraries for installation
+# )
+## Mark cpp header files for installation
+# install(DIRECTORY include/${PROJECT_NAME}/
+# )
+## Mark other files for installation (e.g. launch and bag files, etc.)
+# install(FILES
+# # myfile1
+# # myfile2
+# )
+## Testing ##
+## Add gtest based cpp test target and link libraries
+# catkin_add_gtest(${PROJECT_NAME}-test test/test_kitti_velodyne_to_ros.cpp)
+# if(TARGET ${PROJECT_NAME}-test)
+# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
+# endif()
+## Add folders to be run by python nosetests
+# catkin_add_nosetests(test)
+### .npy file visualizer for ROS
+- This package provides two commands.
+- One publishes `.bin` files from the KITTI Velodyne dataset; the other publishes `.npy` files from the SqueezeSeg dataset.
+ - Both commands are described below.
+### Instructions
+1. Add the code shown below (lines 117-122) to your `eval.py`.
+ 1.1 After evaluation, this saves `pred_~~~.npy` files, which contain the prediction data.
+2. Run `eval.sh` and save the `pred_~~~.npy` files to a specific path.
+3. Move this `kitti_velodyne_to_ros` folder into your `catkin_ws` folder and run `catkin_make`.
+4. Edit the `.launch` file so that it points to your data paths.
+5. Run the commands below.
+### Available commands
+(for `.bin` files from the KITTI dataset)
+1. `rosrun kitti_velodyne_to_ros kitti_velodyne_to_ros_node`
+(for `.npy` files from SqueezeSeg, after modifying the paths in the `.launch` file)
+2. `roslaunch kitti_velodyne_to_ros npy_velodyne_to_ros.launch`
+ kitti_velodyne_to_ros
+ 0.0.0
+ The kitti_velodyne_to_ros package
+ dyros-vehicle
+ catkin
+ pcl_ros
+ roscpp
+ sensor_msgs
+ std_msgs
+ pcl_ros
+ roscpp
+ sensor_msgs
+ std_msgs
+ pcl_ros
+ roscpp
+ sensor_msgs
+ std_msgs
+ - Class: rviz/Displays
+ Help Height: 78
+ Name: Displays
+ Property Tree Widget:
+ Expanded:
+ - /Global Options1
+ Splitter Ratio: 0.63688761
+ Tree Height: 775
+ - Class: rviz/Selection
+ Name: Selection
+ - Class: rviz/Tool Properties
+ Expanded:
+ - /2D Pose Estimate1
+ - /2D Nav Goal1
+ - /Publish Point1
+ Name: Tool Properties
+ Splitter Ratio: 0.588679016
+ - Class: rviz/Views
+ Expanded:
+ - /Current View1
+ Name: Views
+ Splitter Ratio: 0.5
+ - Class: rviz/Time
+ Experimental: false
+ Name: Time
+ SyncMode: 0
+ SyncSource: velodyne_points_npy
+Visualization Manager:
+ Class: ""
+ Displays:
+ - Alpha: 0.5
+ Cell Size: 1
+ Class: rviz/Grid
+ Color: 160; 160; 164
+ Enabled: false
+ Line Style:
+ Line Width: 0.0299999993
+ Value: Lines
+ Name: Grid
+ Normal Cell Count: 0
+ Offset:
+ X: 0
+ Y: 0
+ Z: 0
+ Plane: XY
+ Plane Cell Count: 100
+ Reference Frame:
+ Value: false
+ - Alpha: 1
+ Autocompute Intensity Bounds: true
+ Autocompute Value Bounds:
+ Max Value: 10
+ Min Value: -10
+ Value: true
+ Axis: Z
+ Channel Name: intensity
+ Class: rviz/PointCloud2
+ Color: 255; 255; 255
+ Color Transformer: Intensity
+ Decay Time: 0
+ Enabled: true
+ Invert Rainbow: false
+ Max Color: 255; 255; 255
+ Max Intensity: 2
+ Min Color: 0; 0; 0
+ Min Intensity: 0
+ Name: velodyne_points_npy
+ Position Transformer: XYZ
+ Queue Size: 10
+ Selectable: true
+ Size (Pixels): 3
+ Size (m): 0.0599999987
+ Style: Flat Squares
+ Topic: /velodyne_points_npy
+ Unreliable: false
+ Use Fixed Frame: true
+ Use rainbow: true
+ Value: true
+ Enabled: true
+ Global Options:
+ Background Color: 100; 100; 100
+ Default Light: true
+ Fixed Frame: velodyne_link
+ Frame Rate: 30
+ Name: root
+ Tools:
+ - Class: rviz/Interact
+ Hide Inactive Objects: true
+ - Class: rviz/MoveCamera
+ - Class: rviz/Select
+ - Class: rviz/FocusCamera
+ - Class: rviz/Measure
+ - Class: rviz/SetInitialPose
+ Topic: /initialpose
+ - Class: rviz/SetGoal
+ Topic: /move_base_simple/goal
+ - Class: rviz/PublishPoint
+ Single click: true
+ Topic: /clicked_point
+ - Class: rviz_plugin_selected_points_publisher/SelectedPointsPublisher
+ Value: true
+ Views:
+ Current:
+ Class: rviz/Orbit
+ Distance: 25.0534229
+ Enable Stereo Rendering:
+ Stereo Eye Separation: 0.0599999987
+ Stereo Focal Distance: 1
+ Swap Stereo Eyes: false
+ Value: false
+ Focal Point:
+ X: 6.3730402
+ Y: -4.70459843
+ Z: 6.13416672
+ Focal Shape Fixed Size: false
+ Focal Shape Size: 0.0500000007
+ Invert Z Axis: false
+ Name: Current View
+ Near Clip Distance: 0.00999999978
+ Pitch: 0.600398242
+ Target Frame:
+ Value: Orbit (rviz)
+ Yaw: 3.63040066
+ Saved: ~
+Window Geometry:
+ Displays:
+ collapsed: false
+ Height: 1056
+ Hide Left Dock: false
+ Hide Right Dock: true
+ QMainWindow State: 000000ff00000000fd00000004000000000000017200000396fc0200000010fb0000001e0054006f006f006c002000500072006f0070006500720074006900650073020000050b0000037700000185000000a3fb000000120056006900650077007300200054006f006f02000001df000002110000018500000122fb000000200054006f006f006c002000500072006f0070006500720074006900650073003203000002880000011d000002210000017afb0000000c00430061006d00650072006100000000280000017e0000000000000000fb0000001200530065006c0065006300740069006f006e0000000028000000840000006100fffffffc0000002800000396000000d700fffffffa000000010100000002fb0000000800630061006d00340000000000ffffffff0000000000000000fb000000100044006900730070006c00610079007301000000000000016a0000016a00fffffffb0000002000730065006c0065006300740069006f006e00200062007500660066006500720200000138000000aa0000023a00000294fb00000014005700690064006500530074006500720065006f02000000e6000000d2000003ee0000030bfb0000000c004b0069006e0065006300740200000186000001060000030c00000261fb0000000c00430061006d0065007200610000000292000000c50000000000000000fb0000000800630061006d003102000009a8000002ad000001370000014bfb0000000800630061006d00320200000ae6000002af0000013c00000154fb0000000c00430061006d00650072006102000008d20000022e0000006900000018fb0000000800630061006d00330200000c28000002b00000014900000156fb0000000800630061006d00340200000d78000002b00000014800000157fb000000140074006f0070005f00630061006d00650072006102000004c800000245000001c8000001d500000001000001140000030bfc0200000003fb0000001e0054006f006f006c002000500072006f00700065007200740069006500730100000041000000780000000000000000fb0000000a0056006900650077007300000000280000030b000000ad00fffffffb0000001200530065006c0065006300740069006f006e010000025a000000b200000000000000000000000200000490000000a9fc0100000001fb0000000a00560069006500770073030000004e00000080000002e100000197000000030000073f0000003efc0100000002fb0000000800540069006d006501000000000000073f0000030000fffffffb0000000800540069006d00650100000000000004500000000000000000000005c70000039600000004000
000040000000800000008fc0000000100000002000000010000000a0054006f006f006c00730100000000ffffffff0000000000000000
+ Selection:
+ collapsed: false
+ Time:
+ collapsed: false
+ Tool Properties:
+ collapsed: false
+ Views:
+ collapsed: true
+ Width: 1855
+ X: 1037
+ Y: 24
+# -*- coding: utf-8 -*-
+ Online segmentation using .npy files and the SqueezeSeg model.
+ This script can:
+ 1. read all .npy files from the lidar_2d folder
+ 2. predict labels with the SqueezeSeg model using TensorFlow
+ 3. publish the result to the 'squeeze_seg/points' topic
+ Strongly inspired by [https://github.com/Durant35/SqueezeSeg]
+ Original code: [https://github.com/BichenWuUCB/SqueezeSeg]
+import sys
+import os.path
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+import rospy
+from sensor_msgs.msg import PointCloud2, PointField
+import sensor_msgs.point_cloud2 as pc2
+from sensor_msgs.msg import Image as ImageMsg
+from std_msgs.msg import Header
+from squeezeseg.config import *
+from squeezeseg.nets import SqueezeSeg
+from squeezeseg.utils.util import *
+from squeezeseg.utils.clock import Clock
+from squeezeseg.imdb import kitti # ed: header added
class NPY_TENSORFLOW_TO_ROS():
    """Run SqueezeSeg on stored .npy LiDAR scans and publish the labelled clouds.

    Constructing an instance restores the model from ``FLAGS.checkpoint``,
    reads the scan list and then publishes one prediction after another,
    cycling through the scans until ROS shuts down. The constructor therefore
    only returns at shutdown (or immediately if the scan list is empty).
    """

    def __init__(self, pub_topic, FLAGS, npy_path="", npy_file_list=""):
        """Set up the model, the publisher and start the publishing loop.

        Args:
          pub_topic: name of the PointCloud2 topic to publish on.
          FLAGS: object providing ``gpu`` (value for CUDA_VISIBLE_DEVICES) and
            ``checkpoint`` (path of the model parameters to restore).
          npy_path: directory that contains the .npy scans.
          npy_file_list: text file with one scan name (without extension) per line.
        """
        os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

        self._mc = kitti_squeezeSeg_config()
        self._mc.LOAD_PRETRAINED_MODEL = False
        self._mc.BATCH_SIZE = 1  # TODO(bichen): fix this hard-coded batch size.
        self._model = SqueezeSeg(self._mc)
        self._saver = tf.train.Saver(self._model.model_params)

        self._session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        self._saver.restore(self._session, FLAGS.checkpoint)

        # ed: Publisher
        self._pub = rospy.Publisher(pub_topic, PointCloud2, queue_size=1)

        self.get_npy_from_lidar_2d(npy_path, npy_file_list)

        self.idx = 0
        if self.len_files == 0:
            # Nothing to cycle through; indexing an empty list would only crash.
            rospy.logerr("No .npy files listed in %s", npy_file_list)
            return

        while not rospy.is_shutdown():
            self.prediction_publish(self.idx)
            self.idx += 1
            # Valid indices are 0 .. len_files - 1, so wrap as soon as the
            # index reaches len_files.
            if self.idx >= self.len_files:
                self.idx = 0

        rospy.spin()

    def get_npy_from_lidar_2d(self, npy_path, npy_file_list):
        """Read the scan names from ``npy_file_list`` and build their .npy paths.

        Blank lines (including the one produced by a trailing newline) are
        ignored. Sets ``npy_path``, ``npy_file_list``, ``npy_files`` and
        ``len_files`` on the instance.
        """
        self.npy_path = npy_path
        with open(npy_file_list, 'r') as list_file:
            self.npy_file_list = [line.strip() for line in list_file if line.strip()]

        self.npy_files = [os.path.join(self.npy_path, name + '.npy')
                          for name in self.npy_file_list]
        self.len_files = len(self.npy_files)

    def prediction_publish(self, idx):
        """Load scan ``idx``, predict a label per point and publish the cloud."""
        clock = Clock()

        # npy_files already holds complete paths, so load them directly.
        record = np.load(self.npy_files[idx])
        lidar = record[:, :, :5]

        # to perform prediction
        lidar_mask = np.reshape(
            (lidar[:, :, 4] > 0),
            [self._mc.ZENITH_LEVEL, self._mc.AZIMUTH_LEVEL, 1]
        )
        norm_lidar = (lidar - self._mc.INPUT_MEAN) / self._mc.INPUT_STD

        pred_cls = self._session.run(
            self._model.pred_cls,
            feed_dict={
                self._model.lidar_input: [norm_lidar],
                self._model.keep_prob: 1.0,
                self._model.lidar_mask: [lidar_mask]
            }
        )
        label = pred_cls[0]

        ## point cloud for SqueezeSeg segments
        x = lidar[:, :, 0].reshape(-1)
        y = lidar[:, :, 1].reshape(-1)
        z = lidar[:, :, 2].reshape(-1)
        intensity = lidar[:, :, 3].reshape(-1)
        label = label.reshape(-1)
        cloud = np.stack((x, y, z, intensity, label))

        header = Header()
        header.stamp = rospy.Time.now()
        header.frame_id = "velodyne_link"

        # point cloud segments: one row per point (x, y, z, intensity, label)
        msg_segment = self.create_cloud_xyzil32(header, cloud.T)

        # publish
        self._pub.publish(msg_segment)
        rospy.loginfo("Point cloud processed. Took %.6f ms.", clock.takeRealTime())

    # create pc2_msg with 5 fields
    def create_cloud_xyzil32(self, header, points):
        """Pack ``points`` into a PointCloud2 with x, y, z, intensity and label fields."""
        fields = [PointField('x', 0, PointField.FLOAT32, 1),
                  PointField('y', 4, PointField.FLOAT32, 1),
                  PointField('z', 8, PointField.FLOAT32, 1),
                  PointField('intensity', 12, PointField.FLOAT32, 1),
                  PointField('label', 16, PointField.FLOAT32, 1)]
        return pc2.create_cloud(header, fields, points)
if __name__ == '__main__':
    rospy.init_node('npy_tensorflow_ros_node')

    # All settings are read from the ROS parameter server.
    npy_path = rospy.get_param('npy_path')
    npy_file_list = rospy.get_param('npy_file_list')
    pub_topic = rospy.get_param('pub_topic')
    checkpoint = rospy.get_param('checkpoint')
    gpu = rospy.get_param('gpu')

    FLAGS = tf.app.flags.FLAGS
    tf.app.flags.DEFINE_string(
        'checkpoint', checkpoint,
        """Path to the model paramter file.""")
    # Use the configured GPU id; str() because the flag is a string flag and
    # the value is written to CUDA_VISIBLE_DEVICES, while the parameter may
    # arrive as an integer.
    tf.app.flags.DEFINE_string('gpu', str(gpu), """gpu id.""")

    # FLAGS is a required constructor argument.
    npy_tensorflow_to_ros = NPY_TENSORFLOW_TO_ROS(pub_topic=pub_topic,
                                                  FLAGS=FLAGS,
                                                  npy_path=npy_path,
                                                  npy_file_list=npy_file_list)
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+import rospy
+import std_msgs.msg
+import sensor_msgs.point_cloud2 as pcl2
+from std_msgs.msg import String
+from sensor_msgs.msg import PointCloud2, PointField
+import os, os.path
+import numpy as np
class NPY_TO_ROS():
    """Publish stored LiDAR scans as PointCloud2 on ``/velodyne_points_npy`` at 5 Hz.

    With ``is_xyzi=True`` each scan listed in ``npy_file_list`` is loaded from
    ``npy_path`` together with its ``pred_<name>.npy`` file from
    ``npy_path_pred``; the prediction is published in the ``intensity`` field.
    With ``is_xyzi=False`` every file in ``npy_path`` is read as raw float32
    (x, y, z, intensity) records and published as an xyz cloud.
    Publishing starts from the constructor and runs once through the files.
    """

    def __init__(self, is_xyzi=True, npy_path='', npy_path_pred='', npy_file_list=''):
        # ed: lidar_2d path
        self.npy_path = npy_path
        # ed: pred_~~~.npy path
        self.npy_path_pred = npy_path_pred

        self.npy_file_list = self._read_name_list(npy_file_list)

        if not is_xyzi:
            # os.listdir returns entries in arbitrary order; sort so the scans
            # are replayed in a reproducible sequence.
            self.npy_files_list_xyz = sorted(os.listdir(self.npy_path))

        self.npy_files = [os.path.join(self.npy_path, name + '.npy')
                          for name in self.npy_file_list]
        self.npy_files_pred = [os.path.join(self.npy_path_pred, 'pred_' + name + '.npy')
                               for name in self.npy_file_list]

        self.len_files = len(self.npy_files)
        print("[+] There are {} .npy files".format(self.len_files))

        self.velo_pub = rospy.Publisher('/velodyne_points_npy', PointCloud2, queue_size=1)
        self.loop_rate = rospy.Rate(5)

        if is_xyzi:
            self.processing_xyzi()
        else:
            self.processing_xyz()

    @staticmethod
    def _read_name_list(list_path):
        """Return the non-blank lines of ``list_path``; an empty path gives []."""
        if not list_path:
            return []
        with open(list_path, 'r') as list_file:
            return [line.strip() for line in list_file if line.strip()]

    @staticmethod
    def _stack_xyz_with_prediction(bin_points, bin_points_pred):
        """Return one ``[x, y, z, prediction...]`` list per point, row by row."""
        h, w, _ = bin_points.shape  # e.g. 64, 512, 6
        xyz = bin_points[:, :, :3]
        pred = np.asarray(bin_points_pred)[:h, :w].reshape(h, w, -1)
        merged = np.concatenate((xyz, pred), axis=2)
        return merged.reshape(h * w, merged.shape[2]).tolist()

    @staticmethod
    def _filter_xyz(bin_points):
        """Keep xyz of points with x >= 0.1, y != 0, z <= 5 and intensity < 10."""
        keep = ((bin_points[:, 0] >= 0.1) & (bin_points[:, 1] != 0)
                & (bin_points[:, 2] <= 5) & (bin_points[:, 3] < 10))
        return bin_points[keep, :3].tolist()

    def processing_xyzi(self):
        """Publish every listed scan with its prediction, stopping on shutdown."""
        for i in range(self.len_files):
            print("[+] {} th file name : {} ".format(i, self.npy_files[i]))
            # npy_files / npy_files_pred already hold complete paths.
            bin_points = np.load(self.npy_files[i])
            bin_points_pred = np.load(self.npy_files_pred[i])

            header = std_msgs.msg.Header()
            header.stamp = rospy.Time.now()
            header.frame_id = 'velodyne_link'

            cloud_points = self._stack_xyz_with_prediction(bin_points, bin_points_pred)
            pc2_msg = self.create_cloud_xyzi32(header, cloud_points)

            # ed: /velodyne_points_npy publish
            self.velo_pub.publish(pc2_msg)
            self.loop_rate.sleep()

            if rospy.is_shutdown():
                return

    def processing_xyz(self):
        """Publish every file found in ``npy_path`` as an xyz cloud."""
        # Iterate over the directory listing itself; its length is unrelated
        # to the number of names in the list file.
        for i, file_name in enumerate(self.npy_files_list_xyz):
            print("[+] {} th file name : {} ".format(i, file_name))
            bin_points = np.fromfile(os.path.join(self.npy_path, file_name),
                                     dtype=np.float32).reshape(-1, 4)

            header = std_msgs.msg.Header()
            header.stamp = rospy.Time.now()
            header.frame_id = 'velodyne_link'

            cloud_points = self._filter_xyz(bin_points)
            pc2_msg = pcl2.create_cloud_xyz32(header, cloud_points)

            # ed: /velodyne_points_npy publish
            self.velo_pub.publish(pc2_msg)
            self.loop_rate.sleep()

            if rospy.is_shutdown():
                return

    def create_cloud_xyzi32(self, header, points):
        """Pack ``points`` into a PointCloud2 with x, y, z and intensity fields."""
        fields = [PointField('x', 0, PointField.FLOAT32, 1),
                  PointField('y', 4, PointField.FLOAT32, 1),
                  PointField('z', 8, PointField.FLOAT32, 1),
                  PointField('intensity', 12, PointField.FLOAT32, 1)]
        return pcl2.create_cloud(header, fields, points)
if __name__ == '__main__':
    rospy.init_node('npy_to_ros_node')

    # The paths are read from ROS parameters, so this node is expected to be
    # started from the .launch file that sets them.
    param_names = ('npy_path', 'npy_path_pred', 'npy_file_list')
    params = dict((key, rospy.get_param(key)) for key in param_names)

    npy_to_ros = NPY_TO_ROS(is_xyzi=True, **params)
+# Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+from .kitti_squeezeSeg_config import kitti_squeezeSeg_config
#G Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+"""Base Model configurations"""
+import os
+import os.path as osp
+import numpy as np
+from easydict import EasyDict as edict
def base_model_config(dataset='KITTI'):
    """Build the default configuration that model-specific configs start from.

    Args:
      dataset: dataset name; only 'KITTI' (case-insensitive) is accepted.

    Returns:
      An EasyDict holding the default settings assigned below.

    Raises:
      AssertionError: if ``dataset`` is not KITTI.
    """
    assert dataset.upper()=='KITTI', \
        'Currently only support KITTI dataset'

    cfg = edict()

    # Dataset used to train/val/test model. Now support KITTI
    cfg.DATASET = dataset.upper()

    # classes
    cfg.CLASSES = [
        'unknown',
        'car',
        'van',
        'truck',
        'pedestrian',
        'person_sitting',
        'cyclist',
        'tram',
        'misc',
    ]

    # number of classes
    cfg.NUM_CLASS = len(cfg.CLASSES)

    # dict from class name to id
    cfg.CLS_2_ID = dict(zip(cfg.CLASSES, range(len(cfg.CLASSES))))

    # loss weight for each class
    cfg.CLS_LOSS_WEIGHT = np.array(
        [1/20.0, 1.0, 2.0, 3.0,
         8.0, 10.0, 8.0, 2.0, 1.0]
    )

    # rgb color for each class
    cfg.CLS_COLOR_MAP = np.array(
        [[ 0.00, 0.00, 0.00],
         [ 0.12, 0.56, 0.37],
         [ 0.66, 0.55, 0.71],
         [ 0.58, 0.72, 0.88],
         [ 0.25, 0.51, 0.76],
         [ 0.98, 0.47, 0.73],
         [ 0.40, 0.19, 0.10],
         [ 0.87, 0.19, 0.17],
         [ 0.13, 0.55, 0.63]]
    )

    # Probability to keep a node in dropout
    cfg.KEEP_PROB = 0.5

    # image width
    cfg.IMAGE_WIDTH = 224

    # image height
    cfg.IMAGE_HEIGHT = 224

    # number of vertical levels
    cfg.NUM_LEVEL = 10

    # number of pie sectors of the field of view
    cfg.NUM_SECTOR = 90

    # maximum distance of a measurement
    cfg.MAX_DIST = 100.0

    # batch size
    cfg.BATCH_SIZE = 20

    # Pixel mean values (BGR order) as a (1, 1, 3) array. Below is the BGR mean
    # of VGG16
    cfg.BGR_MEANS = np.array([[[103.939, 116.779, 123.68]]])

    # Pixel mean values (RGB order) as a (1, 1, 3) array. Below is the RGB mean
    # of VGG16
    cfg.RGB_MEANS = np.array([[[123.68, 116.779, 103.939]]])

    # reduce step size after this many steps
    cfg.DECAY_STEPS = 10000

    # multiply the learning rate by this factor
    cfg.LR_DECAY_FACTOR = 0.1

    # learning rate
    cfg.LEARNING_RATE = 0.005

    # momentum
    cfg.MOMENTUM = 0.9

    # weight decay
    cfg.WEIGHT_DECAY = 0.0005

    # whether to load pre-trained model
    # NOTE(review): this default was not assigned here although other code in
    # this package sets it; True is assumed as the default -- confirm.
    cfg.LOAD_PRETRAINED_MODEL = True

    # path to load the pre-trained model
    # NOTE(review): read by the SqueezeSeg forward graph; an empty default is
    # assumed so callers must set a real path -- confirm.
    cfg.PRETRAINED_MODEL_PATH = ''

    # print log to console in debug mode
    cfg.DEBUG_MODE = False

    # gradients with norm larger than this is going to be clipped.
    cfg.MAX_GRAD_NORM = 10.0

    # Whether to do data augmentation
    # NOTE(review): no setting is assigned for this comment -- confirm whether
    # a data-augmentation flag is missing here.

    # The range to randomly shift the image width
    cfg.DRIFT_X = 0

    # The range to randomly shift the image height
    cfg.DRIFT_Y = 0

    # small value used in batch normalization to prevent dividing by 0. The
    # default value here is the same with caffe's default value.
    # NOTE(review): no setting is assigned for this comment -- confirm whether
    # a batch-norm epsilon is missing here.

    # small value used in denominator to prevent division by 0
    cfg.DENOM_EPSILON = 1e-12

    # capacity for tf.FIFOQueue
    # NOTE(review): no setting is assigned for this comment -- confirm whether
    # a queue capacity is missing here.

    return cfg
+# Author: Bichen Wu (bichen@berkeley.edu) 08/25/2016
+#-*- coding: utf-8 -*-
+"""Model configuration for the KITTI dataset"""
+import numpy as np
+from .config import base_model_config
def kitti_squeezeSeg_config():
    """Return the base KITTI config with the SqueezeSeg settings applied.

    Tune the parameters below.
    """
    mc = base_model_config('KITTI')

    # --- classes ---
    class_names = ['unknown', 'car', 'pedestrian', 'cyclist']
    mc.CLASSES = class_names
    mc.NUM_CLASS = len(mc.CLASSES)
    mc.CLS_2_ID = dict((name, idx) for idx, name in enumerate(mc.CLASSES))
    mc.CLS_LOSS_WEIGHT = np.array([1/15.0, 1.0, 10.0, 10.0])
    mc.CLS_COLOR_MAP = np.array([
        [ 0.00, 0.00, 0.00],
        [ 0.12, 0.56, 0.37],
        [ 0.66, 0.55, 0.71],
        [ 0.58, 0.72, 0.88],
    ])

    mc.BATCH_SIZE = 32

    # --- spherical projection ---
    mc.AZIMUTH_LEVEL = 512
    mc.ZENITH_LEVEL = 64

    # --- bilateral filter + R-CRF ---
    mc.LCN_HEIGHT = 3
    mc.LCN_WIDTH = 5

    # --- bilateral filter ---
    mc.BILATERAL_THETA_A = np.array([.9, .9, .6, .6])
    mc.BILATERAL_THETA_R = np.array([.015, .015, .01, .01])
    mc.BI_FILTER_COEF = 0.1

    # --- R-CRF ---
    mc.RCRF_ITER = 3
    mc.ANG_THETA_A = np.array([.9, .9, .6, .6])
    mc.ANG_FILTER_COEF = 0.02

    # --- optimisation ---
    mc.LEARNING_RATE = 0.01
    mc.CLS_LOSS_COEF = 15.0  # for the loss function
    mc.WEIGHT_DECAY = 0.0001
    mc.MAX_GRAD_NORM = 1.0
    mc.MOMENTUM = 0.9
    mc.DECAY_STEPS = 10000
    mc.LR_DECAY_FACTOR = 0.5

    # --- input normalisation: x, y, z, intensity, distance ---
    mc.INPUT_MEAN = np.array([[[10.88, 0.23, -1.04, 0.21, 12.12]]])
    mc.INPUT_STD = np.array([[[11.47, 6.91, 0.86, 0.16, 12.32]]])

    return mc
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+from .kitti import kitti
+# Author: Bichen Wu (bichen@berkeley.edu) 02/27/2017
+#-*- coding: utf-8 -*-
+"""The data base wrapper class"""
+import os
+import random
+import shutil
+import numpy as np
+from squeezeseg.utils.util import *
class imdb(object):
    """Image database.

    Base class holding the sample index list and the batch-reading cursor.
    ``read_batch`` calls ``self._lidar_2d_path_at(idx)``, which this class does
    not define; it has to be provided by a subclass.
    """

    def __init__(self, name, mc):
        self._name = name
        self._image_set = []
        self._image_idx = []
        self._data_root_path = []
        self.mc = mc

        # batch reader
        self._perm_idx = []
        self._cur_idx = 0

    @property
    def name(self):
        return self._name

    @property
    def image_idx(self):
        return self._image_idx

    @property
    def image_set(self):
        return self._image_set

    @property
    def data_root_path(self):
        return self._data_root_path

    def _shuffle_image_idx(self):
        """Draw a new random permutation of the indices and rewind the cursor."""
        self._perm_idx = [self._image_idx[i] for i in
                          np.random.permutation(np.arange(len(self._image_idx)))]
        self._cur_idx = 0

    def read_batch(self, shuffle=True):
        """Read a batch of lidar data including labels. Data formatted as numpy array
        of shape: height x width x {x, y, z, intensity, range, label}.

        Each record is mirrored along the width axis (and its y negated) with
        probability 0.5.

        Args:
          shuffle: whether or not to shuffle the dataset
        Returns:
          lidar_per_batch: LiDAR input. Shape: batch x height x width x 5.
          lidar_mask_per_batch: LiDAR mask, 0 for missing data and 1 otherwise.
            Shape: batch x height x width x 1.
          label_per_batch: point-wise labels. Shape: batch x height x width.
          weight_per_batch: loss weights for different classes. Shape:
            batch x height x width
        """
        mc = self.mc

        if shuffle:
            # Reshuffle when fewer than a full batch of unread indices remain.
            if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
                self._shuffle_image_idx()
            batch_idx = self._perm_idx[self._cur_idx:self._cur_idx+mc.BATCH_SIZE]
            self._cur_idx += mc.BATCH_SIZE
        else:
            if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
                # Wrap around: take the tail and continue from the start.
                batch_idx = self._image_idx[self._cur_idx:] \
                    + self._image_idx[:self._cur_idx + mc.BATCH_SIZE-len(self._image_idx)]
                self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
            else:
                batch_idx = self._image_idx[self._cur_idx:self._cur_idx+mc.BATCH_SIZE]
                self._cur_idx += mc.BATCH_SIZE

        lidar_per_batch = []
        lidar_mask_per_batch = []
        label_per_batch = []
        weight_per_batch = []

        for idx in batch_idx:
            # load data
            # loading from npy is 30x faster than loading from pickle
            record = np.load(self._lidar_2d_path_at(idx)).astype(np.float32, copy=False)

            if np.random.rand() > 0.5:
                # flip y
                record = record[:, ::-1, :]
                record[:, :, 1] *= -1

            lidar = record[:, :, :5]  # x, y, z, intensity, depth
            # Points with a positive depth are valid; np.reshape needs the
            # target shape, the same one used for the prediction mask.
            lidar_mask = np.reshape(
                (lidar[:, :, 4] > 0),
                [mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 1]
            )
            # normalize
            lidar = (lidar - mc.INPUT_MEAN)/mc.INPUT_STD

            label = record[:, :, 5]
            weight = np.zeros(label.shape)
            for l in range(mc.NUM_CLASS):
                weight[label==l] = mc.CLS_LOSS_WEIGHT[int(l)]

            # Append all the data
            lidar_per_batch.append(lidar)
            lidar_mask_per_batch.append(lidar_mask)
            label_per_batch.append(label)
            weight_per_batch.append(weight)

        return np.array(lidar_per_batch), np.array(lidar_mask_per_batch), \
            np.array(label_per_batch), np.array(weight_per_batch)

    def evaluate_detections(self):
        raise NotImplementedError
+# Author: Bichen Wu (bichen@berkeley.edu) 02/27/2017
+#-*- coding: utf-8 -*-
+"""Image data base class for kitti"""
+import os
+import numpy as np
+import subprocess
+from .imdb import imdb
class kitti(imdb):
    """Image database that resolves sample ids to .npy files below a data root."""

    def __init__(self, image_set, data_path, mc):
        super(kitti, self).__init__('kitti_' + image_set, mc)
        self._image_set = image_set
        self._data_root_path = data_path

        root = self._data_root_path
        self._lidar_2d_path = os.path.join(root, 'lidar_2d')
        self._gta_2d_path = os.path.join(root, 'gta')

        # list of string ids read from the image-set file
        self._image_idx = self._load_image_set_idx()

        ## batch reader ##
        self._perm_idx = None
        self._cur_idx = 0
        # TODO(bichen): add a random seed as parameter
        self._shuffle_image_idx()

    def _load_image_set_idx(self):
        """Return the stripped lines of ``<root>/ImageSet/<image_set>.txt``."""
        list_path = os.path.join(
            self._data_root_path, 'ImageSet', self._image_set + '.txt')
        assert os.path.exists(list_path), \
            'File does not exist: {}'.format(list_path)

        with open(list_path) as handle:
            return [line.strip() for line in handle]

    def _lidar_2d_path_at(self, idx):
        """Return the .npy path for ``idx``; ids starting with 'gta_' use the gta folder."""
        folder = self._gta_2d_path if idx[:4] == 'gta_' else self._lidar_2d_path
        npy_path = os.path.join(folder, idx + '.npy')

        assert os.path.exists(npy_path), \
            'File does not exist: {}'.format(npy_path)
        return npy_path
+#-*- coding: utf-8 -*-
+from .squeezeSeg import SqueezeSeg
+# Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+#-*- coding: utf-8 -*-
+"""SqueezeSeg model"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+import joblib
+import numpy as np
+import tensorflow as tf
+from squeezeseg.nn_skeleton import ModelSkeleton
+class SqueezeSeg(ModelSkeleton):
+ def __init__(self, mc, gpu_id=0):
+ with tf.device('/gpu:{}'.format(gpu_id)):
+ ModelSkeleton.__init__(self, mc)
+ self._add_forward_graph() # SqueezeNet Model
+ self._add_output_graph() # pred_prob, pred_cls
+ self._add_loss_graph() # cls_loss, total_loss
+ self._add_train_graph() #
+ self._add_viz_graph() # label_to_show, depth_image_to_show, pred_image_to_show
+ self._add_summary_ops() #
+ def _add_forward_graph(self):
+ """NN architecture."""
+ mc = self.mc
+ assert tf.gfile.Exists(mc.PRETRAINED_MODEL_PATH), \
+ 'Cannot find pretrained model at the given path:' \
+ ' {}'.format(mc.PRETRAINED_MODEL_PATH)
+ self.caffemodel_weight = joblib.load(mc.PRETRAINED_MODEL_PATH)
+ conv1 = self._conv_layer(
+ 'conv1', self.lidar_input, filters=64, size=3, stride=2,
+ padding='SAME', freeze=False, xavier=True)
+ conv1_skip = self._conv_layer(
+ 'conv1_skip', self.lidar_input, filters=64, size=1, stride=1,
+ padding='SAME', freeze=False, xavier=True)
+ pool1 = self._pooling_layer(
+ 'pool1', conv1, size=3, stride=2, padding='SAME')
+ fire2 = self._fire_layer(
+ 'fire2', pool1, s1x1=16, e1x1=64, e3x3=64, freeze=False)
+ fire3 = self._fire_layer(
+ 'fire3', fire2, s1x1=16, e1x1=64, e3x3=64, freeze=False)
+ pool3 = self._pooling_layer(
+ 'pool3', fire3, size=3, stride=2, padding='SAME')
+ fire4 = self._fire_layer(
+ 'fire4', pool3, s1x1=32, e1x1=128, e3x3=128, freeze=False)
+ fire5 = self._fire_layer(
+ 'fire5', fire4, s1x1=32, e1x1=128, e3x3=128, freeze=False)
+ pool5 = self._pooling_layer(
+ 'pool5', fire5, size=3, stride=2, padding='SAME')
+ fire6 = self._fire_layer(
+ 'fire6', pool5, s1x1=48, e1x1=192, e3x3=192, freeze=False)
+ fire7 = self._fire_layer(
+ 'fire7', fire6, s1x1=48, e1x1=192, e3x3=192, freeze=False)
+ fire8 = self._fire_layer(
+ 'fire8', fire7, s1x1=64, e1x1=256, e3x3=256, freeze=False)
+ fire9 = self._fire_layer(
+ 'fire9', fire8, s1x1=64, e1x1=256, e3x3=256, freeze=False)
+ # Deconvolation
+ fire10 = self._fire_deconv(
+ 'fire_deconv10', fire9, s1x1=64, e1x1=128, e3x3=128, factors=[1, 2],
+ stddev=0.1)
+ fire10_fuse = tf.add(fire10, fire5, name='fure10_fuse')
+ fire11 = self._fire_deconv(
+ 'fire_deconv11', fire10_fuse, s1x1=32, e1x1=64, e3x3=64, factors=[1, 2],
+ stddev=0.1)
+ fire11_fuse = tf.add(fire11, fire3, name='fire11_fuse')
+ fire12 = self._fire_deconv(
+ 'fire_deconv12', fire11_fuse, s1x1=16, e1x1=32, e3x3=32, factors=[1, 2],
+ stddev=0.1)
+ fire12_fuse = tf.add(fire12, conv1, name='fire12_fuse')
+ fire13 = self._fire_deconv(
+ 'fire_deconv13', fire12_fuse, s1x1=16, e1x1=32, e3x3=32, factors=[1, 2],
+ stddev=0.1)
+ fire13_fuse = tf.add(fire13, conv1_skip, name='fire13_fuse')
+ drop13 = tf.nn.dropout(fire13_fuse, self.keep_prob, name='drop13')
+ conv14 = self._conv_layer(
+ 'conv14_prob', drop13, filters=mc.NUM_CLASS, size=3, stride=1,
+ padding='SAME', relu=False, stddev=0.1)
+ bilateral_filter_weights = self._bilateral_filter_layer(
+ 'bilateral_filter', self.lidar_input[:, :, :, :3], # x, y, z
+ sizes=[mc.LCN_HEIGHT, mc.LCN_WIDTH], stride=1)
+ self.output_prob = self._recurrent_crf_layer(
+ 'recurrent_crf', conv14, bilateral_filter_weights,
+ sizes=[mc.LCN_HEIGHT, mc.LCN_WIDTH], num_iterations=mc.RCRF_ITER,
+ padding='SAME')
+ def _fire_layer(self, layer_name, inputs, s1x1, e1x1, e3x3, stddev=0.001,
+ freeze=False):
+ """Fire layer constructor.
+ Args:
+ layer_name: layer name
+ inputs: input tensor
+ s1x1: number of 1x1 filters in squeeze layer.
+ e1x1: number of 1x1 filters in expand layer.
+ e3x3: number of 3x3 filters in expand layer.
+ freeze: if true, do not train parameters in this layer.
+ Returns:
+ fire layer operation.
+ """
+ sq1x1 = self._conv_layer(
+ layer_name+'/squeeze1x1', inputs, filters=s1x1, size=1, stride=1,
+ padding='SAME', freeze=freeze, stddev=stddev)
+ ex1x1 = self._conv_layer(
+ layer_name+'/expand1x1', sq1x1, filters=e1x1, size=1, stride=1,
+ padding='SAME', freeze=freeze, stddev=stddev)
+ ex3x3 = self._conv_layer(
+ layer_name+'/expand3x3', sq1x1, filters=e3x3, size=3, stride=1,
+ padding='SAME', freeze=freeze, stddev=stddev)
+ return tf.concat([ex1x1, ex3x3], 3, name=layer_name+'/concat')
+ def _fire_deconv(self, layer_name, inputs, s1x1, e1x1, e3x3,
+ factors=[1, 2], freeze=False, stddev=0.001):
+ """Fire deconvolution layer constructor.
+ Args:
+ layer_name: layer name
+ inputs: input tensor
+ s1x1: number of 1x1 filters in squeeze layer.
+ e1x1: number of 1x1 filters in expand layer.
+ e3x3: number of 3x3 filters in expand layer.
+ factors: spatial upsampling factors.
+ freeze: if true, do not train parameters in this layer.
+ Returns:
+ fire layer operation.
+ """
+ assert len(factors) == 2,'factors should be an array of size 2'
+ ksize_h = factors[0] * 2 - factors[0] % 2
+ ksize_w = factors[1] * 2 - factors[1] % 2
+ sq1x1 = self._conv_layer(
+ layer_name+'/squeeze1x1', inputs, filters=s1x1, size=1, stride=1,
+ padding='SAME', freeze=freeze, stddev=stddev)
+ deconv = self._deconv_layer(
+ layer_name+'/deconv', sq1x1, filters=s1x1, size=[ksize_h, ksize_w],
+ stride=factors, padding='SAME', init='bilinear')
+ ex1x1 = self._conv_layer(
+ layer_name+'/expand1x1', deconv, filters=e1x1, size=1, stride=1,
+ padding='SAME', freeze=freeze, stddev=stddev)
+ ex3x3 = self._conv_layer(
+ layer_name+'/expand3x3', deconv, filters=e3x3, size=3, stride=1,
+ padding='SAME', freeze=freeze, stddev=stddev)
+ return tf.concat([ex1x1, ex3x3], 3, name=layer_name+'/concat')
+# Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+#-*- coding: utf-8 -*-
+"""Neural network model base class."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+from squeezeseg.utils import util
+import numpy as np
+import tensorflow as tf
+def _variable_on_device(name, shape, initializer, trainable=True):
+ """Helper to create a Variable.
+ Args:
+ name: name of the variable
+ shape: list of ints
+ initializer: initializer for Variable
+ Returns:
+ Variable Tensor
+ """
+ # TODO(bichen): fix the hard-coded data type below
+ dtype = tf.float32
+ if not callable(initializer):
+ var = tf.get_variable(name, initializer=initializer, trainable=trainable)
+ else:
+ var = tf.get_variable(
+ name, shape, initializer=initializer, dtype=dtype, trainable=trainable)
+ return var
+def _variable_with_weight_decay(name, shape, wd, initializer, trainable=True):
+ """Helper to create an initialized Variable with weight decay.
+ Note that the Variable is initialized with a truncated normal distribution.
+ A weight decay is added only if one is specified.
+ Args:
+ name: name of the variable
+ shape: list of ints
+ wd: add L2Loss weight decay multiplied by this float. If None, weight
+ decay is not added for this Variable.
+ Returns:
+ Variable Tensor
+ """
+ var = _variable_on_device(name, shape, initializer, trainable)
+ if wd is not None and trainable:
+ weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+ return var
+class ModelSkeleton:
+ """Base class of NN detection models."""
+ def __init__(self, mc):
+ self.mc = mc
+ # a scalar tensor in range (0, 1]. Usually set to 0.5 in training phase and
+ # 1.0 in evaluation phase
+ self.ph_keep_prob = tf.placeholder(tf.float32, name='keep_prob')
+ # projected lidar points on a 2D spherical surface
+ self.ph_lidar_input = tf.placeholder(
+ tf.float32, [mc.BATCH_SIZE, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 5],
+ name='lidar_input'
+ )
+ # A tensor where an element is 1 if the corresponding cell contains an
+ # valid lidar measurement. Or if the data is missing, then mark it as 0.
+ self.ph_lidar_mask = tf.placeholder(
+ tf.float32, [mc.BATCH_SIZE, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 1],
+ name='lidar_mask')
+ # A tensor where each element contains the class of each pixel
+ self.ph_label = tf.placeholder(
+ name='label')
+ # weighted loss for different classes
+ self.ph_loss_weight = tf.placeholder(
+ tf.float32, [mc.BATCH_SIZE, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL],
+ name='loss_weight')
+ # define a FIFOqueue for pre-fetching data
+ self.q = tf.FIFOQueue(
+ capacity=mc.QUEUE_CAPACITY,
+ dtypes=[tf.float32, tf.float32, tf.float32, tf.int32, tf.float32],
+ shapes=[[],
+ )
+ # train.py:129 : enqueue_op에 feed_dict가 들어간다
+ self.enqueue_op = self.q.enqueue(
+ [self.ph_keep_prob, self.ph_lidar_input, self.ph_lidar_mask,
+ self.ph_label, self.ph_loss_weight]
+ )
+ self.keep_prob, self.lidar_input, self.lidar_mask, self.label, \
+ self.loss_weight = self.q.dequeue()
+ # model parameters
+ self.model_params = []
+ # model size counter
+ self.model_size_counter = [] # array of tuple of layer name, parameter size
+ # flop counter
+ self.flop_counter = [] # array of tuple of layer name, flop number
+ # activation counter
+ self.activation_counter = [] # array of tuple of layer name, output activations
+ self.activation_counter.append(('input', mc.AZIMUTH_LEVEL*mc.ZENITH_LEVEL*3))
+ def _add_forward_graph(self):
+ """NN architecture specification."""
+ raise NotImplementedError
+ def _add_output_graph(self):
+ """Define how to interpret the output.
+ Reads self.output_prob and self.lidar_mask. Sets self.prob (softmax over the
+ last axis, multiplied by the lidar mask) and self.pred_cls (arg-max of
+ self.prob along axis 3), then adds one activation summary per entry of
+ mc.CLASSES.
+ """
+ mc = self.mc
+ with tf.variable_scope('interpret_output') as scope:
+ # multiplying by the mask zeroes the probabilities where the mask is 0
+ self.prob = tf.multiply(
+ tf.nn.softmax(self.output_prob, dim=-1), self.lidar_mask,
+ name='pred_prob')
+ self.pred_cls = tf.argmax(self.prob, axis=3, name='pred_cls')
+ # add summaries
+ for cls_id, cls in enumerate(mc.CLASSES):
+ self._activation_summary(self.prob[:, :, :, cls_id], 'prob_'+cls)
+ def _add_loss_graph(self):
+ """Define the loss operation.
+ Sets self.cls_loss to the sparse softmax cross entropy between self.label
+ and self.output_prob, weighted element-wise by self.lidar_mask and
+ self.loss_weight, summed, divided by the sum of the mask and scaled by
+ mc.CLS_LOSS_COEF. Sets self.loss to the sum of everything in the 'losses'
+ collection and adds scalar summaries for both.
+ """
+ mc = self.mc
+ with tf.variable_scope('cls_loss') as scope:
+ # `/` and `*` associate left to right, so the coefficient multiplies the
+ # mask-normalised sum: (sum / sum(mask)) * CLS_LOSS_COEF
+ self.cls_loss = tf.identity(
+ tf.reduce_sum(
+ tf.nn.sparse_softmax_cross_entropy_with_logits(
+ labels=tf.reshape(self.label, (-1, )),
+ logits=tf.reshape(self.output_prob, (-1, mc.NUM_CLASS))
+ ) \
+ * tf.reshape(self.lidar_mask, (-1, )) \
+ * tf.reshape(self.loss_weight, (-1, ))
+ ) / tf.reduce_sum(self.lidar_mask)*mc.CLS_LOSS_COEF,
+ name='cls_loss')
+ tf.add_to_collection('losses', self.cls_loss)
+ # add above losses as well as weight decay losses to form the total loss
+ self.loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
+ # add loss summaries
+ # _add_loss_summaries(self.loss)
+ tf.summary.scalar(self.cls_loss.op.name, self.cls_loss)
+ tf.summary.scalar(self.loss.op.name, self.loss)
+ def _add_train_graph(self):
+ """Define the training operation.
+ Creates self.global_step, a decayed learning rate, a momentum optimizer
+ whose gradients are clipped by norm to mc.MAX_GRAD_NORM, and self.train_op,
+ a no-op that depends on the gradient update.
+ """
+ mc = self.mc
+ self.global_step = tf.Variable(0, name='global_step', trainable=False)
+ # the learning rate is not constant; it decays with the global step
+ # NOTE(review): tf.train.exponential_decay also requires decay_steps and
+ # decay_rate, which are not passed here; the two argument lines appear to be
+ # missing. Restore them from the project configuration before running.
+ lr = tf.train.exponential_decay(mc.LEARNING_RATE,
+ self.global_step,
+ staircase=True)
+ tf.summary.scalar('learning_rate', lr)
+ opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=mc.MOMENTUM)
+ grads_vars = opt.compute_gradients(self.loss, tf.trainable_variables())
+ with tf.variable_scope('clip_gradient') as scope:
+ # each gradient is clipped individually; the list is updated in place
+ for i, (grad, var) in enumerate(grads_vars):
+ grads_vars[i] = (tf.clip_by_norm(grad, mc.MAX_GRAD_NORM), var)
+ apply_gradient_op = opt.apply_gradients(grads_vars, global_step=self.global_step)
+ # running train_op forces the gradient update to run first
+ with tf.control_dependencies([apply_gradient_op]):
+ self.train_op = tf.no_op(name='train')
+ def _add_viz_graph(self):
+ """Define the visualization operation."""
+ mc = self.mc
+ self.label_to_show = tf.placeholder(
+ tf.float32, [None, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 3],
+ name='label_to_show'
+ )
+ self.depth_image_to_show = tf.placeholder(
+ tf.float32, [None, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 1],
+ name='depth_image_to_show'
+ )
+ self.pred_image_to_show = tf.placeholder(
+ tf.float32, [None, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 3],
+ name='pred_image_to_show'
+ )
+ self.show_label = tf.summary.image('label_to_show',
+ self.label_to_show, collections='image_summary',
+ max_outputs=mc.BATCH_SIZE)
+ self.show_depth_img = tf.summary.image('depth_image_to_show',
+ self.depth_image_to_show, collections='image_summary',
+ max_outputs=mc.BATCH_SIZE)
+ self.show_pred = tf.summary.image('pred_image_to_show',
+ self.pred_image_to_show, collections='image_summary',
+ max_outputs=mc.BATCH_SIZE)
+ def _add_summary_ops(self):
+ """Add extra summary operations."""
+ mc = self.mc
+ iou_summary_placeholders = []
+ iou_summary_ops = []
+ for cls in mc.CLASSES:
+ ph = tf.placeholder(tf.float32, name=cls+'_iou')
+ iou_summary_placeholders.append(ph)
+ iou_summary_ops.append(
+ tf.summary.scalar('Eval/'+cls+'_iou', ph, collections='eval_summary')
+ )
+ self.iou_summary_placeholders = iou_summary_placeholders
+ self.iou_summary_ops = iou_summary_ops
+ def _conv_bn_layer(
+ self, inputs, conv_param_name, bn_param_name, scale_param_name, filters,
+ size, stride, padding='SAME', freeze=False, relu=True,
+ conv_with_bias=False, stddev=0.001):
+ """ Convolution + BatchNorm + [relu] layer. Batch mean and var are treated
+ as constant. Weights have to be initialized from a pre-trained model or
+ restored from a checkpoint.
+ Args:
+ inputs: input tensor
+ conv_param_name: name of the convolution parameters; also used as the
+ variable scope and as the key in the layer counters
+ bn_param_name: name of the batch normalization parameters
+ scale_param_name: name of the scale parameters
+ filters: number of output filters.
+ size: kernel size.
+ stride: stride
+ padding: 'SAME' or 'VALID'. See tensorflow doc for detailed description.
+ freeze: if true, then do not train the parameters in this layer.
+ relu: whether to use relu or not.
+ conv_with_bias: whether or not add bias term to the convolution output.
+ stddev: standard deviation used for random weight initializer.
+ Returns:
+ A convolutional layer operation.
+ """
+ mc = self.mc
+ with tf.variable_scope(conv_param_name) as scope:
+ channels = inputs.get_shape()[3]
+ cw = self.caffemodel_weight
+ kernel_val = np.transpose(cw[conv_param_name][0], [2,3,1,0])
+ if conv_with_bias:
+ bias_val = cw[conv_param_name][1]
+ mean_val = cw[bn_param_name][0]
+ var_val = cw[bn_param_name][1]
+ gamma_val = cw[scale_param_name][0]
+ beta_val = cw[scale_param_name][1]
+ # NOTE(review): this `else` selects random/constant initializers instead of
+ # the pretrained values read above, but the `if` that should guard the
+ # pretrained branch is not present; the condition line appears to be
+ # missing. Confirm against the intended source.
+ else:
+ kernel_val = tf.truncated_normal_initializer(
+ stddev=stddev, dtype=tf.float32)
+ if conv_with_bias:
+ bias_val = tf.constant_initializer(0.0)
+ mean_val = tf.constant_initializer(0.0)
+ var_val = tf.constant_initializer(1.0)
+ gamma_val = tf.constant_initializer(1.0)
+ beta_val = tf.constant_initializer(0.0)
+ # re-order the caffe kernel with shape [out, in, h, w] -> tf kernel with
+ # shape [h, w, in, out]
+ kernel = _variable_with_weight_decay(
+ 'kernels', shape=[size, size, int(channels), filters],
+ wd=mc.WEIGHT_DECAY, initializer=kernel_val, trainable=(not freeze))
+ self.model_params += [kernel]
+ if conv_with_bias:
+ biases = _variable_on_device('biases', [filters], bias_val,
+ trainable=(not freeze))
+ self.model_params += [biases]
+ gamma = _variable_on_device('gamma', [filters], gamma_val,
+ trainable=(not freeze))
+ beta = _variable_on_device('beta', [filters], beta_val,
+ trainable=(not freeze))
+ # mean and variance are never trainable, regardless of `freeze`
+ mean = _variable_on_device('mean', [filters], mean_val, trainable=False)
+ var = _variable_on_device('var', [filters], var_val, trainable=False)
+ self.model_params += [gamma, beta, mean, var]
+ # `stride` is applied along axis 2 only; axis 1 keeps a stride of 1
+ conv = tf.nn.conv2d(
+ inputs, kernel, [1, 1, stride, 1], padding=padding,
+ name='convolution')
+ if conv_with_bias:
+ conv = tf.nn.bias_add(conv, biases, name='bias_add')
+ conv = tf.nn.batch_normalization(
+ conv, mean=mean, variance=var, offset=beta, scale=gamma,
+ variance_epsilon=mc.BATCH_NORM_EPSILON, name='batch_norm')
+ # bookkeeping: parameter count, flop estimate and activation count
+ self.model_size_counter.append(
+ (conv_param_name, (1+size*size*int(channels))*filters)
+ )
+ out_shape = conv.get_shape().as_list()
+ num_flops = \
+ (1+2*int(channels)*size*size)*filters*out_shape[1]*out_shape[2]
+ if relu:
+ num_flops += 2*filters*out_shape[1]*out_shape[2]
+ self.flop_counter.append((conv_param_name, num_flops))
+ self.activation_counter.append(
+ (conv_param_name, out_shape[1]*out_shape[2]*out_shape[3])
+ )
+ if relu:
+ return tf.nn.relu(conv)
+ else:
+ return conv
+ def _conv_layer(
+ self, layer_name, inputs, filters, size, stride, padding='SAME',
+ freeze=False, xavier=False, relu=True, stddev=0.001, bias_init_val=0.0):
+ """Convolutional layer operation constructor.
+ Pretrained values from self.caffemodel_weight are used when an entry named
+ `layer_name` exists and its kernel and bias shapes match; otherwise the
+ kernel is initialized randomly and the bias with `bias_init_val`.
+ Args:
+ layer_name: layer name.
+ inputs: input tensor
+ filters: number of output filters.
+ size: kernel size.
+ stride: stride
+ padding: 'SAME' or 'VALID'. See tensorflow doc for detailed description.
+ freeze: if true, then do not train the parameters in this layer.
+ xavier: whether to use xavier weight initializer or not.
+ relu: whether to use relu or not.
+ stddev: standard deviation used for random weight initializer.
+ bias_init_val: constant used to initialize the bias when no pretrained
+ value is used.
+ Returns:
+ A convolutional layer operation.
+ """
+ mc = self.mc
+ use_pretrained_param = False
+ cw = self.caffemodel_weight
+ if layer_name in cw:
+ kernel_val = np.transpose(cw[layer_name][0], [2,3,1,0])
+ bias_val = cw[layer_name][1]
+ # check the shape
+ if (kernel_val.shape ==
+ (size, size, inputs.get_shape().as_list()[-1], filters)) \
+ and (bias_val.shape == (filters, )):
+ use_pretrained_param = True
+ else:
+ print ('Shape of the pretrained parameter of {} does not match, '
+ 'use randomly initialized parameter'.format(layer_name))
+ else:
+ print ('Cannot find {} in the pretrained model. Use randomly initialized '
+ 'parameters'.format(layer_name))
+ if mc.DEBUG_MODE:
+ print('Input tensor shape to {}: {}'.format(layer_name, inputs.get_shape()))
+ with tf.variable_scope(layer_name) as scope:
+ channels = inputs.get_shape()[3]
+ # re-order the caffe kernel with shape [out, in, h, w] -> tf kernel with
+ # shape [h, w, in, out]
+ if use_pretrained_param:
+ if mc.DEBUG_MODE:
+ print ('Using pretrained model for {}'.format(layer_name))
+ kernel_init = tf.constant(kernel_val , dtype=tf.float32)
+ bias_init = tf.constant(bias_val, dtype=tf.float32)
+ elif xavier:
+ kernel_init = tf.contrib.layers.xavier_initializer_conv2d()
+ bias_init = tf.constant_initializer(bias_init_val)
+ else:
+ kernel_init = tf.truncated_normal_initializer(
+ stddev=stddev, dtype=tf.float32)
+ bias_init = tf.constant_initializer(bias_init_val)
+ kernel = _variable_with_weight_decay(
+ 'kernels', shape=[size, size, int(channels), filters],
+ wd=mc.WEIGHT_DECAY, initializer=kernel_init, trainable=(not freeze))
+ biases = _variable_on_device('biases', [filters], bias_init,
+ trainable=(not freeze))
+ self.model_params += [kernel, biases]
+ # `stride` is applied along axis 2 only; axis 1 keeps a stride of 1
+ conv = tf.nn.conv2d(
+ inputs, kernel, [1, 1, stride, 1], padding=padding,
+ name='convolution')
+ conv_bias = tf.nn.bias_add(conv, biases, name='bias_add')
+ if relu:
+ out = tf.nn.relu(conv_bias, 'relu')
+ else:
+ out = conv_bias
+ # bookkeeping: parameter count, flop estimate and activation count
+ self.model_size_counter.append(
+ (layer_name, (1+size*size*int(channels))*filters)
+ )
+ out_shape = out.get_shape().as_list()
+ num_flops = \
+ (1+2*int(channels)*size*size)*filters*out_shape[1]*out_shape[2]
+ if relu:
+ num_flops += 2*filters*out_shape[1]*out_shape[2]
+ self.flop_counter.append((layer_name, num_flops))
+ self.activation_counter.append(
+ (layer_name, out_shape[1]*out_shape[2]*out_shape[3])
+ )
+ return out
+ def _deconv_layer(
+ self, layer_name, inputs, filters, size, stride, padding='SAME',
+ freeze=False, init='trunc_norm', relu=True, stddev=0.001):
+ """Deconvolutional layer operation constructor.
+ Args:
+ layer_name: layer name.
+ inputs: input tensor
+ filters: number of output filters.
+ size: kernel size. An array of size 2 or 1.
+ stride: stride. An array of size 2 or 1.
+ padding: 'SAME' or 'VALID'. See tensorflow doc for detailed description.
+ freeze: if true, then do not train the parameters in this layer.
+ init: how to initialize kernel weights. Now accept 'xavier',
+ 'trunc_norm', 'bilinear'
+ relu: whether to use relu or not.
+ stddev: standard deviation used for random weight initializer.
+ Returns:
+ A deconvolutional (transposed convolution) layer operation.
+ """
+ assert len(size) == 1 or len(size) == 2, \
+ 'size should be a scalar or an array of size 2.'
+ assert len(stride) == 1 or len(stride) == 2, \
+ 'stride should be a scalar or an array of size 2.'
+ assert init == 'xavier' or init == 'bilinear' or init == 'trunc_norm', \
+ 'initi mode not supported {}'.format(init)
+ # a single-element size/stride is applied to both spatial axes
+ if len(size) == 1:
+ size_h, size_w = size[0], size[0]
+ else:
+ size_h, size_w = size[0], size[1]
+ if len(stride) == 1:
+ stride_h, stride_w = stride[0], stride[0]
+ else:
+ stride_h, stride_w = stride[0], stride[1]
+ mc = self.mc
+ # TODO(bichen): Currently do not support pretrained parameters for deconv
+ # layer.
+ if mc.DEBUG_MODE:
+ print('Input tensor shape to {}: {}'.format(layer_name, inputs.get_shape()))
+ with tf.variable_scope(layer_name) as scope:
+ in_height = int(inputs.get_shape()[1])
+ in_width = int(inputs.get_shape()[2])
+ channels = int(inputs.get_shape()[3])
+ if init == 'xavier':
+ kernel_init = tf.contrib.layers.xavier_initializer_conv2d()
+ bias_init = tf.constant_initializer(0.0)
+ elif init == 'bilinear':
+ assert size_h == 1, 'Now only support size_h=1'
+ assert channels == filters, \
+ 'In bilinear interporlation mode, input channel size and output' \
+ 'filter size should be the same'
+ assert stride_h == 1, \
+ 'In bilinear interpolation mode, stride_h should be 1'
+ # the kernel starts at zero; only the per-channel diagonal is filled below
+ kernel_init = np.zeros(
+ (size_h, size_w, channels, channels),
+ dtype=np.float32)
+ factor_w = (size_w + 1)//2
+ assert factor_w == stride_w, \
+ 'In bilinear interpolation mode, stride_w == factor_w'
+ center_w = (factor_w - 1) if (size_w % 2 == 1) else (factor_w - 0.5)
+ og_w = np.reshape(np.arange(size_w), (size_h, -1))
+ # triangular weights that fall off linearly with distance from center_w
+ up_kernel = (1 - np.abs(og_w - center_w)/factor_w)
+ for c in range(channels):
+ kernel_init[:, :, c, c] = up_kernel
+ bias_init = tf.constant_initializer(0.0)
+ else:
+ kernel_init = tf.truncated_normal_initializer(
+ stddev=stddev, dtype=tf.float32)
+ bias_init = tf.constant_initializer(0.0)
+ # Kernel layout for deconv layer: [H_f, W_f, O_c, I_c] where I_c is the
+ # input channel size. It should be the same as the channel size of the
+ # input tensor.
+ kernel = _variable_with_weight_decay(
+ 'kernels', shape=[size_h, size_w, filters, channels],
+ wd=mc.WEIGHT_DECAY, initializer=kernel_init, trainable=(not freeze))
+ biases = _variable_on_device(
+ 'biases', [filters], bias_init, trainable=(not freeze))
+ self.model_params += [kernel, biases]
+ # TODO(bichen): fix this
+ # the output shape is built from the static batch size mc.BATCH_SIZE and
+ # the input height/width multiplied by the strides
+ deconv = tf.nn.conv2d_transpose(
+ inputs, kernel,
+ [mc.BATCH_SIZE, stride_h*in_height, stride_w*in_width, filters],
+ [1, stride_h, stride_w, 1], padding=padding,
+ name='deconv')
+ deconv_bias = tf.nn.bias_add(deconv, biases, name='bias_add')
+ if relu:
+ out = tf.nn.relu(deconv_bias, 'relu')
+ else:
+ out = deconv_bias
+ # bookkeeping: parameter count, flop estimate and activation count
+ self.model_size_counter.append(
+ (layer_name, (1+size_h*size_w*channels)*filters)
+ )
+ out_shape = out.get_shape().as_list()
+ num_flops = \
+ (1+2*channels*size_h*size_w)*filters*out_shape[1]*out_shape[2]
+ if relu:
+ num_flops += 2*filters*out_shape[1]*out_shape[2]
+ self.flop_counter.append((layer_name, num_flops))
+ self.activation_counter.append(
+ (layer_name, out_shape[1]*out_shape[2]*out_shape[3])
+ )
+ return out
+ def _pooling_layer(
+ self, layer_name, inputs, size, stride, padding='SAME'):
+ """Pooling layer operation constructor.
+ Args:
+ layer_name: layer name.
+ inputs: input tensor
+ size: kernel size.
+ stride: stride
+ padding: 'SAME' or 'VALID'. See tensorflow doc for detailed description.
+ Returns:
+ A pooling layer operation.
+ """
+ with tf.variable_scope(layer_name) as scope:
+ out = tf.nn.max_pool(inputs,
+ ksize=[1, size, size, 1],
+ strides=[1, 1, stride, 1],
+ padding=padding)
+ activation_size = np.prod(out.get_shape().as_list()[1:])
+ self.activation_counter.append((layer_name, activation_size))
+ return out
+ def _fc_layer(
+ self, layer_name, inputs, hiddens, flatten=False, relu=True,
+ xavier=False, stddev=0.001, bias_init_val=0.0):
+ """Fully connected layer operation constructor.
+ Pretrained values from self.caffemodel_weight are used when an entry named
+ `layer_name` exists and its kernel can be brought to shape (dim, hiddens);
+ otherwise the weights are initialized randomly.
+ Args:
+ layer_name: layer name.
+ inputs: input tensor
+ hiddens: number of (hidden) neurons in this layer.
+ flatten: if true, reshape the input 4D tensor of shape
+ (batch, height, weight, channel) into a 2D tensor with shape
+ (batch, -1). This is used when the input to the fully connected layer
+ is output of a convolutional layer.
+ relu: whether to use relu or not.
+ xavier: whether to use xavier weight initializer or not.
+ stddev: standard deviation used for random weight initializer.
+ bias_init_val: constant used to initialize the bias when no pretrained
+ value is used.
+ Returns:
+ A fully connected layer operation.
+ """
+ mc = self.mc
+ use_pretrained_param = False
+ cw = self.caffemodel_weight
+ if layer_name in cw:
+ use_pretrained_param = True
+ kernel_val = cw[layer_name][0]
+ bias_val = cw[layer_name][1]
+ if mc.DEBUG_MODE:
+ print('Input tensor shape to {}: {}'.format(layer_name, inputs.get_shape()))
+ with tf.variable_scope(layer_name) as scope:
+ input_shape = inputs.get_shape().as_list()
+ if flatten:
+ dim = input_shape[1]*input_shape[2]*input_shape[3]
+ inputs = tf.reshape(inputs, [-1, dim])
+ if use_pretrained_param:
+ # the bare `except` below also catches the AssertionError raised by the
+ # shape checks, which is how a mismatch falls back to random init
+ try:
+ # check the size before layout transform
+ assert kernel_val.shape == (hiddens, dim), \
+ 'kernel shape error at {}'.format(layer_name)
+ kernel_val = np.reshape(
+ np.transpose(
+ np.reshape(
+ kernel_val, # O x (C*H*W)
+ (hiddens, input_shape[3], input_shape[1], input_shape[2])
+ ), # O x C x H x W
+ (2, 3, 1, 0)
+ ), # H x W x C x O
+ (dim, -1)
+ ) # (H*W*C) x O
+ # check the size after layout transform
+ assert kernel_val.shape == (dim, hiddens), \
+ 'kernel shape error at {}'.format(layer_name)
+ except:
+ # Do not use pretrained parameter if shape doesn't match
+ use_pretrained_param = False
+ print ('Shape of the pretrained parameter of {} does not match, '
+ 'use randomly initialized parameter'.format(layer_name))
+ else:
+ dim = input_shape[1]
+ if use_pretrained_param:
+ try:
+ # stored kernel is transposed to (dim, hiddens) before the shape check
+ kernel_val = np.transpose(kernel_val, (1,0))
+ assert kernel_val.shape == (dim, hiddens), \
+ 'kernel shape error at {}'.format(layer_name)
+ except:
+ use_pretrained_param = False
+ print ('Shape of the pretrained parameter of {} does not match, '
+ 'use randomly initialized parameter'.format(layer_name))
+ if use_pretrained_param:
+ if mc.DEBUG_MODE:
+ print ('Using pretrained model for {}'.format(layer_name))
+ kernel_init = tf.constant(kernel_val, dtype=tf.float32)
+ bias_init = tf.constant(bias_val, dtype=tf.float32)
+ elif xavier:
+ kernel_init = tf.contrib.layers.xavier_initializer()
+ bias_init = tf.constant_initializer(bias_init_val)
+ else:
+ kernel_init = tf.truncated_normal_initializer(
+ stddev=stddev, dtype=tf.float32)
+ bias_init = tf.constant_initializer(bias_init_val)
+ weights = _variable_with_weight_decay(
+ 'weights', shape=[dim, hiddens], wd=mc.WEIGHT_DECAY,
+ initializer=kernel_init)
+ biases = _variable_on_device('biases', [hiddens], bias_init)
+ self.model_params += [weights, biases]
+ outputs = tf.nn.bias_add(tf.matmul(inputs, weights), biases)
+ if relu:
+ outputs = tf.nn.relu(outputs, 'relu')
+ # count layer stats
+ self.model_size_counter.append((layer_name, (dim+1)*hiddens))
+ num_flops = 2 * dim * hiddens + hiddens
+ if relu:
+ num_flops += 2*hiddens
+ self.flop_counter.append((layer_name, num_flops))
+ self.activation_counter.append((layer_name, hiddens))
+ return outputs
+ def _recurrent_crf_layer(
+ self, layer_name, inputs, bilateral_filters, sizes=[3, 5],
+ num_iterations=1, padding='SAME'):
+ """Recurrent conditional random field layer. Iterative meanfield inference is
+ implemented as a recurrent neural network.
+ Args:
+ layer_name: layer name
+ inputs: input tensor with shape [batch_size, zenith, azimuth, num_class].
+ bilateral_filters: filter weight with shape
+ [batch_size, zenith, azimuth, sizes[0]*size[1]-1].
+ sizes: size of the local region to be filtered.
+ num_iterations: number of meanfield inferences.
+ padding: padding strategy
+ Returns:
+ outputs: tensor with shape [batch_size, zenith, azimuth, num_class].
+ """
+ assert num_iterations >= 1, 'number of iterations should >= 1'
+ mc = self.mc
+ with tf.variable_scope(layer_name) as scope:
+ # initialize compatibility matrices: ones everywhere except a zero
+ # diagonal, shaped as a 1x1 convolution kernel
+ compat_kernel_init = tf.constant(
+ np.reshape(
+ np.ones((mc.NUM_CLASS, mc.NUM_CLASS)) - np.identity(mc.NUM_CLASS),
+ [1, 1, mc.NUM_CLASS, mc.NUM_CLASS]
+ ),
+ dtype=tf.float32
+ )
+ bi_compat_kernel = _variable_on_device(
+ name='bilateral_compatibility_matrix',
+ shape=[1, 1, mc.NUM_CLASS, mc.NUM_CLASS],
+ initializer=compat_kernel_init*mc.BI_FILTER_COEF,
+ trainable=True
+ )
+ self._activation_summary(bi_compat_kernel, 'bilateral_compat_mat')
+ angular_compat_kernel = _variable_on_device(
+ name='angular_compatibility_matrix',
+ shape=[1, 1, mc.NUM_CLASS, mc.NUM_CLASS],
+ initializer=compat_kernel_init*mc.ANG_FILTER_COEF,
+ trainable=True
+ )
+ self._activation_summary(angular_compat_kernel, 'angular_compat_mat')
+ self.model_params += [bi_compat_kernel, angular_compat_kernel]
+ # the three kernels below are constants built by helpers in `util`
+ condensing_kernel = tf.constant(
+ util.condensing_matrix(sizes[0], sizes[1], mc.NUM_CLASS),
+ dtype=tf.float32,
+ name='condensing_kernel'
+ )
+ angular_filters = tf.constant(
+ util.angular_filter_kernel(
+ sizes[0], sizes[1], mc.NUM_CLASS, mc.ANG_THETA_A**2),
+ dtype=tf.float32,
+ name='angular_kernel'
+ )
+ bi_angular_filters = tf.constant(
+ util.angular_filter_kernel(
+ sizes[0], sizes[1], mc.NUM_CLASS, mc.BILATERAL_THETA_A**2),
+ dtype=tf.float32,
+ name='bi_angular_kernel'
+ )
+ for it in range(num_iterations):
+ unary = tf.nn.softmax(
+ inputs, dim=-1, name='unary_term_at_iter_{}'.format(it))
+ ang_output, bi_output = self._locally_connected_layer(
+ 'message_passing_iter_{}'.format(it), unary,
+ bilateral_filters, angular_filters, bi_angular_filters,
+ condensing_kernel, sizes=sizes,
+ padding=padding
+ )
+ # 1x1 convolution as compatibility transform
+ ang_output = tf.nn.conv2d(
+ ang_output, angular_compat_kernel, strides=[1, 1, 1, 1],
+ padding='SAME', name='angular_compatibility_transformation')
+ self._activation_summary(
+ ang_output, 'ang_transfer_iter_{}'.format(it))
+ bi_output = tf.nn.conv2d(
+ bi_output, bi_compat_kernel, strides=[1, 1, 1, 1], padding='SAME',
+ name='bilateral_compatibility_transformation')
+ self._activation_summary(
+ bi_output, 'bi_transfer_iter_{}'.format(it))
+ pairwise = tf.add(ang_output, bi_output,
+ name='pairwise_term_at_iter_{}'.format(it))
+ outputs = tf.add(unary, pairwise,
+ name='energy_at_iter_{}'.format(it))
+ # the result of this iteration is the input of the next one
+ inputs = outputs
+ return outputs
+ def _locally_connected_layer(
+ self, layer_name, inputs, bilateral_filters,
+ angular_filters, bi_angular_filters, condensing_kernel, sizes=[3, 5],
+ padding='SAME'):
+ """Locally connected layer with non-trainable filter parameters.
+ Args:
+ layer_name: layer name
+ inputs: input tensor with shape
+ [batch_size, zenith, azimuth, num_class].
+ bilateral_filters: bilateral filter weight with shape
+ [batch_size, zenith, azimuth, sizes[0]*size[1]-1].
+ angular_filters: angular filter weight with shape
+ [sizes[0], sizes[1], in_channel, in_channel].
+ bi_angular_filters: second angular filter kernel, convolved with the
+ inputs in the same way as angular_filters; its output scales the
+ bilateral term.
+ condensing_kernel: tensor with shape
+ [size[0], size[1], num_class, (sizes[0]*size[1]-1)*num_class]
+ sizes: size of the local region to be filtered.
+ padding: padding strategy
+ Returns:
+ ang_output: output tensor filtered by angular filter with shape
+ [batch_size, zenith, azimuth, num_class].
+ bi_output: output tensor filtered by bilateral filter with shape
+ [batch_size, zenith, azimuth, num_class].
+ """
+ assert padding=='SAME', 'only support SAME padding strategy'
+ assert sizes[0] % 2 == 1 and sizes[1] % 2 == 1, \
+ 'Currently only support odd filter size.'
+ # NOTE(review): mc, pad_z, pad_a and half_filter_dim are assigned here but
+ # not used in the rest of this method.
+ mc = self.mc
+ size_z, size_a = sizes
+ pad_z, pad_a = size_z//2, size_a//2
+ half_filter_dim = (size_z*size_a)//2
+ batch, zenith, azimuth, in_channel = inputs.shape.as_list()
+ with tf.variable_scope(layer_name) as scope:
+ # message passing
+ ang_output = tf.nn.conv2d(
+ inputs, angular_filters, [1, 1, 1, 1], padding=padding,
+ name='angular_filtered_term'
+ )
+ bi_ang_output = tf.nn.conv2d(
+ inputs, bi_angular_filters, [1, 1, 1, 1], padding=padding,
+ name='bi_angular_filtered_term'
+ )
+ # the masked input is convolved with the condensing kernel and reshaped so
+ # that axis 3 indexes the size_z*size_a-1 neighbours
+ condensed_input = tf.reshape(
+ tf.nn.conv2d(
+ inputs*self.lidar_mask, condensing_kernel, [1, 1, 1, 1], padding=padding,
+ name='condensed_prob_map'
+ ),
+ [batch, zenith, azimuth, size_z*size_a-1, in_channel]
+ )
+ # weight each neighbour, sum over the neighbour axis, then mask the result
+ bi_output = tf.multiply(
+ tf.reduce_sum(condensed_input*bilateral_filters, axis=3),
+ self.lidar_mask,
+ name='bilateral_filtered_term'
+ )
+ bi_output *= bi_ang_output
+ return ang_output, bi_output
+ def _bilateral_filter_layer(
+ self, layer_name, inputs, thetas=[0.9, 0.01], sizes=[3, 5], stride=1,
+ padding='SAME'):
+ """Computing pairwise energy with a bilateral filter for CRF.
+ Args:
+ layer_name: layer name
+ inputs: input tensor with shape [batch_size, zenith, azimuth, channels].
+ Channels 0, 1 and 2 are read as the x, y and z differences below.
+ thetas: theta parameter for bilateral filter. NOTE(review): the values
+ unpacked from this argument are overwritten per class from
+ mc.BILATERAL_THETA_A / mc.BILATERAL_THETA_R before use.
+ sizes: filter size for zenith and azimuth dimension.
+ stride: kernel stride; only 1 is supported.
+ padding: padding; only 'SAME' is supported.
+ Returns:
+ out: bilateral filter weight output with size
+ [batch_size, zenith, azimuth, sizes[0]*sizes[1]-1, num_class]. Each
+ [b, z, a, :, cls] represents filter weights around the center position
+ for each class.
+ """
+ assert padding == 'SAME', 'currently only supports "SAME" padding stategy'
+ assert stride == 1, 'currently only supports striding of 1'
+ assert sizes[0] % 2 == 1 and sizes[1] % 2 == 1, \
+ 'Currently only support odd filter size.'
+ mc = self.mc
+ theta_a, theta_r = thetas
+ size_z, size_a = sizes
+ # NOTE(review): pad_z, pad_a and half_filter_dim are not used below.
+ pad_z, pad_a = size_z//2, size_a//2
+ half_filter_dim = (size_z*size_a)//2
+ batch, zenith, azimuth, in_channel = inputs.shape.as_list()
+ # assert in_channel == 1, 'Only support input channel == 1'
+ with tf.variable_scope(layer_name) as scope:
+ condensing_kernel = tf.constant(
+ util.condensing_matrix(size_z, size_a, in_channel),
+ dtype=tf.float32,
+ name='condensing_kernel'
+ )
+ condensed_input = tf.nn.conv2d(
+ inputs, condensing_kernel, [1, 1, stride, 1], padding=padding,
+ name='condensed_input'
+ )
+ # diff_intensity = tf.reshape(
+ # inputs[:, :, :], [batch, zenith, azimuth, 1]) \
+ # - condensed_input[:, :, :, ::in_channel]
+ # difference between each cell's channel value and the matching channel
+ # of the condensed input (every in_channel-th entry, offset by channel)
+ diff_x = tf.reshape(
+ inputs[:, :, :, 0], [batch, zenith, azimuth, 1]) \
+ - condensed_input[:, :, :, 0::in_channel]
+ diff_y = tf.reshape(
+ inputs[:, :, :, 1], [batch, zenith, azimuth, 1]) \
+ - condensed_input[:, :, :, 1::in_channel]
+ diff_z = tf.reshape(
+ inputs[:, :, :, 2], [batch, zenith, azimuth, 1]) \
+ - condensed_input[:, :, :, 2::in_channel]
+ bi_filters = []
+ for cls in range(mc.NUM_CLASS):
+ # theta_a is looked up per class but only theta_r enters the expression
+ theta_a = mc.BILATERAL_THETA_A[cls]
+ theta_r = mc.BILATERAL_THETA_R[cls]
+ bi_filter = tf.exp(-(diff_x**2+diff_y**2+diff_z**2)/2/theta_r**2)
+ bi_filters.append(bi_filter)
+ # stack puts the class axis first; the transpose moves it to the end
+ out = tf.transpose(
+ tf.stack(bi_filters),
+ [1, 2, 3, 4, 0],
+ name='bilateral_filter_weights'
+ )
+ return out
+ def _activation_summary(self, x, layer_name):
+ """Helper to create summaries for activations.
+ Adds a histogram of `x` plus scalar summaries of its zero fraction, mean,
+ maximum and minimum, all named after `layer_name`.
+ Args:
+ x: layer output tensor
+ layer_name: name of the layer
+ Returns:
+ nothing
+ """
+ with tf.variable_scope('activation_summary') as scope:
+ tf.summary.histogram(layer_name, x)
+ tf.summary.scalar(layer_name+'/sparsity', tf.nn.zero_fraction(x))
+ tf.summary.scalar(layer_name+'/average', tf.reduce_mean(x))
+ tf.summary.scalar(layer_name+'/max', tf.reduce_max(x))
+ tf.summary.scalar(layer_name+'/min', tf.reduce_min(x))
+# Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+#! /usr/bin/python2
+# -*- coding: utf-8 -*-
"""Clock utility for measuring elapsed running time, following SegMatch."""
+import datetime
class Clock(object):
    """Wall-clock stopwatch that reports elapsed time in milliseconds.

    The clock starts on construction; `start()` restarts it. `takeTime()`
    stores the time elapsed since the last start, `getRealTime()` returns the
    stored value, and `takeRealTime()` does both in one call.
    """

    def __init__(self):
        self.kSecondsToMiliseconds = 1000.0
        self.kMicrosecondsToMiliseconds = 0.001
        # Defined up front so getRealTime() is safe before any measurement.
        self.real_time_ms_ = 0.0
        self.start()

    def start(self):
        """Record the current time as the reference point."""
        self.real_time_start_ = datetime.datetime.now()

    def takeTime(self):
        """Measure and store the milliseconds elapsed since `start()`."""
        # Take a single reading: sampling now() twice could pair the seconds
        # of one instant with the microseconds of a later one.
        elapsed = datetime.datetime.now() - self.real_time_start_
        # timedelta splits its value into days/seconds/microseconds, so the
        # days component has to be folded back into the seconds.
        seconds = elapsed.days * 86400 + elapsed.seconds
        useconds = elapsed.microseconds
        # The constant 0.5 ms offset is kept from the original formula so
        # reported values stay comparable with earlier logs.
        self.real_time_ms_ = (seconds * self.kSecondsToMiliseconds
                              + useconds * self.kMicrosecondsToMiliseconds) + 0.5

    def getRealTime(self):
        """Return the last measured value in milliseconds (0.0 if none yet)."""
        return self.real_time_ms_

    def takeRealTime(self):
        """Measure now and return the elapsed milliseconds."""
        self.takeTime()
        return self.getRealTime()
+# Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+# -*- coding: utf-8 -*-
+"""Utility functions."""
+import numpy as np
+import time
def visualize_seg(label_map, mc, one_hot=False):
  """Render a batch of label maps as color images.

  Args:
    label_map: 3-D array of class indices, or per-class scores with a
      trailing class axis when `one_hot` is True.
    mc: model configuration providing NUM_CLASS and CLS_COLOR_MAP.
    one_hot: when True, `label_map` is first reduced with argmax over its
      last axis.
  Returns:
    Array of shape label_map.shape[:3] + (3,). Classes 1..NUM_CLASS-1 get
    their CLS_COLOR_MAP row; every other pixel stays zero.
  """
  labels = np.argmax(label_map, axis=-1) if one_hot else label_map
  colored = np.zeros(
      (labels.shape[0], labels.shape[1], labels.shape[2], 3))
  # Class 0 is deliberately skipped, so it keeps the zero fill.
  for cls_id in range(1, mc.NUM_CLASS):
    colored[labels == cls_id] = mc.CLS_COLOR_MAP[cls_id]
  return colored
def bgr_to_rgb(ims):
  """Return a new list with the last axis of every image reversed.

  Reversing the channel axis turns BGR images into RGB ones; each entry is
  produced by slicing, and the input list itself is left untouched.
  """
  return [im[:, :, ::-1] for im in ims]
class Timer(object):
  """Stopwatch that accumulates durations across tic()/toc() pairs."""

  def __init__(self):
    self.start_time = 0.0
    self.duration = 0.0
    self.total_time = 0.0
    self.calls = 0
    self.average_time = 0.0

  def tic(self):
    """Mark the start of a timed section."""
    self.start_time = time.time()

  def toc(self, average=True):
    """Close the timed section and update the running statistics.

    Args:
      average: when True return the mean duration over all calls so far,
        otherwise the duration of this section only.
    """
    elapsed = time.time() - self.start_time
    self.duration = elapsed
    self.total_time += elapsed
    self.calls += 1
    self.average_time = self.total_time/self.calls
    return self.average_time if average else self.duration
def conf_error_rate_at_thresh_fn(mask, conf, thresh):
  """Fraction of elements where `conf > thresh` disagrees with `mask`."""
  above_thresh = conf > thresh
  return np.mean(above_thresh != mask)
def rmse_fn(diff, nnz):
  """Root of the summed squared differences divided by `nnz`.

  Args:
    diff: array of differences.
    nnz: divisor for the squared sum (the name suggests the count of
      non-zero entries -- TODO confirm against callers).
  Returns:
    sqrt(sum(diff**2) / nnz).
  """
  # true_divide keeps the quotient fractional even when both operands are
  # integers, matching the explicit float() used by the sibling metrics.
  return np.sqrt(np.true_divide(np.sum(diff**2), nnz))
def abs_accuracy_at_thresh_fn(diff, thresh, mask):
  """Share of masked elements whose absolute difference is below `thresh`."""
  within_thresh = np.abs(diff) < thresh
  return np.sum(within_thresh*mask)/float(np.sum(mask))
def rel_accuracy_at_thresh_fn(pred_ogm, gt_ogm, mask, thresh):
  """Share of masked elements whose max/min ratio is below `thresh`.

  The ratio is the element-wise larger of prediction and ground truth
  divided by the smaller one.
  """
  ratio = np.maximum(pred_ogm, gt_ogm) / np.minimum(gt_ogm, pred_ogm)
  hits = mask * (ratio < thresh)
  return np.sum(hits)/float(np.sum(mask))
def evaluate_iou(label, pred, n_class, epsilon=1e-12):
  """Compute per-class pixel-level IoU together with TP/FP/FN counts.

  Args:
    label: N-d array of ground-truth class indices.
    pred: N-d array of predicted class indices, same shape as `label`.
    n_class: number of classes
    epsilon: a small value added to the denominator to prevent division by 0
  Returns:
    ious: array of length n_class with tp/(tp+fn+fp+epsilon) per class.
    tps: array of length n_class with the number of true positives.
    fps: array of length n_class with the number of false positives.
    fns: array of length n_class with the number of false negatives.
  """
  assert label.shape == pred.shape, \
      'label and pred shape mismatch: {} vs {}'.format(
          label.shape, pred.shape)

  ious, tps, fps, fns = (np.zeros(n_class) for _ in range(4))

  for cls_id in range(n_class):
    is_gt = label == cls_id
    is_pred = pred == cls_id

    tp = np.sum(is_gt & is_pred)
    fp = np.sum(is_pred & ~is_gt)
    fn = np.sum(is_gt & ~is_pred)

    tps[cls_id] = tp
    fps[cls_id] = fp
    fns[cls_id] = fn
    ious[cls_id] = tp/(tp+fn+fp+epsilon)

  return ious, tps, fps, fns
def condensing_matrix(size_z, size_a, in_channel):
  """Build a one-hot kernel that moves neighboring pixels to the channel axis.

  Every (z, a, channel) position of a size_z x size_a window maps to its own
  output channel; the output channels belonging to the window center are
  then removed.

  Args:
    size_z: window size on the z dimension, must be odd.
    size_a: window size on the a dimension, must be odd.
    in_channel: number of input channels.
  Returns:
    float32 array of shape
    [size_z, size_a, in_channel, (size_z*size_a-1)*in_channel].
  """
  assert size_z % 2 == 1 and size_a % 2==1, \
      'size_z and size_a should be odd number'

  n_positions = size_z*size_a
  center = n_positions//2
  n_out = n_positions*in_channel

  # Row z*(size_a*in_channel) + a*in_channel + ch of an identity matrix is the
  # one-hot vector for position (z, a, ch); reshaping the rows gives the kernel.
  full = np.eye(n_out, dtype=np.float32).reshape(
      size_z, size_a, in_channel, n_out)

  # exclude the channel index corresponding to the center position
  before_center = full[:, :, :, :in_channel*center]
  after_center = full[:, :, :, in_channel*(center+1):]
  nbr2ch_mat = np.concatenate([before_center, after_center], axis=3)

  expected_shape = (size_z, size_a, in_channel, (size_a*size_z-1)*in_channel)
  assert nbr2ch_mat.shape == expected_shape, \
      'error with the shape of nbr2ch_mat after removing center position'
  return nbr2ch_mat
def angular_filter_kernel(size_z, size_a, in_channel, theta_sqs):
  """Compute a per-channel gaussian kernel over window offsets.

  Args:
    size_z: size on the z dimension, must be odd.
    size_a: size on the a dimension, must be odd.
    in_channel: input (and output) channel size
    theta_sqs: an array with length == in_channel. Contains variance for
      gaussian kernel for each channel.
  Returns:
    kernel: float32 array of size [size_z, size_a, in_channel, in_channel].
      Entry [:, :, k, k] holds the gaussian for channel k with its center
      zeroed; entries with differing channel indices are zero.
  """
  assert size_z % 2 == 1 and size_a % 2==1, \
      'size_z and size_a should be odd number'
  assert len(theta_sqs) == in_channel, \
      'length of theta_sqs and in_channel does no match'

  center_z, center_a = size_z//2, size_a//2

  # Squared distance of every window cell from the center, kept as integers.
  dz = np.arange(size_z) - center_z
  da = np.arange(size_a) - center_a
  sq_dist = dz[:, np.newaxis]**2 + da[np.newaxis, :]**2

  kernel = np.zeros((size_z, size_a, in_channel, in_channel), dtype=np.float32)
  for k in range(in_channel):
    # NOTE(review): `-sq_dist/2` divides an integer array; under Python 2
    # without `from __future__ import division` that would floor -- confirm
    # which interpreter the deployed kernels were generated with.
    kernel_2d = np.exp(-sq_dist/2/theta_sqs[k]).astype(np.float32)
    # exclude the center position
    kernel_2d[center_z, center_a] = 0
    kernel[:, :, k, k] = kernel_2d
  return kernel
// Reference: https://github.com/iralabdisco/kitti_player/blob/public/src/kitti_player.cpp
+int main(int argc, char **argv) {
+ ros::init(argc, argv, "lidar_annotator_node");
+ ros::NodeHandle nh;
+ std::string path;
+ std::string full_file_name;
+ nh.param("kitti_bin_path", path,
+ std::string("/media/dyros-vehicle/edward_6/datasets/KITTI_datasets/2011_09_26_drive_0005_sync/velodyne_points/data/"));
+ ros::Publisher kitti_pub = nh.advertise("/velodyne_points_kitti",1);
+ // ed: 5 hz
+ ros::Rate loop_rate(5);
+ int entries_played = 0;
+ int total_entries = 0;
+ DIR* dir = opendir(path.c_str());
+ struct dirent *ent;
+ unsigned int len = 0;
+ // ed: get the number of files in directory
+ while((ent = readdir(dir))){
+ len = strlen(ent->d_name);
+ if(len > 2)
+ total_entries++;
+ }
+ closedir(dir);
+ /* std::cout << "total entries : " << total_entries << std::endl; // : 154 */
+ while(ros::ok() && entries_played <= total_entries-1) {
+ full_file_name = path + boost::str(boost::format("%010d") % entries_played) + ".bin";
+ // ed: for debugging
+ std::cout << "full file name : " << full_file_name << std::endl;
+ // ed: open file
+ std::fstream input(full_file_name.c_str(), std::ios::in | std::ios::binary);
+ if(!input.good()) {
+ ROS_ERROR_STREAM("could not read file: " << full_file_name);
+ return 0;
+ }
+ else {
+ ROS_DEBUG_STREAM("reading " << full_file_name);
+ // ed: go to beggining of the file
+ input.seekg(0, std::ios::beg);
+ pcl::PointCloud::Ptr points(new pcl::PointCloud);
+ // ed: read data
+ for(int i=0; input.good() && !input.eof(); i++) {
+ pcl::PointXYZI point;
+ input.read((char*)&point.x, 3*sizeof(float));
+ input.read((char*)&point.intensity, sizeof(float));
+ points->push_back(point);
+ }
+ input.close();
+ sensor_msgs::PointCloud2 pc2;
+ pc2.header.frame_id = "velodyne_link";
+ pc2.header.stamp = ros::Time::now();
+ points->header = pcl_conversions::toPCL(pc2.header);
+ // ed: /velodyne_points_kitti publish
+ kitti_pub.publish(points);
+ }
+ loop_rate.sleep();
+ entries_played++;
+ }
+ return 0;
cmake_minimum_required(VERSION 2.8.3)
# NOTE(review): the project() line was missing from this listing; the name is
# taken from the package directory and the LIBRARIES hint below -- confirm.
project(squeezeseg_cpp_preprocessing)

## catkin components this package builds against
find_package(catkin REQUIRED COMPONENTS
  pcl_ros
  roscpp
  sensor_msgs
)

## Export information for dependent packages (nothing is exported yet)
catkin_package(
#  INCLUDE_DIRS include
#  LIBRARIES squeezeseg_cpp_preprocessing
#  CATKIN_DEPENDS pcl_ros roscpp sensor_msgs
#  DEPENDS system_lib
)

include_directories(
  ${catkin_INCLUDE_DIRS}
)

## Preprocessing node
add_executable(cpp_preprocessing src/cpp_preprocessing.cpp)
add_dependencies(cpp_preprocessing ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
target_link_libraries(cpp_preprocessing
  ${catkin_LIBRARIES}
)
+# -*- coding: utf-8 -*-
+ #+DESCRITION: online segmentation
+ #+FROM: github.com/durant35/SqueezeSeg
+ #+DATE: 2018-07-23-Mon
+ #+AUTHOR: Edward Im (edwardim@snu.ac.kr)
+import argparse
+import tensorflow as tf
+import rospy
+from segment_node import SegmentNode
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
    'checkpoint', '/home/dyros-vehicle/gitrepo/ims_ros/catkin_ws_kinetic/src/squeezeseg_cpp_preprocessing/checkpoint/SqueezeSeg/model.ckpt-23000',
    """Path to the model parameter file.""")

if __name__ == '__main__':
    # parse arguments from command line; rospy.myargv() is used so that the
    # ROS remapping arguments are not handed to argparse
    parser = argparse.ArgumentParser(description='LiDAR point cloud semantic segmentation')
    parser.add_argument('--sub_topic', type=str,
                        help='the pointcloud message topic to be subscribed, default `/ss_filtered`',
                        default='/ss_filtered')
    parser.add_argument('--pub_topic', type=str,
                        help='the pointcloud message topic to be published, default `/squeeze_seg/points`',
                        default='/squeeze_seg/points')
    args = parser.parse_args(rospy.myargv()[1:])

    rospy.init_node('segment_node')
    # FLAGS carries the checkpoint path that SegmentNode restores.
    node = SegmentNode(sub_topic=args.sub_topic,
                       pub_topic=args.pub_topic,
                       FLAGS=FLAGS)
+# -*- coding: utf-8 -*-
+ #+DESCRITION: online segmentation
+ #+FROM: github.com/durant35/SqueezeSeg
+ #+DATE: 2018-08-08-Wed
+ #+AUTHOR: Edward Im (edwardim@snu.ac.kr)
+import sys
+import os.path
+import numpy as np
+from PIL import Image
+import tensorflow as tf
+import rospy
+from sensor_msgs.msg import PointCloud2
+import sensor_msgs.point_cloud2 as pc2
+from sensor_msgs.msg import Image as ImageMsg
+from std_msgs.msg import Header
+from std_msgs.msg import Int8
+from config import *
+from nets import SqueezeSeg
+from utils.util import *
+from utils.clock import Clock
+from imdb import kitti # ed: header added
+def _make_point_field(num_field):
+ msg_pf1 = pc2.PointField()
+ msg_pf1.name = np.str('x')
+ msg_pf1.offset = np.uint32(0)
+ msg_pf1.datatype = np.uint8(7)
+ msg_pf1.count = np.uint32(1)
+ msg_pf2 = pc2.PointField()
+ msg_pf2.name = np.str('y')
+ msg_pf2.offset = np.uint32(4)
+ msg_pf2.datatype = np.uint8(7)
+ msg_pf2.count = np.uint32(1)
+ msg_pf3 = pc2.PointField()
+ msg_pf3.name = np.str('z')
+ msg_pf3.offset = np.uint32(8)
+ msg_pf3.datatype = np.uint8(7)
+ msg_pf3.count = np.uint32(1)
+ msg_pf4 = pc2.PointField()
+ msg_pf4.name = np.str('intensity')
+ msg_pf4.offset = np.uint32(16)
+ msg_pf4.datatype = np.uint8(7)
+ msg_pf4.count = np.uint32(1)
+ if num_field == 4:
+ return [msg_pf1, msg_pf2, msg_pf3, msg_pf4]
+ msg_pf5 = pc2.PointField()
+ msg_pf5.name = np.str('label')
+ msg_pf5.offset = np.uint32(20)
+ msg_pf5.datatype = np.uint8(4)
+ msg_pf5.count = np.uint32(1)
+ return [msg_pf1, msg_pf2, msg_pf3, msg_pf4, msg_pf5]
class SegmentNode():
    """LiDAR point cloud segment ros node.

    Restores a SqueezeSeg checkpoint, subscribes to a projected point cloud
    topic, predicts a class label for every point and republishes the cloud
    with an extra `label` field.
    """

    def __init__(self,
                 sub_topic, pub_topic, FLAGS):
        """Build the model, restore the weights and start spinning.

        Args:
            sub_topic: topic of the incoming PointCloud2 messages.
            pub_topic: topic on which the labeled PointCloud2 is published.
            FLAGS: object whose `checkpoint` attribute is the path of the
                model checkpoint to restore.

        This call blocks in rospy.spin() and does not return until shutdown.
        """
        self._mc = kitti_squeezeSeg_config()
        self._mc.LOAD_PRETRAINED_MODEL = False
        self._mc.BATCH_SIZE = 1  # TODO(bichen): fix this hard-coded batch size.
        self._model = SqueezeSeg(self._mc)
        self._saver = tf.train.Saver(self._model.model_params)

        self._session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
        self._saver.restore(self._session, FLAGS.checkpoint)

        # The publisher is created first so that a message arriving right
        # after subscribing cannot reach the callback before _pub exists.
        self._pub = rospy.Publisher(pub_topic, PointCloud2, queue_size=1)
        self._sub = rospy.Subscriber(sub_topic, PointCloud2, self.point_cloud_callback, queue_size=1)

        rospy.spin()

    def point_cloud_callback(self, cloud_msg):
        """Segment one incoming cloud and publish the labeled result.

        :param cloud_msg: PointCloud2 with fields x, y, z, intensity and d,
            holding ZENITH_LEVEL * AZIMUTH_LEVEL points
        :return: None
        """
        clock = Clock()
        mc = self._mc

        points = pc2.read_points(cloud_msg, skip_nans=False,
                                 field_names=("x", "y", "z", "intensity", "d"))
        # convert the point generator into a numpy.ndarray
        np_p = np.array(list(points))

        expected_values = mc.ZENITH_LEVEL * mc.AZIMUTH_LEVEL * 5
        if np_p.size != expected_values:
            rospy.logwarn("Skipping cloud: got %d values, expected %d",
                          np_p.size, expected_values)
            return

        # grid of (x, y, z, intensity, d) values
        lidar = np_p.reshape(mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 5)
        lidar_f = lidar.astype(np.float32)

        # cells whose fifth channel is positive are treated as valid
        lidar_mask = np.reshape(
            (lidar[:, :, 4] > 0),
            [mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 1]
        )
        lidar_f = (lidar_f - mc.INPUT_MEAN) / mc.INPUT_STD

        # run the prediction
        pred_cls = self._session.run(
            self._model.pred_cls,
            feed_dict={
                self._model.lidar_input: [lidar_f],
                self._model.keep_prob: 1.0,
                self._model.lidar_mask: [lidar_mask]
            }
        )
        label = pred_cls[0]

        ## point cloud for SqueezeSeg segments
        x = lidar[:, :, 0].reshape(-1)
        y = lidar[:, :, 1].reshape(-1)
        z = lidar[:, :, 2].reshape(-1)
        i = lidar[:, :, 3].reshape(-1)
        label = label.reshape(-1)
        cloud = np.stack((x, y, z, i, label))

        header = Header()
        header.stamp = rospy.Time()
        header.frame_id = "velodyne_link"

        # point cloud segments; one PointField per row of `cloud`
        msg_segment = pc2.create_cloud(header=header,
                                       fields=_make_point_field(cloud.shape[0]),
                                       points=cloud.T)

        # ed: /squeeze_seg/points publish
        self._pub.publish(msg_segment)
        rospy.loginfo("Point cloud processed. Took %.6f ms.", clock.takeRealTime())
diff --git a/ROS_packages/squeezeseg_cpp_preprocessing/script/squeezeseg/config/__init__.py b/ROS_packages/squeezeseg_cpp_preprocessing/script/squeezeseg/config/__init__.py
+from .kitti_squeezeSeg_config import kitti_squeezeSeg_config
+#G Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+"""Base Model configurations"""
+import os
+import os.path as osp
+import numpy as np
+from easydict import EasyDict as edict
def base_model_config(dataset='KITTI'):
  """Build the default configuration shared by the model variants.

  Args:
    dataset: dataset name, compared case-insensitively; only 'KITTI' is
      accepted.
  Returns:
    EasyDict holding the default settings.
  """
  dataset_name = dataset.upper()
  assert dataset_name=='KITTI', \
      'Currently only support KITTI dataset'

  cfg = edict()

  # --- dataset and classes ---
  # Dataset used to train/val/test model. Now support KITTI
  cfg.DATASET = dataset_name
  cfg.CLASSES = [
      'unknown',
      'car',
      'van',
      'truck',
      'pedestrian',
      'person_sitting',
      'cyclist',
      'tram',
      'misc',
  ]
  cfg.NUM_CLASS = len(cfg.CLASSES)
  # class name -> class id
  cfg.CLS_2_ID = {name: idx for idx, name in enumerate(cfg.CLASSES)}
  # loss weight for each class
  cfg.CLS_LOSS_WEIGHT = np.array(
      [1/20.0, 1.0, 2.0, 3.0,
       8.0, 10.0, 8.0, 2.0, 1.0]
  )
  # rgb color for each class
  cfg.CLS_COLOR_MAP = np.array(
      [[ 0.00,  0.00,  0.00],
       [ 0.12,  0.56,  0.37],
       [ 0.66,  0.55,  0.71],
       [ 0.58,  0.72,  0.88],
       [ 0.25,  0.51,  0.76],
       [ 0.98,  0.47,  0.73],
       [ 0.40,  0.19,  0.10],
       [ 0.87,  0.19,  0.17],
       [ 0.13,  0.55,  0.63]]
  )

  # --- input geometry ---
  cfg.IMAGE_WIDTH = 224
  cfg.IMAGE_HEIGHT = 224
  # number of vertical levels
  cfg.NUM_LEVEL = 10
  # number of pie sectors of the field of view
  cfg.NUM_SECTOR = 90
  # maximum distance of a measurement
  cfg.MAX_DIST = 100.0
  # range of the random shift along image width / height
  cfg.DRIFT_X = 0
  cfg.DRIFT_Y = 0

  # Pixel mean values of VGG16 as (1, 1, 3) arrays, in BGR and in RGB order
  cfg.BGR_MEANS = np.array([[[103.939, 116.779, 123.68]]])
  cfg.RGB_MEANS = np.array([[[123.68, 116.779, 103.939]]])

  # --- optimization ---
  # Probability to keep a node in dropout
  cfg.KEEP_PROB = 0.5
  cfg.BATCH_SIZE = 20
  cfg.LEARNING_RATE = 0.005
  # reduce step size after this many steps
  cfg.DECAY_STEPS = 10000
  # multiply the learning rate by this factor
  cfg.LR_DECAY_FACTOR = 0.1
  cfg.MOMENTUM = 0.9
  cfg.WEIGHT_DECAY = 0.0005
  # gradients with norm larger than this is going to be clipped.
  cfg.MAX_GRAD_NORM = 10.0

  # --- misc ---
  # print log to console in debug mode
  cfg.DEBUG_MODE = False
  # small value used in denominator to prevent division by 0
  cfg.DENOM_EPSILON = 1e-12

  # NOTE(review): this listing also carried comments for a pre-trained-model
  # flag and path, a data-augmentation switch, a batch-normalization epsilon
  # and a tf.FIFOQueue capacity, but no assignments for them -- confirm
  # whether those settings are meant to be defined here.
  return cfg
+# Author: Bichen Wu (bichen@berkeley.edu) 08/25/2016
+#-*- coding: utf-8 -*-
+"""Model configuration for pascal dataset"""
+import numpy as np
+from .config import base_model_config
def kitti_squeezeSeg_config():
  """Return the KITTI base configuration specialized for SqueezeSeg."""
  mc = base_model_config('KITTI')

  # --- classes ---
  mc.CLASSES = ['unknown', 'car', 'pedestrian', 'cyclist']
  mc.NUM_CLASS = len(mc.CLASSES)
  mc.CLS_2_ID = {name: idx for idx, name in enumerate(mc.CLASSES)}
  mc.CLS_LOSS_WEIGHT = np.array([1/15.0, 1.0, 10.0, 10.0])
  mc.CLS_COLOR_MAP = np.array([[ 0.00,  0.00,  0.00],
                               [ 0.12,  0.56,  0.37],
                               [ 0.66,  0.55,  0.71],
                               [ 0.58,  0.72,  0.88]])

  mc.BATCH_SIZE = 32

  # --- spherical projection grid ---
  mc.AZIMUTH_LEVEL = 512
  mc.ZENITH_LEVEL = 64

  # --- bilateral filter + R-CRF ---
  # filter size
  mc.LCN_HEIGHT = 3
  mc.LCN_WIDTH = 5
  # bilateral filter parameters, one entry per class
  mc.BILATERAL_THETA_A = np.array([.9, .9, .6, .6])
  mc.BILATERAL_THETA_R = np.array([.015, .015, .01, .01])
  mc.BI_FILTER_COEF = 0.1
  # number of mean-field inference iterations in the R-CRF
  mc.RCRF_ITER = 3
  mc.ANG_THETA_A = np.array([.9, .9, .6, .6])
  mc.ANG_FILTER_COEF = 0.02

  # --- training ---
  mc.LEARNING_RATE = 0.01
  # coefficient of the class loss in the loss function
  mc.CLS_LOSS_COEF = 15.0
  mc.WEIGHT_DECAY = 0.0001
  # gradient clipping threshold
  mc.MAX_GRAD_NORM = 1.0
  # used by the momentum optimizer
  mc.MOMENTUM = 0.9
  # exponential learning-rate decay: lower the rate every 10000 steps ...
  mc.DECAY_STEPS = 10000
  # ... by this factor
  mc.LR_DECAY_FACTOR = 0.5

  # --- data augmentation (used by imdb.py, a way to enlarge training data) ---
  mc.DATA_AUGMENTATION = True
  # random flipping along y in imdb.py
  mc.RANDOM_FLIPPING = True

  # --- input normalization: x, y, z, intensity, distance ---
  mc.INPUT_MEAN = np.array([[[10.88, 0.23, -1.04, 0.21, 12.12]]])
  mc.INPUT_STD = np.array([[[11.47, 6.91, 0.86, 0.16, 12.32]]])

  return mc
+#-*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from datetime import datetime
+import os.path
+import sys
+import time
+import glob
+import numpy as np
+from six.moves import xrange
+import tensorflow as tf
+from PIL import Image
+from config import *
+from imdb import kitti
+from utils.util import *
+from nets import *
+# ed: Windows version
FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_string('gpu', '0', """gpu id.""")

# Every flag is defined exactly once. The defaults are assembled with
# os.path.join, which yields '.\\data\\...' on Windows and './data/...'
# elsewhere, i.e. the same values as the former per-platform definitions.
tf.app.flags.DEFINE_string(
    'checkpoint', os.path.join('.', 'data', 'SqueezeSeg', 'model.ckpt-23000'),
    """Path to the model parameter file.""")
tf.app.flags.DEFINE_string(
    'input_path', os.path.join('.', 'data', 'samples', '*'),
    """Input lidar scan to be detected. Can process glob input such as """
    """./data/samples/*.npy or single input.""")
# The trailing empty component keeps the trailing path separator.
tf.app.flags.DEFINE_string(
    'out_dir', os.path.join('.', 'data', 'samples_out', ''),
    """Directory to dump output.""")
+def _normalize(x):
+ return (x - x.min())/(x.max() - x.min())
def detect():
  """Run SqueezeSeg on every file matching FLAGS.input_path.

  For each input scan the predicted label map is saved as
  'pred_<name>.npy' and a blend of the input image and the colored labels
  as 'plot_<name>.png', both inside FLAGS.out_dir. <name> is the input file
  name without directory and extension, on every platform.
  """
  os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

  with tf.Graph().as_default():
    mc = kitti_squeezeSeg_config()
    # ed: the model checkpoint restored below is loaded directly, so the
    # SqueezeNet pre-trained weights are not needed.
    mc.LOAD_PRETRAINED_MODEL = False
    mc.BATCH_SIZE = 1 # TODO(bichen): fix this hard-coded batch size.
    model = SqueezeSeg(mc)

    # ed: saver used to load the model weights
    saver = tf.train.Saver(model.model_params)
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
      # ed: load the stored model weights
      saver.restore(sess, FLAGS.checkpoint)

      for f in glob.iglob(FLAGS.input_path):
        lidar = np.load(f).astype(np.float32, copy=False)[:, :, :5]
        # cells whose fifth channel is positive are treated as valid
        lidar_mask = np.reshape(
            (lidar[:, :, 4] > 0),
            [mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 1]
        )
        lidar = (lidar - mc.INPUT_MEAN)/mc.INPUT_STD

        pred_cls = sess.run(
            model.pred_cls,
            feed_dict={
                model.lidar_input:[lidar],
                model.keep_prob: 1.0,
                model.lidar_mask:[lidar_mask]
            }
        )

        # str.strip('.npy') removes any of the characters '.', 'n', 'p', 'y'
        # from both ends rather than the suffix, and splitting on '/' fails
        # for Windows paths; os.path handles both cases.
        file_name = os.path.splitext(os.path.basename(f))[0]

        # save the data
        np.save(
            os.path.join(FLAGS.out_dir, 'pred_'+file_name+'.npy'),
            pred_cls[0]
        )

        # save the plot
        # ed: the (normalized) intensity channel is used as the depth_map image
        depth_map = Image.fromarray(
            (255 * _normalize(lidar[:, :, 3])).astype(np.uint8))
        # ed: colored image of the predicted labels
        label_map = Image.fromarray(
            (255 * visualize_seg(pred_cls, mc)[0]).astype(np.uint8))
        # ed: blend the two images
        blend_map = Image.blend(
            depth_map.convert('RGBA'),
            label_map.convert('RGBA'),
            alpha=0.4
        )
        # ed: save the blended image as a png file
        blend_map.save(
            os.path.join(FLAGS.out_dir, 'plot_'+file_name+'.png'))
def main(argv=None):
  """Ensure FLAGS.out_dir exists, run detection and report where output went."""
  out_dir_missing = not tf.gfile.Exists(FLAGS.out_dir)
  if out_dir_missing:
    tf.gfile.MakeDirs(FLAGS.out_dir)

  detect()
  print('Detection output written to {}'.format(FLAGS.out_dir))
+if __name__ == '__main__':
+ tf.app.run()
+# Author: Bichen Wu (bichen@berkeley.edu) 03/07/2017
+#-*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from datetime import datetime
+import os.path
+import sys
+import time
+import numpy as np
+from six.moves import xrange
+import tensorflow as tf
+from config import *
+from imdb import kitti
+from utils.util import *
+from nets import *
FLAGS = tf.app.flags.FLAGS

# --- data selection ---
tf.app.flags.DEFINE_string(
    'dataset', 'KITTI', """Currently support KITTI dataset.""")
tf.app.flags.DEFINE_string(
    'data_path', '../data', """Root directory of data""")
tf.app.flags.DEFINE_string(
    'image_set', 'val', """Can be train, trainval, val, or test""")

# --- directories ---
tf.app.flags.DEFINE_string(
    'eval_dir', '../log/eval/', """Directory where to write event logs """)
tf.app.flags.DEFINE_string(
    'checkpoint_path', '../log/train/', """Path to the training checkpoint.""")

# --- evaluation loop ---
tf.app.flags.DEFINE_integer(
    'eval_interval_secs', 60 * 1, """How often to check if new cpt is saved.""")
tf.app.flags.DEFINE_boolean(
    'run_once', False, """Whether to run eval only once.""")

# --- model / device ---
tf.app.flags.DEFINE_string(
    'net', 'squeezeSeg', """Neural net architecture.""")
tf.app.flags.DEFINE_string('gpu', '0', """gpu id.""")
def eval_once(saver, ckpt_path, summary_writer, eval_summary_ops, eval_summary_phs, imdb, model):
  """Evaluate a single checkpoint and write the metrics as summaries.

  Restores `ckpt_path` into a fresh session, runs `model.pred_cls` over
  `imdb` batch by batch, accumulates per-class true-positive, false-positive
  and false-negative pixel counts, prints precision / recall / IoU for every
  class except index 0, and feeds the same numbers together with the
  read/detect timings into the summary placeholders.

  Args:
    saver: saver used to restore the checkpoint.
    ckpt_path: checkpoint path; the text after the last '-' of its final
      '/'-separated component is used as the summary step.
    summary_writer: writer that receives the evaluated summaries.
    eval_summary_ops: list of summary ops to run.
    eval_summary_phs: dict mapping summary name to its placeholder.
    imdb: dataset object providing `image_idx` and `read_batch`.
    model: network object providing `mc`, `pred_cls`, `lidar_input`,
      `keep_prob` and `lidar_mask`.
  """
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:

    # Restores from checkpoint
    saver.restore(sess, ckpt_path)
    # Assuming ckpt_path looks something like /ckpt_dir/model.ckpt-0,
    # extract global_step from it.
    global_step = ckpt_path.split('/')[-1].split('-')[-1]

    mc = model.mc
    num_images = len(imdb.image_idx)

    _t = {
        'detect': Timer(),
        'read': Timer(),
        'eval': Timer()
    }

    # class-level pixel counts, summed over all batches
    tp_sum = np.zeros(mc.NUM_CLASS)
    fn_sum = np.zeros(mc.NUM_CLASS)
    fp_sum = np.zeros(mc.NUM_CLASS)

    # ed: read the data and evaluate it. Only full batches are processed, so
    # `offset` is always 0 with this loop bound.
    for i in xrange(num_images // mc.BATCH_SIZE):
      offset = max((i+1)*mc.BATCH_SIZE - num_images, 0)
      num_valid = mc.BATCH_SIZE - offset

      _t['read'].tic()
      # ed: load the lidar_2d (.npy) data
      lidar_per_batch, lidar_mask_per_batch, label_per_batch, _ \
          = imdb.read_batch(shuffle=False)
      _t['read'].toc()

      _t['detect'].tic()
      # ed: run the prediction
      pred_cls = sess.run(
          model.pred_cls,
          feed_dict={
              model.lidar_input:lidar_per_batch,
              model.keep_prob: 1.0,
              model.lidar_mask:lidar_mask_per_batch
          }
      )
      _t['detect'].toc()

      _t['eval'].tic()
      # ed: IoU evaluation. Predictions are multiplied by the squeezed lidar
      # mask before they are compared with the labels.
      _, tps, fps, fns = evaluate_iou(
          label_per_batch[:num_valid],                                        # ed: ground truth
          pred_cls[:num_valid]*np.squeeze(lidar_mask_per_batch[:num_valid]),  # ed: prediction
          mc.NUM_CLASS
      )

      tp_sum += tps
      fn_sum += fns
      fp_sum += fps

      _t['eval'].toc()

      print ('detect: {:d}/{:d} im_read: {:.3f}s '
             'detect: {:.3f}s evaluation: {:.3f}s'.format(
                 (i+1)*mc.BATCH_SIZE-offset, num_images,
                 _t['read'].average_time/mc.BATCH_SIZE,
                 _t['detect'].average_time/mc.BATCH_SIZE,
                 _t['eval'].average_time/mc.BATCH_SIZE))

    # ed: precision, recall and IoU. The count arrays come from np.zeros and
    # are already floating point, so no cast is needed.
    ious = tp_sum/(tp_sum + fn_sum + fp_sum + mc.DENOM_EPSILON)
    pr = tp_sum/(tp_sum + fp_sum + mc.DENOM_EPSILON)
    re = tp_sum/(tp_sum + fn_sum + mc.DENOM_EPSILON)

    print ('Evaluation summary:')
    print (' Timing:')
    print (' read: {:.3f}s detect: {:.3f}s'.format(
        _t['read'].average_time/mc.BATCH_SIZE,
        _t['detect'].average_time/mc.BATCH_SIZE
    ))

    eval_sum_feed_dict = {
        eval_summary_phs['Timing/detect']:_t['detect'].average_time/mc.BATCH_SIZE,
        eval_summary_phs['Timing/read']:_t['read'].average_time/mc.BATCH_SIZE,
    }

    print (' Accuracy:')

    # ed: for tensorboard (class 0 is skipped)
    for cls_id in range(1, mc.NUM_CLASS):
      cls_name = mc.CLASSES[cls_id]
      print (' {}:'.format(cls_name))
      print ('\tPixel-seg: P: {:.3f}, R: {:.3f}, IoU: {:.3f}'.format(
          pr[cls_id], re[cls_id], ious[cls_id]))

      eval_sum_feed_dict[
          eval_summary_phs['Pixel_seg_accuracy/'+cls_name+'_iou']] = ious[cls_id]
      eval_sum_feed_dict[
          eval_summary_phs['Pixel_seg_accuracy/'+cls_name+'_precision']] = pr[cls_id]
      eval_sum_feed_dict[
          eval_summary_phs['Pixel_seg_accuracy/'+cls_name+'_recall']] = re[cls_id]

    # ed: for tensorboard
    eval_summary_str = sess.run(eval_summary_ops, feed_dict=eval_sum_feed_dict)
    for sum_str in eval_summary_str:
      summary_writer.add_summary(sum_str, global_step)
    summary_writer.flush()
def evaluate():
  """Build the evaluation graph and evaluate checkpoints.

  With FLAGS.run_once set, FLAGS.checkpoint_path must name one checkpoint
  file, which is evaluated once. Otherwise FLAGS.checkpoint_path is a
  directory that is polled forever: each newly reported checkpoint is
  evaluated once, with a sleep of FLAGS.eval_interval_secs whenever there is
  nothing new.
  """
  assert FLAGS.dataset == 'KITTI', \
      'Currently only supports KITTI dataset'

  os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

  with tf.Graph().as_default() as g:

    assert FLAGS.net == 'squeezeSeg', \
        'Selected neural net architecture not supported: {}'.format(FLAGS.net)

    if FLAGS.net == 'squeezeSeg':
      mc = kitti_squeezeSeg_config()
      # Weights come from the restored checkpoint, as in the other inference
      # entry points, so no pre-trained model is loaded.
      mc.LOAD_PRETRAINED_MODEL = False
      mc.BATCH_SIZE = 1 # TODO(bichen): fix this hard-coded batch size.
      model = SqueezeSeg(mc)

    imdb = kitti(FLAGS.image_set, FLAGS.data_path, mc)

    eval_summary_ops = []
    eval_summary_phs = {}

    eval_summary_names = [
        'Timing/read',
        'Timing/detect',
    ]

    # ed: the values below can be inspected in tensorboard
    for i in range(1, mc.NUM_CLASS):
      eval_summary_names.append('Pixel_seg_accuracy/'+mc.CLASSES[i]+'_iou')
      eval_summary_names.append('Pixel_seg_accuracy/'+mc.CLASSES[i]+'_precision')
      eval_summary_names.append('Pixel_seg_accuracy/'+mc.CLASSES[i]+'_recall')

    # ed: one float placeholder and one scalar summary per metric name
    for sm in eval_summary_names:
      ph = tf.placeholder(tf.float32)
      eval_summary_phs[sm] = ph
      eval_summary_ops.append(tf.summary.scalar(sm, ph))

    saver = tf.train.Saver(model.model_params)

    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

    if FLAGS.run_once:
      # When run_once is true, checkpoint_path should point to the exact
      # checkpoint file.
      eval_once(
          saver, FLAGS.checkpoint_path, summary_writer, eval_summary_ops,
          eval_summary_phs, imdb, model)
      return

    # When run_once is false, checkpoint_path should point to the directory
    # that stores checkpoint files.
    ckpts = set()
    while True:
      ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
      if ckpt and ckpt.model_checkpoint_path:
        # ed: evaluate each reported checkpoint once and remember it in ckpts
        if ckpt.model_checkpoint_path in ckpts:
          # Do not evaluate on the same checkpoint
          print ('Wait {:d}s for new checkpoints to be saved ... '
                 .format(FLAGS.eval_interval_secs))
          time.sleep(FLAGS.eval_interval_secs)
        else:
          ckpts.add(ckpt.model_checkpoint_path)
          print ('Evaluating {}...'.format(ckpt.model_checkpoint_path))
          eval_once(
              saver, ckpt.model_checkpoint_path, summary_writer,
              eval_summary_ops, eval_summary_phs, imdb, model)
      else:
        print('No checkpoint file found')
        print ('Wait {:d}s for new checkpoints to be saved ... '
               .format(FLAGS.eval_interval_secs))
        time.sleep(FLAGS.eval_interval_secs)
def main(argv=None):  # pylint: disable=unused-argument
  """Start from an empty FLAGS.eval_dir and run the evaluation."""
  eval_dir = FLAGS.eval_dir
  # Remove the results of earlier runs before recreating the directory.
  if tf.gfile.Exists(eval_dir):
    tf.gfile.DeleteRecursively(eval_dir)
  tf.gfile.MakeDirs(eval_dir)
  evaluate()
+if __name__ == '__main__':
+ tf.app.run()
+# Author: Bichen Wu (bichen@berkeley.edu) 03/07/2017
+#-*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from datetime import datetime
+import os.path
+import sys
+import time
+import numpy as np
+from six.moves import xrange
+import tensorflow as tf
+from config import *
+from imdb import kitti
+from utils.util import *
+from nets import *
FLAGS = tf.app.flags.FLAGS

# --- data selection ---
tf.app.flags.DEFINE_string(
    'dataset', 'KITTI', """Currently support KITTI dataset.""")
tf.app.flags.DEFINE_string(
    'data_path', '../data', """Root directory of data""")
tf.app.flags.DEFINE_string(
    'image_set', 'val', """Can be train, trainval, val, or test""")

# --- directories ---
tf.app.flags.DEFINE_string(
    'eval_dir', '../log/eval/', """Directory where to write event logs """)
tf.app.flags.DEFINE_string(
    'checkpoint_path', '../log/train/', """Path to the training checkpoint.""")

# --- evaluation loop ---
tf.app.flags.DEFINE_integer(
    'eval_interval_secs', 60 * 1, """How often to check if new cpt is saved.""")
tf.app.flags.DEFINE_boolean(
    'run_once', False, """Whether to run eval only once.""")

# --- model / device ---
tf.app.flags.DEFINE_string(
    'net', 'squeezeSeg', """Neural net architecture.""")
tf.app.flags.DEFINE_string('gpu', '0', """gpu id.""")
def eval_once(saver, ckpt_path, summary_writer, eval_summary_ops,
              eval_summary_phs, imdb, model):
  """Evaluate one checkpoint over the image set and log the metrics.

  Restores `ckpt_path` into a fresh session, runs `model.pred_cls` on every
  full batch returned by `imdb.read_batch(shuffle=False)`, saves each
  prediction as `pred_<image index>.npy` under `./data/sample_out_ed`,
  accumulates per-class TP/FP/FN counts, prints a report and writes the timing
  and per-class precision / recall / IoU summaries to `summary_writer`.

  Args:
    saver: object whose `restore(sess, path)` loads the model variables.
    ckpt_path: path of the checkpoint to restore. The text after the last
      '-' of its file name is used as the summary step.
    summary_writer: writer receiving the evaluated summaries.
    eval_summary_ops: list of summary ops to run.
    eval_summary_phs: dict mapping summary name -> placeholder feeding it.
    imdb: dataset object providing `image_idx` and `read_batch`.
    model: model providing `mc`, `pred_cls`, `lidar_input`, `keep_prob` and
      `lidar_mask`.
  """
  with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
    # Restores from checkpoint
    saver.restore(sess, ckpt_path)
    # Assuming model_checkpoint_path looks something like:
    #   /ckpt_dir/model.ckpt-0,
    # extract global_step from it.
    global_step = ckpt_path.split('/')[-1].split('-')[-1]

    mc = model.mc
    batch_size = mc.BATCH_SIZE
    num_images = len(imdb.image_idx)

    _t = {
        'detect': Timer(),
        'read': Timer(),
        'eval': Timer()
    }

    # class-level (pixel-wise) true positive / false negative / false positive
    # counts, accumulated over all batches
    tp_sum = np.zeros(mc.NUM_CLASS)
    fn_sum = np.zeros(mc.NUM_CLASS)
    fp_sum = np.zeros(mc.NUM_CLASS)

    # The prediction dump directory only has to be created once.
    out_dir = "./data/sample_out_ed"
    if not tf.gfile.Exists(out_dir):
      tf.gfile.MakeDirs(out_dir)

    # Only full batches are evaluated; `range` works on Python 2 and 3.
    for i in range(num_images // batch_size):
      offset = max((i+1)*batch_size - num_images, 0)
      num_valid = batch_size - offset

      _t['read'].tic()
      # Load the next batch of lidar_2d (.npy) records in file order.
      lidar_per_batch, lidar_mask_per_batch, label_per_batch, _ \
          = imdb.read_batch(shuffle=False)
      _t['read'].toc()

      _t['detect'].tic()
      # Run the network; keep_prob 1.0 disables dropout.
      pred_cls = sess.run(
          model.pred_cls,
          feed_dict={
              model.lidar_input: lidar_per_batch,
              model.keep_prob: 1.0,
              model.lidar_mask: lidar_mask_per_batch
          }
      )
      _t['detect'].toc()

      # Dump every prediction of the batch under the index of its input.
      # NOTE(review): this assumes read_batch started from the first entry of
      # imdb.image_idx, as the original single-image code did.
      for j in range(num_valid):
        np.save(
            os.path.join(
                out_dir, 'pred_' + imdb.image_idx[i*batch_size + j] + '.npy'),
            pred_cls[j])

      _t['eval'].tic()
      # IoU evaluation: ground truth vs. prediction restricted to valid pixels.
      # NOTE(review): the original call ended on a dangling comma; the class
      # count is restored as third argument -- confirm against evaluate_iou.
      iou, tps, fps, fns = evaluate_iou(
          label_per_batch[:num_valid],
          pred_cls[:num_valid] * np.squeeze(lidar_mask_per_batch[:num_valid]),
          mc.NUM_CLASS
      )
      tp_sum += tps
      fn_sum += fns
      fp_sum += fps
      _t['eval'].toc()

      print ('detect: {:d}/{:d} im_read: {:.3f}s '
             'detect: {:.3f}s evaluation: {:.3f}s'.format(
                 (i+1)*batch_size-offset, num_images,
                 _t['read'].average_time/batch_size,
                 _t['detect'].average_time/batch_size,
                 _t['eval'].average_time/batch_size))

    # Per-class IoU, precision and recall. np.float was removed from NumPy,
    # so the explicit float64 type is used instead.
    tp = tp_sum.astype(np.float64)
    ious = tp/(tp_sum + fn_sum + fp_sum + mc.DENOM_EPSILON)
    pr = tp/(tp_sum + fp_sum + mc.DENOM_EPSILON)
    re = tp/(tp_sum + fn_sum + mc.DENOM_EPSILON)

    print ('Evaluation summary:')
    print (' Timing:')
    print (' read: {:.3f}s detect: {:.3f}s'.format(
        _t['read'].average_time/batch_size,
        _t['detect'].average_time/batch_size
    ))

    eval_sum_feed_dict = {
        eval_summary_phs['Timing/detect']: _t['detect'].average_time/batch_size,
        eval_summary_phs['Timing/read']: _t['read'].average_time/batch_size,
    }

    print (' Accuracy:')
    # Class 0 is skipped; every other class gets three TensorBoard scalars.
    for cls_id in range(1, mc.NUM_CLASS):
      cls_name = mc.CLASSES[cls_id]
      print (' {}:'.format(cls_name))
      print ('\tPixel-seg: P: {:.3f}, R: {:.3f}, IoU: {:.3f}'.format(
          pr[cls_id], re[cls_id], ious[cls_id]))
      eval_sum_feed_dict[
          eval_summary_phs['Pixel_seg_accuracy/'+cls_name+'_iou']] = ious[cls_id]
      eval_sum_feed_dict[
          eval_summary_phs['Pixel_seg_accuracy/'+cls_name+'_precision']] = pr[cls_id]
      eval_sum_feed_dict[
          eval_summary_phs['Pixel_seg_accuracy/'+cls_name+'_recall']] = re[cls_id]

    # Evaluate all summaries in one run and hand them to TensorBoard.
    eval_summary_str = sess.run(eval_summary_ops, feed_dict=eval_sum_feed_dict)
    for sum_str in eval_summary_str:
      summary_writer.add_summary(sum_str, global_step)
    summary_writer.flush()
def evaluate():
  """Build the evaluation graph and evaluate checkpoints.

  With FLAGS.run_once set, FLAGS.checkpoint_path must be an exact checkpoint
  file and is evaluated a single time. Otherwise FLAGS.checkpoint_path is a
  checkpoint directory that is polled forever: each checkpoint not seen before
  is evaluated, and the loop sleeps FLAGS.eval_interval_secs whenever there is
  nothing new.
  """
  assert FLAGS.dataset == 'KITTI', \
      'Currently only supports KITTI dataset'
  os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

  with tf.Graph().as_default() as g:
    assert FLAGS.net == 'squeezeSeg', \
        'Selected neural net architecture not supported: {}'.format(FLAGS.net)

    if FLAGS.net == 'squeezeSeg':
      mc = kitti_squeezeSeg_config()
      mc.BATCH_SIZE = 1 # TODO(bichen): fix this hard-coded batch size.
      model = SqueezeSeg(mc)

    imdb = kitti(FLAGS.image_set, FLAGS.data_path, mc)

    # Names of every scalar shown in TensorBoard: two timings, then
    # iou / precision / recall for each class except class 0.
    eval_summary_names = ['Timing/read', 'Timing/detect']
    for cls_id in range(1, mc.NUM_CLASS):
      prefix = 'Pixel_seg_accuracy/'+mc.CLASSES[cls_id]
      eval_summary_names.extend(
          [prefix+'_iou', prefix+'_precision', prefix+'_recall'])

    # One float placeholder and one scalar summary per name.
    eval_summary_phs = {}
    eval_summary_ops = []
    for summary_name in eval_summary_names:
      placeholder = tf.placeholder(tf.float32)
      eval_summary_phs[summary_name] = placeholder
      eval_summary_ops.append(tf.summary.scalar(summary_name, placeholder))

    saver = tf.train.Saver(model.model_params)
    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

    if FLAGS.run_once:
      # When run_once is true, checkpoint_path should point to the exact
      # checkpoint file.
      eval_once(
          saver, FLAGS.checkpoint_path, summary_writer, eval_summary_ops,
          eval_summary_phs, imdb, model)
      return

    # When run_once is false, checkpoint_path should point to the directory
    # that stores checkpoint files.
    evaluated = set()
    while True:
      ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
      has_checkpoint = ckpt and ckpt.model_checkpoint_path

      if not has_checkpoint:
        print('No checkpoint file found')
      elif ckpt.model_checkpoint_path not in evaluated:
        # A checkpoint that has not been evaluated yet: remember and run it,
        # then poll again immediately.
        evaluated.add(ckpt.model_checkpoint_path)
        print ('Evaluating {}...'.format(ckpt.model_checkpoint_path))
        eval_once(
            saver, ckpt.model_checkpoint_path, summary_writer,
            eval_summary_ops, eval_summary_phs, imdb, model)
        continue

      # Nothing new to evaluate (missing or already-seen checkpoint): wait.
      print ('Wait {:d}s for new checkpoints to be saved ... '
             .format(FLAGS.eval_interval_secs))
      time.sleep(FLAGS.eval_interval_secs)
def main(argv=None):  # pylint: disable=unused-argument
  """Start from an empty FLAGS.eval_dir, then run the evaluation."""
  eval_dir = FLAGS.eval_dir
  # Remove summaries left over from a previous run before recreating the dir.
  if tf.gfile.Exists(eval_dir):
    tf.gfile.DeleteRecursively(eval_dir)
  tf.gfile.MakeDirs(eval_dir)
  evaluate()


if __name__ == '__main__':
  tf.app.run()
+#!/usr/bin/env python
+#-*- coding: utf-8 -*-
+from .kitti import kitti
+# Author: Bichen Wu (bichen@berkeley.edu) 02/27/2017
+#-*- coding: utf-8 -*-
+"""The data base wrapper class"""
+import os
+import random
+import shutil
+import numpy as np
+from utils.util import *
class imdb(object):
  """Image database: base class that turns record indices into batches.

  Subclasses fill `_image_idx` with record identifiers and implement
  `_lidar_2d_path_at(idx)`, which maps an identifier to the `.npy` file
  holding that record.
  """

  def __init__(self, name, mc):
    """Store the database name and model configuration `mc`."""
    self._name = name
    self._image_set = []
    self._image_idx = []
    self._data_root_path = []
    self.mc = mc

    # batch reader state: shuffled index list and read cursor
    self._perm_idx = []
    self._cur_idx = 0

  @property
  def name(self):
    return self._name

  @property
  def image_idx(self):
    return self._image_idx

  @property
  def image_set(self):
    return self._image_set

  @property
  def data_root_path(self):
    return self._data_root_path

  def _shuffle_image_idx(self):
    """Draw a new random order of the indices and rewind the cursor."""
    self._perm_idx = [self._image_idx[i] for i in
                      np.random.permutation(np.arange(len(self._image_idx)))]
    self._cur_idx = 0

  def _lidar_2d_path_at(self, idx):
    """Return the path of the .npy record for `idx` (subclass hook)."""
    raise NotImplementedError

  def read_batch(self, shuffle=True):
    """Read a batch of lidar data including labels. Data formated as numpy array
    of shape: height x width x {x, y, z, intensity, range, label}.

    Args:
      shuffle: whether or not to shuffle the dataset
    Returns:
      lidar_per_batch: LiDAR input. Shape: batch x height x width x 5.
      lidar_mask_per_batch: LiDAR mask, 0 for missing data and 1 otherwise.
        Shape: batch x height x width x 1.
      label_per_batch: point-wise labels. Shape: batch x height x width.
      weight_per_batch: loss weights for different classes. Shape:
        batch x height x width
    """
    mc = self.mc

    if shuffle:
      # Reshuffle once fewer than a full batch (plus one) remains.
      if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
        self._shuffle_image_idx()
      batch_idx = self._perm_idx[self._cur_idx:self._cur_idx+mc.BATCH_SIZE]
      self._cur_idx += mc.BATCH_SIZE
    else:
      if self._cur_idx + mc.BATCH_SIZE >= len(self._image_idx):
        # Sequential read reaching the end: wrap around to the beginning.
        batch_idx = self._image_idx[self._cur_idx:] \
            + self._image_idx[:self._cur_idx + mc.BATCH_SIZE-len(self._image_idx)]
        self._cur_idx += mc.BATCH_SIZE - len(self._image_idx)
      else:
        batch_idx = self._image_idx[self._cur_idx:self._cur_idx+mc.BATCH_SIZE]
        self._cur_idx += mc.BATCH_SIZE

    lidar_per_batch = []
    lidar_mask_per_batch = []
    label_per_batch = []
    weight_per_batch = []

    for idx in batch_idx:
      # load data
      # loading from npy is 30x faster than loading from pickle
      record = np.load(self._lidar_2d_path_at(idx)).astype(np.float32, copy=False)

      # Data augmentation: with probability 0.5 mirror the record along the
      # width axis and negate the y coordinate accordingly.
      # NOTE(review): this also runs for shuffle=False (evaluation) reads.
      if np.random.rand() > 0.5:
        # flip y
        record = record[:, ::-1, :]
        record[:, :, 1] *= -1

      lidar = record[:, :, :5] # x, y, z, intensity, depth
      # A cell is valid when its depth is positive. The target shape was
      # missing from the original reshape call, which made it raise TypeError.
      lidar_mask = np.reshape(
          (lidar[:, :, 4] > 0),
          [mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 1]
      )
      # normalize with the mean / std stored in the configuration
      lidar = (lidar - mc.INPUT_MEAN)/mc.INPUT_STD

      label = record[:, :, 5]
      # Per-pixel loss weight looked up from the pixel's class; pixels whose
      # label matches no class keep weight 0.
      weight = np.zeros(label.shape)
      for l in range(mc.NUM_CLASS):
        weight[label==l] = mc.CLS_LOSS_WEIGHT[int(l)]

      # Append all the data
      lidar_per_batch.append(lidar)
      lidar_mask_per_batch.append(lidar_mask)
      label_per_batch.append(label)
      weight_per_batch.append(weight)

    return np.array(lidar_per_batch), np.array(lidar_mask_per_batch), \
        np.array(label_per_batch), np.array(weight_per_batch)

  def evaluate_detections(self):
    raise NotImplementedError
+# Author: Bichen Wu (bichen@berkeley.edu) 02/27/2017
+#-*- coding: utf-8 -*-
+"""Image data base class for kitti"""
+import os
+import numpy as np
+import subprocess
+from .imdb import imdb
class kitti(imdb):
  """KITTI-style dataset: records listed in an ImageSet file, stored as .npy.

  Records live in `<data_path>/lidar_2d`, except those whose index starts
  with 'gta_', which live in `<data_path>/gta`.
  """

  def __init__(self, image_set, data_path, mc):
    """
    Args:
      image_set: name of the split; `<data_path>/ImageSet/<image_set>.txt`
        lists one record index per line.
      data_path: root directory of the dataset.
      mc: model configuration passed on to the base class.
    """
    imdb.__init__(self, 'kitti_'+image_set, mc)
    self._image_set = image_set
    self._data_root_path = data_path
    self._lidar_2d_path = os.path.join(self._data_root_path, 'lidar_2d')
    self._gta_2d_path = os.path.join(self._data_root_path, 'gta')

    # a list of string indices of images in the directory
    self._image_idx = self._load_image_set_idx()

    ## batch reader ##
    self._perm_idx = None
    self._cur_idx = 0
    # TODO(bichen): add a random seed as parameter
    self._shuffle_image_idx()

  def _load_image_set_idx(self):
    """Read the record indices of the split from its ImageSet file."""
    image_set_file = os.path.join(
        self._data_root_path, 'ImageSet', self._image_set+'.txt')
    assert os.path.exists(image_set_file), \
        'File does not exist: {}'.format(image_set_file)

    with open(image_set_file) as f:
      stripped = (line.strip() for line in f)
      # Blank lines (e.g. a trailing newline) would otherwise become an empty
      # index whose '.npy' file can never exist.
      image_idx = [idx for idx in stripped if idx]
    return image_idx

  def _lidar_2d_path_at(self, idx):
    """Return the path of the .npy record for `idx`; it must exist."""
    if idx.startswith('gta_'):
      lidar_2d_path = os.path.join(self._gta_2d_path, idx+'.npy')
    else:
      lidar_2d_path = os.path.join(self._lidar_2d_path, idx+'.npy')

    assert os.path.exists(lidar_2d_path), \
        'File does not exist: {}'.format(lidar_2d_path)
    return lidar_2d_path
+#-*- coding: utf-8 -*-
+from .squeezeSeg import SqueezeSeg
+# Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+#-*- coding: utf-8 -*-
+"""SqueezeSeg model"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+import joblib
+from utils import util
+import numpy as np
+import tensorflow as tf
+from nn_skeleton import ModelSkeleton
class SqueezeSeg(ModelSkeleton):
  """SqueezeSeg network assembled from the ModelSkeleton layer builders."""

  def __init__(self, mc, gpu_id=0):
    """Build the whole graph for configuration `mc` on GPU `gpu_id`."""
    device_name = '/gpu:{}'.format(gpu_id)
    with tf.device(device_name):
      ModelSkeleton.__init__(self, mc)

      # The graph pieces are added in this fixed order.
      self._add_forward_graph()  # network body -> self.output_prob
      self._add_output_graph()   # pred_prob, pred_cls
      self._add_loss_graph()     # cls_loss, total_loss
      self._add_train_graph()    # train op
      self._add_viz_graph()      # label / depth / prediction image summaries
      self._add_summary_ops()    # extra summaries

  def _add_forward_graph(self):
    """NN architecture."""
    mc = self.mc

    assert tf.gfile.Exists(mc.PRETRAINED_MODEL_PATH), \
        'Cannot find pretrained model at the given path:' \
        ' {}'.format(mc.PRETRAINED_MODEL_PATH)
    self.caffemodel_weight = joblib.load(mc.PRETRAINED_MODEL_PATH)

    # Local shorthands; every layer of one kind shares these settings.
    def fire(name, bottom, squeeze, expand):
      return self._fire_layer(
          name, bottom, s1x1=squeeze, e1x1=expand, e3x3=expand, freeze=False)

    def pool(name, bottom):
      return self._pooling_layer(
          name, bottom, size=3, stride=2, padding='SAME')

    def fire_up(name, bottom, squeeze, expand):
      return self._fire_deconv(
          name, bottom, s1x1=squeeze, e1x1=expand, e3x3=expand,
          factors=[1, 2], stddev=0.1)

    # ---- encoder ----
    conv1 = self._conv_layer(
        'conv1', self.lidar_input, filters=64, size=3, stride=2,
        padding='SAME', freeze=False, xavier=True)
    conv1_skip = self._conv_layer(
        'conv1_skip', self.lidar_input, filters=64, size=1, stride=1,
        padding='SAME', freeze=False, xavier=True)
    pool1 = pool('pool1', conv1)

    fire2 = fire('fire2', pool1, 16, 64)
    fire3 = fire('fire3', fire2, 16, 64)
    pool3 = pool('pool3', fire3)

    fire4 = fire('fire4', pool3, 32, 128)
    fire5 = fire('fire5', fire4, 32, 128)
    pool5 = pool('pool5', fire5)

    fire6 = fire('fire6', pool5, 48, 192)
    fire7 = fire('fire7', fire6, 48, 192)
    fire8 = fire('fire8', fire7, 64, 256)
    fire9 = fire('fire9', fire8, 64, 256)

    # ---- decoder: each deconv output is summed with an earlier layer
    # (fire5, fire3, conv1, conv1_skip) as a skip connection ----
    fire10 = fire_up('fire_deconv10', fire9, 64, 128)
    fire10_fuse = tf.add(fire10, fire5, name='fure10_fuse')

    fire11 = fire_up('fire_deconv11', fire10_fuse, 32, 64)
    fire11_fuse = tf.add(fire11, fire3, name='fire11_fuse')

    fire12 = fire_up('fire_deconv12', fire11_fuse, 16, 32)
    fire12_fuse = tf.add(fire12, conv1, name='fire12_fuse')

    fire13 = fire_up('fire_deconv13', fire12_fuse, 16, 32)
    fire13_fuse = tf.add(fire13, conv1_skip, name='fire13_fuse')

    drop13 = tf.nn.dropout(fire13_fuse, self.keep_prob, name='drop13')

    # Per-class scores, no ReLU on the output.
    conv14 = self._conv_layer(
        'conv14_prob', drop13, filters=mc.NUM_CLASS, size=3, stride=1,
        padding='SAME', relu=False, stddev=0.1)

    # Bilateral filter weights computed from the x, y, z input channels.
    lcn_sizes = [mc.LCN_HEIGHT, mc.LCN_WIDTH]
    bilateral_filter_weights = self._bilateral_filter_layer(
        'bilateral_filter', self.lidar_input[:, :, :, :3], # x, y, z
        sizes=lcn_sizes, stride=1)

    # The recurrent CRF layer produces the final output of the network.
    self.output_prob = self._recurrent_crf_layer(
        'recurrent_crf', conv14, bilateral_filter_weights,
        sizes=[mc.LCN_HEIGHT, mc.LCN_WIDTH], num_iterations=mc.RCRF_ITER,
        padding='SAME')

  def _fire_layer(self, layer_name, inputs, s1x1, e1x1, e3x3, stddev=0.001,
                  freeze=False):
    """Fire layer constructor.

    A 1x1 squeeze convolution feeds a 1x1 and a 3x3 expand convolution whose
    outputs are concatenated along the channel axis.

    Args:
      layer_name: layer name
      inputs: input tensor
      s1x1: number of 1x1 filters in squeeze layer.
      e1x1: number of 1x1 filters in expand layer.
      e3x3: number of 3x3 filters in expand layer.
      stddev: standard deviation passed to every convolution of the layer.
      freeze: if true, do not train parameters in this layer.
    Returns:
      fire layer operation.
    """
    shared = dict(stride=1, padding='SAME', freeze=freeze, stddev=stddev)

    squeezed = self._conv_layer(
        layer_name+'/squeeze1x1', inputs, filters=s1x1, size=1, **shared)
    expand_1x1 = self._conv_layer(
        layer_name+'/expand1x1', squeezed, filters=e1x1, size=1, **shared)
    expand_3x3 = self._conv_layer(
        layer_name+'/expand3x3', squeezed, filters=e3x3, size=3, **shared)

    return tf.concat([expand_1x1, expand_3x3], 3, name=layer_name+'/concat')

  def _fire_deconv(self, layer_name, inputs, s1x1, e1x1, e3x3,
                   factors=[1, 2], freeze=False, stddev=0.001):
    """Fire deconvolution layer constructor.

    Same as a fire layer, with a bilinear-initialised deconvolution inserted
    between the squeeze and the expand convolutions.

    Args:
      layer_name: layer name
      inputs: input tensor
      s1x1: number of 1x1 filters in squeeze layer.
      e1x1: number of 1x1 filters in expand layer.
      e3x3: number of 3x3 filters in expand layer.
      factors: spatial upsampling factors.
      freeze: if true, do not train parameters in this layer.
      stddev: standard deviation passed to the squeeze / expand convolutions.
    Returns:
      fire layer operation.
    """
    assert len(factors) == 2,'factors should be an array of size 2'

    factor_h, factor_w = factors[0], factors[1]
    # Deconvolution kernel size derived from each upsampling factor.
    kernel_size = [factor_h * 2 - factor_h % 2, factor_w * 2 - factor_w % 2]

    shared = dict(stride=1, padding='SAME', freeze=freeze, stddev=stddev)

    squeezed = self._conv_layer(
        layer_name+'/squeeze1x1', inputs, filters=s1x1, size=1, **shared)
    upsampled = self._deconv_layer(
        layer_name+'/deconv', squeezed, filters=s1x1, size=kernel_size,
        stride=factors, padding='SAME', init='bilinear')
    expand_1x1 = self._conv_layer(
        layer_name+'/expand1x1', upsampled, filters=e1x1, size=1, **shared)
    expand_3x3 = self._conv_layer(
        layer_name+'/expand3x3', upsampled, filters=e3x3, size=3, **shared)

    return tf.concat([expand_1x1, expand_3x3], 3, name=layer_name+'/concat')
+# Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+#-*- coding: utf-8 -*-
+"""Neural network model base class."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+from utils import util
+import numpy as np
+import tensorflow as tf
def _variable_on_device(name, shape, initializer, trainable=True):
  """Create (or fetch) a variable through tf.get_variable.

  Args:
    name: name of the variable
    shape: list of ints; only used when `initializer` is callable
    initializer: either a callable initializer, or a concrete initial value
      that is handed to tf.get_variable as-is
    trainable: whether the variable is trainable
  Returns:
    Variable Tensor
  """
  # TODO(bichen): fix the hard-coded data type below
  dtype = tf.float32

  if callable(initializer):
    return tf.get_variable(
        name, shape, initializer=initializer, dtype=dtype, trainable=trainable)

  # A concrete initial value: shape and dtype are not passed explicitly.
  return tf.get_variable(name, initializer=initializer, trainable=trainable)
def _variable_with_weight_decay(name, shape, wd, initializer, trainable=True):
  """Create a variable and optionally register an L2 weight-decay loss for it.

  The variable is created by _variable_on_device with the given initializer.
  When `wd` is given and the variable is trainable, `wd * l2_loss(var)` is
  added to the 'losses' collection.

  Args:
    name: name of the variable
    shape: list of ints
    wd: add L2Loss weight decay multiplied by this float. If None, weight
        decay is not added for this Variable.
    initializer: initializer (or initial value) for the variable
    trainable: whether the variable is trainable
  Returns:
    Variable Tensor
  """
  variable = _variable_on_device(name, shape, initializer, trainable)

  needs_decay = wd is not None and trainable
  if needs_decay:
    decay_loss = tf.multiply(tf.nn.l2_loss(variable), wd, name='weight_loss')
    tf.add_to_collection('losses', decay_loss)

  return variable
+class ModelSkeleton:
+ """Base class of NN detection models."""
+ def __init__(self, mc):
+ self.mc = mc
+ # a scalar tensor in range (0, 1]. Usually set to 0.5 in training phase and
+ # 1.0 in evaluation phase
+ self.ph_keep_prob = tf.placeholder(tf.float32, name='keep_prob')
+ # projected lidar points on a 2D spherical surface
+ self.ph_lidar_input = tf.placeholder(
+ tf.float32, [mc.BATCH_SIZE, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 5],
+ name='lidar_input'
+ )
+ # A tensor where an element is 1 if the corresponding cell contains an
+ # valid lidar measurement. Or if the data is missing, then mark it as 0.
+ self.ph_lidar_mask = tf.placeholder(
+ tf.float32, [mc.BATCH_SIZE, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 1],
+ name='lidar_mask')
+ # A tensor where each element contains the class of each pixel
+ self.ph_label = tf.placeholder(
+ name='label')
+ # weighted loss for different classes
+ self.ph_loss_weight = tf.placeholder(
+ tf.float32, [mc.BATCH_SIZE, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL],
+ name='loss_weight')
+ # define a FIFOqueue for pre-fetching data
+ self.q = tf.FIFOQueue(
+ capacity=mc.QUEUE_CAPACITY,
+ dtypes=[tf.float32, tf.float32, tf.float32, tf.int32, tf.float32],
+ shapes=[[],
+ )
+ # ed: tensorflow의 FIFOQueue라는 함수를 사용해서 여러 placeholder들을 묶어서 처리하는듯하다
+ # train.py:129 : enqueue_op에 feed_dict가 들어간다
+ self.enqueue_op = self.q.enqueue(
+ [self.ph_keep_prob, self.ph_lidar_input, self.ph_lidar_mask,
+ self.ph_label, self.ph_loss_weight]
+ )
+ self.keep_prob, self.lidar_input, self.lidar_mask, self.label, \
+ self.loss_weight = self.q.dequeue()
+ # model parameters
+ self.model_params = []
+ # model size counter
+ self.model_size_counter = [] # array of tuple of layer name, parameter size
+ # flop counter
+ self.flop_counter = [] # array of tuple of layer name, flop number
+ # activation counter
+ self.activation_counter = [] # array of tuple of layer name, output activations
+ self.activation_counter.append(('input', mc.AZIMUTH_LEVEL*mc.ZENITH_LEVEL*3))
+ def _add_forward_graph(self):
+ """NN architecture specification."""
+ raise NotImplementedError
+ def _add_output_graph(self):
+ """Define how to intepret output."""
+ mc = self.mc
+ with tf.variable_scope('interpret_output') as scope:
+ # ed: self.output_prob : SqueezeSeg Model에서 마지막 변수
+ self.prob = tf.multiply(
+ tf.nn.softmax(self.output_prob, dim=-1), self.lidar_mask,
+ name='pred_prob')
+ # ed: axis=3 이므로 단순한 scalar 값이 나오는게 아니라 행렬이 출력된다
+ self.pred_cls = tf.argmax(self.prob, axis=3, name='pred_cls')
+ # add summaries
+ for cls_id, cls in enumerate(mc.CLASSES):
+ self._activation_summary(self.prob[:, :, :, cls_id], 'prob_'+cls)
+ def _add_loss_graph(self):
+ """Define the loss operation."""
+ mc = self.mc
+ with tf.variable_scope('cls_loss') as scope:
+ self.cls_loss = tf.identity(
+ tf.reduce_sum(
+ tf.nn.sparse_softmax_cross_entropy_with_logits(
+ labels=tf.reshape(self.label, (-1, )),
+ logits=tf.reshape(self.output_prob, (-1, mc.NUM_CLASS))
+ ) \
+ * tf.reshape(self.lidar_mask, (-1, )) \
+ * tf.reshape(self.loss_weight, (-1, ))
+ ) / tf.reduce_sum(self.lidar_mask)*mc.CLS_LOSS_COEF,
+ name='cls_loss')
+ tf.add_to_collection('losses', self.cls_loss)
+ # ed: self.loss = cls_loss + weight_decay loss를 합친 변수
+ # add above losses as well as weight decay losses to form the total loss
+ self.loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
+ # ed: for tensorboard
+ # add loss summaries
+ # _add_loss_summaries(self.loss)
+ tf.summary.scalar(self.cls_loss.op.name, self.cls_loss)
+ tf.summary.scalar(self.loss.op.name, self.loss)
+ def _add_train_graph(self):
+ """Define the training operation."""
+ mc = self.mc
+ self.global_step = tf.Variable(0, name='global_step', trainable=False)
+ # ed: learning rate를 설정하는 코드인듯
+ # 가변적으로 learning rate가 변한다
+ lr = tf.train.exponential_decay(mc.LEARNING_RATE,
+ self.global_step,
+ staircase=True)
+ tf.summary.scalar('learning_rate', lr)
+ # ed: momentum optimizer를 사용하는구만, 이게뭐지?
+ opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=mc.MOMENTUM)
+ # ed: Tensorflow가 알아서 계산해주는 Gradient 위에 조작된 값을 덮어쓰기 위해 사용하는 코드
+ # self.loss의 weights들을 minimize() 함수를 사용하는 대신 임의로 덮어쓰기 위한 코드인듯
+ grads_vars = opt.compute_gradients(self.loss, tf.trainable_variables())
+ # ed: gradient clipping : exploding gradient를 방지하기 위해 최대 가중치값을 limit으로 정해놓는다
+ with tf.variable_scope('clip_gradient') as scope:
+ for i, (grad, var) in enumerate(grads_vars):
+ grads_vars[i] = (tf.clip_by_norm(grad, mc.MAX_GRAD_NORM), var)
+ apply_gradient_op = opt.apply_gradients(grads_vars, global_step=self.global_step)
+ with tf.control_dependencies([apply_gradient_op]):
+ # ed: no operation은 뭐징
+ # 위에 모든것들을 한꺼번에 실행시키기 위한 empty Variable을 선언하는듯하다
+ self.train_op = tf.no_op(name='train')
+ def _add_viz_graph(self):
+ """Define the visualization operation."""
+ mc = self.mc
+ self.label_to_show = tf.placeholder(
+ tf.float32, [None, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 3],
+ name='label_to_show'
+ )
+ self.depth_image_to_show = tf.placeholder(
+ tf.float32, [None, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 1],
+ name='depth_image_to_show'
+ )
+ self.pred_image_to_show = tf.placeholder(
+ tf.float32, [None, mc.ZENITH_LEVEL, mc.AZIMUTH_LEVEL, 3],
+ name='pred_image_to_show'
+ )
+ # ed: tensorboard에서 이미지를 표시해주는 코드인듯
+ self.show_label = tf.summary.image('label_to_show',
+ self.label_to_show, collections='image_summary',
+ max_outputs=mc.BATCH_SIZE)
+ self.show_depth_img = tf.summary.image('depth_image_to_show',
+ self.depth_image_to_show, collections='image_summary',
+ max_outputs=mc.BATCH_SIZE)
+ self.show_pred = tf.summary.image('pred_image_to_show',
+ self.pred_image_to_show, collections='image_summary',
+ max_outputs=mc.BATCH_SIZE)
+ # ed: tensorboard에서 요약창에 띄울 데이터를 만들기 위한 함수인듯
+ def _add_summary_ops(self):
+ """Add extra summary operations."""
+ mc = self.mc
+ iou_summary_placeholders = []
+ iou_summary_ops = []
+ for cls in mc.CLASSES:
+ ph = tf.placeholder(tf.float32, name=cls+'_iou')
+ iou_summary_placeholders.append(ph)
+ iou_summary_ops.append(
+ tf.summary.scalar('Eval/'+cls+'_iou', ph, collections='eval_summary')
+ )
+ self.iou_summary_placeholders = iou_summary_placeholders
+ self.iou_summary_ops = iou_summary_ops
+ # ed: conv + batch nomarlization layer를 만들어주는 함수 (SqueezeSeg에서는 안쓰인다) (not used)
+ def _conv_bn_layer(
+ self, inputs, conv_param_name, bn_param_name, scale_param_name, filters,
+ size, stride, padding='SAME', freeze=False, relu=True,
+ conv_with_bias=False, stddev=0.001):
+ """ Convolution + BatchNorm + [relu] layer. Batch mean and var are treated
+ as constant. Weights have to be initialized from a pre-trained model or
+ restored from a checkpoint.
+ Args:
+ inputs: input tensor
+ conv_param_name: name of the convolution parameters
+ bn_param_name: name of the batch normalization parameters
+ scale_param_name: name of the scale parameters
+ filters: number of output filters.
+ size: kernel size.
+ stride: stride
+ padding: 'SAME' or 'VALID'. See tensorflow doc for detailed description.
+ freeze: if true, then do not train the parameters in this layer.
+ xavier: whether to use xavier weight initializer or not.
+ relu: whether to use relu or not.
+ conv_with_bias: whether or not add bias term to the convolution output.
+ stddev: standard deviation used for random weight initializer.
+ Returns:
+ A convolutional layer operation.
+ """
+ mc = self.mc
+ with tf.variable_scope(conv_param_name) as scope:
+ channels = inputs.get_shape()[3]
+ cw = self.caffemodel_weight
+ kernel_val = np.transpose(cw[conv_param_name][0], [2,3,1,0])
+ if conv_with_bias:
+ bias_val = cw[conv_param_name][1]
+ mean_val = cw[bn_param_name][0]
+ var_val = cw[bn_param_name][1]
+ gamma_val = cw[scale_param_name][0]
+ beta_val = cw[scale_param_name][1]
+ else:
+ kernel_val = tf.truncated_normal_initializer(
+ stddev=stddev, dtype=tf.float32)
+ if conv_with_bias:
+ bias_val = tf.constant_initializer(0.0)
+ mean_val = tf.constant_initializer(0.0)
+ var_val = tf.constant_initializer(1.0)
+ gamma_val = tf.constant_initializer(1.0)
+ beta_val = tf.constant_initializer(0.0)
+ # re-order the caffe kernel with shape [out, in, h, w] -> tf kernel with
+ # shape [h, w, in, out]
+ kernel = _variable_with_weight_decay(
+ 'kernels', shape=[size, size, int(channels), filters],
+ wd=mc.WEIGHT_DECAY, initializer=kernel_val, trainable=(not freeze))
+ self.model_params += [kernel]
+ if conv_with_bias:
+ biases = _variable_on_device('biases', [filters], bias_val,
+ trainable=(not freeze))
+ self.model_params += [biases]
+ gamma = _variable_on_device('gamma', [filters], gamma_val,
+ trainable=(not freeze))
+ beta = _variable_on_device('beta', [filters], beta_val,
+ trainable=(not freeze))
+ mean = _variable_on_device('mean', [filters], mean_val, trainable=False)
+ var = _variable_on_device('var', [filters], var_val, trainable=False)
+ self.model_params += [gamma, beta, mean, var]
+ conv = tf.nn.conv2d(
+ inputs, kernel, [1, 1, stride, 1], padding=padding,
+ name='convolution')
+ if conv_with_bias:
+ conv = tf.nn.bias_add(conv, biases, name='bias_add')
+ conv = tf.nn.batch_normalization(
+ conv, mean=mean, variance=var, offset=beta, scale=gamma,
+ variance_epsilon=mc.BATCH_NORM_EPSILON, name='batch_norm')
+ self.model_size_counter.append(
+ (conv_param_name, (1+size*size*int(channels))*filters)
+ )
+ out_shape = conv.get_shape().as_list()
+ num_flops = \
+ (1+2*int(channels)*size*size)*filters*out_shape[1]*out_shape[2]
+ if relu:
+ num_flops += 2*filters*out_shape[1]*out_shape[2]
+ self.flop_counter.append((conv_param_name, num_flops))
+ self.activation_counter.append(
+ (conv_param_name, out_shape[1]*out_shape[2]*out_shape[3])
+ )
+ if relu:
+ return tf.nn.relu(conv)
+ else:
+ return conv
+ # ed: Convolution Net 1층을 만들어주는 함수
+ def _conv_layer(
+ self, layer_name, inputs, filters, size, stride, padding='SAME',
+ freeze=False, xavier=False, relu=True, stddev=0.001, bias_init_val=0.0):
+ """Convolutional layer operation constructor.
+ Args:
+ layer_name: layer name.
+ inputs: input tensor
+ filters: number of output filters.
+ size: kernel size.
+ stride: stride
+ padding: 'SAME' or 'VALID'. See tensorflow doc for detailed description.
+ freeze: if true, then do not train the parameters in this layer.
+ xavier: whether to use xavier weight initializer or not.
+ relu: whether to use relu or not.
+ stddev: standard deviation used for random weight initializer.
+ Returns:
+ A convolutional layer operation.
+ """
+ mc = self.mc
+ use_pretrained_param = False
+ cw = self.caffemodel_weight
+ if layer_name in cw:
+ kernel_val = np.transpose(cw[layer_name][0], [2,3,1,0])
+ bias_val = cw[layer_name][1]
+ # check the shape
+ if (kernel_val.shape ==
+ (size, size, inputs.get_shape().as_list()[-1], filters)) \
+ and (bias_val.shape == (filters, )):
+ use_pretrained_param = True
+ else:
+ print ('Shape of the pretrained parameter of {} does not match, '
+ 'use randomly initialized parameter'.format(layer_name))
+ else:
+ print ('Cannot find {} in the pretrained model. Use randomly initialized '
+ 'parameters'.format(layer_name))
+ if mc.DEBUG_MODE:
+ print('Input tensor shape to {}: {}'.format(layer_name, inputs.get_shape()))
+ with tf.variable_scope(layer_name) as scope:
+ channels = inputs.get_shape()[3]
+ # re-order the caffe kernel with shape [out, in, h, w] -> tf kernel with
+ # shape [h, w, in, out]
+ if use_pretrained_param:
+ if mc.DEBUG_MODE:
+ print ('Using pretrained model for {}'.format(layer_name))
+ kernel_init = tf.constant(kernel_val , dtype=tf.float32)
+ bias_init = tf.constant(bias_val, dtype=tf.float32)
+ elif xavier:
+ kernel_init = tf.contrib.layers.xavier_initializer_conv2d()
+ bias_init = tf.constant_initializer(bias_init_val)
+ else:
+ kernel_init = tf.truncated_normal_initializer(
+ stddev=stddev, dtype=tf.float32)
+ bias_init = tf.constant_initializer(bias_init_val)
+ kernel = _variable_with_weight_decay(
+ 'kernels', shape=[size, size, int(channels), filters],
+ wd=mc.WEIGHT_DECAY, initializer=kernel_init, trainable=(not freeze))
+ biases = _variable_on_device('biases', [filters], bias_init,
+ trainable=(not freeze))
+ self.model_params += [kernel, biases]
+ conv = tf.nn.conv2d(
+ inputs, kernel, [1, 1, stride, 1], padding=padding,
+ name='convolution')
+ conv_bias = tf.nn.bias_add(conv, biases, name='bias_add')
+ if relu:
+ out = tf.nn.relu(conv_bias, 'relu')
+ else:
+ out = conv_bias
+ self.model_size_counter.append(
+ (layer_name, (1+size*size*int(channels))*filters)
+ )
+ out_shape = out.get_shape().as_list()
+ num_flops = \
+ (1+2*int(channels)*size*size)*filters*out_shape[1]*out_shape[2]
+ if relu:
+ num_flops += 2*filters*out_shape[1]*out_shape[2]
+ self.flop_counter.append((layer_name, num_flops))
+ self.activation_counter.append(
+ (layer_name, out_shape[1]*out_shape[2]*out_shape[3])
+ )
+ return out
+ # ed: Deconvolution Net 1층을 만들어주는 함수
+ def _deconv_layer(
+ self, layer_name, inputs, filters, size, stride, padding='SAME',
+ freeze=False, init='trunc_norm', relu=True, stddev=0.001):
+ """Deconvolutional layer operation constructor.
+ Args:
+ layer_name: layer name.
+ inputs: input tensor
+ filters: number of output filters.
+ size: kernel size. An array of size 2 or 1.
+ stride: stride. An array of size 2 or 1.
+ padding: 'SAME' or 'VALID'. See tensorflow doc for detailed description.
+ freeze: if true, then do not train the parameters in this layer.
+ init: how to initialize kernel weights. Now accept 'xavier',
+ 'trunc_norm', 'bilinear'
+ relu: whether to use relu or not.
+ stddev: standard deviation used for random weight initializer.
+ Returns:
+ A convolutional layer operation.
+ """
+ assert len(size) == 1 or len(size) == 2, \
+ 'size should be a scalar or an array of size 2.'
+ assert len(stride) == 1 or len(stride) == 2, \
+ 'stride should be a scalar or an array of size 2.'
+ assert init == 'xavier' or init == 'bilinear' or init == 'trunc_norm', \
+ 'initi mode not supported {}'.format(init)
+ if len(size) == 1:
+ size_h, size_w = size[0], size[0]
+ else:
+ size_h, size_w = size[0], size[1]
+ if len(stride) == 1:
+ stride_h, stride_w = stride[0], stride[0]
+ else:
+ stride_h, stride_w = stride[0], stride[1]
+ mc = self.mc
+ # TODO(bichen): Currently do not support pretrained parameters for deconv
+ # layer.
+ if mc.DEBUG_MODE:
+ print('Input tensor shape to {}: {}'.format(layer_name, inputs.get_shape()))
+ with tf.variable_scope(layer_name) as scope:
+ in_height = int(inputs.get_shape()[1])
+ in_width = int(inputs.get_shape()[2])
+ channels = int(inputs.get_shape()[3])
+ if init == 'xavier':
+ kernel_init = tf.contrib.layers.xavier_initializer_conv2d()
+ bias_init = tf.constant_initializer(0.0)
+ elif init == 'bilinear':
+ assert size_h == 1, 'Now only support size_h=1'
+ assert channels == filters, \
+ 'In bilinear interporlation mode, input channel size and output' \
+ 'filter size should be the same'
+ assert stride_h == 1, \
+ 'In bilinear interpolation mode, stride_h should be 1'
+ kernel_init = np.zeros(
+ (size_h, size_w, channels, channels),
+ dtype=np.float32)
+ factor_w = (size_w + 1)//2
+ assert factor_w == stride_w, \
+ 'In bilinear interpolation mode, stride_w == factor_w'
+ center_w = (factor_w - 1) if (size_w % 2 == 1) else (factor_w - 0.5)
+ og_w = np.reshape(np.arange(size_w), (size_h, -1))
+ up_kernel = (1 - np.abs(og_w - center_w)/factor_w)
+ for c in range(channels):
+ kernel_init[:, :, c, c] = up_kernel
+ bias_init = tf.constant_initializer(0.0)
+ else:
+ kernel_init = tf.truncated_normal_initializer(
+ stddev=stddev, dtype=tf.float32)
+ bias_init = tf.constant_initializer(0.0)
+ # Kernel layout for deconv layer: [H_f, W_f, O_c, I_c] where I_c is the
+ # input channel size. It should be the same as the channel size of the
+ # input tensor.
+ kernel = _variable_with_weight_decay(
+ 'kernels', shape=[size_h, size_w, filters, channels],
+ wd=mc.WEIGHT_DECAY, initializer=kernel_init, trainable=(not freeze))
+ biases = _variable_on_device(
+ 'biases', [filters], bias_init, trainable=(not freeze))
+ self.model_params += [kernel, biases]
+ # TODO(bichen): fix this
+ deconv = tf.nn.conv2d_transpose(
+ inputs, kernel,
+ [mc.BATCH_SIZE, stride_h*in_height, stride_w*in_width, filters],
+ [1, stride_h, stride_w, 1], padding=padding,
+ name='deconv')
+ deconv_bias = tf.nn.bias_add(deconv, biases, name='bias_add')
+ if relu:
+ out = tf.nn.relu(deconv_bias, 'relu')
+ else:
+ out = deconv_bias
+ self.model_size_counter.append(
+ (layer_name, (1+size_h*size_w*channels)*filters)
+ )
+ out_shape = out.get_shape().as_list()
+ num_flops = \
+ (1+2*channels*size_h*size_w)*filters*out_shape[1]*out_shape[2]
+ if relu:
+ num_flops += 2*filters*out_shape[1]*out_shape[2]
+ self.flop_counter.append((layer_name, num_flops))
+ self.activation_counter.append(
+ (layer_name, out_shape[1]*out_shape[2]*out_shape[3])
+ )
+ return out
+ # ed: Pooling Layer 1층을 만들어주는 함수
+ def _pooling_layer(
+ self, layer_name, inputs, size, stride, padding='SAME'):
+ """Pooling layer operation constructor.
+ Args:
+ layer_name: layer name.
+ inputs: input tensor
+ size: kernel size.
+ stride: stride
+ padding: 'SAME' or 'VALID'. See tensorflow doc for detailed description.
+ Returns:
+ A pooling layer operation.
+ """
+ with tf.variable_scope(layer_name) as scope:
+ out = tf.nn.max_pool(inputs,
+ ksize=[1, size, size, 1],
+ strides=[1, 1, stride, 1],
+ padding=padding)
+ activation_size = np.prod(out.get_shape().as_list()[1:])
+ self.activation_counter.append((layer_name, activation_size))
+ return out
+ # ed: Fully Connected Layer 1층을 만들어주는 함수
+ def _fc_layer(
+ self, layer_name, inputs, hiddens, flatten=False, relu=True,
+ xavier=False, stddev=0.001, bias_init_val=0.0):
+ """Fully connected layer operation constructor.
+ Args:
+ layer_name: layer name.
+ inputs: input tensor
+ hiddens: number of (hidden) neurons in this layer.
+ flatten: if true, reshape the input 4D tensor of shape
+ (batch, height, weight, channel) into a 2D tensor with shape
+ (batch, -1). This is used when the input to the fully connected layer
+ is output of a convolutional layer.
+ relu: whether to use relu or not.
+ xavier: whether to use xavier weight initializer or not.
+ stddev: standard deviation used for random weight initializer.
+ Returns:
+ A fully connected layer operation.
+ """
+ mc = self.mc
+ use_pretrained_param = False
+ cw = self.caffemodel_weight
+ if layer_name in cw:
+ use_pretrained_param = True
+ kernel_val = cw[layer_name][0]
+ bias_val = cw[layer_name][1]
+ if mc.DEBUG_MODE:
+ print('Input tensor shape to {}: {}'.format(layer_name, inputs.get_shape()))
+ with tf.variable_scope(layer_name) as scope:
+ input_shape = inputs.get_shape().as_list()
+ if flatten:
+ dim = input_shape[1]*input_shape[2]*input_shape[3]
+ inputs = tf.reshape(inputs, [-1, dim])
+ if use_pretrained_param:
+ try:
+ # check the size before layout transform
+ assert kernel_val.shape == (hiddens, dim), \
+ 'kernel shape error at {}'.format(layer_name)
+ kernel_val = np.reshape(
+ np.transpose(
+ np.reshape(
+ kernel_val, # O x (C*H*W)
+ (hiddens, input_shape[3], input_shape[1], input_shape[2])
+ ), # O x C x H x W
+ (2, 3, 1, 0)
+ ), # H x W x C x O
+ (dim, -1)
+ ) # (H*W*C) x O
+ # check the size after layout transform
+ assert kernel_val.shape == (dim, hiddens), \
+ 'kernel shape error at {}'.format(layer_name)
+ except:
+ # Do not use pretrained parameter if shape doesn't match
+ use_pretrained_param = False
+ print ('Shape of the pretrained parameter of {} does not match, '
+ 'use randomly initialized parameter'.format(layer_name))
+ else:
+ dim = input_shape[1]
+ if use_pretrained_param:
+ try:
+ kernel_val = np.transpose(kernel_val, (1,0))
+ assert kernel_val.shape == (dim, hiddens), \
+ 'kernel shape error at {}'.format(layer_name)
+ except:
+ use_pretrained_param = False
+ print ('Shape of the pretrained parameter of {} does not match, '
+ 'use randomly initialized parameter'.format(layer_name))
+ if use_pretrained_param:
+ if mc.DEBUG_MODE:
+ print ('Using pretrained model for {}'.format(layer_name))
+ kernel_init = tf.constant(kernel_val, dtype=tf.float32)
+ bias_init = tf.constant(bias_val, dtype=tf.float32)
+ elif xavier:
+ kernel_init = tf.contrib.layers.xavier_initializer()
+ bias_init = tf.constant_initializer(bias_init_val)
+ else:
+ kernel_init = tf.truncated_normal_initializer(
+ stddev=stddev, dtype=tf.float32)
+ bias_init = tf.constant_initializer(bias_init_val)
+ weights = _variable_with_weight_decay(
+ 'weights', shape=[dim, hiddens], wd=mc.WEIGHT_DECAY,
+ initializer=kernel_init)
+ biases = _variable_on_device('biases', [hiddens], bias_init)
+ self.model_params += [weights, biases]
+ outputs = tf.nn.bias_add(tf.matmul(inputs, weights), biases)
+ if relu:
+ outputs = tf.nn.relu(outputs, 'relu')
+ # count layer stats
+ self.model_size_counter.append((layer_name, (dim+1)*hiddens))
+ num_flops = 2 * dim * hiddens + hiddens
+ if relu:
+ num_flops += 2*hiddens
+ self.flop_counter.append((layer_name, num_flops))
+ self.activation_counter.append((layer_name, hiddens))
+ return outputs
+ # ed: Builds a single recurrent CRF layer.
+ def _recurrent_crf_layer(
+ self, layer_name, inputs, bilateral_filters, sizes=[3, 5],
+ num_iterations=1, padding='SAME'):
+ """Recurrent conditional random field layer. Iterative mean-field inference
+ is implemented as a recurrent neural network: every iteration applies a
+ softmax, message passing, a 1x1 compatibility transform and adds the result
+ back to the softmax output, which then feeds the next iteration.
+ The two trainable compatibility matrices are appended to self.model_params.
+ Args:
+ layer_name: layer name
+ inputs: input tensor with shape [batch_size, zenith, azimuth, num_class].
+ bilateral_filters: filter weight with shape
+ [batch_size, zenith, azimuth, sizes[0]*sizes[1]-1].
+ NOTE(review): _locally_connected_layer multiplies this against a 5-D
+ tensor, so a trailing class axis may be expected -- confirm.
+ sizes: size of the local region to be filtered.
+ num_iterations: number of mean-field inferences, must be >= 1.
+ padding: padding strategy
+ Returns:
+ outputs: tensor with shape [batch_size, zenith, azimuth, num_class].
+ """
+ assert num_iterations >= 1, 'number of iterations should >= 1'
+ mc = self.mc
+ # ed: layer_name is 'recurrent_crf' (per the original note; caller not shown)
+ with tf.variable_scope(layer_name) as scope:
+ # initialize compatibility matrices: ones everywhere except a zero
+ # diagonal, shaped as a 1x1 convolution kernel over the class axis
+ compat_kernel_init = tf.constant(
+ np.reshape(
+ np.ones((mc.NUM_CLASS, mc.NUM_CLASS)) - np.identity(mc.NUM_CLASS),
+ [1, 1, mc.NUM_CLASS, mc.NUM_CLASS]
+ ),
+ dtype=tf.float32
+ )
+ # trainable compatibility matrix for the bilateral term, scaled by
+ # mc.BI_FILTER_COEF
+ bi_compat_kernel = _variable_on_device(
+ name='bilateral_compatibility_matrix',
+ shape=[1, 1, mc.NUM_CLASS, mc.NUM_CLASS],
+ initializer=compat_kernel_init*mc.BI_FILTER_COEF,
+ trainable=True
+ )
+ self._activation_summary(bi_compat_kernel, 'bilateral_compat_mat')
+ # trainable compatibility matrix for the angular term, scaled by
+ # mc.ANG_FILTER_COEF
+ angular_compat_kernel = _variable_on_device(
+ name='angular_compatibility_matrix',
+ shape=[1, 1, mc.NUM_CLASS, mc.NUM_CLASS],
+ initializer=compat_kernel_init*mc.ANG_FILTER_COEF,
+ trainable=True
+ )
+ self._activation_summary(angular_compat_kernel, 'angular_compat_mat')
+ self.model_params += [bi_compat_kernel, angular_compat_kernel]
+ # constant (non-trainable) kernels shared by every iteration
+ condensing_kernel = tf.constant(
+ util.condensing_matrix(sizes[0], sizes[1], mc.NUM_CLASS),
+ dtype=tf.float32,
+ name='condensing_kernel'
+ )
+ angular_filters = tf.constant(
+ util.angular_filter_kernel(
+ sizes[0], sizes[1], mc.NUM_CLASS, mc.ANG_THETA_A**2),
+ dtype=tf.float32,
+ name='angular_kernel'
+ )
+ bi_angular_filters = tf.constant(
+ util.angular_filter_kernel(
+ sizes[0], sizes[1], mc.NUM_CLASS, mc.BILATERAL_THETA_A**2),
+ dtype=tf.float32,
+ name='bi_angular_kernel'
+ )
+ for it in range(num_iterations):
+ # normalize the current scores over the class axis
+ unary = tf.nn.softmax(
+ inputs, dim=-1, name='unary_term_at_iter_{}'.format(it))
+ # message passing with the angular and bilateral filters
+ ang_output, bi_output = self._locally_connected_layer(
+ 'message_passing_iter_{}'.format(it), unary,
+ bilateral_filters, angular_filters, bi_angular_filters,
+ condensing_kernel, sizes=sizes,
+ padding=padding
+ )
+ # 1x1 convolution as compatibility transform
+ ang_output = tf.nn.conv2d(
+ ang_output, angular_compat_kernel, strides=[1, 1, 1, 1],
+ padding='SAME', name='angular_compatibility_transformation')
+ self._activation_summary(
+ ang_output, 'ang_transfer_iter_{}'.format(it))
+ bi_output = tf.nn.conv2d(
+ bi_output, bi_compat_kernel, strides=[1, 1, 1, 1], padding='SAME',
+ name='bilateral_compatibility_transformation')
+ self._activation_summary(
+ bi_output, 'bi_transfer_iter_{}'.format(it))
+ pairwise = tf.add(ang_output, bi_output,
+ name='pairwise_term_at_iter_{}'.format(it))
+ outputs = tf.add(unary, pairwise,
+ name='energy_at_iter_{}'.format(it))
+ # the result of this iteration is the input of the next one
+ inputs = outputs
+ return outputs
+ def _locally_connected_layer(
+ self, layer_name, inputs, bilateral_filters,
+ angular_filters, bi_angular_filters, condensing_kernel, sizes=[3, 5],
+ padding='SAME'):
+ """Locally connected layer with non-trainable filter parameters.
+ Computes two message-passing terms from the input: one filtered by the
+ angular kernel alone, and one filtered by the per-position bilateral
+ weights and then multiplied by a second angular-filtered term. Reads
+ self.lidar_mask to mask the input and the bilateral output.
+ Args:
+ layer_name: layer name
+ inputs: input tensor with shape
+ [batch_size, zenith, azimuth, num_class].
+ bilateral_filters: bilateral filter weight with shape
+ [batch_size, zenith, azimuth, sizes[0]*sizes[1]-1].
+ NOTE(review): it is multiplied with a tensor of shape
+ [batch, zenith, azimuth, sizes[0]*sizes[1]-1, in_channel], so a
+ trailing class axis may be expected -- confirm.
+ angular_filters: angular filter weight with shape
+ [sizes[0], sizes[1], in_channel, in_channel].
+ bi_angular_filters: second angular filter weight, applied to the input
+ with the same kind of convolution as angular_filters; its output scales
+ the bilateral term.
+ condensing_kernel: tensor with shape
+ [sizes[0], sizes[1], num_class, (sizes[0]*sizes[1]-1)*num_class]
+ sizes: size of the local region to be filtered.
+ padding: padding strategy
+ Returns:
+ ang_output: output tensor filtered by angular filter with shape
+ [batch_size, zenith, azimuth, num_class].
+ bi_output: output tensor filtered by bilateral filter with shape
+ [batch_size, zenith, azimuth, num_class].
+ """
+ assert padding=='SAME', 'only support SAME padding strategy'
+ assert sizes[0] % 2 == 1 and sizes[1] % 2 == 1, \
+ 'Currently only support odd filter size.'
+ mc = self.mc
+ size_z, size_a = sizes
+ pad_z, pad_a = size_z//2, size_a//2 # ed: not used
+ half_filter_dim = (size_z*size_a)//2 # not used
+ batch, zenith, azimuth, in_channel = inputs.shape.as_list()
+ with tf.variable_scope(layer_name) as scope:
+ # message passing
+ ang_output = tf.nn.conv2d(
+ inputs, angular_filters, [1, 1, 1, 1], padding=padding,
+ name='angular_filtered_term'
+ )
+ bi_ang_output = tf.nn.conv2d(
+ inputs, bi_angular_filters, [1, 1, 1, 1], padding=padding,
+ name='bi_angular_filtered_term'
+ )
+ # gather the masked neighbors of every position, then split the channel
+ # axis into (neighbor, class)
+ condensed_input = tf.reshape(
+ tf.nn.conv2d(
+ inputs*self.lidar_mask, condensing_kernel, [1, 1, 1, 1], padding=padding,
+ name='condensed_prob_map'
+ ),
+ [batch, zenith, azimuth, size_z*size_a-1, in_channel]
+ )
+ # weight every neighbor, sum over the neighbor axis and mask the result
+ bi_output = tf.multiply(
+ tf.reduce_sum(condensed_input*bilateral_filters, axis=3),
+ self.lidar_mask,
+ name='bilateral_filtered_term'
+ )
+ bi_output *= bi_ang_output
+ return ang_output, bi_output
+ # ed: Builds a single layer that applies the bilateral filter.
+ def _bilateral_filter_layer(
+ self, layer_name, inputs, thetas=[0.9, 0.01], sizes=[3, 5], stride=1,
+ padding='SAME'):
+ """Computing pairwise energy with a bilateral filter for CRF.
+ For every position, the squared difference between the position and each
+ of its neighbors is taken over input channels 0, 1 and 2 (named x, y, z in
+ the code) and turned into a Gaussian weight per class.
+ Args:
+ layer_name: layer name
+ inputs: input tensor with shape [batch_size, zenith, azimuth, channels].
+ Channels 0, 1 and 2 are read, so at least 3 channels are required.
+ thetas: theta parameter for bilateral filter. Must unpack into two
+ values, but both are overwritten by mc.BILATERAL_THETA_A and
+ mc.BILATERAL_THETA_R inside the class loop, so the values passed here
+ do not influence the result.
+ sizes: filter size for zenith and azimuth dimension.
+ stride: kernel stride, only 1 is supported.
+ padding: padding, only 'SAME' is supported.
+ Returns:
+ out: bilateral filter weight output with size
+ [batch_size, zenith, azimuth, sizes[0]*sizes[1]-1, num_class]. Each
+ [b, z, a, :, cls] represents filter weights around the center position
+ for each class.
+ """
+ assert padding == 'SAME', 'currently only supports "SAME" padding stategy'
+ assert stride == 1, 'currently only supports striding of 1'
+ assert sizes[0] % 2 == 1 and sizes[1] % 2 == 1, \
+ 'Currently only support odd filter size.'
+ mc = self.mc
+ theta_a, theta_r = thetas
+ size_z, size_a = sizes
+ pad_z, pad_a = size_z//2, size_a//2 # ed: not used
+ half_filter_dim = (size_z*size_a)//2 # not used
+ batch, zenith, azimuth, in_channel = inputs.shape.as_list()
+ # assert in_channel == 1, 'Only support input channel == 1'
+ with tf.variable_scope(layer_name) as scope:
+ # kernel that moves the neighbors of a position into the channel axis
+ condensing_kernel = tf.constant(
+ util.condensing_matrix(size_z, size_a, in_channel),
+ dtype=tf.float32,
+ name='condensing_kernel'
+ )
+ condensed_input = tf.nn.conv2d(
+ inputs, condensing_kernel, [1, 1, stride, 1], padding=padding,
+ name='condensed_input'
+ )
+ # diff_intensity = tf.reshape(
+ # inputs[:, :, :], [batch, zenith, azimuth, 1]) \
+ # - condensed_input[:, :, :, ::in_channel]
+ # per-channel difference between the center and each neighbor; the
+ # strided slice picks one channel out of every neighbor
+ diff_x = tf.reshape(
+ inputs[:, :, :, 0], [batch, zenith, azimuth, 1]) \
+ - condensed_input[:, :, :, 0::in_channel]
+ diff_y = tf.reshape(
+ inputs[:, :, :, 1], [batch, zenith, azimuth, 1]) \
+ - condensed_input[:, :, :, 1::in_channel]
+ diff_z = tf.reshape(
+ inputs[:, :, :, 2], [batch, zenith, azimuth, 1]) \
+ - condensed_input[:, :, :, 2::in_channel]
+ bi_filters = []
+ for cls in range(mc.NUM_CLASS):
+ # theta_a is looked up but not used by the expression below
+ theta_a = mc.BILATERAL_THETA_A[cls]
+ theta_r = mc.BILATERAL_THETA_R[cls]
+ bi_filter = tf.exp(-(diff_x**2+diff_y**2+diff_z**2)/2/theta_r**2)
+ bi_filters.append(bi_filter)
+ # stack puts the class axis first; move it to the last position
+ out = tf.transpose(
+ tf.stack(bi_filters),
+ [1, 2, 3, 4, 0],
+ name='bilateral_filter_weights'
+ )
+ return out
+ def _activation_summary(self, x, layer_name):
+ """Helper to create summaries for activations.
+ Args:
+ x: layer output tensor
+ layer_name: name of the layer
+ Returns:
+ nothing
+ """
+ with tf.variable_scope('activation_summary') as scope:
+ tf.summary.histogram(layer_name, x)
+ tf.summary.scalar(layer_name+'/sparsity', tf.nn.zero_fraction(x))
+ tf.summary.scalar(layer_name+'/average', tf.reduce_mean(x))
+ tf.summary.scalar(layer_name+'/max', tf.reduce_max(x))
+ tf.summary.scalar(layer_name+'/min', tf.reduce_min(x))
+# Author: Bichen Wu (bichen@berkeley.edu) 08/25/2016
+#-*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from datetime import datetime
+import os.path
+import sys
+import time
+import math
+import numpy as np
+from six.moves import xrange
+import tensorflow as tf
+import threading
+from config import *
+from imdb import kitti
+from utils.util import *
+from nets import *
+FLAGS = tf.app.flags.FLAGS
+tf.app.flags.DEFINE_string('dataset', 'KITTI',
+ """Currently only support KITTI dataset.""")
+tf.app.flags.DEFINE_string('data_path', '', """Root directory of data""")
+tf.app.flags.DEFINE_string('image_set', 'train',
+ """ Can be train, trainval, val, or test""")
+tf.app.flags.DEFINE_string('train_dir', '/tmp/bichen/logs/squeezeseg/train',
+ """Directory where to write event logs """
+ """and checkpoint.""")
+tf.app.flags.DEFINE_integer('max_steps', 1000000,
+ """Maximum number of batches to run.""")
+tf.app.flags.DEFINE_string('net', 'squeezeSeg',
+ """Neural net architecture. """)
+tf.app.flags.DEFINE_string('pretrained_model_path', '',
+ """Path to the pretrained model.""")
+tf.app.flags.DEFINE_integer('summary_step', 50,
+ """Number of steps to save summary.""")
+tf.app.flags.DEFINE_integer('checkpoint_step', 1000,
+ """Number of steps to save summary.""")
+tf.app.flags.DEFINE_string('gpu', '0', """gpu id.""")
+def train():
+ """Train SqueezeSeg model"""
+ assert FLAGS.dataset == 'KITTI', \
+ 'Currently only support KITTI dataset'
+ os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
+ with tf.Graph().as_default():
+ assert FLAGS.net == 'squeezeSeg', \
+ 'Selected neural net architecture not supported: {}'.format(FLAGS.net)
+ if FLAGS.net == 'squeezeSeg':
+ mc = kitti_squeezeSeg_config()
+ # ed: SqueezeSeg을 본격적으로 training 하기 전에 전이학습을 위해 SqueezeNet의 pretrained_model을 불러오는듯
+ mc.PRETRAINED_MODEL_PATH = FLAGS.pretrained_model_path
+ model = SqueezeSeg(mc)
+ imdb = kitti(FLAGS.image_set, FLAGS.data_path, mc)
+ # ed: Model의 여러 정보를 저장하기 위한 코드
+ # save model size, flops, activations by layers
+ with open(os.path.join(FLAGS.train_dir, 'model_metrics.txt'), 'w') as f:
+ f.write('Number of parameter by layer:\n')
+ # ed: parameter size를 기록하는 코드
+ count = 0
+ for c in model.model_size_counter:
+ f.write('\t{}: {}\n'.format(c[0], c[1]))
+ count += c[1]
+ f.write('\ttotal: {}\n'.format(count))
+ # ed: output activation이 정확히 뭔지 모르겠지만 그걸 기록해주는 코드
+ count = 0
+ f.write('\nActivation size by layer:\n')
+ for c in model.activation_counter:
+ f.write('\t{}: {}\n'.format(c[0], c[1]))
+ count += c[1]
+ f.write('\ttotal: {}\n'.format(count))
+ # ed: Flop Count를 기록해주는 코드
+ count = 0
+ f.write('\nNumber of flops by layer:\n')
+ for c in model.flop_counter:
+ f.write('\t{}: {}\n'.format(c[0], c[1]))
+ count += c[1]
+ f.write('\ttotal: {}\n'.format(count))
+ f.close()
+ print ('Model statistics saved to {}.'.format(
+ os.path.join(FLAGS.train_dir, 'model_metrics.txt')))
+ def enqueue(sess, coord):
+ with coord.stop_on_exception():
+ while not coord.should_stop():
+ # ed: 여기가 Input (.npy) 파일들을 처리하는 코드인듯
+ # read batch input
+ lidar_per_batch, lidar_mask_per_batch, label_per_batch,\
+ weight_per_batch = imdb.read_batch()
+ feed_dict = {
+ model.ph_keep_prob: mc.KEEP_PROB,
+ model.ph_lidar_input: lidar_per_batch,
+ model.ph_lidar_mask: lidar_mask_per_batch,
+ model.ph_label: label_per_batch,
+ model.ph_loss_weight: weight_per_batch,
+ }
+ # ed: placeholder에 데이터를 넣어주는 코드
+ # FIFOQueue라는 함수를 사용해서 여러 input들을 병렬적으로 처리하는듯
+ sess.run(model.enqueue_op, feed_dict=feed_dict)
+ saver = tf.train.Saver(tf.all_variables())
+ summary_op = tf.summary.merge_all()
+ init = tf.initialize_all_variables()
+ # ed: sess 초기화
+ sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
+ sess.run(init)
+ summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
+ coord = tf.train.Coordinator()
+ enq_threads = []
+ for _ in range(mc.NUM_ENQUEUE_THREAD):
+ eqth = threading.Thread(target=enqueue, args=[sess, coord])
+ eqth.start()
+ enq_threads.append(eqth)
+ # ed: 특정 시간이상 연산이 초과되면 assertion을 내주기 위한 코드인듯
+ run_options = tf.RunOptions(timeout_in_ms=60000)
+ try:
+ # ed: 학습하는 코드
+ for step in xrange(FLAGS.max_steps):
+ start_time = time.time()
+ # ed: 50번 마다 실행되고 마지막 step에서 실행되는 제어문
+ if step % FLAGS.summary_step == 0 or step == FLAGS.max_steps-1:
+ op_list = [
+ model.lidar_input, model.lidar_mask, model.label, model.train_op,
+ model.loss, model.pred_cls, summary_op
+ ]
+ # ed: 50번 step과 마지막 step에만 실행되는 코드, 학습이 끝나고 성능을 알아보기 위해 실행하는 듯하다
+ # 이런식으로 Queue를 사용해서 일괄적으로 placeholder들에 feeding을 할 수 있는듯하다
+ lidar_per_batch, lidar_mask_per_batch, label_per_batch, \
+ _, loss_value, pred_cls, summary_str = sess.run(op_list,
+ options=run_options)
+ # ed: label, pred_cls에 의해서 class가 정해진 곳에 colorize를 해주는 코드
+ label_image = visualize_seg(label_per_batch[:6, :, :], mc)
+ pred_image = visualize_seg(pred_cls[:6, :, :], mc)
+ # ed: IOU를 계산하는 코드
+ # Run evaluation on the batch
+ ious, _, _, _ = evaluate_iou(
+ label_per_batch, pred_cls * np.squeeze(lidar_mask_per_batch),
+ feed_dict = {}
+ # Assume that class-0 is the background class
+ for i in range(1, mc.NUM_CLASS):
+ feed_dict[model.iou_summary_placeholders[i]] = ious[i]
+ iou_summary_list = sess.run(model.iou_summary_ops[1:], feed_dict)
+ # ed: 여기서 summary 형식으로 visualize 해주는건 뭘까? ==> tensorboard를 위한 코드
+ # Run visualization
+ viz_op_list = [model.show_label, model.show_depth_img, model.show_pred]
+ viz_summary_list = sess.run(
+ viz_op_list,
+ feed_dict={
+ model.depth_image_to_show: lidar_per_batch[:6, :, :, [4]],
+ model.label_to_show: label_image,
+ model.pred_image_to_show: pred_image,
+ }
+ )
+ # Add summaries
+ summary_writer.add_summary(summary_str, step)
+ for sum_str in iou_summary_list:
+ summary_writer.add_summary(sum_str, step)
+ for viz_sum in viz_summary_list:
+ summary_writer.add_summary(viz_sum, step)
+ # force tensorflow to synchronise summaries
+ summary_writer.flush()
+ else:
+ # ed: 실제 학습을 하는 코드
+ _, loss_value = sess.run(
+ [model.train_op, model.loss], options=run_options)
+ # ed: 알고리즘 수행시간 체크
+ duration = time.time() - start_time
+ # ed: 여러 loss value 중 invalid한 값이 없어야 한다
+ assert not np.isnan(loss_value), \
+ 'Model diverged. Total loss: {}, conf_loss: {}, bbox_loss: {}, ' \
+ 'class_loss: {}'.format(loss_value, conf_loss, bbox_loss,
+ class_loss)
+ # ed: 10번에 한번씩 print 해주는 코드
+ if step % 10 == 0:
+ num_images_per_step = mc.BATCH_SIZE
+ images_per_sec = num_images_per_step / duration
+ sec_per_batch = float(duration)
+ format_str = ('%s: step %d, loss = %.2f (%.1f images/sec; %.3f '
+ 'sec/batch)')
+ print (format_str % (datetime.now(), step, loss_value,
+ images_per_sec, sec_per_batch))
+ sys.stdout.flush()
+ # ed: default=1000 번에 한번씩 model의 가중치를 저장한다
+ # Save the model checkpoint periodically.
+ if step % FLAGS.checkpoint_step == 0 or step == FLAGS.max_steps-1:
+ checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
+ saver.save(sess, checkpoint_path, global_step=step)
+ except Exception, e:
+ coord.request_stop(e)
+ finally:
+ coord.request_stop()
+ sess.run(model.q.close(cancel_pending_enqueues=True))
+ # Wait for all the threads to terminate.
+ coord.join(enq_threads)
+def main(argv=None): # pylint: disable=unused-argument
+ if tf.gfile.Exists(FLAGS.train_dir):
+ tf.gfile.DeleteRecursively(FLAGS.train_dir)
+ tf.gfile.MakeDirs(FLAGS.train_dir)
+ train()
+# Script entry point.
+if __name__ == '__main__':
+ # ed: hand control to tf.app.run()
+ tf.app.run()
+# Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+#! /usr/bin/python2
+# -*- coding: utf-8 -*-
+"""Clock utility for measuring running time, following SegMatch."""
+import datetime
+class Clock(object):
+ def __init__(self):
+ self.kSecondsToMiliseconds = 1000.0
+ self.kMicrosecondsToMiliseconds = 0.001
+ self.start()
+ def start(self):
+ self.real_time_start_ = datetime.datetime.now()
+ def takeTime(self):
+ seconds = (datetime.datetime.now() - self.real_time_start_).seconds
+ useconds = (datetime.datetime.now() - self.real_time_start_).microseconds
+ self.real_time_ms_ = (seconds*self.kSecondsToMiliseconds + useconds*self.kMicrosecondsToMiliseconds) + 0.5
+ def getRealTime(self):
+ return self.real_time_ms_
+ def takeRealTime(self):
+ self.takeTime()
+ return self.getRealTime()
+# Author: Bichen Wu (bichen@berkeley.edu) 02/20/2017
+# -*- coding: utf-8 -*-
+"""Utility functions."""
+import numpy as np
+import time
+# ed: label, pred_cls에서 class가 정해진 좌표에 colorize(=visualize)를 해서 리턴하는 함수
+def visualize_seg(label_map, mc, one_hot=False):
+ if one_hot:
+ label_map = np.argmax(label_map, axis=-1)
+ out = np.zeros(
+ (label_map.shape[0], label_map.shape[1], label_map.shape[2], 3))
+ for l in range(1, mc.NUM_CLASS):
+ out[label_map==l, :] = mc.CLS_COLOR_MAP[l]
+ return out
+def bgr_to_rgb(ims):
+ """Convert a list of images from BGR format to RGB format."""
+ out = []
+ for im in ims:
+ out.append(im[:,:,::-1])
+ return out
+class Timer(object):
+ def __init__(self):
+ self.total_time = 0.0
+ self.calls = 0
+ self.start_time = 0.0
+ self.duration = 0.0
+ self.average_time = 0.0
+ def tic(self):
+ self.start_time = time.time()
+ def toc(self, average=True):
+ self.duration = time.time() - self.start_time
+ self.total_time += self.duration
+ self.calls += 1
+ self.average_time = self.total_time/self.calls
+ if average:
+ return self.average_time
+ else:
+ return self.duration
+def conf_error_rate_at_thresh_fn(mask, conf, thresh):
+ return np.mean((conf>thresh) != mask)
+def rmse_fn(diff, nnz):
+ return np.sqrt(np.sum(diff**2)/nnz)
+def abs_accuracy_at_thresh_fn(diff, thresh, mask):
+ return np.sum((np.abs(diff) < thresh)*mask)/float(np.sum(mask))
+def rel_accuracy_at_thresh_fn(pred_ogm, gt_ogm, mask, thresh):
+ return np.sum(
+ mask * (np.maximum(pred_ogm, gt_ogm) /
+ np.minimum(gt_ogm, pred_ogm) < thresh)
+ )/float(np.sum(mask))
+# ed: IOU를 계산하는 함수
+def evaluate_iou(label, pred, n_class, epsilon=1e-12):
+ """Evaluation script to compute pixel level IoU.
+ Args:
+ label: N-d array of shape [batch, W, H], where each element is a class
+ index.
+ pred: N-d array of shape [batch, W, H], the each element is the predicted
+ class index.
+ n_class: number of classes
+ epsilon: a small value to prevent division by 0
+ Returns:
+ IoU: array of lengh n_class, where each element is the average IoU for this
+ class.
+ tps: same shape as IoU, where each element is the number of TP for each
+ class.
+ fps: same shape as IoU, where each element is the number of FP for each
+ class.
+ fns: same shape as IoU, where each element is the number of FN for each
+ class.
+ """
+ assert label.shape == pred.shape, \
+ 'label and pred shape mismatch: {} vs {}'.format(
+ label.shape, pred.shape)
+ ious = np.zeros(n_class)
+ tps = np.zeros(n_class)
+ fns = np.zeros(n_class)
+ fps = np.zeros(n_class)
+ # ed: 실제 IOU가 아니라 point-wise labeling 이므로
+ # 아래처럼 true positive, false negative...등을 클래스별로 구하는듯하다
+ for cls_id in range(n_class):
+ tp = np.sum(pred[label == cls_id] == cls_id)
+ fp = np.sum(label[pred == cls_id] != cls_id)
+ fn = np.sum(pred[label == cls_id] != cls_id)
+ ious[cls_id] = tp/(tp+fn+fp+epsilon)
+ tps[cls_id] = tp
+ fps[cls_id] = fp
+ fns[cls_id] = fn
+ return ious, tps, fps, fns
+def condensing_matrix(size_z, size_a, in_channel):
+ assert size_z % 2 == 1 and size_a % 2==1, \
+ 'size_z and size_a should be odd number'
+ half_filter_dim = (size_z*size_a)//2
+ # moving neigboring pixels to channel dimension
+ nbr2ch_mat = np.zeros(
+ (size_z, size_a, in_channel, size_z*size_a*in_channel),
+ dtype=np.float32
+ )
+ for z in range(size_z):
+ for a in range(size_a):
+ for ch in range(in_channel):
+ nbr2ch_mat[z, a, ch, z*(size_a*in_channel) + a*in_channel + ch] = 1
+ # exclude the channel index corresponding to the center position
+ nbr2ch_mat = np.concatenate(
+ [nbr2ch_mat[:, :, :, :in_channel*half_filter_dim],
+ nbr2ch_mat[:, :, :, in_channel*(half_filter_dim+1):]],
+ axis=3
+ )
+ assert nbr2ch_mat.shape == \
+ (size_z, size_a, in_channel, (size_a*size_z-1)*in_channel), \
+ 'error with the shape of nbr2ch_mat after removing center position'
+ return nbr2ch_mat
+def angular_filter_kernel(size_z, size_a, in_channel, theta_sqs):
+ """Compute a gaussian kernel.
+ Args:
+ size_z: size on the z dimension.
+ size_a: size on the a dimension.
+ in_channel: input (and output) channel size
+ theta_sqs: an array with length == in_channel. Contains variance for
+ gaussian kernel for each channel.
+ Returns:
+ kernel: ND array of size [size_z, size_a, in_channel, in_channel], which is
+ just guassian kernel parameters for each channel.
+ """
+ assert size_z % 2 == 1 and size_a % 2==1, \
+ 'size_z and size_a should be odd number'
+ assert len(theta_sqs) == in_channel, \
+ 'length of theta_sqs and in_channel does no match'
+ # gaussian kernel
+ kernel = np.zeros((size_z, size_a, in_channel, in_channel), dtype=np.float32)
+ for k in range(in_channel):
+ kernel_2d = np.zeros((size_z, size_a), dtype=np.float32)
+ for i in range(size_z):
+ for j in range(size_a):
+ diff = np.sum(
+ (np.array([i-size_z//2, j-size_a//2]))**2)
+ kernel_2d[i, j] = np.exp(-diff/2/theta_sqs[k])
+ # exclude the center position
+ kernel_2d[size_z//2, size_a//2] = 0
+ kernel[:, :, k, k] = kernel_2d
+ return kernel
+/*
+ * #+DESCRIPTION: C++ conversion of the preprocessor that maps Velodyne points to a [512,64] dataset;
+ *                strongly inspired by github.com/durant35/SqueezeSeg
+ *
+ * #+DATE: 2018-08-28 Tue
+ * #+AUTHOR: Edward Im (edwardim@snu.ac.kr)
+ */
+// Degree <-> radian conversion factors. Parenthesized so that they expand
+// as a single value in any expression (e.g. x / D2R).
+#define D2R (M_PI/180.0)
+#define R2D (180.0/M_PI)
+using namespace std;
+namespace pcl {
+// Custom point type: XYZ plus laser intensity and the distance value `d`
+// that velo_callback fills in.
+struct PointXYZID {
+  PCL_ADD_POINT4D                     // Macro quad-word XYZ
+  float intensity;                    // Laser intensity
+  float d;                            // Distance for SqueezeSeg
+  EIGEN_MAKE_ALIGNED_OPERATOR_NEW     // Ensure proper alignment
+} EIGEN_ALIGN16;
+} //end of namespace pcl
+
+// Register the fields so PCL can (de)serialize the point type.
+POINT_CLOUD_REGISTER_POINT_STRUCT(pcl::PointXYZID,
+                                  (float, x, x)
+                                  (float, y, y)
+                                  (float, z, z)
+                                  (float, intensity, intensity)
+                                  (float, d, d)
+                                  )
+// Input point / cloud types (as converted from the subscribed message).
+typedef pcl::PointXYZI VPoint;
+typedef pcl::PointCloud<VPoint> VPointCloud;
+// Output point / cloud types (with the extra distance field).
+typedef pcl::PointXYZID VPointXYZID;
+typedef pcl::PointCloud<VPointXYZID> VPointCloudXYZID;
+
+// Publisher of the filtered cloud; initialized in main().
+ros::Publisher ss_pub;
+// /velodyne_points topic's subscriber callback function.
+// Keeps the points whose azimuth lies within (-45, 45) degrees, projects
+// them onto a 64 x 512 grid (row from the elevation angle, column from the
+// horizontal angle) and publishes the grid as a point cloud on ss_pub.
+void velo_callback(const sensor_msgs::PointCloud2::ConstPtr &msg){
+  const int kRows = 64;
+  const int kCols = 512;
+
+  VPointCloud::Ptr pcl_msg(new VPointCloud);
+  pcl::fromROSMsg(*msg, *pcl_msg);
+
+  if(pcl_msg->points.empty())
+    return;
+
+  VPointCloud::Ptr out_msg(new VPointCloud);
+  out_msg->points.clear();
+
+  // hv_in_range substitute loop
+  for(size_t i=0; i < pcl_msg->points.size(); i++) {
+    double x = pcl_msg->points[i].x;
+    double y = pcl_msg->points[i].y;
+    double a = atan2(y,x) * R2D;
+
+    if(a > -45 && a < 45)
+      out_msg->points.push_back(pcl_msg->points[i]);
+  }
+
+  VPointCloudXYZID filtered_cloud;
+  sensor_msgs::PointCloud2 ss_msg;
+
+  filtered_cloud.points.resize(kRows * kCols);
+
+  double dphi = 180/512.0 * D2R;
+  double dtheta = 0.4 * D2R;
+
+  // pto_depth_map substitute loop
+  // (strictly less than size(): the old `<=` bound read one element past
+  // the end, also when no point survived the azimuth filter)
+  for(size_t i=0; i < out_msg->points.size(); i++) {
+    double x = out_msg->points[i].x;
+    double y = out_msg->points[i].y;
+    double z = out_msg->points[i].z;
+    double d = sqrt(x*x + y*y + z*z);
+    double r = sqrt(x*x + y*y);
+
+    // Points on the sensor's vertical axis have no horizontal angle
+    // (y/r would be 0/0); skip them instead of indexing with NaN.
+    if(!(r > 0.0))
+      continue;
+
+    double phi = 90 * D2R - asin(y/r);
+    int col = (int)(phi/dphi);
+    double theta = 2 * D2R - asin(z/d);
+    int row = (int)(theta/dtheta);
+
+    // clamp to the grid
+    if(col < 0)
+      col = 0;
+    if(col >= kCols)
+      col = kCols - 1;
+
+    if(row < 0)
+      row = 0;
+    if(row >= kRows)
+      row = kRows - 1;
+
+    VPointXYZID &cell = filtered_cloud.points[row*kCols + col];
+    cell.x = out_msg->points[i].x;
+    cell.y = out_msg->points[i].y;
+    cell.z = out_msg->points[i].z;
+    // TODO(edward): SqueezeSeg got error if there are intensity data from here. need to fix
+    /* cell.intensity = out_msg->points[i].intensity; */
+    cell.d = d;
+  }
+
+  // cout << "[+] " << filtered_cloud.points.size() << endl; // DEBUG
+
+  pcl::toROSMsg(filtered_cloud, ss_msg);
+
+  ss_msg.header.frame_id = "velodyne_link";
+  ss_msg.header.stamp = ros::Time::now();
+
+  // publish to /ss_filtered
+  ss_pub.publish(ss_msg);
+
+  filtered_cloud.points.clear();
+}
+// Node entry point: subscribes to /velodyne_points and publishes the
+// preprocessed cloud on /ss_filtered.
+int main(int argc, char **argv){
+  ros::init(argc, argv, "cpp_preprocessing");
+  ros::NodeHandle nh;
+
+  ros::Subscriber velodyne_sub = nh.subscribe("/velodyne_points", 1, velo_callback);
+  // advertise() needs the message type as an explicit template argument
+  ss_pub = nh.advertise<sensor_msgs::PointCloud2>("/ss_filtered",1);
+
+  ros::spin();
+  return 0;
+}
+ # Uncomment this lines if you want to use visualize with Rviz.
+ # see kitti_velodyne_to_ros in my gitrepo
+ # out_dir = "./data/sample_out_ed"
+ # if not tf.gfile.Exists(out_dir):
+ # tf.gfile.MakeDirs(out_dir)
+ # np.save(os.path.join(out_dir, 'pred_' + imdb._image_idx[i] + '.npy'), pred_cls[0])
# Evaluation
iou, tps, fps, fns = evaluate_iou(