From ed3fe9f0762158f44810d370eb43d0f5c8e243be Mon Sep 17 00:00:00 2001
From: Wei Liu
Date: Wed, 6 Jul 2016 23:00:12 -0400
Subject: [PATCH] add cpp demo code to evaluate a model

---
 README.md                   |   2 +-
 examples/ssd/ssd_detect.cpp | 302 ++++++++++++++++++++++++++++++++++++
 2 files changed, 303 insertions(+), 1 deletion(-)
 create mode 100644 examples/ssd/ssd_detect.cpp

diff --git a/README.md b/README.md
index 77f94dfe..00ca36de 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,7 @@ Please cite SSD in your publications if it helps your research:
   ```
   [Here](https://drive.google.com/file/d/0BzKzrI_SkD1_R09NcjM1eElLcWc/view) is a demo video of running a SSD500 model trained on [MSCOCO](http://mscoco.org) dataset.

-4. Check out `examples/ssd_detect.ipynb` on how to detect objects using a SSD model.
+4. Check out `examples/ssd_detect.ipynb` or `examples/ssd/ssd_detect.cpp` on how to detect objects using a SSD model.

 5. To train on other dataset, please refer to data/OTHERDATASET for more details.
    We currently add support for MSCOCO and ILSVRC2016.

diff --git a/examples/ssd/ssd_detect.cpp b/examples/ssd/ssd_detect.cpp
new file mode 100644
index 00000000..2743ea89
--- /dev/null
+++ b/examples/ssd/ssd_detect.cpp
@@ -0,0 +1,302 @@
// This is a demo code for using a SSD model to do detection.
// The code is modified from examples/cpp_classification/classification.cpp.
// Usage:
//    ssd_detect [FLAGS] model_file weights_file list_file
//
// where model_file is the .prototxt file defining the network architecture,
// weights_file is the .caffemodel file containing the network parameters, and
// list_file contains a list of image files with the following format:
//    folder/img1.JPEG
//    folder/img2.JPEG
//
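// Illustrative invocation (paths are placeholders, not files shipped with
// this patch). With the default Makefile build the compiled demo typically
// lands at build/examples/ssd/ssd_detect.bin, so a run could look like:
//
//    ./build/examples/ssd/ssd_detect.bin deploy.prototxt model.caffemodel list.txt
//
// optionally followed by the flags defined later in this file, e.g.
// --mean_value 104,117,123 --confidence_threshold 0.5 --out_file output.txt
//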
#include <caffe/caffe.hpp>
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif  // USE_OPENCV
#include <algorithm>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>

using namespace caffe;  // NOLINT(build/namespaces)

class Detector {
 public:
  Detector(const string& model_file,
           const string& weights_file,
           const string& mean_file,
           const string& mean_value);

  std::vector<vector<float> > Detect(const cv::Mat& img);

 private:
  void SetMean(const string& mean_file, const string& mean_value);

  void WrapInputLayer(std::vector<cv::Mat>* input_channels);

  void Preprocess(const cv::Mat& img,
                  std::vector<cv::Mat>* input_channels);

 private:
  shared_ptr<Net<float> > net_;
  cv::Size input_geometry_;
  int num_channels_;
  cv::Mat mean_;
};

Detector::Detector(const string& model_file,
                   const string& weights_file,
                   const string& mean_file,
                   const string& mean_value) {
#ifdef CPU_ONLY
  Caffe::set_mode(Caffe::CPU);
#else
  Caffe::set_mode(Caffe::GPU);
#endif

  /* Load the network. */
  net_.reset(new Net<float>(model_file, TEST));
  net_->CopyTrainedLayersFrom(weights_file);

  CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
  CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

  Blob<float>* input_layer = net_->input_blobs()[0];
  num_channels_ = input_layer->channels();
  CHECK(num_channels_ == 3 || num_channels_ == 1)
    << "Input layer should have 1 or 3 channels.";
  input_geometry_ = cv::Size(input_layer->width(), input_layer->height());

  /* Load the binaryproto mean file. */
  SetMean(mean_file, mean_value);
}

std::vector<vector<float> > Detector::Detect(const cv::Mat& img) {
  Blob<float>* input_layer = net_->input_blobs()[0];
  input_layer->Reshape(1, num_channels_,
                       input_geometry_.height, input_geometry_.width);
  /* Forward dimension change to all layers. */
  net_->Reshape();

  std::vector<cv::Mat> input_channels;
  WrapInputLayer(&input_channels);

  Preprocess(img, &input_channels);

  net_->Forward();

  /* Copy the output layer to a std::vector */
  Blob<float>* result_blob = net_->output_blobs()[0];
  const float* result = result_blob->cpu_data();
  const int num_det = result_blob->height();
  vector<vector<float> > detections;
  for (int k = 0; k < num_det; ++k) {
    vector<float> detection(result, result + 7);
    detections.push_back(detection);
    result += 7;
  }
  return detections;
}
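// Note on the value returned by Detect(): each inner vector holds the seven
// floats [image_id, label, score, xmin, ymin, xmax, ymax], with the box
// coordinates expressed as fractions of the image size (main() below scales
// them by the image width and height). A purely illustrative row such as
//    [0, 12, 0.87, 0.10, 0.22, 0.46, 0.81]
// would mean a detection of class label 12 with confidence 0.87 whose box
// spans 10%-46% of the image width and 22%-81% of its height.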
/* Load the mean file in binaryproto format. */
void Detector::SetMean(const string& mean_file, const string& mean_value) {
  cv::Scalar channel_mean;
  if (!mean_file.empty()) {
    CHECK(mean_value.empty()) <<
      "Cannot specify mean_file and mean_value at the same time";
    BlobProto blob_proto;
    ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);

    /* Convert from BlobProto to Blob<float> */
    Blob<float> mean_blob;
    mean_blob.FromProto(blob_proto);
    CHECK_EQ(mean_blob.channels(), num_channels_)
      << "Number of channels of mean file doesn't match input layer.";

    /* The format of the mean file is planar 32-bit float BGR or grayscale. */
    std::vector<cv::Mat> channels;
    float* data = mean_blob.mutable_cpu_data();
    for (int i = 0; i < num_channels_; ++i) {
      /* Extract an individual channel. */
      cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
      channels.push_back(channel);
      data += mean_blob.height() * mean_blob.width();
    }

    /* Merge the separate channels into a single image. */
    cv::Mat mean;
    cv::merge(channels, mean);

    /* Compute the global mean pixel value and create a mean image
     * filled with this value. */
    channel_mean = cv::mean(mean);
    mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
  }
  if (!mean_value.empty()) {
    CHECK(mean_file.empty()) <<
      "Cannot specify mean_file and mean_value at the same time";
    stringstream ss(mean_value);
    vector<float> values;
    string item;
    while (getline(ss, item, ',')) {
      float value = std::atof(item.c_str());
      values.push_back(value);
    }
    CHECK(values.size() == 1 || values.size() == num_channels_) <<
      "Specify either 1 mean_value or as many as channels: " << num_channels_;

    std::vector<cv::Mat> channels;
    for (int i = 0; i < num_channels_; ++i) {
      /* Extract an individual channel. */
      cv::Mat channel(input_geometry_.height, input_geometry_.width, CV_32FC1,
          cv::Scalar(values[i]));
      channels.push_back(channel);
    }
    cv::merge(channels, mean_);
  }
}

/* Wrap the input layer of the network in separate cv::Mat objects
 * (one per channel). This way we save one memcpy operation and we
 * don't need to rely on cudaMemcpy2D. The last preprocessing
 * operation will write the separate channels directly to the input
 * layer. */
void Detector::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
  Blob<float>* input_layer = net_->input_blobs()[0];

  int width = input_layer->width();
  int height = input_layer->height();
  float* input_data = input_layer->mutable_cpu_data();
  for (int i = 0; i < input_layer->channels(); ++i) {
    cv::Mat channel(height, width, CV_32FC1, input_data);
    input_channels->push_back(channel);
    input_data += width * height;
  }
}

void Detector::Preprocess(const cv::Mat& img,
                          std::vector<cv::Mat>* input_channels) {
  /* Convert the input image to the input image format of the network. */
  cv::Mat sample;
  if (img.channels() == 3 && num_channels_ == 1)
    cv::cvtColor(img, sample, cv::COLOR_BGR2GRAY);
  else if (img.channels() == 4 && num_channels_ == 1)
    cv::cvtColor(img, sample, cv::COLOR_BGRA2GRAY);
  else if (img.channels() == 4 && num_channels_ == 3)
    cv::cvtColor(img, sample, cv::COLOR_BGRA2BGR);
  else if (img.channels() == 1 && num_channels_ == 3)
    cv::cvtColor(img, sample, cv::COLOR_GRAY2BGR);
  else
    sample = img;

  cv::Mat sample_resized;
  if (sample.size() != input_geometry_)
    cv::resize(sample, sample_resized, input_geometry_);
  else
    sample_resized = sample;

  cv::Mat sample_float;
  if (num_channels_ == 3)
    sample_resized.convertTo(sample_float, CV_32FC3);
  else
    sample_resized.convertTo(sample_float, CV_32FC1);

  cv::Mat sample_normalized;
  cv::subtract(sample_float, mean_, sample_normalized);

  /* This operation will write the separate BGR planes directly to the
   * input layer of the network because it is wrapped by the cv::Mat
   * objects in input_channels. */
  cv::split(sample_normalized, *input_channels);

  CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
        == net_->input_blobs()[0]->cpu_data())
    << "Input channels are not wrapping the input layer of the network.";
}

DEFINE_string(mean_file, "",
    "The mean file used to subtract from the input image.");
DEFINE_string(mean_value, "104,117,123",
    "If specified, can be one value or can be same as image channels"
    " - would subtract from the corresponding channel. Separated by ','."
    " Either mean_file or mean_value should be provided, not both.");
DEFINE_string(out_file, "",
    "If provided, store the detection results in the out_file.");
DEFINE_double(confidence_threshold, 0.6,
    "Only store detections with score higher than the threshold.");
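// For reference, with illustrative values only: if list.txt contains a line
// images/cat.jpg, the loop in main() below could emit a line such as
//    images/cat.jpg 8 0.92 86 41 420 283
// i.e. the image path, predicted label, score, and the box corners
// xmin ymin xmax ymax converted to pixel coordinates.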
int main(int argc, char** argv) {
#ifdef USE_OPENCV
  ::google::InitGoogleLogging(argv[0]);
  // Print output to stderr (while still logging)
  FLAGS_alsologtostderr = 1;

#ifndef GFLAGS_GFLAGS_H_
  namespace gflags = google;
#endif

  gflags::SetUsageMessage("Do detection using SSD model.\n"
        "Usage:\n"
        "    ssd_detect [FLAGS] model_file weights_file list_file\n");
  gflags::ParseCommandLineFlags(&argc, &argv, true);

  if (argc < 4) {
    gflags::ShowUsageWithFlagsRestrict(argv[0], "examples/ssd/ssd_detect");
    return 1;
  }

  const string& model_file = argv[1];
  const string& weights_file = argv[2];
  const string& mean_file = FLAGS_mean_file;
  const string& mean_value = FLAGS_mean_value;
  const string& out_file = FLAGS_out_file;
  const float confidence_threshold = FLAGS_confidence_threshold;

  // Initialize the network.
  Detector detector(model_file, weights_file, mean_file, mean_value);

  // Set the output mode.
  std::streambuf* buf = std::cout.rdbuf();
  std::ofstream outfile;
  if (!out_file.empty()) {
    outfile.open(out_file.c_str());
    if (outfile.good()) {
      buf = outfile.rdbuf();
    }
  }
  std::ostream out(buf);

  // Process the images one by one.
  std::ifstream infile(argv[3]);
  std::string imgfile;
  while (infile >> imgfile) {
    cv::Mat img = cv::imread(imgfile, -1);
    CHECK(!img.empty()) << "Unable to decode image " << imgfile;
    std::vector<vector<float> > detections = detector.Detect(img);

    /* Print the detection results. */
    for (int i = 0; i < detections.size(); ++i) {
      const vector<float>& d = detections[i];
      // Detection format: [image_id, label, score, xmin, ymin, xmax, ymax].
      CHECK_EQ(d.size(), 7);
      const float score = d[2];
      if (score >= confidence_threshold) {
        out << imgfile << " ";
        out << static_cast<int>(d[1]) << " ";
        out << score << " ";
        out << static_cast<int>(d[3] * img.cols) << " ";
        out << static_cast<int>(d[4] * img.rows) << " ";
        out << static_cast<int>(d[5] * img.cols) << " ";
        out << static_cast<int>(d[6] * img.rows) << std::endl;
      }
    }
  }
#else
  LOG(FATAL) << "This example requires OpenCV; compile with USE_OPENCV.";
#endif  // USE_OPENCV
  return 0;
}
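
Beyond the command-line flow above, the `Detector` class added by this patch can be driven directly from other C++ code. The sketch below only illustrates that API and is not part of the patch: it assumes the class declaration is visible to the caller (for example by moving it into a shared header), and every file name in it is a placeholder.

```
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <vector>

// Minimal usage sketch for the Detector class defined in ssd_detect.cpp.
// All file names are placeholders; "" and "104,117,123" mirror the demo's
// default mean_file/mean_value flags.
int RunDetectorOnSingleImage() {
  Detector detector("deploy.prototxt", "model.caffemodel", "", "104,117,123");
  cv::Mat img = cv::imread("image.jpg", -1);
  if (img.empty()) return 1;

  // Each detection is [image_id, label, score, xmin, ymin, xmax, ymax],
  // with box coordinates given as fractions of the image size.
  std::vector<std::vector<float> > detections = detector.Detect(img);
  for (size_t i = 0; i < detections.size(); ++i) {
    const std::vector<float>& d = detections[i];
    if (d[2] < 0.6f) continue;  // same default threshold as the demo
    cv::Point tl(static_cast<int>(d[3] * img.cols),
                 static_cast<int>(d[4] * img.rows));
    cv::Point br(static_cast<int>(d[5] * img.cols),
                 static_cast<int>(d[6] * img.rows));
    cv::rectangle(img, tl, br, cv::Scalar(0, 255, 0), 2);
  }
  cv::imwrite("image_with_boxes.jpg", img);
  return 0;
}
```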