树莓派上运行mobilenetssd目标检测模型

lijingle 目标检测 2020-12-14 13:45 3326人围观

简介

本文主要讲解如何在树莓派上使用OpenCV库调用深度学习网络MobileNet-SSD。模型通过C++程序调用，如果不做其他修改，理论上可以在任何开发板上编译运行。当然，如果想要调用其他模型，请自行修改代码。关于OpenCV的dnn接口，请参考其接口文档。

工具

在开始之前，需要先安装OpenCV库，请参考另一篇文章。当然，也可以安装一些好用的开发工具，例如 Code::Blocks，它可以编辑多个文件并建立工程。安装命令如下：

#安装程序
sudo apt-get install codeblocks

调用caffe模型

如果Code::Blocks安装完成，就可以编辑下面的代码了，当然你也可以从GitHub上下载代码（模型原作者的GitHub链接见地址）。对于树莓派来说，毕竟计算能力有限，很多模型跑在树莓派上都非常吃力，有可能会产生问题。你可以适量创建一些虚拟内存，即使用swap交换技术来扩充内存，但效果并不一定好，可能会拖慢应用的响应速度。

#include <stdio.h>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
#include <opencv2/core/ocl.hpp>
using namespace cv;
using namespace std;
// Network input resolution in pixels.
// NOTE(review): these two constants are not referenced by the code below,
// which passes Size(300, 300) to blobFromImage directly.
const size_t width = 300;
const size_t height = 300;
// Scale factor handed to dnn::blobFromImage (0.007843 is approximately 1/127.5).
const float scaleFector = 0.007843f;
// Mean value handed to dnn::blobFromImage.
const float meanVal = 127.5;
// The network; assigned once in main() and used by detect_from_video().
dnn::Net net;
// Label text indexed by the class id read from the detector output:
// 21 entries, "background" followed by 20 object classes.
const char* class_video_Names[] = { "background",
"aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair",
"cow", "diningtable", "dog", "horse",
"motorbike", "person", "pottedplant",
"sheep", "sofa", "train", "tvmonitor" };
// Runs the global `net` on `src` and draws every detection whose confidence
// exceeds the threshold (red box plus class label) directly onto `src`.
//
// src     Image to analyse; annotated in place. An empty image is returned
//         untouched.
// return  `src` itself (a Mat header sharing the same pixel data).
Mat detect_from_video(Mat &src)
{
    // An empty frame (e.g. a failed imread) cannot be turned into a blob.
    if (src.empty()) return src;

    Mat blobimg = dnn::blobFromImage(src, scaleFector, Size(300, 300), meanVal);
    net.setInput(blobimg, "data");
    Mat detection = net.forward("detection_out");
    //  cout << detection.size[2]<<" "<< detection.size[3] << endl;

    // View dimensions 2 and 3 of the output blob as a 2-D float matrix with one
    // detection per row. Columns used below: 1 = class id, 2 = confidence,
    // 3..6 = x1, y1, x2, y2 (scaled by the image width/height, so presumably
    // normalised to [0, 1] -- confirm against the model's output layer).
    Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

    const float confidence_threshold = 0.25;
    const int class_count =
        static_cast<int>(sizeof(class_video_Names) / sizeof(class_video_Names[0]));
    const Scalar red(0, 0, 255);

    for (int i = 0; i < detectionMat.rows; i++) {
        const float* det = detectionMat.ptr<float>(i);
        if (det[2] <= confidence_threshold) continue;

        // Never index the label table with an id the table does not contain.
        const int class_id = static_cast<int>(det[1]);
        const char* label = (class_id >= 0 && class_id < class_count)
                                ? class_video_Names[class_id]
                                : "unknown";

        const float x1 = det[3] * src.cols;
        const float y1 = det[4] * src.rows;
        const float x2 = det[5] * src.cols;
        const float y2 = det[6] * src.rows;

        const Rect rec(static_cast<int>(x1), static_cast<int>(y1),
                       static_cast<int>(x2 - x1), static_cast<int>(y2 - y1));
        rectangle(src, rec, red, 2, 8, 0);
        // Label sits just above the top-left corner of the box.
        putText(src, label, Point(static_cast<int>(x1), static_cast<int>(y1 - 5)),
                FONT_HERSHEY_SIMPLEX, 1.0, red, 2, 8, 0);
    }
    return src;
}
// Loads the Caffe MobileNet-SSD model once, then repeatedly reads the test
// image, runs detect_from_video() on it, overlays the average frame rate and
// shows the result until ESC is pressed in the window.
//
// Returns 0 on a normal exit and -1 when the model or the image cannot be loaded.
int main(int argc,char ** argv)
{
    // Ring buffer of the most recent FPS samples. It is zero-initialised so
    // that no slot is ever read before it has been written.
    float FPS[16] = {};
    int Fcnt = 0;       // number of FPS samples recorded so far
    Mat frame;
    chrono::steady_clock::time_point Tbegin, Tend;

    // Reading the model is slow, so it is done once, outside the loop.
    net = dnn::readNetFromCaffe("MobileNetSSD_deploy.prototxt", "MobileNetSSD_deploy.caffemodel");
    if (net.empty()){
        cout << "init the model net error";
        return -1;
    }
    //cout << "Switched to " << (cv::ocl::useOpenCL() ? "OpenCL enabled" : "CPU") << endl;
    //net.setPreferableTarget(DNN_TARGET_OPENCL);
    cout << "Start grabbing, press ESC on Live window to terminate" << endl;
    while(1){
        // detect_from_video() draws on the frame, so a fresh copy is needed each round.
        frame = imread("004545.jpg");
        if (frame.empty()){
            cout << "read image 004545.jpg error" << endl;
            return -1;
        }

        Tbegin = chrono::steady_clock::now();
        detect_from_video(frame);
        Tend = chrono::steady_clock::now();

        // Record the frame rate of this inference (skipped when it took < 1 ms).
        const float elapsed_ms = static_cast<float>(
            chrono::duration_cast<chrono::milliseconds>(Tend - Tbegin).count());
        if (elapsed_ms > 0.0f) FPS[(Fcnt++) & 0x0F] = 1000.0f / elapsed_ms;

        // Average only over the slots that already hold a sample, so the value
        // is not under-reported during the first 16 frames.
        const int samples = (Fcnt < 16) ? Fcnt : 16;
        float fps_sum = 0.0f;
        for (int i = 0; i < samples; i++) fps_sum += FPS[i];
        const float fps_avg = (samples > 0) ? fps_sum / samples : 0.0f;
        putText(frame, format("FPS %0.2f", fps_avg), Point(10,20), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0, 0, 255));

        //show output
        imshow("frame", frame);
        const char esc = static_cast<char>(waitKey(5));
        if (esc == 27) break;   // 27 == ESC
    }
    cout << "Closing the camera" << endl;
    destroyAllWindows();
    cout << "Bye!" << endl;
    return 0;
}

关于程序的一些解释：

首先，在定义变量的时候，将一些变量定义为全局变量，不然在其他函数中调用时需要重复定义，导致变量非常多。

程序在读取模型时会花费大量的时间，所以将读取模型放到循环外，只在循环内读取图片并进行预测，不然程序会反应过慢。

上面的类别只有20个（另加一个background背景类）。如果你的类别更多或更少，还需要修改类别列表，或者重新训练模型文件。下面有TensorFlow版本的例子。

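上面代码中 while 循环之前被注释掉的两行（打印 cv::ocl::useOpenCL() 的结果，以及 net.setPreferableTarget(DNN_TARGET_OPENCL)）用于检测并启用OpenCL加速。如果树莓派安装了OpenCL，将注释去掉即可开启OpenCL加速；如果在其他设备上编译，并且设备安装了CUDA，则需要修改代码，将 DNN_TARGET_OPENCL 改为 DNN_TARGET_CUDA（同时还需要调用 net.setPreferableBackend(DNN_BACKEND_CUDA)）。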

尽管OpenCV运行速度比较快，但还是要进行优化。特别是在传输图像时，要使用Mat变量保存frame，而不是copy frame；同样，子程序调用frame时，直接使用Mat（引用）传递图片即可，这样可以避免不必要的内存消耗。

编译

一切准备就绪，下面就是编译程序。在编译之前，还要在Code::Blocks中设置OpenCV库的环境变量。然后将模型文件 MobileNetSSD_deploy.prototxt 和 MobileNetSSD_deploy.caffemodel 放到工程目录下，也就是和代码文件放在一起。当然，还要把测试图片放到该文件夹下。接着对上面的工程文件进行编译。运行结果如下：

运行结果

树莓派上的运行速度还可以，能达到4帧/秒左右。

TensorFlow模型

下面是加载TensorFlow模型的例子。OpenCV的dnn模块同时支持TensorFlow和Caffe，你可以从GitHub上下载训练好的模型。这里验证了两个模型，一个是 MobileNetV1-SSD，另一个是 MobileNetV2-SSD，训练所用的数据集为COCO数据集，类别有90多个。实现方式和上面的Caffe版本差不多，代码如下：

#include <stdio.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/core/ocl.hpp>
using namespace cv;
using namespace std;
// Network input resolution in pixels.
// NOTE(review): these two constants are not referenced by the code below,
// which passes Size(300, 300) to blobFromImage directly.
const size_t width = 300;
const size_t height = 300;
// The network; assigned once in main() and used by detect_from_video().
dnn::Net net;
// Class labels, one per entry; filled by getFileContent() and indexed by the
// class id read from the detector output.
std::vector<std::string> Names;
// Appends every non-empty line of the text file `fileName` to the global
// `Names` vector, in file order.
//
// fileName  Path of the label file to read.
// return    false when the file cannot be opened, true otherwise.
static bool getFileContent(const std::string& fileName)
{
    std::ifstream in(fileName.c_str());
    if (!in.is_open()) return false;

    std::string line;
    // Read line by line until the end of the file is reached.
    while (std::getline(in, line))
    {
        // A file saved with Windows line endings leaves a trailing '\r' on
        // every line; drop it so it neither shows up in the drawn label nor
        // makes a blank line look non-empty.
        if (!line.empty() && line.back() == '\r') line.pop_back();

        // Only lines that still contain text are stored.
        if (!line.empty()) Names.push_back(line);
    }
    // The stream closes itself when `in` goes out of scope.
    return true;
}
// Runs the global `net` on `src` and draws every detection whose confidence
// exceeds the threshold (red box plus class label) directly onto `src`.
//
// src     Image to analyse; annotated in place. An empty image is returned
//         untouched.
// return  `src` itself (a Mat header sharing the same pixel data).
Mat detect_from_video(Mat &src)
{
    // An empty frame (e.g. a failed imread) cannot be turned into a blob.
    if (src.empty()) return src;

    // The final `true` asks blobFromImage to swap the R and B channels.
    Mat blobimg = dnn::blobFromImage(src, 1.0, Size(300, 300), 0.0, true);
    net.setInput(blobimg);
    Mat detection = net.forward("detection_out");
    //  cout << detection.size[2]<<" "<< detection.size[3] << endl;

    // View dimensions 2 and 3 of the output blob as a 2-D float matrix with one
    // detection per row. Columns used below: 1 = class id, 2 = confidence,
    // 3..6 = x1, y1, x2, y2 (scaled by the image width/height, so presumably
    // normalised to [0, 1] -- confirm against the model's output layer).
    Mat detectionMat(detection.size[2], detection.size[3], CV_32F, detection.ptr<float>());

    const float confidence_threshold = 0.25;
    const Scalar red(0, 0, 255);

    for (int i = 0; i < detectionMat.rows; i++) {
        const float* det = detectionMat.ptr<float>(i);
        if (det[2] <= confidence_threshold) continue;

        // The label file may hold fewer entries than the model has class ids;
        // never index Names with an id it does not contain.
        const int class_id = static_cast<int>(det[1]);
        const bool known = class_id >= 0 && static_cast<size_t>(class_id) < Names.size();
        const std::string label = known ? Names[class_id] : std::string("unknown");

        const float x1 = det[3] * src.cols;
        const float y1 = det[4] * src.rows;
        const float x2 = det[5] * src.cols;
        const float y2 = det[6] * src.rows;

        const Rect rec(static_cast<int>(x1), static_cast<int>(y1),
                       static_cast<int>(x2 - x1), static_cast<int>(y2 - y1));
        rectangle(src, rec, red, 1, 8, 0);
        // Label sits just above the top-left corner of the box.
        putText(src, label, Point(static_cast<int>(x1), static_cast<int>(y1 - 5)),
                FONT_HERSHEY_SIMPLEX, 0.5, red, 1, 8, 0);
    }
    return src;
}
// Loads the TensorFlow MobileNet-SSD model and the label file once, then
// repeatedly reads the test image, runs detect_from_video() on it, overlays
// the average frame rate and shows the result until ESC is pressed.
//
// Returns 0 on a normal exit and -1 when the model, the labels or the image
// cannot be loaded.
int main(int argc,char ** argv)
{
    // Ring buffer of the most recent FPS samples, zero-initialised.
    float FPS[16] = {};
    int Fcnt = 0;       // number of FPS samples recorded so far
    Mat frame;
    chrono::steady_clock::time_point Tbegin, Tend;

    // Reading the model is slow, so it is done once, outside the loop.
    //MobileNetV1
    net = dnn::readNetFromTensorflow("frozen_inference_graph_V1.pb","ssd_mobilenet_v1_coco_2017_11_17.pbtxt");
    //MobileNetV2
    //net = dnn::readNetFromTensorflow("frozen_inference_graph_V2.pb","ssd_mobilenet_v2_coco_2018_03_29.pbtxt");
    if (net.empty()){
        cout << "init the model net error";
        return -1;
    }
    // Get the names
    if (!getFileContent("COCO_labels.txt")){
        cout << "loading labels failed";
        return -1;
    }
    //cout << "Switched to " << (cv::ocl::useOpenCL() ? "OpenCL enabled" : "CPU") << endl;
    //net.setPreferableTarget(DNN_TARGET_OPENCL);
    cout << "Start grabbing, press ESC on Live window to terminate" << endl;
    while(1){
        // detect_from_video() draws on the frame, so a fresh copy is needed each round.
        frame = imread("Traffic.jpg");
        if (frame.empty()){
            cout << "read image Traffic.jpg error" << endl;
            return -1;
        }

        Tbegin = chrono::steady_clock::now();
        detect_from_video(frame);
        Tend = chrono::steady_clock::now();

        // Record the frame rate of this inference (skipped when it took < 1 ms).
        const float elapsed_ms = static_cast<float>(
            chrono::duration_cast<chrono::milliseconds>(Tend - Tbegin).count());
        if (elapsed_ms > 0.0f) FPS[(Fcnt++) & 0x0F] = 1000.0f / elapsed_ms;

        // Average only over the slots that already hold a sample, so the value
        // is not under-reported during the first 16 frames.
        const int samples = (Fcnt < 16) ? Fcnt : 16;
        float fps_sum = 0.0f;
        for (int i = 0; i < samples; i++) fps_sum += FPS[i];
        const float fps_avg = (samples > 0) ? fps_sum / samples : 0.0f;
        putText(frame, format("FPS %0.2f", fps_avg), Point(10,20), FONT_HERSHEY_SIMPLEX, 0.6, Scalar(0, 0, 255));

        //show output
        imshow("frame", frame);
        const char esc = static_cast<char>(waitKey(5));
        if (esc == 27) break;   // 27 == ESC
    }
    cout << "Closing the camera" << endl;
    destroyAllWindows();
    cout << "Bye!" << endl;
    return 0;
}

如果需要加速，请参考上面的说明，可以直接使用OpenCL进行加速。两个模型的对比如下：V1版本相对来说比较快；V2版本准确率比较高，但是速度比较慢。对于树莓派来说，能达到这种速度真的是难能可贵。