AKAZE和ORB平面跟踪

介绍

在本教程中，我们将使用它们比较AKAZE和ORB本地功能，以查找视频帧和跟踪对象移动之间的匹配。

算法如下：

检测并描述第一帧上的关键点，手动设置对象边界
对于每一个下一帧：

检测并描述关键点
使用bruteforce匹配器匹配它们
使用RANSAC估计单变图
过滤来自所有比赛的内置值
将单变图应用于边界框以找到对象
绘制边界框和内联，计算不定式比值作为评估指标

AKAZE和ORB平面跟踪

数据

要进行跟踪，我们需要第一帧上的视频和对象位置。

您可以从这里下载我们的示例视频和数据。

要运行代码，您必须指定输入（摄像机ID或video_file）。然后，使用鼠标选择边框，然后按任意键开始跟踪

./planar_tracking blais.mp4

源代码

#include <opencv2/features2d.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>      //for imshow
#include <vector>
#include <iostream>
#include <iomanip>
#include "stats.h" // Stats structure definition
#include "utils.h" // Drawing and printing functions
using namespace std;
using namespace cv;
const double akaze_thresh = 3e-4; // AKAZE detection threshold set to locate about 1000 keypoints
const double ransac_thresh = 2.5f; // RANSAC inlier threshold
const double nn_match_ratio = 0.8f; // Nearest-neighbour matching ratio
const int bb_min_inliers = 100; // Minimal number of inliers to draw bounding box
const int stats_update_period = 10; // On-screen statistics are updated every 10 frames
namespace example {
class Tracker
{
public:
    Tracker(Ptr<Feature2D> _detector, Ptr<DescriptorMatcher> _matcher) :
        detector(_detector),
        matcher(_matcher)
    {}
    void setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats);
    Mat process(const Mat frame, Stats& stats);
    Ptr<Feature2D> getDetector() {
        return detector;
    }
protected:
    Ptr<Feature2D> detector;
    Ptr<DescriptorMatcher> matcher;
    Mat first_frame, first_desc;
    vector<KeyPoint> first_kp;
    vector<Point2f> object_bb;
};
void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)
{
    cv::Point *ptMask = new cv::Point[bb.size()];
    const Point* ptContain = { &ptMask[0] };
    int iSize = static_cast<int>(bb.size());
    for (size_t i=0; i<bb.size(); i++) {
        ptMask[i].x = static_cast<int>(bb[i].x);
        ptMask[i].y = static_cast<int>(bb[i].y);
    }
    first_frame = frame.clone();
    cv::Mat matMask = cv::Mat::zeros(frame.size(), CV_8UC1);
    cv::fillPoly(matMask, &ptContain, &iSize, 1, cv::Scalar::all(255));
    detector->detectAndCompute(first_frame, matMask, first_kp, first_desc);
    stats.keypoints = (int)first_kp.size();
    drawBoundingBox(first_frame, bb);
    putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);
    object_bb = bb;
    delete[] ptMask;
}
Mat Tracker::process(const Mat frame, Stats& stats)
{
    TickMeter tm;
    vector<KeyPoint> kp;
    Mat desc;
    tm.start();
    detector->detectAndCompute(frame, noArray(), kp, desc);
    stats.keypoints = (int)kp.size();
    vector< vector<DMatch> > matches;
    vector<KeyPoint> matched1, matched2;
    matcher->knnMatch(first_desc, desc, matches, 2);
    for(unsigned i = 0; i < matches.size(); i++) {
        if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
            matched1.push_back(first_kp[matches[i][0].queryIdx]);
            matched2.push_back(      kp[matches[i][0].trainIdx]);
        }
    }
    stats.matches = (int)matched1.size();
    Mat inlier_mask, homography;
    vector<KeyPoint> inliers1, inliers2;
    vector<DMatch> inlier_matches;
    if(matched1.size() >= 4) {
        homography = findHomography(Points(matched1), Points(matched2),
                                    RANSAC, ransac_thresh, inlier_mask);
    }
    tm.stop();
    stats.fps = 1. / tm.getTimeSec();
    if(matched1.size() < 4 || homography.empty()) {
        Mat res;
        hconcat(first_frame, frame, res);
        stats.inliers = 0;
        stats.ratio = 0;
        return res;
    }
    for(unsigned i = 0; i < matched1.size(); i++) {
        if(inlier_mask.at<uchar>(i)) {
            int new_i = static_cast<int>(inliers1.size());
            inliers1.push_back(matched1[i]);
            inliers2.push_back(matched2[i]);
            inlier_matches.push_back(DMatch(new_i, new_i, 0));
        }
    }
    stats.inliers = (int)inliers1.size();
    stats.ratio = stats.inliers * 1.0 / stats.matches;
    vector<Point2f> new_bb;
    perspectiveTransform(object_bb, new_bb, homography);
    Mat frame_with_bb = frame.clone();
    if(stats.inliers >= bb_min_inliers) {
        drawBoundingBox(frame_with_bb, new_bb);
    }
    Mat res;
    drawMatches(first_frame, inliers1, frame_with_bb, inliers2,
                inlier_matches, res,
                Scalar(255, 0, 0), Scalar(255, 0, 0));
    return res;
}
}
int main(int argc, char **argv)
{
    CommandLineParser parser(argc, argv, "{@input_path |0|input path can be a camera id, like 0,1,2 or a video filename}");
    parser.printMessage();
    string input_path = parser.get<string>(0);
    string video_name = input_path;
    VideoCapture video_in;
    if ( ( isdigit(input_path[0]) && input_path.size() == 1 ) )
    {
    int camera_no = input_path[0] - '0';
        video_in.open( camera_no );
    }
    else {
        video_in.open(video_name);
    }
    if(!video_in.isOpened()) {
        cerr << "Couldn't open " << video_name << endl;
        return 1;
    }
    Stats stats, akaze_stats, orb_stats;
    Ptr<AKAZE> akaze = AKAZE::create();
    akaze->setThreshold(akaze_thresh);
    Ptr<ORB> orb = ORB::create();
    Ptr<DescriptorMatcher> matcher = DescriptorMatcher::create("BruteForce-Hamming");
    example::Tracker akaze_tracker(akaze, matcher);
    example::Tracker orb_tracker(orb, matcher);
    Mat frame;
    namedWindow(video_name, WINDOW_NORMAL);
    cout << "\nPress any key to stop the video and select a bounding box" << endl;
    while ( waitKey(1) < 1 )
    {
        video_in >> frame;
        cv::resizeWindow(video_name, frame.size());
        imshow(video_name, frame);
    }
    vector<Point2f> bb;
    cv::Rect uBox = cv::selectROI(video_name, frame);
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x+uBox.width), static_cast<float>(uBox.y)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x+uBox.width), static_cast<float>(uBox.y+uBox.height)));
    bb.push_back(cv::Point2f(static_cast<float>(uBox.x), static_cast<float>(uBox.y+uBox.height)));
    akaze_tracker.setFirstFrame(frame, bb, "AKAZE", stats);
    orb_tracker.setFirstFrame(frame, bb, "ORB", stats);
    Stats akaze_draw_stats, orb_draw_stats;
    Mat akaze_res, orb_res, res_frame;
    int i = 0;
    for(;;) {
        i++;
        bool update_stats = (i % stats_update_period == 0);
        video_in >> frame;
        // stop the program if no more images
        if(frame.empty()) break;
        akaze_res = akaze_tracker.process(frame, stats);
        akaze_stats += stats;
        if(update_stats) {
            akaze_draw_stats = stats;
        }
        orb->setMaxFeatures(stats.keypoints);
        orb_res = orb_tracker.process(frame, stats);
        orb_stats += stats;
        if(update_stats) {
            orb_draw_stats = stats;
        }
        drawStatistics(akaze_res, akaze_draw_stats);
        drawStatistics(orb_res, orb_draw_stats);
        vconcat(akaze_res, orb_res, res_frame);
        cv::imshow(video_name, res_frame);
        if(waitKey(1)==27) break; //quit on ESC button
    }
    akaze_stats /= i - 1;
    orb_stats /= i - 1;
    printStatistics("AKAZE", akaze_stats);
    printStatistics("ORB", orb_stats);
    return 0;
}

说明

跟踪类

该类实现了使用给定的特征检测器和描述符匹配器描述的算法。

设置第一帧

void Tracker::setFirstFrame(const Mat frame, vector<Point2f> bb, string title, Stats& stats)
{
    first_frame = frame.clone();
    (*detector)(first_frame, noArray(), first_kp, first_desc);
    stats.keypoints = (int)first_kp.size();
    drawBoundingBox(first_frame, bb);
    putText(first_frame, title, Point(0, 60), FONT_HERSHEY_PLAIN, 5, Scalar::all(0), 4);
    object_bb = bb;
}

我们从第一帧计算并存储关键点和描述符，并准备输出。

我们需要保存检测到的关键点的数量，以确保两个检测器的位置大致相同。

处理框架

1、找到关键点并计算描述符

(*detector)(frame, noArray(), kp, desc);

要找到帧之间的匹配，我们必须先找到关键点。

在本教程中，检测器设置为在每个帧上找到约1000个关键点。

2、使用2-nn匹配器查找通信

matcher->knnMatch(first_desc, desc, matches, 2);
for(unsigned i = 0; i < matches.size(); i++) {
    if(matches[i][0].distance < nn_match_ratio * matches[i][1].distance) {
        matched1.push_back(first_kp[matches[i][0].queryIdx]);
        matched2.push_back(      kp[matches[i][0].trainIdx]);
    }
}

如果最接近的匹配是nn_match_ratio比第二个最接近的匹配，那么它是一个匹配。

3、使用RANSAC估计单变图

homography = findHomography(Points(matched1), Points(matched2),
                            RANSAC, ransac_thresh, inlier_mask);

如果有至少4场比赛，我们可以使用随机抽样共识来估计图像变换。

4、保存内在的

for(unsigned i = 0; i < matched1.size(); i++) {
    if(inlier_mask.at<uchar>(i)) {
        int new_i = static_cast<int>(inliers1.size());
        inliers1.push_back(matched1[i]);
        inliers2.push_back(matched2[i]);
        inlier_matches.push_back(DMatch(new_i, new_i, 0));
    }
}

因为findHomography可以计算内部值，所以我们只需要保存所选的点和匹配项。

5、项目对象边界框

perspectiveTransform（object_bb，new_bb，singography）;

如果有合理数量的内部值，我们可以使用估计转换来定位对象。

结果

AKAZE统计：

Matches      626
Inliers      410
Inlier ratio 0.58
Keypoints    1117

ORB统计：

Matches      504
Inliers      319
Inlier ratio 0.56
Keypoints    1112

w3cschool 编程狮，随时随地学编程

AKAZE和ORB平面跟踪

介绍

数据

源代码

说明

跟踪类

结果

OpenCV教程

OpenCV入门

OpenCV iOS

OpenCV核心功能（核心模块）

OpenCV图像处理（imgproc模块）

OpenCV高级GUI和媒体（highgui模块）

OpenCV图像输入和输出（imgcodecs模块）

OpenCV视频输入和输出（videoio模块）

OpenCV相机校准和3D重建（calib3d模块）

2D特征框架（feature2d模块）

OpenCV视频分析

对象检测（objdetect模块）

深层神经网络（dnn模块）

机器学习（ml模块）

计算摄影（照片模块）

图像拼接（拼接模块）

GPU加速计算机视觉（cuda模块）

OpenCV Viz