Hi Guys,
I tested an image of 2560×1440 pixels over 100 iterations; my test code is below. The CPU execution time is around 30 ms per iteration, and the GPU execution time is around 120 ms per iteration.
The system I used is lebian_9 with a prebuilt TensorFlow. I found libOpenCL.so in /usr/lib/aarch64-linux-gnu and the header files in /usr/include/aarch64-linux-gnu/CL. These are the options I used to build OpenCV 3.4.3:
-D WITH_OPENCL=OFF
-D HAVE_OPENCL_STATIC=ON
-D OPENCL_LIBRARIES=/usr/lib/aarch64-linux-gnu/libOpenCL.so
-D OPENCL_INCLUDE_DIRS=/usr/include/aarch64-linux-gnu/CL
I am wondering whether the library and the header files I used are the correct ones for the Mali G72 GPU.
Does anyone have an explanation for why the GPU is so much slower than the CPU?
Thanks for help.
Théo
#include "opencv2/opencv.hpp"
#include "opencv2/core/ocl.hpp"
#include <iostream>
#include <stdio.h>
using namespace cv;
using namespace std;
int main(int argc, char** argv)
{
ocl::setUseOpenCL(true);
if (ocl::haveOpenCL())
{
cout << "OpenCL is available..." << endl;
//return;
}
cv::ocl::Context context;
if (!context.create(cv::ocl::Device::TYPE_GPU))
{
//cout << "Failed creating the context..." << endl;
//return;
}
cout << context.ndevices() << " GPU devices are detected." << endl;
for (int i = 0; i < context.ndevices(); i++)
{
cv::ocl::Device device = context.device(i);
cout << "name : " << device.name() << endl;
cout << "available : " << device.available() << endl;
cout << "imageSupport : " << device.imageSupport() << endl;
cout << "OpenCL_C_Version : " << device.OpenCL_C_Version() << endl;
cout << endl;
}
UMat img, gray;
imread("image_2560.jpg", IMREAD_COLOR).copyTo(img);
//img = imread("image_2560.jpg", 1);
int64 t=getTickCount();
for(int i=0; i<100; i++)
{
int64 t1=getTickCount();
cvtColor(img, gray, COLOR_BGR2GRAY);
GaussianBlur(gray, gray, Size(7, 7), 1.5);
Canny(gray, gray, 0, 50);
t1 = getTickCount() - t1;
printf("Time elapsed t1: %fms\n", t1*1000/getTickFrequency());
}
t = getTickCount() - t;
printf("Time elapsed t: %fms\n", t*1000/getTickFrequency());
return 0;
}