Extracting a frame from a gstreamer pipeline and displaying it with OpenCV

Not much to write or say in this post. I was trying to extract a frame from the gstreamer pipeline and then display it with OpenCV.

There are two approaches in the code below.

1. Register a callback with appsink that is invoked whenever a new buffer becomes available, and use a locking mechanism to synchronize extracting the frame with displaying it in the main thread.

2. Pull the buffer yourself in a while loop in the main thread.

The second approach is the active one in the code below; the first is commented out. To switch to the first mechanism, uncomment the mutex locking and the signal connect code, and comment out the pull-buffer related code in the while loop.
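To make the hand-off in the first approach clearer, here is a minimal sketch of just the synchronization pattern (the names here are illustrative, not the ones used in the full program below):

/* Minimal sketch of the producer/consumer hand-off in approach 1. */
#include <pthread.h>
#include <gstreamer-0.10/gst/app/gstappsink.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t frameReady = PTHREAD_COND_INITIALIZER;
static GstBuffer *sharedBuffer = NULL;

/* appsink callback: runs on a gstreamer streaming thread (producer). */
static void on_new_buffer(GstAppSink *sink, gpointer user_data)
{
    pthread_mutex_lock(&lock);
    sharedBuffer = gst_app_sink_pull_buffer(sink);
    pthread_cond_signal(&frameReady);   /* wake the main thread */
    pthread_mutex_unlock(&lock);
}

/* main thread (consumer): block until the callback hands over a buffer. */
static void wait_and_consume(void)
{
    pthread_mutex_lock(&lock);
    while (sharedBuffer == NULL)
        pthread_cond_wait(&frameReady, &lock);
    /* ... wrap sharedBuffer in an IplImage and display it ... */
    gst_buffer_unref(sharedBuffer);
    sharedBuffer = NULL;
    pthread_mutex_unlock(&lock);
}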

Learn more about gstreamer from http://gstreamer.freedesktop.org/data/doc/gstreamer/head/manual/html/index.html and refer especially to section 19.

For some reason, I am experiencing a memory leak with the code below (more so with the first approach) and haven't been able to fix it yet. Also, the gstreamer pipeline elements will be different for your platform. Another problem: I get x-raw-yuv data from my gstreamer source element, and I am only able to display a black-and-white image with OpenCV. Nonetheless, I thought this might be useful, and maybe someone can point out the error to me. I am not a gstreamer expert by any means.
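On the black-and-white issue: the caps below negotiate planar I420, and wrapping only the start of the buffer in a single-channel 8-bit image shows just the luma plane. Something along the following lines should recover colour, though I have not verified it on the target (a sketch assuming OpenCV 2.4 or newer; gstImageBuffer is the buffer pulled in the main loop of the full program):

// Sketch (untested on the target): wrap the whole 720x576 I420 buffer and
// convert it to BGR. An I420 frame has height * 3 / 2 rows of 8-bit data.
cv::Mat yuv(576 * 3 / 2, 720, CV_8UC1, (void*)GST_BUFFER_DATA(gstImageBuffer));
cv::Mat bgr;
cv::cvtColor(yuv, bgr, CV_YUV2BGR_I420);
cv::imshow("Toradex Face Detection Demo with Gstreamer", bgr);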


#include <opencv2/objdetect/objdetect.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <opencv/cv.h>
#include <gstreamer-0.10/gst/gst.h>
#include <gstreamer-0.10/gst/gstelement.h>
#include <gstreamer-0.10/gst/app/gstappsink.h>
#include <iostream>
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include <X11/Xlib.h>
#include <X11/Xutil.h>

using namespace std;
using namespace cv;

/* Structure to contain all our information, so we can pass it around */
typedef struct _CustomData
{
    GstElement *appsink;
    GstElement *colorSpace;    
    GstElement *pipeline;
    GstElement *vsource_capsfilter, *mixercsp_capsfilter, *cspappsink_capsfilter;
    GstElement *mixer_capsfilter;
    GstElement *bin_capture;
    GstElement *video_source, *deinterlace;     
    GstElement *nv_video_mixer;    
    GstPad *pad;
    GstCaps *srcdeinterlace_caps, *mixercsp_caps, *cspappsink_caps;    
    GstBus *bus;
    GstMessage *msg;        
}gstData;

GstBuffer* buffer;        

pthread_mutex_t threadMutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t waitForGstBuffer = PTHREAD_COND_INITIALIZER; 

/* Global variables */
CascadeClassifier face_cascade;
IplImage *frame = NULL;     
string window_name = "Toradex Face Detection Demo";
String face_cascade_name = "/home/root/haarcascade_frontalface_alt2.xml";
const int BORDER = 8;    // Border between GUI elements to the edge of the image.

template <typename T> string toString(T t)
{
    ostringstream out;
    out << t;
    return out.str();
}

// Draw text into an image. Defaults to top-left-justified text, but you can give negative x coords for right-justified text,
// and/or negative y coords for bottom-justified text
// Returns the bounding rect around the drawn text
Rect drawString(Mat img, string text, Point coord, Scalar color, float fontScale = 0.6f, int thickness = 1, int fontFace = FONT_HERSHEY_COMPLEX)
{
    // Get the text size & baseline.
    int baseline = 0;
    Size textSize = getTextSize(text, fontFace, fontScale, thickness, &baseline);
    baseline += thickness;

    // Adjust the coords for left/right-justified or top/bottom-justified.
    if (coord.y >= 0) {
        // Coordinates are for the top-left corner of the text from the top-left of the image, so move down by one row.
        coord.y += textSize.height;
    }
    else {
        // Coordinates are for the bottom-left corner of the text from the bottom-left of the image, so come up from the bottom.
        coord.y += img.rows - baseline + 1;
    }
    // Become right-justified if desired.
    if (coord.x < 0) {
        coord.x += img.cols - textSize.width + 1;
    }

    // Get the bounding box around the text.
    Rect boundingRect = Rect(coord.x, coord.y - textSize.height, textSize.width, baseline + textSize.height);

    // Draw anti-aliased text.
    putText(img, text, coord, fontFace, fontScale, color, thickness, CV_AA);

    // Let the user know how big their text is, in case they want to arrange things.
    return boundingRect;
}

void create_pipeline(gstData *data)
{
    data->pipeline = gst_pipeline_new ("pipeline");
    gst_element_set_state (data->pipeline, GST_STATE_NULL);
}

gboolean CaptureGstBuffer(GstAppSink *sink, gstData *data)
{
    //g_signal_emit_by_name (sink, "pull-buffer", &buffer);
    pthread_mutex_lock(&threadMutex);
    buffer = gst_app_sink_pull_buffer(sink);
    if (buffer)
    {
        // NOTE: cvCreateImage() allocates new pixel data on every callback, which
        // is orphaned when imageData is repointed below and never released; a
        // likely source of the leak mentioned at the top. The buffer itself is
        // unreffed by the main thread after display.
        // A single 8-bit channel over I420 data shows only the luma plane.
        frame = cvCreateImage(cvSize(720, 576), IPL_DEPTH_8U, 1);
        if (frame == NULL)
        {
            g_printerr("IplImageFrame is null.\n");
        }
        else
        {
            frame->imageData = (char*)GST_BUFFER_DATA(buffer);
            if (frame->imageData == NULL)
            {
                g_printerr("IplImage data is null.\n");
            }
        }
        pthread_cond_signal(&waitForGstBuffer);
    }
    pthread_mutex_unlock(&threadMutex);
    return TRUE;
}

gboolean init_video_capture(gstData *data)
{    
    data->video_source = gst_element_factory_make("v4l2src", "video_source_live");
    data->vsource_capsfilter = gst_element_factory_make ("capsfilter", "vsource_cptr_capsfilter");
    data->deinterlace = gst_element_factory_make("deinterlace", "deinterlace_live");
    data->nv_video_mixer = gst_element_factory_make("nv_omx_videomixer", "nv_video_mixer_capture");    
    data->mixercsp_capsfilter = gst_element_factory_make ("capsfilter", "mixercsp_capsfilter");
    data->colorSpace = gst_element_factory_make("ffmpegcolorspace", "csp");        
    data->cspappsink_capsfilter = gst_element_factory_make ("capsfilter", "cspappsink_capsfilter");
    data->appsink = gst_element_factory_make("appsink", "asink");
        
    if (!data->video_source || !data->vsource_capsfilter || !data->deinterlace || !data->nv_video_mixer || !data->mixercsp_capsfilter || !data->appsink \
        || !data->colorSpace || !data->cspappsink_capsfilter)
    {
        g_printerr ("Not all elements for video were created.\n");
        return FALSE;
    }        
    
    g_signal_connect( data->pipeline, "deep-notify", G_CALLBACK( gst_object_default_deep_notify ), NULL );        
    
    gst_app_sink_set_emit_signals((GstAppSink*)data->appsink, true);
    gst_app_sink_set_drop((GstAppSink*)data->appsink, true);
    gst_app_sink_set_max_buffers((GstAppSink*)data->appsink, 1);    
    
    data->srcdeinterlace_caps = gst_caps_from_string("video/x-raw-yuv, width=(int)720, height=(int)576, format=(fourcc)I420, framerate=(fraction)1/1");        
    if (!data->srcdeinterlace_caps)
        g_printerr("1. Could not create media format string.\n");        
    g_object_set (G_OBJECT (data->vsource_capsfilter), "caps", data->srcdeinterlace_caps, NULL);
    gst_caps_unref(data->srcdeinterlace_caps);        
    
    data->mixercsp_caps = gst_caps_from_string("video/x-raw-yuv, width=(int)720, height=(int)576, format=(fourcc)I420, framerate=(fraction)1/1, pixel-aspect-ratio=(fraction)1/1");    
    if (!data->mixercsp_caps)
        g_printerr("2. Could not create media format string.\n");        
    g_object_set (G_OBJECT (data->mixercsp_capsfilter), "caps", data->mixercsp_caps, NULL);
    gst_caps_unref(data->mixercsp_caps);    
    
    data->cspappsink_caps = gst_caps_from_string("video/x-raw-yuv, width=(int)720, height=(int)576, format=(fourcc)I420, framerate=(fraction)1/1");        
    if (!data->cspappsink_caps)
        g_printerr("3. Could not create media format string.\n");        
    g_object_set (G_OBJECT (data->cspappsink_capsfilter), "caps", data->cspappsink_caps, NULL);    
    gst_caps_unref(data->cspappsink_caps);        
            
    data->bin_capture = gst_bin_new ("bin_capture");        
    
    /*if(g_signal_connect(data->appsink, "new-buffer", G_CALLBACK(CaptureGstBuffer), NULL) <= 0)
    {
        g_printerr("Could not connect signal handler.\n");
        exit(1);
    }*/
    
    gst_bin_add_many (GST_BIN (data->bin_capture), data->video_source, data->vsource_capsfilter, data->deinterlace, data->nv_video_mixer, \
                        data->mixercsp_capsfilter, data->colorSpace, data->cspappsink_capsfilter, data->appsink, NULL);
    
    if (gst_element_link_many(data->video_source, data->vsource_capsfilter, data->deinterlace, NULL) != TRUE)
    {
        g_printerr ("video_src to deinterlace not linked.\n");
        return FALSE;
    }        
    
    if (gst_element_link_many (data->deinterlace, data->nv_video_mixer, NULL) != TRUE)
    {
        g_printerr ("deinterlace to video_mixer not linked.\n");
        return FALSE;
    }        
    
    if (gst_element_link_many (data->nv_video_mixer, data->mixercsp_capsfilter, data->colorSpace, NULL) != TRUE)
    {
        g_printerr ("video_mixer to colorspace not linked.\n");
        return FALSE;    
    }
    
    if (gst_element_link_many (data->colorSpace, data->appsink, NULL) != TRUE)
    {
        g_printerr ("colorspace to appsink not linked.\n");
        return FALSE;    
    }
    
    cout << "Returns from init_video_capture." << endl;
    return TRUE;
}

void delete_pipeline(gstData *data)
{
    gst_element_set_state (data->pipeline, GST_STATE_NULL);
    g_print ("Pipeline set to NULL\n");
    gst_object_unref (data->bus);
    gst_object_unref (data->pipeline);
    g_print ("Pipeline deleted\n");
}

gboolean add_bin_capture_to_pipe(gstData *data)
{
    if((gst_bin_add(GST_BIN (data->pipeline), data->bin_capture)) != TRUE)
    {
        g_print("bin_capture not added to pipeline\n");
    }
    
    if(gst_element_set_state (data->pipeline, GST_STATE_NULL) == GST_STATE_CHANGE_SUCCESS)
    {        
        return TRUE;
    }
    else
    {
        cout << "Failed to set pipeline state to NULL." << endl;
        return FALSE;        
    }
}

gboolean remove_bin_capture_from_pipe(gstData *data)
{
    gst_element_set_state (data->pipeline, GST_STATE_NULL);
    gst_element_set_state (data->bin_capture, GST_STATE_NULL);
    if((gst_bin_remove(GST_BIN (data->pipeline), data->bin_capture)) != TRUE)
    {
        g_print("bin_capture not removed from pipeline\n");
    }    
    return TRUE;
}

gboolean start_capture_pipe(gstData *data)
{
    // A live source usually returns GST_STATE_CHANGE_ASYNC or NO_PREROLL here
    // rather than SUCCESS, so only treat FAILURE as an error.
    if(gst_element_set_state (data->pipeline, GST_STATE_PLAYING) != GST_STATE_CHANGE_FAILURE)
        return TRUE;
    else
    {
        cout << "Failed to set pipeline state to PLAYING." << endl;
        return FALSE;
    }
}

gboolean stop_capture_pipe(gstData *data)
{
    gst_element_set_state (data->bin_capture, GST_STATE_NULL);
    gst_element_set_state (data->pipeline, GST_STATE_NULL);
    return TRUE;
}

gboolean deinit_video_live(gstData *data)
{
    gst_element_set_state (data->pipeline, GST_STATE_NULL);
    gst_element_set_state (data->bin_capture, GST_STATE_NULL);
    gst_object_unref (data->bin_capture);
    return TRUE;
}

gboolean check_bus_cb(gstData *data)
{
    GError *err = NULL;                
    gchar *dbg = NULL;   
          
    g_print("Got message: %s\n", GST_MESSAGE_TYPE_NAME(data->msg));
    switch(GST_MESSAGE_TYPE (data->msg))
    {
        case GST_MESSAGE_EOS:       
            g_print ("END OF STREAM... \n");
            break;

        case GST_MESSAGE_ERROR:
            gst_message_parse_error (data->msg, &err, &dbg);
            if (err)
            {
                g_printerr ("ERROR: %s\n", err->message);
                g_error_free (err);
            }
            if (dbg)
            {
                g_printerr ("[Debug details: %s]\n", dbg);
                g_free (dbg);
            }
            break;

        default:
            g_printerr ("Unexpected message of type %d", GST_MESSAGE_TYPE (data->msg));
            break;
    }
    return TRUE;
}

void get_pipeline_bus(gstData *data)
{
    data->bus = gst_element_get_bus (data->pipeline);
    data->msg = gst_bus_poll (data->bus, GST_MESSAGE_EOS | GST_MESSAGE_ERROR, -1);
    if(GST_MESSAGE_TYPE (data->msg))
    {
        check_bus_cb(data);
    }
    gst_message_unref (data->msg);
}

int main(int argc, char *argv[])
{        
    //Mat frame;
    VideoCapture capture;    
    gstData gstreamerData;
    GstBuffer *gstImageBuffer;
    
    //XInitThreads();
    gst_init (&argc, &argv);
    create_pipeline(&gstreamerData);
    if(init_video_capture(&gstreamerData))
    {        
        add_bin_capture_to_pipe(&gstreamerData);    
        start_capture_pipe(&gstreamerData);
        //get_pipeline_bus(&gstreamerData);    
    
        cout << "Starting while loop..." << endl;
        cvNamedWindow("Toradex Face Detection Demo with Gstreamer", 0);    
    
        while(true)
        {    
            //pthread_mutex_lock(&threadMutex);
            //pthread_cond_wait(&waitForGstBuffer, &threadMutex);
            
            gstImageBuffer = gst_app_sink_pull_buffer((GstAppSink*)gstreamerData.appsink);
        
            if (gstImageBuffer != NULL)
            {        
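                // NOTE: cvCreateImage() below allocates fresh pixel data on every
                // iteration, and the IplImage is never released once imageData is
                // repointed at the GstBuffer; a likely source of the leak mentioned
                // at the top. cvCreateImageHeader()/cvReleaseImageHeader() would
                // avoid the extra allocation.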
                frame = cvCreateImage(cvSize(720, 576), IPL_DEPTH_8U, 1);
                    
                if (frame == NULL)
                {
                    g_printerr("IplImageFrame is null.\n");
                }
                else
                {        
                    frame->imageData = (char*)GST_BUFFER_DATA(gstImageBuffer);        
                    if (frame->imageData == NULL)
                    {
                        g_printerr("IplImage data is null.\n");            
                    }                    
                    cvShowImage("Toradex Face Detection Demo with Gstreamer", frame);  
                    cvWaitKey(1);                    
                    gst_buffer_unref(gstImageBuffer);
                }
            }
            else
            {
                cout << "Appsink buffer didn't return buffer." << endl;
            }
            /*
            if (frame)
            {
                cvShowImage("Toradex Face Detection Demo with Gstreamer", frame);
            }
            gst_buffer_unref(buffer);
            buffer = NULL;            
            pthread_mutex_unlock(&threadMutex);    
            cvWaitKey(1);*/                                    
        }
    }
    else
    {
        exit(1);
    }
              
    //Destroy the window
    cvDestroyWindow("Toradex Face Detection Demo with Gstreamer");
    remove_bin_capture_from_pipe(&gstreamerData);
    deinit_video_live(&gstreamerData);
    delete_pipeline(&gstreamerData);

    return 0;
}
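For reference, the capture chain assembled in init_video_capture() corresponds roughly to the gst-launch line below (my best reconstruction, with appsink swapped for ximagesink so the pipeline can be tried on its own; the element names are specific to my Tegra platform and will differ on yours):

gst-launch-0.10 v4l2src ! 'video/x-raw-yuv, width=720, height=576, format=(fourcc)I420' ! deinterlace ! nv_omx_videomixer ! ffmpegcolorspace ! ximagesink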

Face detection with OpenCV


#include <opencv/cv.h>
#include <opencv/cxcore.h>
#include <opencv/highgui.h>
#include "stdio.h"

int main()
{
    IplImage* frame = NULL;
    const char* cascade_path = "C:\\OpenCV\\opencv\\data\\haarcascades\\haarcascade_frontalface_alt2.xml";
    CvHaarClassifierCascade* hc = NULL;
    CvMemStorage* storage = cvCreateMemStorage(0);
    CvSize minSize = cvSize(100, 100);
    CvSize maxSize = cvSize(640, 480);
    CvSeq* faces = NULL;
    CvCapture* input_camera = NULL;

    int key = 0;
    int loopCounter = 0;

    hc = (CvHaarClassifierCascade*)cvLoad(cascade_path, NULL, NULL, NULL);
    if (hc == NULL)
    {
        printf("\nLoading of classifier failed\n");
        return -1;
    }

    input_camera = cvCaptureFromCAM(-1);
    if (input_camera == NULL)
    {
        printf("\nCould not open camera\n");
        return -1;
    }

    //Grabs and returns a frame from camera
    frame = cvQueryFrame(input_camera);
    if (frame == NULL)
    {
        printf("\nCould not capture frame\n");
        return -1;
    }

    cvNamedWindow("Capturing Image ...", 0);

    // cvResizeWindow() expects width first, then height.
    cvResizeWindow("Capturing Image ...",
        (int) cvGetCaptureProperty(input_camera, CV_CAP_PROP_FRAME_WIDTH),
        (int) cvGetCaptureProperty(input_camera, CV_CAP_PROP_FRAME_HEIGHT));

    while(frame != NULL)
    {
        faces = cvHaarDetectObjects(frame, hc, storage, 1.2, 3, CV_HAAR_DO_CANNY_PRUNING, minSize, maxSize);

        for (loopCounter = 0; loopCounter < (faces ? faces->total : 0); loopCounter++)
        {
            CvRect *r = (CvRect*)cvGetSeqElem(faces, loopCounter);
            CvPoint pt1 = { r->x, r->y };
            CvPoint pt2 = { r->x + r->width, r->y + r->height };
            cvRectangle(frame, pt1, pt2, CV_RGB(0, 255, 0), 3, 4, 0);
        }

        //Shows a frame
        cvShowImage("Capturing Image ...", frame);

        // Checks if ESC is pressed and gives a delay
        // so that the frame can be displayed properly
        key = cvWaitKey(1);
        if (key == 27)        // ESC key
        {
            break;
        }
        //Grabs and returns the next frame
        frame = cvQueryFrame(input_camera);
    }

    // frame points to an internal buffer owned by the capture; releasing it
    // manually is what causes the crash mentioned below.
    //cvReleaseImage( &frame );
    cvReleaseCapture(&input_camera);
    cvReleaseMemStorage( &storage );
    cvReleaseHaarClassifierCascade( &hc );

    //Destroy the window
    cvDestroyWindow("Capturing Image ...");

    return 0;
}

I have tested the above code on an NVidia Tegra 2 based Toradex Colibri T20 module and also on my laptop. It works, but there seems to be a bug: the program crashes if I enable the cvReleaseImage() line. The reason is noted in the comment above that line: cvQueryFrame() returns a pointer to a buffer owned by the capture, which must not be released manually. I am not an OpenCV expert; I will have to really study the functions properly before playing around.

I have not explained the OpenCV installation procedure for Windows as it is already documented on the OpenCV website. For use with the Beagleboard, you can refer to a previous article of mine: https://coherentmusings.wordpress.com/2012/06/24/getting-started-with-opencv-on-beagleboard-xm/. You can also use OpenCV on embedded Linux if you use Buildroot or OpenEmbedded to build your image.

OpenCV on Beagleboard-xM | Getting started

Before I start, let me mention a few things that I am assuming.

1. You know how to use the vi editor.

2. You are connected to the BB-xM over a serial link and are doing all operations from the terminal.

Get the Angstrom image from the link below and set up the SD card as per the instructions given.

http://elinux.org/Getting_a_Workshop_SD_Image

After following the setup procedure, you will have a boot partition and a root filesystem partition on the SD card.

After setting up the SD card, transfer an image in which you want faces to be detected into the /home/root directory of the root filesystem on the card, for example as shown below.
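For example, if the rootfs partition of the card shows up mounted at /media/rootfs on your PC (that mount point is just an assumption, adjust it to yours):

cp faces.jpg /media/rootfs/home/root/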

Plug in the SD card and boot up the board. You will get a login prompt; log in as root, after which you will be placed in the /home/root directory. This is why I told you to copy the image into this particular directory.

Use vi to open a file called faces.c (type "vi faces.c" on the command line) and write the code below into it.

#include <opencv/cv.h>
#include <opencv/cxcore.h>
#include <opencv/highgui.h>   // needed for cvLoadImage()
#include "stdio.h"
#include "string.h"

int main(void)
{
    const char * cascade_path = "/usr/share/opencv/haarcascades/haarcascade_frontalface_default.xml";
    IplImage * img;
    CvHaarClassifierCascade * hc;
    CvSeq * faces;
    CvMemStorage * storage = cvCreateMemStorage(0);
    CvSize minSize = cvSize(20, 20);
    CvSize maxSize = cvSize(60, 60);

    img = cvLoadImage("faces.jpg", 0);  // Assuming faces.jpg is in the directory the executable is run from.
    if(img)
    {
        printf("Success\n");
    }
    else
    {
        printf("Failed\n");
    }

    hc = (CvHaarClassifierCascade*)cvLoad(cascade_path, NULL, NULL, NULL);
    faces = cvHaarDetectObjects(img, hc, storage, 1.2, 2, CV_HAAR_DO_CANNY_PRUNING, minSize, maxSize);

    if(faces)
    {
        printf("faces detected %d\n", faces->total);
    }
    else
    {
        printf("cvHaarDetectObjects returned null\n");
    }

    cvReleaseHaarClassifierCascade( &hc );
    cvReleaseImage( &img );
    cvReleaseMemStorage( &storage );

    return 0;
}

After saving the above program, compile it using the command:

arm-angstrom-linux-gnueabi-gcc faces.c /usr/lib/libopencv_*.so -o faces

I have put /usr/lib/libopencv_*.so on the command line so that the code is dynamically linked against all the OpenCV shared libraries.
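If the OpenCV pkg-config metadata happens to be installed in the image, the following should be equivalent (I have not tested this on my setup):

arm-angstrom-linux-gnueabi-gcc faces.c $(pkg-config --cflags --libs opencv) -o faces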

This will create an executable called faces. Run it from the command line by typing "./faces". The code will run and print the number of faces detected in the image.

I didn't have an HDMI-to-DVI-D cable to connect my monitor to the board, so I printed the output on the command line. If you can connect a monitor, use the functions from the highgui library to display your output. You can also draw rectangles around the detected faces, or write the result to a file, as sketched below.
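For example, a sketch of that idea using the same C API as above (cvSaveImage() also comes from highgui; the output filename is my own choice, and img and faces are the variables from the program):

/* Draw a box around each detected face and write the result to a file. */
int i;
for (i = 0; i < (faces ? faces->total : 0); i++)
{
    CvRect *r = (CvRect*)cvGetSeqElem(faces, i);
    cvRectangle(img, cvPoint(r->x, r->y),
                cvPoint(r->x + r->width, r->y + r->height),
                cvScalarAll(255), 3, 8, 0);   /* white, since img was loaded as grayscale */
}
cvSaveImage("faces_out.jpg", img, 0);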

I am currently working on face detection in video and facing a few glitches, but I will post it when I am successful.

To learn what the functions actually do, refer to the OpenCV 2.2 C reference at the link below.

http://opencv.jp/opencv-2.2_org/c/

Disclaimer: I have modified the example code given in the reference documentation to suit my own needs.