Skip to main content
Tweeted twitter.com/StackCodeReview/status/759640310993543168
edited tags; edited title
Link
200_success
  • 145.7k
  • 22
  • 191
  • 481

Is this an efficient implementation of H.264 image encoding using Media Foundation .NET?

added more specific tags
Link
JanDotNet
  • 8.6k
  • 2
  • 22
  • 48
Source Link
geometrikal
  • 385
  • 3
  • 6
  • 17

Is this an efficient implementation of H.264 image encoding using Media Foundation .NET?

We have some video analysis software written in C# (.NET) that uses OpenCV via the Emgu.CV wrappers. The video frames come from a GigE Vision camera (not a normal capture device); they are analysed, graphically annotated, and then encoded to a video file.

Previously we used the OpenCV VideoWriter class to encode the video. However, the VideoWriter class uses Video for Windows codecs and often corrupts the indexing of the output file.

After much searching I have yet to find another .NET implementation of encoding frames to H.264 video, so I decided to write my own. The code below is based on the MediaFoundation C++ SinkWriter tutorial and implemented in .NET using the MediaFoundation.NET wrapper.

The main changes I have made are:

  • Everything is in a single thread, due to problems accessing the WriteFrame method from other threads. I believe this is due to interacting with the underlying COM object but I've no experience with that.
  • New frames are passed to the thread using a BlockingCollection
  • IDisposable was implemented to make sure Stop() is called.

Some questions:

  • Is the thread implementation using CancellationTokenSource appropriate?
  • Is BlockingCollection the best way to pass the frames in?
  • Is it possible to reuse the IMFMediaBuffer and IMFSample objects? If so, should I do this? Will it improve efficiency?
  • Is the implementation of IDisposable correct?

Code:

/// <summary>
/// Encodes a stream of 24-bit RGB frames to an H.264 file through a Media Foundation sink writer.
/// </summary>
/// <remarks>
/// All sink-writer calls are made from one dedicated encode thread. Producers hand frames over
/// with <see cref="AddFrame"/>, which places a vertically flipped copy on an internal queue.
/// <see cref="Stop"/> (and therefore <see cref="Dispose()"/>) blocks until every queued frame
/// has been written and the output file has been finalized.
/// </remarks>
class MFVideoEncoder : IDisposable
{
    private int videoBitRate = 800000;
    const int VIDEO_FPS = 30;
    const int BYTES_PER_PIXEL = 3;
    const long TICKS_PER_SECOND = 10 * 1000 * 1000;
    const long VIDEO_FRAME_DURATION = TICKS_PER_SECOND / VIDEO_FPS;

    // HRESULT returned by WriteFrame when a frame does not match the configured geometry (E_INVALIDARG).
    const int E_INVALIDARG = unchecked((int)0x80070057);

    /// <summary>
    /// True while the encode thread has an initialized sink writer. Written by the encode thread
    /// and read by callers, hence volatile. Reset to false when the encode thread exits.
    /// </summary>
    public volatile bool HasStarted = false;

    private IMFSinkWriter sinkWriter;
    private int streamIndex = 0;
    private int frameSizeBytes = 0;
    private long frames = 0;

    private int videoWidth = 0;
    private int videoHeight = 0;
    private string outputFile = "//output.mp4";

    private CancellationTokenSource encodeTaskCTS;
    private Thread encodeThread;

    // True between Start() and Stop(); frames offered outside that window are ignored so they
    // cannot pile up in the queue with nobody consuming them.
    private volatile bool acceptingFrames = false;

    private readonly BlockingCollection<Emgu.CV.Mat> FrameQueue = new BlockingCollection<Emgu.CV.Mat>();

    public MFVideoEncoder()
    {

    }

    /// <summary>
    /// Starts a new encoding session on a dedicated thread. Any session that is still running
    /// is stopped (and its file finalized) first.
    /// </summary>
    /// <param name="outputFile">Path or URL of the file to create.</param>
    /// <param name="width">Frame width in pixels; must be positive.</param>
    /// <param name="height">Frame height in pixels; must be positive.</param>
    /// <param name="bitRate">Target average bit rate passed to the encoder (MF_MT_AVG_BITRATE).</param>
    /// <exception cref="ObjectDisposedException">The encoder has been disposed.</exception>
    /// <exception cref="ArgumentException"><paramref name="outputFile"/> is null or empty.</exception>
    /// <exception cref="ArgumentOutOfRangeException">The dimensions are not positive or one frame exceeds <see cref="int.MaxValue"/> bytes.</exception>
    public void Start(String outputFile, int width, int height, int bitRate)
    {
        if (disposedValue)
        {
            throw new ObjectDisposedException(nameof(MFVideoEncoder));
        }
        if (String.IsNullOrEmpty(outputFile))
        {
            throw new ArgumentException("An output file must be specified.", nameof(outputFile));
        }
        if (width <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(width), "Width must be positive.");
        }
        if (height <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(height), "Height must be positive.");
        }

        // Computed in 64 bits so that an oversized frame is rejected instead of wrapping around.
        long frameBytes = (long)BYTES_PER_PIXEL * width * height;
        if (frameBytes > int.MaxValue)
        {
            throw new ArgumentOutOfRangeException(nameof(width), "Frame size exceeds the maximum buffer size.");
        }

        // Finish a previous session before its cancellation source and state are replaced.
        Stop();

        this.videoWidth = width;
        this.videoHeight = height;
        this.outputFile = outputFile;
        this.videoBitRate = bitRate;

        frames = 0;
        frameSizeBytes = (int)frameBytes;
        HasStarted = false;

        encodeTaskCTS?.Dispose();
        encodeTaskCTS = new CancellationTokenSource();
        var token = encodeTaskCTS.Token;

        acceptingFrames = true;

        encodeThread = new Thread(() => EncodeTask(token));
        encodeThread.Priority = ThreadPriority.Highest;
        //encodeThread.SetApartmentState(ApartmentState.STA);
        encodeThread.Start();
    }

    /// <summary>
    /// Starts a new encoding session, deriving the bit rate from the raw (uncompressed) data rate
    /// divided by <paramref name="compressionFactor"/>.
    /// </summary>
    /// <param name="outputFile">Path or URL of the file to create.</param>
    /// <param name="width">Frame width in pixels.</param>
    /// <param name="height">Frame height in pixels.</param>
    /// <param name="compressionFactor">Ratio of raw data rate to target bit rate; must be greater than zero.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="compressionFactor"/> is not greater than zero (or is NaN).</exception>
    public void Start(String outputFile, int width, int height, double compressionFactor)
    {
        // Written as a negated comparison so that NaN is rejected as well.
        if (!(compressionFactor > 0))
        {
            throw new ArgumentOutOfRangeException(nameof(compressionFactor), "Compression factor must be greater than zero.");
        }

        // Multiply in double precision: the int product overflows for large frames before the division.
        double rawBitRate = (double)VIDEO_FPS * width * height * BYTES_PER_PIXEL / compressionFactor;
        int bitRate = rawBitRate >= int.MaxValue ? int.MaxValue : (int)rawBitRate;
        Console.WriteLine("# Bit rate: {0}", bitRate);
        Start(outputFile, width, height, bitRate);
    }

    /// <summary>
    /// Stops the current session, if any. Blocks until the encode thread has written the frames
    /// that were already queued, finalized the file and released its Media Foundation resources.
    /// Safe to call when no session is running.
    /// </summary>
    public void Stop()
    {
        acceptingFrames = false;

        Thread thread = encodeThread;
        if (thread != null)
        {
            // Cancel unconditionally: HasStarted is set asynchronously by the encode thread, so
            // gating on it would let an early Stop() leave the thread running forever.
            encodeTaskCTS.Cancel();
            if (thread != Thread.CurrentThread)
            {
                thread.Join();
            }
            encodeThread = null;
        }

        // Release anything that slipped into the queue while the thread was shutting down.
        DiscardQueuedFrames();
    }

    /// <summary>
    /// Queues a vertically flipped copy of <paramref name="frame"/> for encoding. The caller keeps
    /// ownership of <paramref name="frame"/>. Frames offered while no session is active are ignored.
    /// </summary>
    /// <param name="frame">The frame to encode.</param>
    /// <exception cref="ArgumentNullException"><paramref name="frame"/> is null.</exception>
    public void AddFrame(Mat frame)
    {
        if (frame == null)
        {
            throw new ArgumentNullException(nameof(frame));
        }
        if (!acceptingFrames)
        {
            return;
        }

        Mat flippedFrame = new Mat(frame.Size, frame.Depth, frame.NumberOfChannels);
        bool queued = false;
        try
        {
            CvInvoke.Flip(frame, flippedFrame, Emgu.CV.CvEnum.FlipType.Vertical);
            queued = FrameQueue.TryAdd(flippedFrame);
        }
        finally
        {
            // Ownership passes to the encode thread only when the frame was actually queued.
            if (!queued)
            {
                flippedFrame.Dispose();
            }
        }
    }

    /// <summary>
    /// Body of the encode thread: starts Media Foundation, creates the sink writer, writes frames
    /// until cancellation, flushes the remaining queue, then finalizes and releases everything.
    /// </summary>
    /// <param name="token">Signalled by <see cref="Stop"/> to end the session.</param>
    private void EncodeTask(CancellationToken token)
    {
        // Start up
        int hr = MFExtern.MFStartup(0x00020070, MFStartup.Full);
        bool mfStarted = Succeeded(hr);
        bool writing = false;

        try
        {
            if (mfStarted)
            {
                hr = InitializeSinkWriter(outputFile, videoWidth, videoHeight);
            }
            writing = Succeeded(hr);
            HasStarted = writing;

            // Check encoder running
            if (!writing)
            {
                Console.WriteLine("! Encode thread didn't start");
                return;
            }

            // Write frames until Stop() cancels the token.
            while (!token.IsCancellationRequested)
            {
                Mat frame;
                try
                {
                    if (!FrameQueue.TryTake(out frame, 200, token))
                    {
                        continue;
                    }
                }
                catch (OperationCanceledException)
                {
                    break;
                }
                EncodeAndRelease(frame);
            }

            // Flush frames that were accepted before Stop() so the end of the video is not lost.
            Mat pending;
            while (FrameQueue.TryTake(out pending))
            {
                EncodeAndRelease(pending);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine("! Thread exit: " + ex.Message);
        }
        finally
        {
            //Clean up
            acceptingFrames = false;
            HasStarted = false;

            if (sinkWriter != null)
            {
                // Finalize only a writer that reached BeginWriting; otherwise just release it.
                if (writing)
                {
                    try
                    {
                        int hrFinalize = sinkWriter.Finalize_();
                        if (!Succeeded(hrFinalize))
                        {
                            Console.WriteLine("! Finalize failed: 0x{0:X8}", hrFinalize);
                        }
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("! Finalize failed: " + ex.Message);
                    }
                }
                COMBase.SafeRelease(sinkWriter);
                sinkWriter = null;
            }

            if (mfStarted)
            {
                MFExtern.MFShutdown();
            }

            DiscardQueuedFrames();
        }
    }

    /// <summary>Writes one queued frame, reports a failed HRESULT, and always disposes the frame.</summary>
    private void EncodeAndRelease(Mat frame)
    {
        try
        {
            int hr = WriteFrame(frame);
            if (!Succeeded(hr))
            {
                Console.WriteLine("! WriteFrame failed: 0x{0:X8}", hr);
            }
        }
        finally
        {
            frame.Dispose();
        }
    }

    /// <summary>Removes and disposes every frame currently in the queue without encoding it.</summary>
    private void DiscardQueuedFrames()
    {
        Mat stale;
        while (FrameQueue.TryTake(out stale))
        {
            stale.Dispose();
        }
    }

    /// <summary>
    /// Creates the sink writer for <paramref name="outputFile"/>, adds an H.264 output stream,
    /// sets an RGB24 input type and begins writing.
    /// </summary>
    /// <returns>The HRESULT of the first failing call, or of BeginWriting when all calls succeed.</returns>
    private int InitializeSinkWriter(String outputFile, int videoWidth, int videoHeight)
    {
        IMFMediaType mediaTypeIn = null;
        IMFMediaType mediaTypeOut = null;
        IMFAttributes attributes = null;

        int hr = 0;

        try
        {
            if (Succeeded(hr)) hr = MFExtern.MFCreateAttributes(out attributes, 1);
            if (Succeeded(hr)) hr = attributes.SetUINT32(MFAttributesClsid.MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, 1);
            //if (Succeeded(hr)) hr = attributes.SetUINT32(MFAttributesClsid.MF_SINK_WRITER_DISABLE_THROTTLING, 1);
            if (Succeeded(hr)) hr = attributes.SetUINT32(MFAttributesClsid.MF_LOW_LATENCY, 1);

            // Create the sink writer
            if (Succeeded(hr)) hr = MFExtern.MFCreateSinkWriterFromURL(outputFile, null, attributes, out sinkWriter);

            // Create the output type
            if (Succeeded(hr)) hr = MFExtern.MFCreateMediaType(out mediaTypeOut);
            if (Succeeded(hr)) hr = mediaTypeOut.SetGUID(MFAttributesClsid.MF_MT_MAJOR_TYPE, MFMediaType.Video);
            if (Succeeded(hr)) hr = mediaTypeOut.SetGUID(MFAttributesClsid.MF_MT_SUBTYPE, MFMediaType.H264);
            if (Succeeded(hr)) hr = mediaTypeOut.SetUINT32(MFAttributesClsid.MF_MT_AVG_BITRATE, videoBitRate);
            if (Succeeded(hr)) hr = mediaTypeOut.SetUINT32(MFAttributesClsid.MF_MT_INTERLACE_MODE, (int) MFVideoInterlaceMode.Progressive);
            if (Succeeded(hr)) hr = MFExtern.MFSetAttributeSize(mediaTypeOut, MFAttributesClsid.MF_MT_FRAME_SIZE, videoWidth, videoHeight);
            if (Succeeded(hr)) hr = MFExtern.MFSetAttributeRatio(mediaTypeOut, MFAttributesClsid.MF_MT_FRAME_RATE, VIDEO_FPS, 1);
            if (Succeeded(hr)) hr = MFExtern.MFSetAttributeRatio(mediaTypeOut, MFAttributesClsid.MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
            if (Succeeded(hr)) hr = sinkWriter.AddStream(mediaTypeOut, out streamIndex);

            // Create the input type
            if (Succeeded(hr)) hr = MFExtern.MFCreateMediaType(out mediaTypeIn);
            if (Succeeded(hr)) hr = mediaTypeIn.SetGUID(MFAttributesClsid.MF_MT_MAJOR_TYPE, MFMediaType.Video);
            if (Succeeded(hr)) hr = mediaTypeIn.SetGUID(MFAttributesClsid.MF_MT_SUBTYPE, MFMediaType.RGB24);
            if (Succeeded(hr)) hr = mediaTypeIn.SetUINT32(MFAttributesClsid.MF_MT_INTERLACE_MODE, (int)MFVideoInterlaceMode.Progressive);
            if (Succeeded(hr)) hr = MFExtern.MFSetAttributeSize(mediaTypeIn, MFAttributesClsid.MF_MT_FRAME_SIZE, videoWidth, videoHeight);
            if (Succeeded(hr)) hr = MFExtern.MFSetAttributeRatio(mediaTypeIn, MFAttributesClsid.MF_MT_FRAME_RATE, VIDEO_FPS, 1);
            if (Succeeded(hr)) hr = MFExtern.MFSetAttributeRatio(mediaTypeIn, MFAttributesClsid.MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
            if (Succeeded(hr)) hr = sinkWriter.SetInputMediaType(streamIndex, mediaTypeIn, null);

            // Start accepting data
            if (Succeeded(hr)) hr = sinkWriter.BeginWriting();
        }
        finally
        {
            // The temporary COM objects are released whether or not setup succeeded. The sink
            // writer itself stays in the field; EncodeTask releases it.
            COMBase.SafeRelease(mediaTypeIn);
            COMBase.SafeRelease(mediaTypeOut);
            COMBase.SafeRelease(attributes);
        }

        return hr;
    }

    /// <summary>
    /// Copies <paramref name="frame"/> into a new media buffer, wraps it in a time-stamped sample
    /// and hands it to the sink writer.
    /// </summary>
    /// <param name="frame">Frame whose size and channel count must match the session settings.</param>
    /// <returns>
    /// An HRESULT: negative on failure, -1 when the encoder is not running, E_INVALIDARG when the
    /// frame geometry does not match the session.
    /// </returns>
    private int WriteFrame(Mat frame)
    {
        if (!HasStarted) return -1;

        // MFCopyImage below reads videoWidth * videoHeight * BYTES_PER_PIXEL bytes from the frame;
        // a smaller frame would be read out of bounds, so reject any mismatch up front.
        if (frame == null
            || frame.Size.Width != videoWidth
            || frame.Size.Height != videoHeight
            || frame.NumberOfChannels != BYTES_PER_PIXEL)
        {
            return E_INVALIDARG;
        }

        IMFSample sample = null;
        IMFMediaBuffer buffer = null;
        int hr;

        try
        {
            hr = MFExtern.MFCreateMemoryBuffer(frameSizeBytes, out buffer);

            if (Succeeded(hr))
            {
                IntPtr data;
                int bufferMaxLength;
                int bufferCurrentLength;

                hr = buffer.Lock(out data, out bufferMaxLength, out bufferCurrentLength);
                if (Succeeded(hr))
                {
                    try
                    {
                        // NOTE(review): the source stride assumes tightly packed rows — confirm
                        // for Mats that are not freshly allocated by AddFrame.
                        using (AutoPinner ap = new AutoPinner(frame.Data))
                        {
                            hr = MFExtern.MFCopyImage(data, videoWidth * BYTES_PER_PIXEL, frame.DataPointer, videoWidth * BYTES_PER_PIXEL, videoWidth * BYTES_PER_PIXEL, videoHeight);
                        }
                    }
                    finally
                    {
                        // Always pair Lock with Unlock, even when the copy failed or threw.
                        int hrUnlock = buffer.Unlock();
                        if (Succeeded(hr)) hr = hrUnlock;
                    }
                }
            }

            if (Succeeded(hr)) hr = buffer.SetCurrentLength(frameSizeBytes);
            if (Succeeded(hr)) hr = MFExtern.MFCreateSample(out sample);
            if (Succeeded(hr)) hr = sample.AddBuffer(buffer);
            if (Succeeded(hr)) hr = sample.SetSampleTime(TICKS_PER_SECOND * frames / VIDEO_FPS);
            if (Succeeded(hr)) hr = sample.SetSampleDuration(VIDEO_FRAME_DURATION);
            if (Succeeded(hr)) hr = sinkWriter.WriteSample(streamIndex, sample);
            if (Succeeded(hr)) frames++;
        }
        finally
        {
            COMBase.SafeRelease(sample);
            COMBase.SafeRelease(buffer);
        }

        return hr;
    }

    /// <summary>Returns true when <paramref name="hr"/> is a success HRESULT (non-negative).</summary>
    private bool Succeeded(int hr)
    {
        return hr >= 0;
    }

    #region IDisposable Support
    private bool disposedValue = false;

    /// <summary>
    /// Stops any running session (waiting for the file to be finalized) and releases the
    /// cancellation source and the frame queue.
    /// </summary>
    /// <param name="disposing">True when called from <see cref="Dispose()"/>.</param>
    protected virtual void Dispose(bool disposing)
    {
        if (!disposedValue)
        {
            if (disposing)
            {
                // Stop() is safe whether or not the encode thread ever reached the started state.
                Stop();

                encodeTaskCTS?.Dispose();
                encodeTaskCTS = null;
                FrameQueue.Dispose();
            }
            disposedValue = true;
        }
    }

    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }
    #endregion
}