Ticket #32865: vidl_ffmpeg_istream_v2.txx

File vidl_ffmpeg_istream_v2.txx, 15.3 KB (added by ozt@…, 13 years ago)
Line 
1// This is core/vidl/vidl_ffmpeg_istream_v2.txx
2#ifndef vidl_ffmpeg_istream_v2_txx_
3#define vidl_ffmpeg_istream_v2_txx_
4#include "vidl_ffmpeg_istream.h"
5//:
6// \file
7// \author Matt Leotta
8// \author Amitha Perera
9// \date   26 Dec 2007
10//
11// Update implementation based on ffmpeg svn -r11322
12// and libswscale svn -r25485.
13
14//-----------------------------------------------------------------------------
15
16#include "vidl_ffmpeg_init.h"
17#include "vidl_frame.h"
18#include "vidl_ffmpeg_convert.h"
19
20#include <vcl_string.h>
21#include <vcl_iostream.h>
22
23extern "C" {
24#if FFMPEG_IN_SEVERAL_DIRECTORIES
25#include <libavcodec/avcodec.h>
26#include <libavformat/avformat.h>
27#include <libswscale/swscale.h>
28#else
29#include <ffmpeg/avcodec.h>
30#include <ffmpeg/avformat.h>
31#include <ffmpeg/swscale.h>
32#endif
33}
34
35//--------------------------------------------------------------------------------
36
37struct vidl_ffmpeg_istream::pimpl
38{
39  pimpl()
40  : fmt_cxt_( NULL ),
41    vid_index_( -1 ),
42    vid_str_( NULL ),
43    last_dts( 0 ),
44    frame_( NULL ),
45    num_frames_( -2 ), // sentinel value to indicate not yet computed
46    sws_context_( NULL ),
47    cur_frame_( NULL ),
48    deinterlace_( false ),
49    frame_number_offset_( 0 )
50  {
51  }
52
53  AVFormatContext* fmt_cxt_;
54  int vid_index_;
55  AVStream* vid_str_;
56
57  //: Decode time of last frame.
58  int64_t last_dts;
59
60  //: Start time of the stream, to offset the dts when computing the frame number.
61  int64_t start_time;
62
63  //: The last successfully read frame.
64  //
65  // If frame_->data[0] is not NULL, then the frame corresponds to
66  // the codec state, so that codec.width and so on apply to the
67  // frame data.
68  AVFrame* frame_;
69
70  //: number of counted frames
71  int num_frames_;
72
73  //: A software scaling context
74  //
75  // This is the context used for the software scaling and colour
76  // conversion routines. Since the conversion is likely to be the
77  // same for each frame, we save the context to avoid re-creating it
78  // every time.
79  SwsContext* sws_context_;
80
81  //: A contiguous memory buffer to store the current image data
82  vil_memory_chunk_sptr contig_memory_;
83
84  //: The last successfully decoded frame.
85  mutable vidl_frame_sptr cur_frame_;
86
87  //: Apply deinterlacing on the frames?
88  bool deinterlace_;
89
90  //: Some codec/file format combinations need a frame number offset.
91  // These codecs have a delay between reading packets and generating frames.
92  unsigned frame_number_offset_;
93};
94
95
96//--------------------------------------------------------------------------------
97
98//: Constructor
99vidl_ffmpeg_istream::
100vidl_ffmpeg_istream()
101  : is_( new vidl_ffmpeg_istream::pimpl )
102{
103  vidl_ffmpeg_init();
104}
105
106
107//: Constructor - from a filename
108vidl_ffmpeg_istream::
109vidl_ffmpeg_istream(const vcl_string& filename)
110  : is_( new vidl_ffmpeg_istream::pimpl )
111{
112  vidl_ffmpeg_init();
113  open(filename);
114}
115
116
117//: Destructor
118vidl_ffmpeg_istream::
119~vidl_ffmpeg_istream()
120{
121  close();
122  delete is_;
123}
124
125//: Open a new stream using a filename
126bool
127vidl_ffmpeg_istream::
128open(const vcl_string& filename)
129{
130  // Close any currently opened file
131  close();
132
133  // Open the file
134  int err;
135  if ( ( err = av_open_input_file( &is_->fmt_cxt_, filename.c_str(), NULL, 0, NULL ) ) != 0 ) {
136    return false;
137  }
138
139  // Get the stream information by reading a bit of the file
140  if ( av_find_stream_info( is_->fmt_cxt_ ) < 0 ) {
141    return false;
142  }
143
144  // Find a video stream. Use the first one we find.
145  is_->vid_index_ = -1;
146  for ( unsigned i = 0; i < is_->fmt_cxt_->nb_streams; ++i ) {
147    AVCodecContext *enc = is_->fmt_cxt_->streams[i]->codec;
148    if ( enc->codec_type == AVMEDIA_TYPE_VIDEO ) {
149      is_->vid_index_ = i;
150      break;
151    }
152  }
153  if ( is_->vid_index_ == -1 ) {
154    return false;
155  }
156
157  dump_format( is_->fmt_cxt_, 0, filename.c_str(), 0 );
158  AVCodecContext *enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
159
160  // Open the stream
161  AVCodec* codec = avcodec_find_decoder(enc->codec_id);
162  if ( !codec || avcodec_open( enc, codec ) < 0 ) {
163    return false;
164  }
165
166  is_->vid_str_ = is_->fmt_cxt_->streams[ is_->vid_index_ ];
167  is_->frame_ = avcodec_alloc_frame();
168
169  if ( is_->vid_str_->start_time == int64_t(1)<<63 ) {
170    is_->start_time = 0;
171  }
172  else {
173    is_->start_time = is_->vid_str_->start_time;
174  }
175
176  // The MPEG 2 codec has a latency of 1 frame when encoded in an AVI
177  // stream, so the dts of the last packet (stored in last_dts) is
178  // actually the next frame's dts.
179  if ( is_->vid_str_->codec->codec_id == CODEC_ID_MPEG2VIDEO &&
180       vcl_string("avi") == is_->fmt_cxt_->iformat->name ) {
181    is_->frame_number_offset_ = 1;
182  }
183
184
185  return true;
186}
187
188
189//: Close the stream
190void
191vidl_ffmpeg_istream::
192close()
193{
194  if ( is_->frame_ ) {
195    av_freep( &is_->frame_ );
196  }
197
198  is_->num_frames_ = -2;
199  is_->contig_memory_ = 0;
200  is_->vid_index_ = -1;
201  if ( is_->vid_str_ ) {
202    avcodec_close( is_->vid_str_->codec );
203    is_->vid_str_ = 0;
204  }
205  if ( is_->fmt_cxt_ ) {
206    av_close_input_file( is_->fmt_cxt_ );
207    is_->fmt_cxt_ = 0;
208  }
209}
210
211
212//: Return true if the stream is open for reading
213bool
214vidl_ffmpeg_istream::
215is_open() const
216{
217  return ! ! is_->frame_;
218}
219
220
221//: Return true if the stream is in a valid state
222bool
223vidl_ffmpeg_istream::
224is_valid() const
225{
226  return is_open() && is_->frame_->data[0] != 0;
227}
228
229
230//: Return true if the stream support seeking
231bool
232vidl_ffmpeg_istream::
233is_seekable() const
234{
235  return true;
236}
237
238
239//: Return the number of frames if known
240//  returns -1 for non-seekable streams
241int
242vidl_ffmpeg_istream::num_frames() const
243{
244  // to get an accurate frame count, quickly run through the entire
245  // video.  We'll only do this if the user hasn't read any frames,
246  // because we have no guarantee that we can successfully seek back
247  // to anywhere but the beginning.  There is logic in advance() to
248  // ensure this.
249  vidl_ffmpeg_istream* mutable_this = const_cast<vidl_ffmpeg_istream*>(this);
250  if ( mutable_this->is_->num_frames_ == -2 ) {
251    mutable_this->is_->num_frames_ = 0;
252    while (mutable_this->advance()) {
253      ++mutable_this->is_->num_frames_;
254    }
255    av_seek_frame( mutable_this->is_->fmt_cxt_,
256                   mutable_this->is_->vid_index_,
257                   0,
258                   AVSEEK_FLAG_BACKWARD );
259  }
260
261  return is_->num_frames_;
262}
263
264
265//: Return the current frame number
266unsigned int
267vidl_ffmpeg_istream::
268frame_number() const
269{
270  // Quick return if the stream isn't open.
271  if ( !is_valid() ) {
272    return static_cast<unsigned int>(-1);
273  }
274
275  return ((is_->last_dts - is_->start_time)
276          * is_->vid_str_->r_frame_rate.num / is_->vid_str_->r_frame_rate.den
277          * is_->vid_str_->time_base.num + is_->vid_str_->time_base.den/2)
278         / is_->vid_str_->time_base.den
279         - int(is_->frame_number_offset_);
280}
281
282
283//: Return the width of each frame
284unsigned int
285vidl_ffmpeg_istream
286::width() const
287{
288  // Quick return if the stream isn't open.
289  if ( !is_open() ) {
290    return 0;
291  }
292
293  return is_->fmt_cxt_->streams[is_->vid_index_]->codec->width;
294}
295
296
297//: Return the height of each frame
298unsigned int
299vidl_ffmpeg_istream
300::height() const
301{
302  // Quick return if the stream isn't open.
303  if ( !is_open() ) {
304    return 0;
305  }
306
307  return is_->fmt_cxt_->streams[is_->vid_index_]->codec->height;
308}
309
310
311//: Return the pixel format
312vidl_pixel_format
313vidl_ffmpeg_istream
314::format() const
315{
316  // Quick return if the stream isn't open.
317  if ( !is_open() ) {
318    return VIDL_PIXEL_FORMAT_UNKNOWN;
319  }
320
321  AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
322  vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
323  if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
324    return VIDL_PIXEL_FORMAT_RGB_24;
325  return fmt;
326}
327
328
329//: Return the frame rate (0.0 if unspecified)
330double
331vidl_ffmpeg_istream
332::frame_rate() const
333{
334  // Quick return if the stream isn't open.
335  if ( !is_open() ) {
336    return 0.0;
337  }
338
339  return static_cast<double>(is_->vid_str_->r_frame_rate.num) / is_->vid_str_->r_frame_rate.den;
340}
341
342
343//: Return the duration in seconds (0.0 if unknown)
344double
345vidl_ffmpeg_istream
346::duration() const
347{
348  // Quick return if the stream isn't open.
349  if ( !is_open() ) {
350    return 0.0;
351  }
352  return static_cast<double>(is_->vid_str_->time_base.num)/is_->vid_str_->time_base.den
353         * static_cast<double>(is_->vid_str_->duration);
354}
355
356
357//: Advance to the next frame (but don't acquire an image)
358bool
359vidl_ffmpeg_istream::
360advance()
361{
362  // Quick return if the file isn't open.
363  if ( !is_open() ) {
364    return false;
365  }
366
367  // See the comment in num_frames().  This is to make sure that once
368  // we start reading frames, we'll never try to march to the end to
369  // figure out how many frames there are.
370  if ( is_->num_frames_ == -2 ) {
371    is_->num_frames_ = -1;
372  }
373
374  AVCodecContext* codec = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
375
376  AVPacket pkt;
377  int got_picture = 0;
378
379  while ( got_picture == 0 ) {
380    if ( av_read_frame( is_->fmt_cxt_, &pkt ) < 0 ) {
381      break;
382    }
383    is_->last_dts = pkt.dts;
384
385    // Make sure that the packet is from the actual video stream.
386    if (pkt.stream_index==is_->vid_index_)
387    {
388      // HACK to avoid errors from ffmpeg
389      AVPacket avpkt;
390      av_init_packet(&avpkt);
391      avpkt.data = pkt.data;
392      avpkt.size = pkt.size;
393
394      if ( avcodec_decode_video2( codec, is_->frame_, &got_picture, &avpkt ) < 0 ){
395        vcl_cerr << "vidl_ffmpeg_istream: Error decoding packet!\n";
396        return false;
397      }
398
399// Functions Removed from ffmpeg on 4/19/11
400/*
401      if ( avcodec_decode_video( codec,
402                                is_->frame_, &got_picture,
403                               pkt.data, pkt.size ) < 0 ) {
404        vcl_cerr << "vidl_ffmpeg_istream: Error decoding packet!\n";
405        return false;
406      }
407*/
408
409    }
410    av_free_packet( &pkt );
411  }
412
413  // From ffmpeg apiexample.c: some codecs, such as MPEG, transmit the
414  // I and P frame with a latency of one frame. You must do the
415  // following to have a chance to get the last frame of the video.
416  if ( !got_picture ) {
417    // Functions Removed from ffmpeg on 4/19/11
418/*
419    if ( avcodec_decode_video( codec,
420                            is_->frame_, &got_picture,
421                            NULL, 0 ) >= 0 ) {
422      is_->last_dts += int64_t(is_->vid_str_->time_base.den) * is_->vid_str_->r_frame_rate.den
423        / is_->vid_str_->time_base.num / is_->vid_str_->r_frame_rate.num;
424    }
425*/
426    if ( avcodec_decode_video2( codec, is_->frame_, &got_picture, NULL ) < 0) {
427      is_->last_dts += int64_t(is_->vid_str_->time_base.den) * is_->vid_str_->r_frame_rate.den
428        / is_->vid_str_->time_base.num / is_->vid_str_->r_frame_rate.num;
429    }
430  }
431
432  // The cached frame is out of date, whether we managed to get a new
433  // frame or not.
434  if (is_->cur_frame_)
435    is_->cur_frame_->invalidate();
436  is_->cur_frame_ = 0;
437
438  if ( ! got_picture ) {
439    is_->frame_->data[0] = NULL;
440  }
441
442  return got_picture != 0;
443}
444
445
446//: Read the next frame from the stream
447vidl_frame_sptr
448vidl_ffmpeg_istream::read_frame()
449{
450  if (advance())
451    return current_frame();
452  return NULL;
453}
454
455
456//: Return the current frame in the stream
457vidl_frame_sptr
458vidl_ffmpeg_istream::current_frame()
459{
460  // Quick return if the stream isn't valid
461  if ( !is_valid() ) {
462    return NULL;
463  }
464  AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
465  // If we have not already converted this frame, try to convert it
466  if ( !is_->cur_frame_ && is_->frame_->data[0] != 0 )
467  {
468    int width = enc->width;
469    int height = enc->height;
470
471    // Deinterlace if requested
472    if ( is_->deinterlace_ ) {
473      avpicture_deinterlace( (AVPicture*)is_->frame_, (AVPicture*)is_->frame_,
474                             enc->pix_fmt, width, height );
475    }
476
477    // If the pixel format is not recognized by vidl then convert the data into RGB_24
478    vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
479    if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
480    {
481      int size = width*height*3;
482      if (!is_->contig_memory_)
483        is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
484      else
485        is_->contig_memory_->set_size(size, VIL_PIXEL_FORMAT_BYTE);
486
487      // Reuse the previous context if we can.
488      is_->sws_context_ = sws_getCachedContext(
489        is_->sws_context_,
490        width, height, enc->pix_fmt,
491        width, height, PIX_FMT_RGB24,
492        SWS_BILINEAR,
493        NULL, NULL, NULL );
494
495      if ( is_->sws_context_ == NULL ) {
496        vcl_cerr << "vidl_ffmpeg_istream: couldn't create conversion context\n";
497        return vidl_frame_sptr();
498      }
499
500      AVPicture rgb_frame;
501      avpicture_fill(&rgb_frame, (uint8_t*)is_->contig_memory_->data(), PIX_FMT_RGB24, width, height);
502
503      sws_scale( is_->sws_context_,
504                 is_->frame_->data, is_->frame_->linesize,
505                 0, height,
506                 rgb_frame.data, rgb_frame.linesize );
507
508      is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,
509                                              VIDL_PIXEL_FORMAT_RGB_24);
510    }
511    else
512    {
513      // Test for contiguous memory.  Sometimes FFMPEG uses scanline buffers larger
514      // than the image width.  The extra memory is used in optimized decoding routines.
515      // This leads to a segmented image buffer, not supported by vidl.
516      AVPicture test_frame;
517      avpicture_fill(&test_frame, is_->frame_->data[0], enc->pix_fmt, width, height);
518      if (test_frame.data[1] == is_->frame_->data[1] &&
519          test_frame.data[2] == is_->frame_->data[2] &&
520          test_frame.linesize[0] == is_->frame_->linesize[0] &&
521          test_frame.linesize[1] == is_->frame_->linesize[1] &&
522          test_frame.linesize[2] == is_->frame_->linesize[2] )
523      {
524        is_->cur_frame_ = new vidl_shared_frame(is_->frame_->data[0], width, height, fmt);
525      }
526      // Copy the image into contiguous memory.
527      else
528      {
529        if (!is_->contig_memory_) {
530          int size = avpicture_get_size( enc->pix_fmt, width, height );
531          is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
532        }
533        avpicture_fill(&test_frame, (uint8_t*)is_->contig_memory_->data(), enc->pix_fmt, width, height);
534        av_picture_copy(&test_frame, (AVPicture*)is_->frame_, enc->pix_fmt, width, height);
535        // use a shared frame because the vil_memory_chunk is reused for each frame
536        is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,fmt);
537      }
538    }
539  }
540
541  return is_->cur_frame_;
542}
543
544
545//: Seek to the given frame number
546// \returns true if successful
547bool
548vidl_ffmpeg_istream::
549seek_frame(unsigned int frame)
550{
551  // Quick return if the stream isn't open.
552  if ( !is_open() ) {
553    return false;
554  }
555
556  // We rely on the initial cast to make sure all the operations happen in int64.
557  int64_t req_timestamp =
558    int64_t(frame + is_->frame_number_offset_)
559    * is_->vid_str_->time_base.den
560    * is_->vid_str_->r_frame_rate.den
561    / is_->vid_str_->time_base.num
562    / is_->vid_str_->r_frame_rate.num
563    + is_->start_time;
564
565  // Seek to a keyframe before the timestamp that we want.
566  int seek = av_seek_frame( is_->fmt_cxt_, is_->vid_index_, req_timestamp, AVSEEK_FLAG_BACKWARD );
567
568  if ( seek < 0 )
569    return false;
570
571  avcodec_flush_buffers( is_->vid_str_->codec );
572
573  // We got to a key frame. Forward until we get to the frame we want.
574  while ( true )
575  {
576    if ( ! advance() ) {
577      return false;
578    }
579    if ( is_->last_dts >= req_timestamp ) {
580      if ( is_->last_dts > req_timestamp ) {
581        vcl_cerr << "Warning: seek went into the future!\n";
582        return false;
583      }
584      return true;
585    }
586  }
587}
588
589#endif // vidl_ffmpeg_istream_v2_txx_