Commit 31fad34f authored by Zhicheng Yan's avatar Zhicheng Yan Committed by Francisco Massa
Browse files

[video reader] inception commit (#1303)

* [video reader] inception commit

* add method save_metadata to class VideoClips in video_utils.py

* add load_metadata() method to VideoClips class

* add Exception to not catch unexpected events such as memory errors, interrupts

* fix bugs in video_plus.py

* [video reader]remove logging. update setup.py

* remove time measurement in test_video_reader.py

* Remove glog and try making ffmpeg finding more robust

* Add ffmpeg to conda build

* Add ffmpeg to conda build [again]

* Make library path finding more robust

* Missing import

* One more missing fix for import

* Py2 compatibility and change package to av to avoid version conflict with ffmpeg

* Fix for python2

* [video reader] support to decode one stream only (e.g. video/audio stream)

* remove argument _precomputed_metadata_filepath

* remove save_metadata method

* add get_metadata method

* expose _precomputed_metadata and frame_rate arguments in video dataset __init__ method

* remove ssize_t

* remove size_t to pass CI check on Windows

* add PyInit__video_reader function to pass CI check on Windows

* minor fix to define PyInit_video_reader symbol

* Make c++ video reader optional

* Temporarily revert changes to test_io

* Revert changes to python files

* Rename files to make it private

* Fix python lint

* Fix C++ lint

* add a functor object EnumClassHash to make Enum class instances usable as key type of std::unordered_map

* fix cpp format check
parent a6a926bc
......@@ -12,6 +12,7 @@ requirements:
host:
- python
- setuptools
- av
{{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT') }}
{{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
{{ environ.get('CONDA_CPUONLY_FEATURE') }}
......@@ -21,6 +22,7 @@ requirements:
- pillow >=4.1.1
- numpy >=1.11
- six
- av
{{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
{{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
......
......@@ -7,11 +7,12 @@ from setuptools import setup, find_packages
from pkg_resources import get_distribution, DistributionNotFound
import subprocess
import distutils.command.clean
import distutils.spawn
import glob
import shutil
import torch
from torch.utils.cpp_extension import CppExtension, CUDAExtension, CUDA_HOME
from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME
def read(*names, **kwargs):
......@@ -124,6 +125,17 @@ def get_extensions():
include_dirs = [extensions_dir]
tests_include_dirs = [test_dir, models_dir]
ffmpeg_exe = distutils.spawn.find_executable('ffmpeg')
has_ffmpeg = ffmpeg_exe is not None
if has_ffmpeg:
ffmpeg_bin = os.path.dirname(ffmpeg_exe)
ffmpeg_root = os.path.dirname(ffmpeg_bin)
ffmpeg_include_dir = os.path.join(ffmpeg_root, 'include')
# TorchVision video reader
video_reader_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'cpu', 'video_reader')
video_reader_src = glob.glob(os.path.join(video_reader_src_dir, "*.cpp"))
ext_modules = [
extension(
'torchvision._C',
......@@ -140,6 +152,27 @@ def get_extensions():
extra_compile_args=extra_compile_args,
),
]
if has_ffmpeg:
ext_modules.append(
CppExtension(
'torchvision.video_reader',
video_reader_src,
include_dirs=[
video_reader_src_dir,
ffmpeg_include_dir,
extensions_dir,
],
libraries=[
'avcodec',
'avformat',
'avutil',
'swresample',
'swscale',
],
extra_compile_args=["-std=c++14"],
extra_link_args=["-std=c++14"],
)
)
return ext_modules
......@@ -179,6 +212,8 @@ setup(
"scipy": ["scipy"],
},
ext_modules=get_extensions(),
cmdclass={'build_ext': torch.utils.cpp_extension.BuildExtension,
'clean': clean}
cmdclass={
'build_ext': BuildExtension.with_options(no_python_abi_suffix=True),
'clean': clean,
}
)
Video meta-information Notation
Video File Name
video: codec, fps
audio: codec, bits per sample, sample rate
Test videos are listed below.
--------------------------------
- RATRACE_wave_f_nm_np1_fr_goo_37.avi
- source: hmdb51
- video: DivX MPEG-4
- fps: 30
- audio: N/A
- SchoolRulesHowTheyHelpUs_wave_f_nm_np1_ba_med_0.avi
- source: hmdb51
- video: DivX MPEG-4
- fps: 30
- audio: N/A
- TrumanShow_wave_f_nm_np1_fr_med_26.avi
- source: hmdb51
- video: DivX MPEG-4
- fps: 30
- audio: N/A
- v_SoccerJuggling_g23_c01.avi
- source: ucf101
- video: Xvid MPEG-4
- fps: 29.97
- audio: N/A
- v_SoccerJuggling_g24_c01.avi
- source: ucf101
- video: Xvid MPEG-4
- fps: 29.97
- audio: N/A
- R6llTwEh07w.mp4
- source: kinetics-400
- video: H-264 - MPEG-4 AVC (part 10) (avc1)
- fps: 30
- audio: MPEG AAC audio (mp4a)
- sample rate: 44.1K Hz
- SOX5yA1l24A.mp4
- source: kinetics-400
- video: H-264 - MPEG-4 AVC (part 10) (avc1)
- fps: 29.97
- audio: MPEG AAC audio (mp4a)
- sample rate: 48K Hz
- WUzgd7C1pWA.mp4
- source: kinetics-400
- video: H-264 - MPEG-4 AVC (part 10) (avc1)
- fps: 29.97
- audio: MPEG AAC audio (mp4a)
- sample rate: 48K Hz
This diff is collapsed.
#include "FfmpegAudioSampler.h"
#include <memory>
#include "FfmpegUtil.h"
using namespace std;
// Store the source (`in`) and target (`out`) audio formats; the actual
// swresample context is created later in init().
FfmpegAudioSampler::FfmpegAudioSampler(
    const AudioFormat& in,
    const AudioFormat& out)
    : inFormat_(in), outFormat_(out) {}
FfmpegAudioSampler::~FfmpegAudioSampler() {
  // Release the resampling context allocated in init(), if any.
  if (swrContext_) {
    swr_free(&swrContext_);
  }
}
// Allocate and initialize the swresample context that converts
// inFormat_ -> outFormat_. Returns 0 on success, -1 on failure.
int FfmpegAudioSampler::init() {
  swrContext_ = swr_alloc_set_opts(
      nullptr, // we're allocating a new context
      av_get_default_channel_layout(outFormat_.channels), // out_ch_layout
      static_cast<AVSampleFormat>(outFormat_.format), // out_sample_fmt
      outFormat_.samples, // out_sample_rate
      av_get_default_channel_layout(inFormat_.channels), // in_ch_layout
      static_cast<AVSampleFormat>(inFormat_.format), // in_sample_fmt
      inFormat_.samples, // in_sample_rate
      0, // log_offset
      nullptr); // log_ctx
  if (swrContext_ == nullptr) {
    LOG(ERROR) << "swr_alloc_set_opts fails";
    return -1;
  }
  int result = 0;
  // swr_init must be called before the context is usable; log both formats
  // on failure so misconfigured conversions are easy to diagnose.
  if ((result = swr_init(swrContext_)) < 0) {
    LOG(ERROR) << "swr_init failed, err: " << ffmpeg_util::getErrorDesc(result)
               << ", in -> format: " << inFormat_.format
               << ", channels: " << inFormat_.channels
               << ", samples: " << inFormat_.samples
               << ", out -> format: " << outFormat_.format
               << ", channels: " << outFormat_.channels
               << ", samples: " << outFormat_.samples;
    return -1;
  }
  return 0;
}
int64_t FfmpegAudioSampler::getSampleBytes(const AVFrame* frame) const {
auto outSamples = getOutNumSamples(frame->nb_samples);
return av_samples_get_buffer_size(
nullptr,
outFormat_.channels,
outSamples,
static_cast<AVSampleFormat>(outFormat_.format),
1);
}
// https://www.ffmpeg.org/doxygen/3.2/group__lswr.html
// Resample one decoded audio frame into the configured output format.
// Returns nullptr on flush (null frame) or on any allocation/conversion error.
unique_ptr<DecodedFrame> FfmpegAudioSampler::sample(const AVFrame* frame) {
  if (!frame) {
    return nullptr; // no flush needed for audio sampling
  }
  auto inNumSamples = frame->nb_samples;
  // Upper bound: because of resampler delay, fewer samples may come out.
  auto outNumSamples = getOutNumSamples(frame->nb_samples);
  auto outSampleSize = getSampleBytes(frame);
  AvDataPtr frameData(static_cast<uint8_t*>(av_malloc(outSampleSize)));
  // av_malloc can return nullptr on OOM; bail out instead of crashing in
  // av_samples_fill_arrays below.
  if (frameData.get() == nullptr) {
    LOG(ERROR) << "av_malloc failed, bytes: " << outSampleSize;
    return nullptr;
  }
  uint8_t* outPlanes[AVRESAMPLE_MAX_CHANNELS];
  int result = 0;
  if ((result = av_samples_fill_arrays(
           outPlanes,
           nullptr, // linesize is not needed
           frameData.get(),
           outFormat_.channels,
           outNumSamples,
           static_cast<AVSampleFormat>(outFormat_.format),
           1)) < 0) {
    LOG(ERROR) << "av_samples_fill_arrays failed, err: "
               << ffmpeg_util::getErrorDesc(result)
               << ", outNumSamples: " << outNumSamples
               << ", format: " << outFormat_.format;
    return nullptr;
  }
  if ((result = swr_convert(
           swrContext_,
           &outPlanes[0],
           outNumSamples,
           (const uint8_t**)&frame->data[0],
           inNumSamples)) < 0) {
    // was: "swr_convert faield" — fixed log-message typo
    LOG(ERROR) << "swr_convert failed, err: "
               << ffmpeg_util::getErrorDesc(result);
    return nullptr;
  }
  // result returned by swr_convert is the No. of actual output samples.
  // So update the buffer size using av_samples_get_buffer_size
  result = av_samples_get_buffer_size(
      nullptr,
      outFormat_.channels,
      result,
      static_cast<AVSampleFormat>(outFormat_.format),
      1);
  return make_unique<DecodedFrame>(std::move(frameData), result, 0);
}
/*
Because of decoding delay, the returned value is an upper bound of No. of
output samples
*/
int64_t FfmpegAudioSampler::getOutNumSamples(int inNumSamples) const {
  // swr_get_delay reports samples still buffered inside the resampler.
  // Rescale (buffered + incoming) from the input rate to the output rate,
  // rounding up so the destination buffer can never be too small.
  return av_rescale_rnd(
      swr_get_delay(swrContext_, inFormat_.samples) + inNumSamples,
      outFormat_.samples,
      inFormat_.samples,
      AV_ROUND_UP);
}
#pragma once
#include "FfmpegSampler.h"
#define AVRESAMPLE_MAX_CHANNELS 32
/**
 * Class that transcodes audio frames from one format into another
 */
class FfmpegAudioSampler : public FfmpegSampler {
 public:
  // `in`/`out` describe source and target sample rate/channels/sample format.
  explicit FfmpegAudioSampler(const AudioFormat& in, const AudioFormat& out);
  ~FfmpegAudioSampler() override;

  // Allocate and configure the swresample context; 0 on success, -1 on error.
  int init() override;
  // Bytes needed to hold `frame` once resampled to the output format.
  int64_t getSampleBytes(const AVFrame* frame) const;
  // FfmpegSampler overrides
  // returns number of bytes of the sampled data
  std::unique_ptr<DecodedFrame> sample(const AVFrame* frame) override;
  const AudioFormat& getInFormat() const {
    return inFormat_;
  }

 private:
  // Upper bound on output samples produced for `inNumSamples` input samples.
  int64_t getOutNumSamples(int inNumSamples) const;

  AudioFormat inFormat_;
  AudioFormat outFormat_;
  SwrContext* swrContext_{nullptr}; // owned; freed in the destructor
};
#include "FfmpegAudioStream.h"
#include "FfmpegUtil.h"
using namespace std;
namespace {

// True when the AudioFormat already matches the codec context's
// sample rate, channel count and sample format.
bool operator==(const AudioFormat& x, const AVCodecContext& y) {
  if (x.samples != y.sample_rate) {
    return false;
  }
  if (x.channels != y.channels) {
    return false;
  }
  return x.format == y.sample_fmt;
}

// Copy the audio parameters of a codec context into `audioFormat`
// and return it for call-chaining convenience.
AudioFormat& toAudioFormat(
    AudioFormat& audioFormat,
    const AVCodecContext& codecCtx) {
  audioFormat.samples = codecCtx.sample_rate;
  audioFormat.channels = codecCtx.channels;
  audioFormat.format = codecCtx.sample_fmt;
  return audioFormat;
}

} // namespace
// Construct an audio stream wrapper. `mediaFormat` carries the caller's
// requested output audio format (copied here, refined later in initFormat()).
FfmpegAudioStream::FfmpegAudioStream(
    AVFormatContext* inputCtx,
    int index,
    enum AVMediaType avMediaType,
    MediaFormat mediaFormat,
    double seekFrameMargin)
    : FfmpegStream(inputCtx, index, avMediaType, seekFrameMargin),
      mediaFormat_(mediaFormat) {}
// Defaulted out-of-line: members (sampler_, mediaFormat_) clean up themselves.
FfmpegAudioStream::~FfmpegAudioStream() = default;
void FfmpegAudioStream::checkStreamDecodeParams() {
  // If the caller supplied a time base (nonzero numerator), it must match
  // the time base ffmpeg reports for this stream.
  // Note: CHECK_EQ aborts the process on mismatch (glog).
  auto timeBase = getTimeBase();
  if (timeBase.first > 0) {
    CHECK_EQ(timeBase.first, inputCtx_->streams[index_]->time_base.num);
    CHECK_EQ(timeBase.second, inputCtx_->streams[index_]->time_base.den);
  }
}
void FfmpegAudioStream::updateStreamDecodeParams() {
  // When the caller did not supply a time base (numerator == 0), record the
  // stream's actual time base into the media format so callers can read it.
  auto timeBase = getTimeBase();
  if (timeBase.first == 0) {
    mediaFormat_.format.audio.timeBaseNum =
        inputCtx_->streams[index_]->time_base.num;
    mediaFormat_.format.audio.timeBaseDen =
        inputCtx_->streams[index_]->time_base.den;
  }
}
// Resolve the output audio format: any field the caller left unset is
// filled in from the stream's codec context. Returns 0 when the format is
// fully specified afterwards, -1 otherwise.
int FfmpegAudioStream::initFormat() {
  AudioFormat& format = mediaFormat_.format.audio;
  if (format.samples == 0) {
    format.samples = codecCtx_->sample_rate;
  }
  if (format.channels == 0) {
    format.channels = codecCtx_->channels;
  }
  if (format.format == AV_SAMPLE_FMT_NONE) {
    format.format = codecCtx_->sample_fmt;
    VLOG(2) << "set stream format sample_fmt: " << format.format;
  }

  checkStreamDecodeParams();
  updateStreamDecodeParams();

  // Success only if every field ended up with a usable value.
  const bool complete = format.samples > 0 && format.channels > 0 &&
      format.format != AV_SAMPLE_FMT_NONE;
  return complete ? 0 : -1;
}
// Resample the stream's current decoded frame (frame_) into the requested
// output audio format, lazily (re)building the sampler whenever the codec's
// actual input format changes mid-stream.
unique_ptr<DecodedFrame> FfmpegAudioStream::sampleFrameData() {
  AudioFormat& audioFormat = mediaFormat_.format.audio;
  // Rebuild the sampler if none exists yet, or if the codec context no
  // longer matches the sampler's configured input format.
  if (!sampler_ || !(sampler_->getInFormat() == *codecCtx_)) {
    AudioFormat newInFormat;
    newInFormat = toAudioFormat(newInFormat, *codecCtx_);
    sampler_ = make_unique<FfmpegAudioSampler>(newInFormat, audioFormat);
    VLOG(1) << "Set sampler input audio format"
            << ", samples: " << newInFormat.samples
            << ", channels: " << newInFormat.channels
            << ", format: " << newInFormat.format
            << " : output audio sampler format"
            << ", samples: " << audioFormat.samples
            << ", channels: " << audioFormat.channels
            << ", format: " << audioFormat.format;
    int ret = sampler_->init();
    if (ret < 0) {
      VLOG(1) << "Fail to initialize audio sampler";
      return nullptr;
    }
  }
  return sampler_->sample(frame_);
}
#pragma once
#include <utility>
#include "FfmpegAudioSampler.h"
#include "FfmpegStream.h"
/**
 * Class that uses the FFMPEG library to decode one audio stream.
 */
class FfmpegAudioStream : public FfmpegStream {
 public:
  explicit FfmpegAudioStream(
      AVFormatContext* inputCtx,
      int index,
      enum AVMediaType avMediaType,
      MediaFormat mediaFormat,
      double seekFrameMargin);
  ~FfmpegAudioStream() override;

  // FfmpegStream overrides
  MediaType getMediaType() const override {
    return MediaType::TYPE_AUDIO;
  }
  FormatUnion getMediaFormat() const override {
    return mediaFormat_.format;
  }
  int64_t getStartPts() const override {
    return mediaFormat_.format.audio.startPts;
  }
  int64_t getEndPts() const override {
    return mediaFormat_.format.audio.endPts;
  }
  // return numerator and denominator of time base
  std::pair<int, int> getTimeBase() const {
    return std::make_pair(
        mediaFormat_.format.audio.timeBaseNum,
        mediaFormat_.format.audio.timeBaseDen);
  }
  // Aborts (via CHECK) when a caller-provided time base disagrees with
  // the stream's actual time base.
  void checkStreamDecodeParams();
  // Records the stream's time base into mediaFormat_ when none was provided.
  void updateStreamDecodeParams();

 protected:
  int initFormat() override;
  std::unique_ptr<DecodedFrame> sampleFrameData() override;

 private:
  MediaFormat mediaFormat_; // requested + resolved output format
  // Lazily (re)created in sampleFrameData when the input format changes.
  std::unique_ptr<FfmpegAudioSampler> sampler_{nullptr};
};
#include "FfmpegDecoder.h"
#include "FfmpegAudioStream.h"
#include "FfmpegUtil.h"
#include "FfmpegVideoStream.h"
using namespace std;
static AVPacket avPkt;
namespace {

// Factory: build the stream wrapper matching `type`.
// Returns nullptr for media types this decoder does not support.
unique_ptr<FfmpegStream> createFfmpegStream(
    MediaType type,
    AVFormatContext* ctx,
    int idx,
    MediaFormat& mediaFormat,
    double seekFrameMargin) {
  enum AVMediaType avType;
  CHECK(ffmpeg_util::mapMediaType(type, &avType));
  if (type == MediaType::TYPE_VIDEO) {
    return make_unique<FfmpegVideoStream>(
        ctx, idx, avType, mediaFormat, seekFrameMargin);
  }
  if (type == MediaType::TYPE_AUDIO) {
    return make_unique<FfmpegAudioStream>(
        ctx, idx, avType, mediaFormat, seekFrameMargin);
  }
  return nullptr;
}

} // namespace
// Allocate the scratch work buffer ffmpeg's AVIO layer reads through;
// the actual input buffer is attached later via initAVIOContext().
FfmpegAvioContext::FfmpegAvioContext()
    : workBuffersize_(VIO_BUFFER_SZ),
      workBuffer_((uint8_t*)av_malloc(workBuffersize_)),
      inputFile_(nullptr),
      inputBuffer_(nullptr),
      inputBufferSize_(0) {}
int FfmpegAvioContext::initAVIOContext(const uint8_t* buffer, int64_t size) {
inputBuffer_ = buffer;
inputBufferSize_ = size;
avioCtx_ = avio_alloc_context(
workBuffer_,
workBuffersize_,
0,
reinterpret_cast<void*>(this),
&FfmpegAvioContext::readMemory,
nullptr, // no write function
&FfmpegAvioContext::seekMemory);
return 0;
}
FfmpegAvioContext::~FfmpegAvioContext() {
  /* note: the internal buffer could have changed, and be != workBuffer_ */
  if (avioCtx_) {
    // The AVIO context owns (a possibly reallocated) work buffer: free the
    // buffer it currently points at, then the context itself.
    av_freep(&avioCtx_->buffer);
    av_freep(&avioCtx_);
  } else {
    // initAVIOContext was never called (or failed); workBuffer_ is still ours.
    av_freep(&workBuffer_);
  }
  if (inputFile_) {
    fclose(inputFile_);
  }
}
// Read up to buf_size bytes at the current offset.
// Only supported in memory mode; returns -1 otherwise.
int FfmpegAvioContext::read(uint8_t* buf, int buf_size) {
  return inputBuffer_ ? readMemory(this, buf, buf_size) : -1;
}
// AVIO read callback: copy up to buf_size bytes of the in-memory input
// into `buf`, starting at the current offset. `opaque` is the
// FfmpegAvioContext instance.
int FfmpegAvioContext::readMemory(void* opaque, uint8_t* buf, int buf_size) {
  FfmpegAvioContext* h = static_cast<FfmpegAvioContext*>(opaque);
  if (buf_size < 0) {
    return -1;
  }
  // Bytes remaining between the current offset and the end of the input.
  int remainder = h->inputBufferSize_ - h->offset_;
  if (remainder <= 0) {
    // End of stream. FFmpeg deprecates returning 0 from read_packet;
    // AVERROR_EOF is the documented end-of-stream result (previously this
    // returned 0 when exactly at the end).
    return AVERROR_EOF;
  }
  int r = buf_size < remainder ? buf_size : remainder;
  memcpy(buf, h->inputBuffer_ + h->offset_, r);
  h->offset_ += r;
  return r;
}
// Seek within the in-memory input.
// Only supported in memory mode; returns -1 otherwise.
int64_t FfmpegAvioContext::seek(int64_t offset, int whence) {
  return inputBuffer_ ? seekMemory(this, offset, whence) : -1;
}
// AVIO seek callback over the in-memory buffer. Supports the standard
// whence values plus ffmpeg's AVSEEK_SIZE query; returns the new offset
// (or the total size for AVSEEK_SIZE).
int64_t FfmpegAvioContext::seekMemory(
    void* opaque,
    int64_t offset,
    int whence) {
  auto* ctx = static_cast<FfmpegAvioContext*>(opaque);
  switch (whence) {
    case AVSEEK_SIZE: // ffmpeg asks for the total stream size
      return ctx->inputBufferSize_;
    case SEEK_CUR: // relative to the current position
      ctx->offset_ += offset;
      break;
    case SEEK_END: // relative to the end of the buffer
      ctx->offset_ = ctx->inputBufferSize_ + offset;
      break;
    case SEEK_SET: // absolute position from the beginning
      ctx->offset_ = offset;
      break;
  }
  return ctx->offset_;
}
// Open the input (file or memory), probe its container format, discover the
// requested streams, and report each stream's media format to the output.
// Returns 0 on success, negative on failure. All partially-built state is
// released via cleanUp() on every error path.
int FfmpegDecoder::init(
    const std::string& filename,
    bool isDecodeFile,
    FfmpegAvioContext& ioctx,
    DecoderOutput& decoderOutput) {
  cleanUp();
  int ret = 0;
  if (!isDecodeFile) {
    // Memory mode: demux through the caller's custom AVIO context.
    formatCtx_ = avformat_alloc_context();
    if (!formatCtx_) {
      LOG(ERROR) << "avformat_alloc_context failed";
      return -1;
    }
    formatCtx_->pb = ioctx.get_avio();
    formatCtx_->flags |= AVFMT_FLAG_CUSTOM_IO;

    // Determining the input format:
    int probeSz = AVPROBE_SIZE + AVPROBE_PADDING_SIZE;
    uint8_t* probe((uint8_t*)av_malloc(probeSz));
    memset(probe, 0, probeSz);
    int len = ioctx.read(probe, probeSz - AVPROBE_PADDING_SIZE);
    if (len < probeSz - AVPROBE_PADDING_SIZE) {
      LOG(ERROR) << "Insufficient data to determine video format";
      av_freep(&probe);
      cleanUp(); // also releases formatCtx_ allocated above (was leaked)
      return -1;
    }
    // seek back to start of stream
    ioctx.seek(0, SEEK_SET);

    unique_ptr<AVProbeData> probeData(new AVProbeData());
    probeData->buf = probe;
    probeData->buf_size = len;
    probeData->filename = "";
    // Determine the input-format:
    formatCtx_->iformat = av_probe_input_format(probeData.get(), 1);
    if (formatCtx_->iformat == nullptr) {
      LOG(ERROR) << "av_probe_input_format fails";
      // AVProbeData does not own `buf`; it must be freed explicitly here
      // (previously leaked on this path), and formatCtx_ released too.
      av_freep(&probe);
      cleanUp();
      return -1;
    }
    VLOG(1) << "av_probe_input_format succeeds";
    // Free the probe buffer ourselves (never through AVProbeData) —
    // this is to avoid the double-free error.
    av_freep(&probe);
    ret = avformat_open_input(&formatCtx_, "", nullptr, nullptr);
  } else {
    ret = avformat_open_input(&formatCtx_, filename.c_str(), nullptr, nullptr);
  }
  if (ret < 0) {
    // On failure avformat_open_input frees formatCtx_ and nulls the pointer,
    // so cleanUp() here is safe either way.
    LOG(ERROR) << "avformat_open_input failed, error: "
               << ffmpeg_util::getErrorDesc(ret);
    cleanUp();
    return ret;
  }
  ret = avformat_find_stream_info(formatCtx_, nullptr);
  if (ret < 0) {
    LOG(ERROR) << "avformat_find_stream_info failed, error: "
               << ffmpeg_util::getErrorDesc(ret);
    cleanUp();
    return ret;
  }
  if (!initStreams()) {
    LOG(ERROR) << "Cannot activate streams";
    cleanUp();
    return -1;
  }
  // Tell the output sink which media types/formats to expect.
  for (auto& stream : streams_) {
    MediaType mediaType = stream.second->getMediaType();
    decoderOutput.initMediaType(mediaType, stream.second->getMediaFormat());
  }
  VLOG(1) << "FfmpegDecoder initialized";
  return 0;
}
// Decode a media file on disk. File mode lets ffmpeg open the path itself,
// so the AVIO context is constructed but left unused.
int FfmpegDecoder::decodeFile(
    unique_ptr<DecoderParameters> params,
    const string& fileName,
    DecoderOutput& decoderOutput) {
  VLOG(1) << "decode file: " << fileName;
  FfmpegAvioContext ioctx;
  return decodeLoop(std::move(params), fileName, true, ioctx, decoderOutput);
}
// Decode media held entirely in a caller-owned memory buffer by wiring it
// into a custom AVIO context first.
int FfmpegDecoder::decodeMemory(
    unique_ptr<DecoderParameters> params,
    const uint8_t* buffer,
    int64_t size,
    DecoderOutput& decoderOutput) {
  VLOG(1) << "decode video data in memory";
  FfmpegAvioContext ioctx;
  const int initRet = ioctx.initAVIOContext(buffer, size);
  if (initRet != 0) {
    return initRet;
  }
  return decodeLoop(std::move(params), string(""), false, ioctx, decoderOutput);
}
// Release all per-decode state: drain and destroy every stream, then close
// the format context. Safe to call repeatedly (no-op when nothing is open).
void FfmpegDecoder::cleanUp() {
  if (formatCtx_) {
    for (auto& stream : streams_) {
      // Drain stream buffers.
      // getPtsOnly=1 discards frame data into a throwaway output — we only
      // need to empty the codec's internal queue before destruction.
      DecoderOutput decoderOutput;
      stream.second->flush(1, decoderOutput);
      stream.second.reset();
    }
    streams_.clear();
    avformat_close_input(&formatCtx_);
  }
}
// Look up the active stream for an ffmpeg stream index.
// Returns nullptr when the caller did not request that stream.
FfmpegStream* FfmpegDecoder::findStreamByIndex(int streamIndex) const {
  const auto it = streams_.find(streamIndex);
  if (it == streams_.end()) {
    return nullptr;
  }
  return it->second.get();
}
/*
Reference implementation:
https://ffmpeg.org/doxygen/3.4/demuxing_decoding_8c-example.html
*/
// Main demux/decode loop: read packets, route each to its stream, and stop
// when every stream has passed its end pts (or input is exhausted).
// Returns 0 on success, negative on failure.
int FfmpegDecoder::decodeLoop(
    unique_ptr<DecoderParameters> params,
    const std::string& filename,
    bool isDecodeFile,
    FfmpegAvioContext& ioctx,
    DecoderOutput& decoderOutput) {
  params_ = std::move(params);
  int ret = init(filename, isDecodeFile, ioctx, decoderOutput);
  if (ret < 0) {
    return ret;
  }
  // Use a function-local packet instead of the file-scope static one so the
  // loop is reentrant: two concurrent decode calls previously shared (and
  // could corrupt) the same global AVPacket.
  AVPacket avPkt;
  av_init_packet(&avPkt);
  avPkt.data = nullptr;
  avPkt.size = 0;

  int result = 0;
  bool ptsInRange = true;
  while (ptsInRange) {
    result = av_read_frame(formatCtx_, &avPkt);
    if (result == AVERROR(EAGAIN)) {
      VLOG(1) << "Decoder is busy";
      ret = 0;
      break;
    } else if (result == AVERROR_EOF) {
      VLOG(1) << "Stream decoding is completed";
      ret = 0;
      break;
    } else if (result < 0) {
      VLOG(1) << "av_read_frame fails. Break decoder loop. Error: "
              << ffmpeg_util::getErrorDesc(result);
      ret = result;
      break;
    }
    ret = 0;
    auto stream = findStreamByIndex(avPkt.stream_index);
    if (stream == nullptr) {
      // the packet is from a stream the caller is not interested. Ignore it
      VLOG(2) << "avPkt ignored. stream index: " << avPkt.stream_index;
      // Need to free the memory of AVPacket. Otherwise, memory leak happens
      av_packet_unref(&avPkt);
      continue;
    }
    // Retry sending the packet while the codec drains its internal queue.
    do {
      result = stream->sendPacket(&avPkt);
      if (result == AVERROR(EAGAIN)) {
        VLOG(2) << "avcodec_send_packet returns AVERROR(EAGAIN)";
        // start to receive available frames from internal buffer
        stream->receiveAvailFrames(params_->getPtsOnly, decoderOutput);
        if (isPtsExceedRange()) {
          // exit the most-outer while loop
          VLOG(1) << "In all streams, exceed the end pts. Exit decoding loop";
          ret = 0;
          ptsInRange = false;
          break;
        }
      } else if (result < 0) {
        LOG(WARNING) << "avcodec_send_packet failed. Error: "
                     << ffmpeg_util::getErrorDesc(result);
        ret = result;
        break;
      } else {
        VLOG(2) << "avcodec_send_packet succeeds";
        // succeed. Read the next AVPacket and send out it
        break;
      }
    } while (ptsInRange);
    // Need to free the memory of AVPacket. Otherwise, memory leak happens
    av_packet_unref(&avPkt);
  }
  /* flush cached frames */
  flushStreams(decoderOutput);
  return ret;
}
bool FfmpegDecoder::initStreams() {
for (auto it = params_->formats.begin(); it != params_->formats.end(); ++it) {
AVMediaType mediaType;
if (!ffmpeg_util::mapMediaType(it->first, &mediaType)) {
LOG(ERROR) << "Unknown media type: " << it->first;
return false;
}
int streamIdx =
av_find_best_stream(formatCtx_, mediaType, -1, -1, nullptr, 0);
if (streamIdx >= 0) {
VLOG(2) << "find stream index: " << streamIdx;
auto stream = createFfmpegStream(
it->first,
formatCtx_,
streamIdx,
it->second,
params_->seekFrameMargin);
CHECK(stream);
if (stream->openCodecContext() < 0) {
LOG(ERROR) << "Cannot open codec. Stream index: " << streamIdx;
return false;
}
streams_.emplace(streamIdx, move(stream));
} else {
VLOG(1) << "Cannot open find stream of type " << it->first;
}
}
// Seek frames in each stream
int ret = 0;
for (auto& stream : streams_) {
auto startPts = stream.second->getStartPts();
VLOG(1) << "stream: " << stream.first << " startPts: " << startPts;
if (startPts > 0 && (ret = stream.second->seekFrame(startPts)) < 0) {
LOG(WARNING) << "seekFrame in stream fails";
return false;
}
}
VLOG(1) << "initStreams succeeds";
return true;
}
bool FfmpegDecoder::isPtsExceedRange() {
bool exceed = true;
for (auto& stream : streams_) {
exceed = exceed && stream.second->isFramePtsExceedRange();
}
return exceed;
}
// Drain each stream's decoder-internal buffers into the output sink.
void FfmpegDecoder::flushStreams(DecoderOutput& decoderOutput) {
  for (auto& entry : streams_) {
    entry.second->flush(params_->getPtsOnly, decoderOutput);
  }
}
#pragma once
#include <string>
#include <vector>
#include "FfmpegHeaders.h"
#include "FfmpegStream.h"
#include "Interface.h"
#define VIO_BUFFER_SZ 81920
#define AVPROBE_SIZE 8192
// Caller-supplied knobs controlling a single decode run.
class DecoderParameters {
 public:
  // Requested media types (video and/or audio) mapped to the desired output
  // format of each; EnumClassHash makes the enum class usable as a map key.
  std::unordered_map<MediaType, MediaFormat, EnumClassHash> formats;
  // av_seek_frame is imprecise so seek to a timestamp earlier by a margin
  // The unit of margin is second
  double seekFrameMargin{1.0};
  // When getPtsOnly is set to 1, we only get pts of each frame and do not
  // output frame data. It will be much faster
  int64_t getPtsOnly{0};
};
// Custom AVIO adapter letting ffmpeg demux directly from a caller-owned
// in-memory buffer (read + seek callbacks; writing is not supported).
class FfmpegAvioContext {
 public:
  FfmpegAvioContext();
  // Attach `buffer`/`size` and build the AVIO context; 0 on success.
  int initAVIOContext(const uint8_t* buffer, int64_t size);
  ~FfmpegAvioContext();
  // Read up to buf_size bytes at the current offset (memory mode only).
  int read(uint8_t* buf, int buf_size);
  // AVIO-compatible static callbacks; `opaque` is the context instance.
  static int readMemory(void* opaque, uint8_t* buf, int buf_size);
  int64_t seek(int64_t offset, int whence);
  static int64_t seekMemory(void* opaque, int64_t offset, int whence);
  AVIOContext* get_avio() {
    return avioCtx_;
  }

 private:
  int workBuffersize_;
  uint8_t* workBuffer_; // scratch buffer handed to avio_alloc_context
  // for file mode
  FILE* inputFile_;
  // for memory mode
  const uint8_t* inputBuffer_; // non-owning view of the caller's data
  int inputBufferSize_;
  int offset_ = 0; // current read position within inputBuffer_
  AVIOContext* avioCtx_{nullptr};
};
// Drives ffmpeg demuxing/decoding for a media file or an in-memory buffer,
// dispatching decoded frames of each requested stream into a DecoderOutput.
class FfmpegDecoder {
 public:
  FfmpegDecoder() {
    // Registers all muxers/demuxers/codecs with ffmpeg (required before
    // ffmpeg 4.0; a deprecated no-op in later versions).
    av_register_all();
  }
  ~FfmpegDecoder() {
    cleanUp();
  }
  // return 0 on success
  // return negative number on failure
  int decodeFile(
      std::unique_ptr<DecoderParameters> params,
      const std::string& filename,
      DecoderOutput& decoderOutput);
  // return 0 on success
  // return negative number on failure
  int decodeMemory(
      std::unique_ptr<DecoderParameters> params,
      const uint8_t* buffer,
      int64_t size,
      DecoderOutput& decoderOutput);
  // Drain and destroy all streams and close the format context.
  void cleanUp();

 private:
  // Returns the active stream for `streamIndex`, or nullptr if unrequested.
  FfmpegStream* findStreamByIndex(int streamIndex) const;
  // Open input, probe the container, and discover requested streams.
  int init(
      const std::string& filename,
      bool isDecodeFile,
      FfmpegAvioContext& ioctx,
      DecoderOutput& decoderOutput);
  // return 0 on success
  // return negative number on failure
  int decodeLoop(
      std::unique_ptr<DecoderParameters> params,
      const std::string& filename,
      bool isDecodeFile,
      FfmpegAvioContext& ioctx,
      DecoderOutput& decoderOutput);
  bool initStreams();
  void flushStreams(DecoderOutput& decoderOutput);
  // whether in all streams, the pts of most recent frame exceeds range
  bool isPtsExceedRange();

  // Active streams keyed by ffmpeg stream index.
  std::unordered_map<int, std::unique_ptr<FfmpegStream>> streams_;
  AVFormatContext* formatCtx_{nullptr};
  std::unique_ptr<DecoderParameters> params_{nullptr};
};
#pragma once
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavformat/avio.h>
#include <libavutil/avutil.h>
#include <libavutil/imgutils.h>
#include <libavutil/log.h>
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
#include <libswscale/swscale.h>
}
#pragma once
#include "FfmpegHeaders.h"
#include "Interface.h"
/**
 * Class that samples data from an AVFrame
 */
// Abstract interface: converts decoded AVFrames into output DecodedFrames.
class FfmpegSampler {
 public:
  virtual ~FfmpegSampler() = default;
  // return 0 on success and negative number on failure
  virtual int init() = 0;
  // sample from the given frame; implementations treat a null frame as a
  // flush request
  virtual std::unique_ptr<DecodedFrame> sample(const AVFrame* frame) = 0;
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment