Unverified Commit 8e2bd0e0 authored by Bruno Korbar's avatar Bruno Korbar Committed by GitHub
Browse files

[docs] descriptive comments of the decoder C++ api (#3754)



* document video_sampler.

* minor docs for decoder.cpp

* descriptive comments for the stream.c

* descriptive comments for decoder.cpp

* per-stream descriptive comments

* Fixing CLANG hopefully

* addressing prabhat's comments

* typo I think
Co-authored-by: default avatarFrancisco Massa <fvsmassa@gmail.com>
Co-authored-by: default avatarVasilis Vryniotis <datumbox@users.noreply.github.com>
parent 38175edb
...@@ -68,6 +68,7 @@ int AudioStream::initFormat() { ...@@ -68,6 +68,7 @@ int AudioStream::initFormat() {
: -1; : -1;
} }
// copies audio sample bytes via swr_convert call in audio_sampler.cpp
int AudioStream::copyFrameBytes(ByteStorage* out, bool flush) { int AudioStream::copyFrameBytes(ByteStorage* out, bool flush) {
if (!sampler_) { if (!sampler_) {
sampler_ = std::make_unique<AudioSampler>(codecCtx_); sampler_ = std::make_unique<AudioSampler>(codecCtx_);
...@@ -95,6 +96,8 @@ int AudioStream::copyFrameBytes(ByteStorage* out, bool flush) { ...@@ -95,6 +96,8 @@ int AudioStream::copyFrameBytes(ByteStorage* out, bool flush) {
<< ", channels: " << format_.format.audio.channels << ", channels: " << format_.format.audio.channels
<< ", format: " << format_.format.audio.format; << ", format: " << format_.format.audio.format;
} }
// calls to a sampler that converts the audio samples and copies them to the
// out buffer via ffmpeg::swr_convert
return sampler_->sample(flush ? nullptr : frame_, out); return sampler_->sample(flush ? nullptr : frame_, out);
} }
......
...@@ -218,6 +218,12 @@ Decoder::~Decoder() { ...@@ -218,6 +218,12 @@ Decoder::~Decoder() {
cleanUp(); cleanUp();
} }
// Initialise the format context that holds information about the container and
// fill it with minimal information about the format (codecs are not opened
// here). Function reads in information about the streams from the container
// into inputCtx and then passes it to decoder::openStreams. Finally, if seek is
// specified within the decoder parameters, it seeks into the correct frame
// (note, the seek defined here is "precise" seek).
bool Decoder::init( bool Decoder::init(
const DecoderParameters& params, const DecoderParameters& params,
DecoderInCallback&& in, DecoderInCallback&& in,
...@@ -384,7 +390,7 @@ bool Decoder::init( ...@@ -384,7 +390,7 @@ bool Decoder::init(
cleanUp(); cleanUp();
return false; return false;
} }
// SyncDecoder inherits Decoder which would override onInit.
onInit(); onInit();
if (params.startOffset != 0) { if (params.startOffset != 0) {
...@@ -399,6 +405,8 @@ bool Decoder::init( ...@@ -399,6 +405,8 @@ bool Decoder::init(
return true; return true;
} }
// open appropriate CODEC for every type of stream and move it to the class
// variable `streams_` and make sure it is in range for decoding
bool Decoder::openStreams(std::vector<DecoderMetadata>* metadata) { bool Decoder::openStreams(std::vector<DecoderMetadata>* metadata) {
for (unsigned int i = 0; i < inputCtx_->nb_streams; i++) { for (unsigned int i = 0; i < inputCtx_->nb_streams; i++) {
// - find the corespondent format at params_.formats set // - find the corespondent format at params_.formats set
...@@ -485,6 +493,10 @@ void Decoder::cleanUp() { ...@@ -485,6 +493,10 @@ void Decoder::cleanUp() {
seekableBuffer_.shutdown(); seekableBuffer_.shutdown();
} }
// function does actual work, derived class calls it in working thread
// periodically. On success method returns 0, ENODATA on EOF, ETIMEDOUT if
// no frames got decoded in the specified timeout time, and error on
// unrecoverable error.
int Decoder::getFrame(size_t workingTimeInMs) { int Decoder::getFrame(size_t workingTimeInMs) {
if (inRange_.none()) { if (inRange_.none()) {
return ENODATA; return ENODATA;
...@@ -601,11 +613,13 @@ int Decoder::getFrame(size_t workingTimeInMs) { ...@@ -601,11 +613,13 @@ int Decoder::getFrame(size_t workingTimeInMs) {
return 0; return 0;
} }
// find stream by stream index
Stream* Decoder::findByIndex(int streamIndex) const { Stream* Decoder::findByIndex(int streamIndex) const {
auto it = streams_.find(streamIndex); auto it = streams_.find(streamIndex);
return it != streams_.end() ? it->second.get() : nullptr; return it != streams_.end() ? it->second.get() : nullptr;
} }
// find stream by type; note finds only the first stream of a given type
Stream* Decoder::findByType(const MediaFormat& format) const { Stream* Decoder::findByType(const MediaFormat& format) const {
for (auto& stream : streams_) { for (auto& stream : streams_) {
if (stream.second->getMediaFormat().type == format.type) { if (stream.second->getMediaFormat().type == format.type) {
...@@ -615,6 +629,8 @@ Stream* Decoder::findByType(const MediaFormat& format) const { ...@@ -615,6 +629,8 @@ Stream* Decoder::findByType(const MediaFormat& format) const {
return nullptr; return nullptr;
} }
// given the stream and packet, decode the frame buffers into the
// DecoderOutputMessage data structure via stream::decodePacket function.
int Decoder::processPacket( int Decoder::processPacket(
Stream* stream, Stream* stream,
AVPacket* packet, AVPacket* packet,
......
...@@ -24,10 +24,15 @@ Stream::~Stream() { ...@@ -24,10 +24,15 @@ Stream::~Stream() {
} }
} }
// look up the proper decoder CODEC for this stream by querying
// avcodec_find_decoder with the stream's codec id
AVCodec* Stream::findCodec(AVCodecParameters* params) { AVCodec* Stream::findCodec(AVCodecParameters* params) {
return avcodec_find_decoder(params->codec_id); return avcodec_find_decoder(params->codec_id);
} }
// Allocate memory for the AVCodecContext, which will hold the context for
// decode/encode process. Then fill this codec context with CODEC parameters
// defined in stream parameters. Open the codec, and allocate the global frame
// defined in the header file
int Stream::openCodec(std::vector<DecoderMetadata>* metadata) { int Stream::openCodec(std::vector<DecoderMetadata>* metadata) {
AVStream* steam = inputCtx_->streams[format_.stream]; AVStream* steam = inputCtx_->streams[format_.stream];
...@@ -93,6 +98,9 @@ int Stream::openCodec(std::vector<DecoderMetadata>* metadata) { ...@@ -93,6 +98,9 @@ int Stream::openCodec(std::vector<DecoderMetadata>* metadata) {
return ret; return ret;
} }
// send the raw data packet (compressed frame) to the decoder, through the codec
// context and receive the raw data frame (uncompressed frame) from the
// decoder, through the same codec context
int Stream::analyzePacket(const AVPacket* packet, bool* gotFrame) { int Stream::analyzePacket(const AVPacket* packet, bool* gotFrame) {
int consumed = 0; int consumed = 0;
int result = avcodec_send_packet(codecCtx_, packet); int result = avcodec_send_packet(codecCtx_, packet);
...@@ -134,6 +142,9 @@ int Stream::analyzePacket(const AVPacket* packet, bool* gotFrame) { ...@@ -134,6 +142,9 @@ int Stream::analyzePacket(const AVPacket* packet, bool* gotFrame) {
return consumed; return consumed;
} }
// General decoding function:
// given the packet, analyse the metadata, and write the
// metadata and the buffer to the DecoderOutputImage.
int Stream::decodePacket( int Stream::decodePacket(
const AVPacket* packet, const AVPacket* packet,
DecoderOutputMessage* out, DecoderOutputMessage* out,
...@@ -167,6 +178,9 @@ int Stream::flush(DecoderOutputMessage* out, bool headerOnly) { ...@@ -167,6 +178,9 @@ int Stream::flush(DecoderOutputMessage* out, bool headerOnly) {
return 1; return 1;
} }
// Sets the header and payload via stream::setHeader and copyFrameBytes
// functions that are defined in type stream subclass (VideoStream, AudioStream,
// ...)
int Stream::getMessage(DecoderOutputMessage* out, bool flush, bool headerOnly) { int Stream::getMessage(DecoderOutputMessage* out, bool flush, bool headerOnly) {
if (flush) { if (flush) {
// only flush of audio frames makes sense // only flush of audio frames makes sense
......
...@@ -7,6 +7,17 @@ ...@@ -7,6 +7,17 @@
namespace ffmpeg { namespace ffmpeg {
namespace { namespace {
// Setup the data pointers and linesizes based on the specified image
// parameters and the provided array. This sets up "planes" to point to a
// "buffer"
// NOTE: this is most likely culprit behind #3534
//
// Args:
//   fmt: desired output video format
//   buffer: source constant image buffer (in a different format) that will
//     contain the final image after SWScale
//   planes: destination data pointers to be filled
//   lineSize: target destination linesize (always {0})
int preparePlanes( int preparePlanes(
const VideoFormat& fmt, const VideoFormat& fmt,
const uint8_t* buffer, const uint8_t* buffer,
...@@ -14,6 +25,7 @@ int preparePlanes( ...@@ -14,6 +25,7 @@ int preparePlanes(
int* lineSize) { int* lineSize) {
int result; int result;
// NOTE: the trailing 1 passed to av_image_fill_arrays is the buffer
// alignment value
if ((result = av_image_fill_arrays( if ((result = av_image_fill_arrays(
planes, planes,
lineSize, lineSize,
...@@ -28,6 +40,18 @@ int preparePlanes( ...@@ -28,6 +40,18 @@ int preparePlanes(
return result; return result;
} }
// Scale (and crop) the image slice in srcSlice and put the resulting scaled
// slice to `planes` buffer, which is mapped to be `out` via preparePlanes as
// `sws_scale` cannot access buffers directly.
//
// Args:
//   context: SWSContext allocated on line 119 (if crop, optional) or
//     line 163 (if scale)
//   srcSlice: frame data in YUV420P
//   srcStride: the array containing the strides for each plane of the source
//     image (from AVFrame->linesize[0])
// out: destination buffer
// planes: indirect destination buffer (mapped to "out" via preparePlanes)
// lines: destination linesize; constant {0}
int transformImage( int transformImage(
SwsContext* context, SwsContext* context,
const uint8_t* const srcSlice[], const uint8_t* const srcSlice[],
...@@ -41,7 +65,7 @@ int transformImage( ...@@ -41,7 +65,7 @@ int transformImage(
if ((result = preparePlanes(outFormat, out, planes, lines)) < 0) { if ((result = preparePlanes(outFormat, out, planes, lines)) < 0) {
return result; return result;
} }
// NOTE: the literal 0 passed to sws_scale is srcSliceY (the index of the
// first row of the source slice), not a stride
if ((result = sws_scale( if ((result = sws_scale(
context, srcSlice, srcStride, 0, inFormat.height, planes, lines)) < context, srcSlice, srcStride, 0, inFormat.height, planes, lines)) <
0) { 0) {
...@@ -153,6 +177,12 @@ bool VideoSampler::init(const SamplerParameters& params) { ...@@ -153,6 +177,12 @@ bool VideoSampler::init(const SamplerParameters& params) {
return scaleContext_ != nullptr; return scaleContext_ != nullptr;
} }
// Main body of the sample function called from one of the overloads below
//
// Args:
// srcSlice: decoded AVFrame->data prepared buffer
// srcStride: linesize (usually obtained from AVFrame->linesize)
// out: return buffer (ByteStorage*)
int VideoSampler::sample( int VideoSampler::sample(
const uint8_t* const srcSlice[], const uint8_t* const srcSlice[],
int srcStride[], int srcStride[],
...@@ -221,6 +251,7 @@ int VideoSampler::sample( ...@@ -221,6 +251,7 @@ int VideoSampler::sample(
return outImageSize; return outImageSize;
} }
// Call from `video_stream.cpp::114` - occurs during file reads
int VideoSampler::sample(AVFrame* frame, ByteStorage* out) { int VideoSampler::sample(AVFrame* frame, ByteStorage* out) {
if (!frame) { if (!frame) {
return 0; // no flush for videos return 0; // no flush for videos
...@@ -229,6 +260,7 @@ int VideoSampler::sample(AVFrame* frame, ByteStorage* out) { ...@@ -229,6 +260,7 @@ int VideoSampler::sample(AVFrame* frame, ByteStorage* out) {
return sample(frame->data, frame->linesize, out); return sample(frame->data, frame->linesize, out);
} }
// Call from `video_stream.cpp::114` - not sure when this occurs
int VideoSampler::sample(const ByteStorage* in, ByteStorage* out) { int VideoSampler::sample(const ByteStorage* in, ByteStorage* out) {
if (!in) { if (!in) {
return 0; // no flush for videos return 0; // no flush for videos
......
...@@ -82,6 +82,7 @@ int VideoStream::initFormat() { ...@@ -82,6 +82,7 @@ int VideoStream::initFormat() {
: -1; : -1;
} }
// copies frame bytes via sws_scale call in video_sampler.cpp
int VideoStream::copyFrameBytes(ByteStorage* out, bool flush) { int VideoStream::copyFrameBytes(ByteStorage* out, bool flush) {
if (!sampler_) { if (!sampler_) {
sampler_ = std::make_unique<VideoSampler>(SWS_AREA, loggingUuid_); sampler_ = std::make_unique<VideoSampler>(SWS_AREA, loggingUuid_);
...@@ -112,7 +113,9 @@ int VideoStream::copyFrameBytes(ByteStorage* out, bool flush) { ...@@ -112,7 +113,9 @@ int VideoStream::copyFrameBytes(ByteStorage* out, bool flush) {
<< ", minDimension: " << format_.format.video.minDimension << ", minDimension: " << format_.format.video.minDimension
<< ", crop: " << format_.format.video.cropImage; << ", crop: " << format_.format.video.cropImage;
} }
// calls to a sampler that converts the frame from YUV422 to RGB24, and
// optionally crops and resizes the frame. Frame bytes are copied from
// frame_->data to out buffer
return sampler_->sample(flush ? nullptr : frame_, out); return sampler_->sample(flush ? nullptr : frame_, out);
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment