#pragma once #include #include #include #include extern "C" { #include #include void av_free(void* ptr); } struct avDeleter { void operator()(uint8_t* p) const { av_free(p); } }; const AVPixelFormat defaultVideoPixelFormat = AV_PIX_FMT_RGB24; const AVSampleFormat defaultAudioSampleFormat = AV_SAMPLE_FMT_FLT; using AvDataPtr = std::unique_ptr; enum MediaType : uint32_t { TYPE_VIDEO = 1, TYPE_AUDIO = 2, }; struct EnumClassHash { template uint32_t operator()(T t) const { return static_cast(t); } }; struct VideoFormat { // fields are initialized for the auto detection // caller can specify some/all of field values if specific output is desirable int width{0}; // width in pixels int height{0}; // height in pixels int minDimension{0}; // choose min dimension and rescale accordingly // Output image pixel format. data type AVPixelFormat AVPixelFormat format{defaultVideoPixelFormat}; // type AVPixelFormat int64_t startPts{0}, endPts{0}; // Start and end presentation timestamp int timeBaseNum{0}; int timeBaseDen{1}; // numerator and denominator of time base float fps{0.0}; }; struct AudioFormat { // fields are initialized for the auto detection // caller can specify some/all of field values if specific output is desirable int samples{0}; // number samples per second (frequency) int channels{0}; // number of channels AVSampleFormat format{defaultAudioSampleFormat}; // type AVSampleFormat int64_t startPts{0}, endPts{0}; // Start and end presentation timestamp int timeBaseNum{0}; int timeBaseDen{1}; // numerator and denominator of time base }; union FormatUnion { FormatUnion() {} VideoFormat video; AudioFormat audio; }; struct MediaFormat { MediaFormat() {} MediaFormat(const MediaFormat& mediaFormat) : type(mediaFormat.type) { if (type == MediaType::TYPE_VIDEO) { format.video = mediaFormat.format.video; } else if (type == MediaType::TYPE_AUDIO) { format.audio = mediaFormat.format.audio; } } MediaFormat(MediaType mediaType) : type(mediaType) { if (mediaType == MediaType::TYPE_VIDEO) { format.video = VideoFormat(); } else if (mediaType == MediaType::TYPE_AUDIO) { format.audio = AudioFormat(); } } // media type MediaType type; // format data FormatUnion format; }; class DecodedFrame { public: explicit DecodedFrame() : frame_(nullptr), frameSize_(0), pts_(0) {} explicit DecodedFrame(AvDataPtr frame, int frameSize, int64_t pts) : frame_(std::move(frame)), frameSize_(frameSize), pts_(pts) {} AvDataPtr frame_{nullptr}; int frameSize_{0}; int64_t pts_{0}; }; struct MediaData { MediaData() {} MediaData(FormatUnion format) : format_(format) {} FormatUnion format_; std::vector> frames_; }; class DecoderOutput { public: explicit DecoderOutput() {} ~DecoderOutput() {} void initMediaType(MediaType mediaType, FormatUnion format); void addMediaFrame(MediaType mediaType, std::unique_ptr frame); void clear(); std::unordered_map media_data_; };