#pragma once

#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <vector>

namespace ffmpeg {

// Bit mask of media formats; keep the values in the form 2^n so they can be
// combined.
enum MediaType : size_t {
  TYPE_AUDIO = 1,
  TYPE_VIDEO = 2,
  TYPE_SUBTITLE = 4,
  TYPE_CC = 8, // closed captions from transport streams
};

// Audio format description.
// Fields are initialized for auto detection; the caller can specify some or
// all of the field values if a specific output is desirable.
struct AudioFormat {
  // Two audio formats are equal when sample format, sample rate and channel
  // count all match (padding is ignored).
  bool operator==(const AudioFormat& x) const {
    return x.format == format && x.samples == samples &&
        x.channels == channels;
  }

  size_t samples{0}; // number of samples per second (frequency)
  size_t channels{0}; // number of channels
  ssize_t format{-1}; // AVSampleFormat, auto AV_SAMPLE_FMT_NONE
  // Zero-initialized so a default-constructed object never carries
  // indeterminate bytes when it is copied through FormatUnion.
  size_t padding[2]{};
  // -- alignment 40 bytes (with 8-byte size_t)
};

// Video format description.
// Fields are initialized for auto detection; the caller can specify some or
// all of the field values if a specific output is desirable.
struct VideoFormat {
  // Two video formats are equal when pixel format, width and height match;
  // minDimension and cropImage do not take part in the comparison.
  bool operator==(const VideoFormat& x) const {
    return x.format == format && x.width == width && x.height == height;
  }

  size_t width{0}; // width in pixels
  size_t height{0}; // height in pixels
  ssize_t format{-1}; // AVPixelFormat, auto AV_PIX_FMT_NONE
  size_t minDimension{0}; // choose min dimension and rescale accordingly
  size_t cropImage{0}; // request image crop
  // -- alignment 40 bytes (with 8-byte size_t)
};

// Subtitle / closed captions format description.
struct SubtitleFormat {
  ssize_t type{0}; // AVSubtitleType, auto SUBTITLE_NONE
  // Zero-initialized for the same reason as AudioFormat::padding.
  size_t padding[4]{};
  // -- alignment 40 bytes (with 8-byte size_t)
};

// Storage for exactly one of the possible formats. The argument type of the
// constructor is only a tag that selects which member becomes active:
//   (none)        -> audio
//   int           -> video
//   char / double -> subtitle
union FormatUnion {
  FormatUnion() : audio() {}
  explicit FormatUnion(int) : video() {}
  explicit FormatUnion(char) : subtitle() {}
  explicit FormatUnion(double) : subtitle() {}

  AudioFormat audio;
  VideoFormat video;
  SubtitleFormat subtitle;
  // -- alignment 40 bytes (with 8-byte size_t)
};

/*
  MediaFormat data structure serves as input/output parameter.
  Caller assigns values for input formats or leaves default values for auto
  detection.
  For output formats all fields will be set to the specific values.
*/
struct MediaFormat {
  // For using map/set data structures.
  // Ordering looks at the media type only, so an ordered container keeps at
  // most one MediaFormat per MediaType.
  bool operator<(const MediaFormat& x) const {
    return type < x.type;
  }

  // Equality requires the same media type; audio and video additionally
  // compare their format payloads, subtitle and CC compare by type alone.
  bool operator==(const MediaFormat& x) const {
    if (type != x.type) {
      return false;
    }
    switch (type) {
      case TYPE_AUDIO:
        return format.audio == x.format.audio;
      case TYPE_VIDEO:
        return format.video == x.format.video;
      case TYPE_SUBTITLE:
      case TYPE_CC:
        return true;
      default:
        return false;
    }
  }

  // The type of the first argument is a tag that selects the media type
  // (none -> audio, int -> video, char -> subtitle, double -> CC); its value
  // is only forwarded to the matching FormatUnion tag constructor.
  // @s is the stream index, see the `stream` member.
  explicit MediaFormat(ssize_t s = -1)
      : type(TYPE_AUDIO), stream(s), format() {}
  explicit MediaFormat(int x, ssize_t s = -1)
      : type(TYPE_VIDEO), stream(s), format(x) {}
  explicit MediaFormat(char x, ssize_t s = -1)
      : type(TYPE_SUBTITLE), stream(s), format(x) {}
  explicit MediaFormat(double x, ssize_t s = -1)
      : type(TYPE_CC), stream(s), format(x) {}

  // Builds an audio MediaFormat for the given stream index.
  static MediaFormat makeMediaFormat(AudioFormat format, ssize_t stream) {
    MediaFormat result(stream);
    result.format.audio = format;
    return result;
  }

  // Builds a video MediaFormat for the given stream index.
  static MediaFormat makeMediaFormat(VideoFormat format, ssize_t stream) {
    MediaFormat result(0, stream);
    result.format.video = format;
    return result;
  }

  // Builds a subtitle MediaFormat for the given stream index.
  static MediaFormat makeMediaFormat(SubtitleFormat format, ssize_t stream) {
    MediaFormat result('0', stream);
    result.format.subtitle = format;
    return result;
  }

  // format type
  MediaType type;
  // stream index:
  // set -1 for one stream auto detection, -2 for all streams auto detection,
  // >= 0, specified stream, if caller knows the stream index (unlikely)
  ssize_t stream;
  // union keeps one of the possible formats, defined by MediaType
  FormatUnion format;

  // output parameters, ignored during initialization
  // time base numerator
  ssize_t num{0};
  // time base denominator
  ssize_t den{1};
  // duration of the stream, in stream time base, if available
  ssize_t duration{-1};
};

// Input parameters that configure a decoder instance.
struct DecoderParameters {
  // local file, remote file, http url, rtmp stream uri, etc. anything that
  // ffmpeg can recognize
  std::string uri;
  // timeout on getting bytes for decoding
  size_t timeoutMs{1000};
  // logging level, default AV_LOG_PANIC
  ssize_t logLevel{0};
  // when decoder would give up, 0 means never
  size_t maxPackageErrors{0};
  // max allowed consecutive times no bytes are processed. 0 means infinite.
  size_t maxProcessNoBytes{0};
  // start offset
  ssize_t startOffsetMs{0};
  // end offset
  ssize_t endOffsetMs{-1};
  // logging id
  int64_t loggingUuid{0};
  // adjust header pts to the epoch time
  bool convertPtsToWallTime{false};
  // indicate if input stream is an encoded image
  bool isImage{false};
  // what media types should be processed, default none
  std::set<MediaFormat> formats;
  // listen and wait for new rtmp stream
  bool listen{false};
  // don't copy frame body, only header
  bool headerOnly{false};
  // seek tolerated accuracy
  double seekAccuracySec{1.0};
};

// Metadata that accompanies every decoded message.
struct DecoderHeader {
  // message id, from 0 till ...
  size_t seqno{0};
  // decoded timestamp in microseconds from either beginning of the stream or
  // from epoch time, see DecoderParameters::convertPtsToWallTime
  ssize_t pts{0};
  // decoded key frame
  size_t keyFrame{0};
  // frames per second, valid only for video streams
  double fps{0};
  // format specifies what kind of frame is in a payload
  MediaFormat format;
};

// Abstract interface ByteStorage class
class ByteStorage {
 public:
  virtual ~ByteStorage() = default;
  // makes sure that buffer has at least n bytes available for writing, if not
  // storage must reallocate memory.
  virtual void ensure(size_t n) = 0;
  // caller must not write more than available bytes
  virtual uint8_t* writableTail() = 0;
  // caller confirms that n bytes were written to the writable tail
  virtual void append(size_t n) = 0;
  // caller confirms that n bytes were read from the read buffer
  virtual void trim(size_t n) = 0;
  // gives an access to the beginning of the read buffer
  virtual const uint8_t* data() const = 0;
  // returns the stored size in bytes
  virtual size_t length() const = 0;
  // returns available capacity for writable tail
  virtual size_t tail() const = 0;
  // clears content, keeps capacity
  virtual void clear() = 0;
};

// A decoded message: header plus the owned payload bytes.
struct DecoderOutputMessage {
  DecoderHeader header;
  std::unique_ptr<ByteStorage> payload;
};

/*
 * External provider of the encoded bytes, specific implementation is left for
 * different use cases, like file, memory, external network end-points, etc.
 * Normally input/output parameter @out is set to a valid, not null buffer
 * pointer, which indicates a "read" call, however there are "seek" modes as
 * well.
 *
 * @out != nullptr, @size != 0, @timeoutMs != 0 => read @size bytes from the
 *   current offset; return the number of bytes read, 0 if no more bytes are
 *   available, < 0 on error.
 * @out == nullptr, @size == 0, @timeoutMs == 0 => caller requests the seek
 *   capability of the provider; return 0 if "seek" is supported, < 0 if the
 *   "seek" mode is not supported.
 * @out == nullptr, @size > 0 => seek to the absolute offset == @size; return
 *   0 on success and < 0 on error.
 * @out == nullptr, @size < 0 => seek to the end of the media; return 0 on
 *   success and < 0 on failure. A provider might support seek without
 *   knowing the media size.
 */
using DecoderInCallback =
    std::function<int(uint8_t* out, int size, uint64_t timeoutMs)>;

// Receives each decoded message; the message is handed over to the callback.
using DecoderOutCallback = std::function<void(DecoderOutputMessage&&)>;

/**
 * Abstract class for decoding media bytes
 * It has two different modes. Internal media bytes retrieval for given uri and
 * external media bytes provider in case of memory streams
 */
class MediaDecoder {
 public:
  virtual ~MediaDecoder() = default;

  /**
   * Initializes media decoder with parameters,
   * calls callback when media bytes are available.
   * Media bytes get fetched internally from provided URI
   * or invokes provided input callback to get media bytes.
   * Input callback must be empty for the internal media provider
   */
  virtual bool init(
      const DecoderParameters& params,
      DecoderInCallback&& in) = 0;

  /**
   * Polls one available decoded frame from decoder
   * Returns error code, 0 - for success
   */
  virtual int decode(DecoderOutputMessage* out, uint64_t timeoutMs) = 0;

  /**
   * Polls available decoded bytes from decoder, till EOF or error
   */
  virtual int decode_all(const DecoderOutCallback& callback) = 0;

  /**
   * Stops calling callback, releases resources
   */
  virtual void shutdown() = 0;

  /**
   * Interrupts whatever decoder is doing at any time
   */
  virtual void interrupt() = 0;

  /**
   * Factory to create ByteStorage class instances, particular implementation is
   * left to the derived class. Caller provides the initially allocated size
   */
  virtual std::unique_ptr<ByteStorage> createByteStorage(size_t n) = 0;
};

// Input parameters that configure a sampler instance.
struct SamplerParameters {
  MediaType type{TYPE_AUDIO};
  FormatUnion in;
  FormatUnion out;
  int64_t loggingUuid{0};
};

/**
 * Abstract class for sampling media bytes
 */
class MediaSampler {
 public:
  virtual ~MediaSampler() = default;

  /**
   * Initializes media sampler with parameters
   */
  virtual bool init(const SamplerParameters& params) = 0;

  /**
   * Samples media bytes
   * Returns error code < 0, or >=0 - for success, indicating number of bytes
   * processed.
   * set @in to null for flushing data
   */
  virtual int sample(const ByteStorage* in, ByteStorage* out) = 0;

  /**
   * Releases resources
   */
  virtual void shutdown() = 0;

  /*
   * Returns media type
   */
  MediaType getMediaType() const {
    return params_.type;
  }

  /*
   * Returns formats
   */
  FormatUnion getInputFormat() const {
    return params_.in;
  }
  FormatUnion getOutFormat() const {
    return params_.out;
  }

 protected:
  SamplerParameters params_;
};

} // namespace ffmpeg