ffmpeg.h 6.78 KB
Newer Older
1
2
// One-stop header for all FFmpeg needs
#pragma once
Moto Hira's avatar
Moto Hira committed
3
#include <torch/types.h>
4
#include <cstdint>
5
#include <map>
6
7
8
9
10
11
12
13
14
15
#include <memory>
#include <string>

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersink.h>
#include <libavfilter/buffersrc.h>
#include <libavformat/avformat.h>
16
#include <libavformat/avio.h>
17
#include <libavutil/avutil.h>
18
#include <libavutil/channel_layout.h>
19
20
21
22
23
24
#include <libavutil/frame.h>
#include <libavutil/imgutils.h>
#include <libavutil/log.h>
#include <libavutil/pixdesc.h>
}

moto's avatar
moto committed
25
26
/// @cond

moto-meta's avatar
moto-meta committed
27
namespace torio {
28
namespace io {
29

30
// String-to-string option map. get_option_dict() converts it to FFmpeg's
// native AVDictionary.
using OptionDict = std::map<std::string, std::string>;
31

32
33
34
35
36
37
38
39
40
// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (FFmpeg 5), AVInputFormat is const and the
// related functions expect a pointer to const.
// AVFORMAT_CONST expands to `const` for those versions and to nothing for
// older ones, so the same declaration can be used with both.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVFORMAT_CONST const
#else
#define AVFORMAT_CONST
#endif

moto's avatar
moto committed
41
42
43
// Replacement of av_err2str, which causes
// `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5
44
45
46
47
av_always_inline std::string av_err2string(int errnum) {
  char str[AV_ERROR_MAX_STRING_SIZE];
  return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum);
}
moto's avatar
moto committed
48

49
50
51
52
53
54
55
56
57
58
59
60
// Base structure that handles memory management.
// The resource is held in a std::unique_ptr, so it is released by the
// unique_ptr destructor, which invokes the custom `Deleter`.
// https://stackoverflow.com/a/19054280
//
// Resource allocation is provided by the constructors of derived classes;
// this class only takes ownership of an already-allocated pointer.
//
// T:       type of the managed object.
// Deleter: default-constructible functor, callable with `T*`, that releases
//          the object.
template <typename T, typename Deleter>
class Wrapper {
  std::unique_ptr<T, Deleter> ptr;

 public:
  Wrapper() = delete;

  // Takes ownership of `t` (which may be null).
  // The constructor is declared with the injected class name: spelling it as
  // `Wrapper<T, Deleter>(...)` is ill-formed since C++20.
  explicit Wrapper(T* t) : ptr(t) {}

  // Member access on the managed object.
  T* operator->() const {
    return ptr.get();
  }

  // True when a non-null pointer is held.
  explicit operator bool() const {
    return static_cast<bool>(ptr);
  }

  // Implicit conversion to the raw pointer, so that the wrapper can be used
  // where a `T*` is expected. Ownership is not transferred.
  operator T*() const {
    return ptr.get();
  }
};

moto's avatar
moto committed
73
74
75
76
77
78
79
////////////////////////////////////////////////////////////////////////////////
// AVDictionary
////////////////////////////////////////////////////////////////////////////////
// Since an AVDictionary can be relocated by FFmpeg APIs, it does not fit
// RAII semantics. Instead, we provide helper functions.

// Convert standard dict to FFmpeg native type
80
// `option` is an optional key/value map; the result is a raw AVDictionary
// pointer that is not wrapped in an owning type.
// NOTE(review): presumably the caller hands the result to clean_up_dict once
// FFmpeg has consumed it - confirm against the implementation.
AVDictionary* get_option_dict(const c10::optional<OptionDict>& option);
moto's avatar
moto committed
81
82
83
84

// Clean up the dict after use. If there is an unused key, throw a runtime
// error.
void clean_up_dict(AVDictionary* p);

85
86
87
////////////////////////////////////////////////////////////////////////////////
// AVFormatContext
////////////////////////////////////////////////////////////////////////////////
88
struct AVFormatInputContextDeleter {
89
90
91
  void operator()(AVFormatContext* p);
};

92
93
94
struct AVFormatInputContextPtr
    : public Wrapper<AVFormatContext, AVFormatInputContextDeleter> {
  explicit AVFormatInputContextPtr(AVFormatContext* p);
95
96
};

moto's avatar
moto committed
97
98
99
100
101
102
103
104
105
// Deleter used by AVFormatOutputContextPtr. operator() is defined out of
// line.
// NOTE(review): presumably frees a context that was allocated for output -
// confirm in the implementation file.
struct AVFormatOutputContextDeleter {
  void operator()(AVFormatContext* p);
};

// Handle for an AVFormatContext, released through
// AVFormatOutputContextDeleter by the Wrapper base.
// The constructor is defined out of line; presumably it hands `p` to the
// Wrapper base, which then owns it - confirm.
struct AVFormatOutputContextPtr
    : public Wrapper<AVFormatContext, AVFormatOutputContextDeleter> {
  explicit AVFormatOutputContextPtr(AVFormatContext* p);
};

106
107
108
109
110
111
112
113
114
115
////////////////////////////////////////////////////////////////////////////////
// AVIO
////////////////////////////////////////////////////////////////////////////////
// Deleter used by AVIOContextPtr. operator() is defined out of line.
struct AVIOContextDeleter {
  void operator()(AVIOContext* p);
};

// Handle for an AVIOContext, released through AVIOContextDeleter by the
// Wrapper base.
// The constructor is defined out of line; presumably it hands `p` to the
// Wrapper base, which then owns it - confirm.
struct AVIOContextPtr : public Wrapper<AVIOContext, AVIOContextDeleter> {
  explicit AVIOContextPtr(AVIOContext* p);
};
116

117
118
119
120
121
122
123
124
////////////////////////////////////////////////////////////////////////////////
// AVPacket
////////////////////////////////////////////////////////////////////////////////
// Deleter used by AVPacketPtr. operator() is defined out of line.
struct AVPacketDeleter {
  void operator()(AVPacket* p);
};

// Handle for an AVPacket object, released through AVPacketDeleter by the
// Wrapper base.
// The constructor is defined out of line; presumably it hands `p` to the
// Wrapper base, which then owns it - confirm.
struct AVPacketPtr : public Wrapper<AVPacket, AVPacketDeleter> {
  explicit AVPacketPtr(AVPacket* p);
};

128
129
// Returns an AVPacket wrapped in an AVPacketPtr handle.
// NOTE(review): presumably allocates a fresh packet and reports allocation
// failure by throwing - confirm against the implementation.
AVPacketPtr alloc_avpacket();

130
131
132
133
134
135
136
137
138
139
140
141
142
////////////////////////////////////////////////////////////////////////////////
// AVPacket - buffer unref
////////////////////////////////////////////////////////////////////////////////
// AVPacket structure employs two-staged memory allocation.
// The first-stage is for allocating AVPacket object itself, and it typically
// happens only once throughout the lifetime of application.
// The second-stage is for allocating the content (media data) each time the
// input file is processed and a chunk of data is read. The memory allocated
// during this time has to be released before the next iteration.
// The first-stage memory management is handled by `AVPacketPtr`.
// `AutoPacketUnref` handles the second-stage memory management.
struct AutoPacketUnref {
  AVPacketPtr& p_;
143
  explicit AutoPacketUnref(AVPacketPtr& p);
144
145
146
147
148
149
150
151
152
153
154
155
  ~AutoPacketUnref();
  operator AVPacket*() const;
};

////////////////////////////////////////////////////////////////////////////////
// AVFrame
////////////////////////////////////////////////////////////////////////////////
// Deleter used by AVFramePtr. operator() is defined out of line.
struct AVFrameDeleter {
  void operator()(AVFrame* p);
};

// Handle for an AVFrame, released through AVFrameDeleter by the Wrapper
// base.
// The constructor is defined out of line; presumably it hands `p` to the
// Wrapper base, which then owns it - confirm.
struct AVFramePtr : public Wrapper<AVFrame, AVFrameDeleter> {
  explicit AVFramePtr(AVFrame* p);
};

159
160
// Returns an AVFrame wrapped in an AVFramePtr handle.
// NOTE(review): presumably allocates a fresh frame and reports allocation
// failure by throwing - confirm against the implementation.
AVFramePtr alloc_avframe();

161
162
163
164
165
166
167
168
169
////////////////////////////////////////////////////////////////////////////////
// AutoBufferUnref is responsible for performing unref at the end of the
// lifetime of AVBufferRefPtr.
////////////////////////////////////////////////////////////////////////////////
// Deleter used by AVBufferRefPtr. operator() is defined out of line.
// NOTE(review): going by the name, it unrefs the buffer reference rather
// than freeing the underlying data directly - confirm.
struct AutoBufferUnref {
  void operator()(AVBufferRef* p);
};

// Handle for an AVBufferRef, released through AutoBufferUnref by the Wrapper
// base.
// The constructor is defined out of line; presumably it hands `p` to the
// Wrapper base, which then owns it - confirm.
struct AVBufferRefPtr : public Wrapper<AVBufferRef, AutoBufferUnref> {
  explicit AVBufferRefPtr(AVBufferRef* p);
};

173
174
175
176
177
178
179
180
////////////////////////////////////////////////////////////////////////////////
// AVCodecContext
////////////////////////////////////////////////////////////////////////////////
// Deleter used by AVCodecContextPtr. operator() is defined out of line.
struct AVCodecContextDeleter {
  void operator()(AVCodecContext* p);
};

// Handle for an AVCodecContext, released through AVCodecContextDeleter by
// the Wrapper base.
// The constructor is defined out of line; presumably it hands `p` to the
// Wrapper base, which then owns it - confirm.
struct AVCodecContextPtr
    : public Wrapper<AVCodecContext, AVCodecContextDeleter> {
  explicit AVCodecContextPtr(AVCodecContext* p);
};

////////////////////////////////////////////////////////////////////////////////
// AVFilterGraph
////////////////////////////////////////////////////////////////////////////////
// Deleter used by AVFilterGraphPtr. operator() is defined out of line.
struct AVFilterGraphDeleter {
  void operator()(AVFilterGraph* p);
};

// Handle for an AVFilterGraph, released through AVFilterGraphDeleter by the
// Wrapper base.
// The constructor is defined out of line; presumably it hands `p` to the
// Wrapper base, which then owns it - confirm.
struct AVFilterGraphPtr : public Wrapper<AVFilterGraph, AVFilterGraphDeleter> {
  explicit AVFilterGraphPtr(AVFilterGraph* p);
};
193
194
195
196
197
198
199
200
201
202

////////////////////////////////////////////////////////////////////////////////
// AVCodecParameters
////////////////////////////////////////////////////////////////////////////////
// Deleter used by AVCodecParametersPtr. operator() is defined out of line.
struct AVCodecParametersDeleter {
  void operator()(AVCodecParameters* p);
};

// Handle for an AVCodecParameters object, released through
// AVCodecParametersDeleter by the Wrapper base.
// The constructor is defined out of line; presumably it hands `p` to the
// Wrapper base, which then owns it - confirm.
struct AVCodecParametersPtr
    : public Wrapper<AVCodecParameters, AVCodecParametersDeleter> {
  explicit AVCodecParametersPtr(AVCodecParameters* p);
};

struct StreamParams {
207
  AVCodecParametersPtr codec_params{nullptr};
208
209
210
  AVRational time_base{};
  int stream_index{};
};
211
} // namespace io
moto-meta's avatar
moto-meta committed
212
} // namespace torio
moto's avatar
moto committed
213
214

/// @endcond