Commit b14ced1a authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Use dlopen for FFmpeg (#3353)

Summary:
This commit changes the way FFmpeg extension is built and used.
Instead of linking (LGPL) FFmpeg libraries to torchaudio at build time,
it uses dlopen to search for and link them at run time.

For dlopen-ing, we use PyTorch's `at::DynamicLibrary` class, which provides
a portable wrapper.

Pull Request resolved: https://github.com/pytorch/audio/pull/3353

Differential Revision: D46059199

Pulled By: mthrok

fbshipit-source-id: 4493a5fd8a4c802178d20276522f5334d637307d
parent bc54ac8a
...@@ -2,11 +2,10 @@ message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}") ...@@ -2,11 +2,10 @@ message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}")
find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil) find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil)
add_library(ffmpeg INTERFACE) add_library(ffmpeg INTERFACE)
target_include_directories(ffmpeg INTERFACE "${FFMPEG_INCLUDE_DIRS}") target_include_directories(ffmpeg INTERFACE "${FFMPEG_INCLUDE_DIRS}")
target_link_libraries(ffmpeg INTERFACE "${FFMPEG_LIBRARIES}")
set( set(
sources sources
libav.cpp
ffmpeg.cpp ffmpeg.cpp
filter_graph.cpp filter_graph.cpp
hw_context.cpp hw_context.cpp
......
#include <c10/util/Exception.h> #include <c10/util/Exception.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h> #include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <sstream> #include <sstream>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
...@@ -8,6 +9,8 @@ ...@@ -8,6 +9,8 @@
namespace torchaudio { namespace torchaudio {
namespace io { namespace io {
using torchaudio::io::detail::libav;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// AVDictionary // AVDictionary
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
...@@ -15,7 +18,7 @@ AVDictionary* get_option_dict(const c10::optional<OptionDict>& option) { ...@@ -15,7 +18,7 @@ AVDictionary* get_option_dict(const c10::optional<OptionDict>& option) {
AVDictionary* opt = nullptr; AVDictionary* opt = nullptr;
if (option) { if (option) {
for (auto const& [key, value] : option.value()) { for (auto const& [key, value] : option.value()) {
av_dict_set(&opt, key.c_str(), value.c_str(), 0); libav().av_dict_set(&opt, key.c_str(), value.c_str(), 0);
} }
} }
return opt; return opt;
...@@ -26,10 +29,10 @@ void clean_up_dict(AVDictionary* p) { ...@@ -26,10 +29,10 @@ void clean_up_dict(AVDictionary* p) {
std::vector<std::string> unused_keys; std::vector<std::string> unused_keys;
// Check and copy unused keys, clean up the original dictionary // Check and copy unused keys, clean up the original dictionary
AVDictionaryEntry* t = nullptr; AVDictionaryEntry* t = nullptr;
while ((t = av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) { while ((t = libav().av_dict_get(p, "", t, AV_DICT_IGNORE_SUFFIX))) {
unused_keys.emplace_back(t->key); unused_keys.emplace_back(t->key);
} }
av_dict_free(&p); libav().av_dict_free(&p);
TORCH_CHECK( TORCH_CHECK(
unused_keys.empty(), unused_keys.empty(),
"Unexpected options: ", "Unexpected options: ",
...@@ -41,14 +44,14 @@ void clean_up_dict(AVDictionary* p) { ...@@ -41,14 +44,14 @@ void clean_up_dict(AVDictionary* p) {
// AVFormatContext // AVFormatContext
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVFormatInputContextDeleter::operator()(AVFormatContext* p) { void AVFormatInputContextDeleter::operator()(AVFormatContext* p) {
avformat_close_input(&p); libav().avformat_close_input(&p);
}; };
AVFormatInputContextPtr::AVFormatInputContextPtr(AVFormatContext* p) AVFormatInputContextPtr::AVFormatInputContextPtr(AVFormatContext* p)
: Wrapper<AVFormatContext, AVFormatInputContextDeleter>(p) {} : Wrapper<AVFormatContext, AVFormatInputContextDeleter>(p) {}
void AVFormatOutputContextDeleter::operator()(AVFormatContext* p) { void AVFormatOutputContextDeleter::operator()(AVFormatContext* p) {
avformat_free_context(p); libav().avformat_free_context(p);
}; };
AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p) AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
...@@ -58,9 +61,9 @@ AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p) ...@@ -58,9 +61,9 @@ AVFormatOutputContextPtr::AVFormatOutputContextPtr(AVFormatContext* p)
// AVIO // AVIO
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVIOContextDeleter::operator()(AVIOContext* p) { void AVIOContextDeleter::operator()(AVIOContext* p) {
avio_flush(p); libav().avio_flush(p);
av_freep(&p->buffer); libav().av_freep(&p->buffer);
av_freep(&p); libav().av_freep(&p);
}; };
AVIOContextPtr::AVIOContextPtr(AVIOContext* p) AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
...@@ -70,13 +73,13 @@ AVIOContextPtr::AVIOContextPtr(AVIOContext* p) ...@@ -70,13 +73,13 @@ AVIOContextPtr::AVIOContextPtr(AVIOContext* p)
// AVPacket // AVPacket
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVPacketDeleter::operator()(AVPacket* p) { void AVPacketDeleter::operator()(AVPacket* p) {
av_packet_free(&p); libav().av_packet_free(&p);
}; };
AVPacketPtr::AVPacketPtr(AVPacket* p) : Wrapper<AVPacket, AVPacketDeleter>(p) {} AVPacketPtr::AVPacketPtr(AVPacket* p) : Wrapper<AVPacket, AVPacketDeleter>(p) {}
AVPacketPtr alloc_avpacket() { AVPacketPtr alloc_avpacket() {
AVPacket* p = av_packet_alloc(); AVPacket* p = libav().av_packet_alloc();
TORCH_CHECK(p, "Failed to allocate AVPacket object."); TORCH_CHECK(p, "Failed to allocate AVPacket object.");
return AVPacketPtr{p}; return AVPacketPtr{p};
} }
...@@ -86,7 +89,7 @@ AVPacketPtr alloc_avpacket() { ...@@ -86,7 +89,7 @@ AVPacketPtr alloc_avpacket() {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){}; AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){};
AutoPacketUnref::~AutoPacketUnref() { AutoPacketUnref::~AutoPacketUnref() {
av_packet_unref(p_); libav().av_packet_unref(p_);
} }
AutoPacketUnref::operator AVPacket*() const { AutoPacketUnref::operator AVPacket*() const {
return p_; return p_;
...@@ -96,13 +99,13 @@ AutoPacketUnref::operator AVPacket*() const { ...@@ -96,13 +99,13 @@ AutoPacketUnref::operator AVPacket*() const {
// AVFrame // AVFrame
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVFrameDeleter::operator()(AVFrame* p) { void AVFrameDeleter::operator()(AVFrame* p) {
av_frame_free(&p); libav().av_frame_free(&p);
}; };
AVFramePtr::AVFramePtr(AVFrame* p) : Wrapper<AVFrame, AVFrameDeleter>(p) {} AVFramePtr::AVFramePtr(AVFrame* p) : Wrapper<AVFrame, AVFrameDeleter>(p) {}
AVFramePtr alloc_avframe() { AVFramePtr alloc_avframe() {
AVFrame* p = av_frame_alloc(); AVFrame* p = libav().av_frame_alloc();
TORCH_CHECK(p, "Failed to allocate AVFrame object."); TORCH_CHECK(p, "Failed to allocate AVFrame object.");
return AVFramePtr{p}; return AVFramePtr{p};
}; };
...@@ -111,7 +114,7 @@ AVFramePtr alloc_avframe() { ...@@ -111,7 +114,7 @@ AVFramePtr alloc_avframe() {
// AVCodecContext // AVCodecContext
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVCodecContextDeleter::operator()(AVCodecContext* p) { void AVCodecContextDeleter::operator()(AVCodecContext* p) {
avcodec_free_context(&p); libav().avcodec_free_context(&p);
}; };
AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p) AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
...@@ -121,7 +124,7 @@ AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p) ...@@ -121,7 +124,7 @@ AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
// AVBufferRefPtr // AVBufferRefPtr
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AutoBufferUnref::operator()(AVBufferRef* p) { void AutoBufferUnref::operator()(AVBufferRef* p) {
av_buffer_unref(&p); libav().av_buffer_unref(&p);
} }
AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p) AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
...@@ -131,7 +134,7 @@ AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p) ...@@ -131,7 +134,7 @@ AVBufferRefPtr::AVBufferRefPtr(AVBufferRef* p)
// AVFilterGraph // AVFilterGraph
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVFilterGraphDeleter::operator()(AVFilterGraph* p) { void AVFilterGraphDeleter::operator()(AVFilterGraph* p) {
avfilter_graph_free(&p); libav().avfilter_graph_free(&p);
}; };
AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p) AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
...@@ -141,7 +144,7 @@ AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p) ...@@ -141,7 +144,7 @@ AVFilterGraphPtr::AVFilterGraphPtr(AVFilterGraph* p)
// AVCodecParameters // AVCodecParameters
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
void AVCodecParametersDeleter::operator()(AVCodecParameters* codecpar) { void AVCodecParametersDeleter::operator()(AVCodecParameters* codecpar) {
avcodec_parameters_free(&codecpar); libav().avcodec_parameters_free(&codecpar);
} }
AVCodecParametersPtr::AVCodecParametersPtr(AVCodecParameters* p) AVCodecParametersPtr::AVCodecParametersPtr(AVCodecParameters* p)
......
...@@ -6,6 +6,9 @@ ...@@ -6,6 +6,9 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/macro.h>
extern "C" { extern "C" {
#include <libavcodec/avcodec.h> #include <libavcodec/avcodec.h>
#include <libavdevice/avdevice.h> #include <libavdevice/avdevice.h>
...@@ -29,21 +32,13 @@ namespace io { ...@@ -29,21 +32,13 @@ namespace io {
using OptionDict = std::map<std::string, std::string>; using OptionDict = std::map<std::string, std::string>;
// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVFORMAT_CONST const
#else
#define AVFORMAT_CONST
#endif
// Replacement of av_err2str, which causes // Replacement of av_err2str, which causes
// `error: taking address of temporary array` // `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5 // https://github.com/joncampbell123/composite-video-simulator/issues/5
av_always_inline std::string av_err2string(int errnum) { av_always_inline std::string av_err2string(int errnum) {
char str[AV_ERROR_MAX_STRING_SIZE]; char str[AV_ERROR_MAX_STRING_SIZE];
return av_make_error_string(str, AV_ERROR_MAX_STRING_SIZE, errnum); detail::libav().av_strerror(errnum, str, AV_ERROR_MAX_STRING_SIZE);
return str;
} }
// Base structure that handles memory management. // Base structure that handles memory management.
......
#include <torchaudio/csrc/ffmpeg/filter_graph.h> #include <torchaudio/csrc/ffmpeg/filter_graph.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <stdexcept> #include <stdexcept>
namespace torchaudio { namespace torchaudio {
namespace io { namespace io {
using torchaudio::io::detail::libav;
namespace { namespace {
AVFilterGraph* get_filter_graph() { AVFilterGraph* get_filter_graph() {
AVFilterGraph* ptr = avfilter_graph_alloc(); AVFilterGraph* ptr = libav().avfilter_graph_alloc();
TORCH_CHECK(ptr, "Failed to allocate resouce."); TORCH_CHECK(ptr, "Failed to allocate resouce.");
ptr->nb_threads = 1; ptr->nb_threads = 1;
return ptr; return ptr;
...@@ -32,7 +35,7 @@ std::string get_audio_src_args( ...@@ -32,7 +35,7 @@ std::string get_audio_src_args(
time_base.num, time_base.num,
time_base.den, time_base.den,
sample_rate, sample_rate,
av_get_sample_fmt_name(format), libav().av_get_sample_fmt_name(format),
channel_layout); channel_layout);
return std::string(args); return std::string(args);
} }
...@@ -51,7 +54,7 @@ std::string get_video_src_args( ...@@ -51,7 +54,7 @@ std::string get_video_src_args(
"video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:frame_rate=%d/%d:pixel_aspect=%d/%d", "video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:frame_rate=%d/%d:pixel_aspect=%d/%d",
width, width,
height, height,
av_get_pix_fmt_name(format), libav().av_get_pix_fmt_name(format),
time_base.num, time_base.num,
time_base.den, time_base.den,
frame_rate.num, frame_rate.num,
...@@ -69,7 +72,7 @@ void FilterGraph::add_audio_src( ...@@ -69,7 +72,7 @@ void FilterGraph::add_audio_src(
int sample_rate, int sample_rate,
uint64_t channel_layout) { uint64_t channel_layout) {
add_src( add_src(
avfilter_get_by_name("abuffer"), libav().avfilter_get_by_name("abuffer"),
get_audio_src_args(format, time_base, sample_rate, channel_layout)); get_audio_src_args(format, time_base, sample_rate, channel_layout));
} }
...@@ -81,13 +84,13 @@ void FilterGraph::add_video_src( ...@@ -81,13 +84,13 @@ void FilterGraph::add_video_src(
int height, int height,
AVRational sample_aspect_ratio) { AVRational sample_aspect_ratio) {
add_src( add_src(
avfilter_get_by_name("buffer"), libav().avfilter_get_by_name("buffer"),
get_video_src_args( get_video_src_args(
format, time_base, frame_rate, width, height, sample_aspect_ratio)); format, time_base, frame_rate, width, height, sample_aspect_ratio));
} }
void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) { void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
int ret = avfilter_graph_create_filter( int ret = libav().avfilter_graph_create_filter(
&buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph); &buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
...@@ -96,11 +99,11 @@ void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) { ...@@ -96,11 +99,11 @@ void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
} }
void FilterGraph::add_audio_sink() { void FilterGraph::add_audio_sink() {
add_sink(avfilter_get_by_name("abuffersink")); add_sink(libav().avfilter_get_by_name("abuffersink"));
} }
void FilterGraph::add_video_sink() { void FilterGraph::add_video_sink() {
add_sink(avfilter_get_by_name("buffersink")); add_sink(libav().avfilter_get_by_name("buffersink"));
} }
void FilterGraph::add_sink(const AVFilter* buffersink) { void FilterGraph::add_sink(const AVFilter* buffersink) {
...@@ -114,7 +117,7 @@ void FilterGraph::add_sink(const AVFilter* buffersink) { ...@@ -114,7 +117,7 @@ void FilterGraph::add_sink(const AVFilter* buffersink) {
// According to the other example // According to the other example
// https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html // https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html
// `abuffersink` should not take options, and this resolved issue. // `abuffersink` should not take options, and this resolved issue.
int ret = avfilter_graph_create_filter( int ret = libav().avfilter_graph_create_filter(
&buffersink_ctx, buffersink, "out", nullptr, nullptr, graph); &buffersink_ctx, buffersink, "out", nullptr, nullptr, graph);
TORCH_CHECK(ret >= 0, "Failed to create output filter."); TORCH_CHECK(ret >= 0, "Failed to create output filter.");
} }
...@@ -131,15 +134,15 @@ class InOuts { ...@@ -131,15 +134,15 @@ class InOuts {
public: public:
InOuts(const char* name, AVFilterContext* pCtx) { InOuts(const char* name, AVFilterContext* pCtx) {
p = avfilter_inout_alloc(); p = libav().avfilter_inout_alloc();
TORCH_CHECK(p, "Failed to allocate AVFilterInOut."); TORCH_CHECK(p, "Failed to allocate AVFilterInOut.");
p->name = av_strdup(name); p->name = libav().av_strdup(name);
p->filter_ctx = pCtx; p->filter_ctx = pCtx;
p->pad_idx = 0; p->pad_idx = 0;
p->next = nullptr; p->next = nullptr;
} }
~InOuts() { ~InOuts() {
avfilter_inout_free(&p); libav().avfilter_inout_free(&p);
} }
operator AVFilterInOut**() { operator AVFilterInOut**() {
return &p; return &p;
...@@ -156,7 +159,7 @@ void FilterGraph::add_process(const std::string& filter_description) { ...@@ -156,7 +159,7 @@ void FilterGraph::add_process(const std::string& filter_description) {
// If you are debugging this part of the code, you might get confused. // If you are debugging this part of the code, you might get confused.
InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx}; InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx};
int ret = avfilter_graph_parse_ptr( int ret = libav().avfilter_graph_parse_ptr(
graph, filter_description.c_str(), out, in, nullptr); graph, filter_description.c_str(), out, in, nullptr);
TORCH_CHECK( TORCH_CHECK(
...@@ -167,11 +170,11 @@ void FilterGraph::add_process(const std::string& filter_description) { ...@@ -167,11 +170,11 @@ void FilterGraph::add_process(const std::string& filter_description) {
void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) { void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) {
buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx; buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx;
int ret = avfilter_graph_config(graph, nullptr); int ret = libav().avfilter_graph_config(graph, nullptr);
TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret)); TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret));
// char* desc = avfilter_graph_dump(graph, NULL); // char* desc = libav().avfilter_graph_dump(graph, NULL);
// std::cerr << "Filter created:\n" << desc << std::endl; // std::cerr << "Filter created:\n" << desc << std::endl;
// av_free(static_cast<void*>(desc)); // libav().av_free(static_cast<void*>(desc));
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -191,7 +194,8 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const { ...@@ -191,7 +194,8 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
ret.num_channels = l->ch_layout.nb_channels; ret.num_channels = l->ch_layout.nb_channels;
#else #else
// Before FFmpeg 5.1 // Before FFmpeg 5.1
ret.num_channels = av_get_channel_layout_nb_channels(l->channel_layout); ret.num_channels =
libav().av_get_channel_layout_nb_channels(l->channel_layout);
#endif #endif
break; break;
} }
...@@ -214,12 +218,12 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const { ...@@ -214,12 +218,12 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
// Streaming process // Streaming process
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
int FilterGraph::add_frame(AVFrame* pInputFrame) { int FilterGraph::add_frame(AVFrame* pInputFrame) {
return av_buffersrc_add_frame_flags( return libav().av_buffersrc_add_frame_flags(
buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF); buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
} }
int FilterGraph::get_frame(AVFrame* pOutputFrame) { int FilterGraph::get_frame(AVFrame* pOutputFrame) {
return av_buffersink_get_frame(buffersink_ctx, pOutputFrame); return libav().av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
} }
} // namespace io } // namespace io
......
#include <torchaudio/csrc/ffmpeg/hw_context.h> #include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
namespace torchaudio::io { namespace torchaudio::io {
using detail::libav;
namespace { namespace {
static std::mutex MUTEX; static std::mutex MUTEX;
...@@ -15,7 +19,7 @@ AVBufferRef* get_cuda_context(int index) { ...@@ -15,7 +19,7 @@ AVBufferRef* get_cuda_context(int index) {
} }
if (CUDA_CONTEXT_CACHE.count(index) == 0) { if (CUDA_CONTEXT_CACHE.count(index) == 0) {
AVBufferRef* p = nullptr; AVBufferRef* p = nullptr;
int ret = av_hwdevice_ctx_create( int ret = libav().av_hwdevice_ctx_create(
&p, AV_HWDEVICE_TYPE_CUDA, std::to_string(index).c_str(), nullptr, 0); &p, AV_HWDEVICE_TYPE_CUDA, std::to_string(index).c_str(), nullptr, 0);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
......
#include <ATen/DynamicLibrary.h>
#include <c10/util/CallOnce.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
extern "C" {
#include <libavcodec/version.h>
#include <libavdevice/version.h>
#include <libavfilter/version.h>
#include <libavformat/version.h>
#include <libavutil/version.h>
}
namespace torchaudio::io::detail {
namespace {
// Holds the dlopen handles of the five FFmpeg shared libraries and the table
// of function pointers resolved from them. Constructed exactly once, lazily,
// by _load_libav(); keeping the at::DynamicLibrary members alive keeps the
// libraries mapped for the lifetime of this object.
class LibAVImpl {
  // dlopen handles for the five FFmpeg libraries.
  at::DynamicLibrary libavutil;
  at::DynamicLibrary libavcodec;
  at::DynamicLibrary libavformat;
  at::DynamicLibrary libavdevice;
  at::DynamicLibrary libavfilter;

 public:
  // The struct that holds all the function pointers to be used.
  LibAV libav{};

  // Opens the five libraries by the given platform-specific file names and
  // resolves every FFmpeg symbol torchaudio uses into `libav`.
  // NOTE(review): at::DynamicLibrary presumably raises on a missing library
  // or symbol (TODO confirm), in which case construction either fully
  // succeeds with a completely-populated table or fails.
  LibAVImpl(
      const char* util,
      const char* codec,
      const char* format,
      const char* device,
      const char* filter)
      : libavutil(util),
        libavcodec(codec),
        libavformat(format),
        libavdevice(device),
        libavfilter(filter) {
// `set(X)` looks up symbol "X" in the current library and stores it in the
// identically-named member of `libav`, casting to that member's exact
// function-pointer type. The macro is redefined per library below so each
// symbol is resolved from the library that exports it.
#define set(X) this->libav.X = (decltype(LibAV::X))libavutil.sym(#X)
    set(av_buffer_ref);
    set(av_buffer_unref);
    set(av_d2q);
    set(av_dict_free);
    set(av_dict_get);
    set(av_dict_set);
    set(av_frame_alloc);
    set(av_frame_free);
    set(av_frame_get_buffer);
    set(av_frame_is_writable);
    set(av_frame_make_writable);
    set(av_frame_unref);
    set(av_freep);
    set(av_get_channel_layout_nb_channels);
    set(av_get_channel_name);
    set(av_get_default_channel_layout);
    set(av_get_media_type_string);
    set(av_get_pix_fmt);
    set(av_get_pix_fmt_name);
    set(av_get_sample_fmt);
    set(av_get_sample_fmt_name);
    set(av_get_time_base_q);
    set(av_hwdevice_ctx_create);
    set(av_hwframe_ctx_alloc);
    set(av_hwframe_ctx_init);
    set(av_hwframe_get_buffer);
    set(av_log_get_level);
    set(av_log_set_level);
    set(av_malloc);
    set(av_pix_fmt_desc_get);
    set(av_rescale_q);
    set(av_sample_fmt_is_planar);
    set(av_strdup);
    set(av_strerror);
    set(avutil_version);
#undef set
// Symbols exported by libavcodec.
#define set(X) this->libav.X = (decltype(LibAV::X))libavcodec.sym(#X)
    set(av_codec_is_decoder);
    set(av_codec_is_encoder);
    set(av_codec_iterate);
    set(av_packet_alloc);
    set(av_packet_clone);
    set(av_packet_free);
    set(av_packet_ref);
    set(av_packet_rescale_ts);
    set(av_packet_unref);
    set(avcodec_alloc_context3);
    set(avcodec_configuration);
    set(avcodec_descriptor_get);
    set(avcodec_find_decoder);
    set(avcodec_find_decoder_by_name);
    set(avcodec_find_encoder);
    set(avcodec_find_encoder_by_name);
    set(avcodec_flush_buffers);
    set(avcodec_free_context);
    set(avcodec_get_hw_config);
    set(avcodec_get_name);
    set(avcodec_open2);
    set(avcodec_parameters_alloc);
    set(avcodec_parameters_copy);
    set(avcodec_parameters_free);
    set(avcodec_parameters_from_context);
    set(avcodec_parameters_to_context);
    set(avcodec_receive_frame);
    set(avcodec_receive_packet);
    set(avcodec_send_frame);
    set(avcodec_send_packet);
    set(avcodec_version);
#undef set
// Symbols exported by libavformat.
#define set(X) this->libav.X = (decltype(LibAV::X))libavformat.sym(#X)
    set(av_demuxer_iterate);
    set(av_dump_format);
    set(av_find_best_stream);
    set(av_find_input_format);
    set(av_guess_frame_rate);
    set(av_interleaved_write_frame);
    set(av_muxer_iterate);
    set(av_read_frame);
    set(av_seek_frame);
    set(av_write_trailer);
    set(avio_alloc_context);
    set(avio_enum_protocols);
    set(avio_closep);
    set(avio_flush);
    set(avio_open2);
    set(avformat_alloc_context);
    set(avformat_alloc_output_context2);
    set(avformat_close_input);
    set(avformat_find_stream_info);
    set(avformat_free_context);
    set(avformat_new_stream);
    set(avformat_open_input);
    set(avformat_version);
    set(avformat_write_header);
#undef set
// Symbols exported by libavdevice.
#define set(X) this->libav.X = (decltype(LibAV::X))libavdevice.sym(#X)
    set(avdevice_register_all);
    set(avdevice_version);
#undef set
// Symbols exported by libavfilter.
#define set(X) this->libav.X = (decltype(LibAV::X))libavfilter.sym(#X)
    set(av_buffersink_get_frame);
    set(av_buffersrc_add_frame_flags);
    set(avfilter_get_by_name);
    set(avfilter_graph_alloc);
    set(avfilter_graph_config);
    set(avfilter_graph_create_filter);
    set(avfilter_graph_free);
    set(avfilter_graph_parse_ptr);
    set(avfilter_inout_alloc);
    set(avfilter_inout_free);
    set(avfilter_version);
#undef set
  }
};
static std::unique_ptr<LibAVImpl> _libav;
// Loads the FFmpeg shared libraries under their platform-specific file names.
// The major-version suffix is taken from the headers torchaudio was compiled
// against (e.g. "libavutil.so.56"), so only an ABI-compatible runtime FFmpeg
// is picked up. Called exactly once via c10::call_once in libav().
void _load_libav() {
#if defined(_WIN32)
  // Windows naming: "avutil-56.dll" etc.
  _libav = std::make_unique<LibAVImpl>(
      "avutil-" AV_STRINGIFY(LIBAVUTIL_VERSION_MAJOR) ".dll",
      "avcodec-" AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR) ".dll",
      "avformat-" AV_STRINGIFY(LIBAVFORMAT_VERSION_MAJOR) ".dll",
      "avdevice-" AV_STRINGIFY(LIBAVDEVICE_VERSION_MAJOR) ".dll",
      "avfilter-" AV_STRINGIFY(LIBAVFILTER_VERSION_MAJOR) ".dll");
#elif defined(__APPLE__)
  // macOS naming: "libavutil.56.dylib" etc.
  _libav = std::make_unique<LibAVImpl>(
      "libavutil." AV_STRINGIFY(LIBAVUTIL_VERSION_MAJOR) ".dylib",
      "libavcodec." AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR) ".dylib",
      "libavformat." AV_STRINGIFY(LIBAVFORMAT_VERSION_MAJOR) ".dylib",
      "libavdevice." AV_STRINGIFY(LIBAVDEVICE_VERSION_MAJOR) ".dylib",
      "libavfilter." AV_STRINGIFY(LIBAVFILTER_VERSION_MAJOR) ".dylib");
#else
  // Linux/other ELF naming: "libavutil.so.56" etc.
  _libav = std::make_unique<LibAVImpl>(
      "libavutil.so." AV_STRINGIFY(LIBAVUTIL_VERSION_MAJOR),
      "libavcodec.so." AV_STRINGIFY(LIBAVCODEC_VERSION_MAJOR),
      "libavformat.so." AV_STRINGIFY(LIBAVFORMAT_VERSION_MAJOR),
      "libavdevice.so." AV_STRINGIFY(LIBAVDEVICE_VERSION_MAJOR),
      "libavfilter.so." AV_STRINGIFY(LIBAVFILTER_VERSION_MAJOR));
#endif
}
} // namespace
// Returns the process-wide table of FFmpeg function pointers, performing the
// one-time, thread-safe dlopen of the FFmpeg libraries on first use.
LibAV& libav() {
  static c10::once_flag loaded;
  c10::call_once(loaded, []() { _load_libav(); });
  return _libav->libav;
}
} // namespace torchaudio::io::detail
#pragma once
extern "C" {
#include <libavcodec/avcodec.h>
#include <libavfilter/avfilter.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
#include <libavutil/pixdesc.h>
}
#include <torchaudio/csrc/ffmpeg/macro.h>
namespace torchaudio::io::detail {
// Table of function pointers into the dlopen-ed FFmpeg libraries, grouped by
// the library that exports each symbol. Each member mirrors the signature of
// the FFmpeg C API function of the same name; see the FFmpeg documentation
// for the semantics of individual entries. The table is populated by
// LibAVImpl in libav.cpp and obtained through the libav() accessor, so
// torchaudio call sites use e.g. `libav().av_frame_alloc()` instead of
// calling FFmpeg directly.
struct LibAV {
  /////////////////////////////////////////////////////////////////////////////
  // libavutil
  /////////////////////////////////////////////////////////////////////////////
  AVBufferRef* (*av_buffer_ref)(const AVBufferRef*);
  void (*av_buffer_unref)(AVBufferRef**);
  AVRational (*av_d2q)(double, int) av_const;
  void (*av_dict_free)(AVDictionary**);
  AVDictionaryEntry* (*av_dict_get)(
      const AVDictionary*,
      const char*,
      const AVDictionaryEntry*,
      int);
  int (*av_dict_set)(AVDictionary**, const char*, const char*, int);
  AVFrame* (*av_frame_alloc)();
  void (*av_frame_free)(AVFrame**);
  int (*av_frame_get_buffer)(AVFrame*, int);
  int (*av_frame_is_writable)(AVFrame*);
  int (*av_frame_make_writable)(AVFrame*);
  void (*av_frame_unref)(AVFrame*);
  void (*av_freep)(void*);
  int (*av_get_channel_layout_nb_channels)(uint64_t);
  const char* (*av_get_channel_name)(uint64_t);
  int64_t (*av_get_default_channel_layout)(int);
  const char* (*av_get_media_type_string)(enum AVMediaType);
  enum AVPixelFormat (*av_get_pix_fmt)(const char*);
  const char* (*av_get_pix_fmt_name)(enum AVPixelFormat);
  enum AVSampleFormat (*av_get_sample_fmt)(const char*);
  const char* (*av_get_sample_fmt_name)(enum AVSampleFormat);
  AVRational (*av_get_time_base_q)();
  int (*av_hwdevice_ctx_create)(
      AVBufferRef**,
      enum AVHWDeviceType,
      const char*,
      AVDictionary*,
      int);
  AVBufferRef* (*av_hwframe_ctx_alloc)(AVBufferRef*);
  int (*av_hwframe_ctx_init)(AVBufferRef*);
  int (*av_hwframe_get_buffer)(AVBufferRef*, AVFrame*, int);
  int (*av_log_get_level)();
  void (*av_log_set_level)(int);
  void* (*av_malloc)(size_t);
  const AVPixFmtDescriptor* (*av_pix_fmt_desc_get)(enum AVPixelFormat);
  int64_t (*av_rescale_q)(int64_t, AVRational, AVRational) av_const;
  int (*av_sample_fmt_is_planar)(enum AVSampleFormat);
  char* (*av_strdup)(const char*);
  int (*av_strerror)(int, char*, size_t);
  unsigned (*avutil_version)();
  /////////////////////////////////////////////////////////////////////////////
  // libavcodec
  /////////////////////////////////////////////////////////////////////////////
  int (*av_codec_is_decoder)(const AVCodec*);
  int (*av_codec_is_encoder)(const AVCodec*);
  const AVCodec* (*av_codec_iterate)(void**);
  AVPacket* (*av_packet_alloc)();
  AVPacket* (*av_packet_clone)(const AVPacket*);
  void (*av_packet_free)(AVPacket**);
  int (*av_packet_ref)(AVPacket*, const AVPacket*);
  void (*av_packet_rescale_ts)(AVPacket*, AVRational, AVRational);
  void (*av_packet_unref)(AVPacket*);
  AVCodecContext* (*avcodec_alloc_context3)(const AVCodec*);
  const char* (*avcodec_configuration)();
  const AVCodecDescriptor* (*avcodec_descriptor_get)(enum AVCodecID);
  AVCodec* (*avcodec_find_decoder)(enum AVCodecID);
  AVCodec* (*avcodec_find_decoder_by_name)(const char*);
  AVCodec* (*avcodec_find_encoder)(enum AVCodecID);
  AVCodec* (*avcodec_find_encoder_by_name)(const char*);
  void (*avcodec_flush_buffers)(AVCodecContext*);
  void (*avcodec_free_context)(AVCodecContext**);
  const AVCodecHWConfig* (*avcodec_get_hw_config)(const AVCodec*, int);
  const char* (*avcodec_get_name)(enum AVCodecID);
  int (*avcodec_open2)(AVCodecContext*, const AVCodec*, AVDictionary**);
  AVCodecParameters* (*avcodec_parameters_alloc)();
  int (*avcodec_parameters_copy)(AVCodecParameters*, const AVCodecParameters*);
  void (*avcodec_parameters_free)(AVCodecParameters**);
  int (*avcodec_parameters_from_context)(
      AVCodecParameters*,
      const AVCodecContext*);
  int (*avcodec_parameters_to_context)(
      AVCodecContext*,
      const AVCodecParameters*);
  int (*avcodec_receive_frame)(AVCodecContext*, AVFrame*);
  int (*avcodec_receive_packet)(AVCodecContext*, AVPacket*);
  int (*avcodec_send_frame)(AVCodecContext*, const AVFrame*);
  int (*avcodec_send_packet)(AVCodecContext*, const AVPacket*);
  unsigned (*avcodec_version)();
  /////////////////////////////////////////////////////////////////////////////
  // libavformat
  /////////////////////////////////////////////////////////////////////////////
  const AVInputFormat* (*av_demuxer_iterate)(void**);
  void (*av_dump_format)(AVFormatContext*, int, const char*, int);
  int (*av_find_best_stream)(
      AVFormatContext*,
      enum AVMediaType,
      int,
      int,
      AVCodec**,
      int);
  AVInputFormat* (*av_find_input_format)(const char*);
  AVRational (*av_guess_frame_rate)(AVFormatContext*, AVStream*, AVFrame*);
  int (*av_interleaved_write_frame)(AVFormatContext*, AVPacket*);
  const AVOutputFormat* (*av_muxer_iterate)(void**);
  int (*av_read_frame)(AVFormatContext*, AVPacket*);
  int (*av_seek_frame)(AVFormatContext*, int, int64_t, int);
  int (*av_write_trailer)(AVFormatContext* s);
  AVIOContext* (*avio_alloc_context)(
      unsigned char*,
      int,
      int,
      void*,
      int (*)(void*, uint8_t*, int),
      int (*)(void*, uint8_t*, int),
      int64_t (*)(void*, int64_t, int));
  const char* (*avio_enum_protocols)(void**, int);
  int (*avio_closep)(AVIOContext**);
  void (*avio_flush)(AVIOContext*);
  int (*avio_open2)(
      AVIOContext**,
      const char*,
      int,
      const AVIOInterruptCB*,
      AVDictionary**);
  AVFormatContext* (*avformat_alloc_context)();
  int (*avformat_alloc_output_context2)(
      AVFormatContext**,
      AVOutputFormat*,
      const char*,
      const char*);
  void (*avformat_close_input)(AVFormatContext**);
  int (*avformat_find_stream_info)(AVFormatContext*, AVDictionary**);
  void (*avformat_free_context)(AVFormatContext*);
  AVStream* (*avformat_new_stream)(AVFormatContext*, const AVCodec*);
  // AVFORMAT_CONST (macro.h) expands to `const` on libavformat >= 59
  // (FFmpeg 5), where AVInputFormat parameters became const.
  int (*avformat_open_input)(
      AVFormatContext**,
      const char*,
      AVFORMAT_CONST AVInputFormat*,
      AVDictionary**);
  unsigned (*avformat_version)();
  int (*avformat_write_header)(AVFormatContext*, AVDictionary**);
  /////////////////////////////////////////////////////////////////////////////
  // libavdevice
  /////////////////////////////////////////////////////////////////////////////
  void (*avdevice_register_all)();
  unsigned (*avdevice_version)();
  /////////////////////////////////////////////////////////////////////////////
  // libavfilter
  /////////////////////////////////////////////////////////////////////////////
  int (*av_buffersink_get_frame)(AVFilterContext*, AVFrame*);
  int (*av_buffersrc_add_frame_flags)(AVFilterContext*, AVFrame*, int);
  const AVFilter* (*avfilter_get_by_name)(const char*);
  AVFilterGraph* (*avfilter_graph_alloc)();
  int (*avfilter_graph_config)(AVFilterGraph*, void*);
  int (*avfilter_graph_create_filter)(
      AVFilterContext**,
      const AVFilter*,
      const char*,
      const char*,
      void*,
      AVFilterGraph*);
  void (*avfilter_graph_free)(AVFilterGraph**);
  int (*avfilter_graph_parse_ptr)(
      AVFilterGraph*,
      const char*,
      AVFilterInOut**,
      AVFilterInOut**,
      void*);
  AVFilterInOut* (*avfilter_inout_alloc)();
  void (*avfilter_inout_free)(AVFilterInOut**);
  unsigned (*avfilter_version)();
};
// Fetch handler for dlopen-ed FFmpeg libraries.
LibAV& libav();
} // namespace torchaudio::io::detail
#pragma once
// Pull in the libavformat version macros. FFmpeg headers are C, so they must
// be included with C linkage from C++.
extern "C" {
#include <libavformat/version.h>
}
// Sanity check: fail loudly if the header did not provide the version macro
// (e.g. a broken or mismatched FFmpeg installation).
#ifndef LIBAVFORMAT_VERSION_MAJOR
#error LIBAVFORMAT_VERSION_MAJOR is not defined.
#endif
// Compatibility shim: starting with libavformat major version 59 (FFmpeg 5),
// several libavformat pointer parameters became const-qualified — e.g. the
// AVInputFormat* argument of avformat_open_input. AVFORMAT_CONST expands to
// `const` on those versions and to nothing on older ones, so a single
// function-pointer signature compiles against both FFmpeg 4 and 5+.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVFORMAT_CONST const
#else
#define AVFORMAT_CONST
#endif
#include <torch/extension.h> #include <torch/extension.h>
#include <torchaudio/csrc/ffmpeg/hw_context.h> #include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h> #include <torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h>
#include <torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h> #include <torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h>
namespace torchaudio { namespace torchaudio {
namespace io { namespace io {
using detail::libav;
namespace { namespace {
std::map<std::string, std::tuple<int64_t, int64_t, int64_t>> get_versions() { std::map<std::string, std::tuple<int64_t, int64_t, int64_t>> get_versions() {
std::map<std::string, std::tuple<int64_t, int64_t, int64_t>> ret; std::map<std::string, std::tuple<int64_t, int64_t, int64_t>> ret;
#define add_version(NAME) \ #define add_version(NAME) \
{ \ { \
int ver = NAME##_version(); \ int ver = libav().NAME##_version(); \
ret.emplace( \ ret.emplace( \
"lib" #NAME, \ "lib" #NAME, \
std::make_tuple<>( \ std::make_tuple<>( \
AV_VERSION_MAJOR(ver), \ AV_VERSION_MAJOR(ver), \
AV_VERSION_MINOR(ver), \ AV_VERSION_MINOR(ver), \
AV_VERSION_MICRO(ver))); \ AV_VERSION_MICRO(ver))); \
} }
add_version(avutil); add_version(avutil);
...@@ -35,7 +39,7 @@ std::map<std::string, std::string> get_demuxers(bool req_device) { ...@@ -35,7 +39,7 @@ std::map<std::string, std::string> get_demuxers(bool req_device) {
std::map<std::string, std::string> ret; std::map<std::string, std::string> ret;
const AVInputFormat* fmt = nullptr; const AVInputFormat* fmt = nullptr;
void* i = nullptr; void* i = nullptr;
while ((fmt = av_demuxer_iterate(&i))) { while ((fmt = libav().av_demuxer_iterate(&i))) {
assert(fmt); assert(fmt);
bool is_device = [&]() { bool is_device = [&]() {
const AVClass* avclass = fmt->priv_class; const AVClass* avclass = fmt->priv_class;
...@@ -52,7 +56,7 @@ std::map<std::string, std::string> get_muxers(bool req_device) { ...@@ -52,7 +56,7 @@ std::map<std::string, std::string> get_muxers(bool req_device) {
std::map<std::string, std::string> ret; std::map<std::string, std::string> ret;
const AVOutputFormat* fmt = nullptr; const AVOutputFormat* fmt = nullptr;
void* i = nullptr; void* i = nullptr;
while ((fmt = av_muxer_iterate(&i))) { while ((fmt = libav().av_muxer_iterate(&i))) {
assert(fmt); assert(fmt);
bool is_device = [&]() { bool is_device = [&]() {
const AVClass* avclass = fmt->priv_class; const AVClass* avclass = fmt->priv_class;
...@@ -71,10 +75,10 @@ std::map<std::string, std::string> get_codecs( ...@@ -71,10 +75,10 @@ std::map<std::string, std::string> get_codecs(
const AVCodec* c = nullptr; const AVCodec* c = nullptr;
void* i = nullptr; void* i = nullptr;
std::map<std::string, std::string> ret; std::map<std::string, std::string> ret;
while ((c = av_codec_iterate(&i))) { while ((c = libav().av_codec_iterate(&i))) {
assert(c); assert(c);
if ((req_encoder && av_codec_is_encoder(c)) || if ((req_encoder && libav().av_codec_is_encoder(c)) ||
(!req_encoder && av_codec_is_decoder(c))) { (!req_encoder && libav().av_codec_is_decoder(c))) {
if (c->type == type && c->name) { if (c->type == type && c->name) {
ret.emplace(c->name, c->long_name ? c->long_name : ""); ret.emplace(c->name, c->long_name ? c->long_name : "");
} }
...@@ -87,7 +91,7 @@ std::vector<std::string> get_protocols(bool output) { ...@@ -87,7 +91,7 @@ std::vector<std::string> get_protocols(bool output) {
void* opaque = nullptr; void* opaque = nullptr;
const char* name = nullptr; const char* name = nullptr;
std::vector<std::string> ret; std::vector<std::string> ret;
while ((name = avio_enum_protocols(&opaque, output))) { while ((name = libav().avio_enum_protocols(&opaque, output))) {
assert(name); assert(name);
ret.emplace_back(name); ret.emplace_back(name);
} }
...@@ -95,7 +99,7 @@ std::vector<std::string> get_protocols(bool output) { ...@@ -95,7 +99,7 @@ std::vector<std::string> get_protocols(bool output) {
} }
std::string get_build_config() { std::string get_build_config() {
return avcodec_configuration(); return libav().avcodec_configuration();
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -188,9 +192,9 @@ struct StreamWriterFileObj : private FileObj, public StreamWriterCustomIO { ...@@ -188,9 +192,9 @@ struct StreamWriterFileObj : private FileObj, public StreamWriterCustomIO {
}; };
PYBIND11_MODULE(_torchaudio_ffmpeg, m) { PYBIND11_MODULE(_torchaudio_ffmpeg, m) {
m.def("init", []() { avdevice_register_all(); }); m.def("init", []() { libav().avdevice_register_all(); });
m.def("get_log_level", []() { return av_log_get_level(); }); m.def("get_log_level", []() { return libav().av_log_get_level(); });
m.def("set_log_level", [](int level) { av_log_set_level(level); }); m.def("set_log_level", [](int level) { libav().av_log_set_level(level); });
m.def("get_versions", &get_versions); m.def("get_versions", &get_versions);
m.def("get_muxers", []() { return get_muxers(false); }); m.def("get_muxers", []() { return get_muxers(false); });
m.def("get_demuxers", []() { return get_demuxers(false); }); m.def("get_demuxers", []() { return get_demuxers(false); });
...@@ -246,21 +250,22 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) { ...@@ -246,21 +250,22 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) {
.def_property_readonly( .def_property_readonly(
"media_type", "media_type",
[](const OutputStreamInfo& o) -> std::string { [](const OutputStreamInfo& o) -> std::string {
return av_get_media_type_string(o.media_type); return libav().av_get_media_type_string(o.media_type);
}) })
.def_property_readonly( .def_property_readonly(
"format", "format",
[](const OutputStreamInfo& o) -> std::string { [](const OutputStreamInfo& o) -> std::string {
switch (o.media_type) { switch (o.media_type) {
case AVMEDIA_TYPE_AUDIO: case AVMEDIA_TYPE_AUDIO:
return av_get_sample_fmt_name((AVSampleFormat)(o.format)); return libav().av_get_sample_fmt_name(
(AVSampleFormat)(o.format));
case AVMEDIA_TYPE_VIDEO: case AVMEDIA_TYPE_VIDEO:
return av_get_pix_fmt_name((AVPixelFormat)(o.format)); return libav().av_get_pix_fmt_name((AVPixelFormat)(o.format));
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, false,
"FilterGraph is returning unexpected media type: ", "FilterGraph is returning unexpected media type: ",
av_get_media_type_string(o.media_type)); libav().av_get_media_type_string(o.media_type));
} }
}) })
.def_readonly("sample_rate", &OutputStreamInfo::sample_rate) .def_readonly("sample_rate", &OutputStreamInfo::sample_rate)
...@@ -284,7 +289,7 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) { ...@@ -284,7 +289,7 @@ PYBIND11_MODULE(_torchaudio_ffmpeg, m) {
.def_property_readonly( .def_property_readonly(
"media_type", "media_type",
[](const SrcStreamInfo& s) { [](const SrcStreamInfo& s) {
return av_get_media_type_string(s.media_type); return libav().av_get_media_type_string(s.media_type);
}) })
.def_readonly("codec_name", &SrcStreamInfo::codec_name) .def_readonly("codec_name", &SrcStreamInfo::codec_name)
.def_readonly("codec_long_name", &SrcStreamInfo::codec_long_name) .def_readonly("codec_long_name", &SrcStreamInfo::codec_long_name)
......
#include <torch/torch.h> #include <torch/torch.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/conversion.h> #include <torchaudio/csrc/ffmpeg/stream_reader/conversion.h>
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -7,6 +8,8 @@ ...@@ -7,6 +8,8 @@
namespace torchaudio::io { namespace torchaudio::io {
using detail::libav;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Audio // Audio
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
...@@ -429,11 +432,11 @@ void NV12CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) { ...@@ -429,11 +432,11 @@ void NV12CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_CUDA == fmt, AV_PIX_FMT_CUDA == fmt,
"Expected CUDA frame. Found: ", "Expected CUDA frame. Found: ",
av_get_pix_fmt_name(fmt)); libav().av_get_pix_fmt_name(fmt));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_NV12 == sw_fmt, AV_PIX_FMT_NV12 == sw_fmt,
"Expected NV12 format. Found: ", "Expected NV12 format. Found: ",
av_get_pix_fmt_name(sw_fmt)); libav().av_get_pix_fmt_name(sw_fmt));
// Write Y plane directly // Write Y plane directly
auto status = cudaMemcpy2D( auto status = cudaMemcpy2D(
...@@ -506,11 +509,11 @@ void P010CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) { ...@@ -506,11 +509,11 @@ void P010CudaConverter::convert(const AVFrame* src, torch::Tensor& dst) {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_CUDA == fmt, AV_PIX_FMT_CUDA == fmt,
"Expected CUDA frame. Found: ", "Expected CUDA frame. Found: ",
av_get_pix_fmt_name(fmt)); libav().av_get_pix_fmt_name(fmt));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_P010 == sw_fmt, AV_PIX_FMT_P010 == sw_fmt,
"Expected P010 format. Found: ", "Expected P010 format. Found: ",
av_get_pix_fmt_name(sw_fmt)); libav().av_get_pix_fmt_name(sw_fmt));
// Write Y plane directly // Write Y plane directly
auto status = cudaMemcpy2D( auto status = cudaMemcpy2D(
...@@ -581,11 +584,11 @@ void YUV444PCudaConverter::convert(const AVFrame* src, torch::Tensor& dst) { ...@@ -581,11 +584,11 @@ void YUV444PCudaConverter::convert(const AVFrame* src, torch::Tensor& dst) {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_CUDA == fmt, AV_PIX_FMT_CUDA == fmt,
"Expected CUDA frame. Found: ", "Expected CUDA frame. Found: ",
av_get_pix_fmt_name(fmt)); libav().av_get_pix_fmt_name(fmt));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
AV_PIX_FMT_YUV444P == sw_fmt, AV_PIX_FMT_YUV444P == sw_fmt,
"Expected YUV444P format. Found: ", "Expected YUV444P format. Found: ",
av_get_pix_fmt_name(sw_fmt)); libav().av_get_pix_fmt_name(sw_fmt));
// Write Y plane directly // Write Y plane directly
for (int i = 0; i < num_channels; ++i) { for (int i = 0; i < num_channels; ++i) {
......
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/packet_buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/packet_buffer.h>
namespace torchaudio { namespace torchaudio {
namespace io { namespace io {
using detail::libav;
void PacketBuffer::push_packet(AVPacket* packet) { void PacketBuffer::push_packet(AVPacket* packet) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(packet, "Packet is null."); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(packet, "Packet is null.");
AVPacket* p = av_packet_clone(packet); AVPacket* p = libav().av_packet_clone(packet);
TORCH_INTERNAL_ASSERT(p, "Failed to clone packet."); TORCH_INTERNAL_ASSERT(p, "Failed to clone packet.");
packets.emplace_back(p); packets.emplace_back(p);
} }
......
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/chunked_buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/buffer/chunked_buffer.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/unchunked_buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/buffer/unchunked_buffer.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/conversion.h> #include <torchaudio/csrc/ffmpeg/stream_reader/conversion.h>
...@@ -5,6 +6,9 @@ ...@@ -5,6 +6,9 @@
namespace torchaudio::io { namespace torchaudio::io {
namespace detail { namespace detail {
using detail::libav;
namespace { namespace {
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
...@@ -48,7 +52,7 @@ FilterGraphFactory get_video_factory( ...@@ -48,7 +52,7 @@ FilterGraphFactory get_video_factory(
f.add_video_sink(); f.add_video_sink();
f.add_process(filter_desc); f.add_process(filter_desc);
if (hw_frames_ctx) { if (hw_frames_ctx) {
f.create_filter(av_buffer_ref(hw_frames_ctx)); f.create_filter(libav().av_buffer_ref(hw_frames_ctx));
} else { } else {
f.create_filter(); f.create_filter();
} }
...@@ -139,7 +143,7 @@ struct ProcessImpl : public IPostDecodeProcess { ...@@ -139,7 +143,7 @@ struct ProcessImpl : public IPostDecodeProcess {
if (ret >= 0) { if (ret >= 0) {
buffer.push_frame(converter.convert(frame), frame->pts); buffer.push_frame(converter.convert(frame), frame->pts);
} }
av_frame_unref(frame); libav().av_frame_unref(frame);
} }
return ret; return ret;
} }
...@@ -159,7 +163,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_audio_process( ...@@ -159,7 +163,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_audio_process(
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
i.type == AVMEDIA_TYPE_AUDIO, i.type == AVMEDIA_TYPE_AUDIO,
"Unsupported media type found: ", "Unsupported media type found: ",
av_get_media_type_string(i.type)); libav().av_get_media_type_string(i.type));
using B = UnchunkedBuffer; using B = UnchunkedBuffer;
...@@ -226,7 +230,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_audio_process( ...@@ -226,7 +230,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_audio_process(
} }
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, "Unexpected audio type:", av_get_sample_fmt_name(fmt)); false, "Unexpected audio type:", libav().av_get_sample_fmt_name(fmt));
} }
} }
...@@ -239,7 +243,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_audio_process( ...@@ -239,7 +243,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_audio_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_AUDIO, i.type == AVMEDIA_TYPE_AUDIO,
"Unsupported media type found: ", "Unsupported media type found: ",
av_get_media_type_string(i.type)); libav().av_get_media_type_string(i.type));
using B = ChunkedBuffer; using B = ChunkedBuffer;
B buffer{i.time_base, frames_per_chunk, num_chunks}; B buffer{i.time_base, frames_per_chunk, num_chunks};
...@@ -307,7 +311,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_audio_process( ...@@ -307,7 +311,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_audio_process(
} }
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, "Unexpected audio type:", av_get_sample_fmt_name(fmt)); false, "Unexpected audio type:", libav().av_get_sample_fmt_name(fmt));
} }
} }
...@@ -321,7 +325,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_video_process( ...@@ -321,7 +325,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_video_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_VIDEO, i.type == AVMEDIA_TYPE_VIDEO,
"Unsupported media type found: ", "Unsupported media type found: ",
av_get_media_type_string(i.type)); libav().av_get_media_type_string(i.type));
auto h = i.height; auto h = i.height;
auto w = i.width; auto w = i.width;
...@@ -375,7 +379,9 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_video_process( ...@@ -375,7 +379,9 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_video_process(
} }
default: { default: {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, "Unexpected video format found: ", av_get_pix_fmt_name(fmt)); false,
"Unexpected video format found: ",
libav().av_get_pix_fmt_name(fmt));
} }
} }
} }
...@@ -393,7 +399,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_cuda_video_process( ...@@ -393,7 +399,7 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_cuda_video_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_VIDEO, i.type == AVMEDIA_TYPE_VIDEO,
"Unsupported media type found: ", "Unsupported media type found: ",
av_get_media_type_string(i.type)); libav().av_get_media_type_string(i.type));
using B = UnchunkedBuffer; using B = UnchunkedBuffer;
switch (auto fmt = (AVPixelFormat)i.format; fmt) { switch (auto fmt = (AVPixelFormat)i.format; fmt) {
...@@ -416,13 +422,13 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_cuda_video_process( ...@@ -416,13 +422,13 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_cuda_video_process(
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unsupported video format found in CUDA HW: ", "Unsupported video format found in CUDA HW: ",
av_get_pix_fmt_name(fmt)); libav().av_get_pix_fmt_name(fmt));
} }
default: { default: {
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unexpected video format found in CUDA HW: ", "Unexpected video format found in CUDA HW: ",
av_get_pix_fmt_name(fmt)); libav().av_get_pix_fmt_name(fmt));
} }
} }
#endif #endif
...@@ -437,7 +443,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_video_process( ...@@ -437,7 +443,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_video_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_VIDEO, i.type == AVMEDIA_TYPE_VIDEO,
"Unsupported media type found: ", "Unsupported media type found: ",
av_get_media_type_string(i.type)); libav().av_get_media_type_string(i.type));
auto h = i.height; auto h = i.height;
auto w = i.width; auto w = i.width;
...@@ -491,7 +497,9 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_video_process( ...@@ -491,7 +497,9 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_video_process(
} }
default: { default: {
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, "Unexpected video format found: ", av_get_pix_fmt_name(fmt)); false,
"Unexpected video format found: ",
libav().av_get_pix_fmt_name(fmt));
} }
} }
} }
...@@ -511,7 +519,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_cuda_video_process( ...@@ -511,7 +519,7 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_cuda_video_process(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
i.type == AVMEDIA_TYPE_VIDEO, i.type == AVMEDIA_TYPE_VIDEO,
"Unsupported media type found: ", "Unsupported media type found: ",
av_get_media_type_string(i.type)); libav().av_get_media_type_string(i.type));
using B = ChunkedBuffer; using B = ChunkedBuffer;
switch (auto fmt = (AVPixelFormat)i.format; fmt) { switch (auto fmt = (AVPixelFormat)i.format; fmt) {
...@@ -540,13 +548,13 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_cuda_video_process( ...@@ -540,13 +548,13 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_cuda_video_process(
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unsupported video format found in CUDA HW: ", "Unsupported video format found in CUDA HW: ",
av_get_pix_fmt_name(fmt)); libav().av_get_pix_fmt_name(fmt));
} }
default: { default: {
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unexpected video format found in CUDA HW: ", "Unexpected video format found in CUDA HW: ",
av_get_pix_fmt_name(fmt)); libav().av_get_pix_fmt_name(fmt));
} }
} }
#endif #endif
......
#include <torchaudio/csrc/ffmpeg/hw_context.h> #include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/stream_processor.h> #include <torchaudio/csrc/ffmpeg/stream_reader/stream_processor.h>
#include <stdexcept> #include <stdexcept>
#include <string_view> #include <string_view>
...@@ -6,6 +7,8 @@ ...@@ -6,6 +7,8 @@
namespace torchaudio { namespace torchaudio {
namespace io { namespace io {
using detail::libav;
namespace { namespace {
AVCodecContextPtr alloc_codec_context( AVCodecContextPtr alloc_codec_context(
enum AVCodecID codec_id, enum AVCodecID codec_id,
...@@ -13,24 +16,24 @@ AVCodecContextPtr alloc_codec_context( ...@@ -13,24 +16,24 @@ AVCodecContextPtr alloc_codec_context(
const AVCodec* codec = [&]() { const AVCodec* codec = [&]() {
if (decoder_name) { if (decoder_name) {
const AVCodec* c = const AVCodec* c =
avcodec_find_decoder_by_name(decoder_name.value().c_str()); libav().avcodec_find_decoder_by_name(decoder_name.value().c_str());
TORCH_CHECK(c, "Unsupported codec: ", decoder_name.value()); TORCH_CHECK(c, "Unsupported codec: ", decoder_name.value());
return c; return c;
} else { } else {
const AVCodec* c = avcodec_find_decoder(codec_id); const AVCodec* c = libav().avcodec_find_decoder(codec_id);
TORCH_CHECK(c, "Unsupported codec: ", avcodec_get_name(codec_id)); TORCH_CHECK(c, "Unsupported codec: ", libav().avcodec_get_name(codec_id));
return c; return c;
} }
}(); }();
AVCodecContext* codec_ctx = avcodec_alloc_context3(codec); AVCodecContext* codec_ctx = libav().avcodec_alloc_context3(codec);
TORCH_CHECK(codec_ctx, "Failed to allocate CodecContext."); TORCH_CHECK(codec_ctx, "Failed to allocate CodecContext.");
return AVCodecContextPtr(codec_ctx); return AVCodecContextPtr(codec_ctx);
} }
const AVCodecHWConfig* get_cuda_config(const AVCodec* codec) { const AVCodecHWConfig* get_cuda_config(const AVCodec* codec) {
for (int i = 0;; ++i) { for (int i = 0;; ++i) {
const AVCodecHWConfig* config = avcodec_get_hw_config(codec, i); const AVCodecHWConfig* config = libav().avcodec_get_hw_config(codec, i);
if (!config) { if (!config) {
break; break;
} }
...@@ -83,7 +86,7 @@ enum AVPixelFormat get_hw_format( ...@@ -83,7 +86,7 @@ enum AVPixelFormat get_hw_format(
} }
AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) { AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) {
AVBufferRef* p = av_hwframe_ctx_alloc(codec_ctx->hw_device_ctx); AVBufferRef* p = libav().av_hwframe_ctx_alloc(codec_ctx->hw_device_ctx);
TORCH_CHECK( TORCH_CHECK(
p, p,
"Failed to allocate CUDA frame context from device context at ", "Failed to allocate CUDA frame context from device context at ",
...@@ -94,11 +97,11 @@ AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) { ...@@ -94,11 +97,11 @@ AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) {
frames_ctx->width = codec_ctx->width; frames_ctx->width = codec_ctx->width;
frames_ctx->height = codec_ctx->height; frames_ctx->height = codec_ctx->height;
frames_ctx->initial_pool_size = 5; frames_ctx->initial_pool_size = 5;
int ret = av_hwframe_ctx_init(p); int ret = libav().av_hwframe_ctx_init(p);
if (ret >= 0) { if (ret >= 0) {
return p; return p;
} }
av_buffer_unref(&p); libav().av_buffer_unref(&p);
TORCH_CHECK( TORCH_CHECK(
false, "Failed to initialize CUDA frame context: ", av_err2string(ret)); false, "Failed to initialize CUDA frame context: ", av_err2string(ret));
} }
...@@ -107,7 +110,7 @@ void configure_codec_context( ...@@ -107,7 +110,7 @@ void configure_codec_context(
AVCodecContext* codec_ctx, AVCodecContext* codec_ctx,
const AVCodecParameters* params, const AVCodecParameters* params,
const torch::Device& device) { const torch::Device& device) {
int ret = avcodec_parameters_to_context(codec_ctx, params); int ret = libav().avcodec_parameters_to_context(codec_ctx, params);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to set CodecContext parameter: ", av_err2string(ret)); ret >= 0, "Failed to set CodecContext parameter: ", av_err2string(ret));
...@@ -122,7 +125,8 @@ void configure_codec_context( ...@@ -122,7 +125,8 @@ void configure_codec_context(
// 2. Set pCodecContext->get_format call back function which // 2. Set pCodecContext->get_format call back function which
// will retrieve the HW pixel format from opaque pointer. // will retrieve the HW pixel format from opaque pointer.
codec_ctx->get_format = get_hw_format; codec_ctx->get_format = get_hw_format;
codec_ctx->hw_device_ctx = av_buffer_ref(get_cuda_context(device.index())); codec_ctx->hw_device_ctx =
libav().av_buffer_ref(get_cuda_context(device.index()));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
codec_ctx->hw_device_ctx, "Failed to reference HW device context."); codec_ctx->hw_device_ctx, "Failed to reference HW device context.");
#endif #endif
...@@ -135,16 +139,16 @@ void open_codec( ...@@ -135,16 +139,16 @@ void open_codec(
AVDictionary* opts = get_option_dict(decoder_option); AVDictionary* opts = get_option_dict(decoder_option);
// Default to single thread execution. // Default to single thread execution.
if (!av_dict_get(opts, "threads", nullptr, 0)) { if (!libav().av_dict_get(opts, "threads", nullptr, 0)) {
av_dict_set(&opts, "threads", "1", 0); libav().av_dict_set(&opts, "threads", "1", 0);
} }
if (!codec_ctx->channel_layout) { if (!codec_ctx->channel_layout) {
codec_ctx->channel_layout = codec_ctx->channel_layout =
av_get_default_channel_layout(codec_ctx->channels); libav().av_get_default_channel_layout(codec_ctx->channels);
} }
int ret = avcodec_open2(codec_ctx, codec_ctx->codec, &opts); int ret = libav().avcodec_open2(codec_ctx, codec_ctx->codec, &opts);
clean_up_dict(opts); clean_up_dict(opts);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to initialize CodecContext: ", av_err2string(ret)); ret >= 0, "Failed to initialize CodecContext: ", av_err2string(ret));
...@@ -259,8 +263,8 @@ void StreamProcessor::remove_stream(KeyType key) { ...@@ -259,8 +263,8 @@ void StreamProcessor::remove_stream(KeyType key) {
void StreamProcessor::set_discard_timestamp(int64_t timestamp) { void StreamProcessor::set_discard_timestamp(int64_t timestamp) {
TORCH_CHECK(timestamp >= 0, "timestamp must be non-negative."); TORCH_CHECK(timestamp >= 0, "timestamp must be non-negative.");
discard_before_pts = discard_before_pts = libav().av_rescale_q(
av_rescale_q(timestamp, av_get_time_base_q(), stream_time_base); timestamp, libav().av_get_time_base_q(), stream_time_base);
} }
void StreamProcessor::set_decoder( void StreamProcessor::set_decoder(
...@@ -306,9 +310,9 @@ int StreamProcessor::process_packet(AVPacket* packet) { ...@@ -306,9 +310,9 @@ int StreamProcessor::process_packet(AVPacket* packet) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
is_decoder_set(), is_decoder_set(),
"Decoder must have been set prior to calling this function."); "Decoder must have been set prior to calling this function.");
int ret = avcodec_send_packet(codec_ctx, packet); int ret = libav().avcodec_send_packet(codec_ctx, packet);
while (ret >= 0) { while (ret >= 0) {
ret = avcodec_receive_frame(codec_ctx, frame); ret = libav().avcodec_receive_frame(codec_ctx, frame);
// AVERROR(EAGAIN) means that new input data is required to return new // AVERROR(EAGAIN) means that new input data is required to return new
// output. // output.
if (ret == AVERROR(EAGAIN)) if (ret == AVERROR(EAGAIN))
...@@ -355,7 +359,7 @@ int StreamProcessor::process_packet(AVPacket* packet) { ...@@ -355,7 +359,7 @@ int StreamProcessor::process_packet(AVPacket* packet) {
} }
// else we can just unref the frame and continue // else we can just unref the frame and continue
av_frame_unref(frame); libav().av_frame_unref(frame);
} }
return ret; return ret;
} }
...@@ -364,7 +368,7 @@ void StreamProcessor::flush() { ...@@ -364,7 +368,7 @@ void StreamProcessor::flush() {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY( TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
is_decoder_set(), is_decoder_set(),
"Decoder must have been set prior to calling this function."); "Decoder must have been set prior to calling this function.");
avcodec_flush_buffers(codec_ctx); libav().avcodec_flush_buffers(codec_ctx);
for (auto& ite : post_processes) { for (auto& ite : post_processes) {
ite.second->flush(); ite.second->flush();
} }
......
#include <torchaudio/csrc/ffmpeg/ffmpeg.h> #include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h> #include <torchaudio/csrc/ffmpeg/stream_reader/stream_reader.h>
#include <chrono> #include <chrono>
#include <sstream> #include <sstream>
#include <stdexcept> #include <stdexcept>
#include <thread> #include <thread>
extern "C" {
#include <libavutil/rational.h>
}
namespace torchaudio { namespace torchaudio {
namespace io { namespace io {
using detail::libav;
using KeyType = StreamProcessor::KeyType; using KeyType = StreamProcessor::KeyType;
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -19,7 +25,7 @@ AVFormatContext* get_input_format_context( ...@@ -19,7 +25,7 @@ AVFormatContext* get_input_format_context(
const c10::optional<std::string>& format, const c10::optional<std::string>& format,
const c10::optional<OptionDict>& option, const c10::optional<OptionDict>& option,
AVIOContext* io_ctx) { AVIOContext* io_ctx) {
AVFormatContext* p = avformat_alloc_context(); AVFormatContext* p = libav().avformat_alloc_context();
TORCH_CHECK(p, "Failed to allocate AVFormatContext."); TORCH_CHECK(p, "Failed to allocate AVFormatContext.");
if (io_ctx) { if (io_ctx) {
p->pb = io_ctx; p->pb = io_ctx;
...@@ -29,7 +35,7 @@ AVFormatContext* get_input_format_context( ...@@ -29,7 +35,7 @@ AVFormatContext* get_input_format_context(
if (format.has_value()) { if (format.has_value()) {
std::string format_str = format.value(); std::string format_str = format.value();
AVFORMAT_CONST AVInputFormat* pInput = AVFORMAT_CONST AVInputFormat* pInput =
av_find_input_format(format_str.c_str()); libav().av_find_input_format(format_str.c_str());
TORCH_CHECK(pInput, "Unsupported device/format: \"", format_str, "\""); TORCH_CHECK(pInput, "Unsupported device/format: \"", format_str, "\"");
return pInput; return pInput;
} }
...@@ -37,7 +43,7 @@ AVFormatContext* get_input_format_context( ...@@ -37,7 +43,7 @@ AVFormatContext* get_input_format_context(
}(); }();
AVDictionary* opt = get_option_dict(option); AVDictionary* opt = get_option_dict(option);
int ret = avformat_open_input(&p, src.c_str(), pInputFormat, &opt); int ret = libav().avformat_open_input(&p, src.c_str(), pInputFormat, &opt);
clean_up_dict(opt); clean_up_dict(opt);
TORCH_CHECK( TORCH_CHECK(
...@@ -53,7 +59,7 @@ AVFormatContext* get_input_format_context( ...@@ -53,7 +59,7 @@ AVFormatContext* get_input_format_context(
StreamReader::StreamReader(AVFormatContext* p) : format_ctx(p) { StreamReader::StreamReader(AVFormatContext* p) : format_ctx(p) {
C10_LOG_API_USAGE_ONCE("torchaudio.io.StreamReader"); C10_LOG_API_USAGE_ONCE("torchaudio.io.StreamReader");
int ret = avformat_find_stream_info(format_ctx, nullptr); int ret = libav().avformat_find_stream_info(format_ctx, nullptr);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to find stream information: ", av_err2string(ret)); ret >= 0, "Failed to find stream information: ", av_err2string(ret));
...@@ -110,7 +116,7 @@ void validate_src_stream_type( ...@@ -110,7 +116,7 @@ void validate_src_stream_type(
"Stream ", "Stream ",
i, i,
" is not ", " is not ",
av_get_media_type_string(type), libav().av_get_media_type_string(type),
" stream."); " stream.");
} }
...@@ -125,7 +131,8 @@ namespace { ...@@ -125,7 +131,8 @@ namespace {
OptionDict parse_metadata(const AVDictionary* metadata) { OptionDict parse_metadata(const AVDictionary* metadata) {
AVDictionaryEntry* tag = nullptr; AVDictionaryEntry* tag = nullptr;
OptionDict ret; OptionDict ret;
while ((tag = av_dict_get(metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) { while (
(tag = libav().av_dict_get(metadata, "", tag, AV_DICT_IGNORE_SUFFIX))) {
ret.emplace(std::string(tag->key), std::string(tag->value)); ret.emplace(std::string(tag->key), std::string(tag->value));
} }
return ret; return ret;
...@@ -148,7 +155,8 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ...@@ -148,7 +155,8 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
ret.num_frames = stream->nb_frames; ret.num_frames = stream->nb_frames;
ret.bits_per_sample = codecpar->bits_per_raw_sample; ret.bits_per_sample = codecpar->bits_per_raw_sample;
ret.metadata = parse_metadata(stream->metadata); ret.metadata = parse_metadata(stream->metadata);
const AVCodecDescriptor* desc = avcodec_descriptor_get(codecpar->codec_id); const AVCodecDescriptor* desc =
libav().avcodec_descriptor_get(codecpar->codec_id);
if (desc) { if (desc) {
ret.codec_name = desc->name; ret.codec_name = desc->name;
ret.codec_long_name = desc->long_name; ret.codec_long_name = desc->long_name;
...@@ -158,7 +166,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ...@@ -158,7 +166,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
case AVMEDIA_TYPE_AUDIO: { case AVMEDIA_TYPE_AUDIO: {
AVSampleFormat smp_fmt = static_cast<AVSampleFormat>(codecpar->format); AVSampleFormat smp_fmt = static_cast<AVSampleFormat>(codecpar->format);
if (smp_fmt != AV_SAMPLE_FMT_NONE) { if (smp_fmt != AV_SAMPLE_FMT_NONE) {
ret.fmt_name = av_get_sample_fmt_name(smp_fmt); ret.fmt_name = libav().av_get_sample_fmt_name(smp_fmt);
} }
ret.sample_rate = static_cast<double>(codecpar->sample_rate); ret.sample_rate = static_cast<double>(codecpar->sample_rate);
ret.num_channels = codecpar->channels; ret.num_channels = codecpar->channels;
...@@ -167,7 +175,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ...@@ -167,7 +175,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
case AVMEDIA_TYPE_VIDEO: { case AVMEDIA_TYPE_VIDEO: {
AVPixelFormat pix_fmt = static_cast<AVPixelFormat>(codecpar->format); AVPixelFormat pix_fmt = static_cast<AVPixelFormat>(codecpar->format);
if (pix_fmt != AV_PIX_FMT_NONE) { if (pix_fmt != AV_PIX_FMT_NONE) {
ret.fmt_name = av_get_pix_fmt_name(pix_fmt); ret.fmt_name = libav().av_get_pix_fmt_name(pix_fmt);
} }
ret.width = codecpar->width; ret.width = codecpar->width;
ret.height = codecpar->height; ret.height = codecpar->height;
...@@ -181,7 +189,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ...@@ -181,7 +189,7 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
namespace { namespace {
AVCodecParameters* get_codecpar() { AVCodecParameters* get_codecpar() {
AVCodecParameters* ptr = avcodec_parameters_alloc(); AVCodecParameters* ptr = libav().avcodec_parameters_alloc();
TORCH_CHECK(ptr, "Failed to allocate resource."); TORCH_CHECK(ptr, "Failed to allocate resource.");
return ptr; return ptr;
} }
...@@ -192,7 +200,7 @@ StreamParams StreamReader::get_src_stream_params(int i) { ...@@ -192,7 +200,7 @@ StreamParams StreamReader::get_src_stream_params(int i) {
AVStream* stream = format_ctx->streams[i]; AVStream* stream = format_ctx->streams[i];
AVCodecParametersPtr codec_params(get_codecpar()); AVCodecParametersPtr codec_params(get_codecpar());
int ret = avcodec_parameters_copy(codec_params, stream->codecpar); int ret = libav().avcodec_parameters_copy(codec_params, stream->codecpar);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
"Failed to copy the stream's codec parameters. (", "Failed to copy the stream's codec parameters. (",
...@@ -234,12 +242,12 @@ OutputStreamInfo StreamReader::get_out_stream_info(int i) const { ...@@ -234,12 +242,12 @@ OutputStreamInfo StreamReader::get_out_stream_info(int i) const {
} }
int64_t StreamReader::find_best_audio_stream() const { int64_t StreamReader::find_best_audio_stream() const {
return av_find_best_stream( return libav().av_find_best_stream(
format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0); format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
} }
int64_t StreamReader::find_best_video_stream() const { int64_t StreamReader::find_best_video_stream() const {
return av_find_best_stream( return libav().av_find_best_stream(
format_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0); format_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
} }
...@@ -289,7 +297,7 @@ void StreamReader::seek(double timestamp_s, int64_t mode) { ...@@ -289,7 +297,7 @@ void StreamReader::seek(double timestamp_s, int64_t mode) {
TORCH_CHECK(false, "Invalid mode value: ", mode); TORCH_CHECK(false, "Invalid mode value: ", mode);
} }
int ret = av_seek_frame(format_ctx, -1, timestamp_av_tb, flag); int ret = libav().av_seek_frame(format_ctx, -1, timestamp_av_tb, flag);
if (ret < 0) { if (ret < 0) {
seek_timestamp = 0; seek_timestamp = 0;
...@@ -402,12 +410,12 @@ void StreamReader::add_stream( ...@@ -402,12 +410,12 @@ void StreamReader::add_stream(
case AVMEDIA_TYPE_AUDIO: case AVMEDIA_TYPE_AUDIO:
return AVRational{0, 1}; return AVRational{0, 1};
case AVMEDIA_TYPE_VIDEO: case AVMEDIA_TYPE_VIDEO:
return av_guess_frame_rate(format_ctx, stream, nullptr); return libav().av_guess_frame_rate(format_ctx, stream, nullptr);
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, false,
"Unexpected media type is given: ", "Unexpected media type is given: ",
av_get_media_type_string(media_type)); libav().av_get_media_type_string(media_type));
} }
}(); }();
int key = processors[i]->add_stream( int key = processors[i]->add_stream(
...@@ -446,7 +454,7 @@ void StreamReader::remove_stream(int64_t i) { ...@@ -446,7 +454,7 @@ void StreamReader::remove_stream(int64_t i) {
// 1: It's done, caller should stop calling // 1: It's done, caller should stop calling
// <0: Some error happened // <0: Some error happened
int StreamReader::process_packet() { int StreamReader::process_packet() {
int ret = av_read_frame(format_ctx, packet); int ret = libav().av_read_frame(format_ctx, packet);
if (ret == AVERROR_EOF) { if (ret == AVERROR_EOF) {
ret = drain(); ret = drain();
return (ret < 0) ? ret : 1; return (ret < 0) ? ret : 1;
...@@ -577,12 +585,13 @@ AVIOContext* get_io_context( ...@@ -577,12 +585,13 @@ AVIOContext* get_io_context(
int buffer_size, int buffer_size,
int (*read_packet)(void* opaque, uint8_t* buf, int buf_size), int (*read_packet)(void* opaque, uint8_t* buf, int buf_size),
int64_t (*seek)(void* opaque, int64_t offset, int whence)) { int64_t (*seek)(void* opaque, int64_t offset, int whence)) {
unsigned char* buffer = static_cast<unsigned char*>(av_malloc(buffer_size)); unsigned char* buffer =
static_cast<unsigned char*>(libav().av_malloc(buffer_size));
TORCH_CHECK(buffer, "Failed to allocate buffer."); TORCH_CHECK(buffer, "Failed to allocate buffer.");
AVIOContext* io_ctx = avio_alloc_context( AVIOContext* io_ctx = libav().avio_alloc_context(
buffer, buffer_size, 0, opaque, read_packet, nullptr, seek); buffer, buffer_size, 0, opaque, read_packet, nullptr, seek);
if (!io_ctx) { if (!io_ctx) {
av_freep(&buffer); libav().av_freep(&buffer);
TORCH_CHECK(false, "Failed to allocate AVIOContext."); TORCH_CHECK(false, "Failed to allocate AVIOContext.");
} }
return io_ctx; return io_ctx;
......
#include <torchaudio/csrc/ffmpeg/hw_context.h> #include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_writer/encode_process.h> #include <torchaudio/csrc/ffmpeg/stream_writer/encode_process.h>
#include <cmath> #include <cmath>
extern "C" {
#include <libavutil/rational.h>
}
namespace torchaudio::io { namespace torchaudio::io {
using detail::libav;
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// EncodeProcess Logic Implementation // EncodeProcess Logic Implementation
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
...@@ -56,7 +63,7 @@ void EncodeProcess::process_frame(AVFrame* src) { ...@@ -56,7 +63,7 @@ void EncodeProcess::process_frame(AVFrame* src) {
if (ret >= 0) { if (ret >= 0) {
encoder.encode(dst_frame); encoder.encode(dst_frame);
} }
av_frame_unref(dst_frame); libav().av_frame_unref(dst_frame);
} }
} }
...@@ -71,8 +78,8 @@ void EncodeProcess::flush() { ...@@ -71,8 +78,8 @@ void EncodeProcess::flush() {
namespace { namespace {
enum AVSampleFormat get_src_sample_fmt(const std::string& src) { enum AVSampleFormat get_src_sample_fmt(const std::string& src) {
auto fmt = av_get_sample_fmt(src.c_str()); auto fmt = libav().av_get_sample_fmt(src.c_str());
if (fmt != AV_SAMPLE_FMT_NONE && !av_sample_fmt_is_planar(fmt)) { if (fmt != AV_SAMPLE_FMT_NONE && !libav().av_sample_fmt_is_planar(fmt)) {
return fmt; return fmt;
} }
TORCH_CHECK( TORCH_CHECK(
...@@ -89,7 +96,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) { ...@@ -89,7 +96,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) {
AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64,
AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT,
AV_SAMPLE_FMT_DBL}) { AV_SAMPLE_FMT_DBL}) {
ret.emplace_back(av_get_sample_fmt_name(fmt)); ret.emplace_back(libav().av_get_sample_fmt_name(fmt));
} }
return c10::Join(", ", ret); return c10::Join(", ", ret);
}(), }(),
...@@ -97,7 +104,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) { ...@@ -97,7 +104,7 @@ enum AVSampleFormat get_src_sample_fmt(const std::string& src) {
} }
enum AVPixelFormat get_src_pix_fmt(const std::string& src) { enum AVPixelFormat get_src_pix_fmt(const std::string& src) {
AVPixelFormat fmt = av_get_pix_fmt(src.c_str()); AVPixelFormat fmt = libav().av_get_pix_fmt(src.c_str());
switch (fmt) { switch (fmt) {
case AV_PIX_FMT_GRAY8: case AV_PIX_FMT_GRAY8:
case AV_PIX_FMT_RGB24: case AV_PIX_FMT_RGB24:
...@@ -118,7 +125,7 @@ enum AVPixelFormat get_src_pix_fmt(const std::string& src) { ...@@ -118,7 +125,7 @@ enum AVPixelFormat get_src_pix_fmt(const std::string& src) {
AV_PIX_FMT_RGB24, AV_PIX_FMT_RGB24,
AV_PIX_FMT_BGR24, AV_PIX_FMT_BGR24,
AV_PIX_FMT_YUV444P}) { AV_PIX_FMT_YUV444P}) {
ret.emplace_back(av_get_pix_fmt_name(fmt)); ret.emplace_back(libav().av_get_pix_fmt_name(fmt));
} }
return c10::Join(", ", ret); return c10::Join(", ", ret);
}(), }(),
...@@ -132,18 +139,21 @@ const AVCodec* get_codec( ...@@ -132,18 +139,21 @@ const AVCodec* get_codec(
AVCodecID default_codec, AVCodecID default_codec,
const c10::optional<std::string>& encoder) { const c10::optional<std::string>& encoder) {
if (encoder) { if (encoder) {
const AVCodec* c = avcodec_find_encoder_by_name(encoder.value().c_str()); const AVCodec* c =
libav().avcodec_find_encoder_by_name(encoder.value().c_str());
TORCH_CHECK(c, "Unexpected codec: ", encoder.value()); TORCH_CHECK(c, "Unexpected codec: ", encoder.value());
return c; return c;
} }
const AVCodec* c = avcodec_find_encoder(default_codec); const AVCodec* c = libav().avcodec_find_encoder(default_codec);
TORCH_CHECK( TORCH_CHECK(
c, "Encoder not found for codec: ", avcodec_get_name(default_codec)); c,
"Encoder not found for codec: ",
libav().avcodec_get_name(default_codec));
return c; return c;
} }
AVCodecContextPtr get_codec_ctx(const AVCodec* codec, int flags) { AVCodecContextPtr get_codec_ctx(const AVCodec* codec, int flags) {
AVCodecContext* ctx = avcodec_alloc_context3(codec); AVCodecContext* ctx = libav().avcodec_alloc_context3(codec);
TORCH_CHECK(ctx, "Failed to allocate CodecContext."); TORCH_CHECK(ctx, "Failed to allocate CodecContext.");
if (flags & AVFMT_GLOBALHEADER) { if (flags & AVFMT_GLOBALHEADER) {
...@@ -169,25 +179,25 @@ void open_codec( ...@@ -169,25 +179,25 @@ void open_codec(
// while "libopus" refers to the one depends on libopusenc // while "libopus" refers to the one depends on libopusenc
// https://ffmpeg.org/doxygen/4.1/libopusenc_8c.html#aa1d649e48cd2ec00cfe181cf9d0f3251 // https://ffmpeg.org/doxygen/4.1/libopusenc_8c.html#aa1d649e48cd2ec00cfe181cf9d0f3251
if (std::strcmp(codec_ctx->codec->name, "vorbis") == 0) { if (std::strcmp(codec_ctx->codec->name, "vorbis") == 0) {
if (!av_dict_get(opt, "strict", nullptr, 0)) { if (!libav().av_dict_get(opt, "strict", nullptr, 0)) {
TORCH_WARN_ONCE( TORCH_WARN_ONCE(
"\"vorbis\" encoder is selected. Enabling '-strict experimental'. ", "\"vorbis\" encoder is selected. Enabling '-strict experimental'. ",
"If this is not desired, please provide \"strict\" encoder option ", "If this is not desired, please provide \"strict\" encoder option ",
"with desired value."); "with desired value.");
av_dict_set(&opt, "strict", "experimental", 0); libav().av_dict_set(&opt, "strict", "experimental", 0);
} }
} }
if (std::strcmp(codec_ctx->codec->name, "opus") == 0) { if (std::strcmp(codec_ctx->codec->name, "opus") == 0) {
if (!av_dict_get(opt, "strict", nullptr, 0)) { if (!libav().av_dict_get(opt, "strict", nullptr, 0)) {
TORCH_WARN_ONCE( TORCH_WARN_ONCE(
"\"opus\" encoder is selected. Enabling '-strict experimental'. ", "\"opus\" encoder is selected. Enabling '-strict experimental'. ",
"If this is not desired, please provide \"strict\" encoder option ", "If this is not desired, please provide \"strict\" encoder option ",
"with desired value."); "with desired value.");
av_dict_set(&opt, "strict", "experimental", 0); libav().av_dict_set(&opt, "strict", "experimental", 0);
} }
} }
int ret = avcodec_open2(codec_ctx, codec_ctx->codec, &opt); int ret = libav().avcodec_open2(codec_ctx, codec_ctx->codec, &opt);
clean_up_dict(opt); clean_up_dict(opt);
TORCH_CHECK(ret >= 0, "Failed to open codec: (", av_err2string(ret), ")"); TORCH_CHECK(ret >= 0, "Failed to open codec: (", av_err2string(ret), ")");
} }
...@@ -214,7 +224,7 @@ bool supported_sample_fmt( ...@@ -214,7 +224,7 @@ bool supported_sample_fmt(
std::string get_supported_formats(const AVSampleFormat* sample_fmts) { std::string get_supported_formats(const AVSampleFormat* sample_fmts) {
std::vector<std::string> ret; std::vector<std::string> ret;
while (*sample_fmts != AV_SAMPLE_FMT_NONE) { while (*sample_fmts != AV_SAMPLE_FMT_NONE) {
ret.emplace_back(av_get_sample_fmt_name(*sample_fmts)); ret.emplace_back(libav().av_get_sample_fmt_name(*sample_fmts));
++sample_fmts; ++sample_fmts;
} }
return c10::Join(", ", ret); return c10::Join(", ", ret);
...@@ -226,7 +236,7 @@ AVSampleFormat get_enc_fmt( ...@@ -226,7 +236,7 @@ AVSampleFormat get_enc_fmt(
const AVCodec* codec) { const AVCodec* codec) {
if (encoder_format) { if (encoder_format) {
auto& enc_fmt_val = encoder_format.value(); auto& enc_fmt_val = encoder_format.value();
auto fmt = av_get_sample_fmt(enc_fmt_val.c_str()); auto fmt = libav().av_get_sample_fmt(enc_fmt_val.c_str());
TORCH_CHECK( TORCH_CHECK(
fmt != AV_SAMPLE_FMT_NONE, "Unknown sample format: ", enc_fmt_val); fmt != AV_SAMPLE_FMT_NONE, "Unknown sample format: ", enc_fmt_val);
TORCH_CHECK( TORCH_CHECK(
...@@ -313,8 +323,8 @@ std::string get_supported_channels(const uint64_t* channel_layouts) { ...@@ -313,8 +323,8 @@ std::string get_supported_channels(const uint64_t* channel_layouts) {
std::vector<std::string> names; std::vector<std::string> names;
while (*channel_layouts) { while (*channel_layouts) {
std::stringstream ss; std::stringstream ss;
ss << av_get_channel_layout_nb_channels(*channel_layouts); ss << libav().av_get_channel_layout_nb_channels(*channel_layouts);
ss << " (" << av_get_channel_name(*channel_layouts) << ")"; ss << " (" << libav().av_get_channel_name(*channel_layouts) << ")";
names.emplace_back(ss.str()); names.emplace_back(ss.str());
++channel_layouts; ++channel_layouts;
} }
...@@ -331,10 +341,10 @@ uint64_t get_channel_layout( ...@@ -331,10 +341,10 @@ uint64_t get_channel_layout(
TORCH_CHECK( TORCH_CHECK(
val > 0, "The number of channels must be greater than 0. Found: ", val); val > 0, "The number of channels must be greater than 0. Found: ", val);
if (!codec->channel_layouts) { if (!codec->channel_layouts) {
return static_cast<uint64_t>(av_get_default_channel_layout(val)); return static_cast<uint64_t>(libav().av_get_default_channel_layout(val));
} }
for (const uint64_t* it = codec->channel_layouts; *it; ++it) { for (const uint64_t* it = codec->channel_layouts; *it; ++it) {
if (av_get_channel_layout_nb_channels(*it) == val) { if (libav().av_get_channel_layout_nb_channels(*it) == val) {
return *it; return *it;
} }
} }
...@@ -371,8 +381,9 @@ void configure_audio_codec_ctx( ...@@ -371,8 +381,9 @@ void configure_audio_codec_ctx(
const c10::optional<CodecConfig>& codec_config) { const c10::optional<CodecConfig>& codec_config) {
codec_ctx->sample_fmt = format; codec_ctx->sample_fmt = format;
codec_ctx->sample_rate = sample_rate; codec_ctx->sample_rate = sample_rate;
codec_ctx->time_base = av_inv_q(av_d2q(sample_rate, 1 << 24)); codec_ctx->time_base = av_inv_q(libav().av_d2q(sample_rate, 1 << 24));
codec_ctx->channels = av_get_channel_layout_nb_channels(channel_layout); codec_ctx->channels =
libav().av_get_channel_layout_nb_channels(channel_layout);
codec_ctx->channel_layout = channel_layout; codec_ctx->channel_layout = channel_layout;
// Set optional stuff // Set optional stuff
...@@ -411,7 +422,7 @@ bool supported_pix_fmt(const AVPixelFormat fmt, const AVPixelFormat* pix_fmts) { ...@@ -411,7 +422,7 @@ bool supported_pix_fmt(const AVPixelFormat fmt, const AVPixelFormat* pix_fmts) {
std::string get_supported_formats(const AVPixelFormat* pix_fmts) { std::string get_supported_formats(const AVPixelFormat* pix_fmts) {
std::vector<std::string> ret; std::vector<std::string> ret;
while (*pix_fmts != AV_PIX_FMT_NONE) { while (*pix_fmts != AV_PIX_FMT_NONE) {
ret.emplace_back(av_get_pix_fmt_name(*pix_fmts)); ret.emplace_back(libav().av_get_pix_fmt_name(*pix_fmts));
++pix_fmts; ++pix_fmts;
} }
return c10::Join(", ", ret); return c10::Join(", ", ret);
...@@ -423,7 +434,7 @@ AVPixelFormat get_enc_fmt( ...@@ -423,7 +434,7 @@ AVPixelFormat get_enc_fmt(
const AVCodec* codec) { const AVCodec* codec) {
if (encoder_format) { if (encoder_format) {
const auto& val = encoder_format.value(); const auto& val = encoder_format.value();
auto fmt = av_get_pix_fmt(val.c_str()); auto fmt = libav().av_get_pix_fmt(val.c_str());
TORCH_CHECK( TORCH_CHECK(
supported_pix_fmt(fmt, codec->pix_fmts), supported_pix_fmt(fmt, codec->pix_fmts),
codec->name, codec->name,
...@@ -461,7 +472,7 @@ AVRational get_enc_rate( ...@@ -461,7 +472,7 @@ AVRational get_enc_rate(
std::isfinite(enc_rate) && enc_rate > 0, std::isfinite(enc_rate) && enc_rate > 0,
"Encoder sample rate must be positive and fininte. Found: ", "Encoder sample rate must be positive and fininte. Found: ",
enc_rate); enc_rate);
AVRational rate = av_d2q(enc_rate, 1 << 24); AVRational rate = libav().av_d2q(enc_rate, 1 << 24);
TORCH_CHECK( TORCH_CHECK(
supported_frame_rate(rate, codec->supported_framerates), supported_frame_rate(rate, codec->supported_framerates),
codec->name, codec->name,
...@@ -545,14 +556,14 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) { ...@@ -545,14 +556,14 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
// context to AVCodecContext. But this way, it will be deallocated // context to AVCodecContext. But this way, it will be deallocated
// automatically at the time AVCodecContext is freed, so we do that. // automatically at the time AVCodecContext is freed, so we do that.
ctx->hw_device_ctx = av_buffer_ref(get_cuda_context(device.index())); ctx->hw_device_ctx = libav().av_buffer_ref(get_cuda_context(device.index()));
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
ctx->hw_device_ctx, "Failed to reference HW device context."); ctx->hw_device_ctx, "Failed to reference HW device context.");
ctx->sw_pix_fmt = ctx->pix_fmt; ctx->sw_pix_fmt = ctx->pix_fmt;
ctx->pix_fmt = AV_PIX_FMT_CUDA; ctx->pix_fmt = AV_PIX_FMT_CUDA;
ctx->hw_frames_ctx = av_hwframe_ctx_alloc(ctx->hw_device_ctx); ctx->hw_frames_ctx = libav().av_hwframe_ctx_alloc(ctx->hw_device_ctx);
TORCH_CHECK(ctx->hw_frames_ctx, "Failed to create CUDA frame context."); TORCH_CHECK(ctx->hw_frames_ctx, "Failed to create CUDA frame context.");
auto frames_ctx = (AVHWFramesContext*)(ctx->hw_frames_ctx->data); auto frames_ctx = (AVHWFramesContext*)(ctx->hw_frames_ctx->data);
...@@ -562,7 +573,7 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) { ...@@ -562,7 +573,7 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
frames_ctx->height = ctx->height; frames_ctx->height = ctx->height;
frames_ctx->initial_pool_size = 5; frames_ctx->initial_pool_size = 5;
int ret = av_hwframe_ctx_init(ctx->hw_frames_ctx); int ret = libav().av_hwframe_ctx_init(ctx->hw_frames_ctx);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
"Failed to initialize CUDA frame context: ", "Failed to initialize CUDA frame context: ",
...@@ -574,11 +585,12 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) { ...@@ -574,11 +585,12 @@ void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
AVStream* get_stream(AVFormatContext* format_ctx, AVCodecContext* codec_ctx) { AVStream* get_stream(AVFormatContext* format_ctx, AVCodecContext* codec_ctx) {
AVStream* stream = avformat_new_stream(format_ctx, nullptr); AVStream* stream = libav().avformat_new_stream(format_ctx, nullptr);
TORCH_CHECK(stream, "Failed to allocate stream."); TORCH_CHECK(stream, "Failed to allocate stream.");
stream->time_base = codec_ctx->time_base; stream->time_base = codec_ctx->time_base;
int ret = avcodec_parameters_from_context(stream->codecpar, codec_ctx); int ret =
libav().avcodec_parameters_from_context(stream->codecpar, codec_ctx);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to copy the stream parameter: ", av_err2string(ret)); ret >= 0, "Failed to copy the stream parameter: ", av_err2string(ret));
return stream; return stream;
...@@ -605,7 +617,7 @@ FilterGraph get_audio_filter_graph( ...@@ -605,7 +617,7 @@ FilterGraph get_audio_filter_graph(
if (filter_desc || src_fmt != enc_fmt || if (filter_desc || src_fmt != enc_fmt ||
src_sample_rate != enc_sample_rate || src_ch_layout != enc_ch_layout) { src_sample_rate != enc_sample_rate || src_ch_layout != enc_ch_layout) {
std::stringstream ss; std::stringstream ss;
ss << "aformat=sample_fmts=" << av_get_sample_fmt_name(enc_fmt) ss << "aformat=sample_fmts=" << libav().av_get_sample_fmt_name(enc_fmt)
<< ":sample_rates=" << enc_sample_rate << ":channel_layouts=0x" << ":sample_rates=" << enc_sample_rate << ":channel_layouts=0x"
<< std::hex << enc_ch_layout; << std::hex << enc_ch_layout;
parts.push_back(ss.str()); parts.push_back(ss.str());
...@@ -656,7 +668,7 @@ FilterGraph get_video_filter_graph( ...@@ -656,7 +668,7 @@ FilterGraph get_video_filter_graph(
} }
if (filter_desc || src_fmt != enc_fmt) { if (filter_desc || src_fmt != enc_fmt) {
std::stringstream ss; std::stringstream ss;
ss << "format=" << av_get_pix_fmt_name(enc_fmt); ss << "format=" << libav().av_get_pix_fmt_name(enc_fmt);
parts.emplace_back(ss.str()); parts.emplace_back(ss.str());
} }
if (filter_desc || if (filter_desc ||
...@@ -695,7 +707,7 @@ AVFramePtr get_audio_frame( ...@@ -695,7 +707,7 @@ AVFramePtr get_audio_frame(
frame->channel_layout = channel_layout; frame->channel_layout = channel_layout;
frame->sample_rate = sample_rate; frame->sample_rate = sample_rate;
frame->nb_samples = nb_samples; frame->nb_samples = nb_samples;
int ret = av_frame_get_buffer(frame, 0); int ret = libav().av_frame_get_buffer(frame, 0);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Error allocating the source audio frame:", av_err2string(ret)); ret >= 0, "Error allocating the source audio frame:", av_err2string(ret));
...@@ -711,7 +723,7 @@ AVFramePtr get_video_frame(AVPixelFormat src_fmt, int width, int height) { ...@@ -711,7 +723,7 @@ AVFramePtr get_video_frame(AVPixelFormat src_fmt, int width, int height) {
frame->format = src_fmt; frame->format = src_fmt;
frame->width = width; frame->width = width;
frame->height = height; frame->height = height;
int ret = av_frame_get_buffer(frame, 0); int ret = libav().av_frame_get_buffer(frame, 0);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Error allocating a video buffer :", av_err2string(ret)); ret >= 0, "Error allocating a video buffer :", av_err2string(ret));
...@@ -756,10 +768,10 @@ EncodeProcess get_audio_encode_process( ...@@ -756,10 +768,10 @@ EncodeProcess get_audio_encode_process(
// case, restrictions on the format to support tensor inputs do not apply, and // case, restrictions on the format to support tensor inputs do not apply, and
// so we directly get the format via FFmpeg. // so we directly get the format via FFmpeg.
const AVSampleFormat src_fmt = (disable_converter) const AVSampleFormat src_fmt = (disable_converter)
? av_get_sample_fmt(format.c_str()) ? libav().av_get_sample_fmt(format.c_str())
: get_src_sample_fmt(format); : get_src_sample_fmt(format);
const auto src_ch_layout = const auto src_ch_layout = static_cast<uint64_t>(
static_cast<uint64_t>(av_get_default_channel_layout(src_num_channels)); libav().av_get_default_channel_layout(src_num_channels));
// 2. Fetch codec from default or override // 2. Fetch codec from default or override
TORCH_CHECK( TORCH_CHECK(
...@@ -779,7 +791,7 @@ EncodeProcess get_audio_encode_process( ...@@ -779,7 +791,7 @@ EncodeProcess get_audio_encode_process(
// https://github.com/FFmpeg/FFmpeg/blob/0684e58886881a998f1a7b510d73600ff1df2b90/libavcodec/vorbisenc.c#L1277 // https://github.com/FFmpeg/FFmpeg/blob/0684e58886881a998f1a7b510d73600ff1df2b90/libavcodec/vorbisenc.c#L1277
// This is the case for at least until FFmpeg 6.0, so it will be // This is the case for at least until FFmpeg 6.0, so it will be
// like this for a while. // like this for a while.
return static_cast<uint64_t>(av_get_default_channel_layout(2)); return static_cast<uint64_t>(libav().av_get_default_channel_layout(2));
} }
return get_channel_layout(src_ch_layout, encoder_num_channels, codec); return get_channel_layout(src_ch_layout, encoder_num_channels, codec);
}(); }();
...@@ -867,9 +879,9 @@ EncodeProcess get_video_encode_process( ...@@ -867,9 +879,9 @@ EncodeProcess get_video_encode_process(
// case, restrictions on the format to support tensor inputs do not apply, and // case, restrictions on the format to support tensor inputs do not apply, and
// so we directly get the format via FFmpeg. // so we directly get the format via FFmpeg.
const AVPixelFormat src_fmt = (disable_converter) const AVPixelFormat src_fmt = (disable_converter)
? av_get_pix_fmt(format.c_str()) ? libav().av_get_pix_fmt(format.c_str())
: get_src_pix_fmt(format); : get_src_pix_fmt(format);
const AVRational src_rate = av_d2q(frame_rate, 1 << 24); const AVRational src_rate = libav().av_d2q(frame_rate, 1 << 24);
// 2. Fetch codec from default or override // 2. Fetch codec from default or override
TORCH_CHECK( TORCH_CHECK(
...@@ -936,7 +948,8 @@ EncodeProcess get_video_encode_process( ...@@ -936,7 +948,8 @@ EncodeProcess get_video_encode_process(
AVFramePtr src_frame = [&]() { AVFramePtr src_frame = [&]() {
if (codec_ctx->hw_frames_ctx) { if (codec_ctx->hw_frames_ctx) {
AVFramePtr frame{alloc_avframe()}; AVFramePtr frame{alloc_avframe()};
int ret = av_hwframe_get_buffer(codec_ctx->hw_frames_ctx, frame, 0); int ret =
libav().av_hwframe_get_buffer(codec_ctx->hw_frames_ctx, frame, 0);
TORCH_CHECK(ret >= 0, "Failed to fetch CUDA frame: ", av_err2string(ret)); TORCH_CHECK(ret >= 0, "Failed to fetch CUDA frame: ", av_err2string(ret));
frame->nb_samples = 1; frame->nb_samples = 1;
frame->pts = 0; frame->pts = 0;
......
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_writer/encoder.h> #include <torchaudio/csrc/ffmpeg/stream_writer/encoder.h>
namespace torchaudio::io { namespace torchaudio::io {
using detail::libav;
Encoder::Encoder( Encoder::Encoder(
AVFormatContext* format_ctx, AVFormatContext* format_ctx,
AVCodecContext* codec_ctx, AVCodecContext* codec_ctx,
...@@ -13,10 +16,10 @@ Encoder::Encoder( ...@@ -13,10 +16,10 @@ Encoder::Encoder(
/// ///
/// @param frame Frame data to encode /// @param frame Frame data to encode
void Encoder::encode(AVFrame* frame) { void Encoder::encode(AVFrame* frame) {
int ret = avcodec_send_frame(codec_ctx, frame); int ret = libav().avcodec_send_frame(codec_ctx, frame);
TORCH_CHECK(ret >= 0, "Failed to encode frame (", av_err2string(ret), ")."); TORCH_CHECK(ret >= 0, "Failed to encode frame (", av_err2string(ret), ").");
while (ret >= 0) { while (ret >= 0) {
ret = avcodec_receive_packet(codec_ctx, packet); ret = libav().avcodec_receive_packet(codec_ctx, packet);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) { if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
if (ret == AVERROR_EOF) { if (ret == AVERROR_EOF) {
// Note: // Note:
...@@ -31,7 +34,7 @@ void Encoder::encode(AVFrame* frame) { ...@@ -31,7 +34,7 @@ void Encoder::encode(AVFrame* frame) {
// An alternative is to use `av_write_frame` functoin, but in that case // An alternative is to use `av_write_frame` functoin, but in that case
// client code is responsible for ordering packets, which makes it // client code is responsible for ordering packets, which makes it
// complicated to use StreamWriter // complicated to use StreamWriter
ret = av_interleaved_write_frame(format_ctx, nullptr); ret = libav().av_interleaved_write_frame(format_ctx, nullptr);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to flush packet (", av_err2string(ret), ")."); ret >= 0, "Failed to flush packet (", av_err2string(ret), ").");
} }
...@@ -51,10 +54,11 @@ void Encoder::encode(AVFrame* frame) { ...@@ -51,10 +54,11 @@ void Encoder::encode(AVFrame* frame) {
// This has to be set before av_packet_rescale_ts bellow. // This has to be set before av_packet_rescale_ts bellow.
packet->duration = 1; packet->duration = 1;
} }
av_packet_rescale_ts(packet, codec_ctx->time_base, stream->time_base); libav().av_packet_rescale_ts(
packet, codec_ctx->time_base, stream->time_base);
packet->stream_index = stream->index; packet->stream_index = stream->index;
ret = av_interleaved_write_frame(format_ctx, packet); ret = libav().av_interleaved_write_frame(format_ctx, packet);
TORCH_CHECK(ret >= 0, "Failed to write packet (", av_err2string(ret), ")."); TORCH_CHECK(ret >= 0, "Failed to write packet (", av_err2string(ret), ").");
} }
} }
......
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_writer/packet_writer.h> #include <torchaudio/csrc/ffmpeg/stream_writer/packet_writer.h>
namespace torchaudio::io { namespace torchaudio::io {
using detail::libav;
namespace { namespace {
AVStream* add_stream( AVStream* add_stream(
AVFormatContext* format_ctx, AVFormatContext* format_ctx,
const StreamParams& stream_params) { const StreamParams& stream_params) {
AVStream* stream = avformat_new_stream(format_ctx, nullptr); AVStream* stream = libav().avformat_new_stream(format_ctx, nullptr);
int ret = int ret = libav().avcodec_parameters_copy(
avcodec_parameters_copy(stream->codecpar, stream_params.codec_params); stream->codecpar, stream_params.codec_params);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
"Failed to copy the stream's codec parameters. (", "Failed to copy the stream's codec parameters. (",
...@@ -26,11 +30,12 @@ PacketWriter::PacketWriter( ...@@ -26,11 +30,12 @@ PacketWriter::PacketWriter(
void PacketWriter::write_packet(const AVPacketPtr& packet) { void PacketWriter::write_packet(const AVPacketPtr& packet) {
AVPacket dst_packet; AVPacket dst_packet;
int ret = av_packet_ref(&dst_packet, packet); int ret = libav().av_packet_ref(&dst_packet, packet);
TORCH_CHECK(ret >= 0, "Failed to copy packet."); TORCH_CHECK(ret >= 0, "Failed to copy packet.");
av_packet_rescale_ts(&dst_packet, original_time_base, stream->time_base); libav().av_packet_rescale_ts(
&dst_packet, original_time_base, stream->time_base);
dst_packet.stream_index = stream->index; dst_packet.stream_index = stream->index;
ret = av_interleaved_write_frame(format_ctx, &dst_packet); ret = libav().av_interleaved_write_frame(format_ctx, &dst_packet);
TORCH_CHECK(ret >= 0, "Failed to write packet to destination."); TORCH_CHECK(ret >= 0, "Failed to write packet to destination.");
} }
} // namespace torchaudio::io } // namespace torchaudio::io
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h> #include <torchaudio/csrc/ffmpeg/stream_writer/stream_writer.h>
#ifdef USE_CUDA #ifdef USE_CUDA
#include <c10/cuda/CUDAStream.h> #include <c10/cuda/CUDAStream.h>
#endif #endif
namespace torchaudio { namespace torchaudio::io {
namespace io {
using detail::libav;
namespace { namespace {
AVFormatContext* get_output_format_context( AVFormatContext* get_output_format_context(
...@@ -19,7 +22,7 @@ AVFormatContext* get_output_format_context( ...@@ -19,7 +22,7 @@ AVFormatContext* get_output_format_context(
} }
AVFormatContext* p = nullptr; AVFormatContext* p = nullptr;
int ret = avformat_alloc_output_context2( int ret = libav().avformat_alloc_output_context2(
&p, nullptr, format ? format.value().c_str() : nullptr, dst.c_str()); &p, nullptr, format ? format.value().c_str() : nullptr, dst.c_str());
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
...@@ -208,14 +211,14 @@ void StreamWriter::add_video_frame_stream( ...@@ -208,14 +211,14 @@ void StreamWriter::add_video_frame_stream(
} }
void StreamWriter::set_metadata(const OptionDict& metadata) { void StreamWriter::set_metadata(const OptionDict& metadata) {
av_dict_free(&format_ctx->metadata); libav().av_dict_free(&format_ctx->metadata);
for (auto const& [key, value] : metadata) { for (auto const& [key, value] : metadata) {
av_dict_set(&format_ctx->metadata, key.c_str(), value.c_str(), 0); libav().av_dict_set(&format_ctx->metadata, key.c_str(), value.c_str(), 0);
} }
} }
void StreamWriter::dump_format(int64_t i) { void StreamWriter::dump_format(int64_t i) {
av_dump_format(format_ctx, (int)i, format_ctx->url, 1); libav().av_dump_format(format_ctx, (int)i, format_ctx->url, 1);
} }
void StreamWriter::open(const c10::optional<OptionDict>& option) { void StreamWriter::open(const c10::optional<OptionDict>& option) {
...@@ -231,10 +234,10 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) { ...@@ -231,10 +234,10 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) {
AVDictionary* opt = get_option_dict(option); AVDictionary* opt = get_option_dict(option);
if (!(fmt->flags & AVFMT_NOFILE) && if (!(fmt->flags & AVFMT_NOFILE) &&
!(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) { !(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) {
ret = avio_open2( ret = libav().avio_open2(
&format_ctx->pb, format_ctx->url, AVIO_FLAG_WRITE, nullptr, &opt); &format_ctx->pb, format_ctx->url, AVIO_FLAG_WRITE, nullptr, &opt);
if (ret < 0) { if (ret < 0) {
av_dict_free(&opt); libav().av_dict_free(&opt);
TORCH_CHECK( TORCH_CHECK(
false, false,
"Failed to open dst: ", "Failed to open dst: ",
...@@ -245,7 +248,7 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) { ...@@ -245,7 +248,7 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) {
} }
} }
ret = avformat_write_header(format_ctx, &opt); ret = libav().avformat_write_header(format_ctx, &opt);
clean_up_dict(opt); clean_up_dict(opt);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
...@@ -258,7 +261,7 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) { ...@@ -258,7 +261,7 @@ void StreamWriter::open(const c10::optional<OptionDict>& option) {
} }
void StreamWriter::close() { void StreamWriter::close() {
int ret = av_write_trailer(format_ctx); int ret = libav().av_write_trailer(format_ctx);
if (ret < 0) { if (ret < 0) {
LOG(WARNING) << "Failed to write trailer. (" << av_err2string(ret) << ")."; LOG(WARNING) << "Failed to write trailer. (" << av_err2string(ret) << ").";
} }
...@@ -269,7 +272,7 @@ void StreamWriter::close() { ...@@ -269,7 +272,7 @@ void StreamWriter::close() {
if (!(fmt->flags & AVFMT_NOFILE) && if (!(fmt->flags & AVFMT_NOFILE) &&
!(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) { !(format_ctx->flags & AVFMT_FLAG_CUSTOM_IO)) {
// avio_closep can be only applied to AVIOContext opened by avio_open // avio_closep can be only applied to AVIOContext opened by avio_open
avio_closep(&(format_ctx->pb)); libav().avio_closep(&(format_ctx->pb));
} }
is_open = false; is_open = false;
} }
...@@ -355,12 +358,13 @@ AVIOContext* get_io_context( ...@@ -355,12 +358,13 @@ AVIOContext* get_io_context(
int buffer_size, int buffer_size,
int (*write_packet)(void* opaque, uint8_t* buf, int buf_size), int (*write_packet)(void* opaque, uint8_t* buf, int buf_size),
int64_t (*seek)(void* opaque, int64_t offset, int whence)) { int64_t (*seek)(void* opaque, int64_t offset, int whence)) {
unsigned char* buffer = static_cast<unsigned char*>(av_malloc(buffer_size)); unsigned char* buffer =
static_cast<unsigned char*>(libav().av_malloc(buffer_size));
TORCH_CHECK(buffer, "Failed to allocate buffer."); TORCH_CHECK(buffer, "Failed to allocate buffer.");
AVIOContext* io_ctx = avio_alloc_context( AVIOContext* io_ctx = libav().avio_alloc_context(
buffer, buffer_size, 1, opaque, nullptr, write_packet, seek); buffer, buffer_size, 1, opaque, nullptr, write_packet, seek);
if (!io_ctx) { if (!io_ctx) {
av_freep(&buffer); libav().av_freep(&buffer);
TORCH_CHECK(false, "Failed to allocate AVIOContext."); TORCH_CHECK(false, "Failed to allocate AVIOContext.");
} }
return io_ctx; return io_ctx;
...@@ -384,5 +388,4 @@ StreamWriterCustomIO::StreamWriterCustomIO( ...@@ -384,5 +388,4 @@ StreamWriterCustomIO::StreamWriterCustomIO(
: CustomOutput(opaque, buffer_size, write_packet, seek), : CustomOutput(opaque, buffer_size, write_packet, seek),
StreamWriter(io_ctx, format) {} StreamWriter(io_ctx, format) {}
} // namespace io } // namespace torchaudio::io
} // namespace torchaudio
#include <torchaudio/csrc/ffmpeg/libav.h>
#include <torchaudio/csrc/ffmpeg/stream_writer/tensor_converter.h> #include <torchaudio/csrc/ffmpeg/stream_writer/tensor_converter.h>
#ifdef USE_CUDA #ifdef USE_CUDA
...@@ -6,6 +7,8 @@ ...@@ -6,6 +7,8 @@
namespace torchaudio::io { namespace torchaudio::io {
using detail::libav;
namespace { namespace {
using InitFunc = TensorConverter::InitFunc; using InitFunc = TensorConverter::InitFunc;
...@@ -41,8 +44,8 @@ void convert_func_(const torch::Tensor& chunk, AVFrame* buffer) { ...@@ -41,8 +44,8 @@ void convert_func_(const torch::Tensor& chunk, AVFrame* buffer) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(chunk.size(1) == buffer->channels); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(chunk.size(1) == buffer->channels);
// https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00334 // https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00334
if (!av_frame_is_writable(buffer)) { if (!libav().av_frame_is_writable(buffer)) {
int ret = av_frame_make_writable(buffer); int ret = libav().av_frame_make_writable(buffer);
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
ret >= 0, "Failed to make frame writable: ", av_err2string(ret)); ret >= 0, "Failed to make frame writable: ", av_err2string(ret));
} }
...@@ -145,8 +148,8 @@ void write_interlaced_video( ...@@ -145,8 +148,8 @@ void write_interlaced_video(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3) == num_channels); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3) == num_channels);
// https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472 // https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472
if (!av_frame_is_writable(buffer)) { if (!libav().av_frame_is_writable(buffer)) {
int ret = av_frame_make_writable(buffer); int ret = libav().av_frame_make_writable(buffer);
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
ret >= 0, "Failed to make frame writable: ", av_err2string(ret)); ret >= 0, "Failed to make frame writable: ", av_err2string(ret));
} }
...@@ -187,7 +190,7 @@ void write_planar_video( ...@@ -187,7 +190,7 @@ void write_planar_video(
AVFrame* buffer, AVFrame* buffer,
int num_planes) { int num_planes) {
const auto num_colors = const auto num_colors =
av_pix_fmt_desc_get((AVPixelFormat)buffer->format)->nb_components; libav().av_pix_fmt_desc_get((AVPixelFormat)buffer->format)->nb_components;
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.dim() == 4); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.dim() == 4);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(0) == 1); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(0) == 1);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(1) == num_colors); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(1) == num_colors);
...@@ -195,8 +198,8 @@ void write_planar_video( ...@@ -195,8 +198,8 @@ void write_planar_video(
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3), buffer->width); TORCH_INTERNAL_ASSERT_DEBUG_ONLY(frame.size(3), buffer->width);
// https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472 // https://ffmpeg.org/doxygen/4.1/muxing_8c_source.html#l00472
if (!av_frame_is_writable(buffer)) { if (!libav().av_frame_is_writable(buffer)) {
int ret = av_frame_make_writable(buffer); int ret = libav().av_frame_make_writable(buffer);
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
ret >= 0, "Failed to make frame writable: ", av_err2string(ret)); ret >= 0, "Failed to make frame writable: ", av_err2string(ret));
} }
...@@ -308,7 +311,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) { ...@@ -308,7 +311,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) {
TORCH_CHECK( TORCH_CHECK(
false, false,
"Unexpected pixel format for CUDA: ", "Unexpected pixel format for CUDA: ",
av_get_pix_fmt_name(sw_pix_fmt)); libav().av_get_pix_fmt_name(sw_pix_fmt));
} }
} }
...@@ -317,7 +320,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) { ...@@ -317,7 +320,7 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) {
case AV_PIX_FMT_GRAY8: case AV_PIX_FMT_GRAY8:
case AV_PIX_FMT_RGB24: case AV_PIX_FMT_RGB24:
case AV_PIX_FMT_BGR24: { case AV_PIX_FMT_BGR24: {
int channels = av_pix_fmt_desc_get(pix_fmt)->nb_components; int channels = libav().av_pix_fmt_desc_get(pix_fmt)->nb_components;
InitFunc init_func = [=](const torch::Tensor& t, AVFrame* f) { InitFunc init_func = [=](const torch::Tensor& t, AVFrame* f) {
validate_video_input(t, f, channels); validate_video_input(t, f, channels);
return init_interlaced(t); return init_interlaced(t);
...@@ -339,7 +342,9 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) { ...@@ -339,7 +342,9 @@ std::pair<InitFunc, ConvertFunc> get_video_func(AVFrame* buffer) {
} }
default: default:
TORCH_CHECK( TORCH_CHECK(
false, "Unexpected pixel format: ", av_get_pix_fmt_name(pix_fmt)); false,
"Unexpected pixel format: ",
libav().av_get_pix_fmt_name(pix_fmt));
} }
} }
...@@ -383,7 +388,9 @@ TensorConverter::TensorConverter(AVMediaType type, AVFrame* buf, int buf_size) ...@@ -383,7 +388,9 @@ TensorConverter::TensorConverter(AVMediaType type, AVFrame* buf, int buf_size)
break; break;
default: default:
TORCH_INTERNAL_ASSERT( TORCH_INTERNAL_ASSERT(
false, "Unsupported media type: ", av_get_media_type_string(type)); false,
"Unsupported media type: ",
libav().av_get_media_type_string(type));
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment