Commit 10ac6d2b authored by moto, committed by Facebook GitHub Bot

Move helper functions out of common utility for better locality (#2512)

Summary:
This commit moves helper functions and definitions around so that better locality of logic is achieved.

## Detail

`ffmpeg.[h|cpp]` implements classes that wrap FFmpeg structures with RAII semantics.
Initially, these classes included the construction logic in their constructors, but that logic was
extracted into factory functions in https://github.com/pytorch/audio/issues/2373.
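
For orientation, the RAII idea these classes follow looks roughly like the snippet below. This is a minimal sketch using `std::unique_ptr`; the actual code in `ffmpeg.h` uses its own small `Wrapper<T, Deleter>` base class (e.g. `Wrapper<AVFormatContext, AVFormatContextDeleter>`), so the exact shape differs.

```cpp
// Minimal sketch of the RAII pattern behind the wrappers in ffmpeg.[h|cpp].
// The real code uses a dedicated Wrapper base class; a unique_ptr alias is
// shown here only to illustrate the ownership model, not the exact API.
extern "C" {
#include <libavformat/avformat.h>
}
#include <memory>

struct AVFormatContextDeleter {
  void operator()(AVFormatContext* p) const {
    // Closes the input and frees the context when the owner goes out of scope.
    avformat_close_input(&p);
  }
};

using AVFormatContextPtr =
    std::unique_ptr<AVFormatContext, AVFormatContextDeleter>;
```

Keeping construction out of the wrappers (the factory-function split from https://github.com/pytorch/audio/issues/2373) means the wrappers stay purely about ownership, while the factories encode how a particular resource is configured.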

The reason the factory functions stayed in `ffmpeg.[h|cpp]` was that the logic for
initializing and cleaning up the `AVDictionary` class was only available in `ffmpeg.cpp`.

Now that `AVDictionary` handling is properly defined in https://github.com/pytorch/audio/issues/2507, the factory functions, which are not
that reusable, are better kept with the implementations that use them.
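
Concretely, the `AVDictionary` init/clean-up cycle the factories rely on follows the standard FFmpeg pattern. The sketch below approximates the `get_option_dict`/`clean_up_dict` helpers from https://github.com/pytorch/audio/issues/2507; the actual definitions may differ in detail.

```cpp
// Approximation of the AVDictionary helpers referenced above; see the
// definitions introduced in pytorch/audio#2507 for the real implementation.
extern "C" {
#include <libavutil/dict.h>
}
#include <map>
#include <string>

using OptionDict = std::map<std::string, std::string>;

AVDictionary* get_option_dict(const OptionDict& option) {
  AVDictionary* opt = nullptr;
  for (const auto& kv : option) {
    // av_dict_set copies key and value, so the source map need not outlive it.
    av_dict_set(&opt, kv.first.c_str(), kv.second.c_str(), 0);
  }
  return opt;
}

void clean_up_dict(AVDictionary* p) {
  if (p) {
    av_dict_free(&p);
  }
}
```

The factory functions build the dictionary, pass it to calls such as `avcodec_open2` or `avformat_open_input`, and then call `clean_up_dict` on whatever FFmpeg hands back, which is why they only need these two helpers rather than anything else in `ffmpeg.cpp`.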

This makes `ffmpeg.h` lean and clean, and makes it easier to see what can be reused.

Pull Request resolved: https://github.com/pytorch/audio/pull/2512

Reviewed By: hwangjeff

Differential Revision: D37477592

Pulled By: mthrok

fbshipit-source-id: 8c1b5059ea5f44649cc0eb1f82d1a92877ef186e
parent 515fd01c
...@@ -6,6 +6,125 @@ namespace ffmpeg {
////////////////////////////////////////////////////////////////////////////////
// Decoder
////////////////////////////////////////////////////////////////////////////////
namespace {
AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder_name) {
const AVCodec* pCodec = !decoder_name.has_value()
? avcodec_find_decoder(codec_id)
: avcodec_find_decoder_by_name(decoder_name.value().c_str());
if (!pCodec) {
std::stringstream ss;
if (!decoder_name.has_value()) {
ss << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
<< codec_id << ").";
} else {
ss << "Unsupported codec: \"" << decoder_name.value() << "\".";
}
throw std::runtime_error(ss.str());
}
AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec);
if (!pCodecContext) {
throw std::runtime_error("Failed to allocate CodecContext.");
}
return AVCodecContextPtr(pCodecContext);
}
#ifdef USE_CUDA
enum AVPixelFormat get_hw_format(
AVCodecContext* ctx,
const enum AVPixelFormat* pix_fmts) {
const enum AVPixelFormat* p = nullptr;
AVPixelFormat pix_fmt = *static_cast<AVPixelFormat*>(ctx->opaque);
for (p = pix_fmts; *p != -1; p++) {
if (*p == pix_fmt) {
return *p;
}
}
TORCH_WARN("Failed to get HW surface format.");
return AV_PIX_FMT_NONE;
}
const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
for (int i = 0;; ++i) {
const AVCodecHWConfig* config = avcodec_get_hw_config(pCodec, i);
if (!config) {
break;
}
if (config->device_type == AV_HWDEVICE_TYPE_CUDA &&
config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
return config;
}
}
std::stringstream ss;
ss << "CUDA device was requested, but the codec \"" << pCodec->name
<< "\" is not supported.";
throw std::runtime_error(ss.str());
}
#endif
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef) {
int ret = avcodec_parameters_to_context(pCodecContext, pParams);
if (ret < 0) {
throw std::runtime_error(
"Failed to set CodecContext parameter: " + av_err2string(ret));
}
#ifdef USE_CUDA
// Enable HW Acceleration
if (device.type() == c10::DeviceType::CUDA) {
const AVCodecHWConfig* config = get_cuda_config(pCodecContext->codec);
// TODO: check how to log
// C10_LOG << "Decoder " << pCodec->name << " supports device " <<
// av_hwdevice_get_type_name(config->device_type);
// https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
// 1. Set HW pixel format (config->pix_fmt) to opaque pointer.
static thread_local AVPixelFormat pix_fmt = config->pix_fmt;
pCodecContext->opaque = static_cast<void*>(&pix_fmt);
// 2. Set pCodecContext->get_format callback function, which
// will retrieve the HW pixel format from the opaque pointer.
pCodecContext->get_format = get_hw_format;
// 3. Create HW device context and set to pCodecContext.
AVBufferRef* hw_device_ctx = nullptr;
ret = av_hwdevice_ctx_create(
&hw_device_ctx,
AV_HWDEVICE_TYPE_CUDA,
std::to_string(device.index()).c_str(),
nullptr,
0);
if (ret < 0) {
throw std::runtime_error(
"Failed to create CUDA device context: " + av_err2string(ret));
}
assert(hw_device_ctx);
pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
pHWBufferRef.reset(hw_device_ctx);
}
#endif
AVDictionary* opts = get_option_dict(decoder_option);
ret = avcodec_open2(pCodecContext, pCodecContext->codec, &opts);
clean_up_dict(opts);
if (ret < 0) {
throw std::runtime_error(
"Failed to initialize CodecContext: " + av_err2string(ret));
}
if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout)
pParams->channel_layout =
av_get_default_channel_layout(pCodecContext->channels);
}
} // namespace
Decoder::Decoder(
AVCodecParameters* pParam,
const c10::optional<std::string>& decoder_name,
...@@ -13,12 +132,7 @@ Decoder::Decoder(
const torch::Device& device)
: pCodecContext(get_decode_context(pParam->codec_id, decoder_name)) {
init_codec_context(
pCodecContext,
pParam,
decoder_name,
decoder_option,
device,
pHWBufferRef);
pCodecContext, pParam, decoder_option, device, pHWBufferRef);
}
int Decoder::process_packet(AVPacket* pPacket) {
...
...@@ -35,19 +35,6 @@ void clean_up_dict(AVDictionary* p) {
}
}
namespace {
// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVINPUT_FORMAT_CONST const
#else
#define AVINPUT_FORMAT_CONST
#endif
} // namespace
////////////////////////////////////////////////////////////////////////////////
// AVFormatContext
////////////////////////////////////////////////////////////////////////////////
...@@ -55,45 +42,6 @@ void AVFormatContextDeleter::operator()(AVFormatContext* p) {
avformat_close_input(&p);
};
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx) {
AVFormatContext* pFormat = avformat_alloc_context();
if (!pFormat) {
throw std::runtime_error("Failed to allocate AVFormatContext.");
}
if (io_ctx) {
pFormat->pb = io_ctx;
}
auto* pInput = [&]() -> AVINPUT_FORMAT_CONST AVInputFormat* {
if (device.has_value()) {
std::string device_str = device.value();
AVINPUT_FORMAT_CONST AVInputFormat* p =
av_find_input_format(device_str.c_str());
if (!p) {
std::ostringstream msg;
msg << "Unsupported device/format: \"" << device_str << "\"";
throw std::runtime_error(msg.str());
}
return p;
}
return nullptr;
}();
AVDictionary* opt = get_option_dict(option);
int ret = avformat_open_input(&pFormat, src.c_str(), pInput, &opt);
clean_up_dict(opt);
if (ret < 0)
throw std::runtime_error(
"Failed to open the input \"" + src + "\" (" + av_err2string(ret) +
").");
return AVFormatContextPtr(pFormat);
}
AVFormatContextPtr::AVFormatContextPtr(AVFormatContext* p)
: Wrapper<AVFormatContext, AVFormatContextDeleter>(p) {}
...@@ -162,136 +110,6 @@ void AVCodecContextDeleter::operator()(AVCodecContext* p) {
avcodec_free_context(&p);
};
namespace {
const AVCodec* get_decode_codec(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder_name) {
const AVCodec* pCodec = !decoder_name.has_value()
? avcodec_find_decoder(codec_id)
: avcodec_find_decoder_by_name(decoder_name.value().c_str());
if (!pCodec) {
std::stringstream ss;
if (!decoder_name.has_value()) {
ss << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
<< codec_id << ").";
} else {
ss << "Unsupported codec: \"" << decoder_name.value() << "\".";
}
throw std::runtime_error(ss.str());
}
return pCodec;
}
} // namespace
AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder_name) {
const AVCodec* pCodec = get_decode_codec(codec_id, decoder_name);
AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec);
if (!pCodecContext) {
throw std::runtime_error("Failed to allocate CodecContext.");
}
return AVCodecContextPtr(pCodecContext);
}
#ifdef USE_CUDA
enum AVPixelFormat get_hw_format(
AVCodecContext* ctx,
const enum AVPixelFormat* pix_fmts) {
const enum AVPixelFormat* p = nullptr;
AVPixelFormat pix_fmt = *static_cast<AVPixelFormat*>(ctx->opaque);
for (p = pix_fmts; *p != -1; p++) {
if (*p == pix_fmt) {
return *p;
}
}
TORCH_WARN("Failed to get HW surface format.");
return AV_PIX_FMT_NONE;
}
const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
for (int i = 0;; ++i) {
const AVCodecHWConfig* config = avcodec_get_hw_config(pCodec, i);
if (!config) {
break;
}
if (config->device_type == AV_HWDEVICE_TYPE_CUDA &&
config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
return config;
}
}
std::stringstream ss;
ss << "CUDA device was requested, but the codec \"" << pCodec->name
<< "\" is not supported.";
throw std::runtime_error(ss.str());
}
#endif
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef) {
const AVCodec* pCodec = get_decode_codec(pParams->codec_id, decoder_name);
int ret = avcodec_parameters_to_context(pCodecContext, pParams);
if (ret < 0) {
throw std::runtime_error(
"Failed to set CodecContext parameter: " + av_err2string(ret));
}
#ifdef USE_CUDA
// Enable HW Acceleration
if (device.type() == c10::DeviceType::CUDA) {
const AVCodecHWConfig* config = get_cuda_config(pCodec);
// TODO: check how to log
// C10_LOG << "Decoder " << pCodec->name << " supports device " <<
// av_hwdevice_get_type_name(config->device_type);
// https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
// 1. Set HW pixel format (config->pix_fmt) to opaque pointer.
static thread_local AVPixelFormat pix_fmt = config->pix_fmt;
pCodecContext->opaque = static_cast<void*>(&pix_fmt);
// 2. Set pCodecContext->get_format callback function, which
// will retrieve the HW pixel format from the opaque pointer.
pCodecContext->get_format = get_hw_format;
// 3. Create HW device context and set to pCodecContext.
AVBufferRef* hw_device_ctx = nullptr;
ret = av_hwdevice_ctx_create(
&hw_device_ctx,
AV_HWDEVICE_TYPE_CUDA,
std::to_string(device.index()).c_str(),
nullptr,
0);
if (ret < 0) {
throw std::runtime_error(
"Failed to create CUDA device context: " + av_err2string(ret));
}
assert(hw_device_ctx);
pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
pHWBufferRef.reset(hw_device_ctx);
}
#endif
AVDictionary* opts = get_option_dict(decoder_option);
ret = avcodec_open2(pCodecContext, pCodec, &opts);
clean_up_dict(opts);
if (ret < 0) {
throw std::runtime_error(
"Failed to initialize CodecContext: " + av_err2string(ret));
}
if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout)
pParams->channel_layout =
av_get_default_channel_layout(pCodecContext->channels);
}
AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
: Wrapper<AVCodecContext, AVCodecContextDeleter>(p) {}
...
...@@ -27,6 +27,15 @@ namespace ffmpeg {
using OptionDict = std::map<std::string, std::string>;
// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVFORMAT_CONST const
#else
#define AVFORMAT_CONST
#endif
// Replacement of av_err2str, which causes
// `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5
...@@ -84,13 +93,6 @@ struct AVFormatContextPtr
explicit AVFormatContextPtr(AVFormatContext* p);
};
// create format context for reading media
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx = nullptr);
////////////////////////////////////////////////////////////////////////////////
// AVIO
////////////////////////////////////////////////////////////////////////////////
...@@ -166,20 +168,6 @@ struct AVCodecContextPtr
explicit AVCodecContextPtr(AVCodecContext* p);
};
// Allocate codec context from either decoder name or ID
AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder);
// Initialize codec context with the parameters
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef);
////////////////////////////////////////////////////////////////////////////////
// AVFilterGraph
////////////////////////////////////////////////////////////////////////////////
...
...@@ -55,6 +55,45 @@ OutInfo convert(OutputStreamInfo osi) {
}
} // namespace
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx) {
AVFormatContext* pFormat = avformat_alloc_context();
if (!pFormat) {
throw std::runtime_error("Failed to allocate AVFormatContext.");
}
if (io_ctx) {
pFormat->pb = io_ctx;
}
auto* pInput = [&]() -> AVFORMAT_CONST AVInputFormat* {
if (device.has_value()) {
std::string device_str = device.value();
AVFORMAT_CONST AVInputFormat* p =
av_find_input_format(device_str.c_str());
if (!p) {
std::ostringstream msg;
msg << "Unsupported device/format: \"" << device_str << "\"";
throw std::runtime_error(msg.str());
}
return p;
}
return nullptr;
}();
AVDictionary* opt = get_option_dict(option);
int ret = avformat_open_input(&pFormat, src.c_str(), pInput, &opt);
clean_up_dict(opt);
if (ret < 0)
throw std::runtime_error(
"Failed to open the input \"" + src + "\" (" + av_err2string(ret) +
").");
return AVFormatContextPtr(pFormat);
}
StreamReaderBinding::StreamReaderBinding(AVFormatContextPtr&& p)
: StreamReader(std::move(p)) {}
...
...@@ -5,6 +5,13 @@
namespace torchaudio {
namespace ffmpeg {
// create format context for reading media
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx = nullptr);
// Because TorchScript requires c10::Dict type to pass dict,
// while PyBind11 requires std::map type to pass dict,
// we duplicate the return tuple.
...