Commit 26acdbff authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Simplify HW encoder object handling (#3138)

Summary:
The hw_device_ctx and hw_frames_ctx buffers assigned to an AVCodecContext
object are owned by libavcodec, and get freed in [avcodec_free_context](https://ffmpeg.org/doxygen/4.1/group__lavc__core.html#gaf869d0829ed607cec3a4a02a1c7026b3)
(actually in [avcodec_close](https://ffmpeg.org/doxygen/4.1/libavcodec_2utils_8c_source.html#l01069)),
so we do not need to keep separate references to them around.

Pull Request resolved: https://github.com/pytorch/audio/pull/3138

Reviewed By: nateanl

Differential Revision: D43738009

Pulled By: mthrok

fbshipit-source-id: 8c1f4217fa7b21dce872d12be9245056f3fc7537
parent 41e3b93d
...@@ -342,6 +342,56 @@ AVCodecContextPtr get_audio_codec( ...@@ -342,6 +342,56 @@ AVCodecContextPtr get_audio_codec(
return ctx; return ctx;
} }
// Attach a CUDA hardware device context and a CUDA frames context to a video
// encoder context.
//
// ctx:      Encoder context to configure. Its `pix_fmt`, `width` and `height`
//           are read here to describe the HW frame pool, so they must already
//           be set. On success `pix_fmt` becomes AV_PIX_FMT_CUDA and the
//           previous value is kept in `sw_pix_fmt`.
// hw_accel: Device string parsed as a torch::Device. Only CUDA devices are
//           accepted.
//
// Any failure (non-CUDA device, device/frames context creation or
// initialization error) is reported through TORCH_CHECK.
void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
  torch::Device device{hw_accel};
  TORCH_CHECK(
      device.type() == c10::DeviceType::CUDA,
      "Only CUDA is supported for hardware acceleration. Found: ",
      device.str());

  // NOTES:
  // 1. Examples like
  // https://ffmpeg.org/doxygen/4.1/hw_decode_8c-example.html#a9 wrap the HW
  // device context and the HW frames context with av_buffer_ref. This
  // increments the reference count, and the resource won't be automatically
  // deallocated at the time AVCodecContext is destructed (we would need to
  // decrement it once ourselves), so we do not do it. When adding support for
  // sharing context objects, this needs to be reviewed.
  //
  // 2. When encoding, it is technically not necessary to attach the HW device
  // context to AVCodecContext. But this way, it will be deallocated
  // automatically at the time AVCodecContext is freed, so we do that.

  // A device string without an explicit index (e.g. "cuda") has index -1.
  // The index is forwarded to FFmpeg as the CUDA device ordinal, and "-1" does
  // not name a valid device, so fall back to device 0 in that case.
  const std::string device_index =
      std::to_string(device.has_index() ? device.index() : 0);
  int ret = av_hwdevice_ctx_create(
      &ctx->hw_device_ctx,
      AV_HWDEVICE_TYPE_CUDA,
      device_index.c_str(),
      nullptr,
      0);
  TORCH_CHECK(
      ret >= 0, "Failed to create CUDA device context: ", av_err2string(ret));
  assert(ctx->hw_device_ctx);

  // Remember the software pixel format before switching the encoder to CUDA
  // frames; the frame pool below needs both.
  ctx->sw_pix_fmt = ctx->pix_fmt;
  ctx->pix_fmt = AV_PIX_FMT_CUDA;

  ctx->hw_frames_ctx = av_hwframe_ctx_alloc(ctx->hw_device_ctx);
  TORCH_CHECK(ctx->hw_frames_ctx, "Failed to create CUDA frame context.");

  // AVBufferRef::data is an opaque byte pointer; for a frames context buffer
  // it points at the AVHWFramesContext to be filled in before init.
  auto* frames_ctx =
      reinterpret_cast<AVHWFramesContext*>(ctx->hw_frames_ctx->data);
  frames_ctx->format = ctx->pix_fmt;
  frames_ctx->sw_format = ctx->sw_pix_fmt;
  frames_ctx->width = ctx->width;
  frames_ctx->height = ctx->height;
  frames_ctx->initial_pool_size = 5;

  ret = av_hwframe_ctx_init(ctx->hw_frames_ctx);
  TORCH_CHECK(
      ret >= 0,
      "Failed to initialize CUDA frame context: ",
      av_err2string(ret));
}
AVCodecContextPtr get_video_codec( AVCodecContextPtr get_video_codec(
AVFORMAT_CONST AVOutputFormat* oformat, AVFORMAT_CONST AVOutputFormat* oformat,
double frame_rate, double frame_rate,
...@@ -350,59 +400,18 @@ AVCodecContextPtr get_video_codec( ...@@ -350,59 +400,18 @@ AVCodecContextPtr get_video_codec(
const c10::optional<std::string>& encoder, const c10::optional<std::string>& encoder,
const c10::optional<OptionDict>& encoder_option, const c10::optional<OptionDict>& encoder_option,
const c10::optional<std::string>& encoder_format, const c10::optional<std::string>& encoder_format,
const c10::optional<std::string>& hw_accel, const c10::optional<std::string>& hw_accel) {
AVBufferRefPtr& hw_device_ctx,
AVBufferRefPtr& hw_frame_ctx) {
AVCodecContextPtr ctx = get_codec_ctx(AVMEDIA_TYPE_VIDEO, oformat, encoder); AVCodecContextPtr ctx = get_codec_ctx(AVMEDIA_TYPE_VIDEO, oformat, encoder);
configure_video_codec(ctx, frame_rate, width, height, encoder_format); configure_video_codec(ctx, frame_rate, width, height, encoder_format);
if (hw_accel) { if (hw_accel) {
#ifndef USE_CUDA #ifdef USE_CUDA
TORCH_CHECK( configure_hw_accel(ctx, hw_accel.value());
false,
"torchaudio is not compiled with CUDA support. Hardware acceleration is not available.");
#else #else
torch::Device device{hw_accel.value()};
TORCH_CHECK(
device.type() == c10::DeviceType::CUDA,
"Only CUDA is supported for hardware acceleration. Found: ",
device.str());
AVBufferRef* device_ctx = nullptr;
int ret = av_hwdevice_ctx_create(
&device_ctx,
AV_HWDEVICE_TYPE_CUDA,
std::to_string(device.index()).c_str(),
nullptr,
0);
TORCH_CHECK(
ret >= 0, "Failed to create CUDA device context: ", av_err2string(ret));
hw_device_ctx.reset(device_ctx);
AVBufferRef* frames_ref = av_hwframe_ctx_alloc(device_ctx);
TORCH_CHECK(frames_ref, "Failed to create CUDA frame context.");
hw_frame_ctx.reset(frames_ref);
AVHWFramesContext* frames_ctx = (AVHWFramesContext*)(frames_ref->data);
frames_ctx->format = AV_PIX_FMT_CUDA;
frames_ctx->sw_format = ctx->pix_fmt;
frames_ctx->width = ctx->width;
frames_ctx->height = ctx->height;
frames_ctx->initial_pool_size = 20;
ctx->sw_pix_fmt = ctx->pix_fmt;
ctx->pix_fmt = AV_PIX_FMT_CUDA;
ret = av_hwframe_ctx_init(frames_ref);
TORCH_CHECK(
ret >= 0,
"Failed to initialize CUDA frame context: ",
av_err2string(ret));
ctx->hw_frames_ctx = av_buffer_ref(frames_ref);
TORCH_CHECK( TORCH_CHECK(
ctx->hw_frames_ctx, false,
"Failed to attach CUDA frames to encoding context: ", "torchaudio is not compiled with CUDA support. ",
av_err2string(ret)); "Hardware acceleration is not available.");
#endif #endif
} }
...@@ -478,27 +487,18 @@ void StreamWriter::add_video_stream( ...@@ -478,27 +487,18 @@ void StreamWriter::add_video_stream(
const c10::optional<OptionDict>& encoder_option, const c10::optional<OptionDict>& encoder_option,
const c10::optional<std::string>& encoder_format, const c10::optional<std::string>& encoder_format,
const c10::optional<std::string>& hw_accel) { const c10::optional<std::string>& hw_accel) {
AVBufferRefPtr hw_device_ctx{};
AVBufferRefPtr hw_frame_ctx{};
AVCodecContextPtr ctx = get_video_codec(
pFormatContext->oformat,
frame_rate,
width,
height,
encoder,
encoder_option,
encoder_format,
hw_accel,
hw_device_ctx,
hw_frame_ctx);
streams.emplace_back(std::make_unique<VideoOutputStream>( streams.emplace_back(std::make_unique<VideoOutputStream>(
pFormatContext, pFormatContext,
get_src_pixel_fmt(format), get_src_pixel_fmt(format),
std::move(ctx), get_video_codec(
std::move(hw_device_ctx), pFormatContext->oformat,
std::move(hw_frame_ctx))); frame_rate,
width,
height,
encoder,
encoder_option,
encoder_format,
hw_accel)));
} }
void StreamWriter::set_metadata(const OptionDict& metadata) { void StreamWriter::set_metadata(const OptionDict& metadata) {
......
...@@ -59,17 +59,13 @@ AVFramePtr get_video_frame(AVPixelFormat src_fmt, AVCodecContext* codec_ctx) { ...@@ -59,17 +59,13 @@ AVFramePtr get_video_frame(AVPixelFormat src_fmt, AVCodecContext* codec_ctx) {
VideoOutputStream::VideoOutputStream( VideoOutputStream::VideoOutputStream(
AVFormatContext* format_ctx, AVFormatContext* format_ctx,
AVPixelFormat src_fmt, AVPixelFormat src_fmt,
AVCodecContextPtr&& codec_ctx_, AVCodecContextPtr&& codec_ctx_)
AVBufferRefPtr&& hw_device_ctx_,
AVBufferRefPtr&& hw_frame_ctx_)
: OutputStream( : OutputStream(
format_ctx, format_ctx,
codec_ctx_, codec_ctx_,
get_video_filter(src_fmt, codec_ctx_)), get_video_filter(src_fmt, codec_ctx_)),
buffer(get_video_frame(src_fmt, codec_ctx_)), buffer(get_video_frame(src_fmt, codec_ctx_)),
converter(buffer), converter(buffer),
hw_device_ctx(std::move(hw_device_ctx_)),
hw_frame_ctx(std::move(hw_frame_ctx_)),
codec_ctx(std::move(codec_ctx_)) {} codec_ctx(std::move(codec_ctx_)) {}
void VideoOutputStream::write_chunk(const torch::Tensor& frames) { void VideoOutputStream::write_chunk(const torch::Tensor& frames) {
......
...@@ -7,16 +7,12 @@ namespace torchaudio::io { ...@@ -7,16 +7,12 @@ namespace torchaudio::io {
struct VideoOutputStream : OutputStream { struct VideoOutputStream : OutputStream {
AVFramePtr buffer; AVFramePtr buffer;
VideoTensorConverter converter; VideoTensorConverter converter;
AVBufferRefPtr hw_device_ctx;
AVBufferRefPtr hw_frame_ctx;
AVCodecContextPtr codec_ctx; AVCodecContextPtr codec_ctx;
VideoOutputStream( VideoOutputStream(
AVFormatContext* format_ctx, AVFormatContext* format_ctx,
AVPixelFormat src_fmt, AVPixelFormat src_fmt,
AVCodecContextPtr&& codec_ctx, AVCodecContextPtr&& codec_ctx);
AVBufferRefPtr&& hw_device_ctx,
AVBufferRefPtr&& hw_frame_ctx);
void write_chunk(const torch::Tensor& frames) override; void write_chunk(const torch::Tensor& frames) override;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment