Commit 26acdbff authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Simplify HW encoder object handling (#3138)

Summary:
hw_device_ctx and hw_frames_ctx assigned to an AVCodecContext
object are owned by libavcodec, and get freed in [avcodec_free_context](https://ffmpeg.org/doxygen/4.1/group__lavc__core.html#gaf869d0829ed607cec3a4a02a1c7026b3)
(actually in [avcodec_close](https://ffmpeg.org/doxygen/4.1/libavcodec_2utils_8c_source.html#l01069)),
so we do not need to keep the reference around.

Pull Request resolved: https://github.com/pytorch/audio/pull/3138

Reviewed By: nateanl

Differential Revision: D43738009

Pulled By: mthrok

fbshipit-source-id: 8c1f4217fa7b21dce872d12be9245056f3fc7537
parent 41e3b93d
...@@ -342,67 +342,76 @@ AVCodecContextPtr get_audio_codec( ...@@ -342,67 +342,76 @@ AVCodecContextPtr get_audio_codec(
return ctx; return ctx;
} }
AVCodecContextPtr get_video_codec( void configure_hw_accel(AVCodecContext* ctx, const std::string& hw_accel) {
AVFORMAT_CONST AVOutputFormat* oformat, torch::Device device{hw_accel};
double frame_rate,
int64_t width,
int64_t height,
const c10::optional<std::string>& encoder,
const c10::optional<OptionDict>& encoder_option,
const c10::optional<std::string>& encoder_format,
const c10::optional<std::string>& hw_accel,
AVBufferRefPtr& hw_device_ctx,
AVBufferRefPtr& hw_frame_ctx) {
AVCodecContextPtr ctx = get_codec_ctx(AVMEDIA_TYPE_VIDEO, oformat, encoder);
configure_video_codec(ctx, frame_rate, width, height, encoder_format);
if (hw_accel) {
#ifndef USE_CUDA
TORCH_CHECK(
false,
"torchaudio is not compiled with CUDA support. Hardware acceleration is not available.");
#else
torch::Device device{hw_accel.value()};
TORCH_CHECK( TORCH_CHECK(
device.type() == c10::DeviceType::CUDA, device.type() == c10::DeviceType::CUDA,
"Only CUDA is supported for hardware acceleration. Found: ", "Only CUDA is supported for hardware acceleration. Found: ",
device.str()); device.str());
AVBufferRef* device_ctx = nullptr; // NOTES:
// 1. Examples like
// https://ffmpeg.org/doxygen/4.1/hw_decode_8c-example.html#a9 wrap the HW
// device context and the HW frames context with av_buffer_ref. This
// increments the reference count, and the resource won't be automatically
// deallocated at the time AVCodecContext is destructed (we would need to
// decrement it once ourselves), so we do not do it. When adding support to share
// context objects, this needs to be reviewed.
//
// 2. When encoding, it is technically not necessary to attach HW device
// context to AVCodecContext. But this way, it will be deallocated
// automatically at the time AVCodecContext is freed, so we do that.
int ret = av_hwdevice_ctx_create( int ret = av_hwdevice_ctx_create(
&device_ctx, &ctx->hw_device_ctx,
AV_HWDEVICE_TYPE_CUDA, AV_HWDEVICE_TYPE_CUDA,
std::to_string(device.index()).c_str(), std::to_string(device.index()).c_str(),
nullptr, nullptr,
0); 0);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, "Failed to create CUDA device context: ", av_err2string(ret)); ret >= 0, "Failed to create CUDA device context: ", av_err2string(ret));
hw_device_ctx.reset(device_ctx); assert(ctx->hw_device_ctx);
ctx->sw_pix_fmt = ctx->pix_fmt;
ctx->pix_fmt = AV_PIX_FMT_CUDA;
AVBufferRef* frames_ref = av_hwframe_ctx_alloc(device_ctx); ctx->hw_frames_ctx = av_hwframe_ctx_alloc(ctx->hw_device_ctx);
TORCH_CHECK(frames_ref, "Failed to create CUDA frame context."); TORCH_CHECK(ctx->hw_frames_ctx, "Failed to create CUDA frame context.");
hw_frame_ctx.reset(frames_ref);
AVHWFramesContext* frames_ctx = (AVHWFramesContext*)(frames_ref->data); auto frames_ctx = (AVHWFramesContext*)(ctx->hw_frames_ctx->data);
frames_ctx->format = AV_PIX_FMT_CUDA; frames_ctx->format = ctx->pix_fmt;
frames_ctx->sw_format = ctx->pix_fmt; frames_ctx->sw_format = ctx->sw_pix_fmt;
frames_ctx->width = ctx->width; frames_ctx->width = ctx->width;
frames_ctx->height = ctx->height; frames_ctx->height = ctx->height;
frames_ctx->initial_pool_size = 20; frames_ctx->initial_pool_size = 5;
ctx->sw_pix_fmt = ctx->pix_fmt;
ctx->pix_fmt = AV_PIX_FMT_CUDA;
ret = av_hwframe_ctx_init(frames_ref); ret = av_hwframe_ctx_init(ctx->hw_frames_ctx);
TORCH_CHECK( TORCH_CHECK(
ret >= 0, ret >= 0,
"Failed to initialize CUDA frame context: ", "Failed to initialize CUDA frame context: ",
av_err2string(ret)); av_err2string(ret));
}
ctx->hw_frames_ctx = av_buffer_ref(frames_ref); AVCodecContextPtr get_video_codec(
AVFORMAT_CONST AVOutputFormat* oformat,
double frame_rate,
int64_t width,
int64_t height,
const c10::optional<std::string>& encoder,
const c10::optional<OptionDict>& encoder_option,
const c10::optional<std::string>& encoder_format,
const c10::optional<std::string>& hw_accel) {
AVCodecContextPtr ctx = get_codec_ctx(AVMEDIA_TYPE_VIDEO, oformat, encoder);
configure_video_codec(ctx, frame_rate, width, height, encoder_format);
if (hw_accel) {
#ifdef USE_CUDA
configure_hw_accel(ctx, hw_accel.value());
#else
TORCH_CHECK( TORCH_CHECK(
ctx->hw_frames_ctx, false,
"Failed to attach CUDA frames to encoding context: ", "torchaudio is not compiled with CUDA support. ",
av_err2string(ret)); "Hardware acceleration is not available.");
#endif #endif
} }
...@@ -478,10 +487,10 @@ void StreamWriter::add_video_stream( ...@@ -478,10 +487,10 @@ void StreamWriter::add_video_stream(
const c10::optional<OptionDict>& encoder_option, const c10::optional<OptionDict>& encoder_option,
const c10::optional<std::string>& encoder_format, const c10::optional<std::string>& encoder_format,
const c10::optional<std::string>& hw_accel) { const c10::optional<std::string>& hw_accel) {
AVBufferRefPtr hw_device_ctx{}; streams.emplace_back(std::make_unique<VideoOutputStream>(
AVBufferRefPtr hw_frame_ctx{}; pFormatContext,
get_src_pixel_fmt(format),
AVCodecContextPtr ctx = get_video_codec( get_video_codec(
pFormatContext->oformat, pFormatContext->oformat,
frame_rate, frame_rate,
width, width,
...@@ -489,16 +498,7 @@ void StreamWriter::add_video_stream( ...@@ -489,16 +498,7 @@ void StreamWriter::add_video_stream(
encoder, encoder,
encoder_option, encoder_option,
encoder_format, encoder_format,
hw_accel, hw_accel)));
hw_device_ctx,
hw_frame_ctx);
streams.emplace_back(std::make_unique<VideoOutputStream>(
pFormatContext,
get_src_pixel_fmt(format),
std::move(ctx),
std::move(hw_device_ctx),
std::move(hw_frame_ctx)));
} }
void StreamWriter::set_metadata(const OptionDict& metadata) { void StreamWriter::set_metadata(const OptionDict& metadata) {
......
...@@ -59,17 +59,13 @@ AVFramePtr get_video_frame(AVPixelFormat src_fmt, AVCodecContext* codec_ctx) { ...@@ -59,17 +59,13 @@ AVFramePtr get_video_frame(AVPixelFormat src_fmt, AVCodecContext* codec_ctx) {
VideoOutputStream::VideoOutputStream( VideoOutputStream::VideoOutputStream(
AVFormatContext* format_ctx, AVFormatContext* format_ctx,
AVPixelFormat src_fmt, AVPixelFormat src_fmt,
AVCodecContextPtr&& codec_ctx_, AVCodecContextPtr&& codec_ctx_)
AVBufferRefPtr&& hw_device_ctx_,
AVBufferRefPtr&& hw_frame_ctx_)
: OutputStream( : OutputStream(
format_ctx, format_ctx,
codec_ctx_, codec_ctx_,
get_video_filter(src_fmt, codec_ctx_)), get_video_filter(src_fmt, codec_ctx_)),
buffer(get_video_frame(src_fmt, codec_ctx_)), buffer(get_video_frame(src_fmt, codec_ctx_)),
converter(buffer), converter(buffer),
hw_device_ctx(std::move(hw_device_ctx_)),
hw_frame_ctx(std::move(hw_frame_ctx_)),
codec_ctx(std::move(codec_ctx_)) {} codec_ctx(std::move(codec_ctx_)) {}
void VideoOutputStream::write_chunk(const torch::Tensor& frames) { void VideoOutputStream::write_chunk(const torch::Tensor& frames) {
......
...@@ -7,16 +7,12 @@ namespace torchaudio::io { ...@@ -7,16 +7,12 @@ namespace torchaudio::io {
struct VideoOutputStream : OutputStream { struct VideoOutputStream : OutputStream {
AVFramePtr buffer; AVFramePtr buffer;
VideoTensorConverter converter; VideoTensorConverter converter;
AVBufferRefPtr hw_device_ctx;
AVBufferRefPtr hw_frame_ctx;
AVCodecContextPtr codec_ctx; AVCodecContextPtr codec_ctx;
VideoOutputStream( VideoOutputStream(
AVFormatContext* format_ctx, AVFormatContext* format_ctx,
AVPixelFormat src_fmt, AVPixelFormat src_fmt,
AVCodecContextPtr&& codec_ctx, AVCodecContextPtr&& codec_ctx);
AVBufferRefPtr&& hw_device_ctx,
AVBufferRefPtr&& hw_frame_ctx);
void write_chunk(const torch::Tensor& frames) override; void write_chunk(const torch::Tensor& frames) override;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment