Commit 10ac6d2b authored by moto, committed by Facebook GitHub Bot

Move helper functions out of common utility for better locality (#2512)

Summary:
This commit moves helper functions and definitions around so that better locality of logic is achieved.

## Detail

`ffmpeg.[h|cpp]` implements classes that wrap FFmpeg structures with RAII semantics.
Initially, these classes included the construction logic in their constructors, but that logic was
extracted into factory functions in https://github.com/pytorch/audio/issues/2373.
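
For orientation, the RAII idea these classes follow looks roughly like the snippet below. This is a minimal sketch using `std::unique_ptr`; the actual code in `ffmpeg.h` uses its own small `Wrapper<T, Deleter>` base class (e.g. `Wrapper<AVFormatContext, AVFormatContextDeleter>`), so the exact shape differs.

```cpp
// Minimal sketch of the RAII pattern behind the wrappers in ffmpeg.[h|cpp].
// The real code uses a dedicated Wrapper base class; a unique_ptr alias is
// shown here only to illustrate the ownership model, not the exact API.
extern "C" {
#include <libavformat/avformat.h>
}
#include <memory>

struct AVFormatContextDeleter {
  void operator()(AVFormatContext* p) const {
    // Closes the input and frees the context when the owner goes out of scope.
    avformat_close_input(&p);
  }
};

using AVFormatContextPtr =
    std::unique_ptr<AVFormatContext, AVFormatContextDeleter>;
```

Keeping construction out of the wrappers (the factory-function split from https://github.com/pytorch/audio/issues/2373) means the wrappers stay purely about ownership, while the factories encode how a particular resource is configured.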

The reason the factory functions stayed in `ffmpeg.[h|cpp]` was that the logic for
initializing and cleaning up the `AVDictionary` class was only available in `ffmpeg.cpp`.

Now that `AVDictionary` handling is properly defined in https://github.com/pytorch/audio/issues/2507, the factory functions, which are not
that reusable, are better kept with the implementations that use them.
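
Concretely, the `AVDictionary` init/clean-up cycle the factories rely on follows the standard FFmpeg pattern. The sketch below approximates the `get_option_dict`/`clean_up_dict` helpers from https://github.com/pytorch/audio/issues/2507; the actual definitions may differ in detail.

```cpp
// Approximation of the AVDictionary helpers referenced above; see the
// definitions introduced in pytorch/audio#2507 for the real implementation.
extern "C" {
#include <libavutil/dict.h>
}
#include <map>
#include <string>

using OptionDict = std::map<std::string, std::string>;

AVDictionary* get_option_dict(const OptionDict& option) {
  AVDictionary* opt = nullptr;
  for (const auto& kv : option) {
    // av_dict_set copies key and value, so the source map need not outlive it.
    av_dict_set(&opt, kv.first.c_str(), kv.second.c_str(), 0);
  }
  return opt;
}

void clean_up_dict(AVDictionary* p) {
  if (p) {
    av_dict_free(&p);
  }
}
```

The factory functions build the dictionary, pass it to calls such as `avcodec_open2` or `avformat_open_input`, and then call `clean_up_dict` on whatever FFmpeg hands back, which is why they only need these two helpers rather than anything else in `ffmpeg.cpp`.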

This makes `ffmpeg.h` lean and clean, and makes it easier to see what can be reused.

Pull Request resolved: https://github.com/pytorch/audio/pull/2512

Reviewed By: hwangjeff

Differential Revision: D37477592

Pulled By: mthrok

fbshipit-source-id: 8c1b5059ea5f44649cc0eb1f82d1a92877ef186e
parent 515fd01c
...@@ -6,6 +6,125 @@ namespace ffmpeg {
////////////////////////////////////////////////////////////////////////////////
// Decoder
////////////////////////////////////////////////////////////////////////////////
namespace {
AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder_name) {
const AVCodec* pCodec = !decoder_name.has_value()
? avcodec_find_decoder(codec_id)
: avcodec_find_decoder_by_name(decoder_name.value().c_str());
if (!pCodec) {
std::stringstream ss;
if (!decoder_name.has_value()) {
ss << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
<< codec_id << ").";
} else {
ss << "Unsupported codec: \"" << decoder_name.value() << "\".";
}
throw std::runtime_error(ss.str());
}
AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec);
if (!pCodecContext) {
throw std::runtime_error("Failed to allocate CodecContext.");
}
return AVCodecContextPtr(pCodecContext);
}
#ifdef USE_CUDA
enum AVPixelFormat get_hw_format(
AVCodecContext* ctx,
const enum AVPixelFormat* pix_fmts) {
const enum AVPixelFormat* p = nullptr;
AVPixelFormat pix_fmt = *static_cast<AVPixelFormat*>(ctx->opaque);
for (p = pix_fmts; *p != -1; p++) {
if (*p == pix_fmt) {
return *p;
}
}
TORCH_WARN("Failed to get HW surface format.");
return AV_PIX_FMT_NONE;
}
const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
for (int i = 0;; ++i) {
const AVCodecHWConfig* config = avcodec_get_hw_config(pCodec, i);
if (!config) {
break;
}
if (config->device_type == AV_HWDEVICE_TYPE_CUDA &&
config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
return config;
}
}
std::stringstream ss;
ss << "CUDA device was requested, but the codec \"" << pCodec->name
<< "\" is not supported.";
throw std::runtime_error(ss.str());
}
#endif
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef) {
int ret = avcodec_parameters_to_context(pCodecContext, pParams);
if (ret < 0) {
throw std::runtime_error(
"Failed to set CodecContext parameter: " + av_err2string(ret));
}
#ifdef USE_CUDA
// Enable HW Acceleration
if (device.type() == c10::DeviceType::CUDA) {
const AVCodecHWConfig* config = get_cuda_config(pCodecContext->codec);
// TODO: check how to log
// C10_LOG << "Decoder " << pCodec->name << " supports device " <<
// av_hwdevice_get_type_name(config->device_type);
// https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
// 1. Set HW pixel format (config->pix_fmt) to opaque pointer.
static thread_local AVPixelFormat pix_fmt = config->pix_fmt;
pCodecContext->opaque = static_cast<void*>(&pix_fmt);
// 2. Set pCodecContext->get_format callback function, which
// will retrieve the HW pixel format from the opaque pointer.
pCodecContext->get_format = get_hw_format;
// 3. Create HW device context and set to pCodecContext.
AVBufferRef* hw_device_ctx = nullptr;
ret = av_hwdevice_ctx_create(
&hw_device_ctx,
AV_HWDEVICE_TYPE_CUDA,
std::to_string(device.index()).c_str(),
nullptr,
0);
if (ret < 0) {
throw std::runtime_error(
"Failed to create CUDA device context: " + av_err2string(ret));
}
assert(hw_device_ctx);
pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
pHWBufferRef.reset(hw_device_ctx);
}
#endif
AVDictionary* opts = get_option_dict(decoder_option);
ret = avcodec_open2(pCodecContext, pCodecContext->codec, &opts);
clean_up_dict(opts);
if (ret < 0) {
throw std::runtime_error(
"Failed to initialize CodecContext: " + av_err2string(ret));
}
if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout)
pParams->channel_layout =
av_get_default_channel_layout(pCodecContext->channels);
}
} // namespace
Decoder::Decoder(
AVCodecParameters* pParam,
const c10::optional<std::string>& decoder_name,
...@@ -13,12 +132,7 @@ Decoder::Decoder(
const torch::Device& device)
: pCodecContext(get_decode_context(pParam->codec_id, decoder_name)) {
init_codec_context(
pCodecContext,
pParam,
decoder_name,
decoder_option,
device,
pHWBufferRef);
pCodecContext, pParam, decoder_option, device, pHWBufferRef);
}
int Decoder::process_packet(AVPacket* pPacket) {
...
...@@ -35,19 +35,6 @@ void clean_up_dict(AVDictionary* p) {
}
}
namespace {
// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVINPUT_FORMAT_CONST const
#else
#define AVINPUT_FORMAT_CONST
#endif
} // namespace
////////////////////////////////////////////////////////////////////////////////
// AVFormatContext
////////////////////////////////////////////////////////////////////////////////
...@@ -55,45 +42,6 @@ void AVFormatContextDeleter::operator()(AVFormatContext* p) {
avformat_close_input(&p);
};
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx) {
AVFormatContext* pFormat = avformat_alloc_context();
if (!pFormat) {
throw std::runtime_error("Failed to allocate AVFormatContext.");
}
if (io_ctx) {
pFormat->pb = io_ctx;
}
auto* pInput = [&]() -> AVINPUT_FORMAT_CONST AVInputFormat* {
if (device.has_value()) {
std::string device_str = device.value();
AVINPUT_FORMAT_CONST AVInputFormat* p =
av_find_input_format(device_str.c_str());
if (!p) {
std::ostringstream msg;
msg << "Unsupported device/format: \"" << device_str << "\"";
throw std::runtime_error(msg.str());
}
return p;
}
return nullptr;
}();
AVDictionary* opt = get_option_dict(option);
int ret = avformat_open_input(&pFormat, src.c_str(), pInput, &opt);
clean_up_dict(opt);
if (ret < 0)
throw std::runtime_error(
"Failed to open the input \"" + src + "\" (" + av_err2string(ret) +
").");
return AVFormatContextPtr(pFormat);
}
AVFormatContextPtr::AVFormatContextPtr(AVFormatContext* p)
: Wrapper<AVFormatContext, AVFormatContextDeleter>(p) {}
...@@ -162,136 +110,6 @@ void AVCodecContextDeleter::operator()(AVCodecContext* p) {
avcodec_free_context(&p);
};
namespace {
const AVCodec* get_decode_codec(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder_name) {
const AVCodec* pCodec = !decoder_name.has_value()
? avcodec_find_decoder(codec_id)
: avcodec_find_decoder_by_name(decoder_name.value().c_str());
if (!pCodec) {
std::stringstream ss;
if (!decoder_name.has_value()) {
ss << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
<< codec_id << ").";
} else {
ss << "Unsupported codec: \"" << decoder_name.value() << "\".";
}
throw std::runtime_error(ss.str());
}
return pCodec;
}
} // namespace
AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder_name) {
const AVCodec* pCodec = get_decode_codec(codec_id, decoder_name);
AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec);
if (!pCodecContext) {
throw std::runtime_error("Failed to allocate CodecContext.");
}
return AVCodecContextPtr(pCodecContext);
}
#ifdef USE_CUDA
enum AVPixelFormat get_hw_format(
AVCodecContext* ctx,
const enum AVPixelFormat* pix_fmts) {
const enum AVPixelFormat* p = nullptr;
AVPixelFormat pix_fmt = *static_cast<AVPixelFormat*>(ctx->opaque);
for (p = pix_fmts; *p != -1; p++) {
if (*p == pix_fmt) {
return *p;
}
}
TORCH_WARN("Failed to get HW surface format.");
return AV_PIX_FMT_NONE;
}
const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
for (int i = 0;; ++i) {
const AVCodecHWConfig* config = avcodec_get_hw_config(pCodec, i);
if (!config) {
break;
}
if (config->device_type == AV_HWDEVICE_TYPE_CUDA &&
config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
return config;
}
}
std::stringstream ss;
ss << "CUDA device was requested, but the codec \"" << pCodec->name
<< "\" is not supported.";
throw std::runtime_error(ss.str());
}
#endif
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef) {
const AVCodec* pCodec = get_decode_codec(pParams->codec_id, decoder_name);
int ret = avcodec_parameters_to_context(pCodecContext, pParams);
if (ret < 0) {
throw std::runtime_error(
"Failed to set CodecContext parameter: " + av_err2string(ret));
}
#ifdef USE_CUDA
// Enable HW Acceleration
if (device.type() == c10::DeviceType::CUDA) {
const AVCodecHWConfig* config = get_cuda_config(pCodec);
// TODO: check how to log
// C10_LOG << "Decoder " << pCodec->name << " supports device " <<
// av_hwdevice_get_type_name(config->device_type);
// https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
// 1. Set HW pixel format (config->pix_fmt) to opaque pointer.
static thread_local AVPixelFormat pix_fmt = config->pix_fmt;
pCodecContext->opaque = static_cast<void*>(&pix_fmt);
// 2. Set pCodecContext->get_format callback function, which
// will retrieve the HW pixel format from the opaque pointer.
pCodecContext->get_format = get_hw_format;
// 3. Create HW device context and set to pCodecContext.
AVBufferRef* hw_device_ctx = nullptr;
ret = av_hwdevice_ctx_create(
&hw_device_ctx,
AV_HWDEVICE_TYPE_CUDA,
std::to_string(device.index()).c_str(),
nullptr,
0);
if (ret < 0) {
throw std::runtime_error(
"Failed to create CUDA device context: " + av_err2string(ret));
}
assert(hw_device_ctx);
pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
pHWBufferRef.reset(hw_device_ctx);
}
#endif
AVDictionary* opts = get_option_dict(decoder_option);
ret = avcodec_open2(pCodecContext, pCodec, &opts);
clean_up_dict(opts);
if (ret < 0) {
throw std::runtime_error(
"Failed to initialize CodecContext: " + av_err2string(ret));
}
if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout)
pParams->channel_layout =
av_get_default_channel_layout(pCodecContext->channels);
}
AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
: Wrapper<AVCodecContext, AVCodecContextDeleter>(p) {}
...
...@@ -27,6 +27,15 @@ namespace ffmpeg {
using OptionDict = std::map<std::string, std::string>;
// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVFORMAT_CONST const
#else
#define AVFORMAT_CONST
#endif
// Replacement of av_err2str, which causes
// `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5
...@@ -84,13 +93,6 @@ struct AVFormatContextPtr
explicit AVFormatContextPtr(AVFormatContext* p);
};
// create format context for reading media
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx = nullptr);
////////////////////////////////////////////////////////////////////////////////
// AVIO
////////////////////////////////////////////////////////////////////////////////
...@@ -166,20 +168,6 @@ struct AVCodecContextPtr
explicit AVCodecContextPtr(AVCodecContext* p);
};
// Allocate codec context from either decoder name or ID
AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder);
// Initialize codec context with the parameters
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef);
////////////////////////////////////////////////////////////////////////////////
// AVFilterGraph
////////////////////////////////////////////////////////////////////////////////
...
...@@ -55,6 +55,45 @@ OutInfo convert(OutputStreamInfo osi) {
}
} // namespace
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx) {
AVFormatContext* pFormat = avformat_alloc_context();
if (!pFormat) {
throw std::runtime_error("Failed to allocate AVFormatContext.");
}
if (io_ctx) {
pFormat->pb = io_ctx;
}
auto* pInput = [&]() -> AVFORMAT_CONST AVInputFormat* {
if (device.has_value()) {
std::string device_str = device.value();
AVFORMAT_CONST AVInputFormat* p =
av_find_input_format(device_str.c_str());
if (!p) {
std::ostringstream msg;
msg << "Unsupported device/format: \"" << device_str << "\"";
throw std::runtime_error(msg.str());
}
return p;
}
return nullptr;
}();
AVDictionary* opt = get_option_dict(option);
int ret = avformat_open_input(&pFormat, src.c_str(), pInput, &opt);
clean_up_dict(opt);
if (ret < 0)
throw std::runtime_error(
"Failed to open the input \"" + src + "\" (" + av_err2string(ret) +
").");
return AVFormatContextPtr(pFormat);
}
StreamReaderBinding::StreamReaderBinding(AVFormatContextPtr&& p)
: StreamReader(std::move(p)) {}
...
...@@ -5,6 +5,13 @@
namespace torchaudio {
namespace ffmpeg {
// create format context for reading media
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx = nullptr);
// Because TorchScript requires c10::Dict type to pass dict,
// while PyBind11 requires std::map type to pass dict,
// we duplicate the return tuple.
...