Simplify FilterGraph interface (#3251)

Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3251 Removes the unnecessary media type check in FilterGraph. Allows defining filters whose input and output have different media types. Reviewed By: nateanl Differential Revision: D44792340 fbshipit-source-id: e00497e0d30b5b3c3aacc66dd9b8c401757af288

Simplify FilterGraph interface (#3251)
Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3251 Removes the unnecessary media type check in FilterGraph. Allows defining filters whose input and output have different media types. Reviewed By: nateanl Differential Revision: D44792340 fbshipit-source-id: e00497e0d30b5b3c3aacc66dd9b8c401757af288
631bcc9f · Moto Hira · Facebook GitHub Bot · ea78478e · 631bcc9f · 631bcc9f
Commit 631bcc9f authored Apr 07, 2023 by Moto Hira Committed by Facebook GitHub Bot Apr 07, 2023
4 changed files
--- a/torchaudio/csrc/ffmpeg/filter_graph.cpp
+++ b/torchaudio/csrc/ffmpeg/filter_graph.cpp
@@ -13,16 +13,7 @@ AVFilterGraph* get_filter_graph() {
 }
 } // namespace
-FilterGraph::FilterGraph(AVMediaType media_type)
+FilterGraph::FilterGraph() : graph(get_filter_graph()) {}
-    : media_type(media_type), pFilterGraph(get_filter_graph()) {
-  switch (media_type) {
-    case AVMEDIA_TYPE_AUDIO:
-    case AVMEDIA_TYPE_VIDEO:
-      break;
-    default:
-      TORCH_CHECK(false, "Only audio and video type is supported.");
-  }
-}
 ////////////////////////////////////////////////////////////////////////////////
 // Configuration methods
@@ -77,11 +68,9 @@ void FilterGraph::add_audio_src(
    AVRational time_base,
    int sample_rate,
    uint64_t channel_layout) {
-  TORCH_CHECK(
+  add_src(
-      media_type == AVMEDIA_TYPE_AUDIO, "The filter graph is not audio type.");
+      avfilter_get_by_name("abuffer"),
-  std::string args =
+      get_audio_src_args(format, time_base, sample_rate, channel_layout));
-      get_audio_src_args(format, time_base, sample_rate, channel_layout);
-  add_src(args);
 }
 void FilterGraph::add_video_src(
@@ -91,28 +80,31 @@ void FilterGraph::add_video_src(
    int width,
    int height,
    AVRational sample_aspect_ratio) {
-  TORCH_CHECK(
+  add_src(
-      media_type == AVMEDIA_TYPE_VIDEO, "The filter graph is not video type.");
+      avfilter_get_by_name("buffer"),
-  std::string args = get_video_src_args(
+      get_video_src_args(
-      format, time_base, frame_rate, width, height, sample_aspect_ratio);
+          format, time_base, frame_rate, width, height, sample_aspect_ratio));
-  add_src(args);
 }
-void FilterGraph::add_src(const std::string& args) {
+void FilterGraph::add_src(const AVFilter* buffersrc, const std::string& args) {
-  const AVFilter* buffersrc = avfilter_get_by_name(
-      media_type == AVMEDIA_TYPE_AUDIO ? "abuffer" : "buffer");
  int ret = avfilter_graph_create_filter(
-      &buffersrc_ctx, buffersrc, "in", args.c_str(), NULL, pFilterGraph);
+      &buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, graph);
  TORCH_CHECK(
      ret >= 0,
      "Failed to create input filter: \"" + args + "\" (" + av_err2string(ret) +
          ")");
 }
-void FilterGraph::add_sink() {
+void FilterGraph::add_audio_sink() {
+  add_sink(avfilter_get_by_name("abuffersink"));
+}
+void FilterGraph::add_video_sink() {
+  add_sink(avfilter_get_by_name("buffersink"));
+}
+void FilterGraph::add_sink(const AVFilter* buffersink) {
  TORCH_CHECK(!buffersink_ctx, "Sink buffer is already allocated.");
-  const AVFilter* buffersink = avfilter_get_by_name(
-      media_type == AVMEDIA_TYPE_AUDIO ? "abuffersink" : "buffersink");
  // Note
  // Originally, the code here followed the example
  // https://ffmpeg.org/doxygen/4.1/filtering_audio_8c-example.html
@@ -123,7 +115,7 @@ void FilterGraph::add_sink() {
  // https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html
  // `abuffersink` should not take options, and this resolved issue.
  int ret = avfilter_graph_create_filter(
-      &buffersink_ctx, buffersink, "out", nullptr, nullptr, pFilterGraph);
+      &buffersink_ctx, buffersink, "out", nullptr, nullptr, graph);
  TORCH_CHECK(ret >= 0, "Failed to create output filter.");
 }
@@ -165,7 +157,7 @@ void FilterGraph::add_process(const std::string& filter_description) {
  InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx};
  int ret = avfilter_graph_parse_ptr(
-      pFilterGraph, filter_description.c_str(), out, in, nullptr);
+      graph, filter_description.c_str(), out, in, nullptr);
  TORCH_CHECK(
      ret >= 0,
@@ -175,9 +167,9 @@ void FilterGraph::add_process(const std::string& filter_description) {
 void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) {
  buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx;
-  int ret = avfilter_graph_config(pFilterGraph, nullptr);
+  int ret = avfilter_graph_config(graph, nullptr);
  TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret));
-  // char* desc = avfilter_graph_dump(pFilterGraph, NULL);
+  // char* desc = avfilter_graph_dump(graph, NULL);
  // std::cerr << "Filter created:\n" << desc << std::endl;
  // av_free(static_cast<void*>(desc));
 }

--- a/torchaudio/csrc/ffmpeg/filter_graph.h
+++ b/torchaudio/csrc/ffmpeg/filter_graph.h
@@ -22,9 +22,7 @@ struct FilterGraphOutputInfo {
 };
 class FilterGraph {
-  AVMediaType media_type;
+  AVFilterGraphPtr graph;
-  AVFilterGraphPtr pFilterGraph;
  // AVFilterContext is freed as a part of AVFilterGraph
  // so we do not manage the resource.
@@ -32,7 +30,7 @@ class FilterGraph {
  AVFilterContext* buffersink_ctx = nullptr;
 public:
-  explicit FilterGraph(AVMediaType media_type);
+  explicit FilterGraph();
  // Custom destructor to release AVFilterGraph*
  ~FilterGraph() = default;
  // Non-copyable
@@ -59,17 +57,23 @@ class FilterGraph {
      int height,
      AVRational sample_aspect_ratio);
-  void add_src(const std::string& arg);
+  void add_audio_sink();
-  void add_sink();
+  void add_video_sink();
  void add_process(const std::string& filter_description);
  void create_filter(AVBufferRef* hw_frames_ctx = nullptr);
+ private:
+  void add_src(const AVFilter* buffersrc, const std::string& arg);
+  void add_sink(const AVFilter* buffersrc);
  //////////////////////////////////////////////////////////////////////////////
  // Query methods
  //////////////////////////////////////////////////////////////////////////////
+ public:
  [[nodiscard]] FilterGraphOutputInfo get_output_info() const;
  //////////////////////////////////////////////////////////////////////////////

--- a/torchaudio/csrc/ffmpeg/stream_reader/post_process.cpp
+++ b/torchaudio/csrc/ffmpeg/stream_reader/post_process.cpp
@@ -21,9 +21,9 @@ FilterGraphFactory get_audio_factory(
          rate = codec_ctx->sample_rate,
          channel_layout = codec_ctx->channel_layout](
             const std::string& filter_desc) -> FilterGraph {
-    FilterGraph f{AVMEDIA_TYPE_AUDIO};
+    FilterGraph f;
    f.add_audio_src(fmt, time_base, rate, channel_layout);
-    f.add_sink();
+    f.add_audio_sink();
    f.add_process(filter_desc);
    f.create_filter();
    return f;
@@ -43,9 +43,9 @@ FilterGraphFactory get_video_factory(
          ratio = codec_ctx->sample_aspect_ratio,
          hw_frames_ctx = codec_ctx->hw_frames_ctx](
             const std::string& filter_desc) -> FilterGraph {
-    FilterGraph f{AVMEDIA_TYPE_VIDEO};
+    FilterGraph f;
    f.add_video_src(fmt, time_base, frame_rate, w, h, ratio);
-    f.add_sink();
+    f.add_video_sink();
    f.add_process(filter_desc);
    if (hw_frames_ctx) {
      f.create_filter(av_buffer_ref(hw_frames_ctx));

--- a/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp
+++ b/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp
@@ -607,10 +607,10 @@ FilterGraph get_audio_filter_graph(
    return "anull";
  }();
-  FilterGraph f{AVMEDIA_TYPE_AUDIO};
+  FilterGraph f;
  f.add_audio_src(
      src_fmt, {1, src_sample_rate}, src_sample_rate, src_ch_layout);
-  f.add_sink();
+  f.add_audio_sink();
  f.add_process(desc);
  f.create_filter();
  return f;
@@ -657,10 +657,10 @@ FilterGraph get_video_filter_graph(
    return "null";
  }();
-  FilterGraph f{AVMEDIA_TYPE_VIDEO};
+  FilterGraph f;
  f.add_video_src(
      src_fmt, av_inv_q(src_rate), src_rate, src_width, src_height, {1, 1});
-  f.add_sink();
+  f.add_video_sink();
  f.add_process(desc);
  f.create_filter();
  return f;