Add FilterGraph class (#2043)

Summary: Part of https://github.com/pytorch/audio/issues/1986. Splitting the PR for easier review. Add FilterGraph class that is responsible for handling AVFilterGraph structure and the application of filters. For the overall architecture, see https://github.com/mthrok/audio/blob/ffmpeg/torchaudio/csrc/ffmpeg/README.md. Note: Without a change to build process, the code added here won't be compiled. The build process will be updated later. Needs to be imported after https://github.com/pytorch/audio/issues/2042. Pull Request resolved: https://github.com/pytorch/audio/pull/2043 Reviewed By: carolineechen Differential Revision: D32940535 Pulled By: mthrok fbshipit-source-id: 231e3ad17df2d67b6c7b323e5c89e718a3d48d0d

Add FilterGraph class (#2043)
Summary: Part of https://github.com/pytorch/audio/issues/1986. Splitting the PR for easier review. Add FilterGraph class that is responsible for handling AVFilterGraph structure and the application of filters. For the overall architecture, see https://github.com/mthrok/audio/blob/ffmpeg/torchaudio/csrc/ffmpeg/README.md. Note: Without a change to build process, the code added here won't be compiled. The build process will be updated later. Needs to be imported after https://github.com/pytorch/audio/issues/2042. Pull Request resolved: https://github.com/pytorch/audio/pull/2043 Reviewed By: carolineechen Differential Revision: D32940535 Pulled By: mthrok fbshipit-source-id: 231e3ad17df2d67b6c7b323e5c89e718a3d48d0d
cd52d008 · moto · Facebook GitHub Bot · a76b0066 · cd52d008 · cd52d008
Commit cd52d008 authored Dec 23, 2021 by moto Committed by Facebook GitHub Bot Dec 23, 2021
Hide whitespace changes
Inline Side-by-side

Showing with 233 additions and 0 deletions

torchaudio/csrc/ffmpeg/filter_graph.cpp torchaudio/csrc/ffmpeg/filter_graph.cpp +181 -0

torchaudio/csrc/ffmpeg/filter_graph.h torchaudio/csrc/ffmpeg/filter_graph.h +52 -0

No files found.
--- a/torchaudio/csrc/ffmpeg/filter_graph.cpp
+++ b/torchaudio/csrc/ffmpeg/filter_graph.cpp
+#include <torchaudio/csrc/ffmpeg/filter_graph.h>
+#include <stdexcept>
+#include <string>
+#include <utility>
+
+namespace torchaudio {
+namespace ffmpeg {
+
+// Builds a ready-to-use filter graph in four steps: create the source
+// buffer from `time_base`/`codecpar`, create the sink buffer, parse the
+// filter description between the two, then configure the graph.
+// Any failing step throws std::runtime_error.
+//
+// The description is taken by value and moved into the member, so callers
+// passing a temporary pay for no extra copy.
+FilterGraph::FilterGraph(
+    AVRational time_base,
+    AVCodecParameters* codecpar,
+    std::string filter_desc)
+    : filter_description(std::move(filter_desc)) {
+  add_src(time_base, codecpar);
+  add_sink();
+  add_process();
+  create_filter();
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Configuration methods
+////////////////////////////////////////////////////////////////////////////////
+namespace {
+// Formats the option string used to create the audio source buffer:
+// time base, sample rate, sample format name and channel layout mask
+// (printed in hexadecimal).
+//
+// Throws std::runtime_error when the sample format stored in `codecpar`
+// has no name.
+std::string get_audio_src_args(
+    AVRational time_base,
+    AVCodecParameters* codecpar) {
+  // av_get_sample_fmt_name yields a null pointer for a format it does not
+  // recognize; handing that to "%s" would be undefined behavior, so reject
+  // it up front with a descriptive error.
+  const char* sample_fmt_name =
+      av_get_sample_fmt_name(static_cast<AVSampleFormat>(codecpar->format));
+  if (!sample_fmt_name) {
+    throw std::runtime_error(
+        "Unknown sample format: " + std::to_string(codecpar->format));
+  }
+  char args[512];
+  std::snprintf(
+      args,
+      sizeof(args),
+      "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
+      time_base.num,
+      time_base.den,
+      codecpar->sample_rate,
+      sample_fmt_name,
+      codecpar->channel_layout);
+  return std::string(args);
+}
+
+// Formats the option string used to create the video source buffer:
+// frame size, pixel format (as its numeric value), time base and pixel
+// aspect ratio, all read from `time_base` and `codecpar`.
+std::string get_video_src_args(
+    AVRational time_base,
+    AVCodecParameters* codecpar) {
+  const auto pix_fmt = static_cast<AVPixelFormat>(codecpar->format);
+  const auto& aspect = codecpar->sample_aspect_ratio;
+
+  char buf[512];
+  std::snprintf(
+      buf,
+      sizeof(buf),
+      "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
+      codecpar->width,
+      codecpar->height,
+      pix_fmt,
+      time_base.num,
+      time_base.den,
+      aspect.num,
+      aspect.den);
+  return buf;
+}
+
+} // namespace
+
+// Creates the source buffer filter ("abuffer" for audio, "buffer" for
+// video), registers it in the graph under the name "in", and records the
+// media type of the stream.
+//
+// Throws std::runtime_error if a source was already added, if the stream is
+// neither audio nor video, if the buffer filter is not available, or if the
+// filter cannot be created from the generated arguments.
+void FilterGraph::add_src(AVRational time_base, AVCodecParameters* codecpar) {
+  if (media_type != AVMEDIA_TYPE_UNKNOWN) {
+    throw std::runtime_error("Source buffer is already allocated.");
+  }
+  // Work on a local copy of the stream type; `media_type` is only updated
+  // once the source filter exists, so a failure below leaves this object
+  // in its "no source" state.
+  const AVMediaType type = codecpar->codec_type;
+  std::string args;
+  const char* filter_name = nullptr;
+  switch (type) {
+    case AVMEDIA_TYPE_AUDIO:
+      args = get_audio_src_args(time_base, codecpar);
+      filter_name = "abuffer";
+      break;
+    case AVMEDIA_TYPE_VIDEO:
+      args = get_video_src_args(time_base, codecpar);
+      filter_name = "buffer";
+      break;
+    default:
+      throw std::runtime_error("Only audio/video are supported.");
+  }
+
+  // The lookup yields a null pointer when no filter of that name is
+  // registered; fail with a clear message instead of passing it on.
+  const AVFilter* buffersrc = avfilter_get_by_name(filter_name);
+  if (!buffersrc) {
+    throw std::runtime_error(
+        std::string("Failed to find input filter: ") + filter_name);
+  }
+  int ret = avfilter_graph_create_filter(
+      &buffersrc_ctx, buffersrc, "in", args.c_str(), nullptr, pFilterGraph);
+  if (ret < 0) {
+    throw std::runtime_error("Failed to create input filter: \"" + args + "\"");
+  }
+  media_type = type;
+}
+
+// Creates the sink buffer filter ("abuffersink" for audio, "buffersink" for
+// video) and registers it in the graph under the name "out".
+//
+// Throws std::runtime_error if no source has been added yet, if a sink
+// already exists, if the sink filter is not available, or if the filter
+// cannot be created.
+void FilterGraph::add_sink() {
+  if (media_type == AVMEDIA_TYPE_UNKNOWN) {
+    throw std::runtime_error("Source buffer is not allocated.");
+  }
+  if (buffersink_ctx) {
+    throw std::runtime_error("Sink buffer is already allocated.");
+  }
+  const char* filter_name =
+      media_type == AVMEDIA_TYPE_AUDIO ? "abuffersink" : "buffersink";
+  // The lookup yields a null pointer when no filter of that name is
+  // registered; fail with a clear message instead of passing it on.
+  const AVFilter* buffersink = avfilter_get_by_name(filter_name);
+  if (!buffersink) {
+    throw std::runtime_error(
+        std::string("Failed to find output filter: ") + filter_name);
+  }
+  // Note
+  // Originally, the code here followed the example
+  // https://ffmpeg.org/doxygen/4.1/filtering_audio_8c-example.html
+  // which sets options for `abuffersink`. That caused an issue where the
+  // `abuffersink` parameters set the first time survived across multiple
+  // filter generations.
+  // According to the other example
+  // https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html
+  // `abuffersink` should not take options, and following it resolved the
+  // issue.
+  int ret = avfilter_graph_create_filter(
+      &buffersink_ctx, buffersink, "out", nullptr, nullptr, pFilterGraph);
+  if (ret < 0) {
+    throw std::runtime_error("Failed to create output filter.");
+  }
+}
+
+namespace {
+
+// Encapsulating AVFilterInOut* with handy methods since
+// we need to deal with multiple of them at the same time.
+// Owning wrapper around a single AVFilterInOut*.
+// The constructor allocates the entry and fills in its label, filter
+// context, pad index (0) and next pointer (null); the destructor releases
+// it with avfilter_inout_free. The object converts implicitly to
+// AVFilterInOut** so it can be handed directly to FFmpeg functions that
+// take the list by pointer-to-pointer.
+class InOuts {
+  AVFilterInOut* p = nullptr;
+  // Disable copy constructor/assignment just in case.
+  InOuts(const InOuts&) = delete;
+  InOuts& operator=(const InOuts&) = delete;
+
+ public:
+  // Throws std::runtime_error if the entry or the copy of `name` cannot be
+  // allocated.
+  InOuts(const char* name, AVFilterContext* pCtx) {
+    p = avfilter_inout_alloc();
+    if (!p) {
+      throw std::runtime_error("Failed to allocate AVFilterInOut.");
+    }
+    p->name = av_strdup(name);
+    if (name && !p->name) {
+      // The destructor does not run when a constructor throws, so the
+      // entry allocated above has to be released here to avoid a leak.
+      avfilter_inout_free(&p);
+      throw std::runtime_error("Failed to allocate AVFilterInOut name.");
+    }
+    p->filter_ctx = pCtx;
+    p->pad_idx = 0;
+    p->next = nullptr;
+  }
+  ~InOuts() {
+    avfilter_inout_free(&p);
+  }
+  operator AVFilterInOut**() {
+    return &p;
+  }
+};
+
+} // namespace
+
+// Parses the filter description and links it between the source ("in") and
+// sink ("out") buffers. An empty description falls back to the pass-through
+// filter: "anull" for audio, "null" otherwise.
+// Throws std::runtime_error if the description cannot be parsed.
+void FilterGraph::add_process() {
+  // Naming caveat
+  // The official example
+  // https://ffmpeg.org/doxygen/4.1/filtering_audio_8c-example.html#_a37
+  // (and code derived from it) names the variable tied to "out"/buffersink
+  // `in`, and the one tied to "in"/buffersrc `out`. Here each variable is
+  // named after the buffer it wraps instead, which is why the sink endpoint
+  // is passed to the parser before the source endpoint.
+  InOuts src_end{"in", buffersrc_ctx};
+  InOuts sink_end{"out", buffersink_ctx};
+
+  std::string graph_desc = filter_description;
+  if (graph_desc.empty()) {
+    graph_desc = (media_type == AVMEDIA_TYPE_AUDIO) ? "anull" : "null";
+  }
+
+  const int status = avfilter_graph_parse_ptr(
+      pFilterGraph, graph_desc.c_str(), sink_end, src_end, nullptr);
+  if (status < 0) {
+    throw std::runtime_error("Failed to create the filter.");
+  }
+}
+
+// Finalizes the graph by running avfilter_graph_config on it.
+// Throws std::runtime_error when the configuration step reports an error.
+void FilterGraph::create_filter() {
+  const int status = avfilter_graph_config(pFilterGraph, nullptr);
+  if (status < 0) {
+    throw std::runtime_error("Failed to configure the graph.");
+  }
+  // Debugging aid (disabled): print the configured graph.
+  // char* desc = avfilter_graph_dump(pFilterGraph.get(), NULL);
+  // std::cerr << "Filter created:\n" << desc << std::endl;
+  // av_free(static_cast<void*>(desc));
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Streaming process
+//////////////////////////////////////////////////////////////////////////////
+// Pushes `pInputFrame` into the source buffer with the
+// AV_BUFFERSRC_FLAG_KEEP_REF flag and returns the FFmpeg status code
+// unchanged; interpreting it is left to the caller.
+int FilterGraph::add_frame(AVFrame* pInputFrame) {
+  const int status = av_buffersrc_add_frame_flags(
+      buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
+  return status;
+}
+
+// Requests one frame from the sink buffer into `pOutputFrame` and returns
+// the FFmpeg status code unchanged; interpreting it is left to the caller.
+int FilterGraph::get_frame(AVFrame* pOutputFrame) {
+  const int status = av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
+  return status;
+}
+
+} // namespace ffmpeg
+} // namespace torchaudio
--- a/torchaudio/csrc/ffmpeg/filter_graph.h
+++ b/torchaudio/csrc/ffmpeg/filter_graph.h
+#pragma once
+
+#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
+namespace torchaudio {
+namespace ffmpeg {
+
+// Wraps an AVFilterGraph together with its source ("in") and sink ("out")
+// buffer filters. The graph is fully built and configured by the
+// constructor; afterwards frames are pushed with `add_frame` and pulled
+// with `get_frame`.
+class FilterGraph {
+  // Media type of the source stream; stays AVMEDIA_TYPE_UNKNOWN until the
+  // source buffer has been added.
+  AVMediaType media_type = AVMEDIA_TYPE_UNKNOWN;
+  AVFilterGraphPtr pFilterGraph;
+  // AVFilterContext is freed as a part of AVFilterGraph
+  // so we do not manage the resource.
+  AVFilterContext* buffersrc_ctx = nullptr;
+  AVFilterContext* buffersink_ctx = nullptr;
+
+ public:
+  // Filter description the graph was built from (may be empty).
+  const std::string filter_description;
+
+  // Builds and configures the whole graph.
+  // Throws std::runtime_error if any construction step fails.
+  FilterGraph(
+      AVRational time_base,
+      AVCodecParameters* codecpar,
+      std::string filter_desc);
+  // The members release their own resources, so the default destructor
+  // is sufficient.
+  ~FilterGraph() = default;
+  // Non-copyable
+  FilterGraph(const FilterGraph&) = delete;
+  FilterGraph& operator=(const FilterGraph&) = delete;
+  // Move-constructible
+  FilterGraph(FilterGraph&&) = default;
+  // Not move-assignable: `filter_description` is const, so a defaulted
+  // move assignment would be implicitly deleted anyway. Spell that out
+  // rather than advertising an operator that cannot be used.
+  FilterGraph& operator=(FilterGraph&&) = delete;
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Configuration methods
+  //////////////////////////////////////////////////////////////////////////////
+ private:
+  // Creates the source buffer filter from the stream parameters.
+  void add_src(AVRational time_base, AVCodecParameters* codecpar);
+
+  // Creates the sink buffer filter.
+  void add_sink();
+
+  // Parses `filter_description` and links it between source and sink.
+  void add_process();
+
+  // Configures the assembled graph.
+  void create_filter();
+
+  //////////////////////////////////////////////////////////////////////////////
+  // Streaming process
+  //////////////////////////////////////////////////////////////////////////////
+ public:
+  // Pushes a frame into the source buffer; returns the FFmpeg status code.
+  int add_frame(AVFrame* pInputFrame);
+  // Pulls a frame from the sink buffer; returns the FFmpeg status code.
+  int get_frame(AVFrame* pOutputFrame);
+};
+
+} // namespace ffmpeg
+} // namespace torchaudio