Commit cd52d008 authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Add FilterGraph class (#2043)

Summary:
Part of https://github.com/pytorch/audio/issues/1986. Splitting the PR for easier review.

Add a FilterGraph class that is responsible for handling the AVFilterGraph structure and the application of filters.
For the overall architecture, see https://github.com/mthrok/audio/blob/ffmpeg/torchaudio/csrc/ffmpeg/README.md.

Note: Without a change to the build process, the code added here won't be compiled. The build process will be updated later.
Needs to be imported after https://github.com/pytorch/audio/issues/2042.

Pull Request resolved: https://github.com/pytorch/audio/pull/2043

Reviewed By: carolineechen

Differential Revision: D32940535

Pulled By: mthrok

fbshipit-source-id: 231e3ad17df2d67b6c7b323e5c89e718a3d48d0d
parent a76b0066
#include <torchaudio/csrc/ffmpeg/filter_graph.h>
#include <stdexcept>
#include <utility>
namespace torchaudio {
namespace ffmpeg {
// Builds a ready-to-use filter graph for a single audio or video stream.
//
// time_base:          time base handed to the source buffer filter.
// codecpar:           codec parameters describing the stream; its
//                     `codec_type` selects the audio or video code path.
// filter_description: filter description string; when empty, add_process()
//                     falls back to a pass-through filter.
//
// The four set-up steps run in a fixed order (source, sink, parsing the
// description, configuring the graph). Each of them throws
// std::runtime_error on failure.
FilterGraph::FilterGraph(
    AVRational time_base,
    AVCodecParameters* codecpar,
    std::string filter_description)
    // Inside the initializer the name refers to the parameter, which is
    // taken by value, so it can be moved into the member instead of copied.
    // The constructor body must not read the (now moved-from) parameter;
    // the member functions below only see the member.
    : filter_description(std::move(filter_description)) {
  add_src(time_base, codecpar);
  add_sink();
  add_process();
  create_filter();
}
////////////////////////////////////////////////////////////////////////////////
// Configuration methods
////////////////////////////////////////////////////////////////////////////////
namespace {
// Formats the option string passed to the audio source buffer filter.
//
// The string carries the time base, sample rate, sample format name and
// channel layout (as a hexadecimal mask) read from `time_base` and
// `codecpar`.
//
// Throws std::runtime_error when the sample format stored in
// `codecpar->format` has no name.
std::string get_audio_src_args(
    AVRational time_base,
    AVCodecParameters* codecpar) {
  // av_get_sample_fmt_name() yields a null pointer for a format it does not
  // recognize. Feeding a null pointer to "%s" is undefined behavior, so
  // reject it explicitly before formatting.
  const char* sample_fmt_name =
      av_get_sample_fmt_name(static_cast<AVSampleFormat>(codecpar->format));
  if (!sample_fmt_name) {
    throw std::runtime_error(
        "Unknown sample format: " + std::to_string(codecpar->format));
  }
  char args[512];
  std::snprintf(
      args,
      sizeof(args),
      "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
      time_base.num,
      time_base.den,
      codecpar->sample_rate,
      sample_fmt_name,
      codecpar->channel_layout);
  return std::string(args);
}
// Formats the option string passed to the video source buffer filter.
//
// The string carries the frame size, the pixel format (as its integer
// value), the time base and the pixel aspect ratio read from `time_base`
// and `codecpar`.
std::string get_video_src_args(
    AVRational time_base,
    AVCodecParameters* codecpar) {
  const auto pix_fmt = static_cast<AVPixelFormat>(codecpar->format);
  const AVRational aspect = codecpar->sample_aspect_ratio;

  char buffer[512];
  std::snprintf(
      buffer,
      sizeof(buffer),
      "video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
      codecpar->width,
      codecpar->height,
      pix_fmt,
      time_base.num,
      time_base.den,
      aspect.num,
      aspect.den);
  return buffer;
}
} // namespace
// Creates the source buffer filter ("in") of the graph.
//
// Records the media type taken from `codecpar->codec_type`, builds the
// matching option string and instantiates "abuffer" (audio) or "buffer"
// (video) inside the graph.
//
// Throws std::runtime_error when a source was already added, when the media
// type is neither audio nor video, or when the filter cannot be created.
void FilterGraph::add_src(AVRational time_base, AVCodecParameters* codecpar) {
  if (media_type != AVMEDIA_TYPE_UNKNOWN) {
    throw std::runtime_error("Source buffer is already allocated.");
  }
  media_type = codecpar->codec_type;

  // Pick both the option string and the filter name per media type.
  std::string src_args;
  const char* filter_name = nullptr;
  switch (media_type) {
    case AVMEDIA_TYPE_AUDIO:
      src_args = get_audio_src_args(time_base, codecpar);
      filter_name = "abuffer";
      break;
    case AVMEDIA_TYPE_VIDEO:
      src_args = get_video_src_args(time_base, codecpar);
      filter_name = "buffer";
      break;
    default:
      throw std::runtime_error("Only audio/video are supported.");
  }

  const AVFilter* src_filter = avfilter_get_by_name(filter_name);
  const int status = avfilter_graph_create_filter(
      &buffersrc_ctx,
      src_filter,
      "in",
      src_args.c_str(),
      nullptr,
      pFilterGraph);
  if (status < 0) {
    throw std::runtime_error(
        "Failed to create input filter: \"" + src_args + "\"");
  }
}
void FilterGraph::add_sink() {
if (media_type == AVMEDIA_TYPE_UNKNOWN) {
throw std::runtime_error("Source buffer is not allocated.");
}
if (buffersink_ctx) {
throw std::runtime_error("Sink buffer is already allocated.");
}
const AVFilter* buffersink = avfilter_get_by_name(
media_type == AVMEDIA_TYPE_AUDIO ? "abuffersink" : "buffersink");
// Note
// Originally, the code here followed the example
// https://ffmpeg.org/doxygen/4.1/filtering_audio_8c-example.html
// which sets option for `abuffersink`, which caused an issue where the
// `abuffersink` parameters set for the first time survive across multiple
// fitler generations.
// According to the other example
// https://ffmpeg.org/doxygen/4.1/filter_audio_8c-example.html
// `abuffersink` should not take options, and this resolved issue.
int ret = avfilter_graph_create_filter(
&buffersink_ctx, buffersink, "out", nullptr, nullptr, pFilterGraph);
if (ret < 0) {
throw std::runtime_error("Failed to create output filter.");
}
}
namespace {
// Encapsulating AVFilterInOut* with handy methods since
// we need to deal with multiple of them at the same time.
// Owning wrapper around a single AVFilterInOut*.
//
// The constructor allocates the structure and fills in its name, filter
// context and pad index; the destructor releases it with
// avfilter_inout_free(). The implicit conversion to AVFilterInOut** lets an
// instance be passed directly where the parsing API expects a pointer to
// the list head.
class InOuts {
  AVFilterInOut* p = nullptr;
  // Disable copy constructor/assignment just in case.
  InOuts(const InOuts&) = delete;
  InOuts& operator=(const InOuts&) = delete;

 public:
  // name: label stored (as a duplicated string) in the structure.
  // pCtx: filter context this endpoint is attached to.
  //
  // Throws std::runtime_error when the structure or the copy of `name`
  // cannot be allocated.
  InOuts(const char* name, AVFilterContext* pCtx) {
    p = avfilter_inout_alloc();
    if (!p) {
      throw std::runtime_error("Failed to allocate AVFilterInOut.");
    }
    p->name = av_strdup(name);
    if (!p->name) {
      // The destructor does not run when the constructor throws, so the
      // partially built structure has to be released here.
      avfilter_inout_free(&p);
      throw std::runtime_error("Failed to allocate the name of AVFilterInOut.");
    }
    p->filter_ctx = pCtx;
    p->pad_idx = 0;
    p->next = nullptr;
  }
  ~InOuts() {
    avfilter_inout_free(&p);
  }
  // Exposes the address of the owned pointer.
  operator AVFilterInOut**() {
    return &p;
  }
};
} // namespace
void FilterGraph::add_process() {
// Note
// The official example and other derived codes out there use
// https://ffmpeg.org/doxygen/4.1/filtering_audio_8c-example.html#_a37
// variable name `in` for "out"/buffersink, and `out` for "in"/buffersrc.
// If you are debugging this part of the code, you might get confused.
InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx};
std::string desc = filter_description.empty()
? (media_type == AVMEDIA_TYPE_AUDIO) ? "anull" : "null"
: filter_description;
int ret =
avfilter_graph_parse_ptr(pFilterGraph, desc.c_str(), out, in, nullptr);
if (ret < 0) {
throw std::runtime_error("Failed to create the filter.");
}
}
// Finalizes the graph by configuring it.
//
// Throws std::runtime_error when avfilter_graph_config() reports an error.
void FilterGraph::create_filter() {
  const int status = avfilter_graph_config(pFilterGraph, nullptr);
  if (status < 0) {
    throw std::runtime_error("Failed to configure the graph.");
  }
  // Debugging aid, kept disabled: dump the configured graph.
  // char* desc = avfilter_graph_dump(pFilterGraph.get(), NULL);
  // std::cerr << "Filter created:\n" << desc << std::endl;
  // av_free(static_cast<void*>(desc));
}
////////////////////////////////////////////////////////////////////////////////
// Streaming process
//////////////////////////////////////////////////////////////////////////////
// Feeds one frame into the source buffer filter.
//
// The frame is submitted with AV_BUFFERSRC_FLAG_KEEP_REF. Returns the
// status code of av_buffersrc_add_frame_flags() unchanged.
int FilterGraph::add_frame(AVFrame* pInputFrame) {
  const int status = av_buffersrc_add_frame_flags(
      buffersrc_ctx, pInputFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
  return status;
}
// Fetches one filtered frame from the sink buffer filter into
// `pOutputFrame`.
//
// Returns the status code of av_buffersink_get_frame() unchanged.
int FilterGraph::get_frame(AVFrame* pOutputFrame) {
  const int status = av_buffersink_get_frame(buffersink_ctx, pOutputFrame);
  return status;
}
} // namespace ffmpeg
} // namespace torchaudio
#pragma once
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
namespace torchaudio {
namespace ffmpeg {
// Handles a filter graph for a single audio or video stream: a source
// buffer, the filters parsed from a description string, and a sink buffer.
//
// The graph is fully built by the constructor; afterwards frames are pushed
// with add_frame() and filtered frames are pulled with get_frame().
class FilterGraph {
  // Stays AVMEDIA_TYPE_UNKNOWN until the source buffer has been added.
  AVMediaType media_type = AVMEDIA_TYPE_UNKNOWN;
  AVFilterGraphPtr pFilterGraph;
  // AVFilterContext is freed as a part of AVFilterGraph
  // so we do not manage the resource.
  AVFilterContext* buffersrc_ctx = nullptr;
  AVFilterContext* buffersink_ctx = nullptr;

 public:
  // Filter description this graph was built from (may be empty).
  const std::string filter_description;

  // Builds and configures the whole graph.
  // Throws std::runtime_error when any set-up step fails.
  FilterGraph(
      AVRational time_base,
      AVCodecParameters* codecpar,
      std::string filter_desc);
  // Nothing to release by hand here; the members clean up after themselves.
  ~FilterGraph() = default;
  // Non-copyable
  FilterGraph(const FilterGraph&) = delete;
  FilterGraph& operator=(const FilterGraph&) = delete;
  // Move-constructible. Note that the const `filter_description` member is
  // copied, not moved, by the defaulted move constructor.
  FilterGraph(FilterGraph&&) = default;
  // Not move-assignable: the const `filter_description` member cannot be
  // assigned to, so a defaulted move assignment would be implicitly deleted
  // anyway. Spelling it out keeps the declaration honest.
  FilterGraph& operator=(FilterGraph&&) = delete;

  //////////////////////////////////////////////////////////////////////////////
  // Configuration methods
  //////////////////////////////////////////////////////////////////////////////
 private:
  // Creates the source buffer filter and records the media type.
  void add_src(AVRational time_base, AVCodecParameters* codecpar);
  // Creates the sink buffer filter; requires add_src() to have run.
  void add_sink();
  // Parses `filter_description` and links it between source and sink.
  void add_process();
  // Configures the assembled graph.
  void create_filter();

  //////////////////////////////////////////////////////////////////////////////
  // Streaming process
  //////////////////////////////////////////////////////////////////////////////
 public:
  // Pushes a frame into the graph; returns the FFmpeg status code.
  int add_frame(AVFrame* pInputFrame);
  // Pulls a filtered frame from the graph; returns the FFmpeg status code.
  int get_frame(AVFrame* pOutputFrame);
};
} // namespace ffmpeg
} // namespace torchaudio
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment