Commit c5b96558 authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Support CUDA frame in FilterGraph (#3183)

Summary:
This commit adds CUDA frame support to FilterGraph

It initializes and attaches CUDA frames context to FilterGraph,
so that CUDA frames can be processed in FilterGraph.

As a result, it enables
1. CUDA filter support such as `scale_cuda`
2. Properly retrieving the pixel format coming out of FilterGraph when
   CUDA HW acceleration is enabled (currently it is reported as "cuda")

Resolves https://github.com/pytorch/audio/issues/3159

Pull Request resolved: https://github.com/pytorch/audio/pull/3183

Reviewed By: hwangjeff

Differential Revision: D44183722

Pulled By: mthrok

fbshipit-source-id: 522d21039c361ddfaa87fa89cf49c19d210ac62f
parent 0c8c138c
...@@ -654,6 +654,7 @@ jobs: ...@@ -654,6 +654,7 @@ jobs:
command: .circleci/unittest/linux/scripts/run_test.sh command: .circleci/unittest/linux/scripts/run_test.sh
environment: environment:
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
...@@ -689,11 +690,12 @@ jobs: ...@@ -689,11 +690,12 @@ jobs:
- run: - run:
name: Run tests name: Run tests
environment: environment:
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
command: | command: |
docker run -t --gpus all -v $PWD:$PWD -w $PWD -e "CI=${CI}" -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310 -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS "${image_name}" .circleci/unittest/linux/scripts/run_test.sh docker run -t --gpus all -v $PWD:$PWD -w $PWD -e "CI=${CI}" -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310 -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL "${image_name}" .circleci/unittest/linux/scripts/run_test.sh
- store_test_results: - store_test_results:
path: test-results path: test-results
- store_artifacts: - store_artifacts:
...@@ -726,6 +728,7 @@ jobs: ...@@ -726,6 +728,7 @@ jobs:
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_SOX: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_SOX: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_KALDI: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_KALDI: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
...@@ -814,6 +817,7 @@ jobs: ...@@ -814,6 +817,7 @@ jobs:
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_QUANTIZATION: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_QUANTIZATION: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true
......
...@@ -654,6 +654,7 @@ jobs: ...@@ -654,6 +654,7 @@ jobs:
command: .circleci/unittest/linux/scripts/run_test.sh command: .circleci/unittest/linux/scripts/run_test.sh
environment: environment:
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_AUDIO_OUT_DEVICE: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
...@@ -689,11 +690,12 @@ jobs: ...@@ -689,11 +690,12 @@ jobs:
- run: - run:
name: Run tests name: Run tests
environment: environment:
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS: true
command: | command: |
docker run -t --gpus all -v $PWD:$PWD -w $PWD -e "CI=${CI}" -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310 -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS "${image_name}" .circleci/unittest/linux/scripts/run_test.sh docker run -t --gpus all -v $PWD:$PWD -w $PWD -e "CI=${CI}" -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310 -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MACOS -e TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL "${image_name}" .circleci/unittest/linux/scripts/run_test.sh
- store_test_results: - store_test_results:
path: test-results path: test-results
- store_artifacts: - store_artifacts:
...@@ -726,6 +728,7 @@ jobs: ...@@ -726,6 +728,7 @@ jobs:
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_SOX: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_SOX: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_KALDI: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_KALDI: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_SOX: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
...@@ -814,6 +817,7 @@ jobs: ...@@ -814,6 +817,7 @@ jobs:
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CUDA: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_QUANTIZATION: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_QUANTIZATION: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310: true
TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_MOD_sentencepiece: true
......
...@@ -72,6 +72,7 @@ jobs: ...@@ -72,6 +72,7 @@ jobs:
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_MFCC_FEATS=true
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_CMD_COMPUTE_SPECTROGRAM_FEATS=true
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_CUDA_SMALL_MEMORY=true
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_NO_HW_ACCEL=true
export TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310=true export TORCHAUDIO_TEST_ALLOW_SKIP_IF_ON_PYTHON_310=true
declare -a args=( declare -a args=(
......
...@@ -10,6 +10,7 @@ from .case_utils import ( ...@@ -10,6 +10,7 @@ from .case_utils import (
skipIfNoCuda, skipIfNoCuda,
skipIfNoExec, skipIfNoExec,
skipIfNoFFmpeg, skipIfNoFFmpeg,
skipIfNoHWAccel,
skipIfNoKaldi, skipIfNoKaldi,
skipIfNoMacOS, skipIfNoMacOS,
skipIfNoModule, skipIfNoModule,
...@@ -55,6 +56,7 @@ __all__ = [ ...@@ -55,6 +56,7 @@ __all__ = [
"skipIfRocm", "skipIfRocm",
"skipIfNoQengine", "skipIfNoQengine",
"skipIfNoFFmpeg", "skipIfNoFFmpeg",
"skipIfNoHWAccel",
"skipIfPy310", "skipIfPy310",
"get_wav_data", "get_wav_data",
"normalize_wav", "normalize_wav",
......
...@@ -12,6 +12,7 @@ import torch ...@@ -12,6 +12,7 @@ import torch
import torchaudio import torchaudio
from torch.testing._internal.common_utils import TestCase as PytorchTestCase from torch.testing._internal.common_utils import TestCase as PytorchTestCase
from torchaudio._internal.module_utils import is_module_available from torchaudio._internal.module_utils import is_module_available
from torchaudio.utils.ffmpeg_utils import get_video_decoders, get_video_encoders
from .backend_utils import set_audio_backend from .backend_utils import set_audio_backend
...@@ -270,6 +271,19 @@ skipIfNoMacOS = _skipIf( ...@@ -270,6 +271,19 @@ skipIfNoMacOS = _skipIf(
) )
def skipIfNoHWAccel(name):
    """Skip a test unless FFmpeg-based HW acceleration is usable.

    The test is skipped (under key ``"NO_HW_ACCEL"``) when any prerequisite is
    missing: FFmpeg integration, a CUDA device, a CUDA-enabled torchaudio
    build, or the requested FFmpeg component.

    Args:
        name (str): Name of the FFmpeg video decoder or encoder that the test
            requires (e.g. ``"h264_cuvid"``).

    Returns:
        A skip decorator when a prerequisite is missing, otherwise a
        pass-through decorator.
    """
    key = "NO_HW_ACCEL"
    # Checks are ordered so that decoder/encoder listing is only queried once
    # the FFmpeg/CUDA prerequisites are known to hold.
    if not is_ffmpeg_available():
        reason = "ffmpeg features are not available."
    elif not torch.cuda.is_available():
        reason = "CUDA is not available."
    elif torchaudio._extension._check_cuda_version() is None:
        reason = "Torchaudio is not compiled with CUDA."
    elif name not in get_video_decoders() and name not in get_video_encoders():
        reason = f"{name} is not in the list of available decoders or encoders"
    else:
        return _pass
    return _skipIf(True, reason=reason, key=key)
def zip_equal(*iterables): def zip_equal(*iterables):
"""With the regular Python `zip` function, if one iterable is longer than the other, """With the regular Python `zip` function, if one iterable is longer than the other,
the remainder portions are ignored.This is resolved in Python 3.10 where we can use the remainder portions are ignored.This is resolved in Python 3.10 where we can use
......
...@@ -14,10 +14,12 @@ from torchaudio_unittest.common_utils import ( ...@@ -14,10 +14,12 @@ from torchaudio_unittest.common_utils import (
save_image, save_image,
save_wav, save_wav,
skipIfNoFFmpeg, skipIfNoFFmpeg,
skipIfNoHWAccel,
TempDirMixin, TempDirMixin,
TorchaudioTestCase, TorchaudioTestCase,
) )
if is_ffmpeg_available(): if is_ffmpeg_available():
from torchaudio.io import StreamReader, StreamWriter from torchaudio.io import StreamReader, StreamWriter
from torchaudio.io._stream_reader import ( from torchaudio.io._stream_reader import (
...@@ -1048,3 +1050,105 @@ class StreamReaderImageTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestCase) ...@@ -1048,3 +1050,105 @@ class StreamReaderImageTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestCase)
self.assertEqual(chunks[8], rgba, atol=0, rtol=0) self.assertEqual(chunks[8], rgba, atol=0, rtol=0)
self.assertEqual(chunks[9], abgr, atol=0, rtol=0) self.assertEqual(chunks[9], abgr, atol=0, rtol=0)
self.assertEqual(chunks[10], bgra, atol=0, rtol=0) self.assertEqual(chunks[10], bgra, atol=0, rtol=0)
@skipIfNoHWAccel("h264_cuvid")
class CuvidHWAccelInterfaceTest(TorchaudioTestCase):
    """Interface checks for mixing HW-accelerated and plain decoding."""

    # Fixed typo in the method name: "acel" -> "accel".
    def test_dup_hw_accel(self):
        """Specifying the same source stream with and without HW accel should fail (instead of segfault later)"""
        src = get_asset_path("nasa_13013.mp4")

        # First stream without HW accel; adding an accelerated one must raise.
        r = StreamReader(src)
        r.add_video_stream(-1, decoder="h264_cuvid")
        with self.assertRaises(RuntimeError):
            r.add_video_stream(-1, decoder="h264_cuvid", hw_accel="cuda")

        # And the mirror case: accelerated first, plain second must raise.
        r = StreamReader(src)
        r.add_video_stream(-1, decoder="h264_cuvid", hw_accel="cuda")
        with self.assertRaises(RuntimeError):
            r.add_video_stream(-1, decoder="h264_cuvid")
@_media_source
class CudaDecoderTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestCase):
    """Verify that NVDEC (cuvid) decoders produce the expected frames."""

    def _run_decode(self, asset, decoder, device, dtype, chunk_shape, total, hw_accel=None):
        # Decode the whole stream in chunks of 10 frames, checking the device,
        # dtype and shape of every chunk, then the total number of frames.
        extra = {} if hw_accel is None else {"hw_accel": hw_accel}
        reader = StreamReader(self.get_src(get_asset_path(asset)))
        reader.add_video_stream(10, decoder=decoder, **extra)
        decoded = 0
        for (frames,) in reader.stream():
            self.assertEqual(frames.device, torch.device(device))
            self.assertEqual(frames.dtype, dtype)
            self.assertEqual(frames.shape, torch.Size(chunk_shape))
            decoded += frames.size(0)
        assert decoded == total

    @skipIfNoHWAccel("h264_cuvid")
    def test_h264_cuvid(self):
        """GPU decoder works for H264"""
        self._run_decode("nasa_13013.mp4", "h264_cuvid", "cpu", torch.uint8, [10, 3, 270, 480], 390)

    @skipIfNoHWAccel("h264_cuvid")
    def test_h264_cuvid_hw_accel(self):
        """GPU decoder works for H264 with HW acceleration, and put the frames on CUDA tensor"""
        self._run_decode(
            "nasa_13013.mp4", "h264_cuvid", "cuda:0", torch.uint8, [10, 3, 270, 480], 390, hw_accel="cuda"
        )

    @skipIfNoHWAccel("hevc_cuvid")
    def test_hevc_cuvid(self):
        """GPU decoder works for H265/HEVC"""
        self._run_decode("testsrc.hevc", "hevc_cuvid", "cpu", torch.uint8, [10, 3, 144, 256], 300)

    @skipIfNoHWAccel("hevc_cuvid")
    def test_hevc_cuvid_hw_accel(self):
        """GPU decoder works for H265/HEVC with HW acceleration, and put the frames on CUDA tensor"""
        # NOTE(review): dtype is int16 here while the software path yields
        # uint8 — presumably the HW surface of this asset is >8-bit; confirm
        # against the asset's bit depth.
        self._run_decode(
            "testsrc.hevc", "hevc_cuvid", "cuda:0", torch.int16, [10, 3, 144, 256], 300, hw_accel="cuda"
        )
@skipIfNoHWAccel("h264_cuvid")
class FilterGraphWithCudaAccel(TorchaudioTestCase):
    """FilterGraph behavior when frames stay on CUDA (HW acceleration on)."""

    # Fixed typo in the method name: "sclae" -> "scale".
    def test_scale_cuda_change_size(self):
        """scale_cuda filter can be used when HW accel is on"""
        src = get_asset_path("nasa_13013.mp4")
        r = StreamReader(src)
        # Halving 480x270 should yield 240x135 frames, still on the GPU.
        r.add_video_stream(10, decoder="h264_cuvid", hw_accel="cuda", filter_desc="scale_cuda=iw/2:ih/2")
        num_frames = 0
        for (chunk,) in r.stream():
            self.assertEqual(chunk.device, torch.device("cuda:0"))
            self.assertEqual(chunk.dtype, torch.uint8)
            self.assertEqual(chunk.shape, torch.Size([10, 3, 135, 240]))
            num_frames += chunk.size(0)
        assert num_frames == 390

    def test_scale_cuda_format(self):
        """yuv444p format conversion does not work (yet)"""
        src = get_asset_path("nasa_13013.mp4")
        r = StreamReader(src)
        with self.assertRaises(RuntimeError):
            r.add_video_stream(10, decoder="h264_cuvid", hw_accel="cuda", filter_desc="scale_cuda=format=yuv444p")
...@@ -165,7 +165,8 @@ void FilterGraph::add_process(const std::string& filter_description) { ...@@ -165,7 +165,8 @@ void FilterGraph::add_process(const std::string& filter_description) {
av_err2string(ret) + ".)"); av_err2string(ret) + ".)");
} }
void FilterGraph::create_filter() { void FilterGraph::create_filter(AVBufferRef* hw_frames_ctx) {
buffersrc_ctx->outputs[0]->hw_frames_ctx = hw_frames_ctx;
int ret = avfilter_graph_config(pFilterGraph, nullptr); int ret = avfilter_graph_config(pFilterGraph, nullptr);
TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret)); TORCH_CHECK(ret >= 0, "Failed to configure the graph: " + av_err2string(ret));
// char* desc = avfilter_graph_dump(pFilterGraph, NULL); // char* desc = avfilter_graph_dump(pFilterGraph, NULL);
...@@ -196,6 +197,10 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const { ...@@ -196,6 +197,10 @@ FilterGraphOutputInfo FilterGraph::get_output_info() const {
ret.num_channels = av_get_channel_layout_nb_channels(l->channel_layout); ret.num_channels = av_get_channel_layout_nb_channels(l->channel_layout);
#endif #endif
} else { } else {
if (l->format == AV_PIX_FMT_CUDA && l->hw_frames_ctx) {
auto frames_ctx = (AVHWFramesContext*)(l->hw_frames_ctx->data);
ret.format = frames_ctx->sw_format;
}
ret.frame_rate = l->frame_rate; ret.frame_rate = l->frame_rate;
ret.height = l->h; ret.height = l->h;
ret.width = l->w; ret.width = l->w;
......
...@@ -63,7 +63,7 @@ class FilterGraph { ...@@ -63,7 +63,7 @@ class FilterGraph {
void add_process(const std::string& filter_description); void add_process(const std::string& filter_description);
void create_filter(); void create_filter(AVBufferRef* hw_frames_ctx = nullptr);
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
// Query methods // Query methods
......
#include <torchaudio/csrc/ffmpeg/hw_context.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/chunked_buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/buffer/chunked_buffer.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/unchunked_buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/buffer/unchunked_buffer.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/sink.h> #include <torchaudio/csrc/ffmpeg/stream_reader/sink.h>
...@@ -47,14 +48,8 @@ std::unique_ptr<Buffer> get_buffer( ...@@ -47,14 +48,8 @@ std::unique_ptr<Buffer> get_buffer(
codec_ctx->channels); codec_ctx->channels);
} }
} else { } else {
// Note
// When using HW decoder, the pixel format is CUDA, and FilterGraph does
// not yet support CUDA frames, nor propagating the software pixel format,
// so here, we refer to AVCodecContext* to look at the pixel format.
AVPixelFormat fmt = (AVPixelFormat)(info.format); AVPixelFormat fmt = (AVPixelFormat)(info.format);
if (fmt == AV_PIX_FMT_CUDA) { TORCH_INTERNAL_ASSERT(fmt != AV_PIX_FMT_CUDA);
fmt = codec_ctx->sw_pix_fmt;
}
if (frames_per_chunk == -1) { if (frames_per_chunk == -1) {
return detail::get_unchunked_buffer(fmt, info.height, info.width, device); return detail::get_unchunked_buffer(fmt, info.height, info.width, device);
...@@ -77,7 +72,6 @@ FilterGraph get_filter_graph( ...@@ -77,7 +72,6 @@ FilterGraph get_filter_graph(
AVRational frame_rate, AVRational frame_rate,
const std::string& filter_description) { const std::string& filter_description) {
auto p = FilterGraph{codec_ctx->codec_type}; auto p = FilterGraph{codec_ctx->codec_type};
switch (codec_ctx->codec_type) { switch (codec_ctx->codec_type) {
case AVMEDIA_TYPE_AUDIO: case AVMEDIA_TYPE_AUDIO:
p.add_audio_src( p.add_audio_src(
...@@ -100,7 +94,11 @@ FilterGraph get_filter_graph( ...@@ -100,7 +94,11 @@ FilterGraph get_filter_graph(
} }
p.add_sink(); p.add_sink();
p.add_process(filter_description); p.add_process(filter_description);
p.create_filter(); if (codec_ctx->hw_frames_ctx) {
p.create_filter(av_buffer_ref(codec_ctx->hw_frames_ctx));
} else {
p.create_filter(nullptr);
}
return p; return p;
} }
......
...@@ -81,6 +81,27 @@ enum AVPixelFormat get_hw_format( ...@@ -81,6 +81,27 @@ enum AVPixelFormat get_hw_format(
return AV_PIX_FMT_NONE; return AV_PIX_FMT_NONE;
} }
// Allocate and initialize a CUDA AVHWFramesContext (HW frame pool) that
// mirrors the decoder's output configuration: the pixel format, software
// pixel format and frame geometry are copied from `codec_ctx`.
// The returned AVBufferRef is owned by the caller.
// Throws via TORCH_CHECK on allocation or initialization failure.
AVBufferRef* get_hw_frames_ctx(AVCodecContext* codec_ctx) {
  AVBufferRef* frames_ref = av_hwframe_ctx_alloc(codec_ctx->hw_device_ctx);
  TORCH_CHECK(
      frames_ref,
      "Failed to allocate CUDA frame context from device context at ",
      codec_ctx->hw_device_ctx);
  auto* frames_ctx = reinterpret_cast<AVHWFramesContext*>(frames_ref->data);
  frames_ctx->format = codec_ctx->pix_fmt;
  frames_ctx->sw_format = codec_ctx->sw_pix_fmt;
  frames_ctx->width = codec_ctx->width;
  frames_ctx->height = codec_ctx->height;
  // Pre-allocate a small pool of frames (same sizing as the original code).
  frames_ctx->initial_pool_size = 5;
  const int err = av_hwframe_ctx_init(frames_ref);
  if (err < 0) {
    // Release the half-initialized context before reporting the failure.
    av_buffer_unref(&frames_ref);
    TORCH_CHECK(
        false, "Failed to initialize CUDA frame context: ", av_err2string(err));
  }
  return frames_ref;
}
void configure_codec_context( void configure_codec_context(
AVCodecContext* codec_ctx, AVCodecContext* codec_ctx,
const AVCodecParameters* params, const AVCodecParameters* params,
...@@ -135,6 +156,9 @@ AVCodecContextPtr get_codec_ctx( ...@@ -135,6 +156,9 @@ AVCodecContextPtr get_codec_ctx(
alloc_codec_context(params->codec_id, decoder_name); alloc_codec_context(params->codec_id, decoder_name);
configure_codec_context(codec_ctx, params, device); configure_codec_context(codec_ctx, params, device);
open_codec(codec_ctx, decoder_option); open_codec(codec_ctx, decoder_option);
if (codec_ctx->hw_device_ctx) {
codec_ctx->hw_frames_ctx = av_buffer_ref(get_hw_frames_ctx(codec_ctx));
}
return codec_ctx; return codec_ctx;
} }
...@@ -160,6 +184,38 @@ KeyType StreamProcessor::add_stream( ...@@ -160,6 +184,38 @@ KeyType StreamProcessor::add_stream(
AVRational frame_rate, AVRational frame_rate,
const c10::optional<std::string>& filter_description, const c10::optional<std::string>& filter_description,
const torch::Device& device) { const torch::Device& device) {
// If device is provided, then check that codec_ctx has hw_device_ctx set.
// In case, defining an output stream with HW accel on an input stream that
// has decoder set without HW accel, it will cause seg fault.
// i.e.
// The following should be rejected here.
// reader = StreamReader(...)
// reader.add_video_stream(..., decoder="h264_cuvid")
// reader.add_video_stream(..., decoder="h264_cuvid", hw_accel="cuda")
// TODO:
// One idea to work around this is to always define HW device context, and
// if HW acceleration is not required, insert `hwdownload` filter.
// This way it will be possible to handle both cases at the same time.
switch (device.type()) {
case torch::kCPU:
TORCH_CHECK(
!codec_ctx->hw_device_ctx,
"Decoding without Hardware acceleration is requested, however, "
"the decoder has been already defined with a HW acceleration. "
"Decoding a stream with and without HW acceleration simultaneously "
"is not supported.");
break;
case torch::kCUDA:
TORCH_CHECK(
codec_ctx->hw_device_ctx,
"CUDA Hardware acceleration is requested, however, the decoder has "
"been already defined without a HW acceleration. "
"Decoding a stream with and without HW acceleration simultaneously "
"is not supported.");
break;
default:;
}
switch (codec_ctx->codec_type) { switch (codec_ctx->codec_type) {
case AVMEDIA_TYPE_AUDIO: case AVMEDIA_TYPE_AUDIO:
case AVMEDIA_TYPE_VIDEO: case AVMEDIA_TYPE_VIDEO:
......
...@@ -350,6 +350,15 @@ void StreamReader::add_stream( ...@@ -350,6 +350,15 @@ void StreamReader::add_stream(
processors[i] = std::make_unique<StreamProcessor>( processors[i] = std::make_unique<StreamProcessor>(
stream->time_base, stream->codecpar, decoder, decoder_option, device); stream->time_base, stream->codecpar, decoder, decoder_option, device);
processors[i]->set_discard_timestamp(seek_timestamp); processors[i]->set_discard_timestamp(seek_timestamp);
} else {
if (decoder) {
// TODO: Validate that the decoder is consistent as the one used to define
// previous output streams.
// i.e. the following is not permitted.
// reader.add_video_stream(..., decoder="h264")
// reader.add_video_stream(..., decoder="x264")
// reader.add_video_stream(..., decoder="h264_cuvid")
}
} }
stream->discard = AVDISCARD_DEFAULT; stream->discard = AVDISCARD_DEFAULT;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment