"git@developer.sourcefind.cn:OpenDAS/fairscale.git" did not exist on "a8dd9254a756ebbe70fedb0f2b8c0eed900d825b"
Commit 7972be99 authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Support NV12 format in video decoding (#2330)

Summary:
Support NV12 format in Streamer API.

NV12 is a biplanar format with a full-sized Y plane followed by a single chroma plane with interleaved U and V values.
https://chromium.googlesource.com/libyuv/libyuv/+/HEAD/docs/formats.md#nv12-and-nv21

The original UV plane is smaller than the Y plane, so in this implementation,
the UV plane is upsampled to match the size of the Y plane.

Pull Request resolved: https://github.com/pytorch/audio/pull/2330

Reviewed By: hwangjeff

Differential Revision: D35632351

Pulled By: mthrok

fbshipit-source-id: aab4fbc0ce2bb7a1fb67264c27208b610fb56e27
parent 2f70e2f9
...@@ -2,6 +2,10 @@ ...@@ -2,6 +2,10 @@
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
#ifdef USE_CUDA
#include <c10/cuda/CUDAStream.h>
#endif
namespace torchaudio { namespace torchaudio {
namespace ffmpeg { namespace ffmpeg {
...@@ -213,6 +217,94 @@ torch::Tensor convert_yuv420p(AVFrame* pFrame) { ...@@ -213,6 +217,94 @@ torch::Tensor convert_yuv420p(AVFrame* pFrame) {
return t.permute({0, 3, 1, 2}); // NCHW return t.permute({0, 3, 1, 2}); // NCHW
} }
// Convert an NV12 frame in CPU memory to a (1, 3, H, W) uint8 tensor (NCHW).
//
// NV12 stores a full-resolution Y plane followed by a half-resolution plane
// of interleaved U/V bytes. Both planes are copied row-by-row into tightly
// packed tensors (FFmpeg's linesize may include padding), then the chroma
// plane is upsampled to the luma resolution before the channels are stacked.
torch::Tensor convert_nv12_cpu(AVFrame* pFrame) {
  const int width = pFrame->width;
  const int height = pFrame->height;
  const auto options = torch::TensorOptions()
                           .dtype(torch::kUInt8)
                           .layout(torch::kStrided)
                           .device(torch::kCPU);

  // Copy `num_rows` rows of `row_bytes` bytes each from a (possibly padded)
  // FFmpeg plane into a tightly packed destination buffer.
  auto copy_plane = [](uint8_t* dst,
                       const uint8_t* src,
                       int row_bytes,
                       int num_rows,
                       int linesize) {
    for (int row = 0; row < num_rows; ++row) {
      memcpy(dst, src, row_bytes);
      dst += row_bytes;
      src += linesize;
    }
  };

  // Luma plane: full resolution, one byte per pixel.
  torch::Tensor y = torch::empty({1, height, width, 1}, options);
  copy_plane(
      y.data_ptr<uint8_t>(),
      pFrame->data[0],
      width,
      height,
      pFrame->linesize[0]);

  // Chroma plane: half resolution in both dimensions, two bytes (U, V) per
  // chroma sample — i.e. `width` bytes per row.
  torch::Tensor uv = torch::empty({1, height / 2, width / 2, 2}, options);
  copy_plane(
      uv.data_ptr<uint8_t>(),
      pFrame->data[1],
      width,
      height / 2,
      pFrame->linesize[1]);

  // Nearest-neighbor upsample of U/V to match the Y plane size.
  uv = uv.repeat_interleave(2, -2).repeat_interleave(2, -3);
  torch::Tensor t = torch::cat({y, uv}, -1);
  return t.permute({0, 3, 1, 2}); // NCHW
}
#ifdef USE_CUDA
// Convert an NV12 frame residing in CUDA device memory to a (1, 3, H, W)
// uint8 tensor (NCHW) on the CUDA device.
//
// The Y plane (full resolution) and the interleaved U/V plane (half
// resolution, two bytes per chroma sample) are copied device-to-device with
// cudaMemcpy2D, which strips FFmpeg's per-row padding (linesize). The chroma
// plane is then upsampled to the luma resolution before stacking.
//
// Throws std::runtime_error (with the CUDA error string) if a copy fails.
torch::Tensor convert_nv12_cuda(AVFrame* pFrame) {
  int width = pFrame->width;
  int height = pFrame->height;
  auto options = torch::TensorOptions()
                     .dtype(torch::kUInt8)
                     .layout(torch::kStrided)
                     .device(torch::kCUDA);
  torch::Tensor y = torch::empty({1, height, width, 1}, options);
  {
    // dpitch is `width`: tensor rows are tightly packed.
    cudaError_t err = cudaMemcpy2D(
        static_cast<void*>(y.data_ptr<uint8_t>()),
        width,
        static_cast<const void*>(pFrame->data[0]),
        pFrame->linesize[0],
        width,
        height,
        cudaMemcpyDeviceToDevice);
    if (err != cudaSuccess) {
      // Surface the CUDA error code; the bare message alone is not actionable.
      throw std::runtime_error(
          std::string("Failed to copy Y plane to CUDA tensor: ") +
          cudaGetErrorString(err));
    }
  }
  torch::Tensor uv = torch::empty({1, height / 2, width / 2, 2}, options);
  {
    // UV rows hold width/2 (U, V) pairs, i.e. `width` bytes per row.
    cudaError_t err = cudaMemcpy2D(
        static_cast<void*>(uv.data_ptr<uint8_t>()),
        width,
        static_cast<const void*>(pFrame->data[1]),
        pFrame->linesize[1],
        width,
        height / 2,
        cudaMemcpyDeviceToDevice);
    if (err != cudaSuccess) {
      throw std::runtime_error(
          std::string("Failed to copy UV plane to CUDA tensor: ") +
          cudaGetErrorString(err));
    }
  }
  // Nearest-neighbor upsample of U/V to match the Y plane size.
  uv = uv.repeat_interleave(2, -2).repeat_interleave(2, -3);
  torch::Tensor t = torch::cat({y, uv}, -1);
  return t.permute({0, 3, 1, 2}); // NCHW
}
#endif
torch::Tensor convert_image_tensor(AVFrame* pFrame) { torch::Tensor convert_image_tensor(AVFrame* pFrame) {
// ref: // ref:
// https://ffmpeg.org/doxygen/4.1/filtering__video_8c_source.html#l00179 // https://ffmpeg.org/doxygen/4.1/filtering__video_8c_source.html#l00179
...@@ -240,9 +332,35 @@ torch::Tensor convert_image_tensor(AVFrame* pFrame) { ...@@ -240,9 +332,35 @@ torch::Tensor convert_image_tensor(AVFrame* pFrame) {
break; break;
case AV_PIX_FMT_YUV420P: case AV_PIX_FMT_YUV420P:
return convert_yuv420p(pFrame); return convert_yuv420p(pFrame);
case AV_PIX_FMT_NV12:
return convert_nv12_cpu(pFrame);
#ifdef USE_CUDA
case AV_PIX_FMT_CUDA: {
AVHWFramesContext* hwctx =
(AVHWFramesContext*)pFrame->hw_frames_ctx->data;
AVPixelFormat sw_format = hwctx->sw_format;
// cuvid decoder (nvdec frontend of ffmpeg) only supports the following
// output formats
// https://github.com/FFmpeg/FFmpeg/blob/072101bd52f7f092ee976f4e6e41c19812ad32fd/libavcodec/cuviddec.c#L1121-L1124
switch (sw_format) {
case AV_PIX_FMT_NV12:
return convert_nv12_cuda(pFrame);
case AV_PIX_FMT_P010:
case AV_PIX_FMT_P016:
throw std::runtime_error(
"Unsupported video format found in CUDA HW: " +
std::string(av_get_pix_fmt_name(sw_format)));
default:
throw std::runtime_error(
"Unexpected video format found in CUDA HW: " +
std::string(av_get_pix_fmt_name(sw_format)));
}
}
#endif
default: default:
throw std::runtime_error( throw std::runtime_error(
"Unexpected format: " + std::string(av_get_pix_fmt_name(format))); "Unexpected video format: " +
std::string(av_get_pix_fmt_name(format)));
} }
torch::Tensor t; torch::Tensor t;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment