Commit c12f4734 authored by moto, committed by Facebook GitHub Bot

Add 420p10le CPU support to StreamReader (#3332)

Summary:
This commit adds support for decoding the YUV420P10LE format.

The image tensor returned for this format has:
- NCHW format (C == 3)
- int16 dtype
- value range [0, 2^10)

Note that the value range is different from what the "hevc_cuvid" decoder
returns. The "hevc_cuvid" decoder uses the full range of int16 (internally,
it's uint16) to express the color (with gaps between representable values),
whereas the values returned by the CPU "hevc" decoder fall within [0, 2^10).
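
As a rough usage sketch (the input file name is a placeholder, not from this PR), decoding into this format with StreamReader looks like:

import torch
from torchaudio.io import StreamReader

# "input_10bit.mp4" is a hypothetical 10-bit HEVC (yuv420p10le) file.
s = StreamReader("input_10bit.mp4")
s.add_basic_video_stream(frames_per_chunk=-1, format="yuv420p10le")
s.process_all_packets()
(chunk,) = s.pop_chunks()

assert chunk.dtype == torch.int16              # 10-bit samples carried in int16
assert chunk.ndim == 4 and chunk.size(1) == 3  # NCHW, C == 3
assert 0 <= int(chunk.min()) and int(chunk.max()) < 2**10

Dividing by 1023 maps the CPU decoder's output to [0.0, 1.0]; the same scaling would not be correct for "hevc_cuvid" output, which spans the full int16 range.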

Address https://github.com/pytorch/audio/issues/3331

Pull Request resolved: https://github.com/pytorch/audio/pull/3332

Reviewed By: hwangjeff

Differential Revision: D45925097

Pulled By: mthrok

fbshipit-source-id: 4e669b65c030f388bba2fdbb8f00faf7e2981508
parent d38a7854
@@ -1054,6 +1054,7 @@ class StreamReaderImageTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestCase)
rgb16 = ((rgb.to(torch.int32) - 128) << 8).to(torch.int16)
yuv = rgb_to_yuv_ccir(rgb)
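# Reference for yuv420p10le: scale the 8-bit CCIR YUV values by 4 (two extra bits).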
yuv16 = yuv.to(torch.int16) * 4
bgr = rgb[:, [2, 1, 0], :, :]
gray = rgb_to_gray(rgb)
argb = torch.cat([alpha, rgb], dim=1)
@@ -1073,6 +1074,7 @@ class StreamReaderImageTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestCase)
s.add_basic_video_stream(frames_per_chunk=-1, format="rgba")
s.add_basic_video_stream(frames_per_chunk=-1, format="abgr")
s.add_basic_video_stream(frames_per_chunk=-1, format="bgra")
s.add_basic_video_stream(frames_per_chunk=-1, format="yuv420p10le")
s.process_all_packets()
chunks = s.pop_chunks()
self.assertEqual(chunks[0], yuv, atol=1, rtol=0)
@@ -1086,6 +1088,7 @@ class StreamReaderImageTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestCase)
self.assertEqual(chunks[8], rgba, atol=0, rtol=0)
self.assertEqual(chunks[9], abgr, atol=0, rtol=0)
self.assertEqual(chunks[10], bgra, atol=0, rtol=0)
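# atol=4: the reference is 8-bit data scaled by 4, so it may differ from the
# true 10-bit decode by up to one 8-bit quantization step.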
self.assertEqual(chunks[11], yuv16, atol=4, rtol=0)
@skipIfNoHWAccel("h264_cuvid")
@@ -269,6 +269,78 @@ torch::Tensor YUV420PConverter::convert(const AVFrame* src) {
return buffer;
}
////////////////////////////////////////////////////////////////////////////////
// YUV420P10LE
////////////////////////////////////////////////////////////////////////////////
YUV420P10LEConverter::YUV420P10LEConverter(int h, int w)
: ImageConverterBase(h, w, 3) {
TORCH_WARN_ONCE(
"The output format YUV420PLE is selected. "
"This will be implicitly converted to YUV444P (16-bit), "
"in which all the color components Y, U, V have the same dimension.");
}
void YUV420P10LEConverter::convert(const AVFrame* src, torch::Tensor& dst) {
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(src);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
(AVPixelFormat)(src->format) == AV_PIX_FMT_YUV420P10LE);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(src->height == height);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(src->width == width);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dst.size(1) == 3);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dst.size(2) == height);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dst.size(3) == width);
TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dst.dtype() == torch::kInt16);
// Write Y plane directly
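// Each 10-bit sample occupies 2 bytes (little-endian), hence width * 2 bytes per row.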
{
int16_t* p_dst = dst.data_ptr<int16_t>();
uint8_t* p_src = src->data[0];
for (int h = 0; h < height; ++h) {
memcpy(p_dst, p_src, (size_t)width * 2);
p_dst += width;
p_src += src->linesize[0];
}
}
// Chroma (U and V planes) are subsampled by 2 in both the vertical and
// horizontal directions.
// https://en.wikipedia.org/wiki/Chroma_subsampling
// Since we are returning data in a Tensor, which has the same size for all
// color planes, we need to upsample the UV planes. PyTorch has an interpolate
// function, but it does not work for the int16 type, so we copy manually.
//
// Each (H/2, W/2) chroma plane is replicated into four interleaved blocks
// (even/odd row x even/odd column) of the (H, W) destination:
//
//   ab  ->  aabb
//   cd      aabb
//           ccdd
//           ccdd
//
auto block00 = dst.slice(2, 0, {}, 2).slice(3, 0, {}, 2);
auto block01 = dst.slice(2, 0, {}, 2).slice(3, 1, {}, 2);
auto block10 = dst.slice(2, 1, {}, 2).slice(3, 0, {}, 2);
auto block11 = dst.slice(2, 1, {}, 2).slice(3, 1, {}, 2);
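// Channels 1 and 2 of dst hold the U and V planes (channel 0, Y, was written above).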
for (int i = 1; i < 3; ++i) {
// Borrow the chroma plane from the frame without copying; linesize is in
// bytes, so the row stride in int16 elements is linesize / 2.
auto tmp = torch::from_blob(
src->data[i],
{height / 2, width / 2},
{src->linesize[i] / 2, 1},
[](void*) {},
torch::TensorOptions().dtype(torch::kInt16).layout(torch::kStrided));
// Copy to each block
block00.slice(1, i, i + 1).copy_(tmp);
block01.slice(1, i, i + 1).copy_(tmp);
block10.slice(1, i, i + 1).copy_(tmp);
block11.slice(1, i, i + 1).copy_(tmp);
}
}
torch::Tensor YUV420P10LEConverter::convert(const AVFrame* src) {
torch::Tensor buffer =
get_image_buffer({1, num_channels, height, width}, torch::kInt16);
convert(src, buffer);
return buffer;
}
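
For intuition, the interleaved block copy above can be sketched in PyTorch with a hypothetical 2x2 chroma plane:

import torch

tmp = torch.tensor([[1, 2], [3, 4]], dtype=torch.int16)  # "ab / cd"
up = torch.empty(4, 4, dtype=torch.int16)
up[0::2, 0::2] = tmp  # block00: even rows, even cols
up[0::2, 1::2] = tmp  # block01: even rows, odd cols
up[1::2, 0::2] = tmp  # block10: odd rows, even cols
up[1::2, 1::2] = tmp  # block11: odd rows, odd cols
# up == [[1, 1, 2, 2], [1, 1, 2, 2], [3, 3, 4, 4], [3, 3, 4, 4]]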
////////////////////////////////////////////////////////////////////////////////
// NV12
////////////////////////////////////////////////////////////////////////////////
@@ -73,6 +73,13 @@ class YUV420PConverter : public ImageConverterBase {
torch::Tensor convert(const AVFrame* src);
};
class YUV420P10LEConverter : public ImageConverterBase {
public:
YUV420P10LEConverter(int height, int width);
void convert(const AVFrame* src, torch::Tensor& dst);
torch::Tensor convert(const AVFrame* src);
};
class NV12Converter : public ImageConverterBase {
torch::Tensor tmp_uv;
@@ -363,6 +363,11 @@ std::unique_ptr<IPostDecodeProcess> get_unchunked_video_process(
return std::make_unique<ProcessImpl<C, B>>(
std::move(filter), C{h, w}, B{tb});
}
case AV_PIX_FMT_YUV420P10LE: {
using C = YUV420P10LEConverter;
return std::make_unique<ProcessImpl<C, B>>(
std::move(filter), C{h, w}, B{tb});
}
case AV_PIX_FMT_NV12: {
using C = NV12Converter;
return std::make_unique<ProcessImpl<C, B>>(
@@ -474,6 +479,11 @@ std::unique_ptr<IPostDecodeProcess> get_chunked_video_process(
return std::make_unique<ProcessImpl<C, B>>(
std::move(filter), C{h, w}, B{tb, frames_per_chunk, num_chunks});
}
case AV_PIX_FMT_YUV420P10LE: {
using C = YUV420P10LEConverter;
return std::make_unique<ProcessImpl<C, B>>(
std::move(filter), C{h, w}, B{tb, frames_per_chunk, num_chunks});
}
case AV_PIX_FMT_NV12: {
using C = NV12Converter;
return std::make_unique<ProcessImpl<C, B>>(