Commit 5ee34516, authored by moto and committed by Facebook GitHub Bot
Browse files

Refactor buffer helper functions (#2943)

Summary:
* move helper functions to `detail` namespace.
* move helper functions out of `buffer.h`

Pull Request resolved: https://github.com/pytorch/audio/pull/2943

Reviewed By: carolineechen

Differential Revision: D42271652

Pulled By: mthrok

fbshipit-source-id: abbfc8e8bac97d4eeb34221d4c20763477bd982e
parent 45651245
...@@ -133,7 +133,7 @@ if(USE_FFMPEG) ...@@ -133,7 +133,7 @@ if(USE_FFMPEG)
LIBTORCHAUDIO_FFMPEG_SOURCES LIBTORCHAUDIO_FFMPEG_SOURCES
ffmpeg/ffmpeg.cpp ffmpeg/ffmpeg.cpp
ffmpeg/filter_graph.cpp ffmpeg/filter_graph.cpp
ffmpeg/stream_reader/buffer.cpp ffmpeg/stream_reader/buffer/common.cpp
ffmpeg/stream_reader/buffer/chunked_buffer.cpp ffmpeg/stream_reader/buffer/chunked_buffer.cpp
ffmpeg/stream_reader/buffer/unchunked_buffer.cpp ffmpeg/stream_reader/buffer/unchunked_buffer.cpp
ffmpeg/stream_reader/decoder.cpp ffmpeg/stream_reader/decoder.cpp
......
#pragma once #pragma once
#include <torch/torch.h> #include <torch/torch.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h> #include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <deque>
namespace torchaudio { namespace torchaudio {
namespace ffmpeg { namespace ffmpeg {
...@@ -29,11 +28,5 @@ class Buffer { ...@@ -29,11 +28,5 @@ class Buffer {
virtual void flush() = 0; virtual void flush() = 0;
}; };
//////////////////////////////////////////////////////////////////////////////
// Helper functions
//////////////////////////////////////////////////////////////////////////////
torch::Tensor convert_audio_tensor(AVFrame* frame);
torch::Tensor convert_image_tensor(AVFrame* frame, const torch::Device& device);
} // namespace ffmpeg } // namespace ffmpeg
} // namespace torchaudio } // namespace torchaudio
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/chunked_buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/buffer/chunked_buffer.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/common.h>
namespace torchaudio { namespace torchaudio {
namespace ffmpeg { namespace ffmpeg {
...@@ -68,7 +69,7 @@ void ChunkedAudioBuffer::push_tensor(torch::Tensor frame) { ...@@ -68,7 +69,7 @@ void ChunkedAudioBuffer::push_tensor(torch::Tensor frame) {
} }
void ChunkedAudioBuffer::push_frame(AVFrame* frame) { void ChunkedAudioBuffer::push_frame(AVFrame* frame) {
push_tensor(convert_audio_tensor(frame)); push_tensor(detail::convert_audio(frame));
} }
void ChunkedVideoBuffer::push_tensor(const torch::Tensor& frame) { void ChunkedVideoBuffer::push_tensor(const torch::Tensor& frame) {
...@@ -90,7 +91,7 @@ void ChunkedVideoBuffer::push_tensor(const torch::Tensor& frame) { ...@@ -90,7 +91,7 @@ void ChunkedVideoBuffer::push_tensor(const torch::Tensor& frame) {
} }
void ChunkedVideoBuffer::push_frame(AVFrame* frame) { void ChunkedVideoBuffer::push_frame(AVFrame* frame) {
push_tensor(convert_image_tensor(frame, device)); push_tensor(detail::convert_image(frame, device));
} }
c10::optional<torch::Tensor> ChunkedAudioBuffer::pop_chunk() { c10::optional<torch::Tensor> ChunkedAudioBuffer::pop_chunk() {
......
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/buffer/common.h>
#include <stdexcept> #include <stdexcept>
#include <vector> #include <vector>
...@@ -8,11 +8,9 @@ ...@@ -8,11 +8,9 @@
namespace torchaudio { namespace torchaudio {
namespace ffmpeg { namespace ffmpeg {
namespace detail {
////////////////////////////////////////////////////////////////////////////// torch::Tensor convert_audio(AVFrame* pFrame) {
// Helper functions - audio
//////////////////////////////////////////////////////////////////////////////
torch::Tensor convert_audio_tensor(AVFrame* pFrame) {
// ref: https://ffmpeg.org/doxygen/4.1/filter__audio_8c_source.html#l00215 // ref: https://ffmpeg.org/doxygen/4.1/filter__audio_8c_source.html#l00215
AVSampleFormat format = static_cast<AVSampleFormat>(pFrame->format); AVSampleFormat format = static_cast<AVSampleFormat>(pFrame->format);
int num_channels = pFrame->channels; int num_channels = pFrame->channels;
...@@ -33,7 +31,7 @@ torch::Tensor convert_audio_tensor(AVFrame* pFrame) { ...@@ -33,7 +31,7 @@ torch::Tensor convert_audio_tensor(AVFrame* pFrame) {
: std::vector<int64_t>{num_frames, num_channels}; : std::vector<int64_t>{num_frames, num_channels};
torch::Tensor t; torch::Tensor t;
uint8_t* ptr = NULL; uint8_t* ptr = nullptr;
switch (format) { switch (format) {
case AV_SAMPLE_FMT_U8: case AV_SAMPLE_FMT_U8:
case AV_SAMPLE_FMT_U8P: { case AV_SAMPLE_FMT_U8P: {
...@@ -81,15 +79,12 @@ torch::Tensor convert_audio_tensor(AVFrame* pFrame) { ...@@ -81,15 +79,12 @@ torch::Tensor convert_audio_tensor(AVFrame* pFrame) {
memcpy(ptr, pFrame->extended_data[i], plane_size); memcpy(ptr, pFrame->extended_data[i], plane_size);
ptr += plane_size; ptr += plane_size;
} }
if (is_planar) if (is_planar) {
t = t.t(); t = t.t();
}
return t; return t;
} }
//////////////////////////////////////////////////////////////////////////////
// Helper functions - video
//////////////////////////////////////////////////////////////////////////////
namespace {
torch::Tensor get_interlaced_image_buffer(AVFrame* pFrame) { torch::Tensor get_interlaced_image_buffer(AVFrame* pFrame) {
int width = pFrame->width; int width = pFrame->width;
int height = pFrame->height; int height = pFrame->height;
...@@ -116,12 +111,6 @@ void write_interlaced_image(AVFrame* pFrame, torch::Tensor& frame) { ...@@ -116,12 +111,6 @@ void write_interlaced_image(AVFrame* pFrame, torch::Tensor& frame) {
} }
} }
torch::Tensor convert_interlaced_video(AVFrame* pFrame) {
torch::Tensor frame = get_interlaced_image_buffer(pFrame);
write_interlaced_image(pFrame, frame);
return frame.permute({0, 3, 1, 2});
}
torch::Tensor get_planar_image_buffer(AVFrame* pFrame) { torch::Tensor get_planar_image_buffer(AVFrame* pFrame) {
int width = pFrame->width; int width = pFrame->width;
int height = pFrame->height; int height = pFrame->height;
...@@ -152,6 +141,14 @@ void write_planar_image(AVFrame* pFrame, torch::Tensor& frame) { ...@@ -152,6 +141,14 @@ void write_planar_image(AVFrame* pFrame, torch::Tensor& frame) {
} }
} }
namespace {
// Builds a Tensor from `pFrame` via the interlaced helpers: obtain the
// destination buffer, fill it from the frame, then reorder its dimensions.
torch::Tensor convert_interlaced_video(AVFrame* pFrame) {
  torch::Tensor buffer = get_interlaced_image_buffer(pFrame);
  write_interlaced_image(pFrame, buffer);
  // Moves the last dimension to index 1 (presumably NHWC -> NCHW; confirm
  // against the layout produced by get_interlaced_image_buffer).
  return buffer.permute({0, 3, 1, 2});
}
torch::Tensor convert_planar_video(AVFrame* pFrame) { torch::Tensor convert_planar_video(AVFrame* pFrame) {
torch::Tensor frame = get_planar_image_buffer(pFrame); torch::Tensor frame = get_planar_image_buffer(pFrame);
write_planar_image(pFrame, frame); write_planar_image(pFrame, frame);
...@@ -297,9 +294,7 @@ torch::Tensor convert_nv12_cuda(AVFrame* pFrame, const torch::Device& device) { ...@@ -297,9 +294,7 @@ torch::Tensor convert_nv12_cuda(AVFrame* pFrame, const torch::Device& device) {
#endif #endif
} // namespace } // namespace
torch::Tensor convert_image_tensor( torch::Tensor convert_image(AVFrame* pFrame, const torch::Device& device) {
AVFrame* pFrame,
const torch::Device& device) {
// ref: // ref:
// https://ffmpeg.org/doxygen/4.1/filtering__video_8c_source.html#l00179 // https://ffmpeg.org/doxygen/4.1/filtering__video_8c_source.html#l00179
// https://ffmpeg.org/doxygen/4.1/decode__video_8c_source.html#l00038 // https://ffmpeg.org/doxygen/4.1/decode__video_8c_source.html#l00038
...@@ -352,5 +347,6 @@ torch::Tensor convert_image_tensor( ...@@ -352,5 +347,6 @@ torch::Tensor convert_image_tensor(
} }
} }
} // namespace detail
} // namespace ffmpeg } // namespace ffmpeg
} // namespace torchaudio } // namespace torchaudio
#pragma once
#include <torch/torch.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
namespace torchaudio {
namespace ffmpeg {
namespace detail {
//////////////////////////////////////////////////////////////////////////////
// Helper functions
//
// Internal helpers used by the chunked and unchunked buffer implementations
// to turn decoded AVFrame objects into torch::Tensor. Parameter names are
// kept identical to the ones used in the definitions.
//////////////////////////////////////////////////////////////////////////////

// Converts a decoded audio frame into a Tensor. The sample format and the
// number of channels are read from `pFrame`.
torch::Tensor convert_audio(AVFrame* pFrame);

// Returns the Tensor that `write_interlaced_image` is meant to fill for
// `pFrame` (reads at least the frame's width and height).
torch::Tensor get_interlaced_image_buffer(AVFrame* pFrame);

// Planar counterpart of `get_interlaced_image_buffer`; the result is meant
// to be filled by `write_planar_image`.
torch::Tensor get_planar_image_buffer(AVFrame* pFrame);

// Fills `frame` from `pFrame`. `frame` is presumably a Tensor obtained from
// `get_interlaced_image_buffer` for the same frame -- TODO confirm.
void write_interlaced_image(AVFrame* pFrame, torch::Tensor& frame);

// Fills `frame` from `pFrame`. `frame` is presumably a Tensor obtained from
// `get_planar_image_buffer` for the same frame -- TODO confirm.
void write_planar_image(AVFrame* pFrame, torch::Tensor& frame);

// Converts a decoded video frame into a Tensor. `device` is forwarded by the
// video buffers; presumably it selects where the resulting Tensor lives --
// verify against the definition.
torch::Tensor convert_image(AVFrame* pFrame, const torch::Device& device);

} // namespace detail
} // namespace ffmpeg
} // namespace torchaudio
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/common.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/unchunked_buffer.h> #include <torchaudio/csrc/ffmpeg/stream_reader/buffer/unchunked_buffer.h>
namespace torchaudio { namespace torchaudio {
...@@ -18,11 +19,11 @@ void UnchunkedBuffer::push_tensor(const torch::Tensor& t) { ...@@ -18,11 +19,11 @@ void UnchunkedBuffer::push_tensor(const torch::Tensor& t) {
} }
void UnchunkedAudioBuffer::push_frame(AVFrame* frame) { void UnchunkedAudioBuffer::push_frame(AVFrame* frame) {
push_tensor(convert_audio_tensor(frame)); push_tensor(detail::convert_audio(frame));
} }
void UnchunkedVideoBuffer::push_frame(AVFrame* frame) { void UnchunkedVideoBuffer::push_frame(AVFrame* frame) {
push_tensor(convert_image_tensor(frame, device)); push_tensor(detail::convert_image(frame, device));
} }
c10::optional<torch::Tensor> UnchunkedBuffer::pop_chunk() { c10::optional<torch::Tensor> UnchunkedBuffer::pop_chunk() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment