Commit 5ee34516 authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Refactor buffer helper functions (#2943)

Summary:
* Move the helper functions into the `detail` namespace.
* Move the helper functions out of `buffer.h` into `buffer/common.h`.

Pull Request resolved: https://github.com/pytorch/audio/pull/2943

Reviewed By: carolineechen

Differential Revision: D42271652

Pulled By: mthrok

fbshipit-source-id: abbfc8e8bac97d4eeb34221d4c20763477bd982e
parent 45651245
......@@ -133,7 +133,7 @@ if(USE_FFMPEG)
LIBTORCHAUDIO_FFMPEG_SOURCES
ffmpeg/ffmpeg.cpp
ffmpeg/filter_graph.cpp
ffmpeg/stream_reader/buffer.cpp
ffmpeg/stream_reader/buffer/common.cpp
ffmpeg/stream_reader/buffer/chunked_buffer.cpp
ffmpeg/stream_reader/buffer/unchunked_buffer.cpp
ffmpeg/stream_reader/decoder.cpp
......
#pragma once
#include <torch/torch.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
#include <deque>
namespace torchaudio {
namespace ffmpeg {
......@@ -29,11 +28,5 @@ class Buffer {
virtual void flush() = 0;
};
//////////////////////////////////////////////////////////////////////////////
// Helper functions
//////////////////////////////////////////////////////////////////////////////
torch::Tensor convert_audio_tensor(AVFrame* frame);
torch::Tensor convert_image_tensor(AVFrame* frame, const torch::Device& device);
} // namespace ffmpeg
} // namespace torchaudio
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/chunked_buffer.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/common.h>
namespace torchaudio {
namespace ffmpeg {
......@@ -68,7 +69,7 @@ void ChunkedAudioBuffer::push_tensor(torch::Tensor frame) {
}
void ChunkedAudioBuffer::push_frame(AVFrame* frame) {
push_tensor(convert_audio_tensor(frame));
push_tensor(detail::convert_audio(frame));
}
void ChunkedVideoBuffer::push_tensor(const torch::Tensor& frame) {
......@@ -90,7 +91,7 @@ void ChunkedVideoBuffer::push_tensor(const torch::Tensor& frame) {
}
void ChunkedVideoBuffer::push_frame(AVFrame* frame) {
push_tensor(convert_image_tensor(frame, device));
push_tensor(detail::convert_image(frame, device));
}
c10::optional<torch::Tensor> ChunkedAudioBuffer::pop_chunk() {
......
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/common.h>
#include <stdexcept>
#include <vector>
......@@ -8,11 +8,9 @@
namespace torchaudio {
namespace ffmpeg {
namespace detail {
//////////////////////////////////////////////////////////////////////////////
// Helper functions - audio
//////////////////////////////////////////////////////////////////////////////
torch::Tensor convert_audio_tensor(AVFrame* pFrame) {
torch::Tensor convert_audio(AVFrame* pFrame) {
// ref: https://ffmpeg.org/doxygen/4.1/filter__audio_8c_source.html#l00215
AVSampleFormat format = static_cast<AVSampleFormat>(pFrame->format);
int num_channels = pFrame->channels;
......@@ -33,7 +31,7 @@ torch::Tensor convert_audio_tensor(AVFrame* pFrame) {
: std::vector<int64_t>{num_frames, num_channels};
torch::Tensor t;
uint8_t* ptr = NULL;
uint8_t* ptr = nullptr;
switch (format) {
case AV_SAMPLE_FMT_U8:
case AV_SAMPLE_FMT_U8P: {
......@@ -81,15 +79,12 @@ torch::Tensor convert_audio_tensor(AVFrame* pFrame) {
memcpy(ptr, pFrame->extended_data[i], plane_size);
ptr += plane_size;
}
if (is_planar)
if (is_planar) {
t = t.t();
}
return t;
}
//////////////////////////////////////////////////////////////////////////////
// Helper functions - video
//////////////////////////////////////////////////////////////////////////////
namespace {
torch::Tensor get_interlaced_image_buffer(AVFrame* pFrame) {
int width = pFrame->width;
int height = pFrame->height;
......@@ -116,12 +111,6 @@ void write_interlaced_image(AVFrame* pFrame, torch::Tensor& frame) {
}
}
torch::Tensor convert_interlaced_video(AVFrame* pFrame) {
torch::Tensor frame = get_interlaced_image_buffer(pFrame);
write_interlaced_image(pFrame, frame);
return frame.permute({0, 3, 1, 2});
}
torch::Tensor get_planar_image_buffer(AVFrame* pFrame) {
int width = pFrame->width;
int height = pFrame->height;
......@@ -152,6 +141,14 @@ void write_planar_image(AVFrame* pFrame, torch::Tensor& frame) {
}
}
namespace {
// Builds the tensor for an interlaced frame: obtains a buffer from
// get_interlaced_image_buffer(), passes it to write_interlaced_image()
// together with `pFrame`, and returns it with its dimensions reordered
// as {0, 3, 1, 2}.
torch::Tensor convert_interlaced_video(AVFrame* pFrame) {
  auto image = get_interlaced_image_buffer(pFrame);
  write_interlaced_image(pFrame, image);
  // NOTE(review): presumably NHWC -> NCHW; confirm against the shape
  // produced by get_interlaced_image_buffer().
  return image.permute({0, 3, 1, 2});
}
torch::Tensor convert_planar_video(AVFrame* pFrame) {
torch::Tensor frame = get_planar_image_buffer(pFrame);
write_planar_image(pFrame, frame);
......@@ -297,9 +294,7 @@ torch::Tensor convert_nv12_cuda(AVFrame* pFrame, const torch::Device& device) {
#endif
} // namespace
torch::Tensor convert_image_tensor(
AVFrame* pFrame,
const torch::Device& device) {
torch::Tensor convert_image(AVFrame* pFrame, const torch::Device& device) {
// ref:
// https://ffmpeg.org/doxygen/4.1/filtering__video_8c_source.html#l00179
// https://ffmpeg.org/doxygen/4.1/decode__video_8c_source.html#l00038
......@@ -352,5 +347,6 @@ torch::Tensor convert_image_tensor(
}
}
} // namespace detail
} // namespace ffmpeg
} // namespace torchaudio
#pragma once
#include <torch/torch.h>
#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
// Frame-to-tensor helpers used by the chunked and unchunked buffer
// implementations (their push_frame() methods call detail::convert_audio
// and detail::convert_image).
namespace torchaudio {
namespace ffmpeg {
namespace detail {
//////////////////////////////////////////////////////////////////////////////
// Helper functions
//////////////////////////////////////////////////////////////////////////////
// Builds a tensor from the sample data of an audio `frame`. The definition
// copies data out of `frame->extended_data` and transposes the result when
// the sample format is planar.
torch::Tensor convert_audio(AVFrame* frame);
// Returns the tensor that is subsequently handed to
// write_interlaced_image(). The definition reads `pFrame->width` and
// `pFrame->height` - presumably to size the buffer; TODO confirm.
torch::Tensor get_interlaced_image_buffer(AVFrame* pFrame);
// Planar counterpart of get_interlaced_image_buffer(); its result is handed
// to write_planar_image().
torch::Tensor get_planar_image_buffer(AVFrame* pFrame);
// Takes the source `pFrame` and a mutable tensor. Note that here `frame` is
// the destination tensor, not the AVFrame. Presumably copies the frame's
// pixel data into it - TODO confirm against the definition.
void write_interlaced_image(AVFrame* pFrame, torch::Tensor& frame);
// Planar counterpart of write_interlaced_image(); `frame` is again the
// destination tensor.
void write_planar_image(AVFrame* pFrame, torch::Tensor& frame);
// Builds a tensor from a video `frame`. `device` is supplied by the video
// buffers' push_frame(); presumably it selects where the resulting tensor
// lives (a CUDA conversion path exists in the definition file) - TODO
// confirm.
torch::Tensor convert_image(AVFrame* frame, const torch::Device& device);
} // namespace detail
} // namespace ffmpeg
} // namespace torchaudio
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/common.h>
#include <torchaudio/csrc/ffmpeg/stream_reader/buffer/unchunked_buffer.h>
namespace torchaudio {
......@@ -18,11 +19,11 @@ void UnchunkedBuffer::push_tensor(const torch::Tensor& t) {
}
void UnchunkedAudioBuffer::push_frame(AVFrame* frame) {
push_tensor(convert_audio_tensor(frame));
push_tensor(detail::convert_audio(frame));
}
void UnchunkedVideoBuffer::push_frame(AVFrame* frame) {
push_tensor(convert_image_tensor(frame, device));
push_tensor(detail::convert_image(frame, device));
}
c10::optional<torch::Tensor> UnchunkedBuffer::pop_chunk() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment