"git@developer.sourcefind.cn:OpenDAS/lmdeploy.git" did not exist on "9484fd1c7db21381124dfd3581fd7f738d5f8e9c"
Commit bb77cbeb authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Update source info (#2418)

Summary:
Add `num_frames` and `bits_per_sample` to the source stream info so that it
matches the current capability of `torchaudio.info`.

Pull Request resolved: https://github.com/pytorch/audio/pull/2418

Reviewed By: carolineechen

Differential Revision: D36749077

Pulled By: mthrok

fbshipit-source-id: 7b368ee993cf5ed63ff2f53c9e3b1f50fcce7713
parent fd7ace17
...@@ -96,6 +96,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC ...@@ -96,6 +96,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
codec_long_name="H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10", codec_long_name="H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
format="yuv420p", format="yuv420p",
bit_rate=71925, bit_rate=71925,
num_frames=325,
bits_per_sample=8,
width=320, width=320,
height=180, height=180,
frame_rate=25.0, frame_rate=25.0,
...@@ -106,6 +108,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC ...@@ -106,6 +108,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
codec_long_name="AAC (Advanced Audio Coding)", codec_long_name="AAC (Advanced Audio Coding)",
format="fltp", format="fltp",
bit_rate=72093, bit_rate=72093,
num_frames=103,
bits_per_sample=0,
sample_rate=8000.0, sample_rate=8000.0,
num_channels=2, num_channels=2,
), ),
...@@ -115,6 +119,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC ...@@ -115,6 +119,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
codec_long_name="MOV text", codec_long_name="MOV text",
format=None, format=None,
bit_rate=None, bit_rate=None,
num_frames=None,
bits_per_sample=None,
), ),
StreamReaderSourceVideoStream( StreamReaderSourceVideoStream(
media_type="video", media_type="video",
...@@ -122,6 +128,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC ...@@ -122,6 +128,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
codec_long_name="H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10", codec_long_name="H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
format="yuv420p", format="yuv420p",
bit_rate=128783, bit_rate=128783,
num_frames=390,
bits_per_sample=8,
width=480, width=480,
height=270, height=270,
frame_rate=29.97002997002997, frame_rate=29.97002997002997,
...@@ -132,6 +140,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC ...@@ -132,6 +140,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
codec_long_name="AAC (Advanced Audio Coding)", codec_long_name="AAC (Advanced Audio Coding)",
format="fltp", format="fltp",
bit_rate=128837, bit_rate=128837,
num_frames=205,
bits_per_sample=0,
sample_rate=16000.0, sample_rate=16000.0,
num_channels=2, num_channels=2,
), ),
...@@ -141,6 +151,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC ...@@ -141,6 +151,8 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
codec_long_name="MOV text", codec_long_name="MOV text",
format=None, format=None,
bit_rate=None, bit_rate=None,
num_frames=None,
bits_per_sample=None,
), ),
] ]
output = [s.get_src_stream_info(i) for i in range(6)] output = [s.get_src_stream_info(i) for i in range(6)]
......
...@@ -79,6 +79,8 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const { ...@@ -79,6 +79,8 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
SrcStreamInfo ret; SrcStreamInfo ret;
ret.media_type = codecpar->codec_type; ret.media_type = codecpar->codec_type;
ret.bit_rate = codecpar->bit_rate; ret.bit_rate = codecpar->bit_rate;
ret.num_frames = stream->nb_frames;
ret.bits_per_sample = codecpar->bits_per_raw_sample;
const AVCodecDescriptor* desc = avcodec_descriptor_get(codecpar->codec_id); const AVCodecDescriptor* desc = avcodec_descriptor_get(codecpar->codec_id);
if (desc) { if (desc) {
ret.codec_name = desc->name; ret.codec_name = desc->name;
......
...@@ -11,6 +11,8 @@ SrcInfo convert(SrcStreamInfo ssi) { ...@@ -11,6 +11,8 @@ SrcInfo convert(SrcStreamInfo ssi) {
ssi.codec_long_name, ssi.codec_long_name,
ssi.fmt_name, ssi.fmt_name,
ssi.bit_rate, ssi.bit_rate,
ssi.num_frames,
ssi.bits_per_sample,
ssi.sample_rate, ssi.sample_rate,
ssi.num_channels, ssi.num_channels,
ssi.width, ssi.width,
......
...@@ -11,6 +11,8 @@ using SrcInfo = std::tuple< ...@@ -11,6 +11,8 @@ using SrcInfo = std::tuple<
std::string, // codec long name std::string, // codec long name
std::string, // format name std::string, // format name
int64_t, // bit_rate int64_t, // bit_rate
int64_t, // num_frames
int64_t, // bits_per_sample
// Audio // Audio
double, // sample_rate double, // sample_rate
int64_t, // num_channels int64_t, // num_channels
......
...@@ -12,6 +12,8 @@ struct SrcStreamInfo { ...@@ -12,6 +12,8 @@ struct SrcStreamInfo {
const char* codec_long_name = "N/A"; const char* codec_long_name = "N/A";
const char* fmt_name = "N/A"; const char* fmt_name = "N/A";
int64_t bit_rate = 0; int64_t bit_rate = 0;
int64_t num_frames = 0;
int bits_per_sample = 0;
// Audio // Audio
double sample_rate = 0; double sample_rate = 0;
int num_channels = 0; int num_channels = 0;
......
...@@ -55,6 +55,12 @@ class StreamReaderSourceStream: ...@@ -55,6 +55,12 @@ class StreamReaderSourceStream:
This is an estimated value based on the initial few frames of the stream. This is an estimated value based on the initial few frames of the stream.
For container formats and variable bit rate, it can be 0. For container formats and variable bit rate, it can be 0.
""" """
num_frames: Optional[int]
"""The number of frames in the stream"""
bits_per_sample: Optional[int]
"""This is the number of valid bits in each output sample.
For compressed formats, it can be 0.
"""
@dataclass @dataclass
...@@ -100,41 +106,59 @@ _CODEC = 1 ...@@ -100,41 +106,59 @@ _CODEC = 1
_CODEC_LONG = 2 _CODEC_LONG = 2
_FORMAT = 3 _FORMAT = 3
_BIT_RATE = 4 _BIT_RATE = 4
_NUM_FRAMES = 5
_BPS = 6
# - AUDIO # - AUDIO
_SAMPLE_RATE = 5 _SAMPLE_RATE = 7
_NUM_CHANNELS = 6 _NUM_CHANNELS = 8
# - VIDEO # - VIDEO
_WIDTH = 7 _WIDTH = 9
_HEIGHT = 8 _HEIGHT = 10
_FRAME_RATE = 9 _FRAME_RATE = 11
def _parse_si(i): def _parse_si(i):
media_type = i[_MEDIA_TYPE] media_type = i[_MEDIA_TYPE]
codec_name = i[_CODEC] codec_name = i[_CODEC]
codec_long_name = i[_CODEC_LONG] codec_long_name = i[_CODEC_LONG]
fmt = i[_FORMAT]
bit_rate = i[_BIT_RATE]
num_frames = i[_NUM_FRAMES]
bps = i[_BPS]
if media_type == "audio": if media_type == "audio":
return StreamReaderSourceAudioStream( return StreamReaderSourceAudioStream(
media_type, media_type=media_type,
codec_name, codec=codec_name,
codec_long_name, codec_long_name=codec_long_name,
i[_FORMAT], format=fmt,
i[_BIT_RATE], bit_rate=bit_rate,
i[_SAMPLE_RATE], num_frames=num_frames,
i[_NUM_CHANNELS], bits_per_sample=bps,
sample_rate=i[_SAMPLE_RATE],
num_channels=i[_NUM_CHANNELS],
) )
if media_type == "video": if media_type == "video":
return StreamReaderSourceVideoStream( return StreamReaderSourceVideoStream(
media_type, media_type=media_type,
codec_name, codec=codec_name,
codec_long_name, codec_long_name=codec_long_name,
i[_FORMAT], format=fmt,
i[_BIT_RATE], bit_rate=bit_rate,
i[_WIDTH], num_frames=num_frames,
i[_HEIGHT], bits_per_sample=bps,
i[_FRAME_RATE], width=i[_WIDTH],
height=i[_HEIGHT],
frame_rate=i[_FRAME_RATE],
) )
return StreamReaderSourceStream(media_type, codec_name, codec_long_name, None, None) return StreamReaderSourceStream(
media_type=media_type,
codec=codec_name,
codec_long_name=codec_long_name,
format=None,
bit_rate=None,
num_frames=None,
bits_per_sample=None,
)
@dataclass @dataclass
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment