audio_stream.cpp 3.37 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// Copyright 2004-present Facebook. All Rights Reserved.

#include "audio_stream.h"
#include <c10/util/Logging.h>
#include <limits>
#include "util.h"

namespace ffmpeg {

namespace {
bool operator==(const AudioFormat& x, const AVCodecContext& y) {
  return x.samples == y.sample_rate && x.channels == y.channels &&
      x.format == y.sample_fmt;
}

AudioFormat& toAudioFormat(AudioFormat& x, const AVCodecContext& y) {
  x.samples = y.sample_rate;
  x.channels = y.channels;
  x.format = y.sample_fmt;
  return x;
}
} // namespace

AudioStream::AudioStream(
    AVFormatContext* inputCtx,
    int index,
    bool convertPtsToWallTime,
    const AudioFormat& format)
    : Stream(
          inputCtx,
          MediaFormat::makeMediaFormat(format, index),
          convertPtsToWallTime) {}

AudioStream::~AudioStream() {
  if (sampler_) {
    sampler_->shutdown();
    sampler_.reset();
  }
}

void AudioStream::ensureSampler() {
  if (!sampler_) {
    sampler_ = std::make_unique<AudioSampler>(codecCtx_);
  }
}

int AudioStream::initFormat() {
  // set output format
  if (format_.format.audio.samples == 0) {
    format_.format.audio.samples = codecCtx_->sample_rate;
  }
  if (format_.format.audio.channels == 0) {
    format_.format.audio.channels = codecCtx_->channels;
  }
  if (format_.format.audio.format == AV_SAMPLE_FMT_NONE) {
    format_.format.audio.format = codecCtx_->sample_fmt;
  }

  return format_.format.audio.samples != 0 &&
          format_.format.audio.channels != 0 &&
          format_.format.audio.format != AV_SAMPLE_FMT_NONE
      ? 0
      : -1;
}

int AudioStream::estimateBytes(bool flush) {
  ensureSampler();
  if (!(sampler_->getInputFormat().audio == *codecCtx_)) {
    // - reinit sampler
    SamplerParameters params;
    params.type = format_.type;
    params.out = format_.format;
    toAudioFormat(params.in.audio, *codecCtx_);
    if (flush || !sampler_->init(params)) {
      return -1;
    }

    VLOG(1) << "Set input audio sampler format"
            << ", samples: " << params.in.audio.samples
            << ", channels: " << params.in.audio.channels
            << ", format: " << params.in.audio.format
            << " : output audio sampler format"
            << ", samples: " << format_.format.audio.samples
            << ", channels: " << format_.format.audio.channels
            << ", format: " << format_.format.audio.format;
  }
  return sampler_->getSamplesBytes(frame_);
}

int AudioStream::copyFrameBytes(ByteStorage* out, bool flush) {
  ensureSampler();
  return sampler_->sample(flush ? nullptr : frame_, out);
}

void AudioStream::setHeader(DecoderHeader* header) {
  header->seqno = numGenerator_++;

  if (codecCtx_->time_base.num != 0) {
    header->pts = av_rescale_q(
        av_frame_get_best_effort_timestamp(frame_),
        codecCtx_->time_base,
        AV_TIME_BASE_Q);
  } else {
    // If the codec time_base is missing then we would've skipped the
    // rescalePackage step to rescale to codec time_base, so here we can
    // rescale straight from the stream time_base into AV_TIME_BASE_Q.
    header->pts = av_rescale_q(
        av_frame_get_best_effort_timestamp(frame_),
        inputCtx_->streams[format_.stream]->time_base,
        AV_TIME_BASE_Q);
  }

  if (convertPtsToWallTime_) {
    keeper_.adjust(header->pts);
  }

  header->keyFrame = 1;
  header->fps = std::numeric_limits<double>::quiet_NaN();
  header->format = format_;
}

} // namespace ffmpeg