"vscode:/vscode.git/clone" did not exist on "c06b9b7304a53ed5526dc51fc4fcafc45d92be54"
Interface.h 3.21 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#pragma once

#include <c10/util/Logging.h>
#include <sys/types.h>
#include <cstdint>
#include <memory>
#include <new>
#include <unordered_map>
#include <utility>
#include <vector>

extern "C" {

#include <libavutil/pixfmt.h>
#include <libavutil/samplefmt.h>
void av_free(void* ptr);
}

// Deleter functor: releases a uint8_t buffer by passing it to av_free().
// It is the deleter type used by the AvDataPtr alias.
struct avDeleter {
  void operator()(uint8_t* buffer) const {
    av_free(buffer);
  }
};

// Initial values of the `format` fields of VideoFormat and AudioFormat.
constexpr AVPixelFormat defaultVideoPixelFormat{AV_PIX_FMT_RGB24};
constexpr AVSampleFormat defaultAudioSampleFormat{AV_SAMPLE_FMT_FLT};

// Owning pointer to a uint8_t buffer that is released through avDeleter.
using AvDataPtr = std::unique_ptr<uint8_t, avDeleter>;

// Distinguishes video data from audio data. The underlying type is uint32_t
// and both enumerator values are assigned explicitly.
enum MediaType : uint32_t {
  // video
  TYPE_VIDEO = 1,
  // audio
  TYPE_AUDIO = 2
};

// Hash functor whose result is simply the argument converted to uint32_t.
// Used as the hasher of the MediaType-keyed unordered_map in DecoderOutput.
struct EnumClassHash {
  template <typename EnumT>
  uint32_t operator()(EnumT value) const {
    const auto asInteger = static_cast<uint32_t>(value);
    return asInteger;
  }
};

// Description of a video output format.
// Every field starts at a default meant for auto detection; a caller may set
// some or all of the fields when a specific output is desired.
struct VideoFormat {
  // width in pixels
  int width = 0;
  // height in pixels
  int height = 0;
  // choose min dimension and rescale accordingly
  int minDimension = 0;
  // output image pixel format
  AVPixelFormat format = defaultVideoPixelFormat;
  // start presentation timestamp
  int64_t startPts = 0;
  // end presentation timestamp
  int64_t endPts = 0;
  // numerator of the time base
  int timeBaseNum = 0;
  // denominator of the time base
  int timeBaseDen = 1;
  // NOTE(review): presumably frames per second - confirm against the decoder
  float fps = 0.0f;
};

// Description of an audio output format.
// Every field starts at a default meant for auto detection; a caller may set
// some or all of the fields when a specific output is desired.
struct AudioFormat {
  // number of samples per second (frequency)
  int samples = 0;
  // number of channels
  int channels = 0;
  // output sample format
  AVSampleFormat format = defaultAudioSampleFormat;
  // start presentation timestamp
  int64_t startPts = 0;
  // end presentation timestamp
  int64_t endPts = 0;
  // numerator of the time base
  int timeBaseNum = 0;
  // denominator of the time base
  int timeBaseDen = 1;
};

// Overlapping storage for either a video or an audio format description.
// The union does not record which member is in use; MediaFormat pairs it with
// a MediaType tag for that purpose.
union FormatUnion {
  // User-provided on purpose: VideoFormat and AudioFormat have default member
  // initializers, so their default constructors are non-trivial and an
  // implicitly-declared default constructor of this union would be deleted.
  // The empty body initializes neither member.
  FormatUnion() {}
  VideoFormat video;
  AudioFormat audio;
};

struct MediaFormat {
  MediaFormat() {}

  MediaFormat(const MediaFormat& mediaFormat) : type(mediaFormat.type) {
    if (type == MediaType::TYPE_VIDEO) {
      format.video = mediaFormat.format.video;
    } else if (type == MediaType::TYPE_AUDIO) {
      format.audio = mediaFormat.format.audio;
    }
  }

  MediaFormat(MediaType mediaType) : type(mediaType) {
    if (mediaType == MediaType::TYPE_VIDEO) {
      format.video = VideoFormat();
    } else if (mediaType == MediaType::TYPE_AUDIO) {
      format.audio = AudioFormat();
    }
  }
  // media type
  MediaType type;
  // format data
  FormatUnion format;
};

class DecodedFrame {
 public:
  explicit DecodedFrame() : frame_(nullptr), frameSize_(0), pts_(0) {}
  explicit DecodedFrame(AvDataPtr frame, int frameSize, int64_t pts)
      : frame_(std::move(frame)), frameSize_(frameSize), pts_(pts) {}
  AvDataPtr frame_{nullptr};
  int frameSize_{0};
  int64_t pts_{0};
};

struct MediaData {
  MediaData() {}
  MediaData(FormatUnion format) : format_(format) {}
  FormatUnion format_;
  std::vector<std::unique_ptr<DecodedFrame>> frames_;
};

// Holds MediaData entries keyed by MediaType. The three member functions are
// only declared here; their definitions live outside this header.
class DecoderOutput {
 public:
  explicit DecoderOutput() {}

  ~DecoderOutput() = default;

  // NOTE(review): presumably stores `format` for `mediaType` in media_data_ -
  // confirm against the definition.
  void initMediaType(MediaType mediaType, FormatUnion format);

  // Takes ownership of `frame`.
  // NOTE(review): presumably appends it to the entry for `mediaType` - confirm
  // against the definition.
  void addMediaFrame(MediaType mediaType, std::unique_ptr<DecodedFrame> frame);

  // NOTE(review): presumably empties media_data_ - confirm against the
  // definition.
  void clear();

  // Per-media-type data; MediaType keys are hashed with EnumClassHash.
  std::unordered_map<MediaType, MediaData, EnumClassHash> media_data_;
};