// stream_reader_wrapper.h
#pragma once

#include <cstdint>
#include <map>
#include <string>
#include <tuple>

#include <torch/script.h>
#include <torchaudio/csrc/ffmpeg/stream_reader.h>

namespace torchaudio {
namespace ffmpeg {

// Create a format context for reading media.
// Parameters, as far as this declaration shows (the definition lives
// elsewhere, so the notes below are to be confirmed against it):
//   src    - the input to open.
//            NOTE(review): presumably a file path or URL - confirm.
//   device - optional string; may be absent.
//            NOTE(review): presumably selects an input device/format - confirm.
//   option - an OptionDict of options for opening the input.
//            NOTE(review): presumably forwarded to FFmpeg - confirm.
//   io_ctx - optional AVIOContext; defaults to nullptr when not supplied.
//            NOTE(review): presumably enables caller-provided I/O - confirm.
// Returns an AVFormatContextPtr for the opened input.
AVFormatContextPtr get_input_format_context(
    const std::string& src,
    const c10::optional<std::string>& device,
    const OptionDict& option,
    AVIOContext* io_ctx = nullptr);

// Because TorchScript requires the c10::Dict type to pass a dict,
// while PyBind11 requires the std::map type to pass a dict,
// we duplicate the return tuple.
// Even though all the PyBind-based implementations are placed
// in the `pybind` directory, std::map does not require the pybind11
// header, so we define both of them here for the sake of
// better locality/maintainability.

using SrcInfo = std::tuple<
    std::string, // media_type
    std::string, // codec name
    std::string, // codec long name
    std::string, // format name
    int64_t, // bit_rate
moto's avatar
moto committed
29
30
    int64_t, // num_frames
    int64_t, // bits_per_sample
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
    c10::Dict<std::string, std::string>, // metadata
    // Audio
    double, // sample_rate
    int64_t, // num_channels
    // Video
    int64_t, // width
    int64_t, // height
    double // frame_rate
    >;

// Source stream information in the PyBind11-compatible form: the
// metadata field is a std::map. SrcInfo duplicates this layout with
// c10::Dict for the metadata, so the two tuples are meant to list the
// same fields in the same order.
using SrcInfoPyBind = std::tuple<
    std::string, // media_type
    std::string, // codec name
    std::string, // codec long name
    std::string, // format name
    int64_t, // bit_rate
    int64_t, // num_frames
    int64_t, // bits_per_sample
    std::map<std::string, std::string>, // metadata
    // Audio
    double, // sample_rate
    int64_t, // num_channels
    // Video
    int64_t, // width
    int64_t, // height
    double // frame_rate
    >;

// Output stream information, as returned by
// StreamReaderBinding::get_out_stream_info.
//   element 0: source index
//   element 1: filter description
using OutInfo = std::tuple<int64_t, std::string>;

// Structure implementing a wrapper API around StreamReader that is more
// suitable for binding the code (i.e. it receives/returns primitives).
struct StreamReaderBinding : public StreamReader,
                             public torch::CustomClassHolder {
68
69
  explicit StreamReaderBinding(AVFormatContextPtr&& p);
  SrcInfo get_src_stream_info(int64_t i);
70
  SrcInfoPyBind get_src_stream_info_pybind(int64_t i);
71
72
73
74
75
76
77
78
79
80
81
  OutInfo get_out_stream_info(int64_t i);

  int64_t process_packet(
      const c10::optional<double>& timeout = c10::optional<double>(),
      const double backoff = 10.);

  void process_all_packets();
};

} // namespace ffmpeg
} // namespace torchaudio