Revert "Implementing multithreaded video decoding (#3389)" (#3623)

This reverts commit 3bfdb427.

Revert "Implementing multithreaded video decoding (#3389)" (#3623)
This reverts commit 3bfdb427.
37eb37a8 · Francisco Massa · GitHub · 02a1918a · 37eb37a8 · 37eb37a8
Unverified Commit 37eb37a8 authored Mar 31, 2021 by Francisco Massa Committed by GitHub Mar 31, 2021
7 changed files
--- a/torchvision/csrc/io/decoder/decoder.cpp
+++ b/torchvision/csrc/io/decoder/decoder.cpp
@@ -432,7 +432,7 @@ bool Decoder::openStreams(std::vector<DecoderMetadata>* metadata) {
          it->format,
          params_.loggingUuid);
      CHECK(stream);
-      if (stream->openCodec(metadata, params_.numThreads) < 0) {
+      if (stream->openCodec(metadata) < 0) {
        LOG(ERROR) << "uuid=" << params_.loggingUuid
                   << " open codec failed, stream_idx=" << i;
        return false;

--- a/torchvision/csrc/io/decoder/defs.h
+++ b/torchvision/csrc/io/decoder/defs.h
@@ -194,9 +194,6 @@ struct DecoderParameters {
  bool preventStaleness{true};
  // seek tolerated accuracy (us)
  double seekAccuracy{1000000.0};
-  // Allow multithreaded decoding for numThreads > 1;
-  // 0 numThreads=0 sets up sensible defaults
-  int numThreads{1};
  // what media types should be processed, default none
  std::set<MediaFormat> formats;


--- a/torchvision/csrc/io/decoder/stream.cpp
+++ b/torchvision/csrc/io/decoder/stream.cpp
 #include "stream.h"
-#include <ATen/Parallel.h>
 #include <c10/util/Logging.h>
-#include <stdio.h>
-#include <string.h>
 #include "util.h"

 namespace ffmpeg {
@@ -31,7 +28,7 @@ AVCodec* Stream::findCodec(AVCodecParameters* params) {
  return avcodec_find_decoder(params->codec_id);
 }

-int Stream::openCodec(std::vector<DecoderMetadata>* metadata, int num_threads) {
+int Stream::openCodec(std::vector<DecoderMetadata>* metadata) {
  AVStream* steam = inputCtx_->streams[format_.stream];

  AVCodec* codec = findCodec(steam->codecpar);
@@ -56,49 +53,6 @@ int Stream::openCodec(std::vector<DecoderMetadata>* metadata, int num_threads) {
    return ret;
  }

-  // multithreading heuristics
-  int max_threads = at::get_num_threads();
-  // first a safety check
-  if (num_threads > max_threads) {
-    num_threads = max_threads;
-  }
-
-  if (num_threads > 0) {
-    if (num_threads > 1) {
-      codecCtx_->active_thread_type = 1;
-    }
-    // if user defined, respect that
-    codecCtx_->thread_count = num_threads;
-
-  } else {
-    // otherwise set sensible defaults
-    // with the special case for the different MPEG4 codecs
-    // that don't have threading context functions
-    // TODO: potentially automate this using native c++ function lookups
-    if (strcmp(codecCtx_->codec->name, "mpeg4") == 0 &&
-        codecCtx_->codec_type == 0) {
-      if (codecCtx_->codec_tag == 1684633208) {
-        codecCtx_->thread_count = (8 <= max_threads) ? 8 : max_threads;
-        codecCtx_->thread_type = 1;
-      } else {
-        codecCtx_->thread_count = (2 <= max_threads) ? 2 : max_threads;
-        codecCtx_->thread_type = 2;
-      }
-    } else {
-      // otherwise default to multithreading
-      codecCtx_->thread_count = (8 <= max_threads) ? 8 : max_threads;
-      codecCtx_->active_thread_type = 1;
-    }
-  }
-
-  // print codec type and number of threads
-  LOG(INFO) << "Codec " << codecCtx_->codec->long_name
-            << " Codec id: " << codecCtx_->codec_id
-            << " Codec tag: " << codecCtx_->codec_tag
-            << " Codec type: " << codecCtx_->codec_type
-            << " Codec extradata: " << codecCtx_->extradata
-            << " Number of threads: " << at::get_num_threads();
-
  // after avcodec_open2, value of codecCtx_->time_base is NOT meaningful
  if ((ret = avcodec_open2(codecCtx_, codec, nullptr)) < 0) {
    LOG(ERROR) << "LoggingUuid #" << loggingUuid_

--- a/torchvision/csrc/io/decoder/stream.h
+++ b/torchvision/csrc/io/decoder/stream.h
@@ -20,8 +20,7 @@ class Stream {
  virtual ~Stream();

  // returns 0 - on success or negative error
-  // num_threads sets up the codec context for multithreading if needed
-  int openCodec(std::vector<DecoderMetadata>* metadata, int num_threads = 1);
+  int openCodec(std::vector<DecoderMetadata>* metadata);
  // returns 1 - if packet got consumed, 0 - if it's not, and < 0 on error
  int decodePacket(
      const AVPacket* packet,

--- a/torchvision/csrc/io/video/video.cpp
+++ b/torchvision/csrc/io/video/video.cpp
@@ -97,17 +97,15 @@ void Video::_getDecoderParams(
    double videoStartS,
    int64_t getPtsOnly,
    std::string stream,
-    long stream_id,
-    bool all_streams,
-    int64_t num_threads,
-    double seekFrameMarginUs) {
+    long stream_id = -1,
+    bool all_streams = false,
+    double seekFrameMarginUs = 10) {
  int64_t videoStartUs = int64_t(videoStartS * 1e6);

  params.timeoutMs = decoderTimeoutMs;
  params.startOffset = videoStartUs;
  params.seekAccuracy = seekFrameMarginUs;
  params.headerOnly = false;
-  params.numThreads = num_threads;

  params.preventStaleness = false; // not sure what this is about

@@ -154,9 +152,7 @@ void Video::_getDecoderParams(

 } // _get decoder params

-Video::Video(std::string videoPath, std::string stream, int64_t numThreads) {
-  // set number of threads global
-  numThreads_ = numThreads;
+Video::Video(std::string videoPath, std::string stream) {
  // parse stream information
  current_stream = _parseStream(stream);
  // note that in the initial call we want to get all streams
@@ -165,8 +161,7 @@ Video::Video(std::string videoPath, std::string stream, int64_t numThreads) {
      0, // headerOnly
      std::get<0>(current_stream), // stream info - remove that
      long(-1), // stream_id parsed from info above change to -2
-      true, // read all streams
-      numThreads_ // global number of Threads for decoding
+      true // read all streams
  );

  std::string logMessage, logType;
@@ -230,7 +225,7 @@ Video::Video(std::string videoPath, std::string stream, int64_t numThreads) {
  }
 } // video

-bool Video::setCurrentStream(std::string stream) {
+bool Video::setCurrentStream(std::string stream = "video") {
  if ((!stream.empty()) && (_parseStream(stream) != current_stream)) {
    current_stream = _parseStream(stream);
  }
@@ -246,8 +241,7 @@ bool Video::setCurrentStream(std::string stream) {
      std::get<0>(current_stream), // stream
      long(std::get<1>(
          current_stream)), // stream_id parsed from info above change to -2
-      false, // read all streams
-      numThreads_ // global number of threads
+      false // read all streams
  );

  // calback and metadata defined in Video.h
@@ -271,8 +265,7 @@ void Video::Seek(double ts) {
      std::get<0>(current_stream), // stream
      long(std::get<1>(
          current_stream)), // stream_id parsed from info above change to -2
-      false, // read all streams
-      numThreads_ // global num threads
+      false // read all streams
  );

  // calback and metadata defined in Video.h
@@ -338,7 +331,7 @@ std::tuple<torch::Tensor, double> Video::Next() {

 static auto registerVideo =
    torch::class_<Video>("torchvision", "Video")
-        .def(torch::init<std::string, std::string, int64_t>())
+        .def(torch::init<std::string, std::string>())
        .def("get_current_stream", &Video::getCurrentStream)
        .def("set_current_stream", &Video::setCurrentStream)
        .def("get_metadata", &Video::getStreamMetadata)

--- a/torchvision/csrc/io/video/video.h
+++ b/torchvision/csrc/io/video/video.h
@@ -16,15 +16,14 @@ struct Video : torch::CustomClassHolder {
  // global video metadata
  c10::Dict<std::string, c10::Dict<std::string, std::vector<double>>>
      streamsMetadata;
-  int64_t numThreads_{0};

 public:
-  Video(std::string videoPath, std::string stream, int64_t numThreads);
+  Video(std::string videoPath, std::string stream);
  std::tuple<std::string, int64_t> getCurrentStream() const;
  c10::Dict<std::string, c10::Dict<std::string, std::vector<double>>>
  getStreamMetadata() const;
  void Seek(double ts);
-  bool setCurrentStream(std::string stream = "video");
+  bool setCurrentStream(std::string stream);
  std::tuple<torch::Tensor, double> Next();

 private:
@@ -38,10 +37,9 @@ struct Video : torch::CustomClassHolder {
      double videoStartS,
      int64_t getPtsOnly,
      std::string stream,
-      long stream_id = -1,
-      bool all_streams = false,
-      int64_t num_threads = 0,
-      double seekFrameMarginUs = 10); // this needs to be improved
+      long stream_id,
+      bool all_streams,
+      double seekFrameMarginUs); // this needs to be improved

  std::map<std::string, std::vector<double>> streamTimeBase; // not used


--- a/torchvision/io/__init__.py
+++ b/torchvision/io/__init__.py
@@ -96,13 +96,9 @@ class VideoReader:
        stream (string, optional): descriptor of the required stream, followed by the stream id,
            in the format ``{stream_type}:{stream_id}``. Defaults to ``"video:0"``.
            Currently available options include ``['video', 'audio']``
-
-        num_threads (int, optional): number of threads used by the codec to decode video.
-            Default value (0) enables multithreading with codec-dependent heuristic. The performance
-            will depend on the version of FFMPEG codecs supported.
    """

-    def __init__(self, path, stream="video", num_threads=0):
+    def __init__(self, path, stream="video"):
        if not _has_video_opt():
            raise RuntimeError(
                "Not compiled with video_reader support, "
@@ -110,7 +106,7 @@ class VideoReader:
                + "ffmpeg (version 4.2 is currently supported) and"
                + "build torchvision from source."
            )
-        self._c = torch.classes.torchvision.Video(path, stream, num_threads)
+        self._c = torch.classes.torchvision.Video(path, stream)

    def __next__(self):
        """Decodes and returns the next frame of the current stream.