Add utility function to fetch FFmpeg library versions (#2467)

Summary: Follow-up of https://github.com/pytorch/audio/issues/2464. Add utility function to fetch the versions of FFmpeg. Pull Request resolved: https://github.com/pytorch/audio/pull/2467 Reviewed By: carolineechen Differential Revision: D37028006 Pulled By: mthrok fbshipit-source-id: 72adce1e6b43985760ce55b715b0e59af5244fdb

Add utility function to fetch FFmpeg library versions (#2467)
Summary: Follow-up of https://github.com/pytorch/audio/issues/2464. Add utility function to fetch the versions of FFmpeg. Pull Request resolved: https://github.com/pytorch/audio/pull/2467 Reviewed By: carolineechen Differential Revision: D37028006 Pulled By: mthrok fbshipit-source-id: 72adce1e6b43985760ce55b715b0e59af5244fdb
4ba7dc38 · moto · Facebook GitHub Bot · 8ede3e1e · 4ba7dc38 · 4ba7dc38
Commit 4ba7dc38 authored Jun 27, 2022 by moto Committed by Facebook GitHub Bot Jun 27, 2022
5 changed files
--- a/test/torchaudio_unittest/io/stream_reader_test.py
+++ b/test/torchaudio_unittest/io/stream_reader_test.py
 import torch
+import torchaudio
 from parameterized import parameterized, parameterized_class
 from torchaudio_unittest.common_utils import (
    get_asset_path,
@@ -90,10 +91,16 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
        assert s.num_src_streams == 6

        # Note:
-        # FFmpeg 4.4.1 and FFmpeg 5 also report
-        # `"vendor_id": "[0][0][0][0]"` in audio/video metadata.
-        # TODO:
-        # change expected metadata value based on FFmpeg version.
+        # Starting from FFmpeg 4.4, audio/video stream metadata
+        # include "vendor_id"
+        ver = torchaudio.utils.ffmpeg_utils.get_versions()["libavutil"]
+        print(ver)
+        major, minor, _ = ver
+        if major >= 57 or (major == 56 and minor >= 70):
+            base_metadata = {"vendor_id": "[0][0][0][0]"}
+        else:
+            base_metadata = {}
+
        expected = [
            StreamReaderSourceVideoStream(
                media_type="video",
@@ -103,10 +110,11 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
                bit_rate=71925,
                num_frames=325,
                bits_per_sample=8,
-                metadata={
-                    "handler_name": "\x1fMainconcept Video Media Handler",
-                    "language": "eng",
-                },
+                metadata=dict(
+                    base_metadata,
+                    handler_name="\x1fMainconcept Video Media Handler",
+                    language="eng",
+                ),
                width=320,
                height=180,
                frame_rate=25.0,
@@ -119,10 +127,11 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
                bit_rate=72093,
                num_frames=103,
                bits_per_sample=0,
-                metadata={
-                    "handler_name": "#Mainconcept MP4 Sound Media Handler",
-                    "language": "eng",
-                },
+                metadata=dict(
+                    base_metadata,
+                    handler_name="#Mainconcept MP4 Sound Media Handler",
+                    language="eng",
+                ),
                sample_rate=8000.0,
                num_channels=2,
            ),
@@ -147,10 +156,11 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
                bit_rate=128783,
                num_frames=390,
                bits_per_sample=8,
-                metadata={
-                    "handler_name": "\x1fMainconcept Video Media Handler",
-                    "language": "eng",
-                },
+                metadata=dict(
+                    base_metadata,
+                    handler_name="\x1fMainconcept Video Media Handler",
+                    language="eng",
+                ),
                width=480,
                height=270,
                frame_rate=29.97002997002997,
@@ -163,10 +173,11 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
                bit_rate=128837,
                num_frames=205,
                bits_per_sample=0,
-                metadata={
-                    "handler_name": "#Mainconcept MP4 Sound Media Handler",
-                    "language": "eng",
-                },
+                metadata=dict(
+                    base_metadata,
+                    handler_name="#Mainconcept MP4 Sound Media Handler",
+                    language="eng",
+                ),
                sample_rate=16000.0,
                num_channels=2,
            ),
@@ -185,12 +196,6 @@ class StreamReaderInterfaceTest(_MediaSourceMixin, TempDirMixin, TorchaudioTestC
            ),
        ]
        output = [s.get_src_stream_info(i) for i in range(6)]
-        # Remove "vendor_id" if exists
-        # TODO: don't remove "vendor_id", instead,
-        # change expected based on FFmpeg version
-        for sinfo in output:
-            if "vendor_id" in sinfo.metadata:
-                del sinfo.metadata["vendor_id"]
        assert expected == output

    def test_id3tag(self):

--- a/test/torchaudio_unittest/utils/ffmpeg_utils_test.py
+++ b/test/torchaudio_unittest/utils/ffmpeg_utils_test.py
@@ -20,3 +20,8 @@ class TestFFmpegUtils(PytorchTestCase):
        for i in range(-100, 100):
            ffmpeg_utils.set_log_level(i)
            assert ffmpeg_utils.get_log_level() == i
+
+    def test_get_version(self):
+        """`get_versions` does not crash"""
+        versions = ffmpeg_utils.get_versions()
+        assert set(versions.keys()) == {"libavutil", "libavcodec", "libavformat", "libavfilter", "libavdevice"}
--- a/torchaudio/csrc/CMakeLists.txt
+++ b/torchaudio/csrc/CMakeLists.txt
@@ -196,6 +196,7 @@ if(USE_FFMPEG)
    ffmpeg/stream_reader.cpp
    ffmpeg/stream_reader_wrapper.cpp
    ffmpeg/stream_reader_binding.cpp
+    ffmpeg/utils.cpp
    )
  message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}")
  find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil)

--- a/torchaudio/csrc/ffmpeg/utils.cpp
+++ b/torchaudio/csrc/ffmpeg/utils.cpp
+#include <torch/script.h>
+#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
+
+namespace torchaudio {
+namespace ffmpeg {
+namespace {
+
+// Collect the versions of the FFmpeg libraries, keyed by library name
+// ("libavutil", "libavcodec", "libavformat", "libavfilter", "libavdevice").
+// Each value is the (major, minor, micro) triple extracted from the integer
+// returned by the library's own `*_version()` function.
+c10::Dict<std::string, std::tuple<int64_t, int64_t, int64_t>> get_versions() {
+  using Version = std::tuple<int64_t, int64_t, int64_t>;
+  c10::Dict<std::string, Version> versions;
+
+  // Split `packed` with the AV_VERSION_* macros and store it under `name`.
+  const auto record = [&versions](const char* name, int packed) {
+    versions.insert(
+        std::string(name),
+        Version(
+            AV_VERSION_MAJOR(packed),
+            AV_VERSION_MINOR(packed),
+            AV_VERSION_MICRO(packed)));
+  };
+
+  record("libavutil", avutil_version());
+  record("libavcodec", avcodec_version());
+  record("libavformat", avformat_version());
+  record("libavfilter", avfilter_version());
+  record("libavdevice", avdevice_version());
+
+  return versions;
+}
+
+// Register `get_versions` as the operator `torchaudio::ffmpeg_get_versions`.
+TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
+  m.def("torchaudio::ffmpeg_get_versions", &get_versions);
+}
+
+} // namespace
+} // namespace ffmpeg
+} // namespace torchaudio
--- a/torchaudio/utils/ffmpeg_utils.py
+++ b/torchaudio/utils/ffmpeg_utils.py
+from typing import Dict, Tuple
+
 import torch


+def get_versions() -> Dict[str, Tuple[int]]:
+    """Get the versions of FFmpeg libraries
+
+    Returns:
+        dict: mapping from library names to version string,
+            i.e. `"libavutil": (56, 22, 100)`.
+    """
+    return torch.ops.torchaudio.ffmpeg_get_versions()
+
+
 def get_log_level() -> int:
    """Get the log level of FFmpeg.