add v0.19.1 release

bf491463 · limm · e17f5ea2 · e17f5ea2 · e17f5ea2 · e17f5ea2
Commit bf491463 authored May 30, 2025 by limm
20 changed files
--- a/packaging/vs2017/conda_build_config.yaml
+++ b/packaging/vs2017/conda_build_config.yaml
-blas_impl:
-  - mkl                        # [x86_64]
-c_compiler:
-  - vs2017                     # [win]
-cxx_compiler:
-  - vs2017                     # [win]
-python:
-  - 3.5
-  - 3.6
-# This differs from target_platform in that it determines what subdir the compiler
-#    will target, not what subdir the compiler package will be itself.
-#    For example, we need a win-64 vs2008_win-32 package, so that we compile win-32
-#    code on win-64 miniconda.
-cross_compiler_target_platform:
-  - win-64                     # [win]
-target_platform:
-  - win-64                     # [win]
-vc:
-  - 14
-zip_keys:
-  -                             # [win]
-    - vc                        # [win]
-    - c_compiler                # [win]
-    - cxx_compiler              # [win]
--- a/packaging/vs2017/install_activate.bat
+++ b/packaging/vs2017/install_activate.bat
-set YEAR=2017
-set VER=15
-
-mkdir "%PREFIX%\etc\conda\activate.d"
-COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-
-IF "%cross_compiler_target_platform%" == "win-64" (
-  set "target_platform=amd64"
-  echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-  echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-  IF "%VSDEVCMD_ARGS%" == "" (
-    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-    echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-    echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-  ) ELSE (
-    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-    echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-    echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-    echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-  )
-  echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-  ) else (
-  set "target_platform=x86"
-  echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-  echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-  echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
-  echo popd
-  )
-
--- a/packaging/vs2017/install_runtime.bat
+++ b/packaging/vs2017/install_runtime.bat
-set VC_PATH=x86
-if "%ARCH%"=="64" (
-   set VC_PATH=x64
-)
-
-set MSC_VER=2017
-
-rem :: This should always be present for VC installed with VS.  Not sure about VC installed with Visual C++ Build Tools 2015
-rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO (
-rem     set SP=%%A
-rem     )
-
-rem if not "%SP%" == "%PKG_VERSION%" (
-rem    echo "Version detected from registry: %SP%"
-rem    echo    "does not match version of package being built (%PKG_VERSION%)"
-rem    echo "Do you have current updates for VS 2015 installed?"
-rem    exit 1
-rem )
-
-
-REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below!
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%LIBRARY_BIN%" *.dll /E
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%PREFIX%" *.dll /E
-if %ERRORLEVEL% GEQ 8 exit 1
-
-REM ========== This one comes from visual studio 2017
-set "VC_VER=141"
-
-for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
-    if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
-        set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
-        goto :eof
-    )
-)
-
-@setlocal
-call "%VS15VARSALL%" x64
-
-set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%"
-
-robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E
-if %ERRORLEVEL% LSS 8 exit 0
-robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E
-if %ERRORLEVEL% LSS 8 exit 0
-robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E
-if %ERRORLEVEL% LSS 8 exit 0
-robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E
-if %ERRORLEVEL% LSS 8 exit 0
-@endlocal
--- a/packaging/vs2017/meta.yaml
+++ b/packaging/vs2017/meta.yaml
-{% set vcver="14.1" %}
-{% set vcfeature="14" %}
-{% set vsyear="2017" %}
-{% set fullver="15.4.27004.2010" %}
-
-package:
-  name: vs{{ vsyear }}
-  version: {{ fullver }}
-
-build:
-  skip: True  [not win]
-  script_env:
-    - VSDEVCMD_ARGS # [win]
-
-outputs:
-  - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }}
-    script: install_activate.bat
-    track_features:
-      # VS 2017 is binary-compatible with VS 2015/vc14.  Tools are "v141".
-      strong:
-        - vc{{ vcfeature }}
-    about:
-      summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler
-      license: BSD 3-clause
--- a/packaging/vs2019/conda_build_config.yaml
+++ b/packaging/vs2019/conda_build_config.yaml
@@ -5,8 +5,7 @@ c_compiler:
 cxx_compiler:
  - vs2019                     # [win]
 python:
-  - 3.5
-  - 3.6
+  - 3.8
 # This differs from target_platform in that it determines what subdir the compiler
 #    will target, not what subdir the compiler package will be itself.
 #    For example, we need a win-64 vs2008_win-32 package, so that we compile win-32

--- a/packaging/vs2019/install_activate.bat
+++ b/packaging/vs2019/install_activate.bat
@@ -27,4 +27,3 @@ IF "%cross_compiler_target_platform%" == "win-64" (
  echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
  echo popd
  )
-
--- a/packaging/vs2019/install_runtime.bat
+++ b/packaging/vs2019/install_runtime.bat
-set VC_PATH=x86
-if "%ARCH%"=="64" (
-   set VC_PATH=x64
-)
-
-set MSC_VER=2019
-
-rem :: This should always be present for VC installed with VS.  Not sure about VC installed with Visual C++ Build Tools 2015
-rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO (
-rem     set SP=%%A
-rem     )
-
-rem if not "%SP%" == "%PKG_VERSION%" (
-rem    echo "Version detected from registry: %SP%"
-rem    echo    "does not match version of package being built (%PKG_VERSION%)"
-rem    echo "Do you have current updates for VS 2015 installed?"
-rem    exit 1
-rem )
-
-
-REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below!
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%LIBRARY_BIN%" *.dll /E
-robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%"  "%PREFIX%" *.dll /E
-if %ERRORLEVEL% GEQ 8 exit 1
-
-REM ========== This one comes from visual studio 2019
-set "VC_VER=142"
-
-for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do (
-    if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
-        set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
-        goto :eof
-    )
-)
-
-@setlocal
-call "%VS15VARSALL%" x64
-
-set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%"
-
-robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E
-if %ERRORLEVEL% LSS 8 exit 0
-robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E
-if %ERRORLEVEL% LSS 8 exit 0
-robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E
-if %ERRORLEVEL% LSS 8 exit 0
-robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E
-if %ERRORLEVEL% LSS 8 exit 0
-@endlocal
--- a/packaging/wheel/linux_manywheel.sh
+++ b/packaging/wheel/linux_manywheel.sh
-#!/bin/bash
-set -ex
-
-if [ "$#" -ne 1 ]; then
-    echo "Illegal number of parameters. Pass cuda version"
-    echo "CUDA version should be cu92, cu100 or cpu"
-    exit 1
-fi
-export CUVER="$1" # cu[0-9]* cpu
-
-if [[ "$CUVER" == "cu102" ]]; then
-  cu_suffix=""
-else
-  cu_suffix="+$CUVER"
-fi
-
-export TORCHVISION_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")${cu_suffix}"
-export TORCHVISION_BUILD_NUMBER="1"
-export TORCHVISION_LOCAL_VERSION_LABEL="$CUVER"
-export OUT_DIR="/remote/$CUVER"
-
-pushd /opt/python
-DESIRED_PYTHON=(*/)
-popd
-for desired_py in "${DESIRED_PYTHON[@]}"; do
-    python_installations+=("/opt/python/$desired_py")
-done
-
-OLD_PATH=$PATH
-cd /tmp
-rm -rf vision
-git clone https://github.com/pytorch/vision
-
-cd /tmp/vision
-
-for PYDIR in "${python_installations[@]}"; do
-    export PATH=$PYDIR/bin:$OLD_PATH
-    pip install --upgrade pip
-    pip install numpy pyyaml future
-
-    pip uninstall -y torch || true
-    pip uninstall -y torch_nightly || true
-
-    export TORCHVISION_PYTORCH_DEPENDENCY_NAME=torch_nightly
-    pip install torch_nightly -f https://download.pytorch.org/whl/nightly/$CUVER/torch_nightly.html
-    # CPU/CUDA variants of PyTorch have ABI compatible PyTorch for
-    # the CPU only bits.  Therefore, we
-    # strip off the local package qualifier, but ONLY if we're
-    # doing a CPU build.
-    if [[ "$CUVER" == "cpu" ]]; then
-        export TORCHVISION_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//' | sed 's/+.\+//')"
-    else
-        export TORCHVISION_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//')"
-    fi
-    echo "Building against ${TORCHVISION_PYTORCH_DEPENDENCY_VERSION}"
-
-    pip install ninja
-    python setup.py clean
-    python setup.py bdist_wheel
-    mkdir -p $OUT_DIR
-    cp dist/*.whl $OUT_DIR/
-done
--- a/packaging/wheel/osx_wheel.sh
+++ b/packaging/wheel/osx_wheel.sh
-if [[ ":$PATH:" == *"conda"* ]]; then
-    echo "existing anaconda install in PATH, remove it and run script"
-    exit 1
-fi
-# download and activate anaconda
-rm -rf ~/minconda_wheel_env_tmp
-wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh && \
-    chmod +x Miniconda3-latest-MacOSX-x86_64.sh && \
-    ./Miniconda3-latest-MacOSX-x86_64.sh -b -p ~/minconda_wheel_env_tmp && \
-    rm Miniconda3-latest-MacOSX-x86_64.sh
-
-. ~/minconda_wheel_env_tmp/bin/activate
-
-
-export TORCHVISION_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")"
-export TORCHVISION_BUILD_NUMBER="1"
-export OUT_DIR=~/torchvision_wheels
-
-export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++
-
-pushd /tmp
-rm -rf vision
-git clone https://github.com/pytorch/vision
-pushd vision
-
-desired_pythons=( "2.7" "3.5" "3.6" "3.7" )
-# for each python
-for desired_python in "${desired_pythons[@]}"
-do
-    # create and activate python env
-    env_name="env$desired_python"
-    conda create -yn $env_name python="$desired_python"
-    conda activate $env_name
-
-    pip uninstall -y torch || true
-    pip uninstall -y torch_nightly || true
-
-    export TORCHVISION_PYTORCH_DEPENDENCY_NAME=torch_nightly
-    pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
-    export TORCHVISION_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version:  *//')"
-    echo "Building against ${TORCHAUDIO_PYTORCH_DEPENDENCY_VERSION}"
-
-    # install torchvision dependencies
-    pip install ninja scipy pytest
-
-    python setup.py clean
-    python setup.py bdist_wheel
-    mkdir -p $OUT_DIR
-    cp dist/*.whl $OUT_DIR/
-done
-popd
-popd
--- a/packaging/wheel/relocate.py
+++ b/packaging/wheel/relocate.py
-# -*- coding: utf-8 -*-
-
 """Helper script to package wheels and relocate binaries."""

-# Standard library imports
-import os
-import io
-import sys
 import glob
-import shutil
-import zipfile
 import hashlib
+
+# Standard library imports
+import os
+import os.path as osp
 import platform
+import shutil
 import subprocess
-import os.path as osp
+import sys
+import zipfile
 from base64 import urlsafe_b64encode

 # Third party imports
-if sys.platform == 'linux':
+if sys.platform == "linux":
    from auditwheel.lddtree import lddtree
-from wheel.bdist_wheel import get_abi_tag


 ALLOWLIST = {
-    'libgcc_s.so.1', 'libstdc++.so.6', 'libm.so.6',
-    'libdl.so.2', 'librt.so.1', 'libc.so.6',
-    'libnsl.so.1', 'libutil.so.1', 'libpthread.so.0',
-    'libresolv.so.2', 'libX11.so.6', 'libXext.so.6',
-    'libXrender.so.1', 'libICE.so.6', 'libSM.so.6',
-    'libGL.so.1', 'libgobject-2.0.so.0', 'libgthread-2.0.so.0',
-    'libglib-2.0.so.0', 'ld-linux-x86-64.so.2', 'ld-2.17.so'
+    "libgcc_s.so.1",
+    "libstdc++.so.6",
+    "libm.so.6",
+    "libdl.so.2",
+    "librt.so.1",
+    "libc.so.6",
+    "libnsl.so.1",
+    "libutil.so.1",
+    "libpthread.so.0",
+    "libresolv.so.2",
+    "libX11.so.6",
+    "libXext.so.6",
+    "libXrender.so.1",
+    "libICE.so.6",
+    "libSM.so.6",
+    "libGL.so.1",
+    "libgobject-2.0.so.0",
+    "libgthread-2.0.so.0",
+    "libglib-2.0.so.0",
+    "ld-linux-x86-64.so.2",
+    "ld-2.17.so",
 }

 WINDOWS_ALLOWLIST = {
-    'MSVCP140.dll', 'KERNEL32.dll',
-    'VCRUNTIME140_1.dll', 'VCRUNTIME140.dll',
-    'api-ms-win-crt-heap-l1-1-0.dll',
-    'api-ms-win-crt-runtime-l1-1-0.dll',
-    'api-ms-win-crt-stdio-l1-1-0.dll',
-    'api-ms-win-crt-filesystem-l1-1-0.dll',
-    'api-ms-win-crt-string-l1-1-0.dll',
-    'api-ms-win-crt-environment-l1-1-0.dll',
-    'api-ms-win-crt-math-l1-1-0.dll',
-    'api-ms-win-crt-convert-l1-1-0.dll'
+    "MSVCP140.dll",
+    "KERNEL32.dll",
+    "VCRUNTIME140_1.dll",
+    "VCRUNTIME140.dll",
+    "api-ms-win-crt-heap-l1-1-0.dll",
+    "api-ms-win-crt-runtime-l1-1-0.dll",
+    "api-ms-win-crt-stdio-l1-1-0.dll",
+    "api-ms-win-crt-filesystem-l1-1-0.dll",
+    "api-ms-win-crt-string-l1-1-0.dll",
+    "api-ms-win-crt-environment-l1-1-0.dll",
+    "api-ms-win-crt-math-l1-1-0.dll",
+    "api-ms-win-crt-convert-l1-1-0.dll",
 }


@@ -51,33 +64,22 @@ PLATFORM_ARCH = platform.machine()
 PYTHON_VERSION = sys.version_info


-def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE):
-    """Yield pieces of data from a file-like object until EOF."""
-    while True:
-        chunk = file.read(size)
-        if not chunk:
-            break
-        yield chunk
-
-
 def rehash(path, blocksize=1 << 20):
    """Return (hash, length) for path using hashlib.sha256()"""
    h = hashlib.sha256()
    length = 0
-    with open(path, 'rb') as f:
-        for block in read_chunks(f, size=blocksize):
+    with open(path, "rb") as f:
+        while block := f.read(blocksize):
            length += len(block)
            h.update(block)
-    digest = 'sha256=' + urlsafe_b64encode(
-        h.digest()
-    ).decode('latin1').rstrip('=')
+    digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=")
    # unicode/str python2 issues
    return (digest, str(length))  # type: ignore


 def unzip_file(file, dest):
    """Decompress zip `file` into directory `dest`."""
-    with zipfile.ZipFile(file, 'r') as zip_ref:
+    with zipfile.ZipFile(file, "r") as zip_ref:
        zip_ref.extractall(dest)


@@ -88,8 +90,7 @@ def is_program_installed(basename):
    On macOS systems, a .app is considered installed if
    it exists.
    """
-    if (sys.platform == 'darwin' and basename.endswith('.app') and
-            osp.exists(basename)):
+    if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename):
        return basename

    for path in os.environ["PATH"].split(os.pathsep):
@@ -105,9 +106,9 @@ def find_program(basename):
    (return None if not found)
    """
    names = [basename]
-    if os.name == 'nt':
+    if os.name == "nt":
        # Windows platforms
-        extensions = ('.exe', '.bat', '.cmd', '.dll')
+        extensions = (".exe", ".bat", ".cmd", ".dll")
        if not basename.endswith(extensions):
            names = [basename + ext for ext in extensions] + [basename]
    for name in names:
@@ -118,19 +119,18 @@ def find_program(basename):

 def patch_new_path(library_path, new_dir):
    library = osp.basename(library_path)
-    name, *rest = library.split('.')
-    rest = '.'.join(rest)
-    hash_id = hashlib.sha256(library_path.encode('utf-8')).hexdigest()[:8]
-    new_name = '.'.join([name, hash_id, rest])
+    name, *rest = library.split(".")
+    rest = ".".join(rest)
+    hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8]
+    new_name = ".".join([name, hash_id, rest])
    return osp.join(new_dir, new_name)


 def find_dll_dependencies(dumpbin, binary):
-    out = subprocess.run([dumpbin, "/dependents", binary],
-                         stdout=subprocess.PIPE)
-    out = out.stdout.strip().decode('utf-8')
-    start_index = out.find('dependencies:') + len('dependencies:')
-    end_index = out.find('Summary')
+    out = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE)
+    out = out.stdout.strip().decode("utf-8")
+    start_index = out.find("dependencies:") + len("dependencies:")
+    end_index = out.find("Summary")
    dlls = out[start_index:end_index].strip()
    dlls = dlls.split(os.linesep)
    dlls = [dll.strip() for dll in dlls]
@@ -145,13 +145,13 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
    rename and copy them into the wheel while updating their respective rpaths.
    """

-    print('Relocating {0}'.format(binary))
+    print(f"Relocating {binary}")
    binary_path = osp.join(output_library, binary)

    ld_tree = lddtree(binary_path)
-    tree_libs = ld_tree['libs']
+    tree_libs = ld_tree["libs"]

-    binary_queue = [(n, binary) for n in ld_tree['needed']]
+    binary_queue = [(n, binary) for n in ld_tree["needed"]]
    binary_paths = {binary: binary_path}
    binary_dependencies = {}

@@ -160,13 +160,13 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
        library_info = tree_libs[library]
        print(library)

-        if library_info['path'] is None:
-            print('Omitting {0}'.format(library))
+        if library_info["path"] is None:
+            print(f"Omitting {library}")
            continue

        if library in ALLOWLIST:
            # Omit glibc/gcc/system libraries
-            print('Omitting {0}'.format(library))
+            print(f"Omitting {library}")
            continue

        parent_dependencies = binary_dependencies.get(parent, [])
@@ -176,12 +176,12 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
        if library in binary_paths:
            continue

-        binary_paths[library] = library_info['path']
-        binary_queue += [(n, library) for n in library_info['needed']]
+        binary_paths[library] = library_info["path"]
+        binary_queue += [(n, library) for n in library_info["needed"]]

-    print('Copying dependencies to wheel directory')
-    new_libraries_path = osp.join(output_dir, 'torchvision.libs')
-    os.makedirs(new_libraries_path)
+    print("Copying dependencies to wheel directory")
+    new_libraries_path = osp.join(output_dir, "torchvision.libs")
+    os.makedirs(new_libraries_path, exist_ok=True)

    new_names = {binary: binary_path}

@@ -189,11 +189,11 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
        if library != binary:
            library_path = binary_paths[library]
            new_library_path = patch_new_path(library_path, new_libraries_path)
-            print('{0} -> {1}'.format(library, new_library_path))
+            print(f"{library} -> {new_library_path}")
            shutil.copyfile(library_path, new_library_path)
            new_names[library] = new_library_path

-    print('Updating dependency names by new files')
+    print("Updating dependency names by new files")
    for library in binary_paths:
        if library != binary:
            if library not in binary_dependencies:
@@ -202,59 +202,26 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
            new_library_name = new_names[library]
            for dep in library_dependencies:
                new_dep = osp.basename(new_names[dep])
-                print('{0}: {1} -> {2}'.format(library, dep, new_dep))
+                print(f"{library}: {dep} -> {new_dep}")
                subprocess.check_output(
-                    [
-                        patchelf,
-                        '--replace-needed',
-                        dep,
-                        new_dep,
-                        new_library_name
-                    ],
-                    cwd=new_libraries_path)
-
-            print('Updating library rpath')
-            subprocess.check_output(
-                [
-                    patchelf,
-                    '--set-rpath',
-                    "$ORIGIN",
-                    new_library_name
-                ],
-                cwd=new_libraries_path)
-
-            subprocess.check_output(
-                [
-                    patchelf,
-                    '--print-rpath',
-                    new_library_name
-                ],
-                cwd=new_libraries_path)
+                    [patchelf, "--replace-needed", dep, new_dep, new_library_name], cwd=new_libraries_path
+                )
+
+            print("Updating library rpath")
+            subprocess.check_output([patchelf, "--set-rpath", "$ORIGIN", new_library_name], cwd=new_libraries_path)
+
+            subprocess.check_output([patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path)

    print("Update library dependencies")
    library_dependencies = binary_dependencies[binary]
    for dep in library_dependencies:
        new_dep = osp.basename(new_names[dep])
-        print('{0}: {1} -> {2}'.format(binary, dep, new_dep))
-        subprocess.check_output(
-            [
-                patchelf,
-                '--replace-needed',
-                dep,
-                new_dep,
-                binary
-            ],
-            cwd=output_library)
-
-    print('Update library rpath')
+        print(f"{binary}: {dep} -> {new_dep}")
+        subprocess.check_output([patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library)
+
+    print("Update library rpath")
    subprocess.check_output(
-        [
-            patchelf,
-            '--set-rpath',
-            "$ORIGIN:$ORIGIN/../torchvision.libs",
-            binary_path
-        ],
-        cwd=output_library
+        [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchvision.libs", binary_path], cwd=output_library
    )


@@ -265,7 +232,7 @@ def relocate_dll_library(dumpbin, output_dir, output_library, binary):
    Given a shared library, find the transitive closure of its dependencies,
    rename and copy them into the wheel.
    """
-    print('Relocating {0}'.format(binary))
+    print(f"Relocating {binary}")
    binary_path = osp.join(output_library, binary)

    library_dlls = find_dll_dependencies(dumpbin, binary_path)
@@ -275,19 +242,19 @@ def relocate_dll_library(dumpbin, output_dir, output_library, binary):

    while binary_queue != []:
        library, parent = binary_queue.pop(0)
-        if library in WINDOWS_ALLOWLIST or library.startswith('api-ms-win'):
-            print('Omitting {0}'.format(library))
+        if library in WINDOWS_ALLOWLIST or library.startswith("api-ms-win"):
+            print(f"Omitting {library}")
            continue

        library_path = find_program(library)
        if library_path is None:
-            print('{0} not found'.format(library))
+            print(f"{library} not found")
            continue

-        if osp.basename(osp.dirname(library_path)) == 'system32':
+        if osp.basename(osp.dirname(library_path)) == "system32":
            continue

-        print('{0}: {1}'.format(library, library_path))
+        print(f"{library}: {library_path}")
        parent_dependencies = binary_dependencies.get(parent, [])
        parent_dependencies.append(library)
        binary_dependencies[parent] = parent_dependencies
@@ -299,55 +266,54 @@ def relocate_dll_library(dumpbin, output_dir, output_library, binary):
        downstream_dlls = find_dll_dependencies(dumpbin, library_path)
        binary_queue += [(n, library) for n in downstream_dlls]

-    print('Copying dependencies to wheel directory')
-    package_dir = osp.join(output_dir, 'torchvision')
+    print("Copying dependencies to wheel directory")
+    package_dir = osp.join(output_dir, "torchvision")
    for library in binary_paths:
        if library != binary:
            library_path = binary_paths[library]
            new_library_path = osp.join(package_dir, library)
-            print('{0} -> {1}'.format(library, new_library_path))
+            print(f"{library} -> {new_library_path}")
            shutil.copyfile(library_path, new_library_path)


 def compress_wheel(output_dir, wheel, wheel_dir, wheel_name):
    """Create RECORD file and compress wheel distribution."""
-    print('Update RECORD file in wheel')
-    dist_info = glob.glob(osp.join(output_dir, '*.dist-info'))[0]
-    record_file = osp.join(dist_info, 'RECORD')
+    print("Update RECORD file in wheel")
+    dist_info = glob.glob(osp.join(output_dir, "*.dist-info"))[0]
+    record_file = osp.join(dist_info, "RECORD")

-    with open(record_file, 'w') as f:
+    with open(record_file, "w") as f:
        for root, _, files in os.walk(output_dir):
            for this_file in files:
                full_file = osp.join(root, this_file)
                rel_file = osp.relpath(full_file, output_dir)
                if full_file == record_file:
-                    f.write('{0},,\n'.format(rel_file))
+                    f.write(f"{rel_file},,\n")
                else:
                    digest, size = rehash(full_file)
-                    f.write('{0},{1},{2}\n'.format(rel_file, digest, size))
+                    f.write(f"{rel_file},{digest},{size}\n")

-    print('Compressing wheel')
+    print("Compressing wheel")
    base_wheel_name = osp.join(wheel_dir, wheel_name)
-    shutil.make_archive(base_wheel_name, 'zip', output_dir)
+    shutil.make_archive(base_wheel_name, "zip", output_dir)
    os.remove(wheel)
-    shutil.move('{0}.zip'.format(base_wheel_name), wheel)
+    shutil.move(f"{base_wheel_name}.zip", wheel)
    shutil.rmtree(output_dir)


 def patch_linux():
    # Get patchelf location
-    patchelf = find_program('patchelf')
+    patchelf = find_program("patchelf")
    if patchelf is None:
-        raise FileNotFoundError('Patchelf was not found in the system, please'
-                                ' make sure that is available on the PATH.')
+        raise FileNotFoundError("Patchelf was not found in the system, please make sure that is available on the PATH.")

    # Find wheel
-    print('Finding wheels...')
-    wheels = glob.glob(osp.join(PACKAGE_ROOT, 'dist', '*.whl'))
-    output_dir = osp.join(PACKAGE_ROOT, 'dist', '.wheel-process')
+    print("Finding wheels...")
+    wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl"))
+    output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process")

-    image_binary = 'image.so'
-    video_binary = 'video_reader.so'
+    image_binary = "image.so"
+    video_binary = "video_reader.so"
    torchvision_binaries = [image_binary, video_binary]
    for wheel in wheels:
        if osp.exists(output_dir):
@@ -355,37 +321,35 @@ def patch_linux():

        os.makedirs(output_dir)

-        print('Unzipping wheel...')
+        print("Unzipping wheel...")
        wheel_file = osp.basename(wheel)
        wheel_dir = osp.dirname(wheel)
-        print('{0}'.format(wheel_file))
+        print(f"{wheel_file}")
        wheel_name, _ = osp.splitext(wheel_file)
        unzip_file(wheel, output_dir)

-        print('Finding ELF dependencies...')
-        output_library = osp.join(output_dir, 'torchvision')
+        print("Finding ELF dependencies...")
+        output_library = osp.join(output_dir, "torchvision")
        for binary in torchvision_binaries:
            if osp.exists(osp.join(output_library, binary)):
-                relocate_elf_library(
-                    patchelf, output_dir, output_library, binary)
+                relocate_elf_library(patchelf, output_dir, output_library, binary)

        compress_wheel(output_dir, wheel, wheel_dir, wheel_name)


 def patch_win():
    # Get dumpbin location
-    dumpbin = find_program('dumpbin')
+    dumpbin = find_program("dumpbin")
    if dumpbin is None:
-        raise FileNotFoundError('Dumpbin was not found in the system, please'
-                                ' make sure that is available on the PATH.')
+        raise FileNotFoundError("Dumpbin was not found in the system, please make sure that is available on the PATH.")

    # Find wheel
-    print('Finding wheels...')
-    wheels = glob.glob(osp.join(PACKAGE_ROOT, 'dist', '*.whl'))
-    output_dir = osp.join(PACKAGE_ROOT, 'dist', '.wheel-process')
+    print("Finding wheels...")
+    wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl"))
+    output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process")

-    image_binary = 'image.pyd'
-    video_binary = 'video_reader.pyd'
+    image_binary = "image.pyd"
+    video_binary = "video_reader.pyd"
    torchvision_binaries = [image_binary, video_binary]
    for wheel in wheels:
        if osp.exists(output_dir):
@@ -393,25 +357,24 @@ def patch_win():

        os.makedirs(output_dir)

-        print('Unzipping wheel...')
+        print("Unzipping wheel...")
        wheel_file = osp.basename(wheel)
        wheel_dir = osp.dirname(wheel)
-        print('{0}'.format(wheel_file))
+        print(f"{wheel_file}")
        wheel_name, _ = osp.splitext(wheel_file)
        unzip_file(wheel, output_dir)

-        print('Finding DLL/PE dependencies...')
-        output_library = osp.join(output_dir, 'torchvision')
+        print("Finding DLL/PE dependencies...")
+        output_library = osp.join(output_dir, "torchvision")
        for binary in torchvision_binaries:
            if osp.exists(osp.join(output_library, binary)):
-                relocate_dll_library(
-                    dumpbin, output_dir, output_library, binary)
+                relocate_dll_library(dumpbin, output_dir, output_library, binary)

        compress_wheel(output_dir, wheel, wheel_dir, wheel_name)


-if __name__ == '__main__':
-    if sys.platform == 'linux':
+if __name__ == "__main__":
+    if sys.platform == "linux":
        patch_linux()
-    elif sys.platform == 'win32':
+    elif sys.platform == "win32":
        patch_win()
--- a/packaging/windows/internal/build_cpp_example.bat
+++ b/packaging/windows/internal/build_cpp_example.bat
 @echo on
 set CL=/I"C:\Program Files (x86)\torchvision\include"
-msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" hello-world.vcxproj -maxcpucount:%1
+msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" run_model.vcxproj -maxcpucount:%1
--- a/packaging/windows/internal/build_frcnn.bat
+++ b/packaging/windows/internal/build_frcnn.bat
-@echo on
-set CL=/I"C:\Program Files (x86)\torchvision\include"
-msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" test_frcnn_tracing.vcxproj -maxcpucount:%1
--- a/packaging/windows/internal/cuda_install.bat
+++ b/packaging/windows/internal/cuda_install.bat
-@echo on
-
-if "%CU_VERSION%" == "cpu" (
-    echo Skipping for CPU builds
-    exit /b 0
-)
-
-set SRC_DIR=%~dp0\..
-
-if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build"
-
-set /a CUDA_VER=%CU_VERSION:cu=%
-set CUDA_VER_MAJOR=%CUDA_VER:~0,-1%
-set CUDA_VER_MINOR=%CUDA_VER:~-1,1%
-set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR%
-
-if %CUDA_VER% EQU 92 goto cuda92
-if %CUDA_VER% EQU 100 goto cuda100
-if %CUDA_VER% EQU 101 goto cuda101
-if %CUDA_VER% EQU 102 goto cuda102
-if %CUDA_VER% EQU 110 goto cuda110
-if %CUDA_VER% EQU 111 goto cuda111
-if %CUDA_VER% EQU 112 goto cuda112
-
-echo CUDA %CUDA_VERSION_STR% is not supported
-exit /b 1
-
-:cuda92
-if not exist "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_9.2.148_win10.exe --output "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe"
-    if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe"
-    set "ARGS=nvcc_9.2 cuobjdump_9.2 nvprune_9.2 cupti_9.2 cublas_9.2 cublas_dev_9.2 cudart_9.2 cufft_9.2 cufft_dev_9.2 curand_9.2 curand_dev_9.2 cusolver_9.2 cusolver_dev_9.2 cusparse_9.2 cusparse_dev_9.2 nvgraph_9.2 nvgraph_dev_9.2 npp_9.2 npp_dev_9.2 nvrtc_9.2 nvrtc_dev_9.2 nvml_dev_9.2"
-)
-
-if not exist "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-9.2-windows10-x64-v7.2.1.38.zip --output "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip"
-    if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip"
-)
-
-goto cuda_common
-
-:cuda100
-
-if not exist "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_10.0.130_411.31_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe"
-    if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe"
-    set "ARGS=nvcc_10.0 cuobjdump_10.0 nvprune_10.0 cupti_10.0 cublas_10.0 cublas_dev_10.0 cudart_10.0 cufft_10.0 cufft_dev_10.0 curand_10.0 curand_dev_10.0 cusolver_10.0 cusolver_dev_10.0 cusparse_10.0 cusparse_dev_10.0 nvgraph_10.0 nvgraph_dev_10.0 npp_10.0 npp_dev_10.0 nvrtc_10.0 nvrtc_dev_10.0 nvml_dev_10.0"
-)
-
-if not exist "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-10.0-windows10-x64-v7.4.1.5.zip --output "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip"
-    if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip"
-)
-
-goto cuda_common
-
-:cuda101
-
-if not exist "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.1.243_426.00_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe"
-    if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe"
-    set "ARGS=nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1"
-)
-
-if not exist "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip --output "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip"
-    if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip"
-)
-
-goto cuda_common
-
-:cuda102
-
-if not exist "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.2.89_441.22_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe"
-    if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe"
-    set "ARGS=nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2"
-)
-
-if not exist "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.2-windows10-x64-v7.6.5.32.zip --output "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip"
-    if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip"
-)
-
-goto cuda_common
-
-:cuda110
-
-if not exist "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.0.2_451.48_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe"
-    if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe"
-    set "ARGS=nvcc_11.0 cuobjdump_11.0 nvprune_11.0 nvprof_11.0 cupti_11.0 cublas_11.0 cublas_dev_11.0 cudart_11.0 cufft_11.0 cufft_dev_11.0 curand_11.0 curand_dev_11.0 cusolver_11.0 cusolver_dev_11.0 cusparse_11.0 cusparse_dev_11.0 npp_11.0 npp_dev_11.0 nvrtc_11.0 nvrtc_dev_11.0 nvml_dev_11.0"
-)
-
-if not exist "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.0-windows-x64-v8.0.4.30.zip --output "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip"
-    if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip"
-)
-
-goto cuda_common
-
-:cuda111
-
-if not exist "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.1.0_456.43_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe"
-    if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe"
-    set "ARGS=nvcc_11.1 cuobjdump_11.1 nvprune_11.1 nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1"
-)
-
-@REM There is no downloadable driver for Tesla on CUDA 11.1 yet. We will use
-@REM the driver inside CUDA
-if "%JOB_EXECUTOR%" == "windows-with-nvidia-gpu" set "ARGS=%ARGS% Display.Driver"
-
-if not exist "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.1-windows-x64-v8.0.5.39.zip --output "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip"
-    if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip"
-)
-
-goto cuda_common
-
-:cuda112
-
-if not exist "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" (
-    curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.2.0_460.89_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe"
-    if errorlevel 1 exit /b 1
-    set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe"
-    set "ARGS=nvcc_11.2 cuobjdump_11.2 nvprune_11.2 nvprof_11.2 cupti_11.2 cublas_11.2 cublas_dev_11.2 cudart_11.2 cufft_11.2 cufft_dev_11.2 curand_11.2 curand_dev_11.2 cusolver_11.2 cusolver_dev_11.2 cusparse_11.2 cusparse_dev_11.2 npp_11.2 npp_dev_11.2 nvrtc_11.2 nvrtc_dev_11.2 nvml_dev_11.2"
-)
-
-if not exist "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" (
-    curl -k -L http://s3.amazonaws.com/ossci-windows/cudnn-11.2-windows-x64-v8.1.0.77.zip --output "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip"
-    if errorlevel 1 exit /b 1
-    set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip"
-)
-
-goto cuda_common
-
-:cuda_common
-
-if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" (
-    curl -k -L https://www.dropbox.com/s/9mcolalfdj4n979/NvToolsExt.7z?dl=1 --output "%SRC_DIR%\temp_build\NvToolsExt.7z"
-    if errorlevel 1 exit /b 1
-)
-
-if not exist "%SRC_DIR%\temp_build\gpu_driver_dlls.7z" (
-    curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "%SRC_DIR%\temp_build\gpu_driver_dlls.zip"
-    if errorlevel 1 exit /b 1
-)
-
-echo Installing CUDA toolkit...
-7z x %CUDA_SETUP_FILE% -o"%SRC_DIR%\temp_build\cuda"
-pushd "%SRC_DIR%\temp_build\cuda"
-start /wait setup.exe -s %ARGS%
-popd
-
-echo Installing VS integration...
-xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\Common7\IDE\VC\VCTargets\BuildCustomizations"
-
-echo Installing NvToolsExt...
-7z x %SRC_DIR%\temp_build\NvToolsExt.7z -o"%SRC_DIR%\temp_build\NvToolsExt"
-mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
-mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include"
-mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64"
-xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\bin\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
-xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\include\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include"
-xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\lib\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64"
-
-echo Setting up environment...
-set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%"
-set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
-set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
-set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
-
-if not exist "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" (
-    echo CUDA %CUDA_VERSION_STR% installed failed.
-    exit /b 1
-)
-
-echo Installing cuDNN...
-7z x %CUDNN_SETUP_FILE% -o"%SRC_DIR%\temp_build\cudnn"
-xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\bin\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin"
-xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\lib\x64\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\lib\x64"
-xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\include\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\include"
-
-echo Installing GPU driver DLLs
-7z x %SRC_DIR%\temp_build\gpu_driver_dlls.zip -o"C:\Windows\System32"
-
-echo Cleaning temp files
-rd /s /q "%SRC_DIR%\temp_build" || ver > nul
--- a/packaging/windows/internal/vc_env_helper.bat
+++ b/packaging/windows/internal/vc_env_helper.bat
 @echo on

-set VC_VERSION_LOWER=16
-set VC_VERSION_UPPER=17
+set VC_VERSION_LOWER=17
+set VC_VERSION_UPPER=18
+if "%VC_YEAR%" == "2019" (
+    set VC_VERSION_LOWER=16
+    set VC_VERSION_UPPER=17
+)
 if "%VC_YEAR%" == "2017" (
    set VC_VERSION_LOWER=15
    set VC_VERSION_UPPER=16

--- a/packaging/windows/internal/vc_install_helper.sh
+++ b/packaging/windows/internal/vc_install_helper.sh
-#!/bin/bash
-
-set -ex
-
-if [[ "$CU_VERSION" == "cu92" ]]; then
-  export VC_YEAR=2017
-  export VSDEVCMD_ARGS="-vcvars_ver=14.13"
-  powershell packaging/windows/internal/vs2017_install.ps1
-elif [[ "$CU_VERSION" == "cu100" ]]; then
-  export VC_YEAR=2017
-  export VSDEVCMD_ARGS=""
-  powershell packaging/windows/internal/vs2017_install.ps1
-else
-  export VC_YEAR=2019
-  export VSDEVCMD_ARGS=""
-fi
--- a/packaging/windows/internal/vs2017_install.ps1
+++ b/packaging/windows/internal/vs2017_install.ps1
-$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe"
-$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
-                                                     "--add Microsoft.VisualStudio.Component.VC.Tools.14.13",
-                                                     "--add Microsoft.Component.MSBuild",
-                                                     "--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
-                                                     "--add Microsoft.VisualStudio.Component.TextTemplating",
-                                                     "--add Microsoft.VisualStudio.Component.VC.CoreIde",
-                                                     "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
-                                                     "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
-                                                     "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
-                                                     "--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81")
-
-curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
-if ($LASTEXITCODE -ne 0) {
-    echo "Download of the VS 2017 installer failed"
-    exit 1
-}
-
-$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
-Remove-Item -Path vs_installer.exe -Force
-$exitCode = $process.ExitCode
-if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
-    echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]."
-    exit 1
-}
--- a/packaging/windows/internal/vs2019_install.ps1
+++ b/packaging/windows/internal/vs2019_install.ps1
-$VS_DOWNLOAD_LINK = "https://aka.ms/vs/16/release/vs_buildtools.exe"
-$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
-                                                     "--add Microsoft.Component.MSBuild",
-                                                     "--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
-                                                     "--add Microsoft.VisualStudio.Component.VC.CoreBuildTools",
-                                                     "--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
-                                                     "--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64")
-
-curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
-if ($LASTEXITCODE -ne 0) {
-    echo "Download of the VS 2019 installer failed"
-    exit 1
-}
-
-$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
-Remove-Item -Path vs_installer.exe -Force
-$exitCode = $process.ExitCode
-if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
-    echo "VS 2019 installer exited with code $exitCode, which should be one of [0, 3010]."
-    exit 1
-}
--- a/pyproject.toml
+++ b/pyproject.toml
+[tool.usort]
+
+first_party_detection = false
+
+[tool.black]
+
+line-length = 120
+target-version = ["py38"]
+
+[tool.ufmt]
+
+excludes = [
+    "gallery",
+]
+
+[build-system]
+
+requires = ["setuptools", "torch", "wheel"]
--- a/pytest.ini
+++ b/pytest.ini
+[pytest]
+addopts =
+    # show tests that (f)ailed, (E)rror, or (X)passed in the summary
+    -rfEX
+    # Make tracebacks shorter
+    --tb=short
+    # enable all warnings
+    -Wd
+    --ignore=test/test_datasets_download.py
+    --ignore-glob=test/test_prototype_*.py
+testpaths =
+    test
+xfail_strict = True
--- a/references/classification/README.md
+++ b/references/classification/README.md
@@ -20,35 +20,56 @@ the following parameters:
 ### AlexNet and VGG

 Since `AlexNet` and the original `VGG` architectures do not include batch 
-normalization, the default initial learning rate `--lr 0.1` is to high.
+normalization, the default initial learning rate `--lr 0.1` is too high.

 ```
-python main.py --model $MODEL --lr 1e-2
+torchrun --nproc_per_node=8 train.py\
+    --model $MODEL --lr 1e-2
 ```

 Here `$MODEL` is one of `alexnet`, `vgg11`, `vgg13`, `vgg16` or `vgg19`. Note
 that `vgg11_bn`, `vgg13_bn`, `vgg16_bn`, and `vgg19_bn` include batch
 normalization and thus are trained with the default parameters.

-### ResNext-50 32x4d
+### GoogLeNet
+
+The weights of the GoogLeNet model are ported from the original paper rather than trained from scratch.
+
+### Inception V3
+
+The weights of the Inception V3 model are ported from the original paper rather than trained from scratch.
+
+Since it expects tensors with a size of N x 3 x 299 x 299, to validate the model use the following command:
+
 ```
-python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
-    --model resnext50_32x4d --epochs 100
+torchrun --nproc_per_node=8 train.py --model inception_v3\
+      --test-only --weights Inception_V3_Weights.IMAGENET1K_V1
 ```

+### ResNet
+```
+torchrun --nproc_per_node=8 train.py --model $MODEL
+```

-### ResNext-101 32x8d
+Here `$MODEL` is one of `resnet18`, `resnet34`, `resnet50`, `resnet101` or `resnet152`.

-On 8 nodes, each with 8 GPUs (for a total of 64 GPUS)
+### ResNext
 ```
-python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
-    --model resnext101_32x8d --epochs 100
+torchrun --nproc_per_node=8 train.py\
+    --model $MODEL --epochs 100
 ```

+Here `$MODEL` is one of `resnext50_32x4d` or `resnext101_32x8d`.
+Note that the above command corresponds to a single node with 8 GPUs. If you use
+a different number of GPUs and/or a different batch size, then the learning rate
+should be scaled accordingly. For example, the pretrained model provided by
+`torchvision` was trained on 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
+with `--batch-size 16` and `--lr 0.4`, instead of the current defaults
+which are respectively batch_size=32 and lr=0.1.

 ### MobileNetV2
 ```
-python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
+torchrun --nproc_per_node=8 train.py\
     --model mobilenet_v2 --epochs 300 --lr 0.045 --wd 0.00004\
     --lr-step-size 1 --lr-gamma 0.98
 ```
@@ -56,7 +77,7 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\

 ### MobileNetV3 Large & Small
 ```
-python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
+torchrun --nproc_per_node=8 train.py\
     --model $MODEL --epochs 600 --opt rmsprop --batch-size 128 --lr 0.064\
     --wd 0.00001 --lr-step-size 2 --lr-gamma 0.973 --auto-augment imagenet --random-erase 0.2
 ```
@@ -67,37 +88,236 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@
 and [#3354](https://github.com/pytorch/vision/pull/3354) for details.


+### EfficientNet-V1
+
+The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108).
+
+The weights of the B5-B7 variants are ported from Luke Melas' [EfficientNet-PyTorch repo](https://github.com/lukemelas/EfficientNet-PyTorch/blob/1039e009545d9329ea026c9f7541341439712b96/efficientnet_pytorch/utils.py#L562-L564).
+
+All models were trained using Bicubic interpolation and each have custom crop and resize sizes. To validate the models use the following commands:
+```
+torchrun --nproc_per_node=8 train.py --model efficientnet_b0 --test-only --weights EfficientNet_B0_Weights.IMAGENET1K_V1
+torchrun --nproc_per_node=8 train.py --model efficientnet_b1 --test-only --weights EfficientNet_B1_Weights.IMAGENET1K_V1
+torchrun --nproc_per_node=8 train.py --model efficientnet_b2 --test-only --weights EfficientNet_B2_Weights.IMAGENET1K_V1
+torchrun --nproc_per_node=8 train.py --model efficientnet_b3 --test-only --weights EfficientNet_B3_Weights.IMAGENET1K_V1
+torchrun --nproc_per_node=8 train.py --model efficientnet_b4 --test-only --weights EfficientNet_B4_Weights.IMAGENET1K_V1
+torchrun --nproc_per_node=8 train.py --model efficientnet_b5 --test-only --weights EfficientNet_B5_Weights.IMAGENET1K_V1
+torchrun --nproc_per_node=8 train.py --model efficientnet_b6 --test-only --weights EfficientNet_B6_Weights.IMAGENET1K_V1
+torchrun --nproc_per_node=8 train.py --model efficientnet_b7 --test-only --weights EfficientNet_B7_Weights.IMAGENET1K_V1
+```
+
+
+### EfficientNet-V2
+```
+torchrun --nproc_per_node=8 train.py \
+--model $MODEL --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr \
+--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
+--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.00002 --norm-weight-decay 0.0 \
+--train-crop-size $TRAIN_SIZE --model-ema --val-crop-size $EVAL_SIZE --val-resize-size $EVAL_SIZE \
+--ra-sampler --ra-reps 4
+```
+Here `$MODEL` is one of `efficientnet_v2_s` and `efficientnet_v2_m`. 
+Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the Medium variant used `384` and `480` respectively.
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 4 nodes, each with 8 GPUs (for a total of 32 GPUs),
+and `--batch-size 32`.
+
+The weights of the Large variant are ported from the original paper rather than trained from scratch. See the `EfficientNet_V2_L_Weights` entry for their exact preprocessing transforms.
+
+
+### RegNet
+
+#### Small models
+```
+torchrun --nproc_per_node=8 train.py\
+     --model $MODEL --epochs 100 --batch-size 128 --wd 0.00005 --lr=0.8\
+     --lr-scheduler=cosineannealinglr --lr-warmup-method=linear\
+     --lr-warmup-epochs=5 --lr-warmup-decay=0.1
+```
+Here `$MODEL` is one of `regnet_x_400mf`, `regnet_x_800mf`, `regnet_x_1_6gf`, `regnet_y_400mf`, `regnet_y_800mf` and `regnet_y_1_6gf`. Please note we used learning rate 0.4 for `regnet_y_400mf` to get the same Acc@1 as [the paper](https://arxiv.org/abs/2003.13678).
+
+#### Medium models
+```
+torchrun --nproc_per_node=8 train.py\
+     --model $MODEL --epochs 100 --batch-size 64 --wd 0.00005 --lr=0.4\
+     --lr-scheduler=cosineannealinglr --lr-warmup-method=linear\
+     --lr-warmup-epochs=5 --lr-warmup-decay=0.1
+```
+Here `$MODEL` is one of `regnet_x_3_2gf`, `regnet_x_8gf`, `regnet_x_16gf`, `regnet_y_3_2gf` and `regnet_y_8gf`.
+
+#### Large models
+```
+torchrun --nproc_per_node=8 train.py\
+     --model $MODEL --epochs 100 --batch-size 32 --wd 0.00005 --lr=0.2\
+     --lr-scheduler=cosineannealinglr --lr-warmup-method=linear\
+     --lr-warmup-epochs=5 --lr-warmup-decay=0.1
+```
+Here `$MODEL` is one of `regnet_x_32gf`, `regnet_y_16gf` and `regnet_y_32gf`.
+
+### Vision Transformer
+
+#### vit_b_16
+```
+torchrun --nproc_per_node=8 train.py\
+    --model vit_b_16 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
+    --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
+    --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
+    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
+and `--batch-size 64`.
+
+#### vit_b_32
+```
+torchrun --nproc_per_node=8 train.py\
+    --model vit_b_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
+    --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
+    --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment imagenet\
+    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
+and `--batch-size 256`.
+
+#### vit_l_16
+```
+torchrun --nproc_per_node=8 train.py\
+    --model vit_l_16 --epochs 600 --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr\
+    --lr-warmup-method linear --lr-warmup-epochs 5 --label-smoothing 0.1 --mixup-alpha 0.2\
+    --auto-augment ta_wide --random-erase 0.1 --weight-decay 0.00002 --norm-weight-decay 0.0\
+    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema --val-resize-size 232
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
+and `--batch-size 64`.
+
+#### vit_l_32
+```
+torchrun --nproc_per_node=8 train.py\
+    --model vit_l_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
+    --lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
+    --lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
+    --clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
+```
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
+and `--batch-size 64`.
+
+
+### ConvNeXt
+```
+torchrun --nproc_per_node=8 train.py \
+--model $MODEL --batch-size 128 --opt adamw --lr 1e-3 --lr-scheduler cosineannealinglr \
+--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
+--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.05 --norm-weight-decay 0.0 \
+--train-crop-size 176 --model-ema --val-resize-size 232 --ra-sampler --ra-reps 4
+```
+Here `$MODEL` is one of `convnext_tiny`, `convnext_small`, `convnext_base` and `convnext_large`. Note that each variant had its `--val-resize-size` optimized in a post-training step, see their `Weights` entry for their exact value.
+
+Note that the above command corresponds to training on a single node with 8 GPUs.
+For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
+and `--batch-size 64`.
+
+
+### SwinTransformer
+```
+torchrun --nproc_per_node=8 train.py \
+--model $MODEL --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0  --bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear  --lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ta_wide --model-ema --ra-sampler --ra-reps 4  --val-resize-size 224
+```
+Here `$MODEL` is one of `swin_t`, `swin_s` or `swin_b`.
+Note that `--val-resize-size` was optimized in a post-training step, see their `Weights` entry for the exact value.
+
+
+
+
+### SwinTransformer V2
+```
+torchrun --nproc_per_node=8 train.py \
+--model $MODEL --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0  --bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear  --lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ta_wide --model-ema --ra-sampler --ra-reps 4  --val-resize-size 256 --val-crop-size 256 --train-crop-size 256 
+```
+Here `$MODEL` is one of `swin_v2_t`, `swin_v2_s` or `swin_v2_b`.
+Note that `--val-resize-size` was optimized in a post-training step, see their `Weights` entry for the exact value.
+
+
+### MaxViT
+```
+torchrun --nproc_per_node=8 --nnodes=4 train.py \
+--model $MODEL --epochs 400 --batch-size 128 --opt adamw --lr 3e-3 --weight-decay 0.05 --lr-scheduler cosineannealinglr --lr-min 1e-5 --lr-warmup-method linear  --lr-warmup-epochs 32  --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 1.0 --interpolation bicubic --auto-augment ta_wide --policy-magnitude 15 --model-ema --val-resize-size 224 \
+--val-crop-size 224 --train-crop-size 224 --amp  --model-ema-steps 32 --transformer-embedding-decay 0 --sync-bn
+```
+Here `$MODEL` is `maxvit_t`.
+Note that `--val-resize-size` was not optimized in a post-training step.
+
+
+### ShuffleNet V2
+```
+torchrun --nproc_per_node=8 train.py \
+--model=$MODEL --batch-size=128 \
+--lr=0.5 --lr-scheduler=cosineannealinglr --lr-warmup-epochs=5 --lr-warmup-method=linear \
+--auto-augment=ta_wide --epochs=600 --random-erase=0.1 --weight-decay=0.00002 \
+--norm-weight-decay=0.0 --label-smoothing=0.1 --mixup-alpha=0.2 --cutmix-alpha=1.0 \
+--train-crop-size=176 --model-ema --val-resize-size=232 --ra-sampler --ra-reps=4
+```
+Here `$MODEL` is either `shufflenet_v2_x1_5` or `shufflenet_v2_x2_0`.
+
+The models `shufflenet_v2_x0_5` and `shufflenet_v2_x1_0` were contributed by the community. See [PR-849](https://github.com/pytorch/vision/pull/849#issuecomment-483391686) for details.
+
+
 ## Mixed precision training
-Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex).
+Automatic Mixed Precision (AMP) training on GPU for PyTorch can be enabled with [torch.cuda.amp](https://pytorch.org/docs/stable/amp.html?highlight=amp#module-torch.cuda.amp).

-Mixed precision training makes use of both FP32 and FP16 precisions where appropriate. FP16 operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput, generally without loss in model accuracy. Mixed precision training also often allows larger batch sizes. GPU automatic mixed precision training for Pytorch Vision can be enabled via the flag value `--apex=True`.
+Mixed precision training makes use of both FP32 and FP16 precisions where appropriate. FP16 operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput, generally without loss in model accuracy. Mixed precision training also often allows larger batch sizes. GPU automatic mixed precision training for Pytorch Vision can be enabled via the `--amp` flag.

 ```
-python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
-    --model resnext50_32x4d --epochs 100 --apex
+torchrun --nproc_per_node=8 train.py\
+    --model resnext50_32x4d --epochs 100 --amp
 ```

 ## Quantized

-### Parameters used for generating quantized models:
+### Post training quantized models

-For all post training quantized models (All quantized models except mobilenet-v2), the settings are:
+For all post training quantized models, the settings are:

 1. num_calibration_batches: 32
 2. num_workers: 16
 3. batch_size: 32
 4. eval_batch_size: 128
-5. backend: 'fbgemm'
+5. qbackend: 'fbgemm'
+
+```
+python train_quantization.py --device='cpu' --post-training-quantize --qbackend='fbgemm' --model="$MODEL"
+```
+Here `$MODEL` is one of `googlenet`, `inception_v3`, `resnet18`, `resnet50`, `resnext101_32x8d`, `shufflenet_v2_x0_5` and `shufflenet_v2_x1_0`.

+### Quantized ShuffleNet V2
+
+Here are commands that we use to quantize the `shufflenet_v2_x1_5` and `shufflenet_v2_x2_0` models.
 ```
-python train_quantization.py --device='cpu' --post-training-quantize --backend='fbgemm' --model='<model_name>'
+# For shufflenet_v2_x1_5
+python train_quantization.py --device='cpu' --post-training-quantize --qbackend='fbgemm' \
+    --model=shufflenet_v2_x1_5 --weights="ShuffleNet_V2_X1_5_Weights.IMAGENET1K_V1" \
+    --train-crop-size 176 --val-resize-size 232 --data-path /datasets01_ontap/imagenet_full_size/061417/
+
+# For shufflenet_v2_x2_0
+python train_quantization.py --device='cpu' --post-training-quantize --qbackend='fbgemm' \
+    --model=shufflenet_v2_x2_0 --weights="ShuffleNet_V2_X2_0_Weights.IMAGENET1K_V1" \
+    --train-crop-size 176 --val-resize-size 232 --data-path /datasets01_ontap/imagenet_full_size/061417/
 ```

+### QAT MobileNetV2
+
 For Mobilenet-v2, the model was trained with quantization aware training, the settings used are:
 1. num_workers: 16
 2. batch_size: 32
 3. eval_batch_size: 128
-4. backend: 'qnnpack'
+4. qbackend: 'qnnpack'
 5. learning-rate: 0.0001
 6. num_epochs: 90
 7. num_observer_update_epochs:4
@@ -108,16 +328,18 @@ For Mobilenet-v2, the model was trained with quantization aware training, the se
 12. weight-decay: 0.0001

 ```
-python -m torch.distributed.launch --nproc_per_node=8 --use_env train_quantization.py --model='mobilenet_v2'
+torchrun --nproc_per_node=8 train_quantization.py --model='mobilenet_v2'
 ```

 Training converges at about 10 epochs.

+### QAT MobileNetV3
+
 For Mobilenet-v3 Large, the model was trained with quantization aware training, the settings used are:
 1. num_workers: 16
 2. batch_size: 32
 3. eval_batch_size: 128
-4. backend: 'qnnpack'
+4. qbackend: 'qnnpack'
 5. learning-rate: 0.001
 6. num_epochs: 90
 7. num_observer_update_epochs:4
@@ -128,7 +350,7 @@ For Mobilenet-v3 Large, the model was trained with quantization aware training,
 12. weight-decay: 0.00001

 ```
-python -m torch.distributed.launch --nproc_per_node=8 --use_env train_quantization.py --model='mobilenet_v3_large' \
+torchrun --nproc_per_node=8 train_quantization.py --model='mobilenet_v3_large' \
    --wd 0.00001 --lr 0.001
 ```

@@ -137,6 +359,10 @@ For post training quant, device is set to CPU. For training, the device is set t
 ### Command to evaluate quantized models using the pre-trained weights:

 ```
-python train_quantization.py --device='cpu' --test-only --backend='<backend>' --model='<model_name>'
+python train_quantization.py --device='cpu' --test-only --qbackend='<qbackend>' --model='<model_name>'
 ```

+For inception_v3 you need to pass the following extra parameters:
+```
+--val-resize-size 342 --val-crop-size 299 --train-crop-size 299
+```