Commit bf491463 authored by limm's avatar limm
Browse files

add v0.19.1 release

parent e17f5ea2
# conda_build_config.yaml for the vs2017 compiler metapackage.
blas_impl:
  - mkl  # [x86_64]
c_compiler:
  - vs2017  # [win]
cxx_compiler:
  - vs2017  # [win]
python:
  # Quoted so YAML keeps these as strings; unquoted they load as floats,
  # which breaks for versions like 3.10 (-> 3.1).
  - "3.5"
  - "3.6"
# This differs from target_platform in that it determines what subdir the compiler
# will target, not what subdir the compiler package will be itself.
# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32
# code on win-64 miniconda.
cross_compiler_target_platform:
  - win-64  # [win]
target_platform:
  - win-64  # [win]
vc:
  # Quoted: this is a label, not a number.
  - "14"
# Keep vc and the two compiler entries zipped together on Windows so
# conda-build varies them as one unit instead of taking a cross product.
zip_keys:
  -  # [win]
    - vc  # [win]
    - c_compiler  # [win]
    - cxx_compiler  # [win]
rem install_activate.bat -- install an activate.d hook that configures the
rem MSVC v141 toolchain each time the conda environment is activated.
set YEAR=2017
set VER=15
rem Install activate.bat as the compiler-vars hook for this VS version;
rem the IF blocks below then append generator/vcvars commands to that hook.
mkdir "%PREFIX%\etc\conda\activate.d"
COPY "%RECIPE_DIR%\activate.bat" "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
rem 64-bit target: use the Win64 CMake generator and chain to vcvarsall with
rem either the native x64 or the x86_amd64 cross toolchain arguments.
rem NOTE - the doubled %%VSINSTALLDIR%% is intentional: it must survive into
rem the generated hook as %VSINSTALLDIR% and expand at activation time.
IF "%cross_compiler_target_platform%" == "win-64" (
set "target_platform=amd64"
echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR% Win64" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
IF "%VSDEVCMD_ARGS%" == "" (
echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
) ELSE (
echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo CALL "VC\Auxiliary\Build\vcvarsall.bat" x86_amd64 %VSDEVCMD_ARGS% >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
)
echo popd >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
rem 32-bit target: plain generator plus vcvars32.
rem NOTE - review - the final "echo popd" here lacks the >> redirection used
rem everywhere else, so popd is printed to the console instead of being
rem appended to the hook; looks like a bug - confirm against the win-64 branch.
) else (
set "target_platform=x86"
echo SET "CMAKE_GENERATOR=Visual Studio %VER% %YEAR%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo pushd "%%VSINSTALLDIR%%" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo popd
)
rem install_runtime.bat (vs2017) -- copy the Windows 10 SDK UCRT DLLs and the
rem MSVC v141 redistributable DLLs into the environment so built binaries can
rem run without a local Visual Studio installation.
set VC_PATH=x86
if "%ARCH%"=="64" (
set VC_PATH=x64
)
set MSC_VER=2017
rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015
rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO (
rem set SP=%%A
rem )
rem if not "%SP%" == "%PKG_VERSION%" (
rem echo "Version detected from registry: %SP%"
rem echo "does not match version of package being built (%PKG_VERSION%)"
rem echo "Do you have current updates for VS 2015 installed?"
rem exit 1
rem )
REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below!
robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E
robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
REM ========== This one comes from visual studio 2017
set "VC_VER=141"
rem Locate a VS 2017 install via vswhere and remember its vcvarsall.bat.
rem FIX: was "goto :eof" inside the loop, which terminates the entire script
rem and skipped every redist copy below; jump to a local label instead.
for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [15^,16^) -property installationPath`) do (
if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
goto :vcvarsall_found
)
)
:vcvarsall_found
@setlocal
rem FIX: was calling VS15VARSALL, a variable that is never set anywhere;
rem the loop above sets VS15VCVARSALL.
call "%VS15VCVARSALL%" x64
set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%"
rem robocopy exit codes below 8 mean success. FIX: previous code ran
rem "if %ERRORLEVEL% LSS 8 exit 0" after each copy, which exited the script
rem after the FIRST successful copy; fail only on codes GEQ 8 instead,
rem matching the UCRT copies above.
robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
@endlocal
{% set vcver="14.1" %}
{% set vcfeature="14" %}
{% set vsyear="2017" %}
{% set fullver="15.4.27004.2010" %}

package:
  name: vs{{ vsyear }}
  version: {{ fullver }}

build:
  # FIX: "[not win]" was missing the leading "#", so it was parsed as part of
  # the value instead of acting as a conda-build selector comment.
  skip: true  # [not win]
  script_env:
    - VSDEVCMD_ARGS  # [win]

outputs:
  - name: vs{{ vsyear }}_{{ cross_compiler_target_platform }}
    script: install_activate.bat
    track_features:
    # VS 2017 is binary-compatible with VS 2015/vc14. Tools are "v141".
    strong:
      - vc{{ vcfeature }}

about:
  summary: Activation and version verification of MSVC {{ vcver }} (VS {{ vsyear }}) compiler
  license: BSD 3-clause
...@@ -5,8 +5,7 @@ c_compiler: ...@@ -5,8 +5,7 @@ c_compiler:
cxx_compiler: cxx_compiler:
- vs2019 # [win] - vs2019 # [win]
python: python:
- 3.5 - 3.8
- 3.6
# This differs from target_platform in that it determines what subdir the compiler # This differs from target_platform in that it determines what subdir the compiler
# will target, not what subdir the compiler package will be itself. # will target, not what subdir the compiler package will be itself.
# For example, we need a win-64 vs2008_win-32 package, so that we compile win-32 # For example, we need a win-64 vs2008_win-32 package, so that we compile win-32
......
...@@ -27,4 +27,3 @@ IF "%cross_compiler_target_platform%" == "win-64" ( ...@@ -27,4 +27,3 @@ IF "%cross_compiler_target_platform%" == "win-64" (
echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat" echo CALL "VC\Auxiliary\Build\vcvars32.bat" >> "%PREFIX%\etc\conda\activate.d\vs%YEAR%_compiler_vars.bat"
echo popd echo popd
) )
rem install_runtime.bat (vs2019) -- copy the Windows 10 SDK UCRT DLLs and the
rem MSVC v142 redistributable DLLs into the environment so built binaries can
rem run without a local Visual Studio installation.
set VC_PATH=x86
if "%ARCH%"=="64" (
set VC_PATH=x64
)
set MSC_VER=2019
rem :: This should always be present for VC installed with VS. Not sure about VC installed with Visual C++ Build Tools 2015
rem FOR /F "usebackq tokens=3*" %%A IN (`REG QUERY "HKEY_LOCAL_MACHINE\Software\Microsoft\DevDiv\VC\Servicing\14.0\IDE.x64" /v UpdateVersion`) DO (
rem set SP=%%A
rem )
rem if not "%SP%" == "%PKG_VERSION%" (
rem echo "Version detected from registry: %SP%"
rem echo "does not match version of package being built (%PKG_VERSION%)"
rem echo "Do you have current updates for VS 2015 installed?"
rem exit 1
rem )
REM ========== REQUIRES Win 10 SDK be installed, or files otherwise copied to location below!
robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%LIBRARY_BIN%" *.dll /E
robocopy "C:\Program Files (x86)\Windows Kits\10\Redist\ucrt\DLLs\%VC_PATH%" "%PREFIX%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
REM ========== This one comes from visual studio 2019
set "VC_VER=142"
rem Locate a VS 2019 install via vswhere; the VS15VCVARSALL name is
rem historical but kept because it is only used within this script.
rem FIX: was "goto :eof" inside the loop, which terminates the entire script
rem and skipped every redist copy below; jump to a local label instead.
for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [16^,17^) -property installationPath`) do (
if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
goto :vcvarsall_found
)
)
:vcvarsall_found
@setlocal
rem FIX: was calling VS15VARSALL, a variable that is never set anywhere;
rem the loop above sets VS15VCVARSALL.
call "%VS15VCVARSALL%" x64
set "REDIST_ROOT=%VCToolsRedistDir%%VC_PATH%"
rem robocopy exit codes below 8 mean success. FIX: previous code ran
rem "if %ERRORLEVEL% LSS 8 exit 0" after each copy, which exited the script
rem after the FIRST successful copy; fail only on codes GEQ 8 instead,
rem matching the UCRT copies above.
robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%LIBRARY_BIN%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.CRT" "%PREFIX%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%LIBRARY_BIN%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
robocopy "%REDIST_ROOT%\Microsoft.VC%VC_VER%.OpenMP" "%PREFIX%" *.dll /E
if %ERRORLEVEL% GEQ 8 exit 1
@endlocal
#!/bin/bash
# Build torchvision wheels against the current torch nightly for one
# CUDA/CPU variant, once per python installation found in /opt/python
# (manylinux-style image). Wheels land in /remote/$CUVER.
set -ex
if [ "$#" -ne 1 ]; then
    echo "Illegal number of parameters. Pass cuda version"
    # FIX: the old message ("cu92, cu100 or cpu") predated cu101/cu102
    # support; the default-variant check below is against cu102.
    echo "CUDA version should be cu92, cu100, cu101, cu102 or cpu"
    exit 1
fi
export CUVER="$1" # cu[0-9]* cpu
# cu102 is the default variant and carries no local-version suffix;
# every other variant gets "+<variant>" appended to the wheel version.
if [[ "$CUVER" == "cu102" ]]; then
    cu_suffix=""
else
    cu_suffix="+$CUVER"
fi
export TORCHVISION_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")${cu_suffix}"
export TORCHVISION_BUILD_NUMBER="1"
export TORCHVISION_LOCAL_VERSION_LABEL="$CUVER"
export OUT_DIR="/remote/$CUVER"
# Collect every python installation shipped in the image.
pushd /opt/python
DESIRED_PYTHON=(*/)
popd
for desired_py in "${DESIRED_PYTHON[@]}"; do
    python_installations+=("/opt/python/$desired_py")
done
OLD_PATH=$PATH
cd /tmp
rm -rf vision
git clone https://github.com/pytorch/vision
cd /tmp/vision
for PYDIR in "${python_installations[@]}"; do
    # Put this python first on PATH so pip/python below resolve to it.
    export PATH=$PYDIR/bin:$OLD_PATH
    pip install --upgrade pip
    pip install numpy pyyaml future
    pip uninstall -y torch || true
    pip uninstall -y torch_nightly || true
    export TORCHVISION_PYTORCH_DEPENDENCY_NAME=torch_nightly
    pip install torch_nightly -f https://download.pytorch.org/whl/nightly/$CUVER/torch_nightly.html
    # CPU/CUDA variants of PyTorch have ABI compatible PyTorch for
    # the CPU only bits. Therefore, we
    # strip off the local package qualifier, but ONLY if we're
    # doing a CPU build.
    if [[ "$CUVER" == "cpu" ]]; then
        export TORCHVISION_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//' | sed 's/+.\+//')"
    else
        export TORCHVISION_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: \+//')"
    fi
    echo "Building against ${TORCHVISION_PYTORCH_DEPENDENCY_VERSION}"
    pip install ninja
    python setup.py clean
    python setup.py bdist_wheel
    # Quoted to be safe against any future OUT_DIR containing spaces.
    mkdir -p "$OUT_DIR"
    cp dist/*.whl "$OUT_DIR/"
done
# Build macOS torchvision wheels in a throwaway Miniconda install,
# one wheel per supported python version; wheels land in ~/torchvision_wheels.
# Refuse to run over an existing conda on PATH (it would shadow the
# throwaway install created below).
if [[ ":$PATH:" == *"conda"* ]]; then
    echo "existing anaconda install in PATH, remove it and run script"
    exit 1
fi
# download and activate anaconda
# ("minconda" is a long-standing typo in the env path; kept as-is because it
# is used consistently below)
rm -rf ~/minconda_wheel_env_tmp
wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh && \
    chmod +x Miniconda3-latest-MacOSX-x86_64.sh && \
    ./Miniconda3-latest-MacOSX-x86_64.sh -b -p ~/minconda_wheel_env_tmp && \
    rm Miniconda3-latest-MacOSX-x86_64.sh
. ~/minconda_wheel_env_tmp/bin/activate
export TORCHVISION_BUILD_VERSION="0.4.0.dev$(date "+%Y%m%d")"
export TORCHVISION_BUILD_NUMBER="1"
export OUT_DIR=~/torchvision_wheels
export MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++
pushd /tmp
rm -rf vision
git clone https://github.com/pytorch/vision
pushd vision
desired_pythons=( "2.7" "3.5" "3.6" "3.7" )
# for each python
for desired_python in "${desired_pythons[@]}"
do
    # create and activate python env
    env_name="env$desired_python"
    conda create -yn $env_name python="$desired_python"
    conda activate $env_name
    pip uninstall -y torch || true
    pip uninstall -y torch_nightly || true
    export TORCHVISION_PYTORCH_DEPENDENCY_NAME=torch_nightly
    pip install torch_nightly -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
    export TORCHVISION_PYTORCH_DEPENDENCY_VERSION="$(pip show torch_nightly | grep ^Version: | sed 's/Version: *//')"
    # FIX: was TORCHAUDIO_PYTORCH_DEPENDENCY_VERSION (copy/paste from the
    # torchaudio build script), which is never set here -- the variable
    # exported just above is TORCHVISION_PYTORCH_DEPENDENCY_VERSION.
    echo "Building against ${TORCHVISION_PYTORCH_DEPENDENCY_VERSION}"
    # install torchvision dependencies
    pip install ninja scipy pytest
    python setup.py clean
    python setup.py bdist_wheel
    # Quoted to be safe against paths containing spaces.
    mkdir -p "$OUT_DIR"
    cp dist/*.whl "$OUT_DIR/"
done
popd
popd
# -*- coding: utf-8 -*-
"""Helper script to package wheels and relocate binaries.""" """Helper script to package wheels and relocate binaries."""
# Standard library imports
import os
import io
import sys
import glob import glob
import shutil
import zipfile
import hashlib import hashlib
# Standard library imports
import os
import os.path as osp
import platform import platform
import shutil
import subprocess import subprocess
import os.path as osp import sys
import zipfile
from base64 import urlsafe_b64encode from base64 import urlsafe_b64encode
# Third party imports # Third party imports
if sys.platform == 'linux': if sys.platform == "linux":
from auditwheel.lddtree import lddtree from auditwheel.lddtree import lddtree
from wheel.bdist_wheel import get_abi_tag
ALLOWLIST = { ALLOWLIST = {
'libgcc_s.so.1', 'libstdc++.so.6', 'libm.so.6', "libgcc_s.so.1",
'libdl.so.2', 'librt.so.1', 'libc.so.6', "libstdc++.so.6",
'libnsl.so.1', 'libutil.so.1', 'libpthread.so.0', "libm.so.6",
'libresolv.so.2', 'libX11.so.6', 'libXext.so.6', "libdl.so.2",
'libXrender.so.1', 'libICE.so.6', 'libSM.so.6', "librt.so.1",
'libGL.so.1', 'libgobject-2.0.so.0', 'libgthread-2.0.so.0', "libc.so.6",
'libglib-2.0.so.0', 'ld-linux-x86-64.so.2', 'ld-2.17.so' "libnsl.so.1",
"libutil.so.1",
"libpthread.so.0",
"libresolv.so.2",
"libX11.so.6",
"libXext.so.6",
"libXrender.so.1",
"libICE.so.6",
"libSM.so.6",
"libGL.so.1",
"libgobject-2.0.so.0",
"libgthread-2.0.so.0",
"libglib-2.0.so.0",
"ld-linux-x86-64.so.2",
"ld-2.17.so",
} }
WINDOWS_ALLOWLIST = { WINDOWS_ALLOWLIST = {
'MSVCP140.dll', 'KERNEL32.dll', "MSVCP140.dll",
'VCRUNTIME140_1.dll', 'VCRUNTIME140.dll', "KERNEL32.dll",
'api-ms-win-crt-heap-l1-1-0.dll', "VCRUNTIME140_1.dll",
'api-ms-win-crt-runtime-l1-1-0.dll', "VCRUNTIME140.dll",
'api-ms-win-crt-stdio-l1-1-0.dll', "api-ms-win-crt-heap-l1-1-0.dll",
'api-ms-win-crt-filesystem-l1-1-0.dll', "api-ms-win-crt-runtime-l1-1-0.dll",
'api-ms-win-crt-string-l1-1-0.dll', "api-ms-win-crt-stdio-l1-1-0.dll",
'api-ms-win-crt-environment-l1-1-0.dll', "api-ms-win-crt-filesystem-l1-1-0.dll",
'api-ms-win-crt-math-l1-1-0.dll', "api-ms-win-crt-string-l1-1-0.dll",
'api-ms-win-crt-convert-l1-1-0.dll' "api-ms-win-crt-environment-l1-1-0.dll",
"api-ms-win-crt-math-l1-1-0.dll",
"api-ms-win-crt-convert-l1-1-0.dll",
} }
...@@ -51,33 +64,22 @@ PLATFORM_ARCH = platform.machine() ...@@ -51,33 +64,22 @@ PLATFORM_ARCH = platform.machine()
PYTHON_VERSION = sys.version_info PYTHON_VERSION = sys.version_info
def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE):
"""Yield pieces of data from a file-like object until EOF."""
while True:
chunk = file.read(size)
if not chunk:
break
yield chunk
def rehash(path, blocksize=1 << 20): def rehash(path, blocksize=1 << 20):
"""Return (hash, length) for path using hashlib.sha256()""" """Return (hash, length) for path using hashlib.sha256()"""
h = hashlib.sha256() h = hashlib.sha256()
length = 0 length = 0
with open(path, 'rb') as f: with open(path, "rb") as f:
for block in read_chunks(f, size=blocksize): while block := f.read(blocksize):
length += len(block) length += len(block)
h.update(block) h.update(block)
digest = 'sha256=' + urlsafe_b64encode( digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=")
h.digest()
).decode('latin1').rstrip('=')
# unicode/str python2 issues # unicode/str python2 issues
return (digest, str(length)) # type: ignore return (digest, str(length)) # type: ignore
def unzip_file(file, dest): def unzip_file(file, dest):
"""Decompress zip `file` into directory `dest`.""" """Decompress zip `file` into directory `dest`."""
with zipfile.ZipFile(file, 'r') as zip_ref: with zipfile.ZipFile(file, "r") as zip_ref:
zip_ref.extractall(dest) zip_ref.extractall(dest)
...@@ -88,8 +90,7 @@ def is_program_installed(basename): ...@@ -88,8 +90,7 @@ def is_program_installed(basename):
On macOS systems, a .app is considered installed if On macOS systems, a .app is considered installed if
it exists. it exists.
""" """
if (sys.platform == 'darwin' and basename.endswith('.app') and if sys.platform == "darwin" and basename.endswith(".app") and osp.exists(basename):
osp.exists(basename)):
return basename return basename
for path in os.environ["PATH"].split(os.pathsep): for path in os.environ["PATH"].split(os.pathsep):
...@@ -105,9 +106,9 @@ def find_program(basename): ...@@ -105,9 +106,9 @@ def find_program(basename):
(return None if not found) (return None if not found)
""" """
names = [basename] names = [basename]
if os.name == 'nt': if os.name == "nt":
# Windows platforms # Windows platforms
extensions = ('.exe', '.bat', '.cmd', '.dll') extensions = (".exe", ".bat", ".cmd", ".dll")
if not basename.endswith(extensions): if not basename.endswith(extensions):
names = [basename + ext for ext in extensions] + [basename] names = [basename + ext for ext in extensions] + [basename]
for name in names: for name in names:
...@@ -118,19 +119,18 @@ def find_program(basename): ...@@ -118,19 +119,18 @@ def find_program(basename):
def patch_new_path(library_path, new_dir): def patch_new_path(library_path, new_dir):
library = osp.basename(library_path) library = osp.basename(library_path)
name, *rest = library.split('.') name, *rest = library.split(".")
rest = '.'.join(rest) rest = ".".join(rest)
hash_id = hashlib.sha256(library_path.encode('utf-8')).hexdigest()[:8] hash_id = hashlib.sha256(library_path.encode("utf-8")).hexdigest()[:8]
new_name = '.'.join([name, hash_id, rest]) new_name = ".".join([name, hash_id, rest])
return osp.join(new_dir, new_name) return osp.join(new_dir, new_name)
def find_dll_dependencies(dumpbin, binary): def find_dll_dependencies(dumpbin, binary):
out = subprocess.run([dumpbin, "/dependents", binary], out = subprocess.run([dumpbin, "/dependents", binary], stdout=subprocess.PIPE)
stdout=subprocess.PIPE) out = out.stdout.strip().decode("utf-8")
out = out.stdout.strip().decode('utf-8') start_index = out.find("dependencies:") + len("dependencies:")
start_index = out.find('dependencies:') + len('dependencies:') end_index = out.find("Summary")
end_index = out.find('Summary')
dlls = out[start_index:end_index].strip() dlls = out[start_index:end_index].strip()
dlls = dlls.split(os.linesep) dlls = dlls.split(os.linesep)
dlls = [dll.strip() for dll in dlls] dlls = [dll.strip() for dll in dlls]
...@@ -145,13 +145,13 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary): ...@@ -145,13 +145,13 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
rename and copy them into the wheel while updating their respective rpaths. rename and copy them into the wheel while updating their respective rpaths.
""" """
print('Relocating {0}'.format(binary)) print(f"Relocating {binary}")
binary_path = osp.join(output_library, binary) binary_path = osp.join(output_library, binary)
ld_tree = lddtree(binary_path) ld_tree = lddtree(binary_path)
tree_libs = ld_tree['libs'] tree_libs = ld_tree["libs"]
binary_queue = [(n, binary) for n in ld_tree['needed']] binary_queue = [(n, binary) for n in ld_tree["needed"]]
binary_paths = {binary: binary_path} binary_paths = {binary: binary_path}
binary_dependencies = {} binary_dependencies = {}
...@@ -160,13 +160,13 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary): ...@@ -160,13 +160,13 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
library_info = tree_libs[library] library_info = tree_libs[library]
print(library) print(library)
if library_info['path'] is None: if library_info["path"] is None:
print('Omitting {0}'.format(library)) print(f"Omitting {library}")
continue continue
if library in ALLOWLIST: if library in ALLOWLIST:
# Omit glibc/gcc/system libraries # Omit glibc/gcc/system libraries
print('Omitting {0}'.format(library)) print(f"Omitting {library}")
continue continue
parent_dependencies = binary_dependencies.get(parent, []) parent_dependencies = binary_dependencies.get(parent, [])
...@@ -176,12 +176,12 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary): ...@@ -176,12 +176,12 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
if library in binary_paths: if library in binary_paths:
continue continue
binary_paths[library] = library_info['path'] binary_paths[library] = library_info["path"]
binary_queue += [(n, library) for n in library_info['needed']] binary_queue += [(n, library) for n in library_info["needed"]]
print('Copying dependencies to wheel directory') print("Copying dependencies to wheel directory")
new_libraries_path = osp.join(output_dir, 'torchvision.libs') new_libraries_path = osp.join(output_dir, "torchvision.libs")
os.makedirs(new_libraries_path) os.makedirs(new_libraries_path, exist_ok=True)
new_names = {binary: binary_path} new_names = {binary: binary_path}
...@@ -189,11 +189,11 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary): ...@@ -189,11 +189,11 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
if library != binary: if library != binary:
library_path = binary_paths[library] library_path = binary_paths[library]
new_library_path = patch_new_path(library_path, new_libraries_path) new_library_path = patch_new_path(library_path, new_libraries_path)
print('{0} -> {1}'.format(library, new_library_path)) print(f"{library} -> {new_library_path}")
shutil.copyfile(library_path, new_library_path) shutil.copyfile(library_path, new_library_path)
new_names[library] = new_library_path new_names[library] = new_library_path
print('Updating dependency names by new files') print("Updating dependency names by new files")
for library in binary_paths: for library in binary_paths:
if library != binary: if library != binary:
if library not in binary_dependencies: if library not in binary_dependencies:
...@@ -202,59 +202,26 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary): ...@@ -202,59 +202,26 @@ def relocate_elf_library(patchelf, output_dir, output_library, binary):
new_library_name = new_names[library] new_library_name = new_names[library]
for dep in library_dependencies: for dep in library_dependencies:
new_dep = osp.basename(new_names[dep]) new_dep = osp.basename(new_names[dep])
print('{0}: {1} -> {2}'.format(library, dep, new_dep)) print(f"{library}: {dep} -> {new_dep}")
subprocess.check_output( subprocess.check_output(
[ [patchelf, "--replace-needed", dep, new_dep, new_library_name], cwd=new_libraries_path
patchelf, )
'--replace-needed',
dep, print("Updating library rpath")
new_dep, subprocess.check_output([patchelf, "--set-rpath", "$ORIGIN", new_library_name], cwd=new_libraries_path)
new_library_name
], subprocess.check_output([patchelf, "--print-rpath", new_library_name], cwd=new_libraries_path)
cwd=new_libraries_path)
print('Updating library rpath')
subprocess.check_output(
[
patchelf,
'--set-rpath',
"$ORIGIN",
new_library_name
],
cwd=new_libraries_path)
subprocess.check_output(
[
patchelf,
'--print-rpath',
new_library_name
],
cwd=new_libraries_path)
print("Update library dependencies") print("Update library dependencies")
library_dependencies = binary_dependencies[binary] library_dependencies = binary_dependencies[binary]
for dep in library_dependencies: for dep in library_dependencies:
new_dep = osp.basename(new_names[dep]) new_dep = osp.basename(new_names[dep])
print('{0}: {1} -> {2}'.format(binary, dep, new_dep)) print(f"{binary}: {dep} -> {new_dep}")
subprocess.check_output( subprocess.check_output([patchelf, "--replace-needed", dep, new_dep, binary], cwd=output_library)
[
patchelf, print("Update library rpath")
'--replace-needed',
dep,
new_dep,
binary
],
cwd=output_library)
print('Update library rpath')
subprocess.check_output( subprocess.check_output(
[ [patchelf, "--set-rpath", "$ORIGIN:$ORIGIN/../torchvision.libs", binary_path], cwd=output_library
patchelf,
'--set-rpath',
"$ORIGIN:$ORIGIN/../torchvision.libs",
binary_path
],
cwd=output_library
) )
...@@ -265,7 +232,7 @@ def relocate_dll_library(dumpbin, output_dir, output_library, binary): ...@@ -265,7 +232,7 @@ def relocate_dll_library(dumpbin, output_dir, output_library, binary):
Given a shared library, find the transitive closure of its dependencies, Given a shared library, find the transitive closure of its dependencies,
rename and copy them into the wheel. rename and copy them into the wheel.
""" """
print('Relocating {0}'.format(binary)) print(f"Relocating {binary}")
binary_path = osp.join(output_library, binary) binary_path = osp.join(output_library, binary)
library_dlls = find_dll_dependencies(dumpbin, binary_path) library_dlls = find_dll_dependencies(dumpbin, binary_path)
...@@ -275,19 +242,19 @@ def relocate_dll_library(dumpbin, output_dir, output_library, binary): ...@@ -275,19 +242,19 @@ def relocate_dll_library(dumpbin, output_dir, output_library, binary):
while binary_queue != []: while binary_queue != []:
library, parent = binary_queue.pop(0) library, parent = binary_queue.pop(0)
if library in WINDOWS_ALLOWLIST or library.startswith('api-ms-win'): if library in WINDOWS_ALLOWLIST or library.startswith("api-ms-win"):
print('Omitting {0}'.format(library)) print(f"Omitting {library}")
continue continue
library_path = find_program(library) library_path = find_program(library)
if library_path is None: if library_path is None:
print('{0} not found'.format(library)) print(f"{library} not found")
continue continue
if osp.basename(osp.dirname(library_path)) == 'system32': if osp.basename(osp.dirname(library_path)) == "system32":
continue continue
print('{0}: {1}'.format(library, library_path)) print(f"{library}: {library_path}")
parent_dependencies = binary_dependencies.get(parent, []) parent_dependencies = binary_dependencies.get(parent, [])
parent_dependencies.append(library) parent_dependencies.append(library)
binary_dependencies[parent] = parent_dependencies binary_dependencies[parent] = parent_dependencies
...@@ -299,55 +266,54 @@ def relocate_dll_library(dumpbin, output_dir, output_library, binary): ...@@ -299,55 +266,54 @@ def relocate_dll_library(dumpbin, output_dir, output_library, binary):
downstream_dlls = find_dll_dependencies(dumpbin, library_path) downstream_dlls = find_dll_dependencies(dumpbin, library_path)
binary_queue += [(n, library) for n in downstream_dlls] binary_queue += [(n, library) for n in downstream_dlls]
print('Copying dependencies to wheel directory') print("Copying dependencies to wheel directory")
package_dir = osp.join(output_dir, 'torchvision') package_dir = osp.join(output_dir, "torchvision")
for library in binary_paths: for library in binary_paths:
if library != binary: if library != binary:
library_path = binary_paths[library] library_path = binary_paths[library]
new_library_path = osp.join(package_dir, library) new_library_path = osp.join(package_dir, library)
print('{0} -> {1}'.format(library, new_library_path)) print(f"{library} -> {new_library_path}")
shutil.copyfile(library_path, new_library_path) shutil.copyfile(library_path, new_library_path)
def compress_wheel(output_dir, wheel, wheel_dir, wheel_name): def compress_wheel(output_dir, wheel, wheel_dir, wheel_name):
"""Create RECORD file and compress wheel distribution.""" """Create RECORD file and compress wheel distribution."""
print('Update RECORD file in wheel') print("Update RECORD file in wheel")
dist_info = glob.glob(osp.join(output_dir, '*.dist-info'))[0] dist_info = glob.glob(osp.join(output_dir, "*.dist-info"))[0]
record_file = osp.join(dist_info, 'RECORD') record_file = osp.join(dist_info, "RECORD")
with open(record_file, 'w') as f: with open(record_file, "w") as f:
for root, _, files in os.walk(output_dir): for root, _, files in os.walk(output_dir):
for this_file in files: for this_file in files:
full_file = osp.join(root, this_file) full_file = osp.join(root, this_file)
rel_file = osp.relpath(full_file, output_dir) rel_file = osp.relpath(full_file, output_dir)
if full_file == record_file: if full_file == record_file:
f.write('{0},,\n'.format(rel_file)) f.write(f"{rel_file},,\n")
else: else:
digest, size = rehash(full_file) digest, size = rehash(full_file)
f.write('{0},{1},{2}\n'.format(rel_file, digest, size)) f.write(f"{rel_file},{digest},{size}\n")
print('Compressing wheel') print("Compressing wheel")
base_wheel_name = osp.join(wheel_dir, wheel_name) base_wheel_name = osp.join(wheel_dir, wheel_name)
shutil.make_archive(base_wheel_name, 'zip', output_dir) shutil.make_archive(base_wheel_name, "zip", output_dir)
os.remove(wheel) os.remove(wheel)
shutil.move('{0}.zip'.format(base_wheel_name), wheel) shutil.move(f"{base_wheel_name}.zip", wheel)
shutil.rmtree(output_dir) shutil.rmtree(output_dir)
def patch_linux(): def patch_linux():
# Get patchelf location # Get patchelf location
patchelf = find_program('patchelf') patchelf = find_program("patchelf")
if patchelf is None: if patchelf is None:
raise FileNotFoundError('Patchelf was not found in the system, please' raise FileNotFoundError("Patchelf was not found in the system, please make sure that is available on the PATH.")
' make sure that is available on the PATH.')
# Find wheel # Find wheel
print('Finding wheels...') print("Finding wheels...")
wheels = glob.glob(osp.join(PACKAGE_ROOT, 'dist', '*.whl')) wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl"))
output_dir = osp.join(PACKAGE_ROOT, 'dist', '.wheel-process') output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process")
image_binary = 'image.so' image_binary = "image.so"
video_binary = 'video_reader.so' video_binary = "video_reader.so"
torchvision_binaries = [image_binary, video_binary] torchvision_binaries = [image_binary, video_binary]
for wheel in wheels: for wheel in wheels:
if osp.exists(output_dir): if osp.exists(output_dir):
...@@ -355,37 +321,35 @@ def patch_linux(): ...@@ -355,37 +321,35 @@ def patch_linux():
os.makedirs(output_dir) os.makedirs(output_dir)
print('Unzipping wheel...') print("Unzipping wheel...")
wheel_file = osp.basename(wheel) wheel_file = osp.basename(wheel)
wheel_dir = osp.dirname(wheel) wheel_dir = osp.dirname(wheel)
print('{0}'.format(wheel_file)) print(f"{wheel_file}")
wheel_name, _ = osp.splitext(wheel_file) wheel_name, _ = osp.splitext(wheel_file)
unzip_file(wheel, output_dir) unzip_file(wheel, output_dir)
print('Finding ELF dependencies...') print("Finding ELF dependencies...")
output_library = osp.join(output_dir, 'torchvision') output_library = osp.join(output_dir, "torchvision")
for binary in torchvision_binaries: for binary in torchvision_binaries:
if osp.exists(osp.join(output_library, binary)): if osp.exists(osp.join(output_library, binary)):
relocate_elf_library( relocate_elf_library(patchelf, output_dir, output_library, binary)
patchelf, output_dir, output_library, binary)
compress_wheel(output_dir, wheel, wheel_dir, wheel_name) compress_wheel(output_dir, wheel, wheel_dir, wheel_name)
def patch_win(): def patch_win():
# Get dumpbin location # Get dumpbin location
dumpbin = find_program('dumpbin') dumpbin = find_program("dumpbin")
if dumpbin is None: if dumpbin is None:
raise FileNotFoundError('Dumpbin was not found in the system, please' raise FileNotFoundError("Dumpbin was not found in the system, please make sure that is available on the PATH.")
' make sure that is available on the PATH.')
# Find wheel # Find wheel
print('Finding wheels...') print("Finding wheels...")
wheels = glob.glob(osp.join(PACKAGE_ROOT, 'dist', '*.whl')) wheels = glob.glob(osp.join(PACKAGE_ROOT, "dist", "*.whl"))
output_dir = osp.join(PACKAGE_ROOT, 'dist', '.wheel-process') output_dir = osp.join(PACKAGE_ROOT, "dist", ".wheel-process")
image_binary = 'image.pyd' image_binary = "image.pyd"
video_binary = 'video_reader.pyd' video_binary = "video_reader.pyd"
torchvision_binaries = [image_binary, video_binary] torchvision_binaries = [image_binary, video_binary]
for wheel in wheels: for wheel in wheels:
if osp.exists(output_dir): if osp.exists(output_dir):
...@@ -393,25 +357,24 @@ def patch_win(): ...@@ -393,25 +357,24 @@ def patch_win():
os.makedirs(output_dir) os.makedirs(output_dir)
print('Unzipping wheel...') print("Unzipping wheel...")
wheel_file = osp.basename(wheel) wheel_file = osp.basename(wheel)
wheel_dir = osp.dirname(wheel) wheel_dir = osp.dirname(wheel)
print('{0}'.format(wheel_file)) print(f"{wheel_file}")
wheel_name, _ = osp.splitext(wheel_file) wheel_name, _ = osp.splitext(wheel_file)
unzip_file(wheel, output_dir) unzip_file(wheel, output_dir)
print('Finding DLL/PE dependencies...') print("Finding DLL/PE dependencies...")
output_library = osp.join(output_dir, 'torchvision') output_library = osp.join(output_dir, "torchvision")
for binary in torchvision_binaries: for binary in torchvision_binaries:
if osp.exists(osp.join(output_library, binary)): if osp.exists(osp.join(output_library, binary)):
relocate_dll_library( relocate_dll_library(dumpbin, output_dir, output_library, binary)
dumpbin, output_dir, output_library, binary)
compress_wheel(output_dir, wheel, wheel_dir, wheel_name) compress_wheel(output_dir, wheel, wheel_dir, wheel_name)
if __name__ == '__main__': if __name__ == "__main__":
if sys.platform == 'linux': if sys.platform == "linux":
patch_linux() patch_linux()
elif sys.platform == 'win32': elif sys.platform == "win32":
patch_win() patch_win()
@echo on
set CL=/I"C:\Program Files (x86)\torchvision\include"
msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" run_model.vcxproj -maxcpucount:%1
@echo on
REM Add the installed torchvision headers to the MSVC include path for this build.
set CL=/I"C:\Program Files (x86)\torchvision\include"
REM Build the FRCNN tracing test project; %1 is the parallel job count used for
REM both per-project compilation (CL_MPCount) and msbuild's project parallelism.
msbuild "-p:Configuration=Release" "-p:BuildInParallel=true" "-p:MultiProcessorCompilation=true" "-p:CL_MPCount=%1" test_frcnn_tracing.vcxproj -maxcpucount:%1
@echo on
REM Installs the CUDA toolkit + cuDNN on Windows CI. The version is selected by
REM %CU_VERSION% (e.g. "cu102"). CPU-only builds need no toolkit, so bail out early.
if "%CU_VERSION%" == "cpu" (
echo Skipping for CPU builds
exit /b 0
)
REM %~dp0 is this script's directory; downloads are staged in <repo>\temp_build.
set SRC_DIR=%~dp0\..
if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build"
REM Strip the "cu" prefix to get a numeric version, e.g. cu102 -> 102.
set /a CUDA_VER=%CU_VERSION:cu=%
REM Split into major.minor; assumes a single-digit minor (102 -> 10.2, 110 -> 11.0).
set CUDA_VER_MAJOR=%CUDA_VER:~0,-1%
set CUDA_VER_MINOR=%CUDA_VER:~-1,1%
set CUDA_VERSION_STR=%CUDA_VER_MAJOR%.%CUDA_VER_MINOR%
REM Dispatch to the matching download section below.
if %CUDA_VER% EQU 92 goto cuda92
if %CUDA_VER% EQU 100 goto cuda100
if %CUDA_VER% EQU 101 goto cuda101
if %CUDA_VER% EQU 102 goto cuda102
if %CUDA_VER% EQU 110 goto cuda110
if %CUDA_VER% EQU 111 goto cuda111
if %CUDA_VER% EQU 112 goto cuda112
echo CUDA %CUDA_VERSION_STR% is not supported
exit /b 1
REM Each section below downloads (if not already cached in temp_build) the CUDA
REM installer and matching cuDNN archive, sets CUDA_SETUP_FILE / CUDNN_SETUP_FILE
REM and the ARGS component list for the silent installer, then jumps to :cuda_common.
REM --- CUDA 9.2 ---
:cuda92
if not exist "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe" (
curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_9.2.148_win10.exe --output "%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe"
if errorlevel 1 exit /b 1
set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_9.2.148_win10.exe"
set "ARGS=nvcc_9.2 cuobjdump_9.2 nvprune_9.2 cupti_9.2 cublas_9.2 cublas_dev_9.2 cudart_9.2 cufft_9.2 cufft_dev_9.2 curand_9.2 curand_dev_9.2 cusolver_9.2 cusolver_dev_9.2 cusparse_9.2 cusparse_dev_9.2 nvgraph_9.2 nvgraph_dev_9.2 npp_9.2 npp_dev_9.2 nvrtc_9.2 nvrtc_dev_9.2 nvml_dev_9.2"
)
if not exist "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip" (
curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-9.2-windows10-x64-v7.2.1.38.zip --output "%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip"
if errorlevel 1 exit /b 1
set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-9.2-windows10-x64-v7.2.1.38.zip"
)
goto cuda_common
REM --- CUDA 10.0 ---
:cuda100
if not exist "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe" (
curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cuda_10.0.130_411.31_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe"
if errorlevel 1 exit /b 1
set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.0.130_411.31_win10.exe"
set "ARGS=nvcc_10.0 cuobjdump_10.0 nvprune_10.0 cupti_10.0 cublas_10.0 cublas_dev_10.0 cudart_10.0 cufft_10.0 cufft_dev_10.0 curand_10.0 curand_dev_10.0 cusolver_10.0 cusolver_dev_10.0 cusparse_10.0 cusparse_dev_10.0 nvgraph_10.0 nvgraph_dev_10.0 npp_10.0 npp_dev_10.0 nvrtc_10.0 nvrtc_dev_10.0 nvml_dev_10.0"
)
if not exist "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip" (
curl -k -L https://ossci-windows.s3.amazonaws.com/win2016/cudnn-10.0-windows10-x64-v7.4.1.5.zip --output "%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip"
if errorlevel 1 exit /b 1
set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.0-windows10-x64-v7.4.1.5.zip"
)
goto cuda_common
REM --- CUDA 10.1 ---
:cuda101
if not exist "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.1.243_426.00_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe"
if errorlevel 1 exit /b 1
set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.1.243_426.00_win10.exe"
set "ARGS=nvcc_10.1 cuobjdump_10.1 nvprune_10.1 cupti_10.1 cublas_10.1 cublas_dev_10.1 cudart_10.1 cufft_10.1 cufft_dev_10.1 curand_10.1 curand_dev_10.1 cusolver_10.1 cusolver_dev_10.1 cusparse_10.1 cusparse_dev_10.1 nvgraph_10.1 nvgraph_dev_10.1 npp_10.1 npp_dev_10.1 nvrtc_10.1 nvrtc_dev_10.1 nvml_dev_10.1"
)
if not exist "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.1-windows10-x64-v7.6.4.38.zip --output "%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip"
if errorlevel 1 exit /b 1
set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.1-windows10-x64-v7.6.4.38.zip"
)
goto cuda_common
REM --- CUDA 10.2 ---
:cuda102
if not exist "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_10.2.89_441.22_win10.exe --output "%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe"
if errorlevel 1 exit /b 1
set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_10.2.89_441.22_win10.exe"
set "ARGS=nvcc_10.2 cuobjdump_10.2 nvprune_10.2 cupti_10.2 cublas_10.2 cublas_dev_10.2 cudart_10.2 cufft_10.2 cufft_dev_10.2 curand_10.2 curand_dev_10.2 cusolver_10.2 cusolver_dev_10.2 cusparse_10.2 cusparse_dev_10.2 nvgraph_10.2 nvgraph_dev_10.2 npp_10.2 npp_dev_10.2 nvrtc_10.2 nvrtc_dev_10.2 nvml_dev_10.2"
)
if not exist "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-10.2-windows10-x64-v7.6.5.32.zip --output "%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip"
if errorlevel 1 exit /b 1
set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-10.2-windows10-x64-v7.6.5.32.zip"
)
goto cuda_common
REM --- CUDA 11.0 (nvgraph dropped, nvprof added vs. the 10.x component lists) ---
:cuda110
if not exist "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.0.2_451.48_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe"
if errorlevel 1 exit /b 1
set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.0.2_451.48_win10.exe"
set "ARGS=nvcc_11.0 cuobjdump_11.0 nvprune_11.0 nvprof_11.0 cupti_11.0 cublas_11.0 cublas_dev_11.0 cudart_11.0 cufft_11.0 cufft_dev_11.0 curand_11.0 curand_dev_11.0 cusolver_11.0 cusolver_dev_11.0 cusparse_11.0 cusparse_dev_11.0 npp_11.0 npp_dev_11.0 nvrtc_11.0 nvrtc_dev_11.0 nvml_dev_11.0"
)
if not exist "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.0-windows-x64-v8.0.4.30.zip --output "%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip"
if errorlevel 1 exit /b 1
set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.0-windows-x64-v8.0.4.30.zip"
)
goto cuda_common
REM --- CUDA 11.1 ---
:cuda111
if not exist "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.1.0_456.43_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe"
if errorlevel 1 exit /b 1
set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.1.0_456.43_win10.exe"
set "ARGS=nvcc_11.1 cuobjdump_11.1 nvprune_11.1 nvprof_11.1 cupti_11.1 cublas_11.1 cublas_dev_11.1 cudart_11.1 cufft_11.1 cufft_dev_11.1 curand_11.1 curand_dev_11.1 cusolver_11.1 cusolver_dev_11.1 cusparse_11.1 cusparse_dev_11.1 npp_11.1 npp_dev_11.1 nvrtc_11.1 nvrtc_dev_11.1 nvml_dev_11.1"
)
@REM There is no downloadable driver for Tesla on CUDA 11.1 yet. We will use
@REM the driver inside CUDA
if "%JOB_EXECUTOR%" == "windows-with-nvidia-gpu" set "ARGS=%ARGS% Display.Driver"
if not exist "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.1-windows-x64-v8.0.5.39.zip --output "%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip"
if errorlevel 1 exit /b 1
set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.1-windows-x64-v8.0.5.39.zip"
)
goto cuda_common
REM --- CUDA 11.2 ---
:cuda112
if not exist "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cuda_11.2.0_460.89_win10.exe --output "%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe"
if errorlevel 1 exit /b 1
set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\cuda_11.2.0_460.89_win10.exe"
set "ARGS=nvcc_11.2 cuobjdump_11.2 nvprune_11.2 nvprof_11.2 cupti_11.2 cublas_11.2 cublas_dev_11.2 cudart_11.2 cufft_11.2 cufft_dev_11.2 curand_11.2 curand_dev_11.2 cusolver_11.2 cusolver_dev_11.2 cusparse_11.2 cusparse_dev_11.2 npp_11.2 npp_dev_11.2 nvrtc_11.2 nvrtc_dev_11.2 nvml_dev_11.2"
)
REM Fetch cuDNN over HTTPS like every other download here (this one previously
REM used plain http://, which allows tampering in transit).
if not exist "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip" (
curl -k -L https://ossci-windows.s3.amazonaws.com/cudnn-11.2-windows-x64-v8.1.0.77.zip --output "%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip"
if errorlevel 1 exit /b 1
set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\cudnn-11.2-windows-x64-v8.1.0.77.zip"
)
goto cuda_common
REM Shared steps for all CUDA versions: fetch NvToolsExt and the GPU driver DLLs.
:cuda_common
if not exist "%SRC_DIR%\temp_build\NvToolsExt.7z" (
curl -k -L https://www.dropbox.com/s/9mcolalfdj4n979/NvToolsExt.7z?dl=1 --output "%SRC_DIR%\temp_build\NvToolsExt.7z"
if errorlevel 1 exit /b 1
)
REM Bug fix: the guard used to test for gpu_driver_dlls.7z while curl wrote
REM gpu_driver_dlls.zip, so the cache check never matched and the archive was
REM re-downloaded on every run. Check the filename that is actually written.
if not exist "%SRC_DIR%\temp_build\gpu_driver_dlls.zip" (
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "%SRC_DIR%\temp_build\gpu_driver_dlls.zip"
if errorlevel 1 exit /b 1
)
echo Installing CUDA toolkit...
REM Archive paths are quoted so the script survives spaces in %SRC_DIR%
REM (the setup files live under %SRC_DIR%\temp_build).
7z x "%CUDA_SETUP_FILE%" -o"%SRC_DIR%\temp_build\cuda"
pushd "%SRC_DIR%\temp_build\cuda"
REM Silent install of only the components listed in %ARGS%.
start /wait setup.exe -s %ARGS%
popd
echo Installing VS integration...
xcopy /Y "%SRC_DIR%\temp_build\cuda\CUDAVisualStudioIntegration\extras\visual_studio_integration\MSBuildExtensions\*.*" "C:\Program Files (x86)\Microsoft Visual Studio\2017\BuildTools\Common7\IDE\VC\VCTargets\BuildCustomizations"
echo Installing NvToolsExt...
7z x "%SRC_DIR%\temp_build\NvToolsExt.7z" -o"%SRC_DIR%\temp_build\NvToolsExt"
mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include"
mkdir "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64"
xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\bin\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\include\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\include"
xcopy /Y "%SRC_DIR%\temp_build\NvToolsExt\lib\x64\*.*" "%ProgramFiles%\NVIDIA Corporation\NvToolsExt\lib\x64"
echo Setting up environment...
REM Expose nvcc and the CUDA libraries to subsequent build steps.
set "PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin;%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\libnvvp;%PATH%"
set "CUDA_PATH=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
set "CUDA_PATH_V%CUDA_VER_MAJOR%_%CUDA_VER_MINOR%=%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%"
set "NVTOOLSEXT_PATH=%ProgramFiles%\NVIDIA Corporation\NvToolsExt\bin\x64"
REM Sanity check: nvcc.exe must exist if the silent install succeeded.
if not exist "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin\nvcc.exe" (
echo CUDA %CUDA_VERSION_STR% installation failed.
exit /b 1
)
echo Installing cuDNN...
REM cuDNN ships as bin/lib/include folders that overlay the toolkit directory.
7z x "%CUDNN_SETUP_FILE%" -o"%SRC_DIR%\temp_build\cudnn"
xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\bin\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\bin"
xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\lib\x64\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\lib\x64"
xcopy /Y "%SRC_DIR%\temp_build\cudnn\cuda\include\*.*" "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%\include"
echo Installing GPU driver DLLs
7z x "%SRC_DIR%\temp_build\gpu_driver_dlls.zip" -o"C:\Windows\System32"
echo Cleaning temp files
REM rd may fail if a file is still held open; "|| ver > nul" swallows the error
REM so cleanup failure does not fail the build (ver resets ERRORLEVEL to 0).
rd /s /q "%SRC_DIR%\temp_build" || ver > nul
@echo on
set VC_VERSION_LOWER=17
set VC_VERSION_UPPER=18
if "%VC_YEAR%" == "2019" (
    set VC_VERSION_LOWER=16
    set VC_VERSION_UPPER=17
)
if "%VC_YEAR%" == "2017" (
    set VC_VERSION_LOWER=15
    set VC_VERSION_UPPER=16
)
#!/bin/bash
set -ex

# Select the Visual Studio toolchain matching the requested CUDA version.
# The two oldest CUDA releases build with VS2017 and run its installer
# script first; everything else uses VS2019.
case "$CU_VERSION" in
    cu92)
        export VC_YEAR=2017
        # CUDA 9.2 additionally pins the v14.13 compiler toolset.
        export VSDEVCMD_ARGS="-vcvars_ver=14.13"
        powershell packaging/windows/internal/vs2017_install.ps1
        ;;
    cu100)
        export VC_YEAR=2017
        export VSDEVCMD_ARGS=""
        powershell packaging/windows/internal/vs2017_install.ps1
        ;;
    *)
        export VC_YEAR=2019
        export VSDEVCMD_ARGS=""
        ;;
esac
# Installs the Visual Studio 2017 (v15) Build Tools via the official
# bootstrapper, with the C++ components needed to build native extensions.
$VS_DOWNLOAD_LINK = "https://aka.ms/vs/15/release/vs_buildtools.exe"
# Silent-install component list; VC.Tools.14.13 matches the -vcvars_ver=14.13
# toolset requested by the surrounding build scripts for CUDA 9.2.
$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
"--add Microsoft.VisualStudio.Component.VC.Tools.14.13",
"--add Microsoft.Component.MSBuild",
"--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
"--add Microsoft.VisualStudio.Component.TextTemplating",
"--add Microsoft.VisualStudio.Component.VC.CoreIde",
"--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
"--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Core",
"--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64",
"--add Microsoft.VisualStudio.ComponentGroup.NativeDesktop.Win81")
# Download the bootstrapper (-L follows redirects, -k skips cert validation).
curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS 2017 installer failed"
exit 1
}
# Run the installer synchronously and capture its exit code.
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
Remove-Item -Path vs_installer.exe -Force
$exitCode = $process.ExitCode
# 3010 is the standard "success, reboot required" code and is treated as success.
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "VS 2017 installer exited with code $exitCode, which should be one of [0, 3010]."
exit 1
}
# Installs the Visual Studio 2019 (v16) Build Tools via the official
# bootstrapper, with a smaller C++ component set than the 2017 script.
$VS_DOWNLOAD_LINK = "https://aka.ms/vs/16/release/vs_buildtools.exe"
# Silent-install component list for the C++ build tools workload.
$VS_INSTALL_ARGS = @("--nocache","--quiet","--wait", "--add Microsoft.VisualStudio.Workload.VCTools",
"--add Microsoft.Component.MSBuild",
"--add Microsoft.VisualStudio.Component.Roslyn.Compiler",
"--add Microsoft.VisualStudio.Component.VC.CoreBuildTools",
"--add Microsoft.VisualStudio.Component.VC.Redist.14.Latest",
"--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64")
# Download the bootstrapper (-L follows redirects, -k skips cert validation).
curl.exe --retry 3 -kL $VS_DOWNLOAD_LINK --output vs_installer.exe
if ($LASTEXITCODE -ne 0) {
echo "Download of the VS 2019 installer failed"
exit 1
}
# Run the installer synchronously and capture its exit code.
$process = Start-Process "${PWD}\vs_installer.exe" -ArgumentList $VS_INSTALL_ARGS -NoNewWindow -Wait -PassThru
Remove-Item -Path vs_installer.exe -Force
$exitCode = $process.ExitCode
# 3010 is the standard "success, reboot required" code and is treated as success.
if (($exitCode -ne 0) -and ($exitCode -ne 3010)) {
echo "VS 2019 installer exited with code $exitCode, which should be one of [0, 3010]."
exit 1
}
[tool.usort]
first_party_detection = false
[tool.black]
line-length = 120
target-version = ["py38"]
[tool.ufmt]
excludes = [
"gallery",
]
[build-system]
requires = ["setuptools", "torch", "wheel"]
[pytest]
addopts =
# show tests that (f)ailed, (E)rror, or (X)passed in the summary
-rfEX
# Make tracebacks shorter
--tb=short
# enable all warnings
-Wd
--ignore=test/test_datasets_download.py
--ignore-glob=test/test_prototype_*.py
testpaths =
test
xfail_strict = True
...@@ -20,35 +20,56 @@ the following parameters: ...@@ -20,35 +20,56 @@ the following parameters:
### AlexNet and VGG ### AlexNet and VGG
Since `AlexNet` and the original `VGG` architectures do not include batch Since `AlexNet` and the original `VGG` architectures do not include batch
normalization, the default initial learning rate `--lr 0.1` is to high. normalization, the default initial learning rate `--lr 0.1` is too high.
``` ```
python main.py --model $MODEL --lr 1e-2 torchrun --nproc_per_node=8 train.py\
--model $MODEL --lr 1e-2
``` ```
Here `$MODEL` is one of `alexnet`, `vgg11`, `vgg13`, `vgg16` or `vgg19`. Note Here `$MODEL` is one of `alexnet`, `vgg11`, `vgg13`, `vgg16` or `vgg19`. Note
that `vgg11_bn`, `vgg13_bn`, `vgg16_bn`, and `vgg19_bn` include batch that `vgg11_bn`, `vgg13_bn`, `vgg16_bn`, and `vgg19_bn` include batch
normalization and thus are trained with the default parameters. normalization and thus are trained with the default parameters.
### ResNext-50 32x4d ### GoogLeNet
The weights of the GoogLeNet model are ported from the original paper rather than trained from scratch.
### Inception V3
The weights of the Inception V3 model are ported from the original paper rather than trained from scratch.
Since it expects tensors with a size of N x 3 x 299 x 299, to validate the model use the following command:
``` ```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ torchrun --nproc_per_node=8 train.py --model inception_v3\
--model resnext50_32x4d --epochs 100 --test-only --weights Inception_V3_Weights.IMAGENET1K_V1
``` ```
### ResNet
```
torchrun --nproc_per_node=8 train.py --model $MODEL
```
### ResNext-101 32x8d Here `$MODEL` is one of `resnet18`, `resnet34`, `resnet50`, `resnet101` or `resnet152`.
On 8 nodes, each with 8 GPUs (for a total of 64 GPUS) ### ResNext
``` ```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ torchrun --nproc_per_node=8 train.py\
--model resnext101_32x8d --epochs 100 --model $MODEL --epochs 100
``` ```
Here `$MODEL` is one of `resnext50_32x4d` or `resnext101_32x8d`.
Note that the above command corresponds to a single node with 8 GPUs. If you use
a different number of GPUs and/or a different batch size, then the learning rate
should be scaled accordingly. For example, the pretrained model provided by
`torchvision` was trained on 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
with `--batch_size 16` and `--lr 0.4`, instead of the current defaults
which are respectively batch_size=32 and lr=0.1
### MobileNetV2 ### MobileNetV2
``` ```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ torchrun --nproc_per_node=8 train.py\
--model mobilenet_v2 --epochs 300 --lr 0.045 --wd 0.00004\ --model mobilenet_v2 --epochs 300 --lr 0.045 --wd 0.00004\
--lr-step-size 1 --lr-gamma 0.98 --lr-step-size 1 --lr-gamma 0.98
``` ```
...@@ -56,7 +77,7 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ ...@@ -56,7 +77,7 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\
### MobileNetV3 Large & Small ### MobileNetV3 Large & Small
``` ```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ torchrun --nproc_per_node=8 train.py\
--model $MODEL --epochs 600 --opt rmsprop --batch-size 128 --lr 0.064\ --model $MODEL --epochs 600 --opt rmsprop --batch-size 128 --lr 0.064\
--wd 0.00001 --lr-step-size 2 --lr-gamma 0.973 --auto-augment imagenet --random-erase 0.2 --wd 0.00001 --lr-step-size 2 --lr-gamma 0.973 --auto-augment imagenet --random-erase 0.2
``` ```
...@@ -67,37 +88,236 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@ ...@@ -67,37 +88,236 @@ Then we averaged the parameters of the last 3 checkpoints that improved the Acc@
and [#3354](https://github.com/pytorch/vision/pull/3354) for details. and [#3354](https://github.com/pytorch/vision/pull/3354) for details.
### EfficientNet-V1
The weights of the B0-B4 variants are ported from Ross Wightman's [timm repo](https://github.com/rwightman/pytorch-image-models/blob/01cb46a9a50e3ba4be167965b5764e9702f09b30/timm/models/efficientnet.py#L95-L108).
The weights of the B5-B7 variants are ported from Luke Melas' [EfficientNet-PyTorch repo](https://github.com/lukemelas/EfficientNet-PyTorch/blob/1039e009545d9329ea026c9f7541341439712b96/efficientnet_pytorch/utils.py#L562-L564).
All models were trained using Bicubic interpolation and each have custom crop and resize sizes. To validate the models use the following commands:
```
torchrun --nproc_per_node=8 train.py --model efficientnet_b0 --test-only --weights EfficientNet_B0_Weights.IMAGENET1K_V1
torchrun --nproc_per_node=8 train.py --model efficientnet_b1 --test-only --weights EfficientNet_B1_Weights.IMAGENET1K_V1
torchrun --nproc_per_node=8 train.py --model efficientnet_b2 --test-only --weights EfficientNet_B2_Weights.IMAGENET1K_V1
torchrun --nproc_per_node=8 train.py --model efficientnet_b3 --test-only --weights EfficientNet_B3_Weights.IMAGENET1K_V1
torchrun --nproc_per_node=8 train.py --model efficientnet_b4 --test-only --weights EfficientNet_B4_Weights.IMAGENET1K_V1
torchrun --nproc_per_node=8 train.py --model efficientnet_b5 --test-only --weights EfficientNet_B5_Weights.IMAGENET1K_V1
torchrun --nproc_per_node=8 train.py --model efficientnet_b6 --test-only --weights EfficientNet_B6_Weights.IMAGENET1K_V1
torchrun --nproc_per_node=8 train.py --model efficientnet_b7 --test-only --weights EfficientNet_B7_Weights.IMAGENET1K_V1
```
### EfficientNet-V2
```
torchrun --nproc_per_node=8 train.py \
--model $MODEL --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr \
--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.00002 --norm-weight-decay 0.0 \
--train-crop-size $TRAIN_SIZE --model-ema --val-crop-size $EVAL_SIZE --val-resize-size $EVAL_SIZE \
--ra-sampler --ra-reps 4
```
Here `$MODEL` is one of `efficientnet_v2_s` and `efficientnet_v2_m`.
Note that the Small variant had a `$TRAIN_SIZE` of `300` and a `$EVAL_SIZE` of `384`, while the Medium `384` and `480` respectively.
Note that the above command corresponds to training on a single node with 8 GPUs.
For generating the pre-trained weights, we trained with 4 nodes, each with 8 GPUs (for a total of 32 GPUs),
and `--batch_size 32`.
The weights of the Large variant are ported from the original paper rather than trained from scratch. See the `EfficientNet_V2_L_Weights` entry for their exact preprocessing transforms.
### RegNet
#### Small models
```
torchrun --nproc_per_node=8 train.py\
--model $MODEL --epochs 100 --batch-size 128 --wd 0.00005 --lr=0.8\
--lr-scheduler=cosineannealinglr --lr-warmup-method=linear\
--lr-warmup-epochs=5 --lr-warmup-decay=0.1
```
Here `$MODEL` is one of `regnet_x_400mf`, `regnet_x_800mf`, `regnet_x_1_6gf`, `regnet_y_400mf`, `regnet_y_800mf` and `regnet_y_1_6gf`. Please note we used learning rate 0.4 for `regnet_y_400mf` to get the same Acc@1 as [the paper](https://arxiv.org/abs/2003.13678).
#### Medium models
```
torchrun --nproc_per_node=8 train.py\
--model $MODEL --epochs 100 --batch-size 64 --wd 0.00005 --lr=0.4\
--lr-scheduler=cosineannealinglr --lr-warmup-method=linear\
--lr-warmup-epochs=5 --lr-warmup-decay=0.1
```
Here `$MODEL` is one of `regnet_x_3_2gf`, `regnet_x_8gf`, `regnet_x_16gf`, `regnet_y_3_2gf` and `regnet_y_8gf`.
#### Large models
```
torchrun --nproc_per_node=8 train.py\
--model $MODEL --epochs 100 --batch-size 32 --wd 0.00005 --lr=0.2\
--lr-scheduler=cosineannealinglr --lr-warmup-method=linear\
--lr-warmup-epochs=5 --lr-warmup-decay=0.1
```
Here `$MODEL` is one of `regnet_x_32gf`, `regnet_y_16gf` and `regnet_y_32gf`.
### Vision Transformer
#### vit_b_16
```
torchrun --nproc_per_node=8 train.py\
--model vit_b_16 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
--lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
--lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
--clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
```
Note that the above command corresponds to training on a single node with 8 GPUs.
For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
and `--batch_size 64`.
#### vit_b_32
```
torchrun --nproc_per_node=8 train.py\
--model vit_b_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
--lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
--lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment imagenet\
--clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
```
Note that the above command corresponds to training on a single node with 8 GPUs.
For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
and `--batch_size 256`.
#### vit_l_16
```
torchrun --nproc_per_node=8 train.py\
--model vit_l_16 --epochs 600 --batch-size 128 --lr 0.5 --lr-scheduler cosineannealinglr\
--lr-warmup-method linear --lr-warmup-epochs 5 --label-smoothing 0.1 --mixup-alpha 0.2\
--auto-augment ta_wide --random-erase 0.1 --weight-decay 0.00002 --norm-weight-decay 0.0\
--clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema --val-resize-size 232
```
Note that the above command corresponds to training on a single node with 8 GPUs.
For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
and `--batch_size 64`.
#### vit_l_32
```
torchrun --nproc_per_node=8 train.py\
--model vit_l_32 --epochs 300 --batch-size 512 --opt adamw --lr 0.003 --wd 0.3\
--lr-scheduler cosineannealinglr --lr-warmup-method linear --lr-warmup-epochs 30\
--lr-warmup-decay 0.033 --amp --label-smoothing 0.11 --mixup-alpha 0.2 --auto-augment ra\
--clip-grad-norm 1 --ra-sampler --cutmix-alpha 1.0 --model-ema
```
Note that the above command corresponds to training on a single node with 8 GPUs.
For generating the pre-trained weights, we trained with 8 nodes, each with 8 GPUs (for a total of 64 GPUs),
and `--batch_size 64`.
### ConvNeXt
```
torchrun --nproc_per_node=8 train.py\
--model $MODEL --batch-size 128 --opt adamw --lr 1e-3 --lr-scheduler cosineannealinglr \
--lr-warmup-epochs 5 --lr-warmup-method linear --auto-augment ta_wide --epochs 600 --random-erase 0.1 \
--label-smoothing 0.1 --mixup-alpha 0.2 --cutmix-alpha 1.0 --weight-decay 0.05 --norm-weight-decay 0.0 \
--train-crop-size 176 --model-ema --val-resize-size 232 --ra-sampler --ra-reps 4
```
Here `$MODEL` is one of `convnext_tiny`, `convnext_small`, `convnext_base` and `convnext_large`. Note that each variant had its `--val-resize-size` optimized in a post-training step, see their `Weights` entry for their exact value.
Note that the above command corresponds to training on a single node with 8 GPUs.
For generating the pre-trained weights, we trained with 2 nodes, each with 8 GPUs (for a total of 16 GPUs),
and `--batch_size 64`.
### SwinTransformer
```
torchrun --nproc_per_node=8 train.py\
--model $MODEL --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0 --bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear --lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ta_wide --model-ema --ra-sampler --ra-reps 4 --val-resize-size 224
```
Here `$MODEL` is one of `swin_t`, `swin_s` or `swin_b`.
Note that `--val-resize-size` was optimized in a post-training step, see their `Weights` entry for the exact value.
### SwinTransformer V2
```
torchrun --nproc_per_node=8 train.py\
--model $MODEL --epochs 300 --batch-size 128 --opt adamw --lr 0.001 --weight-decay 0.05 --norm-weight-decay 0.0 --bias-weight-decay 0.0 --transformer-embedding-decay 0.0 --lr-scheduler cosineannealinglr --lr-min 0.00001 --lr-warmup-method linear --lr-warmup-epochs 20 --lr-warmup-decay 0.01 --amp --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 5.0 --cutmix-alpha 1.0 --random-erase 0.25 --interpolation bicubic --auto-augment ta_wide --model-ema --ra-sampler --ra-reps 4 --val-resize-size 256 --val-crop-size 256 --train-crop-size 256
```
Here `$MODEL` is one of `swin_v2_t`, `swin_v2_s` or `swin_v2_b`.
Note that `--val-resize-size` was optimized in a post-training step, see their `Weights` entry for the exact value.
### MaxViT
```
torchrun --nproc_per_node=8 --nnodes=4 train.py\
--model $MODEL --epochs 400 --batch-size 128 --opt adamw --lr 3e-3 --weight-decay 0.05 --lr-scheduler cosineannealinglr --lr-min 1e-5 --lr-warmup-method linear --lr-warmup-epochs 32 --label-smoothing 0.1 --mixup-alpha 0.8 --clip-grad-norm 1.0 --interpolation bicubic --auto-augment ta_wide --policy-magnitude 15 --model-ema --val-resize-size 224\
--val-crop-size 224 --train-crop-size 224 --amp --model-ema-steps 32 --transformer-embedding-decay 0 --sync-bn
```
Here `$MODEL` is `maxvit_t`.
Note that `--val-resize-size` was not optimized in a post-training step.
### ShuffleNet V2
```
torchrun --nproc_per_node=8 train.py \
--batch-size=128 \
--lr=0.5 --lr-scheduler=cosineannealinglr --lr-warmup-epochs=5 --lr-warmup-method=linear \
--auto-augment=ta_wide --epochs=600 --random-erase=0.1 --weight-decay=0.00002 \
--norm-weight-decay=0.0 --label-smoothing=0.1 --mixup-alpha=0.2 --cutmix-alpha=1.0 \
--train-crop-size=176 --model-ema --val-resize-size=232 --ra-sampler --ra-reps=4
```
Here `$MODEL` is either `shufflenet_v2_x1_5` or `shufflenet_v2_x2_0`.
The models `shufflenet_v2_x0_5` and `shufflenet_v2_x1_0` were contributed by the community. See [PR-849](https://github.com/pytorch/vision/pull/849#issuecomment-483391686) for details.
## Mixed precision training ## Mixed precision training
Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [NVIDIA Apex extension](https://github.com/NVIDIA/apex). Automatic Mixed Precision (AMP) training on GPU for Pytorch can be enabled with the [torch.cuda.amp](https://pytorch.org/docs/stable/amp.html?highlight=amp#module-torch.cuda.amp).
Mixed precision training makes use of both FP32 and FP16 precisions where appropriate. FP16 operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput, generally without loss in model accuracy. Mixed precision training also often allows larger batch sizes. GPU automatic mixed precision training for Pytorch Vision can be enabled via the flag value `--apex=True`. Mixed precision training makes use of both FP32 and FP16 precisions where appropriate. FP16 operations can leverage the Tensor cores on NVIDIA GPUs (Volta, Turing or newer architectures) for improved throughput, generally without loss in model accuracy. Mixed precision training also often allows larger batch sizes. GPU automatic mixed precision training for Pytorch Vision can be enabled via the flag value `--amp=True`.
``` ```
python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py\ torchrun --nproc_per_node=8 train.py\
--model resnext50_32x4d --epochs 100 --apex --model resnext50_32x4d --epochs 100 --amp
``` ```
## Quantized ## Quantized
### Parameters used for generating quantized models: ### Post training quantized models
For all post training quantized models (All quantized models except mobilenet-v2), the settings are: For all post training quantized models, the settings are:
1. num_calibration_batches: 32 1. num_calibration_batches: 32
2. num_workers: 16 2. num_workers: 16
3. batch_size: 32 3. batch_size: 32
4. eval_batch_size: 128 4. eval_batch_size: 128
5. backend: 'fbgemm' 5. qbackend: 'fbgemm'
```
python train_quantization.py --device='cpu' --post-training-quantize --qbackend='fbgemm' --model='$MODEL'
```
Here `$MODEL` is one of `googlenet`, `inception_v3`, `resnet18`, `resnet50`, `resnext101_32x8d`, `shufflenet_v2_x0_5` and `shufflenet_v2_x1_0`.
### Quantized ShuffleNet V2
Here are commands that we use to quantize the `shufflenet_v2_x1_5` and `shufflenet_v2_x2_0` models.
``` ```
python train_quantization.py --device='cpu' --post-training-quantize --backend='fbgemm' --model='<model_name>' # For shufflenet_v2_x1_5
python train_quantization.py --device='cpu' --post-training-quantize --qbackend='fbgemm' \
--model=shufflenet_v2_x1_5 --weights="ShuffleNet_V2_X1_5_Weights.IMAGENET1K_V1" \
--train-crop-size 176 --val-resize-size 232 --data-path /datasets01_ontap/imagenet_full_size/061417/
# For shufflenet_v2_x2_0
python train_quantization.py --device='cpu' --post-training-quantize --qbackend='fbgemm' \
--model=shufflenet_v2_x2_0 --weights="ShuffleNet_V2_X2_0_Weights.IMAGENET1K_V1" \
--train-crop-size 176 --val-resize-size 232 --data-path /datasets01_ontap/imagenet_full_size/061417/
``` ```
### QAT MobileNetV2
For Mobilenet-v2, the model was trained with quantization aware training, the settings used are: For Mobilenet-v2, the model was trained with quantization aware training, the settings used are:
1. num_workers: 16 1. num_workers: 16
2. batch_size: 32 2. batch_size: 32
3. eval_batch_size: 128 3. eval_batch_size: 128
4. backend: 'qnnpack' 4. qbackend: 'qnnpack'
5. learning-rate: 0.0001 5. learning-rate: 0.0001
6. num_epochs: 90 6. num_epochs: 90
7. num_observer_update_epochs:4 7. num_observer_update_epochs:4
...@@ -108,16 +328,18 @@ For Mobilenet-v2, the model was trained with quantization aware training, the se ...@@ -108,16 +328,18 @@ For Mobilenet-v2, the model was trained with quantization aware training, the se
12. weight-decay: 0.0001
```
torchrun --nproc_per_node=8 train_quantization.py --model='mobilenet_v2'
```
Training converges at about 10 epochs.
### QAT MobileNetV3
For Mobilenet-v3 Large, the model was trained with quantization aware training, the settings used are:
1. num_workers: 16
2. batch_size: 32
3. eval_batch_size: 128
4. qbackend: 'qnnpack'
5. learning-rate: 0.001
6. num_epochs: 90
7. num_observer_update_epochs: 4
...@@ -128,7 +350,7 @@ For Mobilenet-v3 Large, the model was trained with quantization aware training, ...@@ -128,7 +350,7 @@ For Mobilenet-v3 Large, the model was trained with quantization aware training,
12. weight-decay: 0.00001
```
torchrun --nproc_per_node=8 train_quantization.py --model='mobilenet_v3_large' \
    --wd 0.00001 --lr 0.001
```
...@@ -137,6 +359,10 @@ For post training quant, device is set to CPU. For training, the device is set t ...@@ -137,6 +359,10 @@ For post training quant, device is set to CPU. For training, the device is set t
### Command to evaluate quantized models using the pre-trained weights:
```
python train_quantization.py --device='cpu' --test-only --qbackend='<qbackend>' --model='<model_name>'
```
For inception_v3 you need to pass the following extra parameters:
```
--val-resize-size 342 --val-crop-size 299 --train-crop-size 299
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment