Unverified commit 4282c9fc, authored by Philip Meier, committed by GitHub

Revert removing `requests` and make it a hard dependency instead (#5047)

* Revert "[FBcode->GH] remove unused requests functionality (#5014)"

This reverts commit 33123bee.

* Revert "replace requests with urllib (#4973)"

This reverts commit 8d25de7b.

* add requests as hard dependency

* install library stubs in CI

* fix syntax

* add requests to conda dependencies

* fix mypy CI
parent e250db37
@@ -311,7 +311,7 @@ jobs:
           descr: Install Python type check utilities
       - run:
           name: Check Python types statically
-          command: mypy --config-file mypy.ini
+          command: mypy --install-types --non-interactive --config-file mypy.ini

   unittest_torchhub:
     docker:
...
@@ -311,7 +311,7 @@ jobs:
           descr: Install Python type check utilities
       - run:
           name: Check Python types statically
-          command: mypy --config-file mypy.ini
+          command: mypy --install-types --non-interactive --config-file mypy.ini

   unittest_torchhub:
     docker:
...
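The same one-line change appears to be applied to both CI configuration files shown above. With `requests` restored as a runtime dependency, mypy also needs the corresponding library stubs (the `types-requests` package); `--install-types --non-interactive` lets the CI job download any missing stubs automatically before type checking instead of failing with a "library stubs not installed" error. A minimal sketch of the kind of code that triggers that requirement (the function below is hypothetical, not from torchvision):

import requests


def fetch(url: str) -> bytes:
    # Without the requests stubs installed, mypy reports "Library stubs not installed
    # for 'requests'"; --install-types --non-interactive fetches types-requests so
    # this call is fully typed.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    return response.content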
@@ -24,6 +24,7 @@ requirements:
   run:
     - python
     - defaults::numpy >=1.11
+    - requests
     - libpng
     - ffmpeg >=4.2  # [not win]
     - jpeg
...
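Listing `requests` under the recipe's `run:` requirements makes it a hard runtime dependency of the conda package, so torchvision code can import it unconditionally. A rough, hypothetical illustration of the contrast with treating it as an optional dependency (this guard is shown only for comparison and is not code from this repository):

try:
    import requests
except ImportError:
    # Hypothetical fallback an *optional* dependency would need.
    requests = None


def download(url: str) -> bytes:
    if requests is None:
        raise RuntimeError("requests is required for downloads; please install it")
    return requests.get(url, timeout=10).content

With requests declared as a run requirement, the guard and the late runtime error are unnecessary and a plain module-level `import requests` suffices, as the datasets/utils.py hunk further down does.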
@@ -59,6 +59,7 @@ if os.getenv("PYTORCH_VERSION"):
 requirements = [
     "numpy",
+    "requests",
     pytorch_dep,
 ]
...
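The same guarantee is made for pip installs by adding "requests" to the requirements list in setup.py. A minimal, hypothetical sketch of how such a list typically feeds `install_requires` (not the actual torchvision setup.py, which computes `pytorch_dep` dynamically from PYTORCH_VERSION):

from setuptools import setup, find_packages

pytorch_dep = "torch"  # placeholder; the real script pins this based on the environment

requirements = [
    "numpy",
    "requests",
    pytorch_dep,
]

setup(
    name="torchvision",
    packages=find_packages(),
    install_requires=requirements,  # pip now installs requests alongside torchvision
)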
@@ -15,6 +15,7 @@ import zipfile
 from typing import Any, Callable, List, Iterable, Optional, TypeVar, Dict, IO, Tuple, Iterator
 from urllib.parse import urlparse

+import requests
 import torch
 from torch.utils.model_zoo import tqdm
@@ -199,28 +200,37 @@ def download_file_from_google_drive(file_id: str, root: str, filename: Optional[
         filename (str, optional): Name to save the file under. If None, use the id of the file.
         md5 (str, optional): MD5 checksum of the download. If None, do not check
     """
-    url = f"https://drive.google.com/uc?export=download&id={file_id}"
+    # Based on https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
+    url = "https://docs.google.com/uc?export=download"

     root = os.path.expanduser(root)
     if not filename:
         filename = file_id
     fpath = os.path.join(root, filename)

+    os.makedirs(root, exist_ok=True)
+
     if os.path.isfile(fpath) and check_integrity(fpath, md5):
         print("Using downloaded and verified file: " + fpath)
-        return
+    else:
+        session = requests.Session()

-    os.makedirs(root, exist_ok=True)
+        response = session.get(url, params={"id": file_id}, stream=True)
+        token = _get_confirm_token(response)

-    with urllib.request.urlopen(url) as response:
+        if token:
+            params = {"id": file_id, "confirm": token}
+            response = session.get(url, params=params, stream=True)
+
         # Ideally, one would use response.status_code to check for quota limits, but google drive is not consistent
         # with their own API, refer https://github.com/pytorch/vision/issues/2992#issuecomment-730614517.
         # Should this be fixed at some place in future, one could refactor the following to no longer rely on decoding
         # the first_chunk of the payload
-        content = iter(lambda: response.read(32768), b"")
+        response_content_generator = response.iter_content(32768)
         first_chunk = None
         while not first_chunk:  # filter out keep-alive new chunks
-            first_chunk = next(content)
+            first_chunk = next(response_content_generator)

         if _quota_exceeded(first_chunk):
             msg = (
@@ -230,12 +240,21 @@ def download_file_from_google_drive(file_id: str, root: str, filename: Optional[
             )
             raise RuntimeError(msg)

-        _save_response_content(itertools.chain((first_chunk,), content), fpath)
+        _save_response_content(itertools.chain((first_chunk,), response_content_generator), fpath)
+        response.close()
+
+
+def _get_confirm_token(response: requests.models.Response) -> Optional[str]:
+    for key, value in response.cookies.items():
+        if key.startswith("download_warning"):
+            return value
+
+    return None


 def _save_response_content(
     response_gen: Iterator[bytes],
-    destination: str,  # type: ignore[name-defined]
+    destination: str,
 ) -> None:
     with open(destination, "wb") as f:
         pbar = tqdm(total=None)
...
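Putting the restored pieces together, the download path works as follows: an initial streamed GET to the Google Docs endpoint may set a "download_warning" cookie for large files, in which case the request is retried with the confirmation token from that cookie; the body is then consumed in 32 KB chunks, skipping empty keep-alive chunks. Below is a self-contained sketch of that flow. It mirrors the helpers from the diff above, but the top-level function name is hypothetical and it omits the quota-exceeded check, md5 verification, and progress bar of the real `download_file_from_google_drive`:

from typing import Iterator, Optional

import requests


def _get_confirm_token(response: requests.models.Response) -> Optional[str]:
    # Google Drive sets a "download_warning" cookie when it wants an explicit
    # confirmation (e.g. for files too large to be virus-scanned); its value is the token.
    for key, value in response.cookies.items():
        if key.startswith("download_warning"):
            return value
    return None


def _save_response_content(response_gen: Iterator[bytes], destination: str) -> None:
    with open(destination, "wb") as f:
        for chunk in response_gen:
            if chunk:  # filter out keep-alive chunks
                f.write(chunk)


def download_from_google_drive(file_id: str, destination: str) -> None:
    # Hypothetical wrapper around the same requests.Session flow as the diff above.
    url = "https://docs.google.com/uc?export=download"
    session = requests.Session()

    response = session.get(url, params={"id": file_id}, stream=True)
    token = _get_confirm_token(response)
    if token:
        # Retry with the confirmation token for files that require it.
        response = session.get(url, params={"id": file_id, "confirm": token}, stream=True)

    _save_response_content(response.iter_content(32768), destination)
    response.close()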