Unverified commit b9a1984c, authored by Adam J. Stewart and committed by GitHub
Browse files

Simpler file chunking (#7673)


Co-authored-by: Philip Meier <github.pmeier@posteo.de>
parent 3d70e4bb
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
import glob import glob
import hashlib import hashlib
import io
# Standard library imports # Standard library imports
import os import os
...@@ -65,21 +64,12 @@ PLATFORM_ARCH = platform.machine() ...@@ -65,21 +64,12 @@ PLATFORM_ARCH = platform.machine()
PYTHON_VERSION = sys.version_info PYTHON_VERSION = sys.version_info
def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE):
"""Yield pieces of data from a file-like object until EOF."""
while True:
chunk = file.read(size)
if not chunk:
break
yield chunk
def rehash(path, blocksize=1 << 20): def rehash(path, blocksize=1 << 20):
"""Return (hash, length) for path using hashlib.sha256()""" """Return (hash, length) for path using hashlib.sha256()"""
h = hashlib.sha256() h = hashlib.sha256()
length = 0 length = 0
with open(path, "rb") as f: with open(path, "rb") as f:
for block in read_chunks(f, size=blocksize): while block := f.read(blocksize):
length += len(block) length += len(block)
h.update(block) h.update(block)
digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=") digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=")
......
...@@ -57,7 +57,7 @@ def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str: ...@@ -57,7 +57,7 @@ def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str:
else: else:
md5 = hashlib.md5() md5 = hashlib.md5()
with open(fpath, "rb") as f: with open(fpath, "rb") as f:
for chunk in iter(lambda: f.read(chunk_size), b""): while chunk := f.read(chunk_size):
md5.update(chunk) md5.update(chunk)
return md5.hexdigest() return md5.hexdigest()
......
...@@ -91,7 +91,7 @@ import hashlib ...@@ -91,7 +91,7 @@ import hashlib
def sha256sum(path, chunk_size=1024 * 1024): def sha256sum(path, chunk_size=1024 * 1024):
checksum = hashlib.sha256() checksum = hashlib.sha256()
with open(path, "rb") as f: with open(path, "rb") as f:
for chunk in iter(lambda: f.read(chunk_size), b""): while chunk := f.read(chunk_size):
checksum.update(chunk) checksum.update(chunk)
print(checksum.hexdigest()) print(checksum.hexdigest())
``` ```
......
...@@ -136,7 +136,7 @@ class OnlineResource(abc.ABC): ...@@ -136,7 +136,7 @@ class OnlineResource(abc.ABC):
def _check_sha256(self, path: pathlib.Path, *, chunk_size: int = 1024 * 1024) -> None: def _check_sha256(self, path: pathlib.Path, *, chunk_size: int = 1024 * 1024) -> None:
hash = hashlib.sha256() hash = hashlib.sha256()
with open(path, "rb") as file: with open(path, "rb") as file:
for chunk in iter(lambda: file.read(chunk_size), b""): while chunk := file.read(chunk_size):
hash.update(chunk) hash.update(chunk)
sha256 = hash.hexdigest() sha256 = hash.hexdigest()
if sha256 != self.sha256: if sha256 != self.sha256:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment