"...en/git@developer.sourcefind.cn:chenpangpang/diffusers.git" did not exist on "357855f8fca8d27beb0a3fde333f46db22f29391"
Unverified Commit fd65aa98 authored by Lucain's avatar Lucain Committed by GitHub
Browse files

Set `usedforsecurity=False` in hashlib methods (FIPS compliance) (#27483)

* Set usedforsecurity=False in hashlib methods (FIPS compliance)

* trigger ci

* tokenizers version

* deps

* bump hfh version

* let's try this
parent 5603fad2
import gzip import gzip
import hashlib
import json import json
import multiprocessing import multiprocessing
import os import os
...@@ -11,6 +10,7 @@ from pathlib import Path ...@@ -11,6 +10,7 @@ from pathlib import Path
import numpy as np import numpy as np
from arguments import PreprocessingArguments from arguments import PreprocessingArguments
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub.utils import insecure_hashlib
from minhash_deduplication import deduplicate_dataset from minhash_deduplication import deduplicate_dataset
from transformers import AutoTokenizer, HfArgumentParser from transformers import AutoTokenizer, HfArgumentParser
...@@ -21,7 +21,7 @@ PATTERN = re.compile(r"\s+") ...@@ -21,7 +21,7 @@ PATTERN = re.compile(r"\s+")
def get_hash(example): def get_hash(example):
"""Get hash of content field.""" """Get hash of content field."""
return {"hash": hashlib.md5(re.sub(PATTERN, "", example["content"]).encode("utf-8")).hexdigest()} return {"hash": insecure_hashlib.md5(re.sub(PATTERN, "", example["content"]).encode("utf-8")).hexdigest()}
def line_stats(example): def line_stats(example):
......
...@@ -28,7 +28,6 @@ import tempfile ...@@ -28,7 +28,6 @@ import tempfile
from collections import OrderedDict from collections import OrderedDict
from contextlib import contextmanager from contextlib import contextmanager
from functools import partial from functools import partial
from hashlib import sha256
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from urllib.parse import urlparse from urllib.parse import urlparse
...@@ -39,6 +38,7 @@ import numpy as np ...@@ -39,6 +38,7 @@ import numpy as np
import requests import requests
import wget import wget
from filelock import FileLock from filelock import FileLock
from huggingface_hub.utils import insecure_hashlib
from PIL import Image from PIL import Image
from tqdm.auto import tqdm from tqdm.auto import tqdm
from yaml import Loader, dump, load from yaml import Loader, dump, load
...@@ -402,12 +402,12 @@ def get_from_cache( ...@@ -402,12 +402,12 @@ def get_from_cache(
def url_to_filename(url, etag=None): def url_to_filename(url, etag=None):
url_bytes = url.encode("utf-8") url_bytes = url.encode("utf-8")
url_hash = sha256(url_bytes) url_hash = insecure_hashlib.sha256(url_bytes)
filename = url_hash.hexdigest() filename = url_hash.hexdigest()
if etag: if etag:
etag_bytes = etag.encode("utf-8") etag_bytes = etag.encode("utf-8")
etag_hash = sha256(etag_bytes) etag_hash = insecure_hashlib.sha256(etag_bytes)
filename += "." + etag_hash.hexdigest() filename += "." + etag_hash.hexdigest()
if url.endswith(".h5"): if url.endswith(".h5"):
......
...@@ -28,7 +28,6 @@ import tempfile ...@@ -28,7 +28,6 @@ import tempfile
from collections import OrderedDict from collections import OrderedDict
from contextlib import contextmanager from contextlib import contextmanager
from functools import partial from functools import partial
from hashlib import sha256
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from urllib.parse import urlparse from urllib.parse import urlparse
...@@ -39,6 +38,7 @@ import numpy as np ...@@ -39,6 +38,7 @@ import numpy as np
import requests import requests
import wget import wget
from filelock import FileLock from filelock import FileLock
from huggingface_hub.utils import insecure_hashlib
from PIL import Image from PIL import Image
from tqdm.auto import tqdm from tqdm.auto import tqdm
from yaml import Loader, dump, load from yaml import Loader, dump, load
...@@ -402,12 +402,12 @@ def get_from_cache( ...@@ -402,12 +402,12 @@ def get_from_cache(
def url_to_filename(url, etag=None): def url_to_filename(url, etag=None):
url_bytes = url.encode("utf-8") url_bytes = url.encode("utf-8")
url_hash = sha256(url_bytes) url_hash = insecure_hashlib.sha256(url_bytes)
filename = url_hash.hexdigest() filename = url_hash.hexdigest()
if etag: if etag:
etag_bytes = etag.encode("utf-8") etag_bytes = etag.encode("utf-8")
etag_hash = sha256(etag_bytes) etag_hash = insecure_hashlib.sha256(etag_bytes)
filename += "." + etag_hash.hexdigest() filename += "." + etag_hash.hexdigest()
if url.endswith(".h5"): if url.endswith(".h5"):
......
...@@ -118,7 +118,7 @@ _deps = [ ...@@ -118,7 +118,7 @@ _deps = [
"fugashi>=1.0", "fugashi>=1.0",
"GitPython<3.1.19", "GitPython<3.1.19",
"hf-doc-builder>=0.3.0", "hf-doc-builder>=0.3.0",
"huggingface-hub>=0.16.4,<1.0", "huggingface-hub>=0.19.3,<1.0",
"importlib_metadata", "importlib_metadata",
"ipadic>=1.0.0,<2.0", "ipadic>=1.0.0,<2.0",
"isort>=5.5.4", "isort>=5.5.4",
...@@ -321,6 +321,7 @@ extras["testing"] = ( ...@@ -321,6 +321,7 @@ extras["testing"] = (
"rjieba", "rjieba",
"beautifulsoup4", "beautifulsoup4",
"tensorboard", "tensorboard",
"pydantic",
) )
+ extras["retrieval"] + extras["retrieval"]
+ extras["modelcreation"] + extras["modelcreation"]
......
...@@ -25,7 +25,7 @@ deps = { ...@@ -25,7 +25,7 @@ deps = {
"fugashi": "fugashi>=1.0", "fugashi": "fugashi>=1.0",
"GitPython": "GitPython<3.1.19", "GitPython": "GitPython<3.1.19",
"hf-doc-builder": "hf-doc-builder>=0.3.0", "hf-doc-builder": "hf-doc-builder>=0.3.0",
"huggingface-hub": "huggingface-hub>=0.16.4,<1.0", "huggingface-hub": "huggingface-hub>=0.19.3,<1.0",
"importlib_metadata": "importlib_metadata", "importlib_metadata": "importlib_metadata",
"ipadic": "ipadic>=1.0.0,<2.0", "ipadic": "ipadic>=1.0.0,<2.0",
"isort": "isort>=5.5.4", "isort": "isort>=5.5.4",
......
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
# limitations under the License. # limitations under the License.
import argparse import argparse
import hashlib
import io import io
import json import json
import os import os
...@@ -24,6 +23,7 @@ import urllib ...@@ -24,6 +23,7 @@ import urllib
import warnings import warnings
import torch import torch
from huggingface_hub.utils import insecure_hashlib
from torch import nn from torch import nn
from tqdm import tqdm from tqdm import tqdm
...@@ -114,7 +114,7 @@ def _download(url: str, root: str) -> io.BytesIO: ...@@ -114,7 +114,7 @@ def _download(url: str, root: str) -> io.BytesIO:
if os.path.isfile(download_target): if os.path.isfile(download_target):
model_bytes = open(download_target, "rb").read() model_bytes = open(download_target, "rb").read()
if hashlib.sha256(model_bytes).hexdigest() == expected_sha256: if insecure_hashlib.sha256(model_bytes).hexdigest() == expected_sha256:
return torch.load(io.BytesIO(model_bytes)) return torch.load(io.BytesIO(model_bytes))
else: else:
warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file") warnings.warn(f"{download_target} exists, but the SHA256 checksum does not match; re-downloading the file")
...@@ -132,7 +132,7 @@ def _download(url: str, root: str) -> io.BytesIO: ...@@ -132,7 +132,7 @@ def _download(url: str, root: str) -> io.BytesIO:
loop.update(len(buffer)) loop.update(len(buffer))
model_bytes = open(download_target, "rb").read() model_bytes = open(download_target, "rb").read()
if hashlib.sha256(model_bytes).hexdigest() != expected_sha256: if insecure_hashlib.sha256(model_bytes).hexdigest() != expected_sha256:
raise RuntimeError( raise RuntimeError(
"Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model." "Model has been downloaded but the SHA256 checksum does not not match. Please retry loading the model."
) )
......
...@@ -12,9 +12,10 @@ ...@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import hashlib
import unittest import unittest
from huggingface_hub.utils import insecure_hashlib
from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available from transformers import MODEL_FOR_DEPTH_ESTIMATION_MAPPING, is_torch_available, is_vision_available
from transformers.pipelines import DepthEstimationPipeline, pipeline from transformers.pipelines import DepthEstimationPipeline, pipeline
from transformers.testing_utils import ( from transformers.testing_utils import (
...@@ -44,7 +45,7 @@ else: ...@@ -44,7 +45,7 @@ else:
def hashimage(image: Image) -> str: def hashimage(image: Image) -> str:
m = hashlib.md5(image.tobytes()) m = insecure_hashlib.md5(image.tobytes())
return m.hexdigest() return m.hexdigest()
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import hashlib
import tempfile import tempfile
import unittest import unittest
from typing import Dict from typing import Dict
...@@ -21,6 +20,7 @@ import datasets ...@@ -21,6 +20,7 @@ import datasets
import numpy as np import numpy as np
import requests import requests
from datasets import load_dataset from datasets import load_dataset
from huggingface_hub.utils import insecure_hashlib
from transformers import ( from transformers import (
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING, MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
...@@ -59,7 +59,7 @@ else: ...@@ -59,7 +59,7 @@ else:
def hashimage(image: Image) -> str: def hashimage(image: Image) -> str:
m = hashlib.md5(image.tobytes()) m = insecure_hashlib.md5(image.tobytes())
return m.hexdigest()[:10] return m.hexdigest()[:10]
......
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import hashlib
import unittest import unittest
from typing import Dict from typing import Dict
import numpy as np import numpy as np
from huggingface_hub.utils import insecure_hashlib
from transformers import ( from transformers import (
MODEL_FOR_MASK_GENERATION_MAPPING, MODEL_FOR_MASK_GENERATION_MAPPING,
...@@ -46,7 +46,7 @@ else: ...@@ -46,7 +46,7 @@ else:
def hashimage(image: Image) -> str: def hashimage(image: Image) -> str:
m = hashlib.md5(image.tobytes()) m = insecure_hashlib.md5(image.tobytes())
return m.hexdigest()[:10] return m.hexdigest()[:10]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment