Unverified Commit e69ca5ed authored by achervyakov's avatar achervyakov Committed by GitHub
Browse files

add image hashing and `LMEVAL_HASHMM` envar (#2973)



* add image hashing

* remove unused params description

* use `LMEVAL_HASHMM` (default '1': hash images); set it to '0' to save raw images

---------
Co-authored-by: Baber <baber@hey.com>
parent 0e96cd18
import itertools import itertools
import json import json
import logging import logging
import os
import random import random
import time import time
from collections import defaultdict from collections import defaultdict
...@@ -29,6 +30,7 @@ from lm_eval.loggers.utils import add_env_info, add_tokenizer_info, get_git_comm ...@@ -29,6 +30,7 @@ from lm_eval.loggers.utils import add_env_info, add_tokenizer_info, get_git_comm
from lm_eval.tasks import TaskManager, get_task_dict from lm_eval.tasks import TaskManager, get_task_dict
from lm_eval.utils import ( from lm_eval.utils import (
handle_non_serializable, handle_non_serializable,
hash_dict_images,
hash_string, hash_string,
positional_deprecated, positional_deprecated,
setup_logging, setup_logging,
...@@ -140,7 +142,6 @@ def simple_evaluate( ...@@ -140,7 +142,6 @@ def simple_evaluate(
Random seed for fewshot sampler random generator. If set to None, the seed of generator will be set to None. Random seed for fewshot sampler random generator. If set to None, the seed of generator will be set to None.
:param metadata: dict :param metadata: dict
Additional metadata to be added to the task manager. Will get passed to the download function of the task. Additional metadata to be added to the task manager. Will get passed to the download function of the task.
return return
Dictionary of results Dictionary of results
""" """
...@@ -747,6 +748,12 @@ def evaluate( ...@@ -747,6 +748,12 @@ def evaluate(
}, },
} }
if log_samples: if log_samples:
# default: hash images
samples = (
hash_dict_images(samples)
if os.environ.get("LMEVAL_HASHMM", "1") != "0"
else samples
)
results_dict["samples"] = dict(samples) results_dict["samples"] = dict(samples)
return results_dict return results_dict
......
...@@ -550,3 +550,54 @@ def weighted_f1_score(items): ...@@ -550,3 +550,54 @@ def weighted_f1_score(items):
preds = unzipped_list[1] preds = unzipped_list[1]
fscore = f1_score(golds, preds, average="weighted") fscore = f1_score(golds, preds, average="weighted")
return fscore return fscore
def convert_pil_to_hash(value):
    """Return the SHA-256 hex digest of an image's PNG encoding.

    Parameters:
        value: An object exposing ``save(fp, format=...)`` (e.g. a
            ``PIL.Image.Image``); it is serialized as PNG into an in-memory
            buffer.

    Returns:
        str: Hexadecimal SHA-256 digest of the encoded PNG bytes.
    """
    from io import BytesIO

    img_bytes = BytesIO()
    value.save(img_bytes, format="PNG")
    # Hash the buffer's contents. ``str(img_bytes)`` is only the object's repr
    # (including its memory address), which is unrelated to the image data and
    # differs from run to run.
    return hashlib.sha256(img_bytes.getvalue()).hexdigest()
def convert_bytes_to_hash(value):
    """Return the SHA-256 hex digest of the string form of ``value``.

    The value is first converted with ``str()`` and that text is encoded
    before hashing, so the digest is computed over the textual
    representation rather than over the raw buffer.
    """
    hasher = hashlib.sha256()
    text_form = str(value)
    hasher.update(text_form.encode())
    return hasher.hexdigest()
def hash_dict_images(data_dict):
    """
    Build a new dictionary from `data_dict` in which every bytes, bytearray and
    PIL.Image.Image value is replaced by its hash.

    Nested dicts, lists and tuples are rebuilt recursively (dicts become plain
    dicts, tuples become plain tuples); any other value is placed in the result
    as-is, without being copied.

    Parameters:
        data_dict (dict): The input dictionary with arbitrary nesting of dicts,
            lists and tuples.

    Returns:
        dict: A new dictionary with the same structure as `data_dict`, but with
            all bytes/bytearray and PIL.Image.Image objects replaced by their
            hashes.

    Raises:
        TypeError: If `data_dict` is not a dictionary.
    """
    # Validate first so that a bad argument is reported as such, regardless of
    # whether Pillow is available.
    if not isinstance(data_dict, dict):
        raise TypeError("Input must be a dictionary")

    # Pillow is optional: without it no value can be a PIL image, so only the
    # bytes-like values need hashing and the import failure must not abort.
    try:
        from PIL import Image
    except ImportError:
        pil_image_type = None
    else:
        pil_image_type = Image.Image

    def _process_value(value):
        # Bytes -> hash
        if isinstance(value, (bytes, bytearray)):
            return convert_bytes_to_hash(value)

        # PIL Image -> hash (only checked when Pillow could be imported)
        if pil_image_type is not None and isinstance(value, pil_image_type):
            return convert_pil_to_hash(value)

        # Nested dictionary -> recurse
        if isinstance(value, dict):
            return {k: _process_value(v) for k, v in value.items()}

        # List or tuple -> recurse, preserving list vs. tuple
        if isinstance(value, list):
            return [_process_value(v) for v in value]
        if isinstance(value, tuple):
            return tuple(_process_value(v) for v in value)

        # Other types remain unchanged
        return value

    return {key: _process_value(val) for key, val in data_dict.items()}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment