Unverified Commit f78e2da4 authored by LSinev's avatar LSinev Committed by GitHub
Browse files

Add environment and transformers version logging in results dump (#1464)

* Save git_hash to results even if git is not available to call as subprocess

* Store more info about environment and transformers version in results to help researchers track inconsistencies

* moved added logging to logging_utils

* moved get_git_commit_hash to logging_utils.py

* moved add_env_info inside evaluator
parent eacb74e3
...@@ -10,10 +10,10 @@ import torch ...@@ -10,10 +10,10 @@ import torch
import lm_eval.api.metrics import lm_eval.api.metrics
import lm_eval.api.registry import lm_eval.api.registry
import lm_eval.models import lm_eval.models
from lm_eval.logging_utils import add_env_info, get_git_commit_hash
from lm_eval.tasks import TaskManager, get_task_dict from lm_eval.tasks import TaskManager, get_task_dict
from lm_eval.utils import ( from lm_eval.utils import (
eval_logger, eval_logger,
get_git_commit_hash,
positional_deprecated, positional_deprecated,
run_task_tests, run_task_tests,
simple_parse_args_string, simple_parse_args_string,
...@@ -221,6 +221,7 @@ def simple_evaluate( ...@@ -221,6 +221,7 @@ def simple_evaluate(
"gen_kwargs": gen_kwargs, "gen_kwargs": gen_kwargs,
} }
results["git_hash"] = get_git_commit_hash() results["git_hash"] = get_git_commit_hash()
add_env_info(results) # additional environment info to results
return results return results
else: else:
return None return None
......
import copy import copy
import json import json
import logging import logging
import os
import re import re
from typing import Any, Dict, List, Literal, Tuple, Union import subprocess
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from packaging.version import Version from packaging.version import Version
from torch.utils.collect_env import get_pretty_env_info
from transformers import __version__ as trans_version
from lm_eval import utils from lm_eval.utils import simple_parse_args_string
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -85,9 +90,7 @@ class WandbLogger: ...@@ -85,9 +90,7 @@ class WandbLogger:
results (Dict[str, Any]): The results dictionary. results (Dict[str, Any]): The results dictionary.
args (Any): Arguments for configuration. args (Any): Arguments for configuration.
""" """
self.wandb_args: Dict[str, Any] = utils.simple_parse_args_string( self.wandb_args: Dict[str, Any] = simple_parse_args_string(args.wandb_args)
args.wandb_args
)
# initialize a W&B run # initialize a W&B run
if wandb.run is None: if wandb.run is None:
...@@ -384,3 +387,55 @@ class WandbLogger: ...@@ -384,3 +387,55 @@ class WandbLogger:
self._log_samples_as_artifact(eval_preds, task_name) self._log_samples_as_artifact(eval_preds, task_name)
self.run.log({f"{group}_eval_results": grouped_df}) self.run.log({f"{group}_eval_results": grouped_df})
def get_commit_from_path(repo_path: Union[Path, str]) -> Optional[str]:
    """
    Read the checked-out commit hash of a repository directly from its git metadata.

    No `git` executable is needed; only files under the repository's git
    directory are read. This is a best-effort lookup used for result metadata,
    so it never raises on missing or unreadable files.

    Args:
        repo_path: Directory expected to contain a `.git` entry (folder or
            pointer file). Both `Path` and `str` are accepted.

    Returns:
        The commit hash as a string, or None when `repo_path` is not a git
        checkout or the hash cannot be determined from the files inspected.
    """
    try:
        git_folder = Path(repo_path, ".git")
        if git_folder.is_file():
            # `.git` is a pointer file of the form "gitdir: <path>"; the path
            # may be relative to the directory that contains the pointer file.
            git_folder = Path(
                git_folder.parent,
                git_folder.read_text(encoding="utf-8").split("\n")[0].split(" ")[-1],
            )

        head_file = Path(git_folder, "HEAD")
        if not head_file.exists():
            return None

        head_line = head_file.read_text(encoding="utf-8").split("\n")[0].strip()
        if not head_line.startswith("ref:"):
            # Detached HEAD: the file holds the commit hash itself, not a ref name.
            return head_line or None

        head_name = head_line.split(" ")[-1]
        head_ref = Path(git_folder, head_name)
        if head_ref.is_file():
            return head_ref.read_text(encoding="utf-8").replace("\n", "")

        # The loose ref file can be absent when refs have been packed;
        # packed-refs lists them as "<hash> <refname>" lines.
        packed_refs = Path(git_folder, "packed-refs")
        if packed_refs.is_file():
            for line in packed_refs.read_text(encoding="utf-8").splitlines():
                fields = line.split(" ")
                if len(fields) == 2 and fields[1] == head_name:
                    return fields[0]
        return None
    except (OSError, UnicodeDecodeError):
        # Unreadable or malformed git metadata must not break the caller.
        return None
def get_git_commit_hash():
    """
    Return an identifier for the commit checked out in the current working directory.

    `git describe --always` is tried first. If that command fails, or no `git`
    executable can be found, the result of `get_commit_from_path` for the
    current working directory is returned instead.
    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42
    """
    describe_cmd = ["git", "describe", "--always"]
    try:
        described = subprocess.check_output(describe_cmd).strip().decode()
    except (subprocess.CalledProcessError, FileNotFoundError):
        # FileNotFoundError means git is not installed; fall back to reading
        # the hash from the repository files, if there is a repository.
        return get_commit_from_path(os.getcwd())
    return described
def add_env_info(storage: Dict[str, Any]):
    """
    Add environment details to `storage` in place.

    Three keys are written:
        - "pretty_env_info": output of `get_pretty_env_info()`, or the text of
          the exception if that call fails.
        - "transformers_version": the imported transformers version string.
        - "upper_git_hash": result of `get_commit_from_path` for the parent of
          the current working directory.

    Args:
        storage: Dictionary (e.g. evaluation results) to update.
    """
    try:
        env_report = get_pretty_env_info()
    except Exception as err:
        # Keep the failure text so the results still show why the info is missing.
        env_report = str(err)

    parent_dir = Path(os.getcwd(), "..")
    storage.update(
        {
            "pretty_env_info": env_report,
            "transformers_version": trans_version,
            # in case this repo is a submodule of the repo one level up
            "upper_git_hash": get_commit_from_path(parent_dir),
        }
    )
...@@ -5,16 +5,11 @@ import importlib.util ...@@ -5,16 +5,11 @@ import importlib.util
import inspect import inspect
import logging import logging
import os import os
import pathlib
import re import re
import subprocess
import sys import sys
from itertools import islice from itertools import islice
from typing import ( from pathlib import Path
Any, from typing import Any, Callable, List
Callable,
List,
)
import yaml import yaml
from jinja2 import BaseLoader, Environment, StrictUndefined from jinja2 import BaseLoader, Environment, StrictUndefined
...@@ -291,7 +286,7 @@ def positional_deprecated(fn): ...@@ -291,7 +286,7 @@ def positional_deprecated(fn):
@positional_deprecated @positional_deprecated
def find_test_root(start_path: pathlib.Path) -> pathlib.Path: def find_test_root(start_path: Path) -> Path:
""" """
Search upward in the directory tree to a maximum of three layers Search upward in the directory tree to a maximum of three layers
to find and return the package root (containing the 'tests' folder) to find and return the package root (containing the 'tests' folder)
...@@ -315,7 +310,7 @@ def run_task_tests(task_list: List[str]): ...@@ -315,7 +310,7 @@ def run_task_tests(task_list: List[str]):
""" """
import pytest import pytest
package_root = find_test_root(start_path=pathlib.Path(__file__)) package_root = find_test_root(start_path=Path(__file__))
task_string = " or ".join(task_list) task_string = " or ".join(task_list)
args = [ args = [
f"{package_root}/tests/test_version_stable.py", f"{package_root}/tests/test_version_stable.py",
...@@ -331,20 +326,6 @@ def run_task_tests(task_list: List[str]): ...@@ -331,20 +326,6 @@ def run_task_tests(task_list: List[str]):
) )
def get_git_commit_hash():
    """
    Gets the git commit hash of your current repo (if it exists).
    Source: https://github.com/EleutherAI/gpt-neox/blob/b608043be541602170bfcfb8ec9bf85e8a0799e0/megatron/neox_arguments/neox_args.py#L42

    Returns:
        The decoded output of `git describe --always`, or None when the
        command fails or no `git` executable is available.
    """
    try:
        git_hash = subprocess.check_output(["git", "describe", "--always"]).strip()
        git_hash = git_hash.decode()
    except (subprocess.CalledProcessError, FileNotFoundError):
        # The exceptions must be given as a tuple: `A or B` evaluates to `A`
        # alone, which would leave FileNotFoundError unhandled.
        # FileNotFoundError occurs when git not installed on system
        git_hash = None
    return git_hash
def ignore_constructor(loader, node): def ignore_constructor(loader, node):
return node return node
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment