Log `fewshot_as_multiturn` in results files (#1995)

* log fewshot_as_multiturn in general tracker args
* Update evaluator.py

---------

Co-authored-by: Lintang Sutawika <lintang@eleuther.ai>

Log `fewshot_as_multiturn` in results files (#1995)
* log fewshot_as_multiturn in general tracker args
* Update evaluator.py

---------

Co-authored-by: Lintang Sutawika <lintang@eleuther.ai>
78a54e14 · Hailey Schoelkopf · GitHub · ead2964e · 78a54e14 · 78a54e14
Unverified Commit 78a54e14 authored Jun 19, 2024 by Hailey Schoelkopf Committed by GitHub Jun 19, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 0 deletions

lm_eval/evaluator.py lm_eval/evaluator.py +1 -0

lm_eval/loggers/evaluation_tracker.py lm_eval/loggers/evaluation_tracker.py +3 -0

No files found.
--- a/lm_eval/evaluator.py
+++ b/lm_eval/evaluator.py
@@ -271,6 +271,7 @@ def simple_evaluate(
            model_args=model_args,
            system_instruction=system_instruction,
            chat_template=lm.chat_template if apply_chat_template else None,
+            fewshot_as_multiturn=fewshot_as_multiturn,
        )

    results = evaluate(

--- a/lm_eval/loggers/evaluation_tracker.py
+++ b/lm_eval/loggers/evaluation_tracker.py
@@ -48,6 +48,7 @@ class GeneralConfigTracker:
    model_name_sanitized: str = None
    system_instruction: str = None
    system_instruction_sha: str = None
+    fewshot_as_multiturn: bool = None
    chat_template: str = None
    chat_template_sha: str = None
    start_time: float = None
@@ -80,6 +81,7 @@ class GeneralConfigTracker:
        model_args: str,
        system_instruction: str,
        chat_template: str,
+        fewshot_as_multiturn: bool,
    ) -> None:
        """Logs model parameters and job ID."""
        self.model_source = model_source
@@ -91,6 +93,7 @@ class GeneralConfigTracker:
        )
        self.chat_template = chat_template
        self.chat_template_sha = hash_string(chat_template) if chat_template else None
+        self.fewshot_as_multiturn = fewshot_as_multiturn

    def log_end_time(self) -> None:
        """Logs the end time of the evaluation and calculates the total evaluation time."""