remove trust-remote-code in configs; fix escape sequences (#3180)

* remove trust-remote-code
* add W605 rule

remove trust-remote-code in configs; fix escape sequences (#3180)
* remove trust-remote-code
* add W605 rule
314f7176 · Baber Abbasi · GitHub · 8c6fde08 · 314f7176 · 314f7176
Unverified · Commit 314f7176 authored Jul 23, 2025 by Baber Abbasi · Committed by GitHub Jul 23, 2025
20 changed files
--- a/lm_eval/tasks/hrm8k/default/utils.py
+++ b/lm_eval/tasks/hrm8k/default/utils.py
@@ -111,7 +111,7 @@ def parse_math_answer(raw_string):
        return retval
    def get_answer_with_dollar_sign(s):
-        first_pattern = "\$(.*)\$"
+        first_pattern = r"\$(.*)\$"
        last_match = None
        matches = re.findall(first_pattern, s)
        if matches:
@@ -127,7 +127,7 @@ def parse_math_answer(raw_string):
            if "\\n" in last_match:
                last_match = last_match.split("\\n")[0]
        else:
-            pattern = "(?:\\$)?\d+(?:\.\d+)?(?![\w\d])"
+            pattern = "(?:\\$)?\\d+(?:\\.\\d+)?(?![\\w\\d])"
            matches = re.findall(pattern, s)
            if matches:
                last_match = matches[-1]
@@ -250,7 +250,7 @@ def _strip_string(string):
    # remove percentage
    string = string.replace("\\%", "")
-    string = string.replace("\%", "")
+    string = string.replace(r"\%", "")
    # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string
    string = string.replace(" .", " 0.")

--- a/lm_eval/tasks/hrm8k/en/utils.py
+++ b/lm_eval/tasks/hrm8k/en/utils.py
@@ -111,7 +111,7 @@ def parse_math_answer(raw_string):
        return retval
    def get_answer_with_dollar_sign(s):
-        first_pattern = "\$(.*)\$"
+        first_pattern = r"\$(.*)\$"
        last_match = None
        matches = re.findall(first_pattern, s)
        if matches:
@@ -127,7 +127,7 @@ def parse_math_answer(raw_string):
            if "\\n" in last_match:
                last_match = last_match.split("\\n")[0]
        else:
-            pattern = "(?:\\$)?\d+(?:\.\d+)?(?![\w\d])"
+            pattern = "(?:\\$)?\\d+(?:\\.\\d+)?(?![\\w\\d])"
            matches = re.findall(pattern, s)
            if matches:
                last_match = matches[-1]
@@ -250,7 +250,7 @@ def _strip_string(string):
    # remove percentage
    string = string.replace("\\%", "")
-    string = string.replace("\%", "")
+    string = string.replace(r"\%", "")
    # " 0." equivalent to " ." and "{0." equivalent to "{." Alternatively, add "0" if "." is the start of the string
    string = string.replace(" .", " 0.")

--- a/lm_eval/tasks/inverse_scaling/inverse_scaling_winobias_antistereotype.yaml
+++ b/lm_eval/tasks/inverse_scaling/inverse_scaling_winobias_antistereotype.yaml
@@ -14,7 +14,5 @@ metric_list:
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
-dataset_kwargs:
-  trust_remote_code: true
 metadata:
  version: 0
--- a/lm_eval/tasks/kobest/kobest_sentineg.yaml
+++ b/lm_eval/tasks/kobest/kobest_sentineg.yaml
@@ -19,5 +19,3 @@ metric_list:
    higher_is_better: True
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/kobest/kobest_wic.yaml
+++ b/lm_eval/tasks/kobest/kobest_wic.yaml
@@ -19,5 +19,3 @@ metric_list:
    higher_is_better: True
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/kormedmcqa/_template_yaml
+++ b/lm_eval/tasks/kormedmcqa/_template_yaml
@@ -29,5 +29,3 @@ generation_kwargs:
  max_gen_toks: 1024
 metadata:
  version: 2.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/lambada/lambada_openai.yaml
+++ b/lm_eval/tasks/lambada/lambada_openai.yaml
@@ -18,5 +18,3 @@ metric_list:
    higher_is_better: true
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/leaderboard/math/_template_yaml
+++ b/lm_eval/tasks/leaderboard/math/_template_yaml
@@ -22,8 +22,6 @@ metric_list:
 num_fewshot: 4
 metadata:
  version: 3.0
-dataset_kwargs:
-  trust_remote_code: true
 fewshot_config:
  sampler: first_n
  samples: !function utils.list_fewshot_samples
--- a/lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu/_continuation_template_yaml
@@ -29,5 +29,3 @@ filter_list:
      - function: take_first
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/llama3/instruct/mmlu_cot/_mmlu_cot_llama_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu_cot/_mmlu_cot_llama_template_yaml
@@ -23,6 +23,4 @@ metric_list:
    ignore_punctuation: true
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
 num_fewshot: 0
--- a/lm_eval/tasks/llama3/instruct/mmlu_de/_continuation_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu_de/_continuation_template_yaml
@@ -28,5 +28,3 @@ filter_list:
      - function: take_first
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/llama3/instruct/mmlu_es/_continuation_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu_es/_continuation_template_yaml
@@ -28,5 +28,3 @@ filter_list:
      - function: take_first
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/llama3/instruct/mmlu_fr/_continuation_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu_fr/_continuation_template_yaml
@@ -28,5 +28,3 @@ filter_list:
      - function: take_first
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/llama3/instruct/mmlu_hi/_continuation_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu_hi/_continuation_template_yaml
@@ -28,5 +28,3 @@ filter_list:
      - function: take_first
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/llama3/instruct/mmlu_it/_continuation_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu_it/_continuation_template_yaml
@@ -28,5 +28,3 @@ filter_list:
      - function: take_first
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/llama3/instruct/mmlu_pro/_default_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu_pro/_default_template_yaml
@@ -31,5 +31,3 @@ filter_list:
        - function: take_first
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/llama3/instruct/mmlu_pt/_continuation_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu_pt/_continuation_template_yaml
@@ -28,5 +28,3 @@ filter_list:
      - function: take_first
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/llama3/instruct/mmlu_th/_continuation_template_yaml
+++ b/lm_eval/tasks/llama3/instruct/mmlu_th/_continuation_template_yaml
@@ -28,5 +28,3 @@ filter_list:
      - function: take_first
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/logiqa/logiqa.yaml
+++ b/lm_eval/tasks/logiqa/logiqa.yaml
@@ -19,5 +19,3 @@ metric_list:
    higher_is_better: true
 metadata:
  version: 1.0
-dataset_kwargs:
-  trust_remote_code: true
--- a/lm_eval/tasks/logiqa2/logieval.yaml
+++ b/lm_eval/tasks/logiqa2/logieval.yaml
@@ -25,5 +25,3 @@ filter_list:
      - function: "take_first"
 metadata:
  version: 0.0
-dataset_kwargs:
-  trust_remote_code: true