import re

from mathruler.grader import grade_answer

# Required response layout: a <think>...</think> block followed by an
# <answer>...</answer> block, with only optional whitespace between them.
# DOTALL lets the content of either block span multiple lines.
_FORMAT_PATTERN = re.compile(r"<think>.*?</think>\s*<answer>.*?</answer>", re.DOTALL)

# Captures the text inside the first <answer>...</answer> block. DOTALL is
# used here as well so that an answer spread over several lines is still
# extracted, consistent with what the format check accepts.
_ANSWER_PATTERN = re.compile(r"<answer>(.*?)</answer>", re.DOTALL)


def r1v_format_reward(predict_str: str) -> float:
    """Return 1.0 if the whole response follows the think/answer layout.

    The entire string must match ``<think>...</think>`` followed by
    ``<answer>...</answer>`` (optional whitespace in between); any leading
    or trailing text outside those blocks makes the check fail.

    Args:
        predict_str: The raw model response.

    Returns:
        1.0 when the response matches the layout, otherwise 0.0.
    """
    return 1.0 if _FORMAT_PATTERN.fullmatch(predict_str) else 0.0


def r1v_accuracy_reward(predict_str: str, ground_truth: str) -> float:
    """Return 1.0 if the predicted answer is graded as matching the reference.

    The answer is taken from the first ``<answer>...</answer>`` block; when
    the response contains no such block, the whole stripped response is
    graded instead. Grading is delegated to ``grade_answer``.

    Args:
        predict_str: The raw model response.
        ground_truth: The reference answer; surrounding whitespace is ignored.

    Returns:
        1.0 when ``grade_answer`` returns a truthy value, otherwise 0.0
        (including when extraction or grading raises).
    """
    try:
        target = ground_truth.strip()
        found = _ANSWER_PATTERN.search(predict_str)
        candidate = found.group(1).strip() if found else predict_str.strip()
        if grade_answer(candidate, target):
            return 1.0
    except Exception:
        # Deliberate best-effort: a response that cannot be extracted or
        # graded scores zero instead of aborting the caller.
        pass

    return 0.0


def r1v_compute_score(predict_str: str, ground_truth: str) -> float:
    """Return the mean of the accuracy reward and the format reward.

    Args:
        predict_str: The raw model response.
        ground_truth: The reference answer.

    Returns:
        0.0, 0.5 or 1.0: the average of the two binary rewards.
    """
    accuracy = r1v_accuracy_reward(predict_str, ground_truth)
    formatting = r1v_format_reward(predict_str)
    return (accuracy + formatting) / 2