# Convert Parquet to JSON (AIME)
import pandas as pd

# NOTE: the snippet below is deliberately disabled by wrapping it in a string
# literal. It documents the one-off step that produced ./aime_2024.json from
# the downloaded Parquet file; it is not executed by this script.
'''
# Specify the Parquet file path
# link: https://huggingface.co/datasets/AI-MO/aimo-validation-aime
parquet_file = "./train-00000-of-00001.parquet"

# Use pandas to read the Parquet file
df = pd.read_parquet(parquet_file)

# Filter the DataFrame to keep only rows where '2024_AIME' appears in the 'url' column
filtered_df = df[df['url'].str.contains('2024_AIME', na=False)]

# Print the first few rows of the filtered DataFrame to confirm
print(filtered_df.head())

# Export to a JSON file with indentation
json_file = "./aime_2024.json"
filtered_df.to_json(json_file, orient='records', force_ascii=False, indent=4)

print(f"Filtered data has been saved to {json_file}")
'''

# Data preprocess for AIME
import csv
import json

# Input: JSON array written by the (disabled) Parquet export step above.
test_path = './aime_2024.json'
# Output: JSON array of normalized records.
output_path = './test.json'


def build_records(problems):
    """Map raw problem entries to the normalized record layout.

    Args:
        problems: Iterable of mappings, each providing the keys
            ``'problem'``, ``'solution'`` and ``'answer'``.

    Returns:
        A list of dicts with the keys ``'id'`` (0-based position in
        ``problems``), ``'Question'``, ``'Solution'`` and ``'answer'``.

    Raises:
        KeyError: If an entry lacks one of the required keys.
        ValueError: If an answer cannot be converted by ``int()``.
    """
    return [
        {
            'id': index,
            'Question': entry['problem'],
            'Solution': entry['solution'],
            # Round-trip through int() so the stored answer is a canonical
            # integer string (e.g. '033' -> '33', 204.0 -> '204').
            'answer': str(int(entry['answer'])),
        }
        for index, entry in enumerate(problems)
    ]


def main():
    """Read ``test_path``, normalize every entry and write ``output_path``."""
    # tqdm is only needed for the progress bar of the actual run, so it is
    # imported here; build_records() stays importable without it.
    from tqdm import tqdm

    with open(test_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    data_list = build_records(tqdm(data))

    # Write the updated data to JSON
    with open(output_path, mode='w', encoding='utf-8') as json_file:
        json.dump(data_list, json_file, indent=4, ensure_ascii=False)


# Running the file as a script performs the conversion exactly as before;
# importing it no longer triggers file I/O.
if __name__ == '__main__':
    main()