"test/srt/test_mla_deepseek_v3.py" did not exist on "bb66cc4c52b1440a8e85247b706b2b3d645e902d"
convert_parquet_to_json.py 661 Bytes
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import sys

import pyarrow.parquet as pq


def convert_parquet_to_json(input_file, output_file):
    """Convert a Parquet file into a line-delimited JSON file.

    The Parquet data is loaded with ``pq.read_table``, turned into a pandas
    DataFrame, and serialized using ``orient="records"`` with ``lines=True``.
    The complete serialized string is built in memory and then written to
    ``output_file`` in a single call.

    Args:
        input_file: Parquet source, passed straight to ``pq.read_table``.
        output_file: Destination, passed straight to ``open(..., "w")``, so
            any existing content is replaced.
    """
    # Load the Parquet table and materialize it as a DataFrame in one step.
    frame = pq.read_table(input_file).to_pandas()

    # Serialize each row as a record, with line-delimited output requested.
    serialized = frame.to_json(orient="records", lines=True)

    # Persist the serialized text to the destination.
    with open(output_file, "w") as sink:
        sink.write(serialized)


if __name__ == "__main__":
    # Exactly two positional arguments are required after the script name.
    if len(sys.argv) != 3:
        print("Usage:python convert_parquet_to_json.py <input_file> <output_file>")
        # Stop here with a failure status. Without this, too few arguments
        # would crash with IndexError on the lookups below, and too many
        # would be silently accepted.
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]

    convert_parquet_to_json(input_file, output_file)