"src/vscode:/vscode.git/clone" did not exist on "96cd2ee6533a13d2d7a74039380cfe615a3c5b13"
csv.py 1.26 KB
Newer Older
1
import logging
2

3
4
5
import pandas as pd
import pyarrow
import pyarrow.csv
6

7
8
from .registry import register_array_parser

9

10
11
@register_array_parser("csv")
class CSVArrayParser(object):
    """Read and write 2-D arrays as header-less delimited text via pyarrow.

    The class is passed to ``register_array_parser`` under the key ``"csv"``.
    """

    def __init__(self, delimiter=","):
        """Store the field delimiter used for both reading and writing.

        Args:
            delimiter: Separator placed between fields. Defaults to ``","``.
        """
        self.delimiter = delimiter

    def read(self, path):
        """Load the delimited file at ``path`` and return it as a NumPy array.

        Args:
            path: Location of the file, forwarded to ``pyarrow.csv.read_csv``.

        Returns:
            The result of converting the parsed table to pandas and then
            calling ``to_numpy()`` on it.
        """
        # Pass arguments lazily so logging does the formatting itself; this
        # also avoids a TypeError from ``"%s" % path`` when path is a tuple.
        logging.info(
            "Reading from %s using CSV format with configuration %s",
            path,
            self.__dict__,
        )
        # Autogenerated column names make pyarrow treat the first line as
        # data instead of consuming it as a header row.
        read_options = pyarrow.csv.ReadOptions(autogenerate_column_names=True)
        parse_options = pyarrow.csv.ParseOptions(delimiter=self.delimiter)
        table = pyarrow.csv.read_csv(
            path, read_options=read_options, parse_options=parse_options
        )
        logging.info("Done reading from %s", path)
        return table.to_pandas().to_numpy()

    def write(self, path, arr):
        """Write ``arr`` to ``path`` as delimited text without a header row.

        Args:
            path: Destination, forwarded to ``pyarrow.csv.write_csv``.
            arr: Data accepted by the ``pandas.DataFrame`` constructor.
        """
        logging.info(
            "Writing to %s using CSV format with configuration %s",
            path,
            self.__dict__,
        )
        # No header is written, which matches ``read`` treating every line
        # of the file as data.
        write_options = pyarrow.csv.WriteOptions(
            include_header=False, delimiter=self.delimiter
        )
        table = pyarrow.Table.from_pandas(pd.DataFrame(arr))
        pyarrow.csv.write_csv(table, path, write_options=write_options)
        logging.info("Done writing to %s", path)