"sgl-kernel/vscode:/vscode.git/clone" did not exist on "db7343c99201d58b7e3026c95050b245a1af640a"
np_utils.py 1.32 KB
Newer Older
wanglch's avatar
wanglch committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict, List, Tuple, Union

import numpy as np
from numpy import ndarray
from numpy.random import RandomState
from pandas import DataFrame
from torch import Tensor


def transform_jsonl_to_df(dict_list: List[Dict[str, Any]]) -> DataFrame:
    """Relevant function: `io_utils.read_from_jsonl()`"""
    data_dict: Dict[str, List[Any]] = {}
    for i, obj in enumerate(dict_list):
        for k, v in obj.items():
            if k not in data_dict:
                data_dict[k] = [None] * i
            data_dict[k].append(v)
        for k in set(data_dict.keys()) - set(obj.keys()):
            data_dict[k].append(None)
    return DataFrame.from_dict(data_dict)


def get_seed(random_state: RandomState) -> int:
    seed_max = np.iinfo(np.int32).max
    seed = random_state.randint(0, seed_max)
    return seed


def stat_array(array: Union[ndarray, List[int], Tensor]) -> Tuple[Dict[str, float], str]:
    if isinstance(array, list):
        array = np.array(array)
    mean = array.mean().item()
    std = array.std().item()
    min_ = array.min().item()
    max_ = array.max().item()
    size = array.shape[0]
    string = f'{mean:.6f}±{std:.6f}, min={min_:.6f}, max={max_:.6f}, size={size}'
    return {'mean': mean, 'std': std, 'min': min_, 'max': max_, 'size': size}, string