make_hres_input.py 3.56 KB
Newer Older
tpys's avatar
tpys committed
1
2
3
4
5
6
import os
import numpy as np
import pandas as pd
import xarray as xr


tpys's avatar
tpys committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
"""
FuXi模型的输入变量:
    5个气压层变量: ['Z', 'T', 'U', 'V', 'R'], 
    每个变量包含13层: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000], 
    5个地面变量: ['T2M', 'U10', 'V10', 'MSL', 'TP'];
    
注意事项:
    1. 输入是连续的两个历史时刻, 间隔6小时, 分辨率是0.25; eg: [00, 06]做为输入那么起报时刻是06点;
    2. Z不是Geopential Height, 是 Geopential;
    3. 降水是6小时累积单位mm, 第一个时刻的降水可以置0;
    4. 温度是开尔文单位;
    5. R表示相对湿度;
    6. 纬度方向是90 ~ -90;
    7. 气压层的顺序是从高空到地面50 ~ 1000; eg: Z50, Z100, ... , Z1000;
    8. 数据中不能有NAN;
"""

tpys's avatar
tpys committed
24
def make_hres_input(init_time, data_dir, save_dir, degree=0.25):
tpys's avatar
tpys committed
25
    lat = np.linspace(-90, 90, int(180 / degree) + 1, dtype=np.float32)
tpys's avatar
tpys committed
26
27
    lon = np.arange(0, 360, degree, dtype=np.float32)

tpys's avatar
tpys committed
28
29
    pl_names = ["z", "t", "u", "v", "r"]
    sfc_names = ["t2m", "u10", "v10", "msl", "tp"]
tpys's avatar
tpys committed
30
    levels = [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
tpys's avatar
tpys committed
31

tpys's avatar
tpys committed
32
33
34
35
    input = []
    level = []

    for name in pl_names + sfc_names:
tpys's avatar
tpys committed
36
        src_name = "{}_{}".format(name, init_time.strftime("%Y%m%d%H.nc"))
tpys's avatar
tpys committed
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
        src_file = os.path.join(data_dir, src_name)

        if not os.path.exists(src_file):
            return

        try:
            v = xr.open_dataset(src_file)
            v = v.sel(time=init_time, drop=True).data
        except:
            print(f"open {src_file} failed")
            return

        # is there nan in raw data ?
        if np.isnan(v).sum() > 0:
            print(f"{src_name} has nan value")
            return

        # interpolate to 0.25 deg
        v = v.interp(lat=lat, lon=lon, kwargs={"fill_value": "extrapolate"})

        # make sure on nan
        if np.isnan(v).sum() > 0:
            print(f"{src_name} has nan value")
            return

        # reverse pressure level
        try:
            if name in pl_names:
tpys's avatar
tpys committed
65
66
                v = xr.concat([v.sel(level=l) for l in levels], "level")
                level.extend([f"{name}{l}" for l in levels])
tpys's avatar
tpys committed
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
        except:
            print("missing pressure level")
            return

        if name in sfc_names:
            level.append(name)

        # temperature in kelvin
        if name == "t":
            v = v + 273.15

        # FuXi take two step as input
        if name == "tp":
            v = v.clip(min=0, max=1000)
            zero = v * 0
            zero = zero.assign_coords(dtime=[0])
            v = xr.concat([zero, v], "dtime")

tpys's avatar
tpys committed
85
        print(f"{src_name}: {v.min().values:.2f} ~ {v.max().values:.2f}")
tpys's avatar
tpys committed
86
87

        v.attrs = {}
tpys's avatar
tpys committed
88
89
        v = v.rename({"dtime": "time"})
        v = v.squeeze("member").drop("member")
tpys's avatar
tpys committed
90
91
92
93
94
95
96
97
98
99
100
        input.append(v)

    # concat and reshape
    input = xr.concat(input, "level")
    input = input.transpose("time", "level", "lat", "lon")
    valid_time = init_time + pd.Timedelta(hours=6)  # utc time
    v = v.assign_coords(time=[init_time, valid_time])

    # reverse latitude
    input = input.reindex(lat=input.lat[::-1])
    input = input.assign_coords(level=level)
tpys's avatar
tpys committed
101
    input.name = "data"
tpys's avatar
tpys committed
102
103
104
105
106
107
108
109
110
111
112
113
114
115

    # save to nc
    print(input)
    save_name = os.path.join(save_dir, init_time.strftime("%Y%m%d-%H.nc"))
    input = input.astype(np.float32)
    input.to_netcdf(save_name)


def test_make_input():
    init_time = pd.to_datetime("20230731-12")  # must utc
    data_dir = "data/HRES"
    save_dir = "data/HRES/input"
    os.makedirs(save_dir, exist_ok=True)
    make_hres_input(init_time, data_dir, save_dir)