developing_transform.ipynb 4.81 KB
Newer Older
zhangwq5's avatar
all  
zhangwq5 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch_geometric.loader import DataLoader\n",
    "from datasets.PowerFlowData import PowerFlowData\n",
    "\n",
    "# Test split of case '14'.\n",
    "# NOTE(review): `split` presumably holds the train/val/test fractions -- confirm in PowerFlowData.\n",
    "# NOTE: when this cell was last run, torch_geometric emitted a UserWarning about directly\n",
    "# accessing the internal storage format `data` of an 'InMemoryDataset'.\n",
    "testset = PowerFlowData(root='data', case='14', split=[.5, .2, .3], task='test')\n",
    "# shuffle=False so batches come out in dataset order on every run\n",
    "test_loader = DataLoader(testset, batch_size=128, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data(x=[14, 9], edge_index=[2, 20], edge_attr=[20, 5], y=[14, 8])\n",
      "Data(x=[14, 16], edge_index=[2, 20], edge_attr=[20, 5], y=[14, 6])\n"
     ]
    }
   ],
   "source": [
    "# here you can experiment with the normalization function\n",
    "\n",
    "def transform(data, std_eps=0.1):\n",
    "    \"\"\"Select the model features, normalize them and append the prediction mask.\n",
    "\n",
    "    The `data` object is updated and returned: `data.x` becomes 4 one-hot\n",
    "    node-type columns + 6 normalized features (Vm, Va, Pd, Qd, Gs, Bs) + 6 mask\n",
    "    columns, `data.y` becomes the 6 normalized target features and\n",
    "    `data.edge_attr` is normalized column-wise. The tensor originally stored in\n",
    "    `data.x` is not written to, so storage shared with a dataset stays intact.\n",
    "\n",
    "    Args:\n",
    "        data: graph sample with `x` (columns: index, type, Vm, Va, Pd, Qd, Gs,\n",
    "            Bs, Pg), `y` (columns: index, type, then the same six features)\n",
    "            and `edge_attr`.\n",
    "        std_eps: constant added to every standard deviation to avoid NaN's\n",
    "            because of division by zero.\n",
    "\n",
    "    Returns:\n",
    "        The same `data` object with `x`, `y` and `edge_attr` replaced.\n",
    "    \"\"\"\n",
    "    raw_x = data.x\n",
    "\n",
    "    ## selecting the right features\n",
    "    # for x\n",
    "    # + 3 for the one-hot encoding for four node types, -2 because we remove the index and Pg\n",
    "    features = torch.zeros((raw_x.shape[0], raw_x.shape[1] + 3 - 2))\n",
    "    # num_classes is pinned: one_hot otherwise infers the width from the largest type id present,\n",
    "    # which breaks the 4-column assignment for a graph that lacks the highest type\n",
    "    features[:, 0:4] = torch.nn.functional.one_hot(raw_x[:, 1].type(torch.int64), num_classes=4)\n",
    "    features[:, 4:10] = raw_x[:, 2:8]\n",
    "    # Pd = Pd - Pg, written into the new tensor so the caller's tensor is not modified in place\n",
    "    features[:, 6] = raw_x[:, 4] - raw_x[:, 8]\n",
    "    data.x = features\n",
    "    # for y\n",
    "    # - 2 for removing index and type\n",
    "    data.y = data.y[:, 2:]\n",
    "\n",
    "    # SHAPE NOW: torch.Size([14, 10]) torch.Size([14, 6]) for x and y\n",
    "\n",
    "    ## normalizing\n",
    "    # for node attributes: statistics are shared between x and y, one value per\n",
    "    # feature column (Vm, Va, Pd, Qd, Gs, Bs); keepdim lets them broadcast over the rows\n",
    "    xy = torch.cat([data.x[:, 4:], data.y], dim=0)\n",
    "    mean = xy.mean(dim=0, keepdim=True)\n",
    "    std = xy.std(dim=0, keepdim=True)\n",
    "    data.x[:, 4:] = (data.x[:, 4:] - mean) / (std + std_eps)\n",
    "    data.y = (data.y - mean) / (std + std_eps)\n",
    "    # for edge attributes\n",
    "    edge_mean = data.edge_attr.mean(dim=0, keepdim=True)\n",
    "    edge_std = data.edge_attr.std(dim=0, keepdim=True)\n",
    "    data.edge_attr = (data.edge_attr - edge_mean) / (edge_std + std_eps)\n",
    "\n",
    "    ## adding the mask\n",
    "    # where x and y are unequal, the network must predict\n",
    "    unequal = (data.x[:, 4:] != data.y).float()\n",
    "    data.x = torch.cat([data.x, unequal], dim=1)\n",
    "\n",
    "    return data\n",
    "\n",
    "notnormed = PowerFlowData(root='data', case='14', split=[.5, .2, .3], task='test', normalize=False)\n",
    "data1 = notnormed[90]\n",
    "print(data1)\n",
    "print(transform(data1))\n",
    "\n",
    "# # print(xold.x[:,2:8]) # the old power values\n",
    "# # print(xold.y[:,2].float())\n",
    "# # print(xnew.x[:,4:10]) # the normalized ones\n",
    "# # print(xnew.x[:,10:]) # the mask to know which values the network must predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data(x=[14, 9], edge_index=[2, 20], edge_attr=[20, 5], y=[14, 8])\n",
      "Data(x=[14, 16], edge_index=[2, 20], edge_attr=[20, 5], y=[14, 6])\n"
     ]
    }
   ],
   "source": [
    "# here you can see it in action\n",
    "\n",
    "def load_testset(normalize):\n",
    "    \"\"\"Build the case '14' test dataset, varying only the `normalize` flag.\"\"\"\n",
    "    return PowerFlowData(root='data', case='14', split=[.5, .2, .3], task='test', normalize=normalize)\n",
    "\n",
    "notnormed = load_testset(False)\n",
    "normed = load_testset(True)\n",
    "\n",
    "# same sample index from both datasets, so the printed shapes are directly comparable\n",
    "data1 = notnormed[90]\n",
    "data2 = normed[90]\n",
    "\n",
    "print(data1, data2, sep='\\n')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "graphml_proj",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}