developing_transform.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\nataxcan\\miniconda3\\envs\\graphml_proj\\lib\\site-packages\\torch_geometric\\data\\in_memory_dataset.py:157: UserWarning: It is not recommended to directly access the internal storage format `data` of an 'InMemoryDataset'. If you are absolutely certain what you are doing, access the internal storage via `InMemoryDataset._data` instead to suppress this warning. Alternatively, you can access stacked individual attributes of every graph via `dataset.{attr_name}`.\n",
      "  warnings.warn(msg)\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "from torch_geometric.loader import DataLoader\n",
    "from datasets.PowerFlowData import PowerFlowData\n",
    "\n",
    "testset = PowerFlowData(root='data', case='14', split=[.5, .2, .3], task='test')\n",
    "test_loader = DataLoader(testset, batch_size=128, shuffle=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data(x=[14, 9], edge_index=[2, 20], edge_attr=[20, 5], y=[14, 8])\n",
      "Data(x=[14, 16], edge_index=[2, 20], edge_attr=[20, 5], y=[14, 6])\n"
     ]
    }
   ],
   "source": [
    "# here you can experiment with the normalization function\n",
    "\n",
    "def transform(data):\n",
    "    ## selecting the right features\n",
    "    # for x\n",
    "    data.x[:,4] = data.x[:,4] - data.x[:,8] # Pd = Pd - Pg\n",
    "    # + 3 for the one-hot encoding for four node types, -2 because we remove the index and Pg\n",
    "    template = torch.zeros((data.x.shape[0], data.x.shape[1] + 3 - 2))\n",
    "    template[:,0:4] = torch.nn.functional.one_hot(data.x[:,1].type(torch.int64))\n",
    "    template[:,4:10] = data.x[:,2:8]\n",
    "    data.x = template\n",
    "    # for y\n",
    "    # - 2 for removing index and type\n",
    "    data.y = data.y[:,2:]\n",
    "\n",
    "    # SHAPE NOW: torch.Size([14, 10]) torch.Size([14, 6]) for x and y\n",
    "\n",
    "    ## normalizing\n",
    "    # for node attributes\n",
    "    xy = torch.concat([data.x[:,4:], data.y], dim=0)\n",
    "    mean = torch.mean(xy, dim=0).unsqueeze(dim=0).expand(data.x.shape[0], 6)# 6 for:\n",
    "    std = torch.std(xy, dim=0).unsqueeze(dim=0).expand(data.x.shape[0], 6)#   Vm, Va, Pd, Qd, Gs, Bs\n",
    "    data.x[:,4:] = (data.x[:,4:] - mean) / (std + 0.1) # + 0.1 to avoid NaN's because of division by zero\n",
    "    data.y = (data.y - mean) / (std + 0.1)\n",
    "    # for edge attributes\n",
    "    mean = torch.mean(data.edge_attr, dim=0).unsqueeze(dim=0).expand(data.edge_attr.shape[0], data.edge_attr.shape[1])\n",
    "    std = torch.std(data.edge_attr, dim=0).unsqueeze(dim=0).expand(data.edge_attr.shape[0], data.edge_attr.shape[1])\n",
    "    data.edge_attr = (data.edge_attr - mean) / (std + 0.1)\n",
    "\n",
    "    ## adding the mask\n",
    "    # where x and y are unequal, the network must predict\n",
    "    unequal = (data.x[:,4:] != data.y).float()\n",
    "    data.x = torch.concat([data.x, unequal], dim=1)\n",
    "\n",
    "    return data\n",
    "\n",
    "notnormed = PowerFlowData(root='data', case='14', split=[.5, .2, .3], task='test', normalize=False)\n",
    "data1 = notnormed[90]\n",
    "print(data1)\n",
    "print(transform(data1))\n",
    "\n",
    "# # print(xold.x[:,2:8]) # the old power values\n",
    "# # print(xold.y[:,2].float())\n",
    "# # print(xnew.x[:,4:10]) # the normalized ones\n",
    "# # print(xnew.x[:,10:]) # the mask to know which "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Data(x=[14, 9], edge_index=[2, 20], edge_attr=[20, 5], y=[14, 8])\n",
      "Data(x=[14, 16], edge_index=[2, 20], edge_attr=[20, 5], y=[14, 6])\n"
     ]
    }
   ],
   "source": [
    "# here you can see it in action\n",
    "\n",
    "notnormed = PowerFlowData(root='data', case='14', split=[.5, .2, .3], task='test', normalize=False)\n",
    "normed = PowerFlowData(root='data', case='14', split=[.5, .2, .3], task='test', normalize=True)\n",
    "data1 = notnormed[90]\n",
    "data2 = normed[90]\n",
    "\n",
    "print(data1)\n",
    "print(data2)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "graphml_proj",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.16"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}