# coding: utf8
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
TEST_PATH = os.path.join(LOCAL_PATH, "../../../", "test")
sys.path.append(TEST_PATH)
from paddleseg.utils.download import download_file_and_uncompress
model_urls = {
"pphumanseg_lite_portrait_398x224_with_softmax":
"https://paddleseg.bj.bcebos.com/dygraph/ppseg/ppseg_lite_portrait_398x224_with_softmax.tar.gz",
"deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax":
"https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax.zip",
"fcn_hrnetw18_small_v1_humanseg_192x192_with_softmax":
"https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/fcn_hrnetw18_small_v1_humanseg_192x192_with_softmax.zip",
"pphumanseg_lite_generic_humanseg_192x192_with_softmax":
"https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/pphumanseg_lite_generic_192x192_with_softmax.zip",
}
if __name__ == "__main__":
for model_name, url in model_urls.items():
download_file_and_uncompress(
url=url,
savepath=LOCAL_PATH,
extrapath=LOCAL_PATH,
extraname=model_name)
print("Export model download success!")
# coding: utf8
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
TEST_PATH = os.path.join(LOCAL_PATH, "../../../", "test")
sys.path.append(TEST_PATH)
from paddleseg.utils.download import download_file_and_uncompress
model_urls = {
"pphumanseg_lite_portrait_398x224":
"https://paddleseg.bj.bcebos.com/dygraph/ppseg/ppseg_lite_portrait_398x224.tar.gz",
"deeplabv3p_resnet50_os8_humanseg_512x512_100k":
"https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/deeplabv3p_resnet50_os8_humanseg_512x512_100k.zip",
"fcn_hrnetw18_small_v1_humanseg_192x192":
"https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/fcn_hrnetw18_small_v1_humanseg_192x192.zip",
"pphumanseg_lite_generic_human_192x192":
"https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/pphumanseg_lite_generic_192x192.zip",
}
if __name__ == "__main__":
for model_name, url in model_urls.items():
download_file_and_uncompress(
url=url,
savepath=LOCAL_PATH,
extrapath=LOCAL_PATH,
extraname=model_name)
print("Pretrained model download success!")
# StyleGAN-Human: A Data-Centric Odyssey of Human Generation
<img src="./img/demo_V5_thumbnails-min.png" width="96%" height="96%">
>
>
> **Abstract:** *Unconditional human image generation is an important task in vision and graphics, which enables various applications in the creative industry. Existing studies in this field mainly focus on "network engineering" such as designing new components and objective functions. This work takes a data-centric perspective and investigates multiple critical aspects in "data engineering", which we believe would complement the current practice. To facilitate a comprehensive study, we collect and annotate a large-scale human image dataset with over 230K samples capturing diverse poses and textures. Equipped with this large dataset, we rigorously investigate three essential factors in data engineering for StyleGAN-based human generation, namely data size, data distribution, and data alignment. Extensive experiments reveal several valuable observations w.r.t. these aspects: 1) Large-scale data, more than 40K images, are needed to train a high-fidelity unconditional human generation model with vanilla StyleGAN. 2) A balanced training set helps improve the generation quality with rare face poses compared to the long-tailed counterpart, whereas simply balancing the clothing texture distribution does not effectively bring an improvement. 3) Human GAN models with body centers for alignment outperform models trained using face centers or pelvis points as alignment anchors. In addition, a model zoo and human editing applications are demonstrated to facilitate future research in the community.* <br>
**Keyword:** Human Image Generation, Data-Centric, StyleGAN
[Jianglin Fu](mailto:fujianglin@sensetime.com), [Shikai Li](mailto:lishikai@sensetime.com), [Yuming Jiang](https://yumingj.github.io/), [Kwan-Yee Lin](https://kwanyeelin.github.io/), [Chen Qian](https://scholar.google.com/citations?user=AerkT0YAAAAJ&hl=zh-CN), [Chen Change Loy](https://www.mmlab-ntu.com/person/ccloy/), [Wayne Wu](https://wywu.github.io/), and [Ziwei Liu](https://liuziwei7.github.io/) <br>
**[[Demo Video]](https://youtu.be/nIrb9hwsdcI)** | **[[Project Page]](https://stylegan-human.github.io/)** | **[[Paper]](https://arxiv.org/pdf/2204.11823.pdf)**
## Updates
- [20/07/2022] [SHHQ-1.0](./docs/Dataset.md) dataset with 40K images is released! :sparkles:
- [15/06/2022] Data alignment and real-image inversion scripts are released.
- [26/04/2022] Technical report released!
- [22/04/2022] Technical report will be released before May.
- [21/04/2022] The codebase and project page are created.
## Data Download
The first version, SHHQ-1.0, with 40K images, is released. To download and use the dataset, please read the instructions in [Dataset.md](./docs/Dataset.md).
(We are currently receiving a large number of applications and need to verify each applicant carefully. Please be patient; we will reply to you as soon as possible.)
## Model Zoo
| Structure | 1024x512 | Metric | Scores | 512x256 | Metric | Scores |
| --------- |:----------:| :----------:| :----------:| :-----: | :-----: | :-----: |
| StyleGAN1 |[stylegan_human_v1_1024.pkl](https://drive.google.com/file/d/1h-R-IV-INGdPEzj4P9ml6JTEvihuNgLX/view?usp=sharing)| fid50k | 3.79 | to be released | - | - |
| StyleGAN2 |[stylegan_human_v2_1024.pkl](https://drive.google.com/file/d/1FlAb1rYa0r_--Zj_ML8e6shmaF28hQb5/view?usp=sharing)| fid50k_full | 1.57 |[stylegan_human_v2_512.pkl](https://drive.google.com/file/d/1dlFEHbu-WzQWJl7nBBZYcTyo000H9hVm/view?usp=sharing) | fid50k_full | 1.97 |
| StyleGAN3 |to be released | - | - | [stylegan_human_v3_512.pkl](https://drive.google.com/file/d/1_274jk_N6WSCkKWeu7hjHycqGvbuOFf5/view?usp=sharing) | fid50k_full | 2.54 |
## Web Demo
Integrated into [Huggingface Spaces 🤗](https://huggingface.co/spaces) using [Gradio](https://github.com/gradio-app/gradio). Try out the Web Demo for generation: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/hysts/StyleGAN-Human) and interpolation [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/hysts/StyleGAN-Human-Interpolation)
<a href="https://colab.research.google.com/drive/1sgxoDM55iM07FS54vz9ALg1XckiYA2On"><img src="https://colab.research.google.com/assets/colab-badge.svg" height=22.5></a>
We provide a Colab demo that lets you synthesize images with the provided models and visualize the results of style-mixing, interpolation, and attribute editing.
The notebook guides you through installing the necessary environment and downloading the pretrained models. The output images can be found in `./StyleGAN-Human/outputs/`.
Hope you enjoy!
## Usage
### System requirements
* The original code bases are [stylegan (tensorflow)](https://github.com/NVlabs/stylegan), [stylegan2-ada (pytorch)](https://github.com/NVlabs/stylegan2-ada-pytorch), and [stylegan3 (pytorch)](https://github.com/NVlabs/stylegan3), released by NVIDIA.
* We tested with Python 3.8.5 and PyTorch 1.9.1 with CUDA 11.1. (See https://pytorch.org for PyTorch install instructions.)
### Installation
To work with this project on your own machine, install the environment as follows:
```
conda env create -f environment.yml
conda activate stylehuman
# [Optional: tensorflow 1.x is required for StyleGAN1. ]
pip install nvidia-pyindex
pip install nvidia-tensorflow[horovod]
pip install nvidia-tensorboard==1.15
```
Extra notes:
1. If you encounter conflicts related to the CUDA version, try emptying LD_LIBRARY_PATH. For example:
```
LD_LIBRARY_PATH=; python generate.py --outdir=out/stylegan_human_v2_1024 --trunc=1 --seeds=1,3,5,7 \
--network=pretrained_models/stylegan_human_v2_1024.pkl --version 2
```
2. We found the following troubleshooting links might be helpful: [1.](https://github.com/NVlabs/stylegan3), [2.](https://github.com/NVlabs/stylegan3/blob/main/docs/troubleshooting.md)
### Train
The training scripts are based on the original [stylegan1](https://github.com/NVlabs/stylegan), [stylegan2-ada](https://github.com/NVlabs/stylegan2-ada-pytorch), and [stylegan3](https://github.com/NVlabs/stylegan3) with minor changes. Here we only provide the modified scripts for SG2 and SG3; replace the original files with the provided scripts to train (assuming SHHQ-1.0 is placed under data/).
#### Train Stylegan2-ada-pytorch with SHHQ-1.0
```
python train.py --outdir=training_results/sg2/ --data=data/SHHQ-1.0/ \
--gpus=8 --aug=noaug --mirror=1 --snap=250 --cfg=shhq --square=False
```
#### Train Stylegan3 with SHHQ-1.0
```
python train.py --outdir=training_results/sg3/ --cfg=stylegan3-r --gpus=8 --batch=32 --gamma=12.4 \
--mirror=1 --aug=noaug --data=data/SHHQ-1.0/ --square=False --snap=250
```
### Pretrained models
Please put the pretrained models downloaded [from the Model Zoo above](#Model-Zoo) under the folder 'pretrained_models'.
### Generate full-body human images using our pretrained model
```
# Generate human full-body images without truncation
python generate.py --outdir=outputs/generate/stylegan_human_v2_1024 --trunc=1 --seeds=1,3,5,7 --network=pretrained_models/stylegan_human_v2_1024.pkl --version 2
# Generate human full-body images with truncation
python generate.py --outdir=outputs/generate/stylegan_human_v2_1024 --trunc=0.8 --seeds=0-10 --network=pretrained_models/stylegan_human_v2_1024.pkl --version 2
# Generate human full-body images using stylegan V1
python generate.py --outdir=outputs/generate/stylegan_human_v1_1024 --network=pretrained_models/stylegan_human_v1_1024.pkl --version 1 --seeds=1,3,5
# Generate human full-body images using stylegan V3
python generate.py --outdir=outputs/generate/stylegan_human_v3_512 --network=pretrained_models/stylegan_human_v3_512.pkl --version 3 --seeds=1,3,5
```
#### Note: The following demos are generated with the StyleGAN V2 models (stylegan_human_v2_512.pkl and stylegan_human_v2_1024.pkl). If you want to see results for V1 or V3, you need to change how the corresponding models are loaded.
### Interpolation
```
python interpolation.py --network=pretrained_models/stylegan_human_v2_1024.pkl --seeds=85,100 --outdir=outputs/inter_gifs
```
### Style-mixing **image** using stylegan2
```
python style_mixing.py --network=pretrained_models/stylegan_human_v2_1024.pkl --rows=85,100,75,458,1500 \
--cols=55,821,1789,293 --styles=0-3 --outdir=outputs/stylemixing
```
### Style-mixing **video** using stylegan2
```
python stylemixing_video.py --network=pretrained_models/stylegan_human_v2_1024.pkl --row-seed=3859 \
--col-seeds=3098,31759,3791 --col-styles=8-12 --trunc=0.8 --outdir=outputs/stylemixing_video
```
### Aligned raw images
For alignment, we use [openpose-pytorch](https://github.com/Hzzone/pytorch-openpose) for body-keypoints detection and [PaddlePaddle](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.5/contrib/PP-HumanSeg) for human segmentation.
Before running the alignment script, a few models need to be downloaded:
1. download [body_pose_model.pth](https://drive.google.com/drive/folders/1JsvI4M4ZTg98fmnCZLFM-3TeovnCRElG?usp=sharing) and place it into openpose/model/.
2. download and extract [deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax](https://paddleseg.bj.bcebos.com/dygraph/humanseg/export/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax.zip) into PP_HumanSeg/export_model/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax.
3. download and extract [deeplabv3p_resnet50_os8_humanseg_512x512_100k](https://paddleseg.bj.bcebos.com/dygraph/humanseg/train/deeplabv3p_resnet50_os8_humanseg_512x512_100k.zip) into PP_HumanSeg/pretrained_model/deeplabv3p_resnet50_os8_humanseg_512x512_100k.
4. Install paddleseg: `pip install paddleseg`
Then you can start alignment:
```
python alignment.py --image-folder img/test/ --output-folder aligned_image/
```
### Invert real image with [PTI](https://github.com/danielroich/PTI)
Before inversion, please download our PTI weights [e4e_w+.pt](https://drive.google.com/file/d/1NUfSJqLhsrU7c9PwAtlZ9xtrxhzS_6tu/view?usp=sharing) and place them into /pti/.
A few parameters you can change (a sketch of these edits follows the list):
- /pti/pti_configs/hyperparameters.py:
- first_inv_type = 'w+' -> Use pretrained e4e encoder
- first_inv_type = 'w' -> Use projection and optimization
- /pti/pti_configs/paths_config.py:
- input_data_path: path of real images
- e4e: path of e4e_w+.pt
- stylegan2_ada_shhq: pretrained stylegan2-ada model for SHHQ
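As a reference, here is a minimal sketch of what these edits might look like; the variable names come from the list above, while the example values (paths and model file) are only illustrative assumptions:
```
# pti/pti_configs/hyperparameters.py (sketch)
first_inv_type = 'w+'   # 'w+': use the pretrained e4e encoder; 'w': use projection and optimization

# pti/pti_configs/paths_config.py (sketch; example values are assumptions)
input_data_path = 'aligned_image/'   # folder of aligned real images (output of alignment.py)
e4e = './pti/e4e_w+.pt'              # the downloaded e4e weights
stylegan2_ada_shhq = 'pretrained_models/stylegan_human_v2_1024.pkl'   # pretrained stylegan2-ada model for SHHQ
```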
```
python run_pti.py
```
Note: we use the test image under 'aligned_image/' (the output of alignment.py). The inverted latent code and the fine-tuned generator will be saved in 'outputs/pti/'.
### Editing with InterfaceGAN, StyleSpace, and Sefa
```
python edit.py --network pretrained_models/stylegan_human_v2_1024.pkl --attr_name upper_length \
--seeds 61531,61570,61571,61610 --outdir outputs/edit_results
```
### Editing using inverted latent code
```
python edit.py --network outputs/pti/checkpoints/model_test.pkl --attr_name upper_length \
--outdir outputs/edit_results --real True --real_w_path outputs/pti/embeddings/test/PTI/test/0.pt --real_img_path aligned_image/test.png
```
Note:
1. `upper_length` and `bottom_length` are the available `attr_name` options in this demo.
2. The layers to control and the editing strength are set in edit/edit_config.py.
### Demo for [InsetGAN](https://arxiv.org/abs/2203.07293)
We implement a quick demo using the key idea from InsetGAN: combining a face generated by an FFHQ model with a human body generated by our pretrained model, and optimizing both the face and body latent codes to obtain a coherent full-body image.
Before running the script, download the [FFHQ face model](https://docs.google.com/uc?export=download&confirm=t&id=125OG7SMkXI-Kf2aqiwLLHyCvSW-gZk3M) (or use your own face model), as well as the [pretrained face landmark model](https://docs.google.com/uc?export=download&confirm=&id=1A82DnJBJzt8wI2J8ZrCK5fgHcQ2-tcWM) and the [pretrained CNN face detection model for dlib](https://docs.google.com/uc?export=download&confirm=&id=1MduBgju5KFNrQfDLoQXJ_1_h5MnctCIG).
```
python insetgan.py --body_network=pretrained_models/stylegan_human_v2_1024.pkl --face_network=pretrained_models/ffhq.pkl \
--body_seed=82 --face_seed=43 --trunc=0.6 --outdir=outputs/insetgan/ --video 1
```
## Results
### Editing with inverted real image
(from left to right: real image | inverted image | InterFaceGAN result | StyleSpace result | SeFa result)
https://user-images.githubusercontent.com/98547009/173773800-bb7fe54a-84d3-4b30-9864-a6b7b311f8ff.mp4
### For more demos, please visit our [**web page**](https://stylegan-human.github.io/).
## TODO List
- [ ] Release 1024x512 version of StyleGAN-Human based on StyleGAN3
- [ ] Release 512x256 version of StyleGAN-Human based on StyleGAN1
- [ ] Extension of downstream application (InsetGAN): Add face inversion interface to support fusing user face image and stylegan-human body image
- [x] Add Inversion Script into the provided editing pipeline
- [ ] Release Dataset
## Related Works
* (SIGGRAPH 2022) **Text2Human: Text-Driven Controllable Human Image Generation**, Yuming Jiang et al. [[Paper](https://arxiv.org/pdf/2205.15996.pdf)], [[Code](https://github.com/yumingj/Text2Human)], [[Project Page](https://yumingj.github.io/projects/Text2Human.html)], [[Dataset](https://github.com/yumingj/DeepFashion-MultiModal)]
* (ICCV 2021) **Talk-to-Edit: Fine-Grained Facial Editing via Dialog**, Yuming Jiang et al. [[Paper](https://arxiv.org/abs/2109.04425)], [[Code](https://github.com/yumingj/Talk-to-Edit)], [[Project Page](https://www.mmlab-ntu.com/project/talkedit/)], [[Dataset](https://mmlab.ie.cuhk.edu.hk/projects/CelebA/CelebA_Dialog.html)]
* (Technical Report 2022) **Generalizable Neural Performer: Learning Robust Radiance Fields for Human Novel View Synthesis**, Wei Cheng et al. [[Paper](https://arxiv.org/pdf/2204.11798.pdf)], [[Code](https://github.com/generalizable-neural-performer/gnr)], [[Project Page](https://generalizable-neural-performer.github.io/)], [[Dataset](https://generalizable-neural-performer.github.io/genebody.html)]
## Citation
If you find this work useful for your research, please consider citing our paper:
```bibtex
@article{fu2022styleganhuman,
title={StyleGAN-Human: A Data-Centric Odyssey of Human Generation},
author={Fu, Jianglin and Li, Shikai and Jiang, Yuming and Lin, Kwan-Yee and Qian, Chen and Loy, Chen-Change and Wu, Wayne and Liu, Ziwei},
journal = {arXiv preprint},
volume = {arXiv:2204.11823},
year = {2022}
}
```
## Acknowledgement
Part of the code is borrowed from [stylegan (tensorflow)](https://github.com/NVlabs/stylegan), [stylegan2-ada (pytorch)](https://github.com/NVlabs/stylegan2-ada-pytorch), [stylegan3 (pytorch)](https://github.com/NVlabs/stylegan3).
# Copyright (c) SenseTime Research. All rights reserved.
import os
import argparse
import numpy as np
import torch
from torch.utils.data import DataLoader
from torchvision.transforms import transforms
from utils.ImagesDataset import ImagesDataset
import cv2
import time
import copy
import imutils
# for openpose body keypoint detector : # (src:https://github.com/Hzzone/pytorch-openpose)
from openpose.src import util
from openpose.src.body import Body
# for paddlepaddle human segmentation : #(src: https://github.com/PaddlePaddle/PaddleSeg/blob/release/2.5/contrib/PP-HumanSeg/)
from PP_HumanSeg.deploy.infer import Predictor as PP_HumenSeg_Predictor
import math
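# Angle at p1 (in degrees) between the segments p1->p0 and p1->p2, computed via the law of cosines.
# Returns -1 if any keypoint is missing (its y-coordinate is -1) or one of the segments is degenerate.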
def angle_between_points(p0,p1,p2):
if p0[1]==-1 or p1[1]==-1 or p2[1]==-1:
return -1
a = (p1[0]-p0[0])**2 + (p1[1]-p0[1])**2
b = (p1[0]-p2[0])**2 + (p1[1]-p2[1])**2
c = (p2[0]-p0[0])**2 + (p2[1]-p0[1])**2
if a * b == 0:
return -1
return math.acos((a+b-c) / math.sqrt(4*a*b)) * 180 / math.pi
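# Crop the image around the body center (midpoint of the shoulder and hip keypoints), pad the crop
# to a 1:2 (w:h) aspect ratio by replicating border pixels, and resize the result to 512x1024.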
def crop_img_with_padding(img, keypoints, rect):
person_xmin,person_xmax, ymin, ymax= rect
img_h,img_w,_ = img.shape ## find body center using keypoints
middle_shoulder_x = keypoints[1][0]
middle_hip_x = (keypoints[8][0] + keypoints[11][0]) // 2
mid_x = (middle_hip_x + middle_shoulder_x) // 2
mid_y = (ymin + ymax) // 2
## find which side (l or r) is further than center x, use the further side
if abs(mid_x-person_xmin) > abs(person_xmax-mid_x): #left further
xmin = person_xmin
xmax = mid_x + (mid_x-person_xmin)
else:
        ### xmin may be negative; in that case the script won't output any image.
        ### We leave it like this since we don't want to pad the human body.
xmin = mid_x - (person_xmax-mid_x)
xmax = person_xmax
w = xmax - xmin
h = ymax - ymin
## pad rectangle to w:h = 1:2 ## calculate desired border length
if h / w >= 2: #pad horizontally
target_w = h // 2
xmin_prime = int(mid_x - target_w / 2)
xmax_prime = int(mid_x + target_w / 2)
if xmin_prime < 0:
pad_left = abs(xmin_prime)# - xmin
xmin = 0
else:
pad_left = 0
xmin = xmin_prime
if xmax_prime > img_w:
pad_right = xmax_prime - img_w
xmax = img_w
else:
pad_right = 0
xmax = xmax_prime
cropped_img = img[int(ymin):int(ymax), int(xmin):int(xmax)]
im_pad = cv2.copyMakeBorder(cropped_img, 0, 0, int(pad_left), int(pad_right), cv2.BORDER_REPLICATE)
else: #pad vertically
target_h = w * 2
ymin_prime = mid_y - (target_h / 2)
ymax_prime = mid_y + (target_h / 2)
if ymin_prime < 0:
pad_up = abs(ymin_prime)# - ymin
ymin = 0
else:
pad_up = 0
ymin = ymin_prime
if ymax_prime > img_h:
pad_down = ymax_prime - img_h
ymax = img_h
else:
pad_down = 0
ymax = ymax_prime
print(ymin,ymax, xmin,xmax, img.shape)
cropped_img = img[int(ymin):int(ymax), int(xmin):int(xmax)]
im_pad = cv2.copyMakeBorder(cropped_img, int(pad_up), int(pad_down), 0,
0, cv2.BORDER_REPLICATE)
result = cv2.resize(im_pad,(512,1024),interpolation = cv2.INTER_AREA)
return result
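# Alignment pipeline for a folder of images: segment the person with PP-HumanSeg, pad the composite
# so the body contour has some margin, detect body keypoints with OpenPose (skipping images that do
# not contain exactly one sufficiently complete person), then crop/pad to 1:2 and save the 512x1024 result.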
def run(args):
os.makedirs(args.output_folder, exist_ok=True)
dataset = ImagesDataset(args.image_folder, transforms.Compose([transforms.ToTensor()]))
dataloader = DataLoader(dataset, batch_size=1, shuffle=False)
body_estimation = Body('openpose/model/body_pose_model.pth')
total = len(dataloader)
print('Num of dataloader : ', total)
os.makedirs(f'{args.output_folder}', exist_ok=True)
# os.makedirs(f'{args.output_folder}/middle_result', exist_ok=True)
    ## initialize HumanSeg
human_seg_args = {}
human_seg_args['cfg'] = 'PP_HumanSeg/export_model/deeplabv3p_resnet50_os8_humanseg_512x512_100k_with_softmax/deploy.yaml'
human_seg_args['input_shape'] = [1024,512]
human_seg_args['save_dir'] = args.output_folder
human_seg_args['soft_predict'] = False
human_seg_args['use_gpu'] = True
human_seg_args['test_speed'] = False
human_seg_args['use_optic_flow'] = False
human_seg_args['add_argmax'] = True
human_seg_args= argparse.Namespace(**human_seg_args)
human_seg = PP_HumenSeg_Predictor(human_seg_args)
from tqdm import tqdm
for fname, image in tqdm(dataloader):
# try:
## tensor to numpy image
fname = fname[0]
print(f'Processing \'{fname}\'.')
image = (image.permute(0, 2, 3, 1) * 255).clamp(0, 255)
image = image.squeeze(0).numpy() # --> tensor to numpy, (H,W,C)
# avoid super high res img
if image.shape[0] >= 2000: # height ### for shein image
ratio = image.shape[0]/1200 #height
dim = (int(image.shape[1]/ratio),1200)#(width, height)
image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
## create segmentation
# mybg = cv2.imread('mybg.png')
comb, segmentation, bg, ori_img = human_seg.run(image,None) #mybg)
# cv2.imwrite('comb.png',comb) # [0,255]
# cv2.imwrite('alpha.png',segmentation*255) # segmentation [0,1] --> [0.255]
# cv2.imwrite('bg.png',bg) #[0,255]
# cv2.imwrite('ori_img.png',ori_img) # [0,255]
masks_np = (segmentation* 255)# .byte().cpu().numpy() #1024,512,1
mask0_np = masks_np[:,:,0].astype(np.uint8)#[0, :, :]
contours = cv2.findContours(mask0_np, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(contours)
c = max(cnts, key=cv2.contourArea)
extTop = tuple(c[c[:, :, 1].argmin()][0])
extBot = tuple(c[c[:, :, 1].argmax()][0])
extBot = list(extBot)
extTop = list(extTop)
pad_range = int((extBot[1]-extTop[1])*0.05)
if (int(extTop[1])<=5 and int(extTop[1])>0) and (comb.shape[0]>int(extBot[1]) and int(extBot[1])>=comb.shape[0]-5): #seg mask already reaches to the edge
#pad with pure white, top 100 px, bottom 100 px
comb= cv2.copyMakeBorder(comb,pad_range+5,pad_range+5,0,0,cv2.BORDER_CONSTANT,value=[255,255,255])
elif int(extTop[1])<=0 or int(extBot[1])>=comb.shape[0]:
            print('PAD: body out of boundary', fname)  # should not happen
return {}
else:
comb = cv2.copyMakeBorder(comb, pad_range+5, pad_range+5, 0, 0, cv2.BORDER_REPLICATE) #105 instead of 100: give some extra space
extBot[1] = extBot[1] + pad_range+5
extTop[1] = extTop[1] + pad_range+5
extLeft = tuple(c[c[:, :, 0].argmin()][0])
extRight = tuple(c[c[:, :, 0].argmax()][0])
extLeft = list(extLeft)
extRight = list(extRight)
person_ymin = int(extTop[1])-pad_range # 100
person_ymax = int(extBot[1])+pad_range # 100 #height
if person_ymin<0 or person_ymax>comb.shape[0]: # out of range
return {}
person_xmin = int(extLeft[0])
person_xmax = int(extRight[0])
rect = [person_xmin,person_xmax,person_ymin, person_ymax]
# recimg = copy.deepcopy(comb)
# cv2.rectangle(recimg,(person_xmin,person_ymin),(person_xmax,person_ymax),(0,255,0),2)
# cv2.imwrite(f'{args.output_folder}/middle_result/{fname}_rec.png',recimg)
## detect keypoints
keypoints, subset = body_estimation(comb)
# print(keypoints, subset, len(subset))
if len(subset) != 1 or (len(subset)==1 and subset[0][-1]<15):
            print(f'Processing \'{fname}\'. Please input an image that contains only one person. You may also check the segmentation mask.')
continue
# canvas = copy.deepcopy(comb)
# canvas = util.draw_bodypose(canvas, keypoints, subset, show_number=True)
# cv2.imwrite(f'{args.output_folder}/middle_result/{fname}_keypoints.png',canvas)
comb = crop_img_with_padding(comb, keypoints, rect)
cv2.imwrite(f'{args.output_folder}/{fname}.png', comb)
print(f' -- Finished processing \'{fname}\'. --')
# except:
# print(f'Processing \'{fname}\'. Not satisfied the alignment strategy.')
if __name__ == '__main__':
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False
t1 = time.time()
arg_formatter = argparse.ArgumentDefaultsHelpFormatter
description = 'StyleGAN-Human data process'
parser = argparse.ArgumentParser(formatter_class=arg_formatter,
description=description)
parser.add_argument('--image-folder', type=str, dest='image_folder')
parser.add_argument('--output-folder', dest='output_folder', default='results', type=str)
# parser.add_argument('--cfg', dest='cfg for segmentation', default='PP_HumanSeg/export_model/ppseg_lite_portrait_398x224_with_softmax/deploy.yaml', type=str)
print('parsing arguments')
cmd_args = parser.parse_args()
run(cmd_args)
print('total time elapsed: ', str(time.time() - t1))
# Copyright (c) SenseTime Research. All rights reserved.
import os
import click
import cv2
import numpy as np
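# Composite the person from `raw` onto a white background using the segmentation mask `seg`
# (both read as BGR images); the mask is blurred so that the foreground/background transition stays soft.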
def bg_white(seg, raw, blur_level=3, gaussian=81):
seg = cv2.blur(seg, (blur_level, blur_level))
empty = np.ones_like(seg)
seg_bg = (empty - seg) * 255
seg_bg = cv2.GaussianBlur(seg_bg,(gaussian,gaussian),0)
background_mask = cv2.cvtColor(255 - cv2.cvtColor(seg, cv2.COLOR_BGR2GRAY), cv2.COLOR_GRAY2BGR)
masked_fg = (raw * (1 / 255)) * (seg * (1 / 255))
masked_bg = (seg_bg * (1 / 255)) * (background_mask * (1 / 255))
frame = np.uint8(cv2.add(masked_bg,masked_fg)*255)
return frame
"""
To turn background into white.
Examples:
\b
python bg_white.py --raw_img_dir=./SHHQ-1.0/no_segment/ --raw_seg_dir=./SHHQ-1.0/segments/ \\
--outdir=./SHHQ-1.0/bg_white/
"""
@click.command()
@click.pass_context
@click.option('--raw_img_dir', default="./SHHQ-1.0/no_segment/", help='folder of raw image', required=True)
@click.option('--raw_seg_dir', default='./SHHQ-1.0/segments/', help='folder of segmentation masks', required=True)
@click.option('--outdir', help='Where to save the output images', default= "./SHHQ-1.0/bg_white/" , type=str, required=True, metavar='DIR')
def main(
ctx: click.Context,
raw_img_dir: str,
raw_seg_dir: str,
outdir: str):
os.makedirs(outdir, exist_ok=True)
files = os.listdir(raw_img_dir)
for file in files:
print(file)
raw = cv2.imread(os.path.join(raw_img_dir, file))
seg = cv2.imread(os.path.join(raw_seg_dir, file))
assert raw is not None
assert seg is not None
white_frame = bg_white(seg, raw)
cv2.imwrite(os.path.join(outdir,file), white_frame)
if __name__ == "__main__":
main()
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
from .util import EasyDict, make_cache_dir_path
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
from . import autosummary
from . import network
from . import optimizer
from . import tfutil
from . import custom_ops
from .tfutil import *
from .network import Network
from .optimizer import Optimizer
from .custom_ops import get_plugin
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Helper for adding automatically tracked values to Tensorboard.
Autosummary creates an identity op that internally keeps track of the input
values and automatically shows up in TensorBoard. The reported value
represents an average over input components. The average is accumulated
constantly over time and flushed when save_summaries() is called.
Notes:
- The output tensor must be used as an input for something else in the
graph. Otherwise, the autosummary op will not get executed, and the average
value will not get accumulated.
- It is perfectly fine to include autosummaries with the same name in
several places throughout the graph, even if they are executed concurrently.
- It is ok to also pass in a python scalar or numpy array. In this case, it
is added to the average immediately.
"""
from collections import OrderedDict
import numpy as np
import tensorflow as tf
from tensorboard import summary as summary_lib
from tensorboard.plugins.custom_scalar import layout_pb2
from . import tfutil
from .tfutil import TfExpression
from .tfutil import TfExpressionEx
# Enable "Custom scalars" tab in TensorBoard for advanced formatting.
# Disabled by default to reduce tfevents file size.
enable_custom_scalars = False
_dtype = tf.float64
_vars = OrderedDict() # name => [var, ...]
_immediate = OrderedDict() # name => update_op, update_value
_finalized = False
_merge_op = None
def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
"""Internal helper for creating autosummary accumulators."""
assert not _finalized
name_id = name.replace("/", "_")
v = tf.cast(value_expr, _dtype)
if v.shape.is_fully_defined():
size = np.prod(v.shape.as_list())
size_expr = tf.constant(size, dtype=_dtype)
else:
size = None
size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype))
if size == 1:
if v.shape.ndims != 0:
v = tf.reshape(v, [])
v = [size_expr, v, tf.square(v)]
else:
v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))]
v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype))
with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None):
var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)]
update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v))
if name in _vars:
_vars[name].append(var)
else:
_vars[name] = [var]
return update_op
def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx:
"""Create a new autosummary.
Args:
name: Name to use in TensorBoard
value: TensorFlow expression or python value to track
passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node.
Example use of the passthru mechanism:
n = autosummary('l2loss', loss, passthru=n)
This is a shorthand for the following code:
with tf.control_dependencies([autosummary('l2loss', loss)]):
n = tf.identity(n)
"""
tfutil.assert_tf_initialized()
name_id = name.replace("/", "_")
if tfutil.is_tf_expression(value):
with tf.name_scope("summary_" + name_id), tf.device(value.device):
condition = tf.convert_to_tensor(condition, name='condition')
update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op)
with tf.control_dependencies([update_op]):
return tf.identity(value if passthru is None else passthru)
else: # python scalar or numpy array
assert not tfutil.is_tf_expression(passthru)
assert not tfutil.is_tf_expression(condition)
if condition:
if name not in _immediate:
with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None):
update_value = tf.placeholder(_dtype)
update_op = _create_var(name, update_value)
_immediate[name] = update_op, update_value
update_op, update_value = _immediate[name]
tfutil.run(update_op, {update_value: value})
return value if passthru is None else passthru
def finalize_autosummaries() -> None:
"""Create the necessary ops to include autosummaries in TensorBoard report.
Note: This should be done only once per graph.
"""
global _finalized
tfutil.assert_tf_initialized()
if _finalized:
return None
_finalized = True
tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list])
# Create summary ops.
with tf.device(None), tf.control_dependencies(None):
for name, vars_list in _vars.items():
name_id = name.replace("/", "_")
with tfutil.absolute_name_scope("Autosummary/" + name_id):
moments = tf.add_n(vars_list)
moments /= moments[0]
with tf.control_dependencies([moments]): # read before resetting
reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list]
with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting
mean = moments[1]
std = tf.sqrt(moments[2] - tf.square(moments[1]))
tf.summary.scalar(name, mean)
if enable_custom_scalars:
tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std)
tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std)
# Setup layout for custom scalars.
layout = None
if enable_custom_scalars:
cat_dict = OrderedDict()
for series_name in sorted(_vars.keys()):
p = series_name.split("/")
cat = p[0] if len(p) >= 2 else ""
chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1]
if cat not in cat_dict:
cat_dict[cat] = OrderedDict()
if chart not in cat_dict[cat]:
cat_dict[cat][chart] = []
cat_dict[cat][chart].append(series_name)
categories = []
for cat_name, chart_dict in cat_dict.items():
charts = []
for chart_name, series_names in chart_dict.items():
series = []
for series_name in series_names:
series.append(layout_pb2.MarginChartContent.Series(
value=series_name,
lower="xCustomScalars/" + series_name + "/margin_lo",
upper="xCustomScalars/" + series_name + "/margin_hi"))
margin = layout_pb2.MarginChartContent(series=series)
charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
categories.append(layout_pb2.Category(title=cat_name, chart=charts))
layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories))
return layout
def save_summaries(file_writer, global_step=None):
"""Call FileWriter.add_summary() with all summaries in the default graph,
automatically finalizing and merging them on the first call.
"""
global _merge_op
tfutil.assert_tf_initialized()
if _merge_op is None:
layout = finalize_autosummaries()
if layout is not None:
file_writer.add_summary(layout)
with tf.device(None), tf.control_dependencies(None):
_merge_op = tf.summary.merge_all()
file_writer.add_summary(_merge_op.eval(), global_step)
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""TensorFlow custom ops builder.
"""
import os
import re
import uuid
import hashlib
import tempfile
import shutil
import tensorflow as tf
from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module
#----------------------------------------------------------------------------
# Global options.
cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
cuda_cache_version_tag = 'v1'
do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
verbose = True # Print status messages to stdout.
compiler_bindir_search_path = [
'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.14.26428/bin/Hostx64/x64',
'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
]
#----------------------------------------------------------------------------
# Internal helper funcs.
def _find_compiler_bindir():
for compiler_path in compiler_bindir_search_path:
if os.path.isdir(compiler_path):
return compiler_path
return None
def _get_compute_cap(device):
caps_str = device.physical_device_desc
m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
major = m.group(1)
minor = m.group(2)
return (major, minor)
def _get_cuda_gpu_arch_string():
gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU']
if len(gpus) == 0:
raise RuntimeError('No GPU devices found')
(major, minor) = _get_compute_cap(gpus[0])
return 'sm_%s%s' % (major, minor)
def _run_cmd(cmd):
with os.popen(cmd) as pipe:
output = pipe.read()
status = pipe.close()
if status is not None:
raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
def _prepare_nvcc_cli(opts):
cmd = 'nvcc ' + opts.strip()
cmd += ' --disable-warnings'
cmd += ' --include-path "%s"' % tf.sysconfig.get_include()
cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src')
cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl')
cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive')
compiler_bindir = _find_compiler_bindir()
if compiler_bindir is None:
# Require that _find_compiler_bindir succeeds on Windows. Allow
# nvcc to use whatever is the default on Linux.
if os.name == 'nt':
raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
else:
cmd += ' --compiler-bindir "%s"' % compiler_bindir
cmd += ' 2>&1'
return cmd
#----------------------------------------------------------------------------
# Main entry point.
_plugin_cache = dict()
def get_plugin(cuda_file):
cuda_file_base = os.path.basename(cuda_file)
cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)
# Already in cache?
if cuda_file in _plugin_cache:
return _plugin_cache[cuda_file]
# Setup plugin.
if verbose:
print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
try:
# Hash CUDA source.
md5 = hashlib.md5()
with open(cuda_file, 'rb') as f:
md5.update(f.read())
md5.update(b'\n')
# Hash headers included by the CUDA code by running it through the preprocessor.
if not do_not_hash_included_headers:
if verbose:
print('Preprocessing... ', end='', flush=True)
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
_run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
with open(tmp_file, 'rb') as f:
bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros
good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
for ln in f:
if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas
ln = ln.replace(bad_file_str, good_file_str)
md5.update(ln)
md5.update(b'\n')
# Select compiler options.
compile_opts = ''
if os.name == 'nt':
compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
elif os.name == 'posix':
compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
else:
assert False # not Windows or Linux, w00t?
compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
compile_opts += ' --use_fast_math'
nvcc_cmd = _prepare_nvcc_cli(compile_opts)
# Hash build configuration.
md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')
# Compile if not already compiled.
bin_file_ext = '.dll' if os.name == 'nt' else '.so'
bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
if not os.path.isfile(bin_file):
if verbose:
print('Compiling... ', end='', flush=True)
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
_run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
os.makedirs(cuda_cache_path, exist_ok=True)
intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
shutil.copyfile(tmp_file, intermediate_file)
os.rename(intermediate_file, bin_file) # atomic
# Load.
if verbose:
print('Loading... ', end='', flush=True)
plugin = tf.load_op_library(bin_file)
# Add to cache.
_plugin_cache[cuda_file] = plugin
if verbose:
print('Done.', flush=True)
return plugin
except:
if verbose:
print('Failed!', flush=True)
raise
#----------------------------------------------------------------------------
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Helper for managing networks."""
import types
import inspect
import re
import uuid
import sys
import numpy as np
import tensorflow as tf
from collections import OrderedDict
from typing import Any, List, Tuple, Union
from . import tfutil
from .. import util
from .tfutil import TfExpression, TfExpressionEx
_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import.
_import_module_src = dict() # Source code for temporary modules created during pickle import.
def import_handler(handler_func):
"""Function decorator for declaring custom import handlers."""
_import_handlers.append(handler_func)
return handler_func
class Network:
"""Generic network abstraction.
Acts as a convenience wrapper for a parameterized network construction
function, providing several utility methods and convenient access to
the inputs/outputs/weights.
Network objects can be safely pickled and unpickled for long-term
archival purposes. The pickling works reliably as long as the underlying
network construction function is defined in a standalone Python module
that has no side effects or application-specific imports.
Args:
name: Network name. Used to select TensorFlow name and variable scopes.
func_name: Fully qualified name of the underlying network construction function, or a top-level function object.
static_kwargs: Keyword arguments to be passed in to the network construction function.
Attributes:
name: User-specified name, defaults to build func name if None.
scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name.
static_kwargs: Arguments passed to the user-supplied build func.
components: Container for sub-networks. Passed to the build func, and retained between calls.
num_inputs: Number of input tensors.
num_outputs: Number of output tensors.
input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension.
output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension.
input_shape: Short-hand for input_shapes[0].
output_shape: Short-hand for output_shapes[0].
input_templates: Input placeholders in the template graph.
output_templates: Output tensors in the template graph.
input_names: Name string for each input.
output_names: Name string for each output.
own_vars: Variables defined by this network (local_name => var), excluding sub-networks.
vars: All variables (local_name => var).
trainables: All trainable variables (local_name => var).
var_global_to_local: Mapping from variable global names to local names.
"""
def __init__(self, name: str = None, func_name: Any = None, **static_kwargs):
tfutil.assert_tf_initialized()
assert isinstance(name, str) or name is None
assert func_name is not None
assert isinstance(func_name, str) or util.is_top_level_function(func_name)
assert util.is_pickleable(static_kwargs)
self._init_fields()
self.name = name
self.static_kwargs = util.EasyDict(static_kwargs)
# Locate the user-specified network build function.
if util.is_top_level_function(func_name):
func_name = util.get_top_level_function_name(func_name)
module, self._build_func_name = util.get_module_from_obj_name(func_name)
self._build_func = util.get_obj_from_module(module, self._build_func_name)
assert callable(self._build_func)
# Dig up source code for the module containing the build function.
self._build_module_src = _import_module_src.get(module, None)
if self._build_module_src is None:
self._build_module_src = inspect.getsource(module)
# Init TensorFlow graph.
self._init_graph()
self.reset_own_vars()
def _init_fields(self) -> None:
self.name = None
self.scope = None
self.static_kwargs = util.EasyDict()
self.components = util.EasyDict()
self.num_inputs = 0
self.num_outputs = 0
self.input_shapes = [[]]
self.output_shapes = [[]]
self.input_shape = []
self.output_shape = []
self.input_templates = []
self.output_templates = []
self.input_names = []
self.output_names = []
self.own_vars = OrderedDict()
self.vars = OrderedDict()
self.trainables = OrderedDict()
self.var_global_to_local = OrderedDict()
self._build_func = None # User-supplied build function that constructs the network.
self._build_func_name = None # Name of the build function.
self._build_module_src = None # Full source code of the module containing the build function.
self._run_cache = dict() # Cached graph data for Network.run().
def _init_graph(self) -> None:
# Collect inputs.
self.input_names = []
for param in inspect.signature(self._build_func).parameters.values():
if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty:
self.input_names.append(param.name)
self.num_inputs = len(self.input_names)
assert self.num_inputs >= 1
# Choose name and scope.
if self.name is None:
self.name = self._build_func_name
assert re.match("^[A-Za-z0-9_.\\-]*$", self.name)
with tf.name_scope(None):
self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True)
# Finalize build func kwargs.
build_kwargs = dict(self.static_kwargs)
build_kwargs["is_template_graph"] = True
build_kwargs["components"] = self.components
# Build template graph.
with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope): # ignore surrounding scopes
assert tf.get_variable_scope().name == self.scope
assert tf.get_default_graph().get_name_scope() == self.scope
with tf.control_dependencies(None): # ignore surrounding control dependencies
self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
out_expr = self._build_func(*self.input_templates, **build_kwargs)
# Collect outputs.
assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
self.num_outputs = len(self.output_templates)
assert self.num_outputs >= 1
assert all(tfutil.is_tf_expression(t) for t in self.output_templates)
# Perform sanity checks.
if any(t.shape.ndims is None for t in self.input_templates):
raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.")
if any(t.shape.ndims is None for t in self.output_templates):
raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.")
if any(not isinstance(comp, Network) for comp in self.components.values()):
raise ValueError("Components of a Network must be Networks themselves.")
if len(self.components) != len(set(comp.name for comp in self.components.values())):
raise ValueError("Components of a Network must have unique names.")
# List inputs and outputs.
self.input_shapes = [t.shape.as_list() for t in self.input_templates]
self.output_shapes = [t.shape.as_list() for t in self.output_templates]
self.input_shape = self.input_shapes[0]
self.output_shape = self.output_shapes[0]
self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates]
# List variables.
self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/"))
self.vars = OrderedDict(self.own_vars)
self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items())
self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable)
self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items())
def reset_own_vars(self) -> None:
"""Re-initialize all variables of this network, excluding sub-networks."""
tfutil.run([var.initializer for var in self.own_vars.values()])
def reset_vars(self) -> None:
"""Re-initialize all variables of this network, including sub-networks."""
tfutil.run([var.initializer for var in self.vars.values()])
def reset_trainables(self) -> None:
"""Re-initialize all trainable variables of this network, including sub-networks."""
tfutil.run([var.initializer for var in self.trainables.values()])
def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]:
"""Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s)."""
assert len(in_expr) == self.num_inputs
assert not all(expr is None for expr in in_expr)
# Finalize build func kwargs.
build_kwargs = dict(self.static_kwargs)
build_kwargs.update(dynamic_kwargs)
build_kwargs["is_template_graph"] = False
build_kwargs["components"] = self.components
# Build TensorFlow graph to evaluate the network.
with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name):
assert tf.get_variable_scope().name == self.scope
valid_inputs = [expr for expr in in_expr if expr is not None]
final_inputs = []
for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes):
if expr is not None:
expr = tf.identity(expr, name=name)
else:
expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name)
final_inputs.append(expr)
out_expr = self._build_func(*final_inputs, **build_kwargs)
# Propagate input shapes back to the user-specified expressions.
for expr, final in zip(in_expr, final_inputs):
if isinstance(expr, tf.Tensor):
expr.set_shape(final.shape)
# Express outputs in the desired format.
assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
if return_as_list:
out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
return out_expr
def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str:
"""Get the local name of a given variable, without any surrounding name scopes."""
assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str)
global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name
return self.var_global_to_local[global_name]
def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression:
"""Find variable by local or global name."""
assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str)
return self.vars[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name
def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray:
"""Get the value of a given variable as NumPy array.
Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible."""
return self.find_var(var_or_local_name).eval()
def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None:
"""Set the value of a given variable based on the given NumPy array.
Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible."""
tfutil.set_vars({self.find_var(var_or_local_name): new_value})
def __getstate__(self) -> dict:
"""Pickle export."""
state = dict()
state["version"] = 4
state["name"] = self.name
state["static_kwargs"] = dict(self.static_kwargs)
state["components"] = dict(self.components)
state["build_module_src"] = self._build_module_src
state["build_func_name"] = self._build_func_name
state["variables"] = list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values()))))
return state
def __setstate__(self, state: dict) -> None:
"""Pickle import."""
# pylint: disable=attribute-defined-outside-init
tfutil.assert_tf_initialized()
self._init_fields()
# Execute custom import handlers.
for handler in _import_handlers:
state = handler(state)
# Set basic fields.
assert state["version"] in [2, 3, 4]
self.name = state["name"]
self.static_kwargs = util.EasyDict(state["static_kwargs"])
self.components = util.EasyDict(state.get("components", {}))
self._build_module_src = state["build_module_src"]
self._build_func_name = state["build_func_name"]
# Create temporary module from the imported source code.
module_name = "_tflib_network_import_" + uuid.uuid4().hex
module = types.ModuleType(module_name)
sys.modules[module_name] = module
_import_module_src[module] = self._build_module_src
exec(self._build_module_src, module.__dict__) # pylint: disable=exec-used
# Locate network build function in the temporary module.
self._build_func = util.get_obj_from_module(module, self._build_func_name)
assert callable(self._build_func)
# Init TensorFlow graph.
self._init_graph()
self.reset_own_vars()
tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]})
def clone(self, name: str = None, **new_static_kwargs) -> "Network":
"""Create a clone of this network with its own copy of the variables."""
# pylint: disable=protected-access
net = object.__new__(Network)
net._init_fields()
net.name = name if name is not None else self.name
net.static_kwargs = util.EasyDict(self.static_kwargs)
net.static_kwargs.update(new_static_kwargs)
net._build_module_src = self._build_module_src
net._build_func_name = self._build_func_name
net._build_func = self._build_func
net._init_graph()
net.copy_vars_from(self)
return net
def copy_own_vars_from(self, src_net: "Network") -> None:
"""Copy the values of all variables from the given network, excluding sub-networks."""
names = [name for name in self.own_vars.keys() if name in src_net.own_vars]
tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
def copy_vars_from(self, src_net: "Network") -> None:
"""Copy the values of all variables from the given network, including sub-networks."""
names = [name for name in self.vars.keys() if name in src_net.vars]
tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
def copy_trainables_from(self, src_net: "Network") -> None:
"""Copy the values of all trainable variables from the given network, including sub-networks."""
names = [name for name in self.trainables.keys() if name in src_net.trainables]
tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network":
"""Create new network with the given parameters, and copy all variables from this network."""
if new_name is None:
new_name = self.name
static_kwargs = dict(self.static_kwargs)
static_kwargs.update(new_static_kwargs)
net = Network(name=new_name, func_name=new_func_name, **static_kwargs)
net.copy_vars_from(self)
return net
def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation:
"""Construct a TensorFlow op that updates the variables of this network
to be slightly closer to those of the given network."""
with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"):
ops = []
for name, var in self.vars.items():
if name in src_net.vars:
cur_beta = beta if name in self.trainables else beta_nontrainable
new_value = tfutil.lerp(src_net.vars[name], var, cur_beta)
ops.append(var.assign(new_value))
return tf.group(*ops)
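# Illustrative usage sketch (annotation, not part of the original file): the op returned by
# setup_as_moving_average_of() is typically built once and then executed after every training
# step to maintain an exponential moving average of the weights. `G` and `Gs` are hypothetical
# networks used only for illustration.
#
#   Gs_update_op = Gs.setup_as_moving_average_of(G, beta=0.99)
#   ...
#   tfutil.run(Gs_update_op)  # execute after each training iteration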
def run(self,
*in_arrays: Tuple[Union[np.ndarray, None], ...],
input_transform: dict = None,
output_transform: dict = None,
return_as_list: bool = False,
print_progress: bool = False,
minibatch_size: int = None,
num_gpus: int = 1,
assume_frozen: bool = False,
**dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
"""Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).
Args:
input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
The dict must contain a 'func' field that points to a top-level function. The function is called with the input
TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
The dict must contain a 'func' field that points to a top-level function. The function is called with the output
TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
print_progress: Print progress to the console? Useful for very large input arrays.
minibatch_size: Maximum minibatch size to use, None = disable batching.
num_gpus: Number of GPUs to use.
assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain unchanged between calls.
dynamic_kwargs: Additional keyword arguments to be passed into the network build function.
"""
assert len(in_arrays) == self.num_inputs
assert not all(arr is None for arr in in_arrays)
assert input_transform is None or util.is_top_level_function(input_transform["func"])
assert output_transform is None or util.is_top_level_function(output_transform["func"])
output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs)
num_items = in_arrays[0].shape[0]
if minibatch_size is None:
minibatch_size = num_items
# Construct unique hash key from all arguments that affect the TensorFlow graph.
key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs)
def unwind_key(obj):
if isinstance(obj, dict):
return [(key, unwind_key(value)) for key, value in sorted(obj.items())]
if callable(obj):
return util.get_top_level_function_name(obj)
return obj
key = repr(unwind_key(key))
# Build graph.
if key not in self._run_cache:
with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None):
with tf.device("/cpu:0"):
in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr]))
out_split = []
for gpu in range(num_gpus):
with tf.device("/gpu:%d" % gpu):
net_gpu = self.clone() if assume_frozen else self
in_gpu = in_split[gpu]
if input_transform is not None:
in_kwargs = dict(input_transform)
in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu)
assert len(in_gpu) == self.num_inputs
out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs)
if output_transform is not None:
out_kwargs = dict(output_transform)
out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs)
out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu)
assert len(out_gpu) == self.num_outputs
out_split.append(out_gpu)
with tf.device("/cpu:0"):
out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)]
self._run_cache[key] = in_expr, out_expr
# Run minibatches.
in_expr, out_expr = self._run_cache[key]
out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr]
for mb_begin in range(0, num_items, minibatch_size):
if print_progress:
print("\r%d / %d" % (mb_begin, num_items), end="")
mb_end = min(mb_begin + minibatch_size, num_items)
mb_num = mb_end - mb_begin
mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)]
mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in)))
for dst, src in zip(out_arrays, mb_out):
dst[mb_begin : mb_end] = src
# Done.
if print_progress:
print("\r%d / %d" % (num_items, num_items))
if not return_as_list:
out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays)
return out_arrays
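# Illustrative usage sketch (annotation, not part of the original file): a minimal run() call,
# assuming a generator-style network with two inputs (latents and labels) and image outputs.
# `convert_images_to_uint8` is the tflib helper mentioned in the deprecation warning emitted by
# _handle_legacy_output_transforms() further down in this module.
#
#   latents = np.random.randn(8, *net.input_shapes[0][1:])
#   labels = np.zeros([8] + net.input_shapes[1][1:])
#   images = net.run(latents, labels, minibatch_size=4,
#                    output_transform=dict(func=tflib.convert_images_to_uint8))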
def list_ops(self) -> List[TfExpression]:
include_prefix = self.scope + "/"
exclude_prefix = include_prefix + "_"
ops = tf.get_default_graph().get_operations()
ops = [op for op in ops if op.name.startswith(include_prefix)]
ops = [op for op in ops if not op.name.startswith(exclude_prefix)]
return ops
def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]:
"""Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to
individual layers of the network. Mainly intended to be used for reporting."""
layers = []
def recurse(scope, parent_ops, parent_vars, level):
# Ignore specific patterns.
if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]):
return
# Filter ops and vars by scope.
global_prefix = scope + "/"
local_prefix = global_prefix[len(self.scope) + 1:]
cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]]
cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]]
if not cur_ops and not cur_vars:
return
# Filter out all ops related to variables.
for var in [op for op in cur_ops if op.type.startswith("Variable")]:
var_prefix = var.name + "/"
cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)]
# Scope does not contain ops as immediate children => recurse deeper.
contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops)
if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1:
visited = set()
for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]:
token = rel_name.split("/")[0]
if token not in visited:
recurse(global_prefix + token, cur_ops, cur_vars, level + 1)
visited.add(token)
return
# Report layer.
layer_name = scope[len(self.scope) + 1:]
layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1]
layer_trainables = [var for _name, var in cur_vars if var.trainable]
layers.append((layer_name, layer_output, layer_trainables))
recurse(self.scope, self.list_ops(), list(self.vars.items()), 0)
return layers
def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None:
"""Print a summary table of the network structure."""
rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]]
rows += [["---"] * 4]
total_params = 0
for layer_name, layer_output, layer_trainables in self.list_layers():
num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables)
weights = [var for var in layer_trainables if var.name.endswith("/weight:0")]
weights.sort(key=lambda x: len(x.name))
if len(weights) == 0 and len(layer_trainables) == 1:
weights = layer_trainables
total_params += num_params
if not hide_layers_with_no_params or num_params != 0:
num_params_str = str(num_params) if num_params > 0 else "-"
output_shape_str = str(layer_output.shape)
weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-"
rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]]
rows += [["---"] * 4]
rows += [["Total", str(total_params), "", ""]]
widths = [max(len(cell) for cell in column) for column in zip(*rows)]
print()
for row in rows:
print(" ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths)))
print()
def setup_weight_histograms(self, title: str = None) -> None:
"""Construct summary ops to include histograms of all trainable parameters in TensorBoard."""
if title is None:
title = self.name
with tf.name_scope(None), tf.device(None), tf.control_dependencies(None):
for local_name, var in self.trainables.items():
if "/" in local_name:
p = local_name.split("/")
name = title + "_" + p[-1] + "/" + "_".join(p[:-1])
else:
name = title + "_toplevel/" + local_name
tf.summary.histogram(name, var)
#----------------------------------------------------------------------------
# Backwards-compatible emulation of legacy output transformation in Network.run().
_print_legacy_warning = True
def _handle_legacy_output_transforms(output_transform, dynamic_kwargs):
global _print_legacy_warning
legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"]
if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs):
return output_transform, dynamic_kwargs
if _print_legacy_warning:
_print_legacy_warning = False
print()
print("WARNING: Old-style output transformations in Network.run() are deprecated.")
print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'")
print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.")
print()
assert output_transform is None
new_kwargs = dict(dynamic_kwargs)
new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs}
new_transform["func"] = _legacy_output_transform_func
return new_transform, new_kwargs
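# Illustrative equivalence (annotation, not part of the original file): a legacy call such as
#     net.run(latents, out_mul=127.5, out_add=127.5, out_dtype=np.uint8)
# is rewritten by the handler above into
#     net.run(latents, output_transform=dict(func=_legacy_output_transform_func,
#                                            out_mul=127.5, out_add=127.5, out_dtype=np.uint8))
# i.e. the legacy kwargs are popped from dynamic_kwargs and applied by the function defined below.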
def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None):
if out_mul != 1.0:
expr = [x * out_mul for x in expr]
if out_add != 0.0:
expr = [x + out_add for x in expr]
if out_shrink > 1:
ksize = [1, 1, out_shrink, out_shrink]
expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr]
if out_dtype is not None:
if tf.as_dtype(out_dtype).is_integer:
expr = [tf.round(x) for x in expr]
expr = [tf.saturate_cast(x, out_dtype) for x in expr]
return expr
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
# empty
// Copyright (c) SenseTime Research. All rights reserved.
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
//
// This work is made available under the Nvidia Source Code License-NC.
// To view a copy of this license, visit
// https://nvlabs.github.io/stylegan2/license.html
#define EIGEN_USE_GPU
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include <stdio.h>
using namespace tensorflow;
using namespace tensorflow::shape_inference;
#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false)
//------------------------------------------------------------------------
// CUDA kernel.
template <class T>
struct FusedBiasActKernelParams
{
const T* x; // [sizeX]
const T* b; // [sizeB] or NULL
const T* ref; // [sizeX] or NULL
T* y; // [sizeX]
int grad;
int axis;
int act;
float alpha;
float gain;
int sizeX;
int sizeB;
int stepB;
int loopX;
};
template <class T>
static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams<T> p)
{
const float expRange = 80.0f;
const float halfExpRange = 40.0f;
const float seluScale = 1.0507009873554804934193349852946f;
const float seluAlpha = 1.6732632423543772848170429916717f;
// Loop over elements.
int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x;
for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x)
{
// Load and apply bias.
float x = (float)p.x[xi];
if (p.b)
x += (float)p.b[(xi / p.stepB) % p.sizeB];
float ref = (p.ref) ? (float)p.ref[xi] : 0.0f;
if (p.gain != 0.0f & p.act != 9)
ref /= p.gain;
// Evaluate activation func.
float y;
switch (p.act * 10 + p.grad)
{
// linear
default:
case 10: y = x; break;
case 11: y = x; break;
case 12: y = 0.0f; break;
// relu
case 20: y = (x > 0.0f) ? x : 0.0f; break;
case 21: y = (ref > 0.0f) ? x : 0.0f; break;
case 22: y = 0.0f; break;
// lrelu
case 30: y = (x > 0.0f) ? x : x * p.alpha; break;
case 31: y = (ref > 0.0f) ? x : x * p.alpha; break;
case 32: y = 0.0f; break;
// tanh
case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break;
case 41: y = x * (1.0f - ref * ref); break;
case 42: y = x * (1.0f - ref * ref) * (-2.0f * ref); break;
// sigmoid
case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break;
case 51: y = x * ref * (1.0f - ref); break;
case 52: y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref); break;
// elu
case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break;
case 61: y = (ref >= 0.0f) ? x : x * (ref + 1.0f); break;
case 62: y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f); break;
// selu
case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break;
case 71: y = (ref >= 0.0f) ? x * seluScale : x * (ref + seluScale * seluAlpha); break;
case 72: y = (ref >= 0.0f) ? 0.0f : x * (ref + seluScale * seluAlpha); break;
// softplus
case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break;
case 81: y = x * (1.0f - expf(-ref)); break;
case 82: { float c = expf(-ref); y = x * c * (1.0f - c); } break;
// swish
case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break;
case 91: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? x : x * c * (ref + d) / (d * d); } break;
case 92: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d); } break;
}
// Apply gain and store.
p.y[xi] = (T)(y * p.gain);
}
}
//------------------------------------------------------------------------
// TensorFlow op.
template <class T>
struct FusedBiasActOp : public OpKernel
{
FusedBiasActKernelParams<T> m_attribs;
FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx)
{
memset(&m_attribs, 0, sizeof(m_attribs));
OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad));
OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis));
OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act));
OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha));
OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain));
OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative"));
OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative"));
OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative"));
}
void Compute(OpKernelContext* ctx)
{
FusedBiasActKernelParams<T> p = m_attribs;
cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();
const Tensor& x = ctx->input(0); // [...]
const Tensor& b = ctx->input(1); // [sizeB] or [0]
const Tensor& ref = ctx->input(2); // x.shape or [0]
p.x = x.flat<T>().data();
p.b = (b.NumElements()) ? b.flat<T>().data() : NULL;
p.ref = (ref.NumElements()) ? ref.flat<T>().data() : NULL;
OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds"));
OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1"));
OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements"));
OP_REQUIRES(ctx, ref.NumElements() == ((p.grad == 0) ? 0 : x.NumElements()), errors::InvalidArgument("ref has wrong number of elements"));
OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large"));
p.sizeX = (int)x.NumElements();
p.sizeB = (int)b.NumElements();
p.stepB = 1;
for (int i = m_attribs.axis + 1; i < x.dims(); i++)
p.stepB *= (int)x.dim_size(i);
Tensor* y = NULL; // x.shape
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y));
p.y = y->flat<T>().data();
p.loopX = 4;
int blockSize = 4 * 32;
int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1;
void* args[] = {&p};
OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel<T>, gridSize, blockSize, args, 0, stream));
}
};
REGISTER_OP("FusedBiasAct")
.Input ("x: T")
.Input ("b: T")
.Input ("ref: T")
.Output ("y: T")
.Attr ("T: {float, half}")
.Attr ("grad: int = 0")
.Attr ("axis: int = 1")
.Attr ("act: int = 0")
.Attr ("alpha: float = 0.0")
.Attr ("gain: float = 1.0");
REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<float>("T"), FusedBiasActOp<float>);
REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), FusedBiasActOp<Eigen::half>);
//------------------------------------------------------------------------
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Custom TensorFlow ops for efficient bias and activation."""
import os
import numpy as np
import tensorflow as tf
from .. import custom_ops
from ...util import EasyDict
def _get_plugin():
return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')
#----------------------------------------------------------------------------
activation_funcs = {
'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True),
'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True),
'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True),
'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False),
'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False),
'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False),
'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False),
'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False),
'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False),
}
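# Field meanings (annotation added for clarity): `func` is the reference TensorFlow implementation
# used by _fused_bias_act_ref(); `def_alpha` / `def_gain` are substituted when alpha / gain are None;
# `cuda_idx` selects the activation branch inside the CUDA kernel (the `p.act * 10 + p.grad` switch);
# `ref` names which tensor ('x' or 'y') the gradient kernels need as a reference; `zero_2nd_grad`
# marks piecewise-linear activations whose second derivative vanishes, enabling the faster
# custom-gradient path in _fused_bias_act_cuda().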
#----------------------------------------------------------------------------
def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='cuda'):
r"""Fused bias and activation function.
Adds bias `b` to activation tensor `x`, evaluates activation function `act`,
and scales the result by `gain`. Each of the steps is optional. In most cases,
the fused op is considerably more efficient than performing the same calculation
using standard TensorFlow ops. It supports first and second order gradients,
but not third order gradients.
Args:
x: Input activation tensor. Can have any shape, but if `b` is defined, the
dimension corresponding to `axis`, as well as the rank, must be known.
b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type
as `x`. The shape must be known, and it must match the dimension of `x`
corresponding to `axis`.
axis: The dimension in `x` corresponding to the elements of `b`.
The value of `axis` is ignored if `b` is not specified.
act: Name of the activation function to evaluate, or `"linear"` to disable.
Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc.
See `activation_funcs` for a full list. `None` is not allowed.
alpha: Shape parameter for the activation function, or `None` to use the default.
gain: Scaling factor for the output tensor, or `None` to use default.
See `activation_funcs` for the default scaling of each activation function.
If unsure, consider specifying `1.0`.
impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
Returns:
Tensor of the same shape and datatype as `x`.
"""
impl_dict = {
'ref': _fused_bias_act_ref,
'cuda': _fused_bias_act_cuda,
}
return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain)
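# Illustrative usage sketch (not part of the original file): a hypothetical helper showing a typical
# call via the reference implementation, so it runs without the compiled CUDA plugin. The tensor
# shapes are arbitrary example values.
def _example_fused_bias_act():
    x = tf.zeros([4, 64, 32, 32])  # NCHW activations
    b = tf.zeros([64])             # one bias element per channel (axis=1)
    return fused_bias_act(x, b=b, axis=1, act='lrelu', impl='ref')  # leaky ReLU with default alpha and gain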
#----------------------------------------------------------------------------
def _fused_bias_act_ref(x, b, axis, act, alpha, gain):
"""Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops."""
# Validate arguments.
x = tf.convert_to_tensor(x)
b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype)
act_spec = activation_funcs[act]
assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
if alpha is None:
alpha = act_spec.def_alpha
if gain is None:
gain = act_spec.def_gain
# Add bias.
if b.shape[0] != 0:
x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)])
# Evaluate activation function.
x = act_spec.func(x, alpha=alpha)
# Scale by gain.
if gain != 1:
x *= gain
return x
#----------------------------------------------------------------------------
def _fused_bias_act_cuda(x, b, axis, act, alpha, gain):
"""Fast CUDA implementation of `fused_bias_act()` using custom ops."""
# Validate arguments.
x = tf.convert_to_tensor(x)
empty_tensor = tf.constant([], dtype=x.dtype)
b = tf.convert_to_tensor(b) if b is not None else empty_tensor
act_spec = activation_funcs[act]
assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
if alpha is None:
alpha = act_spec.def_alpha
if gain is None:
gain = act_spec.def_gain
# Special cases.
if act == 'linear' and b.shape[0] == 0 and gain == 1.0:  # b has already been converted to a tensor above; an empty tensor means no bias
return x
if act_spec.cuda_idx is None:
return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain)
# CUDA kernel.
cuda_kernel = _get_plugin().fused_bias_act
cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain)
# Forward pass: y = func(x, b).
def func_y(x, b):
y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs)
y.set_shape(x.shape)
return y
# Backward pass: dx, db = grad(dy, x, y)
def grad_dx(dy, x, y):
ref = {'x': x, 'y': y}[act_spec.ref]
dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs)
dx.set_shape(x.shape)
return dx
def grad_db(dx):
if b.shape[0] == 0:
return empty_tensor
db = dx
if axis < x.shape.rank - 1:
db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank)))
if axis > 0:
db = tf.reduce_sum(db, list(range(axis)))
db.set_shape(b.shape)
return db
# Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y)
def grad2_d_dy(d_dx, d_db, x, y):
ref = {'x': x, 'y': y}[act_spec.ref]
d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs)
d_dy.set_shape(x.shape)
return d_dy
def grad2_d_x(d_dx, d_db, x, y):
ref = {'x': x, 'y': y}[act_spec.ref]
d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs)
d_x.set_shape(x.shape)
return d_x
# Fast version for piecewise-linear activation funcs.
@tf.custom_gradient
def func_zero_2nd_grad(x, b):
y = func_y(x, b)
@tf.custom_gradient
def grad(dy):
dx = grad_dx(dy, x, y)
db = grad_db(dx)
def grad2(d_dx, d_db):
d_dy = grad2_d_dy(d_dx, d_db, x, y)
return d_dy
return (dx, db), grad2
return y, grad
# Slow version for general activation funcs.
@tf.custom_gradient
def func_nonzero_2nd_grad(x, b):
y = func_y(x, b)
def grad_wrap(dy):
@tf.custom_gradient
def grad_impl(dy, x):
dx = grad_dx(dy, x, y)
db = grad_db(dx)
def grad2(d_dx, d_db):
d_dy = grad2_d_dy(d_dx, d_db, x, y)
d_x = grad2_d_x(d_dx, d_db, x, y)
return d_dy, d_x
return (dx, db), grad2
return grad_impl(dy, x)
return y, grad_wrap
# Which version to use?
if act_spec.zero_2nd_grad:
return func_zero_2nd_grad(x, b)
return func_nonzero_2nd_grad(x, b)
#----------------------------------------------------------------------------
// Copyright (c) SenseTime Research. All rights reserved.
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
//
// This work is made available under the Nvidia Source Code License-NC.
// To view a copy of this license, visit
// https://nvlabs.github.io/stylegan2/license.html
#define EIGEN_USE_GPU
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/shape_inference.h"
#include <stdio.h>
using namespace tensorflow;
using namespace tensorflow::shape_inference;
//------------------------------------------------------------------------
// Helpers.
#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false)
static __host__ __device__ __forceinline__ int floorDiv(int a, int b)
{
int c = a / b;
if (c * b > a)
c--;
return c;
}
//------------------------------------------------------------------------
// CUDA kernel params.
template <class T>
struct UpFirDn2DKernelParams
{
const T* x; // [majorDim, inH, inW, minorDim]
const T* k; // [kernelH, kernelW]
T* y; // [majorDim, outH, outW, minorDim]
int upx;
int upy;
int downx;
int downy;
int padx0;
int padx1;
int pady0;
int pady1;
int majorDim;
int inH;
int inW;
int minorDim;
int kernelH;
int kernelW;
int outH;
int outW;
int loopMajor;
int loopX;
};
//------------------------------------------------------------------------
// General CUDA implementation for large filter kernels.
template <class T>
static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams<T> p)
{
// Calculate thread index.
int minorIdx = blockIdx.x * blockDim.x + threadIdx.x;
int outY = minorIdx / p.minorDim;
minorIdx -= outY * p.minorDim;
int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y;
int majorIdxBase = blockIdx.z * p.loopMajor;
if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim)
return;
// Setup Y receptive field.
int midY = outY * p.downy + p.upy - 1 - p.pady0;
int inY = min(max(floorDiv(midY, p.upy), 0), p.inH);
int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY;
int kernelY = midY + p.kernelH - (inY + 1) * p.upy;
// Loop over majorDim and outX.
for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++)
for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y)
{
// Setup X receptive field.
int midX = outX * p.downx + p.upx - 1 - p.padx0;
int inX = min(max(floorDiv(midX, p.upx), 0), p.inW);
int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX;
int kernelX = midX + p.kernelW - (inX + 1) * p.upx;
// Initialize pointers.
const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
const T* kp = &p.k[kernelY * p.kernelW + kernelX];
int xpx = p.minorDim;
int kpx = -p.upx;
int xpy = p.inW * p.minorDim;
int kpy = -p.upy * p.kernelW;
// Inner loop.
float v = 0.0f;
for (int y = 0; y < h; y++)
{
for (int x = 0; x < w; x++)
{
v += (float)(*xp) * (float)(*kp);
xp += xpx;
kp += kpx;
}
xp += xpy - w * xpx;
kp += kpy - w * kpx;
}
// Store result.
p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
}
}
//------------------------------------------------------------------------
// Specialized CUDA implementation for small filter kernels.
template <class T, int upx, int upy, int downx, int downy, int kernelW, int kernelH, int tileOutW, int tileOutH>
static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams<T> p)
{
//assert(kernelW % upx == 0);
//assert(kernelH % upy == 0);
const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1;
const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1;
__shared__ volatile float sk[kernelH][kernelW];
__shared__ volatile float sx[tileInH][tileInW];
// Calculate tile index.
int minorIdx = blockIdx.x;
int tileOutY = minorIdx / p.minorDim;
minorIdx -= tileOutY * p.minorDim;
tileOutY *= tileOutH;
int tileOutXBase = blockIdx.y * p.loopX * tileOutW;
int majorIdxBase = blockIdx.z * p.loopMajor;
if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim)
return;
// Load filter kernel (flipped).
for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x)
{
int ky = tapIdx / kernelW;
int kx = tapIdx - ky * kernelW;
float v = 0.0f;
if (kx < p.kernelW & ky < p.kernelH)
v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)];
sk[ky][kx] = v;
}
// Loop over majorDim and outX.
for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++)
for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW)
{
// Load input pixels.
int tileMidX = tileOutX * downx + upx - 1 - p.padx0;
int tileMidY = tileOutY * downy + upy - 1 - p.pady0;
int tileInX = floorDiv(tileMidX, upx);
int tileInY = floorDiv(tileMidY, upy);
__syncthreads();
for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x)
{
int relInY = inIdx / tileInW;
int relInX = inIdx - relInY * tileInW;
int inX = relInX + tileInX;
int inY = relInY + tileInY;
float v = 0.0f;
if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH)
v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
sx[relInY][relInX] = v;
}
// Loop over output pixels.
__syncthreads();
for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x)
{
int relOutY = outIdx / tileOutW;
int relOutX = outIdx - relOutY * tileOutW;
int outX = relOutX + tileOutX;
int outY = relOutY + tileOutY;
// Setup receptive field.
int midX = tileMidX + relOutX * downx;
int midY = tileMidY + relOutY * downy;
int inX = floorDiv(midX, upx);
int inY = floorDiv(midY, upy);
int relInX = inX - tileInX;
int relInY = inY - tileInY;
int kernelX = (inX + 1) * upx - midX - 1; // flipped
int kernelY = (inY + 1) * upy - midY - 1; // flipped
// Inner loop.
float v = 0.0f;
#pragma unroll
for (int y = 0; y < kernelH / upy; y++)
#pragma unroll
for (int x = 0; x < kernelW / upx; x++)
v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx];
// Store result.
if (outX < p.outW & outY < p.outH)
p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
}
}
}
//------------------------------------------------------------------------
// TensorFlow op.
template <class T>
struct UpFirDn2DOp : public OpKernel
{
UpFirDn2DKernelParams<T> m_attribs;
UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx)
{
memset(&m_attribs, 0, sizeof(m_attribs));
OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx));
OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy));
OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx));
OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy));
OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0));
OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1));
OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0));
OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1));
OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1"));
OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1"));
}
void Compute(OpKernelContext* ctx)
{
UpFirDn2DKernelParams<T> p = m_attribs;
cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();
const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim]
const Tensor& k = ctx->input(1); // [kernelH, kernelW]
p.x = x.flat<T>().data();
p.k = k.flat<T>().data();
OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4"));
OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2"));
OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large"));
OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large"));
p.majorDim = (int)x.dim_size(0);
p.inH = (int)x.dim_size(1);
p.inW = (int)x.dim_size(2);
p.minorDim = (int)x.dim_size(3);
p.kernelH = (int)k.dim_size(0);
p.kernelW = (int)k.dim_size(1);
OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1"));
p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx;
p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy;
OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1"));
Tensor* y = NULL; // [majorDim, outH, outW, minorDim]
TensorShape ys;
ys.AddDim(p.majorDim);
ys.AddDim(p.outH);
ys.AddDim(p.outW);
ys.AddDim(p.minorDim);
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y));
p.y = y->flat<T>().data();
OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large"));
// Choose CUDA kernel to use.
void* cudaKernel = (void*)UpFirDn2DKernel_large<T>;
int tileOutW = -1;
int tileOutH = -1;
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 7,7, 64,16>; tileOutW = 64; tileOutH = 16; }
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 5,5, 64,16>; tileOutW = 64; tileOutH = 16; }
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 3,3, 64,16>; tileOutW = 64; tileOutH = 16; }
if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 8,8, 64,16>; tileOutW = 64; tileOutH = 16; }
if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 2,2, 64,16>; tileOutW = 64; tileOutH = 16; }
if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 8,8, 32,8>; tileOutW = 32; tileOutH = 8; }
if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 6,6, 32,8>; tileOutW = 32; tileOutH = 8; }
if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 4,4, 32,8>; tileOutW = 32; tileOutH = 8; }
if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 2,2, 32,8>; tileOutW = 32; tileOutH = 8; }
// Choose launch params.
dim3 blockSize;
dim3 gridSize;
if (tileOutW > 0 && tileOutH > 0) // small
{
p.loopMajor = (p.majorDim - 1) / 16384 + 1;
p.loopX = 1;
blockSize = dim3(32 * 8, 1, 1);
gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1);
}
else // large
{
p.loopMajor = (p.majorDim - 1) / 16384 + 1;
p.loopX = 4;
blockSize = dim3(4, 32, 1);
gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1);
}
// Launch CUDA kernel.
void* args[] = {&p};
OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream));
}
};
REGISTER_OP("UpFirDn2D")
.Input ("x: T")
.Input ("k: T")
.Output ("y: T")
.Attr ("T: {float, half}")
.Attr ("upx: int = 1")
.Attr ("upy: int = 1")
.Attr ("downx: int = 1")
.Attr ("downy: int = 1")
.Attr ("padx0: int = 0")
.Attr ("padx1: int = 0")
.Attr ("pady0: int = 0")
.Attr ("pady1: int = 0");
REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<float>("T"), UpFirDn2DOp<float>);
REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), UpFirDn2DOp<Eigen::half>);
//------------------------------------------------------------------------
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Custom TensorFlow ops for efficient resampling of 2D images."""
import os
import numpy as np
import tensorflow as tf
from .. import custom_ops
def _get_plugin():
return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')
#----------------------------------------------------------------------------
def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'):
r"""Pad, upsample, FIR filter, and downsample a batch of 2D images.
Accepts a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]`
and performs the following operations for each image, batched across
`majorDim` and `minorDim`:
1. Pad the image with zeros by the specified number of pixels on each side
(`padx0`, `padx1`, `pady0`, `pady1`). Specifying a negative value
corresponds to cropping the image.
2. Upsample the image by inserting zeros after each pixel (`upx`, `upy`).
3. Convolve the image with the specified 2D FIR filter (`k`), shrinking the
image so that the footprint of all output pixels lies within the input image.
4. Downsample the image by throwing away pixels (`downx`, `downy`).
This sequence of operations bears close resemblance to scipy.signal.upfirdn().
The fused op is considerably more efficient than performing the same calculation
using standard TensorFlow ops. It supports gradients of arbitrary order.
Args:
x: Input tensor of the shape `[majorDim, inH, inW, minorDim]`.
k: 2D FIR filter of the shape `[firH, firW]`.
upx: Integer upsampling factor along the X-axis (default: 1).
upy: Integer upsampling factor along the Y-axis (default: 1).
downx: Integer downsampling factor along the X-axis (default: 1).
downy: Integer downsampling factor along the Y-axis (default: 1).
padx0: Number of pixels to pad on the left side (default: 0).
padx1: Number of pixels to pad on the right side (default: 0).
pady0: Number of pixels to pad on the top side (default: 0).
pady1: Number of pixels to pad on the bottom side (default: 0).
impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
Returns:
Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`.
"""
impl_dict = {
'ref': _upfirdn_2d_ref,
'cuda': _upfirdn_2d_cuda,
}
return impl_dict[impl](x=x, k=k, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1)
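# Illustrative usage sketch (not part of the original file): a hypothetical helper demonstrating
# 2x upsampling with a 4-tap separable filter via the reference implementation. The pad values
# follow the same arithmetic that upsample_2d() uses below; the input shape is an arbitrary example.
def _example_upfirdn_2d():
    x = tf.zeros([1, 8, 8, 3])                        # [majorDim, inH, inW, minorDim]
    k = np.outer([1., 3., 3., 1.], [1., 3., 3., 1.])
    k /= np.sum(k)                                    # normalized 4x4 FIR filter
    return upfirdn_2d(x, k, upx=2, upy=2, padx0=2, padx1=1, pady0=2, pady1=1, impl='ref')  # -> [1, 16, 16, 3]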
#----------------------------------------------------------------------------
def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1):
"""Slow reference implementation of `upfirdn_2d()` using standard TensorFlow ops."""
x = tf.convert_to_tensor(x)
k = np.asarray(k, dtype=np.float32)
assert x.shape.rank == 4
inH = x.shape[1].value
inW = x.shape[2].value
minorDim = _shape(x, 3)
kernelH, kernelW = k.shape
assert inW >= 1 and inH >= 1
assert kernelW >= 1 and kernelH >= 1
assert isinstance(upx, int) and isinstance(upy, int)
assert isinstance(downx, int) and isinstance(downy, int)
assert isinstance(padx0, int) and isinstance(padx1, int)
assert isinstance(pady0, int) and isinstance(pady1, int)
# Upsample (insert zeros).
x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim])
x = tf.pad(x, [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]])
x = tf.reshape(x, [-1, inH * upy, inW * upx, minorDim])
# Pad (crop if negative).
x = tf.pad(x, [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]])
x = x[:, max(-pady0, 0) : x.shape[1].value - max(-pady1, 0), max(-padx0, 0) : x.shape[2].value - max(-padx1, 0), :]
# Convolve with filter.
x = tf.transpose(x, [0, 3, 1, 2])
x = tf.reshape(x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1])
w = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype)
x = tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='VALID', data_format='NCHW')
x = tf.reshape(x, [-1, minorDim, inH * upy + pady0 + pady1 - kernelH + 1, inW * upx + padx0 + padx1 - kernelW + 1])
x = tf.transpose(x, [0, 2, 3, 1])
# Downsample (throw away pixels).
return x[:, ::downy, ::downx, :]
#----------------------------------------------------------------------------
def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1):
"""Fast CUDA implementation of `upfirdn_2d()` using custom ops."""
x = tf.convert_to_tensor(x)
k = np.asarray(k, dtype=np.float32)
majorDim, inH, inW, minorDim = x.shape.as_list()
kernelH, kernelW = k.shape
assert inW >= 1 and inH >= 1
assert kernelW >= 1 and kernelH >= 1
assert isinstance(upx, int) and isinstance(upy, int)
assert isinstance(downx, int) and isinstance(downy, int)
assert isinstance(padx0, int) and isinstance(padx1, int)
assert isinstance(pady0, int) and isinstance(pady1, int)
outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1
outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1
assert outW >= 1 and outH >= 1
kc = tf.constant(k, dtype=x.dtype)
gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype)
gpadx0 = kernelW - padx0 - 1
gpady0 = kernelH - pady0 - 1
gpadx1 = inW * upx - outW * downx + padx0 - upx + 1
gpady1 = inH * upy - outH * downy + pady0 - upy + 1
@tf.custom_gradient
def func(x):
y = _get_plugin().up_fir_dn2d(x=x, k=kc, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1)
y.set_shape([majorDim, outH, outW, minorDim])
@tf.custom_gradient
def grad(dy):
dx = _get_plugin().up_fir_dn2d(x=dy, k=gkc, upx=downx, upy=downy, downx=upx, downy=upy, padx0=gpadx0, padx1=gpadx1, pady0=gpady0, pady1=gpady1)
dx.set_shape([majorDim, inH, inW, minorDim])
return dx, func
return y, grad
return func(x)
#----------------------------------------------------------------------------
def filter_2d(x, k, gain=1, data_format='NCHW', impl='cuda'):
r"""Filter a batch of 2D images with the given FIR filter.
Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]`
and filters each image with the given filter. The filter is normalized so that
if the input pixels are constant, they will be scaled by the specified `gain`.
Pixels outside the image are assumed to be zero.
Args:
x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
gain: Scaling factor for signal magnitude (default: 1.0).
data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
Returns:
Tensor of the same shape and datatype as `x`.
"""
k = _setup_kernel(k) * gain
p = k.shape[0] - 1
return _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl)
#----------------------------------------------------------------------------
def upsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
r"""Upsample a batch of 2D images with the given filter.
Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]`
and upsamples each image with the given filter. The filter is normalized so that
if the input pixels are constant, they will be scaled by the specified `gain`.
Pixels outside the image are assumed to be zero, and the filter is padded with
zeros so that its shape is a multiple of the upsampling factor.
Args:
x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
The default is `[1] * factor`, which corresponds to nearest-neighbor
upsampling.
factor: Integer upsampling factor (default: 2).
gain: Scaling factor for signal magnitude (default: 1.0).
data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
Returns:
Tensor of the shape `[N, C, H * factor, W * factor]` or
`[N, H * factor, W * factor, C]`, and same datatype as `x`.
"""
assert isinstance(factor, int) and factor >= 1
if k is None:
k = [1] * factor
k = _setup_kernel(k) * (gain * (factor ** 2))
p = k.shape[0] - factor
return _simple_upfirdn_2d(x, k, up=factor, pad0=(p+1)//2+factor-1, pad1=p//2, data_format=data_format, impl=impl)
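# Worked example of the padding arithmetic above (annotation, not part of the original file):
# with factor=2 and k=[1,3,3,1], _setup_kernel() yields a 4x4 filter, so p = 4 - 2 = 2,
# pad0 = (p+1)//2 + factor - 1 = 2 and pad1 = p//2 = 1. upfirdn_2d() then produces
# outH = inH*2 + pad0 + pad1 - 4 + 1 = 2*inH, i.e. exactly H * factor as documented.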
#----------------------------------------------------------------------------
def downsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
r"""Downsample a batch of 2D images with the given filter.
Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]`
and downsamples each image with the given filter. The filter is normalized so that
if the input pixels are constant, they will be scaled by the specified `gain`.
Pixels outside the image are assumed to be zero, and the filter is padded with
zeros so that its shape is a multiple of the downsampling factor.
Args:
x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
The default is `[1] * factor`, which corresponds to average pooling.
factor: Integer downsampling factor (default: 2).
gain: Scaling factor for signal magnitude (default: 1.0).
data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
Returns:
Tensor of the shape `[N, C, H // factor, W // factor]` or
`[N, H // factor, W // factor, C]`, and same datatype as `x`.
"""
assert isinstance(factor, int) and factor >= 1
if k is None:
k = [1] * factor
k = _setup_kernel(k) * gain
p = k.shape[0] - factor
return _simple_upfirdn_2d(x, k, down=factor, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl)
#----------------------------------------------------------------------------
def upsample_conv_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`.
Padding is performed only once at the beginning, not between the operations.
The fused op is considerably more efficient than performing the same calculation
using standard TensorFlow ops. It supports gradients of arbitrary order.
Args:
x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`.
Grouped convolution can be performed by setting `inChannels` to the channel dimension of `x` divided by `numGroups`.
k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
The default is `[1] * factor`, which corresponds to nearest-neighbor
upsampling.
factor: Integer upsampling factor (default: 2).
gain: Scaling factor for signal magnitude (default: 1.0).
data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
Returns:
Tensor of the shape `[N, C, H * factor, W * factor]` or
`[N, H * factor, W * factor, C]`, and same datatype as `x`.
"""
assert isinstance(factor, int) and factor >= 1
# Check weight shape.
w = tf.convert_to_tensor(w)
assert w.shape.rank == 4
convH = w.shape[0].value
convW = w.shape[1].value
inC = _shape(w, 2)
outC = _shape(w, 3)
assert convW == convH
# Setup filter kernel.
if k is None:
k = [1] * factor
k = _setup_kernel(k) * (gain * (factor ** 2))
p = (k.shape[0] - factor) - (convW - 1)
# Determine data dimensions.
if data_format == 'NCHW':
stride = [1, 1, factor, factor]
output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + convH, (_shape(x, 3) - 1) * factor + convW]
num_groups = _shape(x, 1) // inC
else:
stride = [1, factor, factor, 1]
output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + convH, (_shape(x, 2) - 1) * factor + convW, outC]
num_groups = _shape(x, 3) // inC
# Transpose weights.
w = tf.reshape(w, [convH, convW, inC, num_groups, -1])
w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2])
w = tf.reshape(w, [convH, convW, -1, num_groups * inC])
# Execute.
x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format)
return _simple_upfirdn_2d(x, k, pad0=(p+1)//2+factor-1, pad1=p//2+1, data_format=data_format, impl=impl)
#----------------------------------------------------------------------------
def conv_downsample_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`.
Padding is performed only once at the beginning, not between the operations.
The fused op is considerably more efficient than performing the same calculation
using standard TensorFlow ops. It supports gradients of arbitrary order.
Args:
x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`.
Grouped convolution can be performed by setting `inChannels` to the channel dimension of `x` divided by `numGroups`.
k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
The default is `[1] * factor`, which corresponds to average pooling.
factor: Integer downsampling factor (default: 2).
gain: Scaling factor for signal magnitude (default: 1.0).
data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
Returns:
Tensor of the shape `[N, C, H // factor, W // factor]` or
`[N, H // factor, W // factor, C]`, and same datatype as `x`.
"""
assert isinstance(factor, int) and factor >= 1
w = tf.convert_to_tensor(w)
convH, convW, _inC, _outC = w.shape.as_list()
assert convW == convH
if k is None:
k = [1] * factor
k = _setup_kernel(k) * gain
p = (k.shape[0] - factor) + (convW - 1)
if data_format == 'NCHW':
s = [1, 1, factor, factor]
else:
s = [1, factor, factor, 1]
x = _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl)
return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format)
#----------------------------------------------------------------------------
# Internal helper funcs.
def _shape(tf_expr, dim_idx):
if tf_expr.shape.rank is not None:
dim = tf_expr.shape[dim_idx].value
if dim is not None:
return dim
return tf.shape(tf_expr)[dim_idx]
def _setup_kernel(k):
k = np.asarray(k, dtype=np.float32)
if k.ndim == 1:
k = np.outer(k, k)
k /= np.sum(k)
assert k.ndim == 2
assert k.shape[0] == k.shape[1]
return k
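# Example (annotation, not part of the original file): _setup_kernel([1, 3, 3, 1]) forms the 4x4
# outer product of the separable taps and normalizes it to sum to 1, so that a constant input is
# preserved before the explicit `gain` scaling applied by the callers above.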
def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'):
assert data_format in ['NCHW', 'NHWC']
assert x.shape.rank == 4
y = x
if data_format == 'NCHW':
y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1])
y = upfirdn_2d(y, k, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl)
if data_format == 'NCHW':
y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)])
return y
#----------------------------------------------------------------------------
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Helper wrapper for a Tensorflow optimizer."""
import numpy as np
import tensorflow as tf
from collections import OrderedDict
from typing import List, Union
from . import autosummary
from . import tfutil
from .. import util
from .tfutil import TfExpression, TfExpressionEx
try:
# TensorFlow 1.13
from tensorflow.python.ops import nccl_ops
except ImportError:
# Older TensorFlow versions
import tensorflow.contrib.nccl as nccl_ops
class Optimizer:
"""A Wrapper for tf.train.Optimizer.
Automatically takes care of:
- Gradient averaging for multi-GPU training.
- Gradient accumulation for arbitrarily large minibatches.
- Dynamic loss scaling and typecasts for FP16 training.
- Ignoring corrupted gradients that contain NaNs/Infs.
- Reporting statistics.
- Well-chosen default settings.
"""
def __init__(self,
name: str = "Train", # Name string that will appear in TensorFlow graph.
tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class.
learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time.
minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients.
share: "Optimizer" = None, # Share internal state with a previously created optimizer?
use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training?
loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor.
loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow.
loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow.
report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard?
**kwargs):
# Public fields.
self.name = name
self.learning_rate = learning_rate
self.minibatch_multiplier = minibatch_multiplier
self.id = self.name.replace("/", ".")
self.scope = tf.get_default_graph().unique_name(self.id)
self.optimizer_class = util.get_obj_by_name(tf_optimizer)
self.optimizer_kwargs = dict(kwargs)
self.use_loss_scaling = use_loss_scaling
self.loss_scaling_init = loss_scaling_init
self.loss_scaling_inc = loss_scaling_inc
self.loss_scaling_dec = loss_scaling_dec
# Private fields.
self._updates_applied = False
self._devices = OrderedDict() # device_name => EasyDict()
self._shared_optimizers = OrderedDict() # device_name => optimizer_class
self._gradient_shapes = None # [shape, ...]
self._report_mem_usage = report_mem_usage
# Validate arguments.
assert callable(self.optimizer_class)
# Share internal state if requested.
if share is not None:
assert isinstance(share, Optimizer)
assert self.optimizer_class is share.optimizer_class
assert self.learning_rate is share.learning_rate
assert self.optimizer_kwargs == share.optimizer_kwargs
self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access
def _get_device(self, device_name: str):
"""Get internal state for the given TensorFlow device."""
tfutil.assert_tf_initialized()
if device_name in self._devices:
return self._devices[device_name]
# Initialize fields.
device = util.EasyDict()
device.name = device_name
device.optimizer = None # Underlying optimizer: optimizer_class
device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable
device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...]
device.grad_clean = OrderedDict() # Clean gradients: var => grad
device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable
device.grad_acc_count = None # Accumulation counter: tf.Variable
device.grad_acc = OrderedDict() # Accumulated gradients: var => grad
# Setup TensorFlow objects.
with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None):
if device_name not in self._shared_optimizers:
optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers)
self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs)
device.optimizer = self._shared_optimizers[device_name]
if self.use_loss_scaling:
device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var")
# Register device.
self._devices[device_name] = device
return device
def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None:
"""Register the gradients of the given loss function with respect to the given variables.
Intended to be called once per GPU."""
tfutil.assert_tf_initialized()
assert not self._updates_applied
device = self._get_device(loss.device)
# Validate trainables.
if isinstance(trainable_vars, dict):
trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars
assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1
assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss])
assert all(var.device == device.name for var in trainable_vars)
# Validate shapes.
if self._gradient_shapes is None:
self._gradient_shapes = [var.shape.as_list() for var in trainable_vars]
assert len(trainable_vars) == len(self._gradient_shapes)
assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes))
# Report memory usage if requested.
deps = []
if self._report_mem_usage:
self._report_mem_usage = False
try:
with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]):
deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30))
except tf.errors.NotFoundError:
pass
# Compute gradients.
with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps):
loss = self.apply_loss_scaling(tf.cast(loss, tf.float32))
gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage
grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate)
# Register gradients.
for grad, var in grad_list:
if var not in device.grad_raw:
device.grad_raw[var] = []
device.grad_raw[var].append(grad)
def apply_updates(self, allow_no_op: bool = False) -> tf.Operation:
"""Construct training op to update the registered variables based on their gradients."""
tfutil.assert_tf_initialized()
assert not self._updates_applied
self._updates_applied = True
all_ops = []
# Check for no-op.
if allow_no_op and len(self._devices) == 0:
with tfutil.absolute_name_scope(self.scope):
return tf.no_op(name='TrainingOp')
# Clean up gradients.
for device_idx, device in enumerate(self._devices.values()):
with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name):
for var, grad in device.grad_raw.items():
# Filter out disconnected gradients and convert to float32.
grad = [g for g in grad if g is not None]
grad = [tf.cast(g, tf.float32) for g in grad]
# Sum within the device.
if len(grad) == 0:
grad = tf.zeros(var.shape) # No gradients => zero.
elif len(grad) == 1:
grad = grad[0] # Single gradient => use as is.
else:
grad = tf.add_n(grad) # Multiple gradients => sum.
# Scale as needed.
scale = 1.0 / len(device.grad_raw[var]) / len(self._devices)
scale = tf.constant(scale, dtype=tf.float32, name="scale")
if self.minibatch_multiplier is not None:
scale /= tf.cast(self.minibatch_multiplier, tf.float32)
scale = self.undo_loss_scaling(scale)
device.grad_clean[var] = grad * scale
# Sum gradients across devices.
if len(self._devices) > 1:
with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None):
for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]):
if len(all_vars) > 0 and all(dim > 0 for dim in all_vars[0].shape.as_list()): # NCCL does not support zero-sized tensors.
all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)]
all_grads = nccl_ops.all_sum(all_grads)
for device, var, grad in zip(self._devices.values(), all_vars, all_grads):
device.grad_clean[var] = grad
# Apply updates separately on each device.
for device_idx, device in enumerate(self._devices.values()):
with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name):
# pylint: disable=cell-var-from-loop
# Accumulate gradients over time.
if self.minibatch_multiplier is None:
acc_ok = tf.constant(True, name='acc_ok')
device.grad_acc = OrderedDict(device.grad_clean)
else:
# Create variables.
with tf.control_dependencies(None):
for var in device.grad_clean.keys():
device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var")
device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count")
# Track counter.
count_cur = device.grad_acc_count + 1.0
count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur)
count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([]))
acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32))
all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op))
# Track gradients.
for var, grad in device.grad_clean.items():
acc_var = device.grad_acc_vars[var]
acc_cur = acc_var + grad
device.grad_acc[var] = acc_cur
with tf.control_dependencies([acc_cur]):
acc_inc_op = lambda: tf.assign(acc_var, acc_cur)
acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape))
all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op))
# No overflow => apply gradients.
all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()]))
apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()])
all_ops.append(tf.cond(all_ok, apply_op, tf.no_op))
# Adjust loss scaling.
if self.use_loss_scaling:
ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc)
ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec)
ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op))
all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op))
# Last device => report statistics.
if device_idx == len(self._devices) - 1:
all_ops.append(autosummary.autosummary(self.id + "/learning_rate", self.learning_rate))
all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok))
if self.use_loss_scaling:
all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var))
# Initialize variables.
self.reset_optimizer_state()
if self.use_loss_scaling:
tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()])
if self.minibatch_multiplier is not None:
tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]])
# Group everything into a single op.
with tfutil.absolute_name_scope(self.scope):
return tf.group(*all_ops, name="TrainingOp")
def reset_optimizer_state(self) -> None:
"""Reset internal state of the underlying optimizer."""
tfutil.assert_tf_initialized()
tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()])
def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]:
"""Get or create variable representing log2 of the current dynamic loss scaling factor."""
return self._get_device(device).loss_scaling_var
def apply_loss_scaling(self, value: TfExpression) -> TfExpression:
"""Apply dynamic loss scaling for the given expression."""
assert tfutil.is_tf_expression(value)
if not self.use_loss_scaling:
return value
return value * tfutil.exp2(self.get_loss_scaling_var(value.device))
def undo_loss_scaling(self, value: TfExpression) -> TfExpression:
"""Undo the effect of dynamic loss scaling for the given expression."""
assert tfutil.is_tf_expression(value)
if not self.use_loss_scaling:
return value
return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type
class SimpleAdam:
"""Simplified version of tf.train.AdamOptimizer that behaves identically when used with dnnlib.tflib.Optimizer."""
def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
self.name = name
self.learning_rate = learning_rate
self.beta1 = beta1
self.beta2 = beta2
self.epsilon = epsilon
self.all_state_vars = []
def variables(self):
return self.all_state_vars
def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE):
assert gate_gradients == tf.train.Optimizer.GATE_NONE
return list(zip(tf.gradients(loss, var_list), var_list))
def apply_gradients(self, grads_and_vars):
with tf.name_scope(self.name):
state_vars = []
update_ops = []
# Adjust learning rate to deal with startup bias.
with tf.control_dependencies(None):
b1pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
b2pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
state_vars += [b1pow_var, b2pow_var]
b1pow_new = b1pow_var * self.beta1
b2pow_new = b2pow_var * self.beta2
update_ops += [tf.assign(b1pow_var, b1pow_new), tf.assign(b2pow_var, b2pow_new)]
lr_new = self.learning_rate * tf.sqrt(1 - b2pow_new) / (1 - b1pow_new)
# Construct ops to update each variable.
for grad, var in grads_and_vars:
with tf.control_dependencies(None):
m_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False)
v_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False)
state_vars += [m_var, v_var]
m_new = self.beta1 * m_var + (1 - self.beta1) * grad
v_new = self.beta2 * v_var + (1 - self.beta2) * tf.square(grad)
var_delta = lr_new * m_new / (tf.sqrt(v_new) + self.epsilon)
update_ops += [tf.assign(m_var, m_new), tf.assign(v_var, v_new), tf.assign_sub(var, var_delta)]
# Group everything together.
self.all_state_vars += state_vars
return tf.group(*update_ops)
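# Hedged usage sketch (illustration only, not part of the original file): how the
# Optimizer class above is typically driven. The toy variable and loss below are
# stand-ins for a real network's trainables and loss expression.
if __name__ == "__main__":
    tfutil.init_tf()
    with tf.device("/cpu:0"):
        w = tf.Variable(tf.zeros([4]), name="toy_weight")
        loss = tf.reduce_sum(tf.square(w - 1.0))
    opt = Optimizer(name="ToyTrain", learning_rate=0.1)
    opt.register_gradients(loss, [w])   # call once per GPU in multi-GPU training
    train_op = opt.apply_updates()      # build the op that applies the accumulated gradients
    tfutil.init_uninitialized_vars()    # initialize the toy variable and any summary state
    for _ in range(10):
        tfutil.run(train_op)            # each call performs one optimization step
    print(tfutil.run(w))                # values have moved from 0 toward 1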
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Miscellaneous helper utils for Tensorflow."""
import os
import numpy as np
import tensorflow as tf
# Silence deprecation warnings from TensorFlow 1.13 onwards
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
import tensorflow.contrib # requires TensorFlow 1.x!
tf.contrib = tensorflow.contrib
from typing import Any, Iterable, List, Union
TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation]
"""A type that represents a valid Tensorflow expression."""
TfExpressionEx = Union[TfExpression, int, float, np.ndarray]
"""A type that can be converted to a valid Tensorflow expression."""
def run(*args, **kwargs) -> Any:
"""Run the specified ops in the default session."""
assert_tf_initialized()
return tf.get_default_session().run(*args, **kwargs)
def is_tf_expression(x: Any) -> bool:
"""Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation."""
return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation))
def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]:
"""Convert a Tensorflow shape to a list of ints. Retained for backwards compatibility -- use TensorShape.as_list() in new code."""
return [dim.value for dim in shape]
def flatten(x: TfExpressionEx) -> TfExpression:
"""Shortcut function for flattening a tensor."""
with tf.name_scope("Flatten"):
return tf.reshape(x, [-1])
def log2(x: TfExpressionEx) -> TfExpression:
"""Logarithm in base 2."""
with tf.name_scope("Log2"):
return tf.log(x) * np.float32(1.0 / np.log(2.0))
def exp2(x: TfExpressionEx) -> TfExpression:
"""Exponent in base 2."""
with tf.name_scope("Exp2"):
return tf.exp(x * np.float32(np.log(2.0)))
def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx:
"""Linear interpolation."""
with tf.name_scope("Lerp"):
return a + (b - a) * t
def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression:
"""Linear interpolation with clip."""
with tf.name_scope("LerpClip"):
return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0)
def absolute_name_scope(scope: str) -> tf.name_scope:
"""Forcefully enter the specified name scope, ignoring any surrounding scopes."""
return tf.name_scope(scope + "/")
def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
"""Forcefully enter the specified variable scope, ignoring any surrounding scopes."""
return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False)
def _sanitize_tf_config(config_dict: dict = None) -> dict:
# Defaults.
cfg = dict()
cfg["rnd.np_random_seed"] = None # Random seed for NumPy. None = keep as is.
cfg["rnd.tf_random_seed"] = "auto" # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is.
cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1" # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info.
cfg["graph_options.place_pruned_graph"] = True # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used.
cfg["gpu_options.allow_growth"] = True # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed.
# Remove defaults for environment variables that are already set.
for key in list(cfg):
fields = key.split(".")
if fields[0] == "env":
assert len(fields) == 2
if fields[1] in os.environ:
del cfg[key]
# User overrides.
if config_dict is not None:
cfg.update(config_dict)
return cfg
def init_tf(config_dict: dict = None) -> None:
"""Initialize TensorFlow session using good default settings."""
# Skip if already initialized.
if tf.get_default_session() is not None:
return
# Setup config dict and random seeds.
cfg = _sanitize_tf_config(config_dict)
np_random_seed = cfg["rnd.np_random_seed"]
if np_random_seed is not None:
np.random.seed(np_random_seed)
tf_random_seed = cfg["rnd.tf_random_seed"]
if tf_random_seed == "auto":
tf_random_seed = np.random.randint(1 << 31)
if tf_random_seed is not None:
tf.set_random_seed(tf_random_seed)
# Setup environment variables.
for key, value in cfg.items():
fields = key.split(".")
if fields[0] == "env":
assert len(fields) == 2
os.environ[fields[1]] = str(value)
# Create default TensorFlow session.
create_session(cfg, force_as_default=True)
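# Example (illustration only): init_tf() accepts overrides for any field listed in
# _sanitize_tf_config(), e.g.
#   init_tf({"rnd.np_random_seed": 1000, "gpu_options.allow_growth": False})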
def assert_tf_initialized():
"""Check that TensorFlow session has been initialized."""
if tf.get_default_session() is None:
raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().")
def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session:
"""Create tf.Session based on config dict."""
# Setup TensorFlow config proto.
cfg = _sanitize_tf_config(config_dict)
config_proto = tf.ConfigProto()
for key, value in cfg.items():
fields = key.split(".")
if fields[0] not in ["rnd", "env"]:
obj = config_proto
for field in fields[:-1]:
obj = getattr(obj, field)
setattr(obj, fields[-1], value)
# Create session.
session = tf.Session(config=config_proto)
if force_as_default:
# pylint: disable=protected-access
session._default_session = session.as_default()
session._default_session.enforce_nesting = False
session._default_session.__enter__()
return session
def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None:
"""Initialize all tf.Variables that have not already been initialized.
Equivalent to the following, but more efficient and does not bloat the tf graph:
tf.variables_initializer(tf.report_uninitialized_variables()).run()
"""
assert_tf_initialized()
if target_vars is None:
target_vars = tf.global_variables()
test_vars = []
test_ops = []
with tf.control_dependencies(None): # ignore surrounding control_dependencies
for var in target_vars:
assert is_tf_expression(var)
try:
tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0"))
except KeyError:
# Op does not exist => variable may be uninitialized.
test_vars.append(var)
with absolute_name_scope(var.name.split(":")[0]):
test_ops.append(tf.is_variable_initialized(var))
init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited]
run([var.initializer for var in init_vars])
def set_vars(var_to_value_dict: dict) -> None:
"""Set the values of given tf.Variables.
Equivalent to the following, but more efficient and does not bloat the tf graph:
    tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()])
"""
assert_tf_initialized()
ops = []
feed_dict = {}
for var, value in var_to_value_dict.items():
assert is_tf_expression(var)
try:
setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0")) # look for existing op
except KeyError:
with absolute_name_scope(var.name.split(":")[0]):
with tf.control_dependencies(None): # ignore surrounding control_dependencies
setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter") # create new setter
ops.append(setter)
feed_dict[setter.op.inputs[1]] = value
run(ops, feed_dict)
def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs):
"""Create tf.Variable with large initial value without bloating the tf graph."""
assert_tf_initialized()
assert isinstance(initial_value, np.ndarray)
zeros = tf.zeros(initial_value.shape, initial_value.dtype)
var = tf.Variable(zeros, *args, **kwargs)
set_vars({var: initial_value})
return var
def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False):
"""Convert a minibatch of images from uint8 to float32 with configurable dynamic range.
Can be used as an input transformation for Network.run().
"""
images = tf.cast(images, tf.float32)
if nhwc_to_nchw:
images = tf.transpose(images, [0, 3, 1, 2])
return images * ((drange[1] - drange[0]) / 255) + drange[0]
def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1):
"""Convert a minibatch of images from float32 to uint8 with configurable dynamic range.
Can be used as an output transformation for Network.run().
"""
images = tf.cast(images, tf.float32)
if shrink > 1:
ksize = [1, 1, shrink, shrink]
images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW")
if nchw_to_nhwc:
images = tf.transpose(images, [0, 2, 3, 1])
scale = 255 / (drange[1] - drange[0])
images = images * scale + (0.5 - drange[0] * scale)
return tf.saturate_cast(images, tf.uint8)
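# Hedged usage sketch (illustration only, not part of the original file): the typical
# call order for the helpers above, starting from a random uint8 NHWC batch.
if __name__ == "__main__":
    init_tf()                                                    # create the default session
    batch_uint8 = np.random.randint(0, 256, size=[4, 256, 256, 3], dtype=np.uint8)
    batch_float = convert_images_from_uint8(batch_uint8, drange=[-1, 1], nhwc_to_nchw=True)
    batch_back = convert_images_to_uint8(batch_float, drange=[-1, 1], nchw_to_nhwc=True)
    print(run(batch_back).shape)                                 # (4, 256, 256, 3)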
# Copyright (c) SenseTime Research. All rights reserved.
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
"""Miscellaneous utility classes and functions."""
import ctypes
import fnmatch
import importlib
import inspect
import numpy as np
import os
import shutil
import sys
import types
import io
import pickle
import re
import requests
import html
import hashlib
import glob
import tempfile
import urllib
import urllib.request
import uuid
from distutils.util import strtobool
from typing import Any, List, Tuple, Union
# Util classes
# ------------------------------------------------------------------------------------------
class EasyDict(dict):
"""Convenience class that behaves like a dict but allows access with the attribute syntax."""
def __getattr__(self, name: str) -> Any:
try:
return self[name]
except KeyError:
raise AttributeError(name)
def __setattr__(self, name: str, value: Any) -> None:
self[name] = value
def __delattr__(self, name: str) -> None:
del self[name]
class Logger(object):
"""Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file."""
def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True):
self.file = None
if file_name is not None:
self.file = open(file_name, file_mode)
self.should_flush = should_flush
self.stdout = sys.stdout
self.stderr = sys.stderr
sys.stdout = self
sys.stderr = self
def __enter__(self) -> "Logger":
return self
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
self.close()
def write(self, text: Union[str, bytes]) -> None:
"""Write text to stdout (and a file) and optionally flush."""
if isinstance(text, bytes):
text = text.decode()
if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash
return
if self.file is not None:
self.file.write(text)
self.stdout.write(text)
if self.should_flush:
self.flush()
def flush(self) -> None:
"""Flush written text to both stdout and a file, if open."""
if self.file is not None:
self.file.flush()
self.stdout.flush()
def close(self) -> None:
"""Flush, close possible files, and remove stdout/stderr mirroring."""
self.flush()
# if using multiple loggers, prevent closing in wrong order
if sys.stdout is self:
sys.stdout = self.stdout
if sys.stderr is self:
sys.stderr = self.stderr
if self.file is not None:
self.file.close()
self.file = None
# Cache directories
# ------------------------------------------------------------------------------------------
_dnnlib_cache_dir = None
def set_cache_dir(path: str) -> None:
global _dnnlib_cache_dir
_dnnlib_cache_dir = path
def make_cache_dir_path(*paths: str) -> str:
if _dnnlib_cache_dir is not None:
return os.path.join(_dnnlib_cache_dir, *paths)
if 'DNNLIB_CACHE_DIR' in os.environ:
return os.path.join(os.environ['DNNLIB_CACHE_DIR'], *paths)
if 'HOME' in os.environ:
return os.path.join(os.environ['HOME'], '.cache', 'dnnlib', *paths)
if 'USERPROFILE' in os.environ:
return os.path.join(os.environ['USERPROFILE'], '.cache', 'dnnlib', *paths)
return os.path.join(tempfile.gettempdir(), '.cache', 'dnnlib', *paths)
# Small util functions
# ------------------------------------------------------------------------------------------
def format_time(seconds: Union[int, float]) -> str:
"""Convert the seconds to human readable string with days, hours, minutes and seconds."""
s = int(np.rint(seconds))
if s < 60:
return "{0}s".format(s)
elif s < 60 * 60:
return "{0}m {1:02}s".format(s // 60, s % 60)
elif s < 24 * 60 * 60:
return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60)
else:
return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60)
def ask_yes_no(question: str) -> bool:
"""Ask the user the question until the user inputs a valid answer."""
while True:
try:
print("{0} [y/n]".format(question))
return strtobool(input().lower())
except ValueError:
pass
def tuple_product(t: Tuple) -> Any:
"""Calculate the product of the tuple elements."""
result = 1
for v in t:
result *= v
return result
_str_to_ctype = {
"uint8": ctypes.c_ubyte,
"uint16": ctypes.c_uint16,
"uint32": ctypes.c_uint32,
"uint64": ctypes.c_uint64,
"int8": ctypes.c_byte,
"int16": ctypes.c_int16,
"int32": ctypes.c_int32,
"int64": ctypes.c_int64,
"float32": ctypes.c_float,
"float64": ctypes.c_double
}
def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]:
"""Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes."""
type_str = None
if isinstance(type_obj, str):
type_str = type_obj
elif hasattr(type_obj, "__name__"):
type_str = type_obj.__name__
elif hasattr(type_obj, "name"):
type_str = type_obj.name
else:
raise RuntimeError("Cannot infer type name from input")
assert type_str in _str_to_ctype.keys()
my_dtype = np.dtype(type_str)
my_ctype = _str_to_ctype[type_str]
assert my_dtype.itemsize == ctypes.sizeof(my_ctype)
return my_dtype, my_ctype
def is_pickleable(obj: Any) -> bool:
try:
with io.BytesIO() as stream:
pickle.dump(obj, stream)
return True
except:
return False
# Functionality to import modules/objects by name, and call functions by name
# ------------------------------------------------------------------------------------------
def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]:
"""Searches for the underlying module behind the name to some python object.
Returns the module and the object name (original name with module part removed)."""
# allow convenience shorthands, substitute them by full names
obj_name = re.sub("^np.", "numpy.", obj_name)
obj_name = re.sub("^tf.", "tensorflow.", obj_name)
# list alternatives for (module_name, local_obj_name)
parts = obj_name.split(".")
name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)]
# try each alternative in turn
for module_name, local_obj_name in name_pairs:
try:
module = importlib.import_module(module_name) # may raise ImportError
get_obj_from_module(module, local_obj_name) # may raise AttributeError
return module, local_obj_name
except:
pass
# maybe some of the modules themselves contain errors?
for module_name, _local_obj_name in name_pairs:
try:
importlib.import_module(module_name) # may raise ImportError
except ImportError:
if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"):
raise
# maybe the requested attribute is missing?
for module_name, local_obj_name in name_pairs:
try:
module = importlib.import_module(module_name) # may raise ImportError
get_obj_from_module(module, local_obj_name) # may raise AttributeError
except ImportError:
pass
# we are out of luck, but we have no idea why
raise ImportError(obj_name)
def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any:
"""Traverses the object name and returns the last (rightmost) python object."""
if obj_name == '':
return module
obj = module
for part in obj_name.split("."):
obj = getattr(obj, part)
return obj
def get_obj_by_name(name: str) -> Any:
"""Finds the python object with the given name."""
module, obj_name = get_module_from_obj_name(name)
return get_obj_from_module(module, obj_name)
def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any:
"""Finds the python object with the given name and calls it as a function."""
assert func_name is not None
# print('func_name: ', func_name) #'training.dataset.ImageFolderDataset'
func_obj = get_obj_by_name(func_name)
assert callable(func_obj)
return func_obj(*args, **kwargs)
def construct_class_by_name(*args, class_name: str = None, **kwargs) -> Any:
"""Finds the python class with the given name and constructs it with the given arguments."""
return call_func_by_name(*args, func_name=class_name, **kwargs)
def get_module_dir_by_obj_name(obj_name: str) -> str:
"""Get the directory path of the module containing the given object name."""
module, _ = get_module_from_obj_name(obj_name)
return os.path.dirname(inspect.getfile(module))
def is_top_level_function(obj: Any) -> bool:
"""Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'."""
return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__
def get_top_level_function_name(obj: Any) -> str:
"""Return the fully-qualified name of a top-level function."""
assert is_top_level_function(obj)
module = obj.__module__
if module == '__main__':
module = os.path.splitext(os.path.basename(sys.modules[module].__file__))[0]
return module + "." + obj.__name__
# File system helpers
# ------------------------------------------------------------------------------------------
def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]:
"""List all files recursively in a given directory while ignoring given file and directory names.
Returns list of tuples containing both absolute and relative paths."""
assert os.path.isdir(dir_path)
base_name = os.path.basename(os.path.normpath(dir_path))
if ignores is None:
ignores = []
result = []
for root, dirs, files in os.walk(dir_path, topdown=True):
for ignore_ in ignores:
dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)]
# dirs need to be edited in-place
for d in dirs_to_remove:
dirs.remove(d)
files = [f for f in files if not fnmatch.fnmatch(f, ignore_)]
absolute_paths = [os.path.join(root, f) for f in files]
relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths]
if add_base_to_relative:
relative_paths = [os.path.join(base_name, p) for p in relative_paths]
assert len(absolute_paths) == len(relative_paths)
result += zip(absolute_paths, relative_paths)
return result
def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None:
"""Takes in a list of tuples of (src, dst) paths and copies files.
Will create all necessary directories."""
for file in files:
target_dir_name = os.path.dirname(file[1])
# will create all intermediate-level directories
if not os.path.exists(target_dir_name):
os.makedirs(target_dir_name)
shutil.copyfile(file[0], file[1])
# URL helpers
# ------------------------------------------------------------------------------------------
def is_url(obj: Any, allow_file_urls: bool = False) -> bool:
"""Determine whether the given object is a valid URL string."""
if not isinstance(obj, str) or not "://" in obj:
return False
if allow_file_urls and obj.startswith('file://'):
return True
try:
res = requests.compat.urlparse(obj)
if not res.scheme or not res.netloc or not "." in res.netloc:
return False
res = requests.compat.urlparse(requests.compat.urljoin(obj, "/"))
if not res.scheme or not res.netloc or not "." in res.netloc:
return False
except:
return False
return True
def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True, return_filename: bool = False, cache: bool = True) -> Any:
"""Download the given URL and return a binary-mode file object to access the data."""
assert num_attempts >= 1
assert not (return_filename and (not cache))
    # Doesn't look like a URL scheme, so interpret it as a local filename.
if not re.match('^[a-z]+://', url):
return url if return_filename else open(url, "rb")
# Handle file URLs. This code handles unusual file:// patterns that
# arise on Windows:
#
# file:///c:/foo.txt
#
# which would translate to a local '/c:/foo.txt' filename that's
# invalid. Drop the forward slash for such pathnames.
#
# If you touch this code path, you should test it on both Linux and
# Windows.
#
    # Some internet resources suggest using urllib.request.url2pathname(),
    # but that converts forward slashes to backslashes and this causes
# its own set of problems.
if url.startswith('file://'):
filename = urllib.parse.urlparse(url).path
if re.match(r'^/[a-zA-Z]:', filename):
filename = filename[1:]
return filename if return_filename else open(filename, "rb")
assert is_url(url)
# Lookup from cache.
if cache_dir is None:
cache_dir = make_cache_dir_path('downloads')
url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest()
if cache:
cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*"))
if len(cache_files) == 1:
filename = cache_files[0]
return filename if return_filename else open(filename, "rb")
# Download.
url_name = None
url_data = None
with requests.Session() as session:
if verbose:
print("Downloading %s ..." % url, end="", flush=True)
for attempts_left in reversed(range(num_attempts)):
try:
with session.get(url) as res:
res.raise_for_status()
if len(res.content) == 0:
raise IOError("No data received")
if len(res.content) < 8192:
content_str = res.content.decode("utf-8")
if "download_warning" in res.headers.get("Set-Cookie", ""):
links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link]
if len(links) == 1:
url = requests.compat.urljoin(url, links[0])
raise IOError("Google Drive virus checker nag")
if "Google Drive - Quota exceeded" in content_str:
raise IOError("Google Drive download quota exceeded -- please try again later")
match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", ""))
url_name = match[1] if match else url
url_data = res.content
if verbose:
print(" done")
break
except KeyboardInterrupt:
raise
except:
if not attempts_left:
if verbose:
print(" failed")
raise
if verbose:
print(".", end="", flush=True)
# Save to cache.
if cache:
safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name)
cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name)
temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name)
os.makedirs(cache_dir, exist_ok=True)
with open(temp_file, "wb") as f:
f.write(url_data)
os.replace(temp_file, cache_file) # atomic
if return_filename:
return cache_file
# Return data as file object.
assert not return_filename
return io.BytesIO(url_data)
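# Hedged usage sketch (illustration only, not part of the original file): a few of the
# helpers above in action.
if __name__ == "__main__":
    cfg = EasyDict(lr=0.002, batch=32)        # attribute-style access to dict entries
    cfg.gamma = 10.0
    print(cfg.lr, cfg["batch"], cfg.gamma)    # 0.002 32 10.0
    print(format_time(3700))                  # 1h 01m 40s
    print(make_cache_dir_path("downloads"))   # directory used by open_url() for caching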
# SHHQ Dataset
<img src="../img/preview_samples1.png" width="96%" height="96%">
## Overview
SHHQ is a dataset of high-quality full-body human images at a resolution of 1024 × 512.
Because the data must pass a rigorous legal review at our institute, we cannot release all of it at once.
For now, SHHQ-1.0 with 40K images is released! More data will be released in later versions.
## Data Sources
Images are collected in two main ways:
1) From the Internet.
We developed a crawler tool built on official APIs, mainly downloading images from Flickr, Pixabay and Pexels. When using the dataset you therefore need to comply with all of the following licenses: CC0, the [Pixabay License](https://pixabay.com/service/license/), and the [Pexels License](https://www.pexels.com/license/).
2) From the data providers.
We purchased images from databases of individual photographers, modeling agencies and other suppliers.
Images were reviewed by our legal team prior to purchase to ensure permission for use in research.
### Note:
SHHQ-1.0 is composed of:
1) Images obtained from the sources above.
2) 9,991 processed DeepFashion [[1]](#1) images (only full-body images are retained).
3) 1,940 images from the InFashAI [[2]](#2) dataset (African fashion), included to increase data diversity.
## Data License
We are aware of privacy concerns and take licensing and privacy issues seriously. All released data will be provided under the CC0 license and is free for research use. In addition, persons in the dataset are anonymised, and no additional private or sensitive metadata is included.
## Agreement
SHHQ is available for non-commercial research purposes only.
You agree not to reproduce, duplicate, copy, sell, trade, resell or exploit any portion of the images and any portion of the derived data for commercial purposes.
You agree NOT to further copy, publish or distribute any portion of SHHQ to any third party for any purpose. The only exception is that copies of the dataset may be made for internal use at a single site within the same organization.
Shanghai AI Lab reserves the right to terminate your access to the SHHQ at any time.
## Dataset Preview
For those interested in our dataset, we provide a preview version with 100 images randomly sampled from SHHQ-1.0: [SHHQ-1.0_samples](https://drive.google.com/file/d/1tnNFfmFtzRbYL3qEnNXQ_ShaN9YV5tI5/view?usp=sharing).
In SHHQ-1.0, we provide aligned raw images along with machine-calculated segmentation masks. We also plan to release a manually annotated human-parsing version of these 40,000 images later. Please stay tuned.
> We also provide the script [bg_white.py](../bg_white.py), which whitens the background of a raw image using its segmentation mask.
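The core of such background whitening is just an alpha blend between the image and a white canvas. The sketch below illustrates the idea under assumed conventions (file names are placeholders; the mask is assumed to be single-channel, with 255 on the person and 0 elsewhere); it is not the actual `bg_white.py`:

```python
import numpy as np
from PIL import Image

# Load the aligned raw image and its machine-calculated segmentation mask
# (assumed single-channel: 255 on the person, 0 on the background).
img = np.asarray(Image.open("raw.png").convert("RGB"), dtype=np.float32)
mask = np.asarray(Image.open("mask.png").convert("L"), dtype=np.float32) / 255.0

# Keep the person, replace everything else with white.
out = img * mask[..., None] + 255.0 * (1.0 - mask[..., None])
Image.fromarray(out.astype(np.uint8)).save("raw_bg_white.png")
```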
If you want to access the full SHHQ-1.0, please read the following instructions.
## Models trained on SHHQ-1.0
| Architecture | 1024x512 model | Metric | Score | 512x256 model | Metric | Score |
| --------- |:----------:| :----------:| :----------:| :-----: | :-----: | :-----: |
| StyleGAN1 | to be released | - | - | to be released | - | - |
| StyleGAN2 | [SHHQ-1.0_sg2_1024.pkl](https://drive.google.com/file/d/1PuvE72xpc69Zq4y58dohuKbG9dFnnjEX/view?usp=sharing) | fid50k_full | 3.56 | [SHHQ-1.0_sg2_512.pkl](https://drive.google.com/file/d/170t2FRWxR8_TG3_y0nVtDBogLPOClnyf/view?usp=sharing) | fid50k_full | 3.68 |
| StyleGAN3 | to be released | - | - |to be released | - | - |
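For reference, below is a minimal sketch of how such a checkpoint might be loaded and sampled, assuming a StyleGAN2-ADA-style PyTorch setup with this repository's `dnnlib` and `legacy` modules on the path; the checkpoint path is a placeholder and the exact loading code may differ:

```python
import numpy as np
import torch
import dnnlib
import legacy  # StyleGAN2-ADA-style loader, assumed to be available in this repository

device = torch.device('cuda')
with dnnlib.util.open_url('SHHQ-1.0_sg2_1024.pkl') as f:      # placeholder path/URL
    G = legacy.load_network_pkl(f)['G_ema'].to(device)        # EMA generator weights

z = torch.from_numpy(np.random.RandomState(42).randn(1, G.z_dim)).to(device)
label = torch.zeros([1, G.c_dim], device=device)              # unconditional model
img = G(z, label, truncation_psi=0.7, noise_mode='const')     # NCHW, values roughly in [-1, 1]
```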
## Download Instructions
Please download the SHHQ Dataset Release Agreement from [link](./SHHQ_Dataset_Release_Agreement.pdf).
Read it carefully, then complete and sign it.
Please send the completed form to Jianglin Fu (arlenefu@outlook.com) and Shikai Li (lishikai@pjlab.org.cn), and cc Wayne Wu (wuwenyan0503@gmail.com), using an institutional email address. The email subject should be "SHHQ Dataset Release Agreement". We will verify your request and contact you with the dataset link and the password to unzip the image data.
Note:
1. We are currently receiving a large number of applications and need to verify each applicant carefully. Please be patient; we will reply to you as soon as possible.
2. The signature in the agreement should be hand-written.
## References
<a id="1">[1]</a>
Liu, Ziwei and Luo, Ping and Qiu, Shi and Wang, Xiaogang and Tang, Xiaoou. DeepFashion: Powering Robust Clothes Recognition and Retrieval with Rich Annotations. CVPR (2016)
<a id="2">[2]</a>
Hacheme, Gilles and Sayouti, Noureini. Neural fashion image captioning: Accounting for data diversity. arXiv preprint arXiv:2106.12154 (2021)